first commit
This commit is contained in:
5
nodes/infers/README.md
Normal file
5
nodes/infers/README.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# Summary
|
||||
|
||||
1. If the inference node works on the whole frame, let it derived from `vp_primary_infer_node` class.
|
||||
2. If the inference node works on the small cropped images, let it derived from `vp_secondary_infer_node` class.
|
||||
3. We can define multi derived classes to handle different types of dl models (`detector/pose estimation/classification`). Also, if they work on the same type of target AND with the same logic (for example, hava the same `preprocess/postprocess`), we can use a unique class to load different deep learning models (like resnet18 and resnet50).
|
||||
99
nodes/infers/vp_classifier_node.cpp
Executable file
99
nodes/infers/vp_classifier_node.cpp
Executable file
@@ -0,0 +1,99 @@
|
||||
|
||||
|
||||
#include "vp_classifier_node.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_classifier_node::vp_classifier_node(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string model_config_path,
|
||||
std::string labels_path,
|
||||
int input_width,
|
||||
int input_height,
|
||||
int batch_size,
|
||||
std::vector<int> p_class_ids_applied_to,
|
||||
int min_width_applied_to,
|
||||
int min_height_applied_to,
|
||||
int crop_padding,
|
||||
bool need_softmax,
|
||||
float scale,
|
||||
cv::Scalar mean,
|
||||
cv::Scalar std,
|
||||
bool swap_rb,
|
||||
bool swap_chn):
|
||||
vp_secondary_infer_node(node_name,
|
||||
model_path,
|
||||
model_config_path,
|
||||
labels_path,
|
||||
input_width,
|
||||
input_height,
|
||||
batch_size,
|
||||
p_class_ids_applied_to,
|
||||
min_width_applied_to,
|
||||
min_height_applied_to,
|
||||
crop_padding,
|
||||
scale,
|
||||
mean,
|
||||
std,
|
||||
swap_rb,
|
||||
swap_chn), need_softmax(need_softmax){
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_classifier_node::~vp_classifier_node() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
void vp_classifier_node::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
// make sure heads of output are not zero
|
||||
assert(raw_outputs.size() > 0);
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
|
||||
// just one head of output
|
||||
auto& output = raw_outputs[0];
|
||||
assert(output.dims == 2);
|
||||
|
||||
auto count = output.rows;
|
||||
auto index = 0;
|
||||
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
cv::Point class_id_point;
|
||||
int class_id;
|
||||
double score;
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
for (int j = index; j < frame_meta->targets.size(); j++)
|
||||
{
|
||||
if (!need_apply(frame_meta->targets[j]->primary_class_id, frame_meta->targets[j]->width, frame_meta->targets[j]->height)) {
|
||||
// continue as its primary_class_id is not in p_class_ids_applied_to
|
||||
continue;
|
||||
}
|
||||
|
||||
auto prob = output.row(i);
|
||||
cv::minMaxLoc(prob, 0, &score, 0, &class_id_point);
|
||||
|
||||
if (need_softmax) {
|
||||
float maxProb = 0.0;
|
||||
float sum = 0.0;
|
||||
cv::Mat softmaxProb;
|
||||
maxProb = *std::max_element(prob.begin<float>(), prob.end<float>());
|
||||
cv::exp(prob - maxProb, softmaxProb);
|
||||
sum = (float)cv::sum(softmaxProb)[0];
|
||||
softmaxProb /= sum;
|
||||
minMaxLoc(softmaxProb.reshape(1, 1), 0, &score, 0, &class_id_point);
|
||||
}
|
||||
class_id = class_id_point.x;
|
||||
auto label = (labels.size() < class_id + 1) ? "" : labels[class_id];
|
||||
|
||||
// update back to frame meta
|
||||
frame_meta->targets[j]->secondary_class_ids.push_back(class_id);
|
||||
frame_meta->targets[j]->secondary_scores.push_back(score);
|
||||
frame_meta->targets[j]->secondary_labels.push_back(label);
|
||||
|
||||
// break as we found the right target!
|
||||
index = j + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
38
nodes/infers/vp_classifier_node.h
Executable file
38
nodes/infers/vp_classifier_node.h
Executable file
@@ -0,0 +1,38 @@
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../vp_secondary_infer_node.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// common classifier for image classification task.
|
||||
// used for image classification, update secondary_class_ids/secondary_labels/secondary_scores of vp_frame_target.
|
||||
class vp_classifier_node: public vp_secondary_infer_node
|
||||
{
|
||||
private:
|
||||
// softmax logic applied on output or not
|
||||
bool need_softmax;
|
||||
protected:
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_classifier_node(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string model_config_path = "",
|
||||
std::string labels_path = "",
|
||||
int input_width = 128,
|
||||
int input_height = 128,
|
||||
int batch_size = 1,
|
||||
std::vector<int> p_class_ids_applied_to = std::vector<int>(),
|
||||
int min_width_applied_to = 0,
|
||||
int min_height_applied_to = 0,
|
||||
int crop_padding = 10,
|
||||
bool need_softmax = true, // imagenet dataset
|
||||
float scale = 1 / 255.0,
|
||||
cv::Scalar mean = cv::Scalar(123.675, 116.28, 103.53), // imagenet dataset
|
||||
cv::Scalar std = cv::Scalar(0.229, 0.224, 0.225),
|
||||
bool swap_rb = true,
|
||||
bool swap_chn = false);
|
||||
~vp_classifier_node();
|
||||
};
|
||||
|
||||
}
|
||||
33
nodes/infers/vp_enet_seg_node.cpp
Normal file
33
nodes/infers/vp_enet_seg_node.cpp
Normal file
@@ -0,0 +1,33 @@
|
||||
|
||||
#include "vp_enet_seg_node.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_enet_seg_node::vp_enet_seg_node(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string model_config_path,
|
||||
std::string labels_path,
|
||||
int input_width,
|
||||
int input_height,
|
||||
int batch_size,
|
||||
int class_id_offset,
|
||||
float scale,
|
||||
cv::Scalar mean,
|
||||
cv::Scalar std,
|
||||
bool swap_rb):
|
||||
vp_primary_infer_node(node_name, model_path, model_config_path, labels_path, input_width, input_height, batch_size, class_id_offset, scale, mean, std, swap_rb) {
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_enet_seg_node::~vp_enet_seg_node() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
void vp_enet_seg_node::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
auto& mask = raw_outputs[0];
|
||||
|
||||
// save output as mask directly, this is the mask for the whole frame.
|
||||
frame_meta->mask = mask.clone();
|
||||
}
|
||||
}
|
||||
30
nodes/infers/vp_enet_seg_node.h
Normal file
30
nodes/infers/vp_enet_seg_node.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include "../vp_primary_infer_node.h"
|
||||
|
||||
|
||||
namespace vp_nodes {
|
||||
// semantic segmentation based on ENet
|
||||
//
|
||||
class vp_enet_seg_node: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
/* data */
|
||||
protected:
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_enet_seg_node(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string model_config_path = "",
|
||||
std::string labels_path = "",
|
||||
int input_width = 1024,
|
||||
int input_height = 512,
|
||||
int batch_size = 1,
|
||||
int class_id_offset = 0,
|
||||
float scale = 1 / 255.0,
|
||||
cv::Scalar mean = cv::Scalar(0),
|
||||
cv::Scalar std = cv::Scalar(1),
|
||||
bool swap_rb = true);
|
||||
~vp_enet_seg_node();
|
||||
};
|
||||
}
|
||||
528
nodes/infers/vp_face_swap_node.cpp
Normal file
528
nodes/infers/vp_face_swap_node.cpp
Normal file
@@ -0,0 +1,528 @@
|
||||
|
||||
#include "vp_face_swap_node.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_face_swap_node::vp_face_swap_node(std::string node_name,
|
||||
std::string yunet_face_detect_model,
|
||||
std::string buffalo_l_face_encoding_model,
|
||||
std::string emap_file_for_embeddings,
|
||||
std::string insightface_swap_model,
|
||||
std::string swap_source_image,
|
||||
int swap_source_face_index,
|
||||
int min_face_w_h,
|
||||
bool swap_on_osd,
|
||||
bool act_as_primary_detector):
|
||||
vp_primary_infer_node(node_name, ""),
|
||||
act_as_primary_detector(act_as_primary_detector),
|
||||
swap_on_osd(swap_on_osd),
|
||||
min_face_w_h(min_face_w_h) {
|
||||
/* init net*/
|
||||
face_extract_net = cv::dnn::readNetFromONNX(yunet_face_detect_model);
|
||||
face_encoding_net = cv::dnn::readNetFromONNX(buffalo_l_face_encoding_model);
|
||||
face_swap_net = cv::dnn::readNetFromONNX(insightface_swap_model);
|
||||
#ifdef VP_WITH_CUDA
|
||||
face_extract_net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
|
||||
face_extract_net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
|
||||
face_encoding_net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
|
||||
face_encoding_net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
|
||||
face_swap_net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
|
||||
face_swap_net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
|
||||
#endif
|
||||
init_source_face_embeddings(swap_source_image, swap_source_face_index, emap_file_for_embeddings);
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_face_swap_node::~vp_face_swap_node() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
// please refer to vp_infer_node::run_infer_combinations
|
||||
void vp_face_swap_node::run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
|
||||
if (frame_meta->face_targets.size() == 0) {
|
||||
if (!act_as_primary_detector) {
|
||||
return;
|
||||
}
|
||||
// extract faces and update back to frame meta
|
||||
// to-do
|
||||
}
|
||||
|
||||
auto start_time = std::chrono::system_clock::now();
|
||||
// iterate each face target
|
||||
for (int i = 0; i < frame_meta->face_targets.size(); i++) {
|
||||
if (frame_meta->face_targets[i]->width < min_face_w_h || frame_meta->face_targets[i]->height < min_face_w_h) {
|
||||
continue;
|
||||
}
|
||||
|
||||
cv::Mat aligned_face, swapped_face;
|
||||
|
||||
//auto t = std::chrono::system_clock::now();
|
||||
// align and crop first
|
||||
auto warp_mat = align_crop(frame_meta->frame, frame_meta->face_targets[i]->key_points, aligned_face);
|
||||
//auto T = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - t).count();
|
||||
|
||||
//std::cout << "1st T:" << T << std::endl;
|
||||
//t = std::chrono::system_clock::now();
|
||||
// swap
|
||||
swap(aligned_face, swapped_face);
|
||||
//T = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - t).count();
|
||||
//std::cout << "2nd T:" << T << std::endl;
|
||||
|
||||
//t = std::chrono::system_clock::now();
|
||||
// past back to frame or osd frame
|
||||
if (swap_on_osd) {
|
||||
if (frame_meta->osd_frame.empty()) {
|
||||
frame_meta->osd_frame = frame_meta->frame.clone();
|
||||
}
|
||||
}
|
||||
auto& bg = swap_on_osd ? frame_meta->osd_frame : frame_meta->frame;
|
||||
paste_back(bg, swapped_face, warp_mat);
|
||||
//T = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - t).count();
|
||||
//std::cout << "3rd T:" << T << std::endl;
|
||||
}
|
||||
|
||||
//auto total_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time).count();
|
||||
//std::cout << "total T:" << total_time << std::endl;
|
||||
}
|
||||
|
||||
cv::Mat vp_face_swap_node::read_emap_mat_from_txt(const std::string& emap_file_for_embeddings, int rows, int cols) {
|
||||
std::ifstream file(emap_file_for_embeddings);
|
||||
|
||||
cv::Mat matrix(rows, cols, CV_32F);
|
||||
std::string line;
|
||||
for (int i = 0; i < rows; ++i) {
|
||||
std::getline(file, line);
|
||||
std::istringstream iss(line);
|
||||
for (int j = 0; j < cols; ++j) {
|
||||
float value;
|
||||
assert(iss >> value);
|
||||
matrix.at<float>(i, j) = value;
|
||||
}
|
||||
}
|
||||
file.close();
|
||||
return matrix;
|
||||
}
|
||||
|
||||
cv::Mat vp_face_swap_node::process_embeddings_using_emap(const cv::Mat& source_face_normed_embedding, const cv::Mat& emap) {
|
||||
cv::Mat latent = source_face_normed_embedding.clone().reshape(1, 1);
|
||||
cv::Mat result = latent * emap;
|
||||
cv::normalize(result, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
cv::Mat vp_face_swap_node::get_similarity_transform_matrix(float src[5][2]) {
|
||||
using namespace cv;
|
||||
//float dst[5][2] = { {38.2946f, 51.6963f}, {73.5318f, 51.5014f}, {56.0252f, 71.7366f}, {41.5493f, 92.3655f}, {70.7299f, 92.2041f} }; // for 112*112
|
||||
float dst[5][2] = { {43.0f, 58.0f}, {85.0f, 58.0f}, {64.0f, 81.0f}, {47.0f, 105.0f}, {80.0f, 105.0f} }; // for 128*128
|
||||
//float dst[5][2] = { {38.0f, 54.0f}, {90.0f, 54.0f}, {64.0f, 85.0f}, {47.0f, 109.0f}, {80.0f, 109.0f} }; // for 128*128, zoom out
|
||||
float avg0 = (src[0][0] + src[1][0] + src[2][0] + src[3][0] + src[4][0]) / 5;
|
||||
float avg1 = (src[0][1] + src[1][1] + src[2][1] + src[3][1] + src[4][1]) / 5;
|
||||
//Compute mean of src and dst.
|
||||
float src_mean[2] = { avg0, avg1 };
|
||||
float dst_mean[2] = { 56.0262f, 71.9008f };
|
||||
//Subtract mean from src and dst.
|
||||
float src_demean[5][2];
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
for (int j = 0; j < 5; j++)
|
||||
{
|
||||
src_demean[j][i] = src[j][i] - src_mean[i];
|
||||
}
|
||||
}
|
||||
float dst_demean[5][2];
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
for (int j = 0; j < 5; j++)
|
||||
{
|
||||
dst_demean[j][i] = dst[j][i] - dst_mean[i];
|
||||
}
|
||||
}
|
||||
double A00 = 0.0, A01 = 0.0, A10 = 0.0, A11 = 0.0;
|
||||
for (int i = 0; i < 5; i++)
|
||||
A00 += dst_demean[i][0] * src_demean[i][0];
|
||||
A00 = A00 / 5;
|
||||
for (int i = 0; i < 5; i++)
|
||||
A01 += dst_demean[i][0] * src_demean[i][1];
|
||||
A01 = A01 / 5;
|
||||
for (int i = 0; i < 5; i++)
|
||||
A10 += dst_demean[i][1] * src_demean[i][0];
|
||||
A10 = A10 / 5;
|
||||
for (int i = 0; i < 5; i++)
|
||||
A11 += dst_demean[i][1] * src_demean[i][1];
|
||||
A11 = A11 / 5;
|
||||
Mat A = (Mat_<double>(2, 2) << A00, A01, A10, A11);
|
||||
double d[2] = { 1.0, 1.0 };
|
||||
double detA = A00 * A11 - A01 * A10;
|
||||
if (detA < 0)
|
||||
d[1] = -1;
|
||||
double T[3][3] = { {1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0} };
|
||||
Mat s, u, vt, v;
|
||||
SVD::compute(A, s, u, vt);
|
||||
double smax = s.ptr<double>(0)[0]>s.ptr<double>(1)[0] ? s.ptr<double>(0)[0] : s.ptr<double>(1)[0];
|
||||
double tol = smax * 2 * FLT_MIN;
|
||||
int rank = 0;
|
||||
if (s.ptr<double>(0)[0]>tol)
|
||||
rank += 1;
|
||||
if (s.ptr<double>(1)[0]>tol)
|
||||
rank += 1;
|
||||
double arr_u[2][2] = { {u.ptr<double>(0)[0], u.ptr<double>(0)[1]}, {u.ptr<double>(1)[0], u.ptr<double>(1)[1]} };
|
||||
double arr_vt[2][2] = { {vt.ptr<double>(0)[0], vt.ptr<double>(0)[1]}, {vt.ptr<double>(1)[0], vt.ptr<double>(1)[1]} };
|
||||
double det_u = arr_u[0][0] * arr_u[1][1] - arr_u[0][1] * arr_u[1][0];
|
||||
double det_vt = arr_vt[0][0] * arr_vt[1][1] - arr_vt[0][1] * arr_vt[1][0];
|
||||
if (rank == 1)
|
||||
{
|
||||
if ((det_u*det_vt) > 0)
|
||||
{
|
||||
Mat uvt = u*vt;
|
||||
T[0][0] = uvt.ptr<double>(0)[0];
|
||||
T[0][1] = uvt.ptr<double>(0)[1];
|
||||
T[1][0] = uvt.ptr<double>(1)[0];
|
||||
T[1][1] = uvt.ptr<double>(1)[1];
|
||||
}
|
||||
else
|
||||
{
|
||||
double temp = d[1];
|
||||
d[1] = -1;
|
||||
Mat D = (Mat_<double>(2, 2) << d[0], 0.0, 0.0, d[1]);
|
||||
Mat Dvt = D*vt;
|
||||
Mat uDvt = u*Dvt;
|
||||
T[0][0] = uDvt.ptr<double>(0)[0];
|
||||
T[0][1] = uDvt.ptr<double>(0)[1];
|
||||
T[1][0] = uDvt.ptr<double>(1)[0];
|
||||
T[1][1] = uDvt.ptr<double>(1)[1];
|
||||
d[1] = temp;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Mat D = (Mat_<double>(2, 2) << d[0], 0.0, 0.0, d[1]);
|
||||
Mat Dvt = D*vt;
|
||||
Mat uDvt = u*Dvt;
|
||||
T[0][0] = uDvt.ptr<double>(0)[0];
|
||||
T[0][1] = uDvt.ptr<double>(0)[1];
|
||||
T[1][0] = uDvt.ptr<double>(1)[0];
|
||||
T[1][1] = uDvt.ptr<double>(1)[1];
|
||||
}
|
||||
double var1 = 0.0;
|
||||
for (int i = 0; i < 5; i++)
|
||||
var1 += src_demean[i][0] * src_demean[i][0];
|
||||
var1 = var1 / 5;
|
||||
double var2 = 0.0;
|
||||
for (int i = 0; i < 5; i++)
|
||||
var2 += src_demean[i][1] * src_demean[i][1];
|
||||
var2 = var2 / 5;
|
||||
double scale = 1.0 / (var1 + var2)* (s.ptr<double>(0)[0] * d[0] + s.ptr<double>(1)[0] * d[1]);
|
||||
double TS[2];
|
||||
TS[0] = T[0][0] * src_mean[0] + T[0][1] * src_mean[1];
|
||||
TS[1] = T[1][0] * src_mean[0] + T[1][1] * src_mean[1];
|
||||
T[0][2] = dst_mean[0] - scale*TS[0];
|
||||
T[1][2] = dst_mean[1] - scale*TS[1];
|
||||
T[0][0] *= scale;
|
||||
T[0][1] *= scale;
|
||||
T[1][0] *= scale;
|
||||
T[1][1] *= scale;
|
||||
Mat transform_mat = (Mat_<double>(2, 3) << T[0][0], T[0][1], T[0][2], T[1][0], T[1][1], T[1][2]);
|
||||
return transform_mat;
|
||||
}
|
||||
|
||||
cv::Mat vp_face_swap_node::align_crop(cv::Mat& src_img, std::vector<std::pair<int, int>>& kps, cv::Mat& aligned_image) {
|
||||
float face_keypoints[5][2] =
|
||||
{{kps[0].first, kps[0].second},
|
||||
{kps[1].first, kps[1].second},
|
||||
{kps[2].first, kps[2].second},
|
||||
{kps[3].first, kps[3].second},
|
||||
{kps[4].first, kps[4].second}};
|
||||
|
||||
cv::Mat warp_mat = get_similarity_transform_matrix(face_keypoints);
|
||||
cv::warpAffine(src_img, aligned_image, warp_mat, cv::Size(128, 128), cv::INTER_LINEAR);
|
||||
return warp_mat;
|
||||
}
|
||||
|
||||
void vp_face_swap_node::extract_faces(const cv::Mat& frame, std::vector<face_box>& face_boxes) {
|
||||
auto blob_to_infer = cv::dnn::blobFromImage(frame);
|
||||
face_extract_net.setInput(blob_to_infer);
|
||||
const std::vector<std::string> out_names = {"loc", "conf", "iou"};
|
||||
std::vector<cv::Mat> raw_outputs;
|
||||
face_extract_net.forward(raw_outputs, out_names);
|
||||
|
||||
using namespace cv;
|
||||
float scoreThreshold = 0.7;
|
||||
float nmsThreshold = 0.5;
|
||||
int topK = 50;
|
||||
int inputW = frame.cols;
|
||||
int inputH = frame.rows;
|
||||
|
||||
// 3 heads of output
|
||||
assert(raw_outputs.size() == 3);
|
||||
|
||||
// Extract from output_blobs
|
||||
Mat loc = raw_outputs[0];
|
||||
Mat conf = raw_outputs[1];
|
||||
Mat iou = raw_outputs[2];
|
||||
|
||||
// we need generate priors if input size changed or priors is not initialized
|
||||
if (loc.rows != priors.size()) {
|
||||
generatePriors(inputW, inputH);
|
||||
}
|
||||
|
||||
assert(loc.rows == priors.size());
|
||||
assert(loc.rows == conf.rows);
|
||||
assert(loc.rows == iou.rows);
|
||||
|
||||
// Decode from deltas and priors
|
||||
const std::vector<float> variance = {0.1f, 0.2f};
|
||||
float* loc_v = (float*)(loc.data);
|
||||
float* conf_v = (float*)(conf.data);
|
||||
float* iou_v = (float*)(iou.data);
|
||||
Mat faces;
|
||||
// (tl_x, tl_y, w, h, re_x, re_y, le_x, le_y, nt_x, nt_y, rcm_x, rcm_y, lcm_x, lcm_y, score)
|
||||
// 'tl': top left point of the bounding box
|
||||
// 're': right eye, 'le': left eye
|
||||
// 'nt': nose tip
|
||||
// 'rcm': right corner of mouth, 'lcm': left corner of mouth
|
||||
Mat face(1, 15, CV_32FC1);
|
||||
for (size_t i = 0; i < priors.size(); ++i) {
|
||||
// Get score
|
||||
float clsScore = conf_v[i*2+1];
|
||||
float iouScore = iou_v[i];
|
||||
// Clamp
|
||||
if (iouScore < 0.f) {
|
||||
iouScore = 0.f;
|
||||
}
|
||||
else if (iouScore > 1.f) {
|
||||
iouScore = 1.f;
|
||||
}
|
||||
float score = std::sqrt(clsScore * iouScore);
|
||||
face.at<float>(0, 14) = score;
|
||||
|
||||
// Get bounding box
|
||||
float cx = (priors[i].x + loc_v[i*14+0] * variance[0] * priors[i].width) * inputW;
|
||||
float cy = (priors[i].y + loc_v[i*14+1] * variance[0] * priors[i].height) * inputH;
|
||||
float w = priors[i].width * exp(loc_v[i*14+2] * variance[0]) * inputW;
|
||||
float h = priors[i].height * exp(loc_v[i*14+3] * variance[1]) * inputH;
|
||||
float x1 = cx - w / 2;
|
||||
float y1 = cy - h / 2;
|
||||
face.at<float>(0, 0) = x1;
|
||||
face.at<float>(0, 1) = y1;
|
||||
face.at<float>(0, 2) = w;
|
||||
face.at<float>(0, 3) = h;
|
||||
|
||||
// Get landmarks
|
||||
face.at<float>(0, 4) = (priors[i].x + loc_v[i*14+ 4] * variance[0] * priors[i].width) * inputW; // right eye, x
|
||||
face.at<float>(0, 5) = (priors[i].y + loc_v[i*14+ 5] * variance[0] * priors[i].height) * inputH; // right eye, y
|
||||
face.at<float>(0, 6) = (priors[i].x + loc_v[i*14+ 6] * variance[0] * priors[i].width) * inputW; // left eye, x
|
||||
face.at<float>(0, 7) = (priors[i].y + loc_v[i*14+ 7] * variance[0] * priors[i].height) * inputH; // left eye, y
|
||||
face.at<float>(0, 8) = (priors[i].x + loc_v[i*14+ 8] * variance[0] * priors[i].width) * inputW; // nose tip, x
|
||||
face.at<float>(0, 9) = (priors[i].y + loc_v[i*14+ 9] * variance[0] * priors[i].height) * inputH; // nose tip, y
|
||||
face.at<float>(0, 10) = (priors[i].x + loc_v[i*14+10] * variance[0] * priors[i].width) * inputW; // right corner of mouth, x
|
||||
face.at<float>(0, 11) = (priors[i].y + loc_v[i*14+11] * variance[0] * priors[i].height) * inputH; // right corner of mouth, y
|
||||
face.at<float>(0, 12) = (priors[i].x + loc_v[i*14+12] * variance[0] * priors[i].width) * inputW; // left corner of mouth, x
|
||||
face.at<float>(0, 13) = (priors[i].y + loc_v[i*14+13] * variance[0] * priors[i].height) * inputH; // left corner of mouth, y
|
||||
|
||||
faces.push_back(face);
|
||||
}
|
||||
|
||||
if (faces.rows > 1) {
|
||||
// Retrieve boxes and scores
|
||||
std::vector<Rect2i> faceBoxes;
|
||||
std::vector<float> faceScores;
|
||||
for (int rIdx = 0; rIdx < faces.rows; rIdx++)
|
||||
{
|
||||
faceBoxes.push_back(Rect2i(int(faces.at<float>(rIdx, 0)),
|
||||
int(faces.at<float>(rIdx, 1)),
|
||||
int(faces.at<float>(rIdx, 2)),
|
||||
int(faces.at<float>(rIdx, 3))));
|
||||
faceScores.push_back(faces.at<float>(rIdx, 14));
|
||||
}
|
||||
|
||||
std::vector<int> keepIdx;
|
||||
dnn::NMSBoxes(faceBoxes, faceScores, scoreThreshold, nmsThreshold, keepIdx, 1.f, topK);
|
||||
|
||||
// Get NMS results
|
||||
Mat nms_faces;
|
||||
for (int idx: keepIdx)
|
||||
{
|
||||
nms_faces.push_back(faces.row(idx));
|
||||
}
|
||||
|
||||
// insert face target back to frame meta
|
||||
for (int i = 0; i < nms_faces.rows; i++) {
|
||||
auto x = int(nms_faces.at<float>(i, 0));
|
||||
auto y = int(nms_faces.at<float>(i, 1));
|
||||
auto w = int(nms_faces.at<float>(i, 2));
|
||||
auto h = int(nms_faces.at<float>(i, 3));
|
||||
|
||||
// check value range
|
||||
x = std::max(x, 0);
|
||||
y = std::max(y, 0);
|
||||
w = std::min(w, frame.cols - x);
|
||||
h = std::min(h, frame.rows - y);
|
||||
|
||||
auto kp1 = std::pair<int, int>(int(nms_faces.at<float>(i, 4)), int(nms_faces.at<float>(i, 5)));
|
||||
auto kp2 = std::pair<int, int>(int(nms_faces.at<float>(i, 6)), int(nms_faces.at<float>(i, 7)));
|
||||
auto kp3 = std::pair<int, int>(int(nms_faces.at<float>(i, 8)), int(nms_faces.at<float>(i, 9)));
|
||||
auto kp4 = std::pair<int, int>(int(nms_faces.at<float>(i, 10)), int(nms_faces.at<float>(i, 11)));
|
||||
auto kp5 = std::pair<int, int>(int(nms_faces.at<float>(i, 12)), int(nms_faces.at<float>(i, 13)));
|
||||
auto score = nms_faces.at<float>(i, 14);
|
||||
|
||||
face_box face;
|
||||
face.x = x;
|
||||
face.y = y;
|
||||
face.width = w;
|
||||
face.height = h;
|
||||
face.score = score;
|
||||
face.kps = std::vector<std::pair<int, int>>{kp1, kp2, kp3, kp4, kp5};
|
||||
face_boxes.push_back(face);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp_face_swap_node::init_source_face_embeddings(std::string& swap_source_image, int swap_source_face_index, std::string& emap_file_for_embeddings) {
|
||||
std::vector<face_box> source_faces;
|
||||
auto source_mat = cv::imread(swap_source_image);
|
||||
|
||||
// extract faces
|
||||
extract_faces(source_mat, source_faces);
|
||||
|
||||
assert(source_faces.size() > 0);
|
||||
// sort from left to right
|
||||
std::sort(source_faces.begin(), source_faces.end(), [](face_box a, face_box b){ return a.x < b.x;});
|
||||
|
||||
auto the_selected_face = (swap_source_face_index < 0 || swap_source_face_index >= source_faces.size()) ? source_faces[0] : source_faces[swap_source_face_index];
|
||||
cv::Mat aligned_face;
|
||||
auto warp_mat = align_crop(source_mat, the_selected_face.kps, aligned_face);
|
||||
// for debug
|
||||
cv::imwrite("selected_source_face.jpg", aligned_face);
|
||||
|
||||
// read emap from file
|
||||
auto emap = read_emap_mat_from_txt(emap_file_for_embeddings);
|
||||
|
||||
// encoding for the selected face (infer for only 1 time)
|
||||
cv::Mat source_blob = cv::dnn::blobFromImage(aligned_face, 1 / 127.5, cv::Size(112, 112), (127.5, 127.5, 127.5), true);
|
||||
std::vector<cv::Mat> source_outputs;
|
||||
face_encoding_net.setInput(source_blob);
|
||||
face_encoding_net.forward(source_outputs, face_encoding_net.getUnconnectedOutLayersNames());
|
||||
|
||||
auto& source_output = source_outputs[0];
|
||||
// process using emap
|
||||
source_face_embeddings = process_embeddings_using_emap(source_output, emap);
|
||||
}
|
||||
|
||||
void vp_face_swap_node::swap(cv::Mat& aligned_face, cv::Mat& swapped_face) {
|
||||
cv::Mat target_blob = cv::dnn::blobFromImage(aligned_face, 1 / 255.0, cv::Size(128, 128), (0, 0, 0), true);
|
||||
std::vector<cv::Mat> target_outputs;
|
||||
face_swap_net.setInput(target_blob, "target");
|
||||
face_swap_net.setInput(source_face_embeddings, "source");
|
||||
face_swap_net.forward(target_outputs, face_swap_net.getUnconnectedOutLayersNames());
|
||||
|
||||
auto& output = target_outputs[0];
|
||||
cv::Mat output_channel_last;
|
||||
cv::transposeND(output, {0,2,3,1}, output_channel_last);
|
||||
|
||||
cv::Mat img_fake(output_channel_last.size[1], output_channel_last.size[2], CV_32FC3, output_channel_last.data);
|
||||
cv::cvtColor(img_fake, swapped_face, cv::COLOR_RGB2BGR);
|
||||
}
|
||||
|
||||
void vp_face_swap_node::paste_back(cv::Mat& bg, cv::Mat& swapped_face, const cv::Mat& transform_matrix) {
|
||||
bg.convertTo(bg, CV_32FC3, 1.0 / 255);
|
||||
|
||||
cv::Mat IM;
|
||||
cv::invertAffineTransform(transform_matrix, IM);
|
||||
cv::Mat img_mask(swapped_face.rows, swapped_face.cols, CV_32FC1, 1);
|
||||
cv::warpAffine(swapped_face, swapped_face, IM, bg.size());
|
||||
cv::warpAffine(img_mask, img_mask, IM, bg.size());
|
||||
|
||||
// create mask
|
||||
cv::threshold(img_mask, img_mask, 0, 1, cv::THRESH_BINARY);
|
||||
cv::Point min_loc, max_loc;
|
||||
double min_val, max_val;
|
||||
cv::minMaxLoc(img_mask, &min_val, &max_val, &min_loc, &max_loc);
|
||||
|
||||
int mask_h = max_loc.y - min_loc.y;
|
||||
int mask_w = max_loc.x - min_loc.x;
|
||||
|
||||
int mask_size = std::sqrt(mask_h * mask_w);
|
||||
int k = std::max(mask_size / 10, 10);
|
||||
|
||||
cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(k, k));
|
||||
cv::erode(img_mask, img_mask, kernel);
|
||||
|
||||
k = std::max(mask_size / 20, 5);
|
||||
cv::GaussianBlur(img_mask, img_mask, cv::Size(2 * k + 1, 2 * k + 1), 0);
|
||||
cv::cvtColor(img_mask, img_mask, cv::COLOR_GRAY2BGR);
|
||||
|
||||
// merge swapped face and original background
|
||||
cv::Mat ones(bg.rows, bg.cols, CV_32FC3, cv::Scalar(1, 1, 1));
|
||||
bg = img_mask.mul(swapped_face) + (ones - img_mask).mul(bg);
|
||||
bg.convertTo(bg, CV_8U, 255);
|
||||
}
|
||||
|
||||
void vp_face_swap_node::generatePriors(int inputW, int inputH) {
|
||||
using namespace cv;
|
||||
// Calculate shapes of different scales according to the shape of input image
|
||||
Size feature_map_2nd = {
|
||||
int(int((inputW+1)/2)/2), int(int((inputH+1)/2)/2)
|
||||
};
|
||||
Size feature_map_3rd = {
|
||||
int(feature_map_2nd.width/2), int(feature_map_2nd.height/2)
|
||||
};
|
||||
Size feature_map_4th = {
|
||||
int(feature_map_3rd.width/2), int(feature_map_3rd.height/2)
|
||||
};
|
||||
Size feature_map_5th = {
|
||||
int(feature_map_4th.width/2), int(feature_map_4th.height/2)
|
||||
};
|
||||
Size feature_map_6th = {
|
||||
int(feature_map_5th.width/2), int(feature_map_5th.height/2)
|
||||
};
|
||||
|
||||
std::vector<Size> feature_map_sizes;
|
||||
feature_map_sizes.push_back(feature_map_3rd);
|
||||
feature_map_sizes.push_back(feature_map_4th);
|
||||
feature_map_sizes.push_back(feature_map_5th);
|
||||
feature_map_sizes.push_back(feature_map_6th);
|
||||
|
||||
// Fixed params for generating priors
|
||||
const std::vector<std::vector<float>> min_sizes = {
|
||||
{10.0f, 16.0f, 24.0f},
|
||||
{32.0f, 48.0f},
|
||||
{64.0f, 96.0f},
|
||||
{128.0f, 192.0f, 256.0f}
|
||||
};
|
||||
CV_Assert(min_sizes.size() == feature_map_sizes.size()); // just to keep vectors in sync
|
||||
const std::vector<int> steps = { 8, 16, 32, 64 };
|
||||
|
||||
// Generate priors
|
||||
priors.clear();
|
||||
for (size_t i = 0; i < feature_map_sizes.size(); ++i)
|
||||
{
|
||||
Size feature_map_size = feature_map_sizes[i];
|
||||
std::vector<float> min_size = min_sizes[i];
|
||||
|
||||
for (int _h = 0; _h < feature_map_size.height; ++_h)
|
||||
{
|
||||
for (int _w = 0; _w < feature_map_size.width; ++_w)
|
||||
{
|
||||
for (size_t j = 0; j < min_size.size(); ++j)
|
||||
{
|
||||
float s_kx = min_size[j] / inputW;
|
||||
float s_ky = min_size[j] / inputH;
|
||||
|
||||
float cx = (_w + 0.5f) * steps[i] / inputW;
|
||||
float cy = (_h + 0.5f) * steps[i] / inputH;
|
||||
|
||||
Rect2f prior = { cx, cy, s_kx, s_ky };
|
||||
priors.push_back(prior);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp_face_swap_node::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
|
||||
}
|
||||
}
|
||||
65
nodes/infers/vp_face_swap_node.h
Normal file
65
nodes/infers/vp_face_swap_node.h
Normal file
@@ -0,0 +1,65 @@
|
||||
#pragma once
|
||||
|
||||
#include "../vp_primary_infer_node.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// face swap node
|
||||
// used to swap faces in videos/images using a specific face
|
||||
class vp_face_swap_node: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
// inner temporary use
|
||||
struct face_box {
|
||||
int x;
|
||||
int y;
|
||||
int width;
|
||||
int height;
|
||||
float score;
|
||||
std::vector<std::pair<int, int>> kps;
|
||||
};
|
||||
|
||||
/* onnx network using opencv::dnn as backend */
|
||||
cv::dnn::Net face_extract_net;
|
||||
cv::dnn::Net face_encoding_net;
|
||||
cv::dnn::Net face_swap_net;
|
||||
|
||||
cv::Mat read_emap_mat_from_txt(const std::string& emap_file_for_embeddings, int rows = 512, int cols = 512);
|
||||
cv::Mat process_embeddings_using_emap(const cv::Mat& source_face_normed_embedding, const cv::Mat& emap);
|
||||
|
||||
cv::Mat get_similarity_transform_matrix(float src[5][2]);
|
||||
cv::Mat align_crop(cv::Mat& _src_img, std::vector<std::pair<int, int>>& kps, cv::Mat& _aligned_img);
|
||||
|
||||
void extract_faces(const cv::Mat& frame, std::vector<face_box>& faces);
|
||||
void init_source_face_embeddings(std::string& swap_source_image, int swap_source_face_index, std::string& emap_file_for_embeddings);
|
||||
void swap(cv::Mat& aligned_face, cv::Mat& swapped_face);
|
||||
void paste_back(cv::Mat& bg, cv::Mat& swapped_face, const cv::Mat& transform_matrix);
|
||||
|
||||
cv::Mat source_face_embeddings;
|
||||
bool act_as_primary_detector = false;
|
||||
bool swap_on_osd = true;
|
||||
|
||||
int min_face_w_h = 50;
|
||||
|
||||
/* used for extract faces */
|
||||
std::vector<cv::Rect2f> priors;
|
||||
void generatePriors(int inputW, int inputH);
|
||||
protected:
|
||||
// we need a totally new logic for the whole infer combinations
|
||||
// no separate step pre-defined needed in base class
|
||||
virtual void run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
// override pure virtual method, for compile pass
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_face_swap_node(std::string node_name,
|
||||
std::string yunet_face_detect_model,
|
||||
std::string buffalo_l_face_encoding_model,
|
||||
std::string emap_file_for_embeddings,
|
||||
std::string insightface_swap_model,
|
||||
std::string swap_source_image,
|
||||
int swap_source_face_index = 0,
|
||||
int min_face_w_h = 50,
|
||||
bool swap_on_osd = true,
|
||||
bool act_as_primary_detector = false);
|
||||
~vp_face_swap_node();
|
||||
};
|
||||
}
|
||||
15
nodes/infers/vp_feature_encoder_node.cpp
Executable file
15
nodes/infers/vp_feature_encoder_node.cpp
Executable file
@@ -0,0 +1,15 @@
|
||||
|
||||
#include "vp_feature_encoder_node.h"
|
||||
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_feature_encoder_node::vp_feature_encoder_node(std::string node_name, std::string model_path):
|
||||
vp_secondary_infer_node(node_name, model_path) {
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_feature_encoder_node::~vp_feature_encoder_node() {
|
||||
deinitialized();
|
||||
}
|
||||
}
|
||||
18
nodes/infers/vp_feature_encoder_node.h
Executable file
18
nodes/infers/vp_feature_encoder_node.h
Executable file
@@ -0,0 +1,18 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../vp_secondary_infer_node.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// common feature encoder for image feature extraction.
|
||||
// used for feature extraction, update embeddings of vp_frame_target.
|
||||
class vp_feature_encoder_node: public vp_secondary_infer_node
|
||||
{
|
||||
private:
|
||||
/* data */
|
||||
public:
|
||||
vp_feature_encoder_node(std::string node_name, std::string model_path);
|
||||
~vp_feature_encoder_node();
|
||||
};
|
||||
|
||||
}
|
||||
34
nodes/infers/vp_lane_detector_node.cpp
Normal file
34
nodes/infers/vp_lane_detector_node.cpp
Normal file
@@ -0,0 +1,34 @@
|
||||
|
||||
#include "vp_lane_detector_node.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_lane_detector_node::vp_lane_detector_node(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string model_config_path,
|
||||
std::string labels_path,
|
||||
int input_width,
|
||||
int input_height,
|
||||
int batch_size,
|
||||
int class_id_offset,
|
||||
float scale,
|
||||
cv::Scalar mean,
|
||||
cv::Scalar std,
|
||||
bool swap_rb,
|
||||
bool swap_chn):
|
||||
vp_primary_infer_node(node_name, model_path, model_config_path, labels_path, input_width, input_height, batch_size, class_id_offset, scale, mean, std, swap_rb, swap_chn) {
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_lane_detector_node::~vp_lane_detector_node() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
void vp_lane_detector_node::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
auto& mask = raw_outputs[0];
|
||||
|
||||
// save output as mask directly, this is the mask for the whole frame.
|
||||
frame_meta->mask = mask.clone();
|
||||
}
|
||||
}
|
||||
31
nodes/infers/vp_lane_detector_node.h
Normal file
31
nodes/infers/vp_lane_detector_node.h
Normal file
@@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
|
||||
#include "../vp_primary_infer_node.h"
|
||||
|
||||
|
||||
namespace vp_nodes {
|
||||
// lane detect based on CenterNet
|
||||
//
|
||||
class vp_lane_detector_node: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
/* data */
|
||||
protected:
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_lane_detector_node(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string model_config_path = "",
|
||||
std::string labels_path = "",
|
||||
int input_width = 736,
|
||||
int input_height = 416,
|
||||
int batch_size = 1,
|
||||
int class_id_offset = 0,
|
||||
float scale = 1,
|
||||
cv::Scalar mean = cv::Scalar(0),
|
||||
cv::Scalar std = cv::Scalar(1),
|
||||
bool swap_rb = true,
|
||||
bool swap_chn = true);
|
||||
~vp_lane_detector_node();
|
||||
};
|
||||
}
|
||||
89
nodes/infers/vp_mask_rcnn_detector_node.cpp
Normal file
89
nodes/infers/vp_mask_rcnn_detector_node.cpp
Normal file
@@ -0,0 +1,89 @@
|
||||
|
||||
#include "vp_mask_rcnn_detector_node.h"
|
||||
|
||||
|
||||
namespace vp_nodes {
|
||||
vp_mask_rcnn_detector_node::vp_mask_rcnn_detector_node(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string model_config_path,
|
||||
std::string labels_path,
|
||||
int input_width,
|
||||
int input_height,
|
||||
int batch_size,
|
||||
int class_id_offset,
|
||||
float score_threshold,
|
||||
float scale,
|
||||
cv::Scalar mean,
|
||||
cv::Scalar std,
|
||||
bool swap_rb):
|
||||
vp_primary_infer_node(node_name, model_path, model_config_path, labels_path, input_width, input_height, batch_size, class_id_offset, scale, mean, std, swap_rb),
|
||||
score_threshold(score_threshold) {
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_mask_rcnn_detector_node::~vp_mask_rcnn_detector_node() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
|
||||
void vp_mask_rcnn_detector_node::infer(const cv::Mat& blob_to_infer, std::vector<cv::Mat>& raw_outputs) {
|
||||
// blob_to_infer is a 4D matrix
|
||||
// the first dim is number of batch, MUST be 1
|
||||
assert(blob_to_infer.dims == 4);
|
||||
assert(blob_to_infer.size[0] == 1);
|
||||
assert(!net.empty());
|
||||
|
||||
net.setInput(blob_to_infer);
|
||||
net.forward(raw_outputs, out_names);
|
||||
}
|
||||
|
||||
|
||||
void vp_mask_rcnn_detector_node::preprocess(const std::vector<cv::Mat>& mats_to_infer, cv::Mat& blob_to_infer) {
|
||||
// ignore preprocess logic in base class
|
||||
cv::dnn::blobFromImages(mats_to_infer, blob_to_infer, 1.0, cv::Size(), cv::Scalar(), true, false);
|
||||
}
|
||||
|
||||
void vp_mask_rcnn_detector_node::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
|
||||
auto outDetections = raw_outputs[0];
|
||||
auto outMasks = raw_outputs[1];
|
||||
|
||||
// Output size of masks is NxCxHxW where
|
||||
// N - number of detected boxes
|
||||
// C - number of classes (excluding background)
|
||||
// HxW - segmentation shape
|
||||
const int numDetections = outDetections.size[2];
|
||||
const int numClasses = outMasks.size[1];
|
||||
|
||||
auto outDetections_ = outDetections.reshape(1, outDetections.total() / 7);
|
||||
auto& frame = frame_meta->frame;
|
||||
for (int i = 0; i < numDetections; ++i) {
|
||||
float score = outDetections_.at<float>(i, 2);
|
||||
if (score > score_threshold) {
|
||||
// Extract the bounding box
|
||||
int classId = static_cast<int>(outDetections_.at<float>(i, 1));
|
||||
int left = static_cast<int>(frame.cols * outDetections_.at<float>(i, 3));
|
||||
int top = static_cast<int>(frame.rows * outDetections_.at<float>(i, 4));
|
||||
int right = static_cast<int>(frame.cols * outDetections_.at<float>(i, 5));
|
||||
int bottom = static_cast<int>(frame.rows * outDetections_.at<float>(i, 6));
|
||||
|
||||
left = max(0, min(left, frame.cols - 1));
|
||||
top = max(0, min(top, frame.rows - 1));
|
||||
right = max(0, min(right, frame.cols - 1));
|
||||
bottom = max(0, min(bottom, frame.rows - 1));
|
||||
|
||||
auto label = (labels.size() < classId + 1) ? "" : labels[classId];
|
||||
|
||||
// Extract the mask for the object
|
||||
cv::Mat objectMask(outMasks.size[2], outMasks.size[3], CV_32F, outMasks.ptr<float>(i, classId));
|
||||
|
||||
classId += class_id_offset;
|
||||
auto target = std::make_shared<vp_objects::vp_frame_target>(left, top, right - left, bottom - top, classId, score, frame_meta->frame_index, frame_meta->channel_index, label);
|
||||
target->mask = objectMask;
|
||||
|
||||
frame_meta->targets.push_back(target);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
41
nodes/infers/vp_mask_rcnn_detector_node.h
Normal file
41
nodes/infers/vp_mask_rcnn_detector_node.h
Normal file
@@ -0,0 +1,41 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../vp_primary_infer_node.h"
|
||||
#include "../../objects/vp_frame_target.h"
|
||||
|
||||
|
||||
|
||||
namespace vp_nodes {
|
||||
// image segmentation based on Mask RCNN
|
||||
// https://github.com/matterport/Mask_RCNN
|
||||
class vp_mask_rcnn_detector_node: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
/* data */
|
||||
// names of output layers in mask_rcnn
|
||||
const std::vector<std::string> out_names = {"detection_out_final", "detection_masks"};
|
||||
float score_threshold = 0.5;
|
||||
protected:
|
||||
// override infer and preprocess as mask_rcnn has a different logic
|
||||
virtual void infer(const cv::Mat& blob_to_infer, std::vector<cv::Mat>& raw_outputs) override;
|
||||
virtual void preprocess(const std::vector<cv::Mat>& mats_to_infer, cv::Mat& blob_to_infer) override;
|
||||
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_mask_rcnn_detector_node(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string model_config_path = "",
|
||||
std::string labels_path = "",
|
||||
int input_width = 416,
|
||||
int input_height = 416,
|
||||
int batch_size = 1,
|
||||
int class_id_offset = 0,
|
||||
float score_threshold = 0.5,
|
||||
float scale = 1 / 255.0,
|
||||
cv::Scalar mean = cv::Scalar(0),
|
||||
cv::Scalar std = cv::Scalar(1),
|
||||
bool swap_rb = true);
|
||||
~vp_mask_rcnn_detector_node();
|
||||
};
|
||||
}
|
||||
34
nodes/infers/vp_mllm_analyser_node.cpp
Normal file
34
nodes/infers/vp_mllm_analyser_node.cpp
Normal file
@@ -0,0 +1,34 @@
|
||||
#include "vp_mllm_analyser_node.h"
|
||||
|
||||
#ifdef VP_WITH_LLM
|
||||
namespace vp_nodes {
|
||||
vp_mllm_analyser_node::vp_mllm_analyser_node(std::string node_name,
|
||||
std::string model_name,
|
||||
std::string prompt,
|
||||
std::string api_base_url,
|
||||
std::string api_key,
|
||||
llmlib::LLMBackendType backend_type):
|
||||
vp_primary_infer_node(node_name, ""),
|
||||
llm_model_name(model_name),
|
||||
llm_prompt(prompt) {
|
||||
cli = llmlib::LLMClient(api_base_url, api_key, backend_type);
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_mllm_analyser_node::~vp_mllm_analyser_node() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
void vp_mllm_analyser_node::run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
|
||||
auto output = cli.simple_chat(llm_model_name, llm_prompt, {frame_meta->frame}, {});
|
||||
frame_meta->description = output;
|
||||
}
|
||||
|
||||
void vp_mllm_analyser_node::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
32
nodes/infers/vp_mllm_analyser_node.h
Normal file
32
nodes/infers/vp_mllm_analyser_node.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef VP_WITH_LLM
|
||||
#include "../vp_primary_infer_node.h"
|
||||
#include "../../third_party/cpp_llmlib/llmlib.hpp"
|
||||
|
||||
namespace vp_nodes {
|
||||
// image(frame) analyser based on Multimodal Large Language Model
|
||||
class vp_mllm_analyser_node: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
/* data */
|
||||
llmlib::LLMClient cli;
|
||||
std::string llm_prompt;
|
||||
std::string llm_model_name;
|
||||
protected:
|
||||
// we need a totally new logic for the whole infer combinations
|
||||
// no separate step pre-defined needed in base class
|
||||
virtual void run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
// override pure virtual method, for compile pass
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_mllm_analyser_node(std::string node_name,
|
||||
std::string model_name,
|
||||
std::string prompt,
|
||||
std::string api_base_url,
|
||||
std::string api_key = "",
|
||||
llmlib::LLMBackendType backend_type = llmlib::LLMBackendType::Ollama);
|
||||
~vp_mllm_analyser_node();
|
||||
};
|
||||
}
|
||||
#endif
|
||||
107
nodes/infers/vp_openpose_detector_node.cpp
Executable file
107
nodes/infers/vp_openpose_detector_node.cpp
Executable file
@@ -0,0 +1,107 @@
|
||||
#include <set>
|
||||
|
||||
#include "vp_openpose_detector_node.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_openpose_detector_node::vp_openpose_detector_node(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string model_config_path,
|
||||
std::string labels_path,
|
||||
int input_width,
|
||||
int input_height,
|
||||
int batch_size,
|
||||
int class_id_offset,
|
||||
float score_threshold,
|
||||
vp_objects::vp_pose_type type,
|
||||
float scale,
|
||||
cv::Scalar mean,
|
||||
cv::Scalar std,
|
||||
bool swap_rb):
|
||||
vp_primary_infer_node(node_name,
|
||||
model_path,
|
||||
model_config_path,
|
||||
labels_path,
|
||||
input_width,
|
||||
input_height,
|
||||
batch_size,
|
||||
class_id_offset,
|
||||
scale, mean,
|
||||
std,
|
||||
swap_rb),
|
||||
score_threshold(score_threshold), type(type) {
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_openpose_detector_node::~vp_openpose_detector_node() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
void vp_openpose_detector_node::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
// make sure heads of output are not zero
|
||||
assert(raw_outputs.size() > 0);
|
||||
// as it just one head of output
|
||||
auto& netOutputBlobs = raw_outputs[0];
|
||||
// batch, number of heatmaps, h, w
|
||||
assert(netOutputBlobs.dims == 4);
|
||||
|
||||
auto start1 = std::chrono::system_clock::now();
|
||||
|
||||
int netOutputBlob_dims[3] = {netOutputBlobs.size[1], netOutputBlobs.size[2], netOutputBlobs.size[3]};
|
||||
// scan batch
|
||||
for (int b = 0; b < netOutputBlobs.size[0]; b++) {
|
||||
auto& frame_meta = frame_meta_with_batch[b];
|
||||
//auto h_scale = frame_meta->frame.rows * 1.0 / netOutputBlobs.size[2];
|
||||
//auto w_scale = frame_meta->frame.cols * 1.0 / netOutputBlobs.size[3];
|
||||
|
||||
cv::Mat netOutputBlob = cv::Mat(3, netOutputBlob_dims, CV_32F, const_cast<uchar*>(netOutputBlobs.ptr(b)));
|
||||
|
||||
int keyPointId = 0;
|
||||
std::vector<std::vector<KeyPoint>> detectedKeypoints;
|
||||
std::vector<KeyPoint> keyPointsList;
|
||||
|
||||
auto start2 = std::chrono::system_clock::now();
|
||||
std::vector<cv::Mat> netOutputParts;
|
||||
splitNetOutputBlobToParts(netOutputBlob, cv::Size(frame_meta->frame.cols, frame_meta->frame.rows), netOutputParts);
|
||||
|
||||
auto start3 = std::chrono::system_clock::now();
|
||||
for(int i = 0; i < points_map.at(type); ++i) {
|
||||
std::vector<KeyPoint> keyPoints;
|
||||
getKeyPoints(netOutputParts[i], score_threshold, keyPoints);
|
||||
|
||||
for(int i = 0; i< keyPoints.size(); ++i, ++keyPointId) {
|
||||
keyPoints[i].id = keyPointId;
|
||||
}
|
||||
|
||||
detectedKeypoints.push_back(keyPoints);
|
||||
keyPointsList.insert(keyPointsList.end(), keyPoints.begin(), keyPoints.end());
|
||||
}
|
||||
|
||||
std::vector<std::vector<ValidPair>> validPairs;
|
||||
std::set<int> invalidPairs;
|
||||
getValidPairs(netOutputParts, detectedKeypoints, validPairs, invalidPairs);
|
||||
|
||||
std::vector<std::vector<int>> personwiseKeypoints;
|
||||
getPersonwiseKeypoints(validPairs, invalidPairs, personwiseKeypoints);
|
||||
|
||||
// insert pose targets back into frame meta
|
||||
for(int n = 0; n < personwiseKeypoints.size(); ++n) {
|
||||
std::vector<vp_objects::vp_pose_keypoint> kps;
|
||||
for (int i = 0; i < personwiseKeypoints[n].size(); i++) {
|
||||
auto index = personwiseKeypoints[n][i];
|
||||
if (index != -1) {
|
||||
auto& p = keyPointsList[index];
|
||||
kps.push_back(vp_objects::vp_pose_keypoint {i, p.point.x, p.point.y, p.probability});
|
||||
}
|
||||
else {
|
||||
// point not detected
|
||||
kps.push_back(vp_objects::vp_pose_keypoint {i, -1, -1, 0});
|
||||
}
|
||||
}
|
||||
|
||||
auto pose_target = std::make_shared<vp_objects::vp_frame_pose_target>(type, kps);
|
||||
frame_meta->pose_targets.push_back(pose_target);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
293
nodes/infers/vp_openpose_detector_node.h
Executable file
293
nodes/infers/vp_openpose_detector_node.h
Executable file
@@ -0,0 +1,293 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <cmath>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
#include "../vp_primary_infer_node.h"
|
||||
#include "../../objects/vp_frame_pose_target.h"
|
||||
|
||||
|
||||
namespace vp_nodes {
|
||||
// body keypoints detector using openpose
|
||||
// https://github.com/CMU-Perceptual-Computing-Lab/openpose
|
||||
class vp_openpose_detector_node: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
float score_threshold;
|
||||
// pose type (model type)
|
||||
vp_objects::vp_pose_type type;
|
||||
|
||||
// map indexs for PAFs
|
||||
const std::map<vp_objects::vp_pose_type, std::vector<std::pair<int,int>>> mapIdxes_map = {
|
||||
{vp_objects::vp_pose_type::body_25, {{0,1}, {14,15}, {22,23}, {16,17}, {18,19}, {24,25}, {26,27}, {6,7}, {2,3}, {4,5}, {8,9}, {10,11}, {12,13}, {30,31}, {32,33}, {36,37}, {34,35},
|
||||
{38,39}, {40,41} ,{42,43}, {44,45}, {46,47}, {48,49}, {50,51}}},
|
||||
{vp_objects::vp_pose_type::coco, {{12,13}, {20,21}, {14,15}, {16,17}, {22,23}, {24,25}, {0,1},
|
||||
{2,3}, {4,5}, {6,7}, {8,9}, {10,11}, {28,29}, {30,31}, {34,35}, {32,33}, {36,37}, {18,19}, {26,27}}},
|
||||
{vp_objects::vp_pose_type::mpi_15, {{0,1}, {2,3}, {4,5}, {6,7}, {8,9}, {10,11}, {12,13}, {14,15}, {16,17}, {18,19}, {20,21}, {22,23}, {24,25}, {26,27}}},
|
||||
{vp_objects::vp_pose_type::hand, std::vector<std::pair<int,int>>()},
|
||||
{vp_objects::vp_pose_type::face, std::vector<std::pair<int,int>>()}
|
||||
};
|
||||
|
||||
// pose pairs for PAFs
|
||||
const std::map<vp_objects::vp_pose_type, std::vector<std::pair<int,int>>> posePairs_map = {
|
||||
{vp_objects::vp_pose_type::body_25, {{1,8}, {1,2}, {1,5}, {2,3}, {3,4}, {5,6}, {6,7}, {8,9},
|
||||
{9,10}, {10,11}, {8,12}, {12,13}, {13,14}, {1,0}, {0,15}, {15,17}, {0,16}, {16,18},
|
||||
{14,19}, {19,20}, {14,21}, {11,22}, {22,23}, {11,24}}},
|
||||
{vp_objects::vp_pose_type::coco, {{1,2}, {1,5}, {2,3}, {3,4}, {5,6}, {6,7},
|
||||
{1,8}, {8,9}, {9,10}, {1,11}, {11,12}, {12,13},
|
||||
{1,0}, {0,14}, {14,16}, {0,15}, {15,17}, {2,16},
|
||||
{5,17}}},
|
||||
{vp_objects::vp_pose_type::mpi_15, {{0,1}, {1,2}, {2,3}, {3,4}, {1,5}, {5,6}, {6,7}, {1,14}, {14,8}, {8,9}, {9,10}, {14,11}, {11,12}, {12,13}, {0, 2}, {0, 5}}},
|
||||
{vp_objects::vp_pose_type::hand, std::vector<std::pair<int,int>>()},
|
||||
{vp_objects::vp_pose_type::face, std::vector<std::pair<int,int>>()}
|
||||
};
|
||||
|
||||
// points count for each type of pose model
|
||||
const std::map<vp_objects::vp_pose_type, int> points_map = {
|
||||
{vp_objects::vp_pose_type::body_25, 25},
|
||||
{vp_objects::vp_pose_type::coco, 18},
|
||||
{vp_objects::vp_pose_type::mpi_15, 15},
|
||||
{vp_objects::vp_pose_type::hand, 0},
|
||||
{vp_objects::vp_pose_type::face, 0}
|
||||
};
|
||||
|
||||
// for postprocess purpose
|
||||
struct ValidPair{
|
||||
ValidPair(int aId,int bId,float score) {
|
||||
this->aId = aId;
|
||||
this->bId = bId;
|
||||
this->score = score;
|
||||
}
|
||||
|
||||
int aId;
|
||||
int bId;
|
||||
float score;
|
||||
};
|
||||
struct KeyPoint{
|
||||
KeyPoint(cv::Point point, float probability) {
|
||||
this->id = -1;
|
||||
this->point = point;
|
||||
this->probability = probability;
|
||||
}
|
||||
|
||||
int id;
|
||||
cv::Point point;
|
||||
float probability;
|
||||
};
|
||||
// for postprocess purpose end
|
||||
|
||||
void getKeyPoints(cv::Mat& probMap, double threshold, std::vector<KeyPoint>& keyPoints) {
|
||||
cv::Mat smoothProbMap;
|
||||
cv::GaussianBlur(probMap, smoothProbMap, cv::Size( 3, 3 ), 0, 0);
|
||||
|
||||
cv::Mat maskedProbMap;
|
||||
cv::threshold(smoothProbMap, maskedProbMap, threshold, 255, cv::THRESH_BINARY);
|
||||
|
||||
maskedProbMap.convertTo(maskedProbMap, CV_8U, 1);
|
||||
|
||||
std::vector<std::vector<cv::Point> > contours;
|
||||
cv::findContours(maskedProbMap, contours, cv::RETR_TREE, cv::CHAIN_APPROX_SIMPLE);
|
||||
|
||||
for(int i = 0; i < contours.size();++i) {
|
||||
cv::Mat blobMask = cv::Mat::zeros(smoothProbMap.rows, smoothProbMap.cols, smoothProbMap.type());
|
||||
cv::fillConvexPoly(blobMask, contours[i], cv::Scalar(1));
|
||||
|
||||
double maxVal;
|
||||
cv::Point maxLoc;
|
||||
|
||||
cv::minMaxLoc(smoothProbMap.mul(blobMask), 0, &maxVal, 0, &maxLoc);
|
||||
keyPoints.push_back(KeyPoint(maxLoc, probMap.at<float>(maxLoc.y, maxLoc.x)));
|
||||
}
|
||||
}
|
||||
|
||||
void splitNetOutputBlobToParts(cv::Mat& netOutputBlob, const cv::Size& targetSize, std::vector<cv::Mat>& netOutputParts) {
|
||||
int nParts = netOutputBlob.size[0];
|
||||
int h = netOutputBlob.size[1];
|
||||
int w = netOutputBlob.size[2];
|
||||
|
||||
for(int i = 0; i< nParts; ++i) {
|
||||
cv::Mat part(h, w, CV_32F, netOutputBlob.ptr(i));
|
||||
|
||||
cv::Mat resizedPart;
|
||||
|
||||
cv::resize(part, resizedPart, targetSize);
|
||||
|
||||
netOutputParts.push_back(resizedPart);
|
||||
}
|
||||
}
|
||||
|
||||
void populateInterpPoints(const cv::Point& a,const cv::Point& b,int numPoints,std::vector<cv::Point>& interpCoords){
|
||||
float xStep = ((float)(b.x - a.x)) / (float)(numPoints - 1);
|
||||
float yStep = ((float)(b.y - a.y)) / (float)(numPoints - 1);
|
||||
interpCoords.push_back(a);
|
||||
|
||||
for(int i = 1; i< numPoints - 1; ++i) {
|
||||
interpCoords.push_back(cv::Point(a.x + xStep * i, a.y + yStep * i));
|
||||
}
|
||||
interpCoords.push_back(b);
|
||||
}
|
||||
|
||||
void getValidPairs(const std::vector<cv::Mat>& netOutputParts,
|
||||
const std::vector<std::vector<KeyPoint>>& detectedKeypoints,
|
||||
std::vector<std::vector<ValidPair>>& validPairs,
|
||||
std::set<int>& invalidPairs) {
|
||||
|
||||
int nInterpSamples = 10;
|
||||
float confTh = 0.7;
|
||||
|
||||
for(int k = 0; k < mapIdxes_map.at(type).size(); ++k) {
|
||||
//A->B constitute a limb
|
||||
cv::Mat pafA = netOutputParts[mapIdxes_map.at(type)[k].first + points_map.at(type) + 1];
|
||||
cv::Mat pafB = netOutputParts[mapIdxes_map.at(type)[k].second + points_map.at(type) + 1];
|
||||
|
||||
//Find the keypoints for the first and second limb
|
||||
const std::vector<KeyPoint>& candA = detectedKeypoints[posePairs_map.at(type)[k].first];
|
||||
const std::vector<KeyPoint>& candB = detectedKeypoints[posePairs_map.at(type)[k].second];
|
||||
|
||||
int nA = candA.size();
|
||||
int nB = candB.size();
|
||||
|
||||
/*
|
||||
# If keypoints for the joint-pair is detected
|
||||
# check every joint in candA with every joint in candB
|
||||
# Calculate the distance vector between the two joints
|
||||
# Find the PAF values at a set of interpolated points between the joints
|
||||
# Use the above formula to compute a score to mark the connection valid
|
||||
*/
|
||||
|
||||
if(nA != 0 && nB != 0){
|
||||
std::vector<ValidPair> localValidPairs;
|
||||
|
||||
for(int i = 0; i< nA; ++i) {
|
||||
int maxJ = -1;
|
||||
float maxScore = -1;
|
||||
bool found = false;
|
||||
|
||||
for(int j = 0; j < nB; ++j) {
|
||||
std::pair<float, float> distance(candB[j].point.x - candA[i].point.x, candB[j].point.y - candA[i].point.y);
|
||||
|
||||
float norm = std::sqrt(distance.first * distance.first + distance.second * distance.second);
|
||||
|
||||
if(!norm) {
|
||||
continue;
|
||||
}
|
||||
|
||||
distance.first /= norm;
|
||||
distance.second /= norm;
|
||||
|
||||
//Find p(u)
|
||||
std::vector<cv::Point> interpCoords;
|
||||
populateInterpPoints(candA[i].point, candB[j].point, nInterpSamples, interpCoords);
|
||||
//Find L(p(u))
|
||||
std::vector<std::pair<float, float>> pafInterp;
|
||||
for(int l = 0; l < interpCoords.size();++l) {
|
||||
pafInterp.push_back(
|
||||
std::pair<float,float>(
|
||||
pafA.at<float>(interpCoords[l].y, interpCoords[l].x),
|
||||
pafB.at<float>(interpCoords[l].y, interpCoords[l].x)
|
||||
));
|
||||
}
|
||||
|
||||
std::vector<float> pafScores;
|
||||
float sumOfPafScores = 0;
|
||||
int numOverTh = 0;
|
||||
for(int l = 0; l< pafInterp.size(); ++l){
|
||||
float score = pafInterp[l].first * distance.first + pafInterp[l].second * distance.second;
|
||||
sumOfPafScores += score;
|
||||
if(score > score_threshold){
|
||||
++numOverTh;
|
||||
}
|
||||
|
||||
pafScores.push_back(score);
|
||||
}
|
||||
|
||||
float avgPafScore = sumOfPafScores / ((float)pafInterp.size());
|
||||
|
||||
if(((float)numOverTh)/((float)nInterpSamples) > confTh){
|
||||
if(avgPafScore > maxScore) {
|
||||
maxJ = j;
|
||||
maxScore = avgPafScore;
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
}/* j */
|
||||
|
||||
if(found) {
|
||||
localValidPairs.push_back(ValidPair(candA[i].id, candB[maxJ].id, maxScore));
|
||||
}
|
||||
|
||||
}/* i */
|
||||
|
||||
validPairs.push_back(localValidPairs);
|
||||
|
||||
} else {
|
||||
invalidPairs.insert(k);
|
||||
validPairs.push_back(std::vector<ValidPair>());
|
||||
}
|
||||
}/* k */
|
||||
}
|
||||
|
||||
void getPersonwiseKeypoints(const std::vector<std::vector<ValidPair>>& validPairs,
|
||||
const std::set<int>& invalidPairs,
|
||||
std::vector<std::vector<int>>& personwiseKeypoints) {
|
||||
for(int k = 0; k < mapIdxes_map.at(type).size(); ++k) {
|
||||
if(invalidPairs.find(k) != invalidPairs.end()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const std::vector<ValidPair>& localValidPairs(validPairs[k]);
|
||||
|
||||
int indexA(posePairs_map.at(type)[k].first);
|
||||
int indexB(posePairs_map.at(type)[k].second);
|
||||
|
||||
for(int i = 0; i< localValidPairs.size(); ++i) {
|
||||
bool found = false;
|
||||
int personIdx = -1;
|
||||
|
||||
for(int j = 0; !found && j < personwiseKeypoints.size();++j) {
|
||||
if(indexA < personwiseKeypoints[j].size() &&
|
||||
personwiseKeypoints[j][indexA] == localValidPairs[i].aId) {
|
||||
personIdx = j;
|
||||
found = true;
|
||||
}
|
||||
}/* j */
|
||||
|
||||
if(found) {
|
||||
personwiseKeypoints[personIdx].at(indexB) = localValidPairs[i].bId;
|
||||
}
|
||||
else if(k < points_map.at(type) - 1) {
|
||||
std::vector<int> lpkp(std::vector<int>(points_map.at(type), -1));
|
||||
|
||||
lpkp.at(indexA) = localValidPairs[i].aId;
|
||||
lpkp.at(indexB) = localValidPairs[i].bId;
|
||||
|
||||
personwiseKeypoints.push_back(lpkp);
|
||||
}
|
||||
|
||||
}/* i */
|
||||
}/* k */
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_openpose_detector_node(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string model_config_path = "",
|
||||
std::string labels_path = "",
|
||||
int input_width = 368,
|
||||
int input_height = 368,
|
||||
int batch_size = 1,
|
||||
int class_id_offset = 0,
|
||||
float score_threshold = 0.1,
|
||||
vp_objects::vp_pose_type type = vp_objects::vp_pose_type::coco,
|
||||
float scale = 1 / 255.0,
|
||||
cv::Scalar mean = cv::Scalar(0),
|
||||
cv::Scalar std = cv::Scalar(1),
|
||||
bool swap_rb = false);
|
||||
~vp_openpose_detector_node();
|
||||
};
|
||||
}
|
||||
66
nodes/infers/vp_ppocr_text_detector_node.cpp
Normal file
66
nodes/infers/vp_ppocr_text_detector_node.cpp
Normal file
@@ -0,0 +1,66 @@
|
||||
|
||||
#ifdef VP_WITH_PADDLE
|
||||
#include "vp_ppocr_text_detector_node.h"
|
||||
#include "../../objects/vp_frame_text_target.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_ppocr_text_detector_node::vp_ppocr_text_detector_node(std::string node_name,
|
||||
std::string det_model_dir,
|
||||
std::string cls_model_dir,
|
||||
std::string rec_model_dir,
|
||||
std::string rec_char_dict_path):
|
||||
vp_primary_infer_node(node_name, "") {
|
||||
// to make the code simpler, paddle_ocr has no more config other than model path
|
||||
// we need modify source code at ../../third_party/paddle_ocr/ if we need tune the parameters
|
||||
ocr = std::make_shared<PaddleOCR::PPOCR>(det_model_dir, cls_model_dir, rec_model_dir, rec_char_dict_path);
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_ppocr_text_detector_node::~vp_ppocr_text_detector_node() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
void vp_ppocr_text_detector_node::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
|
||||
}
|
||||
|
||||
// please refer to vp_infer_node::run_infer_combinations
|
||||
void vp_ppocr_text_detector_node::run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
std::vector<cv::Mat> mats_to_infer;
|
||||
|
||||
// start
|
||||
auto start_time = std::chrono::system_clock::now();
|
||||
|
||||
// prepare data, as same as base class
|
||||
vp_primary_infer_node::prepare(frame_meta_with_batch, mats_to_infer);
|
||||
auto prepare_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
start_time = std::chrono::system_clock::now();
|
||||
// call paddle ocr
|
||||
auto ocr_results = ocr->ocr(mats_to_infer);
|
||||
assert(ocr_results.size() == 1);
|
||||
|
||||
auto& ocr_result = ocr_results[0];
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
|
||||
// scan text detected in frame
|
||||
for (int i = 0; i < ocr_result.size(); i++) {
|
||||
/* code */
|
||||
auto& text = ocr_result[i];
|
||||
std::vector<std::pair<int, int>> region_vertexes = {{text.box[0][0], text.box[0][1]},
|
||||
{text.box[1][0], text.box[1][1]},
|
||||
{text.box[2][0], text.box[2][1]},
|
||||
{text.box[3][0], text.box[3][1]}};
|
||||
// create text target and update back into frame meta
|
||||
auto text_target = std::make_shared<vp_objects::vp_frame_text_target>(region_vertexes, text.text, text.score);
|
||||
frame_meta->text_targets.push_back(text_target);
|
||||
}
|
||||
auto infer_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
// can not calculate preprocess time and postprocess time, set 0 by default.
|
||||
vp_infer_node::infer_combinations_time_cost(mats_to_infer.size(), prepare_time.count(), 0, infer_time.count(), 0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
33
nodes/infers/vp_ppocr_text_detector_node.h
Normal file
33
nodes/infers/vp_ppocr_text_detector_node.h
Normal file
@@ -0,0 +1,33 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef VP_WITH_PADDLE
|
||||
#include "../vp_primary_infer_node.h"
|
||||
#include "../../third_party/paddle_ocr/include/paddleocr.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// ocr based on paddle ocr
|
||||
// paddle ocr project(official): https://github.com/PaddlePaddle/PaddleOCR
|
||||
// source code(modified based on official): ../../third_party/paddle_ocr
|
||||
// note:
|
||||
// this class is not based on opencv::dnn module but paddle, a few data members declared in base class are not usable any more(just ignore), such as vp_infer_node::net.
|
||||
class vp_ppocr_text_detector_node: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
// paddle ocr instance
|
||||
std::shared_ptr<PaddleOCR::PPOCR> ocr;
|
||||
protected:
|
||||
// we need a totally new logic for the whole infer combinations
|
||||
// no separate step pre-defined needed in base class
|
||||
virtual void run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
// override pure virtual method, for compile pass
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_ppocr_text_detector_node(std::string node_name,
|
||||
std::string det_model_dir = "",
|
||||
std::string cls_model_dir = "",
|
||||
std::string rec_model_dir = "",
|
||||
std::string rec_char_dict_path = "");
|
||||
~vp_ppocr_text_detector_node();
|
||||
};
|
||||
}
|
||||
#endif
|
||||
52
nodes/infers/vp_restoration_node.cpp
Normal file
52
nodes/infers/vp_restoration_node.cpp
Normal file
@@ -0,0 +1,52 @@
|
||||
|
||||
#include "vp_restoration_node.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_restoration_node::vp_restoration_node(std::string node_name,
|
||||
std::string realesrgan_bg_restoration_model,
|
||||
std::string face_restoration_model,
|
||||
bool restoration_to_osd):
|
||||
vp_primary_infer_node(node_name, ""),
|
||||
restoration_to_osd(restoration_to_osd) {
|
||||
/* init net*/
|
||||
restoration_net = cv::dnn::readNetFromONNX(realesrgan_bg_restoration_model);
|
||||
/* to-do, load face restoration model*/
|
||||
#ifdef VP_WITH_CUDA
|
||||
//restoration_net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
|
||||
//restoration_net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
|
||||
#endif
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_restoration_node::~vp_restoration_node() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
// please refer to vp_infer_node::run_infer_combinations
|
||||
void vp_restoration_node::run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
|
||||
// 4x larger for RealESRGAN_x4plus model
|
||||
cv::Mat target_blob = cv::dnn::blobFromImage(frame_meta->frame, 1 / 255.0, cv::Size(frame_meta->frame.cols, frame_meta->frame.rows), (0, 0, 0), true);
|
||||
std::vector<cv::Mat> target_outputs;
|
||||
restoration_net.setInput(target_blob);
|
||||
restoration_net.forward(target_outputs, restoration_net.getUnconnectedOutLayersNames());
|
||||
|
||||
// parse to image
|
||||
auto& output = target_outputs[0];
|
||||
cv::Mat output_channel_last;
|
||||
cv::transposeND(output, {0,2,3,1}, output_channel_last);
|
||||
cv::Mat img_result(output_channel_last.size[1], output_channel_last.size[2], CV_32FC3, output_channel_last.data);
|
||||
img_result.convertTo(img_result, CV_8U, 255);
|
||||
|
||||
// update back to frame meta
|
||||
auto& bg = restoration_to_osd ? frame_meta->osd_frame : frame_meta->frame;
|
||||
cv::cvtColor(img_result, bg, cv::COLOR_RGB2BGR);
|
||||
}
|
||||
|
||||
void vp_restoration_node::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
|
||||
}
|
||||
}
|
||||
27
nodes/infers/vp_restoration_node.h
Normal file
27
nodes/infers/vp_restoration_node.h
Normal file
@@ -0,0 +1,27 @@
|
||||
#pragma once
|
||||
|
||||
#include "../vp_primary_infer_node.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// general image restoration node using Real-ESRGAN
|
||||
// used to enhance quality of frames in video, see more: https://github.com/xinntao/Real-ESRGAN
|
||||
class vp_restoration_node: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
/* onnx network using opencv::dnn as backend */
|
||||
cv::dnn::Net restoration_net;
|
||||
bool restoration_to_osd = true;
|
||||
protected:
|
||||
// we need a totally new logic for the whole infer combinations
|
||||
// no separate step pre-defined needed in base class
|
||||
virtual void run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
// override pure virtual method, for compile pass
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_restoration_node(std::string node_name,
|
||||
std::string realesrgan_bg_restoration_model,
|
||||
std::string face_restoration_model = "",
|
||||
bool restoration_to_osd = true);
|
||||
~vp_restoration_node();
|
||||
};
|
||||
}
|
||||
183
nodes/infers/vp_sface_feature_encoder_node.cpp
Normal file
183
nodes/infers/vp_sface_feature_encoder_node.cpp
Normal file
@@ -0,0 +1,183 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include "vp_sface_feature_encoder_node.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_sface_feature_encoder_node::vp_sface_feature_encoder_node(std::string node_name, std::string model_path):
|
||||
vp_secondary_infer_node(node_name, model_path,
|
||||
"", "",
|
||||
112, 112,
|
||||
1, std::vector<int>(), 0, 0,
|
||||
0, 1, cv::Scalar()) {
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_sface_feature_encoder_node::~vp_sface_feature_encoder_node() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
void vp_sface_feature_encoder_node::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
// make sure heads of output are not zero
|
||||
assert(raw_outputs.size() > 0);
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
|
||||
// just one head of output
|
||||
auto& output = raw_outputs[0];
|
||||
assert(output.dims == 2);
|
||||
|
||||
auto count = output.rows;
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
|
||||
// update feature data back into frame meta
|
||||
for (int i = 0; i < count; i++) {
|
||||
cv::Mat feature = output.row(i);
|
||||
|
||||
for (int j = 0; j < feature.cols; j++) {
|
||||
frame_meta->face_targets[i]->embeddings.push_back(feature.at<float>(0, j));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// refer to vp_secondary_infer_node::prepare
|
||||
void vp_sface_feature_encoder_node::prepare(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch, std::vector<cv::Mat>& mats_to_infer) {
|
||||
// only one by one for secondary infer node
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
|
||||
// align face and crop
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
|
||||
// batch by batch inside single frame
|
||||
for (auto& i : frame_meta->face_targets) {
|
||||
// align and crop
|
||||
float face_keypoints[5][2] =
|
||||
{{i->key_points[0].first, i->key_points[0].second},
|
||||
{i->key_points[1].first, i->key_points[1].second},
|
||||
{i->key_points[2].first, i->key_points[2].second},
|
||||
{i->key_points[3].first, i->key_points[3].second},
|
||||
{i->key_points[4].first, i->key_points[4].second}};
|
||||
cv::Mat aligned_face;
|
||||
alignCrop(frame_meta->frame, face_keypoints, aligned_face);
|
||||
mats_to_infer.push_back(aligned_face);
|
||||
}
|
||||
}
|
||||
|
||||
cv::Mat vp_sface_feature_encoder_node::getSimilarityTransformMatrix(float src[5][2]) {
|
||||
using namespace cv;
|
||||
float dst[5][2] = { {38.2946f, 51.6963f}, {73.5318f, 51.5014f}, {56.0252f, 71.7366f}, {41.5493f, 92.3655f}, {70.7299f, 92.2041f} };
|
||||
float avg0 = (src[0][0] + src[1][0] + src[2][0] + src[3][0] + src[4][0]) / 5;
|
||||
float avg1 = (src[0][1] + src[1][1] + src[2][1] + src[3][1] + src[4][1]) / 5;
|
||||
//Compute mean of src and dst.
|
||||
float src_mean[2] = { avg0, avg1 };
|
||||
float dst_mean[2] = { 56.0262f, 71.9008f };
|
||||
//Subtract mean from src and dst.
|
||||
float src_demean[5][2];
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
for (int j = 0; j < 5; j++)
|
||||
{
|
||||
src_demean[j][i] = src[j][i] - src_mean[i];
|
||||
}
|
||||
}
|
||||
float dst_demean[5][2];
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
for (int j = 0; j < 5; j++)
|
||||
{
|
||||
dst_demean[j][i] = dst[j][i] - dst_mean[i];
|
||||
}
|
||||
}
|
||||
double A00 = 0.0, A01 = 0.0, A10 = 0.0, A11 = 0.0;
|
||||
for (int i = 0; i < 5; i++)
|
||||
A00 += dst_demean[i][0] * src_demean[i][0];
|
||||
A00 = A00 / 5;
|
||||
for (int i = 0; i < 5; i++)
|
||||
A01 += dst_demean[i][0] * src_demean[i][1];
|
||||
A01 = A01 / 5;
|
||||
for (int i = 0; i < 5; i++)
|
||||
A10 += dst_demean[i][1] * src_demean[i][0];
|
||||
A10 = A10 / 5;
|
||||
for (int i = 0; i < 5; i++)
|
||||
A11 += dst_demean[i][1] * src_demean[i][1];
|
||||
A11 = A11 / 5;
|
||||
Mat A = (Mat_<double>(2, 2) << A00, A01, A10, A11);
|
||||
double d[2] = { 1.0, 1.0 };
|
||||
double detA = A00 * A11 - A01 * A10;
|
||||
if (detA < 0)
|
||||
d[1] = -1;
|
||||
double T[3][3] = { {1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0} };
|
||||
Mat s, u, vt, v;
|
||||
SVD::compute(A, s, u, vt);
|
||||
double smax = s.ptr<double>(0)[0]>s.ptr<double>(1)[0] ? s.ptr<double>(0)[0] : s.ptr<double>(1)[0];
|
||||
double tol = smax * 2 * FLT_MIN;
|
||||
int rank = 0;
|
||||
if (s.ptr<double>(0)[0]>tol)
|
||||
rank += 1;
|
||||
if (s.ptr<double>(1)[0]>tol)
|
||||
rank += 1;
|
||||
double arr_u[2][2] = { {u.ptr<double>(0)[0], u.ptr<double>(0)[1]}, {u.ptr<double>(1)[0], u.ptr<double>(1)[1]} };
|
||||
double arr_vt[2][2] = { {vt.ptr<double>(0)[0], vt.ptr<double>(0)[1]}, {vt.ptr<double>(1)[0], vt.ptr<double>(1)[1]} };
|
||||
double det_u = arr_u[0][0] * arr_u[1][1] - arr_u[0][1] * arr_u[1][0];
|
||||
double det_vt = arr_vt[0][0] * arr_vt[1][1] - arr_vt[0][1] * arr_vt[1][0];
|
||||
if (rank == 1)
|
||||
{
|
||||
if ((det_u*det_vt) > 0)
|
||||
{
|
||||
Mat uvt = u*vt;
|
||||
T[0][0] = uvt.ptr<double>(0)[0];
|
||||
T[0][1] = uvt.ptr<double>(0)[1];
|
||||
T[1][0] = uvt.ptr<double>(1)[0];
|
||||
T[1][1] = uvt.ptr<double>(1)[1];
|
||||
}
|
||||
else
|
||||
{
|
||||
double temp = d[1];
|
||||
d[1] = -1;
|
||||
Mat D = (Mat_<double>(2, 2) << d[0], 0.0, 0.0, d[1]);
|
||||
Mat Dvt = D*vt;
|
||||
Mat uDvt = u*Dvt;
|
||||
T[0][0] = uDvt.ptr<double>(0)[0];
|
||||
T[0][1] = uDvt.ptr<double>(0)[1];
|
||||
T[1][0] = uDvt.ptr<double>(1)[0];
|
||||
T[1][1] = uDvt.ptr<double>(1)[1];
|
||||
d[1] = temp;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Mat D = (Mat_<double>(2, 2) << d[0], 0.0, 0.0, d[1]);
|
||||
Mat Dvt = D*vt;
|
||||
Mat uDvt = u*Dvt;
|
||||
T[0][0] = uDvt.ptr<double>(0)[0];
|
||||
T[0][1] = uDvt.ptr<double>(0)[1];
|
||||
T[1][0] = uDvt.ptr<double>(1)[0];
|
||||
T[1][1] = uDvt.ptr<double>(1)[1];
|
||||
}
|
||||
double var1 = 0.0;
|
||||
for (int i = 0; i < 5; i++)
|
||||
var1 += src_demean[i][0] * src_demean[i][0];
|
||||
var1 = var1 / 5;
|
||||
double var2 = 0.0;
|
||||
for (int i = 0; i < 5; i++)
|
||||
var2 += src_demean[i][1] * src_demean[i][1];
|
||||
var2 = var2 / 5;
|
||||
double scale = 1.0 / (var1 + var2)* (s.ptr<double>(0)[0] * d[0] + s.ptr<double>(1)[0] * d[1]);
|
||||
double TS[2];
|
||||
TS[0] = T[0][0] * src_mean[0] + T[0][1] * src_mean[1];
|
||||
TS[1] = T[1][0] * src_mean[0] + T[1][1] * src_mean[1];
|
||||
T[0][2] = dst_mean[0] - scale*TS[0];
|
||||
T[1][2] = dst_mean[1] - scale*TS[1];
|
||||
T[0][0] *= scale;
|
||||
T[0][1] *= scale;
|
||||
T[1][0] *= scale;
|
||||
T[1][1] *= scale;
|
||||
Mat transform_mat = (Mat_<double>(2, 3) << T[0][0], T[0][1], T[0][2], T[1][0], T[1][1], T[1][2]);
|
||||
return transform_mat;
|
||||
}
|
||||
|
||||
void vp_sface_feature_encoder_node::alignCrop(cv::Mat& _src_img, float _src_point[5][2], cv::Mat& _aligned_img) {
|
||||
cv::Mat warp_mat = getSimilarityTransformMatrix(_src_point);
|
||||
cv::warpAffine(_src_img, _aligned_img, warp_mat, cv::Size(input_width, input_height), cv::INTER_LINEAR);
|
||||
}
|
||||
}
|
||||
27
nodes/infers/vp_sface_feature_encoder_node.h
Normal file
27
nodes/infers/vp_sface_feature_encoder_node.h
Normal file
@@ -0,0 +1,27 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../vp_secondary_infer_node.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// face feature encoder based on SFace, update embeddings of vp_frame_face_target
|
||||
// https://github.com/opencv/opencv/blob/4.x/modules/objdetect/src/face_recognize.cpp
|
||||
// https://github.com/zhongyy/SFace
|
||||
class vp_sface_feature_encoder_node: public vp_secondary_infer_node
|
||||
{
|
||||
private:
|
||||
// get transform matrix for aglin face
|
||||
cv::Mat getSimilarityTransformMatrix(float src[5][2]);
|
||||
// align and crop
|
||||
void alignCrop(cv::Mat& _src_img, float _src_point[5][2], cv::Mat& _aligned_img);
|
||||
protected:
|
||||
// override prepare as sface has an additional logic for face align before preprocess
|
||||
virtual void prepare(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch, std::vector<cv::Mat>& mats_to_infer) override;
|
||||
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_sface_feature_encoder_node(std::string node_name, std::string model_path);
|
||||
~vp_sface_feature_encoder_node();
|
||||
};
|
||||
|
||||
}
|
||||
64
nodes/infers/vp_trt_vehicle_color_classifier.cpp
Normal file
64
nodes/infers/vp_trt_vehicle_color_classifier.cpp
Normal file
@@ -0,0 +1,64 @@
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "vp_trt_vehicle_color_classifier.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
vp_trt_vehicle_color_classifier::vp_trt_vehicle_color_classifier(std::string node_name,
|
||||
std::string vehicle_color_cls_model_path,
|
||||
std::vector<int> p_class_ids_applied_to,
|
||||
int min_width_applied_to, int min_height_applied_to):
|
||||
vp_secondary_infer_node(node_name, "", "", "", 1, 1, 1, p_class_ids_applied_to, min_width_applied_to, min_height_applied_to) {
|
||||
vehicle_color_classifier = std::make_shared<trt_vehicle::VehicleColorClassifier>(vehicle_color_cls_model_path);
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_trt_vehicle_color_classifier::~vp_trt_vehicle_color_classifier() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
void vp_trt_vehicle_color_classifier::run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
std::vector<cv::Mat> mats_to_infer;
|
||||
|
||||
// start
|
||||
auto start_time = std::chrono::system_clock::now();
|
||||
|
||||
// prepare data, as same as base class
|
||||
vp_secondary_infer_node::prepare(frame_meta_with_batch, mats_to_infer);
|
||||
auto prepare_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
// infer using trt_vehicle library
|
||||
start_time = std::chrono::system_clock::now();
|
||||
std::vector<trt_vehicle::ObjCls> vehicles_color;
|
||||
vehicle_color_classifier->classify(mats_to_infer, vehicles_color);
|
||||
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
auto index = 0;
|
||||
for (int i = 0; i < vehicles_color.size(); i++) {
|
||||
for (int j = index; j < frame_meta->targets.size(); j++) {
|
||||
// need apply or not?
|
||||
if (!need_apply(frame_meta->targets[j]->primary_class_id, frame_meta->targets[j]->width, frame_meta->targets[j]->height)) {
|
||||
// continue as its primary_class_id is not in p_class_ids_applied_to
|
||||
continue;
|
||||
}
|
||||
|
||||
// update back to frame meta
|
||||
frame_meta->targets[j]->secondary_class_ids.push_back(vehicles_color[i].class_);
|
||||
frame_meta->targets[j]->secondary_scores.push_back(vehicles_color[i].score);
|
||||
frame_meta->targets[j]->secondary_labels.push_back(vehicles_color[i].label);
|
||||
|
||||
// break as we found the right target!
|
||||
index = j + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
auto infer_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
// can not calculate preprocess time and postprocess time, set 0 by default.
|
||||
vp_infer_node::infer_combinations_time_cost(mats_to_infer.size(), prepare_time.count(), 0, infer_time.count(), 0);
|
||||
}
|
||||
|
||||
void vp_trt_vehicle_color_classifier::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
26
nodes/infers/vp_trt_vehicle_color_classifier.h
Normal file
26
nodes/infers/vp_trt_vehicle_color_classifier.h
Normal file
@@ -0,0 +1,26 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "../vp_secondary_infer_node.h"
|
||||
#include "../../third_party/trt_vehicle/models/vehicle_color_classifier.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// vehicle color classifier based on tensorrt using trt_vehicle library
|
||||
// update secondary_class_ids/secondary_labels/secondary_scores of vp_frame_target.
|
||||
class vp_trt_vehicle_color_classifier: public vp_secondary_infer_node
|
||||
{
|
||||
private:
|
||||
/* data */
|
||||
std::shared_ptr<trt_vehicle::VehicleColorClassifier> vehicle_color_classifier = nullptr;
|
||||
protected:
|
||||
// we need a totally new logic for the whole infer combinations
|
||||
// no separate step pre-defined needed in base class
|
||||
virtual void run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
// override pure virtual method, for compile pass
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_trt_vehicle_color_classifier(std::string node_name, std::string vehicle_color_cls_model_path = "", std::vector<int> p_class_ids_applied_to = std::vector<int>(), int min_width_applied_to = 0, int min_height_applied_to = 0);
|
||||
~vp_trt_vehicle_color_classifier();
|
||||
};
|
||||
}
|
||||
#endif
|
||||
64
nodes/infers/vp_trt_vehicle_detector.cpp
Normal file
64
nodes/infers/vp_trt_vehicle_detector.cpp
Normal file
@@ -0,0 +1,64 @@
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "vp_trt_vehicle_detector.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_trt_vehicle_detector::vp_trt_vehicle_detector(std::string node_name,
|
||||
std::string vehicle_det_model_path):
|
||||
vp_primary_infer_node(node_name, "") {
|
||||
vehicle_detector = std::make_shared<trt_vehicle::VehicleDetector>(vehicle_det_model_path);
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_trt_vehicle_detector::~vp_trt_vehicle_detector() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
// please refer to vp_infer_node::run_infer_combinations
|
||||
void vp_trt_vehicle_detector::run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
std::vector<cv::Mat> mats_to_infer;
|
||||
|
||||
// start
|
||||
auto start_time = std::chrono::system_clock::now();
|
||||
|
||||
// prepare data, as same as base class
|
||||
vp_primary_infer_node::prepare(frame_meta_with_batch, mats_to_infer);
|
||||
auto prepare_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
start_time = std::chrono::system_clock::now();
|
||||
std::vector<std::vector<trt_vehicle::ObjBox>> vehicles;
|
||||
vehicle_detector->detect(mats_to_infer, vehicles);
|
||||
|
||||
assert(vehicles.size() == 1);
|
||||
auto& vehicle_list = vehicles[0];
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
|
||||
for (int i = 0; i < vehicle_list.size(); i++) {
|
||||
auto& objbox = vehicle_list[i];
|
||||
|
||||
// check value range
|
||||
objbox.x = std::max(objbox.x, 0);
|
||||
objbox.y = std::max(objbox.y, 0);
|
||||
objbox.width = std::min(objbox.width, frame_meta->frame.cols - objbox.x);
|
||||
objbox.height = std::min(objbox.height, frame_meta->frame.rows - objbox.y);
|
||||
if (objbox.width <= 0 || objbox.height <= 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto target = std::make_shared<vp_objects::vp_frame_target>(objbox.x, objbox.y, objbox.width, objbox.height,
|
||||
objbox.class_, objbox.score, frame_meta->frame_index, frame_meta->channel_index, objbox.label);
|
||||
// create target and update back into frame meta
|
||||
frame_meta->targets.push_back(target);
|
||||
}
|
||||
auto infer_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
// can not calculate preprocess time and postprocess time, set 0 by default.
|
||||
vp_infer_node::infer_combinations_time_cost(mats_to_infer.size(), prepare_time.count(), 0, infer_time.count(), 0);
|
||||
}
|
||||
|
||||
void vp_trt_vehicle_detector::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
24
nodes/infers/vp_trt_vehicle_detector.h
Normal file
24
nodes/infers/vp_trt_vehicle_detector.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "../vp_primary_infer_node.h"
|
||||
#include "../../third_party/trt_vehicle/models/vehicle_detector.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// vehicle detector based on tensorrt using trt_vehicle library
|
||||
class vp_trt_vehicle_detector: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
std::shared_ptr<trt_vehicle::VehicleDetector> vehicle_detector = nullptr;
|
||||
protected:
|
||||
// we need a totally new logic for the whole infer combinations
|
||||
// no separate step pre-defined needed in base class
|
||||
virtual void run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
// override pure virtual method, for compile pass
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_trt_vehicle_detector(std::string node_name, std::string vehicle_det_model_path = "");
|
||||
~vp_trt_vehicle_detector();
|
||||
};
|
||||
}
|
||||
#endif
|
||||
65
nodes/infers/vp_trt_vehicle_feature_encoder.cpp
Normal file
65
nodes/infers/vp_trt_vehicle_feature_encoder.cpp
Normal file
@@ -0,0 +1,65 @@
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "vp_trt_vehicle_feature_encoder.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
vp_trt_vehicle_feature_encoder::vp_trt_vehicle_feature_encoder(std::string node_name,
|
||||
std::string vehicle_feature_model_path,
|
||||
std::vector<int> p_class_ids_applied_to,
|
||||
int min_width_applied_to, int min_height_applied_to):
|
||||
vp_secondary_infer_node(node_name, "", "", "", 1, 1, 1, p_class_ids_applied_to, min_width_applied_to, min_height_applied_to) {
|
||||
vehicle_feature_encoder = std::make_shared<trt_vehicle::VehicleFeatureEncoder>(vehicle_feature_model_path);
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_trt_vehicle_feature_encoder::~vp_trt_vehicle_feature_encoder() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
void vp_trt_vehicle_feature_encoder::run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
std::vector<cv::Mat> mats_to_infer;
|
||||
|
||||
// start
|
||||
auto start_time = std::chrono::system_clock::now();
|
||||
|
||||
// prepare data, as same as base class
|
||||
vp_secondary_infer_node::prepare(frame_meta_with_batch, mats_to_infer);
|
||||
auto prepare_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
// infer using trt_vehicle library
|
||||
start_time = std::chrono::system_clock::now();
|
||||
std::vector<std::vector<float>> vehicles_feature;
|
||||
vehicle_feature_encoder->encode(mats_to_infer, vehicles_feature);
|
||||
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
auto index = 0;
|
||||
for (int i = 0; i < vehicles_feature.size(); i++) {
|
||||
for (int j = index; j < frame_meta->targets.size(); j++) {
|
||||
// need apply or not?
|
||||
if (!need_apply(frame_meta->targets[j]->primary_class_id, frame_meta->targets[j]->width, frame_meta->targets[j]->height)) {
|
||||
// continue as its primary_class_id is not in p_class_ids_applied_to
|
||||
continue;
|
||||
}
|
||||
|
||||
// update back to frame meta
|
||||
auto& features = vehicles_feature[i];
|
||||
for(int k = 0; k < features.size(); k++) {
|
||||
frame_meta->targets[j]->embeddings.push_back(features[k]);
|
||||
}
|
||||
|
||||
// break as we found the right target!
|
||||
index = j + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
auto infer_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
// can not calculate preprocess time and postprocess time, set 0 by default.
|
||||
vp_infer_node::infer_combinations_time_cost(mats_to_infer.size(), prepare_time.count(), 0, infer_time.count(), 0);
|
||||
}
|
||||
|
||||
void vp_trt_vehicle_feature_encoder::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
26
nodes/infers/vp_trt_vehicle_feature_encoder.h
Normal file
26
nodes/infers/vp_trt_vehicle_feature_encoder.h
Normal file
@@ -0,0 +1,26 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "../vp_secondary_infer_node.h"
|
||||
#include "../../third_party/trt_vehicle/models/vehicle_feature_encoder.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// vehicle feature encoder based on tensorrt using trt_vehicle library
|
||||
// update embeddings of vp_frame_target
|
||||
class vp_trt_vehicle_feature_encoder: public vp_secondary_infer_node
|
||||
{
|
||||
private:
|
||||
/* data */
|
||||
std::shared_ptr<trt_vehicle::VehicleFeatureEncoder> vehicle_feature_encoder = nullptr;
|
||||
protected:
|
||||
// we need a totally new logic for the whole infer combinations
|
||||
// no separate step pre-defined needed in base class
|
||||
virtual void run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
// override pure virtual method, for compile pass
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_trt_vehicle_feature_encoder(std::string node_name, std::string vehicle_feature_model_path = "", std::vector<int> p_class_ids_applied_to = std::vector<int>(), int min_width_applied_to = 0, int min_height_applied_to = 0);
|
||||
~vp_trt_vehicle_feature_encoder();
|
||||
};
|
||||
}
|
||||
#endif
|
||||
71
nodes/infers/vp_trt_vehicle_plate_detector.cpp
Normal file
71
nodes/infers/vp_trt_vehicle_plate_detector.cpp
Normal file
@@ -0,0 +1,71 @@
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "vp_trt_vehicle_plate_detector.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_trt_vehicle_plate_detector::vp_trt_vehicle_plate_detector(std::string node_name,
|
||||
std::string plate_det_model_path,
|
||||
std::string char_rec_model_path):
|
||||
vp_secondary_infer_node(node_name, "") {
|
||||
plate_detector = std::make_shared<trt_vehicle::VehiclePlateDetector>(plate_det_model_path, char_rec_model_path);
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_trt_vehicle_plate_detector::~vp_trt_vehicle_plate_detector() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
void vp_trt_vehicle_plate_detector::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
|
||||
}
|
||||
|
||||
// please refer to vp_infer_node::run_infer_combinations
|
||||
void vp_trt_vehicle_plate_detector::run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
std::vector<cv::Mat> mats_to_infer;
|
||||
|
||||
// start
|
||||
auto start_time = std::chrono::system_clock::now();
|
||||
|
||||
// prepare data, as same as base class
|
||||
vp_secondary_infer_node::prepare(frame_meta_with_batch, mats_to_infer);
|
||||
auto prepare_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
start_time = std::chrono::system_clock::now();
|
||||
// here we detect vehicle plate for all target(like car/bus) in frame
|
||||
std::vector<std::vector<trt_vehicle::Plate>> plates;
|
||||
plate_detector->detect(mats_to_infer, plates, true); // detect at most 1 plate for each image
|
||||
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
for (int i = 0; i < plates.size(); i++) {
|
||||
for (int j = 0; j < plates[i].size(); j++) {
|
||||
auto& plate = plates[i][j];
|
||||
|
||||
// only plate detection but no recognition result
|
||||
if (plate.text.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// check value range
|
||||
auto x = std::max(0, plate.x + frame_meta->targets[i]->x - 10); // offset
|
||||
auto y = std::max(0, plate.y + frame_meta->targets[i]->y - 8); // offset
|
||||
auto w = std::min(plate.width, frame_meta->frame.cols - x);
|
||||
auto h = std::min(plate.height, frame_meta->frame.rows - y);
|
||||
if (w <= 0 || h <=0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// create sub target and update back into frame meta
|
||||
// we treat vehicle plate as sub target of those in vp_frame_meta.targets
|
||||
auto sub_target = std::make_shared<vp_objects::vp_sub_target>(x, y, w, h,
|
||||
-1, 0, plate.color + "_" + plate.text, frame_meta->frame_index, frame_meta->channel_index);
|
||||
frame_meta->targets[i]->sub_targets.push_back(sub_target);
|
||||
}
|
||||
}
|
||||
auto infer_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
// can not calculate preprocess time and postprocess time, set 0 by default.
|
||||
vp_infer_node::infer_combinations_time_cost(mats_to_infer.size(), prepare_time.count(), 0, infer_time.count(), 0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
29
nodes/infers/vp_trt_vehicle_plate_detector.h
Normal file
29
nodes/infers/vp_trt_vehicle_plate_detector.h
Normal file
@@ -0,0 +1,29 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "../vp_secondary_infer_node.h"
|
||||
#include "../../objects/vp_sub_target.h"
|
||||
#include "../../third_party/trt_vehicle/models/vehicle_plate_detector.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// vehicle plate detector based on tensorrt using trt_vehicle library
|
||||
// source code: ../../third_party/trt_vehicle
|
||||
// note: derived from vp_secondary_infer_node since it detects plates on small cropped images, which is different from vp_trt_vehicle_plate_detector_v2 class
|
||||
// this class is not based on opencv::dnn module but tensorrt, a few data members declared in base class are not usable any more(just ignore), such as vp_infer_node::net.
|
||||
class vp_trt_vehicle_plate_detector: public vp_secondary_infer_node
|
||||
{
|
||||
private:
|
||||
/* data */
|
||||
std::shared_ptr<trt_vehicle::VehiclePlateDetector> plate_detector = nullptr;
|
||||
protected:
|
||||
// we need a totally new logic for the whole infer combinations
|
||||
// no separate step pre-defined needed in base class
|
||||
virtual void run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
// override pure virtual method, for compile pass
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_trt_vehicle_plate_detector(std::string node_name, std::string plate_det_model_path = "", std::string char_rec_model_path = "");
|
||||
~vp_trt_vehicle_plate_detector();
|
||||
};
|
||||
}
|
||||
#endif
|
||||
75
nodes/infers/vp_trt_vehicle_plate_detector_v2.cpp
Normal file
75
nodes/infers/vp_trt_vehicle_plate_detector_v2.cpp
Normal file
@@ -0,0 +1,75 @@
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "vp_trt_vehicle_plate_detector_v2.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_trt_vehicle_plate_detector_v2::vp_trt_vehicle_plate_detector_v2(std::string node_name,
|
||||
std::string plate_det_model_path,
|
||||
std::string char_rec_model_path):
|
||||
vp_primary_infer_node(node_name, "") {
|
||||
plate_detector = std::make_shared<trt_vehicle::VehiclePlateDetector>(plate_det_model_path, char_rec_model_path);
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_trt_vehicle_plate_detector_v2::~vp_trt_vehicle_plate_detector_v2() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
void vp_trt_vehicle_plate_detector_v2::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
|
||||
}
|
||||
|
||||
// please refer to vp_infer_node::run_infer_combinations
|
||||
void vp_trt_vehicle_plate_detector_v2::run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
std::vector<cv::Mat> mats_to_infer;
|
||||
|
||||
// start
|
||||
auto start_time = std::chrono::system_clock::now();
|
||||
|
||||
// prepare data, as same as base class
|
||||
vp_primary_infer_node::prepare(frame_meta_with_batch, mats_to_infer);
|
||||
auto prepare_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
start_time = std::chrono::system_clock::now();
|
||||
// here we detect vehicle plate on the whole big frame
|
||||
std::vector<std::vector<trt_vehicle::Plate>> plates;
|
||||
plate_detector->detect(mats_to_infer, plates);
|
||||
|
||||
for (int i = 0; i < plates.size(); i++) {
|
||||
auto& frame_meta = frame_meta_with_batch[i];
|
||||
for (int j = 0; j < plates[i].size(); j++) {
|
||||
auto& plate = plates[i][j];
|
||||
|
||||
// only plate detection but no recognition result
|
||||
if (plate.text.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// check value range
|
||||
auto x = std::max(0, plate.x);
|
||||
auto y = std::max(0, plate.y);
|
||||
auto w = std::min(plate.width, frame_meta->frame.cols - x);
|
||||
auto h = std::min(plate.height, frame_meta->frame.rows - y);
|
||||
if (w <= 0 || h <=0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// create target and update back into frame meta
|
||||
/*
|
||||
we treat vehicle plate as target in vp_frame_meta.targets,
|
||||
1. x,y,width,height in vp_frame_target stand for location of plate in frame
|
||||
2. primary_label in vp_frame_target stands for plate color and plate text, which combined with '_'
|
||||
*/
|
||||
auto target = std::make_shared<vp_objects::vp_frame_target>(x, y, w, h,
|
||||
-1, 0, frame_meta->frame_index, frame_meta->channel_index, plate.color + "_" + plate.text);
|
||||
frame_meta->targets.push_back(target);
|
||||
VP_INFO("plate-color:" + plate.color + " plate-text:" + plate.text);
|
||||
}
|
||||
}
|
||||
auto infer_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
// can not calculate preprocess time and postprocess time, set 0 by default.
|
||||
vp_infer_node::infer_combinations_time_cost(mats_to_infer.size(), prepare_time.count(), 0, infer_time.count(), 0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
28
nodes/infers/vp_trt_vehicle_plate_detector_v2.h
Normal file
28
nodes/infers/vp_trt_vehicle_plate_detector_v2.h
Normal file
@@ -0,0 +1,28 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "../vp_primary_infer_node.h"
|
||||
#include "../../third_party/trt_vehicle/models/vehicle_plate_detector.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// vehicle plate detector based on tensorrt using trt_vehicle library
|
||||
// source code: ../../third_party/trt_vehicle
|
||||
// note: derived from vp_primary_infer_node since it detects plates on the whole big frame, which is different from vp_trt_vehicle_plate_detector class
|
||||
// this class is not based on opencv::dnn module but tensorrt, a few data members declared in base class are not usable any more(just ignore), such as vp_infer_node::net.
|
||||
class vp_trt_vehicle_plate_detector_v2: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
/* data */
|
||||
std::shared_ptr<trt_vehicle::VehiclePlateDetector> plate_detector = nullptr;
|
||||
protected:
|
||||
// we need a totally new logic for the whole infer combinations
|
||||
// no separate step pre-defined needed in base class
|
||||
virtual void run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
// override pure virtual method, for compile pass
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_trt_vehicle_plate_detector_v2(std::string node_name, std::string plate_det_model_path = "", std::string char_rec_model_path = "");
|
||||
~vp_trt_vehicle_plate_detector_v2();
|
||||
};
|
||||
}
|
||||
#endif
|
||||
64
nodes/infers/vp_trt_vehicle_scanner.cpp
Normal file
64
nodes/infers/vp_trt_vehicle_scanner.cpp
Normal file
@@ -0,0 +1,64 @@
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "vp_trt_vehicle_scanner.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_trt_vehicle_scanner::vp_trt_vehicle_scanner(std::string node_name,
|
||||
std::string vehicle_scan_model_path):
|
||||
vp_primary_infer_node(node_name, "") {
|
||||
vehicle_scanner = std::make_shared<trt_vehicle::VehicleScanner>(vehicle_scan_model_path);
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_trt_vehicle_scanner::~vp_trt_vehicle_scanner() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
// please refer to vp_infer_node::run_infer_combinations
|
||||
void vp_trt_vehicle_scanner::run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
std::vector<cv::Mat> mats_to_infer;
|
||||
|
||||
// start
|
||||
auto start_time = std::chrono::system_clock::now();
|
||||
|
||||
// prepare data, as same as base class
|
||||
vp_primary_infer_node::prepare(frame_meta_with_batch, mats_to_infer);
|
||||
auto prepare_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
start_time = std::chrono::system_clock::now();
|
||||
std::vector<std::vector<trt_vehicle::ObjBox>> vehicles;
|
||||
vehicle_scanner->detect(mats_to_infer, vehicles);
|
||||
|
||||
assert(vehicles.size() == 1);
|
||||
auto& vehicle_list = vehicles[0];
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
|
||||
for (int i = 0; i < vehicle_list.size(); i++) {
|
||||
auto& objbox = vehicle_list[i];
|
||||
|
||||
// check value range
|
||||
objbox.x = std::max(objbox.x, 0);
|
||||
objbox.y = std::max(objbox.y, 0);
|
||||
objbox.width = std::min(objbox.width, frame_meta->frame.cols - objbox.x);
|
||||
objbox.height = std::min(objbox.height, frame_meta->frame.rows - objbox.y);
|
||||
if (objbox.width <= 0 || objbox.height <= 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto target = std::make_shared<vp_objects::vp_frame_target>(objbox.x, objbox.y, objbox.width, objbox.height,
|
||||
objbox.class_, objbox.score, frame_meta->frame_index, frame_meta->channel_index, objbox.label);
|
||||
// create target and update back into frame meta
|
||||
frame_meta->targets.push_back(target);
|
||||
}
|
||||
auto infer_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
// can not calculate preprocess time and postprocess time, set 0 by default.
|
||||
vp_infer_node::infer_combinations_time_cost(mats_to_infer.size(), prepare_time.count(), 0, infer_time.count(), 0);
|
||||
}
|
||||
|
||||
void vp_trt_vehicle_scanner::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
24
nodes/infers/vp_trt_vehicle_scanner.h
Normal file
24
nodes/infers/vp_trt_vehicle_scanner.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "../vp_primary_infer_node.h"
|
||||
#include "../../third_party/trt_vehicle/models/vehicle_scanner.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// vehicle scanner based on tensorrt using trt_vehicle library
|
||||
class vp_trt_vehicle_scanner: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
std::shared_ptr<trt_vehicle::VehicleScanner> vehicle_scanner = nullptr;
|
||||
protected:
|
||||
// we need a totally new logic for the whole infer combinations
|
||||
// no separate step pre-defined needed in base class
|
||||
virtual void run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
// override pure virtual method, for compile pass
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_trt_vehicle_scanner(std::string node_name, std::string vehicle_scan_model_path = "");
|
||||
~vp_trt_vehicle_scanner();
|
||||
};
|
||||
}
|
||||
#endif
|
||||
64
nodes/infers/vp_trt_vehicle_type_classifier.cpp
Normal file
64
nodes/infers/vp_trt_vehicle_type_classifier.cpp
Normal file
@@ -0,0 +1,64 @@
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "vp_trt_vehicle_type_classifier.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
vp_trt_vehicle_type_classifier::vp_trt_vehicle_type_classifier(std::string node_name,
|
||||
std::string vehicle_type_cls_model_path,
|
||||
std::vector<int> p_class_ids_applied_to,
|
||||
int min_width_applied_to, int min_height_applied_to):
|
||||
vp_secondary_infer_node(node_name, "", "", "", 1, 1, 1, p_class_ids_applied_to, min_width_applied_to, min_height_applied_to) {
|
||||
vehicle_type_classifier = std::make_shared<trt_vehicle::VehicleTypeClassifier>(vehicle_type_cls_model_path);
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_trt_vehicle_type_classifier::~vp_trt_vehicle_type_classifier() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
void vp_trt_vehicle_type_classifier::run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
std::vector<cv::Mat> mats_to_infer;
|
||||
|
||||
// start
|
||||
auto start_time = std::chrono::system_clock::now();
|
||||
|
||||
// prepare data, as same as base class
|
||||
vp_secondary_infer_node::prepare(frame_meta_with_batch, mats_to_infer);
|
||||
auto prepare_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
// infer using trt_vehicle library
|
||||
start_time = std::chrono::system_clock::now();
|
||||
std::vector<trt_vehicle::ObjCls> vehicles_type;
|
||||
vehicle_type_classifier->classify(mats_to_infer, vehicles_type);
|
||||
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
auto index = 0;
|
||||
for (int i = 0; i < vehicles_type.size(); i++) {
|
||||
for (int j = index; j < frame_meta->targets.size(); j++) {
|
||||
// need apply or not?
|
||||
if (!need_apply(frame_meta->targets[j]->primary_class_id, frame_meta->targets[j]->width, frame_meta->targets[j]->height)) {
|
||||
// continue as its primary_class_id is not in p_class_ids_applied_to
|
||||
continue;
|
||||
}
|
||||
|
||||
// update back to frame meta
|
||||
frame_meta->targets[j]->secondary_class_ids.push_back(vehicles_type[i].class_);
|
||||
frame_meta->targets[j]->secondary_scores.push_back(vehicles_type[i].score);
|
||||
frame_meta->targets[j]->secondary_labels.push_back(vehicles_type[i].label);
|
||||
|
||||
// break as we found the right target!
|
||||
index = j + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
auto infer_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
// can not calculate preprocess time and postprocess time, set 0 by default.
|
||||
vp_infer_node::infer_combinations_time_cost(mats_to_infer.size(), prepare_time.count(), 0, infer_time.count(), 0);
|
||||
}
|
||||
|
||||
void vp_trt_vehicle_type_classifier::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
26
nodes/infers/vp_trt_vehicle_type_classifier.h
Normal file
26
nodes/infers/vp_trt_vehicle_type_classifier.h
Normal file
@@ -0,0 +1,26 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "../vp_secondary_infer_node.h"
|
||||
#include "../../third_party/trt_vehicle/models/vehicle_type_classifier.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// vehicle type classifier based on tensorrt using trt_vehicle library
|
||||
// update secondary_class_ids/secondary_labels/secondary_scores of vp_frame_target.
|
||||
class vp_trt_vehicle_type_classifier: public vp_secondary_infer_node
|
||||
{
|
||||
private:
|
||||
/* data */
|
||||
std::shared_ptr<trt_vehicle::VehicleTypeClassifier> vehicle_type_classifier = nullptr;
|
||||
protected:
|
||||
// we need a totally new logic for the whole infer combinations
|
||||
// no separate step pre-defined needed in base class
|
||||
virtual void run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
// override pure virtual method, for compile pass
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_trt_vehicle_type_classifier(std::string node_name, std::string vehicle_type_cls_model_path = "", std::vector<int> p_class_ids_applied_to = std::vector<int>(), int min_width_applied_to = 0, int min_height_applied_to = 0);
|
||||
~vp_trt_vehicle_type_classifier();
|
||||
};
|
||||
}
|
||||
#endif
|
||||
67
nodes/infers/vp_trt_yolov8_classifier.cpp
Normal file
67
nodes/infers/vp_trt_yolov8_classifier.cpp
Normal file
@@ -0,0 +1,67 @@
|
||||
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "vp_trt_yolov8_classifier.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
vp_trt_yolov8_classifier::vp_trt_yolov8_classifier(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string labels_path,
|
||||
std::vector<int> p_class_ids_applied_to,
|
||||
int min_width_applied_to, int min_height_applied_to):
|
||||
vp_secondary_infer_node(node_name, "", "", labels_path, 1, 1, 1, p_class_ids_applied_to, min_width_applied_to, min_height_applied_to) {
|
||||
yolov8_classifier = std::make_shared<trt_yolov8::trt_yolov8_classifier>(model_path);
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_trt_yolov8_classifier::~vp_trt_yolov8_classifier() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
void vp_trt_yolov8_classifier::run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
std::vector<cv::Mat> mats_to_infer;
|
||||
|
||||
// start
|
||||
auto start_time = std::chrono::system_clock::now();
|
||||
|
||||
// prepare data, as same as base class
|
||||
vp_secondary_infer_node::prepare(frame_meta_with_batch, mats_to_infer);
|
||||
auto prepare_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
// infer using trt_vehicle library
|
||||
start_time = std::chrono::system_clock::now();
|
||||
std::vector<std::vector<Classification>> classidications;
|
||||
yolov8_classifier->classify(mats_to_infer, classidications);
|
||||
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
auto index = 0;
|
||||
for (int i = 0; i < classidications.size(); i++) {
|
||||
for (int j = index; j < frame_meta->targets.size(); j++) {
|
||||
// need apply or not?
|
||||
if (!need_apply(frame_meta->targets[j]->primary_class_id, frame_meta->targets[j]->width, frame_meta->targets[j]->height)) {
|
||||
// continue as its primary_class_id is not in p_class_ids_applied_to
|
||||
continue;
|
||||
}
|
||||
|
||||
// update back to frame meta
|
||||
frame_meta->targets[j]->secondary_class_ids.push_back(classidications[i][0].class_id);
|
||||
frame_meta->targets[j]->secondary_scores.push_back(classidications[i][0].conf);
|
||||
auto label = labels.size() == 0 ? "" : labels[classidications[i][0].class_id];
|
||||
frame_meta->targets[j]->secondary_labels.push_back(label);
|
||||
|
||||
// break as we found the right target!
|
||||
index = j + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
auto infer_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
// can not calculate preprocess time and postprocess time, set 0 by default.
|
||||
vp_infer_node::infer_combinations_time_cost(mats_to_infer.size(), prepare_time.count(), 0, infer_time.count(), 0);
|
||||
}
|
||||
|
||||
void vp_trt_yolov8_classifier::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
30
nodes/infers/vp_trt_yolov8_classifier.h
Normal file
30
nodes/infers/vp_trt_yolov8_classifier.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "../vp_secondary_infer_node.h"
|
||||
#include "../../third_party/trt_yolov8/trt_yolov8_classifier.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// universal yolov8 classifier based on tensorrt using third_party/trt_yolov8 library
|
||||
class vp_trt_yolov8_classifier: public vp_secondary_infer_node
|
||||
{
|
||||
private:
|
||||
/* data */
|
||||
std::shared_ptr<trt_yolov8::trt_yolov8_classifier> yolov8_classifier = nullptr;
|
||||
protected:
|
||||
// we need a totally new logic for the whole infer combinations
|
||||
// no separate step pre-defined needed in base class
|
||||
virtual void run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
// override pure virtual method, for compile pass
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_trt_yolov8_classifier(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string labels_path = "",
|
||||
std::vector<int> p_class_ids_applied_to = std::vector<int>(),
|
||||
int min_width_applied_to = 0,
|
||||
int min_height_applied_to = 0);
|
||||
~vp_trt_yolov8_classifier();
|
||||
};
|
||||
}
|
||||
#endif
|
||||
69
nodes/infers/vp_trt_yolov8_detector.cpp
Normal file
69
nodes/infers/vp_trt_yolov8_detector.cpp
Normal file
@@ -0,0 +1,69 @@
|
||||
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "vp_trt_yolov8_detector.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_trt_yolov8_detector::vp_trt_yolov8_detector(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string labels_path):
|
||||
vp_primary_infer_node(node_name, "", "", labels_path) {
|
||||
yolov8_detector = std::make_shared<trt_yolov8::trt_yolov8_detector>(model_path);
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_trt_yolov8_detector::~vp_trt_yolov8_detector() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
// please refer to vp_infer_node::run_infer_combinations
|
||||
void vp_trt_yolov8_detector::run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
std::vector<cv::Mat> mats_to_infer;
|
||||
|
||||
// start
|
||||
auto start_time = std::chrono::system_clock::now();
|
||||
|
||||
// prepare data, as same as base class
|
||||
vp_primary_infer_node::prepare(frame_meta_with_batch, mats_to_infer);
|
||||
auto prepare_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
start_time = std::chrono::system_clock::now();
|
||||
std::vector<std::vector<Detection>> detections;
|
||||
yolov8_detector->detect(mats_to_infer, detections);
|
||||
|
||||
assert(detections.size() == 1);
|
||||
auto& detection_list = detections[0];
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
|
||||
for (int i = 0; i < detection_list.size(); i++) {
|
||||
auto& objbox = detection_list[i];
|
||||
|
||||
// objbox.bbox: center_x center_y width height
|
||||
auto rect = get_rect(frame_meta->frame, objbox.bbox); // convert to: x, y, width,height
|
||||
|
||||
// check value range
|
||||
int x = std::max(rect.x, 0);
|
||||
int y = std::max(rect.y, 0);
|
||||
int width = std::min(rect.width, frame_meta->frame.cols - x);
|
||||
int height = std::min(rect.height, frame_meta->frame.rows - y);
|
||||
if (width <= 0 || height <= 0) {
|
||||
continue;
|
||||
}
|
||||
auto label = labels.size() == 0 ? "" : labels[objbox.class_id];
|
||||
auto target = std::make_shared<vp_objects::vp_frame_target>(x, y, width, height,
|
||||
objbox.class_id, objbox.conf, frame_meta->frame_index, frame_meta->channel_index, label);
|
||||
// create target and update back into frame meta
|
||||
frame_meta->targets.push_back(target);
|
||||
}
|
||||
auto infer_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
// can not calculate preprocess time and postprocess time, set 0 by default.
|
||||
vp_infer_node::infer_combinations_time_cost(mats_to_infer.size(), prepare_time.count(), 0, infer_time.count(), 0);
|
||||
}
|
||||
|
||||
void vp_trt_yolov8_detector::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
24
nodes/infers/vp_trt_yolov8_detector.h
Normal file
24
nodes/infers/vp_trt_yolov8_detector.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "../vp_primary_infer_node.h"
|
||||
#include "../../third_party/trt_yolov8/trt_yolov8_detector.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// universal yolov8 detector based on tensorrt using third_party/trt_yolov8 library
|
||||
class vp_trt_yolov8_detector: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
std::shared_ptr<trt_yolov8::trt_yolov8_detector> yolov8_detector = nullptr;
|
||||
protected:
|
||||
// we need a totally new logic for the whole infer combinations
|
||||
// no separate step pre-defined needed in base class
|
||||
virtual void run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
// override pure virtual method, for compile pass
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_trt_yolov8_detector(std::string node_name, std::string model_path, std::string labels_path = "");
|
||||
~vp_trt_yolov8_detector();
|
||||
};
|
||||
}
|
||||
#endif
|
||||
60
nodes/infers/vp_trt_yolov8_pose_detector.cpp
Normal file
60
nodes/infers/vp_trt_yolov8_pose_detector.cpp
Normal file
@@ -0,0 +1,60 @@
|
||||
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "vp_trt_yolov8_pose_detector.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_trt_yolov8_pose_detector::vp_trt_yolov8_pose_detector(std::string node_name,
|
||||
std::string model_path):
|
||||
vp_primary_infer_node(node_name, "") {
|
||||
yolov8_pose_detector = std::make_shared<trt_yolov8::trt_yolov8_pose_detector>(model_path);
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_trt_yolov8_pose_detector::~vp_trt_yolov8_pose_detector() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
// please refer to vp_infer_node::run_infer_combinations
|
||||
void vp_trt_yolov8_pose_detector::run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
std::vector<cv::Mat> mats_to_infer;
|
||||
|
||||
// start
|
||||
auto start_time = std::chrono::system_clock::now();
|
||||
|
||||
// prepare data, as same as base class
|
||||
vp_primary_infer_node::prepare(frame_meta_with_batch, mats_to_infer);
|
||||
auto prepare_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
start_time = std::chrono::system_clock::now();
|
||||
std::vector<std::vector<Detection>> detections;
|
||||
yolov8_pose_detector->detect(mats_to_infer, detections);
|
||||
|
||||
assert(detections.size() == 1);
|
||||
auto& detection_list = detections[0];
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
|
||||
for (int i = 0; i < detection_list.size(); i++) {
|
||||
auto& objbox = detection_list[i];
|
||||
auto rect = get_rect_adapt_landmark(frame_meta->frame, objbox.bbox, objbox.keypoints);
|
||||
|
||||
std::vector<vp_objects::vp_pose_keypoint> kps;
|
||||
for (int j = 0; j < 51; j += 3) {
|
||||
kps.push_back(vp_objects::vp_pose_keypoint {j, int(objbox.keypoints[j]), int(objbox.keypoints[j + 1]), objbox.keypoints[j + 2]});
|
||||
}
|
||||
|
||||
auto pose_target = std::make_shared<vp_objects::vp_frame_pose_target>(vp_objects::vp_pose_type::yolov8_pose_17, kps);
|
||||
frame_meta->pose_targets.push_back(pose_target);
|
||||
}
|
||||
auto infer_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
// can not calculate preprocess time and postprocess time, set 0 by default.
|
||||
vp_infer_node::infer_combinations_time_cost(mats_to_infer.size(), prepare_time.count(), 0, infer_time.count(), 0);
|
||||
}
|
||||
|
||||
void vp_trt_yolov8_pose_detector::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
24
nodes/infers/vp_trt_yolov8_pose_detector.h
Normal file
24
nodes/infers/vp_trt_yolov8_pose_detector.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "../vp_primary_infer_node.h"
|
||||
#include "../../third_party/trt_yolov8/trt_yolov8_pose_detector.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// universal yolov8 pose detector based on tensorrt using third_party/trt_yolov8 library
|
||||
class vp_trt_yolov8_pose_detector: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
std::shared_ptr<trt_yolov8::trt_yolov8_pose_detector> yolov8_pose_detector = nullptr;
|
||||
protected:
|
||||
// we need a totally new logic for the whole infer combinations
|
||||
// no separate step pre-defined needed in base class
|
||||
virtual void run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
// override pure virtual method, for compile pass
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_trt_yolov8_pose_detector(std::string node_name, std::string model_path);
|
||||
~vp_trt_yolov8_pose_detector();
|
||||
};
|
||||
}
|
||||
#endif
|
||||
78
nodes/infers/vp_trt_yolov8_seg_detector.cpp
Normal file
78
nodes/infers/vp_trt_yolov8_seg_detector.cpp
Normal file
@@ -0,0 +1,78 @@
|
||||
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "vp_trt_yolov8_seg_detector.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_trt_yolov8_seg_detector::vp_trt_yolov8_seg_detector(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string labels_path):
|
||||
vp_primary_infer_node(node_name, "", "", labels_path) {
|
||||
yolov8_seg_detector = std::make_shared<trt_yolov8::trt_yolov8_seg_detector>(model_path);
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_trt_yolov8_seg_detector::~vp_trt_yolov8_seg_detector() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
// please refer to vp_infer_node::run_infer_combinations
|
||||
void vp_trt_yolov8_seg_detector::run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
std::vector<cv::Mat> mats_to_infer;
|
||||
|
||||
// start
|
||||
auto start_time = std::chrono::system_clock::now();
|
||||
|
||||
// prepare data, as same as base class
|
||||
vp_primary_infer_node::prepare(frame_meta_with_batch, mats_to_infer);
|
||||
auto prepare_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
start_time = std::chrono::system_clock::now();
|
||||
std::vector<std::vector<Detection>> detections;
|
||||
std::vector<std::vector<cv::Mat>> masks;
|
||||
yolov8_seg_detector->detect(mats_to_infer, detections, masks);
|
||||
|
||||
assert(detections.size() == 1);
|
||||
assert(masks.size() == 1);
|
||||
auto& detection_list = detections[0];
|
||||
auto& mask_list = masks[0];
|
||||
assert(detection_list.size() == mask_list.size());
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
|
||||
for (int i = 0; i < detection_list.size(); i++) {
|
||||
auto& objbox = detection_list[i];
|
||||
auto& mask = mask_list[i];
|
||||
auto scaled_mask = scale_mask(mask, frame_meta->frame);
|
||||
|
||||
// objbox.bbox: center_x center_y width height
|
||||
auto rect = get_rect(frame_meta->frame, objbox.bbox); // convert to: x, y, width,height
|
||||
// check value range
|
||||
rect.x = std::max(rect.x, 0);
|
||||
rect.y = std::max(rect.y, 0);
|
||||
rect.width = std::min(rect.width, frame_meta->frame.cols - rect.x);
|
||||
rect.height = std::min(rect.height, frame_meta->frame.rows - rect.y);
|
||||
if (rect.width <= 0 || rect.height <= 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto label = labels.size() == 0 ? "" : labels[objbox.class_id];
|
||||
auto target = std::make_shared<vp_objects::vp_frame_target>(rect.x, rect.y, rect.width, rect.height,
|
||||
objbox.class_id, objbox.conf, frame_meta->frame_index, frame_meta->channel_index, label);
|
||||
auto rect_mask = scaled_mask(rect);
|
||||
target->mask = rect_mask;
|
||||
|
||||
// create target and update back into frame meta
|
||||
frame_meta->targets.push_back(target);
|
||||
}
|
||||
auto infer_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start_time);
|
||||
|
||||
// can not calculate preprocess time and postprocess time, set 0 by default.
|
||||
vp_infer_node::infer_combinations_time_cost(mats_to_infer.size(), prepare_time.count(), 0, infer_time.count(), 0);
|
||||
}
|
||||
|
||||
void vp_trt_yolov8_seg_detector::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
24
nodes/infers/vp_trt_yolov8_seg_detector.h
Normal file
24
nodes/infers/vp_trt_yolov8_seg_detector.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef VP_WITH_TRT
|
||||
#include "../vp_primary_infer_node.h"
|
||||
#include "../../third_party/trt_yolov8/trt_yolov8_seg_detector.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// universal yolov8 segmentation detector based on tensorrt using third_party/trt_yolov8 library
|
||||
class vp_trt_yolov8_seg_detector: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
std::shared_ptr<trt_yolov8::trt_yolov8_seg_detector> yolov8_seg_detector = nullptr;
|
||||
protected:
|
||||
// we need a totally new logic for the whole infer combinations
|
||||
// no separate step pre-defined needed in base class
|
||||
virtual void run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
// override pure virtual method, for compile pass
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_trt_yolov8_seg_detector(std::string node_name, std::string model_path, std::string labels_path = "");
|
||||
~vp_trt_yolov8_seg_detector();
|
||||
};
|
||||
}
|
||||
#endif
|
||||
307
nodes/infers/vp_yolo5_seg_node.cpp
Normal file
307
nodes/infers/vp_yolo5_seg_node.cpp
Normal file
@@ -0,0 +1,307 @@
|
||||
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include "vp_yolo5_seg_node.h"
|
||||
|
||||
|
||||
namespace vp_nodes {
|
||||
vp_yolo5_seg_node::vp_yolo5_seg_node(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string labels_path,
|
||||
int input_width,
|
||||
int input_height,
|
||||
int batch_size,
|
||||
int class_id_offset,
|
||||
float conf_threshold,
|
||||
float iou_threshold):
|
||||
vp_primary_infer_node(node_name, model_path, "", labels_path, input_width, input_height, batch_size, class_id_offset),
|
||||
conf_threshold(conf_threshold),
|
||||
iou_threshold(iou_threshold) {
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_yolo5_seg_node::~vp_yolo5_seg_node() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
void vp_yolo5_seg_node::prepare(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch, std::vector<cv::Mat>& mats_to_infer) {
|
||||
for (auto& i: frame_meta_with_batch) {
|
||||
auto letterboxed = letterbox(i->frame, cv::Size(input_width, input_height), scale_ratio, padding);
|
||||
mats_to_infer.push_back(letterboxed);
|
||||
}
|
||||
}
|
||||
|
||||
void vp_yolo5_seg_node::infer(const cv::Mat& blob_to_infer, std::vector<cv::Mat>& raw_outputs) {
|
||||
// blob_to_infer is a 4D matrix
|
||||
// the first dim is number of batch, MUST be 1
|
||||
assert(blob_to_infer.dims == 4);
|
||||
assert(blob_to_infer.size[0] == 1);
|
||||
assert(!net.empty());
|
||||
|
||||
net.setInput(blob_to_infer);
|
||||
net.forward(raw_outputs, net.getUnconnectedOutLayersNames());
|
||||
}
|
||||
|
||||
void vp_yolo5_seg_node::preprocess(const std::vector<cv::Mat>& mats_to_infer, cv::Mat& blob_to_infer) {
|
||||
// ignore preprocess logic in base class
|
||||
cv::dnn::blobFromImages(mats_to_infer, blob_to_infer, 1.0 / 255.0, cv::Size(), cv::Scalar(), true, false);
|
||||
}
|
||||
|
||||
void vp_yolo5_seg_node::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
auto& frame = frame_meta->frame;
|
||||
auto pred = raw_outputs[0]; // [1, N, 4+1+NUM_CLASSES+32]
|
||||
auto proto = raw_outputs[1]; // [1, 32, 96, 160]
|
||||
|
||||
std::vector<cv::Rect> boxes;
|
||||
std::vector<float> confidences;
|
||||
std::vector<int> class_ids;
|
||||
std::vector<cv::Mat> mask_coeffs;
|
||||
|
||||
int num_boxes = pred.size[1];
|
||||
float* data = (float*)pred.data;
|
||||
|
||||
// decode
|
||||
for (int i = 0; i < num_boxes; ++i) {
|
||||
float obj_conf = data[4];
|
||||
if (obj_conf < conf_threshold) {
|
||||
data += (5 + labels.size() + MASK_CHANNELS);
|
||||
continue;
|
||||
}
|
||||
|
||||
int class_id = -1;
|
||||
float max_cls_conf = 0;
|
||||
for (int c = 0; c < labels.size(); ++c) {
|
||||
float cls_conf = data[5 + c];
|
||||
if (cls_conf > max_cls_conf) {
|
||||
max_cls_conf = cls_conf;
|
||||
class_id = c;
|
||||
}
|
||||
}
|
||||
|
||||
float total_conf = obj_conf * max_cls_conf;
|
||||
if (total_conf < conf_threshold || class_id == -1) {
|
||||
data += (5 + labels.size() + MASK_CHANNELS);
|
||||
continue;
|
||||
}
|
||||
|
||||
float cx = data[0];
|
||||
float cy = data[1];
|
||||
float w = data[2];
|
||||
float h = data[3];
|
||||
|
||||
float x1 = cx - w * 0.5f;
|
||||
float y1 = cy - h * 0.5f;
|
||||
float x2 = cx + w * 0.5f;
|
||||
float y2 = cy + h * 0.5f;
|
||||
|
||||
if (x2 <= x1 || y2 <= y1 || w <= 0 || h <= 0) {
|
||||
data += (5 + labels.size() + MASK_CHANNELS);
|
||||
continue;
|
||||
}
|
||||
|
||||
float x_scale = scale_ratio.at<float>(0);
|
||||
float y_scale = scale_ratio.at<float>(1);
|
||||
int orig_x1 = cvRound((x1 - padding.x) * x_scale);
|
||||
int orig_y1 = cvRound((y1 - padding.y) * y_scale);
|
||||
int orig_x2 = cvRound((x2 - padding.x) * x_scale);
|
||||
int orig_y2 = cvRound((y2 - padding.y) * y_scale);
|
||||
|
||||
orig_x1 = max(0, orig_x1);
|
||||
orig_y1 = max(0, orig_y1);
|
||||
orig_x2 = min(frame.cols, orig_x2);
|
||||
orig_y2 = min(frame.rows, orig_y2);
|
||||
|
||||
if (orig_x2 <= orig_x1 || orig_y2 <= orig_y1) {
|
||||
data += (5 + labels.size() + MASK_CHANNELS);
|
||||
continue;
|
||||
}
|
||||
|
||||
cv::Rect box(orig_x1, orig_y1, orig_x2 - orig_x1, orig_y2 - orig_y1);
|
||||
boxes.push_back(box);
|
||||
confidences.push_back(total_conf);
|
||||
class_ids.push_back(class_id);
|
||||
|
||||
cv::Mat coeff(1, MASK_CHANNELS, CV_32F);
|
||||
for (int m = 0; m < MASK_CHANNELS; ++m) {
|
||||
coeff.at<float>(m) = data[5 + labels.size() + m];
|
||||
}
|
||||
mask_coeffs.push_back(coeff);
|
||||
|
||||
data += (5 + labels.size() + MASK_CHANNELS);
|
||||
}
|
||||
|
||||
std::vector<int> final_indices;
|
||||
std::unordered_map<int, std::vector<int>> class_boxes;
|
||||
for (int i = 0; i < boxes.size(); ++i) {
|
||||
class_boxes[class_ids[i]].push_back(i);
|
||||
}
|
||||
|
||||
// NMS
|
||||
for (auto& kv : class_boxes) {
|
||||
const std::vector<int>& indices_in_class = kv.second;
|
||||
std::vector<cv::Rect> boxes_cls;
|
||||
std::vector<float> scores_cls;
|
||||
for (int idx : indices_in_class) {
|
||||
boxes_cls.push_back(boxes[idx]);
|
||||
scores_cls.push_back(confidences[idx]);
|
||||
}
|
||||
|
||||
std::vector<int> nms_indices = nms(boxes_cls, scores_cls, iou_threshold);
|
||||
for (int local_idx : nms_indices) {
|
||||
final_indices.push_back(indices_in_class[local_idx]);
|
||||
}
|
||||
}
|
||||
|
||||
// create target push back to frame meta
|
||||
for (int idx : final_indices) {
|
||||
cv::Rect box = boxes[idx];
|
||||
float conf = confidences[idx];
|
||||
int cls = class_ids[idx];
|
||||
|
||||
auto label = (labels.size() < cls + 1) ? "" : labels[cls];
|
||||
cls += class_id_offset;
|
||||
auto target = std::make_shared<vp_objects::vp_frame_target>(box.x, box.y, box.width, box.height, cls, conf, frame_meta->frame_index, frame_meta->channel_index, label);
|
||||
|
||||
// try to extract mask
|
||||
auto local_mask = process_mask(proto, mask_coeffs[idx], box,
|
||||
cv::Size(input_width, input_height),
|
||||
padding, scale_ratio.at<float>(0), scale_ratio.at<float>(1));
|
||||
target->mask = local_mask;
|
||||
|
||||
frame_meta->targets.push_back(target);
|
||||
}
|
||||
|
||||
std::cout << "Detected: " << final_indices.size() << " instances" << std::endl;
|
||||
}
|
||||
|
||||
cv::Mat vp_yolo5_seg_node::letterbox(const cv::Mat& src, cv::Size target_size, cv::Mat& scale_ratio, cv::Point& padding) {
|
||||
float scale = min(static_cast<float>(target_size.height) / src.rows,
|
||||
static_cast<float>(target_size.width) / src.cols);
|
||||
int new_width = cvRound(src.cols * scale);
|
||||
int new_height = cvRound(src.rows * scale);
|
||||
|
||||
cv::Mat resized;
|
||||
resize(src, resized, cv::Size(new_width, new_height), 0, 0, cv::INTER_LINEAR);
|
||||
|
||||
cv::Mat letterboxed(target_size, CV_8UC3, cv::Scalar(114, 114, 114));
|
||||
int top = (target_size.height - new_height) / 2;
|
||||
int left = (target_size.width - new_width) / 2;
|
||||
cv::Rect roi(left, top, new_width, new_height);
|
||||
resized.copyTo(letterboxed(roi));
|
||||
|
||||
scale_ratio = cv::Mat(1, 2, CV_32F);
|
||||
scale_ratio.at<float>(0) = 1.0f / scale; // x_scale
|
||||
scale_ratio.at<float>(1) = 1.0f / scale; // y_scale
|
||||
|
||||
padding = cv::Point(left, top);
|
||||
return letterboxed;
|
||||
}
|
||||
|
||||
// Sigmoid
|
||||
cv::Mat vp_yolo5_seg_node::sigmoid(const cv::Mat& x) {
|
||||
cv::Mat result;
|
||||
cv::exp(-x, result);
|
||||
result = 1.0f / (1.0f + result);
|
||||
return result;
|
||||
}
|
||||
|
||||
cv::Mat vp_yolo5_seg_node::process_mask(const cv::Mat& proto, // [1, 32, H/4, W/4]
|
||||
const cv::Mat& coeffs, // [1, 32]
|
||||
const cv::Rect& bbox,
|
||||
const cv::Size& input_size, // e.g., 640x384
|
||||
const cv::Point& padding,
|
||||
const float x_scale,
|
||||
const float y_scale) {
|
||||
|
||||
// 1. 获取 proto 尺寸 [1,32,96,160] → h=96, w=160
|
||||
int h_proto = proto.size[2];
|
||||
int w_proto = proto.size[3];
|
||||
|
||||
// 2. 重塑为 [32, h, w]
|
||||
cv::Mat proto_3d = proto.reshape(1, {MASK_CHANNELS, h_proto, w_proto});
|
||||
|
||||
// 3. 展平为 [32, h*w]
|
||||
cv::Mat proto_flat = proto_3d.reshape(1, MASK_CHANNELS);
|
||||
|
||||
// 4. 矩阵乘: [1,32] × [32, h*w] → [1, h*w]
|
||||
cv::Mat mask_raw = coeffs * proto_flat;
|
||||
|
||||
// 5. 重塑为 [h, w]
|
||||
cv::Mat mask_2d = mask_raw.reshape(0, h_proto);
|
||||
|
||||
// 6. Sigmoid
|
||||
cv::Mat mask_sigmoid = sigmoid(mask_2d);
|
||||
|
||||
// 7. 原图 bbox → 输入图坐标(含 padding)
|
||||
float x1_in = bbox.x / x_scale + padding.x;
|
||||
float y1_in = bbox.y / y_scale + padding.y;
|
||||
float x2_in = (bbox.x + bbox.width) / x_scale + padding.x;
|
||||
float y2_in = (bbox.y + bbox.height) / y_scale + padding.y;
|
||||
|
||||
// 8. 输入图 → proto 坐标(/4)
|
||||
float x1_p = x1_in / 4.0f;
|
||||
float y1_p = y1_in / 4.0f;
|
||||
float x2_p = x2_in / 4.0f;
|
||||
float y2_p = y2_in / 4.0f;
|
||||
|
||||
int px1 = max(0, (int)floor(x1_p));
|
||||
int py1 = max(0, (int)floor(y1_p));
|
||||
int px2 = min(w_proto, (int)ceil(x2_p));
|
||||
int py2 = min(h_proto, (int)ceil(y2_p));
|
||||
|
||||
if (px2 <= px1 || py2 <= py1) {
|
||||
return cv::Mat::zeros(bbox.size(), CV_8UC1);
|
||||
}
|
||||
|
||||
// 9. 裁剪
|
||||
cv::Mat cropped = mask_sigmoid(cv::Rect(px1, py1, px2 - px1, py2 - py1)).clone();
|
||||
|
||||
// 10. 上采样到 bbox 尺寸
|
||||
cv::Mat resized;
|
||||
resize(cropped, resized, bbox.size(), 0, 0, cv::INTER_LINEAR);
|
||||
|
||||
// 11. 二值化
|
||||
cv::Mat mask_bin;
|
||||
threshold(resized, mask_bin, 0.5, 255, cv::THRESH_BINARY);
|
||||
mask_bin.convertTo(mask_bin, CV_8UC1);
|
||||
|
||||
return mask_bin;
|
||||
}
|
||||
|
||||
// NMS
|
||||
std::vector<int> vp_yolo5_seg_node::nms(const std::vector<cv::Rect>& boxes, const std::vector<float>& scores, float iou_threshold) {
|
||||
std::vector<int> indices;
|
||||
std::vector<float> areas(boxes.size());
|
||||
for (size_t i = 0; i < boxes.size(); ++i) {
|
||||
areas[i] = boxes[i].area();
|
||||
}
|
||||
|
||||
std::vector<int> order(scores.size());
|
||||
iota(order.begin(), order.end(), 0);
|
||||
sort(order.begin(), order.end(), [&](int a, int b) {
|
||||
return scores[a] > scores[b];
|
||||
});
|
||||
|
||||
std::vector<bool> keep(scores.size(), true);
|
||||
for (int i = 0; i < order.size(); ++i) {
|
||||
int idx = order[i];
|
||||
if (!keep[idx]) continue;
|
||||
for (int j = i + 1; j < order.size(); ++j) {
|
||||
int idx2 = order[j];
|
||||
if (!keep[idx2]) continue;
|
||||
cv::Rect intersect = boxes[idx] & boxes[idx2];
|
||||
float iou = intersect.area() / (areas[idx] + areas[idx2] - intersect.area() + 1e-6f);
|
||||
if (iou > iou_threshold) {
|
||||
keep[idx2] = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < keep.size(); ++i) {
|
||||
if (keep[i]) indices.push_back(i);
|
||||
}
|
||||
return indices;
|
||||
}
|
||||
}
|
||||
48
nodes/infers/vp_yolo5_seg_node.h
Normal file
48
nodes/infers/vp_yolo5_seg_node.h
Normal file
@@ -0,0 +1,48 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../vp_primary_infer_node.h"
|
||||
#include "../../objects/vp_frame_target.h"
|
||||
|
||||
|
||||
|
||||
namespace vp_nodes {
|
||||
// driving area segmentation based on yolov5s-seg-v7.0
|
||||
// https://github.com/ultralytics/yolov5/releases/tag/v7.0
|
||||
class vp_yolo5_seg_node: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
/* data */
|
||||
float conf_threshold = 0.5;
|
||||
float iou_threshold = 0.7;
|
||||
int MASK_CHANNELS = 32;
|
||||
cv::Mat scale_ratio;
|
||||
cv::Point padding;
|
||||
cv::Mat letterbox(const cv::Mat& src, cv::Size target_size, cv::Mat& scale_ratio, cv::Point& padding);
|
||||
cv::Mat sigmoid(const cv::Mat& x);
|
||||
cv::Mat process_mask(const cv::Mat& proto, // [1, 32, H/4, W/4]
|
||||
const cv::Mat& coeffs, // [1, 32]
|
||||
const cv::Rect& bbox,
|
||||
const cv::Size& input_size, // e.g., 640x384
|
||||
const cv::Point& padding,
|
||||
const float x_scale,
|
||||
const float y_scale);
|
||||
std::vector<int> nms(const std::vector<cv::Rect>& boxes, const std::vector<float>& scores, float iou_threshold);
|
||||
protected:
|
||||
virtual void infer(const cv::Mat& blob_to_infer, std::vector<cv::Mat>& raw_outputs) override;
|
||||
virtual void preprocess(const std::vector<cv::Mat>& mats_to_infer, cv::Mat& blob_to_infer) override;
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
virtual void prepare(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch, std::vector<cv::Mat>& mats_to_infer) override;
|
||||
public:
|
||||
vp_yolo5_seg_node(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string labels_path = "",
|
||||
int input_width = 640,
|
||||
int input_height = 384,
|
||||
int batch_size = 1,
|
||||
int class_id_offset = 0,
|
||||
float conf_threshold = 0.5,
|
||||
float iou_threshold = 0.7);
|
||||
~vp_yolo5_seg_node();
|
||||
};
|
||||
}
|
||||
118
nodes/infers/vp_yolo_detector_node.cpp
Executable file
118
nodes/infers/vp_yolo_detector_node.cpp
Executable file
@@ -0,0 +1,118 @@
|
||||
|
||||
#include "vp_yolo_detector_node.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_yolo_detector_node::vp_yolo_detector_node(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string model_config_path,
|
||||
std::string labels_path,
|
||||
int input_width,
|
||||
int input_height,
|
||||
int batch_size,
|
||||
int class_id_offset,
|
||||
float score_threshold,
|
||||
float confidence_threshold,
|
||||
float nms_threshold,
|
||||
float scale,
|
||||
cv::Scalar mean,
|
||||
cv::Scalar std,
|
||||
bool swap_rb):
|
||||
vp_primary_infer_node(node_name, model_path, model_config_path,labels_path,input_width, input_height, batch_size, class_id_offset, scale, mean, std, swap_rb),
|
||||
score_threshold(score_threshold),
|
||||
confidence_threshold(confidence_threshold),
|
||||
nms_threshold(nms_threshold) {
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_yolo_detector_node::~vp_yolo_detector_node() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
void vp_yolo_detector_node::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
// make sure heads of output are not zero
|
||||
assert(raw_outputs.size() > 0);
|
||||
|
||||
// check dims of each output
|
||||
auto dim_offset = raw_outputs[0].dims == 2 ? 0 : 1;
|
||||
auto batch = dim_offset == 0 ? 1 : raw_outputs[0].size[0];
|
||||
|
||||
std::vector<int> class_ids;
|
||||
std::vector<float> confidences;
|
||||
std::vector<cv::Rect> boxes;
|
||||
|
||||
for (int k = 0; k < batch; k++) { // scan batch
|
||||
auto& frame_meta = frame_meta_with_batch[k];
|
||||
for (int i = 0; i < raw_outputs.size(); ++i) { // scan heads of output
|
||||
cv::Mat output = cv::Mat(raw_outputs[i].size[0 + dim_offset], raw_outputs[i].size[1 + dim_offset], CV_32F, const_cast<uchar*>(raw_outputs[i].ptr(k)));
|
||||
|
||||
auto data = (float*)output.data;
|
||||
for (int j = 0; j < output.rows; ++j, data += output.cols) {
|
||||
|
||||
float confidence = data[4];
|
||||
// check confidence threshold
|
||||
if (confidence < confidence_threshold) {
|
||||
continue;
|
||||
}
|
||||
|
||||
cv::Mat scores = output.row(j).colRange(5, output.cols);
|
||||
cv::Point class_id;
|
||||
double max_score;
|
||||
// Get the value and location of the maximum score
|
||||
cv::minMaxLoc(scores, 0, &max_score, 0, &class_id);
|
||||
|
||||
// check score threshold
|
||||
if (max_score >= score_threshold) {
|
||||
// smaller than 1.0 means relative value
|
||||
// greater than 1.0 means absolute value which need to be converted to relative value
|
||||
int centerX = (int)((data[0] <= 1 ? data[0] : data[0] / input_width) * frame_meta->frame.cols);
|
||||
int centerY = (int)((data[1] <= 1 ? data[1] : data[1] / input_height) * frame_meta->frame.rows);
|
||||
int width = (int)((data[2] <= 1 ? data[2] : data[2] / input_width) * frame_meta->frame.cols);
|
||||
int height = (int)((data[3] <= 1 ? data[3] : data[3] / input_height) * frame_meta->frame.rows);
|
||||
int left = centerX - width / 2;
|
||||
int top = centerY - height / 2;
|
||||
|
||||
class_ids.push_back(class_id.x);
|
||||
confidences.push_back((float)confidence);
|
||||
boxes.push_back(cv::Rect(left, top, width, height));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// nms
|
||||
std::vector<int> indices;
|
||||
cv::dnn::NMSBoxes(boxes, confidences, confidence_threshold, nms_threshold, indices);
|
||||
|
||||
// create target
|
||||
for (int i = 0; i < indices.size(); ++i) {
|
||||
int idx = indices[i];
|
||||
auto box = boxes[idx];
|
||||
auto confidence = confidences[idx];
|
||||
auto class_id = class_ids[idx];
|
||||
auto label = (labels.size() < class_id + 1) ? "" : labels[class_id];
|
||||
|
||||
// check value range
|
||||
box.x = std::max(box.x, 0);
|
||||
box.y = std::max(box.y, 0);
|
||||
box.width = std::min(box.width, frame_meta->frame.cols - box.x);
|
||||
box.height = std::min(box.height, frame_meta->frame.rows - box.y);
|
||||
if (box.width <= 0 || box.height <= 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// apply offset to class id since multi detectors can exist at front of this one.
|
||||
// later we MUST use the new class id (applied offset) instead of orignal one.
|
||||
class_id += class_id_offset;
|
||||
|
||||
auto target = std::make_shared<vp_objects::vp_frame_target>(box.x, box.y, box.width, box.height, class_id, confidence, frame_meta->frame_index, frame_meta->channel_index, label);
|
||||
|
||||
// insert target back to frame meta
|
||||
frame_meta->targets.push_back(target);
|
||||
}
|
||||
|
||||
class_ids.clear();
|
||||
confidences.clear();
|
||||
boxes.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
35
nodes/infers/vp_yolo_detector_node.h
Executable file
35
nodes/infers/vp_yolo_detector_node.h
Executable file
@@ -0,0 +1,35 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../vp_primary_infer_node.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// yolo detector, support yolov3/4/5
|
||||
// https://github.com/pjreddie/darknet
|
||||
class vp_yolo_detector_node: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
float score_threshold;
|
||||
float confidence_threshold;
|
||||
float nms_threshold;
|
||||
protected:
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_yolo_detector_node(std::string node_name,
|
||||
std::string model_path,
|
||||
std::string model_config_path = "",
|
||||
std::string labels_path = "",
|
||||
int input_width = 416,
|
||||
int input_height = 416,
|
||||
int batch_size = 1,
|
||||
int class_id_offset = 0,
|
||||
float score_threshold = 0.5,
|
||||
float confidence_threshold = 0.5,
|
||||
float nms_threshold = 0.5,
|
||||
float scale = 1 / 255.0,
|
||||
cv::Scalar mean = cv::Scalar(0),
|
||||
cv::Scalar std = cv::Scalar(1),
|
||||
bool swap_rb = true);
|
||||
~vp_yolo_detector_node();
|
||||
};
|
||||
}
|
||||
229
nodes/infers/vp_yunet_face_detector_node.cpp
Normal file
229
nodes/infers/vp_yunet_face_detector_node.cpp
Normal file
@@ -0,0 +1,229 @@
|
||||
|
||||
#include "vp_yunet_face_detector_node.h"
|
||||
|
||||
|
||||
namespace vp_nodes {
|
||||
|
||||
vp_yunet_face_detector_node::vp_yunet_face_detector_node(std::string node_name,
|
||||
std::string model_path,
|
||||
float score_threshold,
|
||||
float nms_threshold,
|
||||
int top_k):
|
||||
vp_primary_infer_node(node_name, model_path),
|
||||
scoreThreshold(score_threshold),
|
||||
nmsThreshold(nms_threshold),
|
||||
topK(top_k) {
|
||||
this->initialized();
|
||||
}
|
||||
|
||||
vp_yunet_face_detector_node::~vp_yunet_face_detector_node() {
|
||||
deinitialized();
|
||||
}
|
||||
|
||||
void vp_yunet_face_detector_node::postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
|
||||
using namespace cv;
|
||||
// 3 heads of output
|
||||
assert(raw_outputs.size() == 3);
|
||||
assert(frame_meta_with_batch.size() == 1);
|
||||
auto& frame_meta = frame_meta_with_batch[0];
|
||||
|
||||
// Extract from output_blobs
|
||||
Mat loc = raw_outputs[0];
|
||||
Mat conf = raw_outputs[1];
|
||||
Mat iou = raw_outputs[2];
|
||||
|
||||
// we need generate priors if input size changed or priors is not initialized
|
||||
if (loc.rows != priors.size()) {
|
||||
inputW = frame_meta->frame.cols;
|
||||
inputH = frame_meta->frame.rows;
|
||||
generatePriors();
|
||||
}
|
||||
|
||||
assert(loc.rows == priors.size());
|
||||
assert(loc.rows == conf.rows);
|
||||
assert(loc.rows == iou.rows);
|
||||
|
||||
// Decode from deltas and priors
|
||||
const std::vector<float> variance = {0.1f, 0.2f};
|
||||
float* loc_v = (float*)(loc.data);
|
||||
float* conf_v = (float*)(conf.data);
|
||||
float* iou_v = (float*)(iou.data);
|
||||
Mat faces;
|
||||
// (tl_x, tl_y, w, h, re_x, re_y, le_x, le_y, nt_x, nt_y, rcm_x, rcm_y, lcm_x, lcm_y, score)
|
||||
// 'tl': top left point of the bounding box
|
||||
// 're': right eye, 'le': left eye
|
||||
// 'nt': nose tip
|
||||
// 'rcm': right corner of mouth, 'lcm': left corner of mouth
|
||||
Mat face(1, 15, CV_32FC1);
|
||||
for (size_t i = 0; i < priors.size(); ++i) {
|
||||
// Get score
|
||||
float clsScore = conf_v[i*2+1];
|
||||
float iouScore = iou_v[i];
|
||||
// Clamp
|
||||
if (iouScore < 0.f) {
|
||||
iouScore = 0.f;
|
||||
}
|
||||
else if (iouScore > 1.f) {
|
||||
iouScore = 1.f;
|
||||
}
|
||||
float score = std::sqrt(clsScore * iouScore);
|
||||
face.at<float>(0, 14) = score;
|
||||
|
||||
// Get bounding box
|
||||
float cx = (priors[i].x + loc_v[i*14+0] * variance[0] * priors[i].width) * inputW;
|
||||
float cy = (priors[i].y + loc_v[i*14+1] * variance[0] * priors[i].height) * inputH;
|
||||
float w = priors[i].width * exp(loc_v[i*14+2] * variance[0]) * inputW;
|
||||
float h = priors[i].height * exp(loc_v[i*14+3] * variance[1]) * inputH;
|
||||
float x1 = cx - w / 2;
|
||||
float y1 = cy - h / 2;
|
||||
face.at<float>(0, 0) = x1;
|
||||
face.at<float>(0, 1) = y1;
|
||||
face.at<float>(0, 2) = w;
|
||||
face.at<float>(0, 3) = h;
|
||||
|
||||
// Get landmarks
|
||||
face.at<float>(0, 4) = (priors[i].x + loc_v[i*14+ 4] * variance[0] * priors[i].width) * inputW; // right eye, x
|
||||
face.at<float>(0, 5) = (priors[i].y + loc_v[i*14+ 5] * variance[0] * priors[i].height) * inputH; // right eye, y
|
||||
face.at<float>(0, 6) = (priors[i].x + loc_v[i*14+ 6] * variance[0] * priors[i].width) * inputW; // left eye, x
|
||||
face.at<float>(0, 7) = (priors[i].y + loc_v[i*14+ 7] * variance[0] * priors[i].height) * inputH; // left eye, y
|
||||
face.at<float>(0, 8) = (priors[i].x + loc_v[i*14+ 8] * variance[0] * priors[i].width) * inputW; // nose tip, x
|
||||
face.at<float>(0, 9) = (priors[i].y + loc_v[i*14+ 9] * variance[0] * priors[i].height) * inputH; // nose tip, y
|
||||
face.at<float>(0, 10) = (priors[i].x + loc_v[i*14+10] * variance[0] * priors[i].width) * inputW; // right corner of mouth, x
|
||||
face.at<float>(0, 11) = (priors[i].y + loc_v[i*14+11] * variance[0] * priors[i].height) * inputH; // right corner of mouth, y
|
||||
face.at<float>(0, 12) = (priors[i].x + loc_v[i*14+12] * variance[0] * priors[i].width) * inputW; // left corner of mouth, x
|
||||
face.at<float>(0, 13) = (priors[i].y + loc_v[i*14+13] * variance[0] * priors[i].height) * inputH; // left corner of mouth, y
|
||||
|
||||
faces.push_back(face);
|
||||
}
|
||||
|
||||
if (faces.rows > 1)
|
||||
{
|
||||
// Retrieve boxes and scores
|
||||
std::vector<Rect2i> faceBoxes;
|
||||
std::vector<float> faceScores;
|
||||
for (int rIdx = 0; rIdx < faces.rows; rIdx++)
|
||||
{
|
||||
faceBoxes.push_back(Rect2i(int(faces.at<float>(rIdx, 0)),
|
||||
int(faces.at<float>(rIdx, 1)),
|
||||
int(faces.at<float>(rIdx, 2)),
|
||||
int(faces.at<float>(rIdx, 3))));
|
||||
faceScores.push_back(faces.at<float>(rIdx, 14));
|
||||
}
|
||||
|
||||
std::vector<int> keepIdx;
|
||||
dnn::NMSBoxes(faceBoxes, faceScores, scoreThreshold, nmsThreshold, keepIdx, 1.f, topK);
|
||||
|
||||
// Get NMS results
|
||||
Mat nms_faces;
|
||||
for (int idx: keepIdx)
|
||||
{
|
||||
nms_faces.push_back(faces.row(idx));
|
||||
}
|
||||
|
||||
// insert face target back to frame meta
|
||||
for (int i = 0; i < nms_faces.rows; i++) {
|
||||
auto x = int(nms_faces.at<float>(i, 0));
|
||||
auto y = int(nms_faces.at<float>(i, 1));
|
||||
auto w = int(nms_faces.at<float>(i, 2));
|
||||
auto h = int(nms_faces.at<float>(i, 3));
|
||||
|
||||
// check value range
|
||||
x = std::max(x, 0);
|
||||
y = std::max(y, 0);
|
||||
w = std::min(w, frame_meta->frame.cols - x);
|
||||
h = std::min(h, frame_meta->frame.rows - y);
|
||||
|
||||
auto kp1 = std::pair<int, int>(int(nms_faces.at<float>(i, 4)), int(nms_faces.at<float>(i, 5)));
|
||||
auto kp2 = std::pair<int, int>(int(nms_faces.at<float>(i, 6)), int(nms_faces.at<float>(i, 7)));
|
||||
auto kp3 = std::pair<int, int>(int(nms_faces.at<float>(i, 8)), int(nms_faces.at<float>(i, 9)));
|
||||
auto kp4 = std::pair<int, int>(int(nms_faces.at<float>(i, 10)), int(nms_faces.at<float>(i, 11)));
|
||||
auto kp5 = std::pair<int, int>(int(nms_faces.at<float>(i, 12)), int(nms_faces.at<float>(i, 13)));
|
||||
auto score = nms_faces.at<float>(i, 14);
|
||||
|
||||
auto face_target = std::make_shared<vp_objects::vp_frame_face_target>(x, y, w, h, score, std::vector<std::pair<int, int>>{kp1, kp2, kp3, kp4, kp5});
|
||||
|
||||
frame_meta->face_targets.push_back(face_target);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// refer to vp_infer_node::preprocess
|
||||
void vp_yunet_face_detector_node::preprocess(const std::vector<cv::Mat>& mats_to_infer, cv::Mat& blob_to_infer) {
|
||||
cv::dnn::blobFromImages(mats_to_infer, blob_to_infer);
|
||||
}
|
||||
|
||||
// refer to vp_infer_node::infer
|
||||
void vp_yunet_face_detector_node::infer(const cv::Mat& blob_to_infer, std::vector<cv::Mat>& raw_outputs) {
|
||||
// blob_to_infer is a 4D matrix
|
||||
// the first dim is number of batch, MUST be 1
|
||||
assert(blob_to_infer.dims == 4);
|
||||
assert(blob_to_infer.size[0] == 1);
|
||||
assert(!net.empty());
|
||||
|
||||
net.setInput(blob_to_infer);
|
||||
net.forward(raw_outputs, out_names);
|
||||
}
|
||||
|
||||
void vp_yunet_face_detector_node::generatePriors() {
|
||||
using namespace cv;
|
||||
// Calculate shapes of different scales according to the shape of input image
|
||||
Size feature_map_2nd = {
|
||||
int(int((inputW+1)/2)/2), int(int((inputH+1)/2)/2)
|
||||
};
|
||||
Size feature_map_3rd = {
|
||||
int(feature_map_2nd.width/2), int(feature_map_2nd.height/2)
|
||||
};
|
||||
Size feature_map_4th = {
|
||||
int(feature_map_3rd.width/2), int(feature_map_3rd.height/2)
|
||||
};
|
||||
Size feature_map_5th = {
|
||||
int(feature_map_4th.width/2), int(feature_map_4th.height/2)
|
||||
};
|
||||
Size feature_map_6th = {
|
||||
int(feature_map_5th.width/2), int(feature_map_5th.height/2)
|
||||
};
|
||||
|
||||
std::vector<Size> feature_map_sizes;
|
||||
feature_map_sizes.push_back(feature_map_3rd);
|
||||
feature_map_sizes.push_back(feature_map_4th);
|
||||
feature_map_sizes.push_back(feature_map_5th);
|
||||
feature_map_sizes.push_back(feature_map_6th);
|
||||
|
||||
// Fixed params for generating priors
|
||||
const std::vector<std::vector<float>> min_sizes = {
|
||||
{10.0f, 16.0f, 24.0f},
|
||||
{32.0f, 48.0f},
|
||||
{64.0f, 96.0f},
|
||||
{128.0f, 192.0f, 256.0f}
|
||||
};
|
||||
CV_Assert(min_sizes.size() == feature_map_sizes.size()); // just to keep vectors in sync
|
||||
const std::vector<int> steps = { 8, 16, 32, 64 };
|
||||
|
||||
// Generate priors
|
||||
priors.clear();
|
||||
for (size_t i = 0; i < feature_map_sizes.size(); ++i)
|
||||
{
|
||||
Size feature_map_size = feature_map_sizes[i];
|
||||
std::vector<float> min_size = min_sizes[i];
|
||||
|
||||
for (int _h = 0; _h < feature_map_size.height; ++_h)
|
||||
{
|
||||
for (int _w = 0; _w < feature_map_size.width; ++_w)
|
||||
{
|
||||
for (size_t j = 0; j < min_size.size(); ++j)
|
||||
{
|
||||
float s_kx = min_size[j] / inputW;
|
||||
float s_ky = min_size[j] / inputH;
|
||||
|
||||
float cx = (_w + 0.5f) * steps[i] / inputW;
|
||||
float cy = (_h + 0.5f) * steps[i] / inputH;
|
||||
|
||||
Rect2f prior = { cx, cy, s_kx, s_ky };
|
||||
priors.push_back(prior);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
34
nodes/infers/vp_yunet_face_detector_node.h
Normal file
34
nodes/infers/vp_yunet_face_detector_node.h
Normal file
@@ -0,0 +1,34 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../vp_primary_infer_node.h"
|
||||
#include "../../objects/vp_frame_face_target.h"
|
||||
|
||||
namespace vp_nodes {
|
||||
// face detector based on YunNet
|
||||
// https://github.com/opencv/opencv/blob/4.x/modules/objdetect/src/face_detect.cpp
|
||||
// https://github.com/ShiqiYu/libfacedetection
|
||||
class vp_yunet_face_detector_node: public vp_primary_infer_node
|
||||
{
|
||||
private:
|
||||
// names of output layers in yunet
|
||||
const std::vector<std::string> out_names = {"loc", "conf", "iou"};
|
||||
float scoreThreshold = 0.7;
|
||||
float nmsThreshold = 0.5;
|
||||
int topK = 50;
|
||||
int inputW;
|
||||
int inputH;
|
||||
std::vector<cv::Rect2f> priors;
|
||||
void generatePriors();
|
||||
protected:
|
||||
// override infer and preprocess as yunet has a different logic
|
||||
virtual void infer(const cv::Mat& blob_to_infer, std::vector<cv::Mat>& raw_outputs) override;
|
||||
virtual void preprocess(const std::vector<cv::Mat>& mats_to_infer, cv::Mat& blob_to_infer) override;
|
||||
|
||||
virtual void postprocess(const std::vector<cv::Mat>& raw_outputs, const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) override;
|
||||
public:
|
||||
vp_yunet_face_detector_node(std::string node_name, std::string model_path, float score_threshold = 0.7, float nms_threshold = 0.5, int top_k = 50);
|
||||
~vp_yunet_face_detector_node();
|
||||
};
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user