#include #include "vp_infer_node.h" namespace vp_nodes { vp_infer_node::vp_infer_node(std::string node_name, vp_infer_type infer_type, std::string model_path, std::string model_config_path, std::string labels_path, int input_width, int input_height, int batch_size, float scale, cv::Scalar mean, cv::Scalar std, bool swap_rb, bool swap_chn): vp_node(node_name), infer_type(infer_type), model_path(model_path), model_config_path(model_config_path), labels_path(labels_path), input_width(input_width), input_height(input_height), batch_size(batch_size), scale(scale), mean(mean), std(std), swap_rb(swap_rb), swap_chn(swap_chn) { // try to load network from file, // failing means maybe it has a custom implementation for model loading in derived class such as using other backends other than opencv::dnn. try { net = cv::dnn::readNet(model_path, model_config_path); #ifdef VP_WITH_CUDA net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA); net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA); #endif } catch(const std::exception& e) { VP_WARN(vp_utils::string_format("[%s] cv::dnn::readNet load network failed!", node_name.c_str())); } // load labels if labels_path is specified if (labels_path != "") { load_labels(); } assert(batch_size > 0); // primary infer nodes can handle frame meta batch by batch(whole frame), // others can handle multi batchs ONLY inside a single frame(small croped image). if (infer_type == vp_infer_type::PRIMARY && batch_size > 1) { frame_meta_handle_batch = batch_size; } } vp_infer_node::~vp_infer_node() { } // handle frame meta one by one std::shared_ptr vp_infer_node::handle_frame_meta(std::shared_ptr meta) { std::vector> frame_meta_with_batch {meta}; run_infer_combinations(frame_meta_with_batch); return meta; } // handle frame meta batch by batch void vp_infer_node::handle_frame_meta(const std::vector>& meta_with_batch) { const auto& frame_meta_with_batch = meta_with_batch; run_infer_combinations(frame_meta_with_batch); // no return } // default implementation // infer batch by batch void vp_infer_node::infer(const cv::Mat& blob_to_infer, std::vector& raw_outputs) { // blob_to_infer is a 4D matrix // the first dim is number of batch assert(blob_to_infer.dims == 4); assert(!net.empty()); auto number_of_batch = blob_to_infer.size[0]; if (number_of_batch <= batch_size) { // infer one time directly net.setInput(blob_to_infer); net.forward(raw_outputs, net.getUnconnectedOutLayersNames()); } else { // infer more times int b_size[] = {batch_size, blob_to_infer.size[1], blob_to_infer.size[2], blob_to_infer.size[3]}; auto times = (number_of_batch % batch_size) == 0 ? (number_of_batch / batch_size) : (number_of_batch / batch_size + 1); for (int i = 0; i < times; i++) { // split to small piece int i_hwc[] = {i * batch_size, 0, 0, 0}; // 4D auto ptr = blob_to_infer.ptr(i_hwc); cv::Mat b_blob(4, b_size, CV_32F, (void*)ptr); std::vector b_outputs; net.setInput(b_blob); net.forward(b_outputs, net.getUnconnectedOutLayersNames()); // first time, initialize it if (raw_outputs.size() == 0) { // scan multi heads of output for (int j = 0; j < b_outputs.size(); j++) { if (batch_size == 1) { // keep dims as usual, but change size[0] == number_of_batch if (b_outputs[j].dims <= 2 && b_outputs[j].rows == 1) { raw_outputs.push_back(cv::Mat(2, std::vector{number_of_batch, b_outputs[j].cols}.data(), CV_32F)); } else { // dims add 1, and set size[0] == number_of_batch std::vector t_size; t_size.push_back(number_of_batch); for (int s = 0; s < b_outputs[j].dims; s++) { t_size.push_back(b_outputs[j].size[s]); } raw_outputs.push_back(cv::Mat(b_outputs[j].dims + 1, t_size.data(), CV_32F)); } } else { // kepp dims as usual, but change size[0] == number_of_batch std::vector t_size; t_size.push_back(number_of_batch); // start from 1 for (int s = 1; s < b_outputs[j].dims; s++) { /* code */ t_size.push_back(b_outputs[j].size[s]); } raw_outputs.push_back(cv::Mat(b_outputs[j].dims, t_size.data(), CV_32F)); } } } assert(raw_outputs.size() == b_outputs.size()); // merge data directly for (int j = 0; j < b_outputs.size(); j++) { auto& des = raw_outputs[j]; auto& src = b_outputs[j]; std::vector t_size; auto s_dims_n = src.dims <= 2 ? 2 : src.dims; for (int s = 0; s < s_dims_n; s++) { t_size.push_back(src.size[s]); } auto ptr = des.ptr(i * batch_size); cv::Mat tmp(s_dims_n, t_size.data(), CV_32F, (void*)ptr); src.copyTo(tmp); } } } } // default implementation // create a 4D matrix(n, c, h, w) void vp_infer_node::preprocess(const std::vector& mats_to_infer, cv::Mat& blob_to_infer) { cv::dnn::blobFromImages(mats_to_infer, blob_to_infer, scale, cv::Size(input_width, input_height), mean, swap_rb); if (std != cv::Scalar(1)) { // divide by std } // NCHW -> NHWC if (swap_chn) { cv::Mat blob_to_infer_tmp; cv::transposeND(blob_to_infer, {0, 2, 3, 1}, blob_to_infer_tmp); blob_to_infer_tmp.copyTo(blob_to_infer); } } void vp_infer_node::run_infer_combinations(const std::vector>& frame_meta_with_batch) { /* * call logic by default: * frame_meta_with_batch -> mats_to_infer -> blob_to_infer -> raw_outputs -> frame_meta_with_batch */ std::vector mats_to_infer; // 4D matrix cv::Mat blob_to_infer; // multi heads of output in network, raw matrix output which need to be parsed by users. std::vector raw_outputs; // start auto start_time = std::chrono::system_clock::now(); // 1st step, prepare prepare(frame_meta_with_batch, mats_to_infer); auto prepare_time = std::chrono::duration_cast(std::chrono::system_clock::now() - start_time); // nothing to infer if (mats_to_infer.size() == 0) { return; } start_time = std::chrono::system_clock::now(); // 2nd step, preprocess preprocess(mats_to_infer, blob_to_infer); auto preprocess_time = std::chrono::duration_cast(std::chrono::system_clock::now() - start_time); start_time = std::chrono::system_clock::now(); // 3rd step, infer infer(blob_to_infer, raw_outputs); auto infer_time = std::chrono::duration_cast(std::chrono::system_clock::now() - start_time); start_time = std::chrono::system_clock::now(); // 4th step, postprocess postprocess(raw_outputs, frame_meta_with_batch); auto postprocess_time = std::chrono::duration_cast(std::chrono::system_clock::now() - start_time); // end infer_combinations_time_cost(mats_to_infer.size(), prepare_time.count(), preprocess_time.count(), infer_time.count(), postprocess_time.count()); } // print all by default void vp_infer_node::infer_combinations_time_cost(int data_size, int prepare_time, int preprocess_time, int infer_time, int postprocess_time) { /* std::cout << "########## infer combinations summary ##########" << std::endl; std::cout << " node_name:" << node_name << std::endl; std::cout << " data_size:" << data_size << std::endl; std::cout << " prepare_time:" << prepare_time << "ms" << std::endl; std::cout << " preprocess_time:" << preprocess_time << "ms" << std::endl; std::cout << " infer_time:" << infer_time << "ms" << std::endl; std::cout << " postprocess_time:" << postprocess_time << "ms" << std::endl; std::cout << "########## infer combinations summary ##########" << std::endl; */ std::ostringstream s_stream; s_stream << "\n########## infer combinations summary ##########\n"; s_stream << " node_name:" << node_name << "\n"; s_stream << " data_size:" << data_size << "\n"; s_stream << " prepare_time:" << prepare_time << "ms\n"; s_stream << " preprocess_time:" << preprocess_time << "ms\n"; s_stream << " infer_time:" << infer_time << "ms\n"; s_stream << " postprocess_time:" << postprocess_time << "ms\n"; s_stream << "########## infer combinations summary ##########\n"; // to log VP_DEBUG(s_stream.str()); } void vp_infer_node::load_labels() { try { std::ifstream label_stream(labels_path); for (std::string line; std::getline(label_stream, line); ) { if (!line.empty() && line[line.length() - 1] == '\r') { line.erase(line.length() - 1); } labels.push_back(line); } } catch(const std::exception& e) { } } }