#pragma once #include #include #include "vp_node.h" namespace vp_nodes { // infer type // infer on the whole frame or small cropped image? enum vp_infer_type { PRIMARY, // infer on the whole frame, like detector, pose estimatation SECONDARY // infer on small cropped image, like classifier, feature extractor and secondary detector which need detect on small cropped images. }; // base class for infer node, can't be instanstiated directly. // note: // the class is based on opencv::dnn module which is the default way for all deep learning inference in code, // we can implement it using other backends such as tensorrt with cuda acceleration, see vp_ppocr_text_detector_node which is based on PaddlePaddle dl framework from BaiDu corporation. class vp_infer_node: public vp_node { private: // load labels if need void load_labels(); protected: vp_infer_type infer_type; std::string model_path; std::string model_config_path; std::string labels_path; int input_width; int input_height; int batch_size; cv::Scalar mean; cv::Scalar std; float scale; bool swap_rb; // transpose channel or not, NCHW -> NHWC bool swap_chn; // protected as it can't be instanstiated directly. vp_infer_node(std::string node_name, vp_infer_type infer_type, std::string model_path, std::string model_config_path = "", std::string labels_path = "", int input_width = 128, int input_height = 128, int batch_size = 1, float scale = 1.0, cv::Scalar mean = cv::Scalar(123.675, 116.28, 103.53), // imagenet dataset cv::Scalar std = cv::Scalar(1), bool swap_rb = true, bool swap_chn = false); // the 1st step, MUST implement in specific derived class. // prepare data for infer, fetch frames from frame meta. virtual void prepare(const std::vector>& frame_meta_with_batch, std::vector& mats_to_infer) = 0; // the 2nd step, has a default implementation. // preprocess data, such as normalization, mean substract. virtual void preprocess(const std::vector& mats_to_infer, cv::Mat& blob_to_infer); // the 3rd step, has a default implementation. // infer and retrive raw outputs. virtual void infer(const cv::Mat& blob_to_infer, std::vector& raw_outputs); // the 4th step, MUST implement in specific derived class. // postprocess on raw outputs and create/update something back to frame meta again. virtual void postprocess(const std::vector& raw_outputs, const std::vector>& frame_meta_with_batch) = 0; // debug purpose(ms) virtual void infer_combinations_time_cost(int data_size, int prepare_time, int preprocess_time, int infer_time, int postprocess_time); // infer operations(call prepare/preprocess/infer/postprocess by default) // we can define new logic for infer operations by overriding it. virtual void run_infer_combinations(const std::vector>& frame_meta_with_batch); // labels as text format std::vector labels; // opencv::dnn as backend cv::dnn::Net net; // re-implementation for one by one mode, marked as 'final' as we need not override any more in specific derived classes. virtual std::shared_ptr handle_frame_meta(std::shared_ptr meta) override final; // re-implementation for batch by batch mode, marked as 'final' as we need not override any more in specific derived classes. virtual void handle_frame_meta(const std::vector>& meta_with_batch) override final; public: ~vp_infer_node(); }; }