Files
VideoPipe/nodes/infers/vp_ppocr_text_detector_node.cpp
2026-06-03 12:43:14 +08:00

66 lines
3.3 KiB
C++

#ifdef VP_WITH_PADDLE
#include "vp_ppocr_text_detector_node.h"
#include "../../objects/vp_frame_text_target.h"
namespace vp_nodes {
// Builds the PaddleOCR-backed text detector node.
//   node_name          - forwarded to vp_primary_infer_node (together with an empty string)
//   det_model_dir      - passed to PaddleOCR::PPOCR as 1st argument
//   cls_model_dir      - passed to PaddleOCR::PPOCR as 2nd argument
//   rec_model_dir      - passed to PaddleOCR::PPOCR as 3rd argument
//   rec_char_dict_path - passed to PaddleOCR::PPOCR as 4th argument
vp_ppocr_text_detector_node::vp_ppocr_text_detector_node(std::string node_name,
                                                         std::string det_model_dir,
                                                         std::string cls_model_dir,
                                                         std::string rec_model_dir,
                                                         std::string rec_char_dict_path)
    : vp_primary_infer_node(node_name, "") {
    // Only the model/dictionary paths are configurable from here, which keeps this
    // node simple. Tuning any other PaddleOCR parameter means editing the sources
    // under ../../third_party/paddle_ocr/.
    using ocr_engine = PaddleOCR::PPOCR;
    ocr = std::make_shared<ocr_engine>(
        det_model_dir,
        cls_model_dir,
        rec_model_dir,
        rec_char_dict_path);

    // Called last, once the OCR engine has been created.
    this->initialized();
}
// Destructor: its only job is the deinitialized() call, the counterpart of the
// initialized() call made at the end of the constructor.
vp_ppocr_text_detector_node::~vp_ppocr_text_detector_node() {
    this->deinitialized();
}
// Intentionally a no-op: run_infer_combinations() in this file writes the OCR
// results into the frame meta itself and does not call postprocess().
void vp_ppocr_text_detector_node::postprocess(
    const std::vector<cv::Mat>& raw_outputs,
    const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
    // nothing to do
}
// Runs the whole OCR pipeline for one frame; please refer to
// vp_infer_node::run_infer_combinations for the generic flow this stands in for.
//
// Steps:
//   1. prepare the input mats the same way the base class does,
//   2. call paddle ocr on them,
//   3. turn every detected text into a vp_frame_text_target appended to the
//      frame meta's text_targets,
//   4. report the elapsed times via vp_infer_node::infer_combinations_time_cost.
//
// frame_meta_with_batch is expected to hold exactly one frame meta.
void vp_ppocr_text_detector_node::run_infer_combinations(const std::vector<std::shared_ptr<vp_objects::vp_frame_meta>>& frame_meta_with_batch) {
    // Debug-only contract check: assert is compiled out when NDEBUG is defined,
    // so the runtime guard further down keeps release builds away from
    // out-of-range indexing.
    assert(frame_meta_with_batch.size() == 1);
    std::vector<cv::Mat> mats_to_infer;

    // Elapsed time is measured with a monotonic clock so that wall-clock
    // adjustments cannot produce negative or inflated durations.
    using clock = std::chrono::steady_clock;
    using millis = std::chrono::milliseconds;

    // prepare data, as same as base class
    auto start_time = clock::now();
    vp_primary_infer_node::prepare(frame_meta_with_batch, mats_to_infer);
    const auto prepare_time = std::chrono::duration_cast<millis>(clock::now() - start_time);

    // call paddle ocr
    start_time = clock::now();
    auto ocr_results = ocr->ocr(mats_to_infer);
    assert(ocr_results.size() == 1);

    // Only the first result list / first frame meta are used (batch size is 1).
    if (!ocr_results.empty() && !frame_meta_with_batch.empty()) {
        auto& frame_meta = frame_meta_with_batch[0];

        // scan text detected in frame
        for (const auto& text : ocr_results[0]) {
            // NOTE(review): assumes text.box always holds four (x, y) vertices -- confirm
            // against the paddle_ocr result type.
            std::vector<std::pair<int, int>> region_vertexes = {{text.box[0][0], text.box[0][1]},
                                                                {text.box[1][0], text.box[1][1]},
                                                                {text.box[2][0], text.box[2][1]},
                                                                {text.box[3][0], text.box[3][1]}};

            // create text target and update back into frame meta
            auto text_target = std::make_shared<vp_objects::vp_frame_text_target>(region_vertexes, text.text, text.score);
            frame_meta->text_targets.push_back(text_target);
        }
    }

    // infer_time spans the ocr call plus the result scan above.
    const auto infer_time = std::chrono::duration_cast<millis>(clock::now() - start_time);

    // can not calculate preprocess time and postprocess time, set 0 by default.
    vp_infer_node::infer_combinations_time_cost(mats_to_infer.size(), prepare_time.count(), 0, infer_time.count(), 0);
}
}
#endif