first commit

2026-06-03 12:43:14 +08:00
commit ba76cfae28
608 changed files with 120791 additions and 0 deletions
--- a/objects/ba/vp_ba_result.cpp
+++ b/objects/ba/vp_ba_result.cpp
@@ -0,0 +1,34 @@
+#include "vp_ba_result.h"
+
+
+namespace vp_objects {
+
+    // store each argument in the member of the same name (initializers listed in member declaration order).
+    vp_ba_result::vp_ba_result(vp_ba_type type,
+                    int channel_index,
+                    int frame_index,
+                    std::vector<int> involve_target_ids_in_frame,
+                    std::vector<vp_objects::vp_point> involve_region_in_frame,
+                    std::string ba_label,
+                    std::string record_image_name,
+                    std::string record_video_name)
+        : type(type),
+          involve_target_ids_in_frame(involve_target_ids_in_frame),
+          involve_region_in_frame(involve_region_in_frame),
+          channel_index(channel_index),
+          frame_index(frame_index),
+          ba_label(ba_label),
+          record_image_name(record_image_name),
+          record_video_name(record_video_name) {}
+
+    // nothing to release by hand.
+    vp_ba_result::~vp_ba_result() {}
+
+    // the description produced here is always the empty string.
+    std::string vp_ba_result::to_string() { return ""; }
+
+    // copy-construct a vp_ba_result from *this and return it in a shared_ptr.
+    std::shared_ptr<vp_ba_result> vp_ba_result::clone() {
+        return std::make_shared<vp_ba_result>(*this);
+    }
+}
--- a/objects/ba/vp_ba_result.h
+++ b/objects/ba/vp_ba_result.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include <vector>
+#include <string>
+#include <memory>
+#include "../shapes/vp_point.h"
+
+
+namespace vp_objects {
+    // type of behaviour analysis; every value other than NONE is a distinct single bit
+    enum class vp_ba_type {
+        NONE = 0b00000000,       // none
+        CROSSLINE = 0b00000001,  // cross line
+        STOP = 0b00000010,       // enter stop status
+        UNSTOP = 0b00000100,     // leave stop status
+        JAM = 0b00001000,        // enter jam status
+        UNJAM = 0b00010000       // leave jam status
+        /* more */
+    };
+
+    // result of behaviour analysis
+    // BA logic ONLY works on vp_frame_target
+    class vp_ba_result
+    {
+    public:
+        // type
+        vp_ba_type type;
+        // target ids which involved for this ba result, empty allowed.
+        std::vector<int> involve_target_ids_in_frame;
+        // region (or single line) involved for this ba result, empty allowed.
+        std::vector<vp_objects::vp_point> involve_region_in_frame;
+
+        // channel index of this ba result
+        int channel_index;
+        // frame index of this ba result
+        int frame_index;
+
+        // name of ba
+        std::string ba_label = "not specified";
+
+        // record image name if exist
+        std::string record_image_name = "";
+        // record video name if exist
+        std::string record_video_name = "";
+
+        vp_ba_result(vp_ba_type type, 
+                    int channel_index,
+                    int frame_index,
+                    std::vector<int> involve_target_ids_in_frame, 
+                    std::vector<vp_objects::vp_point> involve_region_in_frame,
+                    std::string ba_label = "not specified",
+                    std::string record_image_name = "",
+                    std::string record_video_name = "");
+        // virtual: the class exposes a virtual to_string(), so it can be derived from
+        // and has to be safely destructible through a pointer to vp_ba_result.
+        virtual ~vp_ba_result();
+
+        // get description for ba result
+        virtual std::string to_string();
+
+        // clone myself
+        std::shared_ptr<vp_ba_result> clone();
+    };
+
+}
--- a/objects/shapes/vp_line.cpp
+++ b/objects/shapes/vp_line.cpp
@@ -0,0 +1,17 @@
+
+
+#include "vp_line.h"
+
+namespace vp_objects {
+    // keep copies of both end points
+    vp_line::vp_line(vp_point start, vp_point end)
+        : start(start), end(end) {}
+
+    vp_line::~vp_line() {}
+
+    // length of the line, i.e. the distance from start to end
+    float vp_line::length() {
+        const float span = start.distance_with(end);
+        return span;
+    }
+}
--- a/objects/shapes/vp_line.h
+++ b/objects/shapes/vp_line.h
@@ -0,0 +1,24 @@
+
+
+#pragma once
+
+#include "vp_point.h"
+
+namespace vp_objects {
+    // straight line between two points in 2-dims coordinate system
+    class vp_line {
+    public:
+        // one end of the line
+        vp_point start;
+        // the other end of the line
+        vp_point end;
+
+        // both end points are default-constructed
+        vp_line() = default;
+        vp_line(vp_point start, vp_point end);
+        ~vp_line();
+
+        // distance between start and end point
+        float length();
+    };
+}
--- a/objects/shapes/vp_point.cpp
+++ b/objects/shapes/vp_point.cpp
@@ -0,0 +1,18 @@
+
+
+#include "vp_point.h"
+
+namespace vp_objects {
+    // point at (x, y)
+    vp_point::vp_point(int x, int y): x(x), y(y) {
+    }
+
+    vp_point::~vp_point() {
+    }
+
+    // euclidean distance between this point and p.
+    // coordinates are widened to double before subtracting, so the difference cannot overflow int.
+    float vp_point::distance_with(const vp_point & p) {
+        return static_cast<float>(std::hypot(static_cast<double>(x) - p.x, static_cast<double>(y) - p.y));
+    }
+}
--- a/objects/shapes/vp_point.h
+++ b/objects/shapes/vp_point.h
@@ -0,0 +1,23 @@
+
+#pragma once
+
+#include <utility>
+#include <cmath>
+
+namespace vp_objects {
+    // a location (x, y) in 2-dims coordinate system
+    class vp_point {
+    public:
+        // horizontal coordinate
+        int x;
+        // vertical coordinate
+        int y;
+
+        // both coordinates default to 0 when omitted
+        vp_point(int x = 0, int y = 0);
+        ~vp_point();
+
+        // distance between this point and p
+        float distance_with(const vp_point & p);
+    };
+}
--- a/objects/shapes/vp_polygon.cpp
+++ b/objects/shapes/vp_polygon.cpp
@@ -0,0 +1,18 @@
+
+#include <assert.h>
+#include "vp_polygon.h"
+
+namespace vp_objects {
+    // keep a copy of the vertex list.
+    // the assert (active unless NDEBUG is defined) requires more than 2 vertices.
+    vp_polygon::vp_polygon(std::vector<vp_point> vertexs): vertexs(vertexs) {
+        assert(vertexs.size() > 2);
+    }
+
+    vp_polygon::~vp_polygon() {}
+
+    // NOTE(review): no geometric test is performed here yet, every point is reported as contained.
+    bool vp_polygon::contains(const vp_point & p) {
+        return true;
+    }
+}
--- a/objects/shapes/vp_polygon.h
+++ b/objects/shapes/vp_polygon.h
@@ -0,0 +1,25 @@
+
+
+#pragma once
+
+#include <vector>
+#include "vp_point.h"
+
+namespace vp_objects {
+    // polygon described by a list of vertex points
+    class vp_polygon {
+    public:
+        // vertexs of the polygon
+        std::vector<vp_point> vertexs;
+
+        // polygon with an empty vertex list
+        vp_polygon() = default;
+        // polygon built from the given vertexs
+        vp_polygon(std::vector<vp_point> vertexs);
+        ~vp_polygon();
+
+        // check if the polygon contains a point
+        bool contains(const vp_point & p);
+    };
+
+}
--- a/objects/shapes/vp_rect.cpp
+++ b/objects/shapes/vp_rect.cpp
@@ -0,0 +1,41 @@
+
+
+#include "vp_rect.h"
+
+namespace vp_objects {
+
+    // rect given by its top-left corner (x, y) plus width and height
+    vp_rect::vp_rect(int x, int y, int width, int height)
+        : x(x), y(y), width(width), height(height) {}
+
+    // rect given by its top-left point and its size
+    vp_rect::vp_rect(vp_point left_top, vp_size wh)
+        : x(left_top.x), y(left_top.y),
+          width(wh.width), height(wh.height) {}
+
+    vp_rect::~vp_rect() {}
+
+    // middle of the rect; the halves use integer division
+    vp_point vp_rect::center() {
+        const int center_x = x + width / 2;
+        const int center_y = y + height / 2;
+        return vp_point(center_x, center_y);
+    }
+
+    // NOTE(review): no overlap is computed yet, the result is always 1.0
+    float vp_rect::iou_with(const vp_rect & rect) {
+        return 1.0;
+    }
+
+    // NOTE(review): no bounds check is done yet, the result is always true
+    bool vp_rect::contains(const vp_point & p) {
+        return true;
+    }
+
+    // by default the center point of bottom is tracking point.
+    vp_point vp_rect::track_point() {
+        const int bottom_center_x = x + width / 2;
+        const int bottom_y = y + height;
+        return {bottom_center_x, bottom_y};
+    }
+}
--- a/objects/shapes/vp_rect.h
+++ b/objects/shapes/vp_rect.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <tuple>
+
+#include "vp_point.h"
+#include "vp_size.h"
+
+namespace vp_objects {
+    // rect in 2-dims coordinate system
+    class vp_rect {
+    public:
+        // default rect: every field is 0
+        vp_rect() = default;
+        vp_rect(int x, int y, int width, int height);
+        vp_rect(vp_point left_top, vp_size wh);
+        ~vp_rect();
+
+        // top-left corner (x, y) and size; zero-initialized so vp_rect() never holds indeterminate values
+        int x = 0;
+        int y = 0;
+        int width = 0;
+        int height = 0;
+
+        // get center point of the rect
+        vp_point center();
+
+        // get track point of the rect
+        // track point is used to locate the target(represented by the rect)
+        vp_point track_point();
+
+        // calculate the iou with another rect
+        float iou_with(const vp_rect & rect);
+
+        // check if the rect contains a point
+        bool contains(const vp_point & p);
+    };
+
+}
--- a/objects/shapes/vp_size.cpp
+++ b/objects/shapes/vp_size.cpp
@@ -0,0 +1,13 @@
+
+#include "vp_size.h"
+
+namespace vp_objects {
+
+    // remember the given width and height
+    vp_size::vp_size(int width, int height)
+        : width(width), height(height) {}
+
+    // nothing to clean up
+    vp_size::~vp_size() {}
+
+}
--- a/objects/shapes/vp_size.h
+++ b/objects/shapes/vp_size.h
@@ -0,0 +1,20 @@
+
+#pragma once
+
+#include <utility>
+
+namespace vp_objects {
+    // width and height of something in 2-dims coordinate system
+    class vp_size {
+    public:
+        // horizontal extent
+        int width;
+        // vertical extent
+        int height;
+
+        // both values default to 0 when omitted
+        vp_size(int width = 0, int height = 0);
+        ~vp_size();
+    };
+
+}
--- a/objects/vp_control_meta.cpp
+++ b/objects/vp_control_meta.cpp
@@ -0,0 +1,31 @@
+
+#include "vp_control_meta.h"
+
+namespace vp_objects {
+
+    // tag the base meta as CONTROL and keep the control type and uid;
+    // an empty uid is replaced by a generated one.
+    vp_control_meta::vp_control_meta(vp_control_type control_type, int channel_index, std::string control_uid)
+        : vp_meta(vp_meta_type::CONTROL, channel_index),
+          control_type(control_type), control_uid(control_uid) {
+        const bool uid_missing = control_uid.empty();
+        if (uid_missing) {
+            generate_uid();
+        }
+    }
+
+    vp_control_meta::~vp_control_meta() {}
+
+    // copy-construct a new vp_control_meta from *this
+    std::shared_ptr<vp_meta> vp_control_meta::clone() {
+        return std::make_shared<vp_control_meta>(*this);
+    }
+
+    // set control_uid to "vp_control_" + the system_clock time since its epoch, counted in milliseconds
+    void vp_control_meta::generate_uid() {
+        using namespace std::chrono;
+        const auto since_epoch = system_clock::now().time_since_epoch();
+        const auto millis = duration_cast<milliseconds>(since_epoch).count();
+        control_uid = "vp_control_" + std::to_string(millis);
+    }
+}
--- a/objects/vp_control_meta.h
+++ b/objects/vp_control_meta.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <chrono>
+#include "vp_meta.h"
+
+
+namespace vp_objects {
+    // kinds of control meta
+    enum vp_control_type {
+        SPEAK,
+        VIDEO_RECORD,
+        IMAGE_RECORD
+    };
+
+    // control meta, a vp_meta that carries control data.
+    class vp_control_meta: public vp_meta {
+    public:
+        // control_uid is generated when the given one is empty
+        vp_control_meta(vp_control_type control_type, int channel_index, std::string control_uid = "");
+        ~vp_control_meta();
+
+        vp_control_type control_type;
+        // unique id to identify control meta (could be generated from the current time)
+        std::string control_uid;
+
+        // copy myself
+        virtual std::shared_ptr<vp_meta> clone() override;
+    private:
+        // help to generate control uid if need
+        void generate_uid();
+    };
+}
--- a/objects/vp_frame_face_target.cpp
+++ b/objects/vp_frame_face_target.cpp
@@ -0,0 +1,32 @@
+#include "vp_frame_face_target.h"
+
+namespace vp_objects {
+
+    // store the face box, its score, and the key points / embeddings.
+    // initializers follow the member declaration order (embeddings before key_points).
+    vp_frame_face_target::vp_frame_face_target(int x,
+                                                int y,
+                                                int width,
+                                                int height,
+                                                float score,
+                                                std::vector<std::pair<int, int>> key_points,
+                                                std::vector<float> embeddings)
+        : x(x), y(y), width(width), height(height),
+          score(score),
+          embeddings(embeddings),
+          key_points(key_points) {}
+
+    // nothing to release by hand
+    vp_frame_face_target::~vp_frame_face_target() {}
+
+    // copy-construct a new face target from *this
+    std::shared_ptr<vp_frame_face_target> vp_frame_face_target::clone() {
+        return std::make_shared<vp_frame_face_target>(*this);
+    }
+
+    // box of the face as a vp_rect built from x, y, width, height
+    vp_rect vp_frame_face_target::get_rect() const {
+        vp_rect box(x, y, width, height);
+        return box;
+    }
+}
--- a/objects/vp_frame_face_target.h
+++ b/objects/vp_frame_face_target.h
@@ -0,0 +1,57 @@
+
+#pragma once
+#include <vector>
+#include <memory>
+#include "shapes/vp_rect.h"
+
+
+namespace vp_objects {
+    // face found in a frame by face detectors such as yunet.
+    // note: we can define new target type like vp_frame_xxx_target... if need (see vp_frame_pose_target also)
+    class vp_frame_face_target
+    {
+    public:
+        // left edge of the face box (x of top left)
+        int x;
+        // top edge of the face box (y of top left)
+        int y;
+        // width of the face box
+        int width;
+        // height of the face box
+        int height;
+
+        // confidence of the detection
+        float score;
+
+        // feature vector created by infer nodes such as vp_sface_feature_encoder_node.
+        // embeddings can be used for face recognize or other reid works.
+        std::vector<float> embeddings;
+
+        // key points (5 points or more)
+        std::vector<std::pair<int, int>> key_points;
+
+        // track id filled by vp_track_node (child class) if it exists, -1 until then.
+        int track_id = -1;
+        // cache of track rects in the previous frames, filled by track node if it exists.
+        // we can draw / analyse depend on these track rects later.
+        std::vector<vp_objects::vp_rect> tracks;
+
+        // build a face target from its box (x, y, width, height) and score;
+        // key_points and embeddings are empty when not given.
+        vp_frame_face_target(int x,
+                            int y,
+                            int width,
+                            int height,
+                            float score,
+                            std::vector<std::pair<int, int>> key_points = {},
+                            std::vector<float> embeddings = {});
+        ~vp_frame_face_target();
+
+        // clone myself
+        std::shared_ptr<vp_frame_face_target> clone();
+
+        // rect area of target
+        vp_rect get_rect() const;
+    };
+
+}
--- a/objects/vp_frame_meta.cpp
+++ b/objects/vp_frame_meta.cpp
@@ -0,0 +1,72 @@
+#include <iterator>
+
+#include "vp_frame_meta.h"
+
+namespace vp_objects {
+
+    // wrap `frame` (asserted to be non-empty) together with its indices, original size and fps.
+    vp_frame_meta::vp_frame_meta(cv::Mat frame, int frame_index, int channel_index, int original_width, int original_height, int fps)
+        : vp_meta(vp_meta_type::FRAME, channel_index),
+          frame_index(frame_index),
+          fps(fps),
+          original_width(original_width),
+          original_height(original_height),
+          frame(frame) {
+        assert(!frame.empty());
+    }
+
+    // copy constructor of vp_frame_meta would NOT be called most of the time,
+    // only when it flows through vp_split_node with vp_split_node::split_with_deep_copy==true.
+    // in fact, metas of all kinds are normally NOT copied in their lifecycle, we just pass them by pointer.
+    vp_frame_meta::vp_frame_meta(const vp_frame_meta& meta)
+        : vp_meta(meta),
+          frame_index(meta.frame_index),
+          fps(meta.fps),
+          original_width(meta.original_width),
+          original_height(meta.original_height),
+          description(meta.description) {
+        // deep copy of the image buffers
+        frame = meta.frame.clone();
+        osd_frame = meta.osd_frame.clone();
+        mask = meta.mask.clone();
+
+        // deep copy of the element lists, in this order: targets, pose targets,
+        // face targets, text targets, ba results. every element goes through its own clone().
+        for (const auto& target : meta.targets) {
+            targets.push_back(target->clone());
+        }
+        for (const auto& pose : meta.pose_targets) {
+            pose_targets.push_back(pose->clone());
+        }
+        for (const auto& face : meta.face_targets) {
+            face_targets.push_back(face->clone());
+        }
+        for (const auto& text : meta.text_targets) {
+            text_targets.push_back(text->clone());
+        }
+        for (const auto& ba : meta.ba_results) {
+            ba_results.push_back(ba->clone());
+        }
+    }
+
+    // nothing to release by hand
+    vp_frame_meta::~vp_frame_meta() {}
+
+    // copy-construct a new vp_frame_meta from *this (see the copy constructor above)
+    std::shared_ptr<vp_meta> vp_frame_meta::clone() {
+        return std::make_shared<vp_frame_meta>(*this);
+    }
+
+    // collect the targets whose track_id appears in `ids`, keeping the order of `targets`.
+    std::vector<std::shared_ptr<vp_frame_target>> vp_frame_meta::get_targets_by_ids(const std::vector<int>& ids) {
+        std::vector<std::shared_ptr<vp_objects::vp_frame_target>> matched;
+        for (const auto& candidate : targets) {
+            const bool wanted = std::find(ids.begin(), ids.end(), candidate->track_id) != ids.end();
+            if (!wanted) {
+                continue;
+            }
+            matched.push_back(candidate);
+        }
+        return matched;
+    }
+}
--- a/objects/vp_frame_meta.h
+++ b/objects/vp_frame_meta.h
@@ -0,0 +1,93 @@
+#pragma once
+
+#include <vector>
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/videoio.hpp>
+
+#include "vp_meta.h"
+#include "vp_frame_target.h"
+#include "vp_frame_pose_target.h"
+#include "vp_frame_face_target.h"
+#include "vp_frame_text_target.h"
+#include "ba/vp_ba_result.h"
+/*
+* ##########################################
+* how does frame meta work?
+* ##########################################
+* a frame meta holds all data (targets/elements/...) of the current frame in the video scene. frame metas are independent and know nothing about each other, neither the previous frames nor the next ones.
+* the data in a frame meta only tells us what the **current frame** looks like, so we can not get something like a 'state-switch' from a single frame meta.
+* if you need to know when a 'state-switch' happens, for example, you want to notify the cloud via restful api when a state changed (and stay silent while it is kept),
+* you need to cache previous frame metas (maybe partial data) in your custom node first and then compare them with each other to figure out whether something has changed.
+* 
+* frame meta works like our eyes, by taking a glance at the frame in video we can see what the picture is and how many targets are there.
+* but if you want to  know something like state-switch, for example, a person was walking and then stop or it stop for a while and then start to walk, you have to see(cache) more frames.
+* 
+* see more implementation of 'vp_track_node' and 'vp_message_broker_node' which saved history frame meta data and then work based on them.
+* 1. vp_track_node          : save previous locations of targets and then do tracking based on them, we need see more frames to track targets in video.
+* 2. vp_message_broker_node : save previous ba_flags and then do notifying based on them, we need see more frames to check if state-switch has happened.
+* ##########################################
+*/ 
+namespace vp_objects {
+    // frame meta, which contains frame-related data. it is kind of important meta in pipeline.
+    class vp_frame_meta: public vp_meta {
+    public:
+        // index of the frame this meta belongs to, filled by src nodes.
+        int frame_index;
+
+        // fps of the current video.
+        int fps;
+
+        // original frame width, filled by src nodes.
+        int original_width;
+        // original frame height, filled by src nodes.
+        int original_height;
+
+        // image data the meta holds, filled by src nodes.
+        // deep copy needed here for this member.
+        cv::Mat frame;
+
+        // osd image data the meta holds, filled by osd node if exists.
+        // deep copy needed here for this member.
+        cv::Mat osd_frame;
+
+        // mask for the WHOLE frame, filled by Semantic Segmentation nodes if exists.
+        // deep copy needed here for this member.
+        cv::Mat mask;
+
+        // text description for frame (output from LLM)
+        std::string description;
+
+        // targets created/appended by primary infer nodes, and then updated by secondary infer nodes if exist.
+        // it is shared_ptr<...> type just to keep same as elements.
+        // deep copy needed here for this member.
+        std::vector<std::shared_ptr<vp_objects::vp_frame_target>> targets;
+
+        // pose targets created/appended by primary infer nodes.
+        std::vector<std::shared_ptr<vp_objects::vp_frame_pose_target>> pose_targets;
+
+        // face targets created/appended by primary infer nodes.
+        std::vector<std::shared_ptr<vp_objects::vp_frame_face_target>> face_targets;
+
+        // text targets created/appended by primary infer nodes.
+        std::vector<std::shared_ptr<vp_objects::vp_frame_text_target>> text_targets;
+
+        // ba results created/appended by ba nodes.
+        std::vector<std::shared_ptr<vp_objects::vp_ba_result>> ba_results;
+
+        // wrap a frame; every argument except `frame` has a default.
+        vp_frame_meta(cv::Mat frame, int frame_index = -1, int channel_index = -1, int original_width = 0, int original_height = 0, int fps = 0);
+        // define copy constructor since we need deep copy operation.
+        vp_frame_meta(const vp_frame_meta& meta);
+        ~vp_frame_meta();
+
+        // get target ptrs by target ids in current frame, ONLY supports vp_frame_target.
+        // the ids are compared with the track_id of each target.
+        std::vector<std::shared_ptr<vp_frame_target>> get_targets_by_ids(const std::vector<int>& ids);
+
+        // copy myself.
+        // the copy is made with the copy constructor declared above.
+        virtual std::shared_ptr<vp_meta> clone() override;
+    };
+
+}
--- a/objects/vp_frame_pose_target.cpp
+++ b/objects/vp_frame_pose_target.cpp
@@ -0,0 +1,19 @@
+
+#include "vp_frame_pose_target.h"
+
+namespace vp_objects {
+
+    // remember the pose type together with the key points
+    vp_frame_pose_target::vp_frame_pose_target(vp_pose_type type,
+                                                std::vector<vp_pose_keypoint> key_points)
+        : type(type), key_points(key_points) {}
+
+    // nothing to release by hand
+    vp_frame_pose_target::~vp_frame_pose_target() {}
+
+    // copy-construct a new pose target from *this
+    std::shared_ptr<vp_frame_pose_target> vp_frame_pose_target::clone() {
+        auto copy = std::make_shared<vp_frame_pose_target>(*this);
+        return copy;
+    }
+}
--- a/objects/vp_frame_pose_target.h
+++ b/objects/vp_frame_pose_target.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include <vector>
+#include <memory>
+
+namespace vp_objects {
+
+    // different types of datasets used to train openpose model.
+    enum vp_pose_type {
+        body_25,
+        coco,
+        mpi_15,
+        face,
+        hand,
+        yolov8_pose_17
+    };
+
+    // a single key point of a pose
+    // (the element type of vp_frame_pose_target::key_points)
+    struct vp_pose_keypoint {
+        int point_type;       // point type (index), nose, neck or left_eye
+        int x;                // x in 2D image
+        int y;                // y in 2D image
+        float score;          // probability
+    };
+
+    // target in frame detected by openpose(or other similar models), which mainly contains point collections.
+    // note: we can define new target type like vp_frame_xxx_target... if need (see vp_frame_face_target also)
+    class vp_frame_pose_target {
+    public:
+        // target type, different models create different outputs which need specific parsing.
+        vp_pose_type type;
+        // keypoints array
+        std::vector<vp_pose_keypoint> key_points;
+
+        // build a pose target of the given type from its key points
+        vp_frame_pose_target(vp_pose_type type, std::vector<vp_pose_keypoint> key_points);
+        ~vp_frame_pose_target();
+
+        // clone myself
+        std::shared_ptr<vp_frame_pose_target> clone();
+    };
+}
--- a/objects/vp_frame_target.cpp
+++ b/objects/vp_frame_target.cpp
@@ -0,0 +1,56 @@
+
+#include "vp_frame_target.h"
+
+namespace vp_objects {
+
+    // target described by its box (x, y, width, height), its primary class / score / label
+    // and the frame / channel it belongs to. ba_flags starts at 0.
+    vp_frame_target::vp_frame_target(int x, 
+                                    int y, 
+                                    int width, 
+                                    int height, 
+                                    int primary_class_id, 
+                                    float primary_score, 
+                                    int frame_index, 
+                                    int channel_index,
+                                    std::string primary_label): 
+                                    x(x),
+                                    y(y),
+                                    width(width),
+                                    height(height),
+                                    primary_class_id(primary_class_id),
+                                    primary_score(primary_score),
+                                    primary_label(primary_label),
+                                    frame_index(frame_index),
+                                    channel_index(channel_index),
+                                    ba_flags(0) {
+    }
+
+    // same as above with the box given as a vp_rect; delegates to the constructor above.
+    vp_frame_target::vp_frame_target(vp_rect rect,
+                                    int primary_class_id, 
+                                    float primary_score, 
+                                    int frame_index, 
+                                    int channel_index,
+                                    std::string primary_label):
+        vp_frame_target(rect.x, rect.y, rect.width, rect.height,
+                        primary_class_id, primary_score, frame_index, channel_index, primary_label) {}
+
+    vp_frame_target::~vp_frame_target() {}
+
+    // clone myself.
+    // the implicit copy constructor copies a cv::Mat by header only, so the pixel data of `mask`
+    // would stay shared with the original; clone() it to make the copy independent.
+    // NOTE(review): sub_targets still holds the same shared_ptr objects as the original -
+    // confirm whether vp_sub_target needs to be cloned as well.
+    std::shared_ptr<vp_frame_target> vp_frame_target::clone() {
+        auto copy = std::make_shared<vp_frame_target>(*this);
+        copy->mask = mask.clone();
+        return copy;
+    }
+
+    // rect area of target, built from x, y, width, height
+    vp_rect vp_frame_target::get_rect() const{
+        return vp_rect(x, y, width, height);
+    }
+}
--- a/objects/vp_frame_target.h
+++ b/objects/vp_frame_target.h
@@ -0,0 +1,106 @@
+#pragma once
+
+#include <string>
+#include <vector>
+#include <memory>
+#include <opencv2/core.hpp>
+
+#include "shapes/vp_rect.h"
+#include "vp_sub_target.h"
+
+/*
+* ##################################################
+* what is frame target? 
+* ##################################################
+* frame targets are the objects detected by deep learning models (detectors) and then updated by other classifiers.
+* we can detect vehicles, pedestrians, traffic lights, firesmoke and so on using vp_primary_infer_node, and then figure out what color the vehicles are, or whether a pedestrian wears a hat, using vp_secondary_infer_node.
+* vehicles and pedestrians are frame targets detected in the current frame.
+* 
+* note:
+* frame target is an important concept and it contains a lot of data which would be updated/filled by vp_node when flowing through the pipeline.
+* see vp_frame_meta also.
+* ##################################################
+*/
+
+namespace vp_objects {
+    // target in frame, detected by detectors(such as yolo/ssd).
+    class vp_frame_target {
+    public:
+        // build a target from its box, primary class / score and the frame / channel it belongs to.
+        vp_frame_target(int x,
+                        int y,
+                        int width,
+                        int height,
+                        int primary_class_id,
+                        float primary_score,
+                        int frame_index,
+                        int channel_index,
+                        std::string primary_label = "");
+        // same as above, with the box given as a vp_rect.
+        vp_frame_target(vp_rect rect,
+                        int primary_class_id,
+                        float primary_score,
+                        int frame_index,
+                        int channel_index,
+                        std::string primary_label = "");
+        ~vp_frame_target();
+
+        // clone myself
+        std::shared_ptr<vp_frame_target> clone();
+
+        // rect area of target
+        vp_rect get_rect() const;
+
+        // x of top left
+        int x;
+        // y of top left
+        int y;
+        // width of rect
+        int width;
+        // height of rect
+        int height;
+
+        // class id created by primary infer nodes.
+        // allow multi primary infer nodes to exist in a pipeline, the class id is unique because we apply class_id_offset in each primary infer node.
+        int primary_class_id;
+        // score created by primary infer nodes
+        float primary_score;
+        // label created by primary infer nodes
+        std::string primary_label;
+
+        // frame the target belongs to
+        int frame_index;
+        // channel the target belongs to
+        int channel_index;
+
+        // track id created by track node if it exists
+        int track_id = -1;
+        // cache of track rects in the previous frames, filled by track node if it exists.
+        // we can draw / analyse depend on these track rects later.
+        std::vector<vp_objects::vp_rect> tracks;
+
+        // mask of the target, used for Instance Segmentation like mask rcnn network (ignore for other situations).
+        cv::Mat mask;
+
+        // class ids filled/appended by multi secondary infer nodes.
+        std::vector<int> secondary_class_ids;
+        // scores filled/appended by multi secondary infer nodes.
+        std::vector<float> secondary_scores;
+        // labels filled/appended by multi secondary infer nodes.
+        std::vector<std::string> secondary_labels;
+
+        // sub targets inside current target.
+        // in case detectors applied on small cropped image.
+        std::vector<std::shared_ptr<vp_objects::vp_sub_target>> sub_targets;
+
+        // feature vector(for example, 128 or 256-dims array) created by infer nodes such as vp_feature_encoder_node.
+        // each target has only one feature vector, the value will be overridden if multi vp_feature_encoder_node exist.
+        // embeddings can be used for reid related works.
+        std::vector<float> embeddings;
+
+        // ba flags of the target, hold by this value (created/updated by ba nodes).
+        // for example, 0b0001/0b0010/0b0100/0b1000 stand for 4 different flags, 0b1110 means 3 flags are on and another one is off, using ^|& operators to update and read.
+        // if 0b0100 stands for 'Stop' flag of target, 'ba_flags |= 0b0100' means set 'Stop' flag as On, '(ba_flags & 0b0100) == 0b0100' means 'Stop' flag is already On.
+        int ba_flags;
+    };
+}
--- a/objects/vp_frame_text_target.cpp
+++ b/objects/vp_frame_text_target.cpp
@@ -0,0 +1,22 @@
+
+#include "vp_frame_text_target.h"
+
+namespace vp_objects {
+
+    // keep the region vertexes, the text and its score
+    vp_frame_text_target::vp_frame_text_target(std::vector<std::pair<int, int>> region_vertexes,
+                                                std::string text,
+                                                float score)
+        : region_vertexes(region_vertexes),
+          text(text),
+          score(score) {}
+
+    // nothing to release by hand
+    vp_frame_text_target::~vp_frame_text_target() {}
+
+    // copy-construct a new text target from *this
+    std::shared_ptr<vp_frame_text_target> vp_frame_text_target::clone() {
+        auto copy = std::make_shared<vp_frame_text_target>(*this);
+        return copy;
+    }
+}
--- a/objects/vp_frame_text_target.h
+++ b/objects/vp_frame_text_target.h
@@ -0,0 +1,26 @@
+
+#pragma once
+
+#include <memory>
+#include <vector>
+#include <string>
+
+namespace vp_objects {
+    // text target of a frame: a region, the text and a score
+    class vp_frame_text_target {
+    public:
+        // vertexes of the text region, stored as (int, int) pairs
+        std::vector<std::pair<int, int>> region_vertexes;
+        // the text itself
+        std::string text;
+        // score given for this text
+        float score;
+        // flags for text
+        std::string flags = "";
+
+        vp_frame_text_target(std::vector<std::pair<int, int>> region_vertexes, std::string text, float score);
+        ~vp_frame_text_target();
+        // clone myself
+        std::shared_ptr<vp_frame_text_target> clone();
+    };
+}
--- a/objects/vp_image_record_control_meta.cpp
+++ b/objects/vp_image_record_control_meta.cpp
@@ -0,0 +1,19 @@
+#include "vp_image_record_control_meta.h"
+
+namespace vp_objects {
+        
+    // tag the vp_control_meta base as IMAGE_RECORD for the channel, then keep the file name and osd flag
+    vp_image_record_control_meta::vp_image_record_control_meta(
+        int channel_index, std::string image_file_name_without_ext, bool osd)
+        : vp_control_meta(vp_control_type::IMAGE_RECORD, channel_index),
+          image_file_name_without_ext(image_file_name_without_ext), osd(osd) {}
+    
+    // nothing is released by hand here; the defaulted body
+    // just lets the members and the base class destroy themselves
+    vp_image_record_control_meta::~vp_image_record_control_meta() = default;
+
+    auto vp_image_record_control_meta::clone() -> std::shared_ptr<vp_meta> {
+        // copy-construct a duplicate of *this; the result converts to the base-class pointer type
+        return std::make_shared<vp_image_record_control_meta>(*this);
+    }
+}
--- a/objects/vp_image_record_control_meta.h
+++ b/objects/vp_image_record_control_meta.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include "vp_control_meta.h"
+
+namespace vp_objects {
+    // control meta for image recording, a specific type of vp_control_meta.
+    // when vp_record_node handles this control meta, the node will save the Latest Next frame in the pipeline to disk.
+    // refer to ./nodes/record/README.md for more details
+    class vp_image_record_control_meta: public vp_control_meta
+    {
+    private:
+        /* no private members */
+    public:
+        vp_image_record_control_meta(int channel_index, 
+                                    std::string image_file_name_without_ext,
+                                    bool osd = false);
+        ~vp_image_record_control_meta();
+
+        // copy myself; the copy is returned as a shared_ptr to the vp_meta base type
+        virtual std::shared_ptr<vp_meta> clone() override;
+
+        // file name without extension, like 2022-10-20_22-30-20_aaa_bbb, which should be a meaningful and unique name.
+        // generated by the sender who wants to record the image
+        std::string image_file_name_without_ext;
+
+        // record type (osd frame or not), not osd by default
+        bool osd = false;
+    };
+
+}
--- a/objects/vp_meta.cpp
+++ b/objects/vp_meta.cpp
@@ -0,0 +1,70 @@
+
+#include "vp_meta.h"
+#include "../excepts/vp_invalid_argument_error.h"
+
+namespace vp_objects {
+    
+    // stamp the creation time with the current system clock, then record the meta type and channel
+    vp_meta::vp_meta(vp_meta_type meta_type, int channel_index)
+        : create_time(std::chrono::system_clock::now()),
+          meta_type(meta_type),
+          channel_index(channel_index) {}
+
+    // no hand-written cleanup: the destructor is an
+    // out-of-line defaulted definition
+    vp_meta::~vp_meta() = default;
+
+    auto vp_meta::get_traces_str() -> std::string {
+        return std::string{};  // base implementation has no trace text: always an empty string
+    }
+
+    auto vp_meta::get_meta_str() -> std::string {
+        return std::string{};  // base implementation has no meta text: always an empty string
+    }
+
+/*
+    void vp_meta::attach_trace(std::string node_name) {
+        if (trace_table.count(node_name)) {
+            return;
+        }
+        
+        std::map<vp_meta_trace_field, std::any> new_trace_record {
+            {vp_meta_trace_field::SEQUENCE, trace_table.size()},
+            {vp_meta_trace_field::NODE_NAME, node_name},
+            {vp_meta_trace_field::IN_TIME, -1},
+            {vp_meta_trace_field::OUT_TIME, -1},
+            {vp_meta_trace_field::TEXT_INFO, std::vector<std::string>{}}
+        };
+
+        // append to the end of table
+        trace_table[node_name] = new_trace_record;
+    }
+
+    void vp_meta::update_trace(std::string node_name, vp_meta_trace_field trace_key, std::any trace_value) {
+        if (trace_table.count(node_name)) {
+            auto & trace_record = trace_table[node_name];
+            assert(trace_record.count(trace_key));
+
+            switch (trace_key) {
+                case vp_meta_trace_field::SEQUENCE:
+                case vp_meta_trace_field::NODE_NAME:
+                case vp_meta_trace_field::IN_TIME:
+                case vp_meta_trace_field::OUT_TIME: {
+                    // replace directly
+                    trace_record[trace_key] = trace_value;
+                    break;
+                }
+                case vp_meta_trace_field::TEXT_INFO: {
+                    // append to the end of vector
+                    auto & trace_desc = std::any_cast<std::vector<std::string>&>(trace_record[trace_key]);
+                    trace_desc.push_back(std::any_cast<std::string>(trace_value));
+                    break;
+                }
+                default: {
+                    throw vp_excepts::vp_invalid_argument_error("invalid trace_key for meta!");
+                    break;
+                }
+            }
+        }
+    }*/
+}
--- a/objects/vp_meta.h
+++ b/objects/vp_meta.h
@@ -0,0 +1,111 @@
+#pragma once
+
+#include <string>
+#include <map>
+#include <any>
+#include <vector>
+#include <memory>
+#include <chrono>
+#include <assert.h>
+
+namespace vp_objects {
+
+    // meta type: tells a frame meta (FRAME) apart from a control meta (CONTROL)
+    enum vp_meta_type {
+        FRAME,
+        CONTROL
+    };
+
+    // meta trace field
+    // 1. sequence   ->int       ,sequence number of the node in the order the meta flows through the pipeline
+    // 2. node_name  ->string    ,name of the current node the meta flows through
+    // 3. in_time    ->long      ,time when the meta arrives at the current node
+    // 4. out_time   ->long      ,time when the meta leaves the current node
+    // 5. text_info  ->vector    ,text info recorded while the meta is inside the node
+    enum vp_meta_trace_field {
+        SEQUENCE,
+        NODE_NAME,
+        IN_TIME,
+        OUT_TIME,
+        TEXT_INFO
+    };
+
+    // abstract base class for every meta; concrete metas derive from it and implement clone()
+    class vp_meta {
+    private:
+    protected:
+        /*
+        trace table, each record describes how the meta flowed through one node (keyed by node name).
+        {
+            "file_src_0": {
+                "sequence": 0,
+                "node_name": "file_src_0",
+                "in_time": 1692384,
+                "out_time": 1692384,
+                "text_info": ["create frame meta at time 1692384", "...", "..."]
+            },
+            "primary_infer": {
+                "sequence": 1,
+                "node_name": "primary_infer",
+                "in_time": 1692384,
+                "out_time": 1692384,
+                "text_info": ["add 6 targets in frame meta at time 1692384", "...", "..."]
+            },
+            "secondary_infer": {
+                "sequence": 2,
+                "node_name": "secondary_infer",
+                "in_time": 1692384,
+                "out_time": 1692384,
+                "text_info": ["updated 6 targets in target_list at 1692384", "...", "..."]
+            },
+            "osd": {
+                "sequence": 3,
+                "node_name": "osd",
+                "in_time": 1692384,
+                "out_time": 1692384,
+                "text_info": ["draw 6 targets on frame at time 1692384", "...", "..."]
+            },
+            "file_des_0": {
+                "sequence": 4,
+                "node_name": "file_des_0",
+                "in_time": 1692384,
+                "out_time": 1692384,
+                "text_info": ["balabala", "...", "..."]
+            }
+        }
+        */
+        //std::map<std::string, std::map<vp_meta_trace_field, std::any>> trace_table;
+    public:
+        vp_meta(vp_meta_type meta_type, int channel_index);
+        // virtual: this is a polymorphic base, so deleting a derived meta through a vp_meta pointer must run the derived destructor
+        virtual ~vp_meta();
+
+        // the time when the meta was created (set from the system clock by the constructor)
+        std::chrono::system_clock::time_point create_time;
+
+        vp_meta_type meta_type;  // FRAME or CONTROL
+
+        // channel the meta belongs to
+        int channel_index;
+
+        // write trace record or not
+        bool trace_on = false;
+
+        // format traces string (the base implementation returns an empty string)
+        virtual std::string get_traces_str();
+
+        // format meta string (the base implementation returns an empty string)
+        virtual std::string get_meta_str();
+
+        // virtual clone method since we do not know what specific meta we need to copy in some situations; returns a new pointer pointing to a newly allocated copy.
+        // note: every child class needs to implement its own clone() method.
+        virtual std::shared_ptr<vp_meta> clone() = 0;
+
+        // attach a new trace record for specific node (initialize key-value for current node)
+        //void attach_trace(std::string node_name);
+
+        // update trace record
+        //void update_trace(std::string node_name, vp_meta_trace_field trace_key, std::any trace_value);
+    };
+
+}
--- a/objects/vp_sub_target.cpp
+++ b/objects/vp_sub_target.cpp
@@ -0,0 +1,36 @@
+
+#include "vp_sub_target.h"
+
+namespace vp_objects {
+    
+    // build a sub target from plain values; every argument is copied into the member of the same name.
+    //   x, y           top-left corner of the rect
+    //   width, height  size of the rect
+    //   class_id       class id
+    //   score          score
+    //   label          label
+    //   frame_index    frame the sub target belongs to
+    //   channel_index  channel the sub target belongs to
+    vp_sub_target::vp_sub_target(int x, int y, int width, int height,
+                                 int class_id,
+                                 float score,
+                                 std::string label,
+                                 int frame_index,
+                                 int channel_index)
+        : x(x), y(y), width(width), height(height),
+          class_id(class_id), score(score), label(label),
+          frame_index(frame_index), channel_index(channel_index) {
+        // attachments is left default-constructed (empty)
+    }
+    
+    // no explicit cleanup needed; members destroy themselves
+    vp_sub_target::~vp_sub_target() = default;
+
+    auto vp_sub_target::clone() -> std::shared_ptr<vp_sub_target> {
+        return std::make_shared<vp_sub_target>(*this);  // new instance copy-constructed from *this
+    }
+
+    auto vp_sub_target::get_rect() const -> vp_rect {
+        return vp_rect(x, y, width, height);  // wrap the stored x/y/width/height in a vp_rect
+    }
+}
--- a/objects/vp_sub_target.h
+++ b/objects/vp_sub_target.h
@@ -0,0 +1,60 @@
+#pragma once
+
+#include <string>
+#include <memory>
+#include <vector>
+
+#include "shapes/vp_rect.h"
+
+namespace vp_objects {
+    // sub target inside vp_frame_target, created by detectors which MUST infer on small cropped images (detectors are derived from vp_secondary_infer_node).
+    // this class has fewer properties/functions than vp_frame_target.
+    // see also vp_frame_target.
+    class vp_sub_target
+    {
+    private:
+        /* no private members */
+    public:
+        vp_sub_target(int x, 
+                        int y, 
+                        int width, 
+                        int height, 
+                        int class_id, 
+                        float score, 
+                        std::string label, 
+                        int frame_index, 
+                        int channel_index);
+        ~vp_sub_target();
+
+        // x of the top-left corner of the rect
+        int x;
+        // y of the top-left corner of the rect
+        int y;
+        // width of the rect
+        int width;
+        // height of the rect
+        int height;
+
+        // class id
+        int class_id;
+        // score
+        float score;
+        // label
+        std::string label;
+
+        // frame the sub target belongs to
+        int frame_index;
+        // channel the sub target belongs to
+        int channel_index;
+
+        // extra info saved as strings; not set by the constructor, so empty until filled in
+        std::vector<std::string> attachments;
+
+        // clone myself: returns a shared_ptr to a new vp_sub_target
+        std::shared_ptr<vp_sub_target> clone();
+
+        // rect area of the sub target, built from x, y, width and height
+        vp_rect get_rect() const;
+    };
+
+}
--- a/objects/vp_video_record_control_meta.cpp
+++ b/objects/vp_video_record_control_meta.cpp
@@ -0,0 +1,27 @@
+
+
+#include "vp_video_record_control_meta.h"
+
+
+namespace vp_objects {
+    
+    // tag the vp_control_meta base as VIDEO_RECORD for the channel, then store the recording options
+    vp_video_record_control_meta::vp_video_record_control_meta(
+        int channel_index,
+        std::string video_file_name_without_ext,
+        int record_video_duration,
+        bool osd)
+        : vp_control_meta(vp_objects::vp_control_type::VIDEO_RECORD, channel_index),
+          video_file_name_without_ext(video_file_name_without_ext),
+          record_video_duration(record_video_duration),
+          osd(osd) {}
+    
+    // nothing is released by hand here; the defaulted body
+    // just lets the members and the base class destroy themselves
+    vp_video_record_control_meta::~vp_video_record_control_meta() = default;
+
+    auto vp_video_record_control_meta::clone() -> std::shared_ptr<vp_meta> {
+        // copy-construct a duplicate of *this; the result converts to the base-class pointer type
+        return std::make_shared<vp_video_record_control_meta>(*this);
+    }
+}
--- a/objects/vp_video_record_control_meta.h
+++ b/objects/vp_video_record_control_meta.h
@@ -0,0 +1,35 @@
+
+#pragma once
+
+#include "vp_control_meta.h"
+
+namespace vp_objects {
+    // control meta for video recording, a specific type of vp_control_meta.
+    // when vp_record_node handles this control meta, the node will start recording video asynchronously, beginning with the Latest Next frame in the pipeline (pre-record frames excluded).
+    // refer to ./nodes/record/README.md for more details
+    class vp_video_record_control_meta: public vp_control_meta {
+    private:
+        /* no private members */
+    public:
+        vp_video_record_control_meta(int channel_index, 
+                                    std::string video_file_name_without_ext, 
+                                    int record_video_duration = 0,
+                                    bool osd = false);
+        ~vp_video_record_control_meta();
+
+        // copy myself; the copy is returned as a shared_ptr to the vp_meta base type
+        virtual std::shared_ptr<vp_meta> clone() override;
+
+        // file name without extension, like 2022-10-20_22-30-20_aaa_bbb, which should be a meaningful and unique name.
+        // generated by the sender who wants to record the video
+        std::string video_file_name_without_ext;
+
+        // record video duration (seconds), not including pre-record time.
+        // 0 means using the value set by vp_record_node.
+        int record_video_duration = 0;
+
+        // record type (osd frame or not), not osd by default
+        bool osd = false;
+    };
+
+}