multi-task-v2(instance+semantic)

e19f8c81 · xin.wang.waytous · 3290100a · e19f8c81 · e19f8c81
Commit e19f8c81 authored Oct 13, 2023 by xin.wang.waytous
Hide whitespace changes
Inline Side-by-side

Showing with 322 additions and 0 deletions

multi_post_v2.cpp src/libs/postprocessors/multi_post_v2.cpp +246 -0

multi_post_v2.h src/libs/postprocessors/multi_post_v2.h +76 -0

No files found.
--- a/src/libs/postprocessors/multi_post_v2.cpp
+++ b/src/libs/postprocessors/multi_post_v2.cpp
+#include "libs/postprocessors/multi_post_v2.h"
+
+namespace waytous {
+namespace deepinfer {
+namespace postprocess {
+
+
+/**
+ * @brief Read the post-processing parameters from the YAML node and allocate
+ *        the working blobs used by Exec().
+ *
+ * All keys read below are required: they are fetched with as<T>() without a
+ * fallback value (NOTE(review): yaml-cpp is expected to throw on a missing or
+ * ill-typed key -- confirm against the yaml-cpp version in use).
+ *
+ * @param node YAML configuration of this unit.
+ * @param pmap IO map, forwarded to BaseUnit::Init.
+ * @return false if BaseUnit::Init fails or a configured value would make
+ *         Exec() divide by zero or index out of range; true otherwise.
+ */
+bool MultiPostProcessV2::Init(YAML::Node& node, interfaces::BaseIOMapPtr pmap) {
+    if (!BaseUnit::Init(node, pmap)) {
+        LOG_WARN << "Init multitask postprocess error";
+        return false;
+    }
+
+    // Network input geometry.
+    inputHeight = node["inputHeight"].as<int>();
+    inputWidth = node["inputWidth"].as<int>();
+    fixAspectRatio = node["fixAspectRatio"].as<bool>();
+
+    // Instance (detection + mask) branch.
+    nmsThreshold = node["nmsThreshold"].as<float>();
+    scoreThreshold = node["scoreThreshold"].as<float>();
+    truncatedThreshold = node["truncatedThreshold"].as<float>();
+    maxOutputNum = node["maxOutputNum"].as<int>();
+    rawDetectionLength = node["rawDetectionLength"].as<int>();
+    keepTopK = node["keepTopK"].as<int>();
+    segProtoDim = node["segProtoDim"].as<int>();
+    instanceDownScale = node["instanceDownScale"].as<int>();
+    instanceClassNumber = node["instanceClassNumber"].as<int>();
+    instanceClassNames = node["instanceClassNames"].as<std::vector<std::string>>();
+
+    // Semantic branch.
+    semanticDownScale = node["semanticDownScale"].as<int>();
+    semanticClassNumber = node["semanticClassNumber"].as<int>();
+    semanticClassNames = node["semanticClassNames"].as<std::vector<std::string>>();
+
+    // depthDownScale = node["depthDownScale"].as<int>();
+    // depthDistanceScale = node["depthDistanceScale"].as<int>();
+
+    // Exec() divides by the input size (via the image scale) and by both
+    // down-scale factors, so none of them may be zero or negative.
+    if (inputHeight <= 0 || inputWidth <= 0) {
+        LOG_ERROR << "multitask postprocess, inputHeight and inputWidth must be positive";
+        return false;
+    }
+    if (instanceDownScale <= 0 || semanticDownScale <= 0) {
+        LOG_ERROR << "multitask postprocess, instanceDownScale and semanticDownScale must be positive";
+        return false;
+    }
+    if (maxOutputNum <= 0 || segProtoDim < 0) {
+        LOG_ERROR << "multitask postprocess, maxOutputNum must be positive and segProtoDim non-negative";
+        return false;
+    }
+    // Exec() indexes the name lists with every class id below the class count.
+    if (instanceClassNumber <= 0 ||
+        instanceClassNames.size() < static_cast<size_t>(instanceClassNumber)) {
+        LOG_ERROR << "multitask postprocess, instanceClassNames has fewer entries than instanceClassNumber";
+        return false;
+    }
+    if (semanticClassNumber < 0 ||
+        semanticClassNames.size() < static_cast<size_t>(semanticClassNumber)) {
+        LOG_ERROR << "multitask postprocess, semanticClassNames has fewer entries than semanticClassNumber";
+        return false;
+    }
+
+    // One filtered detection row = x, y, w, h, objectness, class scores, mask coefficients.
+    detectionStep = segProtoDim + instanceClassNumber + 5;
+
+    output_length_ptr.reset(new base::Blob<int>({inferBatchSize, 1}));
+    output_length_ptr->cpu_data();
+    bboxes_ptr.reset(new base::Blob<float>({inferBatchSize, maxOutputNum, detectionStep})); // xywh s c[] mask[]
+    bboxes_ptr->cpu_data(); // init, cpu malloc
+    return true;
+}
+
+
+/**
+ * @brief Turn the raw network outputs into 2D detections with instance masks
+ *        and per-class semantic masks, and publish both on the IO map.
+ *
+ * Inputs, in the order of inputNames: raw detections, segmentation prototypes
+ * and raw semantic maps (each an ios::NormalIO), then the source image
+ * (ios::CameraSrcOut). Outputs, in the order of outputNames:
+ * ios::Detection2Ds and ios::Semantics.
+ *
+ * @return false when the unit does not have exactly 4 inputs and 2 outputs,
+ *         when an input is missing or has an unexpected type, when the source
+ *         image is missing/empty, or when a down-scale factor is not positive;
+ *         true otherwise.
+ */
+bool MultiPostProcessV2::Exec() {
+    if (inputNames.size() != 4 || outputNames.size() != 2){
+        LOG_ERROR << "multitask postprocess, inputsize != 4 or outputsize != 2";
+        return false;
+    }
+    auto rawDetections = std::dynamic_pointer_cast<ios::NormalIO>(pMap->GetIOPtr(inputNames[0]));
+    if (rawDetections == nullptr){
+        LOG_ERROR << "multitask postprocess input " << inputNames[0] << " haven't been init or doesn't exist.";
+        return false;
+    }
+    auto segProtos = std::dynamic_pointer_cast<ios::NormalIO>(pMap->GetIOPtr(inputNames[1]));
+    if (segProtos == nullptr){
+        LOG_ERROR << "multitask postprocess input " << inputNames[1] << " haven't been init or doesn't exist.";
+        return false;
+    }
+    // auto rawDepths = std::dynamic_pointer_cast<ios::NormalIO>(pMap->GetIOPtr(inputNames[2]));
+    // if (rawDepths == nullptr){
+    //     LOG_ERROR << "multitask postprocess input " << inputNames[2] << " haven't been init or doesn't exist.";
+    //     return false;
+    // }
+    auto rawSemantics = std::dynamic_pointer_cast<ios::NormalIO>(pMap->GetIOPtr(inputNames[2]));
+    if (rawSemantics == nullptr){
+        LOG_ERROR << "multitask postprocess input " << inputNames[2] << " haven't been init or doesn't exist.";
+        return false;
+    }
+    auto inputImage = std::dynamic_pointer_cast<ios::CameraSrcOut>(pMap->GetIOPtr(inputNames[3]));
+    if (inputImage == nullptr){
+        LOG_ERROR << "multitask postprocess input " << inputNames[3] << " haven't been init or doesn't exist.";
+        return false;
+    }
+
+    // Everything below divides by the image size and by the down-scale
+    // factors, so reject degenerate values before doing any work.
+    if (inputImage->img_ptr_ == nullptr){
+        LOG_ERROR << "multitask postprocess input " << inputNames[3] << " holds no image.";
+        return false;
+    }
+    const float img_width = float(inputImage->img_ptr_->cols());
+    const float img_height = float(inputImage->img_ptr_->rows());
+    if (img_width <= 0.0f || img_height <= 0.0f){
+        LOG_ERROR << "multitask postprocess input " << inputNames[3] << " is an empty image.";
+        return false;
+    }
+    if (instanceDownScale <= 0 || semanticDownScale <= 0){
+        LOG_ERROR << "multitask postprocess, instanceDownScale and semanticDownScale must be positive";
+        return false;
+    }
+
+    // filter detections 25200 x (5+9+32) -> 1000 x (5+9+32)
+    // reset output_length=0, otherwise, it will increase after every inference.
+    output_length_ptr->mutable_cpu_data()[0] = 0;
+    multitask_instance_filter(
+        rawDetections->data_->gpu_data(), rawDetectionLength,
+        bboxes_ptr->mutable_gpu_data(),
+        output_length_ptr->mutable_gpu_data(),
+        scoreThreshold, detectionStep, maxOutputNum
+    );
+
+    auto outputLength = output_length_ptr->cpu_data();
+    auto outputBoxes = bboxes_ptr->mutable_cpu_data();
+    auto proto = segProtos->data_->mutable_cpu_data();
+
+    // bboxes_ptr only holds maxOutputNum rows; never trust the reported
+    // count beyond that (or below zero).
+    const int detectionCount = std::max(0, std::min(outputLength[0], maxOutputNum));
+
+    // Detection
+    float scalex = inputWidth / img_width;
+    float scaley = inputHeight / img_height;
+    if(fixAspectRatio){
+        scalex = scaley = std::min(scalex, scaley);
+    }
+    auto dets = std::make_shared<ios::Detection2Ds>();
+    for(int i = 0; i < detectionCount; i++){
+        const float* current_box_info = outputBoxes + i * detectionStep;
+        // Best class = highest class score; final confidence = class score * objectness.
+        float max_class_conf = 0.0;
+        int class_id = 0;
+        for(int ic = 0; ic < instanceClassNumber; ic++){
+            if(current_box_info[5 + ic] > max_class_conf){
+                max_class_conf = current_box_info[5 + ic];
+                class_id = ic;
+            }
+        }
+        const float confidence = max_class_conf * current_box_info[4];
+        if(confidence < scoreThreshold){
+            continue;
+        }
+        auto obj = std::make_shared<ios::Det2D>();
+        obj->confidence = confidence;
+        obj->class_label = class_id;
+        obj->class_name = instanceClassNames[obj->class_label];
+        // Center/size in network-input pixels -> corner coordinates in source-image pixels.
+        obj->x1 = (current_box_info[0] - current_box_info[2] / 2) / scalex;
+        obj->y1 = (current_box_info[1] - current_box_info[3] / 2) / scaley;
+        obj->x2 = (current_box_info[0] + current_box_info[2] / 2) / scalex;
+        obj->y2 = (current_box_info[1] + current_box_info[3] / 2) / scaley;
+        obj->image_height = img_height;
+        obj->image_width = img_width;
+        obj->validCoordinate(); //
+        // LOG_INFO << "box:" << obj->x1 << ","<< obj->y1 << ","<< obj->x2 << ","<< obj->y2;
+        // A box whose edge lies within truncatedThreshold (relative) of the image border is truncated.
+        if((obj->x1 / img_width  < truncatedThreshold) || (obj->y1 / img_height  < truncatedThreshold) ||
+        (obj->x2 / img_width  > (1 - truncatedThreshold)) || (obj->y2 / img_height  > (1 - truncatedThreshold))){
+            obj->truncated = true;
+        }
+        for(int im = 0; im < segProtoDim; im++){
+            obj->mask_coeff.push_back(current_box_info[5 + instanceClassNumber + im]);
+        }
+        dets->detections.push_back(obj);
+    }
+    // NMS
+    LOG_INFO << "before nms:" << dets->detections.size();
+    nms_cpu(dets, scoreThreshold, nmsThreshold, instanceClassNumber, keepTopK, true);
+    LOG_INFO << "after nms:" << dets->detections.size();
+
+    // Instance Mask
+    const int mask_width = inputWidth / instanceDownScale;
+    const int mask_height = inputHeight / instanceDownScale;
+    const int proto_plane = mask_width * mask_height;
+    // Round to the mask grid and clamp into [0, upper] so the prototype
+    // planes are never read out of range.
+    const auto toMaskCoord = [](float value, int upper) {
+        const int rounded = static_cast<int>(round(value));
+        return std::max(0, std::min(rounded, upper));
+    };
+    for(const auto& det: dets->detections){
+        // scale to output mask level.
+        const int x1 = toMaskCoord(det->x1 * scalex / instanceDownScale, mask_width);
+        const int y1 = toMaskCoord(det->y1 * scaley / instanceDownScale, mask_height);
+        // Keep the ROI non-negative in size even for degenerate boxes.
+        const int x2 = std::max(x1, toMaskCoord(det->x2 * scalex / instanceDownScale, mask_width));
+        const int y2 = std::max(y1, toMaskCoord(det->y2 * scaley / instanceDownScale, mask_height));
+        cv::Mat mask_mat = cv::Mat::zeros(y2 - y1, x2 - x1, CV_32FC1); // local mask
+        for (int y = y1; y < y2; y++) {
+            float* mask_row = mask_mat.ptr<float>(y - y1);
+            for (int x = x1; x < x2; x++) {
+                // Linear combination of the prototype planes, then sigmoid.
+                float e = 0.0f;
+                for (int j = 0; j < segProtoDim; j++) {
+                    e += det->mask_coeff[j] * proto[j * proto_plane + y * mask_width + x];
+                }
+                mask_row[x - x1] = 1.0f / (1.0f + expf(-e));
+            }
+        }
+        // cv::Mat mask_res;
+        // if(fixAspectRatio){
+        //     int w = img_width * scalex / instanceDownScale;
+        //     int h = img_height * scaley / instanceDownScale;
+        //     cv::Rect r(0, 0, w, h);
+        //     cv::resize(mask_mat(r), mask_res, cv::Size(img_width, img_height));
+        // }else{
+        //     cv::resize(mask_mat, mask_res, cv::Size(img_width, img_height));
+        // }
+        det->mask_ptr.reset(
+            new ios::InstanceMask(mask_mat)
+        );
+        // LOG_INFO << x1 << " " << x2 << " " << y1 << " " << y2 <<", " << det->mask_ptr->rle_string;
+    }
+    pMap->SetIOPtr(outputNames[0], dets);
+
+    // Semantic Mask
+    auto semantics = std::make_shared<ios::Semantics>();
+    auto rawSemanticSegs = rawSemantics->data_->mutable_cpu_data();
+    const int sem_width = inputWidth / semanticDownScale;
+    const int sem_height = inputHeight / semanticDownScale;
+    const int sem_plane = sem_width * sem_height;
+    for(int is = 0; is < semanticClassNumber; is++){
+        auto seg = std::make_shared<ios::SemanticSeg>();
+        seg->class_label = is;
+        seg->class_name = semanticClassNames[is];
+        // Copy plane `is` of the raw output row by row; values are stored as-is (no sigmoid).
+        cv::Mat mask_mat = cv::Mat::zeros(sem_height, sem_width, CV_32FC1);
+        for (int y = 0; y < sem_height; y++) {
+            const float* src_row = rawSemanticSegs + is * sem_plane + y * sem_width;
+            std::copy(src_row, src_row + sem_width, mask_mat.ptr<float>(y));
+        }
+        // cv::Mat mask_res;
+        // if(fixAspectRatio){
+        //     int w = img_width * scalex / semanticDownScale;
+        //     int h = img_height * scaley / semanticDownScale;
+        //     cv::Rect r(0, 0, w, h);
+        //     cv::resize(mask_mat(r), mask_res, cv::Size(img_width, img_height));
+        // }else{
+        //     cv::resize(mask_mat, mask_res, cv::Size(img_width, img_height));
+        // }
+        seg->mask_ptr.reset(
+            new ios::InstanceMask(mask_mat)
+        );
+        // LOG_INFO << seg->mask_ptr->rle_string;
+        semantics->semantic_segs.push_back(seg);
+    }
+    pMap->SetIOPtr(outputNames[1], semantics);
+
+    // Depth
+    // auto depth = std::make_shared<ios::Depth>();
+    // auto rawDepth = rawDepths->data_->mutable_cpu_data();
+    // cv::Mat mask_mat = cv::Mat::zeros(inputHeight / depthDownScale, inputWidth / depthDownScale, CV_32FC1);
+    // for (int x = 0; x < inputWidth / depthDownScale; x++) {
+    //     for (int y = 0; y < inputHeight / depthDownScale; y++) {
+    //         float e = rawDepth[y * mask_mat.cols + x];
+    //         // e = 1.0f / (1.0f + expf(-e));
+    //         mask_mat.at<float>(y, x) = e * depthDistanceScale;
+    //     }
+    // }
+    // cv::Mat mask_res;
+    // if(fixAspectRatio){
+    //     int w = img_width * scalex / depthDownScale;
+    //     int h = img_height * scaley / depthDownScale;
+    //     cv::Rect r(0, 0, w, h);
+    //     cv::resize(mask_mat(r), mask_res, cv::Size(img_width, img_height));
+    // }else{
+    //     cv::resize(mask_mat, mask_res, cv::Size(img_width, img_height));
+    // }
+    // depth->depth = mask_mat;
+    // pMap->SetIOPtr(outputNames[2], depth);
+
+    return true;
+}
+
+
+/// Returns the identifier of this unit type.
+std::string MultiPostProcessV2::Name() {
+    return std::string("MultiPostProcessV2");
+}
+
+
+}  // namespace postprocess
+}  // namespace deepinfer
+}  // namespace waytous
+
+
+
--- a/src/libs/postprocessors/multi_post_v2.h
+++ b/src/libs/postprocessors/multi_post_v2.h
+
+#ifndef DEEPINFER_POSTPROCESS_MULTI_H_
+#define DEEPINFER_POSTPROCESS_MULTI_H_
+
+#include "interfaces/base_unit.h"
+#include "base/image.h"
+
+#include "libs/ios/normal_ios.h"
+#include "libs/ios/camera_ios.h"
+#include "libs/ios/detection.h"
+#include "libs/ios/semantic.h"
+#include "libs/ios/depth.h"
+#include "libs/postprocessors/multi_post_gpu.h"
+#include "libs/postprocessors/nms.h"
+
+namespace waytous {
+namespace deepinfer {
+namespace postprocess {
+
+
+/**
+ * @brief Post-processing unit for the multi-task v2 network: decodes instance
+ *        detections with their masks and the per-class semantic masks.
+ *
+ * All configuration members are filled from the YAML node in Init(); the
+ * in-class initializers only guarantee a defined state before Init() runs.
+ */
+class MultiPostProcessV2: public interfaces::BaseUnit{
+
+public:
+    /// Reads the configuration and allocates the working blobs.
+    bool Init(YAML::Node& node, interfaces::BaseIOMapPtr pmap) override;
+
+    /// Runs the post-processing on the current inputs of the IO map.
+    bool Exec() override;
+
+    /// Returns "MultiPostProcessV2".
+    std::string Name() override;
+
+
+public:
+    // Working buffers allocated in Init(): the count written by the GPU
+    // filter, and the rows it kept ({inferBatchSize, maxOutputNum, detectionStep}).
+    base::BlobPtr<int> output_length_ptr;
+    base::BlobPtr<float> bboxes_ptr;
+
+    // Network input size in pixels.
+    int inputHeight = 0;
+    int inputWidth = 0;
+    int inferBatchSize = 1;
+    // When true, Exec() uses a single scale (the smaller of x/y) for both axes.
+    bool fixAspectRatio = true;
+    float nmsThreshold = 0.0f;
+    float scoreThreshold = 0.0f;
+    // Relative distance to the image border under which a box is flagged truncated.
+    float truncatedThreshold = 0.0f;
+    int rawDetectionLength = 0;
+    int maxOutputNum = 1000;
+    int keepTopK = 100;
+    int segProtoDim = 32;
+    int instanceDownScale = 4;
+    int instanceClassNumber = 0;
+    std::vector<std::string> instanceClassNames;
+    // Floats per filtered detection row: segProtoDim + instanceClassNumber + 5 (set in Init()).
+    int detectionStep = 0;
+
+    int semanticDownScale = 4;
+    int semanticClassNumber = 0;
+    std::vector<std::string> semanticClassNames;
+
+    // int depthDownScale = 4, depthDistanceScale = 256;
+};
+
+DEEPINFER_REGISTER_UNIT(MultiPostProcessV2);
+
+
+}  // namespace postprocess
+}  // namespace deepinfer
+}  // namespace waytous
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+