Commit e19f8c81 authored by xin.wang.waytous

multi-task-v2(instance+semantic)

parent 3290100a
#include "libs/postprocessors/multi_post_v2.h"
namespace waytous {
namespace deepinfer {
namespace postprocess {
/**
 * @brief Loads the post-processing configuration and allocates the working buffers.
 *
 * Runs BaseUnit::Init first, then reads every parameter from @p node. Each key is read
 * with `as<T>()` and no fallback, so all keys listed below must be present in the YAML.
 * After loading, the configuration is checked for values that Exec() cannot work with
 * (zero divisors, class-name lists shorter than the class count, non-positive sizes).
 *
 * @param node YAML configuration of this unit.
 * @param pmap IO map forwarded to BaseUnit::Init.
 * @return false if the base initialisation fails or the configuration is inconsistent,
 *         true once both buffers have been allocated.
 */
bool MultiPostProcessV2::Init(YAML::Node& node, interfaces::BaseIOMapPtr pmap) {
    if (!BaseUnit::Init(node, pmap)) {
        LOG_WARN << "Init multitask postprocess error";
        return false;
    }

    // Network input geometry and generic detection parameters.
    inputHeight = node["inputHeight"].as<int>();
    inputWidth = node["inputWidth"].as<int>();
    fixAspectRatio = node["fixAspectRatio"].as<bool>();
    nmsThreshold = node["nmsThreshold"].as<float>();
    scoreThreshold = node["scoreThreshold"].as<float>();
    truncatedThreshold = node["truncatedThreshold"].as<float>();
    maxOutputNum = node["maxOutputNum"].as<int>();
    rawDetectionLength = node["rawDetectionLength"].as<int>();
    keepTopK = node["keepTopK"].as<int>();

    // Instance-segmentation head.
    segProtoDim = node["segProtoDim"].as<int>();
    instanceDownScale = node["instanceDownScale"].as<int>();
    instanceClassNumber = node["instanceClassNumber"].as<int>();
    instanceClassNames = node["instanceClassNames"].as<std::vector<std::string>>();

    // Semantic-segmentation head.
    semanticDownScale = node["semanticDownScale"].as<int>();
    semanticClassNumber = node["semanticClassNumber"].as<int>();
    semanticClassNames = node["semanticClassNames"].as<std::vector<std::string>>();

    // Depth head is currently disabled (depthDownScale / depthDistanceScale are not read).

    // Exec() divides by both down-scale factors and by the image size derived from the
    // input size, so none of them may be zero or negative.
    if (inputHeight <= 0 || inputWidth <= 0 || instanceDownScale <= 0 || semanticDownScale <= 0) {
        LOG_WARN << "Init multitask postprocess error: inputHeight, inputWidth, "
                 << "instanceDownScale and semanticDownScale must be positive";
        return false;
    }
    if (maxOutputNum <= 0 || rawDetectionLength < 0 || segProtoDim < 0) {
        LOG_WARN << "Init multitask postprocess error: maxOutputNum must be positive, "
                 << "rawDetectionLength and segProtoDim must not be negative";
        return false;
    }
    // Exec() indexes the name lists with the class id, so each list has to cover its class count.
    if (instanceClassNumber < 0 ||
        instanceClassNames.size() < static_cast<std::size_t>(instanceClassNumber)) {
        LOG_WARN << "Init multitask postprocess error: instanceClassNames has "
                 << instanceClassNames.size() << " entries, instanceClassNumber is "
                 << instanceClassNumber;
        return false;
    }
    if (semanticClassNumber < 0 ||
        semanticClassNames.size() < static_cast<std::size_t>(semanticClassNumber)) {
        LOG_WARN << "Init multitask postprocess error: semanticClassNames has "
                 << semanticClassNames.size() << " entries, semanticClassNumber is "
                 << semanticClassNumber;
        return false;
    }

    // One filtered detection record: x, y, w, h, score, class scores[], mask coefficients[].
    detectionStep = segProtoDim + instanceClassNumber + 5;

    // Counter written by the detection filter; touching cpu_data() forces the host allocation.
    output_length_ptr.reset(new base::Blob<int>({inferBatchSize, 1}));
    output_length_ptr->cpu_data();

    // Storage for at most maxOutputNum filtered detection records per batch item.
    bboxes_ptr.reset(new base::Blob<float>({inferBatchSize, maxOutputNum, detectionStep}));
    bboxes_ptr->cpu_data(); // init, cpu malloc
    return true;
}
/**
 * @brief Turns the raw network outputs of one frame into detections with instance masks
 *        and per-class semantic masks.
 *
 * Inputs, by position in inputNames:
 *   0: raw detections (ios::NormalIO), consumed on the GPU by multitask_instance_filter;
 *   1: mask prototypes (ios::NormalIO), read on the CPU;
 *   2: raw semantic maps (ios::NormalIO), read on the CPU;
 *   3: source image (ios::CameraSrcOut), used only for its width and height.
 * Outputs, by position in outputNames:
 *   0: ios::Detection2Ds after NMS, each detection carrying a box-local instance mask;
 *   1: ios::Semantics holding one mask per semantic class.
 *
 * Masks are stored at the down-scaled network resolution; no resize to the source image
 * size is performed here.
 *
 * @return false if the number of inputs/outputs is wrong, an input is missing or has an
 *         unexpected type, or the source image is missing/empty; true otherwise.
 */
bool MultiPostProcessV2::Exec() {
    if (inputNames.size() != 4 || outputNames.size() != 2){
        LOG_ERROR << "multitask postprocess, inputsize != 4 or outputsize != 2";
        return false;
    }

    auto rawDetections = std::dynamic_pointer_cast<ios::NormalIO>(pMap->GetIOPtr(inputNames[0]));
    if (rawDetections == nullptr){
        LOG_ERROR << "multitask postprocess input " << inputNames[0] << " haven't been init or doesn't exist.";
        return false;
    }
    auto segProtos = std::dynamic_pointer_cast<ios::NormalIO>(pMap->GetIOPtr(inputNames[1]));
    if (segProtos == nullptr){
        LOG_ERROR << "multitask postprocess input " << inputNames[1] << " haven't been init or doesn't exist.";
        return false;
    }
    // The depth input that used to sit at index 2 is disabled; semantics now use index 2.
    auto rawSemantics = std::dynamic_pointer_cast<ios::NormalIO>(pMap->GetIOPtr(inputNames[2]));
    if (rawSemantics == nullptr){
        LOG_ERROR << "multitask postprocess input " << inputNames[2] << " haven't been init or doesn't exist.";
        return false;
    }
    auto inputImage = std::dynamic_pointer_cast<ios::CameraSrcOut>(pMap->GetIOPtr(inputNames[3]));
    if (inputImage == nullptr){
        LOG_ERROR << "multitask postprocess input " << inputNames[3] << " haven't been init or doesn't exist.";
        return false;
    }
    if (inputImage->img_ptr_ == nullptr){
        LOG_ERROR << "multitask postprocess input " << inputNames[3] << " has no image.";
        return false;
    }

    // Filter detections on the GPU, e.g. 25200 x (5+9+32) -> 1000 x (5+9+32).
    // Reset output_length to 0 first, otherwise it keeps increasing after every inference.
    output_length_ptr->mutable_cpu_data()[0] = 0;
    multitask_instance_filter(
        rawDetections->data_->gpu_data(), rawDetectionLength,
        bboxes_ptr->mutable_gpu_data(),
        output_length_ptr->mutable_gpu_data(),
        scoreThreshold, detectionStep, maxOutputNum
    );
    auto outputLength = output_length_ptr->cpu_data();
    auto outputBoxes = bboxes_ptr->mutable_cpu_data();
    auto proto = segProtos->data_->mutable_cpu_data();

    // Detection
    const float img_width = float(inputImage->img_ptr_->cols());
    const float img_height = float(inputImage->img_ptr_->rows());
    if (img_width <= 0.0f || img_height <= 0.0f){
        // Both values are used as divisors below.
        LOG_ERROR << "multitask postprocess input " << inputNames[3] << " is an empty image.";
        return false;
    }
    float scalex = inputWidth / img_width;
    float scaley = inputHeight / img_height;
    if(fixAspectRatio){
        scalex = scaley = std::min(scalex, scaley);
    }

    // bboxes_ptr only holds maxOutputNum records, so never iterate past that even if the
    // counter reported by the filter is larger.
    const int candidateCount = std::min(outputLength[0], maxOutputNum);

    auto dets = std::make_shared<ios::Detection2Ds>();
    for(int i = 0; i < candidateCount; i++){
        const float* current_box_info = outputBoxes + i * detectionStep;

        // Best class = highest class score; final confidence = class score * box score.
        float max_class_conf = 0.0;
        int class_id = 0;
        for(int ic = 0; ic < instanceClassNumber; ic++){
            if(current_box_info[5 + ic] > max_class_conf){
                max_class_conf = current_box_info[5 + ic];
                class_id = ic;
            }
        }
        const float confidence = max_class_conf * current_box_info[4];
        if(confidence < scoreThreshold){
            continue;
        }

        auto obj = std::make_shared<ios::Det2D>();
        obj->confidence = confidence;
        obj->class_label = class_id;
        obj->class_name = instanceClassNames[obj->class_label];
        // Centre/size in network-input pixels -> corner coordinates in source-image pixels.
        obj->x1 = (current_box_info[0] - current_box_info[2] / 2) / scalex;
        obj->y1 = (current_box_info[1] - current_box_info[3] / 2) / scaley;
        obj->x2 = (current_box_info[0] + current_box_info[2] / 2) / scalex;
        obj->y2 = (current_box_info[1] + current_box_info[3] / 2) / scaley;
        obj->image_height = img_height;
        obj->image_width = img_width;
        obj->validCoordinate();

        // A box lying within truncatedThreshold (relative) of any image border is flagged.
        if((obj->x1 / img_width < truncatedThreshold) || (obj->y1 / img_height < truncatedThreshold) ||
           (obj->x2 / img_width > (1 - truncatedThreshold)) || (obj->y2 / img_height > (1 - truncatedThreshold))){
            obj->truncated = true;
        }

        for(int im = 0; im < segProtoDim; im++){
            obj->mask_coeff.push_back(current_box_info[5 + instanceClassNumber + im]);
        }
        dets->detections.push_back(obj);
    }

    // NMS
    LOG_INFO << "before nms:" << dets->detections.size();
    nms_cpu(dets, scoreThreshold, nmsThreshold, instanceClassNumber, keepTopK, true);
    LOG_INFO << "after nms:" << dets->detections.size();

    // Instance Mask
    const int mask_width = inputWidth / instanceDownScale;
    const int mask_height = inputHeight / instanceDownScale;
    const int mask_plane = mask_width * mask_height;
    // Keeps a prototype-map coordinate inside [0, upper].
    const auto clampToMap = [](int value, int upper) {
        return std::min(std::max(value, 0), upper);
    };
    for(const auto& det: dets->detections){
        // Scale the box to prototype-map level and clamp it to the map, so the loops below
        // never read a neighbouring row/plane and the local mask never gets a negative size.
        const int x1 = clampToMap(static_cast<int>(round(det->x1 * scalex / instanceDownScale)), mask_width);
        const int y1 = clampToMap(static_cast<int>(round(det->y1 * scaley / instanceDownScale)), mask_height);
        const int x2 = std::max(x1, clampToMap(static_cast<int>(round(det->x2 * scalex / instanceDownScale)), mask_width));
        const int y2 = std::max(y1, clampToMap(static_cast<int>(round(det->y2 * scaley / instanceDownScale)), mask_height));

        cv::Mat mask_mat = cv::Mat::zeros(y2 - y1, x2 - x1, CV_32FC1); // local mask
        for (int y = y1; y < y2; y++) {
            for (int x = x1; x < x2; x++) {
                // Linear combination of the prototypes at (x, y), squashed by a sigmoid.
                float e = 0.0f;
                for (int j = 0; j < segProtoDim; j++) {
                    e += det->mask_coeff[j] * proto[j * mask_plane + y * mask_width + x];
                }
                e = 1.0f / (1.0f + expf(-e));
                mask_mat.at<float>(y - y1, x - x1) = e;
            }
        }
        det->mask_ptr.reset(
            new ios::InstanceMask(mask_mat)
        );
    }
    pMap->SetIOPtr(outputNames[0], dets);

    // Semantic Mask
    auto semantics = std::make_shared<ios::Semantics>();
    auto rawSemanticSegs = rawSemantics->data_->mutable_cpu_data();
    const int sem_width = inputWidth / semanticDownScale;
    const int sem_height = inputHeight / semanticDownScale;
    const int sem_plane = sem_width * sem_height;
    for(int is = 0; is < semanticClassNumber; is++){
        auto seg = std::make_shared<ios::SemanticSeg>();
        seg->class_label = is;
        seg->class_name = semanticClassNames[is];

        // Copy plane `is` of the raw output unchanged (no sigmoid is applied).
        cv::Mat mask_mat = cv::Mat::zeros(sem_height, sem_width, CV_32FC1);
        const auto* plane = rawSemanticSegs + is * sem_plane;
        for (int y = 0; y < sem_height; y++) {
            for (int x = 0; x < sem_width; x++) {
                mask_mat.at<float>(y, x) = plane[y * sem_width + x];
            }
        }
        seg->mask_ptr.reset(
            new ios::InstanceMask(mask_mat)
        );
        semantics->semantic_segs.push_back(seg);
    }
    pMap->SetIOPtr(outputNames[1], semantics);

    // Depth output is currently disabled; no third output is produced.
    return true;
}
/// @brief Reports the name of this unit.
/// @return The fixed string "MultiPostProcessV2".
std::string MultiPostProcessV2::Name() {
    const std::string unitName{"MultiPostProcessV2"};
    return unitName;
}
} // namespace postprocess
} // namespace deepinfer
} // namespace waytous
#ifndef DEEPINFER_POSTPROCESS_MULTI_H_
#define DEEPINFER_POSTPROCESS_MULTI_H_
#include "interfaces/base_unit.h"
#include "base/image.h"
#include "libs/ios/normal_ios.h"
#include "libs/ios/camera_ios.h"
#include "libs/ios/detection.h"
#include "libs/ios/semantic.h"
#include "libs/ios/depth.h"
#include "libs/postprocessors/multi_post_gpu.h"
#include "libs/postprocessors/nms.h"
namespace waytous {
namespace deepinfer {
namespace postprocess {
/**
 * @brief Post-processing unit of the multi-task v2 network (instance + semantic outputs).
 *
 * Init() fills the configuration members from YAML and allocates the two blobs; Exec()
 * produces the detection and semantic outputs for one frame. Every scalar member has a
 * default so that the object never holds indeterminate values before Init() has run.
 */
class MultiPostProcessV2: public interfaces::BaseUnit{
public:
    /// Loads the configuration from @p node and allocates the working buffers.
    bool Init(YAML::Node& node, interfaces::BaseIOMapPtr pmap) override;
    /// Runs the post-processing for one frame.
    bool Exec() override;
    /// Returns the name of this unit.
    std::string Name() override;

    // Working buffers, allocated in Init().
    base::BlobPtr<int> output_length_ptr;   // number of records written by the detection filter
    base::BlobPtr<float> bboxes_ptr;        // filtered detection records, detectionStep floats each

    // Network input size in pixels.
    int inputHeight = 0;
    int inputWidth = 0;
    int inferBatchSize = 1;
    // When true, Exec() uses one common scale factor for x and y.
    bool fixAspectRatio = true;

    // Detection parameters.
    float nmsThreshold = 0.0f;
    float scoreThreshold = 0.0f;
    float truncatedThreshold = 0.0f;    // relative distance to the image border that marks a box truncated
    int rawDetectionLength = 0;         // passed to multitask_instance_filter together with the raw detections
    int maxOutputNum = 1000;            // capacity of bboxes_ptr in records
    int keepTopK = 100;                 // passed to nms_cpu

    // Instance-segmentation head.
    int segProtoDim = 32;               // number of mask coefficients / prototype planes
    int instanceDownScale = 4;          // network input size / prototype map size
    int instanceClassNumber = 0;
    std::vector<std::string> instanceClassNames;
    int detectionStep = 0;              // floats per record: segProtoDim + instanceClassNumber + 5

    // Semantic-segmentation head.
    int semanticDownScale = 4;          // network input size / semantic map size
    int semanticClassNumber = 0;
    std::vector<std::string> semanticClassNames;

    // Depth head is currently disabled:
    // int depthDownScale = 4, depthDistanceScale = 256;
};
// Registers MultiPostProcessV2 via the project's unit-registration macro.
// NOTE(review): the macro is defined outside this file; presumably it makes the unit
// creatable by its class name from configuration — confirm against its definition.
DEEPINFER_REGISTER_UNIT(MultiPostProcessV2);
} // namespace postprocess
} // namespace deepinfer
} // namespace waytous
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment