Commit fd5257a8 authored by xin.wang.waytous's avatar xin.wang.waytous

Deepinfer Model

parents
build
test
*.engine
*.onnx
*.wts
*.cache
*.jpg
*.png
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/src/**",
"/usr/local/cuda/include",
"/usr/local/include",
"/usr/include/**",
"/home/wangxin/projects/TensorRT-7.0.0/TensorRT/include",
],
"defines": [],
"compilerPath": "/usr/bin/gcc",
"cStandard": "c11",
"cppStandard": "c++17",
"intelliSenseMode": "linux-gcc-x64"
}
],
"version": 4
}
\ No newline at end of file
{
"files.associations": {
"vector": "cpp",
"cctype": "cpp",
"clocale": "cpp",
"cmath": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"array": "cpp",
"atomic": "cpp",
"strstream": "cpp",
"*.tcc": "cpp",
"bitset": "cpp",
"chrono": "cpp",
"complex": "cpp",
"condition_variable": "cpp",
"cstdint": "cpp",
"deque": "cpp",
"list": "cpp",
"unordered_map": "cpp",
"exception": "cpp",
"algorithm": "cpp",
"functional": "cpp",
"optional": "cpp",
"ratio": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"fstream": "cpp",
"initializer_list": "cpp",
"iomanip": "cpp",
"iosfwd": "cpp",
"iostream": "cpp",
"istream": "cpp",
"limits": "cpp",
"memory": "cpp",
"mutex": "cpp",
"new": "cpp",
"ostream": "cpp",
"numeric": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"streambuf": "cpp",
"thread": "cpp",
"cfenv": "cpp",
"cinttypes": "cpp",
"utility": "cpp",
"typeindex": "cpp",
"typeinfo": "cpp",
"bit": "cpp",
"map": "cpp",
"set": "cpp",
"iterator": "cpp",
"memory_resource": "cpp",
"random": "cpp",
"string": "cpp",
"unordered_set": "cpp"
}
}
\ No newline at end of file
name: TraDeS
inputNames: [cvImage1]
outputNames: [segInstances1]
globalParams:
inferBatchSize: 1
inputWidth: 640
inputHeight: 384
fixAspectRatio: false
units:
-
name: CameraSrc
inputNames: [cvImage1]
outputNames: [uint8Image1]
-
name: ResizeNorm
inputNames: [uint8Image1]
outputNames: [resizeNormImages]
inputMean: [0.40789654, 0.44719302, 0.47026115]
inputStd: [0.28863828, 0.27408164, 0.27809835]
useBGR: true
-
name: TRTInference
inputNames: [resizeNormImages]
outputNames: [hm, reh, wh, conv_weight, seg_feat]
runMode: 1 # 0-fp32 1-fp16 2-int8 (int8 not supported)
weightsPath: "configs/mots/trades_segv3.onnx"
engineFile: "configs/mots/trades_segv3_fp16.engine"
# calibImgPathFile: "configs/mots/trades_calib_imgs.txt"
# calibTableCache: "configs/mots/trades_calib_table.cache"
inferDynamic: false
maxBatchSize: 1
-
name: TraDesPostProcess
inputNames: [hm, reh, wh, conv_weight, seg_feat, uint8Image1]
outputNames: [segInstances1]
scoreThreshold: 0.2
truncatedThreshold: 0.05
classNumber: 3
classNames: [vehicle, person, cycle]
topK: 100
downScale: 4
segDims: 64
maxCntsLength: 500
name: YoloV5
inputNames: [cvImage1, cvImage2]
outputNames: [trackedDetections1, trackedDetections2]
globalParams:
inferBatchSize: 2
inputWidth: 640
inputHeight: 640
fixAspectRatio: true
units:
-
name: CameraSrc
inputNames: [cvImage1]
outputNames: [uint8Image1]
-
name: CameraSrc
inputNames: [cvImage2]
outputNames: [uint8Image2]
-
name: ResizeNorm
inputNames: [uint8Image1, uint8Image2]
outputNames: [resizeNormImages]
inputMean: [0, 0, 0]
inputStd: [1, 1, 1]
useBGR: false
-
name: YoloV5TRTInference
inputNames: [resizeNormImages]
outputNames: [num_detections, nmsed_boxes, nmsed_scores, nmsed_classes]
runMode: 1 # 0-fp32 1-fp16 2-int8 (int8 not supported)
weightsPath: "configs/thermal/yolov5s_640x640_thermal.wts"
engineFile: "configs/thermal/yolov5s_640x640_thermal_fp16.engine"
calibImgPathFile: "configs/thermal/yolov5s_calib_imgs.txt"
calibTableCache: "configs/thermal/yolov5s_calib_table.cache"
inferDynamic: true
maxBatchSize: 4
classNumber: 2
numAnchorsPerLevel: 3
numLevels: 3
startScale: 8
maxOutputBBoxCount: 1000
keepTopK: 100
# used when build engine, always be smaller than scoreThreshold.
# if you want it to take effect, you should delete the ${engineFile}
engineScoreThreshold: 0.1
nmsThreshold: 0.45
# yolov5
# n: 0.33+0.25 s: 0.33+0.5 m: 0.67+0.75 l: 1.0+1.0 x: 1.33+1.25
gd: 0.33
gw: 0.5
-
name: YoloV5PostProcess
inputNames: [num_detections, nmsed_boxes, nmsed_scores, nmsed_classes, uint8Image1, uint8Image2]
outputNames: [detections1, detections2]
scoreThreshold: 0.2 # used when inference, can be modified
truncatedThreshold: 0.05
keepTopK: 100
classNames: [vehicle, person]
-
name: ByteTracker
inputNames: [detections1]
outputNames: [trackedDetections1]
frame_rate: 20
# max saved tracked-objs every track
track_buffer: 30
# if obj.confidence < track_thresh and > detector.score_threshold, it will add to lower detections and used for tracking later.
track_thresh: 0.2
# if obj.confidence < high_thresh, it wouldn't be initialized.
high_thresh: 0.2
# iou threshold, if iou(a,b) > match_thresh, they matched
match_thresh: 0.8
# output detection after it was tracked in x frames
init_frames: 0
#ifndef DEEPINFER_BASE_BLOB_H_
#define DEEPINFER_BASE_BLOB_H_
#include <memory>
#include <limits>
#include "common/common.h"
#include "base/syncmem.h"
namespace waytous{
namespace deepinfer{
namespace base{
constexpr size_t kMaxBlobAxes = 32;
template <typename Dtype>
class Blob {
public:
Blob() : data_(), count_(0), capacity_(0), use_cuda_host_malloc_(false) {}
explicit Blob(bool use_cuda_host_malloc)
: data_(),
count_(0),
capacity_(0),
use_cuda_host_malloc_(use_cuda_host_malloc) {}
explicit Blob(const std::vector<int>& shape,
const bool use_cuda_host_malloc = false):
capacity_(0), use_cuda_host_malloc_(use_cuda_host_malloc) {
Reshape(shape);
};
explicit Blob(const std::vector<int>& shape,
Dtype* cpu_d_,
const bool use_cuda_host_malloc = false
):
capacity_(0), use_cuda_host_malloc_(use_cuda_host_malloc) {
Reshape(shape);
auto d_ = data_->mutable_cpu_data();
memcpy(d_, cpu_d_, data_->size());
};
Blob(const Blob&) = delete;
void operator=(const Blob&) = delete;
void Reshape(const std::vector<int>& shape){
CHECK_LE(shape.size(), kMaxBlobAxes);
count_ = 1;
shape_.resize(shape.size());
if (!shape_data_ || shape_data_->size() < shape.size() * sizeof(int)) {
shape_data_.reset(
new SyncedMemory(shape.size() * sizeof(int), use_cuda_host_malloc_));
}
int* shape_data = static_cast<int*>(shape_data_->mutable_cpu_data());
for (size_t i = 0; i < shape.size(); ++i) {
CHECK_GE(shape[i], 0);
if (count_ != 0) {
CHECK_LE(shape[i], std::numeric_limits<int>::max() / count_)
<< "blob size exceeds std::numeric_limits<int>::max()";
}
count_ *= shape[i];
shape_[i] = shape[i];
shape_data[i] = shape[i];
}
if (count_ > capacity_) {
capacity_ = count_;
data_.reset(
new SyncedMemory(capacity_ * sizeof(Dtype), use_cuda_host_malloc_));
}
};
void ReshapeLike(const Blob& other){
Reshape(other.shape());
};
inline const std::vector<int>& shape() const { return shape_; }
inline int shape(int index) const {
return shape_[CanonicalAxisIndex(index)];
}
inline int CanonicalAxisIndex(int axis_index) const {
CHECK_GE(axis_index, -num_axes())
<< "axis " << axis_index << " out of range for " << num_axes()
<< "-D Blob with shape ";
CHECK_LT(axis_index, num_axes())
<< "axis " << axis_index << " out of range for " << num_axes()
<< "-D Blob with shape ";
if (axis_index < 0) {
return axis_index + num_axes();
}
return axis_index;
}
inline int num_axes() const { return static_cast<int>(shape_.size()); }
inline int count() const { return count_; }
inline const std::shared_ptr<SyncedMemory>& data() const {
CHECK(data_);
return data_;
}
/// @brief Deprecated legacy shape accessor num: use shape(0) instead.
inline int num() const { return LegacyShape(0); }
/// @brief Deprecated legacy shape accessor channels: use shape(1) instead.
inline int channels() const { return LegacyShape(1); }
/// @brief Deprecated legacy shape accessor height: use shape(2) instead.
inline int height() const { return LegacyShape(2); }
/// @brief Deprecated legacy shape accessor width: use shape(3) instead.
inline int width() const { return LegacyShape(3); }
inline int LegacyShape(int index) const {
CHECK_LE(num_axes(), 4)
<< "Cannot use legacy accessors on Blobs with > 4 axes.";
CHECK_LT(index, 4);
CHECK_GE(index, -4);
if (index >= num_axes() || index < -num_axes()) {
// Axis is out of range, but still in [0, 3] (or [-4, -1] for reverse
// indexing) -- this special case simulates the one-padding used to fill
// extraneous axes of legacy blobs.
return 1;
}
return shape(index);
}
inline int offset(const int n, const int c = 0, const int h = 0,
const int w = 0) const {
CHECK_GE(n, 0);
CHECK_LE(n, num());
CHECK_GE(channels(), 0);
CHECK_LE(c, channels());
CHECK_GE(height(), 0);
CHECK_LE(h, height());
CHECK_GE(width(), 0);
CHECK_LE(w, width());
return ((n * channels() + c) * height() + h) * width() + w;
}
inline int offset(const std::vector<int>& indices) const {
CHECK_LE(indices.size(), static_cast<size_t>(num_axes()));
int offset = 0;
for (int i = 0; i < num_axes(); ++i) {
offset *= shape(i);
if (static_cast<int>(indices.size()) > i) {
CHECK_GE(indices[i], 0);
CHECK_LT(indices[i], shape(i));
offset += indices[i];
}
}
return offset;
}
const Dtype* cpu_data() const {
CHECK(data_);
return (const Dtype*)data_->cpu_data();
};
void set_cpu_data(Dtype* data){
CHECK(data);
// Make sure CPU and GPU sizes remain equal
size_t size = count_ * sizeof(Dtype);
if (data_->size() != size) {
data_.reset(new SyncedMemory(size, use_cuda_host_malloc_));
}
data_->set_cpu_data(data);
};
const int* gpu_shape() const{
CHECK(shape_data_);
return (const int*)shape_data_->gpu_data();
};
const Dtype* gpu_data() const {
CHECK(data_);
return (const Dtype*)data_->gpu_data();
};
void set_gpu_data(Dtype* data){
CHECK(data);
// Make sure CPU and GPU sizes remain equal
size_t size = count_ * sizeof(Dtype);
if (data_->size() != size) {
data_.reset(new SyncedMemory(size, use_cuda_host_malloc_));
}
data_->set_gpu_data(data);
};
Dtype* mutable_cpu_data(){
CHECK(data_);
return static_cast<Dtype*>(data_->mutable_cpu_data());
};
Dtype* mutable_gpu_data(){
CHECK(data_);
return static_cast<Dtype*>(data_->mutable_gpu_data());
};
void set_head_gpu() { data_->set_head_gpu(); }
void set_head_cpu() { data_->set_head_cpu(); }
SyncedMemory::SyncedHead head() const { return data_->head(); }
// void CopyFrom(const Blob<Dtype>& source, bool reshape = false);
void ShareData(const Blob& other){
CHECK_EQ(count_, other.count());
data_ = other.data();
};
protected:
std::shared_ptr<SyncedMemory> data_;
std::shared_ptr<SyncedMemory> shape_data_;
std::vector<int> shape_;
int count_;
int capacity_;
bool use_cuda_host_malloc_;
};
template <typename Dtype>
using BlobPtr = std::shared_ptr<Blob<Dtype>>;
template <typename Dtype>
using BlobConstPtr = std::shared_ptr<const Blob<Dtype>>;
} //namespace base
} //namespace deepinfer
} //namespace waytous
#endif
#ifndef DEEPINFER_BASE_IMAGE_H_
#define DEEPINFER_BASE_IMAGE_H_
#pragma once
#include <string>
#include <memory>
#include <map>
#include <opencv2/opencv.hpp>
#include "base/blob.h"
namespace waytous{
namespace deepinfer{
namespace base{
// Pixel layout of an Image8U.
enum class Color {
NONE = 0x00,
GRAY = 0x01,
RGB = 0x02,
BGR = 0x03,
};
// Channel count for each color layout.
const std::map<Color, int> kChannelsMap{
{Color::GRAY, 1}, {Color::RGB, 3}, {Color::BGR, 3}};
// Lightweight view over an HWC uint8 Blob: copies of an Image8U share the
// same blob, and ROI views alias a sub-rectangle via `offset_`.
class Image8U{
public:
Image8U()
: rows_(0),
cols_(0),
type_(Color::NONE),
channels_(0),
width_step_(0),
blob_(nullptr),
offset_(0) {}
// Wraps an existing 3-axis (H, W, C) blob without copying; `offset` is the
// element offset of this view's first pixel within the blob.
Image8U(int rows, int cols, Color type, std::shared_ptr<Blob<uint8_t>> blob,
int offset = 0)
: rows_(rows), cols_(cols), type_(type), blob_(blob), offset_(offset) {
channels_ = kChannelsMap.at(type);
CHECK_EQ(blob_->num_axes(), 3);
CHECK_EQ(blob_->shape(2), channels_);
// Last addressable pixel of the view must fall inside the blob.
CHECK_LE(offset_ + blob_->offset({rows - 1, cols - 1, channels_ - 1}),
(int)(blob_->count()));
// Bytes between the starts of consecutive rows (full-blob row stride).
width_step_ = blob_->offset({1, 0, 0}) * static_cast<int>(sizeof(uint8_t));
}
// Allocates a fresh rows x cols x channels blob owned by this image.
Image8U(int rows, int cols, Color type)
: rows_(rows), cols_(cols), type_(type), offset_(0) {
channels_ = kChannelsMap.at(type);
blob_.reset(new Blob<uint8_t>({rows_, cols_, channels_}));
width_step_ = blob_->offset({1, 0, 0}) * static_cast<int>(sizeof(uint8_t));
}
// use cv::image to init blob_
// NOTE(review): the blob adopts the Mat's pixel buffer via set_cpu_data
// (no copy, no ownership), so `image` must outlive this Image8U; type is
// hard-coded to BGR and a densely packed (continuous) Mat is assumed --
// confirm at call sites.
Image8U(cv::Mat& image){
rows_ = image.rows;
cols_ = image.cols;
type_ = Color::BGR;
offset_ = 0;
channels_ = image.channels();
blob_.reset(new Blob<uint8_t>({rows_, cols_, channels_}));
blob_->set_cpu_data(image.data);
width_step_ = blob_->offset({1, 0, 0}) * static_cast<int>(sizeof(uint8_t));
}
// use cv::image to init blob_
// Pointer overload of the constructor above; same non-owning caveats apply.
Image8U(cv::Mat* image){
rows_ = image->rows;
cols_ = image->cols;
type_ = Color::BGR;
offset_ = 0;
channels_ = image->channels();
blob_.reset(new Blob<uint8_t>({rows_, cols_, channels_}));
blob_->set_cpu_data(image->data);
width_step_ = blob_->offset({1, 0, 0}) * static_cast<int>(sizeof(uint8_t));
}
// convert to opencv img
// NOTE(review): the returned Mat's data pointer is re-targeted at the
// blob's host memory, so it does not own (or keep alive) that memory --
// keep this Image8U alive while the Mat is in use. Channel counts other
// than 1 or 3 leave `image` default-constructed before the data swap.
cv::Mat toCVMat(){
cv::Mat image;
if(channels_ == 1){
image = cv::Mat(rows_, cols_, CV_8UC1);
}else if (channels_ == 3){
image = cv::Mat(rows_, cols_, CV_8UC3);
}
image.data = blob_->mutable_cpu_data();
return image;
}
// Shallow copy: shares the same underlying blob.
Image8U(const Image8U &src)
: rows_(src.rows_),
cols_(src.cols_),
type_(src.type_),
channels_(src.channels_),
width_step_(src.width_step_),
blob_(src.blob_),
offset_(src.offset_) {}
// Shallow assignment: shares the same underlying blob.
Image8U &operator=(const Image8U &src) {
this->rows_ = src.rows_;
this->cols_ = src.cols_;
this->type_ = src.type_;
this->channels_ = src.channels_;
this->width_step_ = src.width_step_;
this->blob_ = src.blob_;
this->offset_ = src.offset_;
return *this;
}
~Image8U() {}
uint8_t *mutable_cpu_data() { return mutable_cpu_ptr(0); }
uint8_t *mutable_gpu_data() { return mutable_gpu_ptr(0); }
const uint8_t *cpu_data() const { return cpu_ptr(0); }
const uint8_t *gpu_data() const { return gpu_ptr(0); }
// Pointer to the first element of `row` within this view.
const uint8_t *cpu_ptr(int row = 0) const {
return blob_->cpu_data() + blob_->offset({row, 0, 0}) + offset_;
}
const uint8_t *gpu_ptr(int row = 0) const {
return blob_->gpu_data() + blob_->offset({row, 0, 0}) + offset_;
}
uint8_t *mutable_cpu_ptr(int row = 0) {
return blob_->mutable_cpu_data() + blob_->offset({row, 0, 0}) + offset_;
}
uint8_t *mutable_gpu_ptr(int row = 0) {
return blob_->mutable_gpu_data() + blob_->offset({row, 0, 0}) + offset_;
}
Color type() const { return type_; }
int rows() const { return rows_; }
int cols() const { return cols_; }
int channels() const { return channels_; }
int width_step() const { return width_step_; }
// @brief: returns the total number of pixels.
int total() const { return rows_ * cols_ * channels_; }
// ROI view sharing the same blob (no copy); row stride stays that of the
// full image.
Image8U operator()(const cv::Rect2i &roi) {
int offset = offset_ + blob_->offset({roi.y, roi.x, 0});
// return Image8U(roi.height, roi.width, type_, blob_, offset);
return Image8U(roi.height, roi.width, type_, blob_, offset);
}
// Heap-allocated ROI view; caller owns the returned pointer.
Image8U* newRoiImage(const std::shared_ptr<cv::Rect2i> roi) {
int offset = offset_ + blob_->offset({roi->y, roi->x, 0});
// return Image8U(roi->height, roi->width, type_, blob_, offset);
return new Image8U(roi->height, roi->width, type_, blob_, offset);
}
std::shared_ptr<Blob<uint8_t>> blob() { return blob_; }
// DONOT return `std::shared_ptr<const Blob<uint8_t>> &` or `const std::... &`
std::shared_ptr<const Blob<uint8_t>> blob() const { return blob_; }
protected:
int rows_;
int cols_;
Color type_;
int channels_;
int width_step_;
std::shared_ptr<Blob<uint8_t>> blob_;
int offset_;
}; // class Image8U
typedef std::shared_ptr<Image8U> Image8UPtr;
typedef std::shared_ptr<const Image8U> Image8UConstPtr;
} //namespace base
} //namespace deepinfer
} //namespace waytous
#endif
#ifndef DEEPINFER_BASE_PIPELINE_H_
#define DEEPINFER_BASE_PIPELINE_H_
#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
#include <thread>
#include <atomic>
#include <yaml-cpp/yaml.h>
#include "common/register.h"
using namespace std;
namespace waytous {
namespace deepinfer {
namespace base {
// Common lifecycle contract for every pipeline component.
class LifeCycle {
public:
    // Virtual destructor: instances are held and destroyed through
    // base-class (smart) pointers, which is undefined behavior without it.
    virtual ~LifeCycle() = default;
    virtual void init(YAML::Node& node) = 0;
    virtual void startUp() = 0;
    virtual void shutDown() = 0;
};
// A lifecycle-aware processing node that consumes values of type T.
template<typename T>
class Component : public LifeCycle {
public:
virtual std::string getName() = 0;
// Process one input value (implementations forward results downstream).
virtual void execute(T t) = 0;
};
/**
 * @brief Base pipeline node: runs doExecute() on the input and fans the
 * result out to every registered downstream component.
 * @tparam T input type
 * @tparam R result type; void marks a terminal node with no downstream fan-out
 */
template<typename T, typename R>
class AbstractComponent : public Component<T> {
private:
    // Downstream consumers of this component's result.
    std::unordered_set<shared_ptr<Component<R>>> down_stream;
protected:
    const std::unordered_set<shared_ptr<Component<R>>> &getDownStream() {
        return down_stream;
    }
protected:
    // Subclass hook: transform one input into one result.
    virtual R doExecute(T t) = 0;
public:
    void addDownStream(shared_ptr<Component<R>> component) {
        down_stream.insert(component);
    }
    void init(YAML::Node& node) override {
    }
    void execute(T t) override {
        if constexpr (is_same_v<R, void>) {
            // Terminal node: nothing to forward. The result declaration must
            // live inside this constexpr branch -- the original
            // `R r = doExecute(t);` before the check was ill-formed for
            // R = void and could never compile.
            doExecute(t);
            cout << this->getName() + "\treceive\t" << typeid(t).name() << "\t" << t << endl;
            return;
        } else {
            R r = doExecute(t);
            cout << this->getName() + "\treceive\t" << typeid(t).name() << "\t" << t << "\treturn\t" << typeid(r).name()
                 << "\t" << r << endl;
            for (auto &&obj : getDownStream()) {
                obj->execute(r);
            }
        }
    }
    void startUp() override {
        // Start downstream nodes first so they are ready before data flows.
        for (auto &&obj : this->getDownStream()) {
            obj->startUp();
        }
        cout << "------------------ " + this->getName() + " is starting ----------------------" << endl;
    }
    void shutDown() override {
        // Bind by const reference: `auto` here used to copy the whole set.
        const auto &downStreams = this->getDownStream();
        for (auto &&obj : downStreams) {
            obj->shutDown();
        }
        cout << "------------------ " + this->getName() + " is ending ----------------------" << endl;
    }
};
// Role aliases: Source produces, Channel transforms, Sink consumes; all share
// AbstractComponent's fan-out behavior.
template<typename T, typename R>
using Source = AbstractComponent<T, R>;
// DEEPINFER_REGISTER_REGISTERER(Source);
// #define DEEPINFER_REGISTER_INFERENCE_ENGINE(name) \
// DEEPINFER_REGISTER_CLASS(Source, name)
template<typename T, typename R>
using Channel = AbstractComponent<T, R>;
template<typename T, typename R>
using Sink = AbstractComponent<T, R>;
// Owns the source node and drives the whole chain through it.
template<typename R, typename T>
class Pipeline : public LifeCycle {
private:
shared_ptr<Source<R, T>> source;
public:
void setSource(shared_ptr<Source<R, T>> component) {
source = component;
}
void init(YAML::Node& node) override {
}
void startUp() override {
// Fail fast if no source was configured.
assert(source.get() != nullptr);
source->startUp();
}
void shutDown() override {
// NOTE(review): unlike startUp() there is no null check here; calling
// shutDown() before setSource() dereferences null -- confirm call order.
source->shutDown();
}
};
/*
class printSink;
class intStringChannel;
class printSink : public Sink<string, int> {
public:
string getName() override {
return "printSink";
}
protected:
int doExecute(string t) override {
return INT_MIN;
}
};
class intStringChannel : public Channel<int, string> {
public:
void init(std::string config) override {
}
string getName() override {
return "intStringChannel";
}
void startUp() override {
}
void shutDown() override {
}
protected:
string doExecute(int t) override {
return to_string(t + 100);
}
};
class IntSource : public Source<int, int> {
private:
int val = 0;
public:
void init(std::string config) override {
cout << "--------- " + getName() + " init --------- ";
val = 1;
}
string getName() override {
return "Int Source";
}
void startUp() override {
this->execute(val);
}
protected:
int doExecute(int) override {
return val + 1;
}
};
int main() {
pipeline<int, int> p;
// source
auto is = make_shared<IntSource>();
// channel
auto isc = make_shared<intStringChannel>();
// sink
auto ps = make_shared<printSink>();
is->addDownStream(isc);
isc->addDownStream(ps);
// 设置 source
p.setSource(is);
// 启动
p.startUp();
}
*/
} // namespace base
} // namespace deepinfer
} // namespace waytous
#endif
#include "base/syncmem.h"
namespace waytous{
namespace deepinfer{
namespace base{
// Creates an empty (size 0) buffer; use_cuda selects pinned host allocation.
// No memory is allocated until first access (see to_cpu()/to_gpu()).
SyncedMemory::SyncedMemory(bool use_cuda)
: cpu_ptr_(NULL),
gpu_ptr_(NULL),
size_(0),
head_(UNINITIALIZED),
own_cpu_data_(false),
cpu_malloc_use_cuda_(use_cuda),
own_gpu_data_(false),
device_(-1) {
#ifdef PERCEPTION_DEBUG
// Record the current CUDA device for later sanity checks (check_device()).
CUDA_CHECK(cudaGetDevice(&device_));
#endif
}
// Reserves `size` bytes; actual allocation is deferred until first access.
SyncedMemory::SyncedMemory(size_t size, bool use_cuda)
: cpu_ptr_(NULL),
gpu_ptr_(NULL),
size_(size),
head_(UNINITIALIZED),
own_cpu_data_(false),
cpu_malloc_use_cuda_(use_cuda),
own_gpu_data_(false),
device_(-1) {
#ifdef PERCEPTION_DEBUG
CUDA_CHECK(cudaGetDevice(&device_));
#endif
}
// Frees only buffers this object allocated itself; adopted buffers from
// set_cpu_data()/set_gpu_data() (own_*_data_ == false) are left alone.
SyncedMemory::~SyncedMemory() {
check_device();
if (cpu_ptr_ && own_cpu_data_) {
PerceptionFreeHost(cpu_ptr_, cpu_malloc_use_cuda_);
}
if (gpu_ptr_ && own_gpu_data_) {
CUDA_CHECK(cudaFree(gpu_ptr_));
}
}
// Ensures a valid, up-to-date host copy exists.
// UNINITIALIZED: allocate and zero host memory; HEAD_AT_GPU: allocate if
// needed and copy device -> host; HEAD_AT_CPU/SYNCED: nothing to do.
inline void SyncedMemory::to_cpu() {
  check_device();
  switch (head_) {
    case UNINITIALIZED:
      PerceptionMallocHost(&cpu_ptr_, size_, cpu_malloc_use_cuda_);
      if (cpu_ptr_ == nullptr) {
        LOG_ERROR << "cpu_ptr_ is null";
        return;
      }
      memset(cpu_ptr_, 0, size_);
      head_ = HEAD_AT_CPU;
      own_cpu_data_ = true;
      break;
    case HEAD_AT_GPU:
      if (cpu_ptr_ == nullptr) {
        PerceptionMallocHost(&cpu_ptr_, size_, cpu_malloc_use_cuda_);
        // Mirror the UNINITIALIZED branch: bail out instead of handing a
        // null destination to cudaMemcpy when host allocation failed
        // (the original only null-checked the first branch).
        if (cpu_ptr_ == nullptr) {
          LOG_ERROR << "cpu_ptr_ is null";
          return;
        }
        own_cpu_data_ = true;
      }
      CUDA_CHECK(cudaMemcpy(cpu_ptr_, gpu_ptr_, size_, cudaMemcpyDefault));
      head_ = SYNCED;
      break;
    case HEAD_AT_CPU:
    case SYNCED:
      break;
  }
}
// Ensures a valid, up-to-date device copy exists.
// UNINITIALIZED: allocate and zero device memory; HEAD_AT_CPU: allocate if
// needed and copy host -> device; HEAD_AT_GPU/SYNCED: nothing to do.
inline void SyncedMemory::to_gpu() {
check_device();
switch (head_) {
case UNINITIALIZED:
CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
CUDA_CHECK(cudaMemset(gpu_ptr_, 0, size_));
head_ = HEAD_AT_GPU;
own_gpu_data_ = true;
// LOG_INFO << "gpu init malloc: " << size_;
break;
case HEAD_AT_CPU:
if (gpu_ptr_ == nullptr) {
CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
own_gpu_data_ = true;
// LOG_INFO << "gpu copy malloc: " << size_;
}
CUDA_CHECK(cudaMemcpy(gpu_ptr_, cpu_ptr_, size_, cudaMemcpyDefault));
// LOG_INFO << "gpu copy done";
head_ = SYNCED;
break;
case HEAD_AT_GPU:
case SYNCED:
break;
}
}
// Read-only host view; synchronizes device -> host first when the GPU copy
// is newer. Ownership and head-state semantics are unchanged by the read.
const void* SyncedMemory::cpu_data() {
  check_device();
  to_cpu();
  return cpu_ptr_;
}
// Adopts an externally owned host buffer: frees any buffer we owned, then
// records the new pointer without taking ownership (own_cpu_data_ = false).
// NOTE(review): assumes `data` holds at least size_ bytes -- confirm callers.
void SyncedMemory::set_cpu_data(void* data) {
check_device();
CHECK(data);
if (own_cpu_data_) {
PerceptionFreeHost(cpu_ptr_, cpu_malloc_use_cuda_);
}
cpu_ptr_ = data;
head_ = HEAD_AT_CPU;
own_cpu_data_ = false;
}
// Read-only device view; synchronizes host -> device first when the CPU copy
// is newer. Ownership and head-state semantics are unchanged by the read.
const void* SyncedMemory::gpu_data() {
  check_device();
  to_gpu();
  return gpu_ptr_;
}
// Adopts an externally owned device buffer: frees any buffer we owned, then
// records the new pointer without taking ownership (own_gpu_data_ = false).
// NOTE(review): assumes `data` holds at least size_ bytes -- confirm callers.
void SyncedMemory::set_gpu_data(void* data) {
check_device();
CHECK(data);
if (own_gpu_data_) {
CUDA_CHECK(cudaFree(gpu_ptr_));
}
gpu_ptr_ = data;
head_ = HEAD_AT_GPU;
own_gpu_data_ = false;
}
// Writable host pointer; marks the CPU side as freshest so the next device
// access re-uploads.
void* SyncedMemory::mutable_cpu_data() {
check_device();
to_cpu();
head_ = HEAD_AT_CPU;
return cpu_ptr_;
}
// Writable device pointer; marks the GPU side as freshest so the next host
// access re-downloads.
void* SyncedMemory::mutable_gpu_data() {
check_device();
to_gpu();
head_ = HEAD_AT_GPU;
return gpu_ptr_;
}
// Asynchronously copies host -> device on `stream`. Requires the CPU copy to
// be the freshest (head_ == HEAD_AT_CPU); allocates device memory on demand.
void SyncedMemory::async_gpu_push(const cudaStream_t& stream) {
check_device();
CHECK_EQ(head_, HEAD_AT_CPU);
if (gpu_ptr_ == nullptr) {
CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
own_gpu_data_ = true;
LOG_INFO << "gpu async malloc: " << size_;
}
const cudaMemcpyKind put = cudaMemcpyHostToDevice;
CUDA_CHECK(cudaMemcpyAsync(gpu_ptr_, cpu_ptr_, size_, put, stream));
// Assume caller will synchronize on the stream before use
head_ = SYNCED;
}
// Debug-only sanity check: verifies the current CUDA device (and the device
// that owns gpu_ptr_) matches the one recorded at construction. Compiles to
// a no-op unless PERCEPTION_DEBUG is defined.
void SyncedMemory::check_device() {
#ifdef PERCEPTION_DEBUG
int device;
cudaGetDevice(&device);
CHECK_EQ(device, device_);
if (gpu_ptr_ && own_gpu_data_) {
cudaPointerAttributes attributes;
CUDA_CHECK(cudaPointerGetAttributes(&attributes, gpu_ptr_));
CHECK_EQ(attributes.device, device_);
}
#endif
}
} //namespace base
} //namespace deepinfer
} //namespace waytous
#ifndef DEEPINFER_BASE_SYNCMEM_H_
#define DEEPINFER_BASE_SYNCMEM_H_
#include "common/log.h"
#include "common/common.h"
namespace waytous{
namespace deepinfer{
namespace base{
// Allocates `size` bytes of host memory into *ptr: pinned CUDA host memory
// when use_cuda is true, plain malloc otherwise. Aborts via the respective
// check macro when the allocation fails.
inline void PerceptionMallocHost(void** ptr, size_t size, bool use_cuda) {
  if (use_cuda) {
    CUDA_CHECK(cudaMallocHost(ptr, size));
  } else {
    *ptr = malloc(size);
    CHECK(*ptr) << "host allocation of size " << size << " failed";
  }
}
// Releases host memory obtained from PerceptionMallocHost; `use_cuda` must
// match the flag used at allocation time.
inline void PerceptionFreeHost(void* ptr, bool use_cuda) {
  if (use_cuda) {
    CUDA_CHECK(cudaFreeHost(ptr));
  } else {
    free(ptr);
  }
}
/**
 * @brief Manages memory allocation and synchronization between the host (CPU)
 * and device (GPU).
 *
 * Holds a lazily-allocated buffer pair; head_ tracks which side has the
 * freshest data, and the accessors synchronize on demand.
 */
class SyncedMemory {
 public:
  // Which side currently holds the freshest copy of the data.
  enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };

  /// @param use_cuda allocate host memory with cudaMallocHost (pinned).
  explicit SyncedMemory(bool use_cuda);
  SyncedMemory(size_t size, bool use_cuda);
  SyncedMemory(const SyncedMemory&) = delete;
  void operator=(const SyncedMemory&) = delete;
  ~SyncedMemory();

  // Read-only accessors: synchronize if needed, leave head_ SYNCED.
  const void* cpu_data();
  const void* gpu_data();
  // Adopt an externally owned buffer (never freed by this object).
  void set_cpu_data(void* data);
  void set_gpu_data(void* data);
  // Mutable accessors: mark the returned side as the freshest copy.
  void* mutable_cpu_data();
  void* mutable_gpu_data();

  SyncedHead head() const { return head_; }
  void set_head(SyncedHead head) { head_ = head; }
  void set_head_gpu() { set_head(HEAD_AT_GPU); }
  void set_head_cpu() { set_head(HEAD_AT_CPU); }
  // const-qualified so it can be called through const references (the
  // original non-const accessor could not be).
  size_t size() const { return size_; }

  /// Asynchronous host -> device copy on `stream`; the caller must
  /// synchronize on the stream before using the device data.
  void async_gpu_push(const cudaStream_t& stream);

 private:
  void check_device();
  void to_cpu();
  void to_gpu();

 private:
  void* cpu_ptr_;
  void* gpu_ptr_;
  size_t size_;
  SyncedHead head_;
  bool own_cpu_data_;         // true if this object allocated cpu_ptr_
  bool cpu_malloc_use_cuda_;  // pinned host allocation flag
  bool own_gpu_data_;         // true if this object allocated gpu_ptr_
  int device_;                // device id (checked under PERCEPTION_DEBUG)
};  // class SyncedMemory
} //namespace base
} //namespace deepinfer
} //namespace waytous
#endif
\ No newline at end of file
#ifndef DEEPINFER_COMMON_H_
#define DEEPINFER_COMMON_H_
#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <iostream>
#include <assert.h>
// Default CUDA launch configuration: threads per block.
#define BLOCK 512
// NOTE(review): function-like macros evaluate their arguments more than once;
// do not pass expressions with side effects.
#define iMAX(a, b) ((a) > (b) ? (a) : (b))
#define iMIN(a, b) ((a) > (b) ? (b) : (a))
// Logs and asserts when a CUDA runtime call does not return cudaSuccess.
#ifndef CUDA_CHECK
#define CUDA_CHECK(callstr) \
{ \
cudaError_t error_code = callstr; \
if (error_code != cudaSuccess) { \
std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__; \
assert(0); \
} \
}
#endif
// Computes a grid size covering `n` work items with `block` threads per
// block; spills into a second grid dimension when the x extent would exceed
// the 65535 limit.
inline dim3 cudaGridSize(uint n, uint block)
{
    const uint blocks_needed = (n - 1) / block + 1;
    uint x = blocks_needed;
    uint y = 1;
    if (x > 65535)
    {
        // Split roughly square: x ~= sqrt(blocks), y covers the remainder.
        x = static_cast<uint>(ceil(sqrt(static_cast<double>(x))));
        y = (n - 1) / (x * block) + 1;
    }
    return dim3{x, y, 1};
}
#endif
#include "common/file.h"
namespace waytous{
namespace deepinfer{
namespace common{
// Definition of the lazily-created singleton instance.
std::shared_ptr<ConfigRoot> ConfigRoot::instance = nullptr;
// Returns the configured root path; falls back to "" (with a warning) when
// SetRootPath() was never called.
// NOTE(review): lazy init is not thread-safe -- confirm callers initialize
// this before any concurrent use.
const std::string ConfigRoot::GetRootPath(){
if(instance == nullptr){
SetRootPath("");
LOG_WARN << "Haven't init config root, use default ''.";
}
return instance->rootPath;
}
void ConfigRoot::SetRootPath(const std::string path){
if(instance == nullptr){
instance.reset(new ConfigRoot());
}
instance->rootPath = path;
}
} //namespace common
} //namespace deepinfer
} //namespace waytous
#ifndef WAYTOUS_DEEPINFER_COMMON_FILES_H_
#define WAYTOUS_DEEPINFER_COMMON_FILES_H_
#pragma once
#include <string>
#include <memory>
#include <sys/stat.h>
#include <unistd.h>
#include "common/log.h"
namespace waytous{
namespace deepinfer{
namespace common{
// Process-wide holder of the configuration root directory, used to resolve
// relative config paths (see GetAbsolutePath below).
class ConfigRoot{
public:
// Lazily-created singleton; defined in file.cpp.
static std::shared_ptr<ConfigRoot> instance;
public:
std::string rootPath;
// Returns the root path, defaulting to "" (with a warning) if never set.
static const std::string GetRootPath();
static void SetRootPath(const std::string path);
};
// Returns the process working directory, or "" if getcwd fails (e.g. the
// path exceeds the 4096-byte buffer).
inline std::string GetCurrentPath() {
  char buffer[4096];
  if (getcwd(buffer, sizeof(buffer)) == nullptr) {
    return std::string("");
  }
  return std::string(buffer);
}
// Joins `prefix` and `relative_path` with a single '/'. An empty relative
// path returns the prefix; an absolute relative path (or empty prefix)
// returns the relative path unchanged.
inline std::string GetAbsolutePath(const std::string &prefix,
                                   const std::string &relative_path) {
  if (relative_path.empty()) {
    return prefix;
  }
  // Nothing to prepend, or the path is already absolute.
  if (prefix.empty() || relative_path.front() == '/') {
    return relative_path;
  }
  const bool prefix_has_sep = (prefix.back() == '/');
  return prefix_has_sep ? prefix + relative_path
                        : prefix + "/" + relative_path;
}
// True when `path` exists (file or directory) according to stat(2).
inline bool PathExists(const std::string &path) {
  struct stat info;
  const int ret = stat(path.c_str(), &info);
  return ret == 0;
}
} //namespace common
} //namespace deepinfer
} //namespace waytous
#endif
\ No newline at end of file
#ifndef DEEPINFER_COMMON_LOG_H_
#define DEEPINFER_COMMON_LOG_H_
#pragma once
#include <string>
//sudo apt-get install libgoogle-glog*
#include "glog/logging.h"
// glog-backed stream loggers, used as e.g. `LOG_INFO << "message";`.
#define LOG_INFO google::LogMessage(__FILE__, __LINE__, google::INFO).stream()
#define LOG_WARN google::LogMessage(__FILE__, __LINE__, google::WARNING).stream()
#define LOG_ERROR google::LogMessage(__FILE__, __LINE__, google::ERROR).stream()
// FATAL severity aborts the process after logging (glog behavior).
#define LOG_FATAL google::LogMessage(__FILE__, __LINE__, google::FATAL).stream()
// Verbose debug logging; emitted only when glog verbosity is >= 4 (--v=4).
#define LOG_DEBUG VLOG(4) << "[DEBUG]"
#endif
#include "common/register.h"
namespace waytous {
namespace deepinfer {
namespace common {
// Returns the process-wide base-class -> (name -> factory) registry.
// Function-local static: initialized on first use, thread-safe since C++11.
BaseClassMap &GlobalFactoryMap() {
static BaseClassMap factory_map;
return factory_map;
}
/**
 * @brief Appends the names of all classes registered under
 * `base_class_name` to `registered_derived_classes_names`.
 * @return false (with an error log) when the output pointer is null or the
 * base class has no registrations; true otherwise.
 */
bool GetRegisteredClasses(
    const std::string &base_class_name,
    std::vector<std::string> *registered_derived_classes_names) {
  if (registered_derived_classes_names == nullptr) {
    LOG_ERROR << "registered_derived_classes_names is not available";
    return false;
  }
  BaseClassMap &map = GlobalFactoryMap();
  auto iter = map.find(base_class_name);
  if (iter == map.end()) {
    LOG_ERROR << "class not registered:" << base_class_name;
    return false;
  }
  registered_derived_classes_names->reserve(
      registered_derived_classes_names->size() + iter->second.size());
  // Iterate by const reference: the original copied each map entry
  // (a pair<const string, ObjectFactory*>) per iteration.
  for (const auto &pair : iter->second) {
    registered_derived_classes_names->push_back(pair.first);
  }
  return true;
}
} // namespace common
} // namespace deepinfer
} // namespace waytous
#ifndef DEEPINFER_COMMON_REGISTER_H_
#define DEEPINFER_COMMON_REGISTER_H_
#pragma once
#include <map>
#include <string>
#include <vector>
#include "common/log.h"
namespace waytous {
namespace deepinfer {
namespace common {
// from apollo register
// idea from boost any but make it more simple and don't use type_info.
// Minimal type-erased value holder (simplified boost::any without type_info).
// NOTE: AnyCast performs no type checking; casting to the wrong ValueType is
// undefined behavior.
class Any {
 public:
  Any() : content_(NULL) {}

  template <typename ValueType>
  explicit Any(const ValueType &value)
      : content_(new Holder<ValueType>(value)) {}

  Any(const Any &other)
      : content_(other.content_ ? other.content_->Clone() : nullptr) {}

  // Deep-copy assignment. The class already declared a destructor and copy
  // constructor; without this operator the implicitly-generated memberwise
  // copy aliased content_ in both objects, causing a double delete.
  Any &operator=(const Any &other) {
    if (this != &other) {
      PlaceHolder *copy = other.content_ ? other.content_->Clone() : nullptr;
      delete content_;
      content_ = copy;
    }
    return *this;
  }

  ~Any() { delete content_; }

  // Returns a pointer to the held value, or nullptr when empty.
  template <typename ValueType>
  ValueType *AnyCast() {
    return content_ ? &(static_cast<Holder<ValueType> *>(content_)->held_)
                    : nullptr;
  }

 private:
  class PlaceHolder {
   public:
    virtual ~PlaceHolder() {}
    virtual PlaceHolder *Clone() const = 0;
  };

  template <typename ValueType>
  class Holder : public PlaceHolder {
   public:
    explicit Holder(const ValueType &value) : held_(value) {}
    virtual ~Holder() {}
    virtual PlaceHolder *Clone() const { return new Holder(held_); }
    ValueType held_;
  };

  PlaceHolder *content_;
};
// Base factory: per-class factories generated by DEEPINFER_REGISTER_CLASS
// override NewInstance() to return an Any wrapping a new object pointer.
class ObjectFactory {
 public:
  ObjectFactory() = default;
  ObjectFactory(const ObjectFactory &) = delete;
  ObjectFactory &operator=(const ObjectFactory &) = delete;
  virtual ~ObjectFactory() = default;
  // Default implementation yields an empty Any.
  virtual Any NewInstance() { return Any(); }
};
typedef std::map<std::string, ObjectFactory *> FactoryMap;
typedef std::map<std::string, FactoryMap> BaseClassMap;
BaseClassMap &GlobalFactoryMap();
bool GetRegisteredClasses(
const std::string &base_class_name,
std::vector<std::string> *registered_derived_classes_names);
} // namespace common
} // namespace deepinfer
} // namespace waytous
// Generates a `<base_class>Registerer` class exposing static factory lookups
// over GlobalFactoryMap()[#base_class]: GetInstanceByName, GetAllInstances,
// GetUniqInstanceName/GetUniqInstance (require exactly one registration), and
// IsValid. Returned raw pointers are newly allocated; the caller owns them.
// (Comments cannot be placed inside the macro body: line splicing would pull
// the next line into a // comment.)
#define DEEPINFER_REGISTER_REGISTERER(base_class) \
class base_class##Registerer { \
typedef ::waytous::deepinfer::common::Any Any; \
typedef ::waytous::deepinfer::common::FactoryMap FactoryMap; \
\
public: \
static base_class *GetInstanceByName(const ::std::string &name) { \
FactoryMap &map = \
::waytous::deepinfer::common::GlobalFactoryMap()[#base_class]; \
FactoryMap::iterator iter = map.find(name); \
if (iter == map.end()) { \
for (auto c : map) { \
LOG_ERROR << "Instance:" << c.first; \
} \
LOG_ERROR << "Get instance " << name << " failed."; \
return nullptr; \
} \
Any object = iter->second->NewInstance(); \
return *(object.AnyCast<base_class *>()); \
} \
static std::vector<base_class *> GetAllInstances() { \
std::vector<base_class *> instances; \
FactoryMap &map = \
::waytous::deepinfer::common::GlobalFactoryMap()[#base_class]; \
instances.reserve(map.size()); \
for (auto item : map) { \
Any object = item.second->NewInstance(); \
instances.push_back(*(object.AnyCast<base_class *>())); \
} \
return instances; \
} \
static const ::std::string GetUniqInstanceName() { \
FactoryMap &map = \
::waytous::deepinfer::common::GlobalFactoryMap()[#base_class]; \
CHECK_EQ(map.size(), 1U) << map.size(); \
return map.begin()->first; \
} \
static base_class *GetUniqInstance() { \
FactoryMap &map = \
::waytous::deepinfer::common::GlobalFactoryMap()[#base_class]; \
CHECK_EQ(map.size(), 1U) << map.size(); \
Any object = map.begin()->second->NewInstance(); \
return *(object.AnyCast<base_class *>()); \
} \
static bool IsValid(const ::std::string &name) { \
FactoryMap &map = \
::waytous::deepinfer::common::GlobalFactoryMap()[#base_class]; \
return map.find(name) != map.end(); \
} \
};
// Registers concrete class `name` under base `clazz`: defines a factory in an
// anonymous namespace and inserts it into GlobalFactoryMap()[#clazz] before
// main() runs, via __attribute__((constructor)).
#define DEEPINFER_REGISTER_CLASS(clazz, name) \
namespace { \
class ObjectFactory##name : public waytous::deepinfer::common::ObjectFactory { \
public: \
virtual ~ObjectFactory##name() {} \
virtual ::waytous::deepinfer::common::Any NewInstance() { \
return ::waytous::deepinfer::common::Any(new name()); \
} \
}; \
__attribute__((constructor)) void RegisterFactory##name() { \
::waytous::deepinfer::common::FactoryMap &map = \
::waytous::deepinfer::common::GlobalFactoryMap()[#clazz]; \
if (map.find(#name) == map.end()) map[#name] = new ObjectFactory##name(); \
} \
} // namespace
#endif
#include "interfaces/base_io.h"
namespace waytous {
namespace deepinfer {
namespace interfaces {
// Fetch the IO registered under `ioName`; nullptr when no such entry exists.
BaseIOPtr GetIOPtr(std::string& ioName){
const auto found = GlobalIOMap.find(ioName);
return found == GlobalIOMap.end() ? nullptr : found->second;
}
// Register (or overwrite) the IO stored under `ioName`.
void SetIOPtr(std::string& ioName, BaseIOPtr src){
GlobalIOMap.insert_or_assign(ioName, src);
}
} // namespace interfaces
} // namespace deepinfer
} // namespace waytous
#ifndef DEEPINFER_INTERFACES_BASE_IO_H_
#define DEEPINFER_INTERFACES_BASE_IO_H_
#include <string>
#include <vector>
#include <map>
#include <memory>
#include "common/log.h"
namespace waytous {
namespace deepinfer {
namespace interfaces {
// Base class for all typed payloads exchanged between units; concrete IOs
// (images, blobs, detections, ...) derive from it and are passed around as
// shared_ptr through the global name -> IO registry below.
class BaseIO{
public:
BaseIO() = default;
virtual ~BaseIO() = default;
};
using BaseIOPtr = std::shared_ptr<BaseIO>;
typedef std::map<std::string, BaseIOPtr> BaseIOMap;
// NOTE(review): `static` at namespace scope in a header gives every
// translation unit its OWN copy of this map.  Cross-TU sharing only works
// because all access goes through Get/SetIOPtr compiled once in base_io.cc;
// consider a C++17 `inline` variable instead — confirm intent.
static BaseIOMap GlobalIOMap;
// Look up an IO by name in the global registry (nullptr if absent).
BaseIOPtr GetIOPtr(std::string& ioName);
// Register (or replace) the IO stored under `ioName`.
void SetIOPtr(std::string& ioName, BaseIOPtr src);
} // namespace interfaces
} // namespace deepinfer
} // namespace waytous
#endif
#ifndef DEEPINFER_INTERFACES_BASE_MODEL_H_
#define DEEPINFER_INTERFACES_BASE_MODEL_H_
#include <string>
#include <vector>
#include <memory>
#include <yaml-cpp/yaml.h>
#include "common/log.h"
#include "common/register.h"
#include "common/file.h"
#include "base/image.h"
#include "interfaces/base_io.h"
#include "interfaces/base_unit.h"
namespace waytous {
namespace deepinfer {
namespace interfaces {
// Abstract model pipeline: holds the pipeline-level input/output IO names
// and runs inference on camera images.  Concrete models register through
// DEEPINFER_REGISTER_MODELS below.
class BaseModel{
public:
BaseModel() = default;
virtual ~BaseModel() = default;
// Load the YAML pipeline description located at configPath.
virtual bool Init(std::string& configPath);
// for camera image infer
virtual bool Exec(std::vector<cv::Mat*> inputs, std::vector<BaseIOPtr>& outputs);
virtual bool Exec(std::vector<base::Image8UPtr> inputs, std::vector<BaseIOPtr>& outputs);
virtual std::string Name(){
return "BaseModel";
}
public:
std::vector<std::string> inputNames;   // pipeline-level input IO names
std::vector<std::string> outputNames;  // pipeline-level output IO names
};
DEEPINFER_REGISTER_REGISTERER(BaseModel);
#define DEEPINFER_REGISTER_MODELS(name) \
DEEPINFER_REGISTER_CLASS(BaseModel, name)
} // namespace interfaces
} // namespace deepinfer
} // namespace waytous
#endif
#ifndef WAYTOUS_DEEPINFER_TASK_H_
#define WAYTOUS_DEEPINFER_TASK_H_
#include <string>
#include <opencv2/opencv.hpp>
#include <yaml-cpp/yaml.h>
namespace waytous{
namespace deepinfer{
namespace task{
// A runnable task: parses its YAML config, performs inference, and can draw
// its results onto an image.
class Task{
public:
virtual ~Task() = default;
// Parse the task YAML file into taskNode.
virtual bool Init(const std::string& taskConfigPath);
/*
task input/output
*/
virtual bool Infer() = 0;
/*
draw outputs on image
*/
virtual void Visualize(cv::Mat& image) = 0;
// Drawing color for index `idx` (class/track id) — defined elsewhere.
virtual cv::Scalar get_color(int idx);
public:
YAML::Node taskNode;  // parsed task configuration
};
} //namespace task
} //namspace deepinfer
} //namespace waytous
#endif
#include "interfaces/base_unit.h"
namespace waytous {
namespace deepinfer {
namespace interfaces {
// Read the unit's IO wiring plus the shared preprocessing parameters from
// the YAML configuration.  Returns true on success; yaml-cpp throws on
// missing mandatory keys (inputNames/outputNames/inputWidth/inputHeight).
bool BaseUnit::Init(YAML::Node& node, YAML::Node& globalParamNode){
// Fix: the lists must be fetched via operator[] and then converted.
// yaml-cpp's as<T>(fallback) overload treats the string literal as a
// FALLBACK VALUE, and a vector<string> cannot be built from a const char*.
inputNames = node["inputNames"].as<std::vector<std::string>>();
outputNames = node["outputNames"].as<std::vector<std::string>>();
inferBatchSize = globalParamNode["inferBatchSize"].as<int>(1); // default=1
inputHeight = globalParamNode["inputHeight"].as<int>();
inputWidth = globalParamNode["inputWidth"].as<int>();
// Keep the member's declared default when the key is absent.
fixAspectRatio = globalParamNode["fixAspectRatio"].as<bool>(fixAspectRatio);
return true;
};
} // namespace interfaces
} // namespace deepinfer
} // namespace waytous
#ifndef DEEPINFER_INTERFACES_BASE_UNIT_H_
#define DEEPINFER_INTERFACES_BASE_UNIT_H_
#include <string>
#include <vector>
#include <memory>
#include <yaml-cpp/yaml.h>
#include <opencv2/opencv.hpp>
#include "base/blob.h"
#include "common/log.h"
#include "common/register.h"
namespace waytous {
namespace deepinfer {
namespace interfaces {
// One processing stage of the pipeline (source, preprocess, inference, ...).
// Units are wired together through named IOs and are constructed via the
// BaseUnit factory (DEEPINFER_REGISTER_UNIT below).
class BaseUnit{
public:
BaseUnit() = default;
virtual ~BaseUnit() = default;
// Read common fields (IO names + global size/batch params) from YAML.
virtual bool Init(YAML::Node& node, YAML::Node& globalParamNode);
// Run this stage once; returns false on failure.
virtual bool Exec() = 0;
virtual std::string Name(){
return "BaseUnit";
}
public:
std::vector<std::string> inputNames;
std::vector<std::string> outputNames;
//global params
int inferBatchSize = 1;
int inputWidth, inputHeight;  // NOTE(review): uninitialized until Init() runs
bool fixAspectRatio = true;
};
using BaseUnitPtr = std::shared_ptr<BaseUnit>;
DEEPINFER_REGISTER_REGISTERER(BaseUnit);
#define DEEPINFER_REGISTER_UNIT(name) \
DEEPINFER_REGISTER_CLASS(BaseUnit, name)
} // namespace interfaces
} // namespace deepinfer
} // namespace waytous
#endif
#include "libs/inferences/tensorrt/trt_calibrator.h"
namespace waytous {
namespace deepinfer {
namespace inference {
// Resize `img` to (inputWidth x inputHeight) — letterboxed against the
// top-left corner with zero padding when fixAspect is set — then write a
// normalized planar (CHW) float image into hostImage.  Plane order is
// RGB unless `bgr` is true; each value is (px/255 - mean[c]) / std[c].
void resizeImageCPU(cv::Mat& img, float* hostImage, int inputWidth, int inputHeight,
const float* inputMean, const float* inputStd, bool fixAspect, bool bgr)
{
cv::Mat resized;
if (fixAspect){
const float scale = cv::min(float(inputWidth) / img.cols, float(inputHeight) / img.rows);
const auto scaledSize = cv::Size(img.cols * scale, img.rows * scale);
cv::resize(img, resized, scaledSize);
cv::Mat canvas = cv::Mat::zeros(inputHeight, inputWidth, CV_8UC3);
resized.copyTo(canvas(cv::Rect(0, 0, scaledSize.width, scaledSize.height)));
resized = canvas;
}else{
cv::resize(img, resized, cv::Size(inputWidth, inputHeight));
}
const int plane = inputWidth * inputHeight;
// OpenCV stores pixels as BGR; pick which source channel lands in the
// first/last output plane (mean/std are indexed by source channel).
const int cFirst = bgr ? 0 : 2;
const int cLast = 2 - cFirst;
for(int i = 0; i < plane; i++){
const cv::Vec3b& px = resized.at<cv::Vec3b>(i);
hostImage[i] = (px[cFirst] / 255. - inputMean[cFirst]) / inputStd[cFirst];
hostImage[i + plane] = (px[1] / 255. - inputMean[1]) / inputStd[1];
hostImage[i + 2 * plane] = (px[cLast] / 255. - inputMean[cLast]) / inputStd[cLast];
}
}
// Build an entropy calibrator: loads the list of calibration image paths
// (one per line, relative to the configured root) and allocates the host
// batch buffer plus the matching device buffer handed to TensorRT.
int8EntroyCalibrator::int8EntroyCalibrator(const std::string &imgPath_, const std::string &calibTablePath_,
int batchSize_, int inputWidth_, int inputHeight_, std::vector<float>& inputMean_, std::vector<float>& inputStd_, bool useBGR_, bool fixAspectRatio_):
batchSize(batchSize_), inputWidth(inputWidth_), inputHeight(inputHeight_), imageIndex(0), useBGR(useBGR_), fixAspectRatio(fixAspectRatio_),
// Fix: readCache was never initialized anywhere but is read in
// readCalibrationCache(); default to reusing an existing table.
readCache(true)
{
calibTablePath = common::GetAbsolutePath(common::ConfigRoot::GetRootPath(), calibTablePath_);
// Per-channel normalization constants (expects at least 3 entries each).
for(int ii =0; ii<3; ii++){
inputMean[ii] = inputMean_[ii];
inputStd[ii] = inputStd_[ii];
}
inputCount = 3 * inputWidth * inputHeight;  // floats per image
std::fstream f(common::GetAbsolutePath(common::ConfigRoot::GetRootPath(), imgPath_));
if(f.is_open()){
std::string temp;
while (std::getline(f,temp)) imgPaths.push_back(common::GetAbsolutePath(common::ConfigRoot::GetRootPath(), temp));
}
batchData = new float[batchSize * inputCount];
CUDA_CHECK(cudaMalloc(&deviceInput, batchSize * inputCount * sizeof(float)));
};
// Release the device staging buffer, then the host batch buffer.
int8EntroyCalibrator::~int8EntroyCalibrator(){
CUDA_CHECK(cudaFree(deviceInput));
delete[] batchData;  // delete[] on nullptr is a no-op
};
// Number of images consumed per getBatch() call.
int int8EntroyCalibrator::getBatchSize() const { return batchSize; };
// Preprocess the next `batchSize` images into bindings[0] (device memory).
// Returns false once fewer than batchSize images remain, which tells
// TensorRT that calibration data is exhausted.
bool int8EntroyCalibrator::getBatch(void *bindings[], const char *names[], int nbBindings){
if (imageIndex + batchSize > int(imgPaths.size()))
return false;
// load batch
float* ptr = batchData;
for (size_t j = imageIndex; j < imageIndex + batchSize; ++j)
{
auto img = cv::imread(imgPaths[j]);
LOG_INFO << "load image " << imgPaths[j] << " " << (j+1)*100./imgPaths.size() << "%";
resizeImageCPU(img, ptr, inputWidth, inputHeight, inputMean, inputStd, fixAspectRatio, useBGR);
ptr += inputCount;
}
imageIndex += batchSize;
// Copy the whole host-side batch to the device in one shot.
CUDA_CHECK(cudaMemcpy(deviceInput, batchData, batchSize * inputCount * sizeof(float), cudaMemcpyHostToDevice));
bindings[0] = deviceInput;
return true;
};
// Return the previously written calibration table (nullptr when absent or
// caching is disabled) so TensorRT can skip recalibration.
// NOTE(review): `readCache` is declared without an initializer and the
// constructor never sets it, so this reads an indeterminate bool — it
// should almost certainly default to true; confirm and initialize it.
const void * int8EntroyCalibrator::readCalibrationCache(std::size_t &length){
calibrationCache.clear();
std::ifstream input(calibTablePath, std::ios::binary);
input >> std::noskipws;
if (readCache && input.good())
std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
std::back_inserter(calibrationCache));
length = calibrationCache.size();
return length ? &calibrationCache[0] : nullptr;
};
// Persist the calibration table produced by TensorRT to calibTablePath.
void int8EntroyCalibrator::writeCalibrationCache(const void *cache, std::size_t length){
std::ofstream output(calibTablePath, std::ios::binary);
output.write(reinterpret_cast<const char*>(cache), length);
};
} // namespace inference
} // namespace deepinfer
} // namespace waytous
#ifndef WAYTOUS_DEEPINFER_INFERENCE_TRT_CALIBRATOR_H_
#define WAYTOUS_DEEPINFER_INFERENCE_TRT_CALIBRATOR_H_
#pragma once
#include <vector>
#include <string>
#include <iostream>
#include <fstream>
#include <iterator>
#include <opencv2/opencv.hpp>
#include "NvInfer.h"
#include "cuda.h"
#include "cuda_runtime.h"
#include "common/log.h"
#include "common/file.h"
#include "libs/inferences/tensorrt/trt_utils.h"
namespace waytous {
namespace deepinfer {
namespace inference {
void resizeImageCPU(cv::Mat& img, float* hostImage, int inputWidth, int inputHeight,
const float* inputMean, const float* inputStd, bool fixAspect, bool bgr);
// INT8 entropy calibrator: feeds preprocessed image batches to TensorRT
// during INT8 calibration and caches the resulting table on disk.
class int8EntroyCalibrator : public nvinfer1::IInt8EntropyCalibrator {
public:
int8EntroyCalibrator(const std::string &imgPath_, const std::string &calibTablePath_,
int batchSize_, int inputWidth_, int inputHeight_, std::vector<float>& inputMean_,
std::vector<float>& inputStd_, bool useBGR_, bool fixAspectRatio_);
~int8EntroyCalibrator();
int getBatchSize() const override;
bool getBatch(void *bindings[], const char *names[], int nbBindings) override;
const void *readCalibrationCache(std::size_t &length) override;
void writeCalibrationCache(const void *cache, std::size_t length) override;
private:
int batchSize, inputWidth, inputHeight;
float inputMean[3], inputStd[3];  // per-channel normalization constants
size_t inputCount;                // floats per image (3 * W * H)
size_t imageIndex;                // next unread index into imgPaths
bool useBGR = false;
bool fixAspectRatio = true;
std::string calibTablePath;
std::vector<std::string> imgPaths;
float *batchData = nullptr;       // host staging buffer
void *deviceInput = nullptr;      // device buffer handed to TensorRT
// Fix: was left uninitialized, so readCalibrationCache() branched on an
// indeterminate bool.  Default to reusing an existing calibration table.
bool readCache = true;
std::vector<char> calibrationCache;
};
} // namespace inference
} // namespace deepinfer
} // namespace waytous
#endif
#include "libs/inferences/tensorrt/trt_infer.h"
namespace waytous {
namespace deepinfer {
namespace inference {
// Tear down the inference pipeline.
// Fix: TensorRT objects must be destroyed in reverse order of creation —
// context, then engine, then runtime.  The old code destroyed the runtime
// first, while the engine it deserialized was still alive.
TRTInference::~TRTInference(){
if(mCudaStream){
cudaStreamSynchronize(mCudaStream);
cudaStreamDestroy(mCudaStream);
}
if (mContext)
mContext->destroy();
if (mEngine)
mEngine->destroy();
if (mRunTime)
mRunTime->destroy();
}
// Load (building it first if absent) the serialized TensorRT engine, then
// wire every engine binding to a named Blob in the global IO registry:
// input bindings reuse the Blob already registered by the upstream unit;
// output bindings get freshly allocated Blobs published under the binding
// name.  Binding order in mBindings must match the engine's binding order.
bool TRTInference::Init(YAML::Node& configNode, YAML::Node& globalParamNode){
if(!BaseUnit::Init(configNode, globalParamNode)){
LOG_WARN << "Init trt engine error";
return false;
};
engineFile = configNode["engineFile"].as<std::string>();
inferDynamic = configNode["inferDynamic"].as<bool>();
engineFile = common::GetAbsolutePath(common::ConfigRoot::GetRootPath(), engineFile);
if(!waytous::deepinfer::common::PathExists(engineFile)){
LOG_INFO << "Tensorrt engine haven't been built, built from saved weights.";
BuildEngine(configNode);
}
initLibNvInferPlugins(&gLogger, "");
// Slurp the serialized engine into memory.
std::fstream file;
file.open(engineFile, std::ios::binary | std::ios::in);
if(!file.is_open())
{
LOG_WARN << "Read engine file" << engineFile <<" failed, please check.";
return false;
}
file.seekg(0, std::ios::end);
int length = file.tellg();
file.seekg(0, std::ios::beg);
std::unique_ptr<char[]> data(new char[length]);
file.read(data.get(), length);
file.close();
LOG_INFO << "Deserializing tensorrt engine.";
mRunTime = nvinfer1::createInferRuntime(gLogger);
assert(mRunTime != nullptr);
mEngine= mRunTime->deserializeCudaEngine(data.get(), length, NULL);
assert(mEngine != nullptr);
mContext = mEngine->createExecutionContext();
assert(mContext != nullptr);
mContext->setProfiler(&mProfiler);
// Sanity check: configured IO names must cover every engine binding.
int nbBindings = mEngine->getNbBindings();
if(nbBindings != inputNames.size() + outputNames.size()){
LOG_ERROR << " model input(" << inputNames.size() << ")+output(" <<
outputNames.size() << ") != nbBindings:" << nbBindings;
return false;
}
// Inputs: adopt the Blob already published by the producing unit.
// NOTE(review): if the IO is missing or not a NormalIO, the cast yields
// nullptr and input->data_ dereferences it — consider a guard.
for (auto inputName: inputNames){
auto input = std::dynamic_pointer_cast<ios::NormalIO>(interfaces::GetIOPtr(inputName));
blobs_.insert(std::make_pair(inputName, input->data_));
auto binding = input->data_->mutable_gpu_data();
mBindings.emplace_back(static_cast<void*>(binding));
}
// Outputs: allocate a Blob per non-input binding, shaped
// [inferBatchSize, <engine dims...>], and publish it under the binding name.
for (int i = 0; i < nbBindings; ++i)
{
bool isInput = mEngine->bindingIsInput(i);
if(isInput){
continue;
}
nvinfer1::Dims dims = mEngine->getBindingDimensions(i);
nvinfer1::DataType dtype = mEngine->getBindingDataType(i);
std::string name = mEngine->getBindingName(i);
std::vector<int> shape;
shape.push_back(inferBatchSize);
for(int dindex = 0; dindex < dims.nbDims; dindex++){
shape.push_back(dims.d[dindex]);
};
auto blob = std::make_shared<base::Blob<float>>(base::Blob<float>(shape));
blobs_.insert(std::make_pair(name, blob));
auto output = std::make_shared<ios::NormalIO>(ios::NormalIO(blob));
interfaces::SetIOPtr(name, output);
auto binding = blob->mutable_gpu_data();
mBindings.emplace_back(static_cast<void*>(binding));
}
CUDA_CHECK(cudaStreamCreate(&mCudaStream));
LOG_INFO << "Deserialized tensorrt engine.";
return true;
}
// Build a TensorRT engine from the configured ONNX weights and serialize it
// to engineFile.  Returns false on parse or build failure.
bool TRTInference::BuildEngine(YAML::Node& configNode){
// default convert onnx to engine
int maxBatchSize = configNode["maxBatchSize"].as<int>();
initLibNvInferPlugins(&gLogger, "");
nvinfer1::IHostMemory* modelStream{nullptr};
// int verbosity = (int) nvinfer1::ILogger::Severity::kVERBOSE;
int verbosity = (int) nvinfer1::ILogger::Severity::kINFO;
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1U);
auto parser = nvonnxparser::createParser(*network, gLogger);
std::string weightsPath = configNode["weightsPath"].as<std::string>();
weightsPath = common::GetAbsolutePath(common::ConfigRoot::GetRootPath(), weightsPath);
if (!parser->parseFromFile(weightsPath.c_str(), verbosity))
{
LOG_ERROR << "Failed to parse weights file: " << weightsPath;
// Fix: release the builder objects on this path (they leaked before).
parser->destroy();
network->destroy();
builder->destroy();
return false;
}
builder->setMaxBatchSize(maxBatchSize);
builder->setMaxWorkspaceSize(1 << 30);// 1G
int8EntroyCalibrator *calibrator = nullptr;
int runMode = configNode["runMode"].as<int>();
if (runMode == 1)//fp16
{
LOG_INFO << "Set FP16 Mode.";
if (!builder->platformHasFastFp16())
LOG_INFO << "Notice: the platform do not has fast for fp16" ;
builder->setFp16Mode(true);
}
else if(runMode == 2){//int8
LOG_ERROR << "No supported int8";
/*LOG_INFO <<"Set Int8 Mode";
if (!builder->platformHasFastInt8())
LOG_INFO << "Notice: the platform do not has fast for int8.";
builder->setInt8Mode(true);
if(configNode["calibImgPathFile"].as<std::string>().size() > 0){
std::vector<float> inputMean = configNode["inputMean"].as<std::vector<float>>();
std::vector<float> inputStd = configNode["inputStd"].as<std::vector<float>>();
calibrator = new int8EntroyCalibrator(
configNode["calibImgPathFile"].as<std::string>(),
configNode["calibTableCache"].as<std::string>(), maxBatchSize,
configNode["inputWidth"].as<int>(), configNode["inputHeight"].as<int>(),
inputMean, inputStd, useBGR, fixAspectRatio
);
builder->setInt8Calibrator(calibrator);
}
else{
LOG_ERROR << "Not imgs for calib int8. " << configNode["calibImgPathFile"].as<std::string>();
return false;
}
*/
}
LOG_INFO << "Begin building engine..." ;
nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);
if (!engine){
std::string error_message ="Unable to create engine";
gLogger.log(nvinfer1::ILogger::Severity::kERROR, error_message.c_str());
// Fix: the old code fell through and dereferenced the null engine via
// engine->serialize().  Clean up and report failure instead.
network->destroy();
builder->destroy();
parser->destroy();
delete calibrator;
return false;
}
LOG_INFO << "End building engine..." ;
// Serialize the engine, then close everything down.
modelStream = engine->serialize();
engine->destroy();
network->destroy();
builder->destroy();
parser->destroy();
assert(modelStream != nullptr);
if(calibrator){
delete calibrator;
calibrator = nullptr;
}
// write
std::ofstream file(engineFile, std::ios::binary);
assert(file);
file.write(static_cast<char*>(modelStream->data()), modelStream->size());
assert(!file.fail());
modelStream->destroy();
// Fix: do not create mCudaStream here — Init() creates it unconditionally
// right after BuildEngine() returns, which leaked the handle created here.
LOG_INFO << "End writing engine";
return true;
}
// Look up the blob bound to `name`; unknown names yield nullptr.
std::shared_ptr<base::Blob<float>> TRTInference::get_blob(const std::string &name) {
const auto found = blobs_.find(name);
return found == blobs_.end() ? nullptr : found->second;
}
// Run one inference pass over the current contents of the input blobs.
// Returns the status reported by TensorRT.
bool TRTInference::Exec(){
CUDA_CHECK(cudaStreamSynchronize(mCudaStream));
// Ensure input blob HEADs are on (or synced to) the GPU before enqueue.
for (auto name : inputNames) {
auto blob = get_blob(name);
if (blob != nullptr) {
blob->gpu_data();
}
}
// If `out_blob->mutable_cpu_data()` is invoked outside,
// HEAD will be set to CPU, and `out_blob->mutable_gpu_data()`
// after `enqueue` will copy data from CPU to GPU,
// which will overwrite the `inference` results.
// `out_blob->gpu_data()` will set HEAD to SYNCED,
// then no copy happends after `enqueue`.
for (auto name : outputNames) {
auto blob = get_blob(name);
if (blob != nullptr) {
blob->gpu_data();
}
}
// Fix: this bool function had NO return statement (undefined behavior);
// also propagate the status TensorRT reports instead of discarding it.
bool ok;
if(inferDynamic){
ok = mContext->enqueue(inferBatchSize, &mBindings[0], mCudaStream, nullptr);
}else{
ok = mContext->executeV2(&mBindings[0]);
}
CUDA_CHECK(cudaStreamSynchronize(mCudaStream));
// Mark output HEADs as GPU so downstream reads copy from device memory.
for (auto name : outputNames) {
auto blob = get_blob(name);
if (blob != nullptr) {
// LOG_INFO << "output name: " << name;
blob->mutable_gpu_data();
}
}
return ok;
}
// Unit name; doubles as the key used by the factory registration below.
std::string TRTInference::Name(){
return "TRTInference";
}
// Registers TRTInference in the global BaseUnit factory at load time.
DEEPINFER_REGISTER_UNIT(TRTInference);
} // namespace inference
} // namespace deepinfer
} // namespace waytous
#ifndef WAYTOUS_DEEPINFER_INFERENCE_TRT_INFER_H_
#define WAYTOUS_DEEPINFER_INFERENCE_TRT_INFER_H_
#pragma once
#include <string>
#include <yaml-cpp/yaml.h>
// GPU TensorRT
#include <cuda_runtime_api.h>
#include "NvInfer.h"
#include "NvInferPlugin.h"
#include "NvOnnxParser.h"
#include "base/blob.h"
#include "common/file.h"
#include "common/register.h"
#include "interfaces/base_unit.h"
#include "libs/ios/normal_ios.h"
#include "libs/inferences/tensorrt/trt_utils.h"
#include "libs/inferences/tensorrt/trt_calibrator.h"
namespace waytous {
namespace deepinfer {
namespace inference {
// NOTE(review): `static` in a header means every translation unit that
// includes this file gets its OWN Logger instance — confirm that is intended
// (a C++17 `inline` variable would share one instance).
static Logger gLogger;
// Named blob registry: engine binding/tensor name -> float blob.
typedef std::map<std::string,
std::shared_ptr<base::Blob<float>>>
BlobMap;
// TensorRT-backed inference unit: deserializes (or builds) an engine, binds
// engine tensors to named Blobs, and runs one enqueue/execute per Exec().
class TRTInference: public interfaces::BaseUnit {
public:
~TRTInference();
bool Init(YAML::Node& node, YAML::Node& globalParamNode) override;
// Convert the configured weights (ONNX by default) into a serialized
// engine file; overridable for network-specific builders.
virtual bool BuildEngine(YAML::Node& node);
bool Exec() override;
virtual std::string Name() override;
// Lookup of a named input/output blob (nullptr if unknown).
std::shared_ptr<base::Blob<float>> get_blob(const std::string &name);
public:
nvinfer1::IExecutionContext* mContext = nullptr;
nvinfer1::ICudaEngine* mEngine = nullptr;
nvinfer1::IRuntime* mRunTime = nullptr;
Profiler mProfiler;
cudaStream_t mCudaStream;       // NOTE(review): unset until Init() runs
std::vector<void*> mBindings;   // device pointers in engine binding order
BlobMap blobs_;                 // binding name -> blob
std::string engineFile;
bool inferDynamic = false;      // true: enqueue(batch); false: executeV2
}; // class TRTInferenceEngine
} // namespace inference
} // namespace deepinfer
} // namespace waytous
#endif // header
#ifndef WAYTOUS_DEEPINFER_INFERENCE_TRT_UTILS_H_
#define WAYTOUS_DEEPINFER_INFERENCE_TRT_UTILS_H_
#pragma once
#include <cstring>
#include <iostream>
#include <iterator>
#include <map>
#include <numeric>
#include <string>
#include <vector>
#include "NvInfer.h"
#include "cuda.h"
#include "cuda_runtime.h"
#include "common/common.h"
namespace waytous {
namespace deepinfer {
namespace inference {
#if NV_TENSORRT_MAJOR >= 8
#define TRT_NOEXCEPT noexcept
#define TRT_CONST_ENQUEUE const
#else
#define TRT_NOEXCEPT
#define TRT_CONST_ENQUEUE
#endif
namespace Tn
{
// Trivially-copyable (de)serialization helpers for plugin buffers.
// Fix: use std::memcpy instead of a reinterpret_cast store/load — the
// buffer position is not guaranteed to be aligned for T, so the old code
// had alignment/strict-aliasing UB.  Advances `buffer` by sizeof(T).
template<typename T>
void write(char*& buffer, const T& val)
{
std::memcpy(buffer, &val, sizeof(T));
buffer += sizeof(T);
}
template<typename T>
void read(const char*& buffer, T& val)
{
std::memcpy(&val, buffer, sizeof(T));
buffer += sizeof(T);
}
} // Tn
namespace Yolo{
static const int maxNumAnchorsPerLevel = 10; // every level can have 10 anchors at most.
// One detection level: feature-map size plus its flattened (w, h) anchor pairs.
struct YoloKernel{
int width;
int height;
float anchors[maxNumAnchorsPerLevel * 2];
};
}//Yolo kernel
class Profiler : public nvinfer1::IProfiler
{
public:
struct Record
{
float time{0};
int count{0};
};
void printTime(const int& runTimes)
{
//std::cout << "========== " << mName << " profile ==========" << std::endl;
float totalTime = 0;
std::string layerNameStr = "TensorRT layer name";
int maxLayerNameLength = std::max(static_cast<int>(layerNameStr.size()), 70);
for (const auto& elem : mProfile)
{
totalTime += elem.second.time;
maxLayerNameLength = std::max(maxLayerNameLength, static_cast<int>(elem.first.size()));
}
std::cout<< " total runtime = " << totalTime/(runTimes + 1e-5) << " ms " << std::endl;
}
virtual void reportLayerTime(const char* layerName, float ms)
{
mProfile[layerName].count++;
mProfile[layerName].time += ms;
}
private:
std::map<std::string, Record> mProfile;
};
// Forwards TensorRT log messages at or above `reportableSeverity` to stderr.
class Logger : public nvinfer1::ILogger
{
public:
Logger(Severity severity = Severity::kWARNING)
: reportableSeverity(severity)
{
}
// TRT_NOEXCEPT matches the noexcept signature TensorRT 8 requires for
// ILogger::log (the macro expands to nothing on TensorRT 7).
void log(Severity severity, const char* msg) TRT_NOEXCEPT override
{
// suppress messages with severity enum value greater than the reportable
if (severity > reportableSeverity)
return;
switch (severity)
{
case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
case Severity::kERROR: std::cerr << "ERROR: "; break;
case Severity::kWARNING: std::cerr << "WARNING: "; break;
case Severity::kINFO: std::cerr << "INFO: "; break;
default: std::cerr << "UNKNOWN: "; break;
}
std::cerr << msg << std::endl;
}
Severity reportableSeverity;
};
// Total number of elements described by `d` (product of all dimensions).
inline int64_t volume(const nvinfer1::Dims& d)
{
// Fix: std::accumulate deduces its accumulator type from the init value.
// With the int literal `1`, every intermediate product was truncated back
// to int; start from an int64_t so large tensors don't overflow.
return std::accumulate(d.d, d.d + d.nbDims, int64_t{1}, std::multiplies<int64_t>());
}
// Size in bytes of one element of the given TensorRT data type.
// Throws std::runtime_error for types not listed (e.g. kBOOL, left
// commented out below).
inline unsigned int getElementSize(nvinfer1::DataType t)
{
switch (t)
{
case nvinfer1::DataType::kINT32: return 4;
case nvinfer1::DataType::kFLOAT: return 4;
case nvinfer1::DataType::kHALF: return 2;
case nvinfer1::DataType::kINT8: return 1;
// case nvinfer1::DataType::kBOOL: return 1;
}
throw std::runtime_error("Invalid DataType.");
return 0;
}
// cudaMalloc wrapper that terminates the process instead of returning on
// failure.  CUDA_CHECK reports a bad status code; the null check is a
// defensive second guard on the returned pointer.
inline void* safeCudaMalloc(size_t memSize)
{
void* deviceMem;
CUDA_CHECK(cudaMalloc(&deviceMem, memSize));
if (deviceMem == nullptr)
{
std::cerr << "Out of memory" << std::endl;
exit(1);
}
return deviceMem;
}
} // namespace inference
} // namespace deepinfer
} // namespace waytous
#endif // header
This diff is collapsed.
#ifndef WAYTOUS_DEEPINFER_INFERENCE_TRT_YOLOV5_INFER_H_
#define WAYTOUS_DEEPINFER_INFERENCE_TRT_YOLOV5_INFER_H_
#include "libs/inferences/tensorrt/trt_infer.h"
#include "libs/inferences/tensorrt/trt_yolov5_layer.h"
using namespace nvinfer1;
namespace waytous {
namespace deepinfer {
namespace inference {
// TensorRT weight files have a simple space delimited format:
// [type] [size] <data x size in hex>
// Returns a map from blob name to kFLOAT Weights.  The value buffers are
// intentionally never freed: TensorRT keeps raw pointers to them until the
// engine has been built.
inline std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file) {
LOG_INFO << "Loading weights: " << file ;
std::map<std::string, nvinfer1::Weights> weightMap;
// Open weights file
std::ifstream input(file);
assert(input.is_open() && "Unable to load weight file. please check if the .wts file path is right!!!!!!");
// Read number of weight blobs
int32_t count;
input >> count;
assert(count > 0 && "Invalid weight map file.");
while (count--)
{
nvinfer1::Weights wt{ nvinfer1::DataType::kFLOAT, nullptr, 0 };
uint32_t size;
// Read name and type of blob
std::string name;
input >> name >> std::dec >> size;
wt.type = nvinfer1::DataType::kFLOAT;
// Load blob.
// Fix: allocate sizeof(uint32_t) per value — the old `sizeof(val)`
// measured the POINTER (8 bytes on 64-bit), over-allocating 2x.
uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(uint32_t) * size));
for (uint32_t x = 0, y = size; x < y; ++x)
{
input >> std::hex >> val[x];
}
wt.values = val;
wt.count = size;
weightMap[name] = wt;
}
return weightMap;
}
// Scale a channel count by width multiple `gw`, rounded UP to the nearest
// multiple of `divisor` (yolov5's make_divisible rule).
inline static int get_width(int x, float gw, int divisor = 8) {
const float scaled = x * gw;
return static_cast<int>(ceil(scaled / divisor)) * divisor;
}
// Scale a block repeat count by depth multiple `gd` using yolov5's rounding
// rule: round half away from zero, except an exact .5 above an even base
// rounds down; never returns less than 1.
inline static int get_depth(int x, float gd) {
if (x == 1) return 1;
const float scaled = x * gd;
int depth = round(scaled);
if (scaled - int(scaled) == 0.5 && (int(scaled) % 2) == 0) {
--depth;
}
return std::max<int>(depth, 1);
}
// YOLOv5-specific engine builder: constructs the network layer-by-layer from
// a .wts weight file instead of parsing ONNX.  The private helpers mirror
// yolov5 modules (Focus, C3, SPP/SPPF, ...) — implementations elsewhere.
class YoloV5TRTInference: public TRTInference{
public:
bool BuildEngine(YAML::Node& node) override;
std::string Name() override;
private:
IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname, float eps);
ILayer* convBlock(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int outch, int ksize, int s, int g, std::string lname);
ILayer* focus(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int inch, int outch, int ksize, std::string lname, int inputWidth, int inputHeight);
ILayer* bottleneck(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, bool shortcut, int g, float e, std::string lname);
ILayer* bottleneckCSP(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int n, bool shortcut, int g, float e, std::string lname);
ILayer* C3(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int n, bool shortcut, int g, float e, std::string lname);
ILayer* SPP(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int k1, int k2, int k3, std::string lname);
ILayer* SPPF(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int k, std::string lname);
std::vector<std::vector<float>> getAnchors(std::map<std::string, Weights>& weightMap, std::string lname, int numAnchorsPerLevel);
IPluginV2Layer* addYoLoLayer(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, std::string lname, std::vector<IConvolutionLayer*> dets,
int classNumber, int inputWidth, int inputHeight, int maxOutputBBoxCount, int numAnchorsPerLevel, int startScale=8);
IPluginV2Layer *addBatchedNMSLayer(INetworkDefinition *network, IPluginV2Layer *yolo, int num_classes, int top_k, int keep_top_k, float score_thresh,
float iou_thresh, bool is_normalized = false, bool clip_boxes = false);
};
} // namespace inference
} // namespace deepinfer
} // namespace waytous
#endif // header
This diff is collapsed.
#ifndef WAYTOUS_DEEPINFER_INFERENCE_TRT_YOLOV5_LAYER_H_
#define WAYTOUS_DEEPINFER_INFERENCE_TRT_YOLOV5_LAYER_H_
#include <assert.h>
#include <iostream>
#include <vector>
#include <string>
#include <NvInfer.h>
#include "libs/inferences/tensorrt/trt_utils.h"
namespace nvinfer1
{
// Custom TensorRT plugin implementing the YOLOv5 detection head: decodes the
// raw per-level conv outputs with the stored anchors.  Implementations of the
// declared methods live elsewhere (presumably a .cu file).
class YoloV5LayerPlugin : public IPluginV2IOExt
{
public:
YoloV5LayerPlugin(int classCount, int netWidth, int netHeight, int maxOut,
int numAnchorsPerLevel, const std::vector<waytous::deepinfer::inference::Yolo::YoloKernel> &vYoloKernel);
// Deserialization constructor (inverse of serialize()).
YoloV5LayerPlugin(const void *data, size_t length);
~YoloV5LayerPlugin();
int getNbOutputs() const TRT_NOEXCEPT override
{
return 2;
}
Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims) TRT_NOEXCEPT override;
int initialize() TRT_NOEXCEPT override;
virtual void terminate() TRT_NOEXCEPT override{};
virtual size_t getWorkspaceSize(int maxBatchSize) const TRT_NOEXCEPT override
{
// One int per batch item of scratch space.
return maxBatchSize * sizeof(int);
}
virtual int enqueue(int batchSize, const void *const *inputs, void *TRT_CONST_ENQUEUE *outputs, void *workspace, cudaStream_t stream) TRT_NOEXCEPT override;
virtual size_t getSerializationSize() const TRT_NOEXCEPT override;
virtual void serialize(void *buffer) const TRT_NOEXCEPT override;
// Only linear-format FP32 tensors are supported.
bool supportsFormatCombination(int pos, const PluginTensorDesc *inOut, int nbInputs, int nbOutputs) const TRT_NOEXCEPT override
{
return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
}
const char *getPluginType() const TRT_NOEXCEPT override;
const char *getPluginVersion() const TRT_NOEXCEPT override;
void destroy() TRT_NOEXCEPT override;
IPluginV2IOExt *clone() const TRT_NOEXCEPT override;
void setPluginNamespace(const char *pluginNamespace) TRT_NOEXCEPT override;
const char *getPluginNamespace() const TRT_NOEXCEPT override;
DataType getOutputDataType(int index, const nvinfer1::DataType *inputTypes, int nbInputs) const TRT_NOEXCEPT override;
bool isOutputBroadcastAcrossBatch(int outputIndex, const bool *inputIsBroadcasted, int nbInputs) const TRT_NOEXCEPT override;
bool canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT override;
void attachToContext(
cudnnContext *cudnnContext, cublasContext *cublasContext, IGpuAllocator *gpuAllocator) TRT_NOEXCEPT override;
void configurePlugin(const PluginTensorDesc *in, int nbInput, const PluginTensorDesc *out, int nbOutput) TRT_NOEXCEPT override;
void detachFromContext() TRT_NOEXCEPT override;
private:
// Kernel launcher behind enqueue() — defined elsewhere.
void forwardGpu(const float *const *inputs, void **outputs, void *workspace, cudaStream_t stream, int batchSize = 1);
int mThreadCount = 256;       // CUDA threads per block
const char *mPluginNamespace;
int mKernelCount;             // number of detection levels
int mClassCount;
int mYoloV5NetWidth;
int mYoloV5NetHeight;
int mMaxOutObject;
int mNumAnchorsPerLevel;
std::vector<waytous::deepinfer::inference::Yolo::YoloKernel> mYoloKernel;
void **mAnchor;               // device-side anchor copies
};
// Factory TensorRT uses to (de)serialize YoloV5LayerPlugin instances;
// registered globally via REGISTER_TENSORRT_PLUGIN below.
class YoloV5PluginCreator : public IPluginCreator
{
public:
YoloV5PluginCreator();
~YoloV5PluginCreator() override = default;
const char *getPluginName() const TRT_NOEXCEPT override;
const char *getPluginVersion() const TRT_NOEXCEPT override;
const PluginFieldCollection *getFieldNames() TRT_NOEXCEPT override;
IPluginV2IOExt *createPlugin(const char *name, const PluginFieldCollection *fc) TRT_NOEXCEPT override;
IPluginV2IOExt *deserializePlugin(const char *name, const void *serialData, size_t serialLength) TRT_NOEXCEPT override;
void setPluginNamespace(const char *libNamespace) TRT_NOEXCEPT override
{
mNamespace = libNamespace;
}
const char *getPluginNamespace() const TRT_NOEXCEPT override
{
return mNamespace.c_str();
}
private:
std::string mNamespace;
static PluginFieldCollection mFC;
static std::vector<PluginField> mPluginAttributes;
};
REGISTER_TENSORRT_PLUGIN(YoloV5PluginCreator);
};
#endif
#ifndef DEEPINFER_CAMERA_IOS_H_
#define DEEPINFER_CAMERA_IOS_H_
#include <vector>
#include <string>
#include "base/image.h"
#include "interfaces/base_io.h"
namespace waytous {
namespace deepinfer {
namespace ios {
// Input payload for the CameraSrc unit: a non-owning pointer to the
// caller's cv::Mat frame (the caller must keep the Mat alive).
class CameraSrcIn: public interfaces::BaseIO{
public:
CameraSrcIn(cv::Mat* img): cv_img_(img){};
cv::Mat* cv_img_;  // borrowed, not owned
};
using CameraSrcInPtr = std::shared_ptr<CameraSrcIn>;
// Output payload for the CameraSrc unit: shared ownership of the
// converted 8-bit image.
class CameraSrcOut: public interfaces::BaseIO{
public:
CameraSrcOut(base::Image8UPtr img): img_ptr_(img){};
base::Image8UPtr img_ptr_;
};
using CameraSrcOutPtr = std::shared_ptr<CameraSrcOut>;
} // namespace ios
} // namespace deepinfer
} // namespace waytous
#endif
#include "libs/ios/detection.h"
namespace waytous {
namespace deepinfer {
namespace ios {
// Clamp the box corners into the valid pixel range [0, image_dim - 1].
// NOTE(review): requires image_width/image_height to be set first; both
// default to 0, in which case the upper bound becomes -1 — confirm callers
// always populate them.
void Det2D::validCoordinate(){
x1 = iMIN(image_width - 1, iMAX(0, x1));
x2 = iMIN(image_width - 1, iMAX(0, x2));
y1 = iMIN(image_height - 1, iMAX(0, y1));
y2 = iMIN(image_height - 1, iMAX(0, y2));
}
// Deep copy of this detection, including its instance mask when present.
Det2DPtr Det2D::copy(){
Det2DPtr obj = std::make_shared<Det2D>(Det2D());
// Fix: also carry over the image size the coordinates are relative to.
obj->image_width = this->image_width;
obj->image_height = this->image_height;
obj->x1 = this->x1;
obj->x2 = this->x2;
obj->y1 = this->y1;
obj->y2 = this->y2;
obj->confidence = this->confidence;
obj->class_label = this->class_label;
obj->class_name = this->class_name;
obj->truncated = this->truncated;
obj->track_id = this->track_id;
// Fix: test the SOURCE mask.  The old check inspected obj->mask_ptr,
// which is always nullptr on a fresh object, so the mask was never cloned.
if (this->mask_ptr != nullptr){
obj->mask_ptr.reset(new InstanceMask(this->mask_ptr.get()));
}
// Fix: the function previously fell off the end of a value-returning
// function without a return statement (undefined behavior).
return obj;
}
// Overwrite this detection's fields with those of `obj_`.  The mask is
// updated in place only when both sides actually have one.
void Det2D::update(Det2DPtr obj_){
this->x1 = obj_->x1;
this->x2 = obj_->x2;
this->y1 = obj_->y1;
this->y2 = obj_->y2;
this->confidence = obj_->confidence;
this->class_label = obj_->class_label;
this->class_name = obj_->class_name;
this->truncated = obj_->truncated;
this->track_id = obj_->track_id;
// Fix: also require obj_->mask_ptr — InstanceMask::update dereferences
// its argument, so passing a null source mask crashed here.
if (this->mask_ptr != nullptr && obj_->mask_ptr != nullptr){
this->mask_ptr->update(obj_->mask_ptr.get());
}
};
// Overwrite only the box corners from a [x1, y1, x2, y2] vector.
void Det2D::update(std::vector<float>& tlbr){
x1 = tlbr[0];
y1 = tlbr[1];
x2 = tlbr[2];
y2 = tlbr[3];
};
} // namespace ios
} // namespace deepinfer
} // namespace waytous
#ifndef DEEPINFER_DETECTION_H_
#define DEEPINFER_DETECTION_H_
#include <vector>
#include <string>
#include "common/common.h"
#include "interfaces/base_io.h"
#include "libs/ios/instance_mask.h"
namespace waytous {
namespace deepinfer {
namespace ios {
// Polymorphic base class for detection results.
class Det{
public:
Det() = default;
virtual ~Det() = default;
};
using DetPtr = std::shared_ptr<Det>;
// A single 2D detection: axis-aligned box in original-image pixel
// coordinates, class info, optional RLE instance mask and track id.
// NOTE(review): Det2DPtr is referenced here but its alias appears only
// after this class — presumably forward-declared in an included header
// (e.g. common/common.h); confirm.
class Det2D : public Det{
public:
void validCoordinate();
Det2DPtr copy();
void update(Det2DPtr obj_);
void update(std::vector<float>& tlbr);
public:
// Dimensions of the source image the coordinates refer to.
float image_width = 0, image_height = 0;
// Box corners: (x1, y1) top-left, (x2, y2) bottom-right.
float x1, y1, x2, y2;
float confidence;
int class_label;
std::string class_name;
// True when the box touches the image border (see post-processors).
bool truncated = false;
// Optional per-instance segmentation mask (RLE-encoded).
InstanceMaskPtr mask_ptr = nullptr;
// -1 until a tracker assigns an id.
int track_id = -1;
};
using Det2DPtr = std::shared_ptr<Det2D>;
// IO container carrying all 2D detections produced for one image.
class Detection2Ds: public interfaces::BaseIO{
public:
std::vector<Det2DPtr> detections;
};
using Detection2DsPtr = std::shared_ptr<Detection2Ds>;
} // namespace ios
} // namespace deepinfer
} // namespace waytous
#endif
#include "libs/ios/instance_mask.h"
namespace waytous{
namespace deepinfer{
namespace ios{
// Empty mask: dimensions are unset (-1) until an encode() call fills them.
InstanceMask::InstanceMask()
    : width(-1), height(-1), rle_string("") {}
// Copy-construct from a raw pointer; the caller guarantees mask is non-null.
InstanceMask::InstanceMask(const InstanceMask* mask)
    : width(mask->width),
      height(mask->height),
      rle_string(mask->rle_string) {}
// Overwrites this mask with the contents of another.
// Fixed: tolerate a null source — Det2D::update passes
// obj_->mask_ptr.get(), which may legitimately be null.
void InstanceMask::update(const InstanceMask* mask){
    if (mask == nullptr){
        return;
    }
    this->width = mask->width;
    this->height = mask->height;
    this->rle_string = mask->rle_string;
}
// Construct directly from an already-encoded RLE string.
InstanceMask::InstanceMask(int width, int height, const std::string &rle_string)
    : width(width), height(height), rle_string(rle_string) {}
// Construct by RLE-encoding a binary mask image; encode() also
// records the mask dimensions from the matrix.
InstanceMask::InstanceMask(const cv::Mat &map)
{
    encode(map);
}
// Construct by RLE-encoding a raw width*height binary buffer.
InstanceMask::InstanceMask(int width, int height, const char* data)
    : width(width), height(height)
{
    encode(data);
}
// Construct from pre-computed run-length counts (alternating 0/1 runs).
InstanceMask::InstanceMask(int width, int height, const int* cnts, int length)
    : width(width), height(height)
{
    encode(cnts, length);
}
// Decodes the compressed RLE string back into a binary (0/1) CV_8UC1
// mask of size width x height. The string format matches the
// COCO-style varint scheme: 5 value bits + 1 continuation bit per
// character, biased by 48 so the string stays printable; counts after
// the second are stored as deltas from the count two positions back.
cv::Mat InstanceMask::decode(){
    int m=0, p=0, k; long x; int more;
    std::vector<uint> cnts;
    // Stage 1: varint-decode the run-length counts.
    while(rle_string[p]) {
        x=0; k=0; more=1;
        while(more) {
            char c = rle_string[p] - 48;
            x |= (c & 0x1f) << 5*k;
            more = c & 0x20;
            p++; k++;
            if(!more && (c & 0x10))
                x |= -1 << 5*k;   // sign-extend a negative delta
        }
        if(m>2)
            x += (long) cnts[m-2];   // undo delta coding
        cnts.push_back((uint)x);
        m++;
    }
    // Stage 2: expand the runs, alternating pixel value 0/1.
    bool v=0;
    cv::Mat M = cv::Mat::zeros(cv::Size(width, height), CV_8UC1);
    const int total = width * height;
    int index = 0;
    for( int j=0; j<m; j++ ) {
        // BUG FIX: clamp to the mask size — a corrupt or oversized
        // RLE string used to write past the end of the matrix.
        for( k=0; k<(int)cnts[j] && index<total; k++ ){
            M.at<uchar>(index++) = v;
        }
        v=!v;
    }
    return M;
}
// RLE-encodes a raw width*height binary buffer (row-major scan, runs
// alternate starting with value 0) into the compressed string format
// used by decode().
void InstanceMask::encode(const char* data){
    // BUG FIX: start from an empty string — repeated encode() calls
    // used to append onto the previous encoding.
    rle_string.clear();
    uint count = 0;
    char p = 0;
    std::vector<uint> cnts;
    for (int i = 0; i < height * width; i++){
        if(*(data + i) != p){
            cnts.push_back(count);
            count = 0;
            p = *(data + i);
        }
        count++;
    }
    // BUG FIX: flush the final run — it was silently dropped, so a mask
    // ending in foreground lost its last run on a decode round-trip.
    cnts.push_back(count);
    // Varint-compress the counts (delta-coded after the second count).
    long x; int more;
    int m = cnts.size();
    for(int i=0; i<m; i++ ) {
        x=(long) cnts[i];
        if(i>2)
            x -= (long) cnts[i-2];
        more = 1;
        while( more ) {
            char c=x & 0x1f;
            x >>= 5;
            more = (c & 0x10) ? x!=-1 : x!=0;
            if(more)
                c |= 0x20;
            c += 48;
            rle_string.push_back(c);
        }
    }
}
// Varint-compresses pre-computed run-length counts into rle_string
// (counts after the second are delta-coded, matching decode()).
void InstanceMask::encode(const int* cnts, int length){
    // BUG FIX: start from an empty string — repeated encode() calls
    // used to append onto the previous encoding.
    rle_string.clear();
    long x; int more;
    for(int i=0; i<length; i++ ) {
        x=(long) cnts[i];
        if(i>2)
            x -= (long) cnts[i-2];
        more = 1;
        while( more ) {
            char c=x & 0x1f;
            x >>= 5;
            more = (c & 0x10) ? x!=-1 : x!=0;
            if(more)
                c |= 0x20;
            c += 48;
            rle_string.push_back(c);
        }
    }
}
// RLE-encodes a binary mask image. Records the mask dimensions from
// the matrix, then scans row-major (NOTE(review): COCO's reference RLE
// is column-major — these strings are only guaranteed compatible with
// this class's own decode(); confirm before exporting).
void InstanceMask::encode(const cv::Mat& map){
    height = map.rows;
    width = map.cols;
    // BUG FIX: start from an empty string — repeated encode() calls
    // used to append onto the previous encoding.
    rle_string.clear();
    uint count = 0;
    char p = 0;
    std::vector<uint> cnts;
    for (int i=0; i<height; i++){
        for(int j=0; j<width; j++){
            if(map.at<char>(i, j) != p){
                cnts.push_back(count);
                count = 0;
                p = map.at<char>(i, j);
            }
            count++;
        }
    }
    // BUG FIX: flush the final run — it was silently dropped.
    cnts.push_back(count);
    // Varint-compress the counts (delta-coded after the second count).
    long x; int more;
    int m = cnts.size();
    for(int i=0; i<m; i++ ) {
        x=(long) cnts[i];
        if(i>2)
            x -= (long) cnts[i-2];
        more = 1;
        while( more ) {
            char c=x & 0x1f;
            x >>= 5;
            more = (c & 0x10) ? x!=-1 : x!=0;
            if(more)
                c |= 0x20;
            c += 48;
            rle_string.push_back(c);
        }
    }
}
} //namespace ios
} //namspace deepinfer
} //namespace waytous
#ifndef DEEPINFER_INSTANCE_MASK_H_
#define DEEPINFER_INSTANCE_MASK_H_
#include <string>
#include <memory>
#include <opencv2/opencv.hpp>
namespace waytous{
namespace deepinfer{
namespace ios{
// Run-length-encoded binary instance mask. Stores a width x height
// 0/1 mask as a printable COCO-style RLE string; encode() fills
// rle_string from pixels or counts, decode() reconstructs a CV_8UC1 mat.
class InstanceMask{
public:
InstanceMask();
InstanceMask(const InstanceMask* mask);
InstanceMask(int width, int height, const std::string &rle_string);
InstanceMask(const cv::Mat &map);
InstanceMask(int width, int height, const char* data);
InstanceMask(int width, int height, const int* cnts, int length);
void update(const InstanceMask* mask);
cv::Mat decode();
void encode(const cv::Mat& map);
void encode(const char* data);
void encode(const int* cnts, int length);
public:
int width;
int height;
std::string rle_string; // coco encode mask string
};
using InstanceMaskPtr = std::shared_ptr<InstanceMask>;
} //namespace ios
} //namspace deepinfer
} //namespace waytous
#endif
#ifndef DEEPINFER_NORMAL_IOS_H_
#define DEEPINFER_NORMAL_IOS_H_
#include <string>
#include <utility>
#include <vector>
#include "base/blob.h"
#include "interfaces/base_io.h"
namespace waytous {
namespace deepinfer {
namespace ios {
// IO wrapper around a float blob (e.g. a model input/output tensor).
class NormalIO: public interfaces::BaseIO{
public:
    // Takes the blob by value and moves it into place, avoiding an
    // extra shared_ptr refcount bump on construction.
    NormalIO(base::BlobPtr<float> data): data_(std::move(data)){};
    base::BlobPtr<float> data_;
};
using NormalIOPtr = std::shared_ptr<NormalIO>;
// IO wrapper around an int blob (integer-typed tensors).
class NormalIOI: public interfaces::BaseIO{
public:
    // Takes the blob by value and moves it into place, avoiding an
    // extra shared_ptr refcount bump on construction.
    NormalIOI(base::BlobPtr<int> data): data_(std::move(data)){};
    base::BlobPtr<int> data_;
};
} // namespace ios
} // namespace deepinfer
} // namespace waytous
#endif
This diff is collapsed.
#ifndef DEEPINFER_POSTPROCESS_TRDES_H_
#define DEEPINFER_POSTPROCESS_TRDES_H_
#include <cuda_runtime.h>
#include <cstdint>
#include "interfaces/base_unit.h"
#include "base/image.h"
#include "libs/ios/normal_ios.h"
#include "libs/ios/camera_ios.h"
#include "libs/ios/detection.h"
namespace waytous {
namespace deepinfer {
namespace postprocess {
// Post-processing unit for the TraDeS model: turns the network's
// heatmap/regression/segmentation tensors into detections + RLE masks.
class TraDesPostProcess: public interfaces::BaseUnit{
public:
bool Init(YAML::Node& node, YAML::Node& globalParamNode) override;
bool Exec() override;
std::string Name() override;
protected:
// GPU-side decoding of model heads; writes top-K boxes plus per-box
// mask run-length counts into the output buffers.
void trades_postprocess(const float *hm, const float *reg, const float *wh, const float* seg_weights, const float* seg_feat,
int* output_length, float *output_bboxes, int* output_mask_cnt_lengths, int* output_mask_cnts, const int topK, const int max_cnts_length,
const int w, const int h, const int number_classes, const int kernerl_size, const int seg_dims, const float score_threshold);
public:
base::BlobPtr<int> output_length_ptr;
base::BlobPtr<float> bboxes_ptr;
base::BlobPtr<int> maskCnts_lengths_ptr;
base::BlobPtr<int> maskCnts_ptr;
float scoreThreshold, truncatedThreshold;
int classNumber;
std::vector<std::string> classNames;
// Maximum detections kept per image.
int topK = 100;
// Output stride of the network relative to the input image.
int downScale = 4;
// Channel count of the segmentation feature embedding.
int segDims = 64;
// Per-instance cap on stored RLE counts.
int maxCntsLength = 500;
};
} // namespace postprocess
} // namespace deepinfer
} // namespace waytous
#endif
#include "libs/postprocessors/yolov5_post.h"
namespace waytous {
namespace deepinfer {
namespace postprocess {
// Reads YOLOv5 post-process parameters from the unit's YAML config.
// Returns false if the base unit fails to initialize.
bool YoloV5PostProcess::Init(YAML::Node& node, YAML::Node& globalParamNode) {
    if(!BaseUnit::Init(node, globalParamNode)){
        // BUG FIX: message said "trades" (copy-paste from TraDeS unit).
        LOG_WARN << "Init yolov5 postprocess error";
        return false;
    }
    classNames = node["classNames"].as<std::vector<std::string>>();
    truncatedThreshold = node["truncatedThreshold"].as<float>(); // default 5%
    scoreThreshold = node["scoreThreshold"].as<float>();
    keepTopK = node["keepTopK"].as<int>();
    // BUG FIX: the function fell off the end without returning a value
    // (undefined behavior for a bool-returning function).
    return true;
}
bool YoloV5PostProcess::Exec(){
if (inputNames.size() != 4 + outputNames.size()){
LOG_ERROR << "yolov5 postprocess, inputsize != 4 + ouputsize.";
return false;
}
std::vector<base::BlobPtr<float>> inputs; // model outputs
for(int i=0; i<4; i++){
auto inputName = inputNames[i];
auto ptr = std::dynamic_pointer_cast<ios::NormalIO>(interfaces::GetIOPtr(inputName));
inputs.push_back(ptr->data_);
}
std::vector<base::Image8UPtr> inputImages;
for(int j=4; j<inputNames.size(); j++){
auto iName = inputNames[j];
auto iptr = std::dynamic_pointer_cast<ios::CameraSrcOut>(interfaces::GetIOPtr(iName));
inputImages.push_back(iptr->img_ptr_);
}
//post
int* num_detections = static_cast<int*>(static_cast<void*>(inputs[0]->mutable_cpu_data()));// it's type is int32
const float* nmsed_boxes = inputs[1]->cpu_data();
const float* nmsed_scores = inputs[2]->cpu_data();
const float* nmsed_classes = inputs[3]->cpu_data();
for(int b=0; b<inputImages.size(); b++){
auto outName = outputNames[b];
float img_width = float(inputImages[b]->cols());
float img_height = float(inputImages[b]->rows());
float scalex = inputWidth / img_width;
float scaley = inputHeight / img_height;
if(fixAspectRatio){
scalex = scaley = std::min(scalex, scaley);
}
auto dets = std::make_shared<ios::Detection2Ds>(ios::Detection2Ds());
for(int i = 0; i < num_detections[b]; i++){
if(nmsed_scores[b * keepTopK * 1 + i] < scoreThreshold){
continue;
}
ios::Det2DPtr obj = std::make_shared<ios::Det2D>(ios::Det2D());
obj->confidence = nmsed_scores[b * keepTopK * 1 + i];
obj->class_label = int(nmsed_classes[b * keepTopK * 1 + i]);
obj->class_name = classNames[obj->class_label];
obj->x1= nmsed_boxes[b * keepTopK * 4 + i * 4 + 0] / scalex;
obj->y1 = nmsed_boxes[b * keepTopK * 4 + i * 4 + 1] / scaley;
obj->x2 = nmsed_boxes[b * keepTopK * 4 + i * 4 + 2] / scalex;
obj->y2 = nmsed_boxes[b * keepTopK * 4 + i * 4 + 3] / scaley;
obj->image_height = img_height;
obj->image_width = img_width;
obj->validCoordinate(); //
// LOG_INFO << "box:" << obj->x1 << ","<< obj->y1 << ","<< obj->x2 << ","<< obj->y2;
if((obj->x1 / img_width < truncatedThreshold) || (obj->y1 / img_height < truncatedThreshold) ||
(obj->x2 / img_width > (1 - truncatedThreshold)) || (obj->y2 / img_height > (1 - truncatedThreshold))){
obj->truncated = true;
}
dets->detections.push_back(obj);
}
interfaces::SetIOPtr(outName, dets);
}
}
std::string YoloV5PostProcess::Name() {
    // Identifier used by the unit registry.
    static const std::string kName = "YoloV5PostProcess";
    return kName;
}
DEEPINFER_REGISTER_UNIT(YoloV5PostProcess);
} // namespace postprocess
} // namespace deepinfer
} // namespace waytous
#ifndef DEEPINFER_POSTPROCESS_YOLOV5_H_
#define DEEPINFER_POSTPROCESS_YOLOV5_H_
#include <cuda_runtime.h>
#include <cstdint>
#include "interfaces/base_unit.h"
#include "base/image.h"
#include "libs/ios/normal_ios.h"
#include "libs/ios/camera_ios.h"
#include "libs/ios/detection.h"
namespace waytous {
namespace deepinfer {
namespace postprocess {
// Post-processing unit for YOLOv5 TensorRT NMS outputs: rescales the
// NMSed boxes to original-image coordinates and emits Detection2Ds.
class YoloV5PostProcess: public interfaces::BaseUnit{
public:
bool Init(YAML::Node& node, YAML::Node& globalParamNode) override;
bool Exec() override;
std::string Name() override;
public:
// Per-image detection slots in the NMS output tensors.
int keepTopK = 100;
float scoreThreshold, truncatedThreshold;
std::vector<std::string> classNames;
};
} // namespace postprocess
} // namespace deepinfer
} // namespace waytous
#endif
#include "libs/preprocessors/resize_norm.h"
namespace waytous {
namespace deepinfer {
namespace preprocess {
// Reads resize/normalize parameters from YAML, allocates the shared
// destination blob (NCHW, one slot per batch image) and publishes it
// under outputNames[0]. Returns false if the base unit fails to init.
bool ResizeNorm::Init(YAML::Node& node, YAML::Node& globalParamNode){
    if(!BaseUnit::Init(node, globalParamNode)){
        LOG_WARN << "Init resize_norm error";
        return false;
    }
    useBGR = node["useBGR"].as<bool>();
    std::vector<float> inputMean = node["inputMean"].as<std::vector<float>>();
    std::vector<float> inputStd = node["inputStd"].as<std::vector<float>>();
    dst.reset(new base::Blob<float>({inferBatchSize, 3, inputHeight, inputWidth}));
    auto dst_ptr = std::make_shared<ios::NormalIO>(ios::NormalIO(dst));
    interfaces::SetIOPtr(outputNames[0], dst_ptr);
    // Per-channel mean/std uploaded once; read by the CUDA kernel.
    mean.reset(new base::Blob<float>({3, 1}, inputMean.data()));
    std.reset(new base::Blob<float>({3, 1}, inputStd.data()));
    // BUG FIX: the function fell off the end without returning a value
    // (undefined behavior for a bool-returning function).
    return true;
}
bool ResizeNorm::Exec(){
    // For each bound input image: resize + normalize on the GPU into
    // this unit's shared destination blob, one CHW plane-set per input.
    const int planeElems = 3 * inputHeight * inputWidth;
    for(size_t idx = 0; idx < inputNames.size(); idx++){
        auto in = std::dynamic_pointer_cast<ios::CameraSrcOut>(interfaces::GetIOPtr(inputNames[idx]));
        if(in == nullptr){
            LOG_ERROR << "resize norm input ptr haven't init";
            return false;
        }
        auto image = in->img_ptr_;
        // Offset into the batched output blob for this input. // multi inputs
        float* dstSlot = dst->mutable_gpu_data() + idx * planeElems;
        resizeGPU(image->mutable_gpu_data(),
                  image->cols(),
                  image->rows(),
                  image->width_step(),
                  dstSlot,
                  inputWidth, inputHeight,
                  mean->mutable_gpu_data(),
                  std->mutable_gpu_data(),
                  useBGR, fixAspectRatio, stream_);
    }
    return true;
}
// CUDA kernel: warps one BGR8 source image into a planar float tensor,
// applying affine transform d2s (dst -> src), bilinear sampling,
// optional BGR->RGB swap and per-channel mean/std normalization.
// One thread per destination pixel; `edge` is dst_width * dst_height.
__global__ void warpaffine_kernel(
uint8_t* src, int stepwidth, int src_width,
int src_height, float* dst, int dst_width,
int dst_height, uint8_t const_value_st,
AffineMatrix d2s, int edge, float* input_mean, float* input_std, bool bgr) {
int position = blockDim.x * blockIdx.x + threadIdx.x;
if (position >= edge) return;
// Unpack the 2x3 affine matrix mapping destination to source coords.
float m_x1 = d2s.value[0];
float m_y1 = d2s.value[1];
float m_z1 = d2s.value[2];
float m_x2 = d2s.value[3];
float m_y2 = d2s.value[4];
float m_z2 = d2s.value[5];
// Destination pixel coordinates for this thread.
int dx = position % dst_width;
int dy = position / dst_width;
float src_x = m_x1 * dx + m_y1 * dy + m_z1 + 0.5f;
float src_y = m_x2 * dx + m_y2 * dy + m_z2 + 0.5f;
float c0, c1, c2;
if (src_x <= -1 || src_x >= src_width || src_y <= -1 || src_y >= src_height) {
// out of range
c0 = const_value_st;
c1 = const_value_st;
c2 = const_value_st;
} else {
// Bilinear interpolation over the 2x2 source neighborhood; taps that
// fall outside the image read the constant border value instead.
int y_low = floorf(src_y);
int x_low = floorf(src_x);
int y_high = y_low + 1;
int x_high = x_low + 1;
uint8_t const_value[] = {const_value_st, const_value_st, const_value_st};
float ly = src_y - y_low;
float lx = src_x - x_low;
float hy = 1 - ly;
float hx = 1 - lx;
float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
uint8_t* v1 = const_value;
uint8_t* v2 = const_value;
uint8_t* v3 = const_value;
uint8_t* v4 = const_value;
if (y_low >= 0) {
if (x_low >= 0)
v1 = src + y_low * stepwidth + x_low * 3;
if (x_high < src_width)
v2 = src + y_low * stepwidth + x_high * 3;
}
if (y_high < src_height) {
if (x_low >= 0)
v3 = src + y_high * stepwidth + x_low * 3;
if (x_high < src_width)
v4 = src + y_high * stepwidth + x_high * 3;
}
c0 = w1 * v1[0] + w2 * v2[0] + w3 * v3[0] + w4 * v4[0];
c1 = w1 * v1[1] + w2 * v2[1] + w3 * v3[1] + w4 * v4[1];
c2 = w1 * v1[2] + w2 * v2[2] + w3 * v3[2] + w4 * v4[2];
}
if(!bgr){
//bgr to rgb
float t = c2;
c2 = c0;
c0 = t;
}
//normalization
c0 = (c0 / 255.0f - input_mean[0]) / input_std[0];
c1 = (c1 / 255.0f - input_mean[1]) / input_std[1];
c2 = (c2 / 255.0f - input_mean[2]) / input_std[2];
//rgbrgbrgb to rrrgggbbb or bgrbgrbgr to bbbgggrrr
int area = dst_width * dst_height;
float* pdst_c0 = dst + dy * dst_width + dx;
float* pdst_c1 = pdst_c0 + area;
float* pdst_c2 = pdst_c1 + area;
*pdst_c0 = c0;
*pdst_c1 = c1;
*pdst_c2 = c2;
}
// Builds the source->destination scaling transform (anchored at the
// top-left corner, optionally aspect-ratio preserving), inverts it to
// get the dst->src mapping the kernel needs, and launches
// warpaffine_kernel with one thread per destination pixel on `stream`.
void ResizeNorm::resizeGPU(uint8_t* src, int src_width, int src_height, int step_width,
float* dst, int dst_width, int dst_height, float* input_mean, float* input_std,
bool bgr, bool resizeFixAspectRatio, cudaStream_t stream){
AffineMatrix s2d, d2s;
float scalex = dst_width / (float)src_width;
float scaley = dst_height / (float)src_height;
if(resizeFixAspectRatio){
scalex = scaley = std::min(scalex, scaley);
}
// Pure scaling, no centering (the commented terms would letterbox-center).
s2d.value[0] = scalex;
s2d.value[1] = 0;
s2d.value[2] = 0; // -scalex * src_width * 0.5 + dst_width * 0.5;
s2d.value[3] = 0;
s2d.value[4] = scaley;
s2d.value[5] = 0; // -scaley * src_height * 0.5 + dst_height * 0.5;
cv::Mat m2x3_s2d(2, 3, CV_32F, s2d.value);
cv::Mat m2x3_d2s(2, 3, CV_32F, d2s.value);
cv::invertAffineTransform(m2x3_s2d, m2x3_d2s);
memcpy(d2s.value, m2x3_d2s.ptr<float>(0), sizeof(d2s.value));
// printf("affine: %f, %f, %f, %f, %f, %f\n", d2s.value[0], d2s.value[1], d2s.value[2], d2s.value[3], d2s.value[4], d2s.value[5]);
int jobs = dst_height * dst_width;
int threads = 256;
int blocks = ceil(jobs / (float)threads);
// Asynchronous launch; caller owns synchronization on `stream`.
warpaffine_kernel<<<blocks, threads, 0, stream>>>(
src, step_width, src_width,
src_height, dst, dst_width,
dst_height, 128, d2s, jobs, input_mean, input_std, bgr);
};
std::string ResizeNorm::Name(){
    // Identifier used by the unit registry.
    static const std::string kName = "ResizeNorm";
    return kName;
}
DEEPINFER_REGISTER_UNIT(ResizeNorm);
} // namespace preprocess
} // namespace deepinfer
} // namespace waytous
#ifndef DEEPINFER_PREPROCESS_RESIZE_NORM_H_
#define DEEPINFER_PREPROCESS_RESIZE_NORM_H_
#include <cuda_runtime.h>
#include <cstdint>
#include "interfaces/base_unit.h"
#include "base/image.h"
#include "libs/sources/camera_src.h"
#include "libs/ios/normal_ios.h"
#include "libs/ios/camera_ios.h"
namespace waytous {
namespace deepinfer {
namespace preprocess {
// 2x3 affine transform stored row-major: [a b tx; c d ty].
struct AffineMatrix{
float value[6];
};
// Preprocessing unit: GPU resize + mean/std normalization of camera
// images into a batched planar float blob for inference.
class ResizeNorm: public interfaces::BaseUnit{
public:
bool Init(YAML::Node& node, YAML::Node& globalParamNode) override;
bool Exec() override;
std::string Name() override;
protected:
// Launches the warp-affine CUDA kernel for one image.
void resizeGPU(uint8_t* src, int src_width, int src_height, int step_width,
float* dst, int dst_width, int dst_height, float* input_mean, float* input_std,
bool bgr, bool resizeFixAspectRatio, cudaStream_t stream);
public:
// NOTE(review): stream_ appears to be left default-initialized here
// (no Init assignment visible) — confirm who sets it.
cudaStream_t stream_;
// dst: batched output blob; mean/std: per-channel normalization params.
base::BlobPtr<float> dst, mean, std;
// When true the network consumes BGR order; otherwise channels are swapped to RGB.
bool useBGR = false;
};
} // namespace preprocess
} // namespace deepinfer
} // namespace waytous
#endif
#include "libs/sources/camera_src.h"
namespace waytous {
namespace deepinfer {
namespace sources {
bool CameraSrc::Exec(){
auto src_input = interfaces::GetIOPtr(inputNames[0]);
auto src = std::dynamic_pointer_cast<ios::CameraSrcIn>(src_input);
auto img = std::make_shared<base::Image8U>(base::Image8U(src->cv_img_));
auto dst = std::make_shared<ios::CameraSrcOut>(ios::CameraSrcOut(img));
interfaces::SetIOPtr(outputNames[0], dst);
return true;
}
std::string CameraSrc::Name(){
    // Identifier used by the unit registry.
    static const std::string kName = "CameraSrc";
    return kName;
}
DEEPINFER_REGISTER_UNIT(CameraSrc);
} // namespace sources
} // namespace deepinfer
} // namespace waytous
#ifndef DEEPINFER_CAMERA_SOURCE_H_
#define DEEPINFER_CAMERA_SOURCE_H_
#include <cuda_runtime.h>
#include <cstdint>
#include "base/image.h"
#include "interfaces/base_unit.h"
#include "libs/ios/camera_ios.h"
namespace waytous {
namespace deepinfer {
namespace sources {
// Source unit: converts the pipeline's raw cv::Mat input into the
// Image8U wrapper the downstream GPU units consume.
class CameraSrc: public interfaces::BaseUnit{
public:
bool Exec() override;
std::string Name() override;
};
using CameraSrcPtr = std::shared_ptr<CameraSrc>;
} // namespace sources
} // namespace deepinfer
} // namespace waytous
#endif
This diff is collapsed.
#ifndef WAYTOUS_DEEPINFER_TRACKER_BYTETRACKER_H_
#define WAYTOUS_DEEPINFER_TRACKER_BYTETRACKER_H_
#include <opencv2/opencv.hpp>
#include "interfaces/base_unit.h"
#include "libs/trackers/tracker_datatype.h"
#include "libs/trackers/track.h"
#include "libs/trackers/lapjv.h"
namespace waytous{
namespace deepinfer{
namespace tracker{
// Multi-object tracker unit implementing the ByteTrack association
// scheme: two-stage IoU matching (high then low confidence) solved
// with the LAPJV linear assignment algorithm, states predicted by a
// per-track Kalman filter.
class ByteTracker: public interfaces::BaseUnit
{
public:
bool Init(YAML::Node& node, YAML::Node& globalParamNode) override;
bool Exec() override;
std::string Name() override;
private:
// Set helpers: union / difference of track lists.
std::vector<Track*> joint_tracks(std::vector<Track*> &tlista, std::vector<Track> &tlistb);
std::vector<Track> joint_tracks(std::vector<Track> &tlista, std::vector<Track> &tlistb);
std::vector<Track> sub_tracks(std::vector<Track> &tlista, std::vector<Track> &tlistb);
void remove_duplicate_tracks(std::vector<Track> &resa, std::vector<Track> &resb, std::vector<Track> &tracksa, std::vector<Track> &tracksb);
// Thresholded assignment on an IoU cost matrix; fills matches and the
// unmatched index lists for both sides.
void linear_assignment(std::vector<std::vector<float> > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh,
std::vector<std::vector<int> > &matches, std::vector<int> &unmatched_a, std::vector<int> &unmatched_b);
std::vector<std::vector<float> > iou_distance(std::vector<Track*> &atracks, std::vector<Track> &btracks, int &dist_size, int &dist_size_size);
std::vector<std::vector<float> > iou_distance(std::vector<Track> &atracks, std::vector<Track> &btracks);
std::vector<std::vector<float> > ious(std::vector<std::vector<float> > &atlbrs, std::vector<std::vector<float> > &btlbrs);
// Jonker-Volgenant solver wrapper; returns total assignment cost.
// NOTE(review): cost_limit default is LONG_MAX narrowed to float — confirm intended.
double lapjv(const std::vector<std::vector<float> > &cost, std::vector<int> &rowsol, std::vector<int> &colsol,
bool extend_cost = false, float cost_limit = LONG_MAX, bool return_cost = true);
private:
// Detection-confidence thresholds for the two association stages.
float track_thresh;
float high_thresh;
float match_thresh;
int frame_id;
// Frames a lost track is kept before removal.
int max_time_lost;
int init_frames = 0;
std::vector<Track> tracked_tracks;
std::vector<Track> lost_tracks;
std::vector<Track> removed_tracks;
KalmanFilter kalman_filter;
};
} //namespace tracker
} //namspace deepinfer
} //namespace waytous
#endif
#include "libs/trackers/kalman_filter.h"
namespace waytous{
namespace deepinfer{
namespace tracker{
// 0.95 quantile of the chi-square distribution, indexed by degrees of
// freedom (index 0 unused). Presumably used to gate Mahalanobis
// distances from gating_distance() — confirm against callers.
const double KalmanFilter::chi2inv95[10] = {
0,
3.8415,
5.9915,
7.8147,
9.4877,
11.070,
12.592,
14.067,
15.507,
16.919
};
KalmanFilter::KalmanFilter()
{
    // Constant-velocity motion model over the 8-d state
    // [x, y, a, h, vx, vy, va, vh]: F = I with dt on the velocity
    // off-diagonal; H projects onto the first four components.
    const int kNDim = 4;
    const double kDt = 1.;
    _motion_mat = Eigen::MatrixXf::Identity(8, 8);
    for (int k = 0; k < kNDim; k++) {
        _motion_mat(k, kNDim + k) = kDt;
    }
    _update_mat = Eigen::MatrixXf::Identity(4, 8);
    // Noise weights relative to the box height (DeepSORT convention).
    _std_weight_position = 1. / 20;
    _std_weight_velocity = 1. / 160;
}
KAL_DATA KalmanFilter::initiate(const DETECTBOX &measurement)
{
    // Builds the initial 8-d state from one measurement: position terms
    // copied, velocity terms zero. The covariance is diagonal with
    // standard deviations scaled by the measured box height.
    KAL_MEAN mean;
    for (int k = 0; k < 8; k++) {
        mean(k) = (k < 4) ? measurement(k) : 0;
    }
    const float h = measurement[3];
    KAL_MEAN stddev;
    stddev(0) = 2 * _std_weight_position * h;
    stddev(1) = 2 * _std_weight_position * h;
    stddev(2) = 1e-2;
    stddev(3) = 2 * _std_weight_position * h;
    stddev(4) = 10 * _std_weight_velocity * h;
    stddev(5) = 10 * _std_weight_velocity * h;
    stddev(6) = 1e-5;
    stddev(7) = 10 * _std_weight_velocity * h;
    KAL_MEAN squared = stddev.array().square();
    KAL_COVA covariance = squared.asDiagonal();
    return std::make_pair(mean, covariance);
}
void KalmanFilter::predict(KAL_MEAN &mean, KAL_COVA &covariance)
{
    // Time update: x' = F x, P' = F P F^T + Q, with process noise Q
    // built from height-scaled standard deviations.
    DETECTBOX std_pos;
    std_pos << _std_weight_position * mean(3),
        _std_weight_position * mean(3),
        1e-2,
        _std_weight_position * mean(3);
    DETECTBOX std_vel;
    std_vel << _std_weight_velocity * mean(3),
        _std_weight_velocity * mean(3),
        1e-5,
        _std_weight_velocity * mean(3);
    KAL_MEAN stddev;
    stddev.block<1, 4>(0, 0) = std_pos;
    stddev.block<1, 4>(0, 4) = std_vel;
    stddev = stddev.array().square();
    KAL_COVA motion_cov = stddev.asDiagonal();
    KAL_MEAN predicted_mean = this->_motion_mat * mean.transpose();
    KAL_COVA predicted_cov = this->_motion_mat * covariance * (_motion_mat.transpose());
    predicted_cov += motion_cov;
    // Write the prediction back through the in/out parameters.
    mean = predicted_mean;
    covariance = predicted_cov;
}
KAL_HDATA KalmanFilter::project(const KAL_MEAN &mean, const KAL_COVA &covariance)
{
    // Projects the 8-d state into 4-d measurement space (H x, H P H^T)
    // and adds diagonal measurement noise R scaled by the box height.
    DETECTBOX stddev;
    stddev << _std_weight_position * mean(3), _std_weight_position * mean(3),
        1e-1, _std_weight_position * mean(3);
    KAL_HMEAN projected_mean = _update_mat * mean.transpose();
    KAL_HCOVA projected_cov = _update_mat * covariance * (_update_mat.transpose());
    Eigen::Matrix<float, 4, 4> noise = stddev.asDiagonal();
    noise = noise.array().square().matrix();   // R = diag(stddev^2)
    projected_cov += noise;
    return std::make_pair(projected_mean, projected_cov);
}
// Measurement update: computes the Kalman gain via a Cholesky (LLT)
// solve of the projected covariance and folds the measurement
// innovation into the state. Returns the corrected (mean, covariance).
KAL_DATA
KalmanFilter::update(
const KAL_MEAN &mean,
const KAL_COVA &covariance,
const DETECTBOX &measurement)
{
KAL_HDATA pa = project(mean, covariance);
KAL_HMEAN projected_mean = pa.first;
KAL_HCOVA projected_cov = pa.second;
// Equivalent to scipy's cho_factor/cho_solve on projected_cov:
//chol_factor, lower =
//scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False)
//kalmain_gain =
//scipy.linalg.cho_solve((cho_factor, lower),
//np.dot(covariance, self._upadte_mat.T).T,
//check_finite=False).T
Eigen::Matrix<float, 4, 8> B = (covariance * (_update_mat.transpose())).transpose();
Eigen::Matrix<float, 8, 4> kalman_gain = (projected_cov.llt().solve(B)).transpose(); // eg.8x4
Eigen::Matrix<float, 1, 4> innovation = measurement - projected_mean; //eg.1x4
auto tmp = innovation * (kalman_gain.transpose());
// x' = x + K * innovation ;  P' = P - K S K^T
KAL_MEAN new_mean = (mean.array() + tmp.array()).matrix();
KAL_COVA new_covariance = covariance - kalman_gain * projected_cov*(kalman_gain.transpose());
return std::make_pair(new_mean, new_covariance);
}
// Squared Mahalanobis distance from the projected state distribution
// to each measurement (one column per measurement). Uses a triangular
// solve against the Cholesky factor instead of an explicit inverse.
// only_position=true is not implemented and aborts the process.
Eigen::Matrix<float, 1, -1>
KalmanFilter::gating_distance(
const KAL_MEAN &mean,
const KAL_COVA &covariance,
const std::vector<DETECTBOX> &measurements,
bool only_position)
{
KAL_HDATA pa = this->project(mean, covariance);
if (only_position) {
printf("not implement!");
exit(0);
}
KAL_HMEAN mean1 = pa.first;
KAL_HCOVA covariance1 = pa.second;
// Stack the innovations (measurement - projected mean) row-wise.
// Eigen::Matrix<float, -1, 4, Eigen::RowMajor> d(size, 4);
DETECTBOXSS d(measurements.size(), 4);
int pos = 0;
for (DETECTBOX box : measurements) {
d.row(pos++) = box - mean1;
}
// Solve L z^T = d^T where S = L L^T, then the per-measurement squared
// Mahalanobis distance is the column-wise sum of z element squares.
Eigen::Matrix<float, -1, -1, Eigen::RowMajor> factor = covariance1.llt().matrixL();
Eigen::Matrix<float, -1, -1> z = factor.triangularView<Eigen::Lower>().solve<Eigen::OnTheRight>(d).transpose();
auto zz = ((z.array())*(z.array())).matrix();
auto square_maha = zz.colwise().sum();
return square_maha;
}
} //namespace tracker
} //namspace deepinfer
} //namespace waytous
#ifndef WAYTOUS_DEEPINFER_TRACKER_KALMAN_FILTER_H_
#define WAYTOUS_DEEPINFER_TRACKER_KALMAN_FILTER_H_
#include <Eigen/Cholesky>
#include "libs/trackers/tracker_datatype.h"
namespace waytous{
namespace deepinfer{
namespace tracker{
// Kalman filter over the 8-d track state [x, y, a, h, vx, vy, va, vh]
// (center, aspect ratio, height + velocities), constant-velocity
// motion model, as used by DeepSORT/ByteTrack-style trackers.
class KalmanFilter
{
public:
// chi-square 0.95 quantiles by degrees of freedom, for distance gating.
static const double chi2inv95[10];
KalmanFilter();
// Creates (mean, covariance) for a first measurement.
KAL_DATA initiate(const DETECTBOX& measurement);
// Time update, in place via the reference parameters.
void predict(KAL_MEAN& mean, KAL_COVA& covariance);
// State -> measurement-space projection (adds measurement noise).
KAL_HDATA project(const KAL_MEAN& mean, const KAL_COVA& covariance);
// Measurement update; returns the corrected (mean, covariance).
KAL_DATA update(const KAL_MEAN& mean,
const KAL_COVA& covariance,
const DETECTBOX& measurement);
// Squared Mahalanobis distance to each measurement.
Eigen::Matrix<float, 1, -1> gating_distance(
const KAL_MEAN& mean,
const KAL_COVA& covariance,
const std::vector<DETECTBOX>& measurements,
bool only_position = false);
private:
Eigen::Matrix<float, 8, 8, Eigen::RowMajor> _motion_mat;   // F
Eigen::Matrix<float, 4, 8, Eigen::RowMajor> _update_mat;   // H
float _std_weight_position;
float _std_weight_velocity;
};
} //namespace tracker
} //namspace deepinfer
} //namespace waytous
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libs/trackers/lapjv.h"
namespace waytous{
namespace deepinfer{
namespace tracker{
/** Column-reduction and reduction transfer for a dense cost matrix.
*/
// LAPJV step 1: column reduction (each column's v[j] becomes its
// minimum cost; the minimizing row tentatively claims the column)
// followed by reduction transfer for rows that claimed exactly one
// column. Fills x (row->col), y (col->row), v (dual prices) and the
// list of still-unassigned rows; returns how many rows remain free.
int_t _ccrrt_dense(const uint_t n, cost_t *cost[],
int_t *free_rows, int_t *x, int_t *y, cost_t *v)
{
int_t n_free_rows;
boolean *unique;
for (uint_t i = 0; i < n; i++) {
x[i] = -1;
v[i] = LARGE;
y[i] = 0;
}
// Column reduction: v[j] = min_i cost[i][j], y[j] = argmin row.
for (uint_t i = 0; i < n; i++) {
for (uint_t j = 0; j < n; j++) {
const cost_t c = cost[i][j];
if (c < v[j]) {
v[j] = c;
y[j] = i;
}
PRINTF("i=%d, j=%d, c[i,j]=%f, v[j]=%f y[j]=%d\n", i, j, c, v[j], y[j]);
}
}
PRINT_COST_ARRAY(v, n);
PRINT_INDEX_ARRAY(y, n);
NEW(unique, boolean, n);
memset(unique, TRUE, n);
// Walk columns backwards so each row keeps only one claimed column;
// rows claiming several columns are marked non-unique.
{
int_t j = n;
do {
j--;
const int_t i = y[j];
if (x[i] < 0) {
x[i] = j;
}
else {
unique[i] = FALSE;
y[j] = -1;
}
} while (j > 0);
}
n_free_rows = 0;
// Reduction transfer: lower v on a uniquely-assigned column by the
// row's second-best reduced cost, tightening the dual solution.
for (uint_t i = 0; i < n; i++) {
if (x[i] < 0) {
free_rows[n_free_rows++] = i;
}
else if (unique[i]) {
const int_t j = x[i];
cost_t min = LARGE;
for (uint_t j2 = 0; j2 < n; j2++) {
if (j2 == (uint_t)j) {
continue;
}
const cost_t c = cost[i][j2] - v[j2];
if (c < min) {
min = c;
}
}
PRINTF("v[%d] = %f - %f\n", j, v[j], min);
v[j] -= min;
}
}
FREE(unique);
return n_free_rows;
}
/** Augmenting row reduction for a dense cost matrix.
*/
// LAPJV step 2: augmenting row reduction. Each free row finds its best
// and second-best reduced-cost columns; claiming the best may displace
// a previously assigned row, which re-enters the free list. The
// rr_cnt/current*n check bounds the work to avoid cycling. Returns the
// number of rows still free afterwards.
int_t _carr_dense(
const uint_t n, cost_t *cost[],
const uint_t n_free_rows,
int_t *free_rows, int_t *x, int_t *y, cost_t *v)
{
uint_t current = 0;
int_t new_free_rows = 0;
uint_t rr_cnt = 0;
PRINT_INDEX_ARRAY(x, n);
PRINT_INDEX_ARRAY(y, n);
PRINT_COST_ARRAY(v, n);
PRINT_INDEX_ARRAY(free_rows, n_free_rows);
while (current < n_free_rows) {
int_t i0;
int_t j1, j2;
cost_t v1, v2, v1_new;
boolean v1_lowers;
rr_cnt++;
PRINTF("current = %d rr_cnt = %d\n", current, rr_cnt);
const int_t free_i = free_rows[current++];
// Scan the row for its two smallest reduced costs:
// (j1, v1) = best, (j2, v2) = second best.
j1 = 0;
v1 = cost[free_i][0] - v[0];
j2 = -1;
v2 = LARGE;
for (uint_t j = 1; j < n; j++) {
PRINTF("%d = %f %d = %f\n", j1, v1, j2, v2);
const cost_t c = cost[free_i][j] - v[j];
if (c < v2) {
if (c >= v1) {
v2 = c;
j2 = j;
}
else {
v2 = v1;
v1 = c;
j2 = j1;
j1 = j;
}
}
}
i0 = y[j1];
v1_new = v[j1] - (v2 - v1);
v1_lowers = v1_new < v[j1];
PRINTF("%d %d 1=%d,%f 2=%d,%f v1'=%f(%d,%g) \n", free_i, i0, j1, v1, j2, v2, v1_new, v1_lowers, v[j1] - v1_new);
if (rr_cnt < current * n) {
if (v1_lowers) {
// Price drop on j1: keep j1 and retry the displaced row soon.
v[j1] = v1_new;
}
else if (i0 >= 0 && j2 >= 0) {
// No price drop possible: take the second-best column instead.
j1 = j2;
i0 = y[j2];
}
if (i0 >= 0) {
if (v1_lowers) {
free_rows[--current] = i0;
}
else {
free_rows[new_free_rows++] = i0;
}
}
}
else {
// Work bound exceeded: defer the displaced row to the next phase.
PRINTF("rr_cnt=%d >= %d (current=%d * n=%d)\n", rr_cnt, current * n, current, n);
if (i0 >= 0) {
free_rows[new_free_rows++] = i0;
}
}
// Assign the chosen column to this row (possibly displacing i0).
x[free_i] = j1;
y[j1] = free_i;
}
return new_free_rows;
}
/** Find columns with minimum d[j] and put them on the SCAN list.
*/
// Dijkstra helper: partitions cols[lo..n) so that all columns with the
// minimum tentative distance d[j] sit in cols[lo..hi) (the SCAN list).
// Returns hi, the end of the SCAN region.
uint_t _find_dense(const uint_t n, uint_t lo, cost_t *d, int_t *cols, int_t *y)
{
uint_t hi = lo + 1;
cost_t mind = d[cols[lo]];
for (uint_t k = hi; k < n; k++) {
int_t j = cols[k];
if (d[j] <= mind) {
if (d[j] < mind) {
// Strictly smaller distance: restart the SCAN region at lo.
hi = lo;
mind = d[j];
}
// Swap column j into the SCAN region.
cols[k] = cols[hi];
cols[hi++] = j;
}
}
return hi;
}
// Scan all columns in TODO starting from arbitrary column in SCAN
// and try to decrease d of the TODO columns using the SCAN column.
// Dijkstra helper: relaxes the distances of all TODO columns
// (cols[hi..n)) through each SCAN column (cols[lo..hi)). A relaxed
// column whose distance equals the current minimum and which is
// unassigned (y[j] < 0) is a shortest augmenting-path endpoint and is
// returned immediately; otherwise it joins the SCAN list. Returns -1
// when the SCAN list is exhausted without finding a free column.
int_t _scan_dense(const uint_t n, cost_t *cost[],
uint_t *plo, uint_t*phi,
cost_t *d, int_t *cols, int_t *pred,
int_t *y, cost_t *v)
{
uint_t lo = *plo;
uint_t hi = *phi;
cost_t h, cred_ij;
while (lo != hi) {
int_t j = cols[lo++];
const int_t i = y[j];
const cost_t mind = d[j];
// h = reduced cost offset of the row assigned to the scanned column.
h = cost[i][j] - v[j] - mind;
PRINTF("i=%d j=%d h=%f\n", i, j, h);
// For all columns in TODO
for (uint_t k = hi; k < n; k++) {
j = cols[k];
cred_ij = cost[i][j] - v[j] - h;
if (cred_ij < d[j]) {
d[j] = cred_ij;
pred[j] = i;
if (cred_ij == mind) {
if (y[j] < 0) {
return j;
}
cols[k] = cols[hi];
cols[hi++] = j;
}
}
}
}
*plo = lo;
*phi = hi;
return -1;
}
/** Single iteration of modified Dijkstra shortest path algorithm as explained in the JV paper.
*
* This is a dense matrix version.
*
* \return The closest free column index.
*/
// Modified-Dijkstra shortest augmenting path from row start_i to the
// nearest unassigned column, alternating _find_dense (select minimum-
// distance columns) and _scan_dense (relax through them). On success,
// the dual prices v of all READY columns are updated by their distance
// slack so complementary slackness is preserved. Returns the free
// column index ending the path; pred[] encodes the path for _ca_dense.
int_t find_path_dense(
const uint_t n, cost_t *cost[],
const int_t start_i,
int_t *y, cost_t *v,
int_t *pred)
{
uint_t lo = 0, hi = 0;
int_t final_j = -1;
uint_t n_ready = 0;
int_t *cols;
cost_t *d;
NEW(cols, int_t, n);
NEW(d, cost_t, n);
// Initialize distances from the start row's reduced costs.
for (uint_t i = 0; i < n; i++) {
cols[i] = i;
pred[i] = start_i;
d[i] = cost[start_i][i] - v[i];
}
PRINT_COST_ARRAY(d, n);
while (final_j == -1) {
// No columns left on the SCAN list.
if (lo == hi) {
PRINTF("%d..%d -> find\n", lo, hi);
n_ready = lo;
hi = _find_dense(n, lo, d, cols, y);
PRINTF("check %d..%d\n", lo, hi);
PRINT_INDEX_ARRAY(cols, n);
// A minimum-distance column that is unassigned ends the path.
for (uint_t k = lo; k < hi; k++) {
const int_t j = cols[k];
if (y[j] < 0) {
final_j = j;
}
}
}
if (final_j == -1) {
PRINTF("%d..%d -> scan\n", lo, hi);
final_j = _scan_dense(
n, cost, &lo, &hi, d, cols, pred, y, v);
PRINT_COST_ARRAY(d, n);
PRINT_INDEX_ARRAY(cols, n);
PRINT_INDEX_ARRAY(pred, n);
}
}
PRINTF("found final_j=%d\n", final_j);
PRINT_INDEX_ARRAY(cols, n);
// Dual update for all READY (fully scanned) columns.
{
const cost_t mind = d[cols[lo]];
for (uint_t k = 0; k < n_ready; k++) {
const int_t j = cols[k];
v[j] += d[j] - mind;
}
}
FREE(cols);
FREE(d);
return final_j;
}
/** Augment for a dense cost matrix.
*/
// LAPJV step 3: augmentation. For each remaining free row, find a
// shortest augmenting path and flip the assignments along it
// (following pred[] back from the free column to the starting row).
// The k >= n assertion guards against a malformed/cyclic path.
int_t _ca_dense(
const uint_t n, cost_t *cost[],
const uint_t n_free_rows,
int_t *free_rows, int_t *x, int_t *y, cost_t *v)
{
int_t *pred;
NEW(pred, int_t, n);
for (int_t *pfree_i = free_rows; pfree_i < free_rows + n_free_rows; pfree_i++) {
int_t i = -1, j;
uint_t k = 0;
PRINTF("looking at free_i=%d\n", *pfree_i);
j = find_path_dense(n, cost, *pfree_i, y, v, pred);
ASSERT(j >= 0);
ASSERT(j < n);
// Walk the path backwards, swapping row/column assignments.
while (i != *pfree_i) {
PRINTF("augment %d\n", j);
PRINT_INDEX_ARRAY(pred, n);
i = pred[j];
PRINTF("y[%d]=%d -> %d\n", j, y[j], i);
y[j] = i;
PRINT_INDEX_ARRAY(x, n);
SWAP_INDICES(j, x[i]);
k++;
if (k >= n) {
ASSERT(FALSE);
}
}
}
FREE(pred);
return 0;
}
/** Solve dense sparse LAP.
*/
// Solves the dense n x n linear assignment problem (Jonker-Volgenant):
// column reduction + reduction transfer, at most two augmenting row
// reduction passes, then shortest-augmenting-path augmentation for any
// rows still free. On success x[i] is the column assigned to row i and
// y[j] the row assigned to column j; returns 0 (negative on alloc
// failure via the NEW macro).
int lapjv_internal(
const uint_t n, cost_t *cost[],
int_t *x, int_t *y)
{
int ret;
int_t *free_rows;
cost_t *v;
NEW(free_rows, int_t, n);
NEW(v, cost_t, n);
ret = _ccrrt_dense(n, cost, free_rows, x, y, v);
int i = 0;
// Up to two augmenting-row-reduction sweeps, per the reference LAPJV.
while (ret > 0 && i < 2) {
ret = _carr_dense(n, cost, ret, free_rows, x, y, v);
i++;
}
if (ret > 0) {
ret = _ca_dense(n, cost, ret, free_rows, x, y, v);
}
FREE(v);
FREE(free_rows);
return ret;
}
} //namespace tracker
} //namspace deepinfer
} //namespace waytous
#ifndef WAYTOUS_DEEPINFER_TRACKER_LAPJV_H_
#define WAYTOUS_DEEPINFER_TRACKER_LAPJV_H_
namespace waytous{
namespace deepinfer{
namespace tracker{
// Sentinel cost treated as infinity by the solver.
#define LARGE 1000000
#if !defined TRUE
#define TRUE 1
#endif
#if !defined FALSE
#define FALSE 0
#endif
// Allocation helper: on malloc failure the ENCLOSING function returns -1,
// so NEW may only be used inside functions returning int_t/int.
#define NEW(x, t, n) if ((x = (t *)malloc(sizeof(t) * (n))) == 0) { return -1; }
#define FREE(x) if (x != 0) { free(x); x = 0; }
#define SWAP_INDICES(a, b) { int_t _temp_index = a; a = b; b = _temp_index; }
#if 0
#include <assert.h>
#define ASSERT(cond) assert(cond)
#define PRINTF(fmt, ...) printf(fmt, ##__VA_ARGS__)
#define PRINT_COST_ARRAY(a, n) \
while (1) { \
printf(#a" = ["); \
if ((n) > 0) { \
printf("%f", (a)[0]); \
for (uint_t j = 1; j < n; j++) { \
printf(", %f", (a)[j]); \
} \
} \
printf("]\n"); \
break; \
}
#define PRINT_INDEX_ARRAY(a, n) \
while (1) { \
printf(#a" = ["); \
if ((n) > 0) { \
printf("%d", (a)[0]); \
for (uint_t j = 1; j < n; j++) { \
printf(", %d", (a)[j]); \
} \
} \
printf("]\n"); \
break; \
}
#else
#define ASSERT(cond)
#define PRINTF(fmt, ...)
#define PRINT_COST_ARRAY(a, n)
#define PRINT_INDEX_ARRAY(a, n)
#endif
typedef signed int int_t;
typedef unsigned int uint_t;
typedef double cost_t;
typedef char boolean;
typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t;
extern int_t lapjv_internal(
const uint_t n, cost_t *cost[],
int_t *x, int_t *y);
} //namespace tracker
} //namspace deepinfer
} //namespace waytous
#endif
#include "libs/trackers/track.h"
namespace waytous{
namespace deepinfer{
namespace tracker{
// Build a new track from a 2D detection: cache a copy of the detection,
// derive the (top-left x, y, width, height) box from its corners, and start
// the track in the New state with no id assigned yet.
Track::Track(ios::Det2DPtr obj)
{
    obj_ = obj->copy();
    _tlwh.resize(4);
    const float left = obj->x1;
    const float top = obj->y1;
    _tlwh[0] = left;
    _tlwh[1] = top;
    _tlwh[2] = obj->x2 - left;
    _tlwh[3] = obj->y2 - top;
    is_activated = false;
    track_id = 0;
    state = TrackState::New;
    // Size the derived boxes before static_tlwh()/static_tlbr() write them.
    tlwh.resize(4);
    tlbr.resize(4);
    static_tlwh();
    static_tlbr();
    frame_id = 0;
    tracklet_len = 0;
    start_frame = 0;
}
// Nothing to release explicitly: members (vectors, Eigen matrices, the
// shared detection pointer) clean themselves up.
Track::~Track()
{
}
// Start tracking this detection: take the given Kalman filter, assign a
// fresh track id, and seed the filter state from the cached detection box.
// The track becomes Tracked; is_activated is set only on the very first
// frame (later tracks are confirmed via re_activate/update).
void Track::activate(KalmanFilter &kalman_filter, int frame_id)
{
    this->kalman_filter = kalman_filter;
    this->track_id = this->next_id();
    this->obj_->track_id = this->track_id;
    // Convert the cached tlwh box into the filter's measurement space
    // (center-x, center-y, aspect ratio, height).
    std::vector<float> xyah = tlwh_to_xyah(this->_tlwh);
    DETECTBOX xyah_box;
    for (int k = 0; k < 4; k++) {
        xyah_box[k] = xyah[k];
    }
    auto mc = this->kalman_filter.initiate(xyah_box);
    this->mean = mc.first;
    this->covariance = mc.second;
    static_tlwh();
    static_tlbr();
    this->tracklet_len = 0;
    this->state = TrackState::Tracked;
    // Only detections from the first frame of the sequence are confirmed
    // immediately.
    if (frame_id == 1)
    {
        this->is_activated = true;
    }
    this->frame_id = frame_id;
    this->start_frame = frame_id;
}
// Revive a lost track with a fresh detection: fold the new measurement into
// the Kalman state, mark the track Tracked and confirmed, and optionally
// hand it a brand-new id.
void Track::re_activate(Track &new_track, int frame_id, bool new_id)
{
    std::vector<float> measurement = tlwh_to_xyah(new_track.tlwh);
    DETECTBOX xyah_box;
    for (int k = 0; k < 4; k++) {
        xyah_box[k] = measurement[k];
    }
    auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box);
    this->mean = mc.first;
    this->covariance = mc.second;
    static_tlwh();
    static_tlbr();
    this->tracklet_len++;
    this->state = TrackState::Tracked;
    this->is_activated = true;
    this->frame_id = frame_id;
    if (new_id) {
        this->track_id = next_id();
        this->obj_->track_id = this->track_id;
    }
    // Propagate the matched detection's attributes, then the smoothed box.
    this->obj_->update(new_track.obj_);
    this->obj_->update(tlbr);
}
// Fold a matched detection into this track: run the Kalman update with the
// new box, refresh the derived boxes, and sync the underlying detection.
void Track::update(Track &new_track, int frame_id)
{
    this->frame_id = frame_id;
    this->tracklet_len++;
    std::vector<float> measurement = tlwh_to_xyah(new_track.tlwh);
    DETECTBOX xyah_box;
    for (int k = 0; k < 4; k++) {
        xyah_box[k] = measurement[k];
    }
    auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box);
    this->mean = mc.first;
    this->covariance = mc.second;
    static_tlwh();
    static_tlbr();
    this->state = TrackState::Tracked;
    this->is_activated = true;
    // Keep the raw detection box around for mask visualization.
    this->_tlwh = new_track._tlwh;
    // Propagate the matched detection's attributes, then the smoothed box.
    this->obj_->update(new_track.obj_);
    this->obj_->update(tlbr);
}
// Refresh the public tlwh box. New tracks echo the raw detection box;
// otherwise the Kalman mean (center-x, center-y, aspect, height) is decoded
// back to top-left/width/height form.
void Track::static_tlwh()
{
    if (this->state == TrackState::New)
    {
        for (int k = 0; k < 4; k++) {
            tlwh[k] = _tlwh[k];
        }
        return;
    }
    const float h = mean[3];
    const float w = mean[2] * h; // mean[2] holds the aspect ratio w/h
    tlwh[2] = w;
    tlwh[3] = h;
    tlwh[0] = mean[0] - w / 2;
    tlwh[1] = mean[1] - h / 2;
}
// Derive the (x1, y1, x2, y2) box from the current tlwh box.
void Track::static_tlbr()
{
    tlbr = tlwh;
    tlbr[2] = tlbr[0] + tlbr[2];
    tlbr[3] = tlbr[1] + tlbr[3];
}
// Convert (top-left x, top-left y, w, h) into the Kalman measurement space
// (center-x, center-y, aspect ratio w/h, h). The argument is taken by value
// and left untouched for the caller.
std::vector<float> Track::tlwh_to_xyah(std::vector<float> tlwh_tmp)
{
    std::vector<float> xyah(tlwh_tmp);
    xyah[0] = tlwh_tmp[0] + tlwh_tmp[2] / 2;
    xyah[1] = tlwh_tmp[1] + tlwh_tmp[3] / 2;
    xyah[2] = tlwh_tmp[2] / tlwh_tmp[3];
    return xyah;
}
// Current smoothed box in Kalman measurement space (cx, cy, aspect, h).
std::vector<float> Track::to_xyah()
{
return tlwh_to_xyah(tlwh);
}
// Convert an (x1, y1, x2, y2) box to (x, y, w, h).
// NOTE(review): converts IN PLACE -- the caller's vector is mutated and then
// also returned by value. Callers must not keep using the argument as a
// tlbr box afterwards.
std::vector<float> Track::tlbr_to_tlwh(std::vector<float> &tlbr)
{
tlbr[2] -= tlbr[0];
tlbr[3] -= tlbr[1];
return tlbr;
}
// Transition the track to the Lost state (no longer matched this frame).
void Track::mark_lost()
{
state = TrackState::Lost;
}
// Transition the track to the Removed state (dropped from tracking).
void Track::mark_removed()
{
state = TrackState::Removed;
}
// Hand out monotonically increasing track ids, starting at 1.
// NOTE(review): the counter is a function-local static shared by all Track
// instances and is not synchronized -- assumes single-threaded use.
int Track::next_id()
{
    static int _count = 0;
    return ++_count;
}
// Frame id of the most recent activate/update for this track.
int Track::end_frame()
{
return this->frame_id;
}
// Run the Kalman prediction step for every track. Tracks that are not
// currently Tracked get their last state-velocity component (mean[7],
// presumably the height velocity) zeroed first so stale boxes do not keep
// drifting.
void Track::multi_predict(std::vector<Track*> &tracks, KalmanFilter &kalman_filter)
{
    for (auto *trk : tracks)
    {
        if (trk->state != TrackState::Tracked)
        {
            trk->mean[7] = 0;
        }
        kalman_filter.predict(trk->mean, trk->covariance);
    }
}
} //namespace tracker
} //namespace deepinfer
} //namespace waytous
#ifndef WAYTOUS_DEEPINFER_TRACK_H_
#define WAYTOUS_DEEPINFER_TRACK_H_
#include <vector>
#include "libs/ios/detection.h"
#include "libs/trackers/tracker_datatype.h"
#include "libs/trackers/kalman_filter.h"
namespace waytous{
namespace deepinfer{
namespace tracker{
// Lifecycle states of a single track.
enum TrackState { New = 0, Tracked, Lost, Removed };
// One tracked object: wraps a 2D detection, a Kalman-filtered box state,
// and bookkeeping (id, frame counters, lifecycle state).
class Track
{
public:
// Build a track in the New state from a 2D detection; no id assigned yet.
Track(ios::Det2DPtr obj);
~Track();
// Convert (x1,y1,x2,y2) to (x,y,w,h). NOTE: mutates the argument in place.
std::vector<float> static tlbr_to_tlwh(std::vector<float> &tlbr);
// Run the Kalman prediction step for every track in the list.
void static multi_predict(std::vector<Track*> &tracks, KalmanFilter &kalman_filter);
// Refresh tlwh from the raw detection (New state) or the Kalman mean.
void static_tlwh();
// Refresh tlbr from tlwh.
void static_tlbr();
// Convert (x,y,w,h) to (center-x, center-y, aspect w/h, h).
std::vector<float> tlwh_to_xyah(std::vector<float> tlwh_tmp);
// Current smoothed box in Kalman measurement space.
std::vector<float> to_xyah();
void mark_lost();
void mark_removed();
// Next globally unique track id (shared static counter).
int next_id();
// Frame id of the most recent update.
int end_frame();
// Start tracking: assign an id and seed the Kalman filter.
void activate(KalmanFilter &kalman_filter, int frame_id);
// Revive a lost track with a new detection; optionally assign a new id.
void re_activate(Track &new_track, int frame_id, bool new_id = false);
// Fold a matched detection into the Kalman state.
void update(Track &new_track, int frame_id);
public:
bool is_activated; // true once the track has been confirmed
int track_id;
int state; // holds a TrackState value
std::vector<float> _tlwh; // raw detection box (top-left x, y, w, h)
std::vector<float> tlwh; // smoothed box (top-left x, y, w, h)
std::vector<float> tlbr; // smoothed box (x1, y1, x2, y2)
int frame_id; // frame of the most recent update
int tracklet_len; // consecutive updates since (re)activation
int start_frame; // frame on which the track was activated
KAL_MEAN mean; // Kalman state mean (1x8)
KAL_COVA covariance; // Kalman state covariance (8x8)
ios::Det2DPtr obj_; // underlying detection, kept in sync with the track
private:
KalmanFilter kalman_filter;
};
} //namespace tracker
} //namespace deepinfer
} //namespace waytous
#endif
#ifndef WAYTOUS_DEEPINFER_TRACKER_DATATYPE_H_
#define WAYTOUS_DEEPINFER_TRACKER_DATATYPE_H_
#include <vector>
#include <Eigen/Core>
#include <Eigen/Dense>
namespace waytous{
namespace deepinfer{
namespace tracker{
// Single detection box in (center-x, center-y, aspect, height) order.
typedef Eigen::Matrix<float, 1, 4, Eigen::RowMajor> DETECTBOX; // xyah
// Stack of detection boxes, one row per detection.
typedef Eigen::Matrix<float, -1, 4, Eigen::RowMajor> DETECTBOXSS;
//Kalmanfilter
//typedef Eigen::Matrix<float, 8, 8, Eigen::RowMajor> KAL_FILTER;
typedef Eigen::Matrix<float, 1, 8, Eigen::RowMajor> KAL_MEAN; // state mean
typedef Eigen::Matrix<float, 8, 8, Eigen::RowMajor> KAL_COVA; // state covariance
typedef Eigen::Matrix<float, 1, 4, Eigen::RowMajor> KAL_HMEAN; // projected (measurement) mean
typedef Eigen::Matrix<float, 4, 4, Eigen::RowMajor> KAL_HCOVA; // projected (measurement) covariance
using KAL_DATA = std::pair<KAL_MEAN, KAL_COVA>;
using KAL_HDATA = std::pair<KAL_HMEAN, KAL_HCOVA>;
//tracker:
// A (track index, detection index) match produced by association.
using MATCH_DATA = std::pair<int, int>;
// Result of one association round: matched pairs plus the indices left
// unmatched on each side.
typedef struct t {
std::vector<MATCH_DATA> matches;
std::vector<int> unmatched_tracks;
std::vector<int> unmatched_detections;
}TRACHER_MATCHD;
//linear_assignment:
// Dynamically sized cost matrix used by linear assignment.
typedef Eigen::Matrix<float, -1, -1, Eigen::RowMajor> DYNAMICM;
} //namespace tracker
} //namespace deepinfer
} //namespace waytous
#endif
#include "models/camera_model.h"
namespace waytous {
namespace deepinfer {
namespace camera {
// Load the model's YAML config, read the input/output binding names, and
// construct + initialize every processing unit listed under "units".
// Returns false (with a warning logged) on a missing config file, a bad
// YAML document, or any unit that fails to initialize.
bool CameraModel::Init(std::string& configPath) {
    // CUDA_CHECK(cudaStreamCreate(&stream_));
    std::string cfgPath = common::GetAbsolutePath(common::ConfigRoot::GetRootPath(), configPath);
    if (!common::PathExists(cfgPath)) {
        LOG_WARN << "config_file "<< common::ConfigRoot::GetRootPath() << " " << cfgPath << " not exist.";
        return false;
    }
    modelConfigNode = YAML::LoadFile(cfgPath);
    if (modelConfigNode.IsNull()) {
        LOG_WARN << "Load " << configPath << " failed! please check!";
        return false;
    }
    // BUG FIX: YAML::Node::as<T>(arg) treats its argument as a FALLBACK
    // value, not a lookup key; index the mapping first, then convert.
    inputNames = modelConfigNode["inputNames"].as<std::vector<std::string>>();
    outputNames = modelConfigNode["outputNames"].as<std::vector<std::string>>();
    auto globalParamNode = modelConfigNode["globalParams"];
    // Build each configured unit, in the order listed.
    auto unitNodes = modelConfigNode["units"];
    for (std::size_t i = 0; i < unitNodes.size(); i++) {
        auto unitNode = unitNodes[i];
        std::string unitName = unitNode["name"].as<std::string>();
        // NOTE(review): assumes BaseUnit is copy-constructible from whatever
        // GetInstanceByName returns -- confirm against BaseUnitRegisterer.
        interfaces::BaseUnitPtr unitPtr = std::make_shared<interfaces::BaseUnit>(interfaces::BaseUnitRegisterer::GetInstanceByName(unitName));
        if (!unitPtr->Init(unitNode, globalParamNode)) {
            LOG_WARN << "Init unit " << unitName << " failed!";
            return false;
        }
        units_.push_back(unitPtr);
    }
    return true;
}
// Run the full unit pipeline on raw cv::Mat inputs: bind inputs[i] to the
// configured name inputNames[i], execute every unit in order, then collect
// the IOs registered under the configured output names into `outputs`.
bool CameraModel::Exec(std::vector<cv::Mat*> inputs, std::vector<interfaces::BaseIOPtr>& outputs){
    // BUG FIX: guard against more inputs than configured names, which would
    // index past the end of inputNames (undefined behavior).
    if (inputs.size() > inputNames.size()) {
        LOG_WARN << "Got " << inputs.size() << " inputs but only "
                 << inputNames.size() << " configured input names.";
        return false;
    }
    // Bind each image to its configured source slot.
    for (size_t i = 0; i < inputs.size(); i++) {
        auto input = std::make_shared<ios::CameraSrcIn>(ios::CameraSrcIn(inputs[i]));
        interfaces::SetIOPtr(inputNames[i], input);
    }
    // Execute the unit chain in configured order (by reference: avoid
    // copying the shared_ptr each iteration).
    for (const auto& unit : units_) {
        unit->Exec();
    }
    // Collect the configured outputs.
    for (const auto& outName : outputNames) {
        outputs.push_back(interfaces::GetIOPtr(outName));
    }
    return true;
}
// Run the full unit pipeline on already-loaded device images: bind
// inputs[i] to the configured name inputNames[i], execute every unit in
// order, then collect the configured outputs into `outputs`.
bool CameraModel::Exec(std::vector<base::Image8UPtr> inputs, std::vector<interfaces::BaseIOPtr>& outputs){
    // BUG FIX: guard against more inputs than configured names, which would
    // index past the end of inputNames (undefined behavior).
    if (inputs.size() > inputNames.size()) {
        LOG_WARN << "Got " << inputs.size() << " inputs but only "
                 << inputNames.size() << " configured input names.";
        return false;
    }
    // Bind each image to its configured source slot.
    for (size_t i = 0; i < inputs.size(); i++) {
        auto input = std::make_shared<ios::CameraSrcOut>(ios::CameraSrcOut(inputs[i]));
        interfaces::SetIOPtr(inputNames[i], input);
    }
    // Execute the unit chain in configured order (by reference: avoid
    // copying the shared_ptr each iteration).
    for (const auto& unit : units_) {
        unit->Exec();
    }
    // Collect the configured outputs.
    for (const auto& outName : outputNames) {
        outputs.push_back(interfaces::GetIOPtr(outName));
    }
    return true;
}
// Registered model name used by the model factory/registerer.
std::string CameraModel::Name(){
return "CameraModel";
}
DEEPINFER_REGISTER_MODELS(CameraModel);
} // namespace camera
} // namespace deepinfer
} // namespace waytous
#ifndef DEEPINFER_CAMERA_MODEL_H_
#define DEEPINFER_CAMERA_MODEL_H_
#include <string>
#include <vector>
#include <memory>
#include "libs/sources/camera_src.h"
#include "interfaces/base_model.h"
namespace waytous {
namespace deepinfer {
namespace camera {
// Camera inference model: loads a YAML pipeline description and runs a
// chain of processing units over image inputs.
class CameraModel: public interfaces::BaseModel{
public:
// Parse the YAML at configPath (relative to the config root) and build the
// unit pipeline. Returns false on any failure.
virtual bool Init(std::string& configPath) override;
// Run the pipeline on raw cv::Mat inputs / preloaded device images.
// NOTE(review): not marked override -- presumably these overloads are not
// declared in BaseModel; confirm against the base class.
virtual bool Exec(std::vector<cv::Mat*> inputs, std::vector<interfaces::BaseIOPtr>& outputs);
virtual bool Exec(std::vector<base::Image8UPtr> inputs, std::vector<interfaces::BaseIOPtr>& outputs);
virtual std::string Name() override;
public:
YAML::Node modelConfigNode; // parsed model configuration
std::vector<interfaces::BaseUnitPtr> units_; // pipeline units in execution order
};
} // namespace camera
} // namespace deepinfer
} // namespace waytous
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment