Commit 9a613def authored by xin.wang.waytous

trt-8

parent 54cfa83e
......@@ -27,8 +27,8 @@ public:
virtual bool Init(std::string& configPath) = 0;
// for camera-image inference, dynamic_cast requires the base-class functions to be implemented (or pure virtual)
virtual bool Exec(std::vector<cv::Mat*> inputs, std::vector<BaseIOPtr>& outputs){};
virtual bool Exec(std::vector<base::Image8UPtr> inputs, std::vector<BaseIOPtr>& outputs){};
virtual bool Exec(std::vector<cv::Mat*> inputs, std::vector<BaseIOPtr>& outputs){return false;};
virtual bool Exec(std::vector<base::Image8UPtr> inputs, std::vector<BaseIOPtr>& outputs){return false;};
virtual std::string Name(){
return "BaseModel";
......
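A minimal, self-contained sketch of why the empty `{}` bodies above were changed to `return false;`: flowing off the end of a value-returning function is undefined behaviour the moment it is called, and these base-class defaults are exactly what runs when a subclass overrides only one of the two `Exec` overloads. `ExecFromMat`/`ExecFromImage` are hypothetical stand-ins for the real `cv::Mat*` / `base::Image8UPtr` overloads so that the sketch needs no OpenCV headers.

```cpp
#include <cstdio>
#include <vector>

struct BaseModel {
    virtual ~BaseModel() = default;
    // Defaults report failure instead of falling off the end of the function.
    virtual bool ExecFromMat(const std::vector<int>& /*inputs*/) { return false; }
    virtual bool ExecFromImage(const std::vector<float>& /*inputs*/) { return false; }
};

struct MatOnlyModel : BaseModel {
    bool ExecFromMat(const std::vector<int>& inputs) override { return !inputs.empty(); }
    // ExecFromImage is deliberately not overridden: callers get a well-defined false.
};

int main() {
    MatOnlyModel m;
    BaseModel& b = m;
    std::printf("mat: %d, image: %d\n", b.ExecFromMat({1}), b.ExecFromImage({}));
    return 0;
}
```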
......@@ -36,6 +36,7 @@ int main(int argc, char** argv){
std::string configPath = argv[3];
std::string srcPath = argv[4];
std::string savePath = argv[5];
int infer_count = 1000;
auto t = interfaces::BaseTaskRegisterer::GetInstanceByName(taskName);
if(!t->Init(configPath)){
printf("Init problems!\n");
......@@ -60,13 +61,13 @@ int main(int argc, char** argv){
}
std::cout << "before infer." << std::endl;
auto e1 = std::chrono::system_clock::now();
for(int i=0; i<100; i++){
for(int i=0; i<infer_count; i++){
outputs.clear();
t->Exec(inputs, outputs);
}
auto e2 = std::chrono::system_clock::now();
std::cout << "100 times avg infer time: " <<
std::chrono::duration_cast<std::chrono::microseconds>(e2 - e1).count() / 1000. / 100. << " ms" << std::endl;
std::chrono::duration_cast<std::chrono::microseconds>(e2 - e1).count() / 1000. / infer_count << " ms" << std::endl;
if(inputs.size() != outputs.size()){
cv::Mat vis = images[0];
t->Visualize(&vis, outputs[0]);
......
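A hedged sketch of the timing pattern in the hunk above: run inference `infer_count` times and report the per-call average in milliseconds. `DummyInfer` is a placeholder for `t->Exec(inputs, outputs)`; only the chrono arithmetic mirrors the diff.

```cpp
#include <chrono>
#include <iostream>

static void DummyInfer() { /* placeholder for t->Exec(inputs, outputs) */ }

int main() {
    const int infer_count = 1000;
    auto e1 = std::chrono::system_clock::now();
    for (int i = 0; i < infer_count; ++i) {
        DummyInfer();
    }
    auto e2 = std::chrono::system_clock::now();
    const double total_us = static_cast<double>(
        std::chrono::duration_cast<std::chrono::microseconds>(e2 - e1).count());
    std::cout << infer_count << " times avg infer time: "
              << total_us / 1000.0 / infer_count << " ms" << std::endl;
    return 0;
}
```

For interval measurement, `std::chrono::steady_clock` would be the safer choice than `system_clock`, since the system clock can be adjusted while the loop is running.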
......@@ -60,9 +60,9 @@ int8EntroyCalibrator::~int8EntroyCalibrator(){
delete[] batchData;
};
int int8EntroyCalibrator::getBatchSize() const { return batchSize; };
int int8EntroyCalibrator::getBatchSize() const TRT_NOEXCEPT{ return batchSize; };
bool int8EntroyCalibrator::getBatch(void *bindings[], const char *names[], int nbBindings){
bool int8EntroyCalibrator::getBatch(void *bindings[], const char *names[], int nbBindings) TRT_NOEXCEPT{
if (imageIndex + batchSize > int(imgPaths.size()))
return false;
// load batch
......@@ -80,7 +80,7 @@ bool int8EntroyCalibrator::getBatch(void *bindings[], const char *names[], int n
return true;
};
const void * int8EntroyCalibrator::readCalibrationCache(std::size_t &length){
const void * int8EntroyCalibrator::readCalibrationCache(std::size_t &length) TRT_NOEXCEPT{
calibrationCache.clear();
std::ifstream input(calibTablePath, std::ios::binary);
input >> std::noskipws;
......@@ -92,7 +92,7 @@ const void * int8EntroyCalibrator::readCalibrationCache(std::size_t &length){
return length ? &calibrationCache[0] : nullptr;
};
void int8EntroyCalibrator::writeCalibrationCache(const void *cache, std::size_t length){
void int8EntroyCalibrator::writeCalibrationCache(const void *cache, std::size_t length) TRT_NOEXCEPT{
std::ofstream output(calibTablePath, std::ios::binary);
output.write(reinterpret_cast<const char*>(cache), length);
};
......
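A hedged sketch of the calibration-cache round trip shown above. The hunk cuts off right after `input >> std::noskipws;`, so the `std::vector<char>` buffer and the `istream_iterator` copy below are the usual pattern rather than a quote of the file; `readCacheSketch`/`writeCacheSketch` are hypothetical free-function names standing in for the calibrator methods.

```cpp
#include <algorithm>
#include <cstddef>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>

std::vector<char> calibrationCache;  // assumed buffer type for the cached table

const void* readCacheSketch(const std::string& calibTablePath, std::size_t& length) {
    calibrationCache.clear();
    std::ifstream input(calibTablePath, std::ios::binary);
    input >> std::noskipws;  // keep whitespace bytes verbatim while streaming
    if (input.good())
        std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
                  std::back_inserter(calibrationCache));
    length = calibrationCache.size();
    return length ? calibrationCache.data() : nullptr;  // nullptr => no cache, calibrate from scratch
}

void writeCacheSketch(const std::string& calibTablePath, const void* cache, std::size_t length) {
    std::ofstream output(calibTablePath, std::ios::binary);
    output.write(reinterpret_cast<const char*>(cache), length);
}
```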
......@@ -35,13 +35,13 @@ public:
~int8EntroyCalibrator();
int getBatchSize() const override;
int getBatchSize() const TRT_NOEXCEPT override;
bool getBatch(void *bindings[], const char *names[], int nbBindings) override;
bool getBatch(void *bindings[], const char *names[], int nbBindings) TRT_NOEXCEPT override;
const void *readCalibrationCache(std::size_t &length) override;
const void *readCalibrationCache(std::size_t &length) TRT_NOEXCEPT override;
void writeCalibrationCache(const void *cache, std::size_t length) override;
void writeCalibrationCache(const void *cache, std::size_t length) TRT_NOEXCEPT override;
private:
int batchSize, inputWidth, inputHeight;
......
......@@ -13,12 +13,18 @@ TRTInference::~TRTInference(){
cudaStreamDestroy(mCudaStream);
}
if (mRunTime)
if (mRunTime){
mRunTime->destroy();
if (mContext)
mRunTime = nullptr;
}
if (mContext){
mContext->destroy();
if (mEngine)
mContext = nullptr;
}
if (mEngine){
mEngine->destroy();
mEngine = nullptr;
}
}
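A self-contained sketch of the destroy-and-null pattern introduced in the destructor above, with a plain `Destroyable` standing in for the TensorRT runtime/context/engine handles. Nulling each pointer after `destroy()` keeps teardown idempotent and turns a stale use into an obvious null access instead of a double free. (In TensorRT 8, `destroy()` still works but is deprecated in favour of plain `delete`.)

```cpp
struct Destroyable {              // stand-in for a TensorRT object exposing destroy()
    void destroy() { delete this; }
};

struct Holder {                   // stand-in for TRTInference's mRunTime/mContext/mEngine
    Destroyable* mRunTime = nullptr;
    Destroyable* mContext = nullptr;
    Destroyable* mEngine  = nullptr;

    ~Holder() {
        if (mRunTime) { mRunTime->destroy(); mRunTime = nullptr; }
        if (mContext) { mContext->destroy(); mContext = nullptr; }
        if (mEngine)  { mEngine->destroy();  mEngine  = nullptr; }
    }
};

int main() {
    Holder h;
    h.mRunTime = new Destroyable;
    h.mContext = new Destroyable;
    h.mEngine  = new Destroyable;
    return 0;                     // ~Holder releases each handle exactly once
}
```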
......@@ -129,6 +135,9 @@ bool TRTInference::BuildEngine(YAML::Node& configNode){
// int verbosity = (int) nvinfer1::ILogger::Severity::kVERBOSE;
int verbosity = (int) nvinfer1::ILogger::Severity::kINFO;
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
#ifdef TRT_8
nvinfer1::IBuilderConfig* builderConfig = builder->createBuilderConfig();
#endif
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1U);
auto parser = nvonnxparser::createParser(*network, gLogger);
......@@ -141,7 +150,11 @@ bool TRTInference::BuildEngine(YAML::Node& configNode){
}
builder->setMaxBatchSize(maxBatchSize);
#ifdef TRT_8
builderConfig->setMaxWorkspaceSize(2UL << 30);
#else
builder->setMaxWorkspaceSize(2UL << 30);// 2G
#endif
int8EntroyCalibrator *calibrator = nullptr;
int runMode = configNode["runMode"].as<int>();
......@@ -150,7 +163,11 @@ bool TRTInference::BuildEngine(YAML::Node& configNode){
LOG_INFO << "Set FP16 Mode.";
if (!builder->platformHasFastFp16())
LOG_INFO << "Notice: the platform do not has fast for fp16" ;
#ifdef TRT_8
builderConfig->setFlag(nvinfer1::BuilderFlag::kFP16);
#else
builder->setFp16Mode(true);
#endif
}
else if(runMode == 2){//int8
LOG_ERROR << "No supported int8";
......@@ -174,22 +191,28 @@ bool TRTInference::BuildEngine(YAML::Node& configNode){
return false;
}
*/
return false;
}
LOG_INFO << "Begin building engine..." ;
#ifdef TRT_8
nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *builderConfig);
#else
nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);
#endif
if (!engine){
std::string error_message ="Unable to create engine";
gLogger.log(nvinfer1::ILogger::Severity::kERROR, error_message.c_str());
return false;
}
LOG_INFO << "End building engine..." ;
// Serialize the engine, then close everything down.
modelStream = engine->serialize();
engine->destroy();
network->destroy();
builder->destroy();
parser->destroy();
engine->destroy(); engine = nullptr;
network->destroy(); network = nullptr;
builder->destroy(); builder = nullptr;
parser->destroy(); parser = nullptr;
assert(modelStream != nullptr);
if(calibrator){
delete calibrator;
......@@ -200,7 +223,7 @@ bool TRTInference::BuildEngine(YAML::Node& configNode){
assert(file);
file.write(static_cast<char*>(modelStream->data()), modelStream->size());
assert(!file.fail());
modelStream->destroy();
modelStream->destroy(); modelStream = nullptr;
CUDA_CHECK(cudaStreamCreate(&mCudaStream));
LOG_INFO << "End writing engine";
return true;
......
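A condensed sketch of the `TRT_8` build path spread across the hunks above, using only TensorRT calls visible in the diff plus `parseFromFile` for the ONNX-parsing step the hunks skip over. `BuildSerializedEngine`, `onnxPath` and `enableFp16` are hypothetical names; calibration, `setMaxBatchSize`, logging and cleanup on the error path are trimmed for brevity.

```cpp
#include <NvInfer.h>
#include <NvOnnxParser.h>

nvinfer1::IHostMemory* BuildSerializedEngine(nvinfer1::ILogger& gLogger,
                                             const char* onnxPath,
                                             bool enableFp16) {
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
    nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1U);  // 1U == explicit batch
    nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, gLogger);

    if (!parser->parseFromFile(onnxPath,
            static_cast<int>(nvinfer1::ILogger::Severity::kINFO))) {
        return nullptr;  // parse errors are already logged (cleanup omitted in this sketch)
    }

    config->setMaxWorkspaceSize(2UL << 30);  // 2 GiB of builder scratch space
    if (enableFp16 && builder->platformHasFastFp16()) {
        config->setFlag(nvinfer1::BuilderFlag::kFP16);  // runMode == 1 in the diff
    }

    nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    nvinfer1::IHostMemory* blob = engine ? engine->serialize() : nullptr;

    // The real code also nulls each pointer after destroy(), as the diff shows.
    if (engine) engine->destroy();
    parser->destroy();
    network->destroy();
    config->destroy();
    builder->destroy();
    return blob;  // caller writes blob->data()/blob->size() to the engine file
}
```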
......@@ -19,6 +19,7 @@ namespace deepinfer {
namespace inference {
#if NV_TENSORRT_MAJOR >= 8
#define TRT_8
#define TRT_NOEXCEPT noexcept
#define TRT_CONST_ENQUEUE const
#else
......@@ -77,7 +78,7 @@ public:
std::cout<< " total runtime = " << totalTime/(runTimes + 1e-5) << " ms " << std::endl;
}
virtual void reportLayerTime(const char* layerName, float ms)
virtual void reportLayerTime(const char* layerName, float ms) TRT_NOEXCEPT override
{
mProfile[layerName].count++;
mProfile[layerName].time += ms;
......@@ -95,7 +96,7 @@ public:
{
}
void log(Severity severity, const char* msg) override
void log(Severity severity, const char* msg) TRT_NOEXCEPT override
{
// suppress messages with severity enum value greater than the reportable
if (severity > reportableSeverity)
......
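A pure-C++ sketch (no TensorRT headers) of why the `TRT_NOEXCEPT` macro exists: the TensorRT 8 interfaces declare their virtual methods `noexcept`, and C++ forbids an override from having a more permissive exception specification than the function it overrides. `USE_V8_INTERFACE` is a hypothetical stand-in for the `NV_TENSORRT_MAJOR >= 8` check, and the `#else` branch below mirrors what the (truncated) pre-TRT-8 branch presumably defines: an empty expansion.

```cpp
#ifdef USE_V8_INTERFACE           // hypothetical stand-in for NV_TENSORRT_MAJOR >= 8
#define TRT_NOEXCEPT noexcept
#else
#define TRT_NOEXCEPT
#endif

struct ICalibratorLike {          // stand-in for e.g. nvinfer1::IInt8EntropyCalibrator
    virtual ~ICalibratorLike() = default;
    virtual int getBatchSize() const TRT_NOEXCEPT = 0;
};

struct MyCalibrator : ICalibratorLike {
    // Without TRT_NOEXCEPT this override does not compile whenever the base
    // declaration is noexcept, i.e. whenever the v8-style interface is in use.
    int getBatchSize() const TRT_NOEXCEPT override { return 1; }
};

int main() {
    MyCalibrator c;
    return c.getBatchSize() == 1 ? 0 : 1;
}
```

This is the same reason `reportLayerTime` and `log` above gained the macro: their TensorRT base declarations became `noexcept` in version 8.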
......@@ -365,6 +365,7 @@ bool YoloV5TRTInference::BuildEngine(YAML::Node& configNode){
maxBatchSize, inputWidth, inputHeight, inputMean, inputStd, useBGR);
config->setInt8Calibrator(calibrator);
*/
return false;
}
LOG_INFO << "Building engine, please wait for a while...";
......@@ -374,14 +375,16 @@ bool YoloV5TRTInference::BuildEngine(YAML::Node& configNode){
if (!engine){
std::string error_message ="Unable to create engine";
gLogger.log(nvinfer1::ILogger::Severity::kERROR, error_message.c_str());
return false;
}
LOG_INFO << "End building engine...";
// Serialize the engine, save engine file and close everything down.
nvinfer1::IHostMemory* modelStream = engine->serialize();
engine->destroy();
network->destroy();
builder->destroy();
engine->destroy(); engine = nullptr;
network->destroy(); network = nullptr;
builder->destroy(); builder = nullptr;
config->destroy(); config = nullptr;
assert(modelStream != nullptr);
if(calibrator){
delete calibrator;
......@@ -392,7 +395,7 @@ bool YoloV5TRTInference::BuildEngine(YAML::Node& configNode){
assert(file);
file.write(static_cast<char*>(modelStream->data()), modelStream->size());
assert(!file.fail());
modelStream->destroy();
modelStream->destroy(); modelStream = nullptr;
LOG_INFO <<"End writing engine";
CUDA_CHECK(cudaStreamCreate(&mCudaStream));
......@@ -401,6 +404,7 @@ bool YoloV5TRTInference::BuildEngine(YAML::Node& configNode){
{
free((void *)(mem.second.values));
}
return true;
}
......
......@@ -233,7 +233,7 @@ namespace nvinfer1
}
}
void YoloV5LayerPlugin::forwardGpu(const float* const* inputs, void** outputs, void* workspace, cudaStream_t stream, int batchSize)
void YoloV5LayerPlugin::forwardGpu(const float* const* inputs, void* TRT_CONST_ENQUEUE* outputs, void* workspace, cudaStream_t stream, int batchSize)
{
float *bboxData = (float *)outputs[0];
float *scoreData = (float *)outputs[1];
......@@ -258,7 +258,7 @@ namespace nvinfer1
}
int YoloV5LayerPlugin::enqueue(int batchSize, const void *const *inputs, void ** outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT
int YoloV5LayerPlugin::enqueue(int batchSize, const void *const *inputs, void* TRT_CONST_ENQUEUE* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT
{
forwardGpu((const float* const*)inputs, outputs, workspace, stream, batchSize);
return 0;
......
......@@ -73,7 +73,7 @@ namespace nvinfer1
void detachFromContext() TRT_NOEXCEPT override;
private:
void forwardGpu(const float *const *inputs, void **outputs, void *workspace, cudaStream_t stream, int batchSize = 1);
void forwardGpu(const float *const *inputs, void * TRT_CONST_ENQUEUE*outputs, void *workspace, cudaStream_t stream, int batchSize = 1);
int mThreadCount = 256;
const char *mPluginNamespace;
int mKernelCount;
......
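A pure-C++ sketch (again without TensorRT headers) of what `TRT_CONST_ENQUEUE` solves: from TensorRT 8 the plugin `enqueue()` receives its output bindings as `void* const*` rather than `void**`, so the macro expands to `const` only when building against version 8. `IPluginLike`/`MyPlugin` are hypothetical stand-ins, and `void* stream` replaces `cudaStream_t` to keep the sketch header-free.

```cpp
#ifdef USE_V8_INTERFACE           // hypothetical stand-in for NV_TENSORRT_MAJOR >= 8
#define TRT_CONST_ENQUEUE const
#else
#define TRT_CONST_ENQUEUE
#endif

struct IPluginLike {              // stand-in for the TensorRT plugin base class
    virtual ~IPluginLike() = default;
    virtual int enqueue(int batchSize, const void* const* inputs,
                        void* TRT_CONST_ENQUEUE* outputs,
                        void* workspace, void* stream) = 0;
};

struct MyPlugin : IPluginLike {
    int enqueue(int batchSize, const void* const* inputs,
                void* TRT_CONST_ENQUEUE* outputs,
                void* workspace, void* stream) override {
        (void)batchSize; (void)inputs; (void)outputs; (void)workspace; (void)stream;
        return 0;                 // 0 == success, mirroring the real enqueue above
    }
};

int main() {
    MyPlugin p;
    const void* inputs[1] = {nullptr};
    void* outputs[1] = {nullptr};
    return p.enqueue(1, inputs, outputs, nullptr, nullptr);
}
```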
......@@ -248,7 +248,7 @@ bool ByteTracker::Exec()
}
}
interfaces::SetIOPtr(outputNames[0], tracked_bboxes);
LOG_INFO << "Get " << tracked_bboxes->detections.size() << " tracked objs.";
LOG_INFO << "Get " << tracked_bboxes->detections.size() << " tracked objs. Removed track length: " << this->removed_tracks.size();
return true;
}
......