#include "libs/inferences/tensorrt/trt_yolov5_infer.h"

using namespace nvinfer1;


namespace waytous {
namespace deepinfer {
namespace inference {


IScaleLayer* YoloV5TRTInference::addBatchNorm2d(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname, float eps) {
    float *gamma = (float*)weightMap[lname + ".weight"].values;
    float *beta = (float*)weightMap[lname + ".bias"].values;
    float *mean = (float*)weightMap[lname + ".running_mean"].values;
    float *var = (float*)weightMap[lname + ".running_var"].values;
    int len = weightMap[lname + ".running_var"].count;
    // LOG_INFO <<lname<<" "<<len;

    float *scval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    for (int i = 0; i < len; i++) {
        scval[i] = gamma[i] / sqrt(var[i] + eps);
    }
    Weights scale{ DataType::kFLOAT, scval, len };

    float *shval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    for (int i = 0; i < len; i++) {
        shval[i] = beta[i] - mean[i] * gamma[i] / sqrt(var[i] + eps);
    }
    Weights shift{ DataType::kFLOAT, shval, len };

    float *pval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    for (int i = 0; i < len; i++) {
        pval[i] = 1.0;
    }
    Weights power{ DataType::kFLOAT, pval, len };

    weightMap[lname + ".scale"] = scale;
    weightMap[lname + ".shift"] = shift;
    weightMap[lname + ".power"] = power;
    IScaleLayer* scale_1 = network->addScale(input, ScaleMode::kCHANNEL, shift, scale, power);
    assert(scale_1);
    return scale_1;
}


ILayer* YoloV5TRTInference::convBlock(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int outch, int ksize, int s, int g, std::string lname) {
    Weights emptywts{ DataType::kFLOAT, nullptr, 0 };
    int p = ksize / 3;
    IConvolutionLayer* conv1 = network->addConvolutionNd(input, outch, DimsHW{ ksize, ksize }, weightMap[lname + ".conv.weight"], emptywts);
    assert(conv1);
    conv1->setStrideNd(DimsHW{ s, s });
    conv1->setPaddingNd(DimsHW{ p, p });
    conv1->setNbGroups(g);
    IScaleLayer* bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), lname + ".bn", 1e-3);

    // silu = x * sigmoid
    auto sig = network->addActivation(*bn1->getOutput(0), ActivationType::kSIGMOID);
    assert(sig);
    auto ew = network->addElementWise(*bn1->getOutput(0), *sig->getOutput(0), ElementWiseOperation::kPROD);
    assert(ew);
    return ew;
}

ILayer* YoloV5TRTInference::focus(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int inch, int outch, int ksize, std::string lname, int inputWidth, int inputHeight) {
    ISliceLayer *s1 = network->addSlice(input, Dims3{ 0, 0, 0 }, Dims3{ inch, inputHeight / 2, inputWidth / 2 }, Dims3{ 1, 2, 2 });
    ISliceLayer *s2 = network->addSlice(input, Dims3{ 0, 1, 0 }, Dims3{ inch, inputHeight / 2, inputWidth / 2 }, Dims3{ 1, 2, 2 });
    ISliceLayer *s3 = network->addSlice(input, Dims3{ 0, 0, 1 }, Dims3{ inch, inputHeight / 2, inputWidth / 2 }, Dims3{ 1, 2, 2 });
    ISliceLayer *s4 = network->addSlice(input, Dims3{ 0, 1, 1 }, Dims3{ inch, inputHeight / 2, inputWidth / 2 }, Dims3{ 1, 2, 2 });
    ITensor* inputTensors[] = { s1->getOutput(0), s2->getOutput(0), s3->getOutput(0), s4->getOutput(0) };
    auto cat = network->addConcatenation(inputTensors, 4);
    auto conv = convBlock(network, weightMap, *cat->getOutput(0), outch, ksize, 1, 1, lname + ".conv");
    return conv;
}

ILayer* YoloV5TRTInference::bottleneck(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, bool shortcut, int g, float e, std::string lname) {
    auto cv1 = convBlock(network, weightMap, input, (int)((float)c2 * e), 1, 1, 1, lname + ".cv1");
    auto cv2 = convBlock(network, weightMap, *cv1->getOutput(0), c2, 3, 1, g, lname + ".cv2");
    if (shortcut && c1 == c2) {
        auto ew = network->addElementWise(input, *cv2->getOutput(0), ElementWiseOperation::kSUM);
        return ew;
    }
    return cv2;
}

ILayer* YoloV5TRTInference::bottleneckCSP(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int n, bool shortcut, int g, float e, std::string lname) {
    Weights emptywts{ DataType::kFLOAT, nullptr, 0 };
    int c_ = (int)((float)c2 * e);
    auto cv1 = convBlock(network, weightMap, input, c_, 1, 1, 1, lname + ".cv1");
    auto cv2 = network->addConvolutionNd(input, c_, DimsHW{ 1, 1 }, weightMap[lname + ".cv2.weight"], emptywts);
    ITensor *y1 = cv1->getOutput(0);
    for (int i = 0; i < n; i++) {
        auto b = bottleneck(network, weightMap, *y1, c_, c_, shortcut, g, 1.0, lname + ".m." + std::to_string(i));
        y1 = b->getOutput(0);
    }
    auto cv3 = network->addConvolutionNd(*y1, c_, DimsHW{ 1, 1 }, weightMap[lname + ".cv3.weight"], emptywts);

    ITensor* inputTensors[] = { cv3->getOutput(0), cv2->getOutput(0) };
    auto cat = network->addConcatenation(inputTensors, 2);

    IScaleLayer* bn = addBatchNorm2d(network, weightMap, *cat->getOutput(0), lname + ".bn", 1e-4);
    auto lr = network->addActivation(*bn->getOutput(0), ActivationType::kLEAKY_RELU);
    lr->setAlpha(0.1);

    auto cv4 = convBlock(network, weightMap, *lr->getOutput(0), c2, 1, 1, 1, lname + ".cv4");
    return cv4;
}

ILayer* YoloV5TRTInference::C3(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int n, bool shortcut, int g, float e, std::string lname) {
    int c_ = (int)((float)c2 * e);
    auto cv1 = convBlock(network, weightMap, input, c_, 1, 1, 1, lname + ".cv1");
    auto cv2 = convBlock(network, weightMap, input, c_, 1, 1, 1, lname + ".cv2");
    ITensor *y1 = cv1->getOutput(0);
    for (int i = 0; i < n; i++) {
        auto b = bottleneck(network, weightMap, *y1, c_, c_, shortcut, g, 1.0, lname + ".m." + std::to_string(i));
        y1 = b->getOutput(0);
    }

    ITensor* inputTensors[] = { y1, cv2->getOutput(0) };
    auto cat = network->addConcatenation(inputTensors, 2);

    auto cv3 = convBlock(network, weightMap, *cat->getOutput(0), c2, 1, 1, 1, lname + ".cv3");
    return cv3;
}

ILayer* YoloV5TRTInference::SPP(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int k1, int k2, int k3, std::string lname) {
    int c_ = c1 / 2;
    auto cv1 = convBlock(network, weightMap, input, c_, 1, 1, 1, lname + ".cv1");

    auto pool1 = network->addPoolingNd(*cv1->getOutput(0), PoolingType::kMAX, DimsHW{ k1, k1 });
    pool1->setPaddingNd(DimsHW{ k1 / 2, k1 / 2 });
    pool1->setStrideNd(DimsHW{ 1, 1 });
    auto pool2 = network->addPoolingNd(*cv1->getOutput(0), PoolingType::kMAX, DimsHW{ k2, k2 });
    pool2->setPaddingNd(DimsHW{ k2 / 2, k2 / 2 });
    pool2->setStrideNd(DimsHW{ 1, 1 });
    auto pool3 = network->addPoolingNd(*cv1->getOutput(0), PoolingType::kMAX, DimsHW{ k3, k3 });
    pool3->setPaddingNd(DimsHW{ k3 / 2, k3 / 2 });
    pool3->setStrideNd(DimsHW{ 1, 1 });

    ITensor* inputTensors[] = { cv1->getOutput(0), pool1->getOutput(0), pool2->getOutput(0), pool3->getOutput(0) };
    auto cat = network->addConcatenation(inputTensors, 4);

    auto cv2 = convBlock(network, weightMap, *cat->getOutput(0), c2, 1, 1, 1, lname + ".cv2");
    return cv2;
}

ILayer* YoloV5TRTInference::SPPF(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int k, std::string lname) {
    int c_ = c1 / 2;
    auto cv1 = convBlock(network, weightMap, input, c_, 1, 1, 1, lname + ".cv1");

    auto pool1 = network->addPoolingNd(*cv1->getOutput(0), PoolingType::kMAX, DimsHW{ k, k });
    pool1->setPaddingNd(DimsHW{ k / 2, k / 2 });
    pool1->setStrideNd(DimsHW{ 1, 1 });
    auto pool2 = network->addPoolingNd(*pool1->getOutput(0), PoolingType::kMAX, DimsHW{ k, k });
    pool2->setPaddingNd(DimsHW{ k / 2, k / 2 });
    pool2->setStrideNd(DimsHW{ 1, 1 });
    auto pool3 = network->addPoolingNd(*pool2->getOutput(0), PoolingType::kMAX, DimsHW{ k, k });
    pool3->setPaddingNd(DimsHW{ k / 2, k / 2 });
    pool3->setStrideNd(DimsHW{ 1, 1 });
    ITensor* inputTensors[] = { cv1->getOutput(0), pool1->getOutput(0), pool2->getOutput(0), pool3->getOutput(0) };
    auto cat = network->addConcatenation(inputTensors, 4);
    auto cv2 = convBlock(network, weightMap, *cat->getOutput(0), c2, 1, 1, 1, lname + ".cv2");
    return cv2;
}

std::vector<std::vector<float>> YoloV5TRTInference::getAnchors(std::map<std::string, Weights>& weightMap, std::string lname, int numAnchorsPerLevel) {
    std::vector<std::vector<float>> anchors;
    Weights wts = weightMap[lname + ".anchor_grid"];
    int anchor_len = numAnchorsPerLevel * 2;
    for (int i = 0; i < wts.count / anchor_len; i++) {
        auto *p = (const float*)wts.values + i * anchor_len;
        std::vector<float> anchor(p, p + anchor_len);
        anchors.push_back(anchor);
    }
    return anchors;
}

IPluginV2Layer* YoloV5TRTInference::addYoLoLayer(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, std::string lname, std::vector<IConvolutionLayer*> dets,
    int classNumber, int inputWidth, int inputHeight, int maxOutputBBoxCount, int numAnchorsPerLevel, int startScale) {
    auto creator = getPluginRegistry()->getPluginCreator("YoloV5Layer_TRT", "1");
    auto anchors = getAnchors(weightMap, lname, numAnchorsPerLevel);
    PluginField plugin_fields[2];
    // add numAnchorsPerLevel as param
    int netinfo[5] = {classNumber, inputWidth, inputHeight, maxOutputBBoxCount, numAnchorsPerLevel};
    plugin_fields[0].data = netinfo;
    plugin_fields[0].length = 5;
    plugin_fields[0].name = "netinfo";
    plugin_fields[0].type = PluginFieldType::kFLOAT32;
    int scale = startScale;
    std::vector<Yolo::YoloKernel> kernels;
    for (size_t i = 0; i < anchors.size(); i++) {
        Yolo::YoloKernel kernel;
        kernel.width = inputWidth / scale;
        kernel.height = inputHeight / scale;
        memcpy(kernel.anchors, &anchors[i][0], anchors[i].size() * sizeof(float));
        kernels.push_back(kernel);
        scale *= 2;
    }
    plugin_fields[1].data = &kernels[0];
    plugin_fields[1].length = kernels.size();
    plugin_fields[1].name = "kernels";
    plugin_fields[1].type = PluginFieldType::kFLOAT32;
    PluginFieldCollection plugin_data;
    plugin_data.nbFields = 2;
    plugin_data.fields = plugin_fields;
    IPluginV2 *plugin_obj = creator->createPlugin("yolov5_layer", &plugin_data);
    std::vector<ITensor*> input_tensors;
    for (auto det: dets) {
        input_tensors.push_back(det->getOutput(0));
    }
    auto yolo = network->addPluginV2(&input_tensors[0], input_tensors.size(), *plugin_obj);
    return yolo;
}

IPluginV2Layer *YoloV5TRTInference::addBatchedNMSLayer(INetworkDefinition *network, IPluginV2Layer *yolo, int num_classes, int top_k, int keep_top_k, float score_thresh, float iou_thresh, bool is_normalized, bool clip_boxes)
{
    auto creator = getPluginRegistry()->getPluginCreator("BatchedNMS_TRT", "1");
    // Set plugin fields and the field collection
    const bool share_location = true;
    const int background_id = -1;
    PluginField fields[9] = {
        PluginField{"shareLocation", &share_location,
                    PluginFieldType::kINT32, 1},
        PluginField{"backgroundLabelId", &background_id,
                    PluginFieldType::kINT32, 1},
        PluginField{"numClasses", &num_classes,
                    PluginFieldType::kINT32, 1},
        PluginField{"topK", &top_k, PluginFieldType::kINT32,
                    1},
        PluginField{"keepTopK", &keep_top_k,
                    PluginFieldType::kINT32, 1},
        PluginField{"scoreThreshold", &score_thresh,
                    PluginFieldType::kFLOAT32, 1},
        PluginField{"iouThreshold", &iou_thresh,
                    PluginFieldType::kFLOAT32, 1},
        PluginField{"isNormalized", &is_normalized,
                    PluginFieldType::kINT32, 1},
        PluginField{"clipBoxes", &clip_boxes,
                    PluginFieldType::kINT32, 1},
    };
    PluginFieldCollection pfc{9, fields};
    IPluginV2 *pluginObj = creator->createPlugin("batchednms", &pfc);
    ITensor *inputTensors[] = {yolo->getOutput(0), yolo->getOutput(1)};
    auto batchednmslayer = network->addPluginV2(inputTensors, 2, *pluginObj);
    batchednmslayer->setName("nms_layer");
    assert(batchednmslayer);
    return batchednmslayer;
}



bool YoloV5TRTInference::BuildEngine(YAML::Node& configNode){
    initLibNvInferPlugins(&gLogger, "");
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
    nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
    nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0U);

    int runMode = configNode["runMode"].as<int>();
    int maxBatchSize = configNode["maxBatchSize"].as<int>();
    int classNumber = configNode["classNumber"].as<int>();
    int numAnchorsPerLevel = configNode["numAnchorsPerLevel"].as<int>();
    int numLevels = configNode["numLevels"].as<int>();
    int startScale = configNode["startScale"].as<int>();
    int maxOutputBBoxCount = configNode["maxOutputBBoxCount"].as<int>();
    int keepTopK = configNode["keepTopK"].as<int>();
    float scoreThreshold = configNode["engineScoreThreshold"].as<float>();
    float nmsThreshold = configNode["nmsThreshold"].as<float>();
    float gw = configNode["gw"].as<float>();
    float gd = configNode["gd"].as<float>();

    std::string weightsPath = configNode["weightsPath"].as<std::string>();
    weightsPath = common::GetAbsolutePath(common::ConfigRoot::GetRootPath(), weightsPath);

    // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
    ITensor* data = network->addInput(inputNames[0].c_str(), nvinfer1::DataType::kFLOAT, 
                Dims3{ 3, inputHeight, inputWidth});
    assert(data);

    std::map<std::string, Weights> weightMap = loadWeights(weightsPath);
    // for(auto& x: weightMap){
    //     LOG_INFO <<x.first;
    // }

    /* ------ yolov5 backbone------ */
    auto conv0 = convBlock(network, weightMap, *data,  get_width(64, gw), 6, 2, 1,  "model.0");
    assert(conv0);
    auto conv1 = convBlock(network, weightMap, *conv0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
    auto bottleneck_CSP2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
    auto conv3 = convBlock(network, weightMap, *bottleneck_CSP2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
    auto bottleneck_csp4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(6, gd), true, 1, 0.5, "model.4");
    auto conv5 = convBlock(network, weightMap, *bottleneck_csp4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
    auto bottleneck_csp6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
    auto conv7 = convBlock(network, weightMap, *bottleneck_csp6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7");
    auto bottleneck_csp8 = C3(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), true, 1, 0.5, "model.8");
    auto spp9 = SPPF(network, weightMap, *bottleneck_csp8->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, "model.9");
    /* ------ yolov5 head ------ */
    auto conv10 = convBlock(network, weightMap, *spp9->getOutput(0), get_width(512, gw), 1, 1, 1, "model.10");

    auto upsample11 = network->addResize(*conv10->getOutput(0));
    assert(upsample11);
    upsample11->setResizeMode(ResizeMode::kNEAREST);
    upsample11->setOutputDimensions(bottleneck_csp6->getOutput(0)->getDimensions());

    ITensor* inputTensors12[] = { upsample11->getOutput(0), bottleneck_csp6->getOutput(0) };
    auto cat12 = network->addConcatenation(inputTensors12, 2);
    auto bottleneck_csp13 = C3(network, weightMap, *cat12->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.13");
    auto conv14 = convBlock(network, weightMap, *bottleneck_csp13->getOutput(0), get_width(256, gw), 1, 1, 1, "model.14");

    auto upsample15 = network->addResize(*conv14->getOutput(0));
    assert(upsample15);
    upsample15->setResizeMode(ResizeMode::kNEAREST);
    upsample15->setOutputDimensions(bottleneck_csp4->getOutput(0)->getDimensions());

    ITensor* inputTensors16[] = { upsample15->getOutput(0), bottleneck_csp4->getOutput(0) };
    auto cat16 = network->addConcatenation(inputTensors16, 2);

    auto bottleneck_csp17 = C3(network, weightMap, *cat16->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.17");

    /* ------ detect ------ */
    IConvolutionLayer* det0 = network->addConvolutionNd(*bottleneck_csp17->getOutput(0), numAnchorsPerLevel * (classNumber + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.0.weight"], weightMap["model.24.m.0.bias"]);
    auto conv18 = convBlock(network, weightMap, *bottleneck_csp17->getOutput(0), get_width(256, gw), 3, 2, 1, "model.18");
    ITensor* inputTensors19[] = { conv18->getOutput(0), conv14->getOutput(0) };
    auto cat19 = network->addConcatenation(inputTensors19, 2);
    auto bottleneck_csp20 = C3(network, weightMap, *cat19->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.20");
    IConvolutionLayer* det1 = network->addConvolutionNd(*bottleneck_csp20->getOutput(0), numAnchorsPerLevel * (classNumber + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.1.weight"], weightMap["model.24.m.1.bias"]);
    auto conv21 = convBlock(network, weightMap, *bottleneck_csp20->getOutput(0), get_width(512, gw), 3, 2, 1, "model.21");
    ITensor* inputTensors22[] = { conv21->getOutput(0), conv10->getOutput(0) };
    auto cat22 = network->addConcatenation(inputTensors22, 2);
    auto bottleneck_csp23 = C3(network, weightMap, *cat22->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
    IConvolutionLayer* det2 = network->addConvolutionNd(*bottleneck_csp23->getOutput(0), numAnchorsPerLevel * (classNumber + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.2.weight"], weightMap["model.24.m.2.bias"]);

    auto yolo = addYoLoLayer(network, weightMap, "model.24", std::vector<IConvolutionLayer *>{det0, det1, det2}, classNumber,
                    inputWidth, inputHeight, maxOutputBBoxCount, numAnchorsPerLevel, startScale);
    auto nms = addBatchedNMSLayer(network, yolo, classNumber, maxOutputBBoxCount, 
                    keepTopK, scoreThreshold, nmsThreshold, false, false);

    nms->getOutput(0)->setName(outputNames[0].c_str());
    network->markOutput(*nms->getOutput(0));

    nms->getOutput(1)->setName(outputNames[1].c_str());
    network->markOutput(*nms->getOutput(1));

    nms->getOutput(2)->setName(outputNames[2].c_str());
    network->markOutput(*nms->getOutput(2));

    nms->getOutput(3)->setName(outputNames[3].c_str());
    network->markOutput(*nms->getOutput(3));

    // Build engine
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(maxBatchSize * (1 << 30)); // 1GB
    int8EntroyCalibrator *calibrator = nullptr;
    if (runMode == 1){
        config->setFlag(BuilderFlag::kFP16);
    }else if (runMode == 2){
        LOG_ERROR << "no supported int8";
        /*LOG_INFO << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false");
        assert(builder->platformHasFastInt8());
        config->setFlag(BuilderFlag::kINT8);
        calibrator = new int8EntroyCalibrator(configNode["calibImgPathFile"].as<std::string>(), 
                        configNode["calibTableCache"].as<std::string>(), 
                        maxBatchSize, inputWidth, inputHeight, inputMean, inputStd, useBGR);
        config->setInt8Calibrator(calibrator);
        */
    }

    LOG_INFO << "Building engine, please wait for a while...";
    ICudaEngine *engine = builder->buildEngineWithConfig(*network, *config);
    LOG_INFO << "Build engine successfully!";

    if (!engine){
        std::string error_message ="Unable to create engine";
        gLogger.log(nvinfer1::ILogger::Severity::kERROR, error_message.c_str());
    }
    LOG_INFO << "End building engine...";

    // Serialize the engine, save engine file and close everything down.
    nvinfer1::IHostMemory* modelStream = engine->serialize();
    engine->destroy();
    network->destroy();
    builder->destroy();
    assert(modelStream != nullptr);
    if(calibrator){
        delete calibrator;
        calibrator = nullptr;
    }
    // write
    std::ofstream file(engineFile, std::ios::binary);
    assert(file);
    file.write(static_cast<char*>(modelStream->data()), modelStream->size());
    assert(!file.fail());
    modelStream->destroy();
    LOG_INFO <<"End writing engine";
    CUDA_CHECK(cudaStreamCreate(&mCudaStream));

    // Release host memory
    for (auto &mem : weightMap)
    {
        free((void *)(mem.second.values));
    }
}


std::string YoloV5TRTInference::Name(){
    return "YoloV5TRTInference";
}


DEEPINFER_REGISTER_UNIT(YoloV5TRTInference);

}  // namespace inference
}  // namespace deepinfer
}  // namespace waytous
