Commit 9a613def authored by xin.wang.waytous

trt-8

parent 54cfa83e
......@@ -27,8 +27,8 @@ public:
virtual bool Init(std::string& configPath) = 0;
// for camera-image inference, dynamic_cast requires the base-class functions to be implemented (or pure virtual)
virtual bool Exec(std::vector<cv::Mat*> inputs, std::vector<BaseIOPtr>& outputs){};
virtual bool Exec(std::vector<base::Image8UPtr> inputs, std::vector<BaseIOPtr>& outputs){};
virtual bool Exec(std::vector<cv::Mat*> inputs, std::vector<BaseIOPtr>& outputs){return false;};
virtual bool Exec(std::vector<base::Image8UPtr> inputs, std::vector<BaseIOPtr>& outputs){return false;};
virtual std::string Name(){
return "BaseModel";
......
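A minimal, self-contained sketch of why the empty `{}` bodies above were changed to `return false;`: flowing off the end of a value-returning function is undefined behaviour the moment it is called, and these base-class defaults are exactly what runs when a subclass overrides only one of the two `Exec` overloads. `ExecFromMat`/`ExecFromImage` are hypothetical stand-ins for the real `cv::Mat*` / `base::Image8UPtr` overloads so that the sketch needs no OpenCV headers.

```cpp
#include <cstdio>
#include <vector>

struct BaseModel {
    virtual ~BaseModel() = default;
    // Defaults report failure instead of falling off the end of the function.
    virtual bool ExecFromMat(const std::vector<int>& /*inputs*/) { return false; }
    virtual bool ExecFromImage(const std::vector<float>& /*inputs*/) { return false; }
};

struct MatOnlyModel : BaseModel {
    bool ExecFromMat(const std::vector<int>& inputs) override { return !inputs.empty(); }
    // ExecFromImage is deliberately not overridden: callers get a well-defined false.
};

int main() {
    MatOnlyModel m;
    BaseModel& b = m;
    std::printf("mat: %d, image: %d\n", b.ExecFromMat({1}), b.ExecFromImage({}));
    return 0;
}
```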
......@@ -36,6 +36,7 @@ int main(int argc, char** argv){
std::string configPath = argv[3];
std::string srcPath = argv[4];
std::string savePath = argv[5];
int infer_count = 1000;
auto t = interfaces::BaseTaskRegisterer::GetInstanceByName(taskName);
if(!t->Init(configPath)){
printf("Init problems!\n");
......@@ -60,13 +61,13 @@ int main(int argc, char** argv){
}
std::cout << "before infer." << std::endl;
auto e1 = std::chrono::system_clock::now();
for(int i=0; i<100; i++){
for(int i=0; i<infer_count; i++){
outputs.clear();
t->Exec(inputs, outputs);
}
auto e2 = std::chrono::system_clock::now();
std::cout << "100 times avg infer time: " <<
std::chrono::duration_cast<std::chrono::microseconds>(e2 - e1).count() / 1000. / 100. << " ms" << std::endl;
std::chrono::duration_cast<std::chrono::microseconds>(e2 - e1).count() / 1000. / infer_count << " ms" << std::endl;
if(inputs.size() != outputs.size()){
cv::Mat vis = images[0];
t->Visualize(&vis, outputs[0]);
......
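A hedged sketch of the timing pattern in the hunk above: run inference `infer_count` times and report the per-call average in milliseconds. `DummyInfer` is a placeholder for `t->Exec(inputs, outputs)`; only the chrono arithmetic mirrors the diff.

```cpp
#include <chrono>
#include <iostream>

static void DummyInfer() { /* placeholder for t->Exec(inputs, outputs) */ }

int main() {
    const int infer_count = 1000;
    auto e1 = std::chrono::system_clock::now();
    for (int i = 0; i < infer_count; ++i) {
        DummyInfer();
    }
    auto e2 = std::chrono::system_clock::now();
    const double total_us = static_cast<double>(
        std::chrono::duration_cast<std::chrono::microseconds>(e2 - e1).count());
    std::cout << infer_count << " times avg infer time: "
              << total_us / 1000.0 / infer_count << " ms" << std::endl;
    return 0;
}
```

For interval measurement, `std::chrono::steady_clock` would be the safer choice than `system_clock`, since the system clock can be adjusted while the loop is running.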
......@@ -60,9 +60,9 @@ int8EntroyCalibrator::~int8EntroyCalibrator(){
delete[] batchData;
};
int int8EntroyCalibrator::getBatchSize() const { return batchSize; };
int int8EntroyCalibrator::getBatchSize() const TRT_NOEXCEPT{ return batchSize; };
bool int8EntroyCalibrator::getBatch(void *bindings[], const char *names[], int nbBindings){
bool int8EntroyCalibrator::getBatch(void *bindings[], const char *names[], int nbBindings) TRT_NOEXCEPT{
if (imageIndex + batchSize > int(imgPaths.size()))
return false;
// load batch
......@@ -80,7 +80,7 @@ bool int8EntroyCalibrator::getBatch(void *bindings[], const char *names[], int n
return true;
};
const void * int8EntroyCalibrator::readCalibrationCache(std::size_t &length){
const void * int8EntroyCalibrator::readCalibrationCache(std::size_t &length) TRT_NOEXCEPT{
calibrationCache.clear();
std::ifstream input(calibTablePath, std::ios::binary);
input >> std::noskipws;
......@@ -92,7 +92,7 @@ const void * int8EntroyCalibrator::readCalibrationCache(std::size_t &length){
return length ? &calibrationCache[0] : nullptr;
};
void int8EntroyCalibrator::writeCalibrationCache(const void *cache, std::size_t length){
void int8EntroyCalibrator::writeCalibrationCache(const void *cache, std::size_t length) TRT_NOEXCEPT{
std::ofstream output(calibTablePath, std::ios::binary);
output.write(reinterpret_cast<const char*>(cache), length);
};
......
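A hedged sketch of the calibration-cache round trip shown above. The hunk cuts off right after `input >> std::noskipws;`, so the `std::vector<char>` buffer and the `istream_iterator` copy below are the usual pattern rather than a quote of the file; `readCacheSketch`/`writeCacheSketch` are hypothetical free-function names standing in for the calibrator methods.

```cpp
#include <algorithm>
#include <cstddef>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>

std::vector<char> calibrationCache;  // assumed buffer type for the cached table

const void* readCacheSketch(const std::string& calibTablePath, std::size_t& length) {
    calibrationCache.clear();
    std::ifstream input(calibTablePath, std::ios::binary);
    input >> std::noskipws;  // keep whitespace bytes verbatim while streaming
    if (input.good())
        std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
                  std::back_inserter(calibrationCache));
    length = calibrationCache.size();
    return length ? calibrationCache.data() : nullptr;  // nullptr => no cache, calibrate from scratch
}

void writeCacheSketch(const std::string& calibTablePath, const void* cache, std::size_t length) {
    std::ofstream output(calibTablePath, std::ios::binary);
    output.write(reinterpret_cast<const char*>(cache), length);
}
```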
......@@ -35,13 +35,13 @@ public:
~int8EntroyCalibrator();
int getBatchSize() const override;
int getBatchSize() const TRT_NOEXCEPT override;
bool getBatch(void *bindings[], const char *names[], int nbBindings) override;
bool getBatch(void *bindings[], const char *names[], int nbBindings) TRT_NOEXCEPT override;
const void *readCalibrationCache(std::size_t &length) override;
const void *readCalibrationCache(std::size_t &length) TRT_NOEXCEPT override;
void writeCalibrationCache(const void *cache, std::size_t length) override;
void writeCalibrationCache(const void *cache, std::size_t length) TRT_NOEXCEPT override;
private:
int batchSize, inputWidth, inputHeight;
......
......@@ -13,12 +13,18 @@ TRTInference::~TRTInference(){
cudaStreamDestroy(mCudaStream);
}
if (mRunTime)
if (mRunTime){
mRunTime->destroy();
if (mContext)
mRunTime = nullptr;
}
if (mContext){
mContext->destroy();
if (mEngine)
mContext = nullptr;
}
if (mEngine){
mEngine->destroy();
mEngine = nullptr;
}
}
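A self-contained sketch of the destroy-and-null pattern introduced in the destructor above, with a plain `Destroyable` standing in for the TensorRT runtime/context/engine handles. Nulling each pointer after `destroy()` keeps teardown idempotent and turns a stale use into an obvious null access instead of a double free. (In TensorRT 8, `destroy()` still works but is deprecated in favour of plain `delete`.)

```cpp
struct Destroyable {              // stand-in for a TensorRT object exposing destroy()
    void destroy() { delete this; }
};

struct Holder {                   // stand-in for TRTInference's mRunTime/mContext/mEngine
    Destroyable* mRunTime = nullptr;
    Destroyable* mContext = nullptr;
    Destroyable* mEngine  = nullptr;

    ~Holder() {
        if (mRunTime) { mRunTime->destroy(); mRunTime = nullptr; }
        if (mContext) { mContext->destroy(); mContext = nullptr; }
        if (mEngine)  { mEngine->destroy();  mEngine  = nullptr; }
    }
};

int main() {
    Holder h;
    h.mRunTime = new Destroyable;
    h.mContext = new Destroyable;
    h.mEngine  = new Destroyable;
    return 0;                     // ~Holder releases each handle exactly once
}
```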
......@@ -129,6 +135,9 @@ bool TRTInference::BuildEngine(YAML::Node& configNode){
// int verbosity = (int) nvinfer1::ILogger::Severity::kVERBOSE;
int verbosity = (int) nvinfer1::ILogger::Severity::kINFO;
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
#ifdef TRT_8
nvinfer1::IBuilderConfig* builderConfig = builder->createBuilderConfig();
#endif
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1U);
auto parser = nvonnxparser::createParser(*network, gLogger);
......@@ -141,7 +150,11 @@ bool TRTInference::BuildEngine(YAML::Node& configNode){
}
builder->setMaxBatchSize(maxBatchSize);
#ifdef TRT_8
builderConfig->setMaxWorkspaceSize(2UL << 30);
#else
builder->setMaxWorkspaceSize(2UL << 30);// 2G
#endif
int8EntroyCalibrator *calibrator = nullptr;
int runMode = configNode["runMode"].as<int>();
......@@ -150,7 +163,11 @@ bool TRTInference::BuildEngine(YAML::Node& configNode){
LOG_INFO << "Set FP16 Mode.";
if (!builder->platformHasFastFp16())
LOG_INFO << "Notice: the platform do not has fast for fp16" ;
#ifdef TRT_8
builderConfig->setFlag(nvinfer1::BuilderFlag::kFP16);
#else
builder->setFp16Mode(true);
#endif
}
else if(runMode == 2){//int8
LOG_ERROR << "No supported int8";
......@@ -174,22 +191,28 @@ bool TRTInference::BuildEngine(YAML::Node& configNode){
return false;
}
*/
return false;
}
LOG_INFO << "Begin building engine..." ;
#ifdef TRT_8
nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *builderConfig);
#else
nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);
#endif
if (!engine){
std::string error_message ="Unable to create engine";
gLogger.log(nvinfer1::ILogger::Severity::kERROR, error_message.c_str());
return false;
}
LOG_INFO << "End building engine..." ;
// Serialize the engine, then close everything down.
modelStream = engine->serialize();
engine->destroy();
network->destroy();
builder->destroy();
parser->destroy();
engine->destroy(); engine = nullptr;
network->destroy(); network = nullptr;
builder->destroy(); builder = nullptr;
parser->destroy(); parser = nullptr;
assert(modelStream != nullptr);
if(calibrator){
delete calibrator;
......@@ -200,7 +223,7 @@ bool TRTInference::BuildEngine(YAML::Node& configNode){
assert(file);
file.write(static_cast<char*>(modelStream->data()), modelStream->size());
assert(!file.fail());
modelStream->destroy();
modelStream->destroy(); modelStream = nullptr;
CUDA_CHECK(cudaStreamCreate(&mCudaStream));
LOG_INFO << "End writing engine";
return true;
......
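A condensed sketch of the `TRT_8` build path spread across the hunks above, using only TensorRT calls visible in the diff plus `parseFromFile` for the ONNX-parsing step the hunks skip over. `BuildSerializedEngine`, `onnxPath` and `enableFp16` are hypothetical names; calibration, `setMaxBatchSize`, logging and cleanup on the error path are trimmed for brevity.

```cpp
#include <NvInfer.h>
#include <NvOnnxParser.h>

nvinfer1::IHostMemory* BuildSerializedEngine(nvinfer1::ILogger& gLogger,
                                             const char* onnxPath,
                                             bool enableFp16) {
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
    nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1U);  // 1U == explicit batch
    nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, gLogger);

    if (!parser->parseFromFile(onnxPath,
            static_cast<int>(nvinfer1::ILogger::Severity::kINFO))) {
        return nullptr;  // parse errors are already logged (cleanup omitted in this sketch)
    }

    config->setMaxWorkspaceSize(2UL << 30);  // 2 GiB of builder scratch space
    if (enableFp16 && builder->platformHasFastFp16()) {
        config->setFlag(nvinfer1::BuilderFlag::kFP16);  // runMode == 1 in the diff
    }

    nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    nvinfer1::IHostMemory* blob = engine ? engine->serialize() : nullptr;

    // The real code also nulls each pointer after destroy(), as the diff shows.
    if (engine) engine->destroy();
    parser->destroy();
    network->destroy();
    config->destroy();
    builder->destroy();
    return blob;  // caller writes blob->data()/blob->size() to the engine file
}
```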
......@@ -19,6 +19,7 @@ namespace deepinfer {
namespace inference {
#if NV_TENSORRT_MAJOR >= 8
#define TRT_8
#define TRT_NOEXCEPT noexcept
#define TRT_CONST_ENQUEUE const
#else
......@@ -77,7 +78,7 @@ public:
std::cout<< " total runtime = " << totalTime/(runTimes + 1e-5) << " ms " << std::endl;
}
virtual void reportLayerTime(const char* layerName, float ms)
virtual void reportLayerTime(const char* layerName, float ms) TRT_NOEXCEPT override
{
mProfile[layerName].count++;
mProfile[layerName].time += ms;
......@@ -95,7 +96,7 @@ public:
{
}
void log(Severity severity, const char* msg) override
void log(Severity severity, const char* msg) TRT_NOEXCEPT override
{
// suppress messages with severity enum value greater than the reportable
if (severity > reportableSeverity)
......
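A pure-C++ sketch (no TensorRT headers) of why the `TRT_NOEXCEPT` macro exists: the TensorRT 8 interfaces declare their virtual methods `noexcept`, and C++ forbids an override from having a more permissive exception specification than the function it overrides. `USE_V8_INTERFACE` is a hypothetical stand-in for the `NV_TENSORRT_MAJOR >= 8` check, and the `#else` branch below mirrors what the (truncated) pre-TRT-8 branch presumably defines: an empty expansion.

```cpp
#ifdef USE_V8_INTERFACE           // hypothetical stand-in for NV_TENSORRT_MAJOR >= 8
#define TRT_NOEXCEPT noexcept
#else
#define TRT_NOEXCEPT
#endif

struct ICalibratorLike {          // stand-in for e.g. nvinfer1::IInt8EntropyCalibrator
    virtual ~ICalibratorLike() = default;
    virtual int getBatchSize() const TRT_NOEXCEPT = 0;
};

struct MyCalibrator : ICalibratorLike {
    // Without TRT_NOEXCEPT this override does not compile whenever the base
    // declaration is noexcept, i.e. whenever the v8-style interface is in use.
    int getBatchSize() const TRT_NOEXCEPT override { return 1; }
};

int main() {
    MyCalibrator c;
    return c.getBatchSize() == 1 ? 0 : 1;
}
```

This is the same reason `reportLayerTime` and `log` above gained the macro: their TensorRT base declarations became `noexcept` in version 8.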
......@@ -365,6 +365,7 @@ bool YoloV5TRTInference::BuildEngine(YAML::Node& configNode){
maxBatchSize, inputWidth, inputHeight, inputMean, inputStd, useBGR);
config->setInt8Calibrator(calibrator);
*/
return false;
}
LOG_INFO << "Building engine, please wait for a while...";
......@@ -374,14 +375,16 @@ bool YoloV5TRTInference::BuildEngine(YAML::Node& configNode){
if (!engine){
std::string error_message ="Unable to create engine";
gLogger.log(nvinfer1::ILogger::Severity::kERROR, error_message.c_str());
return false;
}
LOG_INFO << "End building engine...";
// Serialize the engine, save engine file and close everything down.
nvinfer1::IHostMemory* modelStream = engine->serialize();
engine->destroy();
network->destroy();
builder->destroy();
engine->destroy(); engine = nullptr;
network->destroy(); network = nullptr;
builder->destroy(); builder = nullptr;
config->destroy(); config = nullptr;
assert(modelStream != nullptr);
if(calibrator){
delete calibrator;
......@@ -392,7 +395,7 @@ bool YoloV5TRTInference::BuildEngine(YAML::Node& configNode){
assert(file);
file.write(static_cast<char*>(modelStream->data()), modelStream->size());
assert(!file.fail());
modelStream->destroy();
modelStream->destroy(); modelStream = nullptr;
LOG_INFO <<"End writing engine";
CUDA_CHECK(cudaStreamCreate(&mCudaStream));
......@@ -401,6 +404,7 @@ bool YoloV5TRTInference::BuildEngine(YAML::Node& configNode){
{
free((void *)(mem.second.values));
}
return true;
}
......
......@@ -233,7 +233,7 @@ namespace nvinfer1
}
}
void YoloV5LayerPlugin::forwardGpu(const float* const* inputs, void** outputs, void* workspace, cudaStream_t stream, int batchSize)
void YoloV5LayerPlugin::forwardGpu(const float* const* inputs, void* TRT_CONST_ENQUEUE* outputs, void* workspace, cudaStream_t stream, int batchSize)
{
float *bboxData = (float *)outputs[0];
float *scoreData = (float *)outputs[1];
......@@ -258,7 +258,7 @@ namespace nvinfer1
}
int YoloV5LayerPlugin::enqueue(int batchSize, const void *const *inputs, void ** outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT
int YoloV5LayerPlugin::enqueue(int batchSize, const void *const *inputs, void* TRT_CONST_ENQUEUE* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT
{
forwardGpu((const float* const*)inputs, outputs, workspace, stream, batchSize);
return 0;
......
......@@ -73,7 +73,7 @@ namespace nvinfer1
void detachFromContext() TRT_NOEXCEPT override;
private:
void forwardGpu(const float *const *inputs, void **outputs, void *workspace, cudaStream_t stream, int batchSize = 1);
void forwardGpu(const float *const *inputs, void * TRT_CONST_ENQUEUE*outputs, void *workspace, cudaStream_t stream, int batchSize = 1);
int mThreadCount = 256;
const char *mPluginNamespace;
int mKernelCount;
......
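A pure-C++ sketch (again without TensorRT headers) of what `TRT_CONST_ENQUEUE` solves: from TensorRT 8 the plugin `enqueue()` receives its output bindings as `void* const*` rather than `void**`, so the macro expands to `const` only when building against version 8. `IPluginLike`/`MyPlugin` are hypothetical stand-ins, and `void* stream` replaces `cudaStream_t` to keep the sketch header-free.

```cpp
#ifdef USE_V8_INTERFACE           // hypothetical stand-in for NV_TENSORRT_MAJOR >= 8
#define TRT_CONST_ENQUEUE const
#else
#define TRT_CONST_ENQUEUE
#endif

struct IPluginLike {              // stand-in for the TensorRT plugin base class
    virtual ~IPluginLike() = default;
    virtual int enqueue(int batchSize, const void* const* inputs,
                        void* TRT_CONST_ENQUEUE* outputs,
                        void* workspace, void* stream) = 0;
};

struct MyPlugin : IPluginLike {
    int enqueue(int batchSize, const void* const* inputs,
                void* TRT_CONST_ENQUEUE* outputs,
                void* workspace, void* stream) override {
        (void)batchSize; (void)inputs; (void)outputs; (void)workspace; (void)stream;
        return 0;                 // 0 == success, mirroring the real enqueue above
    }
};

int main() {
    MyPlugin p;
    const void* inputs[1] = {nullptr};
    void* outputs[1] = {nullptr};
    return p.enqueue(1, inputs, outputs, nullptr, nullptr);
}
```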
......@@ -248,7 +248,7 @@ bool ByteTracker::Exec()
}
}
interfaces::SetIOPtr(outputNames[0], tracked_bboxes);
LOG_INFO << "Get " << tracked_bboxes->detections.size() << " tracked objs.";
LOG_INFO << "Get " << tracked_bboxes->detections.size() << " tracked objs. Removed track length: " << this->removed_tracks.size();
return true;
}
......