in

54cfa83e · xin.wang.waytous · 9ac67037 · 54cfa83e · 54cfa83e · 54cfa83e
Commit 54cfa83e authored Feb 21, 2023 by xin.wang.waytous
10 changed files
--- a/include/common/common.h
+++ b/include/common/common.h
@@ -2,18 +2,21 @@
 #define DEEPINFER_COMMON_H_
-#include <cublas_v2.h>
-#include <cuda_runtime.h>
 #include <iostream>
 #include <sstream>
 #include <iomanip>
 #include <cmath>
 #include <assert.h>
-#define BLOCK 512
 #define iMAX(a, b) ((a) > (b) ? (a) : (b))
 #define iMIN(a, b) ((a) > (b) ? (b) : (a))
+// #ifdef USE_CUDA
+#include <cublas_v2.h>
+#include <cuda_runtime.h>
+#define BLOCK 512
 #ifndef CUDA_CHECK
 #define CUDA_CHECK(callstr)                                                                    \
    {                                                                                          \
@@ -24,6 +27,7 @@
        }                                                                                      \
    }
 #endif
+// #endif // USE_CUDA
 #ifndef PI
 #define PI 3.1415926
@@ -33,6 +37,7 @@ namespace waytous{
 namespace deepinfer{
 namespace common{
+// #ifdef USE_CUDA
 inline dim3 cudaGridSize(uint n, uint block)
 {
    uint k = (n - 1) / block + 1;
@@ -46,7 +51,7 @@ inline dim3 cudaGridSize(uint n, uint block)
    dim3 d = {x, y, 1} ;
    return d;
 }
+// #endif // USE_CUDA
 inline std::string formatValue(float val, int fixed) {
    std::ostringstream oss;

--- a/src/libs/postprocessors/mobilefacenet_post.h
+++ b/src/libs/postprocessors/mobilefacenet_post.h
@@ -2,9 +2,6 @@
 #ifndef DEEPINFER_POSTPROCESS_MOBILEFACENET_H_
 #define DEEPINFER_POSTPROCESS_MOBILEFACENET_H_
-#include <cuda_runtime.h>
-#include <cstdint>
 #include "interfaces/base_unit.h"
 #include "base/image.h"

--- a/src/libs/postprocessors/trades_post.h
+++ b/src/libs/postprocessors/trades_post.h
@@ -2,9 +2,6 @@
 #ifndef DEEPINFER_POSTPROCESS_TRDES_H_
 #define DEEPINFER_POSTPROCESS_TRDES_H_
-#include <cuda_runtime.h>
-#include <cstdint>
 #include "interfaces/base_unit.h"
 #include "base/image.h"

--- a/src/libs/postprocessors/whenet_post.h
+++ b/src/libs/postprocessors/whenet_post.h
@@ -2,9 +2,6 @@
 #ifndef DEEPINFER_POSTPROCESS_WHENet_H_
 #define DEEPINFER_POSTPROCESS_WHENet_H_
-#include <cuda_runtime.h>
-#include <cstdint>
 #include "interfaces/base_unit.h"
 #include "base/image.h"

--- a/src/libs/postprocessors/yolov5_post.h
+++ b/src/libs/postprocessors/yolov5_post.h
@@ -2,9 +2,6 @@
 #ifndef DEEPINFER_POSTPROCESS_YOLOV5_H_
 #define DEEPINFER_POSTPROCESS_YOLOV5_H_
-#include <cuda_runtime.h>
-#include <cstdint>
 #include "interfaces/base_unit.h"
 #include "base/image.h"

--- a/src/libs/preprocessors/resize_gpu.cu
+++ b/src/libs/preprocessors/resize_gpu.cu
@@ -95,8 +95,9 @@ __global__ void warpaffine_kernel(
 void resizeGPU(uint8_t* src, int src_width, int src_height, int step_width,
-    float* dst, int dst_width, int dst_height, float* input_mean, float* input_std, 
+        float* dst, int dst_width, int dst_height, float* input_mean, float* input_std, 
-    bool bgr, bool resizeFixAspectRatio, cudaStream_t stream){
+        bool bgr, bool resizeFixAspectRatio)
+    {
    AffineMatrix s2d, d2s;
    float scalex =  dst_width / (float)src_width;
    float scaley = dst_height / (float)src_height;
@@ -121,7 +122,7 @@ void resizeGPU(uint8_t* src, int src_width, int src_height, int step_width,
    int jobs = dst_height * dst_width;
    int threads = 256;
    int blocks = ceil(jobs / (float)threads);
-    warpaffine_kernel<<<blocks, threads, 0, stream>>>(
+    warpaffine_kernel<<<blocks, threads, 0>>>( //, stream
        src, step_width, src_width,
        src_height, dst, dst_width,
        dst_height, 128, d2s, jobs, input_mean, input_std, bgr);

--- a/src/libs/preprocessors/resize_gpu.h
+++ b/src/libs/preprocessors/resize_gpu.h
@@ -18,7 +18,7 @@ struct AffineMatrix{
 void resizeGPU(uint8_t* src, int src_width, int src_height, int step_width,
            float* dst, int dst_width, int dst_height, float* input_mean, float* input_std, 
-            bool bgr, bool resizeFixAspectRatio, cudaStream_t stream);
+            bool bgr, bool resizeFixAspectRatio);
 } //namespace preprocess

--- a/src/libs/preprocessors/resize_norm.cpp
+++ b/src/libs/preprocessors/resize_norm.cpp
@@ -7,7 +7,7 @@ namespace preprocess {
 bool ResizeNorm::Init(YAML::Node& node){
-    CUDA_CHECK(cudaStreamCreate(&stream_));
+    // CUDA_CHECK(cudaStreamCreate(&stream_));
    if(!BaseUnit::Init(node)){
        LOG_WARN << "Init resize_norm error";
        return false;
@@ -56,7 +56,7 @@ bool ResizeNorm::Exec(){
            inputWidth, inputHeight,
            mean->mutable_gpu_data(),
            std->mutable_gpu_data(),
-            useBGR, fixAspectRatio, stream_
+            useBGR, fixAspectRatio
        );
    }
    // ios::NormalIOPtr dst_ptr = std::make_shared<ios::NormalIO>(ios::NormalIO(dst));

--- a/src/libs/preprocessors/resize_norm.h
+++ b/src/libs/preprocessors/resize_norm.h
 #ifndef DEEPINFER_PREPROCESS_RESIZE_NORM_H_
 #define DEEPINFER_PREPROCESS_RESIZE_NORM_H_
-#include <cuda_runtime.h>
-#include <cstdint>
 #include "interfaces/base_unit.h"
 #include "base/image.h"
@@ -33,7 +30,6 @@ public:
    bool fixAspectRatio = true;
    bool useBGR = false;
    base::BlobPtr<float> dst, mean, std;
-    cudaStream_t stream_;
 };

--- a/src/libs/sources/camera_src.h
+++ b/src/libs/sources/camera_src.h
 #ifndef DEEPINFER_CAMERA_SOURCE_H_
 #define DEEPINFER_CAMERA_SOURCE_H_
-#include <cuda_runtime.h>
-#include <cstdint>
 #include "base/image.h"
 #include "interfaces/base_unit.h"
 #include "libs/ios/camera_ios.h"