#ifndef DEEPINFER_BASE_SYNCMEM_H_
#define DEEPINFER_BASE_SYNCMEM_H_

#include "common/log.h"
#include "common/common.h"

namespace waytous{
namespace deepinfer{
namespace base{


/**
 * @brief Allocates a host-side buffer, either through the CUDA runtime
 *        (cudaMallocHost, i.e. pinned host memory) or through malloc().
 *
 * @param ptr       Out-parameter that receives the address of the new buffer.
 * @param size      Number of bytes requested.
 * @param use_cuda  When true the buffer is obtained with cudaMallocHost();
 *                  otherwise it is obtained with malloc().
 *
 * Allocation failures are reported through the CUDA_CHECK / CHECK macros.
 * The buffer has to be released with PerceptionFreeHost() using the same
 * use_cuda value, because that function pairs cudaFreeHost() with the CUDA
 * path and free() with the malloc() path.
 */
inline void PerceptionMallocHost(void** ptr, size_t size, bool use_cuda) {
    if (use_cuda) {
        CUDA_CHECK(cudaMallocHost(ptr, size));
        return;
    }
    // malloc(0) is allowed to return NULL, which would trip the CHECK below
    // although nothing actually failed. Requesting at least one byte makes a
    // null result always mean a genuine allocation failure.
    *ptr = malloc(size > 0 ? size : 1);
    CHECK(*ptr) << "host allocation of size " << size << " failed";
}

/**
 * @brief Releases a host buffer with the deallocator selected by use_cuda.
 *
 * @param ptr       Buffer to release.
 * @param use_cuda  When true the buffer is handed to cudaFreeHost() (and the
 *                  result is verified with CUDA_CHECK); when false it is
 *                  handed to free().
 */
inline void PerceptionFreeHost(void* ptr, bool use_cuda) {
    // Plain heap path first; everything else goes through the CUDA runtime.
    if (!use_cuda) {
        free(ptr);
        return;
    }
    CUDA_CHECK(cudaFreeHost(ptr));
}

/**
 * @brief Manages memory allocation and synchronization between the host (CPU)
 *        and device (GPU).
 *
 * Only the interface is declared here: apart from the small inline accessors,
 * every member function is defined out of line. Objects of this class cannot
 * be copied (copy construction and copy assignment are deleted).
 */
class SyncedMemory {
public:
    /**
     * State tag stored in head_ and exposed through head() / set_head().
     * NOTE(review): judging by the names, this records which side (host or
     * device) holds the up-to-date copy of the data; confirm against the
     * out-of-line implementation.
     */
    enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };

    /// Constructs an object without an explicit size. use_cuda presumably
    /// selects the host allocator (see cpu_malloc_use_cuda_) - TODO confirm.
    explicit SyncedMemory(bool use_cuda);
    /// Constructs an object for a buffer of `size` bytes (unit presumed to be
    /// bytes - TODO confirm against the implementation).
    SyncedMemory(size_t size, bool use_cuda);
    SyncedMemory(const SyncedMemory&) = delete;
    SyncedMemory& operator=(const SyncedMemory&) = delete;
    ~SyncedMemory();

    /// Read-only pointer to the host-side data.
    const void* cpu_data();
    /// Replaces the host-side data pointer with `data`.
    /// NOTE(review): ownership of `data` is decided by the implementation
    /// (see own_cpu_data_); verify before relying on it.
    void set_cpu_data(void* data);
    /// Read-only pointer to the device-side data.
    const void* gpu_data();
    /// Replaces the device-side data pointer with `data`.
    /// NOTE(review): ownership of `data` is decided by the implementation
    /// (see own_gpu_data_); verify before relying on it.
    void set_gpu_data(void* data);
    /// Writable pointer to the host-side data.
    void* mutable_cpu_data();
    /// Writable pointer to the device-side data.
    void* mutable_gpu_data();

    /// Returns the current state tag.
    SyncedHead head() const { return head_; }
    /// Overwrites the state tag; this inline setter performs no copy itself.
    void set_head(SyncedHead head) { head_ = head; }
    /// Shorthand for set_head(HEAD_AT_GPU).
    void set_head_gpu() { set_head(HEAD_AT_GPU); }
    /// Shorthand for set_head(HEAD_AT_CPU).
    void set_head_cpu() { set_head(HEAD_AT_CPU); }
    /// Returns the stored size_ value. Declared const, like head(), so it can
    /// be queried through a const reference.
    size_t size() const { return size_; }

    /// NOTE(review): presumably transfers the host data to the device
    /// asynchronously on `stream`; confirm against the implementation.
    void async_gpu_push(const cudaStream_t& stream);

private:
    // Internal helpers, defined out of line.
    void check_device();
    void to_cpu();
    void to_gpu();

private:
    // NOTE(review): the roles below are inferred from the member names and the
    // public interface; confirm against the implementation file.
    void* cpu_ptr_;             // host-side buffer
    void* gpu_ptr_;             // device-side buffer
    size_t size_;               // value returned by size()
    SyncedHead head_;           // value returned by head()
    bool own_cpu_data_;         // whether this object owns cpu_ptr_
    bool cpu_malloc_use_cuda_;  // whether cpu_ptr_ came from the CUDA host allocator
    bool own_gpu_data_;         // whether this object owns gpu_ptr_
    int device_;                // CUDA device id associated with the buffer
};  // class SyncedMemory




} //namespace base
} //namespace deepinfer
} //namespace waytous

#endif