/*!
 * Copyright (c) 2017 by Contributors
 * \file dlpack.h
 * \brief The common header of DLPack.
 */
/**
 * \brief Compatibility with C++
 */
/*! \brief The current version of dlpack */
#define DLPACK_VERSION 80

/*! \brief The current ABI version of dlpack */
#define DLPACK_ABI_VERSION 1

/*! \brief DLPACK_DLL prefix for windows */
#ifdef _WIN32
#ifdef DLPACK_EXPORTS
#define DLPACK_DLL __declspec(dllexport)
#else
#define DLPACK_DLL __declspec(dllimport)
#endif
#else
#define DLPACK_DLL
#endif

// NOLINTNEXTLINE(modernize-deprecated-headers)
#include <stdint.h>
// NOLINTNEXTLINE(modernize-deprecated-headers)
#include <stddef.h>
/* `extern "C"` is only valid in C++; guard it so the header also compiles as C. */
#ifdef __cplusplus
extern "C" {
#endif
/*!
 * \brief The device type in DLDevice.
 */
#ifdef __cplusplus
// C++ allows fixing the underlying type so the enum is ABI-stable (int32_t);
// plain C (pre-C23) has no such syntax, hence the split declaration.
typedef enum : int32_t {
#else
typedef enum {
#endif
  /*! \brief CPU device */
  kDLCPU = 1,
  /*! \brief CUDA GPU device */
  kDLCUDA = 2,
  /*!
   * \brief Pinned CUDA CPU memory by cudaMallocHost
   */
  kDLCUDAHost = 3,
  /*! \brief OpenCL devices. */
  kDLOpenCL = 4,
  /*! \brief Vulkan buffer for next generation graphics. */
  kDLVulkan = 7,
  /*! \brief Metal for Apple GPU. */
  kDLMetal = 8,
  /*! \brief Verilog simulator buffer */
  kDLVPI = 9,
  /*! \brief ROCm GPUs for AMD GPUs */
  kDLROCM = 10,
  /*!
   * \brief Pinned ROCm CPU memory allocated by hipMallocHost
   */
  kDLROCMHost = 11,
  /*!
   * \brief Reserved extension device type,
   * used for quickly test extension device
   * The semantics can differ depending on the implementation.
   */
  kDLExtDev = 12,
  /*!
   * \brief CUDA managed/unified memory allocated by cudaMallocManaged
   */
  kDLCUDAManaged = 13,
  /*!
   * \brief Unified shared memory allocated on a oneAPI non-partititioned
   * device. Call to oneAPI runtime is required to determine the device
   * type, the USM allocation type and the sycl context it is bound to.
   *
   */
  kDLOneAPI = 14,
  /*! \brief GPU support for next generation WebGPU standard. */
  kDLWebGPU = 15,
  /*! \brief Qualcomm Hexagon DSP */
  kDLHexagon = 16,
  /*! \brief Microsoft AI Accelerator */
  kDLMAIA = 17,
} DLDeviceType;
/*! | |
* \brief A Device for Tensor and operator. | |
*/ | |
typedef struct { | |
/*! \brief The device type used in the device. */ | |
DLDeviceType device_type; | |
/*! | |
* \brief The device index. | |
* For vanilla CPU memory, pinned memory, or managed memory, this is set to 0. | |
*/ | |
int32_t device_id; | |
} DLDevice; | |
/*!
 * \brief The type code options DLDataType.
 */
typedef enum {
  /*! \brief signed integer */
  kDLInt = 0U,
  /*! \brief unsigned integer */
  kDLUInt = 1U,
  /*! \brief IEEE floating point */
  kDLFloat = 2U,
  /*!
   * \brief Opaque handle type, reserved for testing purposes.
   * Frameworks need to agree on the handle data type for the exchange to be well-defined.
   */
  kDLOpaqueHandle = 3U,
  /*! \brief bfloat16 */
  kDLBfloat = 4U,
  /*!
   * \brief complex number
   * (C/C++/Python layout: compact struct per complex number)
   */
  kDLComplex = 5U,
  /*! \brief boolean */
  kDLBool = 6U,
} DLDataTypeCode;
/*!
 * \brief The data type the tensor can hold. The data type is assumed to follow the
 * native endian-ness. An explicit error message should be raised when attempting to
 * export an array with non-native endianness
 *
 * Examples
 *  - float: type_code = 2, bits = 32, lanes = 1
 *  - float4(vectorized 4 float): type_code = 2, bits = 32, lanes = 4
 *  - int8: type_code = 0, bits = 8, lanes = 1
 *  - std::complex<float>: type_code = 5, bits = 64, lanes = 1
 *  - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library convention,
 *    the underlying storage size of bool is 8 bits)
 */
typedef struct {
  /*!
   * \brief Type code of base types.
   * We keep it uint8_t instead of DLDataTypeCode for minimal memory
   * footprint, but the value should be one of DLDataTypeCode enum values.
   */
  uint8_t code;
  /*!
   * \brief Number of bits, common choices are 8, 16, 32.
   */
  uint8_t bits;
  /*! \brief Number of lanes in the type, used for vector types. */
  uint16_t lanes;
} DLDataType;
/*! | |
* \brief Plain C Tensor object, does not manage memory. | |
*/ | |
typedef struct { | |
/*! | |
* \brief The data pointer points to the allocated data. This will be CUDA | |
* device pointer or cl_mem handle in OpenCL. It may be opaque on some device | |
* types. This pointer is always aligned to 256 bytes as in CUDA. The | |
* `byte_offset` field should be used to point to the beginning of the data. | |
* | |
* Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow, | |
* TVM, perhaps others) do not adhere to this 256 byte aligment requirement | |
* on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed | |
* (after which this note will be updated); at the moment it is recommended | |
* to not rely on the data pointer being correctly aligned. | |
* | |
* For given DLTensor, the size of memory required to store the contents of | |
* data is calculated as follows: | |
* | |
* \code{.c} | |
* static inline size_t GetDataSize(const DLTensor* t) { | |
* size_t size = 1; | |
* for (tvm_index_t i = 0; i < t->ndim; ++i) { | |
* size *= t->shape[i]; | |
* } | |
* size *= (t->dtype.bits * t->dtype.lanes + 7) / 8; | |
* return size; | |
* } | |
* \endcode | |
*/ | |
void* data; | |
/*! \brief The device of the tensor */ | |
DLDevice device; | |
/*! \brief Number of dimensions */ | |
int32_t ndim; | |
/*! \brief The data type of the pointer*/ | |
DLDataType dtype; | |
/*! \brief The shape of the tensor */ | |
const int64_t* shape; | |
/*! | |
* \brief strides of the tensor (in number of elements, not bytes) | |
* can be NULL, indicating tensor is compact and row-majored. | |
*/ | |
const int64_t* strides; | |
/*! \brief The offset in bytes to the beginning pointer to data */ | |
uint64_t byte_offset; | |
} DLTensor; | |
/*! | |
* \brief C Tensor object, manage memory of DLTensor. This data structure is | |
* intended to facilitate the borrowing of DLTensor by another framework. It is | |
* not meant to transfer the tensor. When the borrowing framework doesn't need | |
* the tensor, it should call the deleter to notify the host that the resource | |
* is no longer needed. | |
*/ | |
typedef struct DLManagedTensor { | |
/*! \brief DLTensor which is being memory managed */ | |
DLTensor dl_tensor; | |
/*! \brief the context of the original host framework of DLManagedTensor in | |
* which DLManagedTensor is used in the framework. It can also be NULL. | |
*/ | |
void * manager_ctx; | |
/*! \brief Destructor signature void (*)(void*) - this should be called | |
* to destruct manager_ctx which holds the DLManagedTensor. It can be NULL | |
* if there is no way for the caller to provide a reasonable destructor. | |
* The destructors deletes the argument self as well. | |
*/ | |
void (*deleter)(struct DLManagedTensor * self); | |
} DLManagedTensor; | |
/* Close the `extern "C"` block opened above; only present when compiling as C++. */
#ifdef __cplusplus
}  // DLPACK_EXTERN_C
#endif