|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma once |
|
|
|
#include <cstdint> |
|
#include <memory> |
|
#include <string> |
|
#include <vector> |
|
|
|
#include "arrow/buffer.h" |
|
#include "arrow/compare.h" |
|
#include "arrow/result.h" |
|
#include "arrow/status.h" |
|
#include "arrow/type.h" |
|
#include "arrow/type_traits.h" |
|
#include "arrow/util/macros.h" |
|
#include "arrow/util/visibility.h" |
|
|
|
namespace arrow { |
|
|
|
/// \brief Report whether a type id is one of the value types accepted by
/// this predicate for tensors.
///
/// The accepted ids are the unsigned and signed integer ids (UINT8 through
/// UINT64, INT8 through INT64) together with HALF_FLOAT, FLOAT and DOUBLE.
/// Every other id yields false.
///
/// \param[in] type_id the type id to classify
/// \return true for the ids listed above, false otherwise
static inline bool is_tensor_supported(Type::type type_id) {
  bool supported = false;
  switch (type_id) {
    // Unsigned integers
    case Type::UINT8:
    case Type::UINT16:
    case Type::UINT32:
    case Type::UINT64:
    // Signed integers
    case Type::INT8:
    case Type::INT16:
    case Type::INT32:
    case Type::INT64:
    // Floating point
    case Type::HALF_FLOAT:
    case Type::FLOAT:
    case Type::DOUBLE:
      supported = true;
      break;
    default:
      // Anything else keeps the initial "not supported" answer.
      break;
  }
  return supported;
}
|
|
|
namespace internal { |
|
|
|
ARROW_EXPORT |
|
Status ComputeRowMajorStrides(const FixedWidthType& type, |
|
const std::vector<int64_t>& shape, |
|
std::vector<int64_t>* strides); |
|
|
|
ARROW_EXPORT |
|
Status ComputeColumnMajorStrides(const FixedWidthType& type, |
|
const std::vector<int64_t>& shape, |
|
std::vector<int64_t>* strides); |
|
|
|
ARROW_EXPORT |
|
bool IsTensorStridesContiguous(const std::shared_ptr<DataType>& type, |
|
const std::vector<int64_t>& shape, |
|
const std::vector<int64_t>& strides); |
|
|
|
ARROW_EXPORT |
|
Status ValidateTensorParameters(const std::shared_ptr<DataType>& type, |
|
const std::shared_ptr<Buffer>& data, |
|
const std::vector<int64_t>& shape, |
|
const std::vector<int64_t>& strides, |
|
const std::vector<std::string>& dim_names); |
|
|
|
ARROW_EXPORT |
|
Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_major, |
|
MemoryPool* pool, std::shared_ptr<Tensor>* tensor); |
|
|
|
} |
|
|
|
class ARROW_EXPORT Tensor { |
|
public: |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static inline Result<std::shared_ptr<Tensor>> Make( |
|
const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data, |
|
const std::vector<int64_t>& shape, const std::vector<int64_t>& strides = {}, |
|
const std::vector<std::string>& dim_names = {}) { |
|
ARROW_RETURN_NOT_OK( |
|
internal::ValidateTensorParameters(type, data, shape, strides, dim_names)); |
|
return std::make_shared<Tensor>(type, data, shape, strides, dim_names); |
|
} |
|
|
|
virtual ~Tensor() = default; |
|
|
|
|
|
Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data, |
|
const std::vector<int64_t>& shape); |
|
|
|
|
|
Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data, |
|
const std::vector<int64_t>& shape, const std::vector<int64_t>& strides); |
|
|
|
|
|
Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data, |
|
const std::vector<int64_t>& shape, const std::vector<int64_t>& strides, |
|
const std::vector<std::string>& dim_names); |
|
|
|
std::shared_ptr<DataType> type() const { return type_; } |
|
std::shared_ptr<Buffer> data() const { return data_; } |
|
|
|
const uint8_t* raw_data() const { return data_->data(); } |
|
uint8_t* raw_mutable_data() { return data_->mutable_data(); } |
|
|
|
const std::vector<int64_t>& shape() const { return shape_; } |
|
const std::vector<int64_t>& strides() const { return strides_; } |
|
|
|
int ndim() const { return static_cast<int>(shape_.size()); } |
|
|
|
const std::vector<std::string>& dim_names() const { return dim_names_; } |
|
const std::string& dim_name(int i) const; |
|
|
|
|
|
int64_t size() const; |
|
|
|
|
|
bool is_mutable() const { return data_->is_mutable(); } |
|
|
|
|
|
bool is_contiguous() const; |
|
|
|
|
|
bool is_row_major() const; |
|
|
|
|
|
bool is_column_major() const; |
|
|
|
Type::type type_id() const; |
|
|
|
bool Equals(const Tensor& other, const EqualOptions& = EqualOptions::Defaults()) const; |
|
|
|
|
|
Result<int64_t> CountNonZero() const; |
|
|
|
|
|
static int64_t CalculateValueOffset(const std::vector<int64_t>& strides, |
|
const std::vector<int64_t>& index) { |
|
const int64_t n = static_cast<int64_t>(index.size()); |
|
int64_t offset = 0; |
|
for (int64_t i = 0; i < n; ++i) { |
|
offset += index[i] * strides[i]; |
|
} |
|
return offset; |
|
} |
|
|
|
int64_t CalculateValueOffset(const std::vector<int64_t>& index) const { |
|
return Tensor::CalculateValueOffset(strides_, index); |
|
} |
|
|
|
|
|
template <typename ValueType> |
|
const typename ValueType::c_type& Value(const std::vector<int64_t>& index) const { |
|
using c_type = typename ValueType::c_type; |
|
const int64_t offset = CalculateValueOffset(index); |
|
const c_type* ptr = reinterpret_cast<const c_type*>(raw_data() + offset); |
|
return *ptr; |
|
} |
|
|
|
Status Validate() const { |
|
return internal::ValidateTensorParameters(type_, data_, shape_, strides_, dim_names_); |
|
} |
|
|
|
protected: |
|
Tensor() {} |
|
|
|
std::shared_ptr<DataType> type_; |
|
std::shared_ptr<Buffer> data_; |
|
std::vector<int64_t> shape_; |
|
std::vector<int64_t> strides_; |
|
|
|
|
|
std::vector<std::string> dim_names_; |
|
|
|
template <typename SparseIndexType> |
|
friend class SparseTensorImpl; |
|
|
|
private: |
|
ARROW_DISALLOW_COPY_AND_ASSIGN(Tensor); |
|
}; |
|
|
|
template <typename TYPE> |
|
class NumericTensor : public Tensor { |
|
public: |
|
using TypeClass = TYPE; |
|
using value_type = typename TypeClass::c_type; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static Result<std::shared_ptr<NumericTensor<TYPE>>> Make( |
|
const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape, |
|
const std::vector<int64_t>& strides = {}, |
|
const std::vector<std::string>& dim_names = {}) { |
|
ARROW_RETURN_NOT_OK(internal::ValidateTensorParameters( |
|
TypeTraits<TYPE>::type_singleton(), data, shape, strides, dim_names)); |
|
return std::make_shared<NumericTensor<TYPE>>(data, shape, strides, dim_names); |
|
} |
|
|
|
|
|
NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape, |
|
const std::vector<int64_t>& strides, |
|
const std::vector<std::string>& dim_names) |
|
: Tensor(TypeTraits<TYPE>::type_singleton(), data, shape, strides, dim_names) {} |
|
|
|
|
|
NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape) |
|
: NumericTensor(data, shape, {}, {}) {} |
|
|
|
|
|
NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape, |
|
const std::vector<int64_t>& strides) |
|
: NumericTensor(data, shape, strides, {}) {} |
|
|
|
const value_type& Value(const std::vector<int64_t>& index) const { |
|
return Tensor::Value<TypeClass>(index); |
|
} |
|
}; |
|
|
|
} |
|
|