// Licensed to the Apache Software Foundation (ASF) under one | |
// or more contributor license agreements. See the NOTICE file | |
// distributed with this work for additional information | |
// regarding copyright ownership. The ASF licenses this file | |
// to you under the Apache License, Version 2.0 (the | |
// "License"); you may not use this file except in compliance | |
// with the License. You may obtain a copy of the License at | |
// | |
// http://www.apache.org/licenses/LICENSE-2.0 | |
// | |
// Unless required by applicable law or agreed to in writing, | |
// software distributed under the License is distributed on an | |
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
// KIND, either express or implied. See the License for the | |
// specific language governing permissions and limitations | |
// under the License. | |
namespace arrow { | |
/// \defgroup c-data-interface Functions for working with the C data interface. | |
/// | |
/// @{ | |
/// \brief Export C++ DataType using the C data interface format.
///
/// The root type is considered to have empty name and metadata.
/// If you want the root type to have a name and/or metadata, pass
/// a Field instead.
///
/// \param[in] type DataType object to export
/// \param[out] out C struct where to export the datatype
ARROW_EXPORT
Status ExportType(const DataType& type, struct ArrowSchema* out);
/// \brief Export C++ Field using the C data interface format.
///
/// \param[in] field Field object to export
/// \param[out] out C struct where to export the field
ARROW_EXPORT
Status ExportField(const Field& field, struct ArrowSchema* out);
/// \brief Export C++ Schema using the C data interface format.
///
/// \param[in] schema Schema object to export
/// \param[out] out C struct where to export the schema
ARROW_EXPORT
Status ExportSchema(const Schema& schema, struct ArrowSchema* out);
/// \brief Export C++ Array using the C data interface format.
///
/// The resulting ArrowArray struct keeps the array data and buffers alive
/// until its release callback is called by the consumer.
///
/// \param[in] array Array object to export
/// \param[out] out C struct where to export the array
/// \param[out] out_schema optional C struct where to export the array type
ARROW_EXPORT
Status ExportArray(const Array& array, struct ArrowArray* out,
                   struct ArrowSchema* out_schema = NULLPTR);
/// \brief Export C++ RecordBatch using the C data interface format.
///
/// The record batch is exported as if it were a struct array.
/// The resulting ArrowArray struct keeps the record batch data and buffers alive
/// until its release callback is called by the consumer.
///
/// \param[in] batch Record batch to export
/// \param[out] out C struct where to export the record batch
/// \param[out] out_schema optional C struct where to export the record batch schema
ARROW_EXPORT
Status ExportRecordBatch(const RecordBatch& batch, struct ArrowArray* out,
                         struct ArrowSchema* out_schema = NULLPTR);
/// \brief Import C++ DataType from the C data interface.
///
/// The given ArrowSchema struct is released (as per the C data interface
/// specification), even if this function fails.
///
/// \param[in,out] schema C data interface struct representing the data type
/// \return Imported type object
ARROW_EXPORT
Result<std::shared_ptr<DataType>> ImportType(struct ArrowSchema* schema);
/// \brief Import C++ Field from the C data interface.
///
/// The given ArrowSchema struct is released (as per the C data interface
/// specification), even if this function fails.
///
/// \param[in,out] schema C data interface struct representing the field
/// \return Imported field object
ARROW_EXPORT
Result<std::shared_ptr<Field>> ImportField(struct ArrowSchema* schema);
/// \brief Import C++ Schema from the C data interface.
///
/// The given ArrowSchema struct is released (as per the C data interface
/// specification), even if this function fails.
///
/// \param[in,out] schema C data interface struct representing the schema
/// \return Imported schema object
ARROW_EXPORT
Result<std::shared_ptr<Schema>> ImportSchema(struct ArrowSchema* schema);
/// \brief Import C++ array from the C data interface.
///
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting array.
///
/// \param[in,out] array C data interface struct holding the array data
/// \param[in] type type of the imported array
/// \return Imported array object
ARROW_EXPORT
Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array,
                                           std::shared_ptr<DataType> type);
/// \brief Import C++ array and its type from the C data interface.
///
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting array.
/// The ArrowSchema struct is released, even if this function fails.
///
/// \param[in,out] array C data interface struct holding the array data
/// \param[in,out] type C data interface struct holding the array type
/// \return Imported array object
ARROW_EXPORT
Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array,
                                           struct ArrowSchema* type);
/// \brief Import C++ record batch from the C data interface.
///
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting record batch.
///
/// \param[in,out] array C data interface struct holding the record batch data
/// \param[in] schema schema of the imported record batch
/// \return Imported record batch object
ARROW_EXPORT
Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array,
                                                       std::shared_ptr<Schema> schema);
/// \brief Import C++ record batch and its schema from the C data interface.
///
/// The type represented by the ArrowSchema struct must be a struct type.
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting record batch.
/// The ArrowSchema struct is released, even if this function fails.
///
/// \param[in,out] array C data interface struct holding the record batch data
/// \param[in,out] schema C data interface struct holding the record batch schema
/// \return Imported record batch object
ARROW_EXPORT
Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array,
                                                       struct ArrowSchema* schema);
/// @} | |
/// \defgroup c-data-device-interface Functions for working with the C data device | |
/// interface. | |
/// | |
/// @{ | |
/// \brief EXPERIMENTAL: Export C++ Array as an ArrowDeviceArray.
///
/// The resulting ArrowDeviceArray struct keeps the array data and buffers alive
/// until its release callback is called by the consumer. All buffers in
/// the provided array MUST have the same device_type, otherwise an error
/// will be returned.
///
/// If sync is non-null, get_event will be called on it in order to
/// potentially provide an event for consumers to synchronize on.
///
/// \param[in] array Array object to export
/// \param[in] sync shared_ptr to object derived from Device::SyncEvent or null
/// \param[out] out C struct to export the array to
/// \param[out] out_schema optional C struct to export the array type to
ARROW_EXPORT
Status ExportDeviceArray(const Array& array, std::shared_ptr<Device::SyncEvent> sync,
                         struct ArrowDeviceArray* out,
                         struct ArrowSchema* out_schema = NULLPTR);
/// \brief EXPERIMENTAL: Export C++ RecordBatch as an ArrowDeviceArray.
///
/// The record batch is exported as if it were a struct array.
/// The resulting ArrowDeviceArray struct keeps the record batch data and buffers alive
/// until its release callback is called by the consumer.
///
/// All buffers of all columns in the record batch must have the same device_type
/// otherwise an error will be returned. If columns are on different devices,
/// they should be exported using different ArrowDeviceArray instances.
///
/// If sync is non-null, get_event will be called on it in order to
/// potentially provide an event for consumers to synchronize on.
///
/// \param[in] batch Record batch to export
/// \param[in] sync shared_ptr to object derived from Device::SyncEvent or null
/// \param[out] out C struct where to export the record batch
/// \param[out] out_schema optional C struct where to export the record batch schema
ARROW_EXPORT
Status ExportDeviceRecordBatch(const RecordBatch& batch,
                               std::shared_ptr<Device::SyncEvent> sync,
                               struct ArrowDeviceArray* out,
                               struct ArrowSchema* out_schema = NULLPTR);
using DeviceMemoryMapper = | |
std::function<Result<std::shared_ptr<MemoryManager>>(ArrowDeviceType, int64_t)>; | |
/// \brief Mapper used by the device import functions when no mapper is supplied.
///
/// The import functions document this default as mapping "cpu" to the built-in
/// default memory manager.
/// NOTE(review): the result for other device types is not visible from this
/// header -- confirm against the definition.
///
/// \param[in] device_type device type to look up
/// \param[in] device_id id of the device to look up
/// \return The memory manager for the device, or an error
ARROW_EXPORT
Result<std::shared_ptr<MemoryManager>> DefaultDeviceMemoryMapper(
    ArrowDeviceType device_type, int64_t device_id);
/// \brief EXPERIMENTAL: Import C++ device array from the C data interface.
///
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting array. The
/// buffers of the Array are located on the device indicated by the device_type.
///
/// \param[in,out] array C data interface struct holding the array data
/// \param[in] type type of the imported array
/// \param[in] mapper A function to map device + id to memory manager. If not
/// specified, defaults to map "cpu" to the built-in default memory manager.
/// \return Imported array object
ARROW_EXPORT
Result<std::shared_ptr<Array>> ImportDeviceArray(
    struct ArrowDeviceArray* array, std::shared_ptr<DataType> type,
    const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
/// \brief EXPERIMENTAL: Import C++ device array and its type from the C data interface.
///
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting array.
/// The ArrowSchema struct is released, even if this function fails. The
/// buffers of the Array are located on the device indicated by the device_type.
///
/// \param[in,out] array C data interface struct holding the array data
/// \param[in,out] type C data interface struct holding the array type
/// \param[in] mapper A function to map device + id to memory manager. If not
/// specified, defaults to map "cpu" to the built-in default memory manager.
/// \return Imported array object
ARROW_EXPORT
Result<std::shared_ptr<Array>> ImportDeviceArray(
    struct ArrowDeviceArray* array, struct ArrowSchema* type,
    const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
/// \brief EXPERIMENTAL: Import C++ record batch with buffers on a device from the C data
/// interface.
///
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting record batch.
/// The buffers of all columns of the record batch are located on the device
/// indicated by the device type.
///
/// \param[in,out] array C data interface struct holding the record batch data
/// \param[in] schema schema of the imported record batch
/// \param[in] mapper A function to map device + id to memory manager. If not
/// specified, defaults to map "cpu" to the built-in default memory manager.
/// \return Imported record batch object
ARROW_EXPORT
Result<std::shared_ptr<RecordBatch>> ImportDeviceRecordBatch(
    struct ArrowDeviceArray* array, std::shared_ptr<Schema> schema,
    const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
/// \brief EXPERIMENTAL: Import C++ record batch with buffers on a device and its schema
/// from the C data interface.
///
/// The type represented by the ArrowSchema struct must be a struct type.
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting record batch.
/// The ArrowSchema struct is released, even if this function fails. The buffers
/// of all columns of the record batch are located on the device indicated by the
/// device type.
///
/// \param[in,out] array C data interface struct holding the record batch data
/// \param[in,out] schema C data interface struct holding the record batch schema
/// \param[in] mapper A function to map device + id to memory manager. If not
/// specified, defaults to map "cpu" to the built-in default memory manager.
/// \return Imported record batch object
ARROW_EXPORT
Result<std::shared_ptr<RecordBatch>> ImportDeviceRecordBatch(
    struct ArrowDeviceArray* array, struct ArrowSchema* schema,
    const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
/// @} | |
/// \defgroup c-stream-interface Functions for working with the C stream interface.
/// | |
/// @{ | |
/// \brief Export C++ RecordBatchReader using the C stream interface.
///
/// The resulting ArrowArrayStream struct keeps the record batch reader alive
/// until its release callback is called by the consumer.
///
/// \param[in] reader RecordBatchReader object to export
/// \param[out] out C struct where to export the stream
ARROW_EXPORT
Status ExportRecordBatchReader(std::shared_ptr<RecordBatchReader> reader,
                               struct ArrowArrayStream* out);
/// \brief Export C++ ChunkedArray using the C stream interface.
///
/// The resulting ArrowArrayStream struct keeps the chunked array data and buffers alive
/// until its release callback is called by the consumer.
///
/// \param[in] chunked_array ChunkedArray object to export
/// \param[out] out C struct where to export the stream
ARROW_EXPORT
Status ExportChunkedArray(std::shared_ptr<ChunkedArray> chunked_array,
                          struct ArrowArrayStream* out);
/// \brief Export C++ RecordBatchReader using the C device stream interface.
///
/// The resulting ArrowDeviceArrayStream struct keeps the record batch reader
/// alive until its release callback is called by the consumer. The device
/// type is determined by calling device_type() on the RecordBatchReader.
///
/// \param[in] reader RecordBatchReader object to export
/// \param[out] out C struct to export the stream to
ARROW_EXPORT
Status ExportDeviceRecordBatchReader(std::shared_ptr<RecordBatchReader> reader,
                                     struct ArrowDeviceArrayStream* out);
/// \brief Export C++ ChunkedArray using the C device stream interface.
///
/// The resulting ArrowDeviceArrayStream keeps the chunked array data and buffers
/// alive until its release callback is called by the consumer.
///
/// \param[in] chunked_array ChunkedArray object to export
/// \param[in] device_type the device type the data is located on
/// \param[out] out C struct to export the stream to
ARROW_EXPORT
Status ExportDeviceChunkedArray(std::shared_ptr<ChunkedArray> chunked_array,
                                DeviceAllocationType device_type,
                                struct ArrowDeviceArrayStream* out);
/// \brief Import C++ RecordBatchReader from the C stream interface.
///
/// The ArrowArrayStream struct has its contents moved to a private object
/// held alive by the resulting record batch reader.
///
/// \param[in,out] stream C stream interface struct
/// \return Imported RecordBatchReader object
ARROW_EXPORT
Result<std::shared_ptr<RecordBatchReader>> ImportRecordBatchReader(
    struct ArrowArrayStream* stream);
/// \brief Import C++ ChunkedArray from the C stream interface.
///
/// The ArrowArrayStream struct has its contents moved to a private object,
/// is consumed in its entirety, and released before returning all chunks
/// as a ChunkedArray.
///
/// \param[in,out] stream C stream interface struct
/// \return Imported ChunkedArray object
ARROW_EXPORT
Result<std::shared_ptr<ChunkedArray>> ImportChunkedArray(struct ArrowArrayStream* stream);
/// \brief Import C++ RecordBatchReader from the C device stream interface.
///
/// The ArrowDeviceArrayStream struct has its contents moved to a private object
/// held alive by the resulting record batch reader.
///
/// \note If there was a required sync event, sync events are accessible by individual
/// buffers of columns. We are not yet bubbling the sync events from the buffers up to
/// the `GetSyncEvent` method of an imported RecordBatch. This will be added in a future
/// update.
///
/// \param[in,out] stream C device stream interface struct
/// \param[in] mapper mapping from device type and ID to memory manager
/// \return Imported RecordBatchReader object
ARROW_EXPORT
Result<std::shared_ptr<RecordBatchReader>> ImportDeviceRecordBatchReader(
    struct ArrowDeviceArrayStream* stream,
    const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
/// \brief Import C++ ChunkedArray from the C device stream interface.
///
/// The ArrowDeviceArrayStream struct has its contents moved to a private object,
/// is consumed in its entirety, and released before returning all chunks as a
/// ChunkedArray.
///
/// \note Any chunks that require synchronization for their device memory will have
/// the SyncEvent objects available by checking the individual buffers of each chunk.
/// These SyncEvents should be checked before accessing the data in those buffers.
///
/// \param[in,out] stream C device stream interface struct
/// \param[in] mapper mapping from device type and ID to memory manager
/// \return Imported ChunkedArray object
ARROW_EXPORT
Result<std::shared_ptr<ChunkedArray>> ImportDeviceChunkedArray(
    struct ArrowDeviceArrayStream* stream,
    const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
/// @} | |
/// \defgroup c-async-stream-interface Functions for working with the async C data | |
/// interface. | |
/// | |
/// @{ | |
/// \brief EXPERIMENTAL: AsyncErrorDetail is a StatusDetail that contains an error code | |
/// and message from an asynchronous operation. | |
class AsyncErrorDetail : public StatusDetail { | |
public: | |
AsyncErrorDetail(int code, std::string message, std::string metadata) | |
: code_(code), message_(std::move(message)), metadata_(std::move(metadata)) {} | |
const char* type_id() const override { return "AsyncErrorDetail"; } | |
// ToString just returns the error message that was returned with the error | |
std::string ToString() const override { return message_; } | |
// code is an errno-compatible error code | |
int code() const { return code_; } | |
// returns any metadata that was returned with the error, likely in a | |
// key-value format similar to ArrowSchema metadata | |
const std::string& ErrorMetadataString() const { return metadata_; } | |
std::shared_ptr<KeyValueMetadata> ErrorMetadata() const; | |
private: | |
int code_{0}; | |
std::string message_; | |
std::string metadata_; | |
}; | |
struct AsyncRecordBatchGenerator { | |
std::shared_ptr<Schema> schema; | |
DeviceAllocationType device_type; | |
AsyncGenerator<RecordBatchWithMetadata> generator; | |
}; | |
namespace internal {
// Forward declaration: this header only refers to Executor through a pointer.
class Executor;
}  // namespace internal
/// \brief EXPERIMENTAL: Create an AsyncRecordBatchReader and populate a corresponding
/// handler to pass to a producer
///
/// The ArrowAsyncDeviceStreamHandler struct is intended to have its callbacks populated
/// and then be passed to a producer to call the appropriate callbacks when data is ready.
/// This inverts the traditional flow of control, and so we construct a corresponding
/// AsyncRecordBatchGenerator to provide an interface for the consumer to retrieve data as
/// it is pushed to the handler.
///
/// \param[in,out] handler C struct to be populated
/// \param[in] executor the executor to use for waiting and populating record batches
/// \param[in] queue_size initial number of record batches to request for queueing
/// \param[in] mapper mapping from device type and ID to memory manager
/// \return Future that resolves to either an error or AsyncRecordBatchGenerator once a
/// schema is available or an error is received.
ARROW_EXPORT
Future<AsyncRecordBatchGenerator> CreateAsyncDeviceStreamHandler(
    struct ArrowAsyncDeviceStreamHandler* handler, internal::Executor* executor,
    uint64_t queue_size = 5, DeviceMemoryMapper mapper = DefaultDeviceMemoryMapper);
/// \brief EXPERIMENTAL: Export an AsyncGenerator of record batches using a provided
/// handler
///
/// This function calls the callbacks on the consumer-provided async handler as record
/// batches become available from the AsyncGenerator which is provided. It will first call
/// on_schema using the provided schema, and then serially visit each record batch from
/// the generator, calling the on_next_task callback. If an error occurs, on_error will be
/// called appropriately.
///
/// \param[in] schema the schema of the stream being exported
/// \param[in] generator a generator that asynchronously produces record batches
/// \param[in] device_type the device type that the record batches will be located on
/// \param[in] handler the handler whose callbacks to utilize as data is available
/// \return Future that will resolve once the generator is exhausted or an error occurs
ARROW_EXPORT
Future<> ExportAsyncRecordBatchReader(
    std::shared_ptr<Schema> schema,
    AsyncGenerator<std::shared_ptr<RecordBatch>> generator,
    DeviceAllocationType device_type, struct ArrowAsyncDeviceStreamHandler* handler);
/// @} | |
} // namespace arrow | |