|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import collections |
|
import warnings |
|
from uuid import UUID |
|
|
|
|
|
cdef class Scalar(_Weakrefable): |
|
""" |
|
The base class for scalars. |
|
""" |
|
|
|
def __init__(self): |
|
raise TypeError("Do not call {}'s constructor directly, use " |
|
"pa.scalar() instead.".format(self.__class__.__name__)) |
|
|
|
cdef void init(self, const shared_ptr[CScalar]& wrapped): |
|
self.wrapped = wrapped |
|
|
|
@staticmethod |
|
cdef wrap(const shared_ptr[CScalar]& wrapped): |
|
cdef: |
|
Scalar self |
|
Type type_id = wrapped.get().type.get().id() |
|
shared_ptr[CDataType] sp_data_type = wrapped.get().type |
|
|
|
if type_id == _Type_NA: |
|
return _NULL |
|
|
|
if type_id not in _scalar_classes: |
|
raise NotImplementedError( |
|
"Wrapping scalar of type " + frombytes(sp_data_type.get().ToString())) |
|
|
|
typ = get_scalar_class_from_type(sp_data_type) |
|
self = typ.__new__(typ) |
|
self.init(wrapped) |
|
|
|
return self |
|
|
|
cdef inline shared_ptr[CScalar] unwrap(self) nogil: |
|
return self.wrapped |
|
|
|
@property |
|
def type(self): |
|
""" |
|
Data type of the Scalar object. |
|
""" |
|
return pyarrow_wrap_data_type(self.wrapped.get().type) |
|
|
|
@property |
|
def is_valid(self): |
|
""" |
|
Holds a valid (non-null) value. |
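
        A brief illustrative example:

        Examples
        --------
        >>> import pyarrow as pa
        >>> pa.scalar(42).is_valid
        True
        >>> pa.scalar(None, type=pa.int64()).is_valid
        False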
|
""" |
|
return self.wrapped.get().is_valid |
|
|
|
def cast(self, object target_type=None, safe=None, options=None, memory_pool=None): |
|
""" |
|
Cast scalar value to another data type. |
|
|
|
See :func:`pyarrow.compute.cast` for usage. |
|
|
|
Parameters |
|
---------- |
|
target_type : DataType, default None |
|
Type to cast scalar to. |
|
safe : boolean, default True |
|
Whether to check for conversion errors such as overflow. |
|
options : CastOptions, default None |
|
            Additional options to control the cast, passed as a CastOptions instance.
        memory_pool : MemoryPool, optional
            Memory pool to use for allocations during function execution.
|
|
|
Returns |
|
------- |
|
        scalar : Scalar
            A Scalar of the given target data type.
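
        A brief illustrative example (output shown is representative):

        Examples
        --------
        >>> import pyarrow as pa
        >>> pa.scalar(4).cast(pa.string())
        <pyarrow.StringScalar: '4'>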
|
""" |
|
return _pc().cast(self, target_type, safe=safe, |
|
options=options, memory_pool=memory_pool) |
|
|
|
def validate(self, *, full=False): |
|
""" |
|
Perform validation checks. An exception is raised if validation fails. |
|
|
|
By default only cheap validation checks are run. Pass `full=True` |
|
for thorough validation checks (potentially O(n)). |
|
|
|
Parameters |
|
---------- |
|
full : bool, default False |
|
If True, run expensive checks, otherwise cheap checks only. |
|
|
|
Raises |
|
------ |
|
ArrowInvalid |
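
        A brief illustrative example (no output is produced when validation passes):

        Examples
        --------
        >>> import pyarrow as pa
        >>> pa.scalar(42).validate(full=True)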
|
""" |
|
if full: |
|
with nogil: |
|
check_status(self.wrapped.get().ValidateFull()) |
|
else: |
|
with nogil: |
|
check_status(self.wrapped.get().Validate()) |
|
|
|
def __repr__(self): |
|
return '<pyarrow.{}: {!r}>'.format( |
|
self.__class__.__name__, self.as_py() |
|
) |
|
|
|
def __str__(self): |
|
return str(self.as_py()) |
|
|
|
def equals(self, Scalar other not None): |
|
""" |
|
Parameters |
|
---------- |
|
other : pyarrow.Scalar |
|
|
|
Returns |
|
------- |
|
bool |
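
        A brief illustrative example:

        Examples
        --------
        >>> import pyarrow as pa
        >>> pa.scalar(1).equals(pa.scalar(1))
        True
        >>> pa.scalar(1).equals(pa.scalar(2))
        False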
|
""" |
|
return self.wrapped.get().Equals(other.unwrap().get()[0]) |
|
|
|
def __eq__(self, other): |
|
try: |
|
return self.equals(other) |
|
except TypeError: |
|
return NotImplemented |
|
|
|
def __hash__(self): |
|
cdef CScalarHash hasher |
|
return hasher(self.wrapped) |
|
|
|
def __reduce__(self): |
|
return scalar, (self.as_py(), self.type) |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python representation. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
The default behavior (`None`), is to convert Arrow Map arrays to |
|
Python association lists (list-of-tuples) in the same order as the |
|
Arrow Map, as in [(key1, value1), (key2, value2), ...]. |
|
|
|
If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. |
|
|
|
If 'lossy', whenever duplicate keys are detected, a warning will be printed. |
|
The last seen value of a duplicate key will be in the Python dictionary. |
|
If 'strict', this instead results in an exception being raised when detected. |
|
""" |
|
raise NotImplementedError() |
|
|
|
|
|
_NULL = NA = None |
|
|
|
|
|
cdef class NullScalar(Scalar): |
|
""" |
|
Concrete class for null scalars. |
|
""" |
|
|
|
def __cinit__(self): |
|
global NA |
|
if NA is not None: |
|
raise RuntimeError('Cannot create multiple NullScalar instances') |
|
self.init(shared_ptr[CScalar](new CNullScalar())) |
|
|
|
def __init__(self): |
|
pass |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python None. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
return None |
|
|
|
|
|
_NULL = NA = NullScalar() |
|
|
|
|
|
cdef class BooleanScalar(Scalar): |
|
""" |
|
Concrete class for boolean scalars. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python bool. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef CBooleanScalar* sp = <CBooleanScalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
|
|
cdef class UInt8Scalar(Scalar): |
|
""" |
|
Concrete class for uint8 scalars. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python int. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef CUInt8Scalar* sp = <CUInt8Scalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
|
|
cdef class Int8Scalar(Scalar): |
|
""" |
|
Concrete class for int8 scalars. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python int. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef CInt8Scalar* sp = <CInt8Scalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
|
|
cdef class UInt16Scalar(Scalar): |
|
""" |
|
Concrete class for uint16 scalars. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python int. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef CUInt16Scalar* sp = <CUInt16Scalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
|
|
cdef class Int16Scalar(Scalar): |
|
""" |
|
Concrete class for int16 scalars. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python int. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef CInt16Scalar* sp = <CInt16Scalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
|
|
cdef class UInt32Scalar(Scalar): |
|
""" |
|
Concrete class for uint32 scalars. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python int. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef CUInt32Scalar* sp = <CUInt32Scalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
|
|
cdef class Int32Scalar(Scalar): |
|
""" |
|
Concrete class for int32 scalars. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python int. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef CInt32Scalar* sp = <CInt32Scalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
|
|
cdef class UInt64Scalar(Scalar): |
|
""" |
|
Concrete class for uint64 scalars. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python int. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef CUInt64Scalar* sp = <CUInt64Scalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
|
|
cdef class Int64Scalar(Scalar): |
|
""" |
|
Concrete class for int64 scalars. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python int. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef CInt64Scalar* sp = <CInt64Scalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
|
|
cdef class HalfFloatScalar(Scalar): |
|
""" |
|
    Concrete class for half-float (float16) scalars.
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python float. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef CHalfFloatScalar* sp = <CHalfFloatScalar*> self.wrapped.get() |
|
return PyHalf_FromHalf(sp.value) if sp.is_valid else None |
|
|
|
|
|
cdef class FloatScalar(Scalar): |
|
""" |
|
Concrete class for float scalars. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python float. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef CFloatScalar* sp = <CFloatScalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
|
|
cdef class DoubleScalar(Scalar): |
|
""" |
|
Concrete class for double scalars. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python float. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef CDoubleScalar* sp = <CDoubleScalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
|
|
cdef class Decimal32Scalar(Scalar): |
|
""" |
|
Concrete class for decimal32 scalars. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python Decimal. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef: |
|
CDecimal32Scalar* sp = <CDecimal32Scalar*> self.wrapped.get() |
|
CDecimal32Type* dtype = <CDecimal32Type*> sp.type.get() |
|
if sp.is_valid: |
|
return _pydecimal.Decimal( |
|
frombytes(sp.value.ToString(dtype.scale())) |
|
) |
|
else: |
|
return None |
|
|
|
|
|
cdef class Decimal64Scalar(Scalar): |
|
""" |
|
Concrete class for decimal64 scalars. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python Decimal. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef: |
|
CDecimal64Scalar* sp = <CDecimal64Scalar*> self.wrapped.get() |
|
CDecimal64Type* dtype = <CDecimal64Type*> sp.type.get() |
|
if sp.is_valid: |
|
return _pydecimal.Decimal( |
|
frombytes(sp.value.ToString(dtype.scale())) |
|
) |
|
else: |
|
return None |
|
|
|
|
|
cdef class Decimal128Scalar(Scalar): |
|
""" |
|
Concrete class for decimal128 scalars. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python Decimal. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
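
        A brief illustrative example:

        Examples
        --------
        >>> import decimal
        >>> import pyarrow as pa
        >>> pa.scalar(decimal.Decimal("1.25")).as_py()
        Decimal('1.25')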
|
""" |
|
cdef: |
|
CDecimal128Scalar* sp = <CDecimal128Scalar*> self.wrapped.get() |
|
CDecimal128Type* dtype = <CDecimal128Type*> sp.type.get() |
|
if sp.is_valid: |
|
return _pydecimal.Decimal( |
|
frombytes(sp.value.ToString(dtype.scale())) |
|
) |
|
else: |
|
return None |
|
|
|
|
|
cdef class Decimal256Scalar(Scalar): |
|
""" |
|
Concrete class for decimal256 scalars. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python Decimal. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef: |
|
CDecimal256Scalar* sp = <CDecimal256Scalar*> self.wrapped.get() |
|
CDecimal256Type* dtype = <CDecimal256Type*> sp.type.get() |
|
if sp.is_valid: |
|
return _pydecimal.Decimal( |
|
frombytes(sp.value.ToString(dtype.scale())) |
|
) |
|
else: |
|
return None |
|
|
|
|
|
cdef class Date32Scalar(Scalar): |
|
""" |
|
Concrete class for date32 scalars. |
|
""" |
|
|
|
@property |
|
def value(self): |
|
cdef CDate32Scalar* sp = <CDate32Scalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
        Return this value as a Python datetime.date instance.
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
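
        A brief illustrative example:

        Examples
        --------
        >>> import datetime
        >>> import pyarrow as pa
        >>> pa.scalar(datetime.date(2021, 1, 1)).as_py()
        datetime.date(2021, 1, 1)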
|
""" |
|
cdef CDate32Scalar* sp = <CDate32Scalar*> self.wrapped.get() |
|
|
|
if sp.is_valid: |
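            # sp.value holds the number of days since the UNIX epoch (1970-01-01)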
|
|
|
return ( |
|
datetime.date(1970, 1, 1) + datetime.timedelta(days=sp.value) |
|
) |
|
else: |
|
return None |
|
|
|
|
|
cdef class Date64Scalar(Scalar): |
|
""" |
|
Concrete class for date64 scalars. |
|
""" |
|
|
|
@property |
|
def value(self): |
|
cdef CDate64Scalar* sp = <CDate64Scalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
        Return this value as a Python datetime.date instance.
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef CDate64Scalar* sp = <CDate64Scalar*> self.wrapped.get() |
|
|
|
if sp.is_valid: |
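            # sp.value holds milliseconds since the UNIX epoch (86400000 ms per day)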
|
return ( |
|
datetime.date(1970, 1, 1) + |
|
datetime.timedelta(days=sp.value / 86400000) |
|
) |
|
else: |
|
return None |
|
|
|
|
|
def _datetime_from_int(int64_t value, TimeUnit unit, tzinfo=None): |
|
if unit == TimeUnit_SECOND: |
|
delta = datetime.timedelta(seconds=value) |
|
elif unit == TimeUnit_MILLI: |
|
delta = datetime.timedelta(milliseconds=value) |
|
elif unit == TimeUnit_MICRO: |
|
delta = datetime.timedelta(microseconds=value) |
|
else: |
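        # nanosecond unit: prefer pandas.Timestamp, which preserves full precision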
|
|
|
if _pandas_api.have_pandas: |
|
return _pandas_api.pd.Timestamp(value, tz=tzinfo, unit='ns') |
|
|
|
if value % 1000 != 0: |
|
raise ValueError( |
|
"Nanosecond resolution temporal type {} is not safely " |
|
"convertible to microseconds to convert to datetime.datetime. " |
|
"Install pandas to return as Timestamp with nanosecond " |
|
"support or access the .value attribute.".format(value) |
|
) |
|
delta = datetime.timedelta(microseconds=value // 1000) |
|
|
|
dt = datetime.datetime(1970, 1, 1) + delta |
|
|
|
if tzinfo is not None: |
|
dt = dt.replace(tzinfo=datetime.timezone.utc).astimezone(tzinfo) |
|
|
|
return dt |
|
|
|
|
|
cdef class Time32Scalar(Scalar): |
|
""" |
|
Concrete class for time32 scalars. |
|
""" |
|
|
|
@property |
|
def value(self): |
|
cdef CTime32Scalar* sp = <CTime32Scalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
        Return this value as a Python datetime.time instance.
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef: |
|
CTime32Scalar* sp = <CTime32Scalar*> self.wrapped.get() |
|
CTime32Type* dtype = <CTime32Type*> sp.type.get() |
|
|
|
if sp.is_valid: |
|
return _datetime_from_int(sp.value, unit=dtype.unit()).time() |
|
else: |
|
return None |
|
|
|
|
|
cdef class Time64Scalar(Scalar): |
|
""" |
|
Concrete class for time64 scalars. |
|
""" |
|
|
|
@property |
|
def value(self): |
|
cdef CTime64Scalar* sp = <CTime64Scalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
        Return this value as a Python datetime.time instance.
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef: |
|
CTime64Scalar* sp = <CTime64Scalar*> self.wrapped.get() |
|
CTime64Type* dtype = <CTime64Type*> sp.type.get() |
|
|
|
if sp.is_valid: |
|
return _datetime_from_int(sp.value, unit=dtype.unit()).time() |
|
else: |
|
return None |
|
|
|
|
|
cdef class TimestampScalar(Scalar): |
|
""" |
|
Concrete class for timestamp scalars. |
|
""" |
|
|
|
@property |
|
def value(self): |
|
cdef CTimestampScalar* sp = <CTimestampScalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Pandas Timestamp instance (if units are |
|
nanoseconds and pandas is available), otherwise as a Python |
|
datetime.datetime instance. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
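
        A brief illustrative example:

        Examples
        --------
        >>> import datetime
        >>> import pyarrow as pa
        >>> ts = pa.scalar(datetime.datetime(2021, 1, 1), type=pa.timestamp("s"))
        >>> ts.as_py()
        datetime.datetime(2021, 1, 1, 0, 0)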
|
""" |
|
cdef: |
|
CTimestampScalar* sp = <CTimestampScalar*> self.wrapped.get() |
|
CTimestampType* dtype = <CTimestampType*> sp.type.get() |
|
|
|
if not sp.is_valid: |
|
return None |
|
|
|
if not dtype.timezone().empty(): |
|
tzinfo = string_to_tzinfo(frombytes(dtype.timezone())) |
|
else: |
|
tzinfo = None |
|
|
|
return _datetime_from_int(sp.value, unit=dtype.unit(), tzinfo=tzinfo) |
|
|
|
def __repr__(self): |
|
""" |
|
        Return the representation of TimestampScalar using `strftime`, so that
        values outside the range supported by datetime can still be rendered.
|
""" |
|
cdef: |
|
CTimestampScalar* sp = <CTimestampScalar*> self.wrapped.get() |
|
CTimestampType* dtype = <CTimestampType*> sp.type.get() |
|
|
|
if not dtype.timezone().empty(): |
|
type_format = str(_pc().strftime(self, format="%Y-%m-%dT%H:%M:%S%z")) |
|
else: |
|
type_format = str(_pc().strftime(self)) |
|
return '<pyarrow.{}: {!r}>'.format( |
|
self.__class__.__name__, type_format |
|
) |
|
|
|
|
|
cdef class DurationScalar(Scalar): |
|
""" |
|
Concrete class for duration scalars. |
|
""" |
|
|
|
@property |
|
def value(self): |
|
cdef CDurationScalar* sp = <CDurationScalar*> self.wrapped.get() |
|
return sp.value if sp.is_valid else None |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Pandas Timedelta instance (if units are |
|
nanoseconds and pandas is available), otherwise as a Python |
|
datetime.timedelta instance. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
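
        A brief illustrative example:

        Examples
        --------
        >>> import datetime
        >>> import pyarrow as pa
        >>> d = pa.scalar(datetime.timedelta(seconds=90), type=pa.duration("s"))
        >>> d.as_py()
        datetime.timedelta(seconds=90)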
|
""" |
|
cdef: |
|
CDurationScalar* sp = <CDurationScalar*> self.wrapped.get() |
|
CDurationType* dtype = <CDurationType*> sp.type.get() |
|
TimeUnit unit = dtype.unit() |
|
|
|
if not sp.is_valid: |
|
return None |
|
|
|
if unit == TimeUnit_SECOND: |
|
return datetime.timedelta(seconds=sp.value) |
|
elif unit == TimeUnit_MILLI: |
|
return datetime.timedelta(milliseconds=sp.value) |
|
elif unit == TimeUnit_MICRO: |
|
return datetime.timedelta(microseconds=sp.value) |
|
else: |
|
|
|
if _pandas_api.have_pandas: |
|
return _pandas_api.pd.Timedelta(sp.value, unit='ns') |
|
|
|
if sp.value % 1000 != 0: |
|
raise ValueError( |
|
"Nanosecond duration {} is not safely convertible to " |
|
"microseconds to convert to datetime.timedelta. Install " |
|
"pandas to return as Timedelta with nanosecond support or " |
|
"access the .value attribute.".format(sp.value) |
|
) |
|
return datetime.timedelta(microseconds=sp.value // 1000) |
|
|
|
|
|
cdef class MonthDayNanoIntervalScalar(Scalar): |
|
""" |
|
Concrete class for month, day, nanosecond interval scalars. |
|
""" |
|
|
|
@property |
|
def value(self): |
|
""" |
|
Same as self.as_py() |
|
""" |
|
return self.as_py() |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a pyarrow.MonthDayNano. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
cdef: |
|
PyObject* val |
|
CMonthDayNanoIntervalScalar* scalar |
|
scalar = <CMonthDayNanoIntervalScalar*>self.wrapped.get() |
|
val = GetResultValue(MonthDayNanoIntervalScalarToPyObject( |
|
deref(scalar))) |
|
return PyObject_to_object(val) |
|
|
|
|
|
cdef class BinaryScalar(Scalar): |
|
""" |
|
Concrete class for binary-like scalars. |
|
""" |
|
|
|
def as_buffer(self): |
|
""" |
|
Return a view over this value as a Buffer object. |
|
""" |
|
cdef CBaseBinaryScalar* sp = <CBaseBinaryScalar*> self.wrapped.get() |
|
return pyarrow_wrap_buffer(sp.value) if sp.is_valid else None |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python bytes. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
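
        A brief illustrative example:

        Examples
        --------
        >>> import pyarrow as pa
        >>> pa.scalar(b"abc").as_py()
        b'abc'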
|
""" |
|
buffer = self.as_buffer() |
|
return None if buffer is None else buffer.to_pybytes() |
|
|
|
|
|
cdef class LargeBinaryScalar(BinaryScalar): |
|
pass |
|
|
|
|
|
cdef class FixedSizeBinaryScalar(BinaryScalar): |
|
pass |
|
|
|
|
|
cdef class StringScalar(BinaryScalar): |
|
""" |
|
Concrete class for string-like (utf8) scalars. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python string. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
buffer = self.as_buffer() |
|
return None if buffer is None else str(buffer, 'utf8') |
|
|
|
|
|
cdef class LargeStringScalar(StringScalar): |
|
pass |
|
|
|
|
|
cdef class BinaryViewScalar(BinaryScalar): |
|
pass |
|
|
|
|
|
cdef class StringViewScalar(StringScalar): |
|
pass |
|
|
|
|
|
cdef class ListScalar(Scalar): |
|
""" |
|
Concrete class for list-like scalars. |
|
""" |
|
|
|
@property |
|
def values(self): |
|
cdef CBaseListScalar* sp = <CBaseListScalar*> self.wrapped.get() |
|
if sp.is_valid: |
|
return pyarrow_wrap_array(sp.value) |
|
else: |
|
return None |
|
|
|
def __len__(self): |
|
""" |
|
Return the number of values. |
|
""" |
|
return len(self.values) |
|
|
|
def __getitem__(self, i): |
|
""" |
|
Return the value at the given index. |
|
""" |
|
return self.values[_normalize_index(i, len(self))] |
|
|
|
def __iter__(self): |
|
""" |
|
Iterate over this element's values. |
|
""" |
|
return iter(self.values) |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python list. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
The default behavior (`None`), is to convert Arrow Map arrays to |
|
Python association lists (list-of-tuples) in the same order as the |
|
Arrow Map, as in [(key1, value1), (key2, value2), ...]. |
|
|
|
If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. |
|
|
|
If 'lossy', whenever duplicate keys are detected, a warning will be printed. |
|
The last seen value of a duplicate key will be in the Python dictionary. |
|
If 'strict', this instead results in an exception being raised when detected. |
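
        A brief illustrative example:

        Examples
        --------
        >>> import pyarrow as pa
        >>> pa.scalar([1, 2, 3]).as_py()
        [1, 2, 3]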
|
""" |
|
arr = self.values |
|
return None if arr is None else arr.to_pylist(maps_as_pydicts=maps_as_pydicts) |
|
|
|
|
|
cdef class FixedSizeListScalar(ListScalar): |
|
pass |
|
|
|
|
|
cdef class LargeListScalar(ListScalar): |
|
pass |
|
|
|
|
|
cdef class ListViewScalar(ListScalar): |
|
pass |
|
|
|
|
|
cdef class LargeListViewScalar(ListScalar): |
|
pass |
|
|
|
|
|
cdef class StructScalar(Scalar, collections.abc.Mapping): |
|
""" |
|
Concrete class for struct scalars. |
|
""" |
|
|
|
def __len__(self): |
|
cdef CStructScalar* sp = <CStructScalar*> self.wrapped.get() |
|
return sp.value.size() |
|
|
|
def __iter__(self): |
|
cdef: |
|
CStructScalar* sp = <CStructScalar*> self.wrapped.get() |
|
CStructType* dtype = <CStructType*> sp.type.get() |
|
vector[shared_ptr[CField]] fields = dtype.fields() |
|
|
|
for i in range(dtype.num_fields()): |
|
yield frombytes(fields[i].get().name()) |
|
|
|
def items(self): |
|
return ((key, self[i]) for i, key in enumerate(self)) |
|
|
|
def __contains__(self, key): |
|
return key in list(self) |
|
|
|
def __getitem__(self, key): |
|
""" |
|
Return the child value for the given field. |
|
|
|
Parameters |
|
---------- |
|
        key : Union[int, str]
|
Index / position or name of the field. |
|
|
|
Returns |
|
------- |
|
result : Scalar |
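
        A brief illustrative example:

        Examples
        --------
        >>> import pyarrow as pa
        >>> s = pa.scalar({"x": 1, "y": True})
        >>> s["x"]
        <pyarrow.Int64Scalar: 1>
        >>> s[1]
        <pyarrow.BooleanScalar: True>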
|
""" |
|
cdef: |
|
CFieldRef ref |
|
CStructScalar* sp = <CStructScalar*> self.wrapped.get() |
|
|
|
if isinstance(key, (bytes, str)): |
|
ref = CFieldRef(<c_string> tobytes(key)) |
|
elif isinstance(key, int): |
|
ref = CFieldRef(<int> key) |
|
else: |
|
raise TypeError('Expected integer or string index') |
|
|
|
try: |
|
return Scalar.wrap(GetResultValue(sp.field(ref))) |
|
except ArrowInvalid as exc: |
|
if isinstance(key, int): |
|
raise IndexError(key) from exc |
|
else: |
|
raise KeyError(key) from exc |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python dict. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
The default behavior (`None`), is to convert Arrow Map arrays to |
|
Python association lists (list-of-tuples) in the same order as the |
|
Arrow Map, as in [(key1, value1), (key2, value2), ...]. |
|
|
|
If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. |
|
|
|
If 'lossy', whenever duplicate keys are detected, a warning will be printed. |
|
The last seen value of a duplicate key will be in the Python dictionary. |
|
If 'strict', this instead results in an exception being raised when detected. |
|
""" |
|
if self.is_valid: |
|
try: |
|
return {k: self[k].as_py(maps_as_pydicts=maps_as_pydicts) for k in self.keys()} |
|
except KeyError: |
|
raise ValueError( |
|
"Converting to Python dictionary is not supported when " |
|
"duplicate field names are present") |
|
else: |
|
return None |
|
|
|
def _as_py_tuple(self): |
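        # Return the struct fields as (name, value) pairs; unlike as_py(),
        # this tolerates duplicate field names.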
|
|
|
|
|
if self.is_valid: |
|
return [(key, self[i].as_py()) for i, key in enumerate(self)] |
|
else: |
|
return None |
|
|
|
def __repr__(self): |
|
return '<pyarrow.{}: {!r}>'.format( |
|
self.__class__.__name__, self._as_py_tuple() |
|
) |
|
|
|
def __str__(self): |
|
return str(self._as_py_tuple()) |
|
|
|
|
|
cdef class MapScalar(ListScalar): |
|
""" |
|
Concrete class for map scalars. |
|
""" |
|
|
|
def __getitem__(self, i): |
|
""" |
|
Return the value at the given index. |
|
""" |
|
arr = self.values |
|
if arr is None: |
|
raise IndexError(i) |
|
dct = arr[_normalize_index(i, len(arr))] |
|
return (dct[self.type.key_field.name], dct[self.type.item_field.name]) |
|
|
|
def __iter__(self): |
|
""" |
|
Iterate over this element's values. |
|
""" |
|
arr = self.values |
|
if arr is None: |
|
return |
|
for k, v in zip(arr.field(self.type.key_field.name), arr.field(self.type.item_field.name)): |
|
yield (k.as_py(), v.as_py()) |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this value as a Python list or dict, depending on 'maps_as_pydicts'. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
The default behavior (`None`), is to convert Arrow Map arrays to |
|
Python association lists (list-of-tuples) in the same order as the |
|
Arrow Map, as in [(key1, value1), (key2, value2), ...]. |
|
|
|
If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. |
|
|
|
If 'lossy', whenever duplicate keys are detected, a warning will be printed. |
|
The last seen value of a duplicate key will be in the Python dictionary. |
|
If 'strict', this instead results in an exception being raised when detected. |
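
        A brief illustrative example:

        Examples
        --------
        >>> import pyarrow as pa
        >>> ty = pa.map_(pa.string(), pa.int64())
        >>> m = pa.scalar([("a", 1), ("b", 2)], type=ty)
        >>> m.as_py()
        [('a', 1), ('b', 2)]
        >>> m.as_py(maps_as_pydicts="strict")
        {'a': 1, 'b': 2}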
|
""" |
|
if maps_as_pydicts not in (None, "lossy", "strict"): |
|
raise ValueError( |
|
"Invalid value for 'maps_as_pydicts': " |
|
+ "valid values are 'lossy', 'strict' or `None` (default). " |
|
+ f"Received {maps_as_pydicts!r}." |
|
) |
|
if not self.is_valid: |
|
return None |
|
if not maps_as_pydicts: |
|
return list(self) |
|
result_dict = {} |
|
for key, value in self: |
|
if key in result_dict: |
|
if maps_as_pydicts == "strict": |
|
raise KeyError( |
|
"Converting to Python dictionary is not supported in strict mode " |
|
f"when duplicate keys are present (duplicate key was '{key}')." |
|
) |
|
else: |
|
warnings.warn( |
|
f"Encountered key '{key}' which was already encountered.") |
|
result_dict[key] = value |
|
return result_dict |
|
|
|
|
|
cdef class DictionaryScalar(Scalar): |
|
""" |
|
Concrete class for dictionary-encoded scalars. |
|
""" |
|
|
|
@staticmethod |
|
def _reconstruct(type, is_valid, index, dictionary): |
|
cdef: |
|
CDictionaryScalarIndexAndDictionary value |
|
shared_ptr[CDictionaryScalar] wrapped |
|
DataType type_ |
|
Scalar index_ |
|
Array dictionary_ |
|
|
|
type_ = ensure_type(type, allow_none=False) |
|
if not isinstance(type_, DictionaryType): |
|
raise TypeError('Must pass a DictionaryType instance') |
|
|
|
if isinstance(index, Scalar): |
|
if not index.type.equals(type.index_type): |
|
raise TypeError("The Scalar value passed as index must have " |
|
"identical type to the dictionary type's " |
|
"index_type") |
|
index_ = index |
|
else: |
|
index_ = scalar(index, type=type_.index_type) |
|
|
|
if isinstance(dictionary, Array): |
|
if not dictionary.type.equals(type.value_type): |
|
raise TypeError("The Array passed as dictionary must have " |
|
"identical type to the dictionary type's " |
|
"value_type") |
|
dictionary_ = dictionary |
|
else: |
|
dictionary_ = array(dictionary, type=type_.value_type) |
|
|
|
value.index = pyarrow_unwrap_scalar(index_) |
|
value.dictionary = pyarrow_unwrap_array(dictionary_) |
|
|
|
wrapped = make_shared[CDictionaryScalar]( |
|
value, pyarrow_unwrap_data_type(type_), <c_bool>(is_valid) |
|
) |
|
return Scalar.wrap(<shared_ptr[CScalar]> wrapped) |
|
|
|
def __reduce__(self): |
|
return DictionaryScalar._reconstruct, ( |
|
self.type, self.is_valid, self.index, self.dictionary |
|
) |
|
|
|
@property |
|
def index(self): |
|
""" |
|
Return this value's underlying index as a scalar. |
|
""" |
|
cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get() |
|
return Scalar.wrap(sp.value.index) |
|
|
|
@property |
|
def value(self): |
|
""" |
|
Return the encoded value as a scalar. |
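
        A brief illustrative example (the scalar is taken from a
        dictionary-encoded array):

        Examples
        --------
        >>> import pyarrow as pa
        >>> arr = pa.array(["a", "b", "a"]).dictionary_encode()
        >>> arr[0].value
        <pyarrow.StringScalar: 'a'>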
|
""" |
|
cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get() |
|
return Scalar.wrap(GetResultValue(sp.GetEncodedValue())) |
|
|
|
@property |
|
def dictionary(self): |
|
cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get() |
|
return pyarrow_wrap_array(sp.value.dictionary) |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this encoded value as a Python object. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
The default behavior (`None`), is to convert Arrow Map arrays to |
|
Python association lists (list-of-tuples) in the same order as the |
|
Arrow Map, as in [(key1, value1), (key2, value2), ...]. |
|
|
|
If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. |
|
|
|
If 'lossy', whenever duplicate keys are detected, a warning will be printed. |
|
The last seen value of a duplicate key will be in the Python dictionary. |
|
If 'strict', this instead results in an exception being raised when detected. |
|
""" |
|
return self.value.as_py(maps_as_pydicts=maps_as_pydicts) if self.is_valid else None |
|
|
|
|
|
cdef class RunEndEncodedScalar(Scalar): |
|
""" |
|
Concrete class for RunEndEncoded scalars. |
|
""" |
|
@property |
|
def value(self): |
|
""" |
|
Return underlying value as a scalar. |
|
""" |
|
cdef CRunEndEncodedScalar* sp = <CRunEndEncodedScalar*> self.wrapped.get() |
|
return Scalar.wrap(sp.value) |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return underlying value as a Python object. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
The default behavior (`None`), is to convert Arrow Map arrays to |
|
Python association lists (list-of-tuples) in the same order as the |
|
Arrow Map, as in [(key1, value1), (key2, value2), ...]. |
|
|
|
If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. |
|
|
|
If 'lossy', whenever duplicate keys are detected, a warning will be printed. |
|
The last seen value of a duplicate key will be in the Python dictionary. |
|
If 'strict', this instead results in an exception being raised when detected. |
|
""" |
|
return self.value.as_py(maps_as_pydicts=maps_as_pydicts) |
|
|
|
|
|
cdef class UnionScalar(Scalar): |
|
""" |
|
Concrete class for Union scalars. |
|
""" |
|
|
|
@property |
|
def value(self): |
|
""" |
|
Return underlying value as a scalar. |
|
""" |
|
cdef CSparseUnionScalar* sp |
|
cdef CDenseUnionScalar* dp |
|
if self.type.id == _Type_SPARSE_UNION: |
|
sp = <CSparseUnionScalar*> self.wrapped.get() |
|
return Scalar.wrap(sp.value[sp.child_id]) if sp.is_valid else None |
|
else: |
|
dp = <CDenseUnionScalar*> self.wrapped.get() |
|
return Scalar.wrap(dp.value) if dp.is_valid else None |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return underlying value as a Python object. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
The default behavior (`None`), is to convert Arrow Map arrays to |
|
Python association lists (list-of-tuples) in the same order as the |
|
Arrow Map, as in [(key1, value1), (key2, value2), ...]. |
|
|
|
If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. |
|
|
|
If 'lossy', whenever duplicate keys are detected, a warning will be printed. |
|
The last seen value of a duplicate key will be in the Python dictionary. |
|
If 'strict', this instead results in an exception being raised when detected. |
|
""" |
|
value = self.value |
|
return None if value is None else value.as_py(maps_as_pydicts=maps_as_pydicts) |
|
|
|
@property |
|
def type_code(self): |
|
""" |
|
Return the union type code for this scalar. |
|
""" |
|
cdef CUnionScalar* sp = <CUnionScalar*> self.wrapped.get() |
|
return sp.type_code |
|
|
|
|
|
cdef class ExtensionScalar(Scalar): |
|
""" |
|
Concrete class for Extension scalars. |
|
""" |
|
|
|
@property |
|
def value(self): |
|
""" |
|
        Return the storage value as a scalar.
|
""" |
|
cdef CExtensionScalar* sp = <CExtensionScalar*> self.wrapped.get() |
|
return Scalar.wrap(sp.value) if sp.is_valid else None |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this scalar as a Python object. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
The default behavior (`None`), is to convert Arrow Map arrays to |
|
Python association lists (list-of-tuples) in the same order as the |
|
Arrow Map, as in [(key1, value1), (key2, value2), ...]. |
|
|
|
If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. |
|
|
|
If 'lossy', whenever duplicate keys are detected, a warning will be printed. |
|
The last seen value of a duplicate key will be in the Python dictionary. |
|
If 'strict', this instead results in an exception being raised when detected. |
|
""" |
|
return None if self.value is None else self.value.as_py(maps_as_pydicts=maps_as_pydicts) |
|
|
|
@staticmethod |
|
def from_storage(BaseExtensionType typ, value): |
|
""" |
|
Construct ExtensionScalar from type and storage value. |
|
|
|
Parameters |
|
---------- |
|
typ : DataType |
|
The extension type for the result scalar. |
|
value : object |
|
The storage value for the result scalar. |
|
|
|
Returns |
|
------- |
|
ext_scalar : ExtensionScalar |
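
        A minimal sketch using the built-in UUID extension type (assuming
        ``pa.uuid()`` is available in this build):

        Examples
        --------
        >>> import pyarrow as pa
        >>> ty = pa.uuid()
        >>> s = pa.ExtensionScalar.from_storage(ty, b"0123456789abcdef")
        >>> s.is_valid
        True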
|
""" |
|
cdef: |
|
shared_ptr[CExtensionScalar] sp_scalar |
|
shared_ptr[CScalar] sp_storage |
|
CExtensionScalar* ext_scalar |
|
|
|
if value is None: |
|
storage = None |
|
elif isinstance(value, Scalar): |
|
if value.type != typ.storage_type: |
|
raise TypeError("Incompatible storage type {0} " |
|
"for extension type {1}" |
|
.format(value.type, typ)) |
|
storage = value |
|
else: |
|
storage = scalar(value, typ.storage_type) |
|
|
|
cdef c_bool is_valid = storage is not None and storage.is_valid |
|
if is_valid: |
|
sp_storage = pyarrow_unwrap_scalar(storage) |
|
else: |
|
sp_storage = MakeNullScalar((<DataType> typ.storage_type).sp_type) |
|
sp_scalar = make_shared[CExtensionScalar](sp_storage, typ.sp_type, |
|
is_valid) |
|
with nogil: |
|
check_status(sp_scalar.get().Validate()) |
|
return pyarrow_wrap_scalar(<shared_ptr[CScalar]> sp_scalar) |
|
|
|
|
|
class JsonScalar(ExtensionScalar): |
|
""" |
|
Concrete class for JSON extension scalar. |
|
""" |
|
|
|
|
|
class UuidScalar(ExtensionScalar): |
|
""" |
|
Concrete class for Uuid extension scalar. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this scalar as a Python UUID. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
return None if self.value is None else UUID(bytes=self.value.as_py()) |
|
|
|
|
|
cdef class FixedShapeTensorScalar(ExtensionScalar): |
|
""" |
|
Concrete class for fixed shape tensor extension scalar. |
|
""" |
|
|
|
def to_numpy(self): |
|
""" |
|
Convert fixed shape tensor scalar to a numpy.ndarray. |
|
|
|
The resulting ndarray's shape matches the permuted shape of the |
|
fixed shape tensor scalar. |
|
The conversion is zero-copy. |
|
|
|
Returns |
|
------- |
|
numpy.ndarray |
|
""" |
|
return self.to_tensor().to_numpy() |
|
|
|
def to_tensor(self): |
|
""" |
|
Convert fixed shape tensor extension scalar to a pyarrow.Tensor, using shape |
|
and strides derived from corresponding FixedShapeTensorType. |
|
|
|
The conversion is zero-copy. |
|
|
|
Returns |
|
------- |
|
pyarrow.Tensor |
|
            The tensor stored in this FixedShapeTensorScalar.
|
""" |
|
cdef: |
|
CFixedShapeTensorType* c_type = static_pointer_cast[CFixedShapeTensorType, CDataType]( |
|
self.wrapped.get().type).get() |
|
shared_ptr[CExtensionScalar] scalar = static_pointer_cast[CExtensionScalar, CScalar](self.wrapped) |
|
shared_ptr[CTensor] ctensor |
|
|
|
with nogil: |
|
ctensor = GetResultValue(c_type.MakeTensor(scalar)) |
|
return pyarrow_wrap_tensor(ctensor) |
|
|
|
|
|
cdef class OpaqueScalar(ExtensionScalar): |
|
""" |
|
Concrete class for opaque extension scalar. |
|
""" |
|
|
|
|
|
cdef class Bool8Scalar(ExtensionScalar): |
|
""" |
|
Concrete class for bool8 extension scalar. |
|
""" |
|
|
|
def as_py(self, *, maps_as_pydicts=None): |
|
""" |
|
Return this scalar as a Python object. |
|
|
|
Parameters |
|
---------- |
|
maps_as_pydicts : str, optional, default `None` |
|
Valid values are `None`, 'lossy', or 'strict'. |
|
This parameter is ignored for non-nested Scalars. |
|
""" |
|
py_val = super().as_py() |
|
return None if py_val is None else py_val != 0 |
|
|
|
cdef dict _scalar_classes = { |
|
_Type_BOOL: BooleanScalar, |
|
_Type_UINT8: UInt8Scalar, |
|
_Type_UINT16: UInt16Scalar, |
|
_Type_UINT32: UInt32Scalar, |
|
_Type_UINT64: UInt64Scalar, |
|
_Type_INT8: Int8Scalar, |
|
_Type_INT16: Int16Scalar, |
|
_Type_INT32: Int32Scalar, |
|
_Type_INT64: Int64Scalar, |
|
_Type_HALF_FLOAT: HalfFloatScalar, |
|
_Type_FLOAT: FloatScalar, |
|
_Type_DOUBLE: DoubleScalar, |
|
_Type_DECIMAL32: Decimal32Scalar, |
|
_Type_DECIMAL64: Decimal64Scalar, |
|
_Type_DECIMAL128: Decimal128Scalar, |
|
_Type_DECIMAL256: Decimal256Scalar, |
|
_Type_DATE32: Date32Scalar, |
|
_Type_DATE64: Date64Scalar, |
|
_Type_TIME32: Time32Scalar, |
|
_Type_TIME64: Time64Scalar, |
|
_Type_TIMESTAMP: TimestampScalar, |
|
_Type_DURATION: DurationScalar, |
|
_Type_BINARY: BinaryScalar, |
|
_Type_LARGE_BINARY: LargeBinaryScalar, |
|
_Type_FIXED_SIZE_BINARY: FixedSizeBinaryScalar, |
|
_Type_BINARY_VIEW: BinaryViewScalar, |
|
_Type_STRING: StringScalar, |
|
_Type_LARGE_STRING: LargeStringScalar, |
|
_Type_STRING_VIEW: StringViewScalar, |
|
_Type_LIST: ListScalar, |
|
_Type_LARGE_LIST: LargeListScalar, |
|
_Type_FIXED_SIZE_LIST: FixedSizeListScalar, |
|
_Type_LIST_VIEW: ListViewScalar, |
|
_Type_LARGE_LIST_VIEW: LargeListViewScalar, |
|
_Type_STRUCT: StructScalar, |
|
_Type_MAP: MapScalar, |
|
_Type_DICTIONARY: DictionaryScalar, |
|
_Type_RUN_END_ENCODED: RunEndEncodedScalar, |
|
_Type_SPARSE_UNION: UnionScalar, |
|
_Type_DENSE_UNION: UnionScalar, |
|
_Type_INTERVAL_MONTH_DAY_NANO: MonthDayNanoIntervalScalar, |
|
_Type_EXTENSION: ExtensionScalar, |
|
} |
|
|
|
|
|
cdef object get_scalar_class_from_type( |
|
const shared_ptr[CDataType]& sp_data_type): |
|
cdef CDataType* data_type = sp_data_type.get() |
|
if data_type == NULL: |
|
raise ValueError('Scalar data type was NULL') |
|
|
|
if data_type.id() == _Type_EXTENSION: |
|
py_ext_data_type = pyarrow_wrap_data_type(sp_data_type) |
|
return py_ext_data_type.__arrow_ext_scalar_class__() |
|
else: |
|
return _scalar_classes[data_type.id()] |
|
|
|
|
|
def scalar(value, type=None, *, from_pandas=None, MemoryPool memory_pool=None): |
|
""" |
|
Create a pyarrow.Scalar instance from a Python object. |
|
|
|
Parameters |
|
---------- |
|
value : Any |
|
Python object coercible to arrow's type system. |
|
type : pyarrow.DataType |
|
Explicit type to attempt to coerce to, otherwise will be inferred from |
|
the value. |
|
from_pandas : bool, default None |
|
Use pandas's semantics for inferring nulls from values in |
|
ndarray-like data. Defaults to False if not passed explicitly by user, |
|
or True if a pandas object is passed in. |
|
memory_pool : pyarrow.MemoryPool, optional |
|
If not passed, will allocate memory from the currently-set default |
|
memory pool. |
|
|
|
Returns |
|
------- |
|
scalar : pyarrow.Scalar |
|
|
|
Examples |
|
-------- |
|
>>> import pyarrow as pa |
|
|
|
>>> pa.scalar(42) |
|
<pyarrow.Int64Scalar: 42> |
|
|
|
>>> pa.scalar("string") |
|
<pyarrow.StringScalar: 'string'> |
|
|
|
>>> pa.scalar([1, 2]) |
|
<pyarrow.ListScalar: [1, 2]> |
|
|
|
>>> pa.scalar([1, 2], type=pa.list_(pa.int16())) |
|
<pyarrow.ListScalar: [1, 2]> |
|
""" |
|
cdef: |
|
DataType ty |
|
PyConversionOptions options |
|
shared_ptr[CScalar] scalar |
|
shared_ptr[CArray] array |
|
shared_ptr[CChunkedArray] chunked |
|
bint is_pandas_object = False |
|
CMemoryPool* pool |
|
|
|
type = ensure_type(type, allow_none=True) |
|
pool = maybe_unbox_memory_pool(memory_pool) |
|
|
|
extension_type = None |
|
if type is not None and type.id == _Type_EXTENSION: |
|
extension_type = type |
|
type = type.storage_type |
|
|
|
if _is_array_like(value): |
|
value = get_values(value, &is_pandas_object) |
|
|
|
options.size = 1 |
|
|
|
if type is not None: |
|
ty = ensure_type(type) |
|
options.type = ty.sp_type |
|
|
|
if from_pandas is None: |
|
options.from_pandas = is_pandas_object |
|
else: |
|
options.from_pandas = from_pandas |
|
|
|
value = [value] |
|
with nogil: |
|
chunked = GetResultValue(ConvertPySequence(value, None, options, pool)) |
|
|
|
|
|
assert chunked.get().num_chunks() == 1 |
|
array = chunked.get().chunk(0) |
|
|
|
|
|
scalar = GetResultValue(array.get().GetScalar(0)) |
|
result = Scalar.wrap(scalar) |
|
|
|
if extension_type is not None: |
|
result = ExtensionScalar.from_storage(extension_type, result) |
|
return result |
|
|