Spaces:

jamtur01
/

MMaDA

Runtime error

App Files Files Community

MMaDA / venv /lib /python3.11 /site-packages /pyarrow /scalar.pxi

jamtur01

Upload folder using huggingface_hub

9c6594c verified 30 days ago

raw

history blame contribute delete

50.6 kB

	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	import collections
	import warnings
	from uuid import UUID


	cdef class Scalar(_Weakrefable):
	"""
	The base class for scalars.
	"""

	def __init__(self):
	raise TypeError("Do not call {}'s constructor directly, use "
	"pa.scalar() instead.".format(self.__class__.__name__))

	cdef void init(self, const shared_ptr[CScalar]& wrapped):
	self.wrapped = wrapped

	@staticmethod
	cdef wrap(const shared_ptr[CScalar]& wrapped):
	cdef:
	Scalar self
	Type type_id = wrapped.get().type.get().id()
	shared_ptr[CDataType] sp_data_type = wrapped.get().type

	if type_id == _Type_NA:
	return _NULL

	if type_id not in _scalar_classes:
	raise NotImplementedError(
	"Wrapping scalar of type " + frombytes(sp_data_type.get().ToString()))

	typ = get_scalar_class_from_type(sp_data_type)
	self = typ.__new__(typ)
	self.init(wrapped)

	return self

	cdef inline shared_ptr[CScalar] unwrap(self) nogil:
	return self.wrapped

	@property
	def type(self):
	"""
	Data type of the Scalar object.
	"""
	return pyarrow_wrap_data_type(self.wrapped.get().type)

	@property
	def is_valid(self):
	"""
	Holds a valid (non-null) value.
	"""
	return self.wrapped.get().is_valid

	def cast(self, object target_type=None, safe=None, options=None, memory_pool=None):
	"""
	Cast scalar value to another data type.

	See :func:`pyarrow.compute.cast` for usage.

	Parameters
	----------
	target_type : DataType, default None
	Type to cast scalar to.
	safe : boolean, default True
	Whether to check for conversion errors such as overflow.
	options : CastOptions, default None
	Additional checks pass by CastOptions
	memory_pool : MemoryPool, optional
	memory pool to use for allocations during function execution.

	Returns
	-------
	scalar : A Scalar of the given target data type.
	"""
	return _pc().cast(self, target_type, safe=safe,
	options=options, memory_pool=memory_pool)

	def validate(self, *, full=False):
	"""
	Perform validation checks. An exception is raised if validation fails.

	By default only cheap validation checks are run. Pass `full=True`
	for thorough validation checks (potentially O(n)).

	Parameters
	----------
	full : bool, default False
	If True, run expensive checks, otherwise cheap checks only.

	Raises
	------
	ArrowInvalid
	"""
	if full:
	with nogil:
	check_status(self.wrapped.get().ValidateFull())
	else:
	with nogil:
	check_status(self.wrapped.get().Validate())

	def __repr__(self):
	return '<pyarrow.{}: {!r}>'.format(
	self.__class__.__name__, self.as_py()
	)

	def __str__(self):
	return str(self.as_py())

	def equals(self, Scalar other not None):
	"""
	Parameters
	----------
	other : pyarrow.Scalar

	Returns
	-------
	bool
	"""
	return self.wrapped.get().Equals(other.unwrap().get()[0])

	def __eq__(self, other):
	try:
	return self.equals(other)
	except TypeError:
	return NotImplemented

	def __hash__(self):
	cdef CScalarHash hasher
	return hasher(self.wrapped)

	def __reduce__(self):
	return scalar, (self.as_py(), self.type)

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python representation.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	The default behavior (`None`), is to convert Arrow Map arrays to
	Python association lists (list-of-tuples) in the same order as the
	Arrow Map, as in [(key1, value1), (key2, value2), ...].

	If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts.

	If 'lossy', whenever duplicate keys are detected, a warning will be printed.
	The last seen value of a duplicate key will be in the Python dictionary.
	If 'strict', this instead results in an exception being raised when detected.
	"""
	raise NotImplementedError()


	_NULL = NA = None


	cdef class NullScalar(Scalar):
	"""
	Concrete class for null scalars.
	"""

	def __cinit__(self):
	global NA
	if NA is not None:
	raise RuntimeError('Cannot create multiple NullScalar instances')
	self.init(shared_ptr[CScalar](new CNullScalar()))

	def __init__(self):
	pass

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python None.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	return None


	_NULL = NA = NullScalar()


	cdef class BooleanScalar(Scalar):
	"""
	Concrete class for boolean scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python bool.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef CBooleanScalar* sp = <CBooleanScalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None


	cdef class UInt8Scalar(Scalar):
	"""
	Concrete class for uint8 scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python int.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef CUInt8Scalar* sp = <CUInt8Scalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None


	cdef class Int8Scalar(Scalar):
	"""
	Concrete class for int8 scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python int.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef CInt8Scalar* sp = <CInt8Scalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None


	cdef class UInt16Scalar(Scalar):
	"""
	Concrete class for uint16 scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python int.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef CUInt16Scalar* sp = <CUInt16Scalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None


	cdef class Int16Scalar(Scalar):
	"""
	Concrete class for int16 scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python int.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef CInt16Scalar* sp = <CInt16Scalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None


	cdef class UInt32Scalar(Scalar):
	"""
	Concrete class for uint32 scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python int.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef CUInt32Scalar* sp = <CUInt32Scalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None


	cdef class Int32Scalar(Scalar):
	"""
	Concrete class for int32 scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python int.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef CInt32Scalar* sp = <CInt32Scalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None


	cdef class UInt64Scalar(Scalar):
	"""
	Concrete class for uint64 scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python int.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef CUInt64Scalar* sp = <CUInt64Scalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None


	cdef class Int64Scalar(Scalar):
	"""
	Concrete class for int64 scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python int.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef CInt64Scalar* sp = <CInt64Scalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None


	cdef class HalfFloatScalar(Scalar):
	"""
	Concrete class for float scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python float.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef CHalfFloatScalar* sp = <CHalfFloatScalar*> self.wrapped.get()
	return PyHalf_FromHalf(sp.value) if sp.is_valid else None


	cdef class FloatScalar(Scalar):
	"""
	Concrete class for float scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python float.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef CFloatScalar* sp = <CFloatScalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None


	cdef class DoubleScalar(Scalar):
	"""
	Concrete class for double scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python float.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef CDoubleScalar* sp = <CDoubleScalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None


	cdef class Decimal32Scalar(Scalar):
	"""
	Concrete class for decimal32 scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python Decimal.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef:
	CDecimal32Scalar* sp = <CDecimal32Scalar*> self.wrapped.get()
	CDecimal32Type* dtype = <CDecimal32Type*> sp.type.get()
	if sp.is_valid:
	return _pydecimal.Decimal(
	frombytes(sp.value.ToString(dtype.scale()))
	)
	else:
	return None


	cdef class Decimal64Scalar(Scalar):
	"""
	Concrete class for decimal64 scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python Decimal.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef:
	CDecimal64Scalar* sp = <CDecimal64Scalar*> self.wrapped.get()
	CDecimal64Type* dtype = <CDecimal64Type*> sp.type.get()
	if sp.is_valid:
	return _pydecimal.Decimal(
	frombytes(sp.value.ToString(dtype.scale()))
	)
	else:
	return None


	cdef class Decimal128Scalar(Scalar):
	"""
	Concrete class for decimal128 scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python Decimal.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef:
	CDecimal128Scalar* sp = <CDecimal128Scalar*> self.wrapped.get()
	CDecimal128Type* dtype = <CDecimal128Type*> sp.type.get()
	if sp.is_valid:
	return _pydecimal.Decimal(
	frombytes(sp.value.ToString(dtype.scale()))
	)
	else:
	return None


	cdef class Decimal256Scalar(Scalar):
	"""
	Concrete class for decimal256 scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python Decimal.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef:
	CDecimal256Scalar* sp = <CDecimal256Scalar*> self.wrapped.get()
	CDecimal256Type* dtype = <CDecimal256Type*> sp.type.get()
	if sp.is_valid:
	return _pydecimal.Decimal(
	frombytes(sp.value.ToString(dtype.scale()))
	)
	else:
	return None


	cdef class Date32Scalar(Scalar):
	"""
	Concrete class for date32 scalars.
	"""

	@property
	def value(self):
	cdef CDate32Scalar* sp = <CDate32Scalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python datetime.datetime instance.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef CDate32Scalar* sp = <CDate32Scalar*> self.wrapped.get()

	if sp.is_valid:
	# shift to seconds since epoch
	return (
	datetime.date(1970, 1, 1) + datetime.timedelta(days=sp.value)
	)
	else:
	return None


	cdef class Date64Scalar(Scalar):
	"""
	Concrete class for date64 scalars.
	"""

	@property
	def value(self):
	cdef CDate64Scalar* sp = <CDate64Scalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python datetime.datetime instance.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef CDate64Scalar* sp = <CDate64Scalar*> self.wrapped.get()

	if sp.is_valid:
	return (
	datetime.date(1970, 1, 1) +
	datetime.timedelta(days=sp.value / 86400000)
	)
	else:
	return None


	def _datetime_from_int(int64_t value, TimeUnit unit, tzinfo=None):
	if unit == TimeUnit_SECOND:
	delta = datetime.timedelta(seconds=value)
	elif unit == TimeUnit_MILLI:
	delta = datetime.timedelta(milliseconds=value)
	elif unit == TimeUnit_MICRO:
	delta = datetime.timedelta(microseconds=value)
	else:
	# TimeUnit_NANO: prefer pandas timestamps if available
	if _pandas_api.have_pandas:
	return _pandas_api.pd.Timestamp(value, tz=tzinfo, unit='ns')
	# otherwise safely truncate to microsecond resolution datetime
	if value % 1000 != 0:
	raise ValueError(
	"Nanosecond resolution temporal type {} is not safely "
	"convertible to microseconds to convert to datetime.datetime. "
	"Install pandas to return as Timestamp with nanosecond "
	"support or access the .value attribute.".format(value)
	)
	delta = datetime.timedelta(microseconds=value // 1000)

	dt = datetime.datetime(1970, 1, 1) + delta
	# adjust timezone if set to the datatype
	if tzinfo is not None:
	dt = dt.replace(tzinfo=datetime.timezone.utc).astimezone(tzinfo)

	return dt


	cdef class Time32Scalar(Scalar):
	"""
	Concrete class for time32 scalars.
	"""

	@property
	def value(self):
	cdef CTime32Scalar* sp = <CTime32Scalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python datetime.timedelta instance.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef:
	CTime32Scalar* sp = <CTime32Scalar*> self.wrapped.get()
	CTime32Type* dtype = <CTime32Type*> sp.type.get()

	if sp.is_valid:
	return _datetime_from_int(sp.value, unit=dtype.unit()).time()
	else:
	return None


	cdef class Time64Scalar(Scalar):
	"""
	Concrete class for time64 scalars.
	"""

	@property
	def value(self):
	cdef CTime64Scalar* sp = <CTime64Scalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python datetime.timedelta instance.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef:
	CTime64Scalar* sp = <CTime64Scalar*> self.wrapped.get()
	CTime64Type* dtype = <CTime64Type*> sp.type.get()

	if sp.is_valid:
	return _datetime_from_int(sp.value, unit=dtype.unit()).time()
	else:
	return None


	cdef class TimestampScalar(Scalar):
	"""
	Concrete class for timestamp scalars.
	"""

	@property
	def value(self):
	cdef CTimestampScalar* sp = <CTimestampScalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Pandas Timestamp instance (if units are
	nanoseconds and pandas is available), otherwise as a Python
	datetime.datetime instance.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef:
	CTimestampScalar* sp = <CTimestampScalar*> self.wrapped.get()
	CTimestampType* dtype = <CTimestampType*> sp.type.get()

	if not sp.is_valid:
	return None

	if not dtype.timezone().empty():
	tzinfo = string_to_tzinfo(frombytes(dtype.timezone()))
	else:
	tzinfo = None

	return _datetime_from_int(sp.value, unit=dtype.unit(), tzinfo=tzinfo)

	def __repr__(self):
	"""
	Return the representation of TimestampScalar using `strftime` to avoid
	original repr datetime values being out of range.
	"""
	cdef:
	CTimestampScalar* sp = <CTimestampScalar*> self.wrapped.get()
	CTimestampType* dtype = <CTimestampType*> sp.type.get()

	if not dtype.timezone().empty():
	type_format = str(_pc().strftime(self, format="%Y-%m-%dT%H:%M:%S%z"))
	else:
	type_format = str(_pc().strftime(self))
	return '<pyarrow.{}: {!r}>'.format(
	self.__class__.__name__, type_format
	)


	cdef class DurationScalar(Scalar):
	"""
	Concrete class for duration scalars.
	"""

	@property
	def value(self):
	cdef CDurationScalar* sp = <CDurationScalar*> self.wrapped.get()
	return sp.value if sp.is_valid else None

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Pandas Timedelta instance (if units are
	nanoseconds and pandas is available), otherwise as a Python
	datetime.timedelta instance.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef:
	CDurationScalar* sp = <CDurationScalar*> self.wrapped.get()
	CDurationType* dtype = <CDurationType*> sp.type.get()
	TimeUnit unit = dtype.unit()

	if not sp.is_valid:
	return None

	if unit == TimeUnit_SECOND:
	return datetime.timedelta(seconds=sp.value)
	elif unit == TimeUnit_MILLI:
	return datetime.timedelta(milliseconds=sp.value)
	elif unit == TimeUnit_MICRO:
	return datetime.timedelta(microseconds=sp.value)
	else:
	# TimeUnit_NANO: prefer pandas timestamps if available
	if _pandas_api.have_pandas:
	return _pandas_api.pd.Timedelta(sp.value, unit='ns')
	# otherwise safely truncate to microsecond resolution timedelta
	if sp.value % 1000 != 0:
	raise ValueError(
	"Nanosecond duration {} is not safely convertible to "
	"microseconds to convert to datetime.timedelta. Install "
	"pandas to return as Timedelta with nanosecond support or "
	"access the .value attribute.".format(sp.value)
	)
	return datetime.timedelta(microseconds=sp.value // 1000)


	cdef class MonthDayNanoIntervalScalar(Scalar):
	"""
	Concrete class for month, day, nanosecond interval scalars.
	"""

	@property
	def value(self):
	"""
	Same as self.as_py()
	"""
	return self.as_py()

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a pyarrow.MonthDayNano.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	cdef:
	PyObject* val
	CMonthDayNanoIntervalScalar* scalar
	scalar = <CMonthDayNanoIntervalScalar*>self.wrapped.get()
	val = GetResultValue(MonthDayNanoIntervalScalarToPyObject(
	deref(scalar)))
	return PyObject_to_object(val)


	cdef class BinaryScalar(Scalar):
	"""
	Concrete class for binary-like scalars.
	"""

	def as_buffer(self):
	"""
	Return a view over this value as a Buffer object.
	"""
	cdef CBaseBinaryScalar* sp = <CBaseBinaryScalar*> self.wrapped.get()
	return pyarrow_wrap_buffer(sp.value) if sp.is_valid else None

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python bytes.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	buffer = self.as_buffer()
	return None if buffer is None else buffer.to_pybytes()


	cdef class LargeBinaryScalar(BinaryScalar):
	pass


	cdef class FixedSizeBinaryScalar(BinaryScalar):
	pass


	cdef class StringScalar(BinaryScalar):
	"""
	Concrete class for string-like (utf8) scalars.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python string.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	buffer = self.as_buffer()
	return None if buffer is None else str(buffer, 'utf8')


	cdef class LargeStringScalar(StringScalar):
	pass


	cdef class BinaryViewScalar(BinaryScalar):
	pass


	cdef class StringViewScalar(StringScalar):
	pass


	cdef class ListScalar(Scalar):
	"""
	Concrete class for list-like scalars.
	"""

	@property
	def values(self):
	cdef CBaseListScalar* sp = <CBaseListScalar*> self.wrapped.get()
	if sp.is_valid:
	return pyarrow_wrap_array(sp.value)
	else:
	return None

	def __len__(self):
	"""
	Return the number of values.
	"""
	return len(self.values)

	def __getitem__(self, i):
	"""
	Return the value at the given index.
	"""
	return self.values[_normalize_index(i, len(self))]

	def __iter__(self):
	"""
	Iterate over this element's values.
	"""
	return iter(self.values)

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python list.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	The default behavior (`None`), is to convert Arrow Map arrays to
	Python association lists (list-of-tuples) in the same order as the
	Arrow Map, as in [(key1, value1), (key2, value2), ...].

	If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts.

	If 'lossy', whenever duplicate keys are detected, a warning will be printed.
	The last seen value of a duplicate key will be in the Python dictionary.
	If 'strict', this instead results in an exception being raised when detected.
	"""
	arr = self.values
	return None if arr is None else arr.to_pylist(maps_as_pydicts=maps_as_pydicts)


	cdef class FixedSizeListScalar(ListScalar):
	pass


	cdef class LargeListScalar(ListScalar):
	pass


	cdef class ListViewScalar(ListScalar):
	pass


	cdef class LargeListViewScalar(ListScalar):
	pass


	cdef class StructScalar(Scalar, collections.abc.Mapping):
	"""
	Concrete class for struct scalars.
	"""

	def __len__(self):
	cdef CStructScalar* sp = <CStructScalar*> self.wrapped.get()
	return sp.value.size()

	def __iter__(self):
	cdef:
	CStructScalar* sp = <CStructScalar*> self.wrapped.get()
	CStructType* dtype = <CStructType*> sp.type.get()
	vector[shared_ptr[CField]] fields = dtype.fields()

	for i in range(dtype.num_fields()):
	yield frombytes(fields[i].get().name())

	def items(self):
	return ((key, self[i]) for i, key in enumerate(self))

	def __contains__(self, key):
	return key in list(self)

	def __getitem__(self, key):
	"""
	Return the child value for the given field.

	Parameters
	----------
	index : Union[int, str]
	Index / position or name of the field.

	Returns
	-------
	result : Scalar
	"""
	cdef:
	CFieldRef ref
	CStructScalar* sp = <CStructScalar*> self.wrapped.get()

	if isinstance(key, (bytes, str)):
	ref = CFieldRef(<c_string> tobytes(key))
	elif isinstance(key, int):
	ref = CFieldRef(<int> key)
	else:
	raise TypeError('Expected integer or string index')

	try:
	return Scalar.wrap(GetResultValue(sp.field(ref)))
	except ArrowInvalid as exc:
	if isinstance(key, int):
	raise IndexError(key) from exc
	else:
	raise KeyError(key) from exc

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python dict.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	The default behavior (`None`), is to convert Arrow Map arrays to
	Python association lists (list-of-tuples) in the same order as the
	Arrow Map, as in [(key1, value1), (key2, value2), ...].

	If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts.

	If 'lossy', whenever duplicate keys are detected, a warning will be printed.
	The last seen value of a duplicate key will be in the Python dictionary.
	If 'strict', this instead results in an exception being raised when detected.
	"""
	if self.is_valid:
	try:
	return {k: self[k].as_py(maps_as_pydicts=maps_as_pydicts) for k in self.keys()}
	except KeyError:
	raise ValueError(
	"Converting to Python dictionary is not supported when "
	"duplicate field names are present")
	else:
	return None

	def _as_py_tuple(self):
	# a version that returns a tuple instead of dict to support repr/str
	# with the presence of duplicate field names
	if self.is_valid:
	return [(key, self[i].as_py()) for i, key in enumerate(self)]
	else:
	return None

	def __repr__(self):
	return '<pyarrow.{}: {!r}>'.format(
	self.__class__.__name__, self._as_py_tuple()
	)

	def __str__(self):
	return str(self._as_py_tuple())


	cdef class MapScalar(ListScalar):
	"""
	Concrete class for map scalars.
	"""

	def __getitem__(self, i):
	"""
	Return the value at the given index.
	"""
	arr = self.values
	if arr is None:
	raise IndexError(i)
	dct = arr[_normalize_index(i, len(arr))]
	return (dct[self.type.key_field.name], dct[self.type.item_field.name])

	def __iter__(self):
	"""
	Iterate over this element's values.
	"""
	arr = self.values
	if arr is None:
	return
	for k, v in zip(arr.field(self.type.key_field.name), arr.field(self.type.item_field.name)):
	yield (k.as_py(), v.as_py())

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this value as a Python list or dict, depending on 'maps_as_pydicts'.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	The default behavior (`None`), is to convert Arrow Map arrays to
	Python association lists (list-of-tuples) in the same order as the
	Arrow Map, as in [(key1, value1), (key2, value2), ...].

	If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts.

	If 'lossy', whenever duplicate keys are detected, a warning will be printed.
	The last seen value of a duplicate key will be in the Python dictionary.
	If 'strict', this instead results in an exception being raised when detected.
	"""
	if maps_as_pydicts not in (None, "lossy", "strict"):
	raise ValueError(
	"Invalid value for 'maps_as_pydicts': "
	+ "valid values are 'lossy', 'strict' or `None` (default). "
	+ f"Received {maps_as_pydicts!r}."
	)
	if not self.is_valid:
	return None
	if not maps_as_pydicts:
	return list(self)
	result_dict = {}
	for key, value in self:
	if key in result_dict:
	if maps_as_pydicts == "strict":
	raise KeyError(
	"Converting to Python dictionary is not supported in strict mode "
	f"when duplicate keys are present (duplicate key was '{key}')."
	)
	else:
	warnings.warn(
	f"Encountered key '{key}' which was already encountered.")
	result_dict[key] = value
	return result_dict


	cdef class DictionaryScalar(Scalar):
	"""
	Concrete class for dictionary-encoded scalars.
	"""

	@staticmethod
	def _reconstruct(type, is_valid, index, dictionary):
	cdef:
	CDictionaryScalarIndexAndDictionary value
	shared_ptr[CDictionaryScalar] wrapped
	DataType type_
	Scalar index_
	Array dictionary_

	type_ = ensure_type(type, allow_none=False)
	if not isinstance(type_, DictionaryType):
	raise TypeError('Must pass a DictionaryType instance')

	if isinstance(index, Scalar):
	if not index.type.equals(type.index_type):
	raise TypeError("The Scalar value passed as index must have "
	"identical type to the dictionary type's "
	"index_type")
	index_ = index
	else:
	index_ = scalar(index, type=type_.index_type)

	if isinstance(dictionary, Array):
	if not dictionary.type.equals(type.value_type):
	raise TypeError("The Array passed as dictionary must have "
	"identical type to the dictionary type's "
	"value_type")
	dictionary_ = dictionary
	else:
	dictionary_ = array(dictionary, type=type_.value_type)

	value.index = pyarrow_unwrap_scalar(index_)
	value.dictionary = pyarrow_unwrap_array(dictionary_)

	wrapped = make_shared[CDictionaryScalar](
	value, pyarrow_unwrap_data_type(type_), <c_bool>(is_valid)
	)
	return Scalar.wrap(<shared_ptr[CScalar]> wrapped)

	def __reduce__(self):
	return DictionaryScalar._reconstruct, (
	self.type, self.is_valid, self.index, self.dictionary
	)

	@property
	def index(self):
	"""
	Return this value's underlying index as a scalar.
	"""
	cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get()
	return Scalar.wrap(sp.value.index)

	@property
	def value(self):
	"""
	Return the encoded value as a scalar.
	"""
	cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get()
	return Scalar.wrap(GetResultValue(sp.GetEncodedValue()))

	@property
	def dictionary(self):
	cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get()
	return pyarrow_wrap_array(sp.value.dictionary)

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this encoded value as a Python object.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	The default behavior (`None`), is to convert Arrow Map arrays to
	Python association lists (list-of-tuples) in the same order as the
	Arrow Map, as in [(key1, value1), (key2, value2), ...].

	If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts.

	If 'lossy', whenever duplicate keys are detected, a warning will be printed.
	The last seen value of a duplicate key will be in the Python dictionary.
	If 'strict', this instead results in an exception being raised when detected.
	"""
	return self.value.as_py(maps_as_pydicts=maps_as_pydicts) if self.is_valid else None


	cdef class RunEndEncodedScalar(Scalar):
	"""
	Concrete class for RunEndEncoded scalars.
	"""
	@property
	def value(self):
	"""
	Return underlying value as a scalar.
	"""
	cdef CRunEndEncodedScalar* sp = <CRunEndEncodedScalar*> self.wrapped.get()
	return Scalar.wrap(sp.value)

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return underlying value as a Python object.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	The default behavior (`None`), is to convert Arrow Map arrays to
	Python association lists (list-of-tuples) in the same order as the
	Arrow Map, as in [(key1, value1), (key2, value2), ...].

	If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts.

	If 'lossy', whenever duplicate keys are detected, a warning will be printed.
	The last seen value of a duplicate key will be in the Python dictionary.
	If 'strict', this instead results in an exception being raised when detected.
	"""
	return self.value.as_py(maps_as_pydicts=maps_as_pydicts)


	cdef class UnionScalar(Scalar):
	"""
	Concrete class for Union scalars.
	"""

	@property
	def value(self):
	"""
	Return underlying value as a scalar.
	"""
	cdef CSparseUnionScalar* sp
	cdef CDenseUnionScalar* dp
	if self.type.id == _Type_SPARSE_UNION:
	sp = <CSparseUnionScalar*> self.wrapped.get()
	return Scalar.wrap(sp.value[sp.child_id]) if sp.is_valid else None
	else:
	dp = <CDenseUnionScalar*> self.wrapped.get()
	return Scalar.wrap(dp.value) if dp.is_valid else None

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return underlying value as a Python object.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	The default behavior (`None`), is to convert Arrow Map arrays to
	Python association lists (list-of-tuples) in the same order as the
	Arrow Map, as in [(key1, value1), (key2, value2), ...].

	If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts.

	If 'lossy', whenever duplicate keys are detected, a warning will be printed.
	The last seen value of a duplicate key will be in the Python dictionary.
	If 'strict', this instead results in an exception being raised when detected.
	"""
	value = self.value
	return None if value is None else value.as_py(maps_as_pydicts=maps_as_pydicts)

	@property
	def type_code(self):
	"""
	Return the union type code for this scalar.
	"""
	cdef CUnionScalar* sp = <CUnionScalar*> self.wrapped.get()
	return sp.type_code


	cdef class ExtensionScalar(Scalar):
	"""
	Concrete class for Extension scalars.
	"""

	@property
	def value(self):
	"""
	Return storage value as a scalar.
	"""
	cdef CExtensionScalar* sp = <CExtensionScalar*> self.wrapped.get()
	return Scalar.wrap(sp.value) if sp.is_valid else None

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this scalar as a Python object.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	The default behavior (`None`), is to convert Arrow Map arrays to
	Python association lists (list-of-tuples) in the same order as the
	Arrow Map, as in [(key1, value1), (key2, value2), ...].

	If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts.

	If 'lossy', whenever duplicate keys are detected, a warning will be printed.
	The last seen value of a duplicate key will be in the Python dictionary.
	If 'strict', this instead results in an exception being raised when detected.
	"""
	return None if self.value is None else self.value.as_py(maps_as_pydicts=maps_as_pydicts)

	@staticmethod
	def from_storage(BaseExtensionType typ, value):
	"""
	Construct ExtensionScalar from type and storage value.

	Parameters
	----------
	typ : DataType
	The extension type for the result scalar.
	value : object
	The storage value for the result scalar.

	Returns
	-------
	ext_scalar : ExtensionScalar
	"""
	cdef:
	shared_ptr[CExtensionScalar] sp_scalar
	shared_ptr[CScalar] sp_storage
	CExtensionScalar* ext_scalar

	if value is None:
	storage = None
	elif isinstance(value, Scalar):
	if value.type != typ.storage_type:
	raise TypeError("Incompatible storage type {0} "
	"for extension type {1}"
	.format(value.type, typ))
	storage = value
	else:
	storage = scalar(value, typ.storage_type)

	cdef c_bool is_valid = storage is not None and storage.is_valid
	if is_valid:
	sp_storage = pyarrow_unwrap_scalar(storage)
	else:
	sp_storage = MakeNullScalar((<DataType> typ.storage_type).sp_type)
	sp_scalar = make_shared[CExtensionScalar](sp_storage, typ.sp_type,
	is_valid)
	with nogil:
	check_status(sp_scalar.get().Validate())
	return pyarrow_wrap_scalar(<shared_ptr[CScalar]> sp_scalar)


	class JsonScalar(ExtensionScalar):
	"""
	Concrete class for JSON extension scalar.
	"""


	class UuidScalar(ExtensionScalar):
	"""
	Concrete class for Uuid extension scalar.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this scalar as a Python UUID.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	return None if self.value is None else UUID(bytes=self.value.as_py())


	cdef class FixedShapeTensorScalar(ExtensionScalar):
	"""
	Concrete class for fixed shape tensor extension scalar.
	"""

	def to_numpy(self):
	"""
	Convert fixed shape tensor scalar to a numpy.ndarray.

	The resulting ndarray's shape matches the permuted shape of the
	fixed shape tensor scalar.
	The conversion is zero-copy.

	Returns
	-------
	numpy.ndarray
	"""
	return self.to_tensor().to_numpy()

	def to_tensor(self):
	"""
	Convert fixed shape tensor extension scalar to a pyarrow.Tensor, using shape
	and strides derived from corresponding FixedShapeTensorType.

	The conversion is zero-copy.

	Returns
	-------
	pyarrow.Tensor
	Tensor represented stored in FixedShapeTensorScalar.
	"""
	cdef:
	CFixedShapeTensorType* c_type = static_pointer_cast[CFixedShapeTensorType, CDataType](
	self.wrapped.get().type).get()
	shared_ptr[CExtensionScalar] scalar = static_pointer_cast[CExtensionScalar, CScalar](self.wrapped)
	shared_ptr[CTensor] ctensor

	with nogil:
	ctensor = GetResultValue(c_type.MakeTensor(scalar))
	return pyarrow_wrap_tensor(ctensor)


	cdef class OpaqueScalar(ExtensionScalar):
	"""
	Concrete class for opaque extension scalar.
	"""


	cdef class Bool8Scalar(ExtensionScalar):
	"""
	Concrete class for bool8 extension scalar.
	"""

	def as_py(self, *, maps_as_pydicts=None):
	"""
	Return this scalar as a Python object.

	Parameters
	----------
	maps_as_pydicts : str, optional, default `None`
	Valid values are `None`, 'lossy', or 'strict'.
	This parameter is ignored for non-nested Scalars.
	"""
	py_val = super().as_py()
	return None if py_val is None else py_val != 0

	cdef dict _scalar_classes = {
	_Type_BOOL: BooleanScalar,
	_Type_UINT8: UInt8Scalar,
	_Type_UINT16: UInt16Scalar,
	_Type_UINT32: UInt32Scalar,
	_Type_UINT64: UInt64Scalar,
	_Type_INT8: Int8Scalar,
	_Type_INT16: Int16Scalar,
	_Type_INT32: Int32Scalar,
	_Type_INT64: Int64Scalar,
	_Type_HALF_FLOAT: HalfFloatScalar,
	_Type_FLOAT: FloatScalar,
	_Type_DOUBLE: DoubleScalar,
	_Type_DECIMAL32: Decimal32Scalar,
	_Type_DECIMAL64: Decimal64Scalar,
	_Type_DECIMAL128: Decimal128Scalar,
	_Type_DECIMAL256: Decimal256Scalar,
	_Type_DATE32: Date32Scalar,
	_Type_DATE64: Date64Scalar,
	_Type_TIME32: Time32Scalar,
	_Type_TIME64: Time64Scalar,
	_Type_TIMESTAMP: TimestampScalar,
	_Type_DURATION: DurationScalar,
	_Type_BINARY: BinaryScalar,
	_Type_LARGE_BINARY: LargeBinaryScalar,
	_Type_FIXED_SIZE_BINARY: FixedSizeBinaryScalar,
	_Type_BINARY_VIEW: BinaryViewScalar,
	_Type_STRING: StringScalar,
	_Type_LARGE_STRING: LargeStringScalar,
	_Type_STRING_VIEW: StringViewScalar,
	_Type_LIST: ListScalar,
	_Type_LARGE_LIST: LargeListScalar,
	_Type_FIXED_SIZE_LIST: FixedSizeListScalar,
	_Type_LIST_VIEW: ListViewScalar,
	_Type_LARGE_LIST_VIEW: LargeListViewScalar,
	_Type_STRUCT: StructScalar,
	_Type_MAP: MapScalar,
	_Type_DICTIONARY: DictionaryScalar,
	_Type_RUN_END_ENCODED: RunEndEncodedScalar,
	_Type_SPARSE_UNION: UnionScalar,
	_Type_DENSE_UNION: UnionScalar,
	_Type_INTERVAL_MONTH_DAY_NANO: MonthDayNanoIntervalScalar,
	_Type_EXTENSION: ExtensionScalar,
	}


	cdef object get_scalar_class_from_type(
	const shared_ptr[CDataType]& sp_data_type):
	cdef CDataType* data_type = sp_data_type.get()
	if data_type == NULL:
	raise ValueError('Scalar data type was NULL')

	if data_type.id() == _Type_EXTENSION:
	py_ext_data_type = pyarrow_wrap_data_type(sp_data_type)
	return py_ext_data_type.__arrow_ext_scalar_class__()
	else:
	return _scalar_classes[data_type.id()]


	def scalar(value, type=None, *, from_pandas=None, MemoryPool memory_pool=None):
	"""
	Create a pyarrow.Scalar instance from a Python object.

	Parameters
	----------
	value : Any
	Python object coercible to arrow's type system.
	type : pyarrow.DataType
	Explicit type to attempt to coerce to, otherwise will be inferred from
	the value.
	from_pandas : bool, default None
	Use pandas's semantics for inferring nulls from values in
	ndarray-like data. Defaults to False if not passed explicitly by user,
	or True if a pandas object is passed in.
	memory_pool : pyarrow.MemoryPool, optional
	If not passed, will allocate memory from the currently-set default
	memory pool.

	Returns
	-------
	scalar : pyarrow.Scalar

	Examples
	--------
	>>> import pyarrow as pa

	>>> pa.scalar(42)
	<pyarrow.Int64Scalar: 42>

	>>> pa.scalar("string")
	<pyarrow.StringScalar: 'string'>

	>>> pa.scalar([1, 2])
	<pyarrow.ListScalar: [1, 2]>

	>>> pa.scalar([1, 2], type=pa.list_(pa.int16()))
	<pyarrow.ListScalar: [1, 2]>
	"""
	cdef:
	DataType ty
	PyConversionOptions options
	shared_ptr[CScalar] scalar
	shared_ptr[CArray] array
	shared_ptr[CChunkedArray] chunked
	bint is_pandas_object = False
	CMemoryPool* pool

	type = ensure_type(type, allow_none=True)
	pool = maybe_unbox_memory_pool(memory_pool)

	extension_type = None
	if type is not None and type.id == _Type_EXTENSION:
	extension_type = type
	type = type.storage_type

	if _is_array_like(value):
	value = get_values(value, &is_pandas_object)

	options.size = 1

	if type is not None:
	ty = ensure_type(type)
	options.type = ty.sp_type

	if from_pandas is None:
	options.from_pandas = is_pandas_object
	else:
	options.from_pandas = from_pandas

	value = [value]
	with nogil:
	chunked = GetResultValue(ConvertPySequence(value, None, options, pool))

	# get the first chunk
	assert chunked.get().num_chunks() == 1
	array = chunked.get().chunk(0)

	# retrieve the scalar from the first position
	scalar = GetResultValue(array.get().GetScalar(0))
	result = Scalar.wrap(scalar)

	if extension_type is not None:
	result = ExtensionScalar.from_storage(extension_type, result)
	return result