|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import warnings |
|
from threading import Lock |
|
|
|
|
|
cdef class _PandasAPIShim(object):
    """
    Lazy pandas importer that isolates usages of pandas APIs and avoids
    importing pandas until it's actually needed

    The first accessor that needs pandas triggers an import attempt that is
    serialized through ``_lock``. Accessors that call ``_check_import()``
    with its default ``raise_=True`` raise ``ImportError`` when pandas (or a
    supported version of it) is not available; the ``is_*`` predicates that
    are built on ``_have_pandas_internal()`` return ``False`` instead.
    """
    cdef:
        # Set to True once the first import attempt has finished, whether it
        # succeeded or not (see _check_import).
        bint _tried_importing_pandas
        # Set to True at the end of a successful _import_pandas() call.
        bint _have_pandas

    cdef readonly:
        # Parsed (Version) and raw (str) forms of pd.__version__.
        object _loose_version, _version
        # Cached module handles.
        object _pd, _types_api, _compat_module
        # Cached pandas classes / callables used by the predicates below.
        object _data_frame, _index, _series, _categorical_type
        object _datetimetz_type, _extension_array, _extension_dtype
        object _array_like_types, _is_extension_array_dtype, _lock
        # Always set to False by _import_pandas().
        bint has_sparse
        # Not assigned anywhere in this class.
        bint _pd024
        # Version flags computed in _import_pandas().
        bint _is_v1, _is_ge_v21, _is_ge_v23, _is_ge_v3, _is_ge_v3_strict

    def __init__(self):
        self._lock = Lock()
        self._tried_importing_pandas = False
        self._have_pandas = False

    cdef _import_pandas(self, bint raise_):
        """
        Import pandas and cache the handles this shim exposes.

        Parameters
        ----------
        raise_ : bool
            If True, an ImportError is raised when pandas cannot be imported
            or is older than 1.0.0. If False, the method returns quietly on
            import failure and emits a warning for a too-old pandas.
        """
        try:
            import pandas as pd
            import pyarrow.pandas_compat as pdcompat
        except ImportError:
            self._have_pandas = False
            if raise_:
                raise
            else:
                return

        from pyarrow.vendored.version import Version

        self._pd = pd
        self._version = pd.__version__
        self._loose_version = Version(pd.__version__)
        self._is_v1 = False

        if self._loose_version < Version('1.0.0'):
            self._have_pandas = False
            if raise_:
                raise ImportError(
                    "pyarrow requires pandas 1.0.0 or above, pandas {} is "
                    "installed".format(self._version)
                )
            else:
                warnings.warn(
                    "pyarrow requires pandas 1.0.0 or above, pandas {} is "
                    "installed. Therefore, pandas-specific integration is not "
                    "used.".format(self._version), stacklevel=2)
                return

        self._is_v1 = self._loose_version < Version('2.0.0')
        self._is_ge_v21 = self._loose_version >= Version('2.1.0')
        self._is_ge_v23 = self._loose_version >= Version('2.3.0.dev0')
        self._is_ge_v3 = self._loose_version >= Version('3.0.0.dev0')
        self._is_ge_v3_strict = self._loose_version >= Version('3.0.0')

        self._compat_module = pdcompat
        self._data_frame = pd.DataFrame
        self._index = pd.Index
        self._categorical_type = pd.Categorical
        self._series = pd.Series
        self._extension_array = pd.api.extensions.ExtensionArray
        self._array_like_types = (
            self._series, self._index, self._categorical_type,
            self._extension_array)
        self._extension_dtype = pd.api.extensions.ExtensionDtype
        self._is_extension_array_dtype = (
            pd.api.types.is_extension_array_dtype)
        self._types_api = pd.api.types
        self._datetimetz_type = pd.api.types.DatetimeTZDtype
        # Only flag pandas as usable once every handle above is populated.
        self._have_pandas = True
        self.has_sparse = False

    cdef inline _check_import(self, bint raise_=True):
        """
        Make sure an import attempt has been made.

        The first attempt runs under ``_lock``; the flag is re-checked after
        acquiring the lock so that a thread that waited does not repeat the
        import. On later calls, if pandas is still unavailable and
        ``raise_`` is True, the import is attempted again so that the
        ImportError is raised to the caller.
        """
        if not self._tried_importing_pandas:
            with self._lock:
                if not self._tried_importing_pandas:
                    try:
                        self._import_pandas(raise_)
                    finally:
                        # Record the attempt even if the import raised.
                        self._tried_importing_pandas = True
                    return

        if not self._have_pandas and raise_:
            self._import_pandas(raise_)

    def series(self, *args, **kwargs):
        """Construct a pandas Series from the given arguments."""
        self._check_import()
        return self._series(*args, **kwargs)

    def data_frame(self, *args, **kwargs):
        """Construct a pandas DataFrame from the given arguments."""
        self._check_import()
        return self._data_frame(*args, **kwargs)

    cdef inline bint _have_pandas_internal(self):
        """Return whether pandas is usable, importing it without raising."""
        if not self._tried_importing_pandas:
            self._check_import(raise_=False)
        return self._have_pandas

    @property
    def have_pandas(self):
        """Whether pandas is usable; never raises on a missing pandas."""
        return self._have_pandas_internal()

    @property
    def compat(self):
        """The ``pyarrow.pandas_compat`` module."""
        self._check_import()
        return self._compat_module

    @property
    def pd(self):
        """The imported ``pandas`` module."""
        self._check_import()
        return self._pd

    cpdef infer_dtype(self, obj):
        """
        Return ``pd.api.types.infer_dtype(obj, skipna=False)``, falling back
        to ``pd.lib.infer_dtype(obj)`` if an AttributeError is raised.
        """
        self._check_import()
        try:
            return self._types_api.infer_dtype(obj, skipna=False)
        except AttributeError:
            return self._pd.lib.infer_dtype(obj)

    cpdef pandas_dtype(self, dtype):
        """
        Return ``pd.api.types.pandas_dtype(dtype)``, or None if an
        AttributeError is raised.
        """
        self._check_import()
        try:
            return self._types_api.pandas_dtype(dtype)
        except AttributeError:
            return None

    @property
    def loose_version(self):
        """The pandas version parsed into a ``Version`` object."""
        self._check_import()
        return self._loose_version

    @property
    def version(self):
        """The raw ``pd.__version__`` value."""
        self._check_import()
        return self._version

    def is_v1(self):
        """Return True if the pandas version is below 2.0.0."""
        self._check_import()
        return self._is_v1

    def is_ge_v21(self):
        """Return True if the pandas version is at least 2.1.0."""
        self._check_import()
        return self._is_ge_v21

    def is_ge_v23(self):
        """Return True if the pandas version is at least 2.3.0.dev0."""
        self._check_import()
        return self._is_ge_v23

    def is_ge_v3(self):
        """Return True if the pandas version is at least 3.0.0.dev0."""
        self._check_import()
        return self._is_ge_v3

    def is_ge_v3_strict(self):
        """Return True if the pandas version is at least 3.0.0."""
        self._check_import()
        return self._is_ge_v3_strict

    def uses_string_dtype(self):
        """
        Return True if pandas is at least 3.0.0, or is at least 2.3.0.dev0
        with ``pd.options.future.infer_string`` set to a truthy value.

        Raises ImportError (through ``is_ge_v3_strict``) if pandas is not
        available.
        """
        if self.is_ge_v3_strict():
            return True
        try:
            if self.is_ge_v23() and self.pd.options.future.infer_string:
                return True
        except Exception:
            # Best effort: a failure to read the option is treated as "not
            # enabled". Catching Exception rather than using a bare except
            # lets KeyboardInterrupt and SystemExit propagate.
            pass
        return False

    @property
    def categorical_type(self):
        """The ``pd.Categorical`` class."""
        self._check_import()
        return self._categorical_type

    @property
    def datetimetz_type(self):
        """The ``pd.api.types.DatetimeTZDtype`` class."""
        self._check_import()
        return self._datetimetz_type

    @property
    def extension_dtype(self):
        """The ``pd.api.extensions.ExtensionDtype`` class."""
        self._check_import()
        return self._extension_dtype

    cpdef is_array_like(self, obj):
        """
        Return True if obj is a pandas Series, Index, Categorical or
        ExtensionArray. Raises ImportError if pandas is not available.
        """
        self._check_import()
        return isinstance(obj, self._array_like_types)

    cpdef is_categorical(self, obj):
        """Return True if obj is a pd.Categorical; False without pandas."""
        if self._have_pandas_internal():
            return isinstance(obj, self._categorical_type)
        else:
            return False

    cpdef is_datetimetz(self, obj):
        """Return True if obj is a DatetimeTZDtype; False without pandas."""
        if self._have_pandas_internal():
            return isinstance(obj, self._datetimetz_type)
        else:
            return False

    cpdef is_extension_array_dtype(self, obj):
        """
        Return ``pd.api.types.is_extension_array_dtype(obj)``, or False if
        that function handle is not set.
        """
        self._check_import()
        if self._is_extension_array_dtype:
            return self._is_extension_array_dtype(obj)
        else:
            return False

    cpdef is_sparse(self, obj):
        """
        Return True if ``obj.dtype`` is a ``pd.SparseDtype``; False without
        pandas.
        """
        if self._have_pandas_internal():
            return isinstance(obj.dtype, self.pd.SparseDtype)
        else:
            return False

    cpdef is_data_frame(self, obj):
        """Return True if obj is a pd.DataFrame; False without pandas."""
        if self._have_pandas_internal():
            return isinstance(obj, self._data_frame)
        else:
            return False

    cpdef is_series(self, obj):
        """Return True if obj is a pd.Series; False without pandas."""
        if self._have_pandas_internal():
            return isinstance(obj, self._series)
        else:
            return False

    cpdef is_index(self, obj):
        """Return True if obj is a pd.Index; False without pandas."""
        if self._have_pandas_internal():
            return isinstance(obj, self._index)
        else:
            return False

    cpdef get_values(self, obj):
        """
        Get the underlying array values of a pandas Series or Index in the
        format (np.ndarray or pandas ExtensionArray) as we need them.

        Assumes obj is a pandas Series or Index.
        """
        self._check_import()
        # Interval and Period dtypes are returned through ``.array``;
        # everything else goes through ``.values``.
        if isinstance(obj.dtype, (self.pd.api.types.IntervalDtype,
                                  self.pd.api.types.PeriodDtype)):
            return obj.array
        return obj.values

    def get_rangeindex_attribute(self, level, name):
        """
        Return attribute ``name`` of ``level``, falling back to the
        underscore-prefixed attribute when ``level`` has no ``name``.
        """
        self._check_import()
        if hasattr(level, name):
            return getattr(level, name)
        return getattr(level, '_' + name)
|
|
|
|
|
# Module-level shim instance, declared with its extension type.
cdef _PandasAPIShim pandas_api
pandas_api = _PandasAPIShim()

# Second module-level name bound to the same shim instance.
_pandas_api = pandas_api
|
|