|
from __future__ import annotations |
|
|
|
from typing import ( |
|
TYPE_CHECKING, |
|
Literal, |
|
) |
|
|
|
import numpy as np |
|
|
|
from pandas._config import using_string_dtype |
|
|
|
from pandas._libs import lib |
|
from pandas.compat import ( |
|
pa_version_under18p0, |
|
pa_version_under19p0, |
|
) |
|
from pandas.compat._optional import import_optional_dependency |
|
|
|
import pandas as pd |
|
|
|
if TYPE_CHECKING: |
|
from collections.abc import Callable |
|
|
|
import pyarrow |
|
|
|
from pandas._typing import DtypeBackend |
|
|
|
|
|
def _arrow_dtype_mapping() -> dict:
    """
    Build the lookup from pyarrow data types to pandas extension dtypes.

    Returns
    -------
    dict
        A newly created dict on every call, keyed by pyarrow type instances
        (signed/unsigned integers, bool, float32/float64, string and
        large_string) with the corresponding pandas dtype instance as value.
        Because a fresh dict is returned each time, callers may add entries
        to it without affecting other callers.
    """
    pa = import_optional_dependency("pyarrow")
    return {
        pa.int8(): pd.Int8Dtype(),
        pa.int16(): pd.Int16Dtype(),
        pa.int32(): pd.Int32Dtype(),
        pa.int64(): pd.Int64Dtype(),
        pa.uint8(): pd.UInt8Dtype(),
        pa.uint16(): pd.UInt16Dtype(),
        pa.uint32(): pd.UInt32Dtype(),
        pa.uint64(): pd.UInt64Dtype(),
        pa.bool_(): pd.BooleanDtype(),
        # Each key appears exactly once; a repeated key in a dict display
        # would only overwrite the earlier value.
        pa.string(): pd.StringDtype(),
        pa.float32(): pd.Float32Dtype(),
        pa.float64(): pd.Float64Dtype(),
        pa.large_string(): pd.StringDtype(),
    }
|
|
|
|
|
def _arrow_string_types_mapper() -> Callable:
    """
    Return a lookup callable for pyarrow string types.

    The callable is the bound ``get`` method of a dict whose keys are
    ``pa.string()`` and ``pa.large_string()`` (plus ``pa.string_view()`` when
    ``pa_version_under18p0`` is false) and whose values are
    ``pd.StringDtype(na_value=np.nan)``. Any type that is not a key yields
    ``None``.
    """
    pa = import_optional_dependency("pyarrow")

    string_types = [pa.string(), pa.large_string()]
    if not pa_version_under18p0:
        # string_view is only looked up on pyarrow when the version flag
        # allows it.
        string_types.append(pa.string_view())

    lookup = {
        arrow_type: pd.StringDtype(na_value=np.nan) for arrow_type in string_types
    }
    return lookup.get
|
|
|
|
|
def arrow_table_to_pandas(
    table: pyarrow.Table,
    dtype_backend: DtypeBackend | Literal["numpy"] | lib.NoDefault = lib.no_default,
    null_to_int64: bool = False,
    to_pandas_kwargs: dict | None = None,
) -> pd.DataFrame:
    """
    Convert a pyarrow Table to a DataFrame via ``table.to_pandas``.

    The ``types_mapper`` handed to ``to_pandas`` is selected from
    ``dtype_backend``; the checks are made in the order listed below.

    Parameters
    ----------
    table : pyarrow.Table
        Table to convert.
    dtype_backend : {"numpy_nullable", "pyarrow", "numpy"} or lib.no_default
        * ``"numpy_nullable"``: use the ``get`` method of the dict returned
          by ``_arrow_dtype_mapping()``.
        * ``"pyarrow"``: use ``pd.ArrowDtype``.
        * any other value while ``using_string_dtype()`` is true: use
          ``_arrow_string_types_mapper()`` if ``pa_version_under19p0``,
          otherwise no mapper. This branch is checked before the value is
          validated, so no error is raised here.
        * ``lib.no_default`` or ``"numpy"``: no mapper.
    null_to_int64 : bool, default False
        Only consulted for ``"numpy_nullable"``: additionally map
        ``pa.null()`` to ``pd.Int64Dtype()``.
    to_pandas_kwargs : dict or None, default None
        Extra keyword arguments forwarded to ``table.to_pandas``; ``None``
        means no extra arguments.

    Returns
    -------
    pd.DataFrame
        The result of ``table.to_pandas``.

    Raises
    ------
    NotImplementedError
        If none of the cases above matches ``dtype_backend``.
    """
    pa = import_optional_dependency("pyarrow")
    extra_kwargs = {} if to_pandas_kwargs is None else to_pandas_kwargs

    # Start from "no mapper"; only the branches that need one override it.
    types_mapper: type[pd.ArrowDtype] | None | Callable = None
    if dtype_backend == "numpy_nullable":
        nullable_lookup = _arrow_dtype_mapping()
        if null_to_int64:
            nullable_lookup[pa.null()] = pd.Int64Dtype()
        types_mapper = nullable_lookup.get
    elif dtype_backend == "pyarrow":
        types_mapper = pd.ArrowDtype
    elif using_string_dtype():
        if pa_version_under19p0:
            types_mapper = _arrow_string_types_mapper()
    elif not (dtype_backend is lib.no_default or dtype_backend == "numpy"):
        raise NotImplementedError

    return table.to_pandas(types_mapper=types_mapper, **extra_kwargs)
|
|