""" feather-format compat """
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Any,
)
from pandas._config import using_string_dtype
from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
from pandas.util._decorators import doc
from pandas.util._validators import check_dtype_backend
from pandas.core.api import DataFrame
from pandas.core.shared_docs import _shared_docs
from pandas.io._util import arrow_table_to_pandas
from pandas.io.common import get_handle
if TYPE_CHECKING:
from collections.abc import (
Hashable,
Sequence,
)
from pandas._typing import (
DtypeBackend,
FilePath,
ReadBuffer,
StorageOptions,
WriteBuffer,
)
@doc(storage_options=_shared_docs["storage_options"])
def to_feather(
df: DataFrame,
path: FilePath | WriteBuffer[bytes],
storage_options: StorageOptions | None = None,
**kwargs: Any,
) -> None:
"""
Write a DataFrame to the binary Feather format.
Parameters
----------
df : DataFrame
path : str, path object, or file-like object
{storage_options}
**kwargs :
Additional keywords passed to `pyarrow.feather.write_feather`.
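
    Examples
    --------
    A minimal sketch; the DataFrame contents and the output path are
    illustrative only.

    >>> df = pd.DataFrame({{"a": [1, 2, 3]}})  # doctest: +SKIP
    >>> df.to_feather("out.feather")  # doctest: +SKIP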
"""
    import_optional_dependency("pyarrow")
    from pyarrow import feather

    if not isinstance(df, DataFrame):
        raise ValueError("feather only supports IO with DataFrames")
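    # get_handle resolves local paths, URLs/fsspec paths (honouring
    # storage_options) and already-open binary buffers to a writable handle.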
    with get_handle(
        path, "wb", storage_options=storage_options, is_text=False
    ) as handles:
        feather.write_feather(df, handles.handle, **kwargs)


@doc(storage_options=_shared_docs["storage_options"])
def read_feather(
    path: FilePath | ReadBuffer[bytes],
    columns: Sequence[Hashable] | None = None,
    use_threads: bool = True,
    storage_options: StorageOptions | None = None,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
) -> DataFrame:
"""
Load a feather-format object from the file path.
Parameters
----------
path : str, path object, or file-like object
String, path object (implementing ``os.PathLike[str]``), or file-like
object implementing a binary ``read()`` function. The string could be a URL.
Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
expected. A local file could be: ``file://localhost/path/to/table.feather``.
columns : sequence, default None
If not provided, all columns are read.
use_threads : bool, default True
Whether to parallelize reading using multiple threads.
{storage_options}
dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable'
Back-end data type applied to the resultant :class:`DataFrame`
(still experimental). Behaviour is as follows:
* ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
(default).
* ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
DataFrame.
.. versionadded:: 2.0
Returns
-------
type of object stored in file
Examples
--------
>>> df = pd.read_feather("path/to/file.feather") # doctest: +SKIP
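
    A pyarrow-backed read; the path is illustrative only.

    >>> df = pd.read_feather(
    ...     "path/to/file.feather", dtype_backend="pyarrow"
    ... )  # doctest: +SKIP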
"""
    import_optional_dependency("pyarrow")
    from pyarrow import feather

    # import utils to register the pyarrow extension types
    import pandas.core.arrays.arrow.extension_types  # pyright: ignore[reportUnusedImport] # noqa: F401

    check_dtype_backend(dtype_backend)
    with get_handle(
        path, "rb", storage_options=storage_options, is_text=False
    ) as handles:
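        # Fast path: with the default dtype_backend and the legacy string
        # dtype, pyarrow builds the pandas DataFrame directly. Otherwise read
        # an Arrow table and convert it, so the requested backend (and the
        # string dtype option) is honoured.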
        if dtype_backend is lib.no_default and not using_string_dtype():
            return feather.read_feather(
                handles.handle, columns=columns, use_threads=bool(use_threads)
            )

        pa_table = feather.read_table(
            handles.handle, columns=columns, use_threads=bool(use_threads)
        )
        return arrow_table_to_pandas(pa_table, dtype_backend=dtype_backend)