|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import subprocess |
|
import sys |
|
|
|
import pytest |
|
|
|
import pyarrow as pa |
|
from pyarrow.lib import ArrowInvalid |
|
|
|
|
|
def test_get_include():
    """The directory from ``pa.get_include()`` must contain ``arrow/api.h``."""
    api_header = os.path.join(pa.get_include(), 'arrow', 'api.h')
    assert os.path.exists(api_header)
|
|
|
|
|
@pytest.mark.skipif(sys.platform != "win32",
                    reason="requires the win32 platform")
def test_get_library_dirs_win32():
    """At least one of ``pa.get_library_dirs()`` must contain ``arrow.lib``.

    The skip condition is a plain boolean with an explicit reason (rather
    than a string that pytest evaluates), so skipped runs say why in the
    report.
    """
    library_dirs = list(pa.get_library_dirs())
    # Name the searched directories on failure; a bare ``any(...)`` assert
    # would only report ``assert False``.
    assert any(os.path.exists(os.path.join(directory, 'arrow.lib'))
               for directory in library_dirs), (
        f"arrow.lib not found in any of {library_dirs!r}")
|
|
|
|
|
def test_cpu_count():
    """The CPU count is positive and can be changed and read back."""
    initial = pa.cpu_count()
    assert initial > 0

    bumped = initial + 5
    try:
        pa.set_cpu_count(bumped)
        assert pa.cpu_count() == bumped
    finally:
        # Put the starting value back even when the assertion above fails.
        pa.set_cpu_count(initial)
|
|
|
|
|
def test_io_thread_count():
    """The IO thread count is positive and can be changed and read back."""
    initial = pa.io_thread_count()
    assert initial > 0

    bumped = initial + 5
    try:
        pa.set_io_thread_count(bumped)
        assert pa.io_thread_count() == bumped
    finally:
        # Put the starting value back even when the assertion above fails.
        pa.set_io_thread_count(initial)
|
|
|
|
|
@pytest.mark.processes
def test_env_var_io_thread_count():
    """Check how a child process reacts to ``ARROW_IO_THREADS``.

    A fresh interpreter prints ``pa.io_thread_count()``. With the variable
    set to ``'17'`` the child must print 17 and leave stderr empty. With
    ``'-1'`` or ``'z'`` it must print 8 (presumably the library default --
    confirm against the C++ side) and mention the invalid value on stderr.
    """
    code = """if 1:
        import pyarrow as pa
        print(pa.io_thread_count())
        """

    def run_with_env_var(env_var):
        # Run `code` in a new interpreter whose environment is the current
        # one plus ARROW_IO_THREADS=env_var; a non-zero exit status raises
        # CalledProcessError. Returns the decoded (stdout, stderr) pair.
        child_env = dict(os.environ, ARROW_IO_THREADS=env_var)
        completed = subprocess.run([sys.executable, "-c", code],
                                   env=child_env, capture_output=True,
                                   check=True)
        return completed.stdout.decode(), completed.stderr.decode()

    stdout, stderr = run_with_env_var('17')
    assert stdout.strip() == '17'
    assert stderr == ''

    expected_warning = (
        "ARROW_IO_THREADS does not contain a valid number of threads")
    for invalid_value in ('-1', 'z'):
        stdout, stderr = run_with_env_var(invalid_value)
        assert stdout.strip() == '8'
        assert expected_warning in stderr.strip()
|
|
|
|
|
def test_build_info():
    """Check the types and mutual consistency of the build/version metadata."""
    build_info = pa.cpp_build_info
    version_info = pa.cpp_version_info

    assert isinstance(build_info, pa.BuildInfo)
    assert isinstance(version_info, pa.VersionInfo)
    for version_string in (pa.cpp_version, pa.__version__):
        assert isinstance(version_string, str)
    # The build info must embed the same version object exposed at top level.
    assert build_info.version_info == version_info

    accepted_build_types = (
        'debug', 'release', 'minsizerel', 'relwithdebinfo')
    assert build_info.build_type in accepted_build_types
|
|
|
|
|
|
|
|
|
def test_runtime_info():
    """Check ``pa.runtime_info()`` and the ``ARROW_USER_SIMD_LEVEL`` override.

    Both the active and the detected SIMD level must be one of the known
    level names. When the active level is not ``'none'``, a child
    interpreter is started with ``ARROW_USER_SIMD_LEVEL=none`` and must
    report ``'none'`` as its active level while still reporting the same
    detected level as this process.
    """
    info = pa.runtime_info()
    assert isinstance(info, pa.RuntimeInfo)

    known_levels = ('none', 'sse4_2', 'avx', 'avx2', 'avx512')
    for level in (info.simd_level, info.detected_simd_level):
        assert level in known_levels

    if info.simd_level == 'none':
        # The active level is already 'none'; there is nothing to override.
        return

    child_env = dict(os.environ, ARROW_USER_SIMD_LEVEL='none')
    # The trailing backslash inside the literal joins the two lines of the
    # second assert into one statement in the child script.
    code = f"""if 1:
            import pyarrow as pa

            info = pa.runtime_info()
            assert info.simd_level == 'none', info.simd_level
            assert info.detected_simd_level == {info.detected_simd_level!r},\
                info.detected_simd_level
            """
    subprocess.check_call([sys.executable, "-c", code], env=child_env)
|
|
|
|
|
@pytest.mark.processes
def test_import_at_shutdown():
    """Importing pyarrow from an ``atexit`` handler must exit cleanly.

    A child interpreter registers an exit handler whose only action is
    ``import pyarrow``; ``check_call`` fails the test if that child exits
    with a non-zero status.
    """
    code = """if 1:
        import atexit

        def import_arrow():
            import pyarrow

        atexit.register(import_arrow)
        """
    child_command = [sys.executable, "-c", code]
    subprocess.check_call(child_command)
|
|
|
|
|
@pytest.mark.skipif(
    sys.platform == "win32",
    reason=("Path to timezone database is not configurable "
            "on non-Windows platforms"),
)
def test_set_timezone_db_path_non_windows():
    """``pa.set_timezone_db_path`` must raise ``ArrowInvalid`` off Windows."""
    expected_message = ("Arrow was set to use OS timezone "
                        "database at compile time")
    with pytest.raises(ArrowInvalid, match=expected_message):
        pa.set_timezone_db_path("path")
|
|
|
|
|
_NON_CONSTRUCTIBLE_CLASSES = [
    # Core containers and base classes
    pa.Field,
    pa.Schema,
    pa.ChunkedArray,
    pa.RecordBatch,
    pa.Table,
    pa.Buffer,
    pa.Array,
    pa.Tensor,
    # Data types
    pa.DataType,
    pa.ListType,
    pa.LargeListType,
    pa.FixedSizeListType,
    pa.ListViewType,
    pa.LargeListViewType,
    pa.UnionType,
    pa.SparseUnionType,
    pa.DenseUnionType,
    pa.StructType,
    pa.Time32Type,
    pa.Time64Type,
    pa.TimestampType,
    pa.Decimal32Type,
    pa.Decimal64Type,
    pa.Decimal128Type,
    pa.Decimal256Type,
    pa.DictionaryType,
    pa.FixedSizeBinaryType,
    # Arrays
    pa.NullArray,
    pa.NumericArray,
    pa.IntegerArray,
    pa.FloatingPointArray,
    pa.BooleanArray,
    pa.Int8Array,
    pa.Int16Array,
    pa.Int32Array,
    pa.Int64Array,
    pa.UInt8Array,
    pa.UInt16Array,
    pa.UInt32Array,
    pa.UInt64Array,
    pa.ListArray,
    pa.LargeListArray,
    pa.MapArray,
    pa.FixedSizeListArray,
    pa.UnionArray,
    pa.BinaryArray,
    pa.StringArray,
    pa.BinaryViewArray,
    pa.StringViewArray,
    pa.FixedSizeBinaryArray,
    pa.DictionaryArray,
    pa.Date32Array,
    pa.Date64Array,
    pa.TimestampArray,
    pa.Time32Array,
    pa.Time64Array,
    pa.DurationArray,
    pa.Decimal128Array,
    pa.Decimal256Array,
    pa.StructArray,
    pa.RunEndEncodedArray,
    # Scalars
    pa.Scalar,
    pa.BooleanScalar,
    pa.Int8Scalar,
    pa.Int16Scalar,
    pa.Int32Scalar,
    pa.Int64Scalar,
    pa.UInt8Scalar,
    pa.UInt16Scalar,
    pa.UInt32Scalar,
    pa.UInt64Scalar,
    pa.HalfFloatScalar,
    pa.FloatScalar,
    pa.DoubleScalar,
    pa.Decimal128Scalar,
    pa.Decimal256Scalar,
    pa.Date32Scalar,
    pa.Date64Scalar,
    pa.Time32Scalar,
    pa.Time64Scalar,
    pa.TimestampScalar,
    pa.DurationScalar,
    pa.StringScalar,
    pa.BinaryScalar,
    pa.FixedSizeBinaryScalar,
    pa.BinaryViewScalar,
    pa.StringViewScalar,
    pa.ListScalar,
    pa.LargeListScalar,
    pa.ListViewScalar,
    pa.LargeListViewScalar,
    pa.MapScalar,
    pa.FixedSizeListScalar,
    pa.UnionScalar,
    pa.StructScalar,
    pa.DictionaryScalar,
    pa.RunEndEncodedScalar,
    # Readers, IPC messages, memory pools and devices
    pa.RecordBatchReader,
    pa.ipc.Message,
    pa.ipc.MessageReader,
    pa.MemoryPool,
    pa.LoggingMemoryPool,
    pa.ProxyMemoryPool,
    pa.Device,
    pa.MemoryManager,
    # Opaque, Bool8 and JSON families
    pa.OpaqueArray,
    pa.OpaqueScalar,
    pa.OpaqueType,
    pa.Bool8Array,
    pa.Bool8Scalar,
    pa.Bool8Type,
    pa.JsonArray,
    pa.JsonScalar,
    pa.JsonType,
]


@pytest.mark.parametrize('klass', _NON_CONSTRUCTIBLE_CLASSES)
def test_extension_type_constructor_errors(klass):
    """Calling ``klass()`` with no arguments must raise ``TypeError``.

    The error text has to name the class and point to an alternative
    (the ``.*`` in the pattern matches whatever replacement is suggested).
    """
    expected_pattern = (
        f"Do not call {klass.__name__}'s constructor directly, "
        "use .* instead.")
    with pytest.raises(TypeError, match=expected_pattern):
        klass()
|
|