|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import functools |
|
import os |
|
import pathlib |
|
import subprocess |
|
import sys |
|
import time |
|
import urllib.request |
|
|
|
import pytest |
|
import hypothesis as h |
|
|
|
from ..conftest import groups, defaults |
|
|
|
from pyarrow import set_timezone_db_path |
|
from pyarrow.util import find_free_port |
|
|
|
|
|
|
|
# Hypothesis profiles available to the test suite, keyed by profile name.
# Each value holds the keyword settings passed to ``register_profile``.
_HYPOTHESIS_PROFILES = {
    'ci': {'max_examples': 1000},
    'dev': {'max_examples': 50},
    'debug': {'max_examples': 10, 'verbosity': h.Verbosity.verbose},
}

for _profile_name, _profile_settings in _HYPOTHESIS_PROFILES.items():
    h.settings.register_profile(_profile_name, **_profile_settings)

# Activate the profile named by the HYPOTHESIS_PROFILE environment variable,
# falling back to the 'dev' profile when the variable is not set.
h.settings.load_profile(os.getenv('HYPOTHESIS_PROFILE', 'dev'))
|
|
|
|
|
|
|
# Point AWS_CONFIG_FILE at /dev/null as soon as this module is imported.
# NOTE(review): presumably this keeps the AWS SDK from picking up user
# configuration values -- confirm it runs before the SDK is loaded.
os.environ['AWS_CONFIG_FILE'] = "/dev/null"


# Timezone database configuration, only performed on Windows.
if sys.platform == 'win32':
    # An explicit database location can be supplied via PYARROW_TZDATA_PATH;
    # an unset or empty value leaves pyarrow's setting untouched.
    tzdata_set_path = os.environ.get('PYARROW_TZDATA_PATH')
    if tzdata_set_path:
        set_timezone_db_path(tzdata_set_path)

    # When TZDIR is not already defined, point it at the ``zoneinfo``
    # directory inside the installed ``tzdata`` package (if there is one).
    if 'TZDIR' not in os.environ:
        from importlib import resources
        try:
            os.environ['TZDIR'] = os.path.join(
                resources.files('tzdata'), 'zoneinfo')
        except ModuleNotFoundError:
            print('Package "tzdata" not found. '
                  'Not setting TZDIR environment variable.')
|
|
|
|
|
def pytest_addoption(parser):
    """Register ``--enable-<group>`` and ``--disable-<group>`` options.

    One pair of flags is added for every entry in ``groups``.  The default
    of the enable flag comes from the ``PYARROW_TEST_<GROUP>`` environment
    variable (name upper-cased) and falls back to ``defaults[group]`` when
    that variable is unset or empty.  The disable flag defaults to False.
    """
    truthy = {'1', 'true', 'on', 'yes', 'y'}
    falsy = {'0', 'false', 'off', 'no', 'n'}

    def bool_env(name, default=None):
        # Interpret the upper-cased environment variable as a boolean;
        # an unset or empty variable yields ``default``.
        env_name = name.upper()
        raw = os.environ.get(env_name)
        if not raw:
            return default
        lowered = raw.lower()
        if lowered in truthy:
            return True
        if lowered in falsy:
            return False
        raise ValueError(
            '{}={} is not parsable as boolean'.format(env_name, lowered))

    for group in groups:
        enabled_by_default = bool_env(
            f'PYARROW_TEST_{group}', defaults[group])
        parser.addoption(
            f'--enable-{group}',
            action='store_true',
            default=enabled_by_default,
            help=f'Enable the {group} test group',
        )
        parser.addoption(
            f'--disable-{group}',
            action='store_true',
            default=False,
            help=f'Disable the {group} test group',
        )
|
|
|
|
|
class PyArrowConfig:
    """Record which test groups are enabled and skip tests of disabled ones."""

    def __init__(self):
        # Maps a group name to its enabled state.
        self.is_enabled = {}

    def apply_mark(self, mark):
        """Enforce ``mark`` if its name is a known test group.

        Marks whose name is not listed in ``groups`` are ignored.
        """
        name = mark.name
        if name not in groups:
            return
        self.requires(name)

    def requires(self, group):
        """Skip the running test unless ``group`` is enabled.

        Raises ``KeyError`` when ``group`` has no entry in ``is_enabled``.
        """
        if self.is_enabled[group]:
            return
        pytest.skip(f'{group} NOT enabled')
|
|
|
|
|
def pytest_configure(config):
    """Attach a ``PyArrowConfig`` to ``config`` and fill in group states.

    Every group is registered as a pytest marker.  A group counts as enabled
    when its ``--enable-<group>`` option is truthy and its
    ``--disable-<group>`` option is not.
    """
    pyarrow_config = PyArrowConfig()
    config.pyarrow = pyarrow_config

    for group in groups:
        config.addinivalue_line("markers", group)

        state = config.getoption(f'--enable-{group}')
        if state:
            # The disable flag is only consulted for groups that are enabled,
            # and it takes precedence over the enable flag.
            state = not config.getoption(f'--disable-{group}')
        pyarrow_config.is_enabled[group] = state
|
|
|
|
|
def pytest_runtest_setup(item):
    """Pass every marker of ``item`` to ``item.config.pyarrow.apply_mark``."""
    for marker in item.iter_markers():
        pyarrow_config = item.config.pyarrow
        pyarrow_config.apply_mark(marker)
|
|
|
|
|
@pytest.fixture
def tempdir(tmpdir):
    """Return pytest's ``tmpdir`` as a ``pathlib.Path``."""
    legacy_path = tmpdir.strpath
    return pathlib.Path(legacy_path)
|
|
|
|
|
@pytest.fixture(scope='session')
def base_datadir():
    """Return the ``data`` directory located next to this file."""
    here = pathlib.Path(__file__).parent
    return here.joinpath('data')
|
|
|
|
|
@pytest.fixture(autouse=True)
def disable_aws_metadata(monkeypatch):
    """Keep the AWS SDK from contacting the EC2 metadata server.

    Applied automatically to every test.  Without it, tests that exercise
    the S3 filesystem suffer a 5 second delay.
    """
    variable, value = "AWS_EC2_METADATA_DISABLED", "true"
    monkeypatch.setenv(variable, value)
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture(scope='session')
def hdfs_connection():
    """Return the ``(host, port, user)`` triple for the HDFS tests.

    Values are read from ``ARROW_HDFS_TEST_HOST``, ``ARROW_HDFS_TEST_PORT``
    and ``ARROW_HDFS_TEST_USER``; they default to ``'default'``, ``0`` and
    ``'hdfs'`` respectively.  The port is converted to ``int``.
    """
    env = os.environ
    return (
        env.get('ARROW_HDFS_TEST_HOST', 'default'),
        int(env.get('ARROW_HDFS_TEST_PORT', 0)),
        env.get('ARROW_HDFS_TEST_USER', 'hdfs'),
    )
|
|
|
|
|
@pytest.fixture(scope='session')
def s3_connection():
    """Return ``(host, port, access_key, secret_key)`` for the S3 tests.

    The host is the loopback address, the port comes from
    ``find_free_port()`` and the credentials are fixed test values.
    """
    port = find_free_port()
    return '127.0.0.1', port, 'arrow', 'apachearrow'
|
|
|
|
|
def retry(attempts=3, delay=1.0, max_delay=None, backoff=1):
    """
    Retry decorator

    The decorated function is called until it returns without raising an
    ``Exception`` or until ``attempts`` calls have failed, in which case the
    exception of the last call is re-raised.  The decorator sleeps between
    two consecutive attempts only; it never sleeps after the final failure.

    Parameters
    ----------
    attempts : int, default 3
        The number of attempts. Must be positive.
    delay : float, default 1
        Initial delay in seconds, used after the first failed attempt.
    max_delay : float, optional
        The max delay between attempts.
    backoff : float, default 1
        The multiplier to delay after each attempt.

    Raises
    ------
    ValueError
        If ``attempts`` is not positive.
    """
    if attempts <= 0:
        # Previously this surfaced as an UnboundLocalError on the first call
        # of the decorated function, which was never actually invoked.
        raise ValueError(
            f"attempts must be positive, got {attempts!r}")

    def decorate(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            remaining_attempts = attempts
            curr_delay = delay
            if max_delay is not None:
                curr_delay = min(curr_delay, max_delay)
            while True:
                try:
                    return func(*args, **kwargs)
                except Exception:
                    remaining_attempts -= 1
                    if remaining_attempts <= 0:
                        # Out of attempts: propagate right away instead of
                        # sleeping once more before failing.
                        raise
                    time.sleep(curr_delay)
                    # Grow the delay only after it has been used, so that
                    # the first wait really is the initial ``delay``.
                    curr_delay *= backoff
                    if max_delay is not None:
                        curr_delay = min(curr_delay, max_delay)
        return wrapper
    return decorate
|
|
|
|
|
@pytest.fixture(scope='session')
def s3_server(s3_connection, tmpdir_factory):
    """Start a ``minio`` server for the test session.

    The server is launched with the credentials and address taken from
    ``s3_connection`` and serves pytest's base temporary directory.  The
    fixture is skipped when the ``minio`` executable cannot be started.

    Yields a dict with the keys ``'connection'`` (the ``s3_connection``
    tuple), ``'process'`` (the ``subprocess.Popen`` object) and
    ``'tempdir'``.  The process is killed and reaped on teardown, including
    when the health check fails.
    """
    @retry(attempts=5, delay=1, backoff=2)
    def minio_server_health_check(address):
        # Close the HTTP response explicitly so that every (possibly
        # retried) probe releases its socket.
        with urllib.request.urlopen(
                f"http://{address}/minio/health/live") as resp:
            assert resp.getcode() == 200

    tmpdir = tmpdir_factory.getbasetemp()
    host, port, access_key, secret_key = s3_connection

    address = '{}:{}'.format(host, port)
    env = os.environ.copy()
    env.update({
        'MINIO_ACCESS_KEY': access_key,
        'MINIO_SECRET_KEY': secret_key
    })

    args = ['minio', '--compat', 'server', '--quiet', '--address',
            address, tmpdir]
    proc = None
    try:
        proc = subprocess.Popen(args, env=env)
    except OSError:
        pytest.skip('`minio` command cannot be located')
    else:
        # Wait until the server answers its liveness endpoint before
        # handing it to the tests.
        minio_server_health_check(address)

        yield {
            'connection': s3_connection,
            'process': proc,
            'tempdir': tmpdir
        }
    finally:
        if proc is not None:
            proc.kill()
            proc.wait()
|
|
|
|
|
@pytest.fixture(scope='session')
def gcs_server():
    """Start a ``storage-testbench`` process for the test session.

    The fixture is skipped when the command cannot be executed or when the
    process has already exited right after being started.

    Yields a dict with the keys ``'connection'`` (a ``('localhost', port)``
    tuple) and ``'process'``.  The process is killed and reaped on teardown.
    """
    port = find_free_port()
    env = os.environ.copy()
    args = ['storage-testbench', '--port', str(port)]
    server = None
    try:
        server = subprocess.Popen(args, env=env)

        # A return code right after launch means the process already died.
        already_exited = server.poll() is not None
        if already_exited:
            pytest.skip(f"Command {args} did not start server successfully!")
    except OSError as exc:
        pytest.skip(f"Command {args} failed to execute: {exc}")
    else:
        connection = ('localhost', port)
        yield {
            'connection': connection,
            'process': server,
        }
    finally:
        if server is not None:
            server.kill()
            server.wait()
|
|
|
|
|
@pytest.fixture(scope='session')
def azure_server(tmpdir_factory):
    """Start an ``azurite-blob`` process for the test session.

    The emulator stores its data in pytest's base temporary directory and
    listens on a free port.  The fixture is skipped when the command cannot
    be executed or when the process has already exited right after launch.

    Yields a dict with the keys ``'connection'`` (a tuple of host, port,
    account name and account key), ``'process'`` and ``'tempdir'``.  The
    process is killed and reaped on teardown.
    """
    port = find_free_port()
    env = os.environ.copy()
    tmpdir = tmpdir_factory.getbasetemp()

    args = [
        'azurite-blob',
        "--location", tmpdir,
        "--blobPort", str(port),
        "--skipApiVersionCheck",
    ]
    server = None
    try:
        server = subprocess.Popen(args, env=env)

        # A return code right after launch means the process already died.
        already_exited = server.poll() is not None
        if already_exited:
            pytest.skip(f"Command {args} did not start server successfully!")
    except (ModuleNotFoundError, OSError) as exc:
        pytest.skip(f"Command {args} failed to execute: {exc}")
    else:
        # NOTE(review): the account name and key look like Azurite's
        # well-known development credentials -- confirm against its docs.
        account_name = 'devstoreaccount1'
        account_key = ('Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2'
                       'UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==')
        yield {
            'connection': ('127.0.0.1', port, account_name, account_key),
            'process': server,
            'tempdir': tmpdir,
        }
    finally:
        if server is not None:
            server.kill()
            server.wait()
|
|
|
|
|
@pytest.fixture(params=['builtin_pickle', 'cloudpickle'], scope='session')
def pickle_module(request):
    """Return the value of the fixture named by the current parameter.

    Parametrized over the ``builtin_pickle`` and ``cloudpickle`` fixtures.
    """
    fixture_name = request.param
    return request.getfixturevalue(fixture_name)
|
|
|
|
|
@pytest.fixture(scope='session')
def builtin_pickle():
    """Return the standard library ``pickle`` module."""
    import pickle as stdlib_pickle
    return stdlib_pickle
|
|
|
|
|
@pytest.fixture(scope='session')
def cloudpickle():
    """Return the ``cloudpickle`` module, skipping if it cannot be imported.

    When the module does not define ``HIGHEST_PROTOCOL``, that attribute is
    set to the module's ``DEFAULT_PROTOCOL`` before it is returned.
    """
    module = pytest.importorskip('cloudpickle')
    if 'HIGHEST_PROTOCOL' not in vars(module):
        module.HIGHEST_PROTOCOL = module.DEFAULT_PROTOCOL
    return module
|
|