Spaces:

jamtur01
/

MMaDA

Runtime error

App Files Files Community

MMaDA / venv /lib /python3.11 /site-packages /pyarrow /tests /test_compute.py

jamtur01

Upload folder using huggingface_hub

9c6594c verified about 1 month ago

raw

history blame contribute delete

149 kB

	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	from collections import namedtuple
	import datetime
	import decimal
	from functools import lru_cache, partial
	import inspect
	import itertools
	import math
	import os
	import pytest
	import random
	import sys
	import textwrap

	try:
	import numpy as np
	except ImportError:
	np = None

	try:
	import pandas as pd
	except ImportError:
	pd = None

	import pyarrow as pa
	import pyarrow.compute as pc
	from pyarrow.lib import ArrowNotImplementedError

	try:
	import pyarrow.substrait as pas
	except ImportError:
	pas = None

	exported_functions = [
	func for (name, func) in sorted(pc.__dict__.items())
	if hasattr(func, '__arrow_compute_function__')]

	exported_option_classes = [
	cls for (name, cls) in sorted(pc.__dict__.items())
	if (isinstance(cls, type) and
	cls is not pc.FunctionOptions and
	issubclass(cls, pc.FunctionOptions))]

	numerical_arrow_types = [
	pa.int8(),
	pa.int16(),
	pa.int64(),
	pa.uint8(),
	pa.uint16(),
	pa.uint64(),
	pa.float32(),
	pa.float64()
	]


	all_array_types = [
	('bool', [True, False, False, True, True]),
	('uint8', range(5)),
	('int8', range(5)),
	('uint16', range(5)),
	('int16', range(5)),
	('uint32', range(5)),
	('int32', range(5)),
	('uint64', range(5, 10)),
	('int64', range(5, 10)),
	('float', [0, 0.1, 0.2, 0.3, 0.4]),
	('double', [0, 0.1, 0.2, 0.3, 0.4]),
	('string', ['a', 'b', None, 'ddd', 'ee']),
	('binary', [b'a', b'b', b'c', b'ddd', b'ee']),
	(pa.binary(3), [b'abc', b'bcd', b'cde', b'def', b'efg']),
	(pa.list_(pa.int8()), [[1, 2], [3, 4], [5, 6], None, [9, 16]]),
	(pa.large_list(pa.int16()), [[1], [2, 3, 4], [5, 6], None, [9, 16]]),
	(pa.struct([('a', pa.int8()), ('b', pa.int8())]), [
	{'a': 1, 'b': 2}, None, {'a': 3, 'b': 4}, None, {'a': 5, 'b': 6}]),
	]


	def test_exported_functions():
	# Check that all exported concrete functions can be called with
	# the right number of arguments.
	# Note that unregistered functions (e.g. with a mismatching name)
	# will raise KeyError.
	functions = exported_functions
	assert len(functions) >= 10
	for func in functions:
	desc = func.__arrow_compute_function__
	if desc['options_required']:
	# Skip this function as it will fail with a different error
	# message if we don't pass an options instance.
	continue
	arity = desc['arity']
	if arity == 0:
	continue
	if arity is Ellipsis:
	args = [object()] * 3
	else:
	args = [object()] * arity
	with pytest.raises(TypeError,
	match="Got unexpected argument type "
	"<class 'object'> for compute function"):
	func(*args)


	def test_hash_aggregate_not_exported():
	# Ensure we are not leaking hash aggregate functions
	# which are not callable by themselves.
	for func in exported_functions:
	arrow_f = pc.get_function(func.__arrow_compute_function__["name"])
	assert arrow_f.kind != "hash_aggregate"


	def test_exported_option_classes():
	classes = exported_option_classes
	assert len(classes) >= 10
	for cls in classes:
	# Option classes must have an introspectable constructor signature,
	# and that signature should not have any args or *kwargs.
	sig = inspect.signature(cls)
	for param in sig.parameters.values():
	assert param.kind not in (param.VAR_POSITIONAL,
	param.VAR_KEYWORD)


	@pytest.mark.filterwarnings(
	"ignore:pyarrow.CumulativeSumOptions is deprecated as of 14.0"
	)
	def test_option_class_equality(request):
	options = [
	pc.ArraySortOptions(),
	pc.AssumeTimezoneOptions("UTC"),
	pc.CastOptions.safe(pa.int8()),
	pc.CumulativeOptions(start=None, skip_nulls=False),
	pc.CountOptions(),
	pc.DayOfWeekOptions(count_from_zero=False, week_start=0),
	pc.DictionaryEncodeOptions(),
	pc.RunEndEncodeOptions(),
	pc.ElementWiseAggregateOptions(skip_nulls=True),
	pc.ExtractRegexOptions("pattern"),
	pc.ExtractRegexSpanOptions("pattern"),
	pc.FilterOptions(),
	pc.IndexOptions(pa.scalar(1)),
	pc.JoinOptions(),
	pc.ListSliceOptions(0, -1, 1, True),
	pc.ListFlattenOptions(recursive=False),
	pc.MakeStructOptions(["field", "names"],
	field_nullability=[True, True],
	field_metadata=[pa.KeyValueMetadata({"a": "1"}),
	pa.KeyValueMetadata({"b": "2"})]),
	pc.MapLookupOptions(pa.scalar(1), "first"),
	pc.MatchSubstringOptions("pattern"),
	pc.ModeOptions(),
	pc.NullOptions(),
	pc.PadOptions(5),
	pc.PairwiseOptions(period=1),
	pc.PartitionNthOptions(1, null_placement="at_start"),
	pc.PivotWiderOptions(["height"], unexpected_key_behavior="raise"),
	pc.QuantileOptions(),
	pc.RandomOptions(),
	pc.RankOptions(sort_keys="ascending",
	null_placement="at_start", tiebreaker="max"),
	pc.RankQuantileOptions(sort_keys="ascending",
	null_placement="at_start"),
	pc.ReplaceSliceOptions(0, 1, "a"),
	pc.ReplaceSubstringOptions("a", "b"),
	pc.RoundOptions(2, "towards_infinity"),
	pc.RoundBinaryOptions("towards_infinity"),
	pc.RoundTemporalOptions(1, "second", week_starts_monday=True),
	pc.RoundToMultipleOptions(100, "towards_infinity"),
	pc.ScalarAggregateOptions(),
	pc.SelectKOptions(0, sort_keys=[("b", "ascending")]),
	pc.SetLookupOptions(pa.array([1])),
	pc.SkewOptions(min_count=2),
	pc.SliceOptions(0, 1, 1),
	pc.SortOptions([("dummy", "descending")], null_placement="at_start"),
	pc.SplitOptions(),
	pc.SplitPatternOptions("pattern"),
	pc.StrftimeOptions(),
	pc.StrptimeOptions("%Y", "s", True),
	pc.StructFieldOptions(indices=[]),
	pc.TakeOptions(),
	pc.TDigestOptions(),
	pc.TrimOptions(" "),
	pc.Utf8NormalizeOptions("NFKC"),
	pc.VarianceOptions(),
	pc.WinsorizeOptions(0.05, 0.9),
	pc.WeekOptions(week_starts_monday=True, count_from_zero=False,
	first_week_is_fully_in_year=False),
	]
	# Timezone database might not be installed on Windows or Emscripten
	if request.config.pyarrow.is_enabled["timezone_data"]:
	options.append(pc.AssumeTimezoneOptions("Europe/Ljubljana"))

	classes = {type(option) for option in options}

	for cls in exported_option_classes:
	# Timezone database might not be installed on Windows or Emscripten
	if (
	cls not in classes
	and (request.config.pyarrow.is_enabled["timezone_data"])
	and cls != pc.AssumeTimezoneOptions
	):
	try:
	options.append(cls())
	except TypeError:
	pytest.fail(f"Options class is not tested: {cls}")

	for option in options:
	assert option == option
	assert repr(option).startswith(option.__class__.__name__)
	buf = option.serialize()
	deserialized = pc.FunctionOptions.deserialize(buf)
	assert option == deserialized
	# TODO remove the check under the if statement and the filterwarnings
	# mark when the deprecated class CumulativeSumOptions is removed.
	if repr(option).startswith("CumulativeSumOptions"):
	assert repr(deserialized).startswith("CumulativeOptions")
	else:
	assert repr(option) == repr(deserialized)
	for option1, option2 in zip(options, options[1:]):
	assert option1 != option2

	assert repr(pc.IndexOptions(pa.scalar(1))) == "IndexOptions(value=int64:1)"
	assert repr(pc.ArraySortOptions()) == \
	"ArraySortOptions(order=Ascending, null_placement=AtEnd)"


	def test_list_functions():
	assert len(pc.list_functions()) > 10
	assert "add" in pc.list_functions()


	def _check_get_function(name, expected_func_cls, expected_ker_cls,
	min_num_kernels=1):
	func = pc.get_function(name)
	assert isinstance(func, expected_func_cls)
	n = func.num_kernels
	assert n >= min_num_kernels
	assert n == len(func.kernels)
	assert all(isinstance(ker, expected_ker_cls) for ker in func.kernels)


	def test_get_function_scalar():
	_check_get_function("add", pc.ScalarFunction, pc.ScalarKernel, 8)


	def test_get_function_vector():
	_check_get_function("unique", pc.VectorFunction, pc.VectorKernel, 8)


	def test_get_function_scalar_aggregate():
	_check_get_function("mean", pc.ScalarAggregateFunction,
	pc.ScalarAggregateKernel, 8)


	def test_get_function_hash_aggregate():
	_check_get_function("hash_sum", pc.HashAggregateFunction,
	pc.HashAggregateKernel, 1)


	@pytest.mark.numpy
	def test_call_function_with_memory_pool():
	arr = pa.array(["foo", "bar", "baz"])
	indices = np.array([2, 2, 1])
	result1 = arr.take(indices)
	result2 = pc.call_function('take', [arr, indices],
	memory_pool=pa.default_memory_pool())
	expected = pa.array(["baz", "baz", "bar"])
	assert result1.equals(expected)
	assert result2.equals(expected)

	result3 = pc.take(arr, indices, memory_pool=pa.default_memory_pool())
	assert result3.equals(expected)


	def test_pickle_functions(pickle_module):
	# Pickle registered functions
	for name in pc.list_functions():
	func = pc.get_function(name)
	reconstructed = pickle_module.loads(pickle_module.dumps(func))
	assert type(reconstructed) is type(func)
	assert reconstructed.name == func.name
	assert reconstructed.arity == func.arity
	assert reconstructed.num_kernels == func.num_kernels


	def test_pickle_global_functions(pickle_module):
	# Pickle global wrappers (manual or automatic) of registered functions
	for name in pc.list_functions():
	try:
	func = getattr(pc, name)
	except AttributeError:
	# hash_aggregate functions are not exported as callables.
	continue
	reconstructed = pickle_module.loads(pickle_module.dumps(func))
	assert reconstructed is func


	def test_function_attributes():
	# Sanity check attributes of registered functions
	for name in pc.list_functions():
	func = pc.get_function(name)
	assert isinstance(func, pc.Function)
	assert func.name == name
	kernels = func.kernels
	assert func.num_kernels == len(kernels)
	assert all(isinstance(ker, pc.Kernel) for ker in kernels)
	repr(func)
	for ker in kernels:
	repr(ker)


	def test_input_type_conversion():
	# Automatic array conversion from Python
	arr = pc.add([1, 2], [4, None])
	assert arr.to_pylist() == [5, None]
	# Automatic scalar conversion from Python
	arr = pc.add([1, 2], 4)
	assert arr.to_pylist() == [5, 6]
	# Other scalar type
	assert pc.equal(["foo", "bar", None],
	"foo").to_pylist() == [True, False, None]


	@pytest.mark.parametrize('arrow_type', numerical_arrow_types)
	def test_sum_array(arrow_type):
	arr = pa.array([1, 2, 3, 4], type=arrow_type)
	assert arr.sum().as_py() == 10
	assert pc.sum(arr).as_py() == 10

	arr = pa.array([1, 2, 3, 4, None], type=arrow_type)
	assert arr.sum().as_py() == 10
	assert pc.sum(arr).as_py() == 10

	arr = pa.array([None], type=arrow_type)
	assert arr.sum().as_py() is None # noqa: E711
	assert pc.sum(arr).as_py() is None # noqa: E711
	assert arr.sum(min_count=0).as_py() == 0
	assert pc.sum(arr, min_count=0).as_py() == 0

	arr = pa.array([], type=arrow_type)
	assert arr.sum().as_py() is None # noqa: E711
	assert arr.sum(min_count=0).as_py() == 0
	assert pc.sum(arr, min_count=0).as_py() == 0


	@pytest.mark.parametrize('arrow_type', numerical_arrow_types)
	def test_sum_chunked_array(arrow_type):
	arr = pa.chunked_array([pa.array([1, 2, 3, 4], type=arrow_type)])
	assert pc.sum(arr).as_py() == 10

	arr = pa.chunked_array([
	pa.array([1, 2], type=arrow_type), pa.array([3, 4], type=arrow_type)
	])
	assert pc.sum(arr).as_py() == 10

	arr = pa.chunked_array([
	pa.array([1, 2], type=arrow_type),
	pa.array([], type=arrow_type),
	pa.array([3, 4], type=arrow_type)
	])
	assert pc.sum(arr).as_py() == 10

	arr = pa.chunked_array((), type=arrow_type)
	assert arr.num_chunks == 0
	assert pc.sum(arr).as_py() is None # noqa: E711
	assert pc.sum(arr, min_count=0).as_py() == 0


	def test_mode_array():
	# ARROW-9917
	arr = pa.array([1, 1, 3, 4, 3, 5], type='int64')
	mode = pc.mode(arr)
	assert len(mode) == 1
	assert mode[0].as_py() == {"mode": 1, "count": 2}

	mode = pc.mode(arr, n=2)
	assert len(mode) == 2
	assert mode[0].as_py() == {"mode": 1, "count": 2}
	assert mode[1].as_py() == {"mode": 3, "count": 2}

	arr = pa.array([], type='int64')
	assert len(pc.mode(arr)) == 0

	arr = pa.array([1, 1, 3, 4, 3, None], type='int64')
	mode = pc.mode(arr, skip_nulls=False)
	assert len(mode) == 0
	mode = pc.mode(arr, min_count=6)
	assert len(mode) == 0
	mode = pc.mode(arr, skip_nulls=False, min_count=5)
	assert len(mode) == 0

	arr = pa.array([True, False])
	mode = pc.mode(arr, n=2)
	assert len(mode) == 2
	assert mode[0].as_py() == {"mode": False, "count": 1}
	assert mode[1].as_py() == {"mode": True, "count": 1}


	def test_mode_chunked_array():
	# ARROW-9917
	arr = pa.chunked_array([pa.array([1, 1, 3, 4, 3, 5], type='int64')])
	mode = pc.mode(arr)
	assert len(mode) == 1
	assert mode[0].as_py() == {"mode": 1, "count": 2}

	mode = pc.mode(arr, n=2)
	assert len(mode) == 2
	assert mode[0].as_py() == {"mode": 1, "count": 2}
	assert mode[1].as_py() == {"mode": 3, "count": 2}

	arr = pa.chunked_array((), type='int64')
	assert arr.num_chunks == 0
	assert len(pc.mode(arr)) == 0


	def test_empty_chunked_array():
	msg = "cannot construct ChunkedArray from empty vector and omitted type"
	with pytest.raises(pa.ArrowInvalid, match=msg):
	pa.chunked_array([])

	pa.chunked_array([], type=pa.int8())


	def test_variance():
	data = [1, 2, 3, 4, 5, 6, 7, 8]
	assert pc.variance(data).as_py() == 5.25
	assert pc.variance(data, ddof=0).as_py() == 5.25
	assert pc.variance(data, ddof=1).as_py() == 6.0


	def test_skew():
	data = [1, 1, None, 2]
	assert pc.skew(data).as_py() == pytest.approx(0.707106781186548, rel=1e-10)
	assert pc.skew(data, skip_nulls=False).as_py() is None
	assert pc.skew(data, min_count=4).as_py() is None


	def test_kurtosis():
	data = [1, 1, None, 2]
	assert pc.kurtosis(data).as_py() == pytest.approx(-1.5, rel=1e-10)
	assert pc.kurtosis(data, skip_nulls=False).as_py() is None
	assert pc.kurtosis(data, min_count=4).as_py() is None


	@pytest.mark.parametrize("input, expected", (
	(
	[1.0, 2.0, 3.0, 40.0, None],
	{'skew': pytest.approx(1.988947740397821),
	'kurtosis': pytest.approx(3.9631931024230695)}
	),
	([1, 2, 40], {'skew': pytest.approx(1.7281098503730385), 'kurtosis': None}),
	([1, 40], {'skew': None, 'kurtosis': None}),
	))
	def test_unbiased_skew_and_kurtosis(input, expected):
	arrow_skew = pc.skew(input, skip_nulls=True, biased=False)
	arrow_kurtosis = pc.kurtosis(input, skip_nulls=True, biased=False)
	assert arrow_skew.as_py() == expected['skew']
	assert arrow_kurtosis.as_py() == expected['kurtosis']


	def test_count_substring():
	for (ty, offset) in [(pa.string(), pa.int32()),
	(pa.large_string(), pa.int64())]:
	arr = pa.array(["ab", "cab", "abcab", "ba", "AB", None], type=ty)

	result = pc.count_substring(arr, "ab")
	expected = pa.array([1, 1, 2, 0, 0, None], type=offset)
	assert expected == result

	result = pc.count_substring(arr, "ab", ignore_case=True)
	expected = pa.array([1, 1, 2, 0, 1, None], type=offset)
	assert expected == result


	def test_count_substring_regex():
	for (ty, offset) in [(pa.string(), pa.int32()),
	(pa.large_string(), pa.int64())]:
	arr = pa.array(["ab", "cab", "baAacaa", "ba", "AB", None], type=ty)

	result = pc.count_substring_regex(arr, "a+")
	expected = pa.array([1, 1, 3, 1, 0, None], type=offset)
	assert expected.equals(result)

	result = pc.count_substring_regex(arr, "a+", ignore_case=True)
	expected = pa.array([1, 1, 2, 1, 1, None], type=offset)
	assert expected.equals(result)


	def test_find_substring():
	for ty in [pa.string(), pa.binary(), pa.large_string(), pa.large_binary()]:
	arr = pa.array(["ab", "cab", "ba", None], type=ty)
	result = pc.find_substring(arr, "ab")
	assert result.to_pylist() == [0, 1, -1, None]

	result = pc.find_substring_regex(arr, "a?b")
	assert result.to_pylist() == [0, 1, 0, None]

	arr = pa.array(["ab", "cAB", "ba", "aB?"], type=ty)
	result = pc.find_substring(arr, "aB*", ignore_case=True)
	assert result.to_pylist() == [0, 1, -1, -1]

	result = pc.find_substring_regex(arr, "a?b", ignore_case=True)
	assert result.to_pylist() == [0, 1, 0, 0]


	def test_match_like():
	arr = pa.array(["ab", "ba%", "ba", "ca%d", None])
	result = pc.match_like(arr, r"_a\%%")
	expected = pa.array([False, True, False, True, None])
	assert expected.equals(result)

	arr = pa.array(["aB", "bA%", "ba", "ca%d", None])
	result = pc.match_like(arr, r"_a\%%", ignore_case=True)
	expected = pa.array([False, True, False, True, None])
	assert expected.equals(result)
	result = pc.match_like(arr, r"_a\%%", ignore_case=False)
	expected = pa.array([False, False, False, True, None])
	assert expected.equals(result)


	def test_match_substring():
	arr = pa.array(["ab", "abc", "ba", None])
	result = pc.match_substring(arr, "ab")
	expected = pa.array([True, True, False, None])
	assert expected.equals(result)

	arr = pa.array(["áB", "Ábc", "ba", None])
	result = pc.match_substring(arr, "áb", ignore_case=True)
	expected = pa.array([True, True, False, None])
	assert expected.equals(result)
	result = pc.match_substring(arr, "áb", ignore_case=False)
	expected = pa.array([False, False, False, None])
	assert expected.equals(result)


	def test_match_substring_regex():
	arr = pa.array(["ab", "abc", "ba", "c", None])
	result = pc.match_substring_regex(arr, "^a?b")
	expected = pa.array([True, True, True, False, None])
	assert expected.equals(result)

	arr = pa.array(["aB", "Abc", "BA", "c", None])
	result = pc.match_substring_regex(arr, "^a?b", ignore_case=True)
	expected = pa.array([True, True, True, False, None])
	assert expected.equals(result)
	result = pc.match_substring_regex(arr, "^a?b", ignore_case=False)
	expected = pa.array([False, False, False, False, None])
	assert expected.equals(result)


	def test_trim():
	# \u3000 is unicode whitespace
	arr = pa.array([" foo", None, " \u3000foo bar \t"])
	result = pc.utf8_trim_whitespace(arr)
	expected = pa.array(["foo", None, "foo bar"])
	assert expected.equals(result)

	arr = pa.array([" foo", None, " \u3000foo bar \t"])
	result = pc.ascii_trim_whitespace(arr)
	expected = pa.array(["foo", None, "\u3000foo bar"])
	assert expected.equals(result)

	arr = pa.array([" foo", None, " \u3000foo bar \t"])
	result = pc.utf8_trim(arr, characters=' f\u3000')
	expected = pa.array(["oo", None, "oo bar \t"])
	assert expected.equals(result)
	# Positional option
	result = pc.utf8_trim(arr, ' f\u3000')
	expected = pa.array(["oo", None, "oo bar \t"])
	assert expected.equals(result)


	def test_slice_compatibility():
	arr = pa.array(["", "𝑓", "𝑓ö", "𝑓öõ", "𝑓öõḍ", "𝑓öõḍš"])
	for start in range(-6, 6):
	for stop in itertools.chain(range(-6, 6), [None]):
	for step in [-3, -2, -1, 1, 2, 3]:
	expected = pa.array([k.as_py()[start:stop:step]
	for k in arr])
	result = pc.utf8_slice_codeunits(
	arr, start=start, stop=stop, step=step)
	assert expected.equals(result)
	# Positional options
	assert pc.utf8_slice_codeunits(arr,
	start, stop, step) == result


	def test_binary_slice_compatibility():
	data = [b"", b"a", b"a\xff", b"ab\x00", b"abc\xfb", b"ab\xf2de"]
	arr = pa.array(data)
	for start, stop, step in itertools.product(range(-6, 6),
	range(-6, 6),
	range(-3, 4)):
	if step == 0:
	continue
	expected = pa.array([k.as_py()[start:stop:step]
	for k in arr])
	result = pc.binary_slice(
	arr, start=start, stop=stop, step=step)
	assert expected.equals(result)
	# Positional options
	assert pc.binary_slice(arr, start, stop, step) == result
	# Fixed size binary input / output
	for item in data:
	fsb_scalar = pa.scalar(item, type=pa.binary(len(item)))
	expected = item[start:stop:step]
	actual = pc.binary_slice(fsb_scalar, start, stop, step)
	assert actual.type == pa.binary(len(expected))
	assert actual.as_py() == expected


	def test_split_pattern():
	arr = pa.array(["-foo---bar--", "---foo---b"])
	result = pc.split_pattern(arr, pattern="---")
	expected = pa.array([["-foo", "bar--"], ["", "foo", "b"]])
	assert expected.equals(result)

	result = pc.split_pattern(arr, "---", max_splits=1)
	expected = pa.array([["-foo", "bar--"], ["", "foo---b"]])
	assert expected.equals(result)

	result = pc.split_pattern(arr, "---", max_splits=1, reverse=True)
	expected = pa.array([["-foo", "bar--"], ["---foo", "b"]])
	assert expected.equals(result)


	def test_split_whitespace_utf8():
	arr = pa.array(["foo bar", " foo \u3000\tb"])
	result = pc.utf8_split_whitespace(arr)
	expected = pa.array([["foo", "bar"], ["", "foo", "b"]])
	assert expected.equals(result)

	result = pc.utf8_split_whitespace(arr, max_splits=1)
	expected = pa.array([["foo", "bar"], ["", "foo \u3000\tb"]])
	assert expected.equals(result)

	result = pc.utf8_split_whitespace(arr, max_splits=1, reverse=True)
	expected = pa.array([["foo", "bar"], [" foo", "b"]])
	assert expected.equals(result)


	def test_split_whitespace_ascii():
	arr = pa.array(["foo bar", " foo \u3000\tb"])
	result = pc.ascii_split_whitespace(arr)
	expected = pa.array([["foo", "bar"], ["", "foo", "\u3000", "b"]])
	assert expected.equals(result)

	result = pc.ascii_split_whitespace(arr, max_splits=1)
	expected = pa.array([["foo", "bar"], ["", "foo \u3000\tb"]])
	assert expected.equals(result)

	result = pc.ascii_split_whitespace(arr, max_splits=1, reverse=True)
	expected = pa.array([["foo", "bar"], [" foo \u3000", "b"]])
	assert expected.equals(result)


	def test_split_pattern_regex():
	arr = pa.array(["-foo---bar--", "---foo---b"])
	result = pc.split_pattern_regex(arr, pattern="-+")
	expected = pa.array([["", "foo", "bar", ""], ["", "foo", "b"]])
	assert expected.equals(result)

	result = pc.split_pattern_regex(arr, "-+", max_splits=1)
	expected = pa.array([["", "foo---bar--"], ["", "foo---b"]])
	assert expected.equals(result)

	with pytest.raises(NotImplementedError,
	match="Cannot split in reverse with regex"):
	result = pc.split_pattern_regex(
	arr, pattern="---", max_splits=1, reverse=True)


	def test_min_max():
	# An example generated function wrapper with possible options
	data = [4, 5, 6, None, 1]
	s = pc.min_max(data)
	assert s.as_py() == {'min': 1, 'max': 6}
	s = pc.min_max(data, options=pc.ScalarAggregateOptions())
	assert s.as_py() == {'min': 1, 'max': 6}
	s = pc.min_max(data, options=pc.ScalarAggregateOptions(skip_nulls=True))
	assert s.as_py() == {'min': 1, 'max': 6}
	s = pc.min_max(data, options=pc.ScalarAggregateOptions(skip_nulls=False))
	assert s.as_py() == {'min': None, 'max': None}

	# Options as dict of kwargs
	s = pc.min_max(data, options={'skip_nulls': False})
	assert s.as_py() == {'min': None, 'max': None}
	# Options as named functions arguments
	s = pc.min_max(data, skip_nulls=False)
	assert s.as_py() == {'min': None, 'max': None}

	# Both options and named arguments
	with pytest.raises(TypeError):
	s = pc.min_max(
	data, options=pc.ScalarAggregateOptions(), skip_nulls=False)

	# Wrong options type
	options = pc.TakeOptions()
	with pytest.raises(TypeError):
	s = pc.min_max(data, options=options)

	# Missing argument
	with pytest.raises(TypeError, match="min_max takes 1 positional"):
	s = pc.min_max()


	def test_any():
	# ARROW-1846

	options = pc.ScalarAggregateOptions(skip_nulls=False, min_count=0)

	a = pa.array([], type='bool')
	assert pc.any(a).as_py() is None
	assert pc.any(a, min_count=0).as_py() is False
	assert pc.any(a, options=options).as_py() is False

	a = pa.array([False, None, True])
	assert pc.any(a).as_py() is True
	assert pc.any(a, options=options).as_py() is True

	a = pa.array([False, None, False])
	assert pc.any(a).as_py() is False
	assert pc.any(a, options=options).as_py() is None


	def test_all():
	# ARROW-10301

	options = pc.ScalarAggregateOptions(skip_nulls=False, min_count=0)

	a = pa.array([], type='bool')
	assert pc.all(a).as_py() is None
	assert pc.all(a, min_count=0).as_py() is True
	assert pc.all(a, options=options).as_py() is True

	a = pa.array([False, True])
	assert pc.all(a).as_py() is False
	assert pc.all(a, options=options).as_py() is False

	a = pa.array([True, None])
	assert pc.all(a).as_py() is True
	assert pc.all(a, options=options).as_py() is None

	a = pa.chunked_array([[True], [True, None]])
	assert pc.all(a).as_py() is True
	assert pc.all(a, options=options).as_py() is None

	a = pa.chunked_array([[True], [False]])
	assert pc.all(a).as_py() is False
	assert pc.all(a, options=options).as_py() is False


	def test_is_valid():
	# An example generated function wrapper without options
	data = [4, 5, None]
	assert pc.is_valid(data).to_pylist() == [True, True, False]

	with pytest.raises(TypeError):
	pc.is_valid(data, options=None)


	def test_generated_docstrings():
	# With options
	assert pc.min_max.__doc__ == textwrap.dedent("""\
	Compute the minimum and maximum values of a numeric array.

	Null values are ignored by default.
	This can be changed through ScalarAggregateOptions.

	Parameters
	----------
	array : Array-like
	Argument to compute function.
	skip_nulls : bool, default True
	Whether to skip (ignore) nulls in the input.
	If False, any null in the input forces the output to null.
	min_count : int, default 1
	Minimum number of non-null values in the input. If the number
	of non-null values is below `min_count`, the output is null.
	options : pyarrow.compute.ScalarAggregateOptions, optional
	Alternative way of passing options.
	memory_pool : pyarrow.MemoryPool, optional
	If not passed, will allocate memory from the default memory pool.
	""")
	# Without options
	assert pc.add.__doc__ == textwrap.dedent("""\
	Add the arguments element-wise.

	Results will wrap around on integer overflow.
	Use function "add_checked" if you want overflow
	to return an error.

	Parameters
	----------
	x : Array-like or scalar-like
	Argument to compute function.
	y : Array-like or scalar-like
	Argument to compute function.
	memory_pool : pyarrow.MemoryPool, optional
	If not passed, will allocate memory from the default memory pool.
	""")
	# Varargs with options
	assert pc.min_element_wise.__doc__ == textwrap.dedent("""\
	Find the element-wise minimum value.

	Nulls are ignored (by default) or propagated.
	NaN is preferred over null, but not over any valid value.

	Parameters
	----------
	*args : Array-like or scalar-like
	Argument to compute function.
	skip_nulls : bool, default True
	Whether to skip (ignore) nulls in the input.
	If False, any null in the input forces the output to null.
	options : pyarrow.compute.ElementWiseAggregateOptions, optional
	Alternative way of passing options.
	memory_pool : pyarrow.MemoryPool, optional
	If not passed, will allocate memory from the default memory pool.
	""")
	assert pc.filter.__doc__ == textwrap.dedent("""\
	Filter with a boolean selection filter.

	The output is populated with values from the input at positions
	where the selection filter is non-zero. Nulls in the selection filter
	are handled based on FilterOptions.

	Parameters
	----------
	input : Array-like or scalar-like
	Argument to compute function.
	selection_filter : Array-like or scalar-like
	Argument to compute function.
	null_selection_behavior : str, default "drop"
	How to handle nulls in the selection filter.
	Accepted values are "drop", "emit_null".
	options : pyarrow.compute.FilterOptions, optional
	Alternative way of passing options.
	memory_pool : pyarrow.MemoryPool, optional
	If not passed, will allocate memory from the default memory pool.

	Examples
	--------
	>>> import pyarrow as pa
	>>> arr = pa.array(["a", "b", "c", None, "e"])
	>>> mask = pa.array([True, False, None, False, True])
	>>> arr.filter(mask)
	<pyarrow.lib.StringArray object at ...>
	[
	"a",
	"e"
	]
	>>> arr.filter(mask, null_selection_behavior='emit_null')
	<pyarrow.lib.StringArray object at ...>
	[
	"a",
	null,
	"e"
	]
	""")


	def test_generated_signatures():
	# The self-documentation provided by signatures should show acceptable
	# options and their default values.

	# Without options
	sig = inspect.signature(pc.add)
	assert str(sig) == "(x, y, /, *, memory_pool=None)"
	# With options
	sig = inspect.signature(pc.min_max)
	assert str(sig) == ("(array, /, *, skip_nulls=True, min_count=1, "
	"options=None, memory_pool=None)")
	# With positional options
	sig = inspect.signature(pc.quantile)
	assert str(sig) == ("(array, /, q=0.5, *, interpolation='linear', "
	"skip_nulls=True, min_count=0, "
	"options=None, memory_pool=None)")
	# Varargs with options
	sig = inspect.signature(pc.binary_join_element_wise)
	assert str(sig) == ("(*strings, null_handling='emit_null', "
	"null_replacement='', options=None, "
	"memory_pool=None)")
	# Varargs without options
	sig = inspect.signature(pc.choose)
	assert str(sig) == "(indices, /, *values, memory_pool=None)"
	# Nullary with options
	sig = inspect.signature(pc.random)
	assert str(sig) == ("(n, *, initializer='system', "
	"options=None, memory_pool=None)")


	# We use isprintable to find about codepoints that Python doesn't know, but
	# utf8proc does (or in a future version of Python the other way around).
	# These codepoints cannot be compared between Arrow and the Python
	# implementation.
	@lru_cache()
	def find_new_unicode_codepoints():
	new = set()
	characters = [chr(c) for c in range(0x80, 0x11000)
	if not (0xD800 <= c < 0xE000)]
	is_printable = pc.utf8_is_printable(pa.array(characters)).to_pylist()
	for i, c in enumerate(characters):
	if is_printable[i] != c.isprintable():
	new.add(ord(c))
	return new


	# Python claims there are not alpha, not sure why, they are in
	# gc='Other Letter': https://graphemica.com/%E1%B3%B2
	unknown_issue_is_alpha = {0x1cf2, 0x1cf3}
	# utf8proc does not know if codepoints are lower case
	utf8proc_issue_is_lower = {
	0xaa, 0xba, 0x2b0, 0x2b1, 0x2b2, 0x2b3, 0x2b4,
	0x2b5, 0x2b6, 0x2b7, 0x2b8, 0x2c0, 0x2c1, 0x2e0,
	0x2e1, 0x2e2, 0x2e3, 0x2e4, 0x37a, 0x1d2c, 0x1d2d,
	0x1d2e, 0x1d2f, 0x1d30, 0x1d31, 0x1d32, 0x1d33,
	0x1d34, 0x1d35, 0x1d36, 0x1d37, 0x1d38, 0x1d39,
	0x1d3a, 0x1d3b, 0x1d3c, 0x1d3d, 0x1d3e, 0x1d3f,
	0x1d40, 0x1d41, 0x1d42, 0x1d43, 0x1d44, 0x1d45,
	0x1d46, 0x1d47, 0x1d48, 0x1d49, 0x1d4a, 0x1d4b,
	0x1d4c, 0x1d4d, 0x1d4e, 0x1d4f, 0x1d50, 0x1d51,
	0x1d52, 0x1d53, 0x1d54, 0x1d55, 0x1d56, 0x1d57,
	0x1d58, 0x1d59, 0x1d5a, 0x1d5b, 0x1d5c, 0x1d5d,
	0x1d5e, 0x1d5f, 0x1d60, 0x1d61, 0x1d62, 0x1d63,
	0x1d64, 0x1d65, 0x1d66, 0x1d67, 0x1d68, 0x1d69,
	0x1d6a, 0x1d78, 0x1d9b, 0x1d9c, 0x1d9d, 0x1d9e,
	0x1d9f, 0x1da0, 0x1da1, 0x1da2, 0x1da3, 0x1da4,
	0x1da5, 0x1da6, 0x1da7, 0x1da8, 0x1da9, 0x1daa,
	0x1dab, 0x1dac, 0x1dad, 0x1dae, 0x1daf, 0x1db0,
	0x1db1, 0x1db2, 0x1db3, 0x1db4, 0x1db5, 0x1db6,
	0x1db7, 0x1db8, 0x1db9, 0x1dba, 0x1dbb, 0x1dbc,
	0x1dbd, 0x1dbe, 0x1dbf, 0x2071, 0x207f, 0x2090,
	0x2091, 0x2092, 0x2093, 0x2094, 0x2095, 0x2096,
	0x2097, 0x2098, 0x2099, 0x209a, 0x209b, 0x209c,
	0x2c7c, 0x2c7d, 0xa69c, 0xa69d, 0xa770, 0xa7f8,
	0xa7f9, 0xab5c, 0xab5d, 0xab5e, 0xab5f, }
	# utf8proc does not store if a codepoint is numeric
	numeric_info_missing = {
	0x3405, 0x3483, 0x382a, 0x3b4d, 0x4e00, 0x4e03,
	0x4e07, 0x4e09, 0x4e5d, 0x4e8c, 0x4e94, 0x4e96,
	0x4ebf, 0x4ec0, 0x4edf, 0x4ee8, 0x4f0d, 0x4f70,
	0x5104, 0x5146, 0x5169, 0x516b, 0x516d, 0x5341,
	0x5343, 0x5344, 0x5345, 0x534c, 0x53c1, 0x53c2,
	0x53c3, 0x53c4, 0x56db, 0x58f1, 0x58f9, 0x5e7a,
	0x5efe, 0x5eff, 0x5f0c, 0x5f0d, 0x5f0e, 0x5f10,
	0x62fe, 0x634c, 0x67d2, 0x6f06, 0x7396, 0x767e,
	0x8086, 0x842c, 0x8cae, 0x8cb3, 0x8d30, 0x9621,
	0x9646, 0x964c, 0x9678, 0x96f6, 0xf96b, 0xf973,
	0xf978, 0xf9b2, 0xf9d1, 0xf9d3, 0xf9fd, 0x10fc5,
	0x10fc6, 0x10fc7, 0x10fc8, 0x10fc9, 0x10fca,
	0x10fcb, }
	# utf8proc has no no digit/numeric information
	digit_info_missing = {
	0xb2, 0xb3, 0xb9, 0x1369, 0x136a, 0x136b, 0x136c,
	0x136d, 0x136e, 0x136f, 0x1370, 0x1371, 0x19da, 0x2070,
	0x2074, 0x2075, 0x2076, 0x2077, 0x2078, 0x2079, 0x2080,
	0x2081, 0x2082, 0x2083, 0x2084, 0x2085, 0x2086, 0x2087,
	0x2088, 0x2089, 0x2460, 0x2461, 0x2462, 0x2463, 0x2464,
	0x2465, 0x2466, 0x2467, 0x2468, 0x2474, 0x2475, 0x2476,
	0x2477, 0x2478, 0x2479, 0x247a, 0x247b, 0x247c, 0x2488,
	0x2489, 0x248a, 0x248b, 0x248c, 0x248d, 0x248e, 0x248f,
	0x2490, 0x24ea, 0x24f5, 0x24f6, 0x24f7, 0x24f8, 0x24f9,
	0x24fa, 0x24fb, 0x24fc, 0x24fd, 0x24ff, 0x2776, 0x2777,
	0x2778, 0x2779, 0x277a, 0x277b, 0x277c, 0x277d, 0x277e,
	0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786,
	0x2787, 0x2788, 0x278a, 0x278b, 0x278c, 0x278d, 0x278e,
	0x278f, 0x2790, 0x2791, 0x2792, 0x10a40, 0x10a41,
	0x10a42, 0x10a43, 0x10e60, 0x10e61, 0x10e62, 0x10e63,
	0x10e64, 0x10e65, 0x10e66, 0x10e67, 0x10e68, }
	numeric_info_missing = {
	0x3405, 0x3483, 0x382a, 0x3b4d, 0x4e00, 0x4e03,
	0x4e07, 0x4e09, 0x4e5d, 0x4e8c, 0x4e94, 0x4e96,
	0x4ebf, 0x4ec0, 0x4edf, 0x4ee8, 0x4f0d, 0x4f70,
	0x5104, 0x5146, 0x5169, 0x516b, 0x516d, 0x5341,
	0x5343, 0x5344, 0x5345, 0x534c, 0x53c1, 0x53c2,
	0x53c3, 0x53c4, 0x56db, 0x58f1, 0x58f9, 0x5e7a,
	0x5efe, 0x5eff, 0x5f0c, 0x5f0d, 0x5f0e, 0x5f10,
	0x62fe, 0x634c, 0x67d2, 0x6f06, 0x7396, 0x767e,
	0x8086, 0x842c, 0x8cae, 0x8cb3, 0x8d30, 0x9621,
	0x9646, 0x964c, 0x9678, 0x96f6, 0xf96b, 0xf973,
	0xf978, 0xf9b2, 0xf9d1, 0xf9d3, 0xf9fd, }

	codepoints_ignore = {
	'is_alnum': numeric_info_missing \| digit_info_missing \|
	unknown_issue_is_alpha,
	'is_alpha': unknown_issue_is_alpha,
	'is_digit': digit_info_missing,
	'is_numeric': numeric_info_missing,
	'is_lower': utf8proc_issue_is_lower
	}


	@pytest.mark.parametrize('function_name', ['is_alnum', 'is_alpha',
	'is_ascii', 'is_decimal',
	'is_digit', 'is_lower',
	'is_numeric', 'is_printable',
	'is_space', 'is_upper', ])
	@pytest.mark.parametrize('variant', ['ascii', 'utf8'])
	def test_string_py_compat_boolean(function_name, variant):
	arrow_name = variant + "_" + function_name
	py_name = function_name.replace('_', '')
	ignore = codepoints_ignore.get(function_name, set()) \| \
	find_new_unicode_codepoints()
	for i in range(128 if ascii else 0x11000):
	if i in range(0xD800, 0xE000):
	continue # bug? pyarrow doesn't allow utf16 surrogates
	# the issues we know of, we skip
	if i in ignore:
	continue
	# Compare results with the equivalent Python predicate
	# (except "is_space" where functions are known to be incompatible)
	c = chr(i)
	if hasattr(pc, arrow_name) and function_name != 'is_space':
	ar = pa.array([c])
	arrow_func = getattr(pc, arrow_name)
	assert arrow_func(ar)[0].as_py() == getattr(c, py_name)()


	def test_pad():
	arr = pa.array([None, 'a', 'abcd'])
	assert pc.ascii_center(arr, width=3).tolist() == [None, ' a ', 'abcd']
	assert pc.ascii_lpad(arr, width=3).tolist() == [None, ' a', 'abcd']
	assert pc.ascii_rpad(arr, width=3).tolist() == [None, 'a ', 'abcd']
	assert pc.ascii_center(arr, 3).tolist() == [None, ' a ', 'abcd']
	assert pc.ascii_lpad(arr, 3).tolist() == [None, ' a', 'abcd']
	assert pc.ascii_rpad(arr, 3).tolist() == [None, 'a ', 'abcd']

	arr = pa.array([None, 'á', 'abcd'])
	assert pc.utf8_center(arr, width=3).tolist() == [None, ' á ', 'abcd']
	assert pc.utf8_lpad(arr, width=3).tolist() == [None, ' á', 'abcd']
	assert pc.utf8_rpad(arr, width=3).tolist() == [None, 'á ', 'abcd']
	assert pc.utf8_center(arr, 3).tolist() == [None, ' á ', 'abcd']
	assert pc.utf8_lpad(arr, 3).tolist() == [None, ' á', 'abcd']
	assert pc.utf8_rpad(arr, 3).tolist() == [None, 'á ', 'abcd']


	@pytest.mark.pandas
	def test_replace_slice():
	offsets = range(-3, 4)

	arr = pa.array([None, '', 'a', 'ab', 'abc', 'abcd', 'abcde'])
	series = arr.to_pandas().astype(object).replace({np.nan: None})
	for start in offsets:
	for stop in offsets:
	expected = series.str.slice_replace(start, stop, 'XX')
	actual = pc.binary_replace_slice(
	arr, start=start, stop=stop, replacement='XX')
	assert actual.tolist() == expected.tolist()
	# Positional options
	assert pc.binary_replace_slice(arr, start, stop, 'XX') == actual

	arr = pa.array([None, '', 'π', 'πb', 'πbθ', 'πbθd', 'πbθde'])
	series = arr.to_pandas().astype(object).replace({np.nan: None})
	for start in offsets:
	for stop in offsets:
	expected = series.str.slice_replace(start, stop, 'XX')
	actual = pc.utf8_replace_slice(
	arr, start=start, stop=stop, replacement='XX')
	assert actual.tolist() == expected.tolist()


	def test_replace_plain():
	data = pa.array(['foozfoo', 'food', None])
	ar = pc.replace_substring(data, pattern='foo', replacement='bar')
	assert ar.tolist() == ['barzbar', 'bard', None]
	ar = pc.replace_substring(data, 'foo', 'bar')
	assert ar.tolist() == ['barzbar', 'bard', None]

	ar = pc.replace_substring(data, pattern='foo', replacement='bar',
	max_replacements=1)
	assert ar.tolist() == ['barzfoo', 'bard', None]
	ar = pc.replace_substring(data, 'foo', 'bar', max_replacements=1)
	assert ar.tolist() == ['barzfoo', 'bard', None]


	def test_replace_regex():
	data = pa.array(['foo', 'mood', None])
	expected = ['f00', 'm00d', None]
	ar = pc.replace_substring_regex(data, pattern='(.)oo', replacement=r'\100')
	assert ar.tolist() == expected
	ar = pc.replace_substring_regex(data, '(.)oo', replacement=r'\100')
	assert ar.tolist() == expected
	ar = pc.replace_substring_regex(data, '(.)oo', r'\100')
	assert ar.tolist() == expected


	def test_extract_regex():
	ar = pa.array(['a1', 'zb2z'])
	expected = [{'letter': 'a', 'digit': '1'}, {'letter': 'b', 'digit': '2'}]
	struct = pc.extract_regex(ar, pattern=r'(?P<letter>[ab])(?P<digit>\d)')
	assert struct.tolist() == expected
	struct = pc.extract_regex(ar, r'(?P<letter>[ab])(?P<digit>\d)')
	assert struct.tolist() == expected


	def test_extract_regex_span():
	ar = pa.array(['a1', 'zb234z'])
	expected = [{'letter': [0, 1], 'digit': [1, 1]},
	{'letter': [1, 1], 'digit': [2, 3]}]
	struct = pc.extract_regex_span(ar, pattern=r'(?P<letter>[ab])(?P<digit>\d+)')
	assert struct.tolist() == expected
	struct = pc.extract_regex_span(ar, r'(?P<letter>[ab])(?P<digit>\d+)')
	assert struct.tolist() == expected


	def test_binary_join():
	ar_list = pa.array([['foo', 'bar'], None, []])
	expected = pa.array(['foo-bar', None, ''])
	assert pc.binary_join(ar_list, '-').equals(expected)

	separator_array = pa.array(['1', '2'], type=pa.binary())
	expected = pa.array(['a1b', 'c2d'], type=pa.binary())
	ar_list = pa.array([['a', 'b'], ['c', 'd']], type=pa.list_(pa.binary()))
	assert pc.binary_join(ar_list, separator_array).equals(expected)


	def test_binary_join_element_wise():
	null = pa.scalar(None, type=pa.string())
	arrs = [[None, 'a', 'b'], ['c', None, 'd'], [None, '-', '--']]
	assert pc.binary_join_element_wise(*arrs).to_pylist() == \
	[None, None, 'b--d']
	assert pc.binary_join_element_wise('a', 'b', '-').as_py() == 'a-b'
	assert pc.binary_join_element_wise('a', null, '-').as_py() is None
	assert pc.binary_join_element_wise('a', 'b', null).as_py() is None

	skip = pc.JoinOptions(null_handling='skip')
	assert pc.binary_join_element_wise(*arrs, options=skip).to_pylist() == \
	[None, 'a', 'b--d']
	assert pc.binary_join_element_wise(
	'a', 'b', '-', options=skip).as_py() == 'a-b'
	assert pc.binary_join_element_wise(
	'a', null, '-', options=skip).as_py() == 'a'
	assert pc.binary_join_element_wise(
	'a', 'b', null, options=skip).as_py() is None

	replace = pc.JoinOptions(null_handling='replace', null_replacement='spam')
	assert pc.binary_join_element_wise(*arrs, options=replace).to_pylist() == \
	[None, 'a-spam', 'b--d']
	assert pc.binary_join_element_wise(
	'a', 'b', '-', options=replace).as_py() == 'a-b'
	assert pc.binary_join_element_wise(
	'a', null, '-', options=replace).as_py() == 'a-spam'
	assert pc.binary_join_element_wise(
	'a', 'b', null, options=replace).as_py() is None


	@pytest.mark.parametrize(('ty', 'values'), all_array_types)
	def test_take(ty, values):
	arr = pa.array(values, type=ty)
	for indices_type in [pa.int8(), pa.int64()]:
	indices = pa.array([0, 4, 2, None], type=indices_type)
	result = arr.take(indices)
	result.validate()
	expected = pa.array([values[0], values[4], values[2], None], type=ty)
	assert result.equals(expected)

	# empty indices
	indices = pa.array([], type=indices_type)
	result = arr.take(indices)
	result.validate()
	expected = pa.array([], type=ty)
	assert result.equals(expected)

	indices = pa.array([2, 5])
	with pytest.raises(IndexError):
	arr.take(indices)

	indices = pa.array([2, -1])
	with pytest.raises(IndexError):
	arr.take(indices)


	def test_take_indices_types():
	arr = pa.array(range(5))

	for indices_type in ['uint8', 'int8', 'uint16', 'int16',
	'uint32', 'int32', 'uint64', 'int64']:
	indices = pa.array([0, 4, 2, None], type=indices_type)
	result = arr.take(indices)
	result.validate()
	expected = pa.array([0, 4, 2, None])
	assert result.equals(expected)

	for indices_type in [pa.float32(), pa.float64()]:
	indices = pa.array([0, 4, 2], type=indices_type)
	with pytest.raises(NotImplementedError):
	arr.take(indices)


	def test_take_on_chunked_array():
	# ARROW-9504
	arr = pa.chunked_array([
	[
	"a",
	"b",
	"c",
	"d",
	"e"
	],
	[
	"f",
	"g",
	"h",
	"i",
	"j"
	]
	])

	indices = pa.array([0, 5, 1, 6, 9, 2])
	result = arr.take(indices)
	expected = pa.chunked_array([["a", "f", "b", "g", "j", "c"]])
	assert result.equals(expected)

	indices = pa.chunked_array([[1], [9, 2]])
	result = arr.take(indices)
	expected = pa.chunked_array([
	[
	"b"
	],
	[
	"j",
	"c"
	]
	])
	assert result.equals(expected)


	@pytest.mark.parametrize('ordered', [False, True])
	def test_take_dictionary(ordered):
	arr = pa.DictionaryArray.from_arrays([0, 1, 2, 0, 1, 2], ['a', 'b', 'c'],
	ordered=ordered)
	result = arr.take(pa.array([0, 1, 3]))
	result.validate()
	assert result.to_pylist() == ['a', 'b', 'a']
	assert result.dictionary.to_pylist() == ['a', 'b', 'c']
	assert result.type.ordered is ordered


	def test_take_null_type():
	# ARROW-10027
	arr = pa.array([None] * 10)
	chunked_arr = pa.chunked_array([[None] * 5] * 2)
	batch = pa.record_batch([arr], names=['a'])
	table = pa.table({'a': arr})

	indices = pa.array([1, 3, 7, None])
	assert len(arr.take(indices)) == 4
	assert len(chunked_arr.take(indices)) == 4
	assert len(batch.take(indices).column(0)) == 4
	assert len(table.take(indices).column(0)) == 4


	@pytest.mark.parametrize(('ty', 'values'), all_array_types)
	def test_drop_null(ty, values):
	arr = pa.array(values, type=ty)
	result = arr.drop_null()
	result.validate(full=True)
	indices = [i for i in range(len(arr)) if arr[i].is_valid]
	expected = arr.take(pa.array(indices))
	assert result.equals(expected)


	def test_drop_null_chunked_array():
	arr = pa.chunked_array([["a", None], ["c", "d", None], [None], []])
	expected_drop = pa.chunked_array([["a"], ["c", "d"], [], []])

	result = arr.drop_null()
	assert result.equals(expected_drop)


	def test_drop_null_record_batch():
	batch = pa.record_batch(
	[pa.array(["a", None, "c", "d", None])], names=["a'"])
	result = batch.drop_null()
	expected = pa.record_batch([pa.array(["a", "c", "d"])], names=["a'"])
	assert result.equals(expected)

	batch = pa.record_batch(
	[pa.array(["a", None, "c", "d", None]),
	pa.array([None, None, "c", None, "e"])], names=["a'", "b'"])

	result = batch.drop_null()
	expected = pa.record_batch(
	[pa.array(["c"]), pa.array(["c"])], names=["a'", "b'"])
	assert result.equals(expected)


	def test_drop_null_table():
	table = pa.table([pa.array(["a", None, "c", "d", None])], names=["a"])
	expected = pa.table([pa.array(["a", "c", "d"])], names=["a"])
	result = table.drop_null()
	assert result.equals(expected)

	table = pa.table([pa.chunked_array([["a", None], ["c", "d", None]]),
	pa.chunked_array([["a", None], [None, "d", None]]),
	pa.chunked_array([["a"], ["b"], [None], ["d", None]])],
	names=["a", "b", "c"])
	expected = pa.table([pa.array(["a", "d"]),
	pa.array(["a", "d"]),
	pa.array(["a", "d"])],
	names=["a", "b", "c"])
	result = table.drop_null()
	assert result.equals(expected)

	table = pa.table([pa.chunked_array([["a", "b"], ["c", "d", "e"]]),
	pa.chunked_array([["A"], ["B"], [None], ["D", None]]),
	pa.chunked_array([["a`", None], ["c`", "d`", None]])],
	names=["a", "b", "c"])
	expected = pa.table([pa.array(["a", "d"]),
	pa.array(["A", "D"]),
	pa.array(["a`", "d`"])],
	names=["a", "b", "c"])
	result = table.drop_null()
	assert result.equals(expected)


	def test_drop_null_null_type():
	arr = pa.array([None] * 10)
	chunked_arr = pa.chunked_array([[None] * 5] * 2)
	batch = pa.record_batch([arr], names=['a'])
	table = pa.table({'a': arr})

	assert len(arr.drop_null()) == 0
	assert len(chunked_arr.drop_null()) == 0
	assert len(batch.drop_null().column(0)) == 0
	assert len(table.drop_null().column(0)) == 0


	@pytest.mark.parametrize(('ty', 'values'), all_array_types)
	def test_filter(ty, values):
	arr = pa.array(values, type=ty)

	mask = pa.array([True, False, False, True, None])
	result = arr.filter(mask, null_selection_behavior='drop')
	result.validate()
	assert result.equals(pa.array([values[0], values[3]], type=ty))
	result = arr.filter(mask, null_selection_behavior='emit_null')
	result.validate()
	assert result.equals(pa.array([values[0], values[3], None], type=ty))

	# non-boolean dtype
	mask = pa.array([0, 1, 0, 1, 0])
	with pytest.raises(NotImplementedError):
	arr.filter(mask)

	# wrong length
	mask = pa.array([True, False, True])
	with pytest.raises(ValueError, match="must all be the same length"):
	arr.filter(mask)


	@pytest.mark.numpy
	@pytest.mark.parametrize(('ty', 'values'), all_array_types)
	def test_filter_numpy_array_mask(ty, values):
	arr = pa.array(values, type=ty)
	# same test as test_filter with different array type
	mask = np.array([True, False, False, True, None])
	result = arr.filter(mask, null_selection_behavior='drop')
	result.validate()
	assert result.equals(pa.array([values[0], values[3]], type=ty))


	def test_filter_chunked_array():
	arr = pa.chunked_array([["a", None], ["c", "d", "e"]])
	expected_drop = pa.chunked_array([["a"], ["e"]])
	expected_null = pa.chunked_array([["a"], [None, "e"]])

	for mask in [
	# mask is array
	pa.array([True, False, None, False, True]),
	# mask is chunked array
	pa.chunked_array([[True, False, None], [False, True]]),
	# mask is python object
	[True, False, None, False, True]
	]:
	result = arr.filter(mask)
	assert result.equals(expected_drop)
	result = arr.filter(mask, null_selection_behavior="emit_null")
	assert result.equals(expected_null)


	def test_filter_record_batch():
	batch = pa.record_batch(
	[pa.array(["a", None, "c", "d", "e"])], names=["a'"])

	# mask is array
	mask = pa.array([True, False, None, False, True])
	result = batch.filter(mask)
	expected = pa.record_batch([pa.array(["a", "e"])], names=["a'"])
	assert result.equals(expected)

	# GH-38770: mask is chunked array
	chunked_mask = pa.chunked_array([[True, False], [None], [False, True]])
	result = batch.filter(chunked_mask)
	assert result.equals(expected)

	result = batch.filter(mask, null_selection_behavior="emit_null")
	expected = pa.record_batch([pa.array(["a", None, "e"])], names=["a'"])
	assert result.equals(expected)


	def test_filter_table():
	table = pa.table([pa.array(["a", None, "c", "d", "e"])], names=["a"])
	expected_drop = pa.table([pa.array(["a", "e"])], names=["a"])
	expected_null = pa.table([pa.array(["a", None, "e"])], names=["a"])

	for mask in [
	# mask is array
	pa.array([True, False, None, False, True]),
	# mask is chunked array
	pa.chunked_array([[True, False], [None, False, True]]),
	# mask is python object
	[True, False, None, False, True]
	]:
	result = table.filter(mask)
	assert result.equals(expected_drop)
	result = table.filter(mask, null_selection_behavior="emit_null")
	assert result.equals(expected_null)


	def test_filter_errors():
	arr = pa.chunked_array([["a", None], ["c", "d", "e"]])
	batch = pa.record_batch(
	[pa.array(["a", None, "c", "d", "e"])], names=["a'"])
	table = pa.table([pa.array(["a", None, "c", "d", "e"])], names=["a"])

	for obj in [arr, batch, table]:
	# non-boolean dtype
	mask = pa.array([0, 1, 0, 1, 0])
	with pytest.raises(NotImplementedError):
	obj.filter(mask)

	# wrong length
	mask = pa.array([True, False, True])
	with pytest.raises(pa.ArrowInvalid,
	match="must all be the same length"):
	obj.filter(mask)

	scalar = pa.scalar(True)
	for filt in [batch, table, scalar]:
	with pytest.raises(TypeError):
	table.filter(filt)


	def test_filter_null_type():
	# ARROW-10027
	arr = pa.array([None] * 10)
	chunked_arr = pa.chunked_array([[None] * 5] * 2)
	batch = pa.record_batch([arr], names=['a'])
	table = pa.table({'a': arr})

	mask = pa.array([True, False] * 5)
	assert len(arr.filter(mask)) == 5
	assert len(chunked_arr.filter(mask)) == 5
	assert len(batch.filter(mask).column(0)) == 5
	assert len(table.filter(mask).column(0)) == 5


	@pytest.mark.parametrize("typ", ["array", "chunked_array"])
	def test_compare_array(typ):
	if typ == "array":
	def con(values):
	return pa.array(values)
	else:
	def con(values):
	return pa.chunked_array([values])

	arr1 = con([1, 2, 3, 4, None])
	arr2 = con([1, 1, 4, None, 4])

	result = pc.equal(arr1, arr2)
	assert result.equals(con([True, False, False, None, None]))

	result = pc.not_equal(arr1, arr2)
	assert result.equals(con([False, True, True, None, None]))

	result = pc.less(arr1, arr2)
	assert result.equals(con([False, False, True, None, None]))

	result = pc.less_equal(arr1, arr2)
	assert result.equals(con([True, False, True, None, None]))

	result = pc.greater(arr1, arr2)
	assert result.equals(con([False, True, False, None, None]))

	result = pc.greater_equal(arr1, arr2)
	assert result.equals(con([True, True, False, None, None]))


	@pytest.mark.parametrize("typ", ["array", "chunked_array"])
	def test_compare_string_scalar(typ):
	if typ == "array":
	def con(values):
	return pa.array(values)
	else:
	def con(values):
	return pa.chunked_array([values])

	arr = con(['a', 'b', 'c', None])
	scalar = pa.scalar('b')

	result = pc.equal(arr, scalar)
	assert result.equals(con([False, True, False, None]))

	if typ == "array":
	nascalar = pa.scalar(None, type="string")
	result = pc.equal(arr, nascalar)
	isnull = pc.is_null(result)
	assert isnull.equals(con([True, True, True, True]))

	result = pc.not_equal(arr, scalar)
	assert result.equals(con([True, False, True, None]))

	result = pc.less(arr, scalar)
	assert result.equals(con([True, False, False, None]))

	result = pc.less_equal(arr, scalar)
	assert result.equals(con([True, True, False, None]))

	result = pc.greater(arr, scalar)
	assert result.equals(con([False, False, True, None]))

	result = pc.greater_equal(arr, scalar)
	assert result.equals(con([False, True, True, None]))


	@pytest.mark.parametrize("typ", ["array", "chunked_array"])
	def test_compare_scalar(typ):
	if typ == "array":
	def con(values):
	return pa.array(values)
	else:
	def con(values):
	return pa.chunked_array([values])

	arr = con([1, 2, 3, None])
	scalar = pa.scalar(2)

	result = pc.equal(arr, scalar)
	assert result.equals(con([False, True, False, None]))

	if typ == "array":
	nascalar = pa.scalar(None, type="int64")
	result = pc.equal(arr, nascalar)
	assert result.to_pylist() == [None, None, None, None]

	result = pc.not_equal(arr, scalar)
	assert result.equals(con([True, False, True, None]))

	result = pc.less(arr, scalar)
	assert result.equals(con([True, False, False, None]))

	result = pc.less_equal(arr, scalar)
	assert result.equals(con([True, True, False, None]))

	result = pc.greater(arr, scalar)
	assert result.equals(con([False, False, True, None]))

	result = pc.greater_equal(arr, scalar)
	assert result.equals(con([False, True, True, None]))


	def test_compare_chunked_array_mixed():
	arr = pa.array([1, 2, 3, 4, None])
	arr_chunked = pa.chunked_array([[1, 2, 3], [4, None]])
	arr_chunked2 = pa.chunked_array([[1, 2], [3, 4, None]])

	expected = pa.chunked_array([[True, True, True, True, None]])

	for left, right in [
	(arr, arr_chunked),
	(arr_chunked, arr),
	(arr_chunked, arr_chunked2),
	]:
	result = pc.equal(left, right)
	assert result.equals(expected)


	def test_arithmetic_add():
	left = pa.array([1, 2, 3, 4, 5])
	right = pa.array([0, -1, 1, 2, 3])
	result = pc.add(left, right)
	expected = pa.array([1, 1, 4, 6, 8])
	assert result.equals(expected)


	def test_arithmetic_subtract():
	left = pa.array([1, 2, 3, 4, 5])
	right = pa.array([0, -1, 1, 2, 3])
	result = pc.subtract(left, right)
	expected = pa.array([1, 3, 2, 2, 2])
	assert result.equals(expected)


	def test_arithmetic_multiply():
	left = pa.array([1, 2, 3, 4, 5])
	right = pa.array([0, -1, 1, 2, 3])
	result = pc.multiply(left, right)
	expected = pa.array([0, -2, 3, 8, 15])
	assert result.equals(expected)


	@pytest.mark.parametrize("ty", ["round", "round_to_multiple"])
	def test_round_to_integer(ty):
	if ty == "round":
	round = pc.round
	RoundOptions = partial(pc.RoundOptions, ndigits=0)
	elif ty == "round_to_multiple":
	round = pc.round_to_multiple
	RoundOptions = partial(pc.RoundToMultipleOptions, multiple=1)

	values = [3.2, 3.5, 3.7, 4.5, -3.2, -3.5, -3.7, None]
	rmode_and_expected = {
	"down": [3, 3, 3, 4, -4, -4, -4, None],
	"up": [4, 4, 4, 5, -3, -3, -3, None],
	"towards_zero": [3, 3, 3, 4, -3, -3, -3, None],
	"towards_infinity": [4, 4, 4, 5, -4, -4, -4, None],
	"half_down": [3, 3, 4, 4, -3, -4, -4, None],
	"half_up": [3, 4, 4, 5, -3, -3, -4, None],
	"half_towards_zero": [3, 3, 4, 4, -3, -3, -4, None],
	"half_towards_infinity": [3, 4, 4, 5, -3, -4, -4, None],
	"half_to_even": [3, 4, 4, 4, -3, -4, -4, None],
	"half_to_odd": [3, 3, 4, 5, -3, -3, -4, None],
	}
	for round_mode, expected in rmode_and_expected.items():
	options = RoundOptions(round_mode=round_mode)
	result = round(values, options=options)
	expected_array = pa.array(expected, type=pa.float64())
	assert expected_array.equals(result)


	@pytest.mark.numpy
	def test_round():
	values = [320, 3.5, 3.075, 4.5, -3.212, -35.1234, -3.045, None]
	ndigits_and_expected = {
	-2: [300, 0, 0, 0, -0, -0, -0, None],
	-1: [320, 0, 0, 0, -0, -40, -0, None],
	0: [320, 4, 3, 5, -3, -35, -3, None],
	1: [320, 3.5, 3.1, 4.5, -3.2, -35.1, -3, None],
	2: [320, 3.5, 3.08, 4.5, -3.21, -35.12, -3.05, None],
	}
	for ndigits, expected in ndigits_and_expected.items():
	options = pc.RoundOptions(ndigits, "half_towards_infinity")
	result = pc.round(values, options=options)
	np.testing.assert_allclose(result, pa.array(expected), equal_nan=True)
	assert pc.round(values, ndigits,
	round_mode="half_towards_infinity") == result
	assert pc.round(values, ndigits, "half_towards_infinity") == result


	@pytest.mark.numpy
	def test_round_to_multiple():
	values = [320, 3.5, 3.075, 4.5, -3.212, -35.1234, -3.045, None]
	multiple_and_expected = {
	0.05: [320, 3.5, 3.1, 4.5, -3.2, -35.1, -3.05, None],
	pa.scalar(0.1): [320, 3.5, 3.1, 4.5, -3.2, -35.1, -3, None],
	2: [320, 4, 4, 4, -4, -36, -4, None],
	10: [320, 0, 0, 0, -0, -40, -0, None],
	pa.scalar(100, type=pa.decimal256(10, 4)):
	[300, 0, 0, 0, -0, -0, -0, None],
	}
	for multiple, expected in multiple_and_expected.items():
	options = pc.RoundToMultipleOptions(multiple, "half_towards_infinity")
	result = pc.round_to_multiple(values, options=options)
	np.testing.assert_allclose(result, pa.array(expected), equal_nan=True)
	assert pc.round_to_multiple(values, multiple,
	"half_towards_infinity") == result

	for multiple in [0, -2, pa.scalar(-10.4)]:
	with pytest.raises(pa.ArrowInvalid,
	match="Rounding multiple must be positive"):
	pc.round_to_multiple(values, multiple=multiple)

	for multiple in [object, 99999999999999999999999]:
	with pytest.raises(TypeError, match="is not a valid multiple type"):
	pc.round_to_multiple(values, multiple=multiple)


	def test_round_binary():
	values = [123.456, 234.567, 345.678, 456.789, 123.456, 234.567, 345.678]
	scales = pa.array([-3, -2, -1, 0, 1, 2, 3], pa.int32())
	expected = pa.array(
	[0, 200, 350, 457, 123.5, 234.57, 345.678], pa.float64())
	assert pc.round_binary(values, scales) == expected

	expect_zero = pa.scalar(0, pa.float64())
	expect_inf = pa.scalar(10, pa.float64())
	scale = pa.scalar(-1, pa.int32())

	assert pc.round_binary(
	5.0, scale, round_mode="half_towards_zero") == expect_zero
	assert pc.round_binary(
	5.0, scale, round_mode="half_towards_infinity") == expect_inf


	def test_is_null():
	arr = pa.array([1, 2, 3, None])
	result = arr.is_null()
	expected = pa.array([False, False, False, True])
	assert result.equals(expected)
	assert result.equals(pc.is_null(arr))
	result = arr.is_valid()
	expected = pa.array([True, True, True, False])
	assert result.equals(expected)
	assert result.equals(pc.is_valid(arr))

	arr = pa.chunked_array([[1, 2], [3, None]])
	result = arr.is_null()
	expected = pa.chunked_array([[False, False], [False, True]])
	assert result.equals(expected)
	result = arr.is_valid()
	expected = pa.chunked_array([[True, True], [True, False]])
	assert result.equals(expected)

	arr = pa.array([1, 2, 3, None, float("nan")])
	result = arr.is_null()
	expected = pa.array([False, False, False, True, False])
	assert result.equals(expected)

	result = arr.is_null(nan_is_null=True)
	expected = pa.array([False, False, False, True, True])
	assert result.equals(expected)


	def test_is_nan():
	arr = pa.array([1, 2, 3, None, float("nan")])
	result = arr.is_nan()
	expected = pa.array([False, False, False, None, True])
	assert result.equals(expected)

	arr = pa.array(["1", "2", None], type=pa.string())
	with pytest.raises(
	ArrowNotImplementedError, match="has no kernel matching input types"):
	_ = arr.is_nan()

	with pytest.raises(
	ArrowNotImplementedError, match="has no kernel matching input types"):
	arr = pa.array([b'a', b'bb', None], type=pa.large_binary())
	_ = arr.is_nan()


	def test_fill_null():
	arr = pa.array([1, 2, None, 4], type=pa.int8())
	fill_value = pa.array([5], type=pa.int8())
	with pytest.raises(pa.ArrowInvalid,
	match="Array arguments must all be the same length"):
	arr.fill_null(fill_value)

	arr = pa.array([None, None, None, None], type=pa.null())
	fill_value = pa.scalar(None, type=pa.null())
	result = arr.fill_null(fill_value)
	expected = pa.array([None, None, None, None])
	assert result.equals(expected)

	arr = pa.array(['a', 'bb', None])
	result = arr.fill_null('ccc')
	expected = pa.array(['a', 'bb', 'ccc'])
	assert result.equals(expected)

	arr = pa.array([b'a', b'bb', None], type=pa.large_binary())
	result = arr.fill_null('ccc')
	expected = pa.array([b'a', b'bb', b'ccc'], type=pa.large_binary())
	assert result.equals(expected)

	arr = pa.array(['a', 'bb', None])
	result = arr.fill_null(None)
	expected = pa.array(['a', 'bb', None])
	assert result.equals(expected)


	@pytest.mark.parametrize('arrow_type', numerical_arrow_types)
	def test_fill_null_array(arrow_type):
	arr = pa.array([1, 2, None, 4], type=arrow_type)
	fill_value = pa.scalar(5, type=arrow_type)
	result = arr.fill_null(fill_value)
	expected = pa.array([1, 2, 5, 4], type=arrow_type)
	assert result.equals(expected)

	# Implicit conversions
	result = arr.fill_null(5)
	assert result.equals(expected)

	# ARROW-9451: Unsigned integers allow this for some reason
	if not pa.types.is_unsigned_integer(arr.type):
	with pytest.raises((ValueError, TypeError)):
	arr.fill_null('5')

	result = arr.fill_null(pa.scalar(5, type='int8'))
	assert result.equals(expected)


	@pytest.mark.parametrize('arrow_type', numerical_arrow_types)
	def test_fill_null_chunked_array(arrow_type):
	fill_value = pa.scalar(5, type=arrow_type)
	arr = pa.chunked_array([pa.array([None, 2, 3, 4], type=arrow_type)])
	result = arr.fill_null(fill_value)
	expected = pa.chunked_array([pa.array([5, 2, 3, 4], type=arrow_type)])
	assert result.equals(expected)

	arr = pa.chunked_array([
	pa.array([1, 2], type=arrow_type),
	pa.array([], type=arrow_type),
	pa.array([None, 4], type=arrow_type)
	])
	expected = pa.chunked_array([
	pa.array([1, 2], type=arrow_type),
	pa.array([], type=arrow_type),
	pa.array([5, 4], type=arrow_type)
	])
	result = arr.fill_null(fill_value)
	assert result.equals(expected)

	# Implicit conversions
	result = arr.fill_null(5)
	assert result.equals(expected)

	result = arr.fill_null(pa.scalar(5, type='int8'))
	assert result.equals(expected)


	def test_logical():
	a = pa.array([True, False, False, None])
	b = pa.array([True, True, False, True])

	assert pc.and_(a, b) == pa.array([True, False, False, None])
	assert pc.and_kleene(a, b) == pa.array([True, False, False, None])

	assert pc.or_(a, b) == pa.array([True, True, False, None])
	assert pc.or_kleene(a, b) == pa.array([True, True, False, True])

	assert pc.xor(a, b) == pa.array([False, True, False, None])

	assert pc.invert(a) == pa.array([False, True, True, None])


	def test_dictionary_decode():
	array = pa.array(["a", "a", "b", "c", "b"])
	dictionary_array = array.dictionary_encode()
	dictionary_array_decode = pc.dictionary_decode(dictionary_array)

	assert array != dictionary_array

	assert array == dictionary_array_decode
	assert array == pc.dictionary_decode(array)
	assert pc.dictionary_encode(dictionary_array) == dictionary_array


	def test_cast():
	arr = pa.array([1, 2, 3, 4], type='int64')
	options = pc.CastOptions(pa.int8())

	with pytest.raises(TypeError):
	pc.cast(arr, target_type=None)

	with pytest.raises(ValueError):
	pc.cast(arr, 'int32', options=options)

	with pytest.raises(ValueError):
	pc.cast(arr, safe=True, options=options)

	assert pc.cast(arr, options=options) == pa.array(
	[1, 2, 3, 4], type='int8')

	arr = pa.array([2 ** 63 - 1], type='int64')
	allow_overflow_options = pc.CastOptions(
	pa.int32(), allow_int_overflow=True)

	with pytest.raises(pa.ArrowInvalid):
	pc.cast(arr, 'int32')

	assert pc.cast(arr, 'int32', safe=False) == pa.array([-1], type='int32')

	assert pc.cast(arr, options=allow_overflow_options) == pa.array(
	[-1], type='int32')

	arr = pa.array(
	[datetime.datetime(2010, 1, 1), datetime.datetime(2015, 1, 1)])
	expected = pa.array([1262304000000, 1420070400000], type='timestamp[ms]')
	assert pc.cast(arr, 'timestamp[ms]') == expected

	arr = pa.array([[1, 2], [3, 4, 5]], type=pa.large_list(pa.int8()))
	expected = pa.array([["1", "2"], ["3", "4", "5"]],
	type=pa.list_(pa.utf8()))
	assert pc.cast(arr, expected.type) == expected


	@pytest.mark.parametrize('value_type', [pa.date32(), pa.date64()])
	def test_identity_cast_dates(value_type):
	dt = datetime.date(1990, 3, 1)

	arr = pa.array([dt], type=value_type)
	assert pc.cast(arr, value_type) == arr


	@pytest.mark.parametrize('value_type', numerical_arrow_types)
	def test_fsl_to_fsl_cast(value_type):
	# Different field name and different type.
	cast_type = pa.list_(pa.field("element", value_type), 2)

	dtype = pa.int32()
	type = pa.list_(pa.field("values", dtype), 2)

	fsl = pa.FixedSizeListArray.from_arrays(
	pa.array([1, 2, 3, 4, 5, 6], type=dtype), type=type)
	assert cast_type == fsl.cast(cast_type).type

	# Different field name and different type (with null values).
	fsl = pa.FixedSizeListArray.from_arrays(
	pa.array([1, None, None, 4, 5, 6], type=dtype), type=type)
	assert cast_type == fsl.cast(cast_type).type

	# Null FSL type.
	dtype = pa.null()
	type = pa.list_(pa.field("values", dtype), 2)
	fsl = pa.FixedSizeListArray.from_arrays(
	pa.array([None, None, None, None, None, None], type=dtype), type=type)
	assert cast_type == fsl.cast(cast_type).type

	# Different sized FSL
	cast_type = pa.list_(pa.field("element", value_type), 3)
	err_msg = 'Size of FixedSizeList is not the same.'
	with pytest.raises(pa.lib.ArrowTypeError, match=err_msg):
	fsl.cast(cast_type)


	DecimalTypeTraits = namedtuple('DecimalTypeTraits',
	('name', 'factory', 'max_precision'))

	FloatToDecimalCase = namedtuple('FloatToDecimalCase',
	('precision', 'scale', 'float_val'))

	decimal_type_traits = [DecimalTypeTraits('decimal32', pa.decimal32, 9),
	DecimalTypeTraits('decimal64', pa.decimal64, 18),
	DecimalTypeTraits('decimal128', pa.decimal128, 38),
	DecimalTypeTraits('decimal256', pa.decimal256, 76)]


	def largest_scaled_float_not_above(val, scale):
	"""
	Find the largest float f such as `f * 10**scale <= val`
	"""
	assert val >= 0
	assert scale >= 0
	float_val = float(val) / 10**scale
	if float_val * 10**scale > val:
	# Take the float just below... it should satisfy
	float_val = np.nextafter(float_val, 0.0)
	if float_val * 10**scale > val:
	float_val = np.nextafter(float_val, 0.0)
	assert float_val * 10**scale <= val
	return float_val


	def scaled_float(int_val, scale):
	"""
	Return a float representation (possibly approximate) of `int_val**-scale`
	"""
	assert isinstance(int_val, int)
	unscaled = decimal.Decimal(int_val)
	scaled = unscaled.scaleb(-scale)
	float_val = float(scaled)
	return float_val


	def integral_float_to_decimal_cast_cases(float_ty, max_precision):
	"""
	Return FloatToDecimalCase instances with integral values.
	"""
	mantissa_digits = 16
	for precision in range(1, max_precision, 3):
	for scale in range(0, precision, 2):
	yield FloatToDecimalCase(precision, scale, 0.0)
	yield FloatToDecimalCase(precision, scale, 1.0)
	epsilon = 10**max(precision - mantissa_digits, scale)
	abs_maxval = largest_scaled_float_not_above(
	10**precision - epsilon, scale)
	yield FloatToDecimalCase(precision, scale, abs_maxval)


	def real_float_to_decimal_cast_cases(float_ty, max_precision):
	"""
	Return FloatToDecimalCase instances with real values.
	"""
	mantissa_digits = 16
	for precision in range(1, max_precision, 3):
	for scale in range(0, precision, 2):
	epsilon = 2 * 10**max(precision - mantissa_digits, 0)
	abs_minval = largest_scaled_float_not_above(epsilon, scale)
	abs_maxval = largest_scaled_float_not_above(
	10**precision - epsilon, scale)
	yield FloatToDecimalCase(precision, scale, abs_minval)
	yield FloatToDecimalCase(precision, scale, abs_maxval)


	def random_float_to_decimal_cast_cases(float_ty, max_precision):
	"""
	Return random-generated FloatToDecimalCase instances.
	"""
	r = random.Random(42)
	for precision in range(1, max_precision, 6):
	for scale in range(0, precision, 4):
	for i in range(20):
	unscaled = r.randrange(0, 10**precision)
	float_val = scaled_float(unscaled, scale)
	assert float_val * 10scale < 10precision
	yield FloatToDecimalCase(precision, scale, float_val)


	def check_cast_float_to_decimal(float_ty, float_val, decimal_ty, decimal_ctx,
	max_precision):
	# Use the Python decimal module to build the expected result
	# using the right precision
	decimal_ctx.prec = decimal_ty.precision
	decimal_ctx.rounding = decimal.ROUND_HALF_EVEN
	expected = decimal_ctx.create_decimal_from_float(float_val)
	# Round `expected` to `scale` digits after the decimal point
	expected = expected.quantize(decimal.Decimal(1).scaleb(-decimal_ty.scale))
	s = pa.scalar(float_val, type=float_ty)
	actual = pc.cast(s, decimal_ty).as_py()
	if actual != expected:
	# Allow the last digit to vary. The tolerance is higher for
	# very high precisions as rounding errors can accumulate in
	# the iterative algorithm (GH-35576).
	diff_digits = abs(actual - expected) * 10**decimal_ty.scale
	limit = 2 if decimal_ty.precision < max_precision - 2 else 4
	assert diff_digits <= limit, (
	f"float_val = {float_val!r}, precision={decimal_ty.precision}, "
	f"expected = {expected!r}, actual = {actual!r}, "
	f"diff_digits = {diff_digits!r}")


	# Cannot test float32 as case generators above assume float64
	@pytest.mark.numpy
	@pytest.mark.parametrize('float_ty', [pa.float64()], ids=str)
	@pytest.mark.parametrize('decimal_ty', decimal_type_traits,
	ids=lambda v: v.name)
	@pytest.mark.parametrize('case_generator',
	[integral_float_to_decimal_cast_cases,
	real_float_to_decimal_cast_cases,
	random_float_to_decimal_cast_cases],
	ids=['integrals', 'reals', 'random'])
	def test_cast_float_to_decimal(float_ty, decimal_ty, case_generator):
	with decimal.localcontext() as ctx:
	for case in case_generator(float_ty, decimal_ty.max_precision):
	check_cast_float_to_decimal(
	float_ty, case.float_val,
	decimal_ty.factory(case.precision, case.scale),
	ctx, decimal_ty.max_precision)


	@pytest.mark.numpy
	@pytest.mark.parametrize('float_ty', [pa.float32(), pa.float64()], ids=str)
	@pytest.mark.parametrize('decimal_traits', decimal_type_traits,
	ids=lambda v: v.name)
	def test_cast_float_to_decimal_random(float_ty, decimal_traits):
	"""
	Test float-to-decimal conversion against exactly generated values.
	"""
	r = random.Random(43)
	np_float_ty = {
	pa.float32(): np.float32,
	pa.float64(): np.float64,
	}[float_ty]
	mantissa_bits = {
	pa.float32(): 24,
	pa.float64(): 53,
	}[float_ty]
	float_exp_min, float_exp_max = {
	pa.float32(): (-126, 127),
	pa.float64(): (-1022, 1023),
	}[float_ty]
	mantissa_digits = math.floor(math.log10(2**mantissa_bits))
	max_precision = decimal_traits.max_precision

	# For example, decimal32 <-> float64
	if max_precision < mantissa_digits:
	mantissa_bits = math.floor(math.log2(10**max_precision))
	mantissa_digits = math.floor(math.log10(2**mantissa_bits))

	with decimal.localcontext() as ctx:
	precision = mantissa_digits
	ctx.prec = precision
	# The scale must be chosen so as
	# 1) it's within bounds for the decimal type
	# 2) the floating point exponent is within bounds
	min_scale = max(-max_precision,
	precision + math.ceil(math.log10(2**float_exp_min)))
	max_scale = min(max_precision,
	math.floor(math.log10(2**float_exp_max)))
	for scale in range(min_scale, max_scale):
	decimal_ty = decimal_traits.factory(precision, scale)
	# We want to random-generate a float from its mantissa bits
	# and exponent, and compute the expected value in the
	# decimal domain. The float exponent has to ensure the
	# expected value doesn't overflow and doesn't lose precision.
	float_exp = (-mantissa_bits +
	math.floor(math.log2(10**(precision - scale))))
	assert float_exp_min <= float_exp <= float_exp_max
	for i in range(5):
	mantissa = r.randrange(0, 2**mantissa_bits)
	float_val = np.ldexp(np_float_ty(mantissa), float_exp)
	assert isinstance(float_val, np_float_ty)
	# Make sure we compute the exact expected value and
	# round by half-to-even when converting to the expected precision.
	if float_exp >= 0:
	expected = decimal.Decimal(mantissa) * 2**float_exp
	else:
	expected = decimal.Decimal(mantissa) / 2**-float_exp
	expected_as_int = round(expected.scaleb(scale))
	actual = pc.cast(
	pa.scalar(float_val, type=float_ty), decimal_ty).as_py()
	actual_as_int = round(actual.scaleb(scale))
	# We allow for a minor rounding error between expected and actual
	assert abs(actual_as_int - expected_as_int) <= 1


	def test_strptime():
	arr = pa.array(["5/1/2020", None, "12/13/1900"])

	got = pc.strptime(arr, format='%m/%d/%Y', unit='s')
	expected = pa.array(
	[datetime.datetime(2020, 5, 1), None, datetime.datetime(1900, 12, 13)],
	type=pa.timestamp('s'))
	assert got == expected
	# Positional format
	assert pc.strptime(arr, '%m/%d/%Y', unit='s') == got

	expected = pa.array([datetime.datetime(2020, 1, 5), None, None],
	type=pa.timestamp('s'))
	got = pc.strptime(arr, format='%d/%m/%Y', unit='s', error_is_null=True)
	assert got == expected

	with pytest.raises(pa.ArrowInvalid,
	match="Failed to parse string: '5/1/2020'"):
	pc.strptime(arr, format='%Y-%m-%d', unit='s', error_is_null=False)

	with pytest.raises(pa.ArrowInvalid,
	match="Failed to parse string: '5/1/2020'"):
	pc.strptime(arr, format='%Y-%m-%d', unit='s')

	got = pc.strptime(arr, format='%Y-%m-%d', unit='s', error_is_null=True)
	assert got == pa.array([None, None, None], type=pa.timestamp('s'))


	@pytest.mark.pandas
	@pytest.mark.timezone_data
	def test_strftime():
	times = ["2018-03-10 09:00", "2038-01-31 12:23", None]
	timezones = ["CET", "UTC", "Europe/Ljubljana"]

	formats = ["%a", "%A", "%w", "%d", "%b", "%B", "%m", "%y", "%Y", "%H", "%I",
	"%p", "%M", "%z", "%Z", "%j", "%U", "%W", "%%", "%G", "%V", "%u"]
	if sys.platform != "win32":
	# Locale-dependent formats don't match on Windows
	formats.extend(["%c", "%x", "%X"])

	for timezone in timezones:
	ts = pd.to_datetime(times).tz_localize(timezone)
	for unit in ["s", "ms", "us", "ns"]:
	tsa = pa.array(ts, type=pa.timestamp(unit, timezone))
	for fmt in formats:
	options = pc.StrftimeOptions(fmt)
	result = pc.strftime(tsa, options=options)
	# cast to the same type as result to ignore string vs large_string
	expected = pa.array(ts.strftime(fmt)).cast(result.type)
	assert result.equals(expected)

	fmt = "%Y-%m-%dT%H:%M:%S"

	# Default format
	tsa = pa.array(ts, type=pa.timestamp("s", timezone))
	result = pc.strftime(tsa, options=pc.StrftimeOptions())
	expected = pa.array(ts.strftime(fmt)).cast(result.type)
	assert result.equals(expected)

	# Default format plus timezone
	tsa = pa.array(ts, type=pa.timestamp("s", timezone))
	result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z"))
	expected = pa.array(ts.strftime(fmt + "%Z")).cast(result.type)
	assert result.equals(expected)

	# Pandas %S is equivalent to %S in arrow for unit="s"
	tsa = pa.array(ts, type=pa.timestamp("s", timezone))
	options = pc.StrftimeOptions("%S")
	result = pc.strftime(tsa, options=options)
	expected = pa.array(ts.strftime("%S")).cast(result.type)
	assert result.equals(expected)

	# Pandas %S.%f is equivalent to %S in arrow for unit="us"
	tsa = pa.array(ts, type=pa.timestamp("us", timezone))
	options = pc.StrftimeOptions("%S")
	result = pc.strftime(tsa, options=options)
	expected = pa.array(ts.strftime("%S.%f")).cast(result.type)
	assert result.equals(expected)

	# Test setting locale
	tsa = pa.array(ts, type=pa.timestamp("s", timezone))
	options = pc.StrftimeOptions(fmt, locale="C")
	result = pc.strftime(tsa, options=options)
	expected = pa.array(ts.strftime(fmt)).cast(result.type)
	assert result.equals(expected)

	# Test timestamps without timezone
	fmt = "%Y-%m-%dT%H:%M:%S"
	ts = pd.to_datetime(times)
	tsa = pa.array(ts, type=pa.timestamp("s"))
	result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt))
	expected = pa.array(ts.strftime(fmt)).cast(result.type)

	# Positional format
	assert pc.strftime(tsa, fmt) == result

	assert result.equals(expected)
	with pytest.raises(pa.ArrowInvalid,
	match="Timezone not present, cannot convert to string"):
	pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z"))
	with pytest.raises(pa.ArrowInvalid,
	match="Timezone not present, cannot convert to string"):
	pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%z"))


	def _check_datetime_components(timestamps, timezone=None):
	from pyarrow.vendored.version import Version

	ts = pd.to_datetime(timestamps).tz_localize(
	"UTC").tz_convert(timezone).to_series()
	tsa = pa.array(ts, pa.timestamp("ns", tz=timezone))

	subseconds = ((ts.dt.microsecond * 10 ** 3 +
	ts.dt.nanosecond) * 10 ** -9).round(9)
	iso_calendar_fields = [
	pa.field('iso_year', pa.int64()),
	pa.field('iso_week', pa.int64()),
	pa.field('iso_day_of_week', pa.int64())
	]

	if Version(pd.__version__) < Version("1.1.0"):
	# https://github.com/pandas-dev/pandas/issues/33206
	iso_year = ts.map(lambda x: x.isocalendar()[0]).astype("int64")
	iso_week = ts.map(lambda x: x.isocalendar()[1]).astype("int64")
	iso_day = ts.map(lambda x: x.isocalendar()[2]).astype("int64")
	else:
	# Casting is required because pandas isocalendar returns int32
	# while arrow isocalendar returns int64.
	iso_year = ts.dt.isocalendar()["year"].astype("int64")
	iso_week = ts.dt.isocalendar()["week"].astype("int64")
	iso_day = ts.dt.isocalendar()["day"].astype("int64")

	iso_calendar = pa.StructArray.from_arrays(
	[iso_year, iso_week, iso_day],
	fields=iso_calendar_fields)

	# Casting is required because pandas with 2.0.0 various numeric
	# date/time attributes have dtype int32 (previously int64)
	year = ts.dt.year.astype("int64")
	month = ts.dt.month.astype("int64")
	day = ts.dt.day.astype("int64")
	dayofweek = ts.dt.dayofweek.astype("int64")
	dayofyear = ts.dt.dayofyear.astype("int64")
	quarter = ts.dt.quarter.astype("int64")
	hour = ts.dt.hour.astype("int64")
	minute = ts.dt.minute.astype("int64")
	second = ts.dt.second.values.astype("int64")
	microsecond = ts.dt.microsecond.astype("int64")
	nanosecond = ts.dt.nanosecond.astype("int64")

	assert pc.year(tsa).equals(pa.array(year))
	assert pc.is_leap_year(tsa).equals(pa.array(ts.dt.is_leap_year))
	assert pc.month(tsa).equals(pa.array(month))
	assert pc.day(tsa).equals(pa.array(day))
	assert pc.day_of_week(tsa).equals(pa.array(dayofweek))
	assert pc.day_of_year(tsa).equals(pa.array(dayofyear))
	assert pc.iso_year(tsa).equals(pa.array(iso_year))
	assert pc.iso_week(tsa).equals(pa.array(iso_week))
	assert pc.iso_calendar(tsa).equals(iso_calendar)
	assert pc.quarter(tsa).equals(pa.array(quarter))
	assert pc.hour(tsa).equals(pa.array(hour))
	assert pc.minute(tsa).equals(pa.array(minute))
	assert pc.second(tsa).equals(pa.array(second))
	assert pc.millisecond(tsa).equals(pa.array(microsecond // 10 ** 3))
	assert pc.microsecond(tsa).equals(pa.array(microsecond % 10 ** 3))
	assert pc.nanosecond(tsa).equals(pa.array(nanosecond))
	assert pc.subsecond(tsa).equals(pa.array(subseconds))
	assert pc.local_timestamp(tsa).equals(pa.array(ts.dt.tz_localize(None)))

	if ts.dt.tz:
	if ts.dt.tz is datetime.timezone.utc:
	# datetime with utc returns None for dst()
	is_dst = [False] * len(ts)
	else:
	is_dst = ts.apply(lambda x: x.dst().seconds > 0)
	assert pc.is_dst(tsa).equals(pa.array(is_dst))

	day_of_week_options = pc.DayOfWeekOptions(
	count_from_zero=False, week_start=1)
	assert pc.day_of_week(tsa, options=day_of_week_options).equals(
	pa.array(dayofweek + 1))

	week_options = pc.WeekOptions(
	week_starts_monday=True, count_from_zero=False,
	first_week_is_fully_in_year=False)
	assert pc.week(tsa, options=week_options).equals(pa.array(iso_week))


	@pytest.mark.pandas
	def test_extract_datetime_components(request):
	timestamps = ["1970-01-01T00:00:59.123456789",
	"2000-02-29T23:23:23.999999999",
	"2033-05-18T03:33:20.000000000",
	"2020-01-01T01:05:05.001",
	"2019-12-31T02:10:10.002",
	"2019-12-30T03:15:15.003",
	"2009-12-31T04:20:20.004132",
	"2010-01-01T05:25:25.005321",
	"2010-01-03T06:30:30.006163",
	"2010-01-04T07:35:35.0",
	"2006-01-01T08:40:40.0",
	"2005-12-31T09:45:45.0",
	"2008-12-28T00:00:00.0",
	"2008-12-29T00:00:00.0",
	"2012-01-01T01:02:03.0"]
	timezones = ["UTC", "US/Central", "Asia/Kolkata",
	"Etc/GMT-4", "Etc/GMT+4", "Australia/Broken_Hill"]

	# Test timezone naive timestamp array
	_check_datetime_components(timestamps)

	# Test timezone aware timestamp array
	if not request.config.pyarrow.is_enabled["timezone_data"]:
	pytest.skip('Timezone database is not installed on Windows')
	else:
	for timezone in timezones:
	_check_datetime_components(timestamps, timezone)


	@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
	def test_iso_calendar_longer_array(unit):
	# https://github.com/apache/arrow/issues/38655
	# ensure correct result for array length > 32
	arr = pa.array([datetime.datetime(2022, 1, 2, 9)]*50, pa.timestamp(unit))
	result = pc.iso_calendar(arr)
	expected = pa.StructArray.from_arrays(
	[[2021]50, [52]50, [7]*50],
	names=['iso_year', 'iso_week', 'iso_day_of_week']
	)
	assert result.equals(expected)


	@pytest.mark.pandas
	@pytest.mark.timezone_data
	def test_assume_timezone():
	ts_type = pa.timestamp("ns")
	timestamps = pd.to_datetime(["1970-01-01T00:00:59.123456789",
	"2000-02-29T23:23:23.999999999",
	"2033-05-18T03:33:20.000000000",
	"2020-01-01T01:05:05.001",
	"2019-12-31T02:10:10.002",
	"2019-12-30T03:15:15.003",
	"2009-12-31T04:20:20.004132",
	"2010-01-01T05:25:25.005321",
	"2010-01-03T06:30:30.006163",
	"2010-01-04T07:35:35.0",
	"2006-01-01T08:40:40.0",
	"2005-12-31T09:45:45.0",
	"2008-12-28T00:00:00.0",
	"2008-12-29T00:00:00.0",
	"2012-01-01T01:02:03.0"])
	nonexistent = pd.to_datetime(["2015-03-29 02:30:00",
	"2015-03-29 03:30:00"])
	ambiguous = pd.to_datetime(["2018-10-28 01:20:00",
	"2018-10-28 02:36:00",
	"2018-10-28 03:46:00"])
	ambiguous_array = pa.array(ambiguous, type=ts_type)
	nonexistent_array = pa.array(nonexistent, type=ts_type)

	for timezone in ["UTC", "US/Central", "Asia/Kolkata"]:
	options = pc.AssumeTimezoneOptions(timezone)
	ta = pa.array(timestamps, type=ts_type)
	expected = timestamps.tz_localize(timezone)
	result = pc.assume_timezone(ta, options=options)
	assert result.equals(pa.array(expected))
	result = pc.assume_timezone(ta, timezone) # Positional option
	assert result.equals(pa.array(expected))

	ta_zoned = pa.array(timestamps, type=pa.timestamp("ns", timezone))
	with pytest.raises(pa.ArrowInvalid, match="already have a timezone:"):
	pc.assume_timezone(ta_zoned, options=options)

	invalid_options = pc.AssumeTimezoneOptions("Europe/Brusselsss")
	with pytest.raises(ValueError, match="not found in timezone database"):
	pc.assume_timezone(ta, options=invalid_options)

	timezone = "Europe/Brussels"

	options_nonexistent_raise = pc.AssumeTimezoneOptions(timezone)
	options_nonexistent_earliest = pc.AssumeTimezoneOptions(
	timezone, ambiguous="raise", nonexistent="earliest")
	options_nonexistent_latest = pc.AssumeTimezoneOptions(
	timezone, ambiguous="raise", nonexistent="latest")

	with pytest.raises(ValueError,
	match="Timestamp doesn't exist in "
	f"timezone '{timezone}'"):
	pc.assume_timezone(nonexistent_array,
	options=options_nonexistent_raise)

	expected = pa.array(nonexistent.tz_localize(
	timezone, nonexistent="shift_forward"))
	result = pc.assume_timezone(
	nonexistent_array, options=options_nonexistent_latest)
	expected.equals(result)

	expected = pa.array(nonexistent.tz_localize(
	timezone, nonexistent="shift_backward"))
	result = pc.assume_timezone(
	nonexistent_array, options=options_nonexistent_earliest)
	expected.equals(result)

	options_ambiguous_raise = pc.AssumeTimezoneOptions(timezone)
	options_ambiguous_latest = pc.AssumeTimezoneOptions(
	timezone, ambiguous="latest", nonexistent="raise")
	options_ambiguous_earliest = pc.AssumeTimezoneOptions(
	timezone, ambiguous="earliest", nonexistent="raise")

	with pytest.raises(ValueError,
	match="Timestamp is ambiguous in "
	f"timezone '{timezone}'"):
	pc.assume_timezone(ambiguous_array, options=options_ambiguous_raise)

	expected = ambiguous.tz_localize(timezone, ambiguous=[True, True, True])
	result = pc.assume_timezone(
	ambiguous_array, options=options_ambiguous_earliest)
	result.equals(pa.array(expected))

	expected = ambiguous.tz_localize(timezone, ambiguous=[False, False, False])
	result = pc.assume_timezone(
	ambiguous_array, options=options_ambiguous_latest)
	result.equals(pa.array(expected))


	def _check_temporal_rounding(ts, values, unit):
	unit_shorthand = {
	"nanosecond": "ns",
	"microsecond": "us",
	"millisecond": "ms",
	"second": "s",
	"minute": "min",
	"hour": "h",
	"day": "D"
	}
	greater_unit = {
	"nanosecond": "us",
	"microsecond": "ms",
	"millisecond": "s",
	"second": "min",
	"minute": "h",
	"hour": "D",
	}
	ta = pa.array(ts)

	for value in values:
	frequency = str(value) + unit_shorthand[unit]
	options = pc.RoundTemporalOptions(value, unit)

	result = pc.ceil_temporal(ta, options=options).to_pandas()
	expected = ts.dt.ceil(frequency)
	np.testing.assert_array_equal(result, expected)

	result = pc.floor_temporal(ta, options=options).to_pandas()
	expected = ts.dt.floor(frequency)
	np.testing.assert_array_equal(result, expected)

	result = pc.round_temporal(ta, options=options).to_pandas()
	expected = ts.dt.round(frequency)
	np.testing.assert_array_equal(result, expected)

	# Check rounding with calendar_based_origin=True.
	# Note: rounding to month is not supported in Pandas so we can't
	# approximate this functionality and exclude unit == "day".
	if unit != "day":
	options = pc.RoundTemporalOptions(
	value, unit, calendar_based_origin=True)
	origin = ts.dt.floor(greater_unit[unit])

	if ta.type.tz is None:
	result = pc.ceil_temporal(ta, options=options).to_pandas()
	expected = (ts - origin).dt.ceil(frequency) + origin
	np.testing.assert_array_equal(result, expected)

	result = pc.floor_temporal(ta, options=options).to_pandas()
	expected = (ts - origin).dt.floor(frequency) + origin
	np.testing.assert_array_equal(result, expected)

	result = pc.round_temporal(ta, options=options).to_pandas()
	expected = (ts - origin).dt.round(frequency) + origin
	np.testing.assert_array_equal(result, expected)

	# Check RoundTemporalOptions partial defaults
	if unit == "day":
	result = pc.ceil_temporal(ta, multiple=value).to_pandas()
	expected = ts.dt.ceil(frequency)
	np.testing.assert_array_equal(result, expected)

	result = pc.floor_temporal(ta, multiple=value).to_pandas()
	expected = ts.dt.floor(frequency)
	np.testing.assert_array_equal(result, expected)

	result = pc.round_temporal(ta, multiple=value).to_pandas()
	expected = ts.dt.round(frequency)
	np.testing.assert_array_equal(result, expected)

	# We naively test ceil_is_strictly_greater by adding time unit multiple
	# to regular ceiled timestamp if it is equal to the original timestamp.
	# This does not work if timestamp is zoned since our logic will not
	# account for DST jumps.
	if ta.type.tz is None:
	options = pc.RoundTemporalOptions(
	value, unit, ceil_is_strictly_greater=True)
	result = pc.ceil_temporal(ta, options=options)
	expected = ts.dt.ceil(frequency)

	expected = np.where(
	expected == ts,
	expected + pd.Timedelta(value, unit_shorthand[unit]),
	expected)
	np.testing.assert_array_equal(result, expected)

	# Check RoundTemporalOptions defaults
	if unit == "day":
	frequency = "1D"

	result = pc.ceil_temporal(ta).to_pandas()
	expected = ts.dt.ceil(frequency)
	np.testing.assert_array_equal(result, expected)

	result = pc.floor_temporal(ta).to_pandas()
	expected = ts.dt.floor(frequency)
	np.testing.assert_array_equal(result, expected)

	result = pc.round_temporal(ta).to_pandas()
	expected = ts.dt.round(frequency)
	np.testing.assert_array_equal(result, expected)


	@pytest.mark.timezone_data
	@pytest.mark.parametrize('unit', ("nanosecond", "microsecond", "millisecond",
	"second", "minute", "hour", "day"))
	@pytest.mark.pandas
	def test_round_temporal(unit):
	values = (1, 2, 3, 4, 5, 6, 7, 10, 15, 24, 60, 250, 500, 750)
	timestamps = [
	"1923-07-07 08:52:35.203790336",
	"1931-03-17 10:45:00.641559040",
	"1932-06-16 01:16:42.911994368",
	"1941-05-27 11:46:43.822831872",
	"1943-12-14 07:32:05.424766464",
	"1954-04-12 04:31:50.699881472",
	"1966-02-12 17:41:28.693282560",
	"1967-02-26 05:56:46.922376960",
	"1975-11-01 10:55:37.016146432",
	"1982-01-21 18:43:44.517366784",
	"1992-01-01 00:00:00.100000000",
	"1999-12-04 05:55:34.794991104",
	"2026-10-26 08:39:00.316686848"]
	ts = pd.Series([pd.Timestamp(x, unit="ns") for x in timestamps])
	_check_temporal_rounding(ts, values, unit)

	timezones = ["Asia/Kolkata", "America/New_York", "Etc/GMT-4", "Etc/GMT+4",
	"Europe/Brussels", "Pacific/Marquesas", "US/Central", "UTC"]

	for timezone in timezones:
	ts_zoned = ts.dt.tz_localize("UTC").dt.tz_convert(timezone)
	_check_temporal_rounding(ts_zoned, values, unit)


	def test_count():
	arr = pa.array([1, 2, 3, None, None])
	assert pc.count(arr).as_py() == 3
	assert pc.count(arr, mode='only_valid').as_py() == 3
	assert pc.count(arr, mode='only_null').as_py() == 2
	assert pc.count(arr, mode='all').as_py() == 5
	assert pc.count(arr, 'all').as_py() == 5

	with pytest.raises(ValueError,
	match='"something else" is not a valid count mode'):
	pc.count(arr, 'something else')


	def test_index():
	arr = pa.array([0, 1, None, 3, 4], type=pa.int64())
	assert pc.index(arr, pa.scalar(0)).as_py() == 0
	assert pc.index(arr, pa.scalar(2, type=pa.int8())).as_py() == -1
	assert pc.index(arr, 4).as_py() == 4
	assert arr.index(3, start=2).as_py() == 3
	assert arr.index(None).as_py() == -1

	arr = pa.chunked_array([[1, 2], [1, 3]], type=pa.int64())
	assert arr.index(1).as_py() == 0
	assert arr.index(1, start=2).as_py() == 2
	assert arr.index(1, start=1, end=2).as_py() == -1


	def check_partition_nth(data, indices, pivot, null_placement):
	indices = indices.to_pylist()
	assert len(indices) == len(data)
	assert sorted(indices) == list(range(len(data)))
	until_pivot = [data[indices[i]] for i in range(pivot)]
	after_pivot = [data[indices[i]] for i in range(pivot, len(data))]
	p = data[indices[pivot]]
	if p is None:
	if null_placement == "at_start":
	assert all(v is None for v in until_pivot)
	else:
	assert all(v is None for v in after_pivot)
	else:
	if null_placement == "at_start":
	assert all(v is None or v <= p for v in until_pivot)
	assert all(v >= p for v in after_pivot)
	else:
	assert all(v <= p for v in until_pivot)
	assert all(v is None or v >= p for v in after_pivot)


	def test_partition_nth():
	data = list(range(100, 140))
	random.shuffle(data)
	pivot = 10
	indices = pc.partition_nth_indices(data, pivot=pivot)
	check_partition_nth(data, indices, pivot, "at_end")
	# Positional pivot argument
	assert pc.partition_nth_indices(data, pivot) == indices

	with pytest.raises(
	ValueError,
	match="'partition_nth_indices' cannot be called without options"):
	pc.partition_nth_indices(data)


	def test_partition_nth_null_placement():
	data = list(range(10)) + [None] * 10
	random.shuffle(data)

	for pivot in (0, 7, 13, 19):
	for null_placement in ("at_start", "at_end"):
	indices = pc.partition_nth_indices(data, pivot=pivot,
	null_placement=null_placement)
	check_partition_nth(data, indices, pivot, null_placement)


	def test_select_k_array():
	def validate_select_k(select_k_indices, arr, order, stable_sort=False):
	sorted_indices = pc.sort_indices(arr, sort_keys=[("dummy", order)])
	head_k_indices = sorted_indices.slice(0, len(select_k_indices))
	if stable_sort:
	assert select_k_indices == head_k_indices
	else:
	expected = pc.take(arr, head_k_indices)
	actual = pc.take(arr, select_k_indices)
	assert actual == expected

	arr = pa.array([1, 2, None, 0])
	for k in [0, 2, 4]:
	for order in ["descending", "ascending"]:
	result = pc.select_k_unstable(
	arr, k=k, sort_keys=[("dummy", order)])
	validate_select_k(result, arr, order)

	result = pc.top_k_unstable(arr, k=k)
	validate_select_k(result, arr, "descending")

	result = pc.bottom_k_unstable(arr, k=k)
	validate_select_k(result, arr, "ascending")

	result = pc.select_k_unstable(
	arr, options=pc.SelectKOptions(
	k=2, sort_keys=[("dummy", "descending")])
	)
	validate_select_k(result, arr, "descending")

	result = pc.select_k_unstable(
	arr, options=pc.SelectKOptions(k=2, sort_keys=[("dummy", "ascending")])
	)
	validate_select_k(result, arr, "ascending")

	# Position options
	assert pc.select_k_unstable(arr, 2,
	sort_keys=[("dummy", "ascending")]) == result
	assert pc.select_k_unstable(arr, 2, [("dummy", "ascending")]) == result


	def test_select_k_table():
	def validate_select_k(select_k_indices, tbl, sort_keys, stable_sort=False):
	sorted_indices = pc.sort_indices(tbl, sort_keys=sort_keys)
	head_k_indices = sorted_indices.slice(0, len(select_k_indices))
	if stable_sort:
	assert select_k_indices == head_k_indices
	else:
	expected = pc.take(tbl, head_k_indices)
	actual = pc.take(tbl, select_k_indices)
	assert actual == expected

	table = pa.table({"a": [1, 2, 0], "b": [1, 0, 1]})
	for k in [0, 2, 4]:
	result = pc.select_k_unstable(
	table, k=k, sort_keys=[("a", "ascending")])
	validate_select_k(result, table, sort_keys=[("a", "ascending")])

	result = pc.select_k_unstable(
	table, k=k, sort_keys=[(pc.field("a"), "ascending"), ("b", "ascending")])
	validate_select_k(
	result, table, sort_keys=[("a", "ascending"), ("b", "ascending")])

	result = pc.top_k_unstable(table, k=k, sort_keys=["a"])
	validate_select_k(result, table, sort_keys=[("a", "descending")])

	result = pc.bottom_k_unstable(table, k=k, sort_keys=["a", "b"])
	validate_select_k(
	result, table, sort_keys=[("a", "ascending"), ("b", "ascending")])

	with pytest.raises(
	ValueError,
	match="'select_k_unstable' cannot be called without options"):
	pc.select_k_unstable(table)

	with pytest.raises(ValueError,
	match="select_k_unstable requires a nonnegative `k`"):
	pc.select_k_unstable(table, k=-1, sort_keys=[("a", "ascending")])

	with pytest.raises(ValueError,
	match="select_k_unstable requires a "
	"non-empty `sort_keys`"):
	pc.select_k_unstable(table, k=2, sort_keys=[])

	with pytest.raises(ValueError, match="not a valid sort order"):
	pc.select_k_unstable(table, k=k, sort_keys=[("a", "nonscending")])

	with pytest.raises(ValueError,
	match="Invalid sort key column: No match for.*unknown"):
	pc.select_k_unstable(table, k=k, sort_keys=[("unknown", "ascending")])


	def test_array_sort_indices():
	arr = pa.array([1, 2, None, 0])
	result = pc.array_sort_indices(arr)
	assert result.to_pylist() == [3, 0, 1, 2]
	result = pc.array_sort_indices(arr, order="ascending")
	assert result.to_pylist() == [3, 0, 1, 2]
	result = pc.array_sort_indices(arr, order="descending")
	assert result.to_pylist() == [1, 0, 3, 2]
	result = pc.array_sort_indices(arr, order="descending",
	null_placement="at_start")
	assert result.to_pylist() == [2, 1, 0, 3]
	result = pc.array_sort_indices(arr, "descending",
	null_placement="at_start")
	assert result.to_pylist() == [2, 1, 0, 3]

	with pytest.raises(ValueError, match="not a valid sort order"):
	pc.array_sort_indices(arr, order="nonscending")


	def test_sort_indices_array():
	arr = pa.array([1, 2, None, 0])
	result = pc.sort_indices(arr)
	assert result.to_pylist() == [3, 0, 1, 2]
	result = pc.sort_indices(arr, sort_keys=[("dummy", "ascending")])
	assert result.to_pylist() == [3, 0, 1, 2]
	result = pc.sort_indices(arr, sort_keys=[("dummy", "descending")])
	assert result.to_pylist() == [1, 0, 3, 2]
	result = pc.sort_indices(arr, sort_keys=[("dummy", "descending")],
	null_placement="at_start")
	assert result.to_pylist() == [2, 1, 0, 3]
	# Positional `sort_keys`
	result = pc.sort_indices(arr, [("dummy", "descending")],
	null_placement="at_start")
	assert result.to_pylist() == [2, 1, 0, 3]
	# Using SortOptions
	result = pc.sort_indices(
	arr, options=pc.SortOptions(sort_keys=[("dummy", "descending")])
	)
	assert result.to_pylist() == [1, 0, 3, 2]
	result = pc.sort_indices(
	arr, options=pc.SortOptions(sort_keys=[("dummy", "descending")],
	null_placement="at_start")
	)
	assert result.to_pylist() == [2, 1, 0, 3]


	def test_sort_indices_table():
	table = pa.table({"a": [1, 1, None, 0], "b": [1, 0, 0, 1]})

	result = pc.sort_indices(table, sort_keys=[("a", "ascending")])
	assert result.to_pylist() == [3, 0, 1, 2]
	result = pc.sort_indices(table, sort_keys=[(pc.field("a"), "ascending")],
	null_placement="at_start")
	assert result.to_pylist() == [2, 3, 0, 1]

	result = pc.sort_indices(
	table, sort_keys=[("a", "descending"), ("b", "ascending")]
	)
	assert result.to_pylist() == [1, 0, 3, 2]
	result = pc.sort_indices(
	table, sort_keys=[("a", "descending"), ("b", "ascending")],
	null_placement="at_start"
	)
	assert result.to_pylist() == [2, 1, 0, 3]
	# Positional `sort_keys`
	result = pc.sort_indices(
	table, [("a", "descending"), ("b", "ascending")],
	null_placement="at_start"
	)
	assert result.to_pylist() == [2, 1, 0, 3]

	with pytest.raises(ValueError, match="Must specify one or more sort keys"):
	pc.sort_indices(table)

	with pytest.raises(ValueError,
	match="Invalid sort key column: No match for.*unknown"):
	pc.sort_indices(table, sort_keys=[("unknown", "ascending")])

	with pytest.raises(ValueError, match="not a valid sort order"):
	pc.sort_indices(table, sort_keys=[("a", "nonscending")])


	def test_is_in():
	arr = pa.array([1, 2, None, 1, 2, 3])

	result = pc.is_in(arr, value_set=pa.array([1, 3, None]))
	assert result.to_pylist() == [True, False, True, True, False, True]

	result = pc.is_in(arr, value_set=pa.array([1, 3, None]), skip_nulls=True)
	assert result.to_pylist() == [True, False, False, True, False, True]

	result = pc.is_in(arr, value_set=pa.array([1, 3]))
	assert result.to_pylist() == [True, False, False, True, False, True]

	result = pc.is_in(arr, value_set=pa.array([1, 3]), skip_nulls=True)
	assert result.to_pylist() == [True, False, False, True, False, True]


	def test_index_in():
	arr = pa.array([1, 2, None, 1, 2, 3])

	result = pc.index_in(arr, value_set=pa.array([1, 3, None]))
	assert result.to_pylist() == [0, None, 2, 0, None, 1]

	result = pc.index_in(arr, value_set=pa.array([1, 3, None]),
	skip_nulls=True)
	assert result.to_pylist() == [0, None, None, 0, None, 1]

	result = pc.index_in(arr, value_set=pa.array([1, 3]))
	assert result.to_pylist() == [0, None, None, 0, None, 1]

	result = pc.index_in(arr, value_set=pa.array([1, 3]), skip_nulls=True)
	assert result.to_pylist() == [0, None, None, 0, None, 1]

	# Positional value_set
	result = pc.index_in(arr, pa.array([1, 3]), skip_nulls=True)
	assert result.to_pylist() == [0, None, None, 0, None, 1]


	def test_quantile():
	arr = pa.array([1, 2, 3, 4])

	result = pc.quantile(arr)
	assert result.to_pylist() == [2.5]

	result = pc.quantile(arr, interpolation='lower')
	assert result.to_pylist() == [2]
	result = pc.quantile(arr, interpolation='higher')
	assert result.to_pylist() == [3]
	result = pc.quantile(arr, interpolation='nearest')
	assert result.to_pylist() == [3]
	result = pc.quantile(arr, interpolation='midpoint')
	assert result.to_pylist() == [2.5]
	result = pc.quantile(arr, interpolation='linear')
	assert result.to_pylist() == [2.5]

	arr = pa.array([1, 2])

	result = pc.quantile(arr, q=[0.25, 0.5, 0.75])
	assert result.to_pylist() == [1.25, 1.5, 1.75]

	result = pc.quantile(arr, q=[0.25, 0.5, 0.75], interpolation='lower')
	assert result.to_pylist() == [1, 1, 1]
	result = pc.quantile(arr, q=[0.25, 0.5, 0.75], interpolation='higher')
	assert result.to_pylist() == [2, 2, 2]
	result = pc.quantile(arr, q=[0.25, 0.5, 0.75], interpolation='midpoint')
	assert result.to_pylist() == [1.5, 1.5, 1.5]
	result = pc.quantile(arr, q=[0.25, 0.5, 0.75], interpolation='nearest')
	assert result.to_pylist() == [1, 1, 2]
	result = pc.quantile(arr, q=[0.25, 0.5, 0.75], interpolation='linear')
	assert result.to_pylist() == [1.25, 1.5, 1.75]

	# Positional `q`
	result = pc.quantile(arr, [0.25, 0.5, 0.75], interpolation='linear')
	assert result.to_pylist() == [1.25, 1.5, 1.75]

	with pytest.raises(ValueError, match="Quantile must be between 0 and 1"):
	pc.quantile(arr, q=1.1)
	with pytest.raises(ValueError, match="not a valid quantile interpolation"):
	pc.quantile(arr, interpolation='zzz')


	def test_tdigest():
	arr = pa.array([1, 2, 3, 4])
	result = pc.tdigest(arr)
	assert result.to_pylist() == [2.5]

	arr = pa.chunked_array([pa.array([1, 2]), pa.array([3, 4])])
	result = pc.tdigest(arr)
	assert result.to_pylist() == [2.5]

	arr = pa.array([1, 2, 3, 4])
	result = pc.tdigest(arr, q=[0, 0.5, 1])
	assert result.to_pylist() == [1, 2.5, 4]

	arr = pa.chunked_array([pa.array([1, 2]), pa.array([3, 4])])
	result = pc.tdigest(arr, [0, 0.5, 1]) # positional `q`
	assert result.to_pylist() == [1, 2.5, 4]


	def test_fill_null_segfault():
	# ARROW-12672
	arr = pa.array([None], pa.bool_()).fill_null(False)
	result = arr.cast(pa.int8())
	assert result == pa.array([0], pa.int8())


	def test_min_max_element_wise():
	arr1 = pa.array([1, 2, 3])
	arr2 = pa.array([3, 1, 2])
	arr3 = pa.array([2, 3, None])

	result = pc.max_element_wise(arr1, arr2)
	assert result == pa.array([3, 2, 3])
	result = pc.min_element_wise(arr1, arr2)
	assert result == pa.array([1, 1, 2])

	result = pc.max_element_wise(arr1, arr2, arr3)
	assert result == pa.array([3, 3, 3])
	result = pc.min_element_wise(arr1, arr2, arr3)
	assert result == pa.array([1, 1, 2])

	# with specifying the option
	result = pc.max_element_wise(arr1, arr3, skip_nulls=True)
	assert result == pa.array([2, 3, 3])
	result = pc.min_element_wise(arr1, arr3, skip_nulls=True)
	assert result == pa.array([1, 2, 3])
	result = pc.max_element_wise(
	arr1, arr3, options=pc.ElementWiseAggregateOptions())
	assert result == pa.array([2, 3, 3])
	result = pc.min_element_wise(
	arr1, arr3, options=pc.ElementWiseAggregateOptions())
	assert result == pa.array([1, 2, 3])

	# not skipping nulls
	result = pc.max_element_wise(arr1, arr3, skip_nulls=False)
	assert result == pa.array([2, 3, None])
	result = pc.min_element_wise(arr1, arr3, skip_nulls=False)
	assert result == pa.array([1, 2, None])


	@pytest.mark.numpy
	@pytest.mark.parametrize('start', (1.25, 10.5, -10.5))
	@pytest.mark.parametrize('skip_nulls', (True, False))
	def test_cumulative_sum(start, skip_nulls):
	# Exact tests (e.g., integral types)
	start_int = int(start)
	starts = [None, start_int, pa.scalar(start_int, type=pa.int8()),
	pa.scalar(start_int, type=pa.int64())]
	for strt in starts:
	arrays = [
	pa.array([1, 2, 3]),
	pa.array([0, None, 20, 30]),
	pa.chunked_array([[0, None], [20, 30]])
	]
	expected_arrays = [
	pa.array([1, 3, 6]),
	pa.array([0, None, 20, 50])
	if skip_nulls else pa.array([0, None, None, None]),
	pa.chunked_array([[0, None, 20, 50]])
	if skip_nulls else pa.chunked_array([[0, None, None, None]])
	]
	for i, arr in enumerate(arrays):
	result = pc.cumulative_sum(arr, start=strt, skip_nulls=skip_nulls)
	# Add `start` offset to expected array before comparing
	expected = pc.add(expected_arrays[i], strt if strt is not None
	else 0)
	assert result.equals(expected)

	starts = [None, start, pa.scalar(start, type=pa.float32()),
	pa.scalar(start, type=pa.float64())]
	for strt in starts:
	arrays = [
	pa.array([1.125, 2.25, 3.03125]),
	pa.array([1, np.nan, 2, -3, 4, 5]),
	pa.array([1, np.nan, None, 3, None, 5])
	]
	expected_arrays = [
	np.array([1.125, 3.375, 6.40625]),
	np.array([1, np.nan, np.nan, np.nan, np.nan, np.nan]),
	np.array([1, np.nan, None, np.nan, None, np.nan])
	if skip_nulls else np.array([1, np.nan, None, None, None, None])
	]
	for i, arr in enumerate(arrays):
	result = pc.cumulative_sum(arr, start=strt, skip_nulls=skip_nulls)
	# Add `start` offset to expected array before comparing
	expected = pc.add(expected_arrays[i], strt if strt is not None
	else 0)
	np.testing.assert_array_almost_equal(result.to_numpy(
	zero_copy_only=False), expected.to_numpy(zero_copy_only=False))

	for strt in ['a', pa.scalar('arrow'), 1.1]:
	with pytest.raises(pa.ArrowInvalid):
	pc.cumulative_sum([1, 2, 3], start=strt)


	@pytest.mark.numpy
	@pytest.mark.parametrize('start', (1.25, 10.5, -10.5))
	@pytest.mark.parametrize('skip_nulls', (True, False))
	def test_cumulative_prod(start, skip_nulls):
	# Exact tests (e.g., integral types)
	start_int = int(start)
	starts = [None, start_int, pa.scalar(start_int, type=pa.int8()),
	pa.scalar(start_int, type=pa.int64())]
	for strt in starts:
	arrays = [
	pa.array([1, 2, 3]),
	pa.array([1, None, 20, 5]),
	pa.chunked_array([[1, None], [20, 5]])
	]
	expected_arrays = [
	pa.array([1, 2, 6]),
	pa.array([1, None, 20, 100])
	if skip_nulls else pa.array([1, None, None, None]),
	pa.chunked_array([[1, None, 20, 100]])
	if skip_nulls else pa.chunked_array([[1, None, None, None]])
	]
	for i, arr in enumerate(arrays):
	result = pc.cumulative_prod(arr, start=strt, skip_nulls=skip_nulls)
	# Multiply `start` offset to expected array before comparing
	expected = pc.multiply(expected_arrays[i], strt if strt is not None
	else 1)
	assert result.equals(expected)

	starts = [None, start, pa.scalar(start, type=pa.float32()),
	pa.scalar(start, type=pa.float64())]
	for strt in starts:
	arrays = [
	pa.array([1.5, 2.5, 3.5]),
	pa.array([1, np.nan, 2, -3, 4, 5]),
	pa.array([1, np.nan, None, 3, None, 5])
	]
	expected_arrays = [
	np.array([1.5, 3.75, 13.125]),
	np.array([1, np.nan, np.nan, np.nan, np.nan, np.nan]),
	np.array([1, np.nan, None, np.nan, None, np.nan])
	if skip_nulls else np.array([1, np.nan, None, None, None, None])
	]
	for i, arr in enumerate(arrays):
	result = pc.cumulative_prod(arr, start=strt, skip_nulls=skip_nulls)
	# Multiply `start` offset to expected array before comparing
	expected = pc.multiply(expected_arrays[i], strt if strt is not None
	else 1)
	np.testing.assert_array_almost_equal(result.to_numpy(
	zero_copy_only=False), expected.to_numpy(zero_copy_only=False))

	for strt in ['a', pa.scalar('arrow'), 1.1]:
	with pytest.raises(pa.ArrowInvalid):
	pc.cumulative_prod([1, 2, 3], start=strt)


	@pytest.mark.numpy
	@pytest.mark.parametrize('start', (0.5, 3.5, 6.5))
	@pytest.mark.parametrize('skip_nulls', (True, False))
	def test_cumulative_max(start, skip_nulls):
	# Exact tests (e.g., integral types)
	start_int = int(start)
	starts = [None, start_int, pa.scalar(start_int, type=pa.int8()),
	pa.scalar(start_int, type=pa.int64())]
	for strt in starts:
	arrays = [
	pa.array([2, 1, 3, 5, 4, 6]),
	pa.array([2, 1, None, 5, 4, None]),
	pa.chunked_array([[2, 1, None], [5, 4, None]])
	]
	expected_arrays = [
	pa.array([2, 2, 3, 5, 5, 6]),
	pa.array([2, 2, None, 5, 5, None])
	if skip_nulls else pa.array([2, 2, None, None, None, None]),
	pa.chunked_array([[2, 2, None, 5, 5, None]])
	if skip_nulls else
	pa.chunked_array([[2, 2, None, None, None, None]])
	]
	for i, arr in enumerate(arrays):
	result = pc.cumulative_max(arr, start=strt, skip_nulls=skip_nulls)
	# Max `start` offset with expected array before comparing
	expected = pc.max_element_wise(
	expected_arrays[i], strt if strt is not None else int(-1e9),
	skip_nulls=False)
	assert result.equals(expected)

	starts = [None, start, pa.scalar(start, type=pa.float32()),
	pa.scalar(start, type=pa.float64())]
	for strt in starts:
	arrays = [
	pa.array([2.5, 1.3, 3.7, 5.1, 4.9, 6.2]),
	pa.array([2.5, 1.3, 3.7, np.nan, 4.9, 6.2]),
	pa.array([2.5, 1.3, None, np.nan, 4.9, None])
	]
	expected_arrays = [
	np.array([2.5, 2.5, 3.7, 5.1, 5.1, 6.2]),
	np.array([2.5, 2.5, 3.7, 3.7, 4.9, 6.2]),
	np.array([2.5, 2.5, None, 2.5, 4.9, None])
	if skip_nulls else np.array([2.5, 2.5, None, None, None, None])
	]
	for i, arr in enumerate(arrays):
	result = pc.cumulative_max(arr, start=strt, skip_nulls=skip_nulls)
	# Max `start` offset with expected array before comparing
	expected = pc.max_element_wise(
	expected_arrays[i], strt if strt is not None else -1e9,
	skip_nulls=False)
	np.testing.assert_array_almost_equal(result.to_numpy(
	zero_copy_only=False), expected.to_numpy(zero_copy_only=False))

	for strt in ['a', pa.scalar('arrow'), 1.1]:
	with pytest.raises(pa.ArrowInvalid):
	pc.cumulative_max([1, 2, 3], start=strt)


	@pytest.mark.numpy
	@pytest.mark.parametrize('start', (0.5, 3.5, 6.5))
	@pytest.mark.parametrize('skip_nulls', (True, False))
	def test_cumulative_min(start, skip_nulls):
	# Exact tests (e.g., integral types)
	start_int = int(start)
	starts = [None, start_int, pa.scalar(start_int, type=pa.int8()),
	pa.scalar(start_int, type=pa.int64())]
	for strt in starts:
	arrays = [
	pa.array([5, 6, 4, 2, 3, 1]),
	pa.array([5, 6, None, 2, 3, None]),
	pa.chunked_array([[5, 6, None], [2, 3, None]])
	]
	expected_arrays = [
	pa.array([5, 5, 4, 2, 2, 1]),
	pa.array([5, 5, None, 2, 2, None])
	if skip_nulls else pa.array([5, 5, None, None, None, None]),
	pa.chunked_array([[5, 5, None, 2, 2, None]])
	if skip_nulls else
	pa.chunked_array([[5, 5, None, None, None, None]])
	]
	for i, arr in enumerate(arrays):
	result = pc.cumulative_min(arr, start=strt, skip_nulls=skip_nulls)
	# Min `start` offset with expected array before comparing
	expected = pc.min_element_wise(
	expected_arrays[i], strt if strt is not None else int(1e9),
	skip_nulls=False)
	assert result.equals(expected)

	starts = [None, start, pa.scalar(start, type=pa.float32()),
	pa.scalar(start, type=pa.float64())]
	for strt in starts:
	arrays = [
	pa.array([5.5, 6.3, 4.7, 2.1, 3.9, 1.2]),
	pa.array([5.5, 6.3, 4.7, np.nan, 3.9, 1.2]),
	pa.array([5.5, 6.3, None, np.nan, 3.9, None])
	]
	expected_arrays = [
	np.array([5.5, 5.5, 4.7, 2.1, 2.1, 1.2]),
	np.array([5.5, 5.5, 4.7, 4.7, 3.9, 1.2]),
	np.array([5.5, 5.5, None, 5.5, 3.9, None])
	if skip_nulls else np.array([5.5, 5.5, None, None, None, None])
	]
	for i, arr in enumerate(arrays):
	result = pc.cumulative_min(arr, start=strt, skip_nulls=skip_nulls)
	# Min `start` offset with expected array before comparing
	expected = pc.min_element_wise(
	expected_arrays[i], strt if strt is not None else 1e9,
	skip_nulls=False)
	np.testing.assert_array_almost_equal(result.to_numpy(
	zero_copy_only=False), expected.to_numpy(zero_copy_only=False))

	for strt in ['a', pa.scalar('arrow'), 1.1]:
	with pytest.raises(pa.ArrowInvalid):
	pc.cumulative_max([1, 2, 3], start=strt)


	def test_make_struct():
	assert pc.make_struct(1, 'a').as_py() == {'0': 1, '1': 'a'}

	assert pc.make_struct(1, 'a', field_names=['i', 's']).as_py() == {
	'i': 1, 's': 'a'}

	assert pc.make_struct([1, 2, 3],
	"a b c".split()) == pa.StructArray.from_arrays([
	[1, 2, 3],
	"a b c".split()], names='0 1'.split())

	with pytest.raises(ValueError,
	match="Array arguments must all be the same length"):
	pc.make_struct([1, 2, 3, 4], "a b c".split())

	with pytest.raises(ValueError, match="0 arguments but 2 field names"):
	pc.make_struct(field_names=['one', 'two'])


	def test_map_lookup():
	ty = pa.map_(pa.utf8(), pa.int32())
	arr = pa.array([[('one', 1), ('two', 2)], [('none', 3)],
	[], [('one', 5), ('one', 7)], None], type=ty)
	result_first = pa.array([1, None, None, 5, None], type=pa.int32())
	result_last = pa.array([1, None, None, 7, None], type=pa.int32())
	result_all = pa.array([[1], None, None, [5, 7], None],
	type=pa.list_(pa.int32()))

	assert pc.map_lookup(arr, 'one', 'first') == result_first
	assert pc.map_lookup(arr, pa.scalar(
	'one', type=pa.utf8()), 'first') == result_first
	assert pc.map_lookup(arr, pa.scalar(
	'one', type=pa.utf8()), 'last') == result_last
	assert pc.map_lookup(arr, pa.scalar(
	'one', type=pa.utf8()), 'all') == result_all


	def test_struct_fields_options():
	a = pa.array([4, 5, 6], type=pa.int64())
	b = pa.array(["bar", None, ""])
	c = pa.StructArray.from_arrays([a, b], ["a", "b"])
	arr = pa.StructArray.from_arrays([a, c], ["a", "c"])

	assert pc.struct_field(arr, '.c.b') == b
	assert pc.struct_field(arr, b'.c.b') == b
	assert pc.struct_field(arr, ['c', 'b']) == b
	assert pc.struct_field(arr, [1, 'b']) == b
	assert pc.struct_field(arr, (b'c', 'b')) == b
	assert pc.struct_field(arr, pc.field(('c', 'b'))) == b

	assert pc.struct_field(arr, '.a') == a
	assert pc.struct_field(arr, ['a']) == a
	assert pc.struct_field(arr, 'a') == a
	assert pc.struct_field(arr, pc.field(('a',))) == a

	assert pc.struct_field(arr, indices=[1, 1]) == b
	assert pc.struct_field(arr, (1, 1)) == b
	assert pc.struct_field(arr, [0]) == a
	assert pc.struct_field(arr, []) == arr

	with pytest.raises(pa.ArrowInvalid, match="No match for FieldRef"):
	pc.struct_field(arr, 'foo')

	with pytest.raises(pa.ArrowInvalid, match="No match for FieldRef"):
	pc.struct_field(arr, '.c.foo')

	# drill into a non-struct array and continue to ask for a field
	with pytest.raises(pa.ArrowInvalid, match="No match for FieldRef"):
	pc.struct_field(arr, '.a.foo')

	# TODO: https://issues.apache.org/jira/browse/ARROW-14853
	# assert pc.struct_field(arr) == arr


	def test_case_when():
	assert pc.case_when(pc.make_struct([True, False, None],
	[False, True, None]),
	[1, 2, 3],
	[11, 12, 13]) == pa.array([1, 12, None])


	def test_list_element():
	element_type = pa.struct([('a', pa.float64()), ('b', pa.int8())])
	list_type = pa.list_(element_type)
	l1 = [{'a': .4, 'b': 2}, None, {'a': .2, 'b': 4}, None, {'a': 5.6, 'b': 6}]
	l2 = [None, {'a': .52, 'b': 3}, {'a': .7, 'b': 4}, None, {'a': .6, 'b': 8}]
	lists = pa.array([l1, l2], list_type)

	index = 1
	result = pa.compute.list_element(lists, index)
	expected = pa.array([None, {'a': 0.52, 'b': 3}], element_type)
	assert result.equals(expected)

	index = 4
	result = pa.compute.list_element(lists, index)
	expected = pa.array([{'a': 5.6, 'b': 6}, {'a': .6, 'b': 8}], element_type)
	assert result.equals(expected)


	def test_count_distinct():
	samples = [datetime.datetime(year=y, month=1, day=1) for y in range(1992, 2092)]
	arr = pa.array(samples, pa.timestamp("ns"))
	assert pc.count_distinct(arr) == pa.scalar(len(samples), type=pa.int64())


	def test_count_distinct_options():
	arr = pa.array([1, 2, 3, None, None])
	assert pc.count_distinct(arr).as_py() == 3
	assert pc.count_distinct(arr, mode='only_valid').as_py() == 3
	assert pc.count_distinct(arr, mode='only_null').as_py() == 1
	assert pc.count_distinct(arr, mode='all').as_py() == 4
	assert pc.count_distinct(arr, 'all').as_py() == 4


	def test_utf8_normalize():
	arr = pa.array(["01²3"])
	assert pc.utf8_normalize(arr, form="NFC") == arr
	assert pc.utf8_normalize(arr, form="NFKC") == pa.array(["0123"])
	assert pc.utf8_normalize(arr, "NFD") == arr
	assert pc.utf8_normalize(arr, "NFKD") == pa.array(["0123"])
	with pytest.raises(
	ValueError,
	match='"NFZ" is not a valid Unicode normalization form'):
	pc.utf8_normalize(arr, form="NFZ")


	def test_random():
	# (note negative integer initializers are accepted)
	for initializer in ['system', 42, -42, b"abcdef"]:
	assert pc.random(0, initializer=initializer) == \
	pa.array([], type=pa.float64())

	# System random initialization => outputs all distinct
	arrays = [tuple(pc.random(100).to_pylist()) for i in range(10)]
	assert len(set(arrays)) == len(arrays)

	arrays = [tuple(pc.random(100, initializer=i % 7).to_pylist())
	for i in range(0, 100)]
	assert len(set(arrays)) == 7

	# Arbitrary hashable objects can be given as initializer
	initializers = [object(), (4, 5, 6), "foo"]
	initializers.extend(os.urandom(10) for i in range(10))
	arrays = [tuple(pc.random(100, initializer=i).to_pylist())
	for i in initializers]
	assert len(set(arrays)) == len(arrays)

	with pytest.raises(TypeError,
	match=r"initializer should be 'system', an integer, "
	r"or a hashable object; got \[\]"):
	pc.random(100, initializer=[])


	@pytest.mark.parametrize(
	"tiebreaker,expected_values",
	[("min", [3, 1, 4, 6, 4, 6, 1]),
	("max", [3, 2, 5, 7, 5, 7, 2]),
	("first", [3, 1, 4, 6, 5, 7, 2]),
	("dense", [2, 1, 3, 4, 3, 4, 1])]
	)
	def test_rank_options_tiebreaker(tiebreaker, expected_values):
	arr = pa.array([1.2, 0.0, 5.3, None, 5.3, None, 0.0])
	rank_options = pc.RankOptions(sort_keys="ascending",
	null_placement="at_end",
	tiebreaker=tiebreaker)
	result = pc.rank(arr, options=rank_options)
	expected = pa.array(expected_values, type=pa.uint64())
	assert result.equals(expected)


	def test_rank_options():
	arr = pa.array([1.2, 0.0, 5.3, None, 5.3, None, 0.0])
	expected = pa.array([3, 1, 4, 6, 5, 7, 2], type=pa.uint64())

	# Ensure rank can be called without specifying options
	result = pc.rank(arr)
	assert result.equals(expected)

	# Ensure default RankOptions
	result = pc.rank(arr, options=pc.RankOptions())
	assert result.equals(expected)

	# Ensure sort_keys tuple usage
	result = pc.rank(arr, options=pc.RankOptions(
	sort_keys=[("b", "ascending")])
	)
	assert result.equals(expected)

	result = pc.rank(arr, null_placement="at_start")
	expected_at_start = pa.array([5, 3, 6, 1, 7, 2, 4], type=pa.uint64())
	assert result.equals(expected_at_start)

	result = pc.rank(arr, sort_keys="descending")
	expected_descending = pa.array([3, 4, 1, 6, 2, 7, 5], type=pa.uint64())
	assert result.equals(expected_descending)

	with pytest.raises(ValueError,
	match=r'"NonExisting" is not a valid tiebreaker'):
	pc.RankOptions(sort_keys="descending",
	null_placement="at_end",
	tiebreaker="NonExisting")


	def test_rank_quantile_options():
	arr = pa.array([None, 1, None, 2, None])
	expected = pa.array([0.7, 0.1, 0.7, 0.3, 0.7], type=pa.float64())

	# Ensure rank_quantile can be called without specifying options
	result = pc.rank_quantile(arr)
	assert result.equals(expected)

	# Ensure default RankOptions
	result = pc.rank_quantile(arr, options=pc.RankQuantileOptions())
	assert result.equals(expected)

	# Ensure sort_keys tuple usage
	result = pc.rank_quantile(arr, options=pc.RankQuantileOptions(
	sort_keys=[("b", "ascending")])
	)
	assert result.equals(expected)

	result = pc.rank_quantile(arr, null_placement="at_start")
	expected_at_start = pa.array([0.3, 0.7, 0.3, 0.9, 0.3], type=pa.float64())
	assert result.equals(expected_at_start)

	result = pc.rank_quantile(arr, sort_keys="descending")
	expected_descending = pa.array([0.7, 0.3, 0.7, 0.1, 0.7], type=pa.float64())
	assert result.equals(expected_descending)

	with pytest.raises(ValueError, match="not a valid sort order"):
	pc.rank_quantile(arr, sort_keys="XXX")


	def test_rank_normal_options():
	arr = pa.array([None, 1, None, 2, None])

	expected = pytest.approx(
	[0.5244005127080407, -1.2815515655446004, 0.5244005127080407,
	-0.5244005127080409, 0.5244005127080407])
	result = pc.rank_normal(arr)
	assert result.to_pylist() == expected
	result = pc.rank_normal(arr, null_placement="at_end", sort_keys="ascending")
	assert result.to_pylist() == expected
	result = pc.rank_normal(arr, options=pc.RankQuantileOptions())
	assert result.to_pylist() == expected

	expected = pytest.approx(
	[-0.5244005127080409, 1.2815515655446004, -0.5244005127080409,
	0.5244005127080407, -0.5244005127080409])
	result = pc.rank_normal(arr, null_placement="at_start", sort_keys="descending")
	assert result.to_pylist() == expected
	result = pc.rank_normal(arr,
	options=pc.RankQuantileOptions(null_placement="at_start",
	sort_keys="descending"))
	assert result.to_pylist() == expected


	def create_sample_expressions():
	# We need a schema for substrait conversion
	schema = pa.schema([pa.field("i64", pa.int64()), pa.field(
	"foo", pa.struct([pa.field("bar", pa.string())]))])

	# Creates a bunch of sample expressions for testing
	# serialization and deserialization. The expressions are categorized
	# to reflect certain nuances in Substrait conversion.
	a = pc.scalar(1)
	b = pc.scalar(1.1)
	c = pc.scalar(True)
	d = pc.scalar("string")
	e = pc.scalar(None)
	f = pc.scalar({'a': 1})
	g = pc.scalar(pa.scalar(1))
	h = pc.scalar(np.int64(2))
	j = pc.scalar(False)
	k = pc.scalar(0)

	# These expression consist entirely of literals
	literal_exprs = [a, b, c, d, e, g, h, j, k]

	# These expressions include at least one function call
	exprs_with_call = [a == b, a != b, a > b, c & j, c \| j, ~c, d.is_valid(),
	a + b, a - b, a * b, a / b, pc.negate(a),
	pc.add(a, b), pc.subtract(a, b), pc.divide(a, b),
	pc.multiply(a, b), pc.power(a, a), pc.sqrt(a),
	pc.exp(b), pc.cos(b), pc.sin(b), pc.tan(b),
	pc.acos(b), pc.atan(b), pc.asin(b), pc.atan2(b, b),
	pc.sinh(a), pc.cosh(a), pc.tanh(a),
	pc.asinh(a), pc.acosh(b), pc.atanh(k),
	pc.abs(b), pc.sign(a), pc.bit_wise_not(a),
	pc.bit_wise_and(a, a), pc.bit_wise_or(a, a),
	pc.bit_wise_xor(a, a), pc.is_nan(b), pc.is_finite(b),
	pc.coalesce(a, b),
	a.cast(pa.int32(), safe=False)]

	# These expressions test out various reference styles and may include function
	# calls. Named references are used here.
	exprs_with_ref = [pc.field('i64') > 5, pc.field('i64') == 5,
	pc.field('i64') == 7,
	pc.field(('foo', 'bar')) == 'value',
	pc.field('foo', 'bar') == 'value']

	# Similar to above but these use numeric references instead of string refs
	exprs_with_numeric_refs = [pc.field(0) > 5, pc.field(0) == 5,
	pc.field(0) == 7,
	pc.field((1, 0)) == 'value',
	pc.field(1, 0) == 'value']

	# Expressions that behave uniquely when converting to/from substrait
	special_cases = [
	f, # Struct literals lose their field names
	a.isin([1, 2, 3]), # isin converts to an or list
	pc.field('i64').is_null() # pyarrow always specifies a FunctionOptions
	# for is_null which, being the default, is
	# dropped on serialization
	]

	all_exprs = literal_exprs.copy()
	all_exprs += exprs_with_call
	all_exprs += exprs_with_ref
	all_exprs += special_cases

	return {
	"all": all_exprs,
	"literals": literal_exprs,
	"calls": exprs_with_call,
	"refs": exprs_with_ref,
	"numeric_refs": exprs_with_numeric_refs,
	"special": special_cases,
	"schema": schema
	}

	# Tests the Arrow-specific serialization mechanism


	@pytest.mark.numpy
	def test_expression_serialization_arrow(pickle_module):
	for expr in create_sample_expressions()["all"]:
	assert isinstance(expr, pc.Expression)
	restored = pickle_module.loads(pickle_module.dumps(expr))
	assert expr.equals(restored)


	@pytest.mark.numpy
	@pytest.mark.substrait
	def test_expression_serialization_substrait():

	exprs = create_sample_expressions()
	schema = exprs["schema"]

	# Basic literals don't change on binding and so they will round
	# trip without any change
	for expr in exprs["literals"]:
	serialized = expr.to_substrait(schema)
	deserialized = pc.Expression.from_substrait(serialized)
	assert expr.equals(deserialized)

	# Expressions are bound when they get serialized. Since bound
	# expressions are not equal to their unbound variants we cannot
	# compare the round tripped with the original
	for expr in exprs["calls"]:
	serialized = expr.to_substrait(schema)
	deserialized = pc.Expression.from_substrait(serialized)
	# We can't compare the expressions themselves because of the bound
	# unbound difference. But we can compare the string representation
	assert str(deserialized) == str(expr)
	serialized_again = deserialized.to_substrait(schema)
	deserialized_again = pc.Expression.from_substrait(serialized_again)
	assert deserialized.equals(deserialized_again)

	for expr, expr_norm in zip(exprs["refs"], exprs["numeric_refs"]):
	serialized = expr.to_substrait(schema)
	deserialized = pc.Expression.from_substrait(serialized)
	assert str(deserialized) == str(expr_norm)
	serialized_again = deserialized.to_substrait(schema)
	deserialized_again = pc.Expression.from_substrait(serialized_again)
	assert deserialized.equals(deserialized_again)

	# For the special cases we get various wrinkles in serialization but we
	# should always get the same thing from round tripping twice
	for expr in exprs["special"]:
	serialized = expr.to_substrait(schema)
	deserialized = pc.Expression.from_substrait(serialized)
	serialized_again = deserialized.to_substrait(schema)
	deserialized_again = pc.Expression.from_substrait(serialized_again)
	assert deserialized.equals(deserialized_again)

	# Special case, we lose the field names of struct literals
	f = exprs["special"][0]
	serialized = f.to_substrait(schema)
	deserialized = pc.Expression.from_substrait(serialized)
	assert deserialized.equals(pc.scalar({'': 1}))

	# Special case, is_in converts to a == opt[0] \|\| a == opt[1] ...
	a = pc.scalar(1)
	expr = a.isin([1, 2, 3])
	target = (a == 1) \| (a == 2) \| (a == 3)
	serialized = expr.to_substrait(schema)
	deserialized = pc.Expression.from_substrait(serialized)
	# Compare str's here to bypass the bound/unbound difference
	assert str(target) == str(deserialized)
	serialized_again = deserialized.to_substrait(schema)
	deserialized_again = pc.Expression.from_substrait(serialized_again)
	assert deserialized.equals(deserialized_again)


	def test_expression_construction():
	zero = pc.scalar(0)
	one = pc.scalar(1)
	true = pc.scalar(True)
	false = pc.scalar(False)
	string = pc.scalar("string")
	field = pc.field("field")
	nested_mixed_types = pc.field(b"a", 1, "b")
	nested_field = pc.field(("nested", "field"))
	nested_field2 = pc.field("nested", "field")

	zero \| one == string
	~true == false
	for typ in ("bool", pa.bool_()):
	field.cast(typ) == true

	field.isin([1, 2])
	nested_mixed_types.isin(["foo", "bar"])
	nested_field.isin(["foo", "bar"])
	nested_field2.isin(["foo", "bar"])

	with pytest.raises(TypeError):
	field.isin(1)

	with pytest.raises(pa.ArrowInvalid):
	field != object()


	def test_expression_boolean_operators():
	# https://issues.apache.org/jira/browse/ARROW-11412
	true = pc.scalar(True)
	false = pc.scalar(False)

	with pytest.raises(ValueError, match="cannot be evaluated to python True"):
	true and false

	with pytest.raises(ValueError, match="cannot be evaluated to python True"):
	true or false

	with pytest.raises(ValueError, match="cannot be evaluated to python True"):
	bool(true)

	with pytest.raises(ValueError, match="cannot be evaluated to python True"):
	not true


	def test_expression_call_function():
	field = pc.field("field")

	# no options
	assert str(pc.hour(field)) == "hour(field)"

	# default options
	assert str(pc.round(field)) == "round(field)"
	# specified options
	assert str(pc.round(field, ndigits=1)) == \
	"round(field, {ndigits=1, round_mode=HALF_TO_EVEN})"

	# Will convert non-expression arguments if possible
	assert str(pc.add(field, 1)) == "add(field, 1)"
	assert str(pc.add(field, pa.scalar(1))) == "add(field, 1)"

	# Invalid pc.scalar input gives original error message
	msg = "only other expressions allowed as arguments"
	with pytest.raises(TypeError, match=msg):
	pc.add(field, object)


	def test_cast_table_raises():
	table = pa.table({'a': [1, 2]})

	with pytest.raises(pa.lib.ArrowTypeError):
	pc.cast(table, pa.int64())


	@pytest.mark.parametrize("start,stop,expected", (
	(0, None, [[1, 2, 3], [4, 5, None], [6, None, None], None]),
	(0, 1, [[1], [4], [6], None]),
	(0, 2, [[1, 2], [4, 5], [6, None], None]),
	(1, 2, [[2], [5], [None], None]),
	(2, 4, [[3, None], [None, None], [None, None], None])
	))
	@pytest.mark.parametrize("step", (1, 2))
	@pytest.mark.parametrize("value_type", (pa.string, pa.int16, pa.float64))
	@pytest.mark.parametrize("list_type", (pa.list_, pa.large_list, "fixed"))
	def test_list_slice_output_fixed(start, stop, step, expected, value_type,
	list_type):
	if list_type == "fixed":
	arr = pa.array([[1, 2, 3], [4, 5, None], [6, None, None], None],
	pa.list_(pa.int8(), 3)).cast(pa.list_(value_type(), 3))
	else:
	arr = pa.array([[1, 2, 3], [4, 5], [6], None],
	pa.list_(pa.int8())).cast(list_type(value_type()))

	args = arr, start, stop, step, True
	if stop is None and list_type != "fixed":
	msg = ("Unable to produce FixedSizeListArray from "
	"non-FixedSizeListArray without `stop` being set.")
	with pytest.raises(pa.ArrowInvalid, match=msg):
	pc.list_slice(*args)
	else:
	result = pc.list_slice(*args)
	pylist = result.cast(pa.list_(pa.int8(),
	result.type.list_size)).to_pylist()
	assert pylist == [e[::step] if e else e for e in expected]


	@pytest.mark.parametrize("start,stop", (
	(0, None,),
	(0, 1,),
	(0, 2,),
	(1, 2,),
	(2, 4,)
	))
	@pytest.mark.parametrize("step", (1, 2))
	@pytest.mark.parametrize("value_type", (pa.string, pa.int16, pa.float64))
	@pytest.mark.parametrize("list_type", (pa.list_, pa.large_list, "fixed"))
	def test_list_slice_output_variable(start, stop, step, value_type, list_type):
	if list_type == "fixed":
	data = [[1, 2, 3], [4, 5, None], [6, None, None], None]
	arr = pa.array(
	data,
	pa.list_(pa.int8(), 3)).cast(pa.list_(value_type(), 3))
	else:
	data = [[1, 2, 3], [4, 5], [6], None]
	arr = pa.array(data,
	pa.list_(pa.int8())).cast(list_type(value_type()))

	# Gets same list type (ListArray vs LargeList)
	if list_type == "fixed":
	list_type = pa.list_ # non fixed output type

	result = pc.list_slice(arr, start, stop, step,
	return_fixed_size_list=False)
	assert result.type == list_type(value_type())

	pylist = result.cast(pa.list_(pa.int8())).to_pylist()

	# Variable output slicing follows Python's slice semantics
	expected = [d[start:stop:step] if d is not None else None for d in data]
	assert pylist == expected


	@pytest.mark.parametrize("return_fixed_size", (True, False, None))
	@pytest.mark.parametrize("type", (
	lambda: pa.list_(pa.field('col', pa.int8())),
	lambda: pa.list_(pa.field('col', pa.int8()), 1),
	lambda: pa.large_list(pa.field('col', pa.int8()))))
	def test_list_slice_field_names_retained(return_fixed_size, type):
	arr = pa.array([[1]], type())
	out = pc.list_slice(arr, 0, 1, return_fixed_size_list=return_fixed_size)
	assert arr.type.field(0).name == out.type.field(0).name

	# Verify out type matches in type if return_fixed_size_list==None
	if return_fixed_size is None:
	assert arr.type == out.type


	def test_list_slice_bad_parameters():
	arr = pa.array([[1]], pa.list_(pa.int8(), 1))
	msg = r"`start`(.) should be greater than 0 and smaller than `stop`(.)"
	with pytest.raises(pa.ArrowInvalid, match=msg):
	pc.list_slice(arr, -1, 1) # negative start?
	with pytest.raises(pa.ArrowInvalid, match=msg):
	pc.list_slice(arr, 2, 1) # start > stop?

	# TODO(ARROW-18281): start==stop -> empty lists
	with pytest.raises(pa.ArrowInvalid, match=msg):
	pc.list_slice(arr, 0, 0) # start == stop?

	# Step not >= 1
	msg = "`step` must be >= 1, got: "
	with pytest.raises(pa.ArrowInvalid, match=msg + "0"):
	pc.list_slice(arr, 0, 1, step=0)
	with pytest.raises(pa.ArrowInvalid, match=msg + "-1"):
	pc.list_slice(arr, 0, 1, step=-1)


	def check_run_end_encode_decode(run_end_encode_opts=None):
	arr = pa.array([1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3])
	encoded = pc.run_end_encode(arr, options=run_end_encode_opts)
	decoded = pc.run_end_decode(encoded)
	assert decoded.type == arr.type
	assert decoded.equals(arr)


	def test_run_end_encode():
	check_run_end_encode_decode()
	check_run_end_encode_decode(pc.RunEndEncodeOptions(pa.int16()))
	check_run_end_encode_decode(pc.RunEndEncodeOptions('int32'))
	check_run_end_encode_decode(pc.RunEndEncodeOptions(pa.int64()))


	def test_pairwise_diff():
	arr = pa.array([1, 2, 3, None, 4, 5])
	expected = pa.array([None, 1, 1, None, None, 1])
	result = pa.compute.pairwise_diff(arr, period=1)
	assert result.equals(expected)

	arr = pa.array([1, 2, 3, None, 4, 5])
	expected = pa.array([None, None, 2, None, 1, None])
	result = pa.compute.pairwise_diff(arr, period=2)
	assert result.equals(expected)

	# negative period
	arr = pa.array([1, 2, 3, None, 4, 5], type=pa.int8())
	expected = pa.array([-1, -1, None, None, -1, None], type=pa.int8())
	result = pa.compute.pairwise_diff(arr, period=-1)
	assert result.equals(expected)

	# wrap around overflow
	arr = pa.array([1, 2, 3, None, 4, 5], type=pa.uint8())
	expected = pa.array([255, 255, None, None, 255, None], type=pa.uint8())
	result = pa.compute.pairwise_diff(arr, period=-1)
	assert result.equals(expected)

	# fail on overflow
	arr = pa.array([1, 2, 3, None, 4, 5], type=pa.uint8())
	with pytest.raises(pa.ArrowInvalid,
	match="overflow"):
	pa.compute.pairwise_diff_checked(arr, period=-1)


	def test_pivot_wider():
	key_names = ["width", "height"]

	result = pc.pivot_wider(["height", "width", "depth"], [10, None, 11])
	assert result.as_py() == {}

	result = pc.pivot_wider(["height", "width", "depth"], [10, None, 11],
	key_names)
	assert result.as_py() == {"width": None, "height": 10}
	# check key order
	assert list(result.as_py()) == ["width", "height"]

	result = pc.pivot_wider(["height", "width", "depth"], [10, None, 11],
	key_names=key_names)
	assert result.as_py() == {"width": None, "height": 10}

	with pytest.raises(KeyError, match="Unexpected pivot key: depth"):
	result = pc.pivot_wider(["height", "width", "depth"], [10, None, 11],
	key_names=key_names,
	unexpected_key_behavior="raise")

	with pytest.raises(ValueError, match="Encountered more than one non-null value"):
	result = pc.pivot_wider(["height", "width", "height"], [10, None, 11],
	key_names=key_names)


	def test_winsorize():
	arr = pa.array([10, 4, 9, 8, 5, 3, 7, 2, 1, 6])

	result = pc.winsorize(arr, 0.1, 0.8)
	assert result.to_pylist() == [8, 4, 8, 8, 5, 3, 7, 2, 2, 6]

	result = pc.winsorize(
	arr, options=pc.WinsorizeOptions(lower_limit=0.1, upper_limit=0.8))
	assert result.to_pylist() == [8, 4, 8, 8, 5, 3, 7, 2, 2, 6]