|
from textwrap import dedent |
|
|
|
import numpy as np |
|
import pytest |
|
|
|
from pandas.errors import ( |
|
PyperclipException, |
|
PyperclipWindowsException, |
|
) |
|
|
|
import pandas as pd |
|
from pandas import ( |
|
NA, |
|
DataFrame, |
|
Series, |
|
get_option, |
|
read_clipboard, |
|
) |
|
import pandas._testing as tm |
|
|
|
from pandas.io.clipboard import ( |
|
CheckedCall, |
|
_stringifyText, |
|
init_qt_clipboard, |
|
) |
|
|
|
|
|
def build_kwargs(sep, excel): |
|
kwargs = {} |
|
if excel != "default": |
|
kwargs["excel"] = excel |
|
if sep != "default": |
|
kwargs["sep"] = sep |
|
return kwargs |
|
|
|
|
|
@pytest.fixture( |
|
params=[ |
|
"delims", |
|
"utf8", |
|
"utf16", |
|
"string", |
|
"long", |
|
"nonascii", |
|
"colwidth", |
|
"mixed", |
|
"float", |
|
"int", |
|
] |
|
) |
|
def df(request): |
|
data_type = request.param |
|
|
|
if data_type == "delims": |
|
return DataFrame({"a": ['"a,\t"b|c', "d\tef`"], "b": ["hi'j", "k''lm"]}) |
|
elif data_type == "utf8": |
|
return DataFrame({"a": ["µasd", "Ωœ∑`"], "b": ["øπ∆˚¬", "œ∑`®"]}) |
|
elif data_type == "utf16": |
|
return DataFrame( |
|
{"a": ["\U0001f44d\U0001f44d", "\U0001f44d\U0001f44d"], "b": ["abc", "def"]} |
|
) |
|
elif data_type == "string": |
|
return DataFrame( |
|
np.array([f"i-{i}" for i in range(15)]).reshape(5, 3), columns=list("abc") |
|
) |
|
elif data_type == "long": |
|
max_rows = get_option("display.max_rows") |
|
return DataFrame( |
|
np.random.default_rng(2).integers(0, 10, size=(max_rows + 1, 3)), |
|
columns=list("abc"), |
|
) |
|
elif data_type == "nonascii": |
|
return DataFrame({"en": "in English".split(), "es": "en español".split()}) |
|
elif data_type == "colwidth": |
|
_cw = get_option("display.max_colwidth") + 1 |
|
return DataFrame( |
|
np.array(["x" * _cw for _ in range(15)]).reshape(5, 3), columns=list("abc") |
|
) |
|
elif data_type == "mixed": |
|
return DataFrame( |
|
{ |
|
"a": np.arange(1.0, 6.0) + 0.01, |
|
"b": np.arange(1, 6).astype(np.int64), |
|
"c": list("abcde"), |
|
} |
|
) |
|
elif data_type == "float": |
|
return DataFrame(np.random.default_rng(2).random((5, 3)), columns=list("abc")) |
|
elif data_type == "int": |
|
return DataFrame( |
|
np.random.default_rng(2).integers(0, 10, (5, 3)), columns=list("abc") |
|
) |
|
else: |
|
raise ValueError |
|
|
|
|
|
@pytest.fixture |
|
def mock_ctypes(monkeypatch): |
|
""" |
|
Mocks WinError to help with testing the clipboard. |
|
""" |
|
|
|
def _mock_win_error(): |
|
return "Window Error" |
|
|
|
|
|
with monkeypatch.context() as m: |
|
m.setattr("ctypes.WinError", _mock_win_error, raising=False) |
|
yield |
|
|
|
|
|
@pytest.mark.usefixtures("mock_ctypes") |
|
def test_checked_call_with_bad_call(monkeypatch): |
|
""" |
|
Give CheckCall a function that returns a falsey value and |
|
mock get_errno so it returns false so an exception is raised. |
|
""" |
|
|
|
def _return_false(): |
|
return False |
|
|
|
monkeypatch.setattr("pandas.io.clipboard.get_errno", lambda: True) |
|
msg = f"Error calling {_return_false.__name__} \\(Window Error\\)" |
|
|
|
with pytest.raises(PyperclipWindowsException, match=msg): |
|
CheckedCall(_return_false)() |
|
|
|
|
|
@pytest.mark.usefixtures("mock_ctypes") |
|
def test_checked_call_with_valid_call(monkeypatch): |
|
""" |
|
Give CheckCall a function that returns a truthy value and |
|
mock get_errno so it returns true so an exception is not raised. |
|
The function should return the results from _return_true. |
|
""" |
|
|
|
def _return_true(): |
|
return True |
|
|
|
monkeypatch.setattr("pandas.io.clipboard.get_errno", lambda: False) |
|
|
|
|
|
checked_call = CheckedCall(_return_true) |
|
assert checked_call() is True |
|
|
|
|
|
@pytest.mark.parametrize( |
|
"text", |
|
[ |
|
"String_test", |
|
True, |
|
1, |
|
1.0, |
|
1j, |
|
], |
|
) |
|
def test_stringify_text(text): |
|
valid_types = (str, int, float, bool) |
|
|
|
if isinstance(text, valid_types): |
|
result = _stringifyText(text) |
|
assert result == str(text) |
|
else: |
|
msg = ( |
|
"only str, int, float, and bool values " |
|
f"can be copied to the clipboard, not {type(text).__name__}" |
|
) |
|
with pytest.raises(PyperclipException, match=msg): |
|
_stringifyText(text) |
|
|
|
|
|
@pytest.fixture |
|
def set_pyqt_clipboard(monkeypatch): |
|
qt_cut, qt_paste = init_qt_clipboard() |
|
with monkeypatch.context() as m: |
|
m.setattr(pd.io.clipboard, "clipboard_set", qt_cut) |
|
m.setattr(pd.io.clipboard, "clipboard_get", qt_paste) |
|
yield |
|
|
|
|
|
@pytest.fixture |
|
def clipboard(qapp): |
|
clip = qapp.clipboard() |
|
yield clip |
|
clip.clear() |
|
|
|
|
|
@pytest.mark.single_cpu |
|
@pytest.mark.clipboard |
|
@pytest.mark.usefixtures("set_pyqt_clipboard") |
|
@pytest.mark.usefixtures("clipboard") |
|
class TestClipboard: |
|
|
|
|
|
@pytest.mark.parametrize("sep", [None, "\t", ",", "|"]) |
|
@pytest.mark.parametrize("encoding", [None, "UTF-8", "utf-8", "utf8"]) |
|
def test_round_trip_frame_sep(self, df, sep, encoding): |
|
df.to_clipboard(excel=None, sep=sep, encoding=encoding) |
|
result = read_clipboard(sep=sep or "\t", index_col=0, encoding=encoding) |
|
tm.assert_frame_equal(df, result) |
|
|
|
|
|
def test_round_trip_frame_string(self, df): |
|
df.to_clipboard(excel=False, sep=None) |
|
result = read_clipboard() |
|
assert df.to_string() == result.to_string() |
|
assert df.shape == result.shape |
|
|
|
|
|
|
|
def test_excel_sep_warning(self, df): |
|
with tm.assert_produces_warning( |
|
UserWarning, |
|
match="to_clipboard in excel mode requires a single character separator.", |
|
check_stacklevel=False, |
|
): |
|
df.to_clipboard(excel=True, sep=r"\t") |
|
|
|
|
|
def test_copy_delim_warning(self, df): |
|
with tm.assert_produces_warning(): |
|
df.to_clipboard(excel=False, sep="\t") |
|
|
|
|
|
|
|
@pytest.mark.parametrize("sep", ["\t", None, "default"]) |
|
@pytest.mark.parametrize("excel", [True, None, "default"]) |
|
def test_clipboard_copy_tabs_default(self, sep, excel, df, clipboard): |
|
kwargs = build_kwargs(sep, excel) |
|
df.to_clipboard(**kwargs) |
|
assert clipboard.text() == df.to_csv(sep="\t") |
|
|
|
|
|
@pytest.mark.parametrize("sep", [None, "default"]) |
|
def test_clipboard_copy_strings(self, sep, df): |
|
kwargs = build_kwargs(sep, False) |
|
df.to_clipboard(**kwargs) |
|
result = read_clipboard(sep=r"\s+") |
|
assert result.to_string() == df.to_string() |
|
assert df.shape == result.shape |
|
|
|
def test_read_clipboard_infer_excel(self, clipboard): |
|
|
|
clip_kwargs = {"engine": "python"} |
|
|
|
text = dedent( |
|
""" |
|
John James\tCharlie Mingus |
|
1\t2 |
|
4\tHarry Carney |
|
""".strip() |
|
) |
|
clipboard.setText(text) |
|
df = read_clipboard(**clip_kwargs) |
|
|
|
|
|
assert df.iloc[1, 1] == "Harry Carney" |
|
|
|
|
|
text = dedent( |
|
""" |
|
a\t b |
|
1 2 |
|
3 4 |
|
""".strip() |
|
) |
|
clipboard.setText(text) |
|
res = read_clipboard(**clip_kwargs) |
|
|
|
text = dedent( |
|
""" |
|
a b |
|
1 2 |
|
3 4 |
|
""".strip() |
|
) |
|
clipboard.setText(text) |
|
exp = read_clipboard(**clip_kwargs) |
|
|
|
tm.assert_frame_equal(res, exp) |
|
|
|
def test_infer_excel_with_nulls(self, clipboard): |
|
|
|
text = "col1\tcol2\n1\tred\n\tblue\n2\tgreen" |
|
|
|
clipboard.setText(text) |
|
df = read_clipboard() |
|
df_expected = DataFrame( |
|
data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]} |
|
) |
|
|
|
|
|
tm.assert_frame_equal(df, df_expected) |
|
|
|
@pytest.mark.parametrize( |
|
"multiindex", |
|
[ |
|
( |
|
"\n".join( |
|
[ |
|
"\t\t\tcol1\tcol2", |
|
"A\t0\tTrue\t1\tred", |
|
"A\t1\tTrue\t\tblue", |
|
"B\t0\tFalse\t2\tgreen", |
|
] |
|
), |
|
[["A", "A", "B"], [0, 1, 0], [True, True, False]], |
|
), |
|
( |
|
"\n".join( |
|
["\t\tcol1\tcol2", "A\t0\t1\tred", "A\t1\t\tblue", "B\t0\t2\tgreen"] |
|
), |
|
[["A", "A", "B"], [0, 1, 0]], |
|
), |
|
], |
|
) |
|
def test_infer_excel_with_multiindex(self, clipboard, multiindex): |
|
|
|
|
|
clipboard.setText(multiindex[0]) |
|
df = read_clipboard() |
|
df_expected = DataFrame( |
|
data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]}, |
|
index=multiindex[1], |
|
) |
|
|
|
|
|
tm.assert_frame_equal(df, df_expected) |
|
|
|
def test_invalid_encoding(self, df): |
|
msg = "clipboard only supports utf-8 encoding" |
|
|
|
with pytest.raises(ValueError, match=msg): |
|
df.to_clipboard(encoding="ascii") |
|
with pytest.raises(NotImplementedError, match=msg): |
|
read_clipboard(encoding="ascii") |
|
|
|
@pytest.mark.parametrize("data", ["\U0001f44d...", "Ωœ∑`...", "abcd..."]) |
|
def test_raw_roundtrip(self, data): |
|
|
|
df = DataFrame({"data": [data]}) |
|
df.to_clipboard() |
|
result = read_clipboard() |
|
tm.assert_frame_equal(df, result) |
|
|
|
@pytest.mark.parametrize("engine", ["c", "python"]) |
|
def test_read_clipboard_dtype_backend( |
|
self, clipboard, string_storage, dtype_backend, engine, using_infer_string |
|
): |
|
|
|
if dtype_backend == "pyarrow": |
|
pa = pytest.importorskip("pyarrow") |
|
if engine == "c" and string_storage == "pyarrow": |
|
|
|
string_dtype = pd.ArrowDtype(pa.large_string()) |
|
else: |
|
string_dtype = pd.ArrowDtype(pa.string()) |
|
else: |
|
string_dtype = pd.StringDtype(string_storage) |
|
|
|
text = """a,b,c,d,e,f,g,h,i |
|
x,1,4.0,x,2,4.0,,True,False |
|
y,2,5.0,,,,,False,""" |
|
clipboard.setText(text) |
|
|
|
with pd.option_context("mode.string_storage", string_storage): |
|
result = read_clipboard(sep=",", dtype_backend=dtype_backend, engine=engine) |
|
|
|
expected = DataFrame( |
|
{ |
|
"a": Series(["x", "y"], dtype=string_dtype), |
|
"b": Series([1, 2], dtype="Int64"), |
|
"c": Series([4.0, 5.0], dtype="Float64"), |
|
"d": Series(["x", None], dtype=string_dtype), |
|
"e": Series([2, NA], dtype="Int64"), |
|
"f": Series([4.0, NA], dtype="Float64"), |
|
"g": Series([NA, NA], dtype="Int64"), |
|
"h": Series([True, False], dtype="boolean"), |
|
"i": Series([False, NA], dtype="boolean"), |
|
} |
|
) |
|
if dtype_backend == "pyarrow": |
|
from pandas.arrays import ArrowExtensionArray |
|
|
|
expected = DataFrame( |
|
{ |
|
col: ArrowExtensionArray(pa.array(expected[col], from_pandas=True)) |
|
for col in expected.columns |
|
} |
|
) |
|
expected["g"] = ArrowExtensionArray(pa.array([None, None])) |
|
|
|
if using_infer_string: |
|
expected.columns = expected.columns.astype( |
|
pd.StringDtype(string_storage, na_value=np.nan) |
|
) |
|
|
|
tm.assert_frame_equal(result, expected) |
|
|
|
def test_invalid_dtype_backend(self): |
|
msg = ( |
|
"dtype_backend numpy is invalid, only 'numpy_nullable' and " |
|
"'pyarrow' are allowed." |
|
) |
|
with pytest.raises(ValueError, match=msg): |
|
read_clipboard(dtype_backend="numpy") |
|
|
|
def test_to_clipboard_pos_args_deprecation(self): |
|
|
|
df = DataFrame({"a": [1, 2, 3]}) |
|
msg = ( |
|
r"Starting with pandas version 3.0 all arguments of to_clipboard " |
|
r"will be keyword-only." |
|
) |
|
with tm.assert_produces_warning(FutureWarning, match=msg): |
|
df.to_clipboard(True, None) |
|
|