Static code analysis and corrections
This commit is contained in:
@@ -0,0 +1,56 @@
|
||||
"""Base test suite for extension arrays.
|
||||
|
||||
These tests are intended for third-party libraries to subclass to validate
|
||||
that their extension arrays and dtypes satisfy the interface. Moving or
|
||||
renaming the tests should not be done lightly.
|
||||
|
||||
Libraries are expected to implement a few pytest fixtures to provide data
|
||||
for the tests. The fixtures may be located in either
|
||||
|
||||
* The same module as your test class.
|
||||
* A ``conftest.py`` in the same directory as your test class.
|
||||
|
||||
The full list of fixtures may be found in the ``conftest.py`` next to this
|
||||
file.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import pytest
|
||||
from pandas.tests.extension.base import BaseDtypeTests
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def dtype():
|
||||
return MyDtype()
|
||||
|
||||
|
||||
class TestMyDtype(BaseDtypeTests):
|
||||
pass
|
||||
|
||||
|
||||
Your class ``TestMyDtype`` will inherit all the tests defined on
|
||||
``BaseDtypeTests``. pytest's fixture discovery will supply your ``dtype``
|
||||
wherever the test requires it. You're free to implement additional tests.
|
||||
|
||||
All the tests in these modules use ``self.assert_frame_equal`` or
|
||||
``self.assert_series_equal`` for dataframe or series comparisons. By default,
|
||||
they use the usual ``pandas.testing.assert_frame_equal`` and
|
||||
``pandas.testing.assert_series_equal``. You can override the checks used
|
||||
by defining the staticmethods ``assert_frame_equal`` and
|
||||
``assert_series_equal`` on your base test class.
|
||||
|
||||
"""
|
||||
from .casting import BaseCastingTests # noqa
|
||||
from .constructors import BaseConstructorsTests # noqa
|
||||
from .dtype import BaseDtypeTests # noqa
|
||||
from .getitem import BaseGetitemTests # noqa
|
||||
from .groupby import BaseGroupbyTests # noqa
|
||||
from .interface import BaseInterfaceTests # noqa
|
||||
from .methods import BaseMethodsTests # noqa
|
||||
from .ops import BaseArithmeticOpsTests, BaseComparisonOpsTests, BaseOpsUtil # noqa
|
||||
from .printing import BasePrintingTests # noqa
|
||||
from .reduce import BaseNoReduceTests, BaseNumericReduceTests, BaseBooleanReduceTests # noqa
|
||||
from .missing import BaseMissingTests # noqa
|
||||
from .reshaping import BaseReshapingTests # noqa
|
||||
from .setitem import BaseSetitemTests # noqa
|
||||
from .io import BaseParsingTests # noqa
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,10 @@
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class BaseExtensionTests(object):
|
||||
assert_equal = staticmethod(tm.assert_equal)
|
||||
assert_series_equal = staticmethod(tm.assert_series_equal)
|
||||
assert_frame_equal = staticmethod(tm.assert_frame_equal)
|
||||
assert_extension_array_equal = staticmethod(
|
||||
tm.assert_extension_array_equal
|
||||
)
|
||||
@@ -0,0 +1,23 @@
|
||||
import pandas as pd
|
||||
from pandas.core.internals import ObjectBlock
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseCastingTests(BaseExtensionTests):
    """Casting to and from ExtensionDtypes"""

    def test_astype_object_series(self, all_data):
        """Casting an extension-backed Series to object yields an ObjectBlock."""
        # Build the Series from the array itself.  Passing ``{"A": all_data}``
        # creates a one-element object Series whose single value is the
        # whole array, so ``astype(object)`` would have nothing to convert.
        ser = pd.Series(all_data, name="A")
        result = ser.astype(object)
        assert isinstance(result._data.blocks[0], ObjectBlock)

    def test_tolist(self, data):
        """``Series.tolist`` returns the same elements as ``list(data)``."""
        result = pd.Series(data).tolist()
        expected = list(data)
        assert result == expected

    def test_astype_str(self, data):
        """``Series.astype(str)`` matches casting the array directly."""
        result = pd.Series(data[:5]).astype(str)
        expected = pd.Series(data[:5].astype(str))
        self.assert_series_equal(result, expected)
|
||||
@@ -0,0 +1,77 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.internals import ExtensionBlock
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseConstructorsTests(BaseExtensionTests):
    """Checks for building arrays, Series and DataFrames from extension data."""

    def test_from_sequence_from_cls(self, data):
        """``_from_sequence`` called on the class round-trips the array."""
        array_type = type(data)
        rebuilt = array_type._from_sequence(data, dtype=data.dtype)
        self.assert_extension_array_equal(rebuilt, data)

        # Same round trip for a zero-length slice.
        empty = data[:0]
        rebuilt_empty = type(empty)._from_sequence(empty, dtype=empty.dtype)
        self.assert_extension_array_equal(rebuilt_empty, empty)

    def test_array_from_scalars(self, data):
        """``_from_sequence`` on a list of scalars returns the array type."""
        first_three = [data[0], data[1], data[2]]
        built = data._from_sequence(first_three)
        assert isinstance(built, type(data))

    def test_series_constructor(self, data):
        """A Series stores the array, uncopied, in an ExtensionBlock."""
        ser = pd.Series(data)
        block = ser._data.blocks[0]
        assert ser.dtype == data.dtype
        assert len(ser) == len(data)
        assert isinstance(block, ExtensionBlock)
        assert ser._data.blocks[0].values is data

        # Series[EA] is unboxed / boxed correctly
        rewrapped = pd.Series(ser)
        assert rewrapped.dtype == data.dtype
        assert isinstance(rewrapped._data.blocks[0], ExtensionBlock)

    @pytest.mark.parametrize("from_series", [True, False])
    def test_dataframe_constructor_from_dict(self, data, from_series):
        """A dict-built frame keeps the dtype, from the array or a Series."""
        column = pd.Series(data) if from_series else data
        frame = pd.DataFrame({"A": column})
        assert frame.dtypes['A'] == column.dtype
        assert frame.shape == (len(column), 1)
        assert isinstance(frame._data.blocks[0], ExtensionBlock)

    def test_dataframe_from_series(self, data):
        """A frame built from a Series keeps the dtype and the block type."""
        frame = pd.DataFrame(pd.Series(data))
        assert frame.dtypes[0] == data.dtype
        assert frame.shape == (len(data), 1)
        assert isinstance(frame._data.blocks[0], ExtensionBlock)

    def test_series_given_mismatched_index_raises(self, data):
        """Three values with a five-label index raise ``ValueError``."""
        expected_message = 'Length of passed values is 3, index implies 5'
        with pytest.raises(ValueError, match=expected_message):
            pd.Series(data[:3], index=[0, 1, 2, 3, 4])

    def test_from_dtype(self, data):
        """A Series can be built from the dtype object or its string form."""
        dtype = data.dtype
        expected = pd.Series(data)

        for dtype_spec in (dtype, str(dtype)):
            built = pd.Series(list(data), dtype=dtype_spec)
            self.assert_series_equal(built, expected)

    def test_pandas_array(self, data):
        """``pd.array`` on an extension array compares equal to its input."""
        # pd.array(extension_array) should be idempotent...
        self.assert_extension_array_equal(pd.array(data), data)

    def test_pandas_array_dtype(self, data):
        """``pd.array`` with an object dtype gives an object PandasArray."""
        # ... but specifying dtype will override idempotency
        object_dtype = np.dtype(object)
        converted = pd.array(data, dtype=object_dtype)
        wanted = pd.arrays.PandasArray(np.asarray(data, dtype=object))
        self.assert_equal(converted, wanted)
|
||||
@@ -0,0 +1,91 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseDtypeTests(BaseExtensionTests):
    """Base class for ExtensionDtype classes"""

    def test_name(self, dtype):
        """``dtype.name`` is a string."""
        assert isinstance(dtype.name, str)

    def test_kind(self, dtype):
        """``dtype.kind`` is ``None`` or one of the allowed kind codes."""
        valid = set('biufcmMOSUV')
        if dtype.kind is not None:
            assert dtype.kind in valid

    def test_construct_from_string_own_name(self, dtype):
        """``construct_from_string(dtype.name)`` returns the same dtype type."""
        result = dtype.construct_from_string(dtype.name)
        assert type(result) is type(dtype)

        # check OK as classmethod
        result = type(dtype).construct_from_string(dtype.name)
        assert type(result) is type(dtype)

    def test_is_dtype_from_name(self, dtype):
        """``is_dtype`` accepts the dtype's own name."""
        result = type(dtype).is_dtype(dtype.name)
        assert result is True

    def test_is_dtype_unboxes_dtype(self, data, dtype):
        """``is_dtype`` accepts an array holding this dtype."""
        assert dtype.is_dtype(data) is True

    def test_is_dtype_from_self(self, dtype):
        """``is_dtype`` accepts a dtype instance."""
        result = type(dtype).is_dtype(dtype)
        assert result is True

    def test_is_not_string_type(self, dtype):
        """The dtype is not reported as a string dtype."""
        # Must be an ``assert``: a bare ``return`` of the boolean is ignored
        # by pytest, so the test could never fail.
        assert not pd.api.types.is_string_dtype(dtype)

    def test_is_not_object_type(self, dtype):
        """The dtype is not reported as an object dtype."""
        # Must be an ``assert`` for the same reason as above.
        assert not pd.api.types.is_object_dtype(dtype)

    def test_eq_with_str(self, dtype):
        """The dtype equals its own name and nothing with a suffix added."""
        assert dtype == dtype.name
        assert dtype != dtype.name + '-suffix'

    def test_eq_with_numpy_object(self, dtype):
        """The dtype does not compare equal to NumPy's object dtype."""
        assert dtype != np.dtype('object')

    def test_eq_with_self(self, dtype):
        """The dtype equals itself and differs from an arbitrary object."""
        assert dtype == dtype
        assert dtype != object()

    def test_array_type(self, data, dtype):
        """``construct_array_type`` returns the class of ``data``."""
        assert dtype.construct_array_type() is type(data)

    def test_check_dtype(self, data):
        """Comparing ``DataFrame.dtypes`` with the dtype string finds the
        extension columns."""
        dtype = data.dtype

        # check equivalency for using .dtypes
        df = pd.DataFrame({'A': pd.Series(data, dtype=dtype),
                           'B': data,
                           'C': 'foo', 'D': 1})

        # np.dtype('int64') == 'Int64' == 'int64'
        # so can't distinguish
        if dtype.name == 'Int64':
            expected = pd.Series([True, True, False, True],
                                 index=list('ABCD'))
        else:
            expected = pd.Series([True, True, False, False],
                                 index=list('ABCD'))

        # XXX: This should probably be *fixed* not ignored.
        # See libops.scalar_compare
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DeprecationWarning)
            result = df.dtypes == str(dtype)

        self.assert_series_equal(result, expected)

        # Comparing the stringified dtypes is unambiguous, so the integer
        # column 'D' never matches here.
        expected = pd.Series([True, True, False, False],
                             index=list('ABCD'))
        result = df.dtypes.apply(str) == str(dtype)
        self.assert_series_equal(result, expected)

    def test_hashable(self, dtype):
        """Hashing the dtype does not raise."""
        hash(dtype)  # no error
|
||||
@@ -0,0 +1,248 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseGetitemTests(BaseExtensionTests):
    """Tests for ExtensionArray.__getitem__."""

    def test_iloc_series(self, data):
        """``Series.iloc`` with a slice or a list matches slicing the array."""
        ser = pd.Series(data)
        result = ser.iloc[:4]
        expected = pd.Series(data[:4])
        self.assert_series_equal(result, expected)

        result = ser.iloc[[0, 1, 2, 3]]
        self.assert_series_equal(result, expected)

    def test_iloc_frame(self, data):
        """``DataFrame.iloc`` with slice and list row indexers, returning
        either a frame or a Series."""
        df = pd.DataFrame({"A": data, 'B':
                           np.arange(len(data), dtype='int64')})
        expected = pd.DataFrame({"A": data[:4]})

        # slice -> frame
        result = df.iloc[:4, [0]]
        self.assert_frame_equal(result, expected)

        # sequence -> frame
        result = df.iloc[[0, 1, 2, 3], [0]]
        self.assert_frame_equal(result, expected)

        expected = pd.Series(data[:4], name='A')

        # slice -> series
        result = df.iloc[:4, 0]
        self.assert_series_equal(result, expected)

        # sequence -> series (list-like row indexer, not a repeat of the
        # slice case above)
        result = df.iloc[[0, 1, 2, 3], 0]
        self.assert_series_equal(result, expected)

    def test_loc_series(self, data):
        """``Series.loc`` with a label slice or a list matches slicing the
        array."""
        ser = pd.Series(data)
        result = ser.loc[:3]
        expected = pd.Series(data[:4])
        self.assert_series_equal(result, expected)

        result = ser.loc[[0, 1, 2, 3]]
        self.assert_series_equal(result, expected)

    def test_loc_frame(self, data):
        """``DataFrame.loc`` with slice and list row indexers, returning
        either a frame or a Series."""
        df = pd.DataFrame({"A": data,
                           'B': np.arange(len(data), dtype='int64')})
        expected = pd.DataFrame({"A": data[:4]})

        # slice -> frame
        result = df.loc[:3, ['A']]
        self.assert_frame_equal(result, expected)

        # sequence -> frame
        result = df.loc[[0, 1, 2, 3], ['A']]
        self.assert_frame_equal(result, expected)

        expected = pd.Series(data[:4], name='A')

        # slice -> series
        result = df.loc[:3, 'A']
        self.assert_series_equal(result, expected)

        # sequence -> series (list-like row indexer, not a repeat of the
        # slice case above)
        result = df.loc[[0, 1, 2, 3], 'A']
        self.assert_series_equal(result, expected)

    def test_getitem_scalar(self, data):
        """Integer indexing returns an instance of ``dtype.type``."""
        result = data[0]
        assert isinstance(result, data.dtype.type)

        result = pd.Series(data)[0]
        assert isinstance(result, data.dtype.type)

    def test_getitem_scalar_na(self, data_missing, na_cmp, na_value):
        """The first element of ``data_missing`` compares as the NA value."""
        result = data_missing[0]
        assert na_cmp(result, na_value)

    def test_getitem_mask(self, data):
        """Boolean-mask indexing on the raw array and inside a Series."""
        # Empty mask, raw array
        mask = np.zeros(len(data), dtype=bool)
        result = data[mask]
        assert len(result) == 0
        assert isinstance(result, type(data))

        # Empty mask, in series
        mask = np.zeros(len(data), dtype=bool)
        result = pd.Series(data)[mask]
        assert len(result) == 0
        assert result.dtype == data.dtype

        # non-empty mask, raw array
        mask[0] = True
        result = data[mask]
        assert len(result) == 1
        assert isinstance(result, type(data))

        # non-empty mask, in series
        result = pd.Series(data)[mask]
        assert len(result) == 1
        assert result.dtype == data.dtype

    def test_getitem_slice(self, data):
        """Slicing always returns an array of the same type."""
        # getitem[slice] should return an array
        result = data[slice(0)]  # empty
        assert isinstance(result, type(data))

        result = data[slice(1)]  # single element, still an array
        assert isinstance(result, type(data))

    def test_get(self, data):
        """``Series.get`` with scalars, lists, slices and missing keys."""
        # GH 20882
        s = pd.Series(data, index=[2 * i for i in range(len(data))])
        assert s.get(4) == s.iloc[2]

        result = s.get([4, 6])
        expected = s.iloc[[2, 3]]
        self.assert_series_equal(result, expected)

        result = s.get(slice(2))
        expected = s.iloc[[0, 1]]
        self.assert_series_equal(result, expected)

        # Labels that are not in the integer index give None.
        assert s.get(-1) is None
        assert s.get(s.index.max() + 1) is None

        s = pd.Series(data[:6], index=list('abcdef'))
        assert s.get('c') == s.iloc[2]

        result = s.get(slice('b', 'd'))
        expected = s.iloc[[1, 2, 3]]
        self.assert_series_equal(result, expected)

        result = s.get('Z')
        assert result is None

        # With a string index, integer keys are treated as positions.
        assert s.get(4) == s.iloc[4]
        assert s.get(-1) == s.iloc[-1]
        assert s.get(len(s)) is None

        # GH 21257
        s = pd.Series(data)
        s2 = s[::2]
        assert s2.get(1) is None

    def test_take_sequence(self, data):
        """List indexing on a Series picks the matching array elements."""
        result = pd.Series(data)[[0, 1, 3]]
        assert result.iloc[0] == data[0]
        assert result.iloc[1] == data[1]
        assert result.iloc[2] == data[3]

    def test_take(self, data, na_value, na_cmp):
        """``take`` with and without ``allow_fill``, plus an out-of-bounds
        index."""
        result = data.take([0, -1])
        assert result.dtype == data.dtype
        assert result[0] == data[0]
        assert result[1] == data[-1]

        # With allow_fill=True, -1 marks a missing value instead of
        # "last element".
        result = data.take([0, -1], allow_fill=True, fill_value=na_value)
        assert result[0] == data[0]
        assert na_cmp(result[1], na_value)

        with pytest.raises(IndexError, match="out of bounds"):
            data.take([len(data) + 1])

    def test_take_empty(self, data, na_value, na_cmp):
        """``take`` on a zero-length array."""
        empty = data[:0]

        result = empty.take([-1], allow_fill=True)
        assert na_cmp(result[0], na_value)

        with pytest.raises(IndexError):
            empty.take([-1])

        with pytest.raises(IndexError, match="cannot do a non-empty take"):
            empty.take([0, 1])

    def test_take_negative(self, data):
        """Negative indices count from the end when ``allow_fill`` is unset."""
        # https://github.com/pandas-dev/pandas/issues/20640
        n = len(data)
        result = data.take([0, -n, n - 1, -1])
        expected = data.take([0, 0, n - 1, n - 1])
        self.assert_extension_array_equal(result, expected)

    def test_take_non_na_fill_value(self, data_missing):
        """``take`` honours a ``fill_value`` that is not the NA value."""
        fill_value = data_missing[1]  # valid
        na = data_missing[0]

        array = data_missing._from_sequence([na, fill_value, na])
        result = array.take([-1, 1], fill_value=fill_value, allow_fill=True)
        expected = array.take([1, 1])
        self.assert_extension_array_equal(result, expected)

    def test_take_pandas_style_negative_raises(self, data, na_value):
        """With ``allow_fill=True`` an index below -1 raises ``ValueError``."""
        with pytest.raises(ValueError):
            data.take([0, -2], fill_value=na_value, allow_fill=True)

    @pytest.mark.parametrize('allow_fill', [True, False])
    def test_take_out_of_bounds_raises(self, data, allow_fill):
        """An index equal to the length raises ``IndexError``."""
        arr = data[:3]
        with pytest.raises(IndexError):
            arr.take(np.asarray([0, 3]), allow_fill=allow_fill)

    def test_take_series(self, data):
        """``Series.take`` returns the taken values under their original
        labels."""
        s = pd.Series(data)
        result = s.take([0, -1])
        expected = pd.Series(
            data._from_sequence([data[0], data[len(data) - 1]], dtype=s.dtype),
            index=[0, len(data) - 1])
        self.assert_series_equal(result, expected)

    def test_reindex(self, data, na_value):
        """``Series.reindex`` with present, partly missing and fully missing
        labels."""
        s = pd.Series(data)
        result = s.reindex([0, 1, 3])
        expected = pd.Series(data.take([0, 1, 3]), index=[0, 1, 3])
        self.assert_series_equal(result, expected)

        n = len(data)
        # -1 and n are not labels of the default index, so they become NA.
        result = s.reindex([-1, 0, n])
        expected = pd.Series(
            data._from_sequence([na_value, data[0], na_value],
                                dtype=s.dtype),
            index=[-1, 0, n])
        self.assert_series_equal(result, expected)

        result = s.reindex([n, n + 1])
        expected = pd.Series(data._from_sequence([na_value, na_value],
                                                 dtype=s.dtype),
                             index=[n, n + 1])
        self.assert_series_equal(result, expected)

    def test_reindex_non_na_fill_value(self, data_missing):
        """``Series.reindex`` fills new labels with a non-NA ``fill_value``."""
        valid = data_missing[1]
        na = data_missing[0]

        array = data_missing._from_sequence([na, valid])
        ser = pd.Series(array)
        result = ser.reindex([0, 1, 2], fill_value=valid)
        expected = pd.Series(data_missing._from_sequence([na, valid, valid]))

        self.assert_series_equal(result, expected)
|
||||
@@ -0,0 +1,83 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseGroupbyTests(BaseExtensionTests):
    """Groupby checks for frames that hold an extension-array column."""

    def test_grouping_grouper(self, data_for_grouping):
        """Each grouping's ``grouper`` is the column values it was built from."""
        object_keys = ["B", "B", None, None, "A", "A", "B", "C"]
        frame = pd.DataFrame({"A": object_keys, "B": data_for_grouping})

        by_object = frame.groupby("A").grouper.groupings[0]
        by_extension = frame.groupby("B").grouper.groupings[0]

        tm.assert_numpy_array_equal(by_object.grouper, frame.A.values)
        tm.assert_extension_array_equal(by_extension.grouper,
                                        data_for_grouping)

    @pytest.mark.parametrize('as_index', [True, False])
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
        """Mean of ``A`` grouped by the extension column, keys sorted."""
        frame = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
                              "B": data_for_grouping})
        result = frame.groupby("B", as_index=as_index).A.mean()

        uniques = pd.factorize(data_for_grouping, sort=True)[1]
        group_index = pd.Index(uniques, name="B")
        expected = pd.Series([3, 1, 4], index=group_index, name="A")

        if not as_index:
            # as_index=False yields a frame with the keys as a column.
            self.assert_frame_equal(result, expected.reset_index())
            return
        self.assert_series_equal(result, expected)

    def test_groupby_extension_no_sort(self, data_for_grouping):
        """Mean of ``A`` grouped by the extension column, first-seen order."""
        frame = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
                              "B": data_for_grouping})
        result = frame.groupby("B", sort=False).A.mean()

        uniques = pd.factorize(data_for_grouping, sort=False)[1]
        group_index = pd.Index(uniques, name="B")
        expected = pd.Series([1, 3, 4], index=group_index, name="A")
        self.assert_series_equal(result, expected)

    def test_groupby_extension_transform(self, data_for_grouping):
        """``transform(len)`` broadcasts group sizes over the non-NA rows."""
        not_missing = ~data_for_grouping.isna()
        frame = pd.DataFrame({"A": [1, 1, 3, 3, 1, 4],
                              "B": data_for_grouping[not_missing]})

        sizes = frame.groupby("B").A.transform(len)

        self.assert_series_equal(
            sizes, pd.Series([3, 3, 2, 2, 3, 1], name="A"))

    @pytest.mark.parametrize('op', [
        lambda x: 1,
        lambda x: [1] * len(x),
        lambda x: pd.Series([1] * len(x)),
        lambda x: x,
    ], ids=['scalar', 'list', 'series', 'object'])
    def test_groupby_extension_apply(self, data_for_grouping, op):
        """``apply`` runs without error; results are not inspected."""
        frame = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
                              "B": data_for_grouping})
        # Group by each column in turn, applying to the whole frame and
        # then to the other column.
        for key, other in (("B", "A"), ("A", "B")):
            frame.groupby(key).apply(op)
            getattr(frame.groupby(key), other).apply(op)

    def test_in_numeric_groupby(self, data_for_grouping):
        """``sum`` keeps the extension column only if its dtype is numeric."""
        frame = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
                              "B": data_for_grouping,
                              "C": [1, 1, 1, 1, 1, 1, 1, 1]})
        summed_columns = frame.groupby("A").sum().columns

        is_numeric = data_for_grouping.dtype._is_numeric
        expected = pd.Index(['B', 'C']) if is_numeric else pd.Index(['C'])

        tm.assert_index_equal(summed_columns, expected)
|
||||
@@ -0,0 +1,68 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.common import is_extension_array_dtype
|
||||
from pandas.core.dtypes.dtypes import ExtensionDtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseInterfaceTests(BaseExtensionTests):
    """Checks of the basic attributes and protocols of an extension array."""

    # ------------------------------------------------------------------------
    # Interface
    # ------------------------------------------------------------------------

    def test_len(self, data):
        """The ``data`` fixture must contain exactly 100 elements."""
        expected_length = 100
        assert len(data) == expected_length

    def test_ndim(self, data):
        """The array is one-dimensional."""
        assert data.ndim == 1

    def test_can_hold_na_valid(self, data):
        """``_can_hold_na`` is exactly ``True``."""
        # GH-20761
        assert data._can_hold_na is True

    def test_memory_usage(self, data):
        """``memory_usage(index=False)`` equals the Series' ``nbytes``."""
        ser = pd.Series(data)
        assert ser.memory_usage(index=False) == ser.nbytes

    def test_array_interface(self, data):
        """``np.array`` works with and without an explicit object dtype."""
        converted = np.array(data)
        assert converted[0] == data[0]

        as_object = np.array(data, dtype=object)
        from_list = np.array(list(data), dtype=object)
        tm.assert_numpy_array_equal(as_object, from_list)

    def test_is_extension_array_dtype(self, data):
        """The array, its dtype and a wrapping Series are all recognised."""
        for candidate in (data, data.dtype, pd.Series(data)):
            assert is_extension_array_dtype(candidate)
        assert isinstance(data.dtype, ExtensionDtype)

    def test_no_values_attribute(self, data):
        """The array exposes neither ``values`` nor ``_values``."""
        # GH-20735: EA's with .values attribute give problems with internal
        # code, disallowing this for now until solved
        for forbidden in ('values', '_values'):
            assert not hasattr(data, forbidden)

    def test_is_numeric_honored(self, data):
        """The block's ``is_numeric`` is the dtype's ``_is_numeric``."""
        block = pd.Series(data)._data.blocks[0]
        assert block.is_numeric is data.dtype._is_numeric

    def test_isna_extension_array(self, data_missing):
        """An ``isna`` result that is an extension array supports any/all."""
        # If your `isna` returns an ExtensionArray, you must also implement
        # _reduce. At the *very* least, you must implement any and all
        mask = data_missing.isna()
        if not is_extension_array_dtype(mask):
            return

        assert mask._reduce('any')
        assert mask.any()

        assert not mask._reduce('all')
        assert not mask.all()

        assert mask.dtype._is_boolean
|
||||
@@ -0,0 +1,23 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import StringIO
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseParsingTests(BaseExtensionTests):
    """CSV round-trip checks for extension dtypes."""

    @pytest.mark.parametrize('engine', ['c', 'python'])
    def test_EA_types(self, engine, data):
        """Writing to CSV and reading back with the dtype name restores the
        frame."""
        dtype_name = str(data.dtype)
        original = pd.DataFrame({
            'with_dtype': pd.Series(data, dtype=dtype_name)
        })

        csv_text = original.to_csv(index=False, na_rep=np.nan)
        parsed = pd.read_csv(
            StringIO(csv_text),
            dtype={'with_dtype': dtype_name},
            engine=engine,
        )

        self.assert_frame_equal(parsed, original)
|
||||
@@ -0,0 +1,341 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseMethodsTests(BaseExtensionTests):
|
||||
"""Various Series and DataFrame methods."""
|
||||
|
||||
@pytest.mark.parametrize('dropna', [True, False])
def test_value_counts(self, all_data, dropna):
    """``value_counts`` on the first ten elements matches a reference built
    from the non-NA values (``dropna``) or from the same slice."""
    head = all_data[:10]
    reference = np.array(head[~head.isna()]) if dropna else head

    counted = pd.Series(head).value_counts(dropna=dropna).sort_index()
    wanted = pd.Series(reference).value_counts(dropna=dropna).sort_index()

    self.assert_series_equal(counted, wanted)
|
||||
|
||||
def test_count(self, data_missing):
    """Row-wise ``count`` is 0 for the first row and 1 for the second."""
    frame = pd.DataFrame({"A": data_missing})
    per_row = frame.count(axis='columns')
    self.assert_series_equal(per_row, pd.Series([0, 1]))
|
||||
|
||||
def test_apply_simple_series(self, data):
|
||||
result = pd.Series(data).apply(id)
|
||||
assert isinstance(result, pd.Series)
|
||||
|
||||
def test_argsort(self, data_for_sorting):
    """``argsort`` of the sorting fixture is ``[2, 0, 1]``."""
    positions = pd.Series(data_for_sorting).argsort()
    wanted = pd.Series(np.array([2, 0, 1], dtype=np.int64))
    self.assert_series_equal(positions, wanted)
|
||||
|
||||
def test_argsort_missing(self, data_missing_for_sorting):
    """``argsort`` of the missing-value fixture is ``[1, -1, 0]``."""
    positions = pd.Series(data_missing_for_sorting).argsort()
    wanted = pd.Series(np.array([1, -1, 0], dtype=np.int64))
    self.assert_series_equal(positions, wanted)
|
||||
|
||||
@pytest.mark.parametrize('ascending', [True, False])
def test_sort_values(self, data_for_sorting, ascending):
    """``Series.sort_values`` in both directions."""
    ser = pd.Series(data_for_sorting)
    ordered = ser.sort_values(ascending=ascending)

    ascending_order = ser.iloc[[2, 0, 1]]
    # Descending is the ascending result read backwards.
    wanted = ascending_order if ascending else ascending_order[::-1]

    self.assert_series_equal(ordered, wanted)
|
||||
|
||||
@pytest.mark.parametrize('ascending', [True, False])
def test_sort_values_missing(self, data_missing_for_sorting, ascending):
    """``Series.sort_values`` with a missing value: position 1 is last in
    both directions."""
    ser = pd.Series(data_missing_for_sorting)
    ordered = ser.sort_values(ascending=ascending)

    positions = [2, 0, 1] if ascending else [0, 2, 1]
    self.assert_series_equal(ordered, ser.iloc[positions])
|
||||
|
||||
@pytest.mark.parametrize('ascending', [True, False])
def test_sort_values_frame(self, data_for_sorting, ascending):
    """``DataFrame.sort_values`` on an integer and an extension column."""
    # NOTE(review): ``ascending`` is accepted but never passed to
    # ``sort_values``, so both parametrizations run the default sort.
    frame = pd.DataFrame({"A": [1, 2, 1], "B": data_for_sorting})
    ordered = frame.sort_values(['A', 'B'])

    wanted = pd.DataFrame(
        {"A": [1, 1, 2], 'B': data_for_sorting.take([2, 0, 1])},
        index=[2, 0, 1],
    )
    self.assert_frame_equal(ordered, wanted)
|
||||
|
||||
@pytest.mark.parametrize('box', [pd.Series, lambda x: x])
@pytest.mark.parametrize('method', [lambda x: x.unique(), pd.unique])
def test_unique(self, data, box, method):
    """``unique`` on two equal values returns one value, as the array type."""
    first = data[0]
    repeated = box(data._from_sequence([first, first]))

    distinct = method(repeated)

    assert len(distinct) == 1
    assert isinstance(distinct, type(data))
    assert distinct[0] == repeated[0]
|
||||
|
||||
@pytest.mark.parametrize('na_sentinel', [-1, -2])
def test_factorize(self, data_for_grouping, na_sentinel):
    """``pd.factorize`` gives the expected codes and uniques, using
    ``na_sentinel`` at the missing positions."""
    codes, values = pd.factorize(data_for_grouping,
                                 na_sentinel=na_sentinel)

    wanted_codes = np.array(
        [0, 0, na_sentinel, na_sentinel, 1, 1, 0, 2], dtype=np.intp)
    wanted_values = data_for_grouping.take([0, 4, 7])

    tm.assert_numpy_array_equal(codes, wanted_codes)
    self.assert_extension_array_equal(values, wanted_values)
|
||||
|
||||
@pytest.mark.parametrize('na_sentinel', [-1, -2])
def test_factorize_equivalence(self, data_for_grouping, na_sentinel):
    """``pd.factorize`` and the array's own ``factorize`` agree."""
    top_codes, top_uniques = pd.factorize(
        data_for_grouping, na_sentinel=na_sentinel)
    own_codes, own_uniques = data_for_grouping.factorize(
        na_sentinel=na_sentinel)

    tm.assert_numpy_array_equal(top_codes, own_codes)
    self.assert_extension_array_equal(top_uniques, own_uniques)
|
||||
|
||||
def test_factorize_empty(self, data):
    """Factorizing a zero-length array gives empty codes and uniques."""
    codes, values = pd.factorize(data[:0])

    no_codes = np.array([], dtype=np.intp)
    no_values = type(data)._from_sequence([], dtype=data[:0].dtype)

    tm.assert_numpy_array_equal(codes, no_codes)
    self.assert_extension_array_equal(values, no_values)
|
||||
|
||||
def test_fillna_copy_frame(self, data_missing):
    """``DataFrame.fillna`` returns a new values object for the column."""
    # Two copies of the element at position 1 of the fixture.
    valid_pair = data_missing.take([1, 1])
    frame = pd.DataFrame({"A": valid_pair})

    fill_with = frame.iloc[0, 0]
    filled = frame.fillna(fill_with)

    assert frame.A.values is not filled.A.values
|
||||
|
||||
def test_fillna_copy_series(self, data_missing):
    """``Series.fillna`` returns new values and leaves the original array
    in place on the source Series."""
    # Two copies of the element at position 1 of the fixture.
    valid_pair = data_missing.take([1, 1])
    series = pd.Series(valid_pair)

    fill_with = series[0]
    filled = series.fillna(fill_with)

    assert series._values is not filled._values
    assert series._values is valid_pair
|
||||
|
||||
def test_fillna_length_mismatch(self, data_missing):
    """Filling with an array of a different length raises ``ValueError``."""
    too_short = data_missing.take([1])
    expected_message = "Length of 'value' does not match."
    with pytest.raises(ValueError, match=expected_message):
        data_missing.fillna(too_short)
|
||||
|
||||
def test_combine_le(self, data_repeated):
    """``Series.combine`` with ``<=`` against a Series and a scalar."""
    # GH 20825
    # Test that combine works when doing a <= (le) comparison
    def less_equal(lhs, rhs):
        return lhs <= rhs

    first, second = data_repeated(2)
    left = pd.Series(first)
    right = pd.Series(second)

    # Series against Series.
    combined = left.combine(right, less_equal)
    wanted = pd.Series([a <= b for a, b in zip(first, second)])
    self.assert_series_equal(combined, wanted)

    # Series against a scalar.
    scalar = left.iloc[0]
    combined = left.combine(scalar, less_equal)
    wanted = pd.Series([a <= scalar for a in first])
    self.assert_series_equal(combined, wanted)
|
||||
|
||||
def test_combine_add(self, data_repeated):
    """``Series.combine`` with ``+`` against a Series and a scalar."""
    # GH 20825
    def add(lhs, rhs):
        return lhs + rhs

    first, second = data_repeated(2)
    left = pd.Series(first)
    right = pd.Series(second)

    # Series against Series.
    combined = left.combine(right, add)
    # Overflow warnings are silenced while the expected sums are built.
    with np.errstate(over='ignore'):
        pair_sums = [a + b for a, b in zip(first, second)]
        wanted = pd.Series(first._from_sequence(pair_sums))
    self.assert_series_equal(combined, wanted)

    # Series against a scalar.
    scalar = left.iloc[0]
    combined = left.combine(scalar, add)
    scalar_sums = [a + scalar for a in first]
    wanted = pd.Series(first._from_sequence(scalar_sums))
    self.assert_series_equal(combined, wanted)
|
||||
|
||||
def test_combine_first(self, data):
    """``combine_first`` of two overlapping slices rebuilds the first five
    elements."""
    # https://github.com/pandas-dev/pandas/issues/24147
    head = pd.Series(data[:3])
    tail = pd.Series(data[2:5], index=[2, 3, 4])

    merged = head.combine_first(tail)

    self.assert_series_equal(merged, pd.Series(data[:5]))
|
||||
|
||||
@pytest.mark.parametrize('frame', [True, False])
@pytest.mark.parametrize('periods, indices', [
    (-2, [2, 3, 4, -1, -1]),
    (0, [0, 1, 2, 3, 4]),
    (2, [-1, -1, 0, 1, 2]),
])
def test_container_shift(self, data, frame, periods, indices):
    """``shift`` on a Series or DataFrame matches a ``take`` with fill."""
    # https://github.com/pandas-dev/pandas/issues/22386
    first_five = data[:5]
    series = pd.Series(first_five, name='A')
    shifted_a = pd.Series(first_five.take(indices, allow_fill=True),
                          name='A')

    if not frame:
        self.assert_series_equal(series.shift(periods), shifted_a)
        return

    # Frame case: add a constant integer column and shift both.
    result = series.to_frame(name='A').assign(B=1).shift(periods)
    shifted_b = pd.Series([1] * 5, name='B').shift(periods)
    expected = pd.concat([shifted_a, shifted_b], axis=1)
    self.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('periods, indices', [
|
||||
[-4, [-1, -1]],
|
||||
[-1, [1, -1]],
|
||||
[0, [0, 1]],
|
||||
[1, [-1, 0]],
|
||||
[4, [-1, -1]]
|
||||
])
|
||||
def test_shift_non_empty_array(self, data, periods, indices):
|
||||
# https://github.com/pandas-dev/pandas/issues/23911
|
||||
subset = data[:2]
|
||||
result = subset.shift(periods)
|
||||
expected = subset.take(indices, allow_fill=True)
|
||||
self.assert_extension_array_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('periods', [
|
||||
-4, -1, 0, 1, 4
|
||||
])
|
||||
def test_shift_empty_array(self, data, periods):
|
||||
# https://github.com/pandas-dev/pandas/issues/23911
|
||||
empty = data[:0]
|
||||
result = empty.shift(periods)
|
||||
expected = empty
|
||||
self.assert_extension_array_equal(result, expected)
|
||||
|
||||
def test_shift_fill_value(self, data):
|
||||
arr = data[:4]
|
||||
fill_value = data[0]
|
||||
result = arr.shift(1, fill_value=fill_value)
|
||||
expected = data.take([0, 0, 1, 2])
|
||||
self.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = arr.shift(-2, fill_value=fill_value)
|
||||
expected = data.take([2, 3, 0, 0])
|
||||
self.assert_extension_array_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("as_frame", [True, False])
|
||||
def test_hash_pandas_object_works(self, data, as_frame):
|
||||
# https://github.com/pandas-dev/pandas/issues/23066
|
||||
data = pd.Series(data)
|
||||
if as_frame:
|
||||
data = data.to_frame()
|
||||
a = pd.util.hash_pandas_object(data)
|
||||
b = pd.util.hash_pandas_object(data)
|
||||
self.assert_equal(a, b)
|
||||
|
||||
@pytest.mark.parametrize("as_series", [True, False])
|
||||
def test_searchsorted(self, data_for_sorting, as_series):
|
||||
b, c, a = data_for_sorting
|
||||
arr = type(data_for_sorting)._from_sequence([a, b, c])
|
||||
|
||||
if as_series:
|
||||
arr = pd.Series(arr)
|
||||
assert arr.searchsorted(a) == 0
|
||||
assert arr.searchsorted(a, side="right") == 1
|
||||
|
||||
assert arr.searchsorted(b) == 1
|
||||
assert arr.searchsorted(b, side="right") == 2
|
||||
|
||||
assert arr.searchsorted(c) == 2
|
||||
assert arr.searchsorted(c, side="right") == 3
|
||||
|
||||
result = arr.searchsorted(arr.take([0, 2]))
|
||||
expected = np.array([0, 2], dtype=np.intp)
|
||||
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# sorter
|
||||
sorter = np.array([1, 2, 0])
|
||||
assert data_for_sorting.searchsorted(a, sorter=sorter) == 0
|
||||
|
||||
@pytest.mark.parametrize("as_frame", [True, False])
|
||||
def test_where_series(self, data, na_value, as_frame):
|
||||
assert data[0] != data[1]
|
||||
cls = type(data)
|
||||
a, b = data[:2]
|
||||
|
||||
ser = pd.Series(cls._from_sequence([a, a, b, b], dtype=data.dtype))
|
||||
cond = np.array([True, True, False, False])
|
||||
|
||||
if as_frame:
|
||||
ser = ser.to_frame(name='a')
|
||||
cond = cond.reshape(-1, 1)
|
||||
|
||||
result = ser.where(cond)
|
||||
expected = pd.Series(cls._from_sequence([a, a, na_value, na_value],
|
||||
dtype=data.dtype))
|
||||
|
||||
if as_frame:
|
||||
expected = expected.to_frame(name='a')
|
||||
self.assert_equal(result, expected)
|
||||
|
||||
# array other
|
||||
cond = np.array([True, False, True, True])
|
||||
other = cls._from_sequence([a, b, a, b], dtype=data.dtype)
|
||||
if as_frame:
|
||||
other = pd.DataFrame({"a": other})
|
||||
cond = pd.DataFrame({"a": cond})
|
||||
result = ser.where(cond, other)
|
||||
expected = pd.Series(cls._from_sequence([a, b, b, b],
|
||||
dtype=data.dtype))
|
||||
if as_frame:
|
||||
expected = expected.to_frame(name='a')
|
||||
self.assert_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("use_numpy", [True, False])
|
||||
@pytest.mark.parametrize("as_series", [True, False])
|
||||
@pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]])
|
||||
def test_repeat(self, data, repeats, as_series, use_numpy):
|
||||
arr = type(data)._from_sequence(data[:3], dtype=data.dtype)
|
||||
if as_series:
|
||||
arr = pd.Series(arr)
|
||||
|
||||
result = np.repeat(arr, repeats) if use_numpy else arr.repeat(repeats)
|
||||
|
||||
repeats = [repeats] * 3 if isinstance(repeats, int) else repeats
|
||||
expected = [x for x, n in zip(arr, repeats) for _ in range(n)]
|
||||
expected = type(data)._from_sequence(expected, dtype=data.dtype)
|
||||
if as_series:
|
||||
expected = pd.Series(expected, index=arr.index.repeat(repeats))
|
||||
|
||||
self.assert_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("use_numpy", [True, False])
|
||||
@pytest.mark.parametrize('repeats, kwargs, error, msg', [
|
||||
(2, dict(axis=1), ValueError, "'axis"),
|
||||
(-1, dict(), ValueError, "negative"),
|
||||
([1, 2], dict(), ValueError, "shape"),
|
||||
(2, dict(foo='bar'), TypeError, "'foo'")])
|
||||
def test_repeat_raises(self, data, repeats, kwargs, error, msg, use_numpy):
    """Invalid ``repeat`` arguments raise ``error`` with a matching message."""

    def do_repeat():
        if not use_numpy:
            return data.repeat(repeats, **kwargs)
        return np.repeat(data, repeats, **kwargs)

    with pytest.raises(error, match=msg):
        do_repeat()
|
||||
@@ -0,0 +1,132 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseMissingTests(BaseExtensionTests):
    """Tests for missing-value handling: ``isna``, ``dropna`` and ``fillna``.

    The assertions below rely on ``data_missing`` holding exactly two
    elements, the first missing and the second valid.
    """

    def test_isna(self, data_missing):
        """``isna`` flags only the first element, for arrays and Series."""
        expected = np.array([True, False])

        result = pd.isna(data_missing)
        tm.assert_numpy_array_equal(result, expected)

        result = pd.Series(data_missing).isna()
        expected = pd.Series(expected)
        self.assert_series_equal(result, expected)

        # GH 21189: an emptied Series must still give an empty bool result.
        result = pd.Series(data_missing).drop([0, 1]).isna()
        expected = pd.Series([], dtype=bool)
        self.assert_series_equal(result, expected)

    def test_dropna_array(self, data_missing):
        """``dropna`` on the array keeps only the valid element."""
        result = data_missing.dropna()
        expected = data_missing[[1]]
        self.assert_extension_array_equal(result, expected)

    def test_dropna_series(self, data_missing):
        """``dropna`` on a Series keeps only the row at position 1."""
        ser = pd.Series(data_missing)
        result = ser.dropna()
        expected = ser.iloc[[1]]
        self.assert_series_equal(result, expected)

    def test_dropna_frame(self, data_missing):
        """``DataFrame.dropna`` honours missing values in extension columns."""
        df = pd.DataFrame({"A": data_missing})

        # defaults
        result = df.dropna()
        expected = df.iloc[[1]]
        self.assert_frame_equal(result, expected)

        # axis = 1: the only column contains a missing value, so it is dropped
        result = df.dropna(axis='columns')
        expected = pd.DataFrame(index=[0, 1])
        self.assert_frame_equal(result, expected)

        # multiple: every row has a missing value in one of the two columns
        df = pd.DataFrame({"A": data_missing,
                           "B": [1, np.nan]})
        result = df.dropna()
        expected = df.iloc[:0]
        self.assert_frame_equal(result, expected)

    def test_fillna_scalar(self, data_missing):
        """Filling with a valid scalar yields an array of two valid values."""
        valid = data_missing[1]
        result = data_missing.fillna(valid)
        # Build the expectation without calling ``fillna``: comparing the
        # output of ``fillna`` with itself could never detect a failure.
        expected = data_missing._from_sequence(
            [valid, valid], dtype=data_missing.dtype)
        self.assert_extension_array_equal(result, expected)

    def test_fillna_limit_pad(self, data_missing):
        """Forward fill with ``limit=2`` leaves the third gap unfilled."""
        arr = data_missing.take([1, 0, 0, 0, 1])
        result = pd.Series(arr).fillna(method='ffill', limit=2)
        expected = pd.Series(data_missing.take([1, 1, 1, 0, 1]))
        self.assert_series_equal(result, expected)

    def test_fillna_limit_backfill(self, data_missing):
        """Backward fill with ``limit=2`` leaves the first gap unfilled."""
        arr = data_missing.take([1, 0, 0, 0, 1])
        result = pd.Series(arr).fillna(method='backfill', limit=2)
        expected = pd.Series(data_missing.take([1, 0, 1, 1, 1]))
        self.assert_series_equal(result, expected)

    def test_fillna_series(self, data_missing):
        """``Series.fillna`` accepts a scalar or another Series as filler."""
        fill_value = data_missing[1]
        ser = pd.Series(data_missing)

        result = ser.fillna(fill_value)
        expected = pd.Series(data_missing._from_sequence(
            [fill_value, fill_value], dtype=data_missing.dtype))
        self.assert_series_equal(result, expected)

        # Fill with a series
        result = ser.fillna(expected)
        self.assert_series_equal(result, expected)

        # Fill with a series not affecting the missing values
        result = ser.fillna(ser)
        self.assert_series_equal(result, ser)

    @pytest.mark.parametrize('method', ['ffill', 'bfill'])
    def test_fillna_series_method(self, data_missing, method):
        """``fillna(method=...)`` propagates the single valid value."""
        fill_value = data_missing[1]

        if method == 'ffill':
            # Reverse so that the valid value precedes the missing one.
            data_missing = data_missing[::-1]

        result = pd.Series(data_missing).fillna(method=method)
        expected = pd.Series(data_missing._from_sequence(
            [fill_value, fill_value], dtype=data_missing.dtype))

        self.assert_series_equal(result, expected)

    def test_fillna_frame(self, data_missing):
        """``DataFrame.fillna`` with a scalar fills the extension column."""
        fill_value = data_missing[1]

        result = pd.DataFrame({
            "A": data_missing,
            "B": [1, 2]
        }).fillna(fill_value)

        expected = pd.DataFrame({
            "A": data_missing._from_sequence([fill_value, fill_value],
                                             dtype=data_missing.dtype),
            "B": [1, 2],
        })

        self.assert_frame_equal(result, expected)

    def test_fillna_fill_other(self, data):
        """Filling another column by name leaves the extension column as is."""
        result = pd.DataFrame({
            "A": data,
            "B": [np.nan] * len(data)
        }).fillna({"B": 0.0})

        expected = pd.DataFrame({
            "A": data,
            "B": [0.0] * len(result),
        })

        self.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,166 @@
|
||||
import operator
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core import ops
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseOpsUtil(BaseExtensionTests):
    """Helpers shared by the operator test classes."""

    def get_op_from_name(self, op_name):
        """Translate a dunder name such as ``'__add__'`` into a callable.

        Names that are missing from :mod:`operator` are assumed to be
        reflected operators (``'__radd__'`` and friends); they are mapped to
        the forward operator with its two arguments swapped.
        """
        bare_name = op_name.strip('_')
        try:
            return getattr(operator, bare_name)
        except AttributeError:
            # Assume it is the reverse operator: drop the leading "r".
            forward = getattr(operator, bare_name[1:])

        def reflected(x, y):
            return forward(y, x)

        return reflected

    def check_opname(self, s, op_name, other, exc=Exception):
        """Resolve ``op_name`` to a callable and delegate to ``_check_op``."""
        self._check_op(s, self.get_op_from_name(op_name), other, op_name, exc)

    def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
        """Check ``op(s, other)`` against ``s.combine``, or expect ``exc``.

        When ``exc`` is None the operation must succeed and match the
        elementwise ``combine`` result; otherwise it must raise ``exc``.
        """
        if exc is not None:
            with pytest.raises(exc):
                op(s, other)
            return

        result = op(s, other)
        expected = s.combine(other, op)
        self.assert_series_equal(result, expected)

    def _check_divmod_op(self, s, op, other, exc=Exception):
        """Check both outputs of a divmod-style ``op``, or expect ``exc``.

        Note that the failure branch always calls the builtin ``divmod`` on
        ``(s, other)``, whichever ``op`` was passed in.
        """
        # divmod has multiple return values, so check separately
        if exc is not None:
            with pytest.raises(exc):
                divmod(s, other)
            return

        result_div, result_mod = op(s, other)
        # Any op other than builtin divmod is treated as the reflected form.
        if op is divmod:
            numerator, denominator = s, other
        else:
            numerator, denominator = other, s
        expected_div = numerator // denominator
        expected_mod = numerator % denominator
        self.assert_series_equal(result_div, expected_div)
        self.assert_series_equal(result_mod, expected_mod)
|
||||
|
||||
|
||||
class BaseArithmeticOpsTests(BaseOpsUtil):
    """Arithmetic operator tests for Series and DataFrame.

    Each ``*_exc`` class variable names the exception the corresponding
    kind of operation is expected to raise. Subclasses that support a kind
    of operation should override the matching variable:

    * series_scalar_exc = TypeError
    * frame_scalar_exc = TypeError
    * series_array_exc = TypeError
    * divmod_exc = TypeError
    """
    series_scalar_exc = TypeError
    frame_scalar_exc = TypeError
    series_array_exc = TypeError
    divmod_exc = TypeError

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        """Series <op> scalar, with the scalar taken from the Series."""
        ser = pd.Series(data)
        scalar = ser.iloc[0]
        self.check_opname(ser, all_arithmetic_operators, scalar,
                          exc=self.series_scalar_exc)

    @pytest.mark.xfail(run=False, reason="_reduce needs implementation")
    def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
        """DataFrame <op> scalar, with the scalar taken from the array."""
        frame = pd.DataFrame({'A': data})
        scalar = data[0]
        self.check_opname(frame, all_arithmetic_operators, scalar,
                          exc=self.frame_scalar_exc)

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """Series <op> Series, the other being one value repeated."""
        ser = pd.Series(data)
        constant = pd.Series([ser.iloc[0]] * len(ser))
        self.check_opname(ser, all_arithmetic_operators, constant,
                          exc=self.series_array_exc)

    def test_divmod(self, data):
        """``divmod`` and reflected ``divmod`` between a Series and ``1``."""
        ser = pd.Series(data)
        expected_error = self.divmod_exc
        self._check_divmod_op(ser, divmod, 1, exc=expected_error)
        self._check_divmod_op(1, ops.rdivmod, ser, exc=expected_error)

    def test_divmod_series_array(self, data):
        """``divmod`` between a Series and the raw extension array."""
        self._check_divmod_op(pd.Series(data), divmod, data)

    def test_add_series_with_extension_array(self, data):
        """``Series + array`` equals a Series built from ``array + array``."""
        boxed = pd.Series(data)
        self.assert_series_equal(boxed + data, pd.Series(data + data))

    def test_error(self, data, all_arithmetic_operators):
        """Looking up an arithmetic dunder on the array raises."""
        with pytest.raises(AttributeError):
            getattr(data, all_arithmetic_operators)

    def test_direct_arith_with_series_returns_not_implemented(self, data):
        """Calling ``__add__`` directly with a Series must defer.

        EAs should return NotImplemented for ops with Series; pandas takes
        care of unboxing the series and calling the EA's op.
        """
        boxed = pd.Series(data)
        if not hasattr(data, '__add__'):
            raise pytest.skip(
                "{} does not implement add".format(data.__class__.__name__)
            )
        assert data.__add__(boxed) is NotImplemented
|
||||
|
||||
|
||||
class BaseComparisonOpsTests(BaseOpsUtil):
    """Comparison operator tests for Series and DataFrame."""

    def _compare_other(self, s, data, op_name, other):
        """Check one comparison of ``data``/``s`` against ``other``.

        The array-level dunder must return NotImplemented in every case.
        At Series level ``__eq__`` must not be all-True, ``__ne__`` must be
        all-True, and any other comparison must raise TypeError.
        """
        op = self.get_op_from_name(op_name)

        # array
        assert getattr(data, op_name)(other) is NotImplemented

        # series
        if op_name == '__eq__':
            assert not op(s, other).all()
        elif op_name == '__ne__':
            assert op(s, other).all()
        else:
            boxed = pd.Series(data)
            with pytest.raises(TypeError):
                op(boxed, other)

    def test_compare_scalar(self, data, all_compare_operators):
        """Compare against the scalar ``0``."""
        self._compare_other(pd.Series(data), data, all_compare_operators, 0)

    def test_compare_array(self, data, all_compare_operators):
        """Compare against a Series holding one value repeated."""
        ser = pd.Series(data)
        constant = pd.Series([data[0]] * len(data))
        self._compare_other(ser, data, all_compare_operators, constant)

    def test_direct_arith_with_series_returns_not_implemented(self, data):
        """Calling ``__eq__`` directly with a Series must defer.

        EAs should return NotImplemented for ops with Series; pandas takes
        care of unboxing the series and calling the EA's op.
        """
        boxed = pd.Series(data)
        if not hasattr(data, '__eq__'):
            raise pytest.skip(
                "{} does not implement __eq__".format(data.__class__.__name__)
            )
        assert data.__eq__(boxed) is NotImplemented
|
||||
@@ -0,0 +1,44 @@
|
||||
import io
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import compat
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BasePrintingTests(BaseExtensionTests):
    """Checks on how an extension array is rendered as text."""

    @pytest.mark.parametrize("size", ["big", "small"])
    def test_array_repr(self, data, size):
        """``repr`` names the class, the length and the dtype.

        The "big" variant concatenates five copies of ``data`` and must also
        contain an ellipsis.
        """
        if size != "small":
            arr = type(data)._concat_same_type([data] * 5)
        else:
            arr = data[:5]

        text = repr(arr)
        assert arr.__class__.__name__ in text
        assert 'Length: {}'.format(len(arr)) in text
        assert str(arr.dtype) in text
        if size == 'big':
            assert '...' in text

    def test_array_repr_unicode(self, data):
        """Converting the array to text yields a text-type object."""
        as_text = compat.text_type(data)
        assert isinstance(as_text, compat.text_type)

    def test_series_repr(self, data):
        """The dtype name appears in the Series repr."""
        assert data.dtype.name in repr(pd.Series(data))

    def test_dataframe_repr(self, data):
        """Building the DataFrame repr does not raise."""
        frame = pd.DataFrame({"A": data})
        repr(frame)

    def test_dtype_name_in_info(self, data):
        """The dtype name appears in ``DataFrame.info`` output."""
        sink = io.StringIO()
        pd.DataFrame({"A": data}).info(buf=sink)
        assert data.dtype.name in sink.getvalue()
|
||||
@@ -0,0 +1,61 @@
|
||||
import warnings
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseReduceTests(BaseExtensionTests):
    """
    Shared helper for the reduction tests. Reductions generally
    only make sense for numeric/boolean operations.
    """
    def check_reduce(self, s, op_name, skipna):
        """Compare a reduction on ``s`` with the same one on float64 data."""

        def reduce(series):
            return getattr(series, op_name)(skipna=skipna)

        result = reduce(s)
        expected = reduce(s.astype('float64'))
        tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
class BaseNoReduceTests(BaseReduceTests):
    """ For arrays that define no reductions: each one must raise. """

    @pytest.mark.parametrize('skipna', [True, False])
    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
        """Every numeric reduction raises TypeError."""
        ser = pd.Series(data)

        with pytest.raises(TypeError):
            getattr(ser, all_numeric_reductions)(skipna=skipna)

    @pytest.mark.parametrize('skipna', [True, False])
    def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna):
        """Every boolean reduction raises TypeError."""
        ser = pd.Series(data)

        with pytest.raises(TypeError):
            getattr(ser, all_boolean_reductions)(skipna=skipna)
|
||||
|
||||
|
||||
class BaseNumericReduceTests(BaseReduceTests):
    """Numeric reductions must agree with the float64 equivalent."""

    @pytest.mark.parametrize('skipna', [True, False])
    def test_reduce_series(self, data, all_numeric_reductions, skipna):
        """Run ``check_reduce`` for one numeric reduction."""
        ser = pd.Series(data)

        # min/max with empty produce numpy warnings
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            self.check_reduce(ser, all_numeric_reductions, skipna)
|
||||
|
||||
|
||||
class BaseBooleanReduceTests(BaseReduceTests):
    """Boolean reductions must agree with the float64 equivalent."""

    @pytest.mark.parametrize('skipna', [True, False])
    def test_reduce_series(self, data, all_boolean_reductions, skipna):
        """Run ``check_reduce`` for one boolean reduction."""
        ser = pd.Series(data)
        self.check_reduce(ser, all_boolean_reductions, skipna)
|
||||
@@ -0,0 +1,271 @@
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.internals import ExtensionBlock
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseReshapingTests(BaseExtensionTests):
    """Checks for concat, align, column assignment, merge and (un)stack."""

    @pytest.mark.parametrize('in_frame', [True, False])
    def test_concat(self, data, in_frame):
        """Concatenating a container with itself doubles it, dtype intact."""
        container = pd.Series(data)
        if in_frame:
            container = pd.DataFrame(container)
        doubled = pd.concat([container, container], ignore_index=True)

        assert len(doubled) == 2 * len(data)

        found_dtype = doubled.dtypes[0] if in_frame else doubled.dtype
        assert found_dtype == data.dtype
        assert isinstance(doubled._data.blocks[0], ExtensionBlock)

    @pytest.mark.parametrize('in_frame', [True, False])
    def test_concat_all_na_block(self, data_missing, in_frame):
        """Concatenating a valid piece with an all-missing piece."""
        pieces = [
            pd.Series(data_missing.take([1, 1]), index=[0, 1]),
            pd.Series(data_missing.take([0, 0]), index=[2, 3]),
        ]
        if in_frame:
            pieces = [pd.DataFrame({"a": piece}) for piece in pieces]
        result = pd.concat(pieces)

        combined = data_missing.take([1, 1, 0, 0])
        if in_frame:
            self.assert_frame_equal(result, pd.DataFrame({"a": combined}))
        else:
            self.assert_series_equal(result, pd.Series(combined))

    def test_concat_mixed_dtypes(self, data):
        """Concat with other dtypes matches concat of object-cast inputs.

        See https://github.com/pandas-dev/pandas/issues/20762
        """
        ext_frame = pd.DataFrame({'A': data[:3]})
        int_frame = pd.DataFrame({"A": [1, 2, 3]})
        cat_frame = pd.DataFrame({"A": ['a', 'b', 'c']}).astype('category')

        # First all three dtypes together, then just the EA and one other.
        # Each group is checked as whole frames and as single columns.
        for frames in ([ext_frame, int_frame, cat_frame],
                       [ext_frame, int_frame]):
            result = pd.concat(frames)
            expected = pd.concat([frame.astype(object) for frame in frames])
            self.assert_frame_equal(result, expected)

            columns = [frame['A'] for frame in frames]
            result = pd.concat(columns)
            expected = pd.concat([col.astype(object) for col in columns])
            self.assert_series_equal(result, expected)

    def test_concat_columns(self, data, na_value):
        """Column-wise concat, with aligned and non-aligned indexes."""
        ext_frame = pd.DataFrame({'A': data[:3]})

        def check(int_frame, expected):
            # Whole frames and their single columns must concat alike.
            result = pd.concat([ext_frame, int_frame], axis=1)
            self.assert_frame_equal(result, expected)
            result = pd.concat([ext_frame['A'], int_frame['B']], axis=1)
            self.assert_frame_equal(result, expected)

        check(pd.DataFrame({'B': [1, 2, 3]}),
              pd.DataFrame({'A': data[:3], 'B': [1, 2, 3]}))

        # non-aligned
        shifted_frame = pd.DataFrame({'B': [1, 2, 3]}, index=[1, 2, 3])
        padded = data._from_sequence(list(data[:3]) + [na_value],
                                     dtype=data.dtype)
        check(shifted_frame,
              pd.DataFrame({'A': padded, 'B': [np.nan, 1, 2, 3]}))

    def test_align(self, data, na_value):
        """Aligning two offset Series pads each with ``na_value``."""
        first = data[:3]
        second = data[2:5]
        left, right = pd.Series(first).align(
            pd.Series(second, index=[1, 2, 3]))

        # Assumes that the ctor can take a list of scalars of the type
        expected_left = pd.Series(data._from_sequence(
            list(first) + [na_value], dtype=data.dtype))
        expected_right = pd.Series(data._from_sequence(
            [na_value] + list(second), dtype=data.dtype))
        self.assert_series_equal(left, expected_left)
        self.assert_series_equal(right, expected_right)

    def test_align_frame(self, data, na_value):
        """Aligning two offset DataFrames pads each with ``na_value``."""
        first = data[:3]
        second = data[2:5]
        other = pd.DataFrame({'A': second}, index=[1, 2, 3])
        left, right = pd.DataFrame({'A': first}).align(other)

        # Assumes that the ctor can take a list of scalars of the type
        expected_left = pd.DataFrame({'A': data._from_sequence(
            list(first) + [na_value], dtype=data.dtype)})
        expected_right = pd.DataFrame({'A': data._from_sequence(
            [na_value] + list(second), dtype=data.dtype)})
        self.assert_frame_equal(left, expected_left)
        self.assert_frame_equal(right, expected_right)

    def test_align_series_frame(self, data, na_value):
        """Aligning a Series with a longer DataFrame pads the Series.

        See https://github.com/pandas-dev/pandas/issues/20576
        """
        ser = pd.Series(data, name='a')
        frame = pd.DataFrame({"col": np.arange(len(ser) + 1)})
        aligned_ser, aligned_frame = ser.align(frame)

        padded = data._from_sequence(list(data) + [na_value],
                                     dtype=data.dtype)
        expected_ser = pd.Series(padded, name=ser.name)

        self.assert_series_equal(aligned_ser, expected_ser)
        self.assert_frame_equal(aligned_frame, frame)

    def test_set_frame_expand_regular_with_extension(self, data):
        """Adding an extension column to a frame of plain integers."""
        ones = [1] * len(data)
        frame = pd.DataFrame({"A": ones})
        frame['B'] = data
        self.assert_frame_equal(frame, pd.DataFrame({"A": ones, "B": data}))

    def test_set_frame_expand_extension_with_regular(self, data):
        """Adding a plain integer column to a frame of extension data."""
        frame = pd.DataFrame({'A': data})
        frame['B'] = [1] * len(data)
        expected = pd.DataFrame({"A": data, "B": [1] * len(data)})
        self.assert_frame_equal(frame, expected)

    def test_set_frame_overwrite_object(self, data):
        """Overwriting an object column takes on the extension dtype.

        See https://github.com/pandas-dev/pandas/issues/20555
        """
        frame = pd.DataFrame({"A": [1] * len(data)}, dtype=object)
        frame['A'] = data
        assert frame.dtypes['A'] == data.dtype

    def test_merge(self, data, na_value):
        """Merging on an integer key carries the extension column (GH-20743)."""
        column_order = ['ext', 'int1', 'key', 'int2']
        left = pd.DataFrame({'ext': data[:3], 'int1': [1, 2, 3],
                             'key': [0, 1, 2]})
        right = pd.DataFrame({'int2': [1, 2, 3, 4], 'key': [0, 0, 1, 3]})

        # merge with the default ``how``
        result = pd.merge(left, right)
        ext_values = data._from_sequence([data[0], data[0], data[1]],
                                         dtype=data.dtype)
        expected = pd.DataFrame({
            'int1': [1, 1, 2],
            'int2': [1, 2, 3],
            'key': [0, 0, 1],
            'ext': ext_values,
        })
        self.assert_frame_equal(result, expected[column_order])

        # outer merge: unmatched rows are padded with missing values
        result = pd.merge(left, right, how='outer')
        ext_values = data._from_sequence(
            [data[0], data[0], data[1], data[2], na_value],
            dtype=data.dtype)
        expected = pd.DataFrame({
            'int1': [1, 1, 2, 3, np.nan],
            'int2': [1, 2, 3, np.nan, 4],
            'key': [0, 0, 1, 2, 3],
            'ext': ext_values,
        })
        self.assert_frame_equal(result, expected[column_order])

    def test_merge_on_extension_array(self, data):
        """Merging on an extension-array key column (GH 23020)."""
        a, b = data[:2]
        key = type(data)._from_sequence([a, b], dtype=data.dtype)
        frame = pd.DataFrame({"key": key, "val": [1, 2]})

        joined = pd.DataFrame({"key": key,
                               "val_x": [1, 2],
                               "val_y": [1, 2]})
        self.assert_frame_equal(pd.merge(frame, frame, on='key'), joined)

        # order
        result = pd.merge(frame.iloc[[1, 0]], frame, on='key')
        swapped = joined.iloc[[1, 0]].reset_index(drop=True)
        self.assert_frame_equal(result, swapped)

    def test_merge_on_extension_array_duplicates(self, data):
        """Merging on an extension-array key with duplicates (GH 23020)."""
        a, b = data[:2]
        key = type(data)._from_sequence([a, b, a], dtype=data.dtype)
        left = pd.DataFrame({"key": key, "val": [1, 2, 3]})
        right = pd.DataFrame({"key": key, "val": [1, 2, 3]})

        expected = pd.DataFrame({
            "key": key.take([0, 0, 0, 0, 1]),
            "val_x": [1, 1, 3, 3, 2],
            "val_y": [1, 3, 1, 3, 2],
        })
        self.assert_frame_equal(pd.merge(left, right, on='key'), expected)

    @pytest.mark.parametrize("columns", [
        ["A", "B"],
        pd.MultiIndex.from_tuples([('A', 'a'), ('A', 'b')],
                                  names=['outer', 'inner']),
    ])
    def test_stack(self, data, columns):
        """``stack`` keeps the extension dtype and matches the object path."""
        frame = pd.DataFrame({"A": data[:5], "B": data[:5]})
        frame.columns = columns
        stacked = frame.stack()

        # we need a second astype(object), in case the constructor inferred
        # object -> specialized, as is done for period.
        expected = frame.astype(object).stack().astype(object)

        source_dtype = frame.iloc[:, 0].dtype
        if isinstance(expected, pd.Series):
            assert stacked.dtype == source_dtype
        else:
            assert all(stacked.dtypes == source_dtype)

        self.assert_equal(stacked.astype(object), expected)

    @pytest.mark.parametrize("index", [
        # Two levels, uniform.
        pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b']]),
                                   names=['a', 'b']),

        # non-uniform
        pd.MultiIndex.from_tuples([('A', 'a'), ('A', 'b'), ('B', 'b')]),

        # three levels, non-uniform
        pd.MultiIndex.from_product([('A', 'B'), ('a', 'b', 'c'), (0, 1, 2)]),
        pd.MultiIndex.from_tuples([
            ('A', 'a', 1),
            ('A', 'b', 0),
            ('A', 'a', 0),
            ('B', 'a', 0),
            ('B', 'c', 1),
        ]),
    ])
    @pytest.mark.parametrize("obj", ["series", "frame"])
    def test_unstack(self, data, index, obj):
        """``unstack`` keeps the array type for every level combination."""
        data = data[:len(index)]
        if obj == "series":
            container = pd.Series(data, index=index)
        else:
            container = pd.DataFrame({"A": data, "B": data}, index=index)

        n_levels = index.nlevels
        level_numbers = list(range(n_levels))

        # For three levels, i.e. [0, 1, 2], this visits
        # [(0,), (1,), (2,), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]
        for size in range(1, n_levels):
            for level in itertools.permutations(level_numbers, size):
                result = container.unstack(level=level)
                assert all(isinstance(result[col].array, type(data))
                           for col in result.columns)
                expected = container.astype(object).unstack(level=level)
                result = result.astype(object)

                self.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,189 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseSetitemTests(BaseExtensionTests):
|
||||
def test_setitem_scalar_series(self, data, box_in_series):
|
||||
if box_in_series:
|
||||
data = pd.Series(data)
|
||||
data[0] = data[1]
|
||||
assert data[0] == data[1]
|
||||
|
||||
def test_setitem_sequence(self, data, box_in_series):
|
||||
if box_in_series:
|
||||
data = pd.Series(data)
|
||||
original = data.copy()
|
||||
|
||||
data[[0, 1]] = [data[1], data[0]]
|
||||
assert data[0] == original[1]
|
||||
assert data[1] == original[0]
|
||||
|
||||
@pytest.mark.parametrize('as_array', [True, False])
|
||||
def test_setitem_sequence_mismatched_length_raises(self, data, as_array):
    """Setting with a value of the wrong length raises and changes nothing."""
    ser = pd.Series(data)
    snapshot = ser.copy()
    value = [data[0]]
    if as_array:
        value = data._from_sequence(value)

    template = 'cannot set using a {} indexer with a different length'
    attempts = [
        ('list-like', [0, 1]),
        ('slice', slice(3)),
    ]
    for kind, indexer in attempts:
        with pytest.raises(ValueError, match=template.format(kind)):
            ser[indexer] = value
        # Ensure no modifications made before the exception
        self.assert_series_equal(ser, snapshot)
|
||||
|
||||
def test_setitem_empty_indxer(self, data, box_in_series):
|
||||
if box_in_series:
|
||||
data = pd.Series(data)
|
||||
original = data.copy()
|
||||
data[np.array([], dtype=int)] = []
|
||||
self.assert_equal(data, original)
|
||||
|
||||
def test_setitem_sequence_broadcasts(self, data, box_in_series):
|
||||
if box_in_series:
|
||||
data = pd.Series(data)
|
||||
data[[0, 1]] = data[2]
|
||||
assert data[0] == data[2]
|
||||
assert data[1] == data[2]
|
||||
|
||||
@pytest.mark.parametrize('setter', ['loc', 'iloc'])
|
||||
def test_setitem_scalar(self, data, setter):
|
||||
arr = pd.Series(data)
|
||||
setter = getattr(arr, setter)
|
||||
operator.setitem(setter, 0, data[1])
|
||||
assert arr[0] == data[1]
|
||||
|
||||
def test_setitem_loc_scalar_mixed(self, data):
|
||||
df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
|
||||
df.loc[0, 'B'] = data[1]
|
||||
assert df.loc[0, 'B'] == data[1]
|
||||
|
||||
def test_setitem_loc_scalar_single(self, data):
|
||||
df = pd.DataFrame({"B": data})
|
||||
df.loc[10, 'B'] = data[1]
|
||||
assert df.loc[10, 'B'] == data[1]
|
||||
|
||||
def test_setitem_loc_scalar_multiple_homogoneous(self, data):
|
||||
df = pd.DataFrame({"A": data, "B": data})
|
||||
df.loc[10, 'B'] = data[1]
|
||||
assert df.loc[10, 'B'] == data[1]
|
||||
|
||||
def test_setitem_iloc_scalar_mixed(self, data):
|
||||
df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
|
||||
df.iloc[0, 1] = data[1]
|
||||
assert df.loc[0, 'B'] == data[1]
|
||||
|
||||
def test_setitem_iloc_scalar_single(self, data):
|
||||
df = pd.DataFrame({"B": data})
|
||||
df.iloc[10, 0] = data[1]
|
||||
assert df.loc[10, 'B'] == data[1]
|
||||
|
||||
def test_setitem_iloc_scalar_multiple_homogoneous(self, data):
|
||||
df = pd.DataFrame({"A": data, "B": data})
|
||||
df.iloc[10, 1] = data[1]
|
||||
assert df.loc[10, 'B'] == data[1]
|
||||
|
||||
@pytest.mark.parametrize('as_callable', [True, False])
@pytest.mark.parametrize('setter', ['loc', None])
def test_setitem_mask_aligned(self, data, as_callable, setter):
    """Set two elements through a boolean mask of the same length.

    Parameters
    ----------
    data : the extension array fixture; positions 5 and 6 supply the
        values written into positions 0 and 1.
    as_callable : when true, the mask is wrapped in a callable that
        returns it, so the callable-indexer path is exercised.
    setter : ``'loc'`` to assign through ``Series.loc``, or ``None`` to
        assign through ``Series.__setitem__``.
    """
    ser = pd.Series(data)
    mask = np.zeros(len(data), dtype=bool)
    mask[:2] = True

    if as_callable:
        def mask2(x):
            return mask
    else:
        mask2 = mask

    if setter:
        # loc
        target = getattr(ser, setter)
    else:
        # Series.__setitem__
        target = ser

    # This must be the only assignment. Repeating it unconditionally
    # with ``ser[mask2] = ...`` would let the assertions pass even if
    # the parametrized setter silently failed to write anything.
    operator.setitem(target, mask2, data[5:7])

    assert ser[0] == data[5]
    assert ser[1] == data[6]
|
||||
|
||||
@pytest.mark.parametrize('setter', ['loc', None])
def test_setitem_mask_broadcast(self, data, setter):
    """A scalar assigned through a boolean mask fills every masked slot.

    The first two positions are masked and set to ``data[10]``, either
    through ``Series.loc`` (``setter='loc'``) or through
    ``Series.__setitem__`` (``setter=None``).
    """
    ser = pd.Series(data)
    mask = np.zeros(len(data), dtype=bool)
    mask[:2] = True

    # A truthy setter names the indexer attribute; otherwise use the
    # Series itself.
    target = getattr(ser, setter) if setter else ser
    target[mask] = data[10]

    for position in (0, 1):
        assert ser[position] == data[10]
|
||||
|
||||
def test_setitem_expand_columns(self, data):
    """Adding a scalar column to a frame that holds an extension column.

    The new column ``B`` is created once with ``frame['B'] = 1`` and
    once with ``frame.loc[:, 'B'] = 1``; both must match a frame built
    directly with a list of ones. ``B`` is then overwritten with
    ``data`` itself.
    """
    base = pd.DataFrame({"A": data})

    via_getitem = base.copy()
    via_getitem['B'] = 1
    with_ones = pd.DataFrame({"A": data, "B": [1] * len(data)})
    self.assert_frame_equal(via_getitem, with_ones)

    via_loc = base.copy()
    via_loc.loc[:, 'B'] = 1
    self.assert_frame_equal(via_loc, with_ones)

    # overwrite with new type
    via_loc['B'] = data
    both_extension = pd.DataFrame({"A": data, "B": data})
    self.assert_frame_equal(via_loc, both_extension)
|
||||
|
||||
def test_setitem_expand_with_extension(self, data):
    """Adding an extension column to a frame that holds a plain column.

    Column ``B`` is created from ``data`` once with ``frame['B'] = data``
    and once with ``frame.loc[:, 'B'] = data``; both results must match
    a frame constructed with both columns up front.
    """
    base = pd.DataFrame({"A": [1] * len(data)})

    via_getitem = base.copy()
    via_getitem['B'] = data
    wanted = pd.DataFrame({"A": [1] * len(data), "B": data})
    self.assert_frame_equal(via_getitem, wanted)

    via_loc = base.copy()
    via_loc.loc[:, 'B'] = data
    self.assert_frame_equal(via_loc, wanted)
|
||||
|
||||
def test_setitem_frame_invalid_length(self, data):
    """Assigning a column shorter than the frame raises ``ValueError``.

    The frame has ``len(data)`` rows and the assigned column is the
    five-element slice ``data[:5]``.
    """
    frame = pd.DataFrame({"A": [1] * len(data)})
    message = "Length of values does not match length of index"
    with pytest.raises(ValueError, match=message):
        frame['B'] = data[:5]
|
||||
|
||||
@pytest.mark.xfail(reason="GH#20441: setitem on extension types.")
def test_setitem_tuple_index(self, data):
    """Set an element of a Series whose index labels are tuples.

    The element labelled ``(0, 1)`` is overwritten with ``data[1]``; the
    result is compared against a Series built from ``data.take([1, 1])``
    on the same index. Marked as an expected failure.
    """
    tuple_labels = [(0, 0), (0, 1)]
    ser = pd.Series(data[:2], index=tuple_labels)
    wanted = pd.Series(data.take([1, 1]), index=ser.index)

    ser[(0, 1)] = data[1]

    self.assert_series_equal(ser, wanted)
|
||||
|
||||
def test_setitem_slice_mismatch_length_raises(self, data):
    """Assigning two elements into a one-element slice raises ``ValueError``.

    Operates on the first five elements of ``data``.
    """
    head = data[:5]
    with pytest.raises(ValueError):
        # Target slice has length 1, the value has length 2.
        head[:1] = head[:2]
|
||||
|
||||
def test_setitem_slice_array(self, data):
    """Overwrite a whole five-element slice with another array.

    A copy of the first five elements is replaced by the last five
    elements of ``data`` and must then equal them.
    """
    head = data[:5].copy()
    tail = data[-5:]
    head[:5] = tail
    self.assert_extension_array_equal(head, data[-5:])
|
||||
|
||||
def test_setitem_scalar_key_sequence_raise(self, data):
    """Assigning a two-element array to a scalar key raises ``ValueError``.

    Operates on a copy of the first five elements of ``data``.
    """
    head = data[:5].copy()
    with pytest.raises(ValueError):
        # Scalar position on the left, length-2 array on the right.
        head[0] = head[[0, 1]]
|
||||
Reference in New Issue
Block a user