demo + utils venv
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,144 @@
|
||||
"""Rudimentary Apache Arrow-backed ExtensionArray.
|
||||
|
||||
At the moment, just a boolean array / type is implemented.
|
||||
Eventually, we'll want to parametrize the type and support
|
||||
multiple dtypes. Not all methods are implemented yet, and the
|
||||
current implementation is not efficient.
|
||||
"""
|
||||
import copy
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
import pyarrow as pa
|
||||
|
||||
import pandas as pd
|
||||
from pandas.api.extensions import (
|
||||
ExtensionArray, ExtensionDtype, register_extension_dtype, take)
|
||||
|
||||
|
||||
@register_extension_dtype
class ArrowBoolDtype(ExtensionDtype):
    """Extension dtype for boolean data stored in Apache Arrow.

    The class is passed through ``register_extension_dtype`` and exposes
    the string name ``'arrow_bool'``; its array type is ``ArrowBoolArray``.
    """

    type = np.bool_
    kind = 'b'
    name = 'arrow_bool'
    na_value = pa.NULL

    @classmethod
    def construct_from_string(cls, string):
        """Build the dtype from its string name.

        Parameters
        ----------
        string : str
            Candidate dtype name.

        Returns
        -------
        ArrowBoolDtype
            A new instance when ``string`` equals ``cls.name``.

        Raises
        ------
        TypeError
            When ``string`` is anything other than ``cls.name``.
        """
        if string == cls.name:
            return cls()
        else:
            raise TypeError("Cannot construct a '{}' from "
                            "'{}'".format(cls, string))

    @classmethod
    def construct_array_type(cls):
        """Return the array class associated with this dtype."""
        return ArrowBoolArray

    @property
    def _is_boolean(self):
        """Flag marking this dtype as boolean.

        Exposed as a property so that attribute-style reads such as
        ``dtype._is_boolean`` yield the value ``True`` itself rather than
        a bound method object.
        """
        return True
|
||||
|
||||
|
||||
class ArrowBoolArray(ExtensionArray):
    """Boolean ExtensionArray backed by a ``pyarrow.ChunkedArray``.

    The Arrow data lives in ``self._data``. Most operations convert it with
    ``to_pandas()`` and rebuild a new array from the result, so the
    implementation favours simplicity over efficiency.
    """

    def __init__(self, values):
        """Wrap an existing boolean ``pyarrow.ChunkedArray``.

        Raises
        ------
        ValueError
            If ``values`` is not a ``pyarrow.ChunkedArray``.
        AssertionError
            If the Arrow type of ``values`` is not ``pa.bool_()``.
        """
        if not isinstance(values, pa.ChunkedArray):
            raise ValueError(
                "values must be a pyarrow.ChunkedArray, got '{}'".format(
                    type(values).__name__))

        assert values.type == pa.bool_()
        self._data = values
        self._dtype = ArrowBoolDtype()

    def __repr__(self):
        return "ArrowBoolArray({})".format(repr(self._data))

    @classmethod
    def from_scalars(cls, values):
        """Build an array from a sequence of scalars.

        ``values`` goes through ``np.asarray`` and ``pa.array`` and is
        stored as a single-chunk ``ChunkedArray``.
        """
        arr = pa.chunked_array([pa.array(np.asarray(values))])
        return cls(arr)

    @classmethod
    def from_array(cls, arr):
        """Build an array from a ``pyarrow.Array`` (wrapped as one chunk)."""
        assert isinstance(arr, pa.Array)
        return cls(pa.chunked_array([arr]))

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        """Construct from ``scalars``; ``dtype`` and ``copy`` are ignored."""
        return cls.from_scalars(scalars)

    def __getitem__(self, item):
        """Return a scalar for scalar ``item``, otherwise a new array."""
        if pd.api.types.is_scalar(item):
            return self._data.to_pandas()[item]
        else:
            vals = self._data.to_pandas()[item]
            return type(self).from_scalars(vals)

    def __len__(self):
        return len(self._data)

    def astype(self, dtype, copy=True):
        """Cast to ``dtype``.

        A request for this array's own dtype returns ``self`` (or a copy
        when ``copy`` is true); anything else is delegated to the parent
        class.
        """
        # needed to fix this astype for the Series constructor.
        if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:
            if copy:
                return self.copy()
            return self
        return super(ArrowBoolArray, self).astype(dtype, copy)

    @property
    def dtype(self):
        """The ``ArrowBoolDtype`` instance created in ``__init__``."""
        return self._dtype

    @property
    def nbytes(self):
        """Sum of ``size`` over every non-``None`` buffer of every chunk."""
        return sum(x.size for chunk in self._data.chunks
                   for x in chunk.buffers()
                   if x is not None)

    def isna(self):
        """Return an ``ArrowBoolArray`` mask built from ``pd.isna``."""
        nas = pd.isna(self._data.to_pandas())
        return type(self).from_scalars(nas)

    def take(self, indices, allow_fill=False, fill_value=None):
        """Take elements at ``indices`` using pandas' ``take`` helper.

        When ``allow_fill`` is true and no ``fill_value`` is given, the
        dtype's ``na_value`` is used as the fill value.
        """
        data = self._data.to_pandas()

        if allow_fill and fill_value is None:
            fill_value = self.dtype.na_value

        result = take(data, indices, fill_value=fill_value,
                      allow_fill=allow_fill)
        return self._from_sequence(result, dtype=self.dtype)

    def copy(self, deep=False):
        """Return a new array wrapping a (deep or shallow) copy of the data."""
        # Inside the method body ``copy`` resolves to the ``copy`` module.
        if deep:
            return type(self)(copy.deepcopy(self._data))
        else:
            return type(self)(copy.copy(self._data))

    @classmethod
    def _concat_same_type(cls, to_concat):
        """Concatenate several arrays by chaining their Arrow chunks.

        Declared as a classmethod so that ``cls`` is the array class and
        ``cls(arr)`` constructs the result.
        """
        chunks = list(itertools.chain.from_iterable(x._data.chunks
                                                    for x in to_concat))
        arr = pa.chunked_array(chunks)
        return cls(arr)

    def __invert__(self):
        return type(self).from_scalars(
            ~self._data.to_pandas()
        )

    def _reduce(self, method, skipna=True, **kwargs):
        """Apply the reduction named ``method`` (for example ``'any'``).

        With ``skipna`` the entries flagged by ``isna`` are dropped first.

        Raises
        ------
        TypeError
            If the array has no attribute called ``method``.
        """
        if skipna:
            arr = self[~self.isna()]
        else:
            arr = self

        try:
            op = getattr(arr, method)
        except AttributeError:
            raise TypeError(
                "cannot perform reduction '{}' on ArrowBoolArray".format(
                    method))
        return op(**kwargs)

    def any(self, axis=0, out=None):
        """Return ``any()`` of the pandas conversion; arguments are unused."""
        return self._data.to_pandas().any()

    def all(self, axis=0, out=None):
        """Return ``all()`` of the pandas conversion; arguments are unused."""
        return self._data.to_pandas().all()
|
||||
@@ -0,0 +1,68 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.tests.extension import base
|
||||
import pandas.util.testing as tm
|
||||
|
||||
pytest.importorskip('pyarrow', minversion="0.10.0")
|
||||
|
||||
from .bool import ArrowBoolArray, ArrowBoolDtype # isort:skip
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Provide a fresh ``ArrowBoolDtype`` instance."""
    arrow_dtype = ArrowBoolDtype()
    return arrow_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Provide an ``ArrowBoolArray`` of 100 random boolean values."""
    random_bools = np.random.randint(0, 2, size=100, dtype=bool)
    return ArrowBoolArray.from_scalars(random_bools)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """Provide a length-2 ``ArrowBoolArray`` built from ``[None, True]``."""
    scalars = [None, True]
    return ArrowBoolArray.from_scalars(scalars)
|
||||
|
||||
|
||||
class BaseArrowTests(object):
    """Empty mixin used as a common base by the Arrow test classes."""
|
||||
|
||||
|
||||
class TestDtype(BaseArrowTests, base.BaseDtypeTests):
    """Dtype tests for ``ArrowBoolDtype``."""

    def test_array_type_with_arg(self, data, dtype):
        """Skipped; the skip reason points at GH-22666."""
        reason = "GH-22666"
        pytest.skip(reason)
|
||||
|
||||
|
||||
class TestInterface(BaseArrowTests, base.BaseInterfaceTests):
    """Interface tests for ``ArrowBoolArray``."""

    def test_repr(self, data):
        """Skipped until a repr test is written (reason: "TODO")."""
        # ``pytest.skip`` raises on its own, so no ``raise`` is needed;
        # this matches the other skips in this module.
        pytest.skip("TODO")
|
||||
|
||||
|
||||
class TestConstructors(BaseArrowTests, base.BaseConstructorsTests):
    """Constructor tests for ``ArrowBoolArray``."""

    def test_from_dtype(self, data):
        """Skipped; the skip reason points at GH-22666."""
        reason = "GH-22666"
        pytest.skip(reason)

    # seems like some bug in isna on empty BoolArray returning floats.
    @pytest.mark.xfail(reason='bad is-na for empty data')
    def test_from_sequence_from_cls(self, data):
        """Run the inherited test, which is expected to fail for now."""
        inherited = super(TestConstructors, self)
        inherited.test_from_sequence_from_cls(data)
|
||||
|
||||
|
||||
class TestReduce(base.BaseNoReduceTests):
    """No-reduce tests, with the boolean series case turned into a no-op."""

    def test_reduce_series_boolean(self):
        """Intentionally empty override of the same-named base test."""
        # NOTE(review): presumably boolean reductions are covered by
        # TestReduceBoolean instead -- confirm.
        return None
|
||||
|
||||
|
||||
class TestReduceBoolean(base.BaseBooleanReduceTests):
    """Run the inherited boolean-reduction tests unchanged."""
|
||||
|
||||
|
||||
def test_is_bool_dtype(data):
    """The array counts as boolean and can be used as a boolean indexer."""
    assert pd.api.types.is_bool_dtype(data)
    assert pd.core.common.is_bool_indexer(data)

    ser = pd.Series(range(len(data)))
    # Indexing with the extension array must match indexing with its
    # NumPy conversion.
    via_extension = ser[data]
    via_numpy = ser[np.asarray(data)]
    tm.assert_series_equal(via_extension, via_numpy)
|
||||
@@ -0,0 +1,56 @@
|
||||
"""Base test suite for extension arrays.
|
||||
|
||||
These tests are intended for third-party libraries to subclass to validate
|
||||
that their extension arrays and dtypes satisfy the interface. Moving or
|
||||
renaming the tests should not be done lightly.
|
||||
|
||||
Libraries are expected to implement a few pytest fixtures to provide data
|
||||
for the tests. The fixtures may be located in either
|
||||
|
||||
* The same module as your test class.
|
||||
* A ``conftest.py`` in the same directory as your test class.
|
||||
|
||||
The full list of fixtures may be found in the ``conftest.py`` next to this
|
||||
file.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import pytest
|
||||
from pandas.tests.extension.base import BaseDtypeTests
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def dtype():
|
||||
return MyDtype()
|
||||
|
||||
|
||||
class TestMyDtype(BaseDtypeTests):
|
||||
pass
|
||||
|
||||
|
||||
Your class ``TestMyDtype`` will inherit all the tests defined on
|
||||
``BaseDtypeTests``. pytest's fixture discovery will supply your ``dtype``
|
||||
wherever the test requires it. You're free to implement additional tests.
|
||||
|
||||
All the tests in these modules use ``self.assert_frame_equal`` or
|
||||
``self.assert_series_equal`` for dataframe or series comparisons. By default,
|
||||
they use the usual ``pandas.testing.assert_frame_equal`` and
|
||||
``pandas.testing.assert_series_equal``. You can override the checks used
|
||||
by defining the staticmethods ``assert_frame_equal`` and
|
||||
``assert_series_equal`` on your base test class.
|
||||
|
||||
"""
|
||||
from .casting import BaseCastingTests # noqa
|
||||
from .constructors import BaseConstructorsTests # noqa
|
||||
from .dtype import BaseDtypeTests # noqa
|
||||
from .getitem import BaseGetitemTests # noqa
|
||||
from .groupby import BaseGroupbyTests # noqa
|
||||
from .interface import BaseInterfaceTests # noqa
|
||||
from .methods import BaseMethodsTests # noqa
|
||||
from .ops import BaseArithmeticOpsTests, BaseComparisonOpsTests, BaseOpsUtil # noqa
|
||||
from .printing import BasePrintingTests # noqa
|
||||
from .reduce import BaseNoReduceTests, BaseNumericReduceTests, BaseBooleanReduceTests # noqa
|
||||
from .missing import BaseMissingTests # noqa
|
||||
from .reshaping import BaseReshapingTests # noqa
|
||||
from .setitem import BaseSetitemTests # noqa
|
||||
from .io import BaseParsingTests # noqa
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,10 @@
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class BaseExtensionTests(object):
    """Root of the extension test classes.

    Exposes the comparison helpers from ``tm`` as static methods so that
    subclasses can override the checks they use.
    """

    assert_equal = staticmethod(tm.assert_equal)
    assert_series_equal = staticmethod(tm.assert_series_equal)
    assert_frame_equal = staticmethod(tm.assert_frame_equal)
    assert_extension_array_equal = staticmethod(tm.assert_extension_array_equal)
|
||||
@@ -0,0 +1,23 @@
|
||||
import pandas as pd
|
||||
from pandas.core.internals import ObjectBlock
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseCastingTests(BaseExtensionTests):
    """Tests for casting extension data to other types."""

    def test_astype_object_series(self, all_data):
        """``astype(object)`` stores the result in an ``ObjectBlock``."""
        as_object = pd.Series({"A": all_data}).astype(object)
        first_block = as_object._data.blocks[0]
        assert isinstance(first_block, ObjectBlock)

    def test_tolist(self, data):
        """``Series.tolist`` agrees with ``list`` over the raw array."""
        from_series = pd.Series(data).tolist()
        from_array = list(data)
        assert from_series == from_array

    def test_astype_str(self, data):
        """Casting to ``str`` via the Series or via the array is the same."""
        cast_on_series = pd.Series(data[:5]).astype(str)
        cast_on_array = pd.Series(data[:5].astype(str))
        self.assert_series_equal(cast_on_series, cast_on_array)
|
||||
@@ -0,0 +1,77 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.internals import ExtensionBlock
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseConstructorsTests(BaseExtensionTests):
    """Tests for constructing extension arrays, Series and DataFrames."""

    def test_from_sequence_from_cls(self, data):
        """``_from_sequence`` round-trips the full and the empty array."""
        for arr in (data, data[:0]):
            rebuilt = type(arr)._from_sequence(arr, dtype=arr.dtype)
            self.assert_extension_array_equal(rebuilt, arr)

    def test_array_from_scalars(self, data):
        """``_from_sequence`` on a list of scalars returns the array type."""
        first_three = [data[0], data[1], data[2]]
        built = data._from_sequence(first_three)
        assert isinstance(built, type(data))

    def test_series_constructor(self, data):
        """A Series keeps the dtype and stores the array itself."""
        ser = pd.Series(data)
        block = ser._data.blocks[0]
        assert ser.dtype == data.dtype
        assert len(ser) == len(data)
        assert isinstance(block, ExtensionBlock)
        assert block.values is data

        # Series[EA] is unboxed / boxed correctly
        rewrapped = pd.Series(ser)
        assert rewrapped.dtype == data.dtype
        assert isinstance(rewrapped._data.blocks[0], ExtensionBlock)

    @pytest.mark.parametrize("from_series", [True, False])
    def test_dataframe_constructor_from_dict(self, data, from_series):
        """A dict column keeps its dtype, given as an array or a Series."""
        column = pd.Series(data) if from_series else data
        frame = pd.DataFrame({"A": column})
        assert frame.dtypes['A'] == column.dtype
        assert frame.shape == (len(column), 1)
        assert isinstance(frame._data.blocks[0], ExtensionBlock)

    def test_dataframe_from_series(self, data):
        """A DataFrame built from a single Series keeps the dtype."""
        frame = pd.DataFrame(pd.Series(data))
        assert frame.dtypes[0] == data.dtype
        assert frame.shape == (len(data), 1)
        assert isinstance(frame._data.blocks[0], ExtensionBlock)

    def test_series_given_mismatched_index_raises(self, data):
        """Three values with a five-label index raise ``ValueError``."""
        msg = 'Length of passed values is 3, index implies 5'
        five_labels = [0, 1, 2, 3, 4]
        with pytest.raises(ValueError, match=msg):
            pd.Series(data[:3], index=five_labels)

    def test_from_dtype(self, data):
        """A Series can be built from the dtype object or its string."""
        # construct from our dtype & string dtype
        dtype = data.dtype
        expected = pd.Series(data)

        from_dtype = pd.Series(list(data), dtype=dtype)
        self.assert_series_equal(from_dtype, expected)

        from_string = pd.Series(list(data), dtype=str(dtype))
        self.assert_series_equal(from_string, expected)

    def test_pandas_array(self, data):
        """``pd.array`` on an extension array returns an equal array."""
        # pd.array(extension_array) should be idempotent...
        self.assert_extension_array_equal(pd.array(data), data)

    def test_pandas_array_dtype(self, data):
        """An explicit object dtype yields a ``PandasArray`` of objects."""
        # ... but specifying dtype will override idempotency
        expected = pd.arrays.PandasArray(np.asarray(data, dtype=object))
        result = pd.array(data, dtype=np.dtype(object))
        self.assert_equal(result, expected)
|
||||
@@ -0,0 +1,91 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseDtypeTests(BaseExtensionTests):
    """Base class for ExtensionDtype classes"""

    def test_name(self, dtype):
        """``dtype.name`` is a string."""
        assert isinstance(dtype.name, str)

    def test_kind(self, dtype):
        """``dtype.kind`` is ``None`` or one of the listed kind codes."""
        valid = set('biufcmMOSUV')
        if dtype.kind is not None:
            assert dtype.kind in valid

    def test_construct_from_string_own_name(self, dtype):
        """The dtype can be rebuilt from its own name."""
        result = dtype.construct_from_string(dtype.name)
        assert type(result) is type(dtype)

        # check OK as classmethod
        result = type(dtype).construct_from_string(dtype.name)
        assert type(result) is type(dtype)

    def test_is_dtype_from_name(self, dtype):
        """``is_dtype`` accepts the dtype's name."""
        result = type(dtype).is_dtype(dtype.name)
        assert result is True

    def test_is_dtype_unboxes_dtype(self, data, dtype):
        """``is_dtype`` accepts an array carrying the dtype."""
        assert dtype.is_dtype(data) is True

    def test_is_dtype_from_self(self, dtype):
        """``is_dtype`` accepts a dtype instance."""
        result = type(dtype).is_dtype(dtype)
        assert result is True

    def test_is_not_string_type(self, dtype):
        """The dtype is not reported as a string dtype."""
        # Must be an assertion: returning the value would let the test
        # pass regardless of the outcome.
        assert not pd.api.types.is_string_dtype(dtype)

    def test_is_not_object_type(self, dtype):
        """The dtype is not reported as an object dtype."""
        # Must be an assertion: returning the value would let the test
        # pass regardless of the outcome.
        assert not pd.api.types.is_object_dtype(dtype)

    def test_eq_with_str(self, dtype):
        """The dtype equals its name and nothing with a suffix added."""
        assert dtype == dtype.name
        assert dtype != dtype.name + '-suffix'

    def test_eq_with_numpy_object(self, dtype):
        """The dtype differs from NumPy's object dtype."""
        assert dtype != np.dtype('object')

    def test_eq_with_self(self, dtype):
        """The dtype equals itself and differs from a plain object."""
        assert dtype == dtype
        assert dtype != object()

    def test_array_type(self, data, dtype):
        """``construct_array_type`` returns the class of ``data``."""
        assert dtype.construct_array_type() is type(data)

    def test_check_dtype(self, data):
        """Comparing ``df.dtypes`` with the dtype string flags EA columns."""
        dtype = data.dtype

        # check equivalency for using .dtypes
        df = pd.DataFrame({'A': pd.Series(data, dtype=dtype),
                           'B': data,
                           'C': 'foo', 'D': 1})

        # np.dtype('int64') == 'Int64' == 'int64'
        # so can't distinguish
        if dtype.name == 'Int64':
            expected = pd.Series([True, True, False, True],
                                 index=list('ABCD'))
        else:
            expected = pd.Series([True, True, False, False],
                                 index=list('ABCD'))

        # XXX: This should probably be *fixed* not ignored.
        # See libops.scalar_compare
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DeprecationWarning)
            result = df.dtypes == str(dtype)

        self.assert_series_equal(result, expected)

        expected = pd.Series([True, True, False, False],
                             index=list('ABCD'))
        result = df.dtypes.apply(str) == str(dtype)
        self.assert_series_equal(result, expected)

    def test_hashable(self, dtype):
        """Hashing the dtype does not raise."""
        hash(dtype)  # no error
|
||||
@@ -0,0 +1,248 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseGetitemTests(BaseExtensionTests):
    """Tests for ExtensionArray.__getitem__."""

    def test_iloc_series(self, data):
        """``Series.iloc`` with a slice or a sequence gives the first four."""
        ser = pd.Series(data)
        result = ser.iloc[:4]
        expected = pd.Series(data[:4])
        self.assert_series_equal(result, expected)

        result = ser.iloc[[0, 1, 2, 3]]
        self.assert_series_equal(result, expected)

    def test_iloc_frame(self, data):
        """``DataFrame.iloc`` row selection by slice and by sequence."""
        df = pd.DataFrame({"A": data, 'B':
                           np.arange(len(data), dtype='int64')})
        expected = pd.DataFrame({"A": data[:4]})

        # slice -> frame
        result = df.iloc[:4, [0]]
        self.assert_frame_equal(result, expected)

        # sequence -> frame
        result = df.iloc[[0, 1, 2, 3], [0]]
        self.assert_frame_equal(result, expected)

        expected = pd.Series(data[:4], name='A')

        # slice -> series
        result = df.iloc[:4, 0]
        self.assert_series_equal(result, expected)

        # sequence -> series (a list of rows, so this case is not a
        # repeat of the slice case above)
        result = df.iloc[[0, 1, 2, 3], 0]
        self.assert_series_equal(result, expected)

    def test_loc_series(self, data):
        """``Series.loc`` with a slice or a sequence gives the first four."""
        ser = pd.Series(data)
        result = ser.loc[:3]
        expected = pd.Series(data[:4])
        self.assert_series_equal(result, expected)

        result = ser.loc[[0, 1, 2, 3]]
        self.assert_series_equal(result, expected)

    def test_loc_frame(self, data):
        """``DataFrame.loc`` row selection by slice and by sequence."""
        df = pd.DataFrame({"A": data,
                           'B': np.arange(len(data), dtype='int64')})
        expected = pd.DataFrame({"A": data[:4]})

        # slice -> frame
        result = df.loc[:3, ['A']]
        self.assert_frame_equal(result, expected)

        # sequence -> frame
        result = df.loc[[0, 1, 2, 3], ['A']]
        self.assert_frame_equal(result, expected)

        expected = pd.Series(data[:4], name='A')

        # slice -> series
        result = df.loc[:3, 'A']
        self.assert_series_equal(result, expected)

        # sequence -> series (a list of labels, so this case is not a
        # repeat of the slice case above)
        result = df.loc[[0, 1, 2, 3], 'A']
        self.assert_series_equal(result, expected)

    def test_getitem_scalar(self, data):
        """Scalar access returns an instance of ``dtype.type``."""
        result = data[0]
        assert isinstance(result, data.dtype.type)

        result = pd.Series(data)[0]
        assert isinstance(result, data.dtype.type)

    def test_getitem_scalar_na(self, data_missing, na_cmp, na_value):
        """Scalar access to a missing entry compares equal to ``na_value``."""
        result = data_missing[0]
        assert na_cmp(result, na_value)

    def test_getitem_mask(self, data):
        """Boolean masks select zero or one element, keeping the type."""
        # Empty mask, raw array
        mask = np.zeros(len(data), dtype=bool)
        result = data[mask]
        assert len(result) == 0
        assert isinstance(result, type(data))

        # Empty mask, in series
        mask = np.zeros(len(data), dtype=bool)
        result = pd.Series(data)[mask]
        assert len(result) == 0
        assert result.dtype == data.dtype

        # non-empty mask, raw array
        mask[0] = True
        result = data[mask]
        assert len(result) == 1
        assert isinstance(result, type(data))

        # non-empty mask, in series
        result = pd.Series(data)[mask]
        assert len(result) == 1
        assert result.dtype == data.dtype

    def test_getitem_slice(self, data):
        """Slicing always returns an array, even for zero or one element."""
        # getitem[slice] should return an array
        result = data[slice(0)]  # empty
        assert isinstance(result, type(data))

        result = data[slice(1)]  # scalar
        assert isinstance(result, type(data))

    def test_get(self, data):
        """``Series.get`` for labels, lists, slices and missing keys."""
        # GH 20882
        s = pd.Series(data, index=[2 * i for i in range(len(data))])
        assert s.get(4) == s.iloc[2]

        result = s.get([4, 6])
        expected = s.iloc[[2, 3]]
        self.assert_series_equal(result, expected)

        result = s.get(slice(2))
        expected = s.iloc[[0, 1]]
        self.assert_series_equal(result, expected)

        assert s.get(-1) is None
        assert s.get(s.index.max() + 1) is None

        s = pd.Series(data[:6], index=list('abcdef'))
        assert s.get('c') == s.iloc[2]

        result = s.get(slice('b', 'd'))
        expected = s.iloc[[1, 2, 3]]
        self.assert_series_equal(result, expected)

        result = s.get('Z')
        assert result is None

        assert s.get(4) == s.iloc[4]
        assert s.get(-1) == s.iloc[-1]
        assert s.get(len(s)) is None

        # GH 21257
        s = pd.Series(data)
        s2 = s[::2]
        assert s2.get(1) is None

    def test_take_sequence(self, data):
        """Indexing a Series with a list picks the matching elements."""
        result = pd.Series(data)[[0, 1, 3]]
        assert result.iloc[0] == data[0]
        assert result.iloc[1] == data[1]
        assert result.iloc[2] == data[3]

    def test_take(self, data, na_value, na_cmp):
        """``take`` with and without ``allow_fill``, plus out of bounds."""
        result = data.take([0, -1])
        assert result.dtype == data.dtype
        assert result[0] == data[0]
        assert result[1] == data[-1]

        result = data.take([0, -1], allow_fill=True, fill_value=na_value)
        assert result[0] == data[0]
        assert na_cmp(result[1], na_value)

        with pytest.raises(IndexError, match="out of bounds"):
            data.take([len(data) + 1])

    def test_take_empty(self, data, na_value, na_cmp):
        """``take`` on an empty array fills with NA or raises."""
        empty = data[:0]

        result = empty.take([-1], allow_fill=True)
        assert na_cmp(result[0], na_value)

        with pytest.raises(IndexError):
            empty.take([-1])

        with pytest.raises(IndexError, match="cannot do a non-empty take"):
            empty.take([0, 1])

    def test_take_negative(self, data):
        """Negative indices count from the end when filling is off."""
        # https://github.com/pandas-dev/pandas/issues/20640
        n = len(data)
        result = data.take([0, -n, n - 1, -1])
        expected = data.take([0, 0, n - 1, n - 1])
        self.assert_extension_array_equal(result, expected)

    def test_take_non_na_fill_value(self, data_missing):
        """``take`` honours a valid, non-NA ``fill_value``."""
        fill_value = data_missing[1]  # valid
        na = data_missing[0]

        array = data_missing._from_sequence([na, fill_value, na])
        result = array.take([-1, 1], fill_value=fill_value, allow_fill=True)
        expected = array.take([1, 1])
        self.assert_extension_array_equal(result, expected)

    def test_take_pandas_style_negative_raises(self, data, na_value):
        """With ``allow_fill``, an index below -1 raises ``ValueError``."""
        with pytest.raises(ValueError):
            data.take([0, -2], fill_value=na_value, allow_fill=True)

    @pytest.mark.parametrize('allow_fill', [True, False])
    def test_take_out_of_bounds_raises(self, data, allow_fill):
        """An index equal to the length raises ``IndexError``."""
        arr = data[:3]
        with pytest.raises(IndexError):
            arr.take(np.asarray([0, 3]), allow_fill=allow_fill)

    def test_take_series(self, data):
        """``Series.take`` keeps the original labels of the taken rows."""
        s = pd.Series(data)
        result = s.take([0, -1])
        expected = pd.Series(
            data._from_sequence([data[0], data[len(data) - 1]], dtype=s.dtype),
            index=[0, len(data) - 1])
        self.assert_series_equal(result, expected)

    def test_reindex(self, data, na_value):
        """``reindex`` takes existing labels and fills missing ones with NA."""
        s = pd.Series(data)
        result = s.reindex([0, 1, 3])
        expected = pd.Series(data.take([0, 1, 3]), index=[0, 1, 3])
        self.assert_series_equal(result, expected)

        n = len(data)
        result = s.reindex([-1, 0, n])
        expected = pd.Series(
            data._from_sequence([na_value, data[0], na_value],
                                dtype=s.dtype),
            index=[-1, 0, n])
        self.assert_series_equal(result, expected)

        result = s.reindex([n, n + 1])
        expected = pd.Series(data._from_sequence([na_value, na_value],
                                                 dtype=s.dtype),
                             index=[n, n + 1])
        self.assert_series_equal(result, expected)

    def test_reindex_non_na_fill_value(self, data_missing):
        """``reindex`` honours a valid, non-NA ``fill_value``."""
        valid = data_missing[1]
        na = data_missing[0]

        array = data_missing._from_sequence([na, valid])
        ser = pd.Series(array)
        result = ser.reindex([0, 1, 2], fill_value=valid)
        expected = pd.Series(data_missing._from_sequence([na, valid, valid]))

        self.assert_series_equal(result, expected)
|
||||
@@ -0,0 +1,83 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseGroupbyTests(BaseExtensionTests):
    """Tests for grouping by, and grouping over, extension columns."""

    def test_grouping_grouper(self, data_for_grouping):
        """The grouper of each grouping exposes the column's own values."""
        frame = pd.DataFrame({
            "A": ["B", "B", None, None, "A", "A", "B", "C"],
            "B": data_for_grouping
        })
        object_grouping = frame.groupby("A").grouper.groupings[0]
        extension_grouping = frame.groupby("B").grouper.groupings[0]

        tm.assert_numpy_array_equal(object_grouping.grouper, frame.A.values)
        tm.assert_extension_array_equal(extension_grouping.grouper,
                                        data_for_grouping)

    @pytest.mark.parametrize('as_index', [True, False])
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
        """Mean of ``A`` per extension key, with and without ``as_index``."""
        frame = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
                              "B": data_for_grouping})
        result = frame.groupby("B", as_index=as_index).A.mean()
        _, uniques = pd.factorize(data_for_grouping, sort=True)

        expected = pd.Series([3, 1, 4],
                             index=pd.Index(uniques, name="B"),
                             name="A")
        if not as_index:
            # Without as_index the result is a frame with "B" as a column.
            self.assert_frame_equal(result, expected.reset_index())
            return
        self.assert_series_equal(result, expected)

    def test_groupby_extension_no_sort(self, data_for_grouping):
        """With ``sort=False`` groups follow the unsorted factorize order."""
        frame = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
                              "B": data_for_grouping})
        result = frame.groupby("B", sort=False).A.mean()
        _, uniques = pd.factorize(data_for_grouping, sort=False)

        expected = pd.Series([1, 3, 4],
                             index=pd.Index(uniques, name="B"),
                             name="A")
        self.assert_series_equal(result, expected)

    def test_groupby_extension_transform(self, data_for_grouping):
        """``transform(len)`` broadcasts group sizes over non-NA keys."""
        not_missing = ~data_for_grouping.isna()
        frame = pd.DataFrame({"A": [1, 1, 3, 3, 1, 4],
                              "B": data_for_grouping[not_missing]})

        expected = pd.Series([3, 3, 2, 2, 3, 1], name="A")
        result = frame.groupby("B").A.transform(len)

        self.assert_series_equal(result, expected)

    @pytest.mark.parametrize('op', [
        lambda x: 1,
        lambda x: [1] * len(x),
        lambda x: pd.Series([1] * len(x)),
        lambda x: x,
    ], ids=['scalar', 'list', 'series', 'object'])
    def test_groupby_extension_apply(self, data_for_grouping, op):
        """Smoke test: ``apply`` runs for each key/column combination."""
        frame = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
                              "B": data_for_grouping})
        # Only absence of errors is checked; results are discarded.
        frame.groupby("B").apply(op)
        frame.groupby("B").A.apply(op)
        frame.groupby("A").apply(op)
        frame.groupby("A").B.apply(op)

    def test_in_numeric_groupby(self, data_for_grouping):
        """The extension column survives ``sum`` only if dtype is numeric."""
        frame = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
                              "B": data_for_grouping,
                              "C": [1, 1, 1, 1, 1, 1, 1, 1]})
        result = frame.groupby("A").sum().columns

        kept = ['B', 'C'] if data_for_grouping.dtype._is_numeric else ['C']
        expected = pd.Index(kept)

        tm.assert_index_equal(result, expected)
|
||||
@@ -0,0 +1,68 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.common import is_extension_array_dtype
|
||||
from pandas.core.dtypes.dtypes import ExtensionDtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseInterfaceTests(BaseExtensionTests):
    """Checks that an array provides the basic ExtensionArray interface."""
    # ------------------------------------------------------------------------
    # Interface
    # ------------------------------------------------------------------------

    def test_len(self, data):
        """The ``data`` fixture has exactly 100 elements."""
        expected_length = 100
        assert len(data) == expected_length

    def test_ndim(self, data):
        """The array is one-dimensional."""
        assert data.ndim == 1

    def test_can_hold_na_valid(self, data):
        """``_can_hold_na`` is exactly ``True``."""
        # GH-20761
        assert data._can_hold_na is True

    def test_memory_usage(self, data):
        """``Series.memory_usage`` without the index equals ``nbytes``."""
        ser = pd.Series(data)
        assert ser.memory_usage(index=False) == ser.nbytes

    def test_array_interface(self, data):
        """``np.array`` conversion agrees with the array's own elements."""
        converted = np.array(data)
        assert converted[0] == data[0]

        as_objects = np.array(data, dtype=object)
        from_list = np.array(list(data), dtype=object)
        tm.assert_numpy_array_equal(as_objects, from_list)

    def test_is_extension_array_dtype(self, data):
        """Array, dtype and Series are all recognised as extension types."""
        for candidate in (data, data.dtype, pd.Series(data)):
            assert is_extension_array_dtype(candidate)
        assert isinstance(data.dtype, ExtensionDtype)

    def test_no_values_attribute(self, data):
        """The array has neither ``values`` nor ``_values``."""
        # GH-20735: EA's with .values attribute give problems with internal
        # code, disallowing this for now until solved
        assert not hasattr(data, 'values')
        assert not hasattr(data, '_values')

    def test_is_numeric_honored(self, data):
        """The block's ``is_numeric`` is the dtype's ``_is_numeric``."""
        block = pd.Series(data)._data.blocks[0]
        assert block.is_numeric is data.dtype._is_numeric

    def test_isna_extension_array(self, data_missing):
        """An extension-typed ``isna`` result supports ``any`` and ``all``."""
        # If your `isna` returns an ExtensionArray, you must also implement
        # _reduce. At the *very* least, you must implement any and all
        mask = data_missing.isna()
        if not is_extension_array_dtype(mask):
            return

        assert mask._reduce('any')
        assert mask.any()

        assert not mask._reduce('all')
        assert not mask.all()

        assert mask.dtype._is_boolean
|
||||
@@ -0,0 +1,23 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import StringIO
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseParsingTests(BaseExtensionTests):
    """Tests for reading extension dtypes back from CSV text."""

    @pytest.mark.parametrize('engine', ['c', 'python'])
    def test_EA_types(self, engine, data):
        """A CSV round trip with an explicit dtype reproduces the frame."""
        dtype_name = str(data.dtype)
        expected = pd.DataFrame({
            'with_dtype': pd.Series(data, dtype=dtype_name)
        })
        csv_text = expected.to_csv(index=False, na_rep=np.nan)

        result = pd.read_csv(StringIO(csv_text),
                             dtype={'with_dtype': dtype_name},
                             engine=engine)
        self.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,341 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseMethodsTests(BaseExtensionTests):
    """Various Series and DataFrame methods."""

    @pytest.mark.parametrize('dropna', [True, False])
    def test_value_counts(self, all_data, dropna):
        """``value_counts`` on the array matches counts of the plain values.

        Only the first ten elements of ``all_data`` are used. When ``dropna``
        is set, the reference is an ndarray of the non-missing elements.
        """
        all_data = all_data[:10]
        if dropna:
            other = np.array(all_data[~all_data.isna()])
        else:
            other = all_data

        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
        expected = pd.Series(other).value_counts(
            dropna=dropna).sort_index()

        self.assert_series_equal(result, expected)

    def test_count(self, data_missing):
        """Row-wise ``DataFrame.count`` does not count the missing element."""
        df = pd.DataFrame({"A": data_missing})
        result = df.count(axis='columns')
        # The expectation implies ``data_missing`` is [missing, valid].
        expected = pd.Series([0, 1])
        self.assert_series_equal(result, expected)

    def test_apply_simple_series(self, data):
        """``Series.apply`` with a scalar function returns a Series."""
        result = pd.Series(data).apply(id)
        assert isinstance(result, pd.Series)

    def test_argsort(self, data_for_sorting):
        """``Series.argsort`` yields the positions in ascending order."""
        result = pd.Series(data_for_sorting).argsort()
        expected = pd.Series(np.array([2, 0, 1], dtype=np.int64))
        self.assert_series_equal(result, expected)

    def test_argsort_missing(self, data_missing_for_sorting):
        """``Series.argsort`` marks the missing element with -1."""
        result = pd.Series(data_missing_for_sorting).argsort()
        expected = pd.Series(np.array([1, -1, 0], dtype=np.int64))
        self.assert_series_equal(result, expected)

    @pytest.mark.parametrize('ascending', [True, False])
    def test_sort_values(self, data_for_sorting, ascending):
        """``Series.sort_values`` orders the values in both directions."""
        ser = pd.Series(data_for_sorting)
        result = ser.sort_values(ascending=ascending)
        expected = ser.iloc[[2, 0, 1]]
        if not ascending:
            expected = expected[::-1]

        self.assert_series_equal(result, expected)

    @pytest.mark.parametrize('ascending', [True, False])
    def test_sort_values_missing(self, data_missing_for_sorting, ascending):
        """``Series.sort_values`` puts the missing element last either way."""
        ser = pd.Series(data_missing_for_sorting)
        result = ser.sort_values(ascending=ascending)
        if ascending:
            expected = ser.iloc[[2, 0, 1]]
        else:
            expected = ser.iloc[[0, 2, 1]]
        self.assert_series_equal(result, expected)

    @pytest.mark.parametrize('ascending', [True, False])
    def test_sort_values_frame(self, data_for_sorting, ascending):
        """``DataFrame.sort_values`` on an int column plus the array column.

        The ``ascending`` parameter is forwarded to ``sort_values``. The
        (A, B) pairs contain no ties, so the descending result is exactly
        the ascending result in reverse row order.
        """
        df = pd.DataFrame({"A": [1, 2, 1],
                           "B": data_for_sorting})
        result = df.sort_values(['A', 'B'], ascending=ascending)
        expected = pd.DataFrame({"A": [1, 1, 2],
                                 'B': data_for_sorting.take([2, 0, 1])},
                                index=[2, 0, 1])
        if not ascending:
            expected = expected.iloc[::-1]
        self.assert_frame_equal(result, expected)

    @pytest.mark.parametrize('box', [pd.Series, lambda x: x])
    @pytest.mark.parametrize('method', [lambda x: x.unique(), pd.unique])
    def test_unique(self, data, box, method):
        """``unique`` collapses a duplicated value and keeps the array type."""
        duplicated = box(data._from_sequence([data[0], data[0]]))

        result = method(duplicated)

        assert len(result) == 1
        assert isinstance(result, type(data))
        assert result[0] == duplicated[0]

    @pytest.mark.parametrize('na_sentinel', [-1, -2])
    def test_factorize(self, data_for_grouping, na_sentinel):
        """``pd.factorize`` returns the expected labels and uniques.

        Missing positions must be labelled with ``na_sentinel``.
        """
        labels, uniques = pd.factorize(data_for_grouping,
                                       na_sentinel=na_sentinel)
        expected_labels = np.array([0, 0, na_sentinel,
                                    na_sentinel, 1, 1, 0, 2],
                                   dtype=np.intp)
        expected_uniques = data_for_grouping.take([0, 4, 7])

        tm.assert_numpy_array_equal(labels, expected_labels)
        self.assert_extension_array_equal(uniques, expected_uniques)

    @pytest.mark.parametrize('na_sentinel', [-1, -2])
    def test_factorize_equivalence(self, data_for_grouping, na_sentinel):
        """``pd.factorize`` and the array's own ``factorize`` agree."""
        l1, u1 = pd.factorize(data_for_grouping, na_sentinel=na_sentinel)
        l2, u2 = data_for_grouping.factorize(na_sentinel=na_sentinel)

        tm.assert_numpy_array_equal(l1, l2)
        self.assert_extension_array_equal(u1, u2)

    def test_factorize_empty(self, data):
        """Factorizing an empty slice gives empty labels and uniques."""
        labels, uniques = pd.factorize(data[:0])
        expected_labels = np.array([], dtype=np.intp)
        expected_uniques = type(data)._from_sequence([], dtype=data[:0].dtype)

        tm.assert_numpy_array_equal(labels, expected_labels)
        self.assert_extension_array_equal(uniques, expected_uniques)

    def test_fillna_copy_frame(self, data_missing):
        """``DataFrame.fillna`` returns new values even with nothing to fill."""
        arr = data_missing.take([1, 1])
        df = pd.DataFrame({"A": arr})

        filled_val = df.iloc[0, 0]
        result = df.fillna(filled_val)

        assert df.A.values is not result.A.values

    def test_fillna_copy_series(self, data_missing):
        """``Series.fillna`` copies, and the Series still wraps ``arr``."""
        arr = data_missing.take([1, 1])
        ser = pd.Series(arr)

        filled_val = ser[0]
        result = ser.fillna(filled_val)

        assert ser._values is not result._values
        assert ser._values is arr

    def test_fillna_length_mismatch(self, data_missing):
        """Filling with an array of the wrong length raises ``ValueError``."""
        msg = "Length of 'value' does not match."
        with pytest.raises(ValueError, match=msg):
            data_missing.fillna(data_missing.take([1]))

    def test_combine_le(self, data_repeated):
        """``Series.combine`` with ``<=`` against a Series and a scalar."""
        # GH 20825
        # Test that combine works when doing a <= (le) comparison
        orig_data1, orig_data2 = data_repeated(2)
        s1 = pd.Series(orig_data1)
        s2 = pd.Series(orig_data2)
        result = s1.combine(s2, lambda x1, x2: x1 <= x2)
        expected = pd.Series([a <= b for (a, b) in
                              zip(list(orig_data1), list(orig_data2))])
        self.assert_series_equal(result, expected)

        val = s1.iloc[0]
        result = s1.combine(val, lambda x1, x2: x1 <= x2)
        expected = pd.Series([a <= val for a in list(orig_data1)])
        self.assert_series_equal(result, expected)

    def test_combine_add(self, data_repeated):
        """``Series.combine`` with ``+`` against a Series and a scalar."""
        # GH 20825
        orig_data1, orig_data2 = data_repeated(2)
        s1 = pd.Series(orig_data1)
        s2 = pd.Series(orig_data2)
        result = s1.combine(s2, lambda x1, x2: x1 + x2)
        # Overflow warnings from the element-wise reference sum are ignored.
        with np.errstate(over='ignore'):
            expected = pd.Series(
                orig_data1._from_sequence([a + b for (a, b) in
                                           zip(list(orig_data1),
                                               list(orig_data2))]))
        self.assert_series_equal(result, expected)

        val = s1.iloc[0]
        result = s1.combine(val, lambda x1, x2: x1 + x2)
        expected = pd.Series(
            orig_data1._from_sequence([a + val for a in list(orig_data1)]))
        self.assert_series_equal(result, expected)

    def test_combine_first(self, data):
        """``combine_first`` of two overlapping slices rebuilds the prefix."""
        # https://github.com/pandas-dev/pandas/issues/24147
        a = pd.Series(data[:3])
        b = pd.Series(data[2:5], index=[2, 3, 4])
        result = a.combine_first(b)
        expected = pd.Series(data[:5])
        self.assert_series_equal(result, expected)

    @pytest.mark.parametrize('frame', [True, False])
    @pytest.mark.parametrize('periods, indices', [
        (-2, [2, 3, 4, -1, -1]),
        (0, [0, 1, 2, 3, 4]),
        (2, [-1, -1, 0, 1, 2]),
    ])
    def test_container_shift(self, data, frame, periods, indices):
        """``shift`` on a Series or DataFrame matches ``take`` with fill.

        ``indices`` lists the source position for each output slot, with -1
        marking slots that are filled.
        """
        # https://github.com/pandas-dev/pandas/issues/22386
        subset = data[:5]
        data = pd.Series(subset, name='A')
        expected = pd.Series(subset.take(indices, allow_fill=True), name='A')

        if frame:
            result = data.to_frame(name='A').assign(B=1).shift(periods)
            expected = pd.concat([
                expected,
                pd.Series([1] * 5, name='B').shift(periods)
            ], axis=1)
            compare = self.assert_frame_equal
        else:
            result = data.shift(periods)
            compare = self.assert_series_equal

        compare(result, expected)

    @pytest.mark.parametrize('periods, indices', [
        [-4, [-1, -1]],
        [-1, [1, -1]],
        [0, [0, 1]],
        [1, [-1, 0]],
        [4, [-1, -1]]
    ])
    def test_shift_non_empty_array(self, data, periods, indices):
        """The array's own ``shift`` matches ``take`` with fill on 2 items."""
        # https://github.com/pandas-dev/pandas/issues/23911
        subset = data[:2]
        result = subset.shift(periods)
        expected = subset.take(indices, allow_fill=True)
        self.assert_extension_array_equal(result, expected)

    @pytest.mark.parametrize('periods', [
        -4, -1, 0, 1, 4
    ])
    def test_shift_empty_array(self, data, periods):
        """Shifting an empty array gives an equal empty array."""
        # https://github.com/pandas-dev/pandas/issues/23911
        empty = data[:0]
        result = empty.shift(periods)
        expected = empty
        self.assert_extension_array_equal(result, expected)

    def test_shift_fill_value(self, data):
        """``shift`` uses ``fill_value`` for the vacated positions."""
        arr = data[:4]
        fill_value = data[0]
        result = arr.shift(1, fill_value=fill_value)
        expected = data.take([0, 0, 1, 2])
        self.assert_extension_array_equal(result, expected)

        result = arr.shift(-2, fill_value=fill_value)
        expected = data.take([2, 3, 0, 0])
        self.assert_extension_array_equal(result, expected)

    @pytest.mark.parametrize("as_frame", [True, False])
    def test_hash_pandas_object_works(self, data, as_frame):
        """``hash_pandas_object`` runs and is repeatable on the same input."""
        # https://github.com/pandas-dev/pandas/issues/23066
        data = pd.Series(data)
        if as_frame:
            data = data.to_frame()
        a = pd.util.hash_pandas_object(data)
        b = pd.util.hash_pandas_object(data)
        self.assert_equal(a, b)

    @pytest.mark.parametrize("as_series", [True, False])
    def test_searchsorted(self, data_for_sorting, as_series):
        """``searchsorted`` finds left/right insertion points.

        The fixture unpacks as ``b, c, a``; the test rebuilds it in the order
        ``[a, b, c]`` before searching.
        """
        b, c, a = data_for_sorting
        arr = type(data_for_sorting)._from_sequence([a, b, c])

        if as_series:
            arr = pd.Series(arr)
        assert arr.searchsorted(a) == 0
        assert arr.searchsorted(a, side="right") == 1

        assert arr.searchsorted(b) == 1
        assert arr.searchsorted(b, side="right") == 2

        assert arr.searchsorted(c) == 2
        assert arr.searchsorted(c, side="right") == 3

        result = arr.searchsorted(arr.take([0, 2]))
        expected = np.array([0, 2], dtype=np.intp)

        tm.assert_numpy_array_equal(result, expected)

        # sorter
        sorter = np.array([1, 2, 0])
        assert data_for_sorting.searchsorted(a, sorter=sorter) == 0

    @pytest.mark.parametrize("as_frame", [True, False])
    def test_where_series(self, data, na_value, as_frame):
        """``where`` masks with NA by default, or with a supplied ``other``."""
        assert data[0] != data[1]
        cls = type(data)
        a, b = data[:2]

        ser = pd.Series(cls._from_sequence([a, a, b, b], dtype=data.dtype))
        cond = np.array([True, True, False, False])

        if as_frame:
            ser = ser.to_frame(name='a')
            cond = cond.reshape(-1, 1)

        result = ser.where(cond)
        expected = pd.Series(cls._from_sequence([a, a, na_value, na_value],
                                                dtype=data.dtype))

        if as_frame:
            expected = expected.to_frame(name='a')
        self.assert_equal(result, expected)

        # array other
        cond = np.array([True, False, True, True])
        other = cls._from_sequence([a, b, a, b], dtype=data.dtype)
        if as_frame:
            other = pd.DataFrame({"a": other})
            cond = pd.DataFrame({"a": cond})
        result = ser.where(cond, other)
        expected = pd.Series(cls._from_sequence([a, b, b, b],
                                                dtype=data.dtype))
        if as_frame:
            expected = expected.to_frame(name='a')
        self.assert_equal(result, expected)

    @pytest.mark.parametrize("use_numpy", [True, False])
    @pytest.mark.parametrize("as_series", [True, False])
    @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]])
    def test_repeat(self, data, repeats, as_series, use_numpy):
        """``repeat`` (method or ``np.repeat``) repeats each element."""
        arr = type(data)._from_sequence(data[:3], dtype=data.dtype)
        if as_series:
            arr = pd.Series(arr)

        result = np.repeat(arr, repeats) if use_numpy else arr.repeat(repeats)

        # Broadcast a scalar count so the reference loop handles both forms.
        repeats = [repeats] * 3 if isinstance(repeats, int) else repeats
        expected = [x for x, n in zip(arr, repeats) for _ in range(n)]
        expected = type(data)._from_sequence(expected, dtype=data.dtype)
        if as_series:
            expected = pd.Series(expected, index=arr.index.repeat(repeats))

        self.assert_equal(result, expected)

    @pytest.mark.parametrize("use_numpy", [True, False])
    @pytest.mark.parametrize('repeats, kwargs, error, msg', [
        (2, dict(axis=1), ValueError, "'axis"),
        (-1, dict(), ValueError, "negative"),
        ([1, 2], dict(), ValueError, "shape"),
        (2, dict(foo='bar'), TypeError, "'foo'")])
    def test_repeat_raises(self, data, repeats, kwargs, error, msg, use_numpy):
        """Invalid ``repeat`` arguments raise the expected error type."""
        with pytest.raises(error, match=msg):
            if use_numpy:
                np.repeat(data, repeats, **kwargs)
            else:
                data.repeat(repeats, **kwargs)
|
||||
@@ -0,0 +1,132 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseMissingTests(BaseExtensionTests):
    """Tests for missing-value detection, dropping and filling.

    The expectations below imply that the ``data_missing`` fixture is a
    length-2 array laid out as [missing, valid].
    """

    def test_isna(self, data_missing):
        """``isna`` works on the array, a Series and an emptied Series."""
        expected = np.array([True, False])

        result = pd.isna(data_missing)
        tm.assert_numpy_array_equal(result, expected)

        result = pd.Series(data_missing).isna()
        expected = pd.Series(expected)
        self.assert_series_equal(result, expected)

        # GH 21189
        result = pd.Series(data_missing).drop([0, 1]).isna()
        expected = pd.Series([], dtype=bool)
        self.assert_series_equal(result, expected)

    def test_dropna_array(self, data_missing):
        """``dropna`` on the array keeps only the valid element."""
        result = data_missing.dropna()
        expected = data_missing[[1]]
        self.assert_extension_array_equal(result, expected)

    def test_dropna_series(self, data_missing):
        """``Series.dropna`` keeps only the valid row."""
        ser = pd.Series(data_missing)
        result = ser.dropna()
        expected = ser.iloc[[1]]
        self.assert_series_equal(result, expected)

    def test_dropna_frame(self, data_missing):
        """``DataFrame.dropna`` by rows, by columns and with two columns."""
        df = pd.DataFrame({"A": data_missing})

        # defaults
        result = df.dropna()
        expected = df.iloc[[1]]
        self.assert_frame_equal(result, expected)

        # axis = 1
        result = df.dropna(axis='columns')
        expected = pd.DataFrame(index=[0, 1])
        self.assert_frame_equal(result, expected)

        # multiple
        df = pd.DataFrame({"A": data_missing,
                           "B": [1, np.nan]})
        result = df.dropna()
        expected = df.iloc[:0]
        self.assert_frame_equal(result, expected)

    def test_fillna_scalar(self, data_missing):
        """``fillna`` with a scalar replaces the missing element.

        The expected array is built independently from the valid scalar, so
        the test fails if ``fillna`` leaves the missing value in place.
        """
        valid = data_missing[1]
        result = data_missing.fillna(valid)
        expected = data_missing._from_sequence(
            [valid, valid], dtype=data_missing.dtype)
        self.assert_extension_array_equal(result, expected)

    def test_fillna_limit_pad(self, data_missing):
        """Forward fill with ``limit=2`` fills only two of three gaps."""
        arr = data_missing.take([1, 0, 0, 0, 1])
        result = pd.Series(arr).fillna(method='ffill', limit=2)
        expected = pd.Series(data_missing.take([1, 1, 1, 0, 1]))
        self.assert_series_equal(result, expected)

    def test_fillna_limit_backfill(self, data_missing):
        """Backward fill with ``limit=2`` fills only two of three gaps."""
        arr = data_missing.take([1, 0, 0, 0, 1])
        result = pd.Series(arr).fillna(method='backfill', limit=2)
        expected = pd.Series(data_missing.take([1, 0, 1, 1, 1]))
        self.assert_series_equal(result, expected)

    def test_fillna_series(self, data_missing):
        """``Series.fillna`` with a scalar, a filled Series and itself."""
        fill_value = data_missing[1]
        ser = pd.Series(data_missing)

        result = ser.fillna(fill_value)
        expected = pd.Series(data_missing._from_sequence(
            [fill_value, fill_value], dtype=data_missing.dtype))
        self.assert_series_equal(result, expected)

        # Fill with a series
        result = ser.fillna(expected)
        self.assert_series_equal(result, expected)

        # Fill with a series not affecting the missing values
        result = ser.fillna(ser)
        self.assert_series_equal(result, ser)

    @pytest.mark.parametrize('method', ['ffill', 'bfill'])
    def test_fillna_series_method(self, data_missing, method):
        """``Series.fillna(method=...)`` fills from the neighbouring value."""
        fill_value = data_missing[1]

        if method == 'ffill':
            # Reverse so the valid element comes first and can be carried on.
            data_missing = data_missing[::-1]

        result = pd.Series(data_missing).fillna(method=method)
        expected = pd.Series(data_missing._from_sequence(
            [fill_value, fill_value], dtype=data_missing.dtype))

        self.assert_series_equal(result, expected)

    def test_fillna_frame(self, data_missing):
        """``DataFrame.fillna`` with a scalar fills the extension column."""
        fill_value = data_missing[1]

        result = pd.DataFrame({
            "A": data_missing,
            "B": [1, 2]
        }).fillna(fill_value)

        expected = pd.DataFrame({
            "A": data_missing._from_sequence([fill_value, fill_value],
                                             dtype=data_missing.dtype),
            "B": [1, 2],
        })

        self.assert_frame_equal(result, expected)

    def test_fillna_fill_other(self, data):
        """Filling another column by dict leaves the extension column as is."""
        result = pd.DataFrame({
            "A": data,
            "B": [np.nan] * len(data)
        }).fillna({"B": 0.0})

        expected = pd.DataFrame({
            "A": data,
            "B": [0.0] * len(result),
        })

        self.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,166 @@
|
||||
import operator
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core import ops
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseOpsUtil(BaseExtensionTests):
    """Shared helpers for the operator test classes."""

    def get_op_from_name(self, op_name):
        """Return a two-argument callable for a dunder operator name.

        Names found in the ``operator`` module (after stripping underscores)
        are returned directly. Any other name is assumed to be a reflected
        operator such as ``__radd__``: the leading ``r`` is dropped and the
        operands are swapped.
        """
        short_opname = op_name.strip('_')
        try:
            op = getattr(operator, short_opname)
        except AttributeError:
            # Assume it is the reverse operator
            rop = getattr(operator, short_opname[1:])
            op = lambda x, y: rop(y, x)

        return op

    def check_opname(self, s, op_name, other, exc=Exception):
        """Resolve ``op_name`` to a callable and delegate to ``_check_op``."""
        op = self.get_op_from_name(op_name)

        self._check_op(s, op, other, op_name, exc)

    def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
        """Check ``op(s, other)`` against ``s.combine`` or expect ``exc``.

        With ``exc=None`` the operation must succeed and equal the
        element-wise ``s.combine(other, op)``; otherwise it must raise
        ``exc``.
        """
        if exc is None:
            result = op(s, other)
            expected = s.combine(other, op)
            self.assert_series_equal(result, expected)
        else:
            with pytest.raises(exc):
                op(s, other)

    def _check_divmod_op(self, s, op, other, exc=Exception):
        """Check a divmod-style ``op`` against ``//`` and ``%``, or expect ``exc``.

        ``op`` is either the builtin ``divmod`` or a reflected variant; for
        the reflected variant the operands of the reference computation are
        swapped.
        """
        # divmod has multiple return values, so check separately
        if exc is None:
            result_div, result_mod = op(s, other)
            if op is divmod:
                expected_div, expected_mod = s // other, s % other
            else:
                expected_div, expected_mod = other // s, other % s
            self.assert_series_equal(result_div, expected_div)
            self.assert_series_equal(result_mod, expected_mod)
        else:
            with pytest.raises(exc):
                # Call the supplied op (not the builtin) so that a reflected
                # divmod is the operation actually exercised.
                op(s, other)
|
||||
|
||||
|
||||
class BaseArithmeticOpsTests(BaseOpsUtil):
    """Arithmetic operator tests for Series and DataFrames of the array.

    Each class attribute below names the exception a kind of operation is
    expected to raise. Subclasses whose arrays support that kind of
    operation should override the matching attribute:

    * series_scalar_exc = TypeError
    * frame_scalar_exc = TypeError
    * series_array_exc = TypeError
    * divmod_exc = TypeError
    """
    series_scalar_exc = TypeError
    frame_scalar_exc = TypeError
    series_array_exc = TypeError
    divmod_exc = TypeError

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        """Apply each arithmetic operator to a Series and its first scalar."""
        ser = pd.Series(data)
        first = ser.iloc[0]
        self.check_opname(ser, all_arithmetic_operators, first,
                          exc=self.series_scalar_exc)

    @pytest.mark.xfail(run=False, reason="_reduce needs implementation")
    def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
        """Apply each arithmetic operator to a DataFrame and a scalar."""
        frame = pd.DataFrame({'A': data})
        first = data[0]
        self.check_opname(frame, all_arithmetic_operators, first,
                          exc=self.frame_scalar_exc)

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """Apply each arithmetic operator to a Series and another Series."""
        ser = pd.Series(data)
        # Second operand: the first element repeated to the same length.
        repeated = pd.Series([ser.iloc[0]] * len(ser))
        self.check_opname(ser, all_arithmetic_operators, repeated,
                          exc=self.series_array_exc)

    def test_divmod(self, data):
        """Check ``divmod`` and its reflected form against the scalar 1."""
        ser = pd.Series(data)
        self._check_divmod_op(ser, divmod, 1, exc=self.divmod_exc)
        self._check_divmod_op(1, ops.rdivmod, ser, exc=self.divmod_exc)

    def test_divmod_series_array(self, data):
        """Check ``divmod`` of a Series with the raw array."""
        self._check_divmod_op(pd.Series(data), divmod, data)

    def test_add_series_with_extension_array(self, data):
        """Adding the raw array to a Series matches array addition."""
        result = pd.Series(data) + data
        expected = pd.Series(data + data)
        self.assert_series_equal(result, expected)

    def test_error(self, data, all_arithmetic_operators):
        """Looking up an arithmetic dunder on the array raises."""
        with pytest.raises(AttributeError):
            getattr(data, all_arithmetic_operators)

    def test_direct_arith_with_series_returns_not_implemented(self, data):
        """``__add__`` called directly with a Series gives NotImplemented."""
        # EAs should defer to pandas here: pandas unboxes the Series and
        # calls the array's op itself.
        other = pd.Series(data)
        if not hasattr(data, '__add__'):
            raise pytest.skip(
                "{} does not implement add".format(data.__class__.__name__)
            )
        assert data.__add__(other) is NotImplemented
|
||||
|
||||
|
||||
class BaseComparisonOpsTests(BaseOpsUtil):
    """Comparison operator tests for Series holding the array."""

    def _compare_other(self, s, data, op_name, other):
        """Check how the array and a Series compare against ``other``.

        The array's own dunder must return ``NotImplemented``. On a Series,
        ``__eq__`` must not be all-true, ``__ne__`` must be all-true, and
        every other comparison must raise ``TypeError``.
        """
        op = self.get_op_from_name(op_name)

        # array: the same expectation applies to every operator
        assert getattr(data, op_name)(other) is NotImplemented

        if op_name == '__eq__':
            assert not op(s, other).all()
            return
        if op_name == '__ne__':
            assert op(s, other).all()
            return

        # series: ordering comparisons use a Series rebuilt from ``data``
        rebuilt = pd.Series(data)
        with pytest.raises(TypeError):
            op(rebuilt, other)

    def test_compare_scalar(self, data, all_compare_operators):
        """Compare the data against the scalar 0."""
        self._compare_other(pd.Series(data), data, all_compare_operators, 0)

    def test_compare_array(self, data, all_compare_operators):
        """Compare the data against a Series of its repeated first element."""
        ser = pd.Series(data)
        repeated = pd.Series([data[0]] * len(data))
        self._compare_other(ser, data, all_compare_operators, repeated)

    def test_direct_arith_with_series_returns_not_implemented(self, data):
        """``__eq__`` called directly with a Series gives NotImplemented."""
        # EAs should defer to pandas here: pandas unboxes the Series and
        # calls the array's op itself.
        other = pd.Series(data)
        if not hasattr(data, '__eq__'):
            raise pytest.skip(
                "{} does not implement __eq__".format(data.__class__.__name__)
            )
        assert data.__eq__(other) is NotImplemented
|
||||
@@ -0,0 +1,44 @@
|
||||
import io
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import compat
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BasePrintingTests(BaseExtensionTests):
    """Tests checking the formatting of your EA when printed."""

    @pytest.mark.parametrize("size", ["big", "small"])
    def test_array_repr(self, data, size):
        """The array repr shows class name, length and dtype.

        The "big" variant (the data concatenated five times) must also be
        truncated with an ellipsis.
        """
        is_big = size != "small"
        if is_big:
            data = type(data)._concat_same_type([data] * 5)
        else:
            data = data[:5]

        text = repr(data)
        assert data.__class__.__name__ in text
        assert 'Length: {}'.format(len(data)) in text
        assert str(data.dtype) in text
        if size == 'big':
            assert '...' in text

    def test_array_repr_unicode(self, data):
        """Converting the array to text gives a text-type object."""
        as_text = compat.text_type(data)
        assert isinstance(as_text, compat.text_type)

    def test_series_repr(self, data):
        """The dtype name appears in the Series repr."""
        assert data.dtype.name in repr(pd.Series(data))

    def test_dataframe_repr(self, data):
        """Building the DataFrame repr does not raise."""
        frame = pd.DataFrame({"A": data})
        repr(frame)

    def test_dtype_name_in_info(self, data):
        """The dtype name appears in the ``DataFrame.info`` output."""
        sink = io.StringIO()
        pd.DataFrame({"A": data}).info(buf=sink)
        assert data.dtype.name in sink.getvalue()
|
||||
@@ -0,0 +1,61 @@
|
||||
import warnings
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseReduceTests(BaseExtensionTests):
    """
    Reduction specific tests. Generally these only
    make sense for numeric/boolean operations.
    """
    def check_reduce(self, s, op_name, skipna):
        """Compare a reduction on ``s`` with the same one on float64 data."""
        reduce_own = getattr(s, op_name)
        result = reduce_own(skipna=skipna)

        as_float = s.astype('float64')
        expected = getattr(as_float, op_name)(skipna=skipna)

        tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
class BaseNoReduceTests(BaseReduceTests):
    """Tests for arrays that define no reductions: each must raise."""

    @pytest.mark.parametrize('skipna', [True, False])
    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
        """Every numeric reduction on the Series raises ``TypeError``."""
        ser = pd.Series(data)
        with pytest.raises(TypeError):
            getattr(ser, all_numeric_reductions)(skipna=skipna)

    @pytest.mark.parametrize('skipna', [True, False])
    def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna):
        """Every boolean reduction on the Series raises ``TypeError``."""
        ser = pd.Series(data)
        with pytest.raises(TypeError):
            getattr(ser, all_boolean_reductions)(skipna=skipna)
|
||||
|
||||
|
||||
class BaseNumericReduceTests(BaseReduceTests):
    """Numeric reductions checked against a float64 reference."""

    @pytest.mark.parametrize('skipna', [True, False])
    def test_reduce_series(self, data, all_numeric_reductions, skipna):
        """Run each numeric reduction through ``check_reduce``."""
        ser = pd.Series(data)

        # min/max with empty produce numpy warnings
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            self.check_reduce(ser, all_numeric_reductions, skipna)
|
||||
|
||||
|
||||
class BaseBooleanReduceTests(BaseReduceTests):
    """Boolean reductions checked against a float64 reference."""

    @pytest.mark.parametrize('skipna', [True, False])
    def test_reduce_series(self, data, all_boolean_reductions, skipna):
        """Run each boolean reduction through ``check_reduce``."""
        self.check_reduce(pd.Series(data), all_boolean_reductions, skipna)
|
||||
@@ -0,0 +1,271 @@
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.internals import ExtensionBlock
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseReshapingTests(BaseExtensionTests):
|
||||
"""Tests for reshaping and concatenation."""
|
||||
@pytest.mark.parametrize('in_frame', [True, False])
def test_concat(self, data, in_frame):
    """Concatenating the data with itself keeps the dtype and block type."""
    container = pd.Series(data)
    if in_frame:
        container = pd.DataFrame(container)
    doubled = pd.concat([container] * 2, ignore_index=True)

    assert len(doubled) == len(data) * 2

    result_dtype = doubled.dtypes[0] if in_frame else doubled.dtype
    assert result_dtype == data.dtype
    assert isinstance(doubled._data.blocks[0], ExtensionBlock)
|
||||
|
||||
@pytest.mark.parametrize('in_frame', [True, False])
def test_concat_all_na_block(self, data_missing, in_frame):
    """Concatenate an all-valid piece with an all-missing piece."""
    valid_part = pd.Series(data_missing.take([1, 1]), index=[0, 1])
    na_part = pd.Series(data_missing.take([0, 0]), index=[2, 3])
    if in_frame:
        valid_part = pd.DataFrame({"a": valid_part})
        na_part = pd.DataFrame({"a": na_part})

    result = pd.concat([valid_part, na_part])

    if not in_frame:
        expected = pd.Series(data_missing.take([1, 1, 0, 0]))
        self.assert_series_equal(result, expected)
        return

    expected = pd.DataFrame({"a": data_missing.take([1, 1, 0, 0])})
    self.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_mixed_dtypes(self, data):
    """Concatenating with other dtypes matches concatenating object casts."""
    # https://github.com/pandas-dev/pandas/issues/20762
    ext_frame = pd.DataFrame({'A': data[:3]})
    int_frame = pd.DataFrame({"A": [1, 2, 3]})
    cat_frame = pd.DataFrame({"A": ['a', 'b', 'c']}).astype('category')
    frames = [ext_frame, int_frame, cat_frame]

    # dataframes
    result = pd.concat(frames)
    expected = pd.concat([frame.astype(object) for frame in frames])
    self.assert_frame_equal(result, expected)

    # series
    result = pd.concat([frame['A'] for frame in frames])
    expected = pd.concat([frame['A'].astype(object) for frame in frames])
    self.assert_series_equal(result, expected)

    # simple test for just EA and one other
    result = pd.concat([ext_frame, int_frame])
    expected = pd.concat([ext_frame.astype('object'),
                          int_frame.astype('object')])
    self.assert_frame_equal(result, expected)

    result = pd.concat([ext_frame['A'], int_frame['A']])
    expected = pd.concat([ext_frame['A'].astype('object'),
                          int_frame['A'].astype('object')])
    self.assert_series_equal(result, expected)
|
||||
|
||||
def test_concat_columns(self, data, na_value):
    """Column-wise concat with aligned and non-aligned indexes."""
    head = data[:3]
    left = pd.DataFrame({'A': head})
    right = pd.DataFrame({'B': [1, 2, 3]})

    expected = pd.DataFrame({'A': head, 'B': [1, 2, 3]})
    self.assert_frame_equal(pd.concat([left, right], axis=1), expected)
    self.assert_frame_equal(pd.concat([left['A'], right['B']], axis=1),
                            expected)

    # non-aligned
    right = pd.DataFrame({'B': [1, 2, 3]}, index=[1, 2, 3])
    padded = data._from_sequence(list(head) + [na_value], dtype=data.dtype)
    expected = pd.DataFrame({'A': padded, 'B': [np.nan, 1, 2, 3]})

    self.assert_frame_equal(pd.concat([left, right], axis=1), expected)
    self.assert_frame_equal(pd.concat([left['A'], right['B']], axis=1),
                            expected)
|
||||
|
||||
def test_align(self, data, na_value):
    """Aligning two offset Series pads each side with the NA value."""
    left_vals = data[:3]
    right_vals = data[2:5]
    left = pd.Series(left_vals)
    right = pd.Series(right_vals, index=[1, 2, 3])
    aligned_left, aligned_right = left.align(right)

    # Assumes that the ctor can take a list of scalars of the type
    expected_left = pd.Series(
        data._from_sequence(list(left_vals) + [na_value], dtype=data.dtype))
    expected_right = pd.Series(
        data._from_sequence([na_value] + list(right_vals), dtype=data.dtype))
    self.assert_series_equal(aligned_left, expected_left)
    self.assert_series_equal(aligned_right, expected_right)
|
||||
|
||||
def test_align_frame(self, data, na_value):
    """Aligning two offset DataFrames pads each side with the NA value."""
    left_vals = data[:3]
    right_vals = data[2:5]
    left = pd.DataFrame({'A': left_vals})
    right = pd.DataFrame({'A': right_vals}, index=[1, 2, 3])
    aligned_left, aligned_right = left.align(right)

    # Assumes that the ctor can take a list of scalars of the type
    padded_left = data._from_sequence(list(left_vals) + [na_value],
                                      dtype=data.dtype)
    padded_right = data._from_sequence([na_value] + list(right_vals),
                                       dtype=data.dtype)
    self.assert_frame_equal(aligned_left, pd.DataFrame({'A': padded_left}))
    self.assert_frame_equal(aligned_right, pd.DataFrame({'A': padded_right}))
|
||||
|
||||
def test_align_series_frame(self, data, na_value):
    """Aligning a Series to a DataFrame one row longer pads the Series."""
    # https://github.com/pandas-dev/pandas/issues/20576
    ser = pd.Series(data, name='a')
    frame = pd.DataFrame({"col": np.arange(len(ser) + 1)})
    aligned_ser, aligned_frame = ser.align(frame)

    padded = data._from_sequence(list(data) + [na_value], dtype=data.dtype)
    expected_ser = pd.Series(padded, name=ser.name)

    self.assert_series_equal(aligned_ser, expected_ser)
    self.assert_frame_equal(aligned_frame, frame)
|
||||
|
||||
def test_set_frame_expand_regular_with_extension(self, data):
    """Adding the array as a new column to an integer frame works."""
    ones = [1] * len(data)
    frame = pd.DataFrame({"A": ones})
    frame['B'] = data
    expected = pd.DataFrame({"A": [1] * len(data), "B": data})
    self.assert_frame_equal(frame, expected)
|
||||
|
||||
def test_set_frame_expand_extension_with_regular(self, data):
    """Adding an integer column to a frame holding the array works."""
    frame = pd.DataFrame({'A': data})
    frame['B'] = [1] * len(data)
    expected = pd.DataFrame({"A": data, "B": [1] * len(data)})
    self.assert_frame_equal(frame, expected)
|
||||
|
||||
def test_set_frame_overwrite_object(self, data):
    """Overwriting an object column with the array adopts its dtype."""
    # https://github.com/pandas-dev/pandas/issues/20555
    frame = pd.DataFrame({"A": [1] * len(data)}, dtype=object)
    frame['A'] = data
    column_dtype = frame.dtypes['A']
    assert column_dtype == data.dtype
|
||||
|
||||
def test_merge(self, data, na_value):
    """Merge frames where one side holds an extension-array column."""
    # GH-20743
    left = pd.DataFrame({'ext': data[:3], 'int1': [1, 2, 3],
                         'key': [0, 1, 2]})
    right = pd.DataFrame({'int2': [1, 2, 3, 4], 'key': [0, 0, 1, 3]})
    column_order = ['ext', 'int1', 'key', 'int2']

    # Merge with the default ``how``.
    res = pd.merge(left, right)
    ext_values = data._from_sequence([data[0], data[0], data[1]],
                                     dtype=data.dtype)
    exp = pd.DataFrame({'int1': [1, 1, 2], 'int2': [1, 2, 3],
                        'key': [0, 0, 1], 'ext': ext_values})
    self.assert_frame_equal(res, exp[column_order])

    # Outer merge: rows without a partner are expected to carry
    # np.nan / na_value.
    res = pd.merge(left, right, how='outer')
    ext_values = data._from_sequence(
        [data[0], data[0], data[1], data[2], na_value],
        dtype=data.dtype)
    exp = pd.DataFrame({'int1': [1, 1, 2, 3, np.nan],
                        'int2': [1, 2, 3, np.nan, 4],
                        'key': [0, 0, 1, 2, 3],
                        'ext': ext_values})
    self.assert_frame_equal(res, exp[column_order])
|
||||
|
||||
def test_merge_on_extension_array(self, data):
    """Merge a frame with itself using an extension array as the key."""
    # GH 23020
    first, second = data[:2]
    array_type = type(data)
    key = array_type._from_sequence([first, second], dtype=data.dtype)

    df = pd.DataFrame({"key": key, "val": [1, 2]})
    result = pd.merge(df, df, on='key')
    expected = pd.DataFrame({"key": key,
                             "val_x": [1, 2],
                             "val_y": [1, 2]})
    self.assert_frame_equal(result, expected)

    # order
    swapped = df.iloc[[1, 0]]
    result = pd.merge(swapped, df, on='key')
    expected = expected.iloc[[1, 0]]
    expected = expected.reset_index(drop=True)
    self.assert_frame_equal(result, expected)
|
||||
|
||||
def test_merge_on_extension_array_duplicates(self, data):
    """Merge on an extension-array key that contains a repeated value."""
    # GH 23020
    first, second = data[:2]
    array_type = type(data)
    key = array_type._from_sequence([first, second, first],
                                    dtype=data.dtype)
    df1 = pd.DataFrame({"key": key, "val": [1, 2, 3]})
    df2 = pd.DataFrame({"key": key, "val": [1, 2, 3]})

    result = pd.merge(df1, df2, on='key')

    expected_key = key.take([0, 0, 0, 0, 1])
    expected = pd.DataFrame({
        "key": expected_key,
        "val_x": [1, 1, 3, 3, 2],
        "val_y": [1, 3, 1, 3, 2],
    })
    self.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("columns", [
    ["A", "B"],
    pd.MultiIndex.from_tuples([('A', 'a'), ('A', 'b')],
                              names=['outer', 'inner']),
])
def test_stack(self, data, columns):
    """Stacking keeps the extension dtype and matches the object result."""
    df = pd.DataFrame({"A": data[:5], "B": data[:5]})
    df.columns = columns
    result = df.stack()

    as_object = df.astype(object)
    expected = as_object.stack()
    # we need a second astype(object), in case the constructor inferred
    # object -> specialized, as is done for period.
    expected = expected.astype(object)

    source_dtype = df.iloc[:, 0].dtype
    if isinstance(expected, pd.Series):
        assert result.dtype == source_dtype
    else:
        assert all(result.dtypes == source_dtype)

    self.assert_equal(result.astype(object), expected)
|
||||
|
||||
@pytest.mark.parametrize("index", [
    # Two levels, uniform.
    pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b']]),
                               names=['a', 'b']),

    # non-uniform
    pd.MultiIndex.from_tuples([('A', 'a'), ('A', 'b'), ('B', 'b')]),

    # three levels, non-uniform
    pd.MultiIndex.from_product([('A', 'B'), ('a', 'b', 'c'), (0, 1, 2)]),
    pd.MultiIndex.from_tuples([
        ('A', 'a', 1),
        ('A', 'b', 0),
        ('A', 'a', 0),
        ('B', 'a', 0),
        ('B', 'c', 1),
    ]),
])
@pytest.mark.parametrize("obj", ["series", "frame"])
def test_unstack(self, data, index, obj):
    """Unstack every ordered selection of levels and compare to object."""
    data = data[:len(index)]
    if obj == "series":
        ser = pd.Series(data, index=index)
    else:
        ser = pd.DataFrame({"A": data, "B": data}, index=index)

    n = index.nlevels
    levels = list(range(n))
    array_type = type(data)

    # For levels [0, 1, 2] the selections visited are, in order:
    # [(0,), (1,), (2,), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]
    for size in range(1, n):
        for level in itertools.permutations(levels, size):
            result = ser.unstack(level=level)
            for col in result.columns:
                assert isinstance(result[col].array, array_type)

            expected = ser.astype(object).unstack(level=level)
            result = result.astype(object)

            self.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,189 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .base import BaseExtensionTests
|
||||
|
||||
|
||||
class BaseSetitemTests(BaseExtensionTests):
    """Shared ``__setitem__`` tests for ExtensionArray implementations.

    ``data`` and ``box_in_series`` are pytest fixtures; when
    ``box_in_series`` is true the array is wrapped in a ``pd.Series``
    before the assignment under test.
    """

    def test_setitem_scalar_series(self, data, box_in_series):
        """Assign one scalar element onto another position."""
        if box_in_series:
            data = pd.Series(data)
        data[0] = data[1]
        assert data[0] == data[1]

    def test_setitem_sequence(self, data, box_in_series):
        """Swap the first two elements through a list indexer."""
        if box_in_series:
            data = pd.Series(data)
        original = data.copy()

        data[[0, 1]] = [data[1], data[0]]
        assert data[0] == original[1]
        assert data[1] == original[0]

    @pytest.mark.parametrize('as_array', [True, False])
    def test_setitem_sequence_mismatched_length_raises(self, data, as_array):
        """A value shorter than the indexer raises and leaves data intact."""
        ser = pd.Series(data)
        original = ser.copy()
        value = [data[0]]
        if as_array:
            value = data._from_sequence(value)

        xpr = 'cannot set using a {} indexer with a different length'
        with pytest.raises(ValueError, match=xpr.format('list-like')):
            ser[[0, 1]] = value
        # Ensure no modifications made before the exception
        self.assert_series_equal(ser, original)

        with pytest.raises(ValueError, match=xpr.format('slice')):
            ser[slice(3)] = value
        self.assert_series_equal(ser, original)

    def test_setitem_empty_indxer(self, data, box_in_series):
        """Setting with an empty indexer and empty value changes nothing."""
        if box_in_series:
            data = pd.Series(data)
        original = data.copy()
        data[np.array([], dtype=int)] = []
        self.assert_equal(data, original)

    def test_setitem_sequence_broadcasts(self, data, box_in_series):
        """A scalar assigned through a list indexer fills every position."""
        if box_in_series:
            data = pd.Series(data)
        data[[0, 1]] = data[2]
        assert data[0] == data[2]
        assert data[1] == data[2]

    @pytest.mark.parametrize('setter', ['loc', 'iloc'])
    def test_setitem_scalar(self, data, setter):
        """Set a scalar through ``Series.loc`` / ``Series.iloc``."""
        arr = pd.Series(data)
        setter = getattr(arr, setter)
        operator.setitem(setter, 0, data[1])
        assert arr[0] == data[1]

    def test_setitem_loc_scalar_mixed(self, data):
        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
        df.loc[0, 'B'] = data[1]
        assert df.loc[0, 'B'] == data[1]

    def test_setitem_loc_scalar_single(self, data):
        df = pd.DataFrame({"B": data})
        df.loc[10, 'B'] = data[1]
        assert df.loc[10, 'B'] == data[1]

    def test_setitem_loc_scalar_multiple_homogoneous(self, data):
        df = pd.DataFrame({"A": data, "B": data})
        df.loc[10, 'B'] = data[1]
        assert df.loc[10, 'B'] == data[1]

    def test_setitem_iloc_scalar_mixed(self, data):
        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
        df.iloc[0, 1] = data[1]
        assert df.loc[0, 'B'] == data[1]

    def test_setitem_iloc_scalar_single(self, data):
        df = pd.DataFrame({"B": data})
        df.iloc[10, 0] = data[1]
        assert df.loc[10, 'B'] == data[1]

    def test_setitem_iloc_scalar_multiple_homogoneous(self, data):
        df = pd.DataFrame({"A": data, "B": data})
        df.iloc[10, 1] = data[1]
        assert df.loc[10, 'B'] == data[1]

    @pytest.mark.parametrize('as_callable', [True, False])
    @pytest.mark.parametrize('setter', ['loc', None])
    def test_setitem_mask_aligned(self, data, as_callable, setter):
        """Set two elements through a boolean mask (or a callable mask)."""
        ser = pd.Series(data)
        mask = np.zeros(len(data), dtype=bool)
        mask[:2] = True

        if as_callable:
            mask2 = lambda x: mask
        else:
            mask2 = mask

        if setter:
            # loc
            target = getattr(ser, setter)
        else:
            # Series.__setitem__
            target = ser

        # The assignment goes through ``target`` only.  Repeating it with
        # ``ser[mask2] = ...`` afterwards would make the assertions pass
        # even when the parametrized setter did nothing.
        operator.setitem(target, mask2, data[5:7])

        assert ser[0] == data[5]
        assert ser[1] == data[6]

    @pytest.mark.parametrize('setter', ['loc', None])
    def test_setitem_mask_broadcast(self, data, setter):
        """Set a scalar onto every position selected by a boolean mask."""
        ser = pd.Series(data)
        mask = np.zeros(len(data), dtype=bool)
        mask[:2] = True

        if setter:   # loc
            target = getattr(ser, setter)
        else:  # __setitem__
            target = ser

        operator.setitem(target, mask, data[10])
        assert ser[0] == data[10]
        assert ser[1] == data[10]

    def test_setitem_expand_columns(self, data):
        df = pd.DataFrame({"A": data})
        result = df.copy()
        result['B'] = 1
        expected = pd.DataFrame({"A": data, "B": [1] * len(data)})
        self.assert_frame_equal(result, expected)

        result = df.copy()
        result.loc[:, 'B'] = 1
        self.assert_frame_equal(result, expected)

        # overwrite with new type
        result['B'] = data
        expected = pd.DataFrame({"A": data, "B": data})
        self.assert_frame_equal(result, expected)

    def test_setitem_expand_with_extension(self, data):
        df = pd.DataFrame({"A": [1] * len(data)})
        result = df.copy()
        result['B'] = data
        expected = pd.DataFrame({"A": [1] * len(data), "B": data})
        self.assert_frame_equal(result, expected)

        result = df.copy()
        result.loc[:, 'B'] = data
        self.assert_frame_equal(result, expected)

    def test_setitem_frame_invalid_length(self, data):
        df = pd.DataFrame({"A": [1] * len(data)})
        xpr = "Length of values does not match length of index"
        with pytest.raises(ValueError, match=xpr):
            df['B'] = data[:5]

    @pytest.mark.xfail(reason="GH#20441: setitem on extension types.")
    def test_setitem_tuple_index(self, data):
        s = pd.Series(data[:2], index=[(0, 0), (0, 1)])
        expected = pd.Series(data.take([1, 1]), index=s.index)
        s[(0, 1)] = data[1]
        self.assert_series_equal(s, expected)

    def test_setitem_slice_mismatch_length_raises(self, data):
        arr = data[:5]
        with pytest.raises(ValueError):
            arr[:1] = arr[:2]

    def test_setitem_slice_array(self, data):
        arr = data[:5].copy()
        arr[:5] = data[-5:]
        self.assert_extension_array_equal(arr, data[-5:])

    def test_setitem_scalar_key_sequence_raise(self, data):
        arr = data[:5].copy()
        with pytest.raises(ValueError):
            arr[0] = arr[[0, 1]]
|
||||
@@ -0,0 +1,110 @@
|
||||
import operator
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def dtype():
|
||||
"""A fixture providing the ExtensionDtype to validate."""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data():
|
||||
"""Length-100 array for this type.
|
||||
|
||||
* data[0] and data[1] should both be non missing
|
||||
* data[0] and data[1] should not be equal
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data_missing():
|
||||
"""Length-2 array with [NA, Valid]"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@pytest.fixture(params=['data', 'data_missing'])
def all_data(request, data, data_missing):
    """Parametrized fixture yielding ``data``, then ``data_missing``."""
    # Map each parameter name to the fixture value it stands for; an
    # unrecognised name yields None, as an unmatched if/elif would.
    by_name = {'data': data, 'data_missing': data_missing}
    return by_name.get(request.param)
|
||||
|
||||
|
||||
@pytest.fixture
def data_repeated(data):
    """
    Build a factory that repeats the ``data`` fixture.

    Parameters
    ----------
    data : fixture implementing `data`

    Returns
    -------
    Callable[[int], Generator]:
        A callable that takes a `count` argument and
        returns a generator yielding `data` that many times.
    """
    def repeat(count):
        # The same ``data`` object is yielded on every iteration.
        for _unused in range(count):
            yield data
    return repeat
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data_for_sorting():
|
||||
"""Length-3 array with a known sort order.
|
||||
|
||||
This should be three items [B, C, A] with
|
||||
A < B < C
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data_missing_for_sorting():
|
||||
"""Length-3 array with a known sort order.
|
||||
|
||||
This should be three items [B, NA, A] with
|
||||
A < B and NA missing.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def na_cmp():
|
||||
"""Binary operator for comparing NA values.
|
||||
|
||||
Should return a function of two arguments that returns
|
||||
True if both arguments are (scalar) NA for your type.
|
||||
|
||||
By default, uses ``operator.is_``
|
||||
"""
|
||||
return operator.is_
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def na_value():
|
||||
"""The scalar missing value for this type. Default 'None'"""
|
||||
return None
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data_for_grouping():
|
||||
"""Data for factorization, grouping, and unique tests.
|
||||
|
||||
Expected to be like [B, B, NA, NA, A, A, B, C]
|
||||
|
||||
Where A < B < C and NA is missing
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
|
||||
def box_in_series(request):
|
||||
"""Whether to box the data in a Series"""
|
||||
return request.param
|
||||
@@ -0,0 +1,4 @@
|
||||
from .array import DecimalArray, DecimalDtype, to_decimal, make_data
|
||||
|
||||
|
||||
__all__ = ['DecimalArray', 'DecimalDtype', 'to_decimal', 'make_data']
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,166 @@
|
||||
import decimal
|
||||
import numbers
|
||||
import random
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.base import ExtensionDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas.api.extensions import register_extension_dtype
|
||||
from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin
|
||||
|
||||
|
||||
@register_extension_dtype
class DecimalDtype(ExtensionDtype):
    """Extension dtype for arrays of ``decimal.Decimal`` scalars."""

    type = decimal.Decimal
    name = 'decimal'
    na_value = decimal.Decimal('NaN')
    # ``context`` is listed as the dtype's metadata attribute.
    _metadata = ('context',)

    def __init__(self, context=None):
        # Fall back to the current decimal context when none is supplied.
        if not context:
            context = decimal.getcontext()
        self.context = context

    def __repr__(self):
        template = 'DecimalDtype(context={})'
        return template.format(self.context)

    @classmethod
    def construct_array_type(cls):
        """Return the array type associated with this dtype

        Returns
        -------
        type
        """
        return DecimalArray

    @classmethod
    def construct_from_string(cls, string):
        """Return a default instance when ``string`` equals ``cls.name``.

        Raises
        ------
        TypeError
            If ``string`` is anything else.
        """
        if string != cls.name:
            raise TypeError("Cannot construct a '{}' from "
                            "'{}'".format(cls, string))
        return cls()

    @property
    def _is_numeric(self):
        return True
|
||||
|
||||
|
||||
class DecimalArray(ExtensionArray, ExtensionScalarOpsMixin):
    """Example ExtensionArray holding ``decimal.Decimal`` scalars.

    Values are kept in an object-dtype ndarray (``_data``); the dtype is a
    ``DecimalDtype`` built from the optional ``context``.
    """
    __array_priority__ = 1000

    def __init__(self, values, dtype=None, copy=False, context=None):
        """Validate and wrap ``values``.

        Parameters
        ----------
        values : iterable of decimal.Decimal
        dtype, copy
            Accepted but not used by this constructor.
        context : decimal.Context, optional
            Forwarded to ``DecimalDtype``.

        Raises
        ------
        TypeError
            If any element is not a ``decimal.Decimal``.
        """
        for val in values:
            if not isinstance(val, decimal.Decimal):
                raise TypeError("All values must be of type " +
                                str(decimal.Decimal))
        values = np.asarray(values, dtype=object)

        self._data = values
        # Some aliases for common attribute names to ensure pandas supports
        # these
        self._items = self.data = self._data
        # those aliases are currently not working due to assumptions
        # in internal code (GH-20735)
        # self._values = self.values = self.data
        self._dtype = DecimalDtype(context)

    @property
    def dtype(self):
        return self._dtype

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        # ``dtype`` and ``copy`` are not forwarded to the constructor.
        return cls(scalars)

    @classmethod
    def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
        return cls._from_sequence([decimal.Decimal(x) for x in strings],
                                  dtype, copy)

    @classmethod
    def _from_factorized(cls, values, original):
        return cls(values)

    def __getitem__(self, item):
        # Integer keys return the scalar; anything else is re-wrapped.
        if isinstance(item, numbers.Integral):
            return self._data[item]
        else:
            return type(self)(self._data[item])

    def take(self, indexer, allow_fill=False, fill_value=None):
        from pandas.api.extensions import take

        data = self._data
        if allow_fill and fill_value is None:
            fill_value = self.dtype.na_value

        result = take(data, indexer, fill_value=fill_value,
                      allow_fill=allow_fill)
        return self._from_sequence(result)

    def copy(self, deep=False):
        """Return a copy of this array that keeps the dtype's context.

        Without forwarding ``context`` the constructor would build a
        default ``DecimalDtype``, so a copy of an array with a custom
        context would come back with a different dtype.
        """
        context = self.dtype.context
        if deep:
            return type(self)(self._data.copy(), context=context)
        return type(self)(self, context=context)

    def astype(self, dtype, copy=True):
        # Casting to another DecimalDtype re-wraps the same values with
        # that dtype's context; everything else goes through numpy.
        if isinstance(dtype, type(self.dtype)):
            return type(self)(self._data, context=dtype.context)
        return np.asarray(self, dtype=dtype)

    def __setitem__(self, key, value):
        if pd.api.types.is_list_like(value):
            if pd.api.types.is_scalar(key):
                raise ValueError("setting an array element with a sequence.")
            value = [decimal.Decimal(v) for v in value]
        else:
            value = decimal.Decimal(value)
        self._data[key] = value

    def __len__(self):
        return len(self._data)

    @property
    def nbytes(self):
        # Length times the ``sys.getsizeof`` of the first element.
        n = len(self)
        if n:
            return n * sys.getsizeof(self[0])
        return 0

    def isna(self):
        return np.array([x.is_nan() for x in self._data], dtype=bool)

    @property
    def _na_value(self):
        return decimal.Decimal('NaN')

    @classmethod
    def _concat_same_type(cls, to_concat):
        return cls(np.concatenate([x._data for x in to_concat]))

    def _reduce(self, name, skipna=True, **kwargs):
        """Apply the ndarray method called ``name`` along axis 0.

        Raises
        ------
        NotImplementedError
            If ``skipna`` is true, or the backing ndarray has no
            attribute ``name``.
        """
        if skipna:
            raise NotImplementedError("decimal does not support skipna=True")

        try:
            op = getattr(self.data, name)
        except AttributeError:
            raise NotImplementedError("decimal does not support "
                                      "the {} operation".format(name))
        return op(axis=0)
|
||||
|
||||
|
||||
def to_decimal(values, context=None):
    """Convert each item of ``values`` to ``Decimal`` and wrap the result."""
    converted = []
    for raw in values:
        converted.append(decimal.Decimal(raw))
    return DecimalArray(converted, context=context)
|
||||
|
||||
|
||||
def make_data():
    """Return a list of 100 ``Decimal`` values from ``random.random()``."""
    values = []
    for _unused in range(100):
        values.append(decimal.Decimal(random.random()))
    return values
|
||||
|
||||
|
||||
DecimalArray._add_arithmetic_ops()
|
||||
DecimalArray._add_comparison_ops()
|
||||
+401
@@ -0,0 +1,401 @@
|
||||
import decimal
|
||||
import math
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import compat
|
||||
from pandas.tests.extension import base
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .array import DecimalArray, DecimalDtype, make_data, to_decimal
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def dtype():
|
||||
return DecimalDtype()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data():
|
||||
return DecimalArray(make_data())
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data_missing():
|
||||
return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)])
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data_for_sorting():
|
||||
return DecimalArray([decimal.Decimal('1'),
|
||||
decimal.Decimal('2'),
|
||||
decimal.Decimal('0')])
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data_missing_for_sorting():
|
||||
return DecimalArray([decimal.Decimal('1'),
|
||||
decimal.Decimal('NaN'),
|
||||
decimal.Decimal('0')])
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def na_cmp():
|
||||
return lambda x, y: x.is_nan() and y.is_nan()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def na_value():
|
||||
return decimal.Decimal("NaN")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data_for_grouping():
|
||||
b = decimal.Decimal('1.0')
|
||||
a = decimal.Decimal('0.0')
|
||||
c = decimal.Decimal('2.0')
|
||||
na = decimal.Decimal('NaN')
|
||||
return DecimalArray([b, b, na, na, a, a, b, c])
|
||||
|
||||
|
||||
class BaseDecimal(object):
    """Mixin with NaN-aware ``assert_*_equal`` helpers for decimal tests."""

    def assert_series_equal(self, left, right, *args, **kwargs):
        """Compare NA positions first, then the remaining values."""
        def is_nan_scalar(value):
            # need to convert array([Decimal(NaN)], dtype='object') to np.NaN
            # because Series[object].isnan doesn't recognize decimal(NaN) as
            # NA.
            try:
                return math.isnan(value)
            except TypeError:
                return False

        def na_mask(series):
            # Object columns are checked element by element; everything
            # else uses the Series' own ``isna``.
            if series.dtype == 'object':
                return series.apply(is_nan_scalar)
            return series.isna()

        left_na = na_mask(left)
        right_na = na_mask(right)

        tm.assert_series_equal(left_na, right_na)
        return tm.assert_series_equal(left[~left_na],
                                      right[~right_na],
                                      *args, **kwargs)

    def assert_frame_equal(self, left, right, *args, **kwargs):
        """Compare column labels, then each column, then what is left."""
        # TODO(EA): select_dtypes
        columns_label = '{obj}.columns'.format(
            obj=kwargs.get('obj', 'DataFrame'))
        tm.assert_index_equal(
            left.columns, right.columns,
            exact=kwargs.get('check_column_type', 'equiv'),
            check_names=kwargs.get('check_names', True),
            check_exact=kwargs.get('check_exact', False),
            check_categorical=kwargs.get('check_categorical', True),
            obj=columns_label)

        # NOTE: ``.index`` of the boolean comparison holds every column
        # label, not only the ones that compared equal to 'decimal', so
        # all columns take the NaN-aware Series comparison above.
        decimals = (left.dtypes == 'decimal').index

        for col in decimals:
            self.assert_series_equal(left[col], right[col],
                                     *args, **kwargs)

        remaining_left = left.drop(columns=decimals)
        remaining_right = right.drop(columns=decimals)
        tm.assert_frame_equal(remaining_left, remaining_right,
                              *args, **kwargs)
|
||||
|
||||
|
||||
class TestDtype(BaseDecimal, base.BaseDtypeTests):
|
||||
@pytest.mark.skipif(compat.PY2, reason="Context not hashable.")
|
||||
def test_hashable(self, dtype):
|
||||
pass
|
||||
|
||||
|
||||
class TestInterface(BaseDecimal, base.BaseInterfaceTests):
|
||||
|
||||
pytestmark = pytest.mark.skipif(compat.PY2,
|
||||
reason="Unhashble dtype in Py2.")
|
||||
|
||||
|
||||
class TestConstructors(BaseDecimal, base.BaseConstructorsTests):
|
||||
|
||||
@pytest.mark.skip(reason="not implemented constructor from dtype")
|
||||
def test_from_dtype(self, data):
|
||||
# construct from our dtype & string dtype
|
||||
pass
|
||||
|
||||
|
||||
class TestReshaping(BaseDecimal, base.BaseReshapingTests):
|
||||
pytestmark = pytest.mark.skipif(compat.PY2,
|
||||
reason="Unhashble dtype in Py2.")
|
||||
|
||||
|
||||
class TestGetitem(BaseDecimal, base.BaseGetitemTests):
|
||||
|
||||
def test_take_na_value_other_decimal(self):
|
||||
arr = DecimalArray([decimal.Decimal('1.0'),
|
||||
decimal.Decimal('2.0')])
|
||||
result = arr.take([0, -1], allow_fill=True,
|
||||
fill_value=decimal.Decimal('-1.0'))
|
||||
expected = DecimalArray([decimal.Decimal('1.0'),
|
||||
decimal.Decimal('-1.0')])
|
||||
self.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
class TestMissing(BaseDecimal, base.BaseMissingTests):
|
||||
pass
|
||||
|
||||
|
||||
class Reduce(object):
    """Mixin supplying ``check_reduce`` for the decimal reduction tests."""

    def check_reduce(self, s, op_name, skipna):
        """Expect NotImplementedError or a match with the ndarray result."""
        unsupported = ['median', 'skew', 'kurt']

        if skipna or op_name in unsupported:
            with pytest.raises(NotImplementedError):
                getattr(s, op_name)(skipna=skipna)
            return

        reducer = getattr(s, op_name)
        result = reducer(skipna=skipna)
        # Reference value: the same-named method of the ndarray form.
        reference = getattr(np.asarray(s), op_name)
        expected = reference()
        tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
class TestNumericReduce(Reduce, base.BaseNumericReduceTests):
|
||||
pass
|
||||
|
||||
|
||||
class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests):
|
||||
pass
|
||||
|
||||
|
||||
class TestMethods(BaseDecimal, base.BaseMethodsTests):
|
||||
@pytest.mark.parametrize('dropna', [True, False])
|
||||
@pytest.mark.xfail(reason="value_counts not implemented yet.")
|
||||
def test_value_counts(self, all_data, dropna):
|
||||
all_data = all_data[:10]
|
||||
if dropna:
|
||||
other = np.array(all_data[~all_data.isna()])
|
||||
else:
|
||||
other = all_data
|
||||
|
||||
result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
|
||||
expected = pd.Series(other).value_counts(dropna=dropna).sort_index()
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestCasting(BaseDecimal, base.BaseCastingTests):
|
||||
pytestmark = pytest.mark.skipif(compat.PY2,
|
||||
reason="Unhashble dtype in Py2.")
|
||||
|
||||
|
||||
class TestGroupby(BaseDecimal, base.BaseGroupbyTests):
|
||||
pytestmark = pytest.mark.skipif(compat.PY2,
|
||||
reason="Unhashble dtype in Py2.")
|
||||
|
||||
|
||||
class TestSetitem(BaseDecimal, base.BaseSetitemTests):
|
||||
pass
|
||||
|
||||
|
||||
class TestPrinting(BaseDecimal, base.BasePrintingTests):
|
||||
pytestmark = pytest.mark.skipif(compat.PY2,
|
||||
reason="Unhashble dtype in Py2.")
|
||||
|
||||
|
||||
# TODO(extension)
|
||||
@pytest.mark.xfail(reason=(
|
||||
"raising AssertionError as this is not implemented, "
|
||||
"though easy enough to do"))
|
||||
def test_series_constructor_coerce_data_to_extension_dtype_raises():
|
||||
xpr = ("Cannot cast data to extension dtype 'decimal'. Pass the "
|
||||
"extension array directly.")
|
||||
with pytest.raises(ValueError, match=xpr):
|
||||
pd.Series([0, 1, 2], dtype=DecimalDtype())
|
||||
|
||||
|
||||
def test_series_constructor_with_dtype():
    """Build a Series from a DecimalArray with an explicit ``dtype``."""
    arr = DecimalArray([decimal.Decimal('10.0')])

    # Passing the array's own dtype should match passing no dtype at all.
    with_decimal_dtype = pd.Series(arr, dtype=DecimalDtype())
    tm.assert_series_equal(with_decimal_dtype, pd.Series(arr))

    # Requesting int64 should give the plain integer Series.
    as_int = pd.Series(arr, dtype='int64')
    tm.assert_series_equal(as_int, pd.Series([10]))
|
||||
|
||||
|
||||
def test_dataframe_constructor_with_dtype():
    """Build a DataFrame from a DecimalArray with an explicit ``dtype``."""
    arr = DecimalArray([decimal.Decimal('10.0')])

    # Passing the array's own dtype should match passing no dtype at all.
    with_decimal_dtype = pd.DataFrame({"A": arr}, dtype=DecimalDtype())
    tm.assert_frame_equal(with_decimal_dtype, pd.DataFrame({"A": arr}))

    # Requesting int64 should give the plain integer frame.
    arr = DecimalArray([decimal.Decimal('10.0')])
    as_int = pd.DataFrame({"A": arr}, dtype='int64')
    tm.assert_frame_equal(as_int, pd.DataFrame({"A": [10]}))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("frame", [True, False])
def test_astype_dispatches(frame):
    """``astype`` on a Series/DataFrame carries the new context through."""
    # This is a dtype-specific test that ensures Series[decimal].astype
    # gets all the way through to ExtensionArray.astype
    # Designing a reliable smoke test that works for arbitrary data types
    # is difficult.
    values = DecimalArray([decimal.Decimal(2)])
    data = pd.Series(values, name='a')
    ctx = decimal.Context()
    ctx.prec = 5

    container = data.to_frame() if frame else data
    result = container.astype(DecimalDtype(ctx))
    if frame:
        # Pull the single column back out so both cases check a Series.
        result = result['a']

    assert result.dtype.context.prec == ctx.prec
|
||||
|
||||
|
||||
class TestArithmeticOps(BaseDecimal, base.BaseArithmeticOpsTests):
    """Arithmetic-operator tests for DecimalArray."""

    def check_opname(self, s, op_name, other, exc=None):
        # The incoming ``exc`` is ignored: the base check always runs
        # with exc=None.
        super(TestArithmeticOps, self).check_opname(s, op_name,
                                                    other, exc=None)

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """Run each operator against int, Decimal and scalar operands.

        The DivisionByZero and InvalidOperation traps of the current
        decimal context are switched off for the duration of the checks.
        """
        op_name = all_arithmetic_operators
        s = pd.Series(data)

        context = decimal.getcontext()
        divbyzerotrap = context.traps[decimal.DivisionByZero]
        invalidoptrap = context.traps[decimal.InvalidOperation]
        context.traps[decimal.DivisionByZero] = 0
        context.traps[decimal.InvalidOperation] = 0

        # Restore the traps even when a check fails; otherwise the
        # modified context would leak into every test that runs later.
        try:
            # Decimal supports ops with int, but not float
            other = pd.Series([int(d * 100) for d in data])
            self.check_opname(s, op_name, other)

            if "mod" not in op_name:
                self.check_opname(s, op_name, s * 2)

            self.check_opname(s, op_name, 0)
            self.check_opname(s, op_name, 5)
        finally:
            context.traps[decimal.DivisionByZero] = divbyzerotrap
            context.traps[decimal.InvalidOperation] = invalidoptrap

    def _check_divmod_op(self, s, op, other, exc=NotImplementedError):
        # We implement divmod
        super(TestArithmeticOps, self)._check_divmod_op(
            s, op, other, exc=None
        )

    def test_error(self):
        pass
|
||||
|
||||
|
||||
class TestComparisonOps(BaseDecimal, base.BaseComparisonOpsTests):
    """Comparison-operator tests for DecimalArray."""

    def check_opname(self, s, op_name, other, exc=None):
        # The incoming ``exc`` is ignored: the base check always runs
        # with exc=None.
        parent = super(TestComparisonOps, self)
        parent.check_opname(s, op_name, other, exc=None)

    def _compare_other(self, s, data, op_name, other):
        self.check_opname(s, op_name, other)

    def test_compare_scalar(self, data, all_compare_operators):
        series = pd.Series(data)
        self._compare_other(series, data, all_compare_operators, 0.5)

    def test_compare_array(self, data, all_compare_operators):
        series = pd.Series(data)

        # Randomly double, halve or keep same value
        exponents = np.random.choice([-1, 0, 1], len(data))
        factors = [decimal.Decimal(pow(2.0, i)) for i in exponents]
        other = pd.Series(data) * factors
        self._compare_other(series, data, all_compare_operators, other)
|
||||
|
||||
|
||||
class DecimalArrayWithoutFromSequence(DecimalArray):
    """Helper class for testing error handling in _from_sequence."""

    # Declared as a classmethod, like the method it overrides, so that a
    # call on the class raises the intended KeyError instead of a
    # TypeError about a missing positional argument.
    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        raise KeyError("For the test")
|
||||
|
||||
|
||||
class DecimalArrayWithoutCoercion(DecimalArrayWithoutFromSequence):
|
||||
@classmethod
|
||||
def _create_arithmetic_method(cls, op):
|
||||
return cls._create_method(op, coerce_to_dtype=False)
|
||||
|
||||
|
||||
DecimalArrayWithoutCoercion._add_arithmetic_ops()
|
||||
|
||||
|
||||
def test_combine_from_sequence_raises():
|
||||
# https://github.com/pandas-dev/pandas/issues/22850
|
||||
ser = pd.Series(DecimalArrayWithoutFromSequence([
|
||||
decimal.Decimal("1.0"),
|
||||
decimal.Decimal("2.0")
|
||||
]))
|
||||
result = ser.combine(ser, operator.add)
|
||||
|
||||
# note: object dtype
|
||||
expected = pd.Series([decimal.Decimal("2.0"),
|
||||
decimal.Decimal("4.0")], dtype="object")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("class_", [DecimalArrayWithoutFromSequence,
                                    DecimalArrayWithoutCoercion])
def test_scalar_ops_from_sequence_raises(class_):
    """Adding two such arrays is expected to give an object ndarray."""
    # op(EA, EA) should return an EA, or an ndarray if it's not possible
    # to return an EA with the return values.
    one = decimal.Decimal("1.0")
    two = decimal.Decimal("2.0")
    arr = class_([one, two])

    total = arr + arr

    doubled = [decimal.Decimal("2.0"), decimal.Decimal("4.0")]
    expected = np.array(doubled, dtype="object")
    tm.assert_numpy_array_equal(total, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("reverse, expected_div, expected_mod", [
    (False, [0, 1, 1, 2], [1, 0, 1, 0]),
    (True, [2, 1, 0, 0], [0, 0, 2, 2]),
])
def test_divmod_array(reverse, expected_div, expected_mod):
    """``divmod`` between a DecimalArray and 2, in either operand order."""
    # https://github.com/pandas-dev/pandas/issues/22930
    arr = to_decimal([1, 2, 3, 4])
    operands = (2, arr) if reverse else (arr, 2)
    quotient, remainder = divmod(*operands)

    tm.assert_extension_array_equal(quotient, to_decimal(expected_div))
    tm.assert_extension_array_equal(remainder, to_decimal(expected_mod))
|
||||
|
||||
|
||||
def test_formatting_values_deprecated():
    """repr() of a Series whose array defines _formatting_values warns."""

    class DecimalArray2(DecimalArray):
        def _formatting_values(self):
            return np.array(self)

    one = decimal.Decimal('1.0')
    ser = pd.Series(DecimalArray2([one]))

    # different levels for 2 vs. 3
    with tm.assert_produces_warning(DeprecationWarning,
                                    check_stacklevel=compat.PY3):
        repr(ser)
|
||||
@@ -0,0 +1,3 @@
|
||||
from .array import JSONArray, JSONDtype, make_data
|
||||
|
||||
__all__ = ['JSONArray', 'JSONDtype', 'make_data']
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,199 @@
|
||||
"""Test extension array for storing nested data in a pandas container.
|
||||
|
||||
The JSONArray stores lists of dictionaries. The storage mechanism is a list,
|
||||
not an ndarray.
|
||||
|
||||
Note:
|
||||
|
||||
We currently store lists of UserDicts (Py3 only). Pandas has a few places
|
||||
internally that specifically check for dicts, and does non-scalar things
|
||||
in that case. We *want* the dictionaries to be treated as scalars, so we
|
||||
hack around pandas by using UserDicts.
|
||||
"""
|
||||
import collections
|
||||
import itertools
|
||||
import numbers
|
||||
import random
|
||||
import string
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.base import ExtensionDtype
|
||||
|
||||
from pandas import compat
|
||||
from pandas.core.arrays import ExtensionArray
|
||||
|
||||
|
||||
class JSONDtype(ExtensionDtype):
    """Extension dtype named 'json' whose scalar type is ``compat.Mapping``."""

    type = compat.Mapping
    name = 'json'

    # The NA value is an empty mapping: a UserDict when ``collections``
    # provides one, a plain dict otherwise.
    try:
        na_value = collections.UserDict()
    except AttributeError:
        # source compatibility with Py2.
        na_value = {}

    @classmethod
    def construct_array_type(cls):
        """Return the array type associated with this dtype

        Returns
        -------
        type
        """
        return JSONArray

    @classmethod
    def construct_from_string(cls, string):
        """Return an instance when `string` equals the dtype name.

        Raises
        ------
        TypeError
            If `string` is anything other than ``cls.name``.
        """
        if string != cls.name:
            raise TypeError("Cannot construct a '{}' from "
                            "'{}'".format(cls, string))
        return cls()
|
||||
|
||||
|
||||
class JSONArray(ExtensionArray):
    """ExtensionArray of mappings, stored in a plain Python list (``data``)."""

    dtype = JSONDtype()
    __array_priority__ = 1000

    def __init__(self, values, dtype=None, copy=False):
        # Every element must be an instance of the dtype's scalar type.
        # ``dtype`` and ``copy`` are accepted but not used here.
        for val in values:
            if not isinstance(val, self.dtype.type):
                raise TypeError("All values must be of type " +
                                str(self.dtype.type))
        self.data = values

        # Some aliases for common attribute names to ensure pandas supports
        # these
        self._items = self._data = self.data
        # those aliases are currently not working due to assumptions
        # in internal code (GH-20735)
        # self._values = self.values = self.data

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        """Build an array from a sequence of scalars (extra args ignored)."""
        return cls(scalars)

    @classmethod
    def _from_factorized(cls, values, original):
        """Rebuild from factorized values, dropping empty-tuple entries."""
        return cls([collections.UserDict(x) for x in values if x != ()])

    def __getitem__(self, item):
        if isinstance(item, numbers.Integral):
            # scalar access returns the stored mapping itself
            return self.data[item]
        elif isinstance(item, np.ndarray) and item.dtype == 'bool':
            # boolean mask
            return self._from_sequence([x for x, m in zip(self, item) if m])
        elif isinstance(item, compat.Iterable):
            # fancy indexing
            return type(self)([self.data[i] for i in item])
        else:
            # slice
            return type(self)(self.data[item])

    def __setitem__(self, key, value):
        if isinstance(key, numbers.Integral):
            self.data[key] = value
        else:
            if not isinstance(value, (type(self),
                                      compat.Sequence)):
                # broadcast value
                value = itertools.cycle([value])

            if isinstance(key, np.ndarray) and key.dtype == 'bool':
                # masking
                for i, (k, v) in enumerate(zip(key, value)):
                    if k:
                        assert isinstance(v, self.dtype.type)
                        self.data[i] = v
            else:
                for k, v in zip(key, value):
                    assert isinstance(v, self.dtype.type)
                    self.data[k] = v

    def __len__(self):
        return len(self.data)

    @property
    def nbytes(self):
        # Shallow size of the backing list only.
        return sys.getsizeof(self.data)

    def isna(self):
        """Return a boolean ndarray marking elements equal to the NA value."""
        return np.array([x == self.dtype.na_value for x in self.data],
                        dtype=bool)

    def take(self, indexer, allow_fill=False, fill_value=None):
        """Take elements at the positions in `indexer`.

        With ``allow_fill=True`` the position -1 is replaced by `fill_value`
        (the dtype's NA value when `fill_value` is None) and any other
        negative position is rejected.

        Raises
        ------
        ValueError
            If ``allow_fill`` is True and `indexer` has a value below -1.
        IndexError
            If a position is out of bounds for the backing list.
        """
        # re-implement here, since NumPy has trouble setting
        # sized objects like UserDicts into scalar slots of
        # an ndarray.
        indexer = np.asarray(indexer)
        msg = ("Index is out of bounds or cannot do a "
               "non-empty take from an empty array.")

        if allow_fill:
            if fill_value is None:
                fill_value = self.dtype.na_value
            # bounds check
            if (indexer < -1).any():
                raise ValueError("Invalid value in 'indexer': with "
                                 "allow_fill=True the only negative "
                                 "value allowed is -1.")

        try:
            if allow_fill:
                output = [self.data[loc] if loc != -1 else fill_value
                          for loc in indexer]
            else:
                output = [self.data[loc] for loc in indexer]
        except IndexError:
            raise IndexError(msg)

        return self._from_sequence(output)

    def copy(self, deep=False):
        # Copies the backing list; the mappings themselves are shared.
        return type(self)(self.data[:])

    def astype(self, dtype, copy=True):
        # NumPy has issues when all the dicts are the same length.
        # np.array([UserDict(...), UserDict(...)]) fails,
        # but np.array([{...}, {...}]) works, so cast.

        # needed to add this check for the Series constructor
        if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:
            if copy:
                return self.copy()
            return self
        return np.array([dict(x) for x in self], dtype=dtype, copy=copy)

    def unique(self):
        # Parent method doesn't work since np.array will try to infer
        # a 2-dim object.
        return type(self)([
            dict(x) for x in list({tuple(d.items()) for d in self.data})
        ])

    @classmethod
    def _concat_same_type(cls, to_concat):
        data = list(itertools.chain.from_iterable([x.data for x in to_concat]))
        return cls(data)

    def _values_for_factorize(self):
        frozen = self._values_for_argsort()
        if len(frozen) == 0:
            # _factorize_array expects 1-d array, this is a len-0 2-d array.
            frozen = frozen.ravel()
        return frozen, ()

    def _values_for_argsort(self):
        # Disable NumPy's shape inference by including an empty tuple...
        # If all the elements of self are the same size P, NumPy will
        # cast them to an (N, P) array, instead of an (N,) array of tuples.
        frozen = [()] + [tuple(x.items()) for x in self]
        return np.array(frozen, dtype=object)[1:]
|
||||
|
||||
|
||||
def make_data():
    """Return 100 UserDicts, each with up to 10 random letter -> int items."""
    # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer
    records = []
    for _ in range(100):
        n_items = random.randint(0, 10)
        pairs = []
        for _ in range(n_items):
            key = random.choice(string.ascii_letters)
            value = random.randint(0, 100)
            pairs.append((key, value))
        records.append(collections.UserDict(pairs))
    return records
|
||||
@@ -0,0 +1,304 @@
|
||||
import collections
|
||||
import operator
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PY2, PY36
|
||||
|
||||
import pandas as pd
|
||||
from pandas.tests.extension import base
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .array import JSONArray, JSONDtype, make_data
|
||||
|
||||
pytestmark = pytest.mark.skipif(PY2, reason="Py2 doesn't have a UserDict")
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Provide a JSONDtype instance."""
    json_dtype = JSONDtype()
    return json_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Length-100 JSONArray for semantics test.

    The first two elements are regenerated until their lengths differ.
    """
    data = make_data()

    # Why the while loop? NumPy is unable to construct an ndarray from
    # equal-length UserDicts. Many of our operations involve coercing the
    # EA to an ndarray of objects. To avoid random test failures, we ensure
    # that our data is coercible to an ndarray. Several tests deal with only
    # the first two elements, so that's what we'll check.

    while len(data[0]) == len(data[1]):
        data = make_data()

    return JSONArray(data)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """Length 2 array with [NA, Valid]"""
    missing, valid = {}, {'a': 10}
    return JSONArray([missing, valid])
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting():
    """Three-element JSONArray used by the sorting tests."""
    records = [
        {'b': 1},
        {'c': 4},
        {'a': 2, 'c': 3},
    ]
    return JSONArray(records)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting():
    """Three-element JSONArray whose middle element is the empty (NA) dict."""
    records = [{'b': 1}, {}, {'a': 4}]
    return JSONArray(records)
|
||||
|
||||
|
||||
@pytest.fixture
def na_value(dtype):
    """Provide the NA value declared on the dtype fixture."""
    missing = dtype.na_value
    return missing
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Provide the comparison used for NA values: plain equality."""
    comparator = operator.eq
    return comparator
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping():
    """Eight-element JSONArray with repeated and empty (NA) entries."""
    records = [
        {'b': 1},
        {'b': 1},
        {},
        {},
        {'a': 0, 'c': 2},
        {'a': 0, 'c': 2},
        {'b': 1},
        {'c': 2},
    ]
    return JSONArray(records)
|
||||
|
||||
|
||||
class BaseJSON(object):
    """Mixin providing json-aware series/frame equality assertions."""

    # NumPy doesn't handle an array of equal-length UserDicts.
    # The default assert_series_equal eventually does a
    # Series.values, which raises. We work around it by
    # converting the UserDicts to dicts.
    def assert_series_equal(self, left, right, **kwargs):
        """Compare two Series, re-wrapping json data as dict-backed arrays."""
        if left.dtype.name == 'json':
            assert left.dtype == right.dtype
            left = pd.Series(JSONArray(left.values.astype(object)),
                             index=left.index, name=left.name)
            right = pd.Series(JSONArray(right.values.astype(object)),
                              index=right.index, name=right.name)
        tm.assert_series_equal(left, right, **kwargs)

    def assert_frame_equal(self, left, right, *args, **kwargs):
        """Compare two DataFrames, routing json columns through
        ``assert_series_equal`` and the rest through ``tm.assert_frame_equal``.
        """
        tm.assert_index_equal(
            left.columns, right.columns,
            exact=kwargs.get('check_column_type', 'equiv'),
            check_names=kwargs.get('check_names', True),
            check_exact=kwargs.get('check_exact', False),
            check_categorical=kwargs.get('check_categorical', True),
            obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame')))

        # Select only the json columns. Taking ``.index`` of the boolean
        # Series directly would return every column label, leaving nothing
        # for the generic frame comparison below.
        is_json = left.dtypes == 'json'
        jsons = is_json[is_json].index

        for col in jsons:
            self.assert_series_equal(left[col], right[col],
                                     *args, **kwargs)

        left = left.drop(columns=jsons)
        right = right.drop(columns=jsons)
        tm.assert_frame_equal(left, right, *args, **kwargs)
|
||||
|
||||
|
||||
class TestDtype(BaseJSON, base.BaseDtypeTests):
    """Inherits the base dtype tests without overrides."""
|
||||
|
||||
|
||||
class TestInterface(BaseJSON, base.BaseInterfaceTests):
    """Interface tests plus a check of the custom assert helpers."""

    def test_custom_asserts(self):
        # This would always trigger the KeyError from trying to put
        # an array of equal-length UserDicts inside an ndarray.
        dicts = [collections.UserDict({key: val})
                 for key, val in (('a', 1), ('b', 2), ('c', 3))]
        data = JSONArray(dicts)

        # Equal inputs must pass both helpers.
        a = pd.Series(data)
        self.assert_series_equal(a, a)
        self.assert_frame_equal(a.to_frame(), a.to_frame())

        # Differing inputs must fail both helpers.
        b = pd.Series(data.take([0, 0, 1]))
        with pytest.raises(AssertionError):
            self.assert_series_equal(a, b)

        with pytest.raises(AssertionError):
            self.assert_frame_equal(a.to_frame(), b.to_frame())
|
||||
|
||||
|
||||
class TestConstructors(BaseJSON, base.BaseConstructorsTests):
    """Constructor tests; construction from a dtype is skipped."""

    @pytest.mark.skip(reason="not implemented constructor from dtype")
    def test_from_dtype(self, data):
        """Skipped: construct from our dtype & string dtype."""
|
||||
|
||||
|
||||
class TestReshaping(BaseJSON, base.BaseReshapingTests):
    """Reshaping tests; stack is skipped and unstack is expected to fail."""

    @pytest.mark.skip(reason="Different definitions of NA")
    def test_stack(self):
        """
        The test does .astype(object).stack(). If we happen to have
        any missing values in `data`, then we'll end up with different
        rows since we consider `{}` NA, but `.astype(object)` doesn't.
        """
        pass

    @pytest.mark.xfail(reason="dict for NA")
    def test_unstack(self, data, index):
        # The base test has NaN for the expected NA value.
        # this matches otherwise
        parent = super(TestReshaping, self)
        return parent.test_unstack(data, index)
|
||||
|
||||
|
||||
class TestGetitem(BaseJSON, base.BaseGetitemTests):
    """Inherits the base getitem tests without overrides."""
|
||||
|
||||
|
||||
class TestMissing(BaseJSON, base.BaseMissingTests):
    """Missing-data tests; both fillna variants are skipped."""

    @pytest.mark.skip(reason="Setting a dict as a scalar")
    def test_fillna_series(self):
        """We treat dictionaries as a mapping in fillna, not a scalar."""
        pass

    @pytest.mark.skip(reason="Setting a dict as a scalar")
    def test_fillna_frame(self):
        """We treat dictionaries as a mapping in fillna, not a scalar."""
        pass
|
||||
|
||||
|
||||
# Reusable marks for the test classes below.
unhashable = pytest.mark.skip(reason="Unhashable")
# Skipped unless running on 3.6 or higher.
unstable = pytest.mark.skipif(
    not PY36,
    reason="Dictionary order unstable",
)
|
||||
|
||||
|
||||
class TestReduce(base.BaseNoReduceTests):
    """Inherits the base no-reduce tests without overrides."""
|
||||
|
||||
|
||||
class TestMethods(BaseJSON, base.BaseMethodsTests):
    """Method tests with skips for hashing, ordering and combine cases."""

    @unhashable
    def test_value_counts(self, all_data, dropna):
        pass

    @unhashable
    def test_sort_values_frame(self):
        # TODO (EA.factorize): see if _values_for_factorize allows this.
        pass

    @unstable
    def test_argsort(self, data_for_sorting):
        super().test_argsort(data_for_sorting)

    @unstable
    def test_argsort_missing(self, data_missing_for_sorting):
        super().test_argsort_missing(data_missing_for_sorting)

    @unstable
    @pytest.mark.parametrize('ascending', [True, False])
    def test_sort_values(self, data_for_sorting, ascending):
        super().test_sort_values(data_for_sorting, ascending)

    @unstable
    @pytest.mark.parametrize('ascending', [True, False])
    def test_sort_values_missing(self, data_missing_for_sorting, ascending):
        super().test_sort_values_missing(data_missing_for_sorting, ascending)

    @pytest.mark.skip(reason="combine for JSONArray not supported")
    def test_combine_le(self, data_repeated):
        pass

    @pytest.mark.skip(reason="combine for JSONArray not supported")
    def test_combine_add(self, data_repeated):
        pass

    @pytest.mark.skip(reason="combine for JSONArray not supported")
    def test_combine_first(self, data):
        pass

    @unhashable
    def test_hash_pandas_object_works(self, data, kind):
        super().test_hash_pandas_object_works(data, kind)

    @pytest.mark.skip(reason="broadcasting error")
    def test_where_series(self, data, na_value):
        # Fails with
        # *** ValueError: operands could not be broadcast together
        # with shapes (4,) (4,) (0,)
        super().test_where_series(data, na_value)

    @pytest.mark.skip(reason="Can't compare dicts.")
    def test_searchsorted(self, data_for_sorting):
        super().test_searchsorted(data_for_sorting)
|
||||
|
||||
|
||||
class TestCasting(BaseJSON, base.BaseCastingTests):
    """Casting tests; the cast to str is skipped."""

    @pytest.mark.skip(reason="failing on np.array(self, dtype=str)")
    def test_astype_str(self):
        """This currently fails in NumPy on np.array(self, dtype=str) with

        *** ValueError: setting an array element with a sequence
        """
        pass
|
||||
|
||||
|
||||
# We intentionally don't run base.BaseSetitemTests because pandas'
# internals have trouble setting sequences of values into scalar positions.
|
||||
|
||||
|
||||
class TestGroupby(BaseJSON, base.BaseGroupbyTests):
    """Groupby tests; transform and apply are skipped as unhashable."""

    @unhashable
    def test_groupby_extension_transform(self):
        """
        This currently fails in Series.name.setter, since the
        name must be hashable, but the value is a dictionary.
        I think this is what we want, i.e. `.name` should be the original
        values, and not the values for factorization.
        """
        pass

    @unhashable
    def test_groupby_extension_apply(self):
        """
        This fails in Index._do_unique_check with

        >   hash(val)
        E   TypeError: unhashable type: 'UserDict' with

        I suspect that once we support Index[ExtensionArray],
        we'll be able to dispatch unique.
        """
        pass

    @unstable
    @pytest.mark.parametrize('as_index', [True, False])
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
        super().test_groupby_extension_agg(as_index, data_for_grouping)
|
||||
|
||||
|
||||
class TestArithmeticOps(BaseJSON, base.BaseArithmeticOpsTests):
    """Arithmetic tests; Series + array is expected to raise TypeError."""

    def test_error(self, data, all_arithmetic_operators):
        pass

    def test_add_series_with_extension_array(self, data):
        ser = pd.Series(data)
        with pytest.raises(TypeError, match="unsupported"):
            ser + data

    def _check_divmod_op(self, s, op, other, exc=NotImplementedError):
        # The incoming ``exc`` is not forwarded; TypeError is always used.
        parent = super()
        return parent._check_divmod_op(s, op, other, exc=TypeError)
|
||||
|
||||
|
||||
class TestComparisonOps(BaseJSON, base.BaseComparisonOpsTests):
    """Inherits the base comparison-op tests without overrides."""
|
||||
|
||||
|
||||
class TestPrinting(BaseJSON, base.BasePrintingTests):
    """Inherits the base printing tests without overrides."""
|
||||
@@ -0,0 +1,243 @@
|
||||
"""
|
||||
This file contains a minimal set of tests for compliance with the extension
|
||||
array interface test suite, and should contain no other tests.
|
||||
The test suite for the full functionality of the array is located in
|
||||
`pandas/tests/arrays/`.
|
||||
|
||||
The tests in this file are inherited from the BaseExtensionTests, and only
|
||||
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||||
parent method).
|
||||
|
||||
Additional tests should either be added to one of the BaseExtensionTests
|
||||
classes (if they are relevant for the extension interface for all dtypes), or
|
||||
be added to the array-specific tests in `pandas/tests/arrays/`.
|
||||
|
||||
"""
|
||||
import string
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Categorical
|
||||
from pandas.api.types import CategoricalDtype
|
||||
from pandas.tests.extension import base
|
||||
|
||||
|
||||
def make_data():
    """Return 100 random ASCII letters whose first two entries differ."""
    values = np.random.choice(list(string.ascii_letters), size=100)
    # ensure we meet the requirements
    # 1. first two not null
    # 2. first and second are different
    while values[0] == values[1]:
        # redraw the whole sample until the first two letters differ
        values = np.random.choice(list(string.ascii_letters), size=100)
    return values
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Provide a default-constructed CategoricalDtype."""
    cat_dtype = CategoricalDtype()
    return cat_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Length-100 array for this type.

    * data[0] and data[1] should both be non missing
    * data[0] and data[1] should not be equal
    """
    letters = make_data()
    return Categorical(letters)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """Length 2 array with [NA, Valid]"""
    values = [np.nan, 'A']
    return Categorical(values)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting():
    """Ordered Categorical ['A', 'B', 'C'] with categories C < A < B."""
    values = ['A', 'B', 'C']
    category_order = ['C', 'A', 'B']
    return Categorical(values, categories=category_order, ordered=True)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting():
    """Ordered Categorical ['A', None, 'B'] with categories B < A."""
    values = ['A', None, 'B']
    category_order = ['B', 'A']
    return Categorical(values, categories=category_order, ordered=True)
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """Provide the NA value used by these tests: ``np.nan``."""
    missing = np.nan
    return missing
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping():
    """Eight-element Categorical with repeated and missing values."""
    values = ['a', 'a', None, None, 'b', 'b', 'a', 'c']
    return Categorical(values)
|
||||
|
||||
|
||||
class TestDtype(base.BaseDtypeTests):
    """Inherits the base dtype tests without overrides."""
|
||||
|
||||
|
||||
class TestInterface(base.BaseInterfaceTests):
    """Interface tests; the memory-usage check is skipped."""

    @pytest.mark.skip(reason="Memory usage doesn't match")
    def test_memory_usage(self, data):
        # Is this deliberate?
        parent = super(TestInterface, self)
        parent.test_memory_usage(data)
|
||||
|
||||
|
||||
class TestConstructors(base.BaseConstructorsTests):
    """Inherits the base constructor tests without overrides."""
|
||||
|
||||
|
||||
class TestReshaping(base.BaseReshapingTests):
    """Inherits the base reshaping tests without overrides."""
|
||||
|
||||
|
||||
class TestGetitem(base.BaseGetitemTests):
    """Getitem tests; every take/reindex variant here carries a skip mark."""

    # Shared mark for the take-related tests below.
    skip_take = pytest.mark.skip(reason="GH-20664.")

    @pytest.mark.skip(reason="Backwards compatibility")
    def test_getitem_scalar(self, data):
        # CategoricalDtype.type isn't "correct" since it should
        # be a parent of the elements (object). But don't want
        # to break things by changing.
        super(TestGetitem, self).test_getitem_scalar(data)

    @skip_take
    def test_take(self, data, na_value, na_cmp):
        # TODO remove this once Categorical.take is fixed
        super(TestGetitem, self).test_take(data, na_value, na_cmp)

    @skip_take
    def test_take_negative(self, data):
        super(TestGetitem, self).test_take_negative(data)

    @skip_take
    def test_take_pandas_style_negative_raises(self, data, na_value):
        super(TestGetitem, self).test_take_pandas_style_negative_raises(
            data, na_value)

    @skip_take
    def test_take_non_na_fill_value(self, data_missing):
        super(TestGetitem, self).test_take_non_na_fill_value(data_missing)

    @skip_take
    def test_take_out_of_bounds_raises(self, data, allow_fill):
        return super(TestGetitem, self).test_take_out_of_bounds_raises(
            data, allow_fill)

    @pytest.mark.skip(reason="GH-20747. Unobserved categories.")
    def test_take_series(self, data):
        super(TestGetitem, self).test_take_series(data)

    @skip_take
    def test_reindex_non_na_fill_value(self, data_missing):
        super(TestGetitem, self).test_reindex_non_na_fill_value(data_missing)

    @pytest.mark.skip(reason="Categorical.take buggy")
    def test_take_empty(self, data, na_value, na_cmp):
        super(TestGetitem, self).test_take_empty(data, na_value, na_cmp)

    @pytest.mark.skip(reason="test not written correctly for categorical")
    def test_reindex(self, data, na_value):
        super(TestGetitem, self).test_reindex(data, na_value)
|
||||
|
||||
|
||||
class TestSetitem(base.BaseSetitemTests):
    """Inherits the base setitem tests without overrides."""
|
||||
|
||||
|
||||
class TestMissing(base.BaseMissingTests):
    """Missing-data tests; the limit-based fillna variants are skipped."""

    @pytest.mark.skip(reason="Not implemented")
    def test_fillna_limit_pad(self, data_missing):
        super(TestMissing, self).test_fillna_limit_pad(data_missing)

    @pytest.mark.skip(reason="Not implemented")
    def test_fillna_limit_backfill(self, data_missing):
        super(TestMissing, self).test_fillna_limit_backfill(data_missing)
|
||||
|
||||
|
||||
class TestReduce(base.BaseNoReduceTests):
    """Inherits the base no-reduce tests without overrides."""
|
||||
|
||||
|
||||
class TestMethods(base.BaseMethodsTests):
    """Method tests with categorical-specific skips and a combine override."""

    @pytest.mark.skip(reason="Unobserved categories included")
    def test_value_counts(self, all_data, dropna):
        return super().test_value_counts(all_data, dropna)

    def test_combine_add(self, data_repeated):
        # GH 20825
        # When adding categoricals in combine, result is a string
        orig_data1, orig_data2 = data_repeated(2)
        s1 = pd.Series(orig_data1)
        s2 = pd.Series(orig_data2)
        result = s1.combine(s2, lambda x1, x2: x1 + x2)
        expected = pd.Series(([a + b for (a, b) in
                               zip(list(orig_data1), list(orig_data2))]))
        self.assert_series_equal(result, expected)

        # Series combined with a scalar taken from its own first element.
        val = s1.iloc[0]
        result = s1.combine(val, lambda x1, x2: x1 + x2)
        expected = pd.Series([a + val for a in list(orig_data1)])
        self.assert_series_equal(result, expected)

    @pytest.mark.skip(reason="Not Applicable")
    def test_fillna_length_mismatch(self, data_missing):
        super().test_fillna_length_mismatch(data_missing)

    def test_searchsorted(self, data_for_sorting):
        # NOTE(review): this override only guards against unordered data; it
        # does not call the inherited searchsorted test - confirm intended.
        if not data_for_sorting.ordered:
            # pytest.skip raises on its own, so no ``raise`` is needed. The
            # message is positional because the keyword name differs between
            # pytest versions.
            pytest.skip("searchsorted requires ordered data.")
|
||||
|
||||
|
||||
class TestCasting(base.BaseCastingTests):
    """Inherits the base casting tests without overrides."""
|
||||
|
||||
|
||||
class TestArithmeticOps(base.BaseArithmeticOpsTests):
    """Arithmetic tests; ``__rmod__`` with a scalar is skipped."""

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        op_name = all_arithmetic_operators
        if op_name == '__rmod__':
            # pytest.skip raises, so the parent test below is not reached.
            pytest.skip('rmod never called when string is first argument')
        super(TestArithmeticOps, self).test_arith_series_with_scalar(
            data, op_name)

    def test_add_series_with_extension_array(self, data):
        ser = pd.Series(data)
        with pytest.raises(TypeError, match="cannot perform"):
            ser + data

    def _check_divmod_op(self, s, op, other, exc=NotImplementedError):
        # The incoming ``exc`` is not forwarded; TypeError is always used.
        parent = super(TestArithmeticOps, self)
        return parent._check_divmod_op(s, op, other, exc=TypeError)
|
||||
|
||||
|
||||
class TestComparisonOps(base.BaseComparisonOpsTests):
    """Comparison tests: only ``__eq__`` and ``__ne__`` are value-checked."""

    def _compare_other(self, s, data, op_name, other):
        op = self.get_op_from_name(op_name)

        # Element-wise references for the two operators that are compared
        # by value; every other operator must raise TypeError.
        elementwise = {
            '__eq__': lambda x, y: x == y,
            '__ne__': lambda x, y: x != y,
        }

        if op_name in elementwise:
            result = op(s, other)
            expected = s.combine(other, elementwise[op_name])
            assert (result == expected).all()
        else:
            with pytest.raises(TypeError):
                op(data, other)
|
||||
|
||||
|
||||
class TestParsing(base.BaseParsingTests):
    """Inherits the base parsing tests without overrides."""
|
||||
@@ -0,0 +1,86 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes import dtypes
|
||||
from pandas.core.dtypes.common import is_extension_array_dtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import ExtensionArray
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class DummyDtype(dtypes.ExtensionDtype):
    """Bare ExtensionDtype subclass used by the tests in this module."""
|
||||
|
||||
|
||||
class DummyArray(ExtensionArray):
    """Minimal ExtensionArray wrapper around whatever ``data`` it is given."""

    def __init__(self, data):
        self.data = data

    def __array__(self, dtype=None):
        # ``dtype`` defaults to None so that a plain ``arr.__array__()`` call
        # works. The argument is not applied here; the wrapped data is
        # returned as-is.
        return self.data

    @property
    def dtype(self):
        return DummyDtype()

    def astype(self, dtype, copy=True):
        """Return self or a re-wrapped array for DummyDtype, else an ndarray.

        The ``copy=True`` path for DummyDtype wraps the same ``data`` object
        in a new DummyArray rather than copying the data.
        """
        # we don't support anything but a single dtype
        if isinstance(dtype, DummyDtype):
            if copy:
                return type(self)(self.data)
            return self

        return np.array(self, dtype=dtype, copy=copy)
|
||||
|
||||
|
||||
class TestExtensionArrayDtype(object):
    """Checks for ``is_extension_array_dtype`` on positive and negative inputs."""

    @pytest.mark.parametrize('values', [
        pd.Categorical([]),
        pd.Categorical([]).dtype,
        pd.Series(pd.Categorical([])),
        DummyDtype(),
        DummyArray(np.array([1, 2])),
    ])
    def test_is_extension_array_dtype(self, values):
        recognised = is_extension_array_dtype(values)
        assert recognised

    @pytest.mark.parametrize('values', [
        np.array([]),
        pd.Series(np.array([])),
    ])
    def test_is_not_extension_array_dtype(self, values):
        recognised = is_extension_array_dtype(values)
        assert not recognised
|
||||
|
||||
|
||||
def test_astype():
    """astype to object works for both the type and its string name."""
    arr = DummyArray(np.array([1, 2, 3]))
    expected = np.array([1, 2, 3], dtype=object)

    for target in (object, 'object'):
        result = arr.astype(target)
        tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_no_copy():
    """astype to the same dtype returns self only when copy=False."""
    arr = DummyArray(np.array([1, 2, 3], dtype=np.int64))

    same = arr.astype(arr.dtype, copy=False)
    assert arr is same

    # default copy=True must hand back a different object
    other = arr.astype(arr.dtype)
    assert arr is not other
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtype', [
    dtypes.CategoricalDtype(),
    dtypes.IntervalDtype(),
])
def test_is_extension_array_dtype(dtype):
    """Built-in extension dtypes are ExtensionDtype instances and recognised."""
    is_subclass_instance = isinstance(dtype, dtypes.ExtensionDtype)
    assert is_subclass_instance
    assert is_extension_array_dtype(dtype)
|
||||
@@ -0,0 +1,237 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import DatetimeArray
|
||||
from pandas.tests.extension import base
|
||||
|
||||
|
||||
@pytest.fixture(params=["US/Central"])
def dtype(request):
    """Provide a nanosecond DatetimeTZDtype for the parametrized timezone."""
    tz = request.param
    return DatetimeTZDtype(unit="ns", tz=tz)
|
||||
|
||||
|
||||
@pytest.fixture
def data(dtype):
    """Provide a DatetimeArray of 100 periods starting at "2000"."""
    stamps = pd.date_range("2000", periods=100, tz=dtype.tz)
    return DatetimeArray(stamps, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(dtype):
    """Provide a two-element DatetimeArray: NaT followed by 2000-01-01."""
    raw = np.array(['NaT', '2000-01-01'], dtype='datetime64[ns]')
    return DatetimeArray(raw, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting(dtype):
    """Provide three consecutive days arranged as [B, C, A]."""
    first_day = pd.Timestamp('2000-01-01')
    second_day = pd.Timestamp('2000-01-02')
    third_day = pd.Timestamp('2000-01-03')
    raw = np.array([second_day, third_day, first_day],
                   dtype='datetime64[ns]')
    return DatetimeArray(raw, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting(dtype):
    """Provide [B, NaT, A] where A is the day before B."""
    first_day = pd.Timestamp('2000-01-01')
    second_day = pd.Timestamp('2000-01-02')
    raw = np.array([second_day, 'NaT', first_day], dtype='datetime64[ns]')
    return DatetimeArray(raw, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping(dtype):
    """
    Expected to be like [B, B, NA, NA, A, A, B, C]

    Where A < B < C and NA is missing
    """
    na = 'NaT'
    a = pd.Timestamp('2000-01-01')
    b = pd.Timestamp('2000-01-02')
    c = pd.Timestamp('2000-01-03')
    layout = [b, b, na, na, a, a, b, c]
    raw = np.array(layout, dtype='datetime64[ns]')
    return DatetimeArray(raw, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Provide a comparison that is true only when both sides are pd.NaT."""
    def both_nat(left, right):
        # identity checks: left must be the NaT singleton and so must right
        return left is pd.NaT and left is right
    return both_nat
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """Provide the NA value used by these tests: ``pd.NaT``."""
    missing = pd.NaT
    return missing
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
class BaseDatetimeTests(object):
    """Empty mixin shared by the datetime test classes in this module."""
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Tests
|
||||
class TestDatetimeDtype(BaseDatetimeTests, base.BaseDtypeTests):
    """Inherits the base dtype tests without overrides."""
|
||||
|
||||
|
||||
class TestConstructors(BaseDatetimeTests, base.BaseConstructorsTests):
    """Inherits the base constructor tests without overrides."""
|
||||
|
||||
|
||||
class TestGetitem(BaseDatetimeTests, base.BaseGetitemTests):
    """Inherits the base getitem tests without overrides."""
|
||||
|
||||
|
||||
class TestMethods(BaseDatetimeTests, base.BaseMethodsTests):
    """Method tests; value_counts is skipped and combine_add is a no-op."""

    @pytest.mark.skip(reason="Incorrect expected")
    def test_value_counts(self, all_data, dropna):
        return None

    def test_combine_add(self, data_repeated):
        # Timestamp.__add__(Timestamp) not defined
        return None
|
||||
|
||||
|
||||
class TestInterface(BaseDatetimeTests, base.BaseInterfaceTests):
    """Interface tests; the array-interface check is skipped for tz data."""

    def test_array_interface(self, data):
        if data.tz:
            # np.asarray(DTA) is currently always tz-naive.
            # pytest.skip raises, so the parent test below is not reached.
            pytest.skip("GH-23569")
        super(TestInterface, self).test_array_interface(data)
|
||||
|
||||
|
||||
class TestArithmeticOps(BaseDatetimeTests, base.BaseArithmeticOpsTests):
    """Arithmetic tests: only subtraction is checked for a valid result."""

    # Operators checked with ``exc=None``; all others defer to the base tests.
    implements = {'__sub__', '__rsub__'}

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        if all_arithmetic_operators in self.implements:
            s = pd.Series(data)
            self.check_opname(s, all_arithmetic_operators, s.iloc[0],
                              exc=None)
        else:
            # ... but not the rest.
            super(TestArithmeticOps, self).test_arith_series_with_scalar(
                data, all_arithmetic_operators
            )

    def test_add_series_with_extension_array(self, data):
        # Datetime + Datetime not implemented
        s = pd.Series(data)
        msg = 'cannot add DatetimeArray and DatetimeArray'
        with pytest.raises(TypeError, match=msg):
            s + data

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        if all_arithmetic_operators in self.implements:
            # NOTE(review): the operand here is the scalar ``s.iloc[0]``, the
            # same as in the scalar test above - confirm whether an
            # array-like operand was intended.
            s = pd.Series(data)
            self.check_opname(s, all_arithmetic_operators, s.iloc[0],
                              exc=None)
        else:
            # ... but not the rest. Defer to the parent's *array* test; the
            # original called the scalar test here.
            super(TestArithmeticOps, self).test_arith_series_with_array(
                data, all_arithmetic_operators
            )

    def test_error(self, data, all_arithmetic_operators):
        pass

    @pytest.mark.xfail(reason="different implementation", strict=False)
    def test_direct_arith_with_series_returns_not_implemented(self, data):
        # Right now, we have trouble with this. Returning NotImplemented
        # fails other tests like
        # tests/arithmetic/test_datetime64::TestTimestampSeriesArithmetic::
        # test_dt64_seris_add_intlike
        return super(
            TestArithmeticOps,
            self
        ).test_direct_arith_with_series_returns_not_implemented(data)
|
||||
|
||||
|
||||
class TestCasting(BaseDatetimeTests, base.BaseCastingTests):
    """Inherits ``base.BaseCastingTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestComparisonOps(BaseDatetimeTests, base.BaseComparisonOpsTests):
    """Comparison tests with the comparison helper disabled."""

    def _compare_other(self, s, data, op_name, other):
        """Do nothing.

        the base test is not appropriate for us. We raise on comparison
        with (some) integers, depending on the value.
        """

    @pytest.mark.xfail(reason="different implementation", strict=False)
    def test_direct_arith_with_series_returns_not_implemented(self, data):
        """Run the base test, which is allowed to fail (non-strict xfail)."""
        parent = super(TestComparisonOps, self)
        return parent.test_direct_arith_with_series_returns_not_implemented(
            data)
|
||||
|
||||
|
||||
class TestMissing(BaseDatetimeTests, base.BaseMissingTests):
    """Inherits ``base.BaseMissingTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestReshaping(BaseDatetimeTests, base.BaseReshapingTests):
    """Reshaping tests with a hand-built ``unstack`` expectation."""

    @pytest.mark.skip(reason="We have DatetimeTZBlock")
    def test_concat(self, data, in_frame):
        """Skipped replacement for the base ``test_concat``."""

    def test_concat_mixed_dtypes(self, data):
        """Run the base mixed-dtype concat test unchanged."""
        # concat(Series[datetimetz], Series[category]) uses a
        # plain np.array(values) on the DatetimeArray, which
        # drops the tz.
        super(TestReshaping, self).test_concat_mixed_dtypes(data)

    @pytest.mark.parametrize("obj", ["series", "frame"])
    def test_unstack(self, obj):
        """Unstack level 0 of a tz-aware Series or DataFrame."""
        # GH-13287: can't use base test, since building the expected fails.
        stamps = ['2000', '2001', '2002', '2003']
        data = DatetimeArray._from_sequence(stamps, tz='US/Central')
        levels = [['A', 'B'], ['a', 'b']]
        index = pd.MultiIndex.from_product(levels, names=['a', 'b'])
        expected_index = pd.Index(['a', 'b'], name='b')

        if obj != "series":
            ser = pd.DataFrame({"A": data, "B": data}, index=index)
            columns = {
                ("A", "A"): data.take([0, 1]),
                ("A", "B"): data.take([2, 3]),
                ("B", "A"): data.take([0, 1]),
                ("B", "B"): data.take([2, 3]),
            }
            expected = pd.DataFrame(columns, index=expected_index)
            expected.columns.names = [None, 'a']
        else:
            ser = pd.Series(data, index=index)
            columns = {"A": data.take([0, 1]), "B": data.take([2, 3])}
            expected = pd.DataFrame(columns, index=expected_index)
            expected.columns.name = 'a'

        result = ser.unstack(0)
        self.assert_equal(result, expected)
|
||||
|
||||
|
||||
class TestSetitem(BaseDatetimeTests, base.BaseSetitemTests):
    """Inherits ``base.BaseSetitemTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestGroupby(BaseDatetimeTests, base.BaseGroupbyTests):
    """Inherits ``base.BaseGroupbyTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestPrinting(BaseDatetimeTests, base.BasePrintingTests):
    """Inherits ``base.BasePrintingTests`` with no overrides."""
|
||||
@@ -0,0 +1,76 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=W0102
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.internals import BlockManager, SingleBlockManager
|
||||
from pandas.core.internals.blocks import Block, NonConsolidatableMixIn
|
||||
|
||||
|
||||
class CustomBlock(NonConsolidatableMixIn, Block):
    """Non-consolidatable block with custom formatting and concatenation."""

    _holder = np.ndarray

    def formatting_values(self):
        """Return the values as an array of ``"Val: <value>"`` strings."""
        labels = []
        for value in self.values:
            labels.append("Val: {}".format(value))
        return np.array(labels)

    def concat_same_type(self, to_concat, placement=None):
        """
        Always concatenate disregarding self.ndim as the values are
        always 1D in this custom Block
        """
        values = np.concatenate([blk.values for blk in to_concat])
        if not placement:
            # Falsy placement: span the whole concatenated result.
            placement = slice(0, len(values), 1)
        return self.make_block_same_class(values, placement=placement)
|
||||
|
||||
|
||||
@pytest.fixture
def df():
    """Two-column frame whose second block is a ``CustomBlock``."""
    df1 = pd.DataFrame({'a': [1, 2, 3]})
    values = np.arange(3, dtype='int64')
    custom_block = CustomBlock(values, placement=slice(1, 2))
    # Append the custom block after the blocks of the plain frame.
    all_blocks = df1._data.blocks + (custom_block,)
    axes = [pd.Index(['a', 'b']), df1.index]
    return pd.DataFrame(BlockManager(all_blocks, axes))
|
||||
|
||||
|
||||
def test_custom_repr():
    """Series and DataFrame reprs must use the block's formatting values."""
    values = np.arange(3, dtype='int64')

    # series
    block = CustomBlock(values, placement=slice(0, 3))

    s = pd.Series(SingleBlockManager(block, pd.RangeIndex(3)))
    # The repr is column-aligned, so the padding between the index and the
    # values is part of the expected text.
    assert repr(s) == '0    Val: 0\n1    Val: 1\n2    Val: 2\ndtype: int64'

    # dataframe
    block = CustomBlock(values, placement=slice(0, 1))
    blk_mgr = BlockManager([block], [['col'], range(3)])
    df = pd.DataFrame(blk_mgr)
    assert repr(df) == '      col\n0  Val: 0\n1  Val: 1\n2  Val: 2'
|
||||
|
||||
|
||||
def test_concat_series():
    """Concatenating Series backed by a CustomBlock keeps the block type."""
    # GH17728
    block = CustomBlock(np.arange(3, dtype='int64'), placement=slice(0, 3))
    s = pd.Series(block, pd.RangeIndex(3), fastpath=True)

    res = pd.concat([s, s])
    first_block = res._data.blocks[0]
    assert isinstance(first_block, CustomBlock)
|
||||
|
||||
|
||||
def test_concat_dataframe(df):
    """Row-wise concat keeps a CustomBlock as the second block."""
    # GH17728
    res = pd.concat([df, df])
    second_block = res._data.blocks[1]
    assert isinstance(second_block, CustomBlock)
|
||||
|
||||
|
||||
def test_concat_axis1(df):
    """Column-wise concat keeps a CustomBlock as the second block."""
    # GH17954
    float_frame = pd.DataFrame({'c': [.1, .2, .3]})
    res = pd.concat([df, float_frame], axis=1)
    second_block = res._data.blocks[1]
    assert isinstance(second_block, CustomBlock)
|
||||
@@ -0,0 +1,224 @@
|
||||
"""
|
||||
This file contains a minimal set of tests for compliance with the extension
|
||||
array interface test suite, and should contain no other tests.
|
||||
The test suite for the full functionality of the array is located in
|
||||
`pandas/tests/arrays/`.
|
||||
|
||||
The tests in this file are inherited from the BaseExtensionTests, and only
|
||||
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||||
parent method).
|
||||
|
||||
Additional tests should either be added to one of the BaseExtensionTests
|
||||
classes (if they are relevant for the extension interface for all dtypes), or
|
||||
be added to the array-specific tests in `pandas/tests/arrays/`.
|
||||
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_extension_array_dtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import integer_array
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, UInt8Dtype, UInt16Dtype,
|
||||
UInt32Dtype, UInt64Dtype)
|
||||
from pandas.tests.extension import base
|
||||
|
||||
|
||||
def make_data():
    """Return 100 values: 1..100 with NaN in place of 9 and 98."""
    values = list(range(1, 9))
    values.append(np.nan)
    values.extend(range(10, 98))
    values.append(np.nan)
    values.extend([99, 100])
    return values
|
||||
|
||||
|
||||
@pytest.fixture(params=[Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype,
                        UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype])
def dtype(request):
    """Instance of each listed integer dtype class, one per parametrization."""
    dtype_class = request.param
    return dtype_class()
|
||||
|
||||
|
||||
@pytest.fixture
def data(dtype):
    """Integer array built from ``make_data()`` with the requested dtype."""
    raw_values = make_data()
    return integer_array(raw_values, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(dtype):
    """Integer array built from ``[np.nan, 1]``."""
    raw_values = [np.nan, 1]
    return integer_array(raw_values, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting(dtype):
    """Integer array built from ``[1, 2, 0]``."""
    raw_values = [1, 2, 0]
    return integer_array(raw_values, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting(dtype):
    """Integer array built from ``[1, np.nan, 0]``."""
    raw_values = [1, np.nan, 0]
    return integer_array(raw_values, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Comparator that is true only when both arguments are NaN."""
    # we are np.nan
    def both_nan(x, y):
        return np.isnan(x) and np.isnan(y)

    return both_nan
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """The missing-value marker used by these tests: ``np.nan``."""
    missing = np.nan
    return missing
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping(dtype):
    """Integer array laid out as ``[b, b, NA, NA, a, a, b, c]``."""
    a, b, c = 0, 1, 2
    na = np.nan
    layout = [b, b, na, na, a, a, b, c]
    return integer_array(layout, dtype=dtype)
|
||||
|
||||
|
||||
class TestDtype(base.BaseDtypeTests):
    """Base dtype tests with one test skipped."""

    @pytest.mark.skip(reason="using multiple dtypes")
    def test_is_dtype_unboxes_dtype(self):
        """Skipped: we have multiple dtypes."""
|
||||
|
||||
|
||||
class TestArithmeticOps(base.BaseArithmeticOpsTests):
    """Base arithmetic tests adapted to operations that do not raise."""

    def check_opname(self, s, op_name, other, exc=None):
        """Forward to the base check, always passing ``exc=None``."""
        # overwriting to indicate ops don't raise an error
        parent = super(TestArithmeticOps, self)
        parent.check_opname(s, op_name, other, exc=None)

    def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
        """Compare ``op(s, other)`` with ``s.combine(other, op)``.

        When ``exc`` is not None the operation is only required to raise
        that exception.
        """
        if exc is not None:
            with pytest.raises(exc):
                op(s, other)
            return

        if s.dtype.is_unsigned_integer and (op_name == '__rsub__'):
            # TODO see https://github.com/pandas-dev/pandas/issues/22023
            pytest.skip("unsigned subtraction gives negative values")

        if hasattr(other, 'dtype'):
            plain_dtype = not is_extension_array_dtype(other.dtype)
            if plain_dtype and pd.api.types.is_integer_dtype(other.dtype):
                # other is np.int64 and would therefore always result in
                # upcasting, so keeping other as same numpy_dtype
                other = other.astype(s.dtype.numpy_dtype)

        result = op(s, other)
        expected = s.combine(other, op)

        if op_name == '__rdiv__':
            # combine is not giving the correct result for this case
            pytest.skip("skipping reverse div in python 2")
        elif op_name in ('__rtruediv__', '__truediv__', '__div__'):
            expected = expected.astype(float)
            if op_name == '__rtruediv__':
                # TODO reverse operators result in object dtype
                result = result.astype(float)
        elif op_name.startswith('__r'):
            # TODO reverse operators result in object dtype
            # see https://github.com/pandas-dev/pandas/issues/22024
            expected = expected.astype(s.dtype)
            result = result.astype(s.dtype)
        else:
            # combine method result in 'biggest' (int64) dtype
            expected = expected.astype(s.dtype)

        if (op_name == '__rpow__') and isinstance(other, pd.Series):
            # TODO pow on Int arrays gives different result with NA
            # see https://github.com/pandas-dev/pandas/issues/22022
            result = result.fillna(1)

        self.assert_series_equal(result, expected)

    def _check_divmod_op(self, s, op, other, exc=None):
        """Forward to the base divmod check, always passing ``None``."""
        parent = super(TestArithmeticOps, self)
        parent._check_divmod_op(s, op, other, None)

    @pytest.mark.skip(reason="intNA does not error on ops")
    def test_error(self, data, all_arithmetic_operators):
        """Skipped: other specific errors are tested in the integer array
        specific tests."""
|
||||
|
||||
|
||||
class TestComparisonOps(base.BaseComparisonOpsTests):
    """Base comparison tests with comparisons expected not to raise."""

    def check_opname(self, s, op_name, other, exc=None):
        """Forward to the base check, always passing ``exc=None``."""
        parent = super(TestComparisonOps, self)
        parent.check_opname(s, op_name, other, exc=None)

    def _compare_other(self, s, data, op_name, other):
        """Check the comparison through ``check_opname``."""
        self.check_opname(s, op_name, other)
|
||||
|
||||
|
||||
class TestInterface(base.BaseInterfaceTests):
    """Inherits ``base.BaseInterfaceTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestConstructors(base.BaseConstructorsTests):
    """Inherits ``base.BaseConstructorsTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestReshaping(base.BaseReshapingTests):
    """Inherits ``base.BaseReshapingTests`` with no overrides."""

    # for test_concat_mixed_dtypes test
    # concat of an Integer and Int coerces to object dtype
    # TODO(jreback) once integrated this would
|
||||
|
||||
|
||||
class TestGetitem(base.BaseGetitemTests):
    """Inherits ``base.BaseGetitemTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestSetitem(base.BaseSetitemTests):
    """Inherits ``base.BaseSetitemTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestMissing(base.BaseMissingTests):
    """Inherits ``base.BaseMissingTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestMethods(base.BaseMethodsTests):
    """Base method tests with a custom ``value_counts`` check."""

    @pytest.mark.parametrize('dropna', [True, False])
    def test_value_counts(self, all_data, dropna):
        """Compare value counts on the first ten elements.

        The expected index is cast to the array's dtype before comparing.
        """
        all_data = all_data[:10]
        other = np.array(all_data[~all_data.isna()]) if dropna else all_data

        counts = pd.Series(all_data).value_counts(dropna=dropna)
        result = counts.sort_index()

        expected_counts = pd.Series(other).value_counts(dropna=dropna)
        expected = expected_counts.sort_index()
        expected.index = expected.index.astype(all_data.dtype)

        self.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestCasting(base.BaseCastingTests):
    """Inherits ``base.BaseCastingTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestGroupby(base.BaseGroupbyTests):
    """Inherits ``base.BaseGroupbyTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestNumericReduce(base.BaseNumericReduceTests):
    """Inherits ``base.BaseNumericReduceTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestBooleanReduce(base.BaseBooleanReduceTests):
    """Inherits ``base.BaseBooleanReduceTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestPrinting(base.BasePrintingTests):
    """Inherits ``base.BasePrintingTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestParsing(base.BaseParsingTests):
    """Inherits ``base.BaseParsingTests`` with no overrides."""
|
||||
@@ -0,0 +1,162 @@
|
||||
"""
|
||||
This file contains a minimal set of tests for compliance with the extension
|
||||
array interface test suite, and should contain no other tests.
|
||||
The test suite for the full functionality of the array is located in
|
||||
`pandas/tests/arrays/`.
|
||||
|
||||
The tests in this file are inherited from the BaseExtensionTests, and only
|
||||
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||||
parent method).
|
||||
|
||||
Additional tests should either be added to one of the BaseExtensionTests
|
||||
classes (if they are relevant for the extension interface for all dtypes), or
|
||||
be added to the array-specific tests in `pandas/tests/arrays/`.
|
||||
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import IntervalDtype
|
||||
|
||||
from pandas import Interval
|
||||
from pandas.core.arrays import IntervalArray
|
||||
from pandas.tests.extension import base
|
||||
|
||||
|
||||
def make_data():
    """Return a list of 100 random ``Interval`` objects.

    Left edges are a cumulative sum of uniform draws; each right edge is
    its left edge plus another uniform draw.
    """
    size = 100
    lefts = np.random.uniform(size=size).cumsum()
    rights = lefts + np.random.uniform(size=size)
    intervals = []
    for lo, hi in zip(lefts, rights):
        intervals.append(Interval(lo, hi))
    return intervals
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """A default-constructed ``IntervalDtype``."""
    interval_dtype = IntervalDtype()
    return interval_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """IntervalArray built from ``make_data()`` for semantics tests."""
    intervals = make_data()
    return IntervalArray(intervals)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """Length 2 array with [NA, Valid]"""
    tuples = [None, (0, 1)]
    return IntervalArray.from_tuples(tuples)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting():
    """IntervalArray built from the tuples (1, 2), (2, 3), (0, 1)."""
    tuples = [(1, 2), (2, 3), (0, 1)]
    return IntervalArray.from_tuples(tuples)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting():
    """IntervalArray built from the tuples (1, 2), None, (0, 1)."""
    tuples = [(1, 2), None, (0, 1)]
    return IntervalArray.from_tuples(tuples)
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """The missing-value marker used by these tests: ``np.nan``."""
    missing = np.nan
    return missing
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping():
    """IntervalArray laid out as ``[b, b, None, None, a, a, b, c]``."""
    a, b, c = (0, 1), (1, 2), (2, 3)
    layout = [b, b, None, None, a, a, b, c]
    return IntervalArray.from_tuples(layout)
|
||||
|
||||
|
||||
class BaseInterval(object):
    """Empty shared mixin for the interval extension test classes."""
|
||||
|
||||
|
||||
class TestDtype(BaseInterval, base.BaseDtypeTests):
    """Inherits ``base.BaseDtypeTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestCasting(BaseInterval, base.BaseCastingTests):
    """Inherits ``base.BaseCastingTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestConstructors(BaseInterval, base.BaseConstructorsTests):
    """Inherits ``base.BaseConstructorsTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestGetitem(BaseInterval, base.BaseGetitemTests):
    """Inherits ``base.BaseGetitemTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestGrouping(BaseInterval, base.BaseGroupbyTests):
    """Inherits ``base.BaseGroupbyTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestInterface(BaseInterval, base.BaseInterfaceTests):
    """Inherits ``base.BaseInterfaceTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestReduce(base.BaseNoReduceTests):
    """Inherits ``base.BaseNoReduceTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestMethods(BaseInterval, base.BaseMethodsTests):
    """Base method tests with two tests skipped."""

    @pytest.mark.skip(reason='addition is not defined for intervals')
    def test_combine_add(self, data_repeated):
        """Skipped replacement for the base ``test_combine_add``."""

    @pytest.mark.skip(reason="Not Applicable")
    def test_fillna_length_mismatch(self, data_missing):
        """Skipped replacement for the base length-mismatch test."""
|
||||
|
||||
|
||||
class TestMissing(BaseInterval, base.BaseMissingTests):
    """Base missing-data tests with the non-scalar fill tests skipped."""

    # Index.fillna only accepts scalar `value`, so we have to skip all
    # non-scalar fill tests.
    unsupported_fill = pytest.mark.skip("Unsupported fillna option.")

    @unsupported_fill
    def test_fillna_limit_pad(self):
        """Skipped via ``unsupported_fill``."""

    @unsupported_fill
    def test_fillna_series_method(self):
        """Skipped via ``unsupported_fill``."""

    @unsupported_fill
    def test_fillna_limit_backfill(self):
        """Skipped via ``unsupported_fill``."""

    @unsupported_fill
    def test_fillna_series(self):
        """Skipped via ``unsupported_fill``."""

    def test_non_scalar_raises(self, data_missing):
        """Filling with a list must raise TypeError."""
        with pytest.raises(TypeError, match="Got a 'list' instead."):
            data_missing.fillna([1, 1])
|
||||
|
||||
|
||||
class TestReshaping(BaseInterval, base.BaseReshapingTests):
    """Inherits ``base.BaseReshapingTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestSetitem(BaseInterval, base.BaseSetitemTests):
    """Inherits ``base.BaseSetitemTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestPrinting(BaseInterval, base.BasePrintingTests):
    """Base printing tests with the array repr test skipped."""

    @pytest.mark.skip(reason="custom repr")
    def test_array_repr(self, data, size):
        """Skipped replacement for the base ``test_array_repr``."""
|
||||
|
||||
|
||||
class TestParsing(BaseInterval, base.BaseParsingTests):
    """Parsing tests: the base EA parsing test must raise."""

    @pytest.mark.parametrize('engine', ['c', 'python'])
    def test_EA_types(self, engine, data):
        """The base test must raise NotImplementedError with this message."""
        pattern = r'.*must implement _from_sequence_of_strings.*'
        parent = super(TestParsing, self)
        with pytest.raises(NotImplementedError, match=pattern):
            parent.test_EA_types(engine, data)
|
||||
@@ -0,0 +1,216 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import compat
|
||||
from pandas.core.arrays.numpy_ import PandasArray, PandasDtype
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from . import base
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """``PandasDtype`` wrapping NumPy's ``'float'`` dtype."""
    numpy_dtype = np.dtype('float')
    return PandasDtype(numpy_dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def allow_in_pandas(monkeypatch):
    """
    A monkeypatch that tells pandas to let us in.

    By default, passing a PandasArray to an index / series / frame
    constructor will unbox that PandasArray to an ndarray, and treat
    it as a non-EA column. We don't want people using EAs without
    reason.

    The mechanism for this is a check against ABCPandasArray
    in each constructor.

    But, for testing, we need to allow them in pandas. So we patch
    the _typ of PandasArray, so that we evade the ABCPandasArray
    check.
    """
    with monkeypatch.context() as patcher:
        # The patch stays active while the test runs and is undone when
        # the context exits.
        patcher.setattr(PandasArray, '_typ', 'extension')
        yield
|
||||
|
||||
|
||||
@pytest.fixture
def data(allow_in_pandas, dtype):
    """PandasArray holding 1..100 in the fixture dtype's NumPy dtype."""
    values = np.arange(1, 101, dtype=dtype._dtype)
    return PandasArray(values)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(allow_in_pandas):
    """PandasArray built from ``[np.nan, 1.0]``."""
    values = np.array([np.nan, 1.0])
    return PandasArray(values)
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """The missing-value marker used by these tests: ``np.nan``."""
    missing = np.nan
    return missing
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Comparator that is true only when both arguments are NaN."""
    return lambda a, b: np.isnan(a) and np.isnan(b)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting(allow_in_pandas):
    """Length-3 array with a known sort order.

    This should be three items [B, C, A] with
    A < B < C
    """
    values = np.array([1, 2, 0])
    return PandasArray(values)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting(allow_in_pandas):
    """Length-3 array with a known sort order.

    This should be three items [B, NA, A] with
    A < B and NA missing.
    """
    values = np.array([1, np.nan, 0])
    return PandasArray(values)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping(allow_in_pandas):
    """Data for factorization, grouping, and unique tests.

    Expected to be like [B, B, NA, NA, A, A, B, C]

    Where A < B < C and NA is missing
    """
    a, b, c = np.arange(3)
    na = np.nan
    layout = [b, b, na, na, a, a, b, c]
    return PandasArray(np.array(layout))
|
||||
|
||||
|
||||
class BaseNumPyTests(object):
    """Empty shared mixin for the PandasArray extension test classes."""
|
||||
|
||||
|
||||
class TestCasting(BaseNumPyTests, base.BaseCastingTests):
    """Inherits ``base.BaseCastingTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestConstructors(BaseNumPyTests, base.BaseConstructorsTests):
    """Base constructor tests with ``test_from_dtype`` skipped."""

    @pytest.mark.skip(reason="We don't register our dtype")
    def test_from_dtype(self, data):
        """Skipped.

        We don't want to register. This test should probably be split in
        two.
        """
|
||||
|
||||
|
||||
class TestDtype(BaseNumPyTests, base.BaseDtypeTests):
    """Base dtype tests with ``test_check_dtype`` skipped."""

    @pytest.mark.skip(reason="Incorrect expected.")
    def test_check_dtype(self, data):
        """Skipped: we unsurprisingly clash with a NumPy name."""
|
||||
|
||||
|
||||
class TestGetitem(BaseNumPyTests, base.BaseGetitemTests):
    """Inherits ``base.BaseGetitemTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests):
    """Inherits ``base.BaseGroupbyTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestInterface(BaseNumPyTests, base.BaseInterfaceTests):
    """Inherits ``base.BaseInterfaceTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestMethods(BaseNumPyTests, base.BaseMethodsTests):
    """Base method tests with two tests skipped."""

    @pytest.mark.skip(reason="TODO: remove?")
    def test_value_counts(self, all_data, dropna):
        """Skipped replacement for the base ``test_value_counts``."""

    @pytest.mark.skip(reason="Incorrect expected")
    def test_combine_le(self, data_repeated):
        """Skipped.

        We have a bool dtype, so the result is an ExtensionArray
        but expected is not
        """
        parent = super(TestMethods, self)
        parent.test_combine_le(data_repeated)
|
||||
|
||||
|
||||
class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests):
    """Arithmetic tests with every expected-exception attribute cleared."""

    divmod_exc = None
    series_scalar_exc = None
    frame_scalar_exc = None
    series_array_exc = None

    def _skip_classic_div_on_py2(self, op_name):
        """Skip the current test for ``__div__``/``__rdiv__`` on Python 2."""
        if compat.PY2 and op_name in {'__div__', '__rdiv__'}:
            pytest.skip(
                "Matching NumPy int / int -> float behavior."
            )

    def test_divmod_series_array(self, data):
        """Check divmod of a Series against the array itself."""
        self._check_divmod_op(pd.Series(data), divmod, data, exc=None)

    @pytest.mark.skip("We implement ops")
    def test_error(self, data, all_arithmetic_operators):
        """Skipped replacement for the base ``test_error``."""

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        """Run the base scalar test unless the PY2 division skip applies."""
        self._skip_classic_div_on_py2(all_arithmetic_operators)
        parent = super(TestArithmetics, self)
        parent.test_arith_series_with_scalar(data, all_arithmetic_operators)

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """Run the base array test unless the PY2 division skip applies."""
        self._skip_classic_div_on_py2(all_arithmetic_operators)
        parent = super(TestArithmetics, self)
        parent.test_arith_series_with_array(data, all_arithmetic_operators)
|
||||
|
||||
|
||||
class TestPrinting(BaseNumPyTests, base.BasePrintingTests):
    """Inherits ``base.BasePrintingTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestNumericReduce(BaseNumPyTests, base.BaseNumericReduceTests):
    """Numeric reduction tests with a custom comparison."""

    def check_reduce(self, s, op_name, skipna):
        """Compare the reduction on ``s`` with the same reduction after
        casting ``s`` to its underlying NumPy dtype."""
        reduce_ext = getattr(s, op_name)
        result = reduce_ext(skipna=skipna)
        # avoid coercing int -> float. Just cast to the actual numpy type.
        as_numpy = s.astype(s.dtype._dtype)
        expected = getattr(as_numpy, op_name)(skipna=skipna)
        tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
class TestBooleanReduce(BaseNumPyTests, base.BaseBooleanReduceTests):
    """Inherits ``base.BaseBooleanReduceTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestMising(BaseNumPyTests, base.BaseMissingTests):
    """Inherits ``base.BaseMissingTests`` with no overrides."""
    # NOTE(review): the class name looks like a typo for "TestMissing";
    # it is kept because renaming would change the collected test IDs.
|
||||
|
||||
|
||||
class TestReshaping(BaseNumPyTests, base.BaseReshapingTests):
    """Base reshaping tests with the mixed-dtype concat test skipped."""

    @pytest.mark.skip("Incorrect parent test")
    def test_concat_mixed_dtypes(self, data):
        """Skipped: not actually a mixed concat, since we concat int and
        int."""
        parent = super(TestReshaping, self)
        parent.test_concat_mixed_dtypes(data)
|
||||
|
||||
|
||||
class TestSetitem(BaseNumPyTests, base.BaseSetitemTests):
    """Inherits ``base.BaseSetitemTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestParsing(BaseNumPyTests, base.BaseParsingTests):
    """Inherits ``base.BaseParsingTests`` with no overrides."""
|
||||
@@ -0,0 +1,166 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslib import iNaT
|
||||
|
||||
from pandas.core.dtypes.dtypes import PeriodDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import PeriodArray
|
||||
from pandas.tests.extension import base
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """``PeriodDtype`` with frequency ``'D'``."""
    period_dtype = PeriodDtype(freq='D')
    return period_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data(dtype):
    """PeriodArray over the ordinals 1970..2069 with the fixture's freq."""
    ordinals = np.arange(1970, 2070)
    return PeriodArray(ordinals, freq=dtype.freq)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting(dtype):
    """PeriodArray built from ``[2018, 2019, 2017]``."""
    ordinals = [2018, 2019, 2017]
    return PeriodArray(ordinals, freq=dtype.freq)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(dtype):
    """PeriodArray built from ``[iNaT, 2017]``."""
    ordinals = [iNaT, 2017]
    return PeriodArray(ordinals, freq=dtype.freq)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting(dtype):
    """PeriodArray built from ``[2018, iNaT, 2017]``."""
    ordinals = [2018, iNaT, 2017]
    return PeriodArray(ordinals, freq=dtype.freq)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping(dtype):
    """PeriodArray laid out as ``[B, B, NA, NA, A, A, B, C]``."""
    A, B, C = 2017, 2018, 2019
    NA = iNaT
    layout = [B, B, NA, NA, A, A, B, C]
    return PeriodArray(layout, freq=dtype.freq)
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """The missing-value marker used by these tests: ``pd.NaT``."""
    missing = pd.NaT
    return missing
|
||||
|
||||
|
||||
class BasePeriodTests(object):
    """Empty shared mixin for the period extension test classes."""
|
||||
|
||||
|
||||
class TestPeriodDtype(BasePeriodTests, base.BaseDtypeTests):
    """Inherits ``base.BaseDtypeTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestConstructors(BasePeriodTests, base.BaseConstructorsTests):
    """Inherits ``base.BaseConstructorsTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestGetitem(BasePeriodTests, base.BaseGetitemTests):
    """Inherits ``base.BaseGetitemTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestMethods(BasePeriodTests, base.BaseMethodsTests):
    """Base method tests with ``test_combine_add`` turned into a no-op."""

    def test_combine_add(self, data_repeated):
        """No-op override: Period + Period is not defined."""
|
||||
|
||||
|
||||
class TestInterface(BasePeriodTests, base.BaseInterfaceTests):
    """Inherits ``base.BaseInterfaceTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestArithmeticOps(BasePeriodTests, base.BaseArithmeticOpsTests):
    """Arithmetic tests where only the operators in ``implements`` succeed."""

    # Operators checked with ``exc=None``; every other operator is handed
    # to the matching base-class test.
    implements = {'__sub__', '__rsub__'}

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        """Check implemented ops directly, defer the rest to the base test."""
        # we implement subtraction...
        if all_arithmetic_operators in self.implements:
            s = pd.Series(data)
            self.check_opname(s, all_arithmetic_operators, s.iloc[0],
                              exc=None)
        else:
            # ... but not the rest.
            super(TestArithmeticOps, self).test_arith_series_with_scalar(
                data, all_arithmetic_operators
            )

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """Check implemented ops directly, defer the rest to the base test."""
        if all_arithmetic_operators in self.implements:
            s = pd.Series(data)
            self.check_opname(s, all_arithmetic_operators, s.iloc[0],
                              exc=None)
        else:
            # ... but not the rest. Delegate to the *array* base test; this
            # used to call the scalar variant, so the base array test was
            # never exercised.
            super(TestArithmeticOps, self).test_arith_series_with_array(
                data, all_arithmetic_operators
            )

    def _check_divmod_op(self, s, op, other, exc=NotImplementedError):
        """Run the base divmod check, always expecting TypeError."""
        super(TestArithmeticOps, self)._check_divmod_op(
            s, op, other, exc=TypeError
        )

    def test_add_series_with_extension_array(self, data):
        """Adding the array to a Series of itself must raise TypeError."""
        # we don't implement + for Period
        s = pd.Series(data)
        msg = (r"unsupported operand type\(s\) for \+: "
               r"\'PeriodArray\' and \'PeriodArray\'")
        with pytest.raises(TypeError, match=msg):
            s + data

    def test_error(self):
        """No-op override of the base ``test_error``."""
        pass

    def test_direct_arith_with_series_returns_not_implemented(self, data):
        """``data.__sub__(Series)`` must return NotImplemented."""
        # Override to use __sub__ instead of __add__
        other = pd.Series(data)
        result = data.__sub__(other)
        assert result is NotImplemented
|
||||
|
||||
|
||||
class TestCasting(BasePeriodTests, base.BaseCastingTests):
    """Inherits ``base.BaseCastingTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestComparisonOps(BasePeriodTests, base.BaseComparisonOpsTests):
    """Comparison tests with the comparison helper disabled."""

    def _compare_other(self, s, data, op_name, other):
        """Do nothing.

        the base test is not appropriate for us. We raise on comparison
        with (some) integers, depending on the value.
        """
|
||||
|
||||
|
||||
class TestMissing(BasePeriodTests, base.BaseMissingTests):
    """Inherits ``base.BaseMissingTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestReshaping(BasePeriodTests, base.BaseReshapingTests):
    """Inherits ``base.BaseReshapingTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestSetitem(BasePeriodTests, base.BaseSetitemTests):
    """Inherits ``base.BaseSetitemTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestGroupby(BasePeriodTests, base.BaseGroupbyTests):
    """Inherits ``base.BaseGroupbyTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestPrinting(BasePeriodTests, base.BasePrintingTests):
    """Inherits ``base.BasePrintingTests`` with no overrides."""
|
||||
|
||||
|
||||
class TestParsing(BasePeriodTests, base.BaseParsingTests):
    """Parsing tests: the base EA parsing test must raise."""

    @pytest.mark.parametrize('engine', ['c', 'python'])
    def test_EA_types(self, engine, data):
        """The base test must raise NotImplementedError with this message."""
        pattern = r'.*must implement _from_sequence_of_strings.*'
        parent = super(TestParsing, self)
        with pytest.raises(NotImplementedError, match=pattern):
            parent.test_EA_types(engine, data)
|
||||
@@ -0,0 +1,370 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import SparseArray, SparseDtype
|
||||
from pandas.tests.extension import base
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def make_data(fill_value):
    """Return 100 random values with every third one set to ``fill_value``.

    A NaN ``fill_value`` gives uniform floats; anything else gives random
    integers from 1 to 99.
    """
    if not np.isnan(fill_value):
        values = np.random.randint(1, 100, size=100)
        # Make sure the first two entries differ.
        if values[0] == values[1]:
            values[0] += 1
    else:
        values = np.random.uniform(size=100)

    values[2::3] = fill_value
    return values
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """A default-constructed ``SparseDtype``."""
    sparse_dtype = SparseDtype()
    return sparse_dtype
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data(request):
    """SparseArray built from ``make_data`` for semantics tests."""
    fill_value = request.param
    return SparseArray(make_data(fill_value), fill_value=fill_value)
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data_missing(request):
    """Length 2 array with [NA, Valid]"""
    fill_value = request.param
    return SparseArray([np.nan, 1], fill_value=fill_value)
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data_repeated(request):
    """Return different versions of data for count times"""
    def gen(count):
        produced = 0
        while produced < count:
            # A fresh random array is generated for every item.
            yield SparseArray(make_data(request.param),
                              fill_value=request.param)
            produced += 1
    yield gen
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data_for_sorting(request):
    """SparseArray built from ``[2, 3, 1]``."""
    fill_value = request.param
    return SparseArray([2, 3, 1], fill_value=fill_value)
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data_missing_for_sorting(request):
    """SparseArray built from ``[2, np.nan, 1]``."""
    fill_value = request.param
    return SparseArray([2, np.nan, 1], fill_value=fill_value)
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """The missing-value marker used by these tests: ``np.nan``."""
    missing = np.nan
    return missing
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Comparator that is true only when ``pd.isna`` holds for both sides."""
    def both_missing(left, right):
        return pd.isna(left) and pd.isna(right)

    return both_missing
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data_for_grouping(request):
    """SparseArray built from ``[1, 1, NaN, NaN, 2, 2, 1, 3]``."""
    layout = [1, 1, np.nan, np.nan, 2, 2, 1, 3]
    return SparseArray(layout, fill_value=request.param)
|
||||
|
||||
|
||||
class BaseSparseTests(object):
    """Shared mixin providing a skip helper for the sparse test classes."""

    def _check_unsupported(self, data):
        """Skip the current test when ``data`` has dtype Sparse[int, 0]."""
        int_sparse = SparseDtype(int, 0)
        if data.dtype == int_sparse:
            pytest.skip("Can't store nan in int array.")
|
||||
|
||||
|
||||
class TestDtype(BaseSparseTests, base.BaseDtypeTests):
    """Base dtype tests with a custom array-type check."""

    def test_array_type_with_arg(self, data, dtype):
        """``construct_array_type`` must return ``SparseArray`` itself."""
        array_type = dtype.construct_array_type()
        assert array_type is SparseArray
|
||||
|
||||
|
||||
class TestInterface(BaseSparseTests, base.BaseInterfaceTests):
    """Base interface tests with the no-values-attribute check skipped."""

    def test_no_values_attribute(self, data):
        """Always skipped at run time."""
        pytest.skip("We have values")
|
||||
|
||||
|
||||
class TestConstructors(BaseSparseTests, base.BaseConstructorsTests):
    """Constructor tests inherited unchanged from the base suite."""
|
||||
|
||||
|
||||
class TestReshaping(BaseSparseTests, base.BaseReshapingTests):
    """Reshaping tests; NaN-introducing ones skip for ``Sparse[int, 0]``."""

    def test_concat_mixed_dtypes(self, data):
        """Concatenating sparse, int and categorical frames gives object."""
        # https://github.com/pandas-dev/pandas/issues/20762
        # This should be the same, aside from concat([sparse, float])
        def as_object(column):
            return np.asarray(column).astype(object)

        sparse_frame = pd.DataFrame({'A': data[:3]})
        int_frame = pd.DataFrame({"A": [1, 2, 3]})
        cat_frame = pd.DataFrame({"A": ['a', 'b', 'c']}).astype('category')
        frames = [sparse_frame, int_frame, cat_frame]

        # dataframes
        result = pd.concat(frames)
        object_frames = [frame.apply(as_object) for frame in frames]
        expected = pd.concat(object_frames)
        self.assert_frame_equal(result, expected)

    def test_concat_columns(self, data, na_value):
        """Run the inherited test unless the dtype cannot hold NaN."""
        self._check_unsupported(data)
        parent = super(TestReshaping, self)
        parent.test_concat_columns(data, na_value)

    def test_align(self, data, na_value):
        """Run the inherited test unless the dtype cannot hold NaN."""
        self._check_unsupported(data)
        parent = super(TestReshaping, self)
        parent.test_align(data, na_value)

    def test_align_frame(self, data, na_value):
        """Run the inherited test unless the dtype cannot hold NaN."""
        self._check_unsupported(data)
        parent = super(TestReshaping, self)
        parent.test_align_frame(data, na_value)

    def test_align_series_frame(self, data, na_value):
        """Run the inherited test unless the dtype cannot hold NaN."""
        self._check_unsupported(data)
        parent = super(TestReshaping, self)
        parent.test_align_series_frame(data, na_value)

    def test_merge(self, data, na_value):
        """Run the inherited test unless the dtype cannot hold NaN."""
        self._check_unsupported(data)
        parent = super(TestReshaping, self)
        parent.test_merge(data, na_value)
|
||||
|
||||
|
||||
class TestGetitem(BaseSparseTests, base.BaseGetitemTests):
    """Indexing tests inherited from ``base.BaseGetitemTests``."""

    def test_get(self, data):
        """``Series.get`` by label agrees with positional ``iloc`` lookup."""
        # Labels are the even numbers 0, 2, 4, ... so label 2*k is position k.
        even_labels = list(range(0, 2 * len(data), 2))
        s = pd.Series(data, index=even_labels)

        if not np.isnan(s.values.fill_value):
            assert s.get(4) == s.iloc[2]
        else:
            # NaN never compares equal to itself, so check both sides are NaN.
            assert np.isnan(s.get(4))
            assert np.isnan(s.iloc[2])
        assert s.get(2) == s.iloc[1]

    def test_reindex(self, data, na_value):
        """Run the inherited test unless the dtype cannot hold NaN."""
        self._check_unsupported(data)
        parent = super(TestGetitem, self)
        parent.test_reindex(data, na_value)
|
||||
|
||||
|
||||
# Skipping TestSetitem, since we don't implement it.
|
||||
|
||||
class TestMissing(BaseSparseTests, base.BaseMissingTests):
    """Missing-data tests from the base suite, adjusted for sparse arrays."""

    def test_isna(self, data_missing):
        """``isna`` gives a sparse boolean mask for arrays and Series."""
        # The mask's fill value is whether the input's own fill value is NA.
        mask_fill = pd.isna(data_missing.dtype.fill_value)
        mask_dtype = SparseDtype(bool, mask_fill)
        expected_array = SparseArray([True, False], dtype=mask_dtype)

        # array
        array_result = pd.isna(data_missing)
        self.assert_equal(array_result, expected_array)

        # series
        series_result = pd.Series(data_missing).isna()
        expected_series = pd.Series(expected_array)
        self.assert_series_equal(series_result, expected_series)

        # GH 21189
        emptied_result = pd.Series(data_missing).drop([0, 1]).isna()
        expected_empty = pd.Series([], dtype=mask_dtype)
        self.assert_series_equal(emptied_result, expected_empty)

    def test_fillna_limit_pad(self, data_missing):
        """Inherited test, additionally requiring a PerformanceWarning."""
        parent = super(TestMissing, self)
        with tm.assert_produces_warning(PerformanceWarning):
            parent.test_fillna_limit_pad(data_missing)

    def test_fillna_limit_backfill(self, data_missing):
        """Inherited test, additionally requiring a PerformanceWarning."""
        parent = super(TestMissing, self)
        with tm.assert_produces_warning(PerformanceWarning):
            parent.test_fillna_limit_backfill(data_missing)

    def test_fillna_series_method(self, data_missing):
        """Delegate to the parent's backfill test under a warning check."""
        # NOTE(review): this calls the parent's ``test_fillna_limit_backfill``
        # rather than ``test_fillna_series_method``; it looks like a
        # copy-paste slip, but the call is kept as-is -- confirm against the
        # base class signature before changing it.
        parent = super(TestMissing, self)
        with tm.assert_produces_warning(PerformanceWarning):
            parent.test_fillna_limit_backfill(data_missing)

    @pytest.mark.skip(reason="Unsupported")
    def test_fillna_series(self):
        """Skipped placeholder for the inherited test."""
        # this one looks doable.

    def test_fillna_frame(self, data_missing):
        """``DataFrame.fillna`` fills the sparse column, adjusting its dtype."""
        # Have to override to specify that fill_value will change.
        fill_value = data_missing[1]

        frame = pd.DataFrame({"A": data_missing, "B": [1, 2]})
        result = frame.fillna(fill_value)

        # With an NA fill value the filled column is expected to use the
        # new scalar as its fill value; otherwise the dtype is unchanged.
        dtype = (SparseDtype(data_missing.dtype, fill_value)
                 if pd.isna(data_missing.fill_value)
                 else data_missing.dtype)

        filled_column = data_missing._from_sequence(
            [fill_value, fill_value], dtype=dtype)
        expected = pd.DataFrame({"A": filled_column, "B": [1, 2]})

        self.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class TestMethods(BaseSparseTests, base.BaseMethodsTests):
    """Method tests from the base suite, overridden where sparse differs."""

    def test_combine_le(self, data_repeated):
        """``Series.combine`` with ``<=`` yields a sparse boolean Series."""
        # Combining two sparse Series with ``<=`` gives Series[Sparse[bool]]
        # rather than Series[bool], so the expected values are built as a
        # SparseArray with fill_value=False.
        def less_equal(x1, x2):
            return x1 <= x2

        orig_data1, orig_data2 = data_repeated(2)
        s1 = pd.Series(orig_data1)
        s2 = pd.Series(orig_data2)

        # series against series
        result = s1.combine(s2, less_equal)
        pairwise = [a <= b
                    for (a, b) in zip(list(orig_data1), list(orig_data2))]
        expected = pd.Series(pd.SparseArray(pairwise, fill_value=False))
        self.assert_series_equal(result, expected)

        # series against scalar
        val = s1.iloc[0]
        result = s1.combine(val, less_equal)
        against_scalar = [a <= val for a in list(orig_data1)]
        expected = pd.Series(pd.SparseArray(against_scalar,
                                            fill_value=False))
        self.assert_series_equal(result, expected)

    def test_fillna_copy_frame(self, data_missing):
        """``DataFrame.fillna`` returns new data and leaves the input alone."""
        arr = data_missing.take([1, 1])
        df = pd.DataFrame({"A": arr})

        fill_with = df.iloc[0, 0]
        result = df.fillna(fill_with)

        assert df.values.base is not result.values.base
        assert df.A._values.to_dense() is arr.to_dense()

    def test_fillna_copy_series(self, data_missing):
        """``Series.fillna`` returns new data and leaves the input alone."""
        arr = data_missing.take([1, 1])
        ser = pd.Series(arr)

        fill_with = ser[0]
        result = ser.fillna(fill_with)

        assert ser._values is not result._values
        assert ser._values.to_dense() is arr.to_dense()

    @pytest.mark.skip(reason="Not Applicable")
    def test_fillna_length_mismatch(self, data_missing):
        """Skipped: marked as not applicable."""

    def test_where_series(self, data, na_value):
        """``Series.where`` with and without a replacement array."""
        assert data[0] != data[1]
        cls = type(data)
        a, b = data[:2]

        def as_series(values, dtype):
            return pd.Series(cls._from_sequence(values, dtype=dtype))

        ser = as_series([a, a, b, b], data.dtype)

        # No replacement given: masked-out positions become NA and the
        # expected dtype is Sparse[float] with fill value 0.0.
        mask = np.array([True, True, False, False])
        result = ser.where(mask)

        new_dtype = SparseDtype('float', 0.0)
        expected = as_series([a, a, na_value, na_value], new_dtype)
        self.assert_series_equal(result, expected)

        # Replacement array given: masked-out positions come from ``other``.
        other = cls._from_sequence([a, b, a, b], dtype=data.dtype)
        mask = np.array([True, False, True, True])
        result = ser.where(mask, other)
        expected = as_series([a, b, b, b], data.dtype)
        self.assert_series_equal(result, expected)

    def test_combine_first(self, data):
        """Run the inherited test, skipping when the subtype is int."""
        subtype_is_int = data.dtype.subtype == 'int'
        if subtype_is_int:
            # Right now this is upcasted to float, just like combine_first
            # for Series[int]
            pytest.skip("TODO(SparseArray.__setitem__ will preserve dtype.")
        parent = super(TestMethods, self)
        parent.test_combine_first(data)

    @pytest.mark.parametrize("as_series", [True, False])
    def test_searchsorted(self, data_for_sorting, as_series):
        """Inherited test, additionally requiring a PerformanceWarning."""
        parent = super(TestMethods, self)
        with tm.assert_produces_warning(PerformanceWarning):
            parent.test_searchsorted(data_for_sorting, as_series=as_series)
|
||||
|
||||
|
||||
class TestCasting(BaseSparseTests, base.BaseCastingTests):
    """Casting tests inherited unchanged from the base suite."""
|
||||
|
||||
|
||||
class TestArithmeticOps(BaseSparseTests, base.BaseArithmeticOpsTests):
    """Arithmetic-operator tests inherited from the base suite."""

    # Presumably read by the base class to decide which exception (if any)
    # each kind of operation should raise; all are None here -- confirm
    # against base.BaseArithmeticOpsTests.
    series_scalar_exc = None
    frame_scalar_exc = None
    divmod_exc = None
    series_array_exc = None

    def _skip_if_different_combine(self, data):
        """Skip the running test when ``data.fill_value`` equals 0."""
        if data.fill_value != 0:
            return
        # arith ops call on dtype.fill_value so that the sparsity
        # is maintained. Combine can't be called on a dtype in
        # general, so we can't make the expected. This is tested elsewhere
        raise pytest.skip("Incorrected expected from Series.combine")

    def test_error(self, data, all_arithmetic_operators):
        """Override the inherited test with a no-op."""

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        """Run the inherited test unless the fill value is 0."""
        self._skip_if_different_combine(data)
        parent = super(TestArithmeticOps, self)
        parent.test_arith_series_with_scalar(data, all_arithmetic_operators)

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """Run the inherited test unless the fill value is 0."""
        self._skip_if_different_combine(data)
        parent = super(TestArithmeticOps, self)
        parent.test_arith_series_with_array(data, all_arithmetic_operators)
|
||||
|
||||
|
||||
class TestComparisonOps(BaseSparseTests, base.BaseComparisonOpsTests):
    """Comparison-operator tests inherited from the base suite."""

    def _compare_other(self, s, data, op_name, other):
        """Check ``op(data, other)`` as an array and as a Series.

        The incoming ``s`` is not used; a Series is rebuilt from ``data``
        for the Series-level comparison.
        """
        op = self.get_op_from_name(op_name)

        # array
        array_result = pd.Series(op(data, other))
        # hard to test the fill value, since we don't know what expected
        # is in general.
        # Rely on tests in `tests/sparse` to validate that.
        result_dtype = array_result.dtype
        assert isinstance(result_dtype, SparseDtype)
        assert array_result.dtype.subtype == np.dtype('bool')

        # Build the expected values from dense inputs, reusing the fill
        # value of the actual result; floating-point warnings are silenced.
        with np.errstate(all='ignore'):
            dense_outcome = op(np.asarray(data), np.asarray(other))
            sparse_outcome = pd.SparseArray(
                dense_outcome, fill_value=array_result.values.fill_value)
            expected = pd.Series(sparse_outcome)

        tm.assert_series_equal(array_result, expected)

        # series
        ser = pd.Series(data)
        series_result = op(ser, other)
        tm.assert_series_equal(series_result, expected)
|
||||
|
||||
|
||||
class TestPrinting(BaseSparseTests, base.BasePrintingTests):
    """Printing tests inherited from ``base.BasePrintingTests``."""

    @pytest.mark.xfail(reason='Different repr', strict=True)
    def test_array_repr(self, data, size):
        """Inherited repr test; marked as a strict expected failure."""
        parent = super(TestPrinting, self)
        parent.test_array_repr(data, size)
|
||||
|
||||
|
||||
class TestParsing(BaseSparseTests, base.BaseParsingTests):
    """Parsing tests inherited from ``base.BaseParsingTests``."""

    @pytest.mark.parametrize('engine', ['c', 'python'])
    def test_EA_types(self, engine, data):
        """The inherited test must raise ``NotImplementedError``.

        The error message has to mention that
        ``_from_sequence_of_strings`` must be implemented.
        """
        expected_msg = r'.*must implement _from_sequence_of_strings.*'
        parent = super(TestParsing, self)
        with pytest.raises(NotImplementedError, match=expected_msg):
            parent.test_EA_types(engine, data)
|
||||
Reference in New Issue
Block a user