Static code analysis and corrections

2019-07-17 16:06:09 +02:00
parent 674692c2fc
commit 21bfae9fbc
10086 changed files with 2102103 additions and 51 deletions
@@ -0,0 +1,4 @@
+from .array import DecimalArray, DecimalDtype, to_decimal, make_data
+
+
+# Public API of this package: the names imported above from ``.array``.
+__all__ = ['DecimalArray', 'DecimalDtype', 'to_decimal', 'make_data']
@@ -0,0 +1,166 @@
+import decimal
+import numbers
+import random
+import sys
+
+import numpy as np
+
+from pandas.core.dtypes.base import ExtensionDtype
+
+import pandas as pd
+from pandas.api.extensions import register_extension_dtype
+from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin
+
+
+@register_extension_dtype
+class DecimalDtype(ExtensionDtype):
+    """Extension dtype whose scalar type is ``decimal.Decimal``.
+
+    Parameters
+    ----------
+    context : decimal.Context, optional
+        Context stored on the dtype. When omitted (or falsy), the result
+        of ``decimal.getcontext()`` is stored instead.
+    """
+
+    type = decimal.Decimal
+    name = 'decimal'
+    na_value = decimal.Decimal('NaN')
+    # Attribute names declared as this dtype's metadata.
+    _metadata = ('context',)
+
+    def __init__(self, context=None):
+        if not context:
+            context = decimal.getcontext()
+        self.context = context
+
+    def __repr__(self):
+        template = 'DecimalDtype(context={})'
+        return template.format(self.context)
+
+    @classmethod
+    def construct_array_type(cls):
+        """Return the array class paired with this dtype.
+
+        Returns
+        -------
+        type
+            The ``DecimalArray`` class.
+        """
+        return DecimalArray
+
+    @classmethod
+    def construct_from_string(cls, string):
+        """Build a default instance when ``string`` equals ``cls.name``.
+
+        Raises
+        ------
+        TypeError
+            If ``string`` is anything other than ``cls.name``.
+        """
+        if string == cls.name:
+            return cls()
+        message = "Cannot construct a '{}' from '{}'".format(cls, string)
+        raise TypeError(message)
+
+    @property
+    def _is_numeric(self):
+        # This dtype always reports itself as numeric.
+        return True
+
+
+class DecimalArray(ExtensionArray, ExtensionScalarOpsMixin):
+    """Extension array of ``decimal.Decimal`` values (object ndarray).
+
+    Parameters
+    ----------
+    values : sequence of decimal.Decimal
+        Every element must already be a ``decimal.Decimal``.
+    dtype : object, optional
+        Accepted but not used by this constructor.
+    copy : bool, default False
+        Accepted but not used by this constructor.
+    context : decimal.Context, optional
+        Forwarded to ``DecimalDtype`` to build the array's dtype.
+
+    Raises
+    ------
+    TypeError
+        If any element of ``values`` is not a ``decimal.Decimal``.
+    """
+    __array_priority__ = 1000
+
+    def __init__(self, values, dtype=None, copy=False, context=None):
+        for val in values:
+            if not isinstance(val, decimal.Decimal):
+                raise TypeError("All values must be of type " +
+                                str(decimal.Decimal))
+        values = np.asarray(values, dtype=object)
+
+        self._data = values
+        # Some aliases for common attribute names to ensure pandas supports
+        # these
+        self._items = self.data = self._data
+        # those aliases are currently not working due to assumptions
+        # in internal code (GH-20735)
+        # self._values = self.values = self.data
+        self._dtype = DecimalDtype(context)
+
+    @property
+    def dtype(self):
+        """The ``DecimalDtype`` instance created in ``__init__``."""
+        return self._dtype
+
+    @classmethod
+    def _from_sequence(cls, scalars, dtype=None, copy=False):
+        """Build an array from ``scalars``.
+
+        If ``dtype`` carries a ``context`` attribute it is reused for the
+        new array; any other ``dtype`` (e.g. a string or None) falls back
+        to the default context. ``copy`` is not used.
+        """
+        context = getattr(dtype, 'context', None)
+        return cls(scalars, context=context)
+
+    @classmethod
+    def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
+        """Convert each string with ``decimal.Decimal`` and build an array."""
+        return cls._from_sequence([decimal.Decimal(x) for x in strings],
+                                  dtype, copy)
+
+    @classmethod
+    def _from_factorized(cls, values, original):
+        """Build an array from ``values``, keeping ``original``'s context."""
+        context = getattr(getattr(original, 'dtype', None), 'context', None)
+        return cls(values, context=context)
+
+    def __getitem__(self, item):
+        """Return a scalar for an integer ``item``, otherwise a new array.
+
+        The new array keeps this array's decimal context so that its
+        dtype stays equal to ``self.dtype``.
+        """
+        if isinstance(item, numbers.Integral):
+            return self._data[item]
+        else:
+            return type(self)(self._data[item],
+                              context=self.dtype.context)
+
+    def take(self, indexer, allow_fill=False, fill_value=None):
+        """Take elements by position via ``pandas.api.extensions.take``.
+
+        When ``allow_fill`` is true and no ``fill_value`` is given, the
+        dtype's ``na_value`` is used as the fill value. The result is built
+        through ``_from_sequence`` with this array's dtype so the decimal
+        context is carried over.
+        """
+        from pandas.api.extensions import take
+
+        data = self._data
+        if allow_fill and fill_value is None:
+            fill_value = self.dtype.na_value
+
+        result = take(data, indexer, fill_value=fill_value,
+                      allow_fill=allow_fill)
+        return self._from_sequence(result, dtype=self.dtype)
+
+    def copy(self, deep=False):
+        """Return a new array with the same values and decimal context."""
+        context = self.dtype.context
+        if deep:
+            return type(self)(self._data.copy(), context=context)
+        return type(self)(self, context=context)
+
+    def astype(self, dtype, copy=True):
+        """Cast to ``dtype``.
+
+        A ``DecimalDtype`` target yields a new ``DecimalArray`` over the
+        same data using the target's context; anything else is delegated
+        to ``np.asarray``. ``copy`` is not used.
+        """
+        if isinstance(dtype, type(self.dtype)):
+            return type(self)(self._data, context=dtype.context)
+        return np.asarray(self, dtype=dtype)
+
+    def __setitem__(self, key, value):
+        """Assign ``value`` at ``key``, converting it with ``decimal.Decimal``.
+
+        Raises
+        ------
+        ValueError
+            If ``value`` is list-like while ``key`` is a scalar.
+        """
+        if pd.api.types.is_list_like(value):
+            if pd.api.types.is_scalar(key):
+                raise ValueError("setting an array element with a sequence.")
+            value = [decimal.Decimal(v) for v in value]
+        else:
+            value = decimal.Decimal(value)
+        self._data[key] = value
+
+    def __len__(self):
+        return len(self._data)
+
+    @property
+    def nbytes(self):
+        """Size estimate: length times ``sys.getsizeof`` of the first item."""
+        n = len(self)
+        if n:
+            return n * sys.getsizeof(self[0])
+        return 0
+
+    def isna(self):
+        """Return a boolean ndarray marking elements where ``is_nan()``."""
+        return np.array([x.is_nan() for x in self._data], dtype=bool)
+
+    @property
+    def _na_value(self):
+        return decimal.Decimal('NaN')
+
+    @classmethod
+    def _concat_same_type(cls, to_concat):
+        """Concatenate arrays, reusing the first array's decimal context."""
+        to_concat = list(to_concat)
+        # np.concatenate raises on an empty list before index 0 is read.
+        data = np.concatenate([x._data for x in to_concat])
+        return cls(data, context=to_concat[0].dtype.context)
+
+    def _reduce(self, name, skipna=True, **kwargs):
+        """Apply the ndarray method called ``name`` along axis 0.
+
+        Raises
+        ------
+        NotImplementedError
+            If ``skipna`` is true, or the underlying ndarray has no
+            attribute called ``name``.
+        """
+
+        if skipna:
+            raise NotImplementedError("decimal does not support skipna=True")
+
+        try:
+            op = getattr(self.data, name)
+        except AttributeError:
+            raise NotImplementedError("decimal does not support "
+                                      "the {} operation".format(name))
+        return op(axis=0)
+
+
+def to_decimal(values, context=None):
+    """Pass each of ``values`` to ``decimal.Decimal`` and wrap the result.
+
+    ``context`` is forwarded unchanged to the ``DecimalArray`` constructor.
+    """
+    converted = list(map(decimal.Decimal, values))
+    return DecimalArray(converted, context=context)
+
+
+def make_data():
+    """Return 100 ``decimal.Decimal`` values built from ``random.random()``."""
+    samples = []
+    for _ in range(100):
+        samples.append(decimal.Decimal(random.random()))
+    return samples
+
+
+# Run the class-level hooks that install operator methods on DecimalArray.
+# NOTE(review): these hooks are not defined in this file; presumably they
+# come from ExtensionScalarOpsMixin -- confirm against the pandas version.
+DecimalArray._add_arithmetic_ops()
+DecimalArray._add_comparison_ops()
@@ -0,0 +1,401 @@
+import decimal
+import math
+import operator
+
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import compat
+from pandas.tests.extension import base
+import pandas.util.testing as tm
+
+from .array import DecimalArray, DecimalDtype, make_data, to_decimal
+
+
+@pytest.fixture
+def dtype():
+    """Fixture: a ``DecimalDtype`` created with default arguments."""
+    instance = DecimalDtype()
+    return instance
+
+
+@pytest.fixture
+def data():
+    """Fixture: a ``DecimalArray`` wrapping the output of ``make_data()``."""
+    values = make_data()
+    return DecimalArray(values)
+
+
+@pytest.fixture
+def data_missing():
+    """Fixture: a two-element array, NaN first and ``Decimal(1)`` second."""
+    missing = decimal.Decimal('NaN')
+    valid = decimal.Decimal(1)
+    return DecimalArray([missing, valid])
+
+
+@pytest.fixture
+def data_for_sorting():
+    """Fixture: the decimals 1, 2, 0 (in that order) as a ``DecimalArray``."""
+    values = [decimal.Decimal(text) for text in ('1', '2', '0')]
+    return DecimalArray(values)
+
+
+@pytest.fixture
+def data_missing_for_sorting():
+    """Fixture: the decimals 1, NaN, 0 (in that order) as a ``DecimalArray``."""
+    values = [decimal.Decimal(text) for text in ('1', 'NaN', '0')]
+    return DecimalArray(values)
+
+
+@pytest.fixture
+def na_cmp():
+    """Fixture: a two-argument callable true when both arguments are NaN."""
+    def both_nan(x, y):
+        return x.is_nan() and y.is_nan()
+
+    return both_nan
+
+
+@pytest.fixture
+def na_value():
+    """Fixture: the scalar ``decimal.Decimal("NaN")``."""
+    missing = decimal.Decimal("NaN")
+    return missing
+
+
+@pytest.fixture
+def data_for_grouping():
+    """Fixture: eight values laid out as [b, b, NaN, NaN, a, a, b, c].
+
+    Here ``a``, ``b`` and ``c`` are the decimals 0.0, 1.0 and 2.0.
+    """
+    a, b, c = (decimal.Decimal(text) for text in ('0.0', '1.0', '2.0'))
+    na = decimal.Decimal('NaN')
+    layout = [b, b, na, na, a, a, b, c]
+    return DecimalArray(layout)
+
+
+class BaseDecimal(object):
+    """Mixin providing NaN-aware series/frame equality assertions."""
+
+    def assert_series_equal(self, left, right, *args, **kwargs):
+        """Compare two Series after checking that their NA positions match.
+
+        NA masks are computed for both sides and compared first; the
+        remaining (non-NA) values are then compared with
+        ``tm.assert_series_equal``, forwarding ``*args`` and ``**kwargs``.
+        """
+        def is_nan_scalar(value):
+            # Object-dtype Series do not recognise Decimal('NaN') as NA, so
+            # each element is probed with math.isnan; anything math.isnan
+            # rejects with TypeError counts as not-NA.
+            try:
+                return math.isnan(value)
+            except TypeError:
+                return False
+
+        def na_mask(series):
+            if series.dtype == 'object':
+                return series.apply(is_nan_scalar)
+            return series.isna()
+
+        left_na = na_mask(left)
+        right_na = na_mask(right)
+
+        tm.assert_series_equal(left_na, right_na)
+        return tm.assert_series_equal(left[~left_na], right[~right_na],
+                                      *args, **kwargs)
+
+    def assert_frame_equal(self, left, right, *args, **kwargs):
+        """Compare two DataFrames column by column.
+
+        The column indexes are compared first, then each selected column
+        goes through ``self.assert_series_equal``; whatever is left after
+        dropping those columns is handed to ``tm.assert_frame_equal``.
+        """
+        # TODO(EA): select_dtypes
+        column_checks = {
+            'exact': kwargs.get('check_column_type', 'equiv'),
+            'check_names': kwargs.get('check_names', True),
+            'check_exact': kwargs.get('check_exact', False),
+            'check_categorical': kwargs.get('check_categorical', True),
+            'obj': '{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame')),
+        }
+        tm.assert_index_equal(left.columns, right.columns, **column_checks)
+
+        # NOTE(review): ``.index`` of the boolean comparison holds every
+        # column label (not only those where it is True), so all columns
+        # take the series-wise path below.
+        is_decimal = left.dtypes == 'decimal'
+        decimals = is_decimal.index
+
+        for col in decimals:
+            self.assert_series_equal(left[col], right[col],
+                                     *args, **kwargs)
+
+        remaining_left = left.drop(columns=decimals)
+        remaining_right = right.drop(columns=decimals)
+        tm.assert_frame_equal(remaining_left, remaining_right,
+                              *args, **kwargs)
+
+
+class TestDtype(BaseDecimal, base.BaseDtypeTests):
+    """Runs the inherited ``base.BaseDtypeTests`` with the decimal fixtures."""
+
+    @pytest.mark.skipif(compat.PY2, reason="Context not hashable.")
+    def test_hashable(self, dtype):
+        """Override with an empty body; skipped when ``compat.PY2`` is set."""
+
+
+class TestInterface(BaseDecimal, base.BaseInterfaceTests):
+    """Inherited interface tests; the whole class is skipped on Python 2."""
+
+    pytestmark = pytest.mark.skipif(
+        compat.PY2, reason="Unhashble dtype in Py2."
+    )
+
+
+class TestConstructors(BaseDecimal, base.BaseConstructorsTests):
+    """Inherited constructor tests, with ``test_from_dtype`` skipped."""
+
+    @pytest.mark.skip(reason="not implemented constructor from dtype")
+    def test_from_dtype(self, data):
+        """Skipped placeholder: construct from our dtype & string dtype."""
+
+
+class TestReshaping(BaseDecimal, base.BaseReshapingTests):
+    """Inherited reshaping tests; the whole class is skipped on Python 2."""
+
+    pytestmark = pytest.mark.skipif(
+        compat.PY2, reason="Unhashble dtype in Py2."
+    )
+
+
+class TestGetitem(BaseDecimal, base.BaseGetitemTests):
+    """Inherited getitem tests plus a decimal-specific ``take`` check."""
+
+    def test_take_na_value_other_decimal(self):
+        """``take`` with ``allow_fill`` writes ``fill_value`` at index -1."""
+        one = decimal.Decimal('1.0')
+        two = decimal.Decimal('2.0')
+        filler = decimal.Decimal('-1.0')
+
+        arr = DecimalArray([one, two])
+        result = arr.take([0, -1], allow_fill=True, fill_value=filler)
+
+        expected = DecimalArray([decimal.Decimal('1.0'),
+                                 decimal.Decimal('-1.0')])
+        self.assert_extension_array_equal(result, expected)
+
+
+class TestMissing(BaseDecimal, base.BaseMissingTests):
+    """Inherited missing-data tests; nothing is overridden here."""
+
+
+class Reduce(object):
+    """Mixin supplying ``check_reduce`` for the reduction test classes."""
+
+    def check_reduce(self, s, op_name, skipna):
+        """Check the reduction ``op_name`` on ``s``.
+
+        With ``skipna`` true, or for 'median', 'skew' and 'kurt', the call
+        must raise ``NotImplementedError``. Otherwise the result must
+        almost-equal the same-named method of ``np.asarray(s)``.
+        """
+        must_raise = skipna or op_name in ('median', 'skew', 'kurt')
+
+        if must_raise:
+            with pytest.raises(NotImplementedError):
+                getattr(s, op_name)(skipna=skipna)
+            return
+
+        result = getattr(s, op_name)(skipna=skipna)
+        as_ndarray = np.asarray(s)
+        expected = getattr(as_ndarray, op_name)()
+        tm.assert_almost_equal(result, expected)
+
+
+class TestNumericReduce(Reduce, base.BaseNumericReduceTests):
+    """Inherited numeric-reduction tests using ``Reduce.check_reduce``."""
+
+
+class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests):
+    """Inherited boolean-reduction tests using ``Reduce.check_reduce``."""
+
+
+class TestMethods(BaseDecimal, base.BaseMethodsTests):
+    """Inherited method tests with ``test_value_counts`` marked xfail."""
+
+    @pytest.mark.parametrize('dropna', [True, False])
+    @pytest.mark.xfail(reason="value_counts not implemented yet.")
+    def test_value_counts(self, all_data, dropna):
+        """Compare ``value_counts`` of the first ten values with a reference.
+
+        The reference is the same data, reduced to its non-NA values as an
+        ndarray when ``dropna`` is true.
+        """
+        all_data = all_data[:10]
+        other = np.array(all_data[~all_data.isna()]) if dropna else all_data
+
+        counted = pd.Series(all_data).value_counts(dropna=dropna)
+        result = counted.sort_index()
+        reference = pd.Series(other).value_counts(dropna=dropna)
+        expected = reference.sort_index()
+
+        tm.assert_series_equal(result, expected)
+
+
+class TestCasting(BaseDecimal, base.BaseCastingTests):
+    """Inherited casting tests; the whole class is skipped on Python 2."""
+
+    pytestmark = pytest.mark.skipif(
+        compat.PY2, reason="Unhashble dtype in Py2."
+    )
+
+
+class TestGroupby(BaseDecimal, base.BaseGroupbyTests):
+    """Inherited groupby tests; the whole class is skipped on Python 2."""
+
+    pytestmark = pytest.mark.skipif(
+        compat.PY2, reason="Unhashble dtype in Py2."
+    )
+
+
+class TestSetitem(BaseDecimal, base.BaseSetitemTests):
+    """Inherited setitem tests; nothing is overridden here."""
+
+
+class TestPrinting(BaseDecimal, base.BasePrintingTests):
+    """Inherited printing tests; the whole class is skipped on Python 2."""
+
+    pytestmark = pytest.mark.skipif(
+        compat.PY2, reason="Unhashble dtype in Py2."
+    )
+
+
+# TODO(extension)
+@pytest.mark.xfail(reason=(
+    "raising AssertionError as this is not implemented, "
+    "though easy enough to do"))
+def test_series_constructor_coerce_data_to_extension_dtype_raises():
+    """Plain ints with ``dtype=DecimalDtype()`` should raise ValueError.
+
+    Marked xfail, so a failure of this expectation is tolerated.
+    """
+    target = DecimalDtype()
+    xpr = ("Cannot cast data to extension dtype 'decimal'. Pass the "
+           "extension array directly.")
+    with pytest.raises(ValueError, match=xpr):
+        pd.Series([0, 1, 2], dtype=target)
+
+
+def test_series_constructor_with_dtype():
+    """Series built from a DecimalArray with an explicit ``dtype``.
+
+    ``DecimalDtype()`` must give the same Series as passing no dtype, and
+    ``'int64'`` must give the integer Series ``[10]``.
+    """
+    arr = DecimalArray([decimal.Decimal('10.0')])
+
+    same_dtype = pd.Series(arr, dtype=DecimalDtype())
+    tm.assert_series_equal(same_dtype, pd.Series(arr))
+
+    as_int = pd.Series(arr, dtype='int64')
+    tm.assert_series_equal(as_int, pd.Series([10]))
+
+
+def test_dataframe_constructor_with_dtype():
+    """DataFrame built from a DecimalArray column with an explicit ``dtype``.
+
+    ``DecimalDtype()`` must match the frame built without a dtype, and
+    ``'int64'`` must match a frame holding the integer column ``[10]``.
+    """
+    arr = DecimalArray([decimal.Decimal('10.0')])
+    same_dtype = pd.DataFrame({"A": arr}, dtype=DecimalDtype())
+    tm.assert_frame_equal(same_dtype, pd.DataFrame({"A": arr}))
+
+    # A fresh array is built for the integer cast, as a separate input.
+    arr = DecimalArray([decimal.Decimal('10.0')])
+    as_int = pd.DataFrame({"A": arr}, dtype='int64')
+    tm.assert_frame_equal(as_int, pd.DataFrame({"A": [10]}))
+
+
+@pytest.mark.parametrize("frame", [True, False])
+def test_astype_dispatches(frame):
+    """``astype`` on a Series/DataFrame must reach ``DecimalArray.astype``.
+
+    This is a dtype-specific test: designing a reliable smoke test that
+    works for arbitrary data types is difficult. The check is that the
+    resulting dtype carries the precision of the requested context.
+    """
+    ctx = decimal.Context(prec=5)
+    data = pd.Series(DecimalArray([decimal.Decimal(2)]), name='a')
+
+    if frame:
+        result = data.to_frame().astype(DecimalDtype(ctx))['a']
+    else:
+        result = data.astype(DecimalDtype(ctx))
+
+    assert result.dtype.context.prec == ctx.prec
+
+
+class TestArithmeticOps(BaseDecimal, base.BaseArithmeticOpsTests):
+    """Inherited arithmetic-operator tests adapted for decimals."""
+
+    def check_opname(self, s, op_name, other, exc=None):
+        # The incoming ``exc`` is deliberately not forwarded: the parent
+        # check is always run with exc=None.
+        super(TestArithmeticOps, self).check_opname(s, op_name,
+                                                    other, exc=None)
+
+    def test_arith_series_with_array(self, data, all_arithmetic_operators):
+        """Check each arithmetic operator against several operands.
+
+        The DivisionByZero and InvalidOperation traps of the current
+        decimal context are switched off for the duration of the checks
+        and restored afterwards, even when a check fails, so that the
+        modified global context cannot leak into later tests.
+        """
+        op_name = all_arithmetic_operators
+        s = pd.Series(data)
+
+        context = decimal.getcontext()
+        divbyzerotrap = context.traps[decimal.DivisionByZero]
+        invalidoptrap = context.traps[decimal.InvalidOperation]
+        context.traps[decimal.DivisionByZero] = 0
+        context.traps[decimal.InvalidOperation] = 0
+
+        try:
+            # Decimal supports ops with int, but not float
+            other = pd.Series([int(d * 100) for d in data])
+            self.check_opname(s, op_name, other)
+
+            if "mod" not in op_name:
+                self.check_opname(s, op_name, s * 2)
+
+            self.check_opname(s, op_name, 0)
+            self.check_opname(s, op_name, 5)
+        finally:
+            context.traps[decimal.DivisionByZero] = divbyzerotrap
+            context.traps[decimal.InvalidOperation] = invalidoptrap
+
+    def _check_divmod_op(self, s, op, other, exc=NotImplementedError):
+        # We implement divmod, so the parent check runs with exc=None
+        # regardless of the ``exc`` argument received here.
+        super(TestArithmeticOps, self)._check_divmod_op(
+            s, op, other, exc=None
+        )
+
+    def test_error(self):
+        # Overrides the inherited test with an empty body.
+        pass
+
+
+class TestComparisonOps(BaseDecimal, base.BaseComparisonOpsTests):
+    """Inherited comparison-operator tests adapted for decimals."""
+
+    def check_opname(self, s, op_name, other, exc=None):
+        # The incoming ``exc`` is not forwarded; None is always passed on.
+        parent = super(TestComparisonOps, self)
+        parent.check_opname(s, op_name, other, exc=None)
+
+    def _compare_other(self, s, data, op_name, other):
+        # ``data`` is accepted for signature compatibility but not used.
+        self.check_opname(s, op_name, other)
+
+    def test_compare_scalar(self, data, all_compare_operators):
+        """Compare the Series against the scalar 0.5."""
+        s = pd.Series(data)
+        self._compare_other(s, data, all_compare_operators, 0.5)
+
+    def test_compare_array(self, data, all_compare_operators):
+        """Compare the Series against a randomly rescaled copy of itself."""
+        s = pd.Series(data)
+
+        # Randomly double, halve or keep same value
+        exponents = np.random.choice([-1, 0, 1], len(data))
+        factors = [decimal.Decimal(pow(2.0, e)) for e in exponents]
+        other = pd.Series(data) * factors
+        self._compare_other(s, data, all_compare_operators, other)
+
+
+class DecimalArrayWithoutFromSequence(DecimalArray):
+    """Helper class for testing error handling in _from_sequence."""
+
+    # Declared as a classmethod, like the method it overrides, so that
+    # both ``cls._from_sequence(...)`` and ``obj._from_sequence(...)`` bind
+    # their arguments correctly and raise the intended KeyError.
+    @classmethod
+    def _from_sequence(cls, scalars, dtype=None, copy=False):
+        raise KeyError("For the test")
+
+
+class DecimalArrayWithoutCoercion(DecimalArrayWithoutFromSequence):
+    """Variant whose arithmetic methods use ``coerce_to_dtype=False``."""
+
+    @classmethod
+    def _create_arithmetic_method(cls, op):
+        method = cls._create_method(op, coerce_to_dtype=False)
+        return method
+
+
+# Re-run the arithmetic-ops hook so this subclass gets operator methods
+# built through its own ``_create_arithmetic_method`` override.
+DecimalArrayWithoutCoercion._add_arithmetic_ops()
+
+
+def test_combine_from_sequence_raises():
+    """``Series.combine`` yields object dtype when ``_from_sequence`` raises.
+
+    See https://github.com/pandas-dev/pandas/issues/22850
+    """
+    values = [decimal.Decimal("1.0"), decimal.Decimal("2.0")]
+    ser = pd.Series(DecimalArrayWithoutFromSequence(values))
+
+    result = ser.combine(ser, operator.add)
+
+    # note: object dtype
+    doubled = [decimal.Decimal("2.0"), decimal.Decimal("4.0")]
+    expected = pd.Series(doubled, dtype="object")
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("class_", [DecimalArrayWithoutFromSequence,
+                                    DecimalArrayWithoutCoercion])
+def test_scalar_ops_from_sequence_raises(class_):
+    """Adding two arrays falls back to an object ndarray.
+
+    op(EA, EA) should return an EA, or an ndarray if it's not possible
+    to return an EA with the return values.
+    """
+    values = [decimal.Decimal("1.0"), decimal.Decimal("2.0")]
+    arr = class_(values)
+
+    result = arr + arr
+
+    doubled = [decimal.Decimal("2.0"), decimal.Decimal("4.0")]
+    expected = np.array(doubled, dtype="object")
+    tm.assert_numpy_array_equal(result, expected)
+
+
+@pytest.mark.parametrize("reverse, expected_div, expected_mod", [
+    (False, [0, 1, 1, 2], [1, 0, 1, 0]),
+    (True, [2, 1, 0, 0], [0, 0, 2, 2]),
+])
+def test_divmod_array(reverse, expected_div, expected_mod):
+    """``divmod`` between the array [1, 2, 3, 4] and 2, in either order.
+
+    See https://github.com/pandas-dev/pandas/issues/22930
+    """
+    arr = to_decimal([1, 2, 3, 4])
+    div, mod = divmod(2, arr) if reverse else divmod(arr, 2)
+
+    tm.assert_extension_array_equal(div, to_decimal(expected_div))
+    tm.assert_extension_array_equal(mod, to_decimal(expected_mod))
+
+
+def test_formatting_values_deprecated():
+    """``repr`` of a Series must warn when ``_formatting_values`` is defined."""
+    class DecimalArray2(DecimalArray):
+        def _formatting_values(self):
+            return np.array(self)
+
+    values = [decimal.Decimal('1.0')]
+    ser = pd.Series(DecimalArray2(values))
+
+    # different levels for 2 vs. 3, so the stack level is only checked
+    # when compat.PY3 is true
+    with tm.assert_produces_warning(DeprecationWarning,
+                                    check_stacklevel=compat.PY3):
+        repr(ser)