Static code analysis and corrections

This commit is contained in:
Kristjan Komlosi
2019-07-17 16:06:09 +02:00
parent 674692c2fc
commit 21bfae9fbc
10086 changed files with 2102103 additions and 51 deletions
@@ -0,0 +1,4 @@
# Re-export the names defined in the sibling ``array`` module and declare
# them as this package's public interface.
from .array import DecimalArray, DecimalDtype, to_decimal, make_data
__all__ = ['DecimalArray', 'DecimalDtype', 'to_decimal', 'make_data']
@@ -0,0 +1,166 @@
import decimal
import numbers
import random
import sys
import numpy as np
from pandas.core.dtypes.base import ExtensionDtype
import pandas as pd
from pandas.api.extensions import register_extension_dtype
from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin
@register_extension_dtype
class DecimalDtype(ExtensionDtype):
    """Extension dtype whose scalar type is ``decimal.Decimal``.

    Parameters
    ----------
    context : decimal.Context, optional
        Context stored on the dtype.  When omitted (or falsy), the context
        returned by ``decimal.getcontext()`` is stored instead.
    """

    type = decimal.Decimal
    name = 'decimal'
    na_value = decimal.Decimal('NaN')
    # NOTE(review): per the pandas ExtensionDtype docs, the attributes named
    # here take part in dtype equality and hashing -- confirm for the
    # pandas version in use.
    _metadata = ('context',)

    def __init__(self, context=None):
        self.context = context or decimal.getcontext()

    def __repr__(self):
        return 'DecimalDtype(context={})'.format(self.context)

    @classmethod
    def construct_array_type(cls):
        """Return the array type associated with this dtype

        Returns
        -------
        type
        """
        return DecimalArray

    @classmethod
    def construct_from_string(cls, string):
        """Build a default dtype instance from its string alias.

        Parameters
        ----------
        string : object
            Candidate alias; only a value equal to ``cls.name`` is accepted.

        Returns
        -------
        DecimalDtype

        Raises
        ------
        TypeError
            If ``string`` does not equal ``cls.name``.
        """
        if string == cls.name:
            return cls()
        # Use the bare class name: formatting ``cls`` itself would embed the
        # ``<class '...'>`` repr (with its own quotes) inside the quotes of
        # the message.
        raise TypeError("Cannot construct a '{}' from "
                        "'{}'".format(cls.__name__, string))

    @property
    def _is_numeric(self):
        return True
class DecimalArray(ExtensionArray, ExtensionScalarOpsMixin):
    """Extension array keeping ``decimal.Decimal`` scalars in an object ndarray.

    Parameters
    ----------
    values : iterable of decimal.Decimal
        Elements of the array; every element must already be a ``Decimal``.
    dtype : object, optional
        Accepted for signature compatibility; not used by the constructor.
    copy : bool, default False
        Copy the converted data so the new array never shares a buffer with
        ``values``.
    context : decimal.Context, optional
        Context handed to the ``DecimalDtype`` created for this array.

    Raises
    ------
    TypeError
        If any element of ``values`` is not a ``decimal.Decimal``.
    """
    __array_priority__ = 1000

    def __init__(self, values, dtype=None, copy=False, context=None):
        # A one-shot iterable (e.g. a generator) would be exhausted by the
        # validation loop below, leaving nothing for ``np.asarray``.
        if not hasattr(values, '__len__'):
            values = list(values)
        for val in values:
            if not isinstance(val, decimal.Decimal):
                raise TypeError("All values must be of type " +
                                str(decimal.Decimal))
        values = np.asarray(values, dtype=object)
        if copy:
            # ``np.asarray`` hands back an object ndarray unchanged, so an
            # explicit copy is needed to honour the flag.
            values = values.copy()

        self._data = values
        # Some aliases for common attribute names to ensure pandas supports
        # these
        self._items = self.data = self._data
        # those aliases are currently not working due to assumptions
        # in internal code (GH-20735)
        # self._values = self.values = self.data
        self._dtype = DecimalDtype(context)

    @property
    def dtype(self):
        """The ``DecimalDtype`` created for this array."""
        return self._dtype

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        """Build an array from ``Decimal`` scalars.

        ``dtype`` may be None, a string alias or a dtype object; only an
        object exposing ``context`` contributes a context.
        """
        return cls(scalars, copy=copy,
                   context=getattr(dtype, 'context', None))

    @classmethod
    def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
        """Convert each string to ``Decimal`` and defer to ``_from_sequence``."""
        return cls._from_sequence([decimal.Decimal(x) for x in strings],
                                  dtype, copy)

    @classmethod
    def _from_factorized(cls, values, original):
        """Rebuild an array from ``values``, keeping ``original``'s context."""
        return cls(values, context=original.dtype.context)

    def __getitem__(self, item):
        """Return a scalar for an integer ``item``, otherwise a new array."""
        if isinstance(item, numbers.Integral):
            return self._data[item]
        # Carry the context along so the selection keeps this array's dtype.
        return type(self)(self._data[item], context=self.dtype.context)

    def take(self, indexer, allow_fill=False, fill_value=None):
        """Take elements by position via ``pandas.api.extensions.take``.

        When ``allow_fill`` is true and no ``fill_value`` is given, the
        dtype's ``na_value`` is used as the fill value.
        """
        from pandas.api.extensions import take

        data = self._data
        if allow_fill and fill_value is None:
            fill_value = self.dtype.na_value

        result = take(data, indexer, fill_value=fill_value,
                      allow_fill=allow_fill)
        return self._from_sequence(result, dtype=self.dtype)

    def copy(self, deep=False):
        """Return a new array with its own buffer and the same context.

        ``deep`` is kept for signature compatibility.  Decimal objects are
        immutable, so copying the buffer (not the elements) covers both the
        shallow and the deep case.
        """
        return type(self)(self._data, copy=True, context=self.dtype.context)

    def astype(self, dtype, copy=True):
        """Cast to ``dtype``.

        A ``DecimalDtype`` target yields a new ``DecimalArray`` carrying that
        dtype's context (copying the data when ``copy`` is true); any other
        target is delegated to ``np.asarray``.
        """
        if isinstance(dtype, type(self.dtype)):
            return type(self)(self._data, copy=copy, context=dtype.context)
        return np.asarray(self, dtype=dtype)

    def __setitem__(self, key, value):
        """Assign ``value`` at ``key`` after converting it to ``Decimal``.

        Raises
        ------
        ValueError
            If a list-like ``value`` is assigned to a scalar ``key``.
        """
        if pd.api.types.is_list_like(value):
            if pd.api.types.is_scalar(key):
                raise ValueError("setting an array element with a sequence.")
            value = [decimal.Decimal(v) for v in value]
        else:
            value = decimal.Decimal(value)
        self._data[key] = value

    def __len__(self):
        return len(self._data)

    @property
    def nbytes(self):
        """Length times ``sys.getsizeof`` of the first element (0 if empty)."""
        n = len(self)
        if n:
            return n * sys.getsizeof(self[0])
        return 0

    def isna(self):
        """Return a boolean ndarray marking the elements that are NaN."""
        return np.array([x.is_nan() for x in self._data], dtype=bool)

    @property
    def _na_value(self):
        return decimal.Decimal('NaN')

    @classmethod
    def _concat_same_type(cls, to_concat):
        """Concatenate arrays, keeping the context of the first one."""
        return cls(np.concatenate([x._data for x in to_concat]),
                   context=to_concat[0].dtype.context)

    def _reduce(self, name, skipna=True, **kwargs):
        """Apply the ndarray method called ``name`` along axis 0.

        Raises
        ------
        NotImplementedError
            If ``skipna`` is true, or the underlying ndarray has no attribute
            called ``name``.
        """
        if skipna:
            raise NotImplementedError("decimal does not support skipna=True")

        try:
            op = getattr(self._data, name)
        except AttributeError:
            raise NotImplementedError("decimal does not support "
                                      "the {} operation".format(name))
        return op(axis=0)
def to_decimal(values, context=None):
    """Convert each element of ``values`` to ``Decimal`` and wrap the result.

    Returns a ``DecimalArray`` constructed with the given ``context``.
    """
    converted = list(map(decimal.Decimal, values))
    return DecimalArray(converted, context=context)
def make_data():
    """Return a list of 100 ``Decimal`` values built from ``random.random()``."""
    samples = []
    for _ in range(100):
        samples.append(decimal.Decimal(random.random()))
    return samples
# Install the arithmetic and comparison operators on DecimalArray through
# its inherited ``_add_*_ops`` class hooks.
DecimalArray._add_arithmetic_ops()
DecimalArray._add_comparison_ops()
@@ -0,0 +1,401 @@
import decimal
import math
import operator
import numpy as np
import pytest
import pandas as pd
from pandas import compat
from pandas.tests.extension import base
import pandas.util.testing as tm
from .array import DecimalArray, DecimalDtype, make_data, to_decimal
@pytest.fixture
def dtype():
    """Provide a ``DecimalDtype`` built with default arguments."""
    default_dtype = DecimalDtype()
    return default_dtype
@pytest.fixture
def data():
    """Provide a ``DecimalArray`` wrapping the values from ``make_data()``."""
    raw_values = make_data()
    return DecimalArray(raw_values)
@pytest.fixture
def data_missing():
    """Provide a two-element ``DecimalArray``: NaN followed by 1."""
    missing = decimal.Decimal('NaN')
    valid = decimal.Decimal(1)
    return DecimalArray([missing, valid])
@pytest.fixture
def data_for_sorting():
    """Provide a ``DecimalArray`` holding 1, 2, 0 in that order."""
    values = [decimal.Decimal(text) for text in ('1', '2', '0')]
    return DecimalArray(values)
@pytest.fixture
def data_missing_for_sorting():
    """Provide a ``DecimalArray`` holding 1, NaN, 0 in that order."""
    values = [decimal.Decimal(text) for text in ('1', 'NaN', '0')]
    return DecimalArray(values)
@pytest.fixture
def na_cmp():
    """Provide a two-argument comparator that is true when both are NaN."""
    def both_nan(x, y):
        return x.is_nan() and y.is_nan()

    return both_nan
@pytest.fixture
def na_value():
    """Provide the ``Decimal`` NaN scalar."""
    missing = decimal.Decimal("NaN")
    return missing
@pytest.fixture
def data_for_grouping():
    """Provide a ``DecimalArray`` laid out as [b, b, na, na, a, a, b, c].

    Here ``a``, ``b`` and ``c`` are 0.0, 1.0 and 2.0 and ``na`` is NaN.
    """
    a, b, c = (decimal.Decimal(text) for text in ('0.0', '1.0', '2.0'))
    na = decimal.Decimal('NaN')
    layout = [b, b, na, na, a, a, b, c]
    return DecimalArray(layout)
class BaseDecimal(object):
    """Mixin with Series/DataFrame equality assertions aware of Decimal NaN."""

    def assert_series_equal(self, left, right, *args, **kwargs):
        """Assert both Series have matching NA positions and equal other values.

        Extra positional and keyword arguments are forwarded to the final
        ``tm.assert_series_equal`` call on the non-missing values.
        """

        def is_nan_scalar(x):
            # need to convert array([Decimal(NaN)], dtype='object') to np.NaN
            # because Series[object].isnan doesn't recognize decimal(NaN) as
            # NA.
            try:
                return math.isnan(x)
            except TypeError:
                return False

        def missing_mask(ser):
            # Object columns are checked element by element; anything else
            # relies on the Series' own ``isna``.
            if ser.dtype == 'object':
                return ser.apply(is_nan_scalar)
            return ser.isna()

        left_na = missing_mask(left)
        right_na = missing_mask(right)

        tm.assert_series_equal(left_na, right_na)
        return tm.assert_series_equal(left[~left_na],
                                      right[~right_na],
                                      *args, **kwargs)

    def assert_frame_equal(self, left, right, *args, **kwargs):
        """Assert frame equality column by column via ``assert_series_equal``.

        The column indexes are compared first; the columns selected below are
        then compared individually and dropped before the remaining frames
        are handed to ``tm.assert_frame_equal``.
        """
        # TODO(EA): select_dtypes
        columns_label = '{obj}.columns'.format(
            obj=kwargs.get('obj', 'DataFrame'))
        tm.assert_index_equal(
            left.columns, right.columns,
            exact=kwargs.get('check_column_type', 'equiv'),
            check_names=kwargs.get('check_names', True),
            check_exact=kwargs.get('check_exact', False),
            check_categorical=kwargs.get('check_categorical', True),
            obj=columns_label)

        # NOTE: ``.index`` of the boolean comparison holds every column
        # label, not only the labels whose dtype compared equal to 'decimal'.
        decimals = (left.dtypes == 'decimal').index

        for col in decimals:
            self.assert_series_equal(left[col], right[col],
                                     *args, **kwargs)

        remaining_left = left.drop(columns=decimals)
        remaining_right = right.drop(columns=decimals)
        tm.assert_frame_equal(remaining_left, remaining_right,
                              *args, **kwargs)
class TestDtype(BaseDecimal, base.BaseDtypeTests):
    """Combine ``BaseDecimal`` with ``base.BaseDtypeTests``."""

    @pytest.mark.skipif(compat.PY2, reason="Context not hashable.")
    def test_hashable(self, dtype):
        """Empty override; skipped when ``compat.PY2`` is true."""
class TestInterface(BaseDecimal, base.BaseInterfaceTests):
    """Combine ``BaseDecimal`` with ``base.BaseInterfaceTests``."""

    # Class-level mark: skip when ``compat.PY2`` is true.
    pytestmark = pytest.mark.skipif(
        compat.PY2, reason="Unhashble dtype in Py2.")
class TestConstructors(BaseDecimal, base.BaseConstructorsTests):
    """Combine ``BaseDecimal`` with ``base.BaseConstructorsTests``."""

    @pytest.mark.skip(reason="not implemented constructor from dtype")
    def test_from_dtype(self, data):
        """Empty override, unconditionally skipped."""
        # construct from our dtype & string dtype
class TestReshaping(BaseDecimal, base.BaseReshapingTests):
    """Combine ``BaseDecimal`` with ``base.BaseReshapingTests``."""

    # Class-level mark: skip when ``compat.PY2`` is true.
    pytestmark = pytest.mark.skipif(
        compat.PY2, reason="Unhashble dtype in Py2.")
class TestGetitem(BaseDecimal, base.BaseGetitemTests):
    """Combine ``BaseDecimal`` with ``base.BaseGetitemTests``."""

    def test_take_na_value_other_decimal(self):
        """``take`` with ``allow_fill`` fills index -1 with the given Decimal."""
        one = decimal.Decimal('1.0')
        two = decimal.Decimal('2.0')
        fill = decimal.Decimal('-1.0')

        arr = DecimalArray([one, two])
        result = arr.take([0, -1], allow_fill=True, fill_value=fill)

        expected = DecimalArray([one, fill])
        self.assert_extension_array_equal(result, expected)
class TestMissing(BaseDecimal, base.BaseMissingTests):
    """Combine ``BaseDecimal`` with ``base.BaseMissingTests``; adds nothing."""
class Reduce(object):
    """Mixin supplying ``check_reduce`` for the reduction test classes."""

    def check_reduce(self, s, op_name, skipna):
        """Check the reduction ``op_name`` on ``s``.

        ``NotImplementedError`` is expected when ``skipna`` is true or the
        operation is 'median', 'skew' or 'kurt'; otherwise the result is
        compared with the same-named method of ``np.asarray(s)``.
        """
        expect_unsupported = skipna or op_name in ['median', 'skew', 'kurt']
        if expect_unsupported:
            with pytest.raises(NotImplementedError):
                getattr(s, op_name)(skipna=skipna)
            return

        result = getattr(s, op_name)(skipna=skipna)
        expected = getattr(np.asarray(s), op_name)()
        tm.assert_almost_equal(result, expected)
class TestNumericReduce(Reduce, base.BaseNumericReduceTests):
    """Combine ``Reduce`` with ``base.BaseNumericReduceTests``; adds nothing."""
class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests):
    """Combine ``Reduce`` with ``base.BaseBooleanReduceTests``; adds nothing."""
class TestMethods(BaseDecimal, base.BaseMethodsTests):
    """Combine ``BaseDecimal`` with ``base.BaseMethodsTests``."""

    @pytest.mark.parametrize('dropna', [True, False])
    @pytest.mark.xfail(reason="value_counts not implemented yet.")
    def test_value_counts(self, all_data, dropna):
        """Compare ``value_counts`` of the first ten elements (marked xfail)."""
        all_data = all_data[:10]
        other = np.array(all_data[~all_data.isna()]) if dropna else all_data

        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
        expected = pd.Series(other).value_counts(dropna=dropna).sort_index()

        tm.assert_series_equal(result, expected)
class TestCasting(BaseDecimal, base.BaseCastingTests):
    """Combine ``BaseDecimal`` with ``base.BaseCastingTests``."""

    # Class-level mark: skip when ``compat.PY2`` is true.
    pytestmark = pytest.mark.skipif(
        compat.PY2, reason="Unhashble dtype in Py2.")
class TestGroupby(BaseDecimal, base.BaseGroupbyTests):
    """Combine ``BaseDecimal`` with ``base.BaseGroupbyTests``."""

    # Class-level mark: skip when ``compat.PY2`` is true.
    pytestmark = pytest.mark.skipif(
        compat.PY2, reason="Unhashble dtype in Py2.")
class TestSetitem(BaseDecimal, base.BaseSetitemTests):
    """Combine ``BaseDecimal`` with ``base.BaseSetitemTests``; adds nothing."""
class TestPrinting(BaseDecimal, base.BasePrintingTests):
    """Combine ``BaseDecimal`` with ``base.BasePrintingTests``."""

    # Class-level mark: skip when ``compat.PY2`` is true.
    pytestmark = pytest.mark.skipif(
        compat.PY2, reason="Unhashble dtype in Py2.")
# TODO(extension)
@pytest.mark.xfail(reason=(
    "raising AssertionError as this is not implemented, "
    "though easy enough to do"))
def test_series_constructor_coerce_data_to_extension_dtype_raises():
    """Expect a ``ValueError`` when building a Series of ints as decimal.

    Marked xfail.
    """
    expected_message = ("Cannot cast data to extension dtype 'decimal'. "
                        "Pass the extension array directly.")
    target_dtype = DecimalDtype()
    with pytest.raises(ValueError, match=expected_message):
        pd.Series([0, 1, 2], dtype=target_dtype)
def test_series_constructor_with_dtype():
    """Series built from a DecimalArray honours an explicit ``dtype``."""
    arr = DecimalArray([decimal.Decimal('10.0')])

    # Same extension dtype: identical to constructing without a dtype.
    as_decimal = pd.Series(arr, dtype=DecimalDtype())
    tm.assert_series_equal(as_decimal, pd.Series(arr))

    # 'int64': compared against a plain Series of the integer 10.
    as_int = pd.Series(arr, dtype='int64')
    tm.assert_series_equal(as_int, pd.Series([10]))
def test_dataframe_constructor_with_dtype():
    """DataFrame built from a DecimalArray honours an explicit ``dtype``."""
    decimal_arr = DecimalArray([decimal.Decimal('10.0')])

    # Same extension dtype: identical to constructing without a dtype.
    as_decimal = pd.DataFrame({"A": decimal_arr}, dtype=DecimalDtype())
    tm.assert_frame_equal(as_decimal, pd.DataFrame({"A": decimal_arr}))

    # 'int64': compared against a frame holding the integer 10.
    int_source = DecimalArray([decimal.Decimal('10.0')])
    as_int = pd.DataFrame({"A": int_source}, dtype='int64')
    tm.assert_frame_equal(as_int, pd.DataFrame({"A": [10]}))
@pytest.mark.parametrize("frame", [True, False])
def test_astype_dispatches(frame):
    """Check that the target dtype's context precision survives ``astype``."""
    # This is a dtype-specific test that ensures Series[decimal].astype
    # gets all the way through to ExtensionArray.astype
    # Designing a reliable smoke test that works for arbitrary data types
    # is difficult.
    series = pd.Series(DecimalArray([decimal.Decimal(2)]), name='a')
    ctx = decimal.Context(prec=5)

    obj = series.to_frame() if frame else series
    result = obj.astype(DecimalDtype(ctx))
    if frame:
        result = result['a']

    assert result.dtype.context.prec == ctx.prec
class TestArithmeticOps(BaseDecimal, base.BaseArithmeticOpsTests):
    """Combine ``BaseDecimal`` with ``base.BaseArithmeticOpsTests``."""

    def check_opname(self, s, op_name, other, exc=None):
        """Delegate to the base check, always passing ``exc=None``."""
        super(TestArithmeticOps, self).check_opname(s, op_name,
                                                    other, exc=None)

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """Run the operator checks with two decimal traps disabled.

        The ``DivisionByZero`` and ``InvalidOperation`` traps of the current
        decimal context are switched off for the checks and restored
        afterwards, even when a check fails, so the modified traps cannot
        leak into later tests.
        """
        op_name = all_arithmetic_operators
        s = pd.Series(data)

        context = decimal.getcontext()
        divbyzerotrap = context.traps[decimal.DivisionByZero]
        invalidoptrap = context.traps[decimal.InvalidOperation]
        context.traps[decimal.DivisionByZero] = 0
        context.traps[decimal.InvalidOperation] = 0

        try:
            # Decimal supports ops with int, but not float
            other = pd.Series([int(d * 100) for d in data])
            self.check_opname(s, op_name, other)

            if "mod" not in op_name:
                self.check_opname(s, op_name, s * 2)

            self.check_opname(s, op_name, 0)
            self.check_opname(s, op_name, 5)
        finally:
            # Restore on the same context object that was modified above.
            context.traps[decimal.DivisionByZero] = divbyzerotrap
            context.traps[decimal.InvalidOperation] = invalidoptrap

    def _check_divmod_op(self, s, op, other, exc=NotImplementedError):
        """Delegate to the base check, always passing ``exc=None``."""
        # We implement divmod
        super(TestArithmeticOps, self)._check_divmod_op(
            s, op, other, exc=None
        )

    def test_error(self):
        """Empty override taking no fixtures."""
class TestComparisonOps(BaseDecimal, base.BaseComparisonOpsTests):
    """Combine ``BaseDecimal`` with ``base.BaseComparisonOpsTests``."""

    def check_opname(self, s, op_name, other, exc=None):
        """Delegate to the base check, always passing ``exc=None``."""
        parent = super(TestComparisonOps, self)
        parent.check_opname(s, op_name, other, exc=None)

    def _compare_other(self, s, data, op_name, other):
        """Route every comparison through ``check_opname``."""
        self.check_opname(s, op_name, other)

    def test_compare_scalar(self, data, all_compare_operators):
        """Compare the Series against the scalar 0.5."""
        series = pd.Series(data)
        self._compare_other(series, data, all_compare_operators, 0.5)

    def test_compare_array(self, data, all_compare_operators):
        """Compare the Series against a randomly rescaled copy of itself."""
        series = pd.Series(data)

        exponents = np.random.choice([-1, 0, 1], len(data))
        # Randomly double, halve or keep same value
        factors = [decimal.Decimal(2.0 ** i) for i in exponents]
        other = pd.Series(data) * factors
        self._compare_other(series, data, all_compare_operators, other)
class DecimalArrayWithoutFromSequence(DecimalArray):
    """Helper class for testing error handling in _from_sequence."""

    # Declared as a classmethod like the method it overrides in DecimalArray;
    # without the decorator a class-level call would fail with a TypeError
    # for a missing argument instead of the intended KeyError.
    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        """Always raise ``KeyError``."""
        raise KeyError("For the test")
class DecimalArrayWithoutCoercion(DecimalArrayWithoutFromSequence):
    """Variant whose arithmetic methods use ``coerce_to_dtype=False``."""

    @classmethod
    def _create_arithmetic_method(cls, op):
        """Create the method for ``op`` via ``_create_method``, uncoerced."""
        method = cls._create_method(op, coerce_to_dtype=False)
        return method


# Install the arithmetic operators on the class defined above.
DecimalArrayWithoutCoercion._add_arithmetic_ops()
def test_combine_from_sequence_raises():
    """``Series.combine`` yields object dtype when ``_from_sequence`` raises."""
    # https://github.com/pandas-dev/pandas/issues/22850
    values = [decimal.Decimal("1.0"), decimal.Decimal("2.0")]
    ser = pd.Series(DecimalArrayWithoutFromSequence(values))

    result = ser.combine(ser, operator.add)

    # note: object dtype
    doubled = [decimal.Decimal("2.0"), decimal.Decimal("4.0")]
    expected = pd.Series(doubled, dtype="object")
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("class_", [DecimalArrayWithoutFromSequence,
                                    DecimalArrayWithoutCoercion])
def test_scalar_ops_from_sequence_raises(class_):
    """Adding an array of ``class_`` to itself yields an object ndarray."""
    # op(EA, EA) should return an EA, or an ndarray if it's not possible
    # to return an EA with the return values.
    operand = class_([decimal.Decimal("1.0"), decimal.Decimal("2.0")])

    result = operand + operand

    doubled = [decimal.Decimal("2.0"), decimal.Decimal("4.0")]
    expected = np.array(doubled, dtype="object")
    tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize("reverse, expected_div, expected_mod", [
    (False, [0, 1, 1, 2], [1, 0, 1, 0]),
    (True, [2, 1, 0, 0], [0, 0, 2, 2]),
])
def test_divmod_array(reverse, expected_div, expected_mod):
    """``divmod`` between a DecimalArray and 2, in either operand order."""
    # https://github.com/pandas-dev/pandas/issues/22930
    arr = to_decimal([1, 2, 3, 4])
    operands = (2, arr) if reverse else (arr, 2)
    div, mod = divmod(*operands)

    tm.assert_extension_array_equal(div, to_decimal(expected_div))
    tm.assert_extension_array_equal(mod, to_decimal(expected_mod))
def test_formatting_values_deprecated():
    """``repr`` of a Series whose array defines ``_formatting_values`` warns."""

    class DecimalArray2(DecimalArray):
        def _formatting_values(self):
            return np.array(self)

    values = [decimal.Decimal('1.0')]
    ser = pd.Series(DecimalArray2(values))

    # different levels for 2 vs. 3
    with tm.assert_produces_warning(DeprecationWarning,
                                    check_stacklevel=compat.PY3):
        repr(ser)