Static code analysis and corrections

This commit is contained in:
Kristjan Komlosi
2019-07-17 16:06:09 +02:00
parent 674692c2fc
commit 21bfae9fbc
10086 changed files with 2102103 additions and 51 deletions
@@ -0,0 +1,105 @@
import decimal
import numbers
import random
import sys
import numpy as np
import pandas as pd
from pandas.core.arrays import ExtensionArray
from pandas.core.dtypes.base import ExtensionDtype
class DecimalDtype(ExtensionDtype):
type = decimal.Decimal
name = 'decimal'
na_value = decimal.Decimal('NaN')
@classmethod
def construct_from_string(cls, string):
if string == cls.name:
return cls()
else:
raise TypeError("Cannot construct a '{}' from "
"'{}'".format(cls, string))
class DecimalArray(ExtensionArray):
dtype = DecimalDtype()
def __init__(self, values):
assert all(isinstance(v, decimal.Decimal) for v in values)
values = np.asarray(values, dtype=object)
self._data = values
# Some aliases for common attribute names to ensure pandas supports
# these
self._items = self.data = self._data
# those aliases are currently not working due to assumptions
# in internal code (GH-20735)
# self._values = self.values = self.data
@classmethod
def _from_sequence(cls, scalars):
return cls(scalars)
@classmethod
def _from_factorized(cls, values, original):
return cls(values)
def __getitem__(self, item):
if isinstance(item, numbers.Integral):
return self._data[item]
else:
return type(self)(self._data[item])
def take(self, indexer, allow_fill=False, fill_value=None):
from pandas.api.extensions import take
data = self._data
if allow_fill and fill_value is None:
fill_value = self.dtype.na_value
result = take(data, indexer, fill_value=fill_value,
allow_fill=allow_fill)
return self._from_sequence(result)
def copy(self, deep=False):
if deep:
return type(self)(self._data.copy())
return type(self)(self)
def __setitem__(self, key, value):
if pd.api.types.is_list_like(value):
value = [decimal.Decimal(v) for v in value]
else:
value = decimal.Decimal(value)
self._data[key] = value
def __len__(self):
return len(self._data)
def __repr__(self):
return 'DecimalArray({!r})'.format(self._data)
@property
def nbytes(self):
n = len(self)
if n:
return n * sys.getsizeof(self[0])
return 0
def isna(self):
return np.array([x.is_nan() for x in self._data], dtype=bool)
@property
def _na_value(self):
return decimal.Decimal('NaN')
@classmethod
def _concat_same_type(cls, to_concat):
return cls(np.concatenate([x._data for x in to_concat]))
def make_data():
return [decimal.Decimal(random.random()) for _ in range(100)]
@@ -0,0 +1,185 @@
import decimal
import numpy as np
import pandas as pd
import pandas.util.testing as tm
import pytest
from pandas.tests.extension import base
from .array import DecimalDtype, DecimalArray, make_data
@pytest.fixture
def dtype():
return DecimalDtype()
@pytest.fixture
def data():
return DecimalArray(make_data())
@pytest.fixture
def data_missing():
return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)])
@pytest.fixture
def data_for_sorting():
return DecimalArray([decimal.Decimal('1'),
decimal.Decimal('2'),
decimal.Decimal('0')])
@pytest.fixture
def data_missing_for_sorting():
return DecimalArray([decimal.Decimal('1'),
decimal.Decimal('NaN'),
decimal.Decimal('0')])
@pytest.fixture
def na_cmp():
return lambda x, y: x.is_nan() and y.is_nan()
@pytest.fixture
def na_value():
return decimal.Decimal("NaN")
@pytest.fixture
def data_for_grouping():
b = decimal.Decimal('1.0')
a = decimal.Decimal('0.0')
c = decimal.Decimal('2.0')
na = decimal.Decimal('NaN')
return DecimalArray([b, b, na, na, a, a, b, c])
class BaseDecimal(object):
def assert_series_equal(self, left, right, *args, **kwargs):
left_na = left.isna()
right_na = right.isna()
tm.assert_series_equal(left_na, right_na)
return tm.assert_series_equal(left[~left_na],
right[~right_na],
*args, **kwargs)
def assert_frame_equal(self, left, right, *args, **kwargs):
# TODO(EA): select_dtypes
tm.assert_index_equal(
left.columns, right.columns,
exact=kwargs.get('check_column_type', 'equiv'),
check_names=kwargs.get('check_names', True),
check_exact=kwargs.get('check_exact', False),
check_categorical=kwargs.get('check_categorical', True),
obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame')))
decimals = (left.dtypes == 'decimal').index
for col in decimals:
self.assert_series_equal(left[col], right[col],
*args, **kwargs)
left = left.drop(columns=decimals)
right = right.drop(columns=decimals)
tm.assert_frame_equal(left, right, *args, **kwargs)
class TestDtype(BaseDecimal, base.BaseDtypeTests):
pass
class TestInterface(BaseDecimal, base.BaseInterfaceTests):
pass
class TestConstructors(BaseDecimal, base.BaseConstructorsTests):
pass
class TestReshaping(BaseDecimal, base.BaseReshapingTests):
pass
class TestGetitem(BaseDecimal, base.BaseGetitemTests):
def test_take_na_value_other_decimal(self):
arr = DecimalArray([decimal.Decimal('1.0'),
decimal.Decimal('2.0')])
result = arr.take([0, -1], allow_fill=True,
fill_value=decimal.Decimal('-1.0'))
expected = DecimalArray([decimal.Decimal('1.0'),
decimal.Decimal('-1.0')])
self.assert_extension_array_equal(result, expected)
class TestMissing(BaseDecimal, base.BaseMissingTests):
pass
class TestMethods(BaseDecimal, base.BaseMethodsTests):
@pytest.mark.parametrize('dropna', [True, False])
@pytest.mark.xfail(reason="value_counts not implemented yet.")
def test_value_counts(self, all_data, dropna):
all_data = all_data[:10]
if dropna:
other = np.array(all_data[~all_data.isna()])
else:
other = all_data
result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
expected = pd.Series(other).value_counts(dropna=dropna).sort_index()
tm.assert_series_equal(result, expected)
class TestCasting(BaseDecimal, base.BaseCastingTests):
pass
class TestGroupby(BaseDecimal, base.BaseGroupbyTests):
pass
def test_series_constructor_coerce_data_to_extension_dtype_raises():
xpr = ("Cannot cast data to extension dtype 'decimal'. Pass the "
"extension array directly.")
with tm.assert_raises_regex(ValueError, xpr):
pd.Series([0, 1, 2], dtype=DecimalDtype())
def test_series_constructor_with_same_dtype_ok():
arr = DecimalArray([decimal.Decimal('10.0')])
result = pd.Series(arr, dtype=DecimalDtype())
expected = pd.Series(arr)
tm.assert_series_equal(result, expected)
def test_series_constructor_coerce_extension_array_to_dtype_raises():
arr = DecimalArray([decimal.Decimal('10.0')])
xpr = r"Cannot specify a dtype 'int64' .* \('decimal'\)."
with tm.assert_raises_regex(ValueError, xpr):
pd.Series(arr, dtype='int64')
def test_dataframe_constructor_with_same_dtype_ok():
arr = DecimalArray([decimal.Decimal('10.0')])
result = pd.DataFrame({"A": arr}, dtype=DecimalDtype())
expected = pd.DataFrame({"A": arr})
tm.assert_frame_equal(result, expected)
def test_dataframe_constructor_with_different_dtype_raises():
arr = DecimalArray([decimal.Decimal('10.0')])
xpr = "Cannot coerce extension array to dtype 'int64'. "
with tm.assert_raises_regex(ValueError, xpr):
pd.DataFrame({"A": arr}, dtype='int64')