pruned venvs
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
-538
@@ -1,538 +0,0 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.sparse.api import SparseDtype
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSparseArrayArithmetics(object):
    """Operator tests comparing sparse results with dense equivalents.

    ``_base`` builds the dense operands and ``_klass`` the sparse ones;
    ``_assert`` compares a densified sparse result with the dense
    expectation.  Subclasses may override all three to rerun the same
    tests on another container pair.
    """

    _base = np.array
    _klass = pd.SparseArray

    def _assert(self, a, b):
        """Assert that the result ``a`` equals the expected ``b``."""
        tm.assert_numpy_array_equal(a, b)

    def _check_numeric_ops(self, a, b, a_dense, b_dense):
        """Check ``+ - * / // % **`` in both operand orders.

        ``a`` is a sparse container and ``b`` a sparse container or a
        scalar.  ``a_dense`` / ``b_dense`` are the dense operands used to
        compute the expected values.  Every operator is exercised first
        against ``b`` (sparse & sparse) and then against ``b_dense``
        (sparse & dense).
        """
        # Wrapping the computation of each expected value in its own
        # np.errstate() would be too tedious, so the whole check runs
        # with the warnings silenced.
        with np.errstate(invalid='ignore', divide='ignore'):
            # first pass: sparse & sparse, second pass: sparse & dense
            for other in (b, b_dense):
                self._assert((a + other).to_dense(), a_dense + b_dense)
                self._assert((other + a).to_dense(), b_dense + a_dense)

                self._assert((a - other).to_dense(), a_dense - b_dense)
                self._assert((other - a).to_dense(), b_dense - a_dense)

                self._assert((a * other).to_dense(), a_dense * b_dense)
                self._assert((other * a).to_dense(), b_dense * a_dense)

                # pandas uses future division
                self._assert((a / other).to_dense(),
                             a_dense * 1.0 / b_dense)
                self._assert((other / a).to_dense(),
                             b_dense * 1.0 / a_dense)

                # ToDo: FIXME in GH 13843
                if not (self._base == pd.Series and
                        a.dtype.subtype == np.dtype('int64')):
                    self._assert((a // other).to_dense(),
                                 a_dense // b_dense)
                    self._assert((other // a).to_dense(),
                                 b_dense // a_dense)

                self._assert((a % other).to_dense(), a_dense % b_dense)
                self._assert((other % a).to_dense(), b_dense % a_dense)

                self._assert((a ** other).to_dense(), a_dense ** b_dense)
                self._assert((other ** a).to_dense(), b_dense ** a_dense)

    def _check_bool_result(self, res):
        """Assert ``res`` is a ``_klass`` with a boolean sparse dtype."""
        assert isinstance(res, self._klass)
        assert isinstance(res.dtype, SparseDtype)
        # builtin ``bool`` instead of the ``np.bool`` alias, which newer
        # NumPy releases no longer provide
        assert res.dtype.subtype == bool
        assert isinstance(res.fill_value, bool)

    def _check_comparison_ops(self, a, b, a_dense, b_dense):
        """Check ``== != >= <= > <`` against the dense expectation.

        Each comparison is run against ``b`` (sparse & sparse) and then
        against ``b_dense`` (sparse & dense); the result must be a
        boolean sparse container whose dense form matches the dense
        comparison.
        """
        comparisons = (operator.eq, operator.ne, operator.ge,
                       operator.le, operator.gt, operator.lt)
        # Wrapping the computation of each expected value in its own
        # np.errstate() would be too tedious.
        with np.errstate(invalid='ignore'):
            for other in (b, b_dense):
                for compare in comparisons:
                    self._check_bool_result(compare(a, other))
                    self._assert(compare(a, other).to_dense(),
                                 compare(a_dense, b_dense))

    def _check_logical_ops(self, a, b, a_dense, b_dense):
        """Check ``&`` and ``|`` against the dense expectation."""
        # first pass: sparse & sparse, second pass: sparse & dense
        for other in (b, b_dense):
            self._check_bool_result(a & other)
            self._assert((a & other).to_dense(), a_dense & b_dense)

            self._check_bool_result(a | other)
            self._assert((a | other).to_dense(), a_dense | b_dense)

    def test_float_scalar(self):
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])

        for kind in ['integer', 'block']:
            # default fill value, then explicit fill values 0 and 2
            for fill_kwargs in ({}, {'fill_value': 0}, {'fill_value': 2}):
                a = self._klass(values, kind=kind, **fill_kwargs)
                for scalar in (1, 0, 3):
                    self._check_numeric_ops(a, scalar, values, scalar)

    def test_float_scalar_comparison(self):
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])

        for kind in ['integer', 'block']:
            # default fill value, then explicit fill values 0 and 2
            for fill_kwargs in ({}, {'fill_value': 0}, {'fill_value': 2}):
                a = self._klass(values, kind=kind, **fill_kwargs)
                for scalar in (1, 0, 3):
                    self._check_comparison_ops(a, scalar, values, scalar)

    def test_float_same_index(self):
        # when sp_index are the same
        for kind in ['integer', 'block']:
            values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1,
                                 np.nan])
            rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2,
                                  np.nan])

            a = self._klass(values, kind=kind)
            b = self._klass(rvalues, kind=kind)
            self._check_numeric_ops(a, b, values, rvalues)

            values = self._base([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.])
            rvalues = self._base([0., 2., 3., 4., 0., 0., 1., 3., 2., 0.])

            a = self._klass(values, kind=kind, fill_value=0)
            b = self._klass(rvalues, kind=kind, fill_value=0)
            self._check_numeric_ops(a, b, values, rvalues)

    def test_float_same_index_comparison(self):
        # when sp_index are the same
        for kind in ['integer', 'block']:
            values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1,
                                 np.nan])
            rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2,
                                  np.nan])

            a = self._klass(values, kind=kind)
            b = self._klass(rvalues, kind=kind)
            self._check_comparison_ops(a, b, values, rvalues)

            values = self._base([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.])
            rvalues = self._base([0., 2., 3., 4., 0., 0., 1., 3., 2., 0.])

            a = self._klass(values, kind=kind, fill_value=0)
            b = self._klass(rvalues, kind=kind, fill_value=0)
            self._check_comparison_ops(a, b, values, rvalues)

    def test_float_array(self):
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

        for kind in ['integer', 'block']:
            a = self._klass(values, kind=kind)
            b = self._klass(rvalues, kind=kind)
            self._check_numeric_ops(a, b, values, rvalues)
            self._check_numeric_ops(a, b * 0, values, rvalues * 0)

            a = self._klass(values, kind=kind, fill_value=0)
            b = self._klass(rvalues, kind=kind)
            self._check_numeric_ops(a, b, values, rvalues)

            a = self._klass(values, kind=kind, fill_value=0)
            b = self._klass(rvalues, kind=kind, fill_value=0)
            self._check_numeric_ops(a, b, values, rvalues)

            a = self._klass(values, kind=kind, fill_value=1)
            b = self._klass(rvalues, kind=kind, fill_value=2)
            self._check_numeric_ops(a, b, values, rvalues)

    def test_float_array_different_kind(self):
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

        a = self._klass(values, kind='integer')
        b = self._klass(rvalues, kind='block')
        self._check_numeric_ops(a, b, values, rvalues)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0)

        a = self._klass(values, kind='integer', fill_value=0)
        b = self._klass(rvalues, kind='block')
        self._check_numeric_ops(a, b, values, rvalues)

        a = self._klass(values, kind='integer', fill_value=0)
        b = self._klass(rvalues, kind='block', fill_value=0)
        self._check_numeric_ops(a, b, values, rvalues)

        a = self._klass(values, kind='integer', fill_value=1)
        b = self._klass(rvalues, kind='block', fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues)

    def test_float_array_comparison(self):
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

        for kind in ['integer', 'block']:
            a = self._klass(values, kind=kind)
            b = self._klass(rvalues, kind=kind)
            self._check_comparison_ops(a, b, values, rvalues)
            self._check_comparison_ops(a, b * 0, values, rvalues * 0)

            a = self._klass(values, kind=kind, fill_value=0)
            b = self._klass(rvalues, kind=kind)
            self._check_comparison_ops(a, b, values, rvalues)

            a = self._klass(values, kind=kind, fill_value=0)
            b = self._klass(rvalues, kind=kind, fill_value=0)
            self._check_comparison_ops(a, b, values, rvalues)

            a = self._klass(values, kind=kind, fill_value=1)
            b = self._klass(rvalues, kind=kind, fill_value=2)
            self._check_comparison_ops(a, b, values, rvalues)

    def test_int_array(self):
        # have to specify dtype explicitly until fixing GH 667
        dtype = np.int64

        values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)

        for kind in ['integer', 'block']:
            a = self._klass(values, dtype=dtype, kind=kind)
            assert a.dtype == SparseDtype(dtype)
            b = self._klass(rvalues, dtype=dtype, kind=kind)
            assert b.dtype == SparseDtype(dtype)

            self._check_numeric_ops(a, b, values, rvalues)
            self._check_numeric_ops(a, b * 0, values, rvalues * 0)

            a = self._klass(values, fill_value=0, dtype=dtype, kind=kind)
            assert a.dtype == SparseDtype(dtype)
            b = self._klass(rvalues, dtype=dtype, kind=kind)
            assert b.dtype == SparseDtype(dtype)

            self._check_numeric_ops(a, b, values, rvalues)

            a = self._klass(values, fill_value=0, dtype=dtype, kind=kind)
            assert a.dtype == SparseDtype(dtype)
            b = self._klass(rvalues, fill_value=0, dtype=dtype, kind=kind)
            assert b.dtype == SparseDtype(dtype)
            self._check_numeric_ops(a, b, values, rvalues)

            a = self._klass(values, fill_value=1, dtype=dtype, kind=kind)
            assert a.dtype == SparseDtype(dtype, fill_value=1)
            b = self._klass(rvalues, fill_value=2, dtype=dtype, kind=kind)
            assert b.dtype == SparseDtype(dtype, fill_value=2)
            self._check_numeric_ops(a, b, values, rvalues)

    def test_int_array_comparison(self):

        # int32 NI ATM
        for dtype in ['int64']:
            values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
            rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0],
                                 dtype=dtype)

            for kind in ['integer', 'block']:
                a = self._klass(values, dtype=dtype, kind=kind)
                b = self._klass(rvalues, dtype=dtype, kind=kind)
                self._check_comparison_ops(a, b, values, rvalues)
                self._check_comparison_ops(a, b * 0, values, rvalues * 0)

                a = self._klass(values, dtype=dtype, kind=kind,
                                fill_value=0)
                b = self._klass(rvalues, dtype=dtype, kind=kind)
                self._check_comparison_ops(a, b, values, rvalues)

                a = self._klass(values, dtype=dtype, kind=kind,
                                fill_value=0)
                b = self._klass(rvalues, dtype=dtype, kind=kind,
                                fill_value=0)
                self._check_comparison_ops(a, b, values, rvalues)

                a = self._klass(values, dtype=dtype, kind=kind,
                                fill_value=1)
                b = self._klass(rvalues, dtype=dtype, kind=kind,
                                fill_value=2)
                self._check_comparison_ops(a, b, values, rvalues)

    def test_bool_same_index(self):
        # GH 14000
        # when sp_index are the same
        for kind in ['integer', 'block']:
            values = self._base([True, False, True, True], dtype=bool)
            rvalues = self._base([True, False, True, True], dtype=bool)

            for fill_value in [True, False, np.nan]:
                a = self._klass(values, kind=kind, dtype=bool,
                                fill_value=fill_value)
                b = self._klass(rvalues, kind=kind, dtype=bool,
                                fill_value=fill_value)
                self._check_logical_ops(a, b, values, rvalues)

    def test_bool_array_logical(self):
        # GH 14000
        # the two operands hold different values here
        for kind in ['integer', 'block']:
            values = self._base([True, False, True, False, True, True],
                                dtype=bool)
            rvalues = self._base([True, False, False, True, False, True],
                                 dtype=bool)

            for fill_value in [True, False, np.nan]:
                a = self._klass(values, kind=kind, dtype=bool,
                                fill_value=fill_value)
                b = self._klass(rvalues, kind=kind, dtype=bool,
                                fill_value=fill_value)
                self._check_logical_ops(a, b, values, rvalues)

    def test_mixed_array_float_int(self):

        for rdtype in ['int64']:
            values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1,
                                 np.nan])
            rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0],
                                 dtype=rdtype)

            for kind in ['integer', 'block']:
                a = self._klass(values, kind=kind)
                b = self._klass(rvalues, kind=kind)
                assert b.dtype == SparseDtype(rdtype)

                self._check_numeric_ops(a, b, values, rvalues)
                self._check_numeric_ops(a, b * 0, values, rvalues * 0)

                a = self._klass(values, kind=kind, fill_value=0)
                b = self._klass(rvalues, kind=kind)
                assert b.dtype == SparseDtype(rdtype)
                self._check_numeric_ops(a, b, values, rvalues)

                a = self._klass(values, kind=kind, fill_value=0)
                b = self._klass(rvalues, kind=kind, fill_value=0)
                assert b.dtype == SparseDtype(rdtype)
                self._check_numeric_ops(a, b, values, rvalues)

                a = self._klass(values, kind=kind, fill_value=1)
                b = self._klass(rvalues, kind=kind, fill_value=2)
                assert b.dtype == SparseDtype(rdtype, fill_value=2)
                self._check_numeric_ops(a, b, values, rvalues)

    def test_mixed_array_comparison(self):

        # int32 NI ATM
        for rdtype in ['int64']:
            values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1,
                                 np.nan])
            rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0],
                                 dtype=rdtype)

            for kind in ['integer', 'block']:
                a = self._klass(values, kind=kind)
                b = self._klass(rvalues, kind=kind)
                assert b.dtype == SparseDtype(rdtype)

                self._check_comparison_ops(a, b, values, rvalues)
                self._check_comparison_ops(a, b * 0, values, rvalues * 0)

                a = self._klass(values, kind=kind, fill_value=0)
                b = self._klass(rvalues, kind=kind)
                assert b.dtype == SparseDtype(rdtype)
                self._check_comparison_ops(a, b, values, rvalues)

                a = self._klass(values, kind=kind, fill_value=0)
                b = self._klass(rvalues, kind=kind, fill_value=0)
                assert b.dtype == SparseDtype(rdtype)
                self._check_comparison_ops(a, b, values, rvalues)

                a = self._klass(values, kind=kind, fill_value=1)
                b = self._klass(rvalues, kind=kind, fill_value=2)
                assert b.dtype == SparseDtype(rdtype, fill_value=2)
                self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
|
||||
class TestSparseSeriesArithmetic(TestSparseArrayArithmetics):
    """Rerun the inherited operator tests on Series / SparseSeries."""

    _base = pd.Series
    _klass = pd.SparseSeries

    def _assert(self, a, b):
        """Assert that the result ``a`` equals the expected ``b``."""
        tm.assert_series_equal(a, b)

    def test_alignment(self):
        """Numeric ops between operands whose indexes differ.

        The right operand is indexed first by ``[1, 2, 3, 4]`` and then
        by ``[10, 11, 12, 13]``, while the left operand keeps its default
        index; each pairing is checked with fill values ``0`` and
        ``np.nan``.
        """
        for right_index in ([1, 2, 3, 4], [10, 11, 12, 13]):
            da = pd.Series(np.arange(4))
            db = pd.Series(np.arange(4), index=right_index)

            for fill_value in (0, np.nan):
                sa = pd.SparseSeries(np.arange(4), dtype=np.int64,
                                     fill_value=fill_value)
                sb = pd.SparseSeries(np.arange(4), index=right_index,
                                     dtype=np.int64,
                                     fill_value=fill_value)
                self._check_numeric_ops(sa, sb, da, db)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", [operator.eq, operator.add])
def test_with_list(op):
    """A list operand gives the same result as a SparseArray operand."""
    sparse = pd.SparseArray([0, 1], fill_value=0)
    from_list = op(sparse, [0, 1])
    from_sparse = op(sparse, pd.SparseArray([0, 1]))
    tm.assert_sp_array_equal(from_list, from_sparse)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('ufunc', [np.abs, np.exp])
@pytest.mark.parametrize('arr', [
    pd.SparseArray([0, 0, -1, 1]),
    pd.SparseArray([None, None, -1, 1]),
])
def test_ufuncs(ufunc, arr):
    """A unary ufunc is applied to both the values and the fill value."""
    actual = ufunc(arr)
    dense_result = ufunc(np.asarray(arr))
    wanted = pd.SparseArray(dense_result,
                            fill_value=ufunc(arr.fill_value))
    tm.assert_sp_array_equal(actual, wanted)
|
||||
|
||||
|
||||
# The original parameter list repeated the ``fill_value=1`` case four
# times verbatim; identical cases only rerun the same test, so each
# distinct case is listed once.
@pytest.mark.parametrize("a, b", [
    (pd.SparseArray([0, 0, 0]), np.array([0, 1, 2])),
    (pd.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
])
@pytest.mark.parametrize("ufunc", [
    np.add,
    np.greater,
])
def test_binary_ufuncs(ufunc, a, b):
    """A binary ufunc on (sparse, ndarray) returns a matching SparseArray.

    The result must be a ``pd.SparseArray`` whose dense values equal the
    ufunc applied to the dense operands.
    """
    # can't say anything about fill value here.
    result = ufunc(a, b)
    expected = ufunc(np.asarray(a), np.asarray(b))
    assert isinstance(result, pd.SparseArray)
    tm.assert_numpy_array_equal(np.asarray(result), expected)
|
||||
|
||||
|
||||
def test_ndarray_inplace():
    """``ndarray += SparseArray`` leaves an ndarray holding the sum."""
    dense = np.array([0, 1, 2, 3])
    dense += pd.SparseArray([0, 2, 0, 0])
    tm.assert_numpy_array_equal(dense, np.array([0, 3, 2, 3]))
|
||||
|
||||
|
||||
def test_sparray_inplace():
    """``SparseArray += ndarray`` leaves a SparseArray holding the sum."""
    sparse = pd.SparseArray([0, 2, 0, 0])
    sparse += np.array([0, 1, 2, 3])
    wanted = pd.SparseArray([0, 3, 2, 3], fill_value=0)
    tm.assert_sp_array_equal(sparse, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [True, False])
def test_invert(fill_value):
    """``~`` negates both the stored values and the fill value."""
    dense = np.array([True, False, False, True])
    inverted = ~pd.SparseArray(dense, fill_value=fill_value)
    wanted = pd.SparseArray(~dense, fill_value=not fill_value)
    tm.assert_sp_array_equal(inverted, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [0, np.nan])
@pytest.mark.parametrize("op", [operator.pos, operator.neg])
def test_unary_op(op, fill_value):
    """Unary ``+`` / ``-`` apply to the values and to the fill value."""
    dense = np.array([0, 1, np.nan, 2])
    actual = op(pd.SparseArray(dense, fill_value=fill_value))
    wanted = pd.SparseArray(op(dense), fill_value=op(fill_value))
    tm.assert_sp_array_equal(actual, wanted)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,161 +0,0 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.sparse.api import SparseDtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype, fill_value", [
    ('int', 0),
    ('float', np.nan),
    ('bool', False),
    ('object', np.nan),
    ('datetime64[ns]', pd.NaT),
    ('timedelta64[ns]', pd.NaT),
])
def test_inferred_dtype(dtype, fill_value):
    """Without an explicit fill value, one is inferred from the subtype."""
    inferred = SparseDtype(dtype).fill_value
    if not pd.isna(fill_value):
        assert inferred == fill_value
    else:
        # missing values never compare equal, so check kind and type
        assert pd.isna(inferred) and type(inferred) == type(fill_value)
|
||||
|
||||
|
||||
def test_from_sparse_dtype():
    """Wrapping a SparseDtype in SparseDtype keeps its fill value."""
    rebuilt = SparseDtype(SparseDtype('float', 0))
    assert rebuilt.fill_value == 0
|
||||
|
||||
|
||||
def test_from_sparse_dtype_fill_value():
    """An explicit fill value overrides the wrapped dtype's fill value."""
    source = SparseDtype('int', 1)
    rebuilt = SparseDtype(source, fill_value=2)
    assert rebuilt == SparseDtype('int', 2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtype, fill_value', [
    ('int', None),
    ('float', None),
    ('bool', None),
    ('object', None),
    ('datetime64[ns]', None),
    ('timedelta64[ns]', None),
    ('int', np.nan),
    ('float', 0),
])
def test_equal(dtype, fill_value):
    """Two dtypes built from the same arguments compare equal both ways."""
    left = SparseDtype(dtype, fill_value)
    right = SparseDtype(dtype, fill_value)
    assert left == right
    assert right == left
|
||||
|
||||
|
||||
def test_nans_equal():
    """``float('nan')`` and ``np.nan`` fill values compare equal."""
    builtin_nan = SparseDtype(float, float('nan'))
    numpy_nan = SparseDtype(float, np.nan)
    assert builtin_nan == numpy_nan
    assert numpy_nan == builtin_nan
|
||||
|
||||
|
||||
@pytest.mark.parametrize('a, b', [
    (SparseDtype('float64'), SparseDtype('float32')),
    (SparseDtype('float64'), SparseDtype('float64', 0)),
    (SparseDtype('float64'), SparseDtype('datetime64[ns]', np.nan)),
    (SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)),
    (SparseDtype('float64'), np.dtype('float64')),
])
def test_not_equal(a, b):
    """Each listed pair of dtypes must compare unequal."""
    assert a != b
|
||||
|
||||
|
||||
def test_construct_from_string_raises():
    """A string that names no dtype raises ``TypeError``."""
    bad_string = 'not a dtype'
    with pytest.raises(TypeError):
        SparseDtype.construct_from_string(bad_string)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype, expected", [
    (SparseDtype(int), True),
    (SparseDtype(float), True),
    (SparseDtype(bool), True),
    (SparseDtype(object), False),
    (SparseDtype(str), False),
])
def test_is_numeric(dtype, expected):
    """``_is_numeric`` is exactly ``True`` or ``False`` as listed."""
    flag = dtype._is_numeric
    assert flag is expected
|
||||
|
||||
|
||||
def test_str_uses_object():
    """A ``str`` subtype is stored as the NumPy object dtype."""
    assert SparseDtype(str).subtype == np.dtype('object')
|
||||
|
||||
|
||||
@pytest.mark.parametrize("string, expected", [
    ('Sparse[float64]', SparseDtype(np.dtype('float64'))),
    ('Sparse[float32]', SparseDtype(np.dtype('float32'))),
    ('Sparse[int]', SparseDtype(np.dtype('int'))),
    ('Sparse[str]', SparseDtype(np.dtype('str'))),
    ('Sparse[datetime64[ns]]', SparseDtype(np.dtype('datetime64[ns]'))),
    ("Sparse", SparseDtype(np.dtype("float"), np.nan)),
])
def test_construct_from_string(string, expected):
    """Each dtype string parses to the listed SparseDtype."""
    parsed = SparseDtype.construct_from_string(string)
    assert parsed == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("a, b, expected", [
    (SparseDtype(float, 0.0), SparseDtype(np.dtype('float'), 0.0), True),
    (SparseDtype(int, 0), SparseDtype(int, 0), True),
    (SparseDtype(float, float('nan')), SparseDtype(float, np.nan), True),
    (SparseDtype(float, 0), SparseDtype(float, np.nan), False),
    (SparseDtype(int, 0.0), SparseDtype(float, 0.0), False),
])
def test_hash_equal(a, b, expected):
    """Equality and hash equality both match the listed expectation."""
    same_value = a == b
    assert same_value is expected

    same_hash = hash(a) == hash(b)
    assert same_hash is expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize('string, expected', [
    ('Sparse[int]', 'int'),
    ('Sparse[int, 0]', 'int'),
    ('Sparse[int64]', 'int64'),
    ('Sparse[int64, 0]', 'int64'),
    ('Sparse[datetime64[ns], 0]', 'datetime64[ns]'),
])
def test_parse_subtype(string, expected):
    """The first item returned by ``_parse_subtype`` is the subtype name."""
    parsed_subtype, _ignored = SparseDtype._parse_subtype(string)
    assert parsed_subtype == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("string", [
    "Sparse[int, 1]",
    "Sparse[float, 0.0]",
    "Sparse[bool, True]",
])
def test_construct_from_string_fill_value_raises(string):
    """Each listed string with an embedded fill value raises TypeError."""
    expected_message = 'fill_value in the string is not'
    with pytest.raises(TypeError, match=expected_message):
        SparseDtype.construct_from_string(string)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('original, dtype, expected', [
    (SparseDtype(int, 0), float, SparseDtype(float, 0.0)),
    (SparseDtype(int, 1), float, SparseDtype(float, 1.0)),
    (SparseDtype(int, 1), str, SparseDtype(object, '1')),
    (SparseDtype(float, 1.5), int, SparseDtype(int, 1)),
])
def test_update_dtype(original, dtype, expected):
    """``update_dtype`` yields the listed dtype for each case."""
    updated = original.update_dtype(dtype)
    assert updated == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("original, dtype", [
    (SparseDtype(float, np.nan), int),
    (SparseDtype(str, 'abc'), int),
])
def test_update_dtype_raises(original, dtype):
    """``update_dtype`` raises ``ValueError`` for each listed case."""
    target = dtype
    with pytest.raises(ValueError):
        original.update_dtype(target)
|
||||
@@ -1,605 +0,0 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._libs.sparse as splib
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import Series
|
||||
from pandas.core.arrays.sparse import BlockIndex, IntIndex, _make_index
|
||||
import pandas.util.testing as tm
|
||||
|
||||
# Length passed to every index built by the tests in this module.
TEST_LENGTH = 20

# Shared block layouts.  Each case gives the block starts (``*loc``) and
# block lengths (``*len``) of two indexes x and y, plus the blocks
# expected from intersecting them.
plain_case = {
    'xloc': [0, 7, 15], 'xlen': [3, 5, 5],
    'yloc': [2, 9, 14], 'ylen': [2, 3, 5],
    'intersect_loc': [2, 9, 15], 'intersect_len': [1, 3, 4],
}
delete_blocks = {
    'xloc': [0, 5], 'xlen': [4, 4],
    'yloc': [1], 'ylen': [4],
    'intersect_loc': [1], 'intersect_len': [3],
}
split_blocks = {
    'xloc': [0], 'xlen': [10],
    'yloc': [0, 5], 'ylen': [3, 7],
    'intersect_loc': [0, 5], 'intersect_len': [3, 5],
}
skip_block = {
    'xloc': [10], 'xlen': [5],
    'yloc': [0, 12], 'ylen': [5, 3],
    'intersect_loc': [12], 'intersect_len': [3],
}

no_intersect = {
    'xloc': [0, 10], 'xlen': [4, 6],
    'yloc': [5, 17], 'ylen': [4, 2],
    'intersect_loc': [], 'intersect_len': [],
}


def check_cases(_check_case):
    """Call ``_check_case`` once for every shared block layout.

    ``_check_case`` receives six positional lists: ``xloc``, ``xlen``,
    ``yloc``, ``ylen``, ``intersect_loc`` and ``intersect_len``.  The
    five named cases run first, followed by two layouts in which one or
    both indexes are empty.
    """
    argument_order = ('xloc', 'xlen', 'yloc', 'ylen',
                      'intersect_loc', 'intersect_len')
    named_cases = (plain_case, delete_blocks, split_blocks, skip_block,
                   no_intersect)
    for case in named_cases:
        _check_case(*[case[key] for key in argument_order])

    # one or both is empty
    _check_case([0], [5], [], [], [], [])
    _check_case([], [], [], [], [], [])
|
||||
|
||||
|
||||
class TestSparseIndexUnion(object):
|
||||
|
||||
def test_index_make_union(self):
|
||||
def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
|
||||
xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
|
||||
yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
|
||||
bresult = xindex.make_union(yindex)
|
||||
assert (isinstance(bresult, BlockIndex))
|
||||
tm.assert_numpy_array_equal(bresult.blocs,
|
||||
np.array(eloc, dtype=np.int32))
|
||||
tm.assert_numpy_array_equal(bresult.blengths,
|
||||
np.array(elen, dtype=np.int32))
|
||||
|
||||
ixindex = xindex.to_int_index()
|
||||
iyindex = yindex.to_int_index()
|
||||
iresult = ixindex.make_union(iyindex)
|
||||
assert (isinstance(iresult, IntIndex))
|
||||
tm.assert_numpy_array_equal(iresult.indices,
|
||||
bresult.to_int_index().indices)
|
||||
|
||||
"""
|
||||
x: ----
|
||||
y: ----
|
||||
r: --------
|
||||
"""
|
||||
xloc = [0]
|
||||
xlen = [5]
|
||||
yloc = [5]
|
||||
ylen = [4]
|
||||
eloc = [0]
|
||||
elen = [9]
|
||||
_check_case(xloc, xlen, yloc, ylen, eloc, elen)
|
||||
"""
|
||||
x: ----- -----
|
||||
y: ----- --
|
||||
"""
|
||||
xloc = [0, 10]
|
||||
xlen = [5, 5]
|
||||
yloc = [2, 17]
|
||||
ylen = [5, 2]
|
||||
eloc = [0, 10, 17]
|
||||
elen = [7, 5, 2]
|
||||
_check_case(xloc, xlen, yloc, ylen, eloc, elen)
|
||||
"""
|
||||
x: ------
|
||||
y: -------
|
||||
r: ----------
|
||||
"""
|
||||
xloc = [1]
|
||||
xlen = [5]
|
||||
yloc = [3]
|
||||
ylen = [5]
|
||||
eloc = [1]
|
||||
elen = [7]
|
||||
_check_case(xloc, xlen, yloc, ylen, eloc, elen)
|
||||
"""
|
||||
x: ------ -----
|
||||
y: -------
|
||||
r: -------------
|
||||
"""
|
||||
xloc = [2, 10]
|
||||
xlen = [4, 4]
|
||||
yloc = [4]
|
||||
ylen = [8]
|
||||
eloc = [2]
|
||||
elen = [12]
|
||||
_check_case(xloc, xlen, yloc, ylen, eloc, elen)
|
||||
"""
|
||||
x: --- -----
|
||||
y: -------
|
||||
r: -------------
|
||||
"""
|
||||
xloc = [0, 5]
|
||||
xlen = [3, 5]
|
||||
yloc = [0]
|
||||
ylen = [7]
|
||||
eloc = [0]
|
||||
elen = [10]
|
||||
_check_case(xloc, xlen, yloc, ylen, eloc, elen)
|
||||
"""
|
||||
x: ------ -----
|
||||
y: ------- ---
|
||||
r: -------------
|
||||
"""
|
||||
xloc = [2, 10]
|
||||
xlen = [4, 4]
|
||||
yloc = [4, 13]
|
||||
ylen = [8, 4]
|
||||
eloc = [2]
|
||||
elen = [15]
|
||||
_check_case(xloc, xlen, yloc, ylen, eloc, elen)
|
||||
"""
|
||||
x: ----------------------
|
||||
y: ---- ---- ---
|
||||
r: ----------------------
|
||||
"""
|
||||
xloc = [2]
|
||||
xlen = [15]
|
||||
yloc = [4, 9, 14]
|
||||
ylen = [3, 2, 2]
|
||||
eloc = [2]
|
||||
elen = [15]
|
||||
_check_case(xloc, xlen, yloc, ylen, eloc, elen)
|
||||
"""
|
||||
x: ---- ---
|
||||
y: --- ---
|
||||
"""
|
||||
xloc = [0, 10]
|
||||
xlen = [3, 3]
|
||||
yloc = [5, 15]
|
||||
ylen = [2, 2]
|
||||
eloc = [0, 5, 10, 15]
|
||||
elen = [3, 2, 3, 2]
|
||||
_check_case(xloc, xlen, yloc, ylen, eloc, elen)
|
||||
|
||||
def test_intindex_make_union(self):
    """Check ``IntIndex.make_union`` on four cases plus a length mismatch."""

    def _int_index(length, points):
        # All indexes in this test are built from int32 position arrays.
        return IntIndex(length, np.array(points, dtype=np.int32))

    # (left positions, right positions, expected union); every index in
    # this table has length 5.
    scenarios = [
        ([0, 3, 4], [0, 2], [0, 2, 3, 4]),
        ([], [0, 2], [0, 2]),
        ([], [], []),
        ([0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]),
    ]
    for left_points, right_points, union_points in scenarios:
        left = _int_index(5, left_points)
        right = _int_index(5, right_points)
        merged = left.make_union(right)
        wanted = _int_index(5, union_points)
        assert merged.equals(wanted)

    # Indexes declared with different lengths (5 vs 4) must be rejected.
    left = _int_index(5, [0, 1])
    right = _int_index(4, [0, 1])
    with pytest.raises(ValueError):
        left.make_union(right)
|
||||
|
||||
|
||||
class TestSparseIndexIntersect(object):
    """Tests for ``intersect`` on BlockIndex and IntIndex objects."""

    @td.skip_if_windows
    def test_intersect(self):
        """Check ``intersect`` results and its length-mismatch failure.

        The scenario callback is handed to ``check_cases`` (defined
        elsewhere in this file), which presumably calls it once per
        scenario -- confirm against its definition.
        """

        def _run_scenario(xloc, xlen, yloc, ylen, eloc, elen):
            x_block = BlockIndex(TEST_LENGTH, xloc, xlen)
            y_block = BlockIndex(TEST_LENGTH, yloc, ylen)
            want_block = BlockIndex(TEST_LENGTH, eloc, elen)
            # Same blocks as y, but declared one element longer.
            too_long = BlockIndex(TEST_LENGTH + 1, yloc, ylen)

            # Block-based intersection.
            got_block = x_block.intersect(y_block)
            assert got_block.equals(want_block)

            # Same scenario after converting every index to integer form.
            x_int = x_block.to_int_index()
            y_int = y_block.to_int_index()
            want_int = want_block.to_int_index()
            got_int = x_int.intersect(y_int)
            assert got_int.equals(want_int)

            # Intersecting with an index of another length must raise.
            with pytest.raises(Exception):
                x_block.intersect(too_long)

            x_int_again = x_block.to_int_index()
            too_long_int = too_long.to_int_index()
            with pytest.raises(Exception):
                x_int_again.intersect(too_long_int)

        check_cases(_run_scenario)

    def test_intersect_empty(self):
        """Intersecting with an empty index gives the empty index."""
        nothing = IntIndex(4, np.array([], dtype=np.int32))
        something = IntIndex(4, np.array([2, 3], dtype=np.int32))
        assert nothing.intersect(something).equals(nothing)
        assert something.intersect(nothing).equals(nothing)

        # Repeat both directions with the block representation.
        nothing = nothing.to_block_index()
        something = something.to_block_index()
        assert nothing.intersect(something).equals(nothing)
        assert something.intersect(nothing).equals(nothing)

    def test_intersect_identical(self):
        """An index intersected with itself equals itself."""
        specs = [(5, [1, 2]), (5, [0, 2, 4]), (0, []), (5, [])]
        int_indexes = [IntIndex(length, np.array(points, dtype=np.int32))
                       for length, points in specs]

        for as_int in int_indexes:
            assert as_int.intersect(as_int).equals(as_int)
            as_block = as_int.to_block_index()
            assert as_block.intersect(as_block).equals(as_block)
|
||||
|
||||
|
||||
class TestSparseIndexCommon(object):
    """Tests run against both the 'integer' and 'block' index kinds."""

    def test_int_internal(self):
        """``kind='integer'`` gives an IntIndex exposing the same indices."""
        for points in ([2, 3], [], [0, 1, 2, 3]):
            idx = _make_index(4, np.array(points, dtype=np.int32),
                              kind='integer')
            assert isinstance(idx, IntIndex)
            assert idx.npoints == len(points)
            tm.assert_numpy_array_equal(idx.indices,
                                        np.array(points, dtype=np.int32))

    def test_block_internal(self):
        """``kind='block'`` gives a BlockIndex with the expected blocks."""
        # (input positions, expected block starts, expected block lengths)
        scenarios = [
            ([2, 3], [2], [2]),
            ([], [], []),
            ([0, 1, 2, 3], [0], [4]),
            ([0, 2, 3], [0, 2], [1, 2]),
        ]
        for points, blocs, blengths in scenarios:
            idx = _make_index(4, np.array(points, dtype=np.int32),
                              kind='block')
            assert isinstance(idx, BlockIndex)
            assert idx.npoints == len(points)
            tm.assert_numpy_array_equal(idx.blocs,
                                        np.array(blocs, dtype=np.int32))
            tm.assert_numpy_array_equal(idx.blengths,
                                        np.array(blengths, dtype=np.int32))

    def test_lookup(self):
        """``lookup`` returns the slot of a position, or -1 when absent."""
        probes = list(range(-1, 5))
        # (input positions, expected lookup result for each probe -1..4)
        scenarios = [
            ([2, 3], [-1, -1, -1, 0, 1, -1]),
            ([], [-1, -1, -1, -1, -1, -1]),
            ([0, 1, 2, 3], [-1, 0, 1, 2, 3, -1]),
            ([0, 2, 3], [-1, 0, -1, 1, 2, -1]),
        ]
        for kind in ['integer', 'block']:
            for points, wanted in scenarios:
                idx = _make_index(4, np.array(points, dtype=np.int32),
                                  kind=kind)
                for probe, want in zip(probes, wanted):
                    assert idx.lookup(probe) == want

    def test_lookup_array(self):
        """``lookup_array`` maps an int32 array of positions to slots."""
        # (input positions, [(queried positions, expected slots), ...])
        scenarios = [
            ([2, 3], [([-1, 0, 2], [-1, -1, 0]),
                      ([4, 2, 1, 3], [-1, 0, -1, 1])]),
            # The original computed this case but never compared the
            # result with the expectation; it is asserted here like the
            # others.
            ([], [([-1, 0, 2, 4], [-1, -1, -1, -1])]),
            ([0, 1, 2, 3], [([-1, 0, 2], [-1, 0, 2]),
                            ([4, 2, 1, 3], [-1, 2, 1, 3])]),
            ([0, 2, 3], [([2, 1, 3, 0], [1, -1, 2, 0]),
                         ([1, 4, 2, 5], [-1, -1, 1, -1])]),
        ]
        for kind in ['integer', 'block']:
            for points, queries in scenarios:
                idx = _make_index(4, np.array(points, dtype=np.int32),
                                  kind=kind)
                for query, wanted in queries:
                    res = idx.lookup_array(np.array(query, dtype=np.int32))
                    exp = np.array(wanted, dtype=np.int32)
                    tm.assert_numpy_array_equal(res, exp)

    def test_lookup_basics(self):
        """Block and integer forms of one index agree on ``lookup``."""
        # Blocks cover positions 5-7 and 12-17 of a length-20 index.
        expected = [(0, -1), (5, 0), (7, 2), (8, -1), (9, -1), (10, -1),
                    (11, -1), (12, 3), (17, 8), (18, -1)]

        def _check(index):
            for position, slot in expected:
                assert index.lookup(position) == slot

        bindex = BlockIndex(20, [5, 12], [3, 6])
        iindex = bindex.to_int_index()

        _check(bindex)
        _check(iindex)

        # TODO: corner cases (none are exercised here yet)
|
||||
|
||||
|
||||
class TestBlockIndex(object):
    """Tests specific to BlockIndex."""

    def test_block_internal(self):
        """``kind='block'`` gives a BlockIndex with the expected blocks."""
        # (input positions, expected block starts, expected block lengths)
        scenarios = [
            ([2, 3], [2], [2]),
            ([], [], []),
            ([0, 1, 2, 3], [0], [4]),
            ([0, 2, 3], [0, 2], [1, 2]),
        ]
        for points, blocs, blengths in scenarios:
            idx = _make_index(4, np.array(points, dtype=np.int32),
                              kind='block')
            assert isinstance(idx, BlockIndex)
            assert idx.npoints == len(points)
            tm.assert_numpy_array_equal(idx.blocs,
                                        np.array(blocs, dtype=np.int32))
            tm.assert_numpy_array_equal(idx.blengths,
                                        np.array(blengths, dtype=np.int32))

    def test_make_block_boundary(self):
        """Every second position becomes its own length-1 block."""
        for i in [5, 10, 100, 101]:
            idx = _make_index(i, np.arange(0, i, 2, dtype=np.int32),
                              kind='block')

            exp = np.arange(0, i, 2, dtype=np.int32)
            tm.assert_numpy_array_equal(idx.blocs, exp)
            tm.assert_numpy_array_equal(idx.blengths,
                                        np.ones(len(exp), dtype=np.int32))

    def test_equals(self):
        """``equals`` is true for self and false for a different block."""
        index = BlockIndex(10, [0, 4], [2, 5])

        assert index.equals(index)
        assert not index.equals(BlockIndex(10, [0, 4], [2, 6]))

    def test_check_integrity(self):
        """Constructor accepts empty block lists and rejects bad blocks."""
        locs = []
        lengths = []

        # 0-length OK; construction succeeding is the whole check, so the
        # result is deliberately not bound to a name.
        BlockIndex(0, locs, lengths)

        # also OK even though empty
        BlockIndex(1, locs, lengths)

        # block extend beyond end
        # NOTE(review): ValueError and both messages are taken from the
        # index's integrity check -- confirm against the installed version.
        with pytest.raises(ValueError, match="Block 0 extends beyond end"):
            BlockIndex(10, [5], [10])

        # block overlap
        with pytest.raises(ValueError, match="Block 0 overlaps"):
            BlockIndex(10, [2, 5], [5, 3])

    def test_to_int_index(self):
        """``to_int_index`` expands blocks into explicit positions."""
        locs = [0, 10]
        lengths = [4, 6]
        exp_inds = [0, 1, 2, 3, 10, 11, 12, 13, 14, 15]

        block = BlockIndex(20, locs, lengths)
        dense = block.to_int_index()

        tm.assert_numpy_array_equal(dense.indices,
                                    np.array(exp_inds, dtype=np.int32))

    def test_to_block_index(self):
        """``to_block_index`` on a BlockIndex returns the same object."""
        index = BlockIndex(10, [0, 5], [4, 5])
        assert index.to_block_index() is index
|
||||
|
||||
|
||||
class TestIntIndex(object):
    """Tests specific to IntIndex."""

    def test_check_integrity(self):
        """Constructor rejects invalid index lists with a ValueError."""
        # (length, indices, expected message). The original repeated the
        # negative-index check twice verbatim; it is listed once here.
        rejected = [
            # Too many indices than specified in self.length
            (1, [1, 2, 3], "Too many indices"),
            # No index can be negative.
            (5, [1, -2, 3], "No index can be less than zero"),
            # All indices must be less than the length.
            (5, [1, 2, 5], "All indices must be less than the length"),
            (5, [1, 2, 6], "All indices must be less than the length"),
            # Indices must be strictly ascending.
            (5, [1, 3, 2], "Indices must be strictly increasing"),
            (5, [1, 3, 3], "Indices must be strictly increasing"),
        ]
        for length, indices, msg in rejected:
            with pytest.raises(ValueError, match=msg):
                IntIndex(length=length, indices=indices)

    def test_int_internal(self):
        """``kind='integer'`` gives an IntIndex exposing the same indices."""
        for points in ([2, 3], [], [0, 1, 2, 3]):
            idx = _make_index(4, np.array(points, dtype=np.int32),
                              kind='integer')
            assert isinstance(idx, IntIndex)
            assert idx.npoints == len(points)
            tm.assert_numpy_array_equal(idx.indices,
                                        np.array(points, dtype=np.int32))

    def test_equals(self):
        """``equals`` is true for self and false for a shorter index list."""
        index = IntIndex(10, [0, 1, 2, 3, 4])
        assert index.equals(index)
        assert not index.equals(IntIndex(10, [0, 1, 2, 3]))

    def test_to_block_index(self):
        """Block -> int -> block conversion gives back an equal index."""

        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            # eloc/elen are part of the callback signature used with
            # check_cases but are not needed for a round-trip check.
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

            # see if survive the round trip
            xbindex = xindex.to_int_index().to_block_index()
            ybindex = yindex.to_int_index().to_block_index()
            assert isinstance(xbindex, BlockIndex)
            assert xbindex.equals(xindex)
            assert ybindex.equals(yindex)

        check_cases(_check_case)

    def test_to_int_index(self):
        """``to_int_index`` on an IntIndex returns the same object."""
        index = IntIndex(10, [2, 3, 4, 5, 6])
        assert index.to_int_index() is index
|
||||
|
||||
|
||||
class TestSparseOperators(object):
    """Compare the ``splib`` float64 sparse operators with Series maths."""

    def _op_tests(self, sparse_op, python_op):
        """Run ``sparse_op`` on block and integer indexes and cross-check.

        ``sparse_op`` is called as ``(x, xindex, xfill, y, yindex, yfill)``
        and must return ``(values, index, fill)``. ``python_op`` is the
        matching binary operator applied to the dense Series.
        """

        def _dense_series(values, int_index, fill):
            # Place the sparse values at their positions, then fill every
            # other position of a TEST_LENGTH-long Series.
            sparse_part = Series(values, int_index.indices)
            return sparse_part.reindex(np.arange(TEST_LENGTH)).fillna(fill)

        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            x_block = BlockIndex(TEST_LENGTH, xloc, xlen)
            y_block = BlockIndex(TEST_LENGTH, yloc, ylen)

            x_int = x_block.to_int_index()
            y_int = y_block.to_int_index()

            left = np.arange(x_block.npoints) * 10. + 1
            right = np.arange(y_block.npoints) * 100. + 1

            left_fill = 0
            right_fill = 2

            block_out = sparse_op(left, x_block, left_fill,
                                  right, y_block, right_fill)
            block_vals, block_index, block_fill = block_out

            int_out = sparse_op(left, x_int, left_fill,
                                right, y_int, right_fill)
            int_vals, int_index, int_fill = int_out

            # Both index representations must give the same answer.
            assert block_index.to_int_index().equals(int_index)
            tm.assert_numpy_array_equal(block_vals, int_vals)
            assert block_fill == int_fill

            # check versus Series...
            left_dense = _dense_series(left, x_int, left_fill)
            right_dense = _dense_series(right, y_int, right_fill)

            dense_result = python_op(left_dense, right_dense)
            dense_result = dense_result.reindex(int_index.indices)

            tm.assert_numpy_array_equal(block_vals, dense_result.values)
            tm.assert_numpy_array_equal(int_vals, dense_result.values)

        check_cases(_check_case)

    @pytest.mark.parametrize('opname',
                             ['add', 'sub', 'mul', 'truediv', 'floordiv'])
    def test_op(self, opname):
        """Pair each ``splib`` float64 operator with its ``operator`` twin."""
        python_op = getattr(operator, opname)
        sparse_op = getattr(splib, 'sparse_%s_float64' % opname)
        self._op_tests(sparse_op, python_op)
|
||||
Reference in New Issue
Block a user