Static code analysis and corrections
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,40 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from pandas import SparseDataFrame, DataFrame, SparseSeries
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)')
def test_quantile():
    """Scalar ``quantile`` on a SparseDataFrame vs. the dense result.

    GH 17386. The result is compared both against the dense Series and
    against that Series converted to a SparseSeries.
    """
    rows = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    quantile = 0.1

    actual = SparseDataFrame(rows).quantile(quantile)

    # The dense computation supplies the reference values.
    expected_dense = DataFrame(rows).quantile(quantile)
    expected_sparse = SparseSeries(expected_dense)

    tm.assert_series_equal(actual, expected_dense)
    tm.assert_sp_series_equal(actual, expected_sparse)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)')
def test_quantile_multi():
    """List-valued ``quantile`` on a SparseDataFrame vs. the dense result.

    GH 17386. The result is compared both against the dense frame and
    against that frame converted to a SparseDataFrame.
    """
    rows = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    quantiles = [0.1, 0.5]

    actual = SparseDataFrame(rows).quantile(quantiles)

    # The dense computation supplies the reference values.
    expected_dense = DataFrame(rows).quantile(quantiles)
    expected_sparse = SparseDataFrame(expected_dense)

    tm.assert_frame_equal(actual, expected_dense)
    tm.assert_sp_frame_equal(actual, expected_sparse)
|
||||
@@ -0,0 +1,92 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from pandas import SparseDataFrame, DataFrame, Series, bdate_range
|
||||
from pandas.core import nanops
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def dates():
    """Fixture: a 10-period ``bdate_range`` anchored at '1/1/2011'."""
    index = bdate_range('1/1/2011', periods=10)
    return index
|
||||
|
||||
|
||||
@pytest.fixture
def empty():
    """Fixture: a SparseDataFrame constructed without any arguments."""
    blank = SparseDataFrame()
    return blank
|
||||
|
||||
|
||||
@pytest.fixture
def frame(dates):
    """Fixture: a four-column SparseDataFrame indexed by ``dates``.

    Columns 'A', 'B' and 'D' carry a run of NaN at the start, in the
    middle and at the end respectively; 'C' is a gap-free float64 range.
    """
    nan = np.nan
    columns = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
        'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
        'C': np.arange(10, dtype=np.float64),
        'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
    }
    return SparseDataFrame(columns, index=dates)
|
||||
|
||||
|
||||
@pytest.fixture
def fill_frame(frame):
    """Fixture: ``frame`` with every NaN replaced by 2, fill value 2.

    The values are copied from ``frame`` before modification, and the
    result reuses ``frame``'s index with columns 'A' through 'D'.
    """
    dense = frame.values.copy()
    missing = np.isnan(dense)
    dense[missing] = 2

    return SparseDataFrame(dense,
                           index=frame.index,
                           columns=['A', 'B', 'C', 'D'],
                           default_fill_value=2)
|
||||
|
||||
|
||||
def test_apply(frame):
    """``SparseDataFrame.apply`` with a ufunc, with broadcast, and reducing."""
    # Element-wise function: the result stays sparse and matches NumPy.
    rooted = frame.apply(np.sqrt)
    assert isinstance(rooted, SparseDataFrame)
    tm.assert_almost_equal(rooted.values, np.sqrt(frame.values))

    # agg / broadcast: both calls are expected to emit a FutureWarning
    with tm.assert_produces_warning(FutureWarning):
        sparse_broadcast = frame.apply(np.sum, broadcast=True)
    assert isinstance(sparse_broadcast, SparseDataFrame)

    with tm.assert_produces_warning(FutureWarning):
        dense_broadcast = frame.to_dense().apply(np.sum, broadcast=True)
    tm.assert_frame_equal(sparse_broadcast.to_dense(), dense_broadcast)

    # Reduction: compared with nansum applied to the dense frame.
    summed = frame.apply(np.sum)
    expected_sum = frame.to_dense().apply(nanops.nansum)
    tm.assert_series_equal(summed, expected_sum)
|
||||
|
||||
|
||||
def test_apply_fill(fill_frame):
    """Applying ``np.sqrt`` must also transform the column fill value."""
    rooted = fill_frame.apply(np.sqrt)
    column_fill = rooted['A'].fill_value
    assert column_fill == np.sqrt(2)
|
||||
|
||||
|
||||
def test_apply_empty(empty):
    """``apply`` on an empty sparse frame returns the very same object."""
    outcome = empty.apply(np.sqrt)
    assert outcome is empty
|
||||
|
||||
|
||||
def test_apply_nonuq():
    """Row-wise ``apply`` on a sparse frame whose index has duplicates."""
    frame_dense = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                            index=['a', 'a', 'c'])
    frame_sparse = frame_dense.to_sparse()
    got = frame_sparse.apply(lambda row: row[0], axis=1)
    want = frame_dense.apply(lambda row: row[0], axis=1)

    # dtype must be kept
    assert got.dtype == np.int64

    # ToDo: apply must return subclassed dtype
    assert isinstance(got, Series)
    tm.assert_series_equal(got.to_dense(), want)

    # df.T breaks
    frame_sparse = frame_dense.T.to_sparse()
    got = frame_sparse.apply(lambda col: col[0], axis=0)  # noqa
    want = frame_dense.T.apply(lambda col: col[0], axis=0)

    # TODO: no non-unique columns supported in sparse yet
    # tm.assert_series_equal(got.to_dense(), want)
|
||||
|
||||
|
||||
def test_applymap(frame):
    """Smoke test: ``applymap`` runs and returns a SparseDataFrame."""
    doubled = frame.applymap(lambda x: x * 2)
    assert isinstance(doubled, SparseDataFrame)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,113 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from pandas import SparseDataFrame, DataFrame
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
pytestmark = pytest.mark.skip("Wrong SparseBlock initialization (GH 17386)")
|
||||
|
||||
|
||||
@pytest.mark.parametrize('data', [
    [[1, 1], [2, 2], [3, 3], [4, 4], [0, 0]],
    [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [np.nan, np.nan]],
    [[1.0, 1.0 + 1.0j],
     [2.0 + 2.0j, 2.0],
     [3.0, 3.0 + 3.0j],
     [4.0 + 4.0j, 4.0],
     [np.nan, np.nan]],
])
@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)')
def test_where_with_numeric_data(data):
    """``SparseDataFrame.where`` on numeric data, no replacement value.

    GH 17386. Values not greater than 1.5 are masked; the outcome is
    compared with the dense result and with its sparse conversion.
    """
    threshold = 1.5

    sparse_frame = SparseDataFrame(data)
    actual = sparse_frame.where(sparse_frame > threshold)

    dense_frame = DataFrame(data)
    expected_dense = dense_frame.where(dense_frame > threshold)
    expected_sparse = SparseDataFrame(expected_dense)

    tm.assert_frame_equal(actual, expected_dense)
    tm.assert_sp_frame_equal(actual, expected_sparse)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('data', [
    [[1, 1], [2, 2], [3, 3], [4, 4], [0, 0]],
    [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [np.nan, np.nan]],
    [[1.0, 1.0 + 1.0j],
     [2.0 + 2.0j, 2.0],
     [3.0, 3.0 + 3.0j],
     [4.0 + 4.0j, 4.0],
     [np.nan, np.nan]],
])
@pytest.mark.parametrize('other', [True, -100, 0.1, 100.0 + 100.0j])
@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)')
def test_where_with_numeric_data_and_other(data, other):
    """``SparseDataFrame.where`` on numeric data with a replacement value.

    GH 17386. Values not greater than 1.5 are replaced by ``other``; the
    expected sparse frame uses ``other`` as its default fill value.
    """
    threshold = 1.5

    sparse_frame = SparseDataFrame(data)
    actual = sparse_frame.where(sparse_frame > threshold, other)

    dense_frame = DataFrame(data)
    expected_dense = dense_frame.where(dense_frame > threshold, other)
    expected_sparse = SparseDataFrame(expected_dense,
                                      default_fill_value=other)

    tm.assert_frame_equal(actual, expected_dense)
    tm.assert_sp_frame_equal(actual, expected_sparse)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)')
def test_where_with_bool_data():
    """``SparseDataFrame.where`` on boolean data, no replacement value.

    GH 17386. Entries equal to True are kept; the outcome is compared
    with the dense result and with its sparse conversion.
    """
    rows = [[False, False], [True, True], [False, False]]
    keep = True

    sparse_frame = SparseDataFrame(rows)
    actual = sparse_frame.where(sparse_frame == keep)

    dense_frame = DataFrame(rows)
    expected_dense = dense_frame.where(dense_frame == keep)
    expected_sparse = SparseDataFrame(expected_dense)

    tm.assert_frame_equal(actual, expected_dense)
    tm.assert_sp_frame_equal(actual, expected_sparse)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('other', [True, 0, 0.1, 100.0 + 100.0j])
@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)')
def test_where_with_bool_data_and_other(other):
    """``SparseDataFrame.where`` on boolean data with a replacement value.

    GH 17386. Entries not equal to True are replaced by ``other``; the
    expected sparse frame uses ``other`` as its default fill value.
    """
    rows = [[False, False], [True, True], [False, False]]
    keep = True

    sparse_frame = SparseDataFrame(rows)
    actual = sparse_frame.where(sparse_frame == keep, other)

    dense_frame = DataFrame(rows)
    expected_dense = dense_frame.where(dense_frame == keep, other)
    expected_sparse = SparseDataFrame(expected_dense,
                                      default_fill_value=other)

    tm.assert_frame_equal(actual, expected_dense)
    tm.assert_sp_frame_equal(actual, expected_sparse)
|
||||
@@ -0,0 +1,20 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
from pandas import SparseDataFrame, read_csv
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestSparseDataFrameToCsv(object):
    """CSV round-trip of a SparseDataFrame for several fill values."""

    # Used both as the column contents and as the parametrized fill values.
    fill_values = [np.nan, 0, None, 1]

    @pytest.mark.parametrize('fill_value', fill_values)
    def test_to_csv_sparse_dataframe(self, fill_value):
        """Write a sparse frame to CSV, read it back, compare (GH19384)."""
        column = type(self).fill_values
        original = SparseDataFrame({'a': column},
                                   default_fill_value=fill_value)

        with tm.ensure_clean('sparse_df.csv') as csv_path:
            original.to_csv(csv_path, index=False)
            reloaded = read_csv(csv_path, skip_blank_lines=False)

            resparsed = reloaded.to_sparse(fill_value=fill_value)
            tm.assert_sp_frame_equal(resparsed, original)
|
||||
+168
@@ -0,0 +1,168 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from warnings import catch_warnings
|
||||
from pandas.util import testing as tm
|
||||
from pandas import SparseDataFrame, SparseSeries
|
||||
from distutils.version import LooseVersion
|
||||
from pandas.core.dtypes.common import (
|
||||
is_bool_dtype,
|
||||
is_float_dtype,
|
||||
is_object_dtype,
|
||||
is_float)
|
||||
|
||||
|
||||
scipy = pytest.importorskip('scipy')
|
||||
|
||||
|
||||
@pytest.mark.parametrize('index', [None, list('abc')])  # noqa: F811
@pytest.mark.parametrize('columns', [None, list('def')])
@pytest.mark.parametrize('fill_value', [None, 0, np.nan])
@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16])
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
    """Round-trip a scipy sparse matrix through SparseDataFrame (GH 4343)."""
    # A single ndarray, and the sparse matrix built from it, feed both
    # the frame construction and the result comparison.
    dense = np.eye(3, dtype=dtype)
    # GH 16179
    dense[0, 1] = dtype(2)
    try:
        spm = spmatrix(dense)
        assert spm.dtype == dense.dtype
    except (TypeError, AssertionError):
        # This spmatrix type cannot hold this dtype faithfully, so there
        # is nothing further to exercise for the combination.
        return

    sdf = SparseDataFrame(spm, index=index, columns=columns,
                          default_fill_value=fill_value)

    # Building the expected frame for every dtype/fill_value pair is
    # awkward, so go through object dtype with NaN for the zeros and
    # fill afterwards.
    as_object = dense.astype(object)
    as_object[dense == 0] = np.nan
    na_replacement = np.nan if fill_value is None else fill_value
    expected = SparseDataFrame(
        as_object, index=index, columns=columns).fillna(na_replacement)

    # Frame contents
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Sparse matrices
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # dtype should survive unless a float-like fill value forces an upcast
    float_like_fill = fill_value is None or is_float(fill_value)
    was_upcast = (float_like_fill and
                  not (is_object_dtype(dtype) or is_float_dtype(dtype)))
    if is_bool_dtype(dtype):
        res_dtype = bool
    elif was_upcast:
        res_dtype = float
    else:
        res_dtype = dtype
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype

    # However, adding a str column results in an upcast to object
    sdf['strings'] = np.arange(len(sdf)).astype(str)
    assert sdf.to_coo().dtype == np.object_
|
||||
|
||||
|
||||
@pytest.mark.parametrize('fill_value', [None, 0, np.nan])  # noqa: F811
def test_from_to_scipy_object(spmatrix, fill_value):
    """Object-dtype scipy matrix round-trip via SparseDataFrame (GH 4343)."""
    dtype = object
    index = list('ab')
    columns = list('cd')

    is_dok = spmatrix is scipy.sparse.dok_matrix
    if (is_dok and
            LooseVersion(scipy.__version__) >= LooseVersion('0.19.0')):
        pytest.skip("dok_matrix from object does not work in SciPy >= 0.19")

    # A single ndarray, and the sparse matrix built from it, feed both
    # the frame construction and the result comparison.
    dense = np.eye(2, dtype=dtype)
    try:
        spm = spmatrix(dense)
        assert spm.dtype == dense.dtype
    except (TypeError, AssertionError):
        # This spmatrix type cannot hold this dtype faithfully, so there
        # is nothing further to exercise for the combination.
        return

    sdf = SparseDataFrame(spm, index=index, columns=columns,
                          default_fill_value=fill_value)

    # Building the expected frame for every dtype/fill_value pair is
    # awkward, so go through object dtype with NaN for the zeros and
    # fill afterwards.
    as_object = dense.astype(object)
    as_object[dense == 0] = np.nan
    na_replacement = np.nan if fill_value is None else fill_value
    expected = SparseDataFrame(
        as_object, index=index, columns=columns).fillna(na_replacement)

    # Frame contents
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Sparse matrices; warnings raised by the conversion are captured
    with catch_warnings(record=True):
        assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Object dtype must be preserved
    res_dtype = object
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype
|
||||
|
||||
|
||||
def test_from_scipy_correct_ordering(spmatrix):
    """A frame built from a scipy matrix equals one built from the array.

    GH 16179.
    """
    dense = np.arange(1, 5).reshape(2, 2)
    try:
        spm = spmatrix(dense)
        assert spm.dtype == dense.dtype
    except (TypeError, AssertionError):
        # This spmatrix type cannot hold this dtype faithfully, so there
        # is nothing further to exercise for the combination.
        return

    from_sparse = SparseDataFrame(spm)
    from_dense = SparseDataFrame(dense)
    tm.assert_sp_frame_equal(from_sparse, from_dense)
    tm.assert_frame_equal(from_sparse.to_dense(), from_dense.to_dense())
|
||||
|
||||
|
||||
def test_from_scipy_fillna(spmatrix):
    """``fillna`` on a frame built from a scipy sparse matrix (GH 16112)."""
    dense = np.eye(3)
    dense[1:, 0] = np.nan

    try:
        spm = spmatrix(dense)
        assert spm.dtype == dense.dtype
    except (TypeError, AssertionError):
        # This spmatrix type cannot hold this dtype faithfully, so there
        # is nothing further to exercise for the combination.
        return

    filled = SparseDataFrame(spm).fillna(-1.0)

    # Every NaN in the returned frame should now be -1.0
    nan = np.nan
    expected = SparseDataFrame({
        0: SparseSeries([1., -1, -1]),
        1: SparseSeries([nan, 1, nan]),
        2: SparseSeries([nan, nan, 1]),
    }, default_fill_value=-1)

    # The fill value should be whatever .fillna() was called with. The
    # expected SparseSeries are not built with -1 as their initial fill
    # value: this way "compressed" SparseArrays are obtained without
    # constructing them by hand, and the fill value is patched afterwards.
    for name in expected:
        expected[name].fill_value = -1

    tm.assert_sp_frame_equal(filled, expected)
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,113 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from pandas import SparseSeries, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
pytestmark = pytest.mark.skip("Wrong SparseBlock initialization (GH 17386)")
|
||||
|
||||
|
||||
@pytest.mark.parametrize('data', [
    [1, 1, 2, 2, 3, 3, 4, 4, 0, 0],
    [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, np.nan, np.nan],
    [1.0, 1.0 + 1.0j,
     2.0 + 2.0j, 2.0,
     3.0, 3.0 + 3.0j,
     4.0 + 4.0j, 4.0,
     np.nan, np.nan],
])
@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)')
def test_where_with_numeric_data(data):
    """``SparseSeries.where`` on numeric data, no replacement value.

    GH 17386. Values not greater than 1.5 are masked; the outcome is
    compared with the dense result and with its sparse conversion.
    """
    threshold = 1.5

    sparse_series = SparseSeries(data)
    actual = sparse_series.where(sparse_series > threshold)

    dense_series = Series(data)
    expected_dense = dense_series.where(dense_series > threshold)
    expected_sparse = SparseSeries(expected_dense)

    tm.assert_series_equal(actual, expected_dense)
    tm.assert_sp_series_equal(actual, expected_sparse)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('data', [
    [1, 1, 2, 2, 3, 3, 4, 4, 0, 0],
    [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, np.nan, np.nan],
    [1.0, 1.0 + 1.0j,
     2.0 + 2.0j, 2.0,
     3.0, 3.0 + 3.0j,
     4.0 + 4.0j, 4.0,
     np.nan, np.nan],
])
@pytest.mark.parametrize('other', [True, -100, 0.1, 100.0 + 100.0j])
@pytest.mark.skip(
    reason='Wrong SparseBlock initialization (Segfault) (GH 17386)')
def test_where_with_numeric_data_and_other(data, other):
    """``SparseSeries.where`` on numeric data with a replacement value.

    GH 17386. Values not greater than 1.5 are replaced by ``other``; the
    expected sparse series uses ``other`` as its fill value.
    """
    threshold = 1.5

    sparse_series = SparseSeries(data)
    actual = sparse_series.where(sparse_series > threshold, other)

    dense_series = Series(data)
    expected_dense = dense_series.where(dense_series > threshold, other)
    expected_sparse = SparseSeries(expected_dense, fill_value=other)

    tm.assert_series_equal(actual, expected_dense)
    tm.assert_sp_series_equal(actual, expected_sparse)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)')
def test_where_with_bool_data():
    """``SparseSeries.where`` on boolean data, no replacement value.

    GH 17386. Entries equal to True are kept; the outcome is compared
    with the dense result and with its sparse conversion.
    """
    flags = [False, False, True, True, False, False]
    keep = True

    sparse_series = SparseSeries(flags)
    actual = sparse_series.where(sparse_series == keep)

    dense_series = Series(flags)
    expected_dense = dense_series.where(dense_series == keep)
    expected_sparse = SparseSeries(expected_dense)

    tm.assert_series_equal(actual, expected_dense)
    tm.assert_sp_series_equal(actual, expected_sparse)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('other', [True, 0, 0.1, 100.0 + 100.0j])
@pytest.mark.skip(
    reason='Wrong SparseBlock initialization (Segfault) (GH 17386)')
def test_where_with_bool_data_and_other(other):
    """``SparseSeries.where`` on boolean data with a replacement value.

    GH 17386. Entries not equal to True are replaced by ``other``; the
    expected sparse series uses ``other`` as its fill value.
    """
    flags = [False, False, True, True, False, False]
    keep = True

    sparse_series = SparseSeries(flags)
    actual = sparse_series.where(sparse_series == keep, other)

    dense_series = Series(flags)
    expected_dense = dense_series.where(dense_series == keep, other)
    expected_sparse = SparseSeries(expected_dense, fill_value=other)

    tm.assert_series_equal(actual, expected_dense)
    tm.assert_sp_series_equal(actual, expected_sparse)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,451 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSparseArrayArithmetics(object):
|
||||
|
||||
_base = np.array
|
||||
_klass = pd.SparseArray
|
||||
|
||||
def _assert(self, a, b):
    """Equality hook used by every check in this class.

    Delegates to ``tm.assert_numpy_array_equal``.
    """
    tm.assert_numpy_array_equal(a, b)
|
||||
|
||||
def _check_numeric_ops(self, a, b, a_dense, b_dense):
    """Compare sparse arithmetic with the equivalent dense arithmetic.

    ``a`` is combined first with ``b`` and then with ``b_dense`` using
    ``+``, ``-``, ``*``, ``/``, ``//``, ``%`` and ``**`` in both operand
    orders. Each result is densified with ``to_dense()`` and handed to
    ``self._assert`` together with the same operation applied to
    ``a_dense`` and ``b_dense``.
    """

    def check_both_orders(op, rhs, dense_op=None):
        # Apply ``op`` as (a, rhs) and (rhs, a); ``dense_op`` computes the
        # expected value when it differs textually from ``op``.
        if dense_op is None:
            dense_op = op
        self._assert(op(a, rhs).to_dense(), dense_op(a_dense, b_dense))
        self._assert(op(rhs, a).to_dense(), dense_op(b_dense, a_dense))

    # A single errstate block covers everything: guarding the computation
    # of each expected value individually would be too tedious.
    with np.errstate(invalid='ignore', divide='ignore'):
        # sparse & sparse first, then sparse & dense
        for rhs in (b, b_dense):
            check_both_orders(lambda x, y: x + y, rhs)
            check_both_orders(lambda x, y: x - y, rhs)
            check_both_orders(lambda x, y: x * y, rhs)

            # pandas uses future division
            check_both_orders(lambda x, y: x / y, rhs,
                              lambda x, y: x * 1.0 / y)

            # ToDo: FIXME in GH 13843
            if not (self._base == pd.Series and a.dtype == 'int64'):
                check_both_orders(lambda x, y: x // y, rhs)

            check_both_orders(lambda x, y: x % y, rhs)
            check_both_orders(lambda x, y: x ** y, rhs)
|
||||
|
||||
def _check_bool_result(self, res):
|
||||
assert isinstance(res, self._klass)
|
||||
assert res.dtype == np.bool
|
||||
assert isinstance(res.fill_value, bool)
|
||||
|
||||
def _check_comparison_ops(self, a, b, a_dense, b_dense):
    """Compare sparse comparisons with the equivalent dense comparisons.

    For ``==``, ``!=``, ``>=``, ``<=``, ``>`` and ``<``, ``a`` is compared
    first with ``b`` and then with ``b_dense``. Each result must pass
    ``self._check_bool_result`` and, once densified, must match the same
    comparison of ``a_dense`` with ``b_dense``.
    """
    comparisons = (
        lambda x, y: x == y,
        lambda x, y: x != y,
        lambda x, y: x >= y,
        lambda x, y: x <= y,
        lambda x, y: x > y,
        lambda x, y: x < y,
    )

    # A single errstate block covers everything: guarding the computation
    # of each expected value individually would be too tedious.
    with np.errstate(invalid='ignore'):
        # sparse & sparse first, then sparse & dense
        for rhs in (b, b_dense):
            for compare in comparisons:
                self._check_bool_result(compare(a, rhs))
                self._assert(compare(a, rhs).to_dense(),
                             compare(a_dense, b_dense))
|
||||
|
||||
def _check_logical_ops(self, a, b, a_dense, b_dense):
    """Compare sparse ``&`` / ``|`` with the equivalent dense operations.

    ``a`` is combined first with ``b`` and then with ``b_dense``. Each
    result must pass ``self._check_bool_result`` and, once densified,
    must match the same operation on ``a_dense`` and ``b_dense``.
    """
    logical_ops = (
        lambda x, y: x & y,
        lambda x, y: x | y,
    )

    # sparse & sparse first, then sparse & dense
    for rhs in (b, b_dense):
        for combine in logical_ops:
            self._check_bool_result(combine(a, rhs))
            self._assert(combine(a, rhs).to_dense(),
                         combine(a_dense, b_dense))
|
||||
|
||||
def test_float_scalar(self):
    """Numeric ops between a float sparse container and scalars.

    Runs for both sparse index kinds, for the default fill value and for
    fill values 0 and 2, against the scalars 1, 0 and 3.
    """
    values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])

    for kind in ['integer', 'block']:
        for extra in ({}, {'fill_value': 0}, {'fill_value': 2}):
            a = self._klass(values, kind=kind, **extra)
            for scalar in (1, 0, 3):
                self._check_numeric_ops(a, scalar, values, scalar)
|
||||
|
||||
def test_float_scalar_comparison(self):
    """Comparison ops between a float sparse container and scalars.

    Runs for both sparse index kinds, for the default fill value and for
    fill values 0 and 2, against the scalars 1, 0 and 3.
    """
    values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])

    for kind in ['integer', 'block']:
        for extra in ({}, {'fill_value': 0}, {'fill_value': 2}):
            a = self._klass(values, kind=kind, **extra)
            for scalar in (1, 0, 3):
                self._check_comparison_ops(a, scalar, values, scalar)
|
||||
|
||||
def test_float_same_index(self):
    """Numeric ops when both operands share the same sparse index."""
    nan = np.nan
    for kind in ['integer', 'block']:
        # (left data, right data, extra constructor keywords)
        cases = [
            ([nan, 1, 2, 0, nan, 0, 1, 2, 1, nan],
             [nan, 2, 3, 4, nan, 0, 1, 3, 2, nan],
             {}),
            ([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.],
             [0., 2., 3., 4., 0., 0., 1., 3., 2., 0.],
             {'fill_value': 0}),
        ]
        for left, right, extra in cases:
            values = self._base(left)
            rvalues = self._base(right)

            a = self._klass(values, kind=kind, **extra)
            b = self._klass(rvalues, kind=kind, **extra)
            self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
def test_float_same_index_comparison(self):
    """Comparison ops when both operands share the same sparse index."""
    nan = np.nan
    for kind in ['integer', 'block']:
        # (left data, right data, extra constructor keywords)
        cases = [
            ([nan, 1, 2, 0, nan, 0, 1, 2, 1, nan],
             [nan, 2, 3, 4, nan, 0, 1, 3, 2, nan],
             {}),
            ([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.],
             [0., 2., 3., 4., 0., 0., 1., 3., 2., 0.],
             {'fill_value': 0}),
        ]
        for left, right, extra in cases:
            values = self._base(left)
            rvalues = self._base(right)

            a = self._klass(values, kind=kind, **extra)
            b = self._klass(rvalues, kind=kind, **extra)
            self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
def test_float_array(self):
    """Numeric ops between two float sparse containers of the same kind.

    Covers default fill values (including a zeroed right operand) and
    several explicit fill-value pairings.
    """
    nan = np.nan
    values = self._base([nan, 1, 2, 0, nan, 0, 1, 2, 1, nan])
    rvalues = self._base([2, nan, 2, 3, nan, 0, 1, 5, 2, nan])

    # (keywords for the left operand, keywords for the right operand)
    fill_pairs = [
        ({'fill_value': 0}, {}),
        ({'fill_value': 0}, {'fill_value': 0}),
        ({'fill_value': 1}, {'fill_value': 2}),
    ]

    for kind in ['integer', 'block']:
        a = self._klass(values, kind=kind)
        b = self._klass(rvalues, kind=kind)
        self._check_numeric_ops(a, b, values, rvalues)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0)

        for left_kw, right_kw in fill_pairs:
            a = self._klass(values, kind=kind, **left_kw)
            b = self._klass(rvalues, kind=kind, **right_kw)
            self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
def test_float_array_different_kind(self):
    """Numeric ops between an 'integer'-kind and a 'block'-kind operand.

    Covers default fill values (including a zeroed right operand) and
    several explicit fill-value pairings.
    """
    nan = np.nan
    values = self._base([nan, 1, 2, 0, nan, 0, 1, 2, 1, nan])
    rvalues = self._base([2, nan, 2, 3, nan, 0, 1, 5, 2, nan])

    a = self._klass(values, kind='integer')
    b = self._klass(rvalues, kind='block')
    self._check_numeric_ops(a, b, values, rvalues)
    self._check_numeric_ops(a, b * 0, values, rvalues * 0)

    # (keywords for the left operand, keywords for the right operand)
    fill_pairs = [
        ({'fill_value': 0}, {}),
        ({'fill_value': 0}, {'fill_value': 0}),
        ({'fill_value': 1}, {'fill_value': 2}),
    ]
    for left_kw, right_kw in fill_pairs:
        a = self._klass(values, kind='integer', **left_kw)
        b = self._klass(rvalues, kind='block', **right_kw)
        self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
def test_float_array_comparison(self):
    """Comparison ops between two float sparse containers of one kind.

    Covers default fill values (including a zeroed right operand) and
    several explicit fill-value pairings.
    """
    nan = np.nan
    values = self._base([nan, 1, 2, 0, nan, 0, 1, 2, 1, nan])
    rvalues = self._base([2, nan, 2, 3, nan, 0, 1, 5, 2, nan])

    # (keywords for the left operand, keywords for the right operand)
    fill_pairs = [
        ({'fill_value': 0}, {}),
        ({'fill_value': 0}, {'fill_value': 0}),
        ({'fill_value': 1}, {'fill_value': 2}),
    ]

    for kind in ['integer', 'block']:
        a = self._klass(values, kind=kind)
        b = self._klass(rvalues, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)
        self._check_comparison_ops(a, b * 0, values, rvalues * 0)

        for left_kw, right_kw in fill_pairs:
            a = self._klass(values, kind=kind, **left_kw)
            b = self._klass(rvalues, kind=kind, **right_kw)
            self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
def test_int_array(self):
    """Numeric ops between two int64 sparse containers.

    Every constructed operand is also checked to have kept the requested
    dtype.
    """
    # have to specify dtype explicitly until fixing GH 667
    dtype = np.int64

    values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
    rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)

    def build(data, kind, **extra):
        # Construct a sparse operand and verify its dtype immediately.
        sparse = self._klass(data, dtype=dtype, kind=kind, **extra)
        assert sparse.dtype == dtype
        return sparse

    # (keywords for the left operand, keywords for the right operand)
    fill_pairs = [
        ({'fill_value': 0}, {}),
        ({'fill_value': 0}, {'fill_value': 0}),
        ({'fill_value': 1}, {'fill_value': 2}),
    ]

    for kind in ['integer', 'block']:
        a = build(values, kind)
        b = build(rvalues, kind)
        self._check_numeric_ops(a, b, values, rvalues)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0)

        for left_kw, right_kw in fill_pairs:
            a = build(values, kind, **left_kw)
            b = build(rvalues, kind, **right_kw)
            self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
def test_int_array_comparison(self):
    """Run the comparison-op checks on int data for each fill pairing."""
    # (extra kwargs for the left operand, extra kwargs for the right one)
    fill_pairs = [
        ({}, {}),
        ({'fill_value': 0}, {}),
        ({'fill_value': 0}, {'fill_value': 0}),
        ({'fill_value': 1}, {'fill_value': 2}),
    ]

    # int32 NI ATM
    for dtype in ['int64']:
        values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)

        for kind in ['integer', 'block']:
            for pos, (left_kw, right_kw) in enumerate(fill_pairs):
                a = self._klass(values, dtype=dtype, kind=kind, **left_kw)
                b = self._klass(rvalues, dtype=dtype, kind=kind,
                                **right_kw)
                self._check_comparison_ops(a, b, values, rvalues)
                if pos == 0:
                    # the default-fill pairing is additionally compared
                    # against a zeroed right operand
                    self._check_comparison_ops(a, b * 0,
                                               values, rvalues * 0)
|
||||
|
||||
def test_bool_same_index(self):
    """Run the logical-op checks on two operands holding identical data."""
    # GH 14000
    # when sp_index are the same
    for kind in ['integer', 'block']:
        # builtin ``bool`` is used instead of the ``np.bool`` alias, which
        # newer NumPy releases no longer provide
        values = self._base([True, False, True, True], dtype=bool)
        rvalues = self._base([True, False, True, True], dtype=bool)

        for fill_value in [True, False, np.nan]:
            a = self._klass(values, kind=kind, dtype=bool,
                            fill_value=fill_value)
            b = self._klass(rvalues, kind=kind, dtype=bool,
                            fill_value=fill_value)
            self._check_logical_ops(a, b, values, rvalues)
|
||||
|
||||
def test_bool_array_logical(self):
    """Run the logical-op checks on two operands holding different data."""
    # GH 14000
    # the two operands hold different values (unlike test_bool_same_index)
    for kind in ['integer', 'block']:
        # builtin ``bool`` is used instead of the ``np.bool`` alias, which
        # newer NumPy releases no longer provide
        values = self._base([True, False, True, False, True, True],
                            dtype=bool)
        rvalues = self._base([True, False, False, True, False, True],
                             dtype=bool)

        for fill_value in [True, False, np.nan]:
            a = self._klass(values, kind=kind, dtype=bool,
                            fill_value=fill_value)
            b = self._klass(rvalues, kind=kind, dtype=bool,
                            fill_value=fill_value)
            self._check_logical_ops(a, b, values, rvalues)
|
||||
|
||||
def test_mixed_array_float_int(self):
    """Run the numeric-op checks on a float left / int right operand."""
    # (extra kwargs for the left operand, extra kwargs for the right one)
    fill_pairs = [
        ({}, {}),
        ({'fill_value': 0}, {}),
        ({'fill_value': 0}, {'fill_value': 0}),
        ({'fill_value': 1}, {'fill_value': 2}),
    ]

    for rdtype in ['int64']:
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)

        for kind in ['integer', 'block']:
            for pos, (left_kw, right_kw) in enumerate(fill_pairs):
                a = self._klass(values, kind=kind, **left_kw)
                b = self._klass(rvalues, kind=kind, **right_kw)
                assert b.dtype == rdtype

                self._check_numeric_ops(a, b, values, rvalues)
                if pos == 0:
                    # the default-fill pairing is additionally checked
                    # against a zeroed right operand
                    self._check_numeric_ops(a, b * 0, values, rvalues * 0)
|
||||
|
||||
def test_mixed_array_comparison(self):
    """Run the comparison-op checks on a float left / int right operand."""
    # (extra kwargs for the left operand, extra kwargs for the right one)
    fill_pairs = [
        ({}, {}),
        ({'fill_value': 0}, {}),
        ({'fill_value': 0}, {'fill_value': 0}),
        ({'fill_value': 1}, {'fill_value': 2}),
    ]

    # int32 NI ATM
    for rdtype in ['int64']:
        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)

        for kind in ['integer', 'block']:
            for pos, (left_kw, right_kw) in enumerate(fill_pairs):
                a = self._klass(values, kind=kind, **left_kw)
                b = self._klass(rvalues, kind=kind, **right_kw)
                assert b.dtype == rdtype

                self._check_comparison_ops(a, b, values, rvalues)
                if pos == 0:
                    # the default-fill pairing is additionally compared
                    # against a zeroed right operand
                    self._check_comparison_ops(a, b * 0,
                                               values, rvalues * 0)
|
||||
|
||||
|
||||
class TestSparseSeriesArithmetic(TestSparseArrayArithmetics):
    """Re-run the inherited arithmetic tests with Series / SparseSeries."""

    # dense and sparse containers used by the inherited tests
    _base = pd.Series
    _klass = pd.SparseSeries

    def _assert(self, a, b):
        """Compare two results with the Series equality helper."""
        tm.assert_series_equal(a, b)

    def test_alignment(self):
        """Run the numeric-op checks on operands with different indexes."""
        # first a partially overlapping index, then a disjoint one
        for other_index in ([1, 2, 3, 4], [10, 11, 12, 13]):
            da = pd.Series(np.arange(4))
            db = pd.Series(np.arange(4), index=other_index)

            for fill in (0, np.nan):
                sa = pd.SparseSeries(np.arange(4), dtype=np.int64,
                                     fill_value=fill)
                sb = pd.SparseSeries(np.arange(4), index=other_index,
                                     dtype=np.int64, fill_value=fill)
                self._check_numeric_ops(sa, sb, da, db)
|
||||
@@ -0,0 +1,935 @@
|
||||
from pandas.compat import range
|
||||
|
||||
import re
|
||||
import operator
|
||||
import pytest
|
||||
import warnings
|
||||
|
||||
from numpy import nan
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.sparse.api import SparseArray, SparseSeries
|
||||
from pandas._libs.sparse import IntIndex
|
||||
from pandas.util.testing import assert_almost_equal
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSparseArray(object):
|
||||
|
||||
def setup_method(self, method):
    """Build the dense data and the two sparse arrays shared by the tests."""
    dense = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6])
    self.arr_data = dense
    # sparse array built from the NaN-padded data with default settings
    self.arr = SparseArray(dense)
    # same layout with zeros instead of NaN and fill_value=0
    zero_padded = [0, 0, 1, 2, 3, 0, 4, 5, 0, 6]
    self.zarr = SparseArray(zero_padded, fill_value=0)
|
||||
|
||||
def test_constructor_dtype(self):
    """Check the dtype and fill_value the constructor is expected to pick."""
    with_nan = [np.nan, 1, 2, np.nan]
    ints = [0, 1, 2, 4]

    # (data, constructor kwargs, expected dtype, expect a NaN fill_value?)
    # when the last field is False the fill_value must equal 0
    cases = [
        (with_nan, {}, np.float64, True),
        (with_nan, {'fill_value': 0}, np.float64, False),
        (ints, {'dtype': np.float64}, np.float64, True),
        (ints, {'dtype': np.int64}, np.int64, False),
        (ints, {'fill_value': 0, 'dtype': np.int64}, np.int64, False),
        (ints, {'dtype': None}, np.int64, False),
        (ints, {'fill_value': 0, 'dtype': None}, np.int64, False),
    ]

    for data, kwargs, expected_dtype, nan_fill in cases:
        arr = SparseArray(data, **kwargs)
        assert arr.dtype == expected_dtype
        if nan_fill:
            assert np.isnan(arr.fill_value)
        else:
            assert arr.fill_value == 0
|
||||
|
||||
def test_constructor_object_dtype(self):
    """Check object-dtype construction keeps dtype, fill_value and items.

    Builtin ``object`` / ``bool`` are used instead of the ``np.object`` /
    ``np.bool`` aliases, which newer NumPy releases no longer provide.
    """
    # GH 11856
    arr = SparseArray(['A', 'A', np.nan, 'B'], dtype=object)
    assert arr.dtype == object
    assert np.isnan(arr.fill_value)

    arr = SparseArray(['A', 'A', np.nan, 'B'], dtype=object,
                      fill_value='A')
    assert arr.dtype == object
    assert arr.fill_value == 'A'

    # GH 17574
    data = [False, 0, 100.0, 0.0]
    arr = SparseArray(data, dtype=object, fill_value=False)
    assert arr.dtype == object
    assert arr.fill_value is False
    arr_expected = np.array(data, dtype=object)
    # compare exact types as well as values: False, 0 and 0.0 are all
    # equal to each other, so a plain == would not tell them apart
    it = (type(x) is type(y) and x == y for x, y in zip(arr, arr_expected))
    assert np.fromiter(it, dtype=bool).all()
|
||||
|
||||
def test_constructor_spindex_dtype(self):
    """Check dtype and fill_value when an explicit sparse_index is given."""
    # no dtype / fill_value given: float64 with a NaN fill is expected
    result = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
    tm.assert_sp_array_equal(result, SparseArray([np.nan, 1, 2, np.nan]))
    assert result.dtype == np.float64
    assert np.isnan(result.fill_value)

    # every remaining case is expected to be int64 with fill_value 0;
    # entries are (constructor kwargs, dense data, expected-array kwargs)
    int_cases = [
        (dict(data=[1, 2, 3], sparse_index=IntIndex(4, [1, 2, 3]),
              dtype=np.int64, fill_value=0),
         [0, 1, 2, 3], dict(dtype=np.int64, fill_value=0)),
        (dict(data=[1, 2], sparse_index=IntIndex(4, [1, 2]),
              fill_value=0, dtype=np.int64),
         [0, 1, 2, 0], dict(fill_value=0, dtype=np.int64)),
        (dict(data=[1, 2, 3], sparse_index=IntIndex(4, [1, 2, 3]),
              dtype=None, fill_value=0),
         [0, 1, 2, 3], dict(dtype=None)),
        # scalar input
        (dict(data=1, sparse_index=IntIndex(1, [0]), dtype=None),
         [1], dict(dtype=None)),
        (dict(data=[1, 2], sparse_index=IntIndex(4, [1, 2]),
              fill_value=0, dtype=None),
         [0, 1, 2, 0], dict(fill_value=0, dtype=None)),
    ]

    for ctor_kwargs, dense, exp_kwargs in int_cases:
        result = SparseArray(**ctor_kwargs)
        expected = SparseArray(dense, **exp_kwargs)
        tm.assert_sp_array_equal(result, expected)
        assert result.dtype == np.int64
        assert result.fill_value == 0
|
||||
|
||||
@pytest.mark.parametrize('scalar,dtype', [
    (False, bool),
    (0.0, 'float64'),
    (1, 'int64'),
    ('z', 'object')])
def test_scalar_with_index_infer_dtype(self, scalar, dtype):
    """Check the dtype obtained from a scalar broadcast over an index."""
    # GH 19163
    from_scalar = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar)
    from_list = SparseArray([scalar] * 3, fill_value=scalar)

    tm.assert_sp_array_equal(from_scalar, from_list)

    # both construction routes must report the parametrized dtype
    for sparse in (from_scalar, from_list):
        assert sparse.dtype == dtype
|
||||
|
||||
def test_sparseseries_roundtrip(self):
    """Check SparseArray -> SparseSeries -> SparseArray is lossless.

    Builtin ``bool`` is used instead of the ``np.bool`` alias, which newer
    NumPy releases no longer provide.
    """
    # GH 13999
    for kind in ['integer', 'block']:
        for fill in [1, np.nan, 0]:
            arr = SparseArray([np.nan, 1, np.nan, 2, 3], kind=kind,
                              fill_value=fill)
            res = SparseArray(SparseSeries(arr))
            tm.assert_sp_array_equal(arr, res)

            arr = SparseArray([0, 0, 0, 1, 1, 2], dtype=np.int64,
                              kind=kind, fill_value=fill)
            # once with the dtype passed explicitly, once inferred
            res = SparseArray(SparseSeries(arr), dtype=np.int64)
            tm.assert_sp_array_equal(arr, res)

            res = SparseArray(SparseSeries(arr))
            tm.assert_sp_array_equal(arr, res)

        for fill in [True, False, np.nan]:
            arr = SparseArray([True, False, True, True], dtype=bool,
                              kind=kind, fill_value=fill)
            # the original repeated this exact round trip twice; the
            # verbatim duplicate added no coverage and has been dropped
            res = SparseArray(SparseSeries(arr))
            tm.assert_sp_array_equal(arr, res)
|
||||
|
||||
def test_get_item(self):
    """Check scalar indexing on the NaN-filled and zero-filled arrays."""
    nan_filled, zero_filled = self.arr, self.zarr

    assert np.isnan(nan_filled[1])
    assert nan_filled[2] == 1
    assert nan_filled[7] == 5

    assert zero_filled[0] == 0
    assert zero_filled[2] == 1
    assert zero_filled[7] == 5

    # positions past either end must raise an error mentioning "bounds"
    errmsg = re.compile("bounds")
    with tm.assert_raises_regex(IndexError, errmsg):
        self.arr[11]
    with tm.assert_raises_regex(IndexError, errmsg):
        self.arr[-11]

    # -1 addresses the last element
    last = len(self.arr) - 1
    assert self.arr[-1] == self.arr[last]
|
||||
|
||||
def test_take(self):
    """Check take() against np.take for scalar and list positions."""
    assert np.isnan(self.arr.take(0))
    assert np.isscalar(self.arr.take(2))

    # scalar positions
    for pos in (2, 6):
        assert self.arr.take(pos) == np.take(self.arr_data, pos)

    # list positions: the result must match a SparseArray built from the
    # dense np.take result
    for positions in ((2, 3), (0, 1, 2)):
        expected = SparseArray(np.take(self.arr_data, list(positions)))
        result = self.arr.take(list(positions))
        tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
def test_take_fill_value(self):
    """Check take() on an array constructed with fill_value=0."""
    dense = np.array([1, np.nan, 0, 3, 0])
    sparse = SparseArray(dense, fill_value=0)

    for positions in ((0,), (1, 3, 4)):
        expected = SparseArray(np.take(dense, list(positions)),
                               fill_value=0)
        result = sparse.take(list(positions))
        tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
def test_take_negative(self):
    """Check take() with negative positions against np.take."""
    for positions in ((-1,), (-4, -3, -2)):
        expected = SparseArray(np.take(self.arr_data, list(positions)))
        result = self.arr.take(list(positions))
        tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
def test_bad_take(self):
    """Check take() raises IndexError for positions outside the array."""
    # past the end: the message must mention "bounds"
    with tm.assert_raises_regex(IndexError, "bounds"):
        self.arr.take(11)
    # before the start: only the exception type is checked
    with pytest.raises(IndexError):
        self.arr.take(-11)
|
||||
|
||||
def test_take_invalid_kwargs(self):
    """Check take() rejects unknown and unsupported keyword arguments."""
    # (expected exception, message pattern, offending keyword arguments)
    rejected = [
        (TypeError,
         r"take\(\) got an unexpected keyword argument 'foo'",
         {'foo': 2}),
        (ValueError,
         "the 'out' parameter is not supported",
         {'out': self.arr}),
        (ValueError,
         "the 'mode' parameter is not supported",
         {'mode': 'clip'}),
    ]

    for exc, msg, kwargs in rejected:
        with tm.assert_raises_regex(exc, msg):
            self.arr.take([2, 3], **kwargs)
|
||||
|
||||
def test_take_filling(self):
    """Check take() with fill_value / allow_fill on a NaN-filled array."""
    # similar tests as GH 12631
    sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4])

    # (take kwargs, expected dense data) for positions [1, 0, -1]
    fill_cases = [
        ({}, [np.nan, np.nan, 4]),
        # fill_value
        ({'fill_value': True}, [np.nan, np.nan, np.nan]),
        # allow_fill=False
        ({'allow_fill': False, 'fill_value': True}, [np.nan, np.nan, 4]),
    ]
    for kwargs, dense in fill_cases:
        result = sparse.take(np.array([1, 0, -1]), **kwargs)
        expected = SparseArray(dense)
        tm.assert_sp_array_equal(result, expected)

    msg = ('When allow_fill=True and fill_value is not None, '
           'all indices must be >= -1')
    for positions in ([1, 0, -2], [1, 0, -5]):
        with tm.assert_raises_regex(ValueError, msg):
            sparse.take(np.array(positions), fill_value=True)

    # (positions, take kwargs) that must raise IndexError
    out_of_range = [
        ([1, -6], {}),
        ([1, 5], {}),
        ([1, 5], {'fill_value': True}),
    ]
    for positions, kwargs in out_of_range:
        with pytest.raises(IndexError):
            sparse.take(np.array(positions), **kwargs)
|
||||
|
||||
def test_take_filling_fill_value(self):
    """Check take() with fill_value / allow_fill on a zero-filled array."""
    # same tests as GH 12631
    sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)

    # (take kwargs, expected dense data) for positions [1, 0, -1]
    fill_cases = [
        ({}, [0, np.nan, 4]),
        # fill_value
        ({'fill_value': True}, [0, np.nan, 0]),
        # allow_fill=False
        ({'allow_fill': False, 'fill_value': True}, [0, np.nan, 4]),
    ]
    for kwargs, dense in fill_cases:
        result = sparse.take(np.array([1, 0, -1]), **kwargs)
        expected = SparseArray(dense, fill_value=0)
        tm.assert_sp_array_equal(result, expected)

    msg = ('When allow_fill=True and fill_value is not None, '
           'all indices must be >= -1')
    for positions in ([1, 0, -2], [1, 0, -5]):
        with tm.assert_raises_regex(ValueError, msg):
            sparse.take(np.array(positions), fill_value=True)

    # (positions, take kwargs) that must raise IndexError
    out_of_range = [
        ([1, -6], {}),
        ([1, 5], {}),
        ([1, 5], {'fill_value': True}),
    ]
    for positions, kwargs in out_of_range:
        with pytest.raises(IndexError):
            sparse.take(np.array(positions), **kwargs)
|
||||
|
||||
def test_take_filling_all_nan(self):
    """Check take() with and without fill_value on an all-NaN array."""
    sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan])

    # with or without fill_value the result is expected to be all NaN
    for kwargs in ({}, {'fill_value': True}):
        result = sparse.take(np.array([1, 0, -1]), **kwargs)
        expected = SparseArray([np.nan, np.nan, np.nan])
        tm.assert_sp_array_equal(result, expected)

    # (positions, take kwargs) that must raise IndexError
    out_of_range = [
        ([1, -6], {}),
        ([1, 5], {}),
        ([1, 5], {'fill_value': True}),
    ]
    for positions, kwargs in out_of_range:
        with pytest.raises(IndexError):
            sparse.take(np.array(positions), **kwargs)
|
||||
|
||||
def test_set_item(self):
    """Check that item and slice assignment raise TypeError."""
    with tm.assert_raises_regex(TypeError, "item assignment"):
        self.arr[5] = 3

    with tm.assert_raises_regex(TypeError, "item assignment"):
        self.arr[1:5] = 2
|
||||
|
||||
def test_constructor_from_too_large_array(self):
    """Check that two-dimensional input is rejected with TypeError."""
    two_dim = np.arange(10).reshape((2, 5))
    with tm.assert_raises_regex(TypeError, "expected dimension <= 1 data"):
        SparseArray(two_dim)
|
||||
|
||||
def test_constructor_from_sparse(self):
    """Check that building from a SparseArray keeps fill_value and values."""
    source = self.zarr
    rebuilt = SparseArray(source)
    assert rebuilt.fill_value == 0
    assert_almost_equal(rebuilt.sp_values, source.sp_values)
|
||||
|
||||
def test_constructor_copy(self):
    """Check that copy=True detaches sp_values and the default shares them."""
    # writing through a copy must leave the source untouched
    detached = SparseArray(self.arr, copy=True)
    detached.sp_values[:3] = 0
    source_head = self.arr.sp_values[:3]
    assert not (source_head == 0).any()

    # without copy=True the write is expected to show up in the source
    shared = SparseArray(self.arr)
    shared.sp_values[:3] = 0
    source_head = self.arr.sp_values[:3]
    assert (source_head == 0).all()
|
||||
|
||||
def test_constructor_bool(self):
    """Check values, indices and dense forms of a bool SparseArray."""
    # GH 10648
    data = np.array([False, False, True, True, False, False])
    arr = SparseArray(data, fill_value=False, dtype=bool)

    assert arr.dtype == bool

    # only the two True entries are expected to be stored, at 2 and 3
    stored = np.array([True, True])
    tm.assert_numpy_array_equal(arr.sp_values, stored)
    tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
    positions = np.array([2, 3], np.int32)
    tm.assert_numpy_array_equal(arr.sp_index.indices, positions)

    # both dense views must reproduce the input
    dense_forms = [arr.to_dense(), arr.values]
    for dense in dense_forms:
        assert dense.dtype == bool
        tm.assert_numpy_array_equal(dense, data)
|
||||
|
||||
def test_constructor_bool_fill_value(self):
    """Check the dtype and fill_value chosen for boolean input.

    Builtin ``bool`` is used instead of the ``np.bool`` alias, which newer
    NumPy releases no longer provide.
    """
    # dtype inferred: bool dtype with a falsy fill_value is expected
    arr = SparseArray([True, False, True], dtype=None)
    assert arr.dtype == bool
    assert not arr.fill_value

    # dtype given explicitly, fill_value left to the constructor
    arr = SparseArray([True, False, True], dtype=bool)
    assert arr.dtype == bool
    assert not arr.fill_value

    # an explicit fill_value must be kept
    arr = SparseArray([True, False, True], dtype=bool, fill_value=True)
    assert arr.dtype == bool
    assert arr.fill_value
|
||||
|
||||
def test_constructor_float32(self):
    """Check values, indices and dense forms of a float32 SparseArray."""
    # GH 10648
    data = np.array([1., np.nan, 3], dtype=np.float32)
    arr = SparseArray(data, dtype=np.float32)

    assert arr.dtype == np.float32

    # the two non-NaN entries are expected to be stored, at 0 and 2
    stored = np.array([1, 3], dtype=np.float32)
    tm.assert_numpy_array_equal(arr.sp_values, stored)
    tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
    positions = np.array([0, 2], dtype=np.int32)
    tm.assert_numpy_array_equal(arr.sp_index.indices, positions)

    # both dense views must reproduce the input
    dense_forms = [arr.to_dense(), arr.values]
    for dense in dense_forms:
        assert dense.dtype == np.float32
        tm.assert_numpy_array_equal(dense, data)
|
||||
|
||||
def test_astype(self):
    """Check astype() returns a copy and rejects NaN-to-int conversions."""
    # writing into the converted array must not touch the source
    converted = self.arr.astype('f8')
    converted.sp_values[:3] = 27
    assert not (self.arr.sp_values[:3] == 27).any()

    # a NaN fill_value cannot be coerced to int64
    msg = "unable to coerce current fill_value nan to int64 dtype"
    with tm.assert_raises_regex(ValueError, msg):
        self.arr.astype('i8')

    nan_fill = SparseArray([0, np.nan, 0, 1])
    with tm.assert_raises_regex(ValueError, msg):
        nan_fill.astype('i8')

    # with fill_value=0 the NaN is a stored value; a different error
    # message is expected
    zero_fill = SparseArray([0, np.nan, 0, 1], fill_value=0)
    msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer'
    with tm.assert_raises_regex(ValueError, msg):
        zero_fill.astype('i8')
|
||||
|
||||
def test_astype_all(self):
    """Check astype() across several float and integer target dtypes."""
    vals = np.array([1, 2, 3])
    arr = SparseArray(vals, fill_value=1)

    targets = [np.float64, np.float32, np.int64,
               np.int32, np.int16, np.int8]
    for target in targets:
        converted = arr.astype(target)
        # both the array and its stored values must carry the new dtype
        assert converted.dtype == target
        assert converted.sp_values.dtype == target

        dense_expected = vals.astype(target)
        tm.assert_numpy_array_equal(converted.values, dense_expected)
|
||||
|
||||
def test_set_fill_value(self):
    """Check which fill_value assignments are accepted for each dtype.

    Builtin ``bool`` is used instead of the ``np.bool`` alias, which newer
    NumPy releases no longer provide.
    """
    arr = SparseArray([1., np.nan, 2.], fill_value=np.nan)
    arr.fill_value = 2
    assert arr.fill_value == 2

    arr = SparseArray([1, 0, 2], fill_value=0, dtype=np.int64)
    arr.fill_value = 2
    assert arr.fill_value == 2

    # coerces to int
    msg = "unable to set fill_value 3\\.1 to int64 dtype"
    with tm.assert_raises_regex(ValueError, msg):
        arr.fill_value = 3.1

    msg = "unable to set fill_value nan to int64 dtype"
    with tm.assert_raises_regex(ValueError, msg):
        arr.fill_value = np.nan

    arr = SparseArray([True, False, True], fill_value=False, dtype=bool)
    arr.fill_value = True
    assert arr.fill_value

    # coerces to bool
    msg = "unable to set fill_value 0 to bool dtype"
    with tm.assert_raises_regex(ValueError, msg):
        arr.fill_value = 0

    msg = "unable to set fill_value nan to bool dtype"
    with tm.assert_raises_regex(ValueError, msg):
        arr.fill_value = np.nan

    # invalid: non-scalar fill values are rejected
    msg = "fill_value must be a scalar"
    for val in [[1, 2, 3], np.array([1, 2]), (1, 2, 3)]:
        with tm.assert_raises_regex(ValueError, msg):
            arr.fill_value = val
|
||||
|
||||
def test_copy_shallow(self):
    """Check that copy(deep=False) shares its root buffer with the source."""
    arr2 = self.arr.copy(deep=False)

    def _get_base(values):
        # Follow the ``.base`` chain to its root.  Starting from
        # ``values`` itself (rather than ``values.base``) avoids an
        # AttributeError on ``None.base`` when ``values.base`` is None;
        # the identity assertion below then fails cleanly instead.
        base = values
        while base.base is not None:
            base = base.base
        return base

    assert (_get_base(arr2) is _get_base(self.arr))
|
||||
|
||||
def test_values_asarray(self):
    """Check the dense views and np.asarray against their expected data."""
    dense = self.arr_data
    # .values and .to_dense() must both reproduce the dense input
    assert_almost_equal(self.arr.values, dense)
    assert_almost_equal(self.arr.to_dense(), dense)
    # np.asarray is expected to expose only the stored values
    as_array = np.asarray(self.arr)
    assert_almost_equal(self.arr.sp_values, as_array)
|
||||
|
||||
# builtin ``object`` replaces the ``np.object`` alias, which newer NumPy
# releases no longer provide
@pytest.mark.parametrize('data,shape,dtype', [
    ([0, 0, 0, 0, 0], (5,), None),
    ([], (0,), None),
    ([0], (1,), None),
    (['A', 'A', np.nan, 'B'], (4,), object)
])
def test_shape(self, data, shape, dtype):
    """Check that ``shape`` reports the full length of the input data."""
    # GH 21126
    out = SparseArray(data, dtype=dtype)
    assert out.shape == shape
|
||||
|
||||
def test_to_dense(self):
    """Check to_dense() reproduces the input for several fill settings."""
    dense_inputs = [
        [1, np.nan, np.nan, 3, np.nan],
        [1, np.nan, 0, 3, 0],
        [np.nan, np.nan, np.nan, np.nan, np.nan],
    ]

    for raw in dense_inputs:
        vals = np.array(raw)
        # default fill_value first, then an explicit fill_value=0
        res = SparseArray(vals).to_dense()
        tm.assert_numpy_array_equal(res, vals)

        res = SparseArray(vals, fill_value=0).to_dense()
        tm.assert_numpy_array_equal(res, vals)

    # see gh-14647
    # (``vals`` is still the all-NaN array from the last iteration)
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        SparseArray(vals).to_dense(fill=2)
|
||||
|
||||
def test_getitem(self):
    """Check scalar indexing against the dense values, both signs."""
    for i in range(len(self.arr)):
        # each offset is looked up from the front and, negated, from
        # the back
        for idx in (i, -i):
            assert_almost_equal(self.arr[idx], self.arr.values[idx])
|
||||
|
||||
def test_getslice(self):
    """Check slicing against slices of the dense values."""
    tm.assert_sp_array_equal(self.arr[:-3],
                             SparseArray(self.arr.values[:-3]))

    tm.assert_sp_array_equal(self.arr[-4:],
                             SparseArray(self.arr.values[-4:]))

    # two corner cases from Series
    # a start before the beginning is expected to give the whole array
    whole = self.arr[-12:]
    tm.assert_sp_array_equal(whole, SparseArray(self.arr))

    # a stop before the beginning is expected to give an empty array
    empty = self.arr[:-12]
    tm.assert_sp_array_equal(empty, SparseArray(self.arr.values[:0]))
|
||||
|
||||
def test_getslice_tuple(self):
    """Check one-element tuple slicing and rejection of 2-D indexing."""
    dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])

    # default fill_value
    sparse = SparseArray(dense)
    tm.assert_sp_array_equal(sparse[4:, ], SparseArray(dense[4:, ]))

    # explicit fill_value=0
    sparse = SparseArray(dense, fill_value=0)
    expected = SparseArray(dense[4:, ], fill_value=0)
    tm.assert_sp_array_equal(sparse[4:, ], expected)

    # a second axis is not available on the sparse array ...
    with pytest.raises(IndexError):
        sparse[4:, :]

    # ... nor on the one-dimensional ndarray
    with pytest.raises(IndexError):
        # check numpy compat
        dense[4:, :]
|
||||
|
||||
def test_binary_operators(self):
    """Check binary operators against dense results; in-place ops raise."""
    left_raw = np.random.randn(20)
    right_raw = np.random.randn(20)
    left_raw[::2] = np.nan
    right_raw[::3] = np.nan

    # NaN-filled operands
    arr1 = SparseArray(left_raw)
    arr2 = SparseArray(right_raw)

    # the same slots now hold 3, which is also used as fill_value
    left_raw[::2] = 3
    right_raw[::3] = 3
    farr1 = SparseArray(left_raw, fill_value=3)
    farr2 = SparseArray(right_raw, fill_value=3)

    def _verify(op, first, second):
        # sparse (op) sparse
        res = op(first, second)
        exp = SparseArray(op(first.values, second.values),
                          fill_value=first.fill_value)
        assert isinstance(res, SparseArray)
        assert_almost_equal(res.values, exp.values)

        # sparse (op) dense and dense (op) sparse must agree with it
        for mixed in (op(first, second.values),
                      op(first.values, second)):
            assert isinstance(mixed, SparseArray)
            tm.assert_sp_array_equal(res, mixed)

        # sparse (op) scalar
        with_scalar = op(first, 4)
        assert isinstance(with_scalar, SparseArray)

        # ignore this if the actual op raises (e.g. pow)
        try:
            exp = op(first.values, 4)
            exp_fv = op(first.fill_value, 4)
            assert_almost_equal(with_scalar.fill_value, exp_fv)
            assert_almost_equal(with_scalar.values, exp)
        except ValueError:
            pass

    def _verify_inplace(op):
        scratch = arr1.copy()
        with pytest.raises(NotImplementedError):
            op(scratch, arr2)

    with np.errstate(all='ignore'):
        for op in (operator.add, operator.sub, operator.mul,
                   operator.truediv, operator.floordiv, operator.pow):
            _verify(op, arr1, arr2)
            _verify(op, farr1, farr2)

        for name in ('iadd', 'isub', 'imul', 'itruediv', 'ifloordiv',
                     'ipow'):
            _verify_inplace(getattr(operator, name))
|
||||
|
||||
def test_pickle(self):
    """Check that both fixture arrays survive a pickle round trip."""
    for original in (self.arr, self.zarr):
        restored = tm.round_trip_pickle(original)
        tm.assert_sp_array_equal(restored, original)
|
||||
|
||||
def test_generator_warnings(self):
    """Check that iterating a SparseArray emits no deprecation warnings."""
    sp_arr = SparseArray([1, 2, 3])
    with warnings.catch_warnings(record=True) as caught:
        # surface both deprecation categories so they would be recorded
        for category in (DeprecationWarning, PendingDeprecationWarning):
            warnings.filterwarnings(action='always', category=category)
        for _ in sp_arr:
            pass
        assert len(caught) == 0
|
||||
|
||||
def test_fillna(self):
    """Check fillna(-1) results for float and int arrays and fill values."""
    # float64 cases:
    # (source data, source kwargs, expected data, expected fill_value)
    float_cases = [
        ([1, np.nan, np.nan, 3, np.nan], {}, [1, -1, -1, 3, -1], -1),
        ([1, np.nan, np.nan, 3, np.nan], {'fill_value': 0},
         [1, -1, -1, 3, -1], 0),
        ([1, np.nan, 0, 3, 0], {}, [1, -1, 0, 3, 0], -1),
        ([1, np.nan, 0, 3, 0], {'fill_value': 0}, [1, -1, 0, 3, 0], 0),
        ([np.nan, np.nan, np.nan, np.nan], {}, [-1, -1, -1, -1], -1),
        ([np.nan, np.nan, np.nan, np.nan], {'fill_value': 0},
         [-1, -1, -1, -1], 0),
    ]
    for src, src_kwargs, filled, exp_fill in float_cases:
        s = SparseArray(src, **src_kwargs)
        res = s.fillna(-1)
        exp = SparseArray(filled, fill_value=exp_fill, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

    # float dtype's fill_value is np.nan, replaced by -1
    s = SparseArray([0., 0., 0., 0.])
    res = s.fillna(-1)
    exp = SparseArray([0., 0., 0., 0.], fill_value=-1)
    tm.assert_sp_array_equal(res, exp)

    # int dtype shouldn't have missing. No changes.
    s = SparseArray([0, 0, 0, 0])
    assert s.dtype == np.int64
    assert s.fill_value == 0
    tm.assert_sp_array_equal(s.fillna(-1), s)

    s = SparseArray([0, 0, 0, 0], fill_value=0)
    assert s.dtype == np.int64
    assert s.fill_value == 0
    res = s.fillna(-1)
    exp = SparseArray([0, 0, 0, 0], fill_value=0)
    tm.assert_sp_array_equal(res, exp)

    # fill_value can be nan if there is no missing hole.
    # only fill_value will be changed
    s = SparseArray([0, 0, 0, 0], fill_value=np.nan)
    assert s.dtype == np.int64
    assert np.isnan(s.fill_value)
    res = s.fillna(-1)
    exp = SparseArray([0, 0, 0, 0], fill_value=-1)
    tm.assert_sp_array_equal(res, exp)
|
||||
|
||||
def test_fillna_overlap(self):
    """Check fillna() with a value that is already stored in the array."""
    raw = [1, np.nan, np.nan, 3, np.nan]

    # filling with existing value doesn't replace existing value with
    # fill_value, i.e. existing 3 remains in sp_values
    default_fill = SparseArray(raw)
    dense_expected = np.array([1, 3, 3, 3, 3], dtype=np.float64)
    tm.assert_numpy_array_equal(default_fill.fillna(3).to_dense(),
                                dense_expected)

    # with fill_value=0 the sparse result is compared directly
    zero_fill = SparseArray(raw, fill_value=0)
    sparse_expected = SparseArray([1, 3, 3, 3, 3], fill_value=0,
                                  dtype=np.float64)
    tm.assert_sp_array_equal(zero_fill.fillna(3), sparse_expected)
|
||||
|
||||
|
||||
class TestSparseArrayAnalytics(object):
|
||||
|
||||
@pytest.mark.parametrize('data,pos,neg', [
    ([True, True, True], True, False),
    ([1, 2, 1], 1, 0),
    ([1.0, 2.0, 1.0], 1.0, 0.0)
])
def test_all(self, data, pos, neg):
    """Check all() before and after one element is replaced by ``neg``.

    ``pos`` is used as an explicit fill_value; ``neg`` is the falsy value
    written into position 1 for the second half of the test.
    """
    # GH 17570
    # Work on a private copy: the list object lives in the parametrize
    # table, so mutating it in place would leave corrupted input behind
    # for any later run of this test in the same process.
    data = list(data)

    out = SparseArray(data).all()
    assert out

    out = SparseArray(data, fill_value=pos).all()
    assert out

    data[1] = neg
    out = SparseArray(data).all()
    assert not out

    out = SparseArray(data, fill_value=pos).all()
    assert not out
|
||||
|
||||
@pytest.mark.parametrize('data,pos,neg', [
    ([True, True, True], True, False),
    ([1, 2, 1], 1, 0),
    ([1.0, 2.0, 1.0], 1.0, 0.0)
])
def test_numpy_all(self, data, pos, neg):
    """Check np.all() on a SparseArray, including the rejected ``out``.

    ``pos`` is used as an explicit fill_value; ``neg`` is the falsy value
    written into position 1 for the second half of the test.
    """
    # GH 17570
    # Work on a private copy: the list object lives in the parametrize
    # table, so mutating it in place would leave corrupted input behind
    # for any later run of this test in the same process.
    data = list(data)

    out = np.all(SparseArray(data))
    assert out

    out = np.all(SparseArray(data, fill_value=pos))
    assert out

    data[1] = neg
    out = np.all(SparseArray(data))
    assert not out

    out = np.all(SparseArray(data, fill_value=pos))
    assert not out

    # passing ``out`` must be refused
    msg = "the 'out' parameter is not supported"
    tm.assert_raises_regex(ValueError, msg, np.all,
                           SparseArray(data), out=out)
|
||||
|
||||
@pytest.mark.parametrize('data,pos,neg', [
    ([False, True, False], True, False),
    ([0, 2, 0], 2, 0),
    ([0.0, 2.0, 0.0], 2.0, 0.0)
])
def test_any(self, data, pos, neg):
    """Check ``SparseArray.any()`` on partly-truthy and all-falsy data.

    ``pos`` is a truthy value used as an explicit ``fill_value``; ``neg``
    is the falsy value that replaces the only truthy element.
    """
    # GH 17570
    # Work on a copy: the parametrized list object is created once when the
    # decorator is evaluated, so an in-place write would still be visible
    # if this test ran again in the same process (re-run/repeat plugins).
    data = list(data)

    out = SparseArray(data).any()
    assert out

    out = SparseArray(data, fill_value=pos).any()
    assert out

    data[1] = neg
    out = SparseArray(data).any()
    assert not out

    out = SparseArray(data, fill_value=pos).any()
    assert not out
|
||||
|
||||
@pytest.mark.parametrize('data,pos,neg', [
    ([False, True, False], True, False),
    ([0, 2, 0], 2, 0),
    ([0.0, 2.0, 0.0], 2.0, 0.0)
])
def test_numpy_any(self, data, pos, neg):
    """Check ``np.any`` on a SparseArray, including the rejected ``out``.

    ``pos`` is a truthy value used as an explicit ``fill_value``; ``neg``
    is the falsy value that replaces the only truthy element.
    """
    # GH 17570
    # Work on a copy: the parametrized list object is created once when the
    # decorator is evaluated, so an in-place write would still be visible
    # if this test ran again in the same process (re-run/repeat plugins).
    data = list(data)

    out = np.any(SparseArray(data))
    assert out

    out = np.any(SparseArray(data, fill_value=pos))
    assert out

    data[1] = neg
    out = np.any(SparseArray(data))
    assert not out

    out = np.any(SparseArray(data, fill_value=pos))
    assert not out

    # numpy's ``out`` keyword is expected to be refused with a ValueError.
    msg = "the 'out' parameter is not supported"
    tm.assert_raises_regex(ValueError, msg, np.any,
                           SparseArray(data), out=out)
|
||||
|
||||
def test_sum(self):
    """sum() of 0..9 is 45; with element 5 set to NaN it is 40."""
    values = np.arange(10).astype(float)
    assert SparseArray(values).sum() == 45.0

    values[5] = np.nan
    # The expected total is the same for a numeric and a NaN fill_value.
    for fill in (2, np.nan):
        total = SparseArray(values, fill_value=fill).sum()
        assert total == 40.0
|
||||
|
||||
def test_numpy_sum(self):
    """np.sum on a SparseArray: totals plus the rejected numpy keywords."""
    values = np.arange(10).astype(float)
    total = np.sum(SparseArray(values))
    assert total == 45.0

    values[5] = np.nan
    for fill in (2, np.nan):
        total = np.sum(SparseArray(values, fill_value=fill))
        assert total == 40.0

    # ``dtype`` and ``out`` are numpy keywords that must raise ValueError.
    sparse = SparseArray(values)
    msg = "the 'dtype' parameter is not supported"
    with tm.assert_raises_regex(ValueError, msg):
        np.sum(sparse, dtype=np.int64)

    sparse = SparseArray(values)
    msg = "the 'out' parameter is not supported"
    with tm.assert_raises_regex(ValueError, msg):
        np.sum(sparse, out=total)
|
||||
|
||||
def test_cumsum(self):
    """cumsum() gives the same result for every fill_value; axis=1 fails."""
    plain = np.array([1, 2, 3, 4, 5], dtype=float)
    with_nan = np.array([1, 2, np.nan, 4, 5], dtype=float)

    scenarios = [
        (with_nan, SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0]))),
        (plain, SparseArray(plain.cumsum())),
    ]
    # Default fill_value first, then explicit NaN, then a numeric one.
    fill_options = ({}, {'fill_value': np.nan}, {'fill_value': 2})

    for data, expected in scenarios:
        for options in fill_options:
            out = SparseArray(data, **options).cumsum()
            tm.assert_sp_array_equal(out, expected)

        axis = 1  # SparseArray currently 1-D, so only axis = 0 is valid.
        msg = "axis\\(={axis}\\) out of bounds".format(axis=axis)
        with tm.assert_raises_regex(ValueError, msg):
            SparseArray(data).cumsum(axis=axis)
|
||||
|
||||
def test_numpy_cumsum(self):
    """np.cumsum on a SparseArray: results plus rejected numpy keywords."""
    plain = np.array([1, 2, 3, 4, 5], dtype=float)
    with_nan = np.array([1, 2, np.nan, 4, 5], dtype=float)

    scenarios = [
        (with_nan, SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0]))),
        (plain, SparseArray(plain.cumsum())),
    ]
    # Default fill_value first, then explicit NaN, then a numeric one.
    fill_options = ({}, {'fill_value': np.nan}, {'fill_value': 2})

    for data, expected in scenarios:
        for options in fill_options:
            out = np.cumsum(SparseArray(data, **options))
            tm.assert_sp_array_equal(out, expected)

        # ``dtype`` and ``out`` are numpy keywords that must raise.
        sparse = SparseArray(data)
        msg = "the 'dtype' parameter is not supported"
        with tm.assert_raises_regex(ValueError, msg):
            np.cumsum(sparse, dtype=np.int64)

        sparse = SparseArray(data)
        msg = "the 'out' parameter is not supported"
        with tm.assert_raises_regex(ValueError, msg):
            np.cumsum(sparse, out=out)
|
||||
|
||||
def test_mean(self):
    """mean() of 0..9 is 4.5; with element 5 set to NaN it is 40 / 9."""
    values = np.arange(10).astype(float)
    assert SparseArray(values).mean() == 4.5

    values[5] = np.nan
    assert SparseArray(values).mean() == 40.0 / 9
|
||||
|
||||
def test_numpy_mean(self):
    """np.mean on a SparseArray: values plus the rejected numpy keywords."""
    values = np.arange(10).astype(float)
    average = np.mean(SparseArray(values))
    assert average == 4.5

    values[5] = np.nan
    average = np.mean(SparseArray(values))
    assert average == 40.0 / 9

    # ``dtype`` and ``out`` are numpy keywords that must raise ValueError.
    sparse = SparseArray(values)
    msg = "the 'dtype' parameter is not supported"
    with tm.assert_raises_regex(ValueError, msg):
        np.mean(sparse, dtype=np.int64)

    sparse = SparseArray(values)
    msg = "the 'out' parameter is not supported"
    with tm.assert_raises_regex(ValueError, msg):
        np.mean(sparse, out=average)
|
||||
|
||||
def test_ufunc(self):
    """Unary functions must transform the fill_value as well (GH 13853)."""
    # GH 13853 make sure ufunc is applied to fill_value
    absolutes = (abs, np.abs)

    arr = SparseArray([1, np.nan, 2, np.nan, -2])
    expected = SparseArray([1, np.nan, 2, np.nan, 2])
    for absolute in absolutes:
        tm.assert_sp_array_equal(absolute(arr), expected)

    # A fill_value of 1 and of -1 both become 1 once abs() is applied.
    for fill in (1, -1):
        arr = SparseArray([1, -1, 2, -2], fill_value=fill)
        expected = SparseArray([1, 2, 2], sparse_index=arr.sp_index,
                               fill_value=1)
        for absolute in absolutes:
            tm.assert_sp_array_equal(absolute(arr), expected)

    nan_values = [1, np.nan, 2, np.nan, -2]
    arr = SparseArray(nan_values)
    expected = SparseArray(np.sin(nan_values))
    tm.assert_sp_array_equal(np.sin(arr), expected)

    for values, fill in (([1, -1, 2, -2], 1), ([1, -1, 0, -2], 0)):
        arr = SparseArray(values, fill_value=fill)
        expected = SparseArray(np.sin(values), fill_value=np.sin(fill))
        tm.assert_sp_array_equal(np.sin(arr), expected)
|
||||
|
||||
def test_ufunc_args(self):
    """Binary ufunc with a scalar must shift the fill_value too."""
    # GH 13853 make sure ufunc is applied to fill_value, including its arg
    arr = SparseArray([1, np.nan, 2, np.nan, -2])
    expected = SparseArray([2, np.nan, 3, np.nan, -1])
    tm.assert_sp_array_equal(np.add(arr, 1), expected)

    # (input values, input fill_value, expected values, expected fill_value)
    scenarios = (
        ([1, -1, 2, -2], 1, [2, 0, 3, -1], 2),
        ([1, -1, 0, -2], 0, [2, 0, 1, -1], 1),
    )
    for values, fill, shifted, shifted_fill in scenarios:
        arr = SparseArray(values, fill_value=fill)
        expected = SparseArray(shifted, fill_value=shifted_fill)
        tm.assert_sp_array_equal(np.add(arr, 1), expected)
|
||||
@@ -0,0 +1,384 @@
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
import itertools
|
||||
|
||||
|
||||
class TestSparseSeriesConcat(object):
|
||||
|
||||
def test_concat(self):
    """Concatenating two sparse series that share kind and fill_value."""
    val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
    val2 = np.array([3, np.nan, 4, 0, 0])

    for kind in ('integer', 'block'):
        # First the default fill_value, then an explicit fill_value of 0.
        for fill in ({}, {'fill_value': 0}):
            left = pd.SparseSeries(val1, name='x', kind=kind, **fill)
            right = pd.SparseSeries(val2, name='y', kind=kind, **fill)

            result = pd.concat([left, right])
            dense = pd.concat([pd.Series(val1), pd.Series(val2)])
            expected = pd.SparseSeries(dense, kind=kind, **fill)
            tm.assert_sp_series_equal(result, expected)
|
||||
|
||||
def test_concat_axis1(self):
    """Column-wise concat of two sparse series yields a sparse frame."""
    val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
    val2 = np.array([3, np.nan, 4, 0, 0])
    named = [('x', val1), ('y', val2)]

    sparse_parts = [pd.SparseSeries(vals, name=name) for name, vals in named]
    result = pd.concat(sparse_parts, axis=1)

    dense_parts = [pd.Series(vals, name=name) for name, vals in named]
    expected = pd.SparseDataFrame(pd.concat(dense_parts, axis=1))
    tm.assert_sp_frame_equal(result, expected)
|
||||
|
||||
def test_concat_different_fill(self):
    """With differing fill_values the first operand's one is expected."""
    val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
    val2 = np.array([3, np.nan, 4, 0, 0])

    for kind in ('integer', 'block'):
        nan_filled = pd.SparseSeries(val1, name='x', kind=kind)
        zero_filled = pd.SparseSeries(val2, name='y', kind=kind,
                                      fill_value=0)

        result = pd.concat([nan_filled, zero_filled])
        dense = pd.concat([pd.Series(val1), pd.Series(val2)])
        expected = pd.SparseSeries(dense, kind=kind)
        tm.assert_sp_series_equal(result, expected)

        result = pd.concat([zero_filled, nan_filled])
        dense = pd.concat([pd.Series(val2), pd.Series(val1)])
        expected = pd.SparseSeries(dense, kind=kind, fill_value=0)
        tm.assert_sp_series_equal(result, expected)
|
||||
|
||||
def test_concat_axis1_different_fill(self):
    """Column-wise concat with mixed fill_values, compared in dense form."""
    val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
    val2 = np.array([3, np.nan, 4, 0, 0])

    nan_filled = pd.SparseSeries(val1, name='x')
    zero_filled = pd.SparseSeries(val2, name='y', fill_value=0)
    result = pd.concat([nan_filled, zero_filled], axis=1)

    dense_parts = [pd.Series(val1, name='x'), pd.Series(val2, name='y')]
    expected = pd.concat(dense_parts, axis=1)

    assert isinstance(result, pd.SparseDataFrame)
    tm.assert_frame_equal(result.to_dense(), expected)
|
||||
|
||||
def test_concat_different_kind(self):
    """Kind and fill_value of the first operand are expected in the result."""
    val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
    val2 = np.array([3, np.nan, 4, 0, 0])

    int_kind = pd.SparseSeries(val1, name='x', kind='integer')
    block_kind = pd.SparseSeries(val2, name='y', kind='block', fill_value=0)

    result = pd.concat([int_kind, block_kind])
    dense = pd.concat([pd.Series(val1), pd.Series(val2)])
    expected = pd.SparseSeries(dense, kind='integer')
    tm.assert_sp_series_equal(result, expected)

    result = pd.concat([block_kind, int_kind])
    dense = pd.concat([pd.Series(val2), pd.Series(val1)])
    expected = pd.SparseSeries(dense, kind='block', fill_value=0)
    tm.assert_sp_series_equal(result, expected)
|
||||
|
||||
def test_concat_sparse_dense(self):
    """Mixing sparse and dense series in concat gives a sparse series."""
    # use first input's fill_value
    val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan])
    val2 = np.array([3, np.nan, 4, 0, 0])

    for kind in ('integer', 'block'):
        # First the default fill_value, then an explicit fill_value of 0.
        for fill in ({}, {'fill_value': 0}):
            sparse = pd.SparseSeries(val1, name='x', kind=kind, **fill)
            dense = pd.Series(val2, name='y')

            result = pd.concat([sparse, dense])
            joined = pd.concat([pd.Series(val1), dense])
            expected = pd.SparseSeries(joined, kind=kind, **fill)
            tm.assert_sp_series_equal(result, expected)

            result = pd.concat([dense, sparse, dense])
            joined = pd.concat([dense, pd.Series(val1), dense])
            expected = pd.SparseSeries(joined, kind=kind, **fill)
            tm.assert_sp_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestSparseDataFrameConcat(object):
|
||||
|
||||
def setup_method(self, method):
    """Build three dense float frames used as concat inputs.

    ``dense1`` and ``dense2`` share columns A-D; ``dense3`` uses E-H.
    """
    nan = np.nan

    self.dense1 = pd.DataFrame({
        'A': [0., 1., 2., nan],
        'B': [0., 0., 0., 0.],
        'C': [nan, nan, nan, nan],
        'D': [1., 2., 3., 4.],
    })

    self.dense2 = pd.DataFrame({
        'A': [5., 6., 7., 8.],
        'B': [nan, 0., 7., 8.],
        'C': [5., 6., nan, nan],
        'D': [nan, nan, nan, nan],
    })

    self.dense3 = pd.DataFrame({
        'E': [5., 6., 7., 8.],
        'F': [nan, 0., 7., 8.],
        'G': [5., 6., nan, nan],
        'H': [nan, nan, nan, nan],
    })
|
||||
|
||||
def test_concat(self):
    """Row-wise concat of sparse frames that share a fill_value."""
    dense = {1: self.dense1, 2: self.dense2}
    # Every ordering of the two frames, including each frame with itself.
    orderings = [(1, 1), (2, 2), (1, 2), (2, 1)]

    # fill_value = np.nan
    sparse = {key: frame.to_sparse() for key, frame in dense.items()}
    for first, second in orderings:
        result = pd.concat([sparse[first], sparse[second]])
        expected = pd.concat([dense[first], dense[second]]).to_sparse()
        tm.assert_sp_frame_equal(result, expected)

    # fill_value = 0
    sparse = {key: frame.to_sparse(fill_value=0)
              for key, frame in dense.items()}
    for first, second in orderings:
        result = pd.concat([sparse[first], sparse[second]])
        expected = pd.concat([dense[first], dense[second]])
        expected = expected.to_sparse(fill_value=0)
        expected._default_fill_value = np.nan
        tm.assert_sp_frame_equal(result, expected)
|
||||
|
||||
def test_concat_different_fill_value(self):
    """Row-wise concat of sparse frames whose fill_values differ."""
    # 1st fill_value will be used
    nan_filled = self.dense1.to_sparse()
    zero_filled = self.dense2.to_sparse(fill_value=0)

    result = pd.concat([nan_filled, zero_filled])
    expected = pd.concat([self.dense1, self.dense2]).to_sparse()
    tm.assert_sp_frame_equal(result, expected)

    result = pd.concat([zero_filled, nan_filled])
    expected = pd.concat([self.dense2, self.dense1])
    expected = expected.to_sparse(fill_value=0)
    expected._default_fill_value = np.nan
    tm.assert_sp_frame_equal(result, expected)
|
||||
|
||||
def test_concat_different_columns_sort_warns(self):
    """Concat of non-aligned columns without ``sort`` warns in both forms."""
    sparse_frames = [self.dense1.to_sparse(), self.dense3.to_sparse()]
    dense_frames = [self.dense1, self.dense3]

    with tm.assert_produces_warning(FutureWarning):
        result = pd.concat(sparse_frames)
    with tm.assert_produces_warning(FutureWarning):
        expected = pd.concat(dense_frames)

    tm.assert_sp_frame_equal(result, expected.to_sparse())
|
||||
|
||||
def test_concat_different_columns(self):
    """Sorted row-wise concat of sparse frames with disjoint columns."""
    d1, d3 = self.dense1, self.dense3

    # fill_value = np.nan
    s1, s3 = d1.to_sparse(), d3.to_sparse()

    result = pd.concat([s1, s3], sort=True)
    expected = pd.concat([d1, d3], sort=True).to_sparse()
    tm.assert_sp_frame_equal(result, expected)

    result = pd.concat([s3, s1], sort=True)
    expected = pd.concat([d3, d1], sort=True).to_sparse()
    expected._default_fill_value = np.nan
    tm.assert_sp_frame_equal(result, expected)

    # fill_value = 0
    s1, s3 = d1.to_sparse(fill_value=0), d3.to_sparse(fill_value=0)
    for sparse_frames, dense_frames in (([s1, s3], [d1, d3]),
                                        ([s3, s1], [d3, d1])):
        result = pd.concat(sparse_frames, sort=True)
        expected = pd.concat(dense_frames, sort=True)
        expected = expected.to_sparse(fill_value=0)
        expected._default_fill_value = np.nan
        tm.assert_sp_frame_equal(result, expected)

    # different fill values
    s1, s3 = d1.to_sparse(), d3.to_sparse(fill_value=0)
    # each columns keeps its fill_value, thus compare in dense
    for sparse_frames, dense_frames in (([s1, s3], [d1, d3]),
                                        ([s3, s1], [d3, d1])):
        result = pd.concat(sparse_frames, sort=True)
        expected = pd.concat(dense_frames, sort=True)
        assert isinstance(result, pd.SparseDataFrame)
        tm.assert_frame_equal(result.to_dense(), expected)
|
||||
|
||||
def test_concat_series(self):
    """Concat of a sparse frame with one sparse column, in both orders."""
    # fill_value = np.nan
    frame = self.dense1.to_sparse()
    other = self.dense2.to_sparse()

    for col in ('A', 'D'):
        result = pd.concat([frame, other[col]])
        dense = pd.concat([self.dense1, self.dense2[col]])
        tm.assert_sp_frame_equal(result, dense.to_sparse())

        result = pd.concat([other[col], frame])
        dense = pd.concat([self.dense2[col], self.dense1])
        tm.assert_sp_frame_equal(result, dense.to_sparse())

    # fill_value = 0
    frame = self.dense1.to_sparse(fill_value=0)
    other = self.dense2.to_sparse(fill_value=0)

    for col in ('C', 'D'):
        result = pd.concat([frame, other[col]])
        dense = pd.concat([self.dense1, self.dense2[col]])
        expected = dense.to_sparse(fill_value=0)
        expected._default_fill_value = np.nan
        tm.assert_sp_frame_equal(result, expected)

        result = pd.concat([other[col], frame])
        dense = pd.concat([self.dense2[col], self.dense1])
        expected = dense.to_sparse(fill_value=0)
        expected._default_fill_value = np.nan
        tm.assert_sp_frame_equal(result, expected)
|
||||
|
||||
def test_concat_axis1(self):
    """Column-wise concat of sparse frames with disjoint columns."""
    d1, d3 = self.dense1, self.dense3

    # fill_value = np.nan
    s1, s3 = d1.to_sparse(), d3.to_sparse()

    result = pd.concat([s1, s3], axis=1)
    expected = pd.concat([d1, d3], axis=1).to_sparse()
    tm.assert_sp_frame_equal(result, expected)

    result = pd.concat([s3, s1], axis=1)
    expected = pd.concat([d3, d1], axis=1).to_sparse()
    expected._default_fill_value = np.nan
    tm.assert_sp_frame_equal(result, expected)

    # fill_value = 0
    s1, s3 = d1.to_sparse(fill_value=0), d3.to_sparse(fill_value=0)
    for sparse_frames, dense_frames in (([s1, s3], [d1, d3]),
                                        ([s3, s1], [d3, d1])):
        result = pd.concat(sparse_frames, axis=1)
        expected = pd.concat(dense_frames, axis=1)
        expected = expected.to_sparse(fill_value=0)
        expected._default_fill_value = np.nan
        tm.assert_sp_frame_equal(result, expected)

    # different fill values
    s1, s3 = d1.to_sparse(), d3.to_sparse(fill_value=0)
    # each columns keeps its fill_value, thus compare in dense
    for sparse_frames, dense_frames in (([s1, s3], [d1, d3]),
                                        ([s3, s1], [d3, d1])):
        result = pd.concat(sparse_frames, axis=1)
        expected = pd.concat(dense_frames, axis=1)
        assert isinstance(result, pd.SparseDataFrame)
        tm.assert_frame_equal(result.to_dense(), expected)
|
||||
|
||||
@pytest.mark.parametrize('fill_value,sparse_idx,dense_idx',
                         itertools.product([None, 0, 1, np.nan],
                                           [0, 1],
                                           [1, 0]))
def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx):
    """Row-wise concat of one dense and one sparse frame stays sparse."""
    frames = [self.dense1, self.dense2]
    kept_dense = frames[dense_idx]
    sparsified = frames[sparse_idx].to_sparse(fill_value=fill_value)

    mixed = [kept_dense, sparsified]
    all_dense = [kept_dense, frames[sparse_idx]]

    # This will try both directions sparse + dense and dense + sparse
    for mixed_frames, dense_frames in ((mixed, all_dense),
                                       (mixed[::-1], all_dense[::-1])):
        result = pd.concat(mixed_frames)
        expected = pd.concat(dense_frames)

        assert isinstance(result, pd.SparseDataFrame)
        tm.assert_frame_equal(result.to_dense(), expected)
|
||||
|
||||
@pytest.mark.parametrize('fill_value,sparse_idx,dense_idx',
                         itertools.product([None, 0, 1, np.nan],
                                           [0, 1],
                                           [1, 0]))
def test_concat_sparse_dense_cols(self, fill_value, sparse_idx, dense_idx):
    """Column-wise concat of one dense and one sparse frame."""
    # See GH16874, GH18914 and #18686 for why this should be a DataFrame

    frames = [self.dense1, self.dense3]
    kept_dense = frames[dense_idx]
    sparsified = frames[sparse_idx].to_sparse(fill_value=fill_value)

    mixed = [kept_dense, sparsified]
    all_dense = [kept_dense, frames[sparse_idx]]

    # When the same frame is used on both sides a column label selects a
    # frame rather than a series, hence the matching assertion helper.
    if dense_idx == sparse_idx:
        assert_column_equal = tm.assert_frame_equal
    else:
        assert_column_equal = tm.assert_series_equal

    # This will try both directions sparse + dense and dense + sparse
    for mixed_frames, dense_frames in ((mixed, all_dense),
                                       (mixed[::-1], all_dense[::-1])):
        result = pd.concat(mixed_frames, axis=1)
        expected = pd.concat(dense_frames, axis=1)

        for column in kept_dense.columns:
            assert_column_equal(result[column], expected[column])

        tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,132 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import pandas.util.testing as tm
|
||||
from pandas.compat import (is_platform_windows,
|
||||
is_platform_32bit)
|
||||
from pandas.core.config import option_context
|
||||
|
||||
|
||||
use_32bit_repr = is_platform_windows() or is_platform_32bit()
|
||||
|
||||
|
||||
class TestSparseSeriesFormatting(object):
|
||||
|
||||
@property
def dtype_format_for_platform(self):
    """Text formatted into the expected ``array([...])`` index reprs.

    Empty when ``use_32bit_repr`` is set, ``', dtype=int32'`` otherwise.
    """
    if use_32bit_repr:
        return ''
    return ', dtype=int32'
|
||||
|
||||
def test_sparse_max_row(self):
    """repr of a sparse float series, full and with max_rows truncation."""
    sparse = pd.Series([1, np.nan, np.nan, 3, np.nan]).to_sparse()
    suffix = self.dtype_format_for_platform

    expected = ("0 1.0\n1 NaN\n2 NaN\n3 3.0\n"
                "4 NaN\ndtype: float64\nBlockIndex\n"
                "Block locations: array([0, 3]{0})\n"
                "Block lengths: array([1, 1]{0})".format(suffix))
    assert repr(sparse) == expected

    with option_context("display.max_rows", 3):
        # GH 10560
        expected = ("0 1.0\n ... \n4 NaN\n"
                    "Length: 5, dtype: float64\nBlockIndex\n"
                    "Block locations: array([0, 3]{0})\n"
                    "Block lengths: array([1, 1]{0})".format(suffix))
        assert repr(sparse) == expected
|
||||
|
||||
def test_sparse_mi_max_row(self):
    """repr of a MultiIndex-ed sparse series, full and truncated."""
    labels = [('A', 0), ('A', 1), ('B', 0),
              ('C', 0), ('C', 1), ('C', 2)]
    index = pd.MultiIndex.from_tuples(labels)
    dense = pd.Series([1, np.nan, np.nan, 3, np.nan, np.nan], index=index)
    sparse = dense.to_sparse()
    suffix = self.dtype_format_for_platform

    expected = ("A 0 1.0\n 1 NaN\nB 0 NaN\n"
                "C 0 3.0\n 1 NaN\n 2 NaN\n"
                "dtype: float64\nBlockIndex\n"
                "Block locations: array([0, 3]{0})\n"
                "Block lengths: array([1, 1]{0})".format(suffix))
    assert repr(sparse) == expected

    with option_context("display.max_rows", 3,
                        "display.show_dimensions", False):
        # GH 13144
        expected = ("A 0 1.0\n ... \nC 2 NaN\n"
                    "dtype: float64\nBlockIndex\n"
                    "Block locations: array([0, 3]{0})\n"
                    "Block lengths: array([1, 1]{0})".format(suffix))
        assert repr(sparse) == expected
|
||||
|
||||
def test_sparse_bool(self):
    """repr of a boolean sparse series, full and with max_rows truncation."""
    # GH 13110
    s = pd.SparseSeries([True, False, False, True, False, False],
                        fill_value=False)
    result = repr(s)
    # Use the shared class property rather than re-deriving the platform
    # suffix here, so all formatting tests agree on a single definition.
    dtype = self.dtype_format_for_platform
    exp = ("0 True\n1 False\n2 False\n"
           "3 True\n4 False\n5 False\n"
           "dtype: bool\nBlockIndex\n"
           "Block locations: array([0, 3]{0})\n"
           "Block lengths: array([1, 1]{0})".format(dtype))
    assert result == exp

    with option_context("display.max_rows", 3):
        result = repr(s)
        exp = ("0 True\n ... \n5 False\n"
               "Length: 6, dtype: bool\nBlockIndex\n"
               "Block locations: array([0, 3]{0})\n"
               "Block lengths: array([1, 1]{0})".format(dtype))
        assert result == exp
|
||||
|
||||
def test_sparse_int(self):
    """repr of an integer sparse series, full and with max_rows truncation."""
    # GH 13110
    s = pd.SparseSeries([0, 1, 0, 0, 1, 0], fill_value=False)

    result = repr(s)
    # Use the shared class property rather than re-deriving the platform
    # suffix here, so all formatting tests agree on a single definition.
    dtype = self.dtype_format_for_platform
    exp = ("0 0\n1 1\n2 0\n3 0\n4 1\n"
           "5 0\ndtype: int64\nBlockIndex\n"
           "Block locations: array([1, 4]{0})\n"
           "Block lengths: array([1, 1]{0})".format(dtype))
    assert result == exp

    with option_context("display.max_rows", 3,
                        "display.show_dimensions", False):
        result = repr(s)
        exp = ("0 0\n ..\n5 0\n"
               "dtype: int64\nBlockIndex\n"
               "Block locations: array([1, 4]{0})\n"
               "Block lengths: array([1, 1]{0})".format(dtype))
        assert result == exp
|
||||
|
||||
|
||||
class TestSparseDataFrameFormatting(object):
    """repr checks for sparse data frames."""

    def test_sparse_frame(self):
        """A sparse frame must print exactly like its dense source."""
        # GH 13110
        flags = [True, False, True, False, True]
        dense = pd.DataFrame({'A': flags,
                              'B': list(flags),
                              'C': [0, 0, 3, 0, 5],
                              'D': [np.nan, np.nan, np.nan, 1, 2]})
        sparse = dense.to_sparse()
        assert repr(sparse) == repr(dense)

        with option_context("display.max_rows", 3):
            assert repr(sparse) == repr(dense)

    def test_sparse_repr_after_set(self):
        """Chained assignment followed by repr must leave the frame as-is."""
        # GH 15488
        sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]])
        snapshot = sdf.copy()

        # Ignore the warning
        with pd.option_context('mode.chained_assignment', None):
            sdf[0][1] = 2  # This line triggers the bug

        repr(sdf)
        tm.assert_sp_frame_equal(sdf, snapshot)
|
||||
@@ -0,0 +1,44 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSparseGroupBy(object):
    """Group-by results on a sparse frame compared with the dense frame."""

    def setup_method(self, method):
        """Create a dense frame and its sparse counterpart."""
        nan = np.nan
        columns = {
            'A': ['foo', 'bar', 'foo', 'bar',
                  'foo', 'bar', 'foo', 'foo'],
            'B': ['one', 'one', 'two', 'three',
                  'two', 'two', 'one', 'three'],
            'C': np.random.randn(8),
            'D': np.random.randn(8),
            'E': [nan, nan, 1, 2,
                  nan, 1, nan, nan],
        }
        self.dense = pd.DataFrame(columns)
        self.sparse = self.dense.to_sparse()

    def test_first_last_nth(self):
        """first(), last() and nth(1) agree between sparse and dense."""
        # tests for first / last / nth
        by_sparse = self.sparse.groupby('A')
        by_dense = self.dense.groupby('A')

        for name, args in (('first', ()), ('last', ()), ('nth', (1,))):
            tm.assert_frame_equal(getattr(by_sparse, name)(*args),
                                  getattr(by_dense, name)(*args))

    def test_aggfuncs(self):
        """mean() and count() agree between sparse and dense."""
        by_sparse = self.sparse.groupby('A')
        by_dense = self.dense.groupby('A')

        tm.assert_frame_equal(by_sparse.mean(), by_dense.mean())

        # ToDo: sparse sum includes str column
        # tm.assert_frame_equal(sparse_grouped.sum(),
        #                       dense_grouped.sum())

        tm.assert_frame_equal(by_sparse.count(), by_dense.count())
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,617 @@
|
||||
from pandas import Series
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
import operator
|
||||
import pandas.util.testing as tm
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas.core.sparse.array import IntIndex, BlockIndex, _make_index
|
||||
import pandas._libs.sparse as splib
|
||||
|
||||
TEST_LENGTH = 20

# Shared block-index fixtures. Each one gives block locations/lengths for
# two indexes (x and y) plus the expected intersection of the two.
plain_case = {
    'xloc': [0, 7, 15], 'xlen': [3, 5, 5],
    'yloc': [2, 9, 14], 'ylen': [2, 3, 5],
    'intersect_loc': [2, 9, 15], 'intersect_len': [1, 3, 4],
}
delete_blocks = {
    'xloc': [0, 5], 'xlen': [4, 4],
    'yloc': [1], 'ylen': [4],
    'intersect_loc': [1], 'intersect_len': [3],
}
split_blocks = {
    'xloc': [0], 'xlen': [10],
    'yloc': [0, 5], 'ylen': [3, 7],
    'intersect_loc': [0, 5], 'intersect_len': [3, 5],
}
skip_block = {
    'xloc': [10], 'xlen': [5],
    'yloc': [0, 12], 'ylen': [5, 3],
    'intersect_loc': [12], 'intersect_len': [3],
}

no_intersect = {
    'xloc': [0, 10], 'xlen': [4, 6],
    'yloc': [5, 17], 'ylen': [4, 2],
    'intersect_loc': [], 'intersect_len': [],
}


def check_cases(_check_case):
    """Call ``_check_case`` once per shared fixture and per empty case.

    The callback receives six positional lists:
    ``xloc, xlen, yloc, ylen, intersect_loc, intersect_len``.
    """
    arg_order = ('xloc', 'xlen', 'yloc', 'ylen',
                 'intersect_loc', 'intersect_len')

    for case in (plain_case, delete_blocks, split_blocks,
                 skip_block, no_intersect):
        _check_case(*[case[key] for key in arg_order])

    # one or both is empty
    _check_case([0], [5], [], [], [], [])
    _check_case([], [], [], [], [], [])
|
||||
|
||||
|
||||
class TestSparseIndexUnion(object):
|
||||
|
||||
def test_index_make_union(self):
    """``make_union`` on BlockIndex and IntIndex for a table of layouts."""
    def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
        x_blocks = BlockIndex(TEST_LENGTH, xloc, xlen)
        y_blocks = BlockIndex(TEST_LENGTH, yloc, ylen)

        block_union = x_blocks.make_union(y_blocks)
        assert isinstance(block_union, BlockIndex)
        tm.assert_numpy_array_equal(block_union.blocs,
                                    np.array(eloc, dtype=np.int32))
        tm.assert_numpy_array_equal(block_union.blengths,
                                    np.array(elen, dtype=np.int32))

        # The integer-index union must cover the same positions.
        x_ints = x_blocks.to_int_index()
        y_ints = y_blocks.to_int_index()
        int_union = x_ints.make_union(y_ints)
        assert isinstance(int_union, IntIndex)
        tm.assert_numpy_array_equal(int_union.indices,
                                    block_union.to_int_index().indices)

    # Each row: (xloc, xlen, yloc, ylen, expected loc, expected len)
    cases = [
        # y starts right where x ends -> a single merged block
        ([0], [5], [5], [4], [0], [9]),
        # first blocks overlap, remaining blocks stay separate
        ([0, 10], [5, 5], [2, 17], [5, 2], [0, 10, 17], [7, 5, 2]),
        # two overlapping single blocks
        ([1], [5], [3], [5], [1], [7]),
        # one y block bridges both x blocks
        ([2, 10], [4, 4], [4], [8], [2], [12]),
        # y starts with x and reaches into the second x block
        ([0, 5], [3, 5], [0], [7], [0], [10]),
        # chain of overlaps collapsing into one block
        ([2, 10], [4, 4], [4, 13], [8, 4], [2], [15]),
        # every y block lies inside the single x block
        ([2], [15], [4, 9, 14], [3, 2, 2], [2], [15]),
        # nothing touches -> all four blocks are kept
        ([0, 10], [3, 3], [5, 15], [2, 2], [0, 5, 10, 15], [3, 2, 3, 2]),
    ]
    for case in cases:
        _check_case(*case)
|
||||
|
||||
def test_intindex_make_union(self):
    """Verify ``IntIndex.make_union`` for partial, empty and full overlap.

    Every regular case uses two indexes of length 5 and compares the
    union against an explicitly listed expected index.  The final check
    expects ``ValueError`` when the operands have different lengths.
    """
    def _int_index(length, points):
        # all indexes in this test are built from int32 position arrays
        return IntIndex(length, np.array(points, dtype=np.int32))

    # (left indices, right indices, expected union indices)
    union_cases = [
        ([0, 3, 4], [0, 2], [0, 2, 3, 4]),
        ([], [0, 2], [0, 2]),
        ([], [], []),
        ([0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]),
    ]
    for left_points, right_points, union_points in union_cases:
        left = _int_index(5, left_points)
        right = _int_index(5, right_points)
        merged = left.make_union(right)
        expected = _int_index(5, union_points)
        assert merged.equals(expected)

    # lengths 5 and 4 differ, so the union is expected to be refused
    five_long = _int_index(5, [0, 1])
    four_long = _int_index(4, [0, 1])
    with pytest.raises(ValueError):
        five_long.make_union(four_long)
|
||||
|
||||
|
||||
class TestSparseIndexIntersect(object):
    """Tests for ``intersect`` on block- and integer-based sparse indexes."""

    @td.skip_if_windows
    def test_intersect(self):
        """Run the shared cases through ``intersect`` for both index kinds."""

        def _assert_intersection(left, right, expected):
            # intersect, then compare against the expected index
            intersection = left.intersect(right)
            assert intersection.equals(expected)

        def _assert_length_mismatch(index, longer):
            # intersecting with an index over a longer length must raise
            pytest.raises(Exception, index.intersect, longer)

        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            x_block = BlockIndex(TEST_LENGTH, xloc, xlen)
            y_block = BlockIndex(TEST_LENGTH, yloc, ylen)
            expected_block = BlockIndex(TEST_LENGTH, eloc, elen)
            # same blocks as y, but declared over one extra position
            too_long = BlockIndex(TEST_LENGTH + 1, yloc, ylen)

            _assert_intersection(x_block, y_block, expected_block)
            _assert_intersection(x_block.to_int_index(),
                                 y_block.to_int_index(),
                                 expected_block.to_int_index())

            _assert_length_mismatch(x_block, too_long)
            _assert_length_mismatch(x_block.to_int_index(),
                                    too_long.to_int_index())

        check_cases(_check_case)

    def test_intersect_empty(self):
        """Intersecting with an empty index yields the empty index."""
        empty = IntIndex(4, np.array([], dtype=np.int32))
        populated = IntIndex(4, np.array([2, 3], dtype=np.int32))
        assert empty.intersect(populated).equals(empty)
        assert populated.intersect(empty).equals(empty)

        # repeat the same checks after converting both to block indexes
        empty = empty.to_block_index()
        populated = populated.to_block_index()
        assert empty.intersect(populated).equals(empty)
        assert populated.intersect(empty).equals(empty)

    def test_intersect_identical(self):
        """An index intersected with itself equals itself."""
        int_indexes = [
            IntIndex(5, np.array([1, 2], dtype=np.int32)),
            IntIndex(5, np.array([0, 2, 4], dtype=np.int32)),
            IntIndex(0, np.array([], dtype=np.int32)),
            IntIndex(5, np.array([], dtype=np.int32)),
        ]

        for int_index in int_indexes:
            assert int_index.intersect(int_index).equals(int_index)
            block_index = int_index.to_block_index()
            assert block_index.intersect(block_index).equals(block_index)
|
||||
|
||||
|
||||
class TestSparseIndexCommon(object):
    """Checks that apply to both integer- and block-based sparse indexes."""

    def test_int_internal(self):
        """``kind='integer'`` yields an ``IntIndex`` holding the given points."""
        for points in ([2, 3], [], [0, 1, 2, 3]):
            idx = _make_index(4, np.array(points, dtype=np.int32),
                              kind='integer')
            assert isinstance(idx, IntIndex)
            assert idx.npoints == len(points)
            tm.assert_numpy_array_equal(idx.indices,
                                        np.array(points, dtype=np.int32))

    def test_block_internal(self):
        """``kind='block'`` yields a ``BlockIndex`` with the expected blocks."""
        # (points, expected ``blocs``, expected ``blengths``)
        cases = [
            ([2, 3], [2], [2]),
            ([], [], []),
            ([0, 1, 2, 3], [0], [4]),
            ([0, 2, 3], [0, 2], [1, 2]),
        ]
        for points, blocs, blengths in cases:
            idx = _make_index(4, np.array(points, dtype=np.int32),
                              kind='block')
            assert isinstance(idx, BlockIndex)
            assert idx.npoints == len(points)
            tm.assert_numpy_array_equal(idx.blocs,
                                        np.array(blocs, dtype=np.int32))
            tm.assert_numpy_array_equal(idx.blengths,
                                        np.array(blengths, dtype=np.int32))

    def test_lookup(self):
        """``lookup`` returns the stored position, or -1 for absent keys."""
        keys = range(-1, 5)
        # (points, expected lookup result for each key in -1..4)
        cases = [
            ([2, 3], [-1, -1, -1, 0, 1, -1]),
            ([], [-1, -1, -1, -1, -1, -1]),
            ([0, 1, 2, 3], [-1, 0, 1, 2, 3, -1]),
            ([0, 2, 3], [-1, 0, -1, 1, 2, -1]),
        ]
        for kind in ['integer', 'block']:
            for points, positions in cases:
                idx = _make_index(4, np.array(points, dtype=np.int32),
                                  kind=kind)
                for key, position in zip(keys, positions):
                    assert idx.lookup(key) == position

    def test_lookup_array(self):
        """``lookup_array`` maps each queried key like ``lookup`` does."""
        # (points, [(queried keys, expected positions), ...])
        cases = [
            ([2, 3], [([-1, 0, 2], [-1, -1, 0]),
                      ([4, 2, 1, 3], [-1, 0, -1, 1])]),
            # the empty index previously computed its result without
            # asserting it; it is now compared like the other cases
            ([], [([-1, 0, 2, 4], [-1, -1, -1, -1])]),
            ([0, 1, 2, 3], [([-1, 0, 2], [-1, 0, 2]),
                            ([4, 2, 1, 3], [-1, 2, 1, 3])]),
            ([0, 2, 3], [([2, 1, 3, 0], [1, -1, 2, 0]),
                         ([1, 4, 2, 5], [-1, -1, 1, -1])]),
        ]
        for kind in ['integer', 'block']:
            for points, queries in cases:
                idx = _make_index(4, np.array(points, dtype=np.int32),
                                  kind=kind)
                for keys, positions in queries:
                    res = idx.lookup_array(np.array(keys, dtype=np.int32))
                    exp = np.array(positions, dtype=np.int32)
                    tm.assert_numpy_array_equal(res, exp)

    def test_lookup_basics(self):
        """Block and integer views of one index agree on ``lookup``."""
        # (key, expected position) for blocks [5, 8) and [12, 18)
        expectations = [
            (0, -1), (5, 0), (7, 2), (8, -1), (9, -1),
            (10, -1), (11, -1), (12, 3), (17, 8), (18, -1),
        ]

        def _check(index):
            for key, position in expectations:
                assert (index.lookup(key) == position)

        bindex = BlockIndex(20, [5, 12], [3, 6])
        iindex = bindex.to_int_index()

        _check(bindex)
        _check(iindex)
|
||||
|
||||
# corner cases
|
||||
|
||||
|
||||
class TestBlockIndex(object):
    """Tests specific to ``BlockIndex`` construction and conversion."""

    def test_block_internal(self):
        """``kind='block'`` yields a ``BlockIndex`` with the expected blocks."""
        # (points, expected ``blocs``, expected ``blengths``)
        cases = [
            ([2, 3], [2], [2]),
            ([], [], []),
            ([0, 1, 2, 3], [0], [4]),
            ([0, 2, 3], [0, 2], [1, 2]),
        ]
        for points, blocs, blengths in cases:
            idx = _make_index(4, np.array(points, dtype=np.int32),
                              kind='block')
            assert isinstance(idx, BlockIndex)
            assert idx.npoints == len(points)
            tm.assert_numpy_array_equal(idx.blocs,
                                        np.array(blocs, dtype=np.int32))
            tm.assert_numpy_array_equal(idx.blengths,
                                        np.array(blengths, dtype=np.int32))

    def test_make_block_boundary(self):
        """Every second position becomes its own block of length one."""
        for i in [5, 10, 100, 101]:
            idx = _make_index(i, np.arange(0, i, 2, dtype=np.int32),
                              kind='block')

            exp = np.arange(0, i, 2, dtype=np.int32)
            tm.assert_numpy_array_equal(idx.blocs, exp)
            tm.assert_numpy_array_equal(idx.blengths,
                                        np.ones(len(exp), dtype=np.int32))

    def test_equals(self):
        """An index equals itself but not one with a different block length."""
        index = BlockIndex(10, [0, 4], [2, 5])

        assert index.equals(index)
        assert not index.equals(BlockIndex(10, [0, 4], [2, 6]))

    def test_check_integrity(self):
        """Empty indexes construct; malformed blocks raise ``ValueError``."""
        locs = []
        lengths = []

        # 0-length OK: only construction is checked, so nothing is bound
        BlockIndex(0, locs, lengths)

        # also OK even though empty
        BlockIndex(1, locs, lengths)

        # block extend beyond end
        with pytest.raises(ValueError):
            BlockIndex(10, [5], [10])

        # block overlap
        with pytest.raises(ValueError):
            BlockIndex(10, [2, 5], [5, 3])

    def test_to_int_index(self):
        """Converting to an int index expands every block into its points."""
        locs = [0, 10]
        lengths = [4, 6]
        exp_inds = [0, 1, 2, 3, 10, 11, 12, 13, 14, 15]

        block = BlockIndex(20, locs, lengths)
        dense = block.to_int_index()

        tm.assert_numpy_array_equal(dense.indices,
                                    np.array(exp_inds, dtype=np.int32))

    def test_to_block_index(self):
        """``to_block_index`` on a ``BlockIndex`` returns the same object."""
        index = BlockIndex(10, [0, 5], [4, 5])
        assert index.to_block_index() is index
|
||||
|
||||
|
||||
class TestIntIndex(object):
    """Tests specific to ``IntIndex`` validation and conversion."""

    def test_check_integrity(self):
        """Invalid ``indices`` are rejected with a descriptive ValueError."""
        # (expected message, length, indices); the negative-index case
        # used to be checked twice with identical inputs and now appears
        # once
        invalid_cases = [
            # Too many indices than specified in self.length
            ("Too many indices", 1, [1, 2, 3]),
            # No index can be negative.
            ("No index can be less than zero", 5, [1, -2, 3]),
            # All indices must be less than the length.
            ("All indices must be less than the length", 5, [1, 2, 5]),
            ("All indices must be less than the length", 5, [1, 2, 6]),
            # Indices must be strictly ascending.
            ("Indices must be strictly increasing", 5, [1, 3, 2]),
            ("Indices must be strictly increasing", 5, [1, 3, 3]),
        ]
        for msg, length, indices in invalid_cases:
            with tm.assert_raises_regex(ValueError, msg):
                IntIndex(length=length, indices=indices)

    def test_int_internal(self):
        """``kind='integer'`` yields an ``IntIndex`` holding the given points."""
        for points in ([2, 3], [], [0, 1, 2, 3]):
            idx = _make_index(4, np.array(points, dtype=np.int32),
                              kind='integer')
            assert isinstance(idx, IntIndex)
            assert idx.npoints == len(points)
            tm.assert_numpy_array_equal(idx.indices,
                                        np.array(points, dtype=np.int32))

    def test_equals(self):
        """An index equals itself but not one with fewer indices."""
        index = IntIndex(10, [0, 1, 2, 3, 4])
        assert index.equals(index)
        assert not index.equals(IntIndex(10, [0, 1, 2, 3]))

    def test_to_block_index(self):
        """Block -> int -> block conversion reproduces the original index."""

        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

            # see if survive the round trip
            xbindex = xindex.to_int_index().to_block_index()
            ybindex = yindex.to_int_index().to_block_index()
            assert isinstance(xbindex, BlockIndex)
            assert xbindex.equals(xindex)
            assert ybindex.equals(yindex)

        check_cases(_check_case)

    def test_to_int_index(self):
        """``to_int_index`` on an ``IntIndex`` returns the same object."""
        index = IntIndex(10, [2, 3, 4, 5, 6])
        assert index.to_int_index() is index
|
||||
|
||||
|
||||
class TestSparseOperators(object):
    """Compare low-level sparse arithmetic against dense ``Series`` math."""

    def _op_tests(self, sparse_op, python_op):
        """Run ``sparse_op`` on every shared case and cross-check results.

        The sparse operation is evaluated once with block indexes and once
        with integer indexes; both results must agree with each other and
        with ``python_op`` applied to the equivalent dense ``Series``.
        """

        def _to_dense(values, int_index, fill):
            # place the sparse values at their positions and fill the gaps
            sparse_part = Series(values, int_index.indices)
            return sparse_part.reindex(np.arange(TEST_LENGTH)).fillna(fill)

        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            x_block = BlockIndex(TEST_LENGTH, xloc, xlen)
            y_block = BlockIndex(TEST_LENGTH, yloc, ylen)

            x_int = x_block.to_int_index()
            y_int = y_block.to_int_index()

            x_vals = np.arange(x_block.npoints) * 10. + 1
            y_vals = np.arange(y_block.npoints) * 100. + 1

            x_fill = 0
            y_fill = 2

            block_vals, block_index, block_fill = sparse_op(
                x_vals, x_block, x_fill, y_vals, y_block, y_fill)
            int_vals, int_index, int_fill = sparse_op(
                x_vals, x_int, x_fill, y_vals, y_int, y_fill)

            # both index kinds must produce the same answer
            assert block_index.to_int_index().equals(int_index)
            tm.assert_numpy_array_equal(block_vals, int_vals)
            assert block_fill == int_fill

            # check versus Series...
            x_dense = _to_dense(x_vals, x_int, x_fill)
            y_dense = _to_dense(y_vals, y_int, y_fill)

            dense_result = python_op(x_dense, y_dense)
            dense_result = dense_result.reindex(int_index.indices)

            tm.assert_numpy_array_equal(block_vals, dense_result.values)
            tm.assert_numpy_array_equal(int_vals, dense_result.values)

        check_cases(_check_case)
|
||||
|
||||
|
||||
# too cute? oh but how I abhor code duplication
|
||||
# names of the ``operator`` functions for which a sparse test is generated
check_ops = [
    'add',
    'sub',
    'mul',
    'truediv',
    'floordiv',
]
|
||||
|
||||
|
||||
def make_optestf(op):
    """Build a test method named ``test_<op>`` for one arithmetic operator.

    The returned function looks up ``sparse_<op>_float64`` on ``splib``
    and ``<op>`` on the ``operator`` module when it is called, and hands
    both to ``self._op_tests``.
    """
    def f(self):
        # resolve both implementations lazily, at test run time
        self._op_tests(getattr(splib, 'sparse_%s_float64' % op),
                       getattr(operator, op))

    f.__name__ = 'test_%s' % op
    return f
|
||||
|
||||
|
||||
# attach one generated ``test_<op>`` method per entry of ``check_ops``
for op in check_ops:
    generated = make_optestf(op)
    setattr(TestSparseOperators, generated.__name__, generated)
    del generated
|
||||
@@ -0,0 +1,50 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestPivotTable(object):
    """``pivot_table`` on a sparse frame must match the dense result."""

    def setup_method(self, method):
        """Create a dense frame and its sparse counterpart for each test."""
        column_a = ['foo', 'bar', 'foo', 'bar',
                    'foo', 'bar', 'foo', 'foo']
        column_b = ['one', 'one', 'two', 'three',
                    'two', 'two', 'one', 'three']
        column_e = [np.nan, np.nan, 1, 2,
                    np.nan, 1, np.nan, np.nan]
        self.dense = pd.DataFrame({'A': column_a,
                                   'B': column_b,
                                   'C': np.random.randn(8),
                                   'D': np.random.randn(8),
                                   'E': column_e})
        self.sparse = self.dense.to_sparse()

    def test_pivot_table(self):
        """Pivot single value columns with default and explicit mean."""
        # extra keyword arguments for each sparse/dense comparison
        variants = [
            {'values': 'C'},
            {'values': 'E'},
            {'values': 'E', 'aggfunc': 'mean'},
        ]
        for extra in variants:
            res_sparse = pd.pivot_table(self.sparse, index='A', columns='B',
                                        **extra)
            res_dense = pd.pivot_table(self.dense, index='A', columns='B',
                                       **extra)
            tm.assert_frame_equal(res_sparse, res_dense)

        # ToDo: sum doesn't handle nan properly, so the equivalent
        # comparison with values='E', aggfunc='sum' is not run yet

    def test_pivot_table_multi(self):
        """Pivot two value columns at once."""
        value_columns = ['D', 'E']
        res_sparse = pd.pivot_table(self.sparse, index='A', columns='B',
                                    values=value_columns)
        res_dense = pd.pivot_table(self.dense, index='A', columns='B',
                                   values=value_columns)
        tm.assert_frame_equal(res_sparse, res_dense)
|
||||
@@ -0,0 +1,38 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def sparse_df():
    """Return a 3x3 sparse frame with ones on the diagonal (eye)."""
    diagonal = {i: {i: 1} for i in range(3)}
    return pd.SparseDataFrame(diagonal)
|
||||
|
||||
|
||||
@pytest.fixture
def multi_index3():
    """Return a two-level ``MultiIndex`` with entries (0, 0), (1, 1), (2, 2)."""
    pairs = [(i, i) for i in range(3)]
    return pd.MultiIndex.from_tuples(pairs)
|
||||
|
||||
|
||||
def test_sparse_frame_stack(sparse_df, multi_index3):
    """Stacking the eye frame gives a sparse series of three ones."""
    stacked = sparse_df.stack()
    tm.assert_sp_series_equal(
        stacked, pd.SparseSeries(np.ones(3), index=multi_index3))
|
||||
|
||||
|
||||
def test_sparse_frame_unstack(sparse_df):
    """Unstacking a sparse frame matches unstacking the dense equivalent."""
    nan = np.nan
    row_index = pd.MultiIndex.from_tuples([(0, 0), (1, 0), (1, 2)])
    sparse_df.index = row_index

    # dense counterpart of the eye fixture, with NaN off the diagonal
    dense_values = np.array([[1, nan, nan],
                             [nan, 1, nan],
                             [nan, nan, 1]])
    dense_unstacked = pd.DataFrame(dense_values, index=row_index).unstack()
    sparse_unstacked = sparse_df.unstack()

    tm.assert_numpy_array_equal(dense_unstacked.values,
                                sparse_unstacked.values)
|
||||
|
||||
|
||||
def test_sparse_series_unstack(sparse_df, multi_index3):
    """Unstacking a sparse series of ones rebuilds the eye frame."""
    ones = pd.SparseSeries(np.ones(3), index=multi_index3)
    tm.assert_sp_frame_equal(ones.unstack(), sparse_df)
|
||||
Reference in New Issue
Block a user