pruned venvs
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -1,40 +0,0 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from pandas import SparseDataFrame, DataFrame, SparseSeries
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason='Wrong SparseBlock initialization '
                          '(GH 17386)')
def test_quantile():
    """Compare a scalar quantile of a sparse frame with the dense result.

    The sparse result is checked against both the dense Series and its
    SparseSeries wrapping (GH 17386).
    """
    rows = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    quantile = 0.1

    actual = SparseDataFrame(rows).quantile(quantile)

    # Dense reference, then the same values wrapped as a SparseSeries.
    expected_dense = DataFrame(rows).quantile(quantile)
    expected_sparse = SparseSeries(expected_dense)

    tm.assert_series_equal(actual, expected_dense)
    tm.assert_sp_series_equal(actual, expected_sparse)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason='Wrong SparseBlock initialization '
                          '(GH 17386)')
def test_quantile_multi():
    """Compare a list of quantiles on a sparse frame with the dense result.

    The sparse result is checked against both the dense frame and its
    SparseDataFrame wrapping (GH 17386).
    """
    rows = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    quantiles = [0.1, 0.5]

    actual = SparseDataFrame(rows).quantile(quantiles)

    # Dense reference, then the same values wrapped as a SparseDataFrame.
    expected_dense = DataFrame(rows).quantile(quantiles)
    expected_sparse = SparseDataFrame(expected_dense)

    tm.assert_frame_equal(actual, expected_dense)
    tm.assert_sp_frame_equal(actual, expected_sparse)
|
||||
@@ -1,92 +0,0 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from pandas import SparseDataFrame, DataFrame, Series, bdate_range
|
||||
from pandas.core import nanops
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def dates():
    """Return a 10-period business-day range built from start '1/1/2011'."""
    business_days = bdate_range('1/1/2011', periods=10)
    return business_days
|
||||
|
||||
|
||||
@pytest.fixture
def empty():
    """Return a SparseDataFrame constructed with no arguments."""
    blank = SparseDataFrame()
    return blank
|
||||
|
||||
|
||||
@pytest.fixture
def frame(dates):
    """Return a SparseDataFrame with columns A-D indexed by ``dates``.

    Columns A, B and D each contain a run of NaN entries; column C is
    ``np.arange(10)`` as float64.
    """
    nan = np.nan
    columns = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
        'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
        'C': np.arange(10, dtype=np.float64),
        'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
    }
    return SparseDataFrame(columns, index=dates)
|
||||
|
||||
|
||||
@pytest.fixture
def fill_frame(frame):
    """Return a copy of ``frame`` with NaN replaced by 2 and fill value 2.

    The values are copied first, so the ``frame`` fixture is not modified.
    """
    dense_values = frame.values.copy()
    nan_mask = np.isnan(dense_values)
    dense_values[nan_mask] = 2

    return SparseDataFrame(
        dense_values,
        columns=['A', 'B', 'C', 'D'],
        default_fill_value=2,
        index=frame.index,
    )
|
||||
|
||||
|
||||
def test_apply(frame):
    """Exercise ``apply`` on a sparse frame: elementwise, broadcast, reduce."""
    # Elementwise ufunc: result stays sparse and matches the dense values.
    rooted = frame.apply(np.sqrt)
    assert isinstance(rooted, SparseDataFrame)
    tm.assert_almost_equal(rooted.values, np.sqrt(frame.values))

    # agg / broadcast: the ``broadcast`` keyword is expected to emit a
    # FutureWarning on both the sparse and the dense frame.
    with tm.assert_produces_warning(FutureWarning):
        broadcasted = frame.apply(np.sum, broadcast=True)
    assert isinstance(broadcasted, SparseDataFrame)

    with tm.assert_produces_warning(FutureWarning):
        dense_broadcasted = frame.to_dense().apply(np.sum, broadcast=True)
    tm.assert_frame_equal(broadcasted.to_dense(), dense_broadcasted)

    # Reduction: compared with a NaN-skipping sum over the dense frame.
    summed = frame.apply(np.sum)
    dense_summed = frame.to_dense().apply(nanops.nansum)
    tm.assert_series_equal(summed, dense_summed)
|
||||
|
||||
|
||||
def test_apply_fill(fill_frame):
    """Applying ``np.sqrt`` should also transform column A's fill value."""
    rooted = fill_frame.apply(np.sqrt)
    column_a = rooted['A']
    assert column_a.fill_value == np.sqrt(2)
|
||||
|
||||
|
||||
def test_apply_empty(empty):
    """``apply`` on an empty sparse frame returns the very same object."""
    outcome = empty.apply(np.sqrt)
    assert outcome is empty
|
||||
|
||||
|
||||
def test_apply_nonuq():
    """Check ``apply`` on a sparse frame whose index has duplicate labels."""

    def first_item(s):
        return s[0]

    dense = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                      index=['a', 'a', 'c'])
    sparse = dense.to_sparse()
    res = sparse.apply(first_item, axis=1)
    exp = dense.apply(first_item, axis=1)

    # dtype must be kept
    assert res.dtype == np.int64

    # ToDo: apply must return subclassed dtype
    assert isinstance(res, Series)
    tm.assert_series_equal(res.to_dense(), exp)

    # df.T breaks
    transposed = dense.T
    sparse = transposed.to_sparse()
    res = sparse.apply(first_item, axis=0)  # noqa
    exp = transposed.apply(first_item, axis=0)

    # TODO: no non-unique columns supported in sparse yet
    # tm.assert_series_equal(res.to_dense(), exp)
|
||||
|
||||
|
||||
def test_applymap(frame):
    """Smoke test: ``applymap`` runs and returns a SparseDataFrame."""
    doubled = frame.applymap(lambda x: x * 2)
    assert isinstance(doubled, SparseDataFrame)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,113 +0,0 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from pandas import SparseDataFrame, DataFrame
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
# Module-level pytest mark: skips every test in this module, citing GH 17386.
pytestmark = pytest.mark.skip("Wrong SparseBlock initialization (GH 17386)")
|
||||
|
||||
|
||||
@pytest.mark.parametrize('data', [
    [[1, 1], [2, 2], [3, 3], [4, 4], [0, 0]],
    [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [np.nan, np.nan]],
    [
        [1.0, 1.0 + 1.0j],
        [2.0 + 2.0j, 2.0],
        [3.0, 3.0 + 3.0j],
        [4.0 + 4.0j, 4.0],
        [np.nan, np.nan]
    ]
])
@pytest.mark.xfail(reason='Wrong SparseBlock initialization '
                          '(GH 17386)')
def test_where_with_numeric_data(data):
    """Compare ``where`` with a ``> 1.5`` mask on sparse and dense frames.

    Runs over integer, float and complex inputs (GH 17386).
    """
    threshold = 1.5

    sparse_frame = SparseDataFrame(data)
    actual = sparse_frame.where(sparse_frame > threshold)

    dense_frame = DataFrame(data)
    expected_dense = dense_frame.where(dense_frame > threshold)
    expected_sparse = SparseDataFrame(expected_dense)

    tm.assert_frame_equal(actual, expected_dense)
    tm.assert_sp_frame_equal(actual, expected_sparse)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('data', [
    [[1, 1], [2, 2], [3, 3], [4, 4], [0, 0]],
    [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [np.nan, np.nan]],
    [
        [1.0, 1.0 + 1.0j],
        [2.0 + 2.0j, 2.0],
        [3.0, 3.0 + 3.0j],
        [4.0 + 4.0j, 4.0],
        [np.nan, np.nan]
    ]
])
@pytest.mark.parametrize('other', [
    True,
    -100,
    0.1,
    100.0 + 100.0j
])
@pytest.mark.xfail(reason='Wrong SparseBlock initialization '
                          '(GH 17386)')
def test_where_with_numeric_data_and_other(data, other):
    """Compare ``where(mask, other)`` on sparse and dense numeric frames.

    The expected sparse frame uses ``other`` as its default fill value
    (GH 17386).
    """
    threshold = 1.5

    sparse_frame = SparseDataFrame(data)
    actual = sparse_frame.where(sparse_frame > threshold, other)

    dense_frame = DataFrame(data)
    expected_dense = dense_frame.where(dense_frame > threshold, other)
    expected_sparse = SparseDataFrame(expected_dense,
                                      default_fill_value=other)

    tm.assert_frame_equal(actual, expected_dense)
    tm.assert_sp_frame_equal(actual, expected_sparse)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason='Wrong SparseBlock initialization '
                          '(GH 17386)')
def test_where_with_bool_data():
    """Compare ``where`` with an ``== True`` mask on boolean frames.

    The sparse result is checked against the dense one (GH 17386).
    """
    rows = [[False, False], [True, True], [False, False]]
    wanted = True

    sparse_frame = SparseDataFrame(rows)
    actual = sparse_frame.where(sparse_frame == wanted)

    dense_frame = DataFrame(rows)
    expected_dense = dense_frame.where(dense_frame == wanted)
    expected_sparse = SparseDataFrame(expected_dense)

    tm.assert_frame_equal(actual, expected_dense)
    tm.assert_sp_frame_equal(actual, expected_sparse)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('other', [
    True,
    0,
    0.1,
    100.0 + 100.0j
])
@pytest.mark.xfail(reason='Wrong SparseBlock initialization '
                          '(GH 17386)')
def test_where_with_bool_data_and_other(other):
    """Compare ``where(mask, other)`` on sparse and dense boolean frames.

    The expected sparse frame uses ``other`` as its default fill value
    (GH 17386).
    """
    rows = [[False, False], [True, True], [False, False]]
    wanted = True

    sparse_frame = SparseDataFrame(rows)
    actual = sparse_frame.where(sparse_frame == wanted, other)

    dense_frame = DataFrame(rows)
    expected_dense = dense_frame.where(dense_frame == wanted, other)
    expected_sparse = SparseDataFrame(expected_dense,
                                      default_fill_value=other)

    tm.assert_frame_equal(actual, expected_dense)
    tm.assert_sp_frame_equal(actual, expected_sparse)
|
||||
@@ -1,20 +0,0 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
from pandas import SparseDataFrame, read_csv
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestSparseDataFrameToCsv(object):
    """CSV round-trip checks for SparseDataFrame over several fill values."""

    # Used both as the column data and as the parametrized fill values.
    fill_values = [np.nan, 0, None, 1]

    @pytest.mark.parametrize('fill_value', fill_values)
    def test_to_csv_sparse_dataframe(self, fill_value):
        """Write a sparse frame to CSV, read it back and compare (GH19384)."""
        column = type(self).fill_values
        sdf = SparseDataFrame({'a': column},
                              default_fill_value=fill_value)

        with tm.ensure_clean('sparse_df.csv') as path:
            sdf.to_csv(path, index=False)
            df = read_csv(path, skip_blank_lines=False)

            round_tripped = df.to_sparse(fill_value=fill_value)
            tm.assert_sp_frame_equal(round_tripped, sdf)
|
||||
-168
@@ -1,168 +0,0 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from warnings import catch_warnings
|
||||
from pandas.util import testing as tm
|
||||
from pandas import SparseDataFrame, SparseSeries
|
||||
from distutils.version import LooseVersion
|
||||
from pandas.core.dtypes.common import (
|
||||
is_bool_dtype,
|
||||
is_float_dtype,
|
||||
is_object_dtype,
|
||||
is_float)
|
||||
|
||||
|
||||
# Import scipy via pytest; when it cannot be imported the module is skipped.
scipy = pytest.importorskip('scipy')
|
||||
|
||||
|
||||
@pytest.mark.parametrize('index', [None, list('abc')])  # noqa: F811
@pytest.mark.parametrize('columns', [None, list('def')])
@pytest.mark.parametrize('fill_value', [None, 0, np.nan])
@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16])
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
    """Round-trip a 3x3 array through ``spmatrix`` and SparseDataFrame.

    Checks the frame contents, the matrix returned by ``to_coo`` and the
    resulting dtypes for each index/columns/fill_value/dtype combination
    (GH 4343).
    """
    # One ndarray, and the sparse matrix built from it, feed both the frame
    # under test and the expected frame.
    dense_arr = np.eye(3, dtype=dtype)
    # GH 16179
    dense_arr[0, 1] = dtype(2)
    try:
        spm = spmatrix(dense_arr)
        assert spm.dtype == dense_arr.dtype
    except (TypeError, AssertionError):
        # Building the sparse matrix raised, or it did not keep the dtype:
        # this spmatrix/dtype combination is treated as unsupported and the
        # rest of the test is not run.
        return

    sdf = SparseDataFrame(spm, index=index, columns=columns,
                          default_fill_value=fill_value)

    # Building the expected frame per dtype/fill_value pair is awkward, so
    # go through object dtype: zeros become NaN and are then filled.
    as_object = dense_arr.astype(object)
    as_object[dense_arr == 0] = np.nan
    fill_with = np.nan if fill_value is None else fill_value
    expected = SparseDataFrame(as_object, index=index,
                               columns=columns).fillna(fill_with)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    fill_forces_float = fill_value is None or is_float(fill_value)
    was_upcast = (fill_forces_float and
                  not (is_object_dtype(dtype) or is_float_dtype(dtype)))
    if is_bool_dtype(dtype):
        res_dtype = bool
    elif was_upcast:
        res_dtype = float
    else:
        res_dtype = dtype
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype

    # However, adding a str column results in an upcast to object
    sdf['strings'] = np.arange(len(sdf)).astype(str)
    assert sdf.to_coo().dtype == np.object_
|
||||
|
||||
|
||||
@pytest.mark.parametrize('fill_value', [None, 0, np.nan])  # noqa: F811
def test_from_to_scipy_object(spmatrix, fill_value):
    """Round-trip a 2x2 object-dtype array through ``spmatrix`` (GH 4343).

    Skipped for ``dok_matrix`` on SciPy >= 0.19.
    """
    dtype = object
    columns = list('cd')
    index = list('ab')

    is_dok = spmatrix is scipy.sparse.dok_matrix
    if is_dok and LooseVersion(scipy.__version__) >= LooseVersion('0.19.0'):
        pytest.skip("dok_matrix from object does not work in SciPy >= 0.19")

    # One ndarray, and the sparse matrix built from it, feed both the frame
    # under test and the expected frame.
    dense_arr = np.eye(2, dtype=dtype)
    try:
        spm = spmatrix(dense_arr)
        assert spm.dtype == dense_arr.dtype
    except (TypeError, AssertionError):
        # Building the sparse matrix raised, or it did not keep the dtype:
        # this spmatrix/dtype combination is treated as unsupported and the
        # rest of the test is not run.
        return

    sdf = SparseDataFrame(spm, index=index, columns=columns,
                          default_fill_value=fill_value)

    # Expected frame: zeros become NaN in an object array, then get filled.
    as_object = dense_arr.astype(object)
    as_object[dense_arr == 0] = np.nan
    fill_with = np.nan if fill_value is None else fill_value
    expected = SparseDataFrame(as_object, index=index,
                               columns=columns).fillna(fill_with)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal; warnings raised here are recorded, not shown.
    with catch_warnings(record=True):
        assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    res_dtype = object
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype
|
||||
|
||||
|
||||
def test_from_scipy_correct_ordering(spmatrix):
    """A frame built from ``spmatrix`` equals one built from the ndarray.

    Uses a 2x2 array holding 1..4 (GH 16179).
    """
    dense_arr = np.arange(1, 5).reshape(2, 2)
    try:
        spm = spmatrix(dense_arr)
        assert spm.dtype == dense_arr.dtype
    except (TypeError, AssertionError):
        # Building the sparse matrix raised, or it did not keep the dtype:
        # this spmatrix/dtype combination is treated as unsupported and the
        # rest of the test is not run.
        return

    from_sparse = SparseDataFrame(spm)
    from_dense = SparseDataFrame(dense_arr)
    tm.assert_sp_frame_equal(from_sparse, from_dense)
    tm.assert_frame_equal(from_sparse.to_dense(), from_dense.to_dense())
|
||||
|
||||
|
||||
def test_from_scipy_fillna(spmatrix):
    """``fillna(-1.0)`` on a frame built from ``spmatrix`` (GH 16112)."""
    dense_arr = np.eye(3)
    dense_arr[1:, 0] = np.nan

    try:
        spm = spmatrix(dense_arr)
        assert spm.dtype == dense_arr.dtype
    except (TypeError, AssertionError):
        # Building the sparse matrix raised, or it did not keep the dtype:
        # this spmatrix/dtype combination is treated as unsupported and the
        # rest of the test is not run.
        return

    sdf = SparseDataFrame(spm).fillna(-1.0)

    # Returning frame should fill all nan values with -1.0
    nan = np.nan
    expected_columns = {
        0: SparseSeries([1., -1, -1]),
        1: SparseSeries([nan, 1, nan]),
        2: SparseSeries([nan, nan, 1]),
    }
    expected = SparseDataFrame(expected_columns, default_fill_value=-1)

    # The fill_value should be whatever .fillna() was called with above.
    # The SparseSeries are deliberately not built with -1 as their initial
    # fill_value: building them this way gives "compressed" SparseArrays
    # without constructing those by hand, and the fill_value is set after.
    for label in expected:
        expected[label].fill_value = -1

    tm.assert_sp_frame_equal(sdf, expected)
|
||||
Reference in New Issue
Block a user