demo + utils venv
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,10 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from pandas import Categorical
|
||||
|
||||
|
||||
class TestCategorical(object):
|
||||
|
||||
def setup_method(self, method):
|
||||
self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
|
||||
ordered=True)
|
||||
@@ -0,0 +1,13 @@
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
|
||||
def allow_fill(request):
|
||||
"""Boolean 'allow_fill' parameter for Categorical.take"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
|
||||
def ordered(request):
|
||||
"""Boolean 'ordered' parameter for Categorical."""
|
||||
return request.param
|
||||
@@ -0,0 +1,142 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize('ordered', [True, False])
|
||||
@pytest.mark.parametrize('categories', [
|
||||
['b', 'a', 'c'],
|
||||
['a', 'b', 'c', 'd'],
|
||||
])
|
||||
def test_factorize(categories, ordered):
|
||||
cat = pd.Categorical(['b', 'b', 'a', 'c', None],
|
||||
categories=categories,
|
||||
ordered=ordered)
|
||||
labels, uniques = pd.factorize(cat)
|
||||
expected_labels = np.array([0, 0, 1, 2, -1], dtype=np.intp)
|
||||
expected_uniques = pd.Categorical(['b', 'a', 'c'],
|
||||
categories=categories,
|
||||
ordered=ordered)
|
||||
|
||||
tm.assert_numpy_array_equal(labels, expected_labels)
|
||||
tm.assert_categorical_equal(uniques, expected_uniques)
|
||||
|
||||
|
||||
def test_factorized_sort():
|
||||
cat = pd.Categorical(['b', 'b', None, 'a'])
|
||||
labels, uniques = pd.factorize(cat, sort=True)
|
||||
expected_labels = np.array([1, 1, -1, 0], dtype=np.intp)
|
||||
expected_uniques = pd.Categorical(['a', 'b'])
|
||||
|
||||
tm.assert_numpy_array_equal(labels, expected_labels)
|
||||
tm.assert_categorical_equal(uniques, expected_uniques)
|
||||
|
||||
|
||||
def test_factorized_sort_ordered():
|
||||
cat = pd.Categorical(['b', 'b', None, 'a'],
|
||||
categories=['c', 'b', 'a'],
|
||||
ordered=True)
|
||||
|
||||
labels, uniques = pd.factorize(cat, sort=True)
|
||||
expected_labels = np.array([0, 0, -1, 1], dtype=np.intp)
|
||||
expected_uniques = pd.Categorical(['b', 'a'],
|
||||
categories=['c', 'b', 'a'],
|
||||
ordered=True)
|
||||
|
||||
tm.assert_numpy_array_equal(labels, expected_labels)
|
||||
tm.assert_categorical_equal(uniques, expected_uniques)
|
||||
|
||||
|
||||
def test_isin_cats():
|
||||
# GH2003
|
||||
cat = pd.Categorical(["a", "b", np.nan])
|
||||
|
||||
result = cat.isin(["a", np.nan])
|
||||
expected = np.array([True, False, True], dtype=bool)
|
||||
tm.assert_numpy_array_equal(expected, result)
|
||||
|
||||
result = cat.isin(["a", "c"])
|
||||
expected = np.array([True, False, False], dtype=bool)
|
||||
tm.assert_numpy_array_equal(expected, result)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("empty", [[], pd.Series(), np.array([])])
|
||||
def test_isin_empty(empty):
|
||||
s = pd.Categorical(["a", "b"])
|
||||
expected = np.array([False, False], dtype=bool)
|
||||
|
||||
result = s.isin(empty)
|
||||
tm.assert_numpy_array_equal(expected, result)
|
||||
|
||||
|
||||
class TestTake(object):
|
||||
# https://github.com/pandas-dev/pandas/issues/20664
|
||||
|
||||
def test_take_warns(self):
|
||||
cat = pd.Categorical(['a', 'b'])
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
cat.take([0, -1])
|
||||
|
||||
def test_take_positive_no_warning(self):
|
||||
cat = pd.Categorical(['a', 'b'])
|
||||
with tm.assert_produces_warning(None):
|
||||
cat.take([0, 0])
|
||||
|
||||
def test_take_bounds(self, allow_fill):
|
||||
# https://github.com/pandas-dev/pandas/issues/20664
|
||||
cat = pd.Categorical(['a', 'b', 'a'])
|
||||
with pytest.raises(IndexError):
|
||||
cat.take([4, 5], allow_fill=allow_fill)
|
||||
|
||||
def test_take_empty(self, allow_fill):
|
||||
# https://github.com/pandas-dev/pandas/issues/20664
|
||||
cat = pd.Categorical([], categories=['a', 'b'])
|
||||
with pytest.raises(IndexError):
|
||||
cat.take([0], allow_fill=allow_fill)
|
||||
|
||||
def test_positional_take(self, ordered):
|
||||
cat = pd.Categorical(['a', 'a', 'b', 'b'], categories=['b', 'a'],
|
||||
ordered=ordered)
|
||||
result = cat.take([0, 1, 2], allow_fill=False)
|
||||
expected = pd.Categorical(['a', 'a', 'b'], categories=cat.categories,
|
||||
ordered=ordered)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_positional_take_unobserved(self, ordered):
|
||||
cat = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'],
|
||||
ordered=ordered)
|
||||
result = cat.take([1, 0], allow_fill=False)
|
||||
expected = pd.Categorical(['b', 'a'], categories=cat.categories,
|
||||
ordered=ordered)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_take_allow_fill(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/23296
|
||||
cat = pd.Categorical(['a', 'a', 'b'])
|
||||
result = cat.take([0, -1, -1], allow_fill=True)
|
||||
expected = pd.Categorical(['a', np.nan, np.nan],
|
||||
categories=['a', 'b'])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_take_fill_with_negative_one(self):
|
||||
# -1 was a category
|
||||
cat = pd.Categorical([-1, 0, 1])
|
||||
result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1)
|
||||
expected = pd.Categorical([-1, -1, 0], categories=[-1, 0, 1])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_take_fill_value(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/23296
|
||||
cat = pd.Categorical(['a', 'b', 'c'])
|
||||
result = cat.take([0, 1, -1], fill_value='a', allow_fill=True)
|
||||
expected = pd.Categorical(['a', 'b', 'a'], categories=['a', 'b', 'c'])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_take_fill_value_new_raises(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/23296
|
||||
cat = pd.Categorical(['a', 'b', 'c'])
|
||||
xpr = r"'fill_value' \('d'\) is not in this Categorical's categories."
|
||||
with pytest.raises(TypeError, match=xpr):
|
||||
cat.take([0, 1, -1], fill_value='d', allow_fill=True)
|
||||
+303
@@ -0,0 +1,303 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PYPY
|
||||
|
||||
from pandas import Categorical, Index, Series
|
||||
from pandas.api.types import is_scalar
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalAnalytics(object):
|
||||
|
||||
def test_min_max(self):
|
||||
|
||||
# unordered cats have no min/max
|
||||
cat = Categorical(["a", "b", "c", "d"], ordered=False)
|
||||
pytest.raises(TypeError, lambda: cat.min())
|
||||
pytest.raises(TypeError, lambda: cat.max())
|
||||
|
||||
cat = Categorical(["a", "b", "c", "d"], ordered=True)
|
||||
_min = cat.min()
|
||||
_max = cat.max()
|
||||
assert _min == "a"
|
||||
assert _max == "d"
|
||||
|
||||
cat = Categorical(["a", "b", "c", "d"],
|
||||
categories=['d', 'c', 'b', 'a'], ordered=True)
|
||||
_min = cat.min()
|
||||
_max = cat.max()
|
||||
assert _min == "d"
|
||||
assert _max == "a"
|
||||
|
||||
cat = Categorical([np.nan, "b", "c", np.nan],
|
||||
categories=['d', 'c', 'b', 'a'], ordered=True)
|
||||
_min = cat.min()
|
||||
_max = cat.max()
|
||||
assert np.isnan(_min)
|
||||
assert _max == "b"
|
||||
|
||||
_min = cat.min(numeric_only=True)
|
||||
assert _min == "c"
|
||||
_max = cat.max(numeric_only=True)
|
||||
assert _max == "b"
|
||||
|
||||
cat = Categorical([np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1],
|
||||
ordered=True)
|
||||
_min = cat.min()
|
||||
_max = cat.max()
|
||||
assert np.isnan(_min)
|
||||
assert _max == 1
|
||||
|
||||
_min = cat.min(numeric_only=True)
|
||||
assert _min == 2
|
||||
_max = cat.max(numeric_only=True)
|
||||
assert _max == 1
|
||||
|
||||
@pytest.mark.parametrize("values,categories,exp_mode", [
|
||||
([1, 1, 2, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5]),
|
||||
([1, 1, 1, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5, 1]),
|
||||
([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1]),
|
||||
([np.nan, np.nan, np.nan, 4, 5], [5, 4, 3, 2, 1], [5, 4]),
|
||||
([np.nan, np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]),
|
||||
([np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4])])
|
||||
def test_mode(self, values, categories, exp_mode):
|
||||
s = Categorical(values, categories=categories, ordered=True)
|
||||
res = s.mode()
|
||||
exp = Categorical(exp_mode, categories=categories, ordered=True)
|
||||
tm.assert_categorical_equal(res, exp)
|
||||
|
||||
def test_searchsorted(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/8420
|
||||
# https://github.com/pandas-dev/pandas/issues/14522
|
||||
|
||||
c1 = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
|
||||
categories=['cheese', 'milk', 'apple', 'bread'],
|
||||
ordered=True)
|
||||
s1 = Series(c1)
|
||||
c2 = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
|
||||
categories=['cheese', 'milk', 'apple', 'bread'],
|
||||
ordered=False)
|
||||
s2 = Series(c2)
|
||||
|
||||
# Searching for single item argument, side='left' (default)
|
||||
res_cat = c1.searchsorted('apple')
|
||||
assert res_cat == 2
|
||||
assert is_scalar(res_cat)
|
||||
|
||||
res_ser = s1.searchsorted('apple')
|
||||
assert res_ser == 2
|
||||
assert is_scalar(res_ser)
|
||||
|
||||
# Searching for single item array, side='left' (default)
|
||||
res_cat = c1.searchsorted(['bread'])
|
||||
res_ser = s1.searchsorted(['bread'])
|
||||
exp = np.array([3], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(res_cat, exp)
|
||||
tm.assert_numpy_array_equal(res_ser, exp)
|
||||
|
||||
# Searching for several items array, side='right'
|
||||
res_cat = c1.searchsorted(['apple', 'bread'], side='right')
|
||||
res_ser = s1.searchsorted(['apple', 'bread'], side='right')
|
||||
exp = np.array([3, 5], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(res_cat, exp)
|
||||
tm.assert_numpy_array_equal(res_ser, exp)
|
||||
|
||||
# Searching for a single value that is not from the Categorical
|
||||
pytest.raises(KeyError, lambda: c1.searchsorted('cucumber'))
|
||||
pytest.raises(KeyError, lambda: s1.searchsorted('cucumber'))
|
||||
|
||||
# Searching for multiple values one of each is not from the Categorical
|
||||
pytest.raises(KeyError,
|
||||
lambda: c1.searchsorted(['bread', 'cucumber']))
|
||||
pytest.raises(KeyError,
|
||||
lambda: s1.searchsorted(['bread', 'cucumber']))
|
||||
|
||||
# searchsorted call for unordered Categorical
|
||||
pytest.raises(ValueError, lambda: c2.searchsorted('apple'))
|
||||
pytest.raises(ValueError, lambda: s2.searchsorted('apple'))
|
||||
|
||||
def test_unique(self):
|
||||
# categories are reordered based on value when ordered=False
|
||||
cat = Categorical(["a", "b"])
|
||||
exp = Index(["a", "b"])
|
||||
res = cat.unique()
|
||||
tm.assert_index_equal(res.categories, exp)
|
||||
tm.assert_categorical_equal(res, cat)
|
||||
|
||||
cat = Categorical(["a", "b", "a", "a"], categories=["a", "b", "c"])
|
||||
res = cat.unique()
|
||||
tm.assert_index_equal(res.categories, exp)
|
||||
tm.assert_categorical_equal(res, Categorical(exp))
|
||||
|
||||
cat = Categorical(["c", "a", "b", "a", "a"],
|
||||
categories=["a", "b", "c"])
|
||||
exp = Index(["c", "a", "b"])
|
||||
res = cat.unique()
|
||||
tm.assert_index_equal(res.categories, exp)
|
||||
exp_cat = Categorical(exp, categories=['c', 'a', 'b'])
|
||||
tm.assert_categorical_equal(res, exp_cat)
|
||||
|
||||
# nan must be removed
|
||||
cat = Categorical(["b", np.nan, "b", np.nan, "a"],
|
||||
categories=["a", "b", "c"])
|
||||
res = cat.unique()
|
||||
exp = Index(["b", "a"])
|
||||
tm.assert_index_equal(res.categories, exp)
|
||||
exp_cat = Categorical(["b", np.nan, "a"], categories=["b", "a"])
|
||||
tm.assert_categorical_equal(res, exp_cat)
|
||||
|
||||
def test_unique_ordered(self):
|
||||
# keep categories order when ordered=True
|
||||
cat = Categorical(['b', 'a', 'b'], categories=['a', 'b'], ordered=True)
|
||||
res = cat.unique()
|
||||
exp_cat = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True)
|
||||
tm.assert_categorical_equal(res, exp_cat)
|
||||
|
||||
cat = Categorical(['c', 'b', 'a', 'a'], categories=['a', 'b', 'c'],
|
||||
ordered=True)
|
||||
res = cat.unique()
|
||||
exp_cat = Categorical(['c', 'b', 'a'], categories=['a', 'b', 'c'],
|
||||
ordered=True)
|
||||
tm.assert_categorical_equal(res, exp_cat)
|
||||
|
||||
cat = Categorical(['b', 'a', 'a'], categories=['a', 'b', 'c'],
|
||||
ordered=True)
|
||||
res = cat.unique()
|
||||
exp_cat = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True)
|
||||
tm.assert_categorical_equal(res, exp_cat)
|
||||
|
||||
cat = Categorical(['b', 'b', np.nan, 'a'], categories=['a', 'b', 'c'],
|
||||
ordered=True)
|
||||
res = cat.unique()
|
||||
exp_cat = Categorical(['b', np.nan, 'a'], categories=['a', 'b'],
|
||||
ordered=True)
|
||||
tm.assert_categorical_equal(res, exp_cat)
|
||||
|
||||
def test_unique_index_series(self):
|
||||
c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1])
|
||||
# Categorical.unique sorts categories by appearance order
|
||||
# if ordered=False
|
||||
exp = Categorical([3, 1, 2], categories=[3, 1, 2])
|
||||
tm.assert_categorical_equal(c.unique(), exp)
|
||||
|
||||
tm.assert_index_equal(Index(c).unique(), Index(exp))
|
||||
tm.assert_categorical_equal(Series(c).unique(), exp)
|
||||
|
||||
c = Categorical([1, 1, 2, 2], categories=[3, 2, 1])
|
||||
exp = Categorical([1, 2], categories=[1, 2])
|
||||
tm.assert_categorical_equal(c.unique(), exp)
|
||||
tm.assert_index_equal(Index(c).unique(), Index(exp))
|
||||
tm.assert_categorical_equal(Series(c).unique(), exp)
|
||||
|
||||
c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1], ordered=True)
|
||||
# Categorical.unique keeps categories order if ordered=True
|
||||
exp = Categorical([3, 1, 2], categories=[3, 2, 1], ordered=True)
|
||||
tm.assert_categorical_equal(c.unique(), exp)
|
||||
|
||||
tm.assert_index_equal(Index(c).unique(), Index(exp))
|
||||
tm.assert_categorical_equal(Series(c).unique(), exp)
|
||||
|
||||
def test_shift(self):
|
||||
# GH 9416
|
||||
cat = Categorical(['a', 'b', 'c', 'd', 'a'])
|
||||
|
||||
# shift forward
|
||||
sp1 = cat.shift(1)
|
||||
xp1 = Categorical([np.nan, 'a', 'b', 'c', 'd'])
|
||||
tm.assert_categorical_equal(sp1, xp1)
|
||||
tm.assert_categorical_equal(cat[:-1], sp1[1:])
|
||||
|
||||
# shift back
|
||||
sn2 = cat.shift(-2)
|
||||
xp2 = Categorical(['c', 'd', 'a', np.nan, np.nan],
|
||||
categories=['a', 'b', 'c', 'd'])
|
||||
tm.assert_categorical_equal(sn2, xp2)
|
||||
tm.assert_categorical_equal(cat[2:], sn2[:-2])
|
||||
|
||||
# shift by zero
|
||||
tm.assert_categorical_equal(cat, cat.shift(0))
|
||||
|
||||
def test_nbytes(self):
|
||||
cat = Categorical([1, 2, 3])
|
||||
exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories
|
||||
assert cat.nbytes == exp
|
||||
|
||||
def test_memory_usage(self):
|
||||
cat = Categorical([1, 2, 3])
|
||||
|
||||
# .categories is an index, so we include the hashtable
|
||||
assert 0 < cat.nbytes <= cat.memory_usage()
|
||||
assert 0 < cat.nbytes <= cat.memory_usage(deep=True)
|
||||
|
||||
cat = Categorical(['foo', 'foo', 'bar'])
|
||||
assert cat.memory_usage(deep=True) > cat.nbytes
|
||||
|
||||
if not PYPY:
|
||||
# sys.getsizeof will call the .memory_usage with
|
||||
# deep=True, and add on some GC overhead
|
||||
diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
|
||||
assert abs(diff) < 100
|
||||
|
||||
def test_map(self):
|
||||
c = Categorical(list('ABABC'), categories=list('CBA'), ordered=True)
|
||||
result = c.map(lambda x: x.lower())
|
||||
exp = Categorical(list('ababc'), categories=list('cba'), ordered=True)
|
||||
tm.assert_categorical_equal(result, exp)
|
||||
|
||||
c = Categorical(list('ABABC'), categories=list('ABC'), ordered=False)
|
||||
result = c.map(lambda x: x.lower())
|
||||
exp = Categorical(list('ababc'), categories=list('abc'), ordered=False)
|
||||
tm.assert_categorical_equal(result, exp)
|
||||
|
||||
result = c.map(lambda x: 1)
|
||||
# GH 12766: Return an index not an array
|
||||
tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64)))
|
||||
|
||||
def test_validate_inplace(self):
|
||||
cat = Categorical(['A', 'B', 'B', 'C', 'A'])
|
||||
invalid_values = [1, "True", [1, 2, 3], 5.0]
|
||||
|
||||
for value in invalid_values:
|
||||
with pytest.raises(ValueError):
|
||||
cat.set_ordered(value=True, inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.as_ordered(inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.as_unordered(inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.set_categories(['X', 'Y', 'Z'], rename=True, inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.rename_categories(['X', 'Y', 'Z'], inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.reorder_categories(
|
||||
['X', 'Y', 'Z'], ordered=True, inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.add_categories(
|
||||
new_categories=['D', 'E', 'F'], inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.remove_categories(removals=['D', 'E', 'F'], inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.remove_unused_categories(inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.sort_values(inplace=value)
|
||||
|
||||
def test_isna(self):
|
||||
exp = np.array([False, False, True])
|
||||
c = Categorical(["a", "b", np.nan])
|
||||
res = c.isna()
|
||||
|
||||
tm.assert_numpy_array_equal(res, exp)
|
||||
@@ -0,0 +1,508 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Categorical, CategoricalIndex, DataFrame, Index, Series
|
||||
from pandas.core.arrays.categorical import _recode_for_categories
|
||||
from pandas.tests.arrays.categorical.common import TestCategorical
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalAPI(object):
|
||||
|
||||
def test_ordered_api(self):
|
||||
# GH 9347
|
||||
cat1 = Categorical(list('acb'), ordered=False)
|
||||
tm.assert_index_equal(cat1.categories, Index(['a', 'b', 'c']))
|
||||
assert not cat1.ordered
|
||||
|
||||
cat2 = Categorical(list('acb'), categories=list('bca'), ordered=False)
|
||||
tm.assert_index_equal(cat2.categories, Index(['b', 'c', 'a']))
|
||||
assert not cat2.ordered
|
||||
|
||||
cat3 = Categorical(list('acb'), ordered=True)
|
||||
tm.assert_index_equal(cat3.categories, Index(['a', 'b', 'c']))
|
||||
assert cat3.ordered
|
||||
|
||||
cat4 = Categorical(list('acb'), categories=list('bca'), ordered=True)
|
||||
tm.assert_index_equal(cat4.categories, Index(['b', 'c', 'a']))
|
||||
assert cat4.ordered
|
||||
|
||||
def test_set_ordered(self):
|
||||
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
cat2 = cat.as_unordered()
|
||||
assert not cat2.ordered
|
||||
cat2 = cat.as_ordered()
|
||||
assert cat2.ordered
|
||||
cat2.as_unordered(inplace=True)
|
||||
assert not cat2.ordered
|
||||
cat2.as_ordered(inplace=True)
|
||||
assert cat2.ordered
|
||||
|
||||
assert cat2.set_ordered(True).ordered
|
||||
assert not cat2.set_ordered(False).ordered
|
||||
cat2.set_ordered(True, inplace=True)
|
||||
assert cat2.ordered
|
||||
cat2.set_ordered(False, inplace=True)
|
||||
assert not cat2.ordered
|
||||
|
||||
# removed in 0.19.0
|
||||
msg = "can\'t set attribute"
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
cat.ordered = True
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
cat.ordered = False
|
||||
|
||||
def test_rename_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"])
|
||||
|
||||
# inplace=False: the old one must not be changed
|
||||
res = cat.rename_categories([1, 2, 3])
|
||||
tm.assert_numpy_array_equal(res.__array__(), np.array([1, 2, 3, 1],
|
||||
dtype=np.int64))
|
||||
tm.assert_index_equal(res.categories, Index([1, 2, 3]))
|
||||
|
||||
exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_)
|
||||
tm.assert_numpy_array_equal(cat.__array__(), exp_cat)
|
||||
|
||||
exp_cat = Index(["a", "b", "c"])
|
||||
tm.assert_index_equal(cat.categories, exp_cat)
|
||||
|
||||
# GH18862 (let rename_categories take callables)
|
||||
result = cat.rename_categories(lambda x: x.upper())
|
||||
expected = Categorical(["A", "B", "C", "A"])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
# and now inplace
|
||||
res = cat.rename_categories([1, 2, 3], inplace=True)
|
||||
assert res is None
|
||||
tm.assert_numpy_array_equal(cat.__array__(), np.array([1, 2, 3, 1],
|
||||
dtype=np.int64))
|
||||
tm.assert_index_equal(cat.categories, Index([1, 2, 3]))
|
||||
|
||||
# Lengthen
|
||||
with pytest.raises(ValueError):
|
||||
cat.rename_categories([1, 2, 3, 4])
|
||||
|
||||
# Shorten
|
||||
with pytest.raises(ValueError):
|
||||
cat.rename_categories([1, 2])
|
||||
|
||||
def test_rename_categories_series(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/17981
|
||||
c = Categorical(['a', 'b'])
|
||||
xpr = "Treating Series 'new_categories' as a list-like "
|
||||
with tm.assert_produces_warning(FutureWarning) as rec:
|
||||
result = c.rename_categories(Series([0, 1]))
|
||||
|
||||
assert len(rec) == 1
|
||||
assert xpr in str(rec[0].message)
|
||||
expected = Categorical([0, 1])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_rename_categories_dict(self):
|
||||
# GH 17336
|
||||
cat = Categorical(['a', 'b', 'c', 'd'])
|
||||
res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1})
|
||||
expected = Index([4, 3, 2, 1])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
# Test for inplace
|
||||
res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1},
|
||||
inplace=True)
|
||||
assert res is None
|
||||
tm.assert_index_equal(cat.categories, expected)
|
||||
|
||||
# Test for dicts of smaller length
|
||||
cat = Categorical(['a', 'b', 'c', 'd'])
|
||||
res = cat.rename_categories({'a': 1, 'c': 3})
|
||||
|
||||
expected = Index([1, 'b', 3, 'd'])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
# Test for dicts with bigger length
|
||||
cat = Categorical(['a', 'b', 'c', 'd'])
|
||||
res = cat.rename_categories({'a': 1, 'b': 2, 'c': 3,
|
||||
'd': 4, 'e': 5, 'f': 6})
|
||||
expected = Index([1, 2, 3, 4])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
# Test for dicts with no items from old categories
|
||||
cat = Categorical(['a', 'b', 'c', 'd'])
|
||||
res = cat.rename_categories({'f': 1, 'g': 3})
|
||||
|
||||
expected = Index(['a', 'b', 'c', 'd'])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
def test_reorder_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
old = cat.copy()
|
||||
new = Categorical(["a", "b", "c", "a"], categories=["c", "b", "a"],
|
||||
ordered=True)
|
||||
|
||||
# first inplace == False
|
||||
res = cat.reorder_categories(["c", "b", "a"])
|
||||
# cat must be the same as before
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
# only res is changed
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
# inplace == True
|
||||
res = cat.reorder_categories(["c", "b", "a"], inplace=True)
|
||||
assert res is None
|
||||
tm.assert_categorical_equal(cat, new)
|
||||
|
||||
# not all "old" included in "new"
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.reorder_categories(["a"])
|
||||
|
||||
# still not all "old" in "new"
|
||||
with pytest.raises(ValueError):
|
||||
cat.reorder_categories(["a", "b", "d"])
|
||||
|
||||
# all "old" included in "new", but too long
|
||||
with pytest.raises(ValueError):
|
||||
cat.reorder_categories(["a", "b", "c", "d"])
|
||||
|
||||
def test_add_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
old = cat.copy()
|
||||
new = Categorical(["a", "b", "c", "a"],
|
||||
categories=["a", "b", "c", "d"], ordered=True)
|
||||
|
||||
# first inplace == False
|
||||
res = cat.add_categories("d")
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
res = cat.add_categories(["d"])
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
# inplace == True
|
||||
res = cat.add_categories("d", inplace=True)
|
||||
tm.assert_categorical_equal(cat, new)
|
||||
assert res is None
|
||||
|
||||
# new is in old categories
|
||||
with pytest.raises(ValueError):
|
||||
cat.add_categories(["d"])
|
||||
|
||||
# GH 9927
|
||||
cat = Categorical(list("abc"), ordered=True)
|
||||
expected = Categorical(
|
||||
list("abc"), categories=list("abcde"), ordered=True)
|
||||
# test with Series, np.array, index, list
|
||||
res = cat.add_categories(Series(["d", "e"]))
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
res = cat.add_categories(np.array(["d", "e"]))
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
res = cat.add_categories(Index(["d", "e"]))
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
res = cat.add_categories(["d", "e"])
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
|
||||
def test_set_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
exp_categories = Index(["c", "b", "a"])
|
||||
exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
|
||||
|
||||
res = cat.set_categories(["c", "b", "a"], inplace=True)
|
||||
tm.assert_index_equal(cat.categories, exp_categories)
|
||||
tm.assert_numpy_array_equal(cat.__array__(), exp_values)
|
||||
assert res is None
|
||||
|
||||
res = cat.set_categories(["a", "b", "c"])
|
||||
# cat must be the same as before
|
||||
tm.assert_index_equal(cat.categories, exp_categories)
|
||||
tm.assert_numpy_array_equal(cat.__array__(), exp_values)
|
||||
# only res is changed
|
||||
exp_categories_back = Index(["a", "b", "c"])
|
||||
tm.assert_index_equal(res.categories, exp_categories_back)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp_values)
|
||||
|
||||
# not all "old" included in "new" -> all not included ones are now
|
||||
# np.nan
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
res = cat.set_categories(["a"])
|
||||
tm.assert_numpy_array_equal(res.codes, np.array([0, -1, -1, 0],
|
||||
dtype=np.int8))
|
||||
|
||||
# still not all "old" in "new"
|
||||
res = cat.set_categories(["a", "b", "d"])
|
||||
tm.assert_numpy_array_equal(res.codes, np.array([0, 1, -1, 0],
|
||||
dtype=np.int8))
|
||||
tm.assert_index_equal(res.categories, Index(["a", "b", "d"]))
|
||||
|
||||
# all "old" included in "new"
|
||||
cat = cat.set_categories(["a", "b", "c", "d"])
|
||||
exp_categories = Index(["a", "b", "c", "d"])
|
||||
tm.assert_index_equal(cat.categories, exp_categories)
|
||||
|
||||
# internals...
|
||||
c = Categorical([1, 2, 3, 4, 1], categories=[1, 2, 3, 4], ordered=True)
|
||||
tm.assert_numpy_array_equal(c._codes, np.array([0, 1, 2, 3, 0],
|
||||
dtype=np.int8))
|
||||
tm.assert_index_equal(c.categories, Index([1, 2, 3, 4]))
|
||||
|
||||
exp = np.array([1, 2, 3, 4, 1], dtype=np.int64)
|
||||
tm.assert_numpy_array_equal(c.get_values(), exp)
|
||||
|
||||
# all "pointers" to '4' must be changed from 3 to 0,...
|
||||
c = c.set_categories([4, 3, 2, 1])
|
||||
|
||||
# positions are changed
|
||||
tm.assert_numpy_array_equal(c._codes, np.array([3, 2, 1, 0, 3],
|
||||
dtype=np.int8))
|
||||
|
||||
# categories are now in new order
|
||||
tm.assert_index_equal(c.categories, Index([4, 3, 2, 1]))
|
||||
|
||||
# output is the same
|
||||
exp = np.array([1, 2, 3, 4, 1], dtype=np.int64)
|
||||
tm.assert_numpy_array_equal(c.get_values(), exp)
|
||||
assert c.min() == 4
|
||||
assert c.max() == 1
|
||||
|
||||
# set_categories should set the ordering if specified
|
||||
c2 = c.set_categories([4, 3, 2, 1], ordered=False)
|
||||
assert not c2.ordered
|
||||
|
||||
tm.assert_numpy_array_equal(c.get_values(), c2.get_values())
|
||||
|
||||
# set_categories should pass thru the ordering
|
||||
c2 = c.set_ordered(False).set_categories([4, 3, 2, 1])
|
||||
assert not c2.ordered
|
||||
|
||||
tm.assert_numpy_array_equal(c.get_values(), c2.get_values())
|
||||
|
||||
@pytest.mark.parametrize('values, categories, new_categories', [
|
||||
# No NaNs, same cats, same order
|
||||
(['a', 'b', 'a'], ['a', 'b'], ['a', 'b'],),
|
||||
# No NaNs, same cats, different order
|
||||
(['a', 'b', 'a'], ['a', 'b'], ['b', 'a'],),
|
||||
# Same, unsorted
|
||||
(['b', 'a', 'a'], ['a', 'b'], ['a', 'b'],),
|
||||
# No NaNs, same cats, different order
|
||||
(['b', 'a', 'a'], ['a', 'b'], ['b', 'a'],),
|
||||
# NaNs
|
||||
(['a', 'b', 'c'], ['a', 'b'], ['a', 'b']),
|
||||
(['a', 'b', 'c'], ['a', 'b'], ['b', 'a']),
|
||||
(['b', 'a', 'c'], ['a', 'b'], ['a', 'b']),
|
||||
(['b', 'a', 'c'], ['a', 'b'], ['a', 'b']),
|
||||
# Introduce NaNs
|
||||
(['a', 'b', 'c'], ['a', 'b'], ['a']),
|
||||
(['a', 'b', 'c'], ['a', 'b'], ['b']),
|
||||
(['b', 'a', 'c'], ['a', 'b'], ['a']),
|
||||
(['b', 'a', 'c'], ['a', 'b'], ['a']),
|
||||
# No overlap
|
||||
(['a', 'b', 'c'], ['a', 'b'], ['d', 'e']),
|
||||
])
|
||||
@pytest.mark.parametrize('ordered', [True, False])
|
||||
def test_set_categories_many(self, values, categories, new_categories,
|
||||
ordered):
|
||||
c = Categorical(values, categories)
|
||||
expected = Categorical(values, new_categories, ordered)
|
||||
result = c.set_categories(new_categories, ordered=ordered)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_set_categories_rename_less(self):
|
||||
# GH 24675
|
||||
cat = Categorical(['A', 'B'])
|
||||
result = cat.set_categories(['A'], rename=True)
|
||||
expected = Categorical(['A', np.nan])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_set_categories_private(self):
|
||||
cat = Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c', 'd'])
|
||||
cat._set_categories(['a', 'c', 'd', 'e'])
|
||||
expected = Categorical(['a', 'c', 'd'], categories=list('acde'))
|
||||
tm.assert_categorical_equal(cat, expected)
|
||||
|
||||
# fastpath
|
||||
cat = Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c', 'd'])
|
||||
cat._set_categories(['a', 'c', 'd', 'e'], fastpath=True)
|
||||
expected = Categorical(['a', 'c', 'd'], categories=list('acde'))
|
||||
tm.assert_categorical_equal(cat, expected)
|
||||
|
||||
def test_remove_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
old = cat.copy()
|
||||
new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"],
|
||||
ordered=True)
|
||||
|
||||
# first inplace == False
|
||||
res = cat.remove_categories("c")
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
res = cat.remove_categories(["c"])
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
# inplace == True
|
||||
res = cat.remove_categories("c", inplace=True)
|
||||
tm.assert_categorical_equal(cat, new)
|
||||
assert res is None
|
||||
|
||||
# removal is not in categories
|
||||
with pytest.raises(ValueError):
|
||||
cat.remove_categories(["c"])
|
||||
|
||||
def test_remove_unused_categories(self):
    """remove_unused_categories drops categories with no occurrences,
    preserving NaN codes (-1) and the observed values."""
    c = Categorical(["a", "b", "c", "d", "a"],
                    categories=["a", "b", "c", "d", "e"])
    exp_categories_all = Index(["a", "b", "c", "d", "e"])
    exp_categories_dropped = Index(["a", "b", "c", "d"])

    tm.assert_index_equal(c.categories, exp_categories_all)

    # Default (non-inplace): 'e' is unused and dropped; original untouched.
    res = c.remove_unused_categories()
    tm.assert_index_equal(res.categories, exp_categories_dropped)
    tm.assert_index_equal(c.categories, exp_categories_all)

    # inplace=True mutates and returns None (pandas convention).
    res = c.remove_unused_categories(inplace=True)
    tm.assert_index_equal(c.categories, exp_categories_dropped)
    assert res is None

    # with NaN values (GH11599)
    # NaN is coded as -1 and must survive the re-coding untouched.
    c = Categorical(["a", "b", "c", np.nan],
                    categories=["a", "b", "c", "d", "e"])
    res = c.remove_unused_categories()
    tm.assert_index_equal(res.categories,
                          Index(np.array(["a", "b", "c"])))
    exp_codes = np.array([0, 1, 2, -1], dtype=np.int8)
    tm.assert_numpy_array_equal(res.codes, exp_codes)
    tm.assert_index_equal(c.categories, exp_categories_all)

    # Codes are re-based onto the compacted category list.
    # NOTE(review): the list comparison below only passes because the same
    # np.nan object appears on both sides (list equality short-circuits
    # on identity before trying == on NaN).
    val = ['F', np.nan, 'D', 'B', 'D', 'F', np.nan]
    cat = Categorical(values=val, categories=list('ABCDEFG'))
    out = cat.remove_unused_categories()
    tm.assert_index_equal(out.categories, Index(['B', 'D', 'F']))
    exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8)
    tm.assert_numpy_array_equal(out.codes, exp_codes)
    assert out.get_values().tolist() == val

    # Larger randomized round-trip: values must be preserved exactly.
    # NOTE(review): np.random is unseeded here — deterministic for the
    # asserted property, but not reproducible run-to-run.
    alpha = list('abcdefghijklmnopqrstuvwxyz')
    val = np.random.choice(alpha[::2], 10000).astype('object')
    val[np.random.choice(len(val), 100)] = np.nan

    cat = Categorical(values=val, categories=alpha)
    out = cat.remove_unused_categories()
    assert out.get_values().tolist() == val.tolist()
|
||||
|
||||
|
||||
class TestCategoricalAPIWithFactor(TestCategorical):
    """API tests that rely on the ordered `self.factor` fixture built in
    TestCategorical.setup_method (values 'a','b','b','a','a','c','c','c')."""

    def test_describe(self):
        """describe() returns a counts/freqs DataFrame indexed by category,
        including zero rows for unused categories and a NaN row."""
        # string type
        desc = self.factor.describe()
        assert self.factor.ordered
        exp_index = CategoricalIndex(['a', 'b', 'c'], name='categories',
                                     ordered=self.factor.ordered)
        # 8 values total: 3x'a', 2x'b', 3x'c'.
        expected = DataFrame({'counts': [3, 2, 3],
                              'freqs': [3 / 8., 2 / 8., 3 / 8.]},
                             index=exp_index)
        tm.assert_frame_equal(desc, expected)

        # check unused categories
        # An unused 'd' category must show up with count 0, freq 0.
        cat = self.factor.copy()
        cat.set_categories(["a", "b", "c", "d"], inplace=True)
        desc = cat.describe()

        exp_index = CategoricalIndex(
            list('abcd'), ordered=self.factor.ordered, name='categories')
        expected = DataFrame({'counts': [3, 2, 3, 0],
                              'freqs': [3 / 8., 2 / 8., 3 / 8., 0]},
                             index=exp_index)
        tm.assert_frame_equal(desc, expected)

        # check an integer one
        cat = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1])
        desc = cat.describe()
        exp_index = CategoricalIndex([1, 2, 3], ordered=cat.ordered,
                                     name='categories')
        expected = DataFrame({'counts': [5, 3, 3],
                              'freqs': [5 / 11., 3 / 11., 3 / 11.]},
                             index=exp_index)
        tm.assert_frame_equal(desc, expected)

        # https://github.com/pandas-dev/pandas/issues/3678
        # describe should work with NaN
        # NaN gets its own row in the index but is not a category.
        cat = Categorical([np.nan, 1, 2, 2])
        desc = cat.describe()
        expected = DataFrame({'counts': [1, 2, 1],
                              'freqs': [1 / 4., 2 / 4., 1 / 4.]},
                             index=CategoricalIndex([1, 2, np.nan],
                                                    categories=[1, 2],
                                                    name='categories'))
        tm.assert_frame_equal(desc, expected)

    def test_set_categories_inplace(self):
        """set_categories(..., inplace=True) mutates the categories index."""
        cat = self.factor.copy()
        cat.set_categories(['a', 'b', 'c', 'd'], inplace=True)
        tm.assert_index_equal(cat.categories, Index(['a', 'b', 'c', 'd']))
|
||||
|
||||
|
||||
class TestPrivateCategoricalAPI(object):
    """Tests for private Categorical internals: code immutability and
    `_recode_for_categories` (imported at file level)."""

    def test_codes_immutable(self):
        """`.codes` is a read-only view; `._codes` remains writeable."""

        # Codes should be read only
        c = Categorical(["a", "b", "c", "a", np.nan])
        exp = np.array([0, 1, 2, 0, -1], dtype='int8')
        tm.assert_numpy_array_equal(c.codes, exp)

        # Assignments to codes should raise
        with pytest.raises(ValueError):
            c.codes = np.array([0, 1, 2, 0, 1], dtype='int8')

        # changes in the codes array should raise
        # The returned view is flagged non-writeable by numpy.
        codes = c.codes

        with pytest.raises(ValueError):
            codes[4] = 1

        # But even after getting the codes, the original array should still be
        # writeable!
        c[4] = "a"
        exp = np.array([0, 1, 2, 0, 0], dtype='int8')
        tm.assert_numpy_array_equal(c.codes, exp)
        # Direct writes through the private backing array are allowed.
        c._codes[4] = 2
        exp = np.array([0, 1, 2, 0, 2], dtype='int8')
        tm.assert_numpy_array_equal(c.codes, exp)

    # Each case: (input codes, old categories, new categories, expected codes);
    # codes absent from `new` map to -1 (NaN).
    @pytest.mark.parametrize('codes, old, new, expected', [
        ([0, 1], ['a', 'b'], ['a', 'b'], [0, 1]),
        ([0, 1], ['b', 'a'], ['b', 'a'], [0, 1]),
        ([0, 1], ['a', 'b'], ['b', 'a'], [1, 0]),
        ([0, 1], ['b', 'a'], ['a', 'b'], [1, 0]),
        ([0, 1, 0, 1], ['a', 'b'], ['a', 'b', 'c'], [0, 1, 0, 1]),
        ([0, 1, 2, 2], ['a', 'b', 'c'], ['a', 'b'], [0, 1, -1, -1]),
        ([0, 1, -1], ['a', 'b', 'c'], ['a', 'b', 'c'], [0, 1, -1]),
        ([0, 1, -1], ['a', 'b', 'c'], ['b'], [-1, 0, -1]),
        ([0, 1, -1], ['a', 'b', 'c'], ['d'], [-1, -1, -1]),
        ([0, 1, -1], ['a', 'b', 'c'], [], [-1, -1, -1]),
        ([-1, -1], [], ['a', 'b'], [-1, -1]),
        ([1, 0], ['b', 'a'], ['a', 'b'], [0, 1]),
    ])
    def test_recode_to_categories(self, codes, old, new, expected):
        """_recode_for_categories maps codes from `old` onto `new` labels."""
        codes = np.asanyarray(codes, dtype=np.int8)
        expected = np.asanyarray(expected, dtype=np.int8)
        old = Index(old)
        new = Index(new)
        result = _recode_for_categories(codes, old, new)
        tm.assert_numpy_array_equal(result, expected)

    def test_recode_to_categories_large(self):
        """Recoding works above the small-category fastpath threshold
        (reversing 1000 categories reverses every code)."""
        N = 1000
        codes = np.arange(N)
        old = Index(codes)
        expected = np.arange(N - 1, -1, -1, dtype=np.int16)
        new = Index(expected)
        result = _recode_for_categories(codes, old, new)
        tm.assert_numpy_array_equal(result, expected)
|
||||
+574
@@ -0,0 +1,574 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical, CategoricalIndex, DatetimeIndex, Index, Interval,
|
||||
IntervalIndex, NaT, Series, Timestamp, date_range, period_range,
|
||||
timedelta_range)
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalConstructors(object):
    """Constructor behavior of Categorical / Categorical.from_codes /
    Categorical._from_inferred_categories: input coercion, dtype inference,
    validation errors, and round-tripping from pandas containers."""

    def test_validate_ordered(self):
        """`ordered` must be a real boolean, not array-like (gh-14058)."""
        # see gh-14058
        exp_msg = "'ordered' must either be 'True' or 'False'"
        exp_err = TypeError

        # This should be a boolean.
        ordered = np.array([0, 1, 2])

        with pytest.raises(exp_err, match=exp_msg):
            Categorical([1, 2, 3], ordered=ordered)

        with pytest.raises(exp_err, match=exp_msg):
            Categorical.from_codes([0, 0, 1], categories=['a', 'b', 'c'],
                                   ordered=ordered)

    def test_constructor_empty(self):
        """Empty values give an empty (or explicitly supplied) categories index."""
        # GH 17248
        c = Categorical([])
        expected = Index([])
        tm.assert_index_equal(c.categories, expected)

        c = Categorical([], categories=[1, 2, 3])
        expected = pd.Int64Index([1, 2, 3])
        tm.assert_index_equal(c.categories, expected)

    def test_constructor_empty_boolean(self):
        """Boolean categories survive construction from empty values."""
        # see gh-22702
        cat = pd.Categorical([], categories=[True, False])
        categories = sorted(cat.categories.tolist())
        assert categories == [False, True]

    def test_constructor_tuples(self):
        """Tuple values are kept as tuples, not expanded into a MultiIndex."""
        values = np.array([(1,), (1, 2), (1,), (1, 2)], dtype=object)
        result = Categorical(values)
        expected = Index([(1,), (1, 2)], tupleize_cols=False)
        tm.assert_index_equal(result.categories, expected)
        assert result.ordered is False

    def test_constructor_tuples_datetimes(self):
        # numpy will auto reshape when all of the tuples are the
        # same len, so add an extra one with 2 items and slice it off
        values = np.array([(Timestamp('2010-01-01'),),
                           (Timestamp('2010-01-02'),),
                           (Timestamp('2010-01-01'),),
                           (Timestamp('2010-01-02'),),
                           ('a', 'b')], dtype=object)[:-1]
        result = Categorical(values)
        expected = Index([(Timestamp('2010-01-01'),),
                          (Timestamp('2010-01-02'),)], tupleize_cols=False)
        tm.assert_index_equal(result.categories, expected)

    def test_constructor_unsortable(self):
        """Mixed unsortable values are fine unordered, raise when ordered=True."""

        # it works!
        arr = np.array([1, 2, 3, datetime.now()], dtype='O')
        factor = Categorical(arr, ordered=False)
        assert not factor.ordered

        # this however will raise as cannot be sorted
        msg = ("'values' is not ordered, please explicitly specify the "
               "categories order by passing in a categories argument.")
        with pytest.raises(TypeError, match=msg):
            Categorical(arr, ordered=True)

    def test_constructor_interval(self):
        """Interval values produce an IntervalIndex of categories."""
        result = Categorical([Interval(1, 2), Interval(2, 3), Interval(3, 6)],
                             ordered=True)
        ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)])
        exp = Categorical(ii, ordered=True)
        tm.assert_categorical_equal(result, exp)
        tm.assert_index_equal(result.categories, ii)

    def test_constructor(self):
        """Grab-bag of core constructor behavior: category uniqueness,
        default unordered, Categorical/Series inputs, dtype inference of
        categories, scalar coercion, and no spurious warnings."""

        exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_)
        c1 = Categorical(exp_arr)
        tm.assert_numpy_array_equal(c1.__array__(), exp_arr)
        c2 = Categorical(exp_arr, categories=["a", "b", "c"])
        tm.assert_numpy_array_equal(c2.__array__(), exp_arr)
        # Category order does not change the dense values.
        c2 = Categorical(exp_arr, categories=["c", "b", "a"])
        tm.assert_numpy_array_equal(c2.__array__(), exp_arr)

        # categories must be unique
        msg = "Categorical categories must be unique"
        with pytest.raises(ValueError, match=msg):
            Categorical([1, 2], [1, 2, 2])

        with pytest.raises(ValueError, match=msg):
            Categorical(["a", "b"], ["a", "b", "b"])

        # The default should be unordered
        c1 = Categorical(["a", "b", "c", "a"])
        assert not c1.ordered

        # Categorical as input
        c1 = Categorical(["a", "b", "c", "a"])
        c2 = Categorical(c1)
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        c2 = Categorical(c1)
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
        c2 = Categorical(c1)
        tm.assert_categorical_equal(c1, c2)

        # Re-categorizing a Categorical keeps values, adopts new categories.
        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
        c2 = Categorical(c1, categories=["a", "b", "c"])
        tm.assert_numpy_array_equal(c1.__array__(), c2.__array__())
        tm.assert_index_equal(c2.categories, Index(["a", "b", "c"]))

        # Series of dtype category
        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        c2 = Categorical(Series(c1))
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
        c2 = Categorical(Series(c1))
        tm.assert_categorical_equal(c1, c2)

        # Series
        c1 = Categorical(["a", "b", "c", "a"])
        c2 = Categorical(Series(["a", "b", "c", "a"]))
        tm.assert_categorical_equal(c1, c2)

        c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        c2 = Categorical(Series(["a", "b", "c", "a"]),
                         categories=["a", "b", "c", "d"])
        tm.assert_categorical_equal(c1, c2)

        # This should result in integer categories, not float!
        cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
        assert is_integer_dtype(cat.categories)

        # https://github.com/pandas-dev/pandas/issues/3678
        cat = Categorical([np.nan, 1, 2, 3])
        assert is_integer_dtype(cat.categories)

        # this should result in floats
        cat = Categorical([np.nan, 1, 2., 3])
        assert is_float_dtype(cat.categories)

        cat = Categorical([np.nan, 1., 2., 3.])
        assert is_float_dtype(cat.categories)

        # This doesn't work -> this would probably need some kind of "remember
        # the original type" feature to try to cast the array interface result
        # to...

        # vals = np.asarray(cat[cat.notna()])
        # assert is_integer_dtype(vals)

        # corner cases
        cat = Categorical([1])
        assert len(cat.categories) == 1
        assert cat.categories[0] == 1
        assert len(cat.codes) == 1
        assert cat.codes[0] == 0

        cat = Categorical(["a"])
        assert len(cat.categories) == 1
        assert cat.categories[0] == "a"
        assert len(cat.codes) == 1
        assert cat.codes[0] == 0

        # Scalars should be converted to lists
        # NOTE(review): scalar input was deprecated/removed in later pandas.
        cat = Categorical(1)
        assert len(cat.categories) == 1
        assert cat.categories[0] == 1
        assert len(cat.codes) == 1
        assert cat.codes[0] == 0

        # two arrays
        # - when the first is an integer dtype and the second is not
        # - when the resulting codes are all -1/NaN
        with tm.assert_produces_warning(None):
            c_old = Categorical([0, 1, 2, 0, 1, 2],
                                categories=["a", "b", "c"])  # noqa

        with tm.assert_produces_warning(None):
            c_old = Categorical([0, 1, 2, 0, 1, 2],  # noqa
                                categories=[3, 4, 5])

        # the next one are from the old docs
        with tm.assert_produces_warning(None):
            c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3])  # noqa
            cat = Categorical([1, 2], categories=[1, 2, 3])

        # this is a legitimate constructor
        with tm.assert_produces_warning(None):
            c = Categorical(np.array([], dtype='int64'),  # noqa
                            categories=[3, 2, 1], ordered=True)

    def test_constructor_not_sequence(self):
        """A bare string is not accepted for `categories`."""
        # https://github.com/pandas-dev/pandas/issues/16022
        msg = r"^Parameter 'categories' must be list-like, was"
        with pytest.raises(TypeError, match=msg):
            Categorical(['a', 'b'], categories='a')

    def test_constructor_with_null(self):
        """NaN/None/NaT may appear in values but never in categories."""

        # Cannot have NaN in categories
        # NOTE(review): "Categorial" is the historical (misspelled) message
        # emitted by pandas itself — the match string must mirror it.
        msg = "Categorial categories cannot be null"
        with pytest.raises(ValueError, match=msg):
            Categorical([np.nan, "a", "b", "c"],
                        categories=[np.nan, "a", "b", "c"])

        with pytest.raises(ValueError, match=msg):
            Categorical([None, "a", "b", "c"],
                        categories=[None, "a", "b", "c"])

        with pytest.raises(ValueError, match=msg):
            Categorical(DatetimeIndex(['nat', '20160101']),
                        categories=[NaT, Timestamp('20160101')])

    def test_constructor_with_index(self):
        """Construction from a CategoricalIndex round-trips its values."""
        ci = CategoricalIndex(list('aabbca'), categories=list('cab'))
        tm.assert_categorical_equal(ci.values, Categorical(ci))

        ci = CategoricalIndex(list('aabbca'), categories=list('cab'))
        tm.assert_categorical_equal(ci.values,
                                    Categorical(ci.astype(object),
                                                categories=ci.categories))

    def test_constructor_with_generator(self):
        # This was raising an Error in isna(single_val).any() because isna
        # returned a scalar for a generator
        xrange = range

        exp = Categorical([0, 1, 2])
        cat = Categorical((x for x in [0, 1, 2]))
        tm.assert_categorical_equal(cat, exp)
        cat = Categorical(xrange(3))
        tm.assert_categorical_equal(cat, exp)

        # This uses xrange internally
        from pandas.core.index import MultiIndex
        MultiIndex.from_product([range(5), ['a', 'b', 'c']])

        # check that categories accept generators and sequences
        cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2]))
        tm.assert_categorical_equal(cat, exp)
        cat = Categorical([0, 1, 2], categories=xrange(3))
        tm.assert_categorical_equal(cat, exp)

    @pytest.mark.parametrize("dtl", [
        date_range("1995-01-01 00:00:00", periods=5, freq="s"),
        date_range("1995-01-01 00:00:00", periods=5,
                   freq="s", tz="US/Eastern"),
        timedelta_range("1 day", periods=5, freq="s")
    ])
    def test_constructor_with_datetimelike(self, dtl):
        """Datetimelike values keep their index type (freq discarded);
        NaT becomes code -1 and shows up in the repr."""
        # see gh-12077
        # constructor with a datetimelike and NaT

        s = Series(dtl)
        c = Categorical(s)

        expected = type(dtl)(s)
        expected.freq = None

        tm.assert_index_equal(c.categories, expected)
        tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype="int8"))

        # with NaT
        s2 = s.copy()
        s2.iloc[-1] = NaT
        c = Categorical(s2)

        expected = type(dtl)(s2.dropna())
        expected.freq = None

        tm.assert_index_equal(c.categories, expected)

        exp = np.array([0, 1, 2, 3, -1], dtype=np.int8)
        tm.assert_numpy_array_equal(c.codes, exp)

        result = repr(c)
        assert "NaT" in result

    def test_constructor_from_index_series_datetimetz(self):
        """tz-aware DatetimeIndex/Series become tz-aware categories."""
        idx = date_range('2015-01-01 10:00', freq='D', periods=3,
                         tz='US/Eastern')
        result = Categorical(idx)
        tm.assert_index_equal(result.categories, idx)

        result = Categorical(Series(idx))
        tm.assert_index_equal(result.categories, idx)

    def test_constructor_from_index_series_timedelta(self):
        """TimedeltaIndex/Series round-trip into categories."""
        idx = timedelta_range('1 days', freq='D', periods=3)
        result = Categorical(idx)
        tm.assert_index_equal(result.categories, idx)

        result = Categorical(Series(idx))
        tm.assert_index_equal(result.categories, idx)

    def test_constructor_from_index_series_period(self):
        """PeriodIndex/Series round-trip into categories."""
        idx = period_range('2015-01-01', freq='D', periods=3)
        result = Categorical(idx)
        tm.assert_index_equal(result.categories, idx)

        result = Categorical(Series(idx))
        tm.assert_index_equal(result.categories, idx)

    def test_constructor_invariant(self):
        """Categorical(Categorical(x)) is idempotent across dtypes (GH 14190)."""
        # GH 14190
        vals = [
            np.array([1., 1.2, 1.8, np.nan]),
            np.array([1, 2, 3], dtype='int64'),
            ['a', 'b', 'c', np.nan],
            [pd.Period('2014-01'), pd.Period('2014-02'), NaT],
            [Timestamp('2014-01-01'), Timestamp('2014-01-02'), NaT],
            [Timestamp('2014-01-01', tz='US/Eastern'),
             Timestamp('2014-01-02', tz='US/Eastern'), NaT],
        ]
        for val in vals:
            c = Categorical(val)
            c2 = Categorical(c)
            tm.assert_categorical_equal(c, c2)

    @pytest.mark.parametrize('ordered', [True, False])
    def test_constructor_with_dtype(self, ordered):
        """A CategoricalDtype fully specifies categories and orderedness."""
        categories = ['b', 'a', 'c']
        dtype = CategoricalDtype(categories, ordered=ordered)
        result = Categorical(['a', 'b', 'a', 'c'], dtype=dtype)
        expected = Categorical(['a', 'b', 'a', 'c'], categories=categories,
                               ordered=ordered)
        tm.assert_categorical_equal(result, expected)
        assert result.ordered is ordered

    def test_constructor_dtype_and_others_raises(self):
        """Passing dtype together with categories/ordered is ambiguous."""
        dtype = CategoricalDtype(['a', 'b'], ordered=True)
        msg = "Cannot specify `categories` or `ordered` together with `dtype`."
        with pytest.raises(ValueError, match=msg):
            Categorical(['a', 'b'], categories=['a', 'b'], dtype=dtype)

        with pytest.raises(ValueError, match=msg):
            Categorical(['a', 'b'], ordered=True, dtype=dtype)

        # Even an explicit ordered=False conflicts with the dtype.
        with pytest.raises(ValueError, match=msg):
            Categorical(['a', 'b'], ordered=False, dtype=dtype)

    @pytest.mark.parametrize('categories', [
        None, ['a', 'b'], ['a', 'c'],
    ])
    @pytest.mark.parametrize('ordered', [True, False])
    def test_constructor_str_category(self, categories, ordered):
        """dtype='category' (the string) behaves like omitting dtype."""
        result = Categorical(['a', 'b'], categories=categories,
                             ordered=ordered, dtype='category')
        expected = Categorical(['a', 'b'], categories=categories,
                               ordered=ordered)
        tm.assert_categorical_equal(result, expected)

    def test_constructor_str_unknown(self):
        """Any dtype string other than 'category' is rejected."""
        with pytest.raises(ValueError, match="Unknown dtype"):
            Categorical([1, 2], dtype="foo")

    def test_constructor_from_categorical_with_dtype(self):
        dtype = CategoricalDtype(['a', 'b', 'c'], ordered=True)
        values = Categorical(['a', 'b', 'd'])
        result = Categorical(values, dtype=dtype)
        # We use dtype.categories, not values.categories
        expected = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'c'],
                               ordered=True)
        tm.assert_categorical_equal(result, expected)

    def test_constructor_from_categorical_with_unknown_dtype(self):
        dtype = CategoricalDtype(None, ordered=True)
        values = Categorical(['a', 'b', 'd'])
        result = Categorical(values, dtype=dtype)
        # We use values.categories, not dtype.categories
        expected = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'd'],
                               ordered=True)
        tm.assert_categorical_equal(result, expected)

    def test_constructor_from_categorical_string(self):
        values = Categorical(['a', 'b', 'd'])
        # use categories, ordered
        result = Categorical(values, categories=['a', 'b', 'c'], ordered=True,
                             dtype='category')
        expected = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'c'],
                               ordered=True)
        tm.assert_categorical_equal(result, expected)

        # No string
        result = Categorical(values, categories=['a', 'b', 'c'], ordered=True)
        tm.assert_categorical_equal(result, expected)

    def test_constructor_with_categorical_categories(self):
        """Categorical/CategoricalIndex are accepted as `categories` (GH17884)."""
        # GH17884
        expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])

        result = Categorical(
            ['a', 'b'], categories=Categorical(['a', 'b', 'c']))
        tm.assert_categorical_equal(result, expected)

        result = Categorical(
            ['a', 'b'], categories=CategoricalIndex(['a', 'b', 'c']))
        tm.assert_categorical_equal(result, expected)

    def test_from_codes(self):
        """from_codes validation: code range, integer-ness, category
        uniqueness/non-null; then two equivalent happy paths."""

        # too few categories
        dtype = CategoricalDtype(categories=[1, 2])
        msg = "codes need to be between "
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([1, 2], categories=dtype.categories)
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([1, 2], dtype=dtype)

        # no int codes
        msg = "codes need to be array-like integers"
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes(["a"], categories=dtype.categories)
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes(["a"], dtype=dtype)

        # no unique categories
        with pytest.raises(ValueError,
                           match="Categorical categories must be unique"):
            Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])

        # NaN categories included
        with pytest.raises(ValueError,
                           match="Categorial categories cannot be null"):
            Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])

        # too negative
        dtype = CategoricalDtype(categories=["a", "b", "c"])
        msg = r"codes need to be between -1 and len\(categories\)-1"
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([-2, 1, 2], categories=dtype.categories)
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([-2, 1, 2], dtype=dtype)

        exp = Categorical(["a", "b", "c"], ordered=False)
        res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
        tm.assert_categorical_equal(exp, res)

        res = Categorical.from_codes([0, 1, 2], dtype=dtype)
        tm.assert_categorical_equal(exp, res)

    def test_from_codes_with_categorical_categories(self):
        """from_codes accepts Categorical/CategoricalIndex categories (GH17884)."""
        # GH17884
        expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])

        result = Categorical.from_codes(
            [0, 1], categories=Categorical(['a', 'b', 'c']))
        tm.assert_categorical_equal(result, expected)

        result = Categorical.from_codes(
            [0, 1], categories=CategoricalIndex(['a', 'b', 'c']))
        tm.assert_categorical_equal(result, expected)

        # non-unique Categorical still raises
        with pytest.raises(ValueError,
                           match="Categorical categories must be unique"):
            Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))

    def test_from_codes_with_nan_code(self):
        """NaN is not a valid code — use -1 for missing (GH21767)."""
        # GH21767
        codes = [1, 2, np.nan]
        dtype = CategoricalDtype(categories=['a', 'b', 'c'])
        with pytest.raises(ValueError,
                           match="codes need to be array-like integers"):
            Categorical.from_codes(codes, categories=dtype.categories)
        with pytest.raises(ValueError,
                           match="codes need to be array-like integers"):
            Categorical.from_codes(codes, dtype=dtype)

    def test_from_codes_with_float(self):
        """Integral floats are accepted with a FutureWarning; non-integral
        floats are rejected (GH21767)."""
        # GH21767
        codes = [1.0, 2.0, 0]  # integer, but in float dtype
        dtype = CategoricalDtype(categories=['a', 'b', 'c'])

        with tm.assert_produces_warning(FutureWarning):
            cat = Categorical.from_codes(codes, dtype.categories)
        tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))

        with tm.assert_produces_warning(FutureWarning):
            cat = Categorical.from_codes(codes, dtype=dtype)
        tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))

        codes = [1.1, 2.0, 0]  # non-integer
        with pytest.raises(ValueError,
                           match="codes need to be array-like integers"):
            Categorical.from_codes(codes, dtype.categories)
        with pytest.raises(ValueError,
                           match="codes need to be array-like integers"):
            Categorical.from_codes(codes, dtype=dtype)

    def test_from_codes_with_dtype_raises(self):
        """dtype is mutually exclusive with categories/ordered in from_codes."""
        msg = 'Cannot specify'
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([0, 1], categories=['a', 'b'],
                                   dtype=CategoricalDtype(['a', 'b']))

        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([0, 1], ordered=True,
                                   dtype=CategoricalDtype(['a', 'b']))

    def test_from_codes_neither(self):
        """from_codes needs either categories or dtype."""
        msg = "Both were None"
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes([0, 1])

    @pytest.mark.parametrize('dtype', [None, 'category'])
    def test_from_inferred_categories(self, dtype):
        """_from_inferred_categories matches from_codes for plain input."""
        cats = ['a', 'b']
        codes = np.array([0, 0, 1, 1], dtype='i8')
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical.from_codes(codes, cats)
        tm.assert_categorical_equal(result, expected)

    @pytest.mark.parametrize('dtype', [None, 'category'])
    def test_from_inferred_categories_sorts(self, dtype):
        """Inferred categories are sorted, and codes remapped accordingly."""
        cats = ['b', 'a']
        codes = np.array([0, 1, 1, 1], dtype='i8')
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical.from_codes([1, 0, 0, 0], ['a', 'b'])
        tm.assert_categorical_equal(result, expected)

    def test_from_inferred_categories_dtype(self):
        """An explicit CategoricalDtype overrides the inferred categories;
        values absent from it ('d') become NaN-coded."""
        cats = ['a', 'b', 'd']
        codes = np.array([0, 1, 0, 2], dtype='i8')
        dtype = CategoricalDtype(['c', 'b', 'a'], ordered=True)
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical(['a', 'b', 'a', 'd'],
                               categories=['c', 'b', 'a'],
                               ordered=True)
        tm.assert_categorical_equal(result, expected)

    def test_from_inferred_categories_coerces(self):
        """String categories are coerced to the dtype's category type;
        uncoercible entries ('bad') become NaN."""
        cats = ['1', '2', 'bad']
        codes = np.array([0, 0, 1, 2], dtype='i8')
        dtype = CategoricalDtype([1, 2])
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical([1, 1, 2, np.nan])
        tm.assert_categorical_equal(result, expected)

    @pytest.mark.parametrize('ordered', [None, True, False])
    def test_construction_with_ordered(self, ordered):
        """ordered=None coerces to False (GH 9347, 9190)."""
        # GH 9347, 9190
        cat = Categorical([0, 1, 2], ordered=ordered)
        assert cat.ordered == bool(ordered)

    @pytest.mark.xfail(reason="Imaginary values not supported in Categorical")
    def test_constructor_imaginary(self):
        """Documents desired (currently unsupported) complex-value behavior."""
        values = [1, 2, 3 + 1j]
        c1 = Categorical(values)
        tm.assert_index_equal(c1.categories, Index(values))
        tm.assert_numpy_array_equal(np.array(c1), np.array(values))
|
||||
+177
@@ -0,0 +1,177 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import long
|
||||
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
from pandas import Categorical, CategoricalIndex, Index, Series, Timestamp
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalDtypes(object):
|
||||
|
||||
def test_is_equal_dtype(self):
    """is_dtype_equal ignores category order but respects `ordered`,
    and works across Categorical / CategoricalIndex / Series (GH 16659)."""

    # test dtype comparisons between cats

    c1 = Categorical(list('aabca'), categories=list('abc'), ordered=False)
    c2 = Categorical(list('aabca'), categories=list('cab'), ordered=False)
    c3 = Categorical(list('aabca'), categories=list('cab'), ordered=True)
    assert c1.is_dtype_equal(c1)
    assert c2.is_dtype_equal(c2)
    assert c3.is_dtype_equal(c3)
    # Same category *set*, different order -> still equal dtypes.
    assert c1.is_dtype_equal(c2)
    # Differing `ordered` flag -> not equal.
    assert not c1.is_dtype_equal(c3)
    assert not c1.is_dtype_equal(Index(list('aabca')))
    # Casting to object loses the categorical dtype entirely.
    assert not c1.is_dtype_equal(c1.astype(object))
    assert c1.is_dtype_equal(CategoricalIndex(c1))
    assert (c1.is_dtype_equal(
        CategoricalIndex(c1, categories=list('cab'))))
    assert not c1.is_dtype_equal(CategoricalIndex(c1, ordered=True))

    # GH 16659
    s1 = Series(c1)
    s2 = Series(c2)
    s3 = Series(c3)
    assert c1.is_dtype_equal(s1)
    assert c2.is_dtype_equal(s2)
    assert c3.is_dtype_equal(s3)
    assert c1.is_dtype_equal(s2)
    assert not c1.is_dtype_equal(s3)
    assert not c1.is_dtype_equal(s1.astype(object))
|
||||
|
||||
def test_set_dtype_same(self):
    """_set_dtype with an identical dtype leaves the data unchanged."""
    original = Categorical(['a', 'b', 'c'])
    unchanged = original._set_dtype(CategoricalDtype(['a', 'b', 'c']))
    tm.assert_categorical_equal(unchanged, original)
|
||||
|
||||
def test_set_dtype_new_categories(self):
    """A superset dtype keeps the codes but adopts the wider categories."""
    cat = Categorical(['a', 'b', 'c'])
    widened = cat._set_dtype(CategoricalDtype(list('abcd')))
    # Codes are untouched: every old category keeps its position in 'abcd'.
    tm.assert_numpy_array_equal(widened.codes, cat.codes)
    tm.assert_index_equal(widened.dtype.categories, Index(list('abcd')))
|
||||
|
||||
# Each case: values, original categories, and the new categories to recode to;
# expected result is simply constructing from `values` with `new_categories`.
@pytest.mark.parametrize('values, categories, new_categories', [
    # No NaNs, same cats, same order
    (['a', 'b', 'a'], ['a', 'b'], ['a', 'b'],),
    # No NaNs, same cats, different order
    (['a', 'b', 'a'], ['a', 'b'], ['b', 'a'],),
    # Same, unsorted
    (['b', 'a', 'a'], ['a', 'b'], ['a', 'b'],),
    # No NaNs, same cats, different order
    (['b', 'a', 'a'], ['a', 'b'], ['b', 'a'],),
    # NaNs
    (['a', 'b', 'c'], ['a', 'b'], ['a', 'b']),
    (['a', 'b', 'c'], ['a', 'b'], ['b', 'a']),
    (['b', 'a', 'c'], ['a', 'b'], ['a', 'b']),
    # NOTE(review): duplicate of the previous case — likely meant to use
    # ['b', 'a'] as new_categories; harmless but redundant.
    (['b', 'a', 'c'], ['a', 'b'], ['a', 'b']),
    # Introduce NaNs
    (['a', 'b', 'c'], ['a', 'b'], ['a']),
    (['a', 'b', 'c'], ['a', 'b'], ['b']),
    (['b', 'a', 'c'], ['a', 'b'], ['a']),
    # NOTE(review): duplicate of the previous case — likely meant ['b'].
    (['b', 'a', 'c'], ['a', 'b'], ['a']),
    # No overlap
    (['a', 'b', 'c'], ['a', 'b'], ['d', 'e']),
])
@pytest.mark.parametrize('ordered', [True, False])
def test_set_dtype_many(self, values, categories, new_categories,
                        ordered):
    """_set_dtype recodes to the new dtype exactly like re-constructing
    from the raw values with the new categories."""
    c = Categorical(values, categories)
    expected = Categorical(values, new_categories, ordered)
    result = c._set_dtype(expected.dtype)
    tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_set_dtype_no_overlap(self):
    """With fully disjoint new categories, every value becomes NaN."""
    cat = Categorical(['a', 'b', 'c'], ['d', 'e'])
    recoded = cat._set_dtype(CategoricalDtype(['a', 'b']))
    all_missing = Categorical([None, None, None], categories=['a', 'b'])
    tm.assert_categorical_equal(recoded, all_missing)
|
||||
|
||||
def test_codes_dtypes(self):
|
||||
|
||||
# GH 8453
|
||||
result = Categorical(['foo', 'bar', 'baz'])
|
||||
assert result.codes.dtype == 'int8'
|
||||
|
||||
result = Categorical(['foo%05d' % i for i in range(400)])
|
||||
assert result.codes.dtype == 'int16'
|
||||
|
||||
result = Categorical(['foo%05d' % i for i in range(40000)])
|
||||
assert result.codes.dtype == 'int32'
|
||||
|
||||
# adding cats
|
||||
result = Categorical(['foo', 'bar', 'baz'])
|
||||
assert result.codes.dtype == 'int8'
|
||||
result = result.add_categories(['foo%05d' % i for i in range(400)])
|
||||
assert result.codes.dtype == 'int16'
|
||||
|
||||
# removing cats
|
||||
result = result.remove_categories(['foo%05d' % i for i in range(300)])
|
||||
assert result.codes.dtype == 'int8'
|
||||
|
||||
@pytest.mark.parametrize('ordered', [True, False])
def test_astype(self, ordered):
    """astype round-trips to object / numeric ndarrays.

    FIX: use builtin ``int`` / ``float`` instead of the deprecated
    ``np.int`` / ``np.float`` aliases, which numpy 1.20 deprecated and
    numpy 1.24 removed. The builtins are what the aliases resolved to,
    so behavior is unchanged.
    """
    # string categories: castable to object, not to float
    cat = Categorical(list('abbaaccc'), ordered=ordered)
    result = cat.astype(object)
    expected = np.array(cat)
    tm.assert_numpy_array_equal(result, expected)

    msg = 'could not convert string to float'
    with pytest.raises(ValueError, match=msg):
        cat.astype(float)

    # numeric categories: castable to object, int and float
    cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
    result = cat.astype(object)
    expected = np.array(cat, dtype=object)
    tm.assert_numpy_array_equal(result, expected)

    result = cat.astype(int)
    expected = np.array(cat, dtype=int)
    tm.assert_numpy_array_equal(result, expected)

    result = cat.astype(float)
    expected = np.array(cat, dtype=float)
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('dtype_ordered', [True, False])
@pytest.mark.parametrize('cat_ordered', [True, False])
def test_astype_category(self, dtype_ordered, cat_ordered):
    """astype to a CategoricalDtype recodes categories and orderedness.

    GH 10696 / GH 18593.
    """
    data = list('abcaacbab')
    cat = Categorical(data, categories=list('bac'), ordered=cat_ordered)

    # dtype without explicit categories: the existing ones are kept
    dtype = CategoricalDtype(ordered=dtype_ordered)
    result = cat.astype(dtype)
    expected = Categorical(data, categories=cat.categories,
                           ordered=dtype_ordered)
    tm.assert_categorical_equal(result, expected)

    # dtype carrying a different category set
    dtype = CategoricalDtype(list('adc'), dtype_ordered)
    result = cat.astype(dtype)
    expected = Categorical(data, dtype=dtype)
    tm.assert_categorical_equal(result, expected)

    # dtype='category' cannot specify `ordered`, so only test it once
    if dtype_ordered is False:
        result = cat.astype('category')
        tm.assert_categorical_equal(result, cat)
|
||||
|
||||
def test_iter_python_types(self):
    """Iteration yields Python scalars, not numpy ones (GH 19909)."""
    # TODO(Py2): Remove long
    cat = Categorical([1, 2])
    first_from_iter = list(cat)[0]
    first_from_tolist = cat.tolist()[0]
    assert isinstance(first_from_iter, (int, long))
    assert isinstance(first_from_tolist, (int, long))
|
||||
|
||||
def test_iter_python_types_datetime(self):
|
||||
cat = Categorical([Timestamp('2017-01-01'),
|
||||
Timestamp('2017-01-02')])
|
||||
assert isinstance(list(cat)[0], Timestamp)
|
||||
assert isinstance(cat.tolist()[0], Timestamp)
|
||||
+264
@@ -0,0 +1,264 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series
|
||||
import pandas.core.common as com
|
||||
from pandas.tests.arrays.categorical.common import TestCategorical
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalIndexingWithFactor(TestCategorical):
    """Indexing tests that reuse the shared ordered ``self.factor``."""

    def test_getitem(self):
        # scalar access, including negative positions
        assert self.factor[0] == 'a'
        assert self.factor[-1] == 'c'

        # list-of-positions access
        head = self.factor[[0, 1, 2]]
        tm.assert_numpy_array_equal(head._codes,
                                    np.array([0, 1, 1], dtype=np.int8))

        # boolean-mask access
        only_c = self.factor[np.asarray(self.factor) == 'c']
        tm.assert_numpy_array_equal(only_c._codes,
                                    np.array([2, 2, 2], dtype=np.int8))

    def test_setitem(self):
        # positional assignment, including negative indices
        cat = self.factor.copy()
        cat[0] = 'b'
        assert cat[0] == 'b'
        cat[-1] = 'a'
        assert cat[-1] == 'a'

        # boolean-mask assignment
        cat = self.factor.copy()
        mask = np.zeros(len(cat), dtype='bool')
        mask[0] = True
        mask[-1] = True
        cat[mask] = 'c'
        expected = Categorical(['c', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
                               ordered=True)
        tm.assert_categorical_equal(cat, expected)

    @pytest.mark.parametrize('other', [
        pd.Categorical(['b', 'a']),
        pd.Categorical(['b', 'a'], categories=['b', 'a']),
    ])
    def test_setitem_same_but_unordered(self, other):
        # GH-24142: the same (unordered) category set, in any order,
        # is an acceptable source for assignment
        target = pd.Categorical(['a', 'b'], categories=['a', 'b'])
        mask = np.array([True, False])
        target[mask] = other[mask]
        expected = pd.Categorical(['b', 'b'], categories=['a', 'b'])
        tm.assert_categorical_equal(target, expected)

    @pytest.mark.parametrize('other', [
        pd.Categorical(['b', 'a'], categories=['b', 'a', 'c']),
        pd.Categorical(['b', 'a'], categories=['a', 'b', 'c']),
        pd.Categorical(['a', 'a'], categories=['a']),
        pd.Categorical(['b', 'b'], categories=['b']),
    ])
    def test_setitem_different_unordered_raises(self, other):
        # GH-24142: a differing category set must raise
        target = pd.Categorical(['a', 'b'], categories=['a', 'b'])
        mask = np.array([True, False])
        with pytest.raises(ValueError):
            target[mask] = other[mask]

    @pytest.mark.parametrize('other', [
        pd.Categorical(['b', 'a']),
        pd.Categorical(['b', 'a'], categories=['b', 'a'], ordered=True),
        pd.Categorical(['b', 'a'], categories=['a', 'b', 'c'], ordered=True),
    ])
    def test_setitem_same_ordered_rasies(self, other):
        # GH-24142: an ordered target requires an exactly matching dtype
        target = pd.Categorical(['a', 'b'], categories=['a', 'b'],
                                ordered=True)
        mask = np.array([True, False])

        with pytest.raises(ValueError):
            target[mask] = other[mask]
|
||||
|
||||
|
||||
class TestCategoricalIndexing(object):
    """Indexing behavior that does not need the shared factor fixture."""

    def test_getitem_listlike(self):
        # GH 9469: list-like indexers must be coerced properly
        np.random.seed(1)
        cat = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8))
        indexer = np.array([100000]).astype(np.int64)
        result = cat.codes[indexer]
        expected = cat[indexer].codes
        tm.assert_numpy_array_equal(result, expected)

    def test_periodindex(self):
        # unordered: categories come out sorted
        idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02',
                            '2014-03', '2014-03'], freq='M')
        cat1 = Categorical(idx1)
        str(cat1)
        exp_codes = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8)
        exp_cats = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M')
        tm.assert_numpy_array_equal(cat1._codes, exp_codes)
        tm.assert_index_equal(cat1.categories, exp_cats)

        # ordered, unsorted input
        idx2 = PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01',
                            '2014-03', '2014-01'], freq='M')
        cat2 = Categorical(idx2, ordered=True)
        str(cat2)
        exp_codes = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8)
        exp_cats = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M')
        tm.assert_numpy_array_equal(cat2._codes, exp_codes)
        tm.assert_index_equal(cat2.categories, exp_cats)

        # ordered, strictly descending input
        idx3 = PeriodIndex(['2013-12', '2013-11', '2013-10', '2013-09',
                            '2013-08', '2013-07', '2013-05'], freq='M')
        cat3 = Categorical(idx3, ordered=True)
        exp_codes = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8)
        exp_cats = PeriodIndex(['2013-05', '2013-07', '2013-08', '2013-09',
                                '2013-10', '2013-11', '2013-12'], freq='M')
        tm.assert_numpy_array_equal(cat3._codes, exp_codes)
        tm.assert_index_equal(cat3.categories, exp_cats)

    def test_categories_assigments(self):
        cat = Categorical(["a", "b", "c", "a"])
        cat.categories = [1, 2, 3]
        tm.assert_numpy_array_equal(cat.__array__(),
                                    np.array([1, 2, 3, 1], dtype=np.int64))
        tm.assert_index_equal(cat.categories, Index([1, 2, 3]))

        # assigning a longer category list is invalid ...
        with pytest.raises(ValueError):
            cat.categories = [1, 2, 3, 4]

        # ... and so is a shorter one
        with pytest.raises(ValueError):
            cat.categories = [1, 2]

    # Combinations of sorted/unique:
    @pytest.mark.parametrize("idx_values", [[1, 2, 3, 4], [1, 3, 2, 4],
                                            [1, 3, 3, 4], [1, 2, 2, 4]])
    # Combinations of missing/unique
    @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]])
    @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex])
    def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
        # GH 21448
        key = key_class(key_values, categories=range(1, 5))
        # flat Index vs. CategoricalIndex with same/different categories
        for dtype in (None, 'category', key.dtype):
            idx = Index(idx_values, dtype=dtype)
            expected, exp_miss = idx.get_indexer_non_unique(key_values)
            result, res_miss = idx.get_indexer_non_unique(key)

            tm.assert_numpy_array_equal(expected, result)
            tm.assert_numpy_array_equal(exp_miss, res_miss)

    def test_where_unobserved_nan(self):
        # partial mask -> NaN where masked, categories preserved
        ser = pd.Series(pd.Categorical(['a', 'b']))
        result = ser.where([True, False])
        expected = pd.Series(pd.Categorical(['a', None],
                                            categories=['a', 'b']))
        tm.assert_series_equal(result, expected)

        # all-NA mask: categories are still preserved
        ser = pd.Series(pd.Categorical(['a', 'b']))
        result = ser.where([False, False])
        expected = pd.Series(pd.Categorical([None, None],
                                            categories=['a', 'b']))
        tm.assert_series_equal(result, expected)

    def test_where_unobserved_categories(self):
        ser = pd.Series(
            Categorical(['a', 'b', 'c'], categories=['d', 'c', 'b', 'a'])
        )
        result = ser.where([True, True, False], other='b')
        expected = pd.Series(
            Categorical(['a', 'b', 'b'], categories=ser.cat.categories)
        )
        tm.assert_series_equal(result, expected)

    def test_where_other_categorical(self):
        ser = pd.Series(
            Categorical(['a', 'b', 'c'], categories=['d', 'c', 'b', 'a'])
        )
        other = Categorical(['b', 'c', 'a'], categories=['a', 'c', 'b', 'd'])
        result = ser.where([True, False, True], other)
        expected = pd.Series(Categorical(['a', 'c', 'c'], dtype=ser.dtype))
        tm.assert_series_equal(result, expected)

    def test_where_warns(self):
        # filling with a value outside the categories warns and falls
        # back to object dtype
        ser = pd.Series(Categorical(['a', 'b', 'c']))
        with tm.assert_produces_warning(FutureWarning):
            result = ser.where([True, False, True], 'd')

        expected = pd.Series(np.array(['a', 'd', 'c'], dtype='object'))
        tm.assert_series_equal(result, expected)

    def test_where_ordered_differs_rasies(self):
        ser = pd.Series(
            Categorical(['a', 'b', 'c'], categories=['d', 'c', 'b', 'a'],
                        ordered=True)
        )
        other = Categorical(['b', 'c', 'a'], categories=['a', 'c', 'b', 'd'],
                            ordered=True)
        with tm.assert_produces_warning(FutureWarning):
            result = ser.where([True, False, True], other)

        expected = pd.Series(np.array(['a', 'c', 'c'], dtype=object))
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", [True, False])
def test_mask_with_boolean(index):
    """A boolean Categorical (or CategoricalIndex) is a valid mask."""
    ser = Series(range(3))
    mask = Categorical([True, False, True])
    if index:
        mask = CategoricalIndex(mask)

    assert com.is_bool_indexer(mask)
    result = ser[mask]
    expected = ser[mask.astype('object')]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", [True, False])
def test_mask_with_boolean_raises(index):
    """A boolean mask containing NA must be rejected."""
    ser = Series(range(3))
    mask = Categorical([True, False, None])
    if index:
        mask = CategoricalIndex(mask)

    with pytest.raises(ValueError, match='NA / NaN'):
        ser[mask]
|
||||
|
||||
|
||||
@pytest.fixture
def non_coercible_categorical(monkeypatch):
    """
    Monkeypatch Categorical.__array__ to ensure no implicit conversion.

    Raises
    ------
    ValueError
        When Categorical.__array__ is called.
    """
    # TODO(Categorical): identify other places where this may be
    # useful and move to a conftest.py
    def _refuse_conversion(self, dtype=None):
        raise ValueError("I cannot be converted.")

    with monkeypatch.context() as patched:
        patched.setattr(Categorical, "__array__", _refuse_conversion)
        yield
|
||||
|
||||
|
||||
def test_series_at(non_coercible_categorical):
|
||||
arr = Categorical(['a', 'b', 'c'])
|
||||
ser = Series(arr)
|
||||
result = ser.at[0]
|
||||
assert result == 'a'
|
||||
+87
@@ -0,0 +1,87 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import collections
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange
|
||||
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
from pandas import Categorical, Index, isna
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalMissing(object):
    """Missing-value (NaN) semantics of Categorical."""

    def test_na_flags_int_categories(self):
        # #1457: code -1 is NA even with integer categories
        categories = lrange(10)
        codes = np.random.randint(0, 10, 20)
        codes[::5] = -1

        cat = Categorical(codes, categories, fastpath=True)
        repr(cat)

        tm.assert_numpy_array_equal(isna(cat), codes == -1)

    def test_nan_handling(self):
        # NaNs are represented as code -1
        cat = Categorical(["a", "b", np.nan, "a"])
        tm.assert_index_equal(cat.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(cat._codes,
                                    np.array([0, 1, -1, 0], dtype=np.int8))
        cat[1] = np.nan
        tm.assert_index_equal(cat.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(cat._codes,
                                    np.array([0, -1, -1, 0], dtype=np.int8))

        # Adding nan to categories should make assigned nan point to the
        # category!
        cat = Categorical(["a", "b", np.nan, "a"])
        tm.assert_index_equal(cat.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(cat._codes,
                                    np.array([0, 1, -1, 0], dtype=np.int8))

    def test_set_dtype_nans(self):
        cat = Categorical(['a', 'b', np.nan])
        recoded = cat._set_dtype(CategoricalDtype(['a', 'c']))
        # 'b' is absent from the new categories, so it joins NaN at -1
        tm.assert_numpy_array_equal(recoded.codes,
                                    np.array([0, -1, -1], dtype='int8'))

    def test_set_item_nan(self):
        cat = Categorical([1, 2, 3])
        cat[1] = np.nan

        expected = Categorical([1, np.nan, 3], categories=[1, 2, 3])
        tm.assert_categorical_equal(cat, expected)

    @pytest.mark.parametrize('fillna_kwargs, msg', [
        (dict(value=1, method='ffill'),
         "Cannot specify both 'value' and 'method'."),
        (dict(),
         "Must specify a fill 'value' or 'method'."),
        (dict(method='bad'),
         "Invalid fill method. Expecting .* bad"),
    ])
    def test_fillna_raises(self, fillna_kwargs, msg):
        # https://github.com/pandas-dev/pandas/issues/19682
        cat = Categorical([1, 2, 3])

        with pytest.raises(ValueError, match=msg):
            cat.fillna(**fillna_kwargs)

    @pytest.mark.parametrize("named", [True, False])
    def test_fillna_iterable_category(self, named):
        # https://github.com/pandas-dev/pandas/issues/21097
        if named:
            Point = collections.namedtuple("Point", "x y")
        else:
            Point = lambda *args: args  # tuple
        cat = Categorical([Point(0, 0), Point(0, 1), None])
        result = cat.fillna(Point(0, 0))
        expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)])

        tm.assert_categorical_equal(result, expected)
|
||||
+331
@@ -0,0 +1,331 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Categorical, DataFrame, Series, date_range
|
||||
from pandas.tests.arrays.categorical.common import TestCategorical
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalOpsWithFactor(TestCategorical):
    """Comparison semantics checked against the shared ordered factor."""

    def test_categories_none_comparisons(self):
        factor = Categorical(['a', 'b', 'b', 'a',
                              'a', 'c', 'c', 'c'], ordered=True)
        tm.assert_categorical_equal(factor, self.factor)

    def test_comparisons(self):
        # comparing against a scalar matches the dense ndarray comparison
        checks = [(operator.eq, 'a'), (operator.ne, 'a'),
                  (operator.lt, 'c'), (operator.gt, 'a'),
                  (operator.ge, 'b'), (operator.le, 'b')]
        for op, scalar in checks:
            result = self.factor[op(self.factor, scalar)]
            expected = self.factor[op(np.asarray(self.factor), scalar)]
            tm.assert_categorical_equal(result, expected)

        n = len(self.factor)

        shuffled = self.factor[np.random.permutation(n)]
        result = self.factor == shuffled
        expected = np.asarray(self.factor) == np.asarray(shuffled)
        tm.assert_numpy_array_equal(result, expected)

        # equality against a value outside the categories is all-False
        result = self.factor == 'd'
        expected = np.repeat(False, len(self.factor))
        tm.assert_numpy_array_equal(result, expected)

        # comparisons with categoricals
        cat_rev = Categorical(
            ["a", "b", "c"], categories=["c", "b", "a"], ordered=True)
        cat_rev_base = Categorical(
            ["b", "b", "b"], categories=["c", "b", "a"], ordered=True)
        cat = Categorical(["a", "b", "c"], ordered=True)
        cat_base = Categorical(
            ["b", "b", "b"], categories=cat.categories, ordered=True)

        # comparisons need to take the categories' ordering into account
        tm.assert_numpy_array_equal(cat_rev > cat_rev_base,
                                    np.array([True, False, False]))
        tm.assert_numpy_array_equal(cat_rev < cat_rev_base,
                                    np.array([False, False, True]))
        tm.assert_numpy_array_equal(cat > cat_base,
                                    np.array([False, False, True]))

        # only categoricals with the same categories can be compared
        with pytest.raises(TypeError):
            cat > cat_rev

        cat_rev_base2 = Categorical(
            ["b", "b", "b"], categories=["c", "b", "a", "d"])

        with pytest.raises(TypeError):
            cat_rev > cat_rev_base2

        # ... and only with matching ordering information
        cat_unordered = cat.set_ordered(False)
        assert not (cat > cat).any()

        with pytest.raises(TypeError):
            cat > cat_unordered

        # comparison (in both directions) with a Series raises
        ser = Series(["b", "b", "b"])
        pytest.raises(TypeError, lambda: cat > ser)
        pytest.raises(TypeError, lambda: cat_rev > ser)
        pytest.raises(TypeError, lambda: ser < cat)
        pytest.raises(TypeError, lambda: ser < cat_rev)

        # comparison with numpy.array will raise in both directions,
        # but only on newer numpy versions
        arr = np.array(["b", "b", "b"])
        pytest.raises(TypeError, lambda: cat > arr)
        pytest.raises(TypeError, lambda: cat_rev > arr)

        # unequal comparison against a scalar respects category order
        cat_rev = Categorical(
            list("abc"), categories=list("cba"), ordered=True)
        expected = np.array([True, False, False])
        tm.assert_numpy_array_equal(cat_rev > "b", expected)

        # a zero-dim array gets unboxed to a scalar
        tm.assert_numpy_array_equal(cat_rev > np.array("b"), expected)
|
||||
|
||||
|
||||
class TestCategoricalOps(object):
    """Comparison / arithmetic semantics that need no shared fixture."""

    def test_compare_frame(self):
        # GH#24282 check that Categorical.__cmp__(DataFrame) defers to frame
        data = ["a", "b", 2, "a"]
        cat = Categorical(data)

        df = DataFrame(cat)

        comparisons = [operator.eq, operator.ne, operator.ge,
                       operator.gt, operator.le, operator.lt]
        for op in comparisons:
            with pytest.raises(ValueError):
                # alignment raises unless we transpose
                op(cat, df)

        result = cat == df.T
        tm.assert_frame_equal(result, DataFrame([[True, True, True, True]]))

        result = cat[::-1] != df.T
        tm.assert_frame_equal(result,
                              DataFrame([[False, True, True, False]]))

    def test_datetime_categorical_comparison(self):
        dt_cat = Categorical(date_range('2014-01-01', periods=3),
                             ordered=True)
        expected = np.array([False, True, True])
        tm.assert_numpy_array_equal(dt_cat > dt_cat[0], expected)
        tm.assert_numpy_array_equal(dt_cat[0] < dt_cat, expected)

    def test_reflected_comparison_with_scalars(self):
        # GH8658
        cat = Categorical([1, 2, 3], ordered=True)
        expected = np.array([False, True, True])
        tm.assert_numpy_array_equal(cat > cat[0], expected)
        tm.assert_numpy_array_equal(cat[0] < cat, expected)

    def test_comparison_with_unknown_scalars(self):
        # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057
        # and following comparisons with scalars not in categories should
        # raise for unequal comps, but not for equal/not equal
        cat = Categorical([1, 2, 3], ordered=True)

        pytest.raises(TypeError, lambda: cat < 4)
        pytest.raises(TypeError, lambda: cat > 4)
        pytest.raises(TypeError, lambda: 4 < cat)
        pytest.raises(TypeError, lambda: 4 > cat)

        tm.assert_numpy_array_equal(cat == 4,
                                    np.array([False, False, False]))
        tm.assert_numpy_array_equal(cat != 4,
                                    np.array([True, True, True]))

    @pytest.mark.parametrize('data,reverse,base', [
        (list("abc"), list("cba"), list("bbb")),
        ([1, 2, 3], [3, 2, 1], [2, 2, 2])]
    )
    def test_comparisons(self, data, reverse, base):
        cat_rev = Series(
            Categorical(data, categories=reverse, ordered=True))
        cat_rev_base = Series(
            Categorical(base, categories=reverse, ordered=True))
        cat = Series(Categorical(data, ordered=True))
        cat_base = Series(
            Categorical(base, categories=cat.cat.categories, ordered=True))
        ser = Series(base)
        arr = np.array(base)

        # comparisons need to take categories ordering into account
        tm.assert_series_equal(cat_rev > cat_rev_base,
                               Series([True, False, False]))
        tm.assert_series_equal(cat_rev < cat_rev_base,
                               Series([False, False, True]))
        tm.assert_series_equal(cat > cat_base,
                               Series([False, False, True]))

        # scalar comparisons agree with the dense .values comparison
        scalar = base[1]
        res = cat > scalar
        exp = Series([False, False, True])
        tm.assert_series_equal(res, exp)
        tm.assert_numpy_array_equal(res.values, cat.values > scalar)

        res_rev = cat_rev > scalar
        exp_rev = Series([True, False, False])
        tm.assert_series_equal(res_rev, exp_rev)
        tm.assert_numpy_array_equal(res_rev.values, cat_rev.values > scalar)

        # only categoricals with the same categories can be compared
        with pytest.raises(TypeError):
            cat > cat_rev

        # a categorical cannot be compared to a Series or numpy array,
        # and not the other way around either
        pytest.raises(TypeError, lambda: cat > ser)
        pytest.raises(TypeError, lambda: cat_rev > ser)
        pytest.raises(TypeError, lambda: cat > arr)
        pytest.raises(TypeError, lambda: cat_rev > arr)

        pytest.raises(TypeError, lambda: ser < cat)
        pytest.raises(TypeError, lambda: ser < cat_rev)

        pytest.raises(TypeError, lambda: arr < cat)
        pytest.raises(TypeError, lambda: arr < cat_rev)

    @pytest.mark.parametrize('ctor', [
        lambda *args, **kwargs: Categorical(*args, **kwargs),
        lambda *args, **kwargs: Series(Categorical(*args, **kwargs)),
    ])
    def test_unordered_different_order_equal(self, ctor):
        # https://github.com/pandas-dev/pandas/issues/16014
        # identical values, category order differs -> elementwise equal
        left = ctor(['a', 'b'], categories=['a', 'b'], ordered=False)
        right = ctor(['a', 'b'], categories=['b', 'a'], ordered=False)
        assert (left == right).all()

        # swapped values -> everything unequal
        left = ctor(['a', 'b'], categories=['a', 'b'], ordered=False)
        right = ctor(['b', 'a'], categories=['b', 'a'], ordered=False)
        assert (left != right).all()

        left = ctor(['a', 'a'], categories=['a', 'b'], ordered=False)
        right = ctor(['b', 'b'], categories=['b', 'a'], ordered=False)
        assert (left != right).all()

        # elementwise mix of equal and unequal
        left = ctor(['a', 'a'], categories=['a', 'b'], ordered=False)
        right = ctor(['a', 'b'], categories=['b', 'a'], ordered=False)
        result = left == right
        tm.assert_numpy_array_equal(np.array(result),
                                    np.array([True, False]))

    def test_unordered_different_categories_raises(self):
        left = Categorical(['a', 'b'], categories=['a', 'b'], ordered=False)
        right = Categorical(['a', 'c'], categories=['c', 'a'], ordered=False)

        with pytest.raises(TypeError, match=("Categoricals can "
                                             "only be compared")):
            left == right

    def test_compare_different_lengths(self):
        left = Categorical([], categories=['a', 'b'])
        right = Categorical([], categories=['a'])

        msg = "Categories are different lengths"
        with pytest.raises(TypeError, match=msg):
            left == right

    def test_compare_unordered_different_order(self):
        # https://github.com/pandas-dev/pandas/issues/16603#issuecomment-
        # 349290078
        left = pd.Categorical(['a'], categories=['a', 'b'])
        right = pd.Categorical(['b'], categories=['b', 'a'])
        assert not left.equals(right)

    def test_numeric_like_ops(self):

        df = DataFrame({'value': np.random.randint(0, 10000, 100)})
        labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
        cat_labels = Categorical(labels, labels)

        df = df.sort_values(by=['value'], ascending=True)
        df['value_group'] = pd.cut(df.value, range(0, 10500, 500),
                                   right=False, labels=cat_labels)

        # numeric ops should not succeed
        for op in ['__add__', '__sub__', '__mul__', '__truediv__']:
            pytest.raises(TypeError,
                          lambda: getattr(df, op)(df))

        # reduction ops should not succeed (unless specifically defined,
        # e.g. min/max)
        s = df['value_group']
        for op in ['kurt', 'skew', 'var', 'std', 'mean', 'sum', 'median']:
            pytest.raises(TypeError,
                          lambda: getattr(s, op)(numeric_only=False))

        # mad technically works because it takes always the numeric data

        # numpy ops
        s = Series(Categorical([1, 2, 3, 4]))
        with pytest.raises(TypeError):
            np.sum(s)

        # numeric ops on a Series
        for op in ['__add__', '__sub__', '__mul__', '__truediv__']:
            pytest.raises(TypeError, lambda: getattr(s, op)(2))

        # invalid ufunc
        with pytest.raises(TypeError):
            np.log(s)

    def test_contains(self):
        # GH21508
        cat = pd.Categorical(list('aabbca'), categories=list('cab'))

        assert 'b' in cat
        assert 'z' not in cat
        assert np.nan not in cat
        with pytest.raises(TypeError):
            assert [1] in cat

        # the integer codes themselves are NOT members
        assert 0 not in cat
        assert 1 not in cat

        cat = pd.Categorical(list('aabbca') + [np.nan],
                             categories=list('cab'))
        assert np.nan in cat
|
||||
@@ -0,0 +1,529 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.compat import PY3, u
|
||||
|
||||
from pandas import (
|
||||
Categorical, CategoricalIndex, Series, date_range, period_range,
|
||||
timedelta_range)
|
||||
from pandas.core.config import option_context
|
||||
from pandas.tests.arrays.categorical.common import TestCategorical
|
||||
|
||||
|
||||
class TestCategoricalReprWithFactor(TestCategorical):
    """Repr of the shared ordered factor fixture."""

    def test_print(self):
        lines = ["[a, b, b, a, a, c, c, c]",
                 "Categories (3, object): [a < b < c]"]
        assert repr(self.factor) == "\n".join(lines)
|
||||
|
||||
|
||||
class TestCategoricalRepr(object):
|
||||
|
||||
def test_big_print(self):
    """Long categoricals are truncated and gain a Length line."""
    factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ['a', 'b', 'c'],
                         fastpath=True)
    lines = ["[a, b, c, a, b, ..., b, c, a, b, c]", "Length: 600",
             "Categories (3, object): [a, b, c]"]

    assert repr(factor) == "\n".join(lines)
|
||||
|
||||
def test_empty_print(self):
    """Empty categoricals render on a single line.

    FIX: the original asserted the first expectation twice
    (``assert actual == expected`` immediately followed by
    ``assert expected == actual``) — the duplicate is removed.
    """
    factor = Categorical([], ["a", "b", "c"])
    expected = ("[], Categories (3, object): [a, b, c]")
    actual = repr(factor)
    assert actual == expected

    factor = Categorical([], ["a", "b", "c"], ordered=True)
    expected = ("[], Categories (3, object): [a < b < c]")
    actual = repr(factor)
    assert expected == actual

    factor = Categorical([], [])
    expected = ("[], Categories (0, object): []")
    assert expected == repr(factor)
|
||||
|
||||
def test_print_none_width(self):
    """display.width=None must not break Series repr (GH10087)."""
    # NOTE(review): the column alignment below (4 spaces) matches
    # pandas' default Series repr; the extracted source had the
    # whitespace collapsed — confirm against the project file.
    ser = Series(Categorical([1, 2, 3, 4]))
    expected = u("0    1\n1    2\n2    3\n3    4\n"
                 "dtype: category\nCategories (4, int64): [1, 2, 3, 4]")

    with option_context("display.width", None):
        assert repr(ser) == expected
|
||||
|
||||
def test_unicode_print(self):
    """repr of categoricals holding wide (east-asian) strings."""
    # TODO(Py2): under Python 2 `repr` would escape the unicode,
    # so fall back to `unicode` there
    if PY3:
        _rep = repr
    else:
        _rep = unicode  # noqa

    cat = Categorical(['aaaaa', 'bb', 'cccc'] * 20)
    expected = u"""\
[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc]
Length: 60
Categories (3, object): [aaaaa, bb, cccc]"""

    assert _rep(cat) == expected

    cat = Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20)
    expected = u"""\
[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう]
Length: 60
Categories (3, object): [ああああ, いいいいい, ううううううう]"""  # noqa

    assert _rep(cat) == expected

    # the east-asian-width option must not affect Categorical's repr,
    # as it does not care about the repr width
    with option_context('display.unicode.east_asian_width', True):

        cat = Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20)
        expected = u"""[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう]
Length: 60
Categories (3, object): [ああああ, いいいいい, ううううううう]"""  # noqa

        assert _rep(cat) == expected
|
||||
|
||||
def test_categorical_repr(self):
|
||||
c = Categorical([1, 2, 3])
|
||||
exp = """[1, 2, 3]
|
||||
Categories (3, int64): [1, 2, 3]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3])
|
||||
exp = """[1, 2, 3, 1, 2, 3]
|
||||
Categories (3, int64): [1, 2, 3]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical([1, 2, 3, 4, 5] * 10)
|
||||
exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5]
|
||||
Length: 50
|
||||
Categories (5, int64): [1, 2, 3, 4, 5]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(np.arange(20))
|
||||
exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19]
|
||||
Length: 20
|
||||
Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_ordered(self):
|
||||
c = Categorical([1, 2, 3], ordered=True)
|
||||
exp = """[1, 2, 3]
|
||||
Categories (3, int64): [1 < 2 < 3]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True)
|
||||
exp = """[1, 2, 3, 1, 2, 3]
|
||||
Categories (3, int64): [1 < 2 < 3]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical([1, 2, 3, 4, 5] * 10, ordered=True)
|
||||
exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5]
|
||||
Length: 50
|
||||
Categories (5, int64): [1 < 2 < 3 < 4 < 5]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(np.arange(20), ordered=True)
|
||||
exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19]
|
||||
Length: 20
|
||||
Categories (20, int64): [0 < 1 < 2 < 3 ... 16 < 17 < 18 < 19]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_datetime(self):
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
c = Categorical(idx)
|
||||
|
||||
# TODO(wesm): exceeding 80 characters in the console is not good
|
||||
# behavior
|
||||
exp = (
|
||||
"[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, "
|
||||
"2011-01-01 12:00:00, 2011-01-01 13:00:00]\n"
|
||||
"Categories (5, datetime64[ns]): [2011-01-01 09:00:00, "
|
||||
"2011-01-01 10:00:00, 2011-01-01 11:00:00,\n"
|
||||
" 2011-01-01 12:00:00, "
|
||||
"2011-01-01 13:00:00]""")
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = (
|
||||
"[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, "
|
||||
"2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, "
|
||||
"2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, "
|
||||
"2011-01-01 13:00:00]\n"
|
||||
"Categories (5, datetime64[ns]): [2011-01-01 09:00:00, "
|
||||
"2011-01-01 10:00:00, 2011-01-01 11:00:00,\n"
|
||||
" 2011-01-01 12:00:00, "
|
||||
"2011-01-01 13:00:00]")
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5,
|
||||
tz='US/Eastern')
|
||||
c = Categorical(idx)
|
||||
exp = (
|
||||
"[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, "
|
||||
"2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, "
|
||||
"2011-01-01 13:00:00-05:00]\n"
|
||||
"Categories (5, datetime64[ns, US/Eastern]): "
|
||||
"[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n"
|
||||
" "
|
||||
"2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n"
|
||||
" "
|
||||
"2011-01-01 13:00:00-05:00]")
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = (
|
||||
"[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, "
|
||||
"2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, "
|
||||
"2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, "
|
||||
"2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, "
|
||||
"2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n"
|
||||
"Categories (5, datetime64[ns, US/Eastern]): "
|
||||
"[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n"
|
||||
" "
|
||||
"2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n"
|
||||
" "
|
||||
"2011-01-01 13:00:00-05:00]")
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_datetime_ordered(self):
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00]
|
||||
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
|
||||
2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00]
|
||||
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
|
||||
2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5,
|
||||
tz='US/Eastern')
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]
|
||||
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
|
||||
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
|
||||
2011-01-01 13:00:00-05:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]
|
||||
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
|
||||
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
|
||||
2011-01-01 13:00:00-05:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_int_with_nan(self):
|
||||
c = Categorical([1, 2, np.nan])
|
||||
c_exp = """[1, 2, NaN]\nCategories (2, int64): [1, 2]"""
|
||||
assert repr(c) == c_exp
|
||||
|
||||
s = Series([1, 2, np.nan], dtype="object").astype("category")
|
||||
s_exp = """0 1\n1 2\n2 NaN
|
||||
dtype: category
|
||||
Categories (2, int64): [1, 2]"""
|
||||
assert repr(s) == s_exp
|
||||
|
||||
def test_categorical_repr_period(self):
|
||||
idx = period_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
c = Categorical(idx)
|
||||
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
|
||||
Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
|
||||
2011-01-01 13:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
|
||||
Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
|
||||
2011-01-01 13:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = period_range('2011-01', freq='M', periods=5)
|
||||
c = Categorical(idx)
|
||||
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
|
||||
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
|
||||
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_period_ordered(self):
|
||||
idx = period_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
|
||||
Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
|
||||
2011-01-01 13:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
|
||||
Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
|
||||
2011-01-01 13:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = period_range('2011-01', freq='M', periods=5)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
|
||||
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
|
||||
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_timedelta(self):
|
||||
idx = timedelta_range('1 days', periods=5)
|
||||
c = Categorical(idx)
|
||||
exp = """[1 days, 2 days, 3 days, 4 days, 5 days]
|
||||
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days]
|
||||
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = timedelta_range('1 hours', periods=20)
|
||||
c = Categorical(idx)
|
||||
exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
|
||||
Length: 20
|
||||
Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
|
||||
3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00,
|
||||
18 days 01:00:00, 19 days 01:00:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
|
||||
Length: 40
|
||||
Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
|
||||
3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00,
|
||||
18 days 01:00:00, 19 days 01:00:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_timedelta_ordered(self):
|
||||
idx = timedelta_range('1 days', periods=5)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[1 days, 2 days, 3 days, 4 days, 5 days]
|
||||
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days]
|
||||
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = timedelta_range('1 hours', periods=20)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
|
||||
Length: 20
|
||||
Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
|
||||
3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 <
|
||||
18 days 01:00:00 < 19 days 01:00:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
|
||||
Length: 40
|
||||
Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
|
||||
3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 <
|
||||
18 days 01:00:00 < 19 days 01:00:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_index_repr(self):
|
||||
idx = CategoricalIndex(Categorical([1, 2, 3]))
|
||||
exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" # noqa
|
||||
assert repr(idx) == exp
|
||||
|
||||
i = CategoricalIndex(Categorical(np.arange(10)))
|
||||
exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=False, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_ordered(self):
|
||||
i = CategoricalIndex(Categorical([1, 2, 3], ordered=True))
|
||||
exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
i = CategoricalIndex(Categorical(np.arange(10), ordered=True))
|
||||
exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=True, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_datetime(self):
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00',
|
||||
'2011-01-01 11:00:00', '2011-01-01 12:00:00',
|
||||
'2011-01-01 13:00:00'],
|
||||
categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5,
|
||||
tz='US/Eastern')
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
|
||||
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
|
||||
'2011-01-01 13:00:00-05:00'],
|
||||
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_datetime_ordered(self):
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00',
|
||||
'2011-01-01 11:00:00', '2011-01-01 12:00:00',
|
||||
'2011-01-01 13:00:00'],
|
||||
categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5,
|
||||
tz='US/Eastern')
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
|
||||
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
|
||||
'2011-01-01 13:00:00-05:00'],
|
||||
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
i = CategoricalIndex(Categorical(idx.append(idx), ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
|
||||
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
|
||||
'2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00',
|
||||
'2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00',
|
||||
'2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'],
|
||||
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_period(self):
|
||||
# test all length
|
||||
idx = period_range('2011-01-01 09:00', freq='H', periods=1)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range('2011-01-01 09:00', freq='H', periods=2)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range('2011-01-01 09:00', freq='H', periods=3)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
|
||||
'2011-01-01 12:00', '2011-01-01 13:00'],
|
||||
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
i = CategoricalIndex(Categorical(idx.append(idx)))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
|
||||
'2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00',
|
||||
'2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00',
|
||||
'2011-01-01 13:00'],
|
||||
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range('2011-01', freq='M', periods=5)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_period_ordered(self):
|
||||
idx = period_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
|
||||
'2011-01-01 12:00', '2011-01-01 13:00'],
|
||||
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range('2011-01', freq='M', periods=5)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_timedelta(self):
|
||||
idx = timedelta_range('1 days', periods=5)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=False, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = timedelta_range('1 hours', periods=10)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00',
|
||||
'3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00',
|
||||
'6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00',
|
||||
'9 days 01:00:00'],
|
||||
categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=False, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_timedelta_ordered(self):
|
||||
idx = timedelta_range('1 days', periods=5)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = timedelta_range('1 hours', periods=10)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00',
|
||||
'3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00',
|
||||
'6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00',
|
||||
'9 days 01:00:00'],
|
||||
categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
+124
@@ -0,0 +1,124 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Categorical, Index
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalSort(object):
|
||||
|
||||
def test_argsort(self):
|
||||
c = Categorical([5, 3, 1, 4, 2], ordered=True)
|
||||
|
||||
expected = np.array([2, 4, 1, 3, 0])
|
||||
tm.assert_numpy_array_equal(c.argsort(ascending=True), expected,
|
||||
check_dtype=False)
|
||||
|
||||
expected = expected[::-1]
|
||||
tm.assert_numpy_array_equal(c.argsort(ascending=False), expected,
|
||||
check_dtype=False)
|
||||
|
||||
def test_numpy_argsort(self):
|
||||
c = Categorical([5, 3, 1, 4, 2], ordered=True)
|
||||
|
||||
expected = np.array([2, 4, 1, 3, 0])
|
||||
tm.assert_numpy_array_equal(np.argsort(c), expected,
|
||||
check_dtype=False)
|
||||
|
||||
tm.assert_numpy_array_equal(np.argsort(c, kind='mergesort'), expected,
|
||||
check_dtype=False)
|
||||
|
||||
msg = "the 'axis' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.argsort(c, axis=0)
|
||||
|
||||
msg = "the 'order' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.argsort(c, order='C')
|
||||
|
||||
def test_sort_values(self):
|
||||
|
||||
# unordered cats are sortable
|
||||
cat = Categorical(["a", "b", "b", "a"], ordered=False)
|
||||
cat.sort_values()
|
||||
|
||||
cat = Categorical(["a", "c", "b", "d"], ordered=True)
|
||||
|
||||
# sort_values
|
||||
res = cat.sort_values()
|
||||
exp = np.array(["a", "b", "c", "d"], dtype=object)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, cat.categories)
|
||||
|
||||
cat = Categorical(["a", "c", "b", "d"],
|
||||
categories=["a", "b", "c", "d"], ordered=True)
|
||||
res = cat.sort_values()
|
||||
exp = np.array(["a", "b", "c", "d"], dtype=object)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, cat.categories)
|
||||
|
||||
res = cat.sort_values(ascending=False)
|
||||
exp = np.array(["d", "c", "b", "a"], dtype=object)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, cat.categories)
|
||||
|
||||
# sort (inplace order)
|
||||
cat1 = cat.copy()
|
||||
cat1.sort_values(inplace=True)
|
||||
exp = np.array(["a", "b", "c", "d"], dtype=object)
|
||||
tm.assert_numpy_array_equal(cat1.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, cat.categories)
|
||||
|
||||
# reverse
|
||||
cat = Categorical(["a", "c", "c", "b", "d"], ordered=True)
|
||||
res = cat.sort_values(ascending=False)
|
||||
exp_val = np.array(["d", "c", "c", "b", "a"], dtype=object)
|
||||
exp_categories = Index(["a", "b", "c", "d"])
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp_val)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
def test_sort_values_na_position(self):
|
||||
# see gh-12882
|
||||
cat = Categorical([5, 2, np.nan, 2, np.nan], ordered=True)
|
||||
exp_categories = Index([2, 5])
|
||||
|
||||
exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan])
|
||||
res = cat.sort_values() # default arguments
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
exp = np.array([np.nan, np.nan, 2.0, 2.0, 5.0])
|
||||
res = cat.sort_values(ascending=True, na_position='first')
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
exp = np.array([np.nan, np.nan, 5.0, 2.0, 2.0])
|
||||
res = cat.sort_values(ascending=False, na_position='first')
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan])
|
||||
res = cat.sort_values(ascending=True, na_position='last')
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
exp = np.array([5.0, 2.0, 2.0, np.nan, np.nan])
|
||||
res = cat.sort_values(ascending=False, na_position='last')
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True)
|
||||
res = cat.sort_values(ascending=False, na_position='last')
|
||||
exp_val = np.array(["d", "c", "b", "a", np.nan], dtype=object)
|
||||
exp_categories = Index(["a", "b", "c", "d"])
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp_val)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True)
|
||||
res = cat.sort_values(ascending=False, na_position='first')
|
||||
exp_val = np.array([np.nan, "d", "c", "b", "a"], dtype=object)
|
||||
exp_categories = Index(["a", "b", "c", "d"])
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp_val)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
+25
@@ -0,0 +1,25 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from pandas import Categorical
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalSubclassing(object):
|
||||
|
||||
def test_constructor(self):
|
||||
sc = tm.SubclassedCategorical(['a', 'b', 'c'])
|
||||
assert isinstance(sc, tm.SubclassedCategorical)
|
||||
tm.assert_categorical_equal(sc, Categorical(['a', 'b', 'c']))
|
||||
|
||||
def test_from_codes(self):
|
||||
sc = tm.SubclassedCategorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
|
||||
assert isinstance(sc, tm.SubclassedCategorical)
|
||||
exp = Categorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
|
||||
tm.assert_categorical_equal(sc, exp)
|
||||
|
||||
def test_map(self):
|
||||
sc = tm.SubclassedCategorical(['a', 'b', 'c'])
|
||||
res = sc.map(lambda x: x.upper())
|
||||
assert isinstance(res, tm.SubclassedCategorical)
|
||||
exp = Categorical(['A', 'B', 'C'])
|
||||
tm.assert_categorical_equal(res, exp)
|
||||
+31
@@ -0,0 +1,31 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalWarnings(object):
|
||||
def test_tab_complete_warning(self, ip):
|
||||
# https://github.com/pandas-dev/pandas/issues/16409
|
||||
pytest.importorskip('IPython', minversion="6.0.0")
|
||||
from IPython.core.completer import provisionalcompleter
|
||||
|
||||
code = "import pandas as pd; c = Categorical([])"
|
||||
ip.run_code(code)
|
||||
with tm.assert_produces_warning(None):
|
||||
with provisionalcompleter('ignore'):
|
||||
list(ip.Completer.completions('c.', 1))
|
||||
|
||||
def test_CategoricalAccessor_categorical_deprecation(object):
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
pd.Series(['a', 'b'], dtype='category').cat.categorical
|
||||
|
||||
def test_CategoricalAccessor_name_deprecation(object):
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
pd.Series(['a', 'b'], dtype='category').cat.name
|
||||
|
||||
def test_CategoricalAccessor_index_deprecation(object):
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
pd.Series(['a', 'b'], dtype='category').cat.index
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,68 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, Interval, IntervalIndex, date_range, timedelta_range
|
||||
from pandas.core.arrays import IntervalArray
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.fixture(params=[
|
||||
(Index([0, 2, 4]), Index([1, 3, 5])),
|
||||
(Index([0., 1., 2.]), Index([1., 2., 3.])),
|
||||
(timedelta_range('0 days', periods=3),
|
||||
timedelta_range('1 day', periods=3)),
|
||||
(date_range('20170101', periods=3), date_range('20170102', periods=3)),
|
||||
(date_range('20170101', periods=3, tz='US/Eastern'),
|
||||
date_range('20170102', periods=3, tz='US/Eastern'))],
|
||||
ids=lambda x: str(x[0].dtype))
|
||||
def left_right_dtypes(request):
|
||||
"""
|
||||
Fixture for building an IntervalArray from various dtypes
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
class TestMethods(object):
|
||||
|
||||
@pytest.mark.parametrize('new_closed', [
|
||||
'left', 'right', 'both', 'neither'])
|
||||
def test_set_closed(self, closed, new_closed):
|
||||
# GH 21670
|
||||
array = IntervalArray.from_breaks(range(10), closed=closed)
|
||||
result = array.set_closed(new_closed)
|
||||
expected = IntervalArray.from_breaks(range(10), closed=new_closed)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('other', [
|
||||
Interval(0, 1, closed='right'),
|
||||
IntervalArray.from_breaks([1, 2, 3, 4], closed='right'),
|
||||
])
|
||||
def test_where_raises(self, other):
|
||||
ser = pd.Series(IntervalArray.from_breaks([1, 2, 3, 4],
|
||||
closed='left'))
|
||||
match = "'value.closed' is 'right', expected 'left'."
|
||||
with pytest.raises(ValueError, match=match):
|
||||
ser.where([True, False, True], other=other)
|
||||
|
||||
|
||||
class TestSetitem(object):
|
||||
|
||||
def test_set_na(self, left_right_dtypes):
|
||||
left, right = left_right_dtypes
|
||||
result = IntervalArray.from_arrays(left, right)
|
||||
result[0] = np.nan
|
||||
|
||||
expected_left = Index([left._na_value] + list(left[1:]))
|
||||
expected_right = Index([right._na_value] + list(right[1:]))
|
||||
expected = IntervalArray.from_arrays(expected_left, expected_right)
|
||||
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_repr_matches():
|
||||
idx = IntervalIndex.from_breaks([1, 2, 3])
|
||||
a = repr(idx)
|
||||
b = repr(idx.values)
|
||||
assert a.replace("Index", "Array") == b
|
||||
@@ -0,0 +1,82 @@
|
||||
"""Tests for Interval-Interval operations, such as overlaps, contains, etc."""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Interval, IntervalIndex, Timedelta, Timestamp
|
||||
from pandas.core.arrays import IntervalArray
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.fixture(params=[IntervalArray, IntervalIndex])
|
||||
def constructor(request):
|
||||
"""
|
||||
Fixture for testing both interval container classes.
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[
|
||||
(Timedelta('0 days'), Timedelta('1 day')),
|
||||
(Timestamp('2018-01-01'), Timedelta('1 day')),
|
||||
(0, 1)], ids=lambda x: type(x[0]).__name__)
|
||||
def start_shift(request):
|
||||
"""
|
||||
Fixture for generating intervals of different types from a start value
|
||||
and a shift value that can be added to start to generate an endpoint.
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
class TestOverlaps(object):
|
||||
|
||||
def test_overlaps_interval(
|
||||
self, constructor, start_shift, closed, other_closed):
|
||||
start, shift = start_shift
|
||||
interval = Interval(start, start + 3 * shift, other_closed)
|
||||
|
||||
# intervals: identical, nested, spanning, partial, adjacent, disjoint
|
||||
tuples = [(start, start + 3 * shift),
|
||||
(start + shift, start + 2 * shift),
|
||||
(start - shift, start + 4 * shift),
|
||||
(start + 2 * shift, start + 4 * shift),
|
||||
(start + 3 * shift, start + 4 * shift),
|
||||
(start + 4 * shift, start + 5 * shift)]
|
||||
interval_container = constructor.from_tuples(tuples, closed)
|
||||
|
||||
adjacent = (interval.closed_right and interval_container.closed_left)
|
||||
expected = np.array([True, True, True, True, adjacent, False])
|
||||
result = interval_container.overlaps(interval)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('other_constructor', [
|
||||
IntervalArray, IntervalIndex])
|
||||
def test_overlaps_interval_container(self, constructor, other_constructor):
|
||||
# TODO: modify this test when implemented
|
||||
interval_container = constructor.from_breaks(range(5))
|
||||
other_container = other_constructor.from_breaks(range(5))
|
||||
with pytest.raises(NotImplementedError):
|
||||
interval_container.overlaps(other_container)
|
||||
|
||||
def test_overlaps_na(self, constructor, start_shift):
|
||||
"""NA values are marked as False"""
|
||||
start, shift = start_shift
|
||||
interval = Interval(start, start + shift)
|
||||
|
||||
tuples = [(start, start + shift),
|
||||
np.nan,
|
||||
(start + 2 * shift, start + 3 * shift)]
|
||||
interval_container = constructor.from_tuples(tuples)
|
||||
|
||||
expected = np.array([True, False, False])
|
||||
result = interval_container.overlaps(interval)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('other', [
|
||||
10, True, 'foo', Timedelta('1 day'), Timestamp('2018-01-01')],
|
||||
ids=lambda x: type(x).__name__)
|
||||
def test_overlaps_invalid_type(self, constructor, other):
|
||||
interval_container = constructor.from_breaks(range(5))
|
||||
msg = '`other` must be Interval-like, got {other}'.format(
|
||||
other=type(other).__name__)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
interval_container.overlaps(other)
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
+538
@@ -0,0 +1,538 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.sparse.api import SparseDtype
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSparseArrayArithmetics(object):
|
||||
|
||||
_base = np.array
|
||||
_klass = pd.SparseArray
|
||||
|
||||
def _assert(self, a, b):
|
||||
tm.assert_numpy_array_equal(a, b)
|
||||
|
||||
def _check_numeric_ops(self, a, b, a_dense, b_dense):
|
||||
with np.errstate(invalid='ignore', divide='ignore'):
|
||||
# Unfortunately, trying to wrap the computation of each expected
|
||||
# value is with np.errstate() is too tedious.
|
||||
|
||||
# sparse & sparse
|
||||
self._assert((a + b).to_dense(), a_dense + b_dense)
|
||||
self._assert((b + a).to_dense(), b_dense + a_dense)
|
||||
|
||||
self._assert((a - b).to_dense(), a_dense - b_dense)
|
||||
self._assert((b - a).to_dense(), b_dense - a_dense)
|
||||
|
||||
self._assert((a * b).to_dense(), a_dense * b_dense)
|
||||
self._assert((b * a).to_dense(), b_dense * a_dense)
|
||||
|
||||
# pandas uses future division
|
||||
self._assert((a / b).to_dense(), a_dense * 1.0 / b_dense)
|
||||
self._assert((b / a).to_dense(), b_dense * 1.0 / a_dense)
|
||||
|
||||
# ToDo: FIXME in GH 13843
|
||||
if not (self._base == pd.Series and
|
||||
a.dtype.subtype == np.dtype('int64')):
|
||||
self._assert((a // b).to_dense(), a_dense // b_dense)
|
||||
self._assert((b // a).to_dense(), b_dense // a_dense)
|
||||
|
||||
self._assert((a % b).to_dense(), a_dense % b_dense)
|
||||
self._assert((b % a).to_dense(), b_dense % a_dense)
|
||||
|
||||
self._assert((a ** b).to_dense(), a_dense ** b_dense)
|
||||
self._assert((b ** a).to_dense(), b_dense ** a_dense)
|
||||
|
||||
# sparse & dense
|
||||
self._assert((a + b_dense).to_dense(), a_dense + b_dense)
|
||||
self._assert((b_dense + a).to_dense(), b_dense + a_dense)
|
||||
|
||||
self._assert((a - b_dense).to_dense(), a_dense - b_dense)
|
||||
self._assert((b_dense - a).to_dense(), b_dense - a_dense)
|
||||
|
||||
self._assert((a * b_dense).to_dense(), a_dense * b_dense)
|
||||
self._assert((b_dense * a).to_dense(), b_dense * a_dense)
|
||||
|
||||
# pandas uses future division
|
||||
self._assert((a / b_dense).to_dense(), a_dense * 1.0 / b_dense)
|
||||
self._assert((b_dense / a).to_dense(), b_dense * 1.0 / a_dense)
|
||||
|
||||
# ToDo: FIXME in GH 13843
|
||||
if not (self._base == pd.Series and
|
||||
a.dtype.subtype == np.dtype('int64')):
|
||||
self._assert((a // b_dense).to_dense(), a_dense // b_dense)
|
||||
self._assert((b_dense // a).to_dense(), b_dense // a_dense)
|
||||
|
||||
self._assert((a % b_dense).to_dense(), a_dense % b_dense)
|
||||
self._assert((b_dense % a).to_dense(), b_dense % a_dense)
|
||||
|
||||
self._assert((a ** b_dense).to_dense(), a_dense ** b_dense)
|
||||
self._assert((b_dense ** a).to_dense(), b_dense ** a_dense)
|
||||
|
||||
def _check_bool_result(self, res):
|
||||
assert isinstance(res, self._klass)
|
||||
assert isinstance(res.dtype, SparseDtype)
|
||||
assert res.dtype.subtype == np.bool
|
||||
assert isinstance(res.fill_value, bool)
|
||||
|
||||
def _check_comparison_ops(self, a, b, a_dense, b_dense):
|
||||
with np.errstate(invalid='ignore'):
|
||||
# Unfortunately, trying to wrap the computation of each expected
|
||||
# value is with np.errstate() is too tedious.
|
||||
#
|
||||
# sparse & sparse
|
||||
self._check_bool_result(a == b)
|
||||
self._assert((a == b).to_dense(), a_dense == b_dense)
|
||||
|
||||
self._check_bool_result(a != b)
|
||||
self._assert((a != b).to_dense(), a_dense != b_dense)
|
||||
|
||||
self._check_bool_result(a >= b)
|
||||
self._assert((a >= b).to_dense(), a_dense >= b_dense)
|
||||
|
||||
self._check_bool_result(a <= b)
|
||||
self._assert((a <= b).to_dense(), a_dense <= b_dense)
|
||||
|
||||
self._check_bool_result(a > b)
|
||||
self._assert((a > b).to_dense(), a_dense > b_dense)
|
||||
|
||||
self._check_bool_result(a < b)
|
||||
self._assert((a < b).to_dense(), a_dense < b_dense)
|
||||
|
||||
# sparse & dense
|
||||
self._check_bool_result(a == b_dense)
|
||||
self._assert((a == b_dense).to_dense(), a_dense == b_dense)
|
||||
|
||||
self._check_bool_result(a != b_dense)
|
||||
self._assert((a != b_dense).to_dense(), a_dense != b_dense)
|
||||
|
||||
self._check_bool_result(a >= b_dense)
|
||||
self._assert((a >= b_dense).to_dense(), a_dense >= b_dense)
|
||||
|
||||
self._check_bool_result(a <= b_dense)
|
||||
self._assert((a <= b_dense).to_dense(), a_dense <= b_dense)
|
||||
|
||||
self._check_bool_result(a > b_dense)
|
||||
self._assert((a > b_dense).to_dense(), a_dense > b_dense)
|
||||
|
||||
self._check_bool_result(a < b_dense)
|
||||
self._assert((a < b_dense).to_dense(), a_dense < b_dense)
|
||||
|
||||
def _check_logical_ops(self, a, b, a_dense, b_dense):
|
||||
# sparse & sparse
|
||||
self._check_bool_result(a & b)
|
||||
self._assert((a & b).to_dense(), a_dense & b_dense)
|
||||
|
||||
self._check_bool_result(a | b)
|
||||
self._assert((a | b).to_dense(), a_dense | b_dense)
|
||||
# sparse & dense
|
||||
self._check_bool_result(a & b_dense)
|
||||
self._assert((a & b_dense).to_dense(), a_dense & b_dense)
|
||||
|
||||
self._check_bool_result(a | b_dense)
|
||||
self._assert((a | b_dense).to_dense(), a_dense | b_dense)
|
||||
|
||||
def test_float_scalar(self):
|
||||
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
|
||||
for kind in ['integer', 'block']:
|
||||
a = self._klass(values, kind=kind)
|
||||
self._check_numeric_ops(a, 1, values, 1)
|
||||
self._check_numeric_ops(a, 0, values, 0)
|
||||
self._check_numeric_ops(a, 3, values, 3)
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=0)
|
||||
self._check_numeric_ops(a, 1, values, 1)
|
||||
self._check_numeric_ops(a, 0, values, 0)
|
||||
self._check_numeric_ops(a, 3, values, 3)
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=2)
|
||||
self._check_numeric_ops(a, 1, values, 1)
|
||||
self._check_numeric_ops(a, 0, values, 0)
|
||||
self._check_numeric_ops(a, 3, values, 3)
|
||||
|
||||
def test_float_scalar_comparison(self):
|
||||
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
|
||||
for kind in ['integer', 'block']:
|
||||
a = self._klass(values, kind=kind)
|
||||
self._check_comparison_ops(a, 1, values, 1)
|
||||
self._check_comparison_ops(a, 0, values, 0)
|
||||
self._check_comparison_ops(a, 3, values, 3)
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=0)
|
||||
self._check_comparison_ops(a, 1, values, 1)
|
||||
self._check_comparison_ops(a, 0, values, 0)
|
||||
self._check_comparison_ops(a, 3, values, 3)
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=2)
|
||||
self._check_comparison_ops(a, 1, values, 1)
|
||||
self._check_comparison_ops(a, 0, values, 0)
|
||||
self._check_comparison_ops(a, 3, values, 3)
|
||||
|
||||
def test_float_same_index(self):
|
||||
# when sp_index are the same
|
||||
for kind in ['integer', 'block']:
|
||||
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])
|
||||
|
||||
a = self._klass(values, kind=kind)
|
||||
b = self._klass(rvalues, kind=kind)
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
values = self._base([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.])
|
||||
rvalues = self._base([0., 2., 3., 4., 0., 0., 1., 3., 2., 0.])
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=0)
|
||||
b = self._klass(rvalues, kind=kind, fill_value=0)
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
def test_float_same_index_comparison(self):
|
||||
# when sp_index are the same
|
||||
for kind in ['integer', 'block']:
|
||||
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])
|
||||
|
||||
a = self._klass(values, kind=kind)
|
||||
b = self._klass(rvalues, kind=kind)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
values = self._base([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.])
|
||||
rvalues = self._base([0., 2., 3., 4., 0., 0., 1., 3., 2., 0.])
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=0)
|
||||
b = self._klass(rvalues, kind=kind, fill_value=0)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
def test_float_array(self):
|
||||
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
|
||||
|
||||
for kind in ['integer', 'block']:
|
||||
a = self._klass(values, kind=kind)
|
||||
b = self._klass(rvalues, kind=kind)
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
self._check_numeric_ops(a, b * 0, values, rvalues * 0)
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=0)
|
||||
b = self._klass(rvalues, kind=kind)
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=0)
|
||||
b = self._klass(rvalues, kind=kind, fill_value=0)
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=1)
|
||||
b = self._klass(rvalues, kind=kind, fill_value=2)
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
def test_float_array_different_kind(self):
|
||||
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
|
||||
|
||||
a = self._klass(values, kind='integer')
|
||||
b = self._klass(rvalues, kind='block')
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
self._check_numeric_ops(a, b * 0, values, rvalues * 0)
|
||||
|
||||
a = self._klass(values, kind='integer', fill_value=0)
|
||||
b = self._klass(rvalues, kind='block')
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
a = self._klass(values, kind='integer', fill_value=0)
|
||||
b = self._klass(rvalues, kind='block', fill_value=0)
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
a = self._klass(values, kind='integer', fill_value=1)
|
||||
b = self._klass(rvalues, kind='block', fill_value=2)
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
def test_float_array_comparison(self):
|
||||
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
|
||||
|
||||
for kind in ['integer', 'block']:
|
||||
a = self._klass(values, kind=kind)
|
||||
b = self._klass(rvalues, kind=kind)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
self._check_comparison_ops(a, b * 0, values, rvalues * 0)
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=0)
|
||||
b = self._klass(rvalues, kind=kind)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=0)
|
||||
b = self._klass(rvalues, kind=kind, fill_value=0)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=1)
|
||||
b = self._klass(rvalues, kind=kind, fill_value=2)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
def test_int_array(self):
|
||||
# have to specify dtype explicitly until fixing GH 667
|
||||
dtype = np.int64
|
||||
|
||||
values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
|
||||
rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)
|
||||
|
||||
for kind in ['integer', 'block']:
|
||||
a = self._klass(values, dtype=dtype, kind=kind)
|
||||
assert a.dtype == SparseDtype(dtype)
|
||||
b = self._klass(rvalues, dtype=dtype, kind=kind)
|
||||
assert b.dtype == SparseDtype(dtype)
|
||||
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
self._check_numeric_ops(a, b * 0, values, rvalues * 0)
|
||||
|
||||
a = self._klass(values, fill_value=0, dtype=dtype, kind=kind)
|
||||
assert a.dtype == SparseDtype(dtype)
|
||||
b = self._klass(rvalues, dtype=dtype, kind=kind)
|
||||
assert b.dtype == SparseDtype(dtype)
|
||||
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
a = self._klass(values, fill_value=0, dtype=dtype, kind=kind)
|
||||
assert a.dtype == SparseDtype(dtype)
|
||||
b = self._klass(rvalues, fill_value=0, dtype=dtype, kind=kind)
|
||||
assert b.dtype == SparseDtype(dtype)
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
a = self._klass(values, fill_value=1, dtype=dtype, kind=kind)
|
||||
assert a.dtype == SparseDtype(dtype, fill_value=1)
|
||||
b = self._klass(rvalues, fill_value=2, dtype=dtype, kind=kind)
|
||||
assert b.dtype == SparseDtype(dtype, fill_value=2)
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
def test_int_array_comparison(self):
|
||||
|
||||
# int32 NI ATM
|
||||
for dtype in ['int64']:
|
||||
values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
|
||||
rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)
|
||||
|
||||
for kind in ['integer', 'block']:
|
||||
a = self._klass(values, dtype=dtype, kind=kind)
|
||||
b = self._klass(rvalues, dtype=dtype, kind=kind)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
self._check_comparison_ops(a, b * 0, values, rvalues * 0)
|
||||
|
||||
a = self._klass(values, dtype=dtype, kind=kind, fill_value=0)
|
||||
b = self._klass(rvalues, dtype=dtype, kind=kind)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
a = self._klass(values, dtype=dtype, kind=kind, fill_value=0)
|
||||
b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=0)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
a = self._klass(values, dtype=dtype, kind=kind, fill_value=1)
|
||||
b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=2)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
def test_bool_same_index(self):
|
||||
# GH 14000
|
||||
# when sp_index are the same
|
||||
for kind in ['integer', 'block']:
|
||||
values = self._base([True, False, True, True], dtype=np.bool)
|
||||
rvalues = self._base([True, False, True, True], dtype=np.bool)
|
||||
|
||||
for fill_value in [True, False, np.nan]:
|
||||
a = self._klass(values, kind=kind, dtype=np.bool,
|
||||
fill_value=fill_value)
|
||||
b = self._klass(rvalues, kind=kind, dtype=np.bool,
|
||||
fill_value=fill_value)
|
||||
self._check_logical_ops(a, b, values, rvalues)
|
||||
|
||||
def test_bool_array_logical(self):
|
||||
# GH 14000
|
||||
# when sp_index are the same
|
||||
for kind in ['integer', 'block']:
|
||||
values = self._base([True, False, True, False, True, True],
|
||||
dtype=np.bool)
|
||||
rvalues = self._base([True, False, False, True, False, True],
|
||||
dtype=np.bool)
|
||||
|
||||
for fill_value in [True, False, np.nan]:
|
||||
a = self._klass(values, kind=kind, dtype=np.bool,
|
||||
fill_value=fill_value)
|
||||
b = self._klass(rvalues, kind=kind, dtype=np.bool,
|
||||
fill_value=fill_value)
|
||||
self._check_logical_ops(a, b, values, rvalues)
|
||||
|
||||
def test_mixed_array_float_int(self):
|
||||
|
||||
for rdtype in ['int64']:
|
||||
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)
|
||||
|
||||
for kind in ['integer', 'block']:
|
||||
a = self._klass(values, kind=kind)
|
||||
b = self._klass(rvalues, kind=kind)
|
||||
assert b.dtype == SparseDtype(rdtype)
|
||||
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
self._check_numeric_ops(a, b * 0, values, rvalues * 0)
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=0)
|
||||
b = self._klass(rvalues, kind=kind)
|
||||
assert b.dtype == SparseDtype(rdtype)
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=0)
|
||||
b = self._klass(rvalues, kind=kind, fill_value=0)
|
||||
assert b.dtype == SparseDtype(rdtype)
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=1)
|
||||
b = self._klass(rvalues, kind=kind, fill_value=2)
|
||||
assert b.dtype == SparseDtype(rdtype, fill_value=2)
|
||||
self._check_numeric_ops(a, b, values, rvalues)
|
||||
|
||||
def test_mixed_array_comparison(self):
|
||||
|
||||
# int32 NI ATM
|
||||
for rdtype in ['int64']:
|
||||
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)
|
||||
|
||||
for kind in ['integer', 'block']:
|
||||
a = self._klass(values, kind=kind)
|
||||
b = self._klass(rvalues, kind=kind)
|
||||
assert b.dtype == SparseDtype(rdtype)
|
||||
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
self._check_comparison_ops(a, b * 0, values, rvalues * 0)
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=0)
|
||||
b = self._klass(rvalues, kind=kind)
|
||||
assert b.dtype == SparseDtype(rdtype)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=0)
|
||||
b = self._klass(rvalues, kind=kind, fill_value=0)
|
||||
assert b.dtype == SparseDtype(rdtype)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
a = self._klass(values, kind=kind, fill_value=1)
|
||||
b = self._klass(rvalues, kind=kind, fill_value=2)
|
||||
assert b.dtype == SparseDtype(rdtype, fill_value=2)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
|
||||
class TestSparseSeriesArithmetic(TestSparseArrayArithmetics):
|
||||
|
||||
_base = pd.Series
|
||||
_klass = pd.SparseSeries
|
||||
|
||||
def _assert(self, a, b):
|
||||
tm.assert_series_equal(a, b)
|
||||
|
||||
def test_alignment(self):
|
||||
da = pd.Series(np.arange(4))
|
||||
db = pd.Series(np.arange(4), index=[1, 2, 3, 4])
|
||||
|
||||
sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=0)
|
||||
sb = pd.SparseSeries(np.arange(4), index=[1, 2, 3, 4],
|
||||
dtype=np.int64, fill_value=0)
|
||||
self._check_numeric_ops(sa, sb, da, db)
|
||||
|
||||
sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=np.nan)
|
||||
sb = pd.SparseSeries(np.arange(4), index=[1, 2, 3, 4],
|
||||
dtype=np.int64, fill_value=np.nan)
|
||||
self._check_numeric_ops(sa, sb, da, db)
|
||||
|
||||
da = pd.Series(np.arange(4))
|
||||
db = pd.Series(np.arange(4), index=[10, 11, 12, 13])
|
||||
|
||||
sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=0)
|
||||
sb = pd.SparseSeries(np.arange(4), index=[10, 11, 12, 13],
|
||||
dtype=np.int64, fill_value=0)
|
||||
self._check_numeric_ops(sa, sb, da, db)
|
||||
|
||||
sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=np.nan)
|
||||
sb = pd.SparseSeries(np.arange(4), index=[10, 11, 12, 13],
|
||||
dtype=np.int64, fill_value=np.nan)
|
||||
self._check_numeric_ops(sa, sb, da, db)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", [
|
||||
operator.eq,
|
||||
operator.add,
|
||||
])
|
||||
def test_with_list(op):
|
||||
arr = pd.SparseArray([0, 1], fill_value=0)
|
||||
result = op(arr, [0, 1])
|
||||
expected = op(arr, pd.SparseArray([0, 1]))
|
||||
tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('ufunc', [
|
||||
np.abs, np.exp,
|
||||
])
|
||||
@pytest.mark.parametrize('arr', [
|
||||
pd.SparseArray([0, 0, -1, 1]),
|
||||
pd.SparseArray([None, None, -1, 1]),
|
||||
])
|
||||
def test_ufuncs(ufunc, arr):
|
||||
result = ufunc(arr)
|
||||
fill_value = ufunc(arr.fill_value)
|
||||
expected = pd.SparseArray(ufunc(np.asarray(arr)), fill_value=fill_value)
|
||||
tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("a, b", [
|
||||
(pd.SparseArray([0, 0, 0]), np.array([0, 1, 2])),
|
||||
(pd.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
|
||||
(pd.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
|
||||
(pd.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
|
||||
(pd.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
|
||||
])
|
||||
@pytest.mark.parametrize("ufunc", [
|
||||
np.add,
|
||||
np.greater,
|
||||
])
|
||||
def test_binary_ufuncs(ufunc, a, b):
|
||||
# can't say anything about fill value here.
|
||||
result = ufunc(a, b)
|
||||
expected = ufunc(np.asarray(a), np.asarray(b))
|
||||
assert isinstance(result, pd.SparseArray)
|
||||
tm.assert_numpy_array_equal(np.asarray(result), expected)
|
||||
|
||||
|
||||
def test_ndarray_inplace():
|
||||
sparray = pd.SparseArray([0, 2, 0, 0])
|
||||
ndarray = np.array([0, 1, 2, 3])
|
||||
ndarray += sparray
|
||||
expected = np.array([0, 3, 2, 3])
|
||||
tm.assert_numpy_array_equal(ndarray, expected)
|
||||
|
||||
|
||||
def test_sparray_inplace():
|
||||
sparray = pd.SparseArray([0, 2, 0, 0])
|
||||
ndarray = np.array([0, 1, 2, 3])
|
||||
sparray += ndarray
|
||||
expected = pd.SparseArray([0, 3, 2, 3], fill_value=0)
|
||||
tm.assert_sp_array_equal(sparray, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [True, False])
|
||||
def test_invert(fill_value):
|
||||
arr = np.array([True, False, False, True])
|
||||
sparray = pd.SparseArray(arr, fill_value=fill_value)
|
||||
result = ~sparray
|
||||
expected = pd.SparseArray(~arr, fill_value=not fill_value)
|
||||
tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [0, np.nan])
|
||||
@pytest.mark.parametrize("op", [operator.pos, operator.neg])
|
||||
def test_unary_op(op, fill_value):
|
||||
arr = np.array([0, 1, np.nan, 2])
|
||||
sparray = pd.SparseArray(arr, fill_value=fill_value)
|
||||
result = op(sparray)
|
||||
expected = pd.SparseArray(op(arr), fill_value=op(fill_value))
|
||||
tm.assert_sp_array_equal(result, expected)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,161 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.sparse.api import SparseDtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype, fill_value", [
|
||||
('int', 0),
|
||||
('float', np.nan),
|
||||
('bool', False),
|
||||
('object', np.nan),
|
||||
('datetime64[ns]', pd.NaT),
|
||||
('timedelta64[ns]', pd.NaT),
|
||||
])
|
||||
def test_inferred_dtype(dtype, fill_value):
|
||||
sparse_dtype = SparseDtype(dtype)
|
||||
result = sparse_dtype.fill_value
|
||||
if pd.isna(fill_value):
|
||||
assert pd.isna(result) and type(result) == type(fill_value)
|
||||
else:
|
||||
assert result == fill_value
|
||||
|
||||
|
||||
def test_from_sparse_dtype():
|
||||
dtype = SparseDtype('float', 0)
|
||||
result = SparseDtype(dtype)
|
||||
assert result.fill_value == 0
|
||||
|
||||
|
||||
def test_from_sparse_dtype_fill_value():
|
||||
dtype = SparseDtype('int', 1)
|
||||
result = SparseDtype(dtype, fill_value=2)
|
||||
expected = SparseDtype('int', 2)
|
||||
assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtype, fill_value', [
|
||||
('int', None),
|
||||
('float', None),
|
||||
('bool', None),
|
||||
('object', None),
|
||||
('datetime64[ns]', None),
|
||||
('timedelta64[ns]', None),
|
||||
('int', np.nan),
|
||||
('float', 0),
|
||||
])
|
||||
def test_equal(dtype, fill_value):
|
||||
a = SparseDtype(dtype, fill_value)
|
||||
b = SparseDtype(dtype, fill_value)
|
||||
assert a == b
|
||||
assert b == a
|
||||
|
||||
|
||||
def test_nans_equal():
|
||||
a = SparseDtype(float, float('nan'))
|
||||
b = SparseDtype(float, np.nan)
|
||||
assert a == b
|
||||
assert b == a
|
||||
|
||||
|
||||
@pytest.mark.parametrize('a, b', [
|
||||
(SparseDtype('float64'), SparseDtype('float32')),
|
||||
(SparseDtype('float64'), SparseDtype('float64', 0)),
|
||||
(SparseDtype('float64'), SparseDtype('datetime64[ns]', np.nan)),
|
||||
(SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)),
|
||||
(SparseDtype('float64'), np.dtype('float64')),
|
||||
])
|
||||
def test_not_equal(a, b):
|
||||
assert a != b
|
||||
|
||||
|
||||
def test_construct_from_string_raises():
|
||||
with pytest.raises(TypeError):
|
||||
SparseDtype.construct_from_string('not a dtype')
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype, expected", [
|
||||
(SparseDtype(int), True),
|
||||
(SparseDtype(float), True),
|
||||
(SparseDtype(bool), True),
|
||||
(SparseDtype(object), False),
|
||||
(SparseDtype(str), False),
|
||||
])
|
||||
def test_is_numeric(dtype, expected):
|
||||
assert dtype._is_numeric is expected
|
||||
|
||||
|
||||
def test_str_uses_object():
|
||||
result = SparseDtype(str).subtype
|
||||
assert result == np.dtype('object')
|
||||
|
||||
|
||||
@pytest.mark.parametrize("string, expected", [
|
||||
('Sparse[float64]', SparseDtype(np.dtype('float64'))),
|
||||
('Sparse[float32]', SparseDtype(np.dtype('float32'))),
|
||||
('Sparse[int]', SparseDtype(np.dtype('int'))),
|
||||
('Sparse[str]', SparseDtype(np.dtype('str'))),
|
||||
('Sparse[datetime64[ns]]', SparseDtype(np.dtype('datetime64[ns]'))),
|
||||
("Sparse", SparseDtype(np.dtype("float"), np.nan))
|
||||
])
|
||||
def test_construct_from_string(string, expected):
|
||||
result = SparseDtype.construct_from_string(string)
|
||||
assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("a, b, expected", [
|
||||
(SparseDtype(float, 0.0), SparseDtype(np.dtype('float'), 0.0), True),
|
||||
(SparseDtype(int, 0), SparseDtype(int, 0), True),
|
||||
(SparseDtype(float, float('nan')), SparseDtype(float, np.nan), True),
|
||||
(SparseDtype(float, 0), SparseDtype(float, np.nan), False),
|
||||
(SparseDtype(int, 0.0), SparseDtype(float, 0.0), False),
|
||||
])
|
||||
def test_hash_equal(a, b, expected):
|
||||
result = a == b
|
||||
assert result is expected
|
||||
|
||||
result = hash(a) == hash(b)
|
||||
assert result is expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize('string, expected', [
|
||||
('Sparse[int]', 'int'),
|
||||
('Sparse[int, 0]', 'int'),
|
||||
('Sparse[int64]', 'int64'),
|
||||
('Sparse[int64, 0]', 'int64'),
|
||||
('Sparse[datetime64[ns], 0]', 'datetime64[ns]'),
|
||||
])
|
||||
def test_parse_subtype(string, expected):
|
||||
subtype, _ = SparseDtype._parse_subtype(string)
|
||||
assert subtype == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("string", [
|
||||
"Sparse[int, 1]",
|
||||
"Sparse[float, 0.0]",
|
||||
"Sparse[bool, True]",
|
||||
])
|
||||
def test_construct_from_string_fill_value_raises(string):
|
||||
with pytest.raises(TypeError, match='fill_value in the string is not'):
|
||||
SparseDtype.construct_from_string(string)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('original, dtype, expected', [
|
||||
(SparseDtype(int, 0), float, SparseDtype(float, 0.0)),
|
||||
(SparseDtype(int, 1), float, SparseDtype(float, 1.0)),
|
||||
(SparseDtype(int, 1), str, SparseDtype(object, '1')),
|
||||
(SparseDtype(float, 1.5), int, SparseDtype(int, 1)),
|
||||
])
|
||||
def test_update_dtype(original, dtype, expected):
|
||||
result = original.update_dtype(dtype)
|
||||
assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("original, dtype", [
|
||||
(SparseDtype(float, np.nan), int),
|
||||
(SparseDtype(str, 'abc'), int),
|
||||
])
|
||||
def test_update_dtype_raises(original, dtype):
|
||||
with pytest.raises(ValueError):
|
||||
original.update_dtype(dtype)
|
||||
@@ -0,0 +1,605 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._libs.sparse as splib
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import Series
|
||||
from pandas.core.arrays.sparse import BlockIndex, IntIndex, _make_index
|
||||
import pandas.util.testing as tm
|
||||
|
||||
TEST_LENGTH = 20
|
||||
|
||||
plain_case = dict(xloc=[0, 7, 15], xlen=[3, 5, 5], yloc=[2, 9, 14],
|
||||
ylen=[2, 3, 5], intersect_loc=[2, 9, 15],
|
||||
intersect_len=[1, 3, 4])
|
||||
delete_blocks = dict(xloc=[0, 5], xlen=[4, 4], yloc=[1], ylen=[4],
|
||||
intersect_loc=[1], intersect_len=[3])
|
||||
split_blocks = dict(xloc=[0], xlen=[10], yloc=[0, 5], ylen=[3, 7],
|
||||
intersect_loc=[0, 5], intersect_len=[3, 5])
|
||||
skip_block = dict(xloc=[10], xlen=[5], yloc=[0, 12], ylen=[5, 3],
|
||||
intersect_loc=[12], intersect_len=[3])
|
||||
|
||||
no_intersect = dict(xloc=[0, 10], xlen=[4, 6], yloc=[5, 17], ylen=[4, 2],
|
||||
intersect_loc=[], intersect_len=[])
|
||||
|
||||
|
||||
def check_cases(_check_case):
    """Run *_check_case* against every canonical intersection fixture.

    ``_check_case`` is called as
    ``_check_case(xloc, xlen, yloc, ylen, intersect_loc, intersect_len)``.
    """
    for case in (plain_case, delete_blocks, split_blocks, skip_block,
                 no_intersect):
        _check_case(case['xloc'], case['xlen'], case['yloc'], case['ylen'],
                    case['intersect_loc'], case['intersect_len'])

    # one or both is empty
    _check_case([0], [5], [], [], [], [])
    _check_case([], [], [], [], [], [])
|
||||
|
||||
|
||||
class TestSparseIndexUnion(object):
    """Tests for ``make_union`` on BlockIndex and IntIndex."""

    def test_index_make_union(self):
        """Block unions match the expected blocks for each diagrammed case,
        and the IntIndex union agrees with the block result."""
        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
            bresult = xindex.make_union(yindex)
            assert (isinstance(bresult, BlockIndex))
            tm.assert_numpy_array_equal(bresult.blocs,
                                        np.array(eloc, dtype=np.int32))
            tm.assert_numpy_array_equal(bresult.blengths,
                                        np.array(elen, dtype=np.int32))

            # the same union computed through IntIndex must agree
            ixindex = xindex.to_int_index()
            iyindex = yindex.to_int_index()
            iresult = ixindex.make_union(iyindex)
            assert (isinstance(iresult, IntIndex))
            tm.assert_numpy_array_equal(iresult.indices,
                                        bresult.to_int_index().indices)

        """
        x: ----
        y:     ----
        r: --------
        """
        xloc = [0]
        xlen = [5]
        yloc = [5]
        ylen = [4]
        eloc = [0]
        elen = [9]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)
        """
        x: -----     -----
        y:   -----          --
        """
        xloc = [0, 10]
        xlen = [5, 5]
        yloc = [2, 17]
        ylen = [5, 2]
        eloc = [0, 10, 17]
        elen = [7, 5, 2]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)
        """
        x: ------
        y:    -------
        r: ----------
        """
        xloc = [1]
        xlen = [5]
        yloc = [3]
        ylen = [5]
        eloc = [1]
        elen = [7]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)
        """
        x: ------  -----
        y:    -------
        r: -------------
        """
        xloc = [2, 10]
        xlen = [4, 4]
        yloc = [4]
        ylen = [8]
        eloc = [2]
        elen = [12]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)
        """
        x: ---  -----
        y: -------
        r: -------------
        """
        xloc = [0, 5]
        xlen = [3, 5]
        yloc = [0]
        ylen = [7]
        eloc = [0]
        elen = [10]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)
        """
        x: ------  -----
        y:    -------  ---
        r: -------------
        """
        xloc = [2, 10]
        xlen = [4, 4]
        yloc = [4, 13]
        ylen = [8, 4]
        eloc = [2]
        elen = [15]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)
        """
        x: ----------------------
        y:   ----  ----   ---
        r: ----------------------
        """
        xloc = [2]
        xlen = [15]
        yloc = [4, 9, 14]
        ylen = [3, 2, 2]
        eloc = [2]
        elen = [15]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)
        """
        x: ----       ---
        y:       ---       ---
        """
        xloc = [0, 10]
        xlen = [3, 3]
        yloc = [5, 15]
        ylen = [2, 2]
        eloc = [0, 5, 10, 15]
        elen = [3, 2, 3, 2]
        _check_case(xloc, xlen, yloc, ylen, eloc, elen)

    def test_intindex_make_union(self):
        """IntIndex unions: overlap, empty operands, identical operands,
        and a ValueError on mismatched lengths."""
        a = IntIndex(5, np.array([0, 3, 4], dtype=np.int32))
        b = IntIndex(5, np.array([0, 2], dtype=np.int32))
        res = a.make_union(b)
        exp = IntIndex(5, np.array([0, 2, 3, 4], np.int32))
        assert res.equals(exp)

        a = IntIndex(5, np.array([], dtype=np.int32))
        b = IntIndex(5, np.array([0, 2], dtype=np.int32))
        res = a.make_union(b)
        exp = IntIndex(5, np.array([0, 2], np.int32))
        assert res.equals(exp)

        a = IntIndex(5, np.array([], dtype=np.int32))
        b = IntIndex(5, np.array([], dtype=np.int32))
        res = a.make_union(b)
        exp = IntIndex(5, np.array([], np.int32))
        assert res.equals(exp)

        a = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32))
        b = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32))
        res = a.make_union(b)
        exp = IntIndex(5, np.array([0, 1, 2, 3, 4], np.int32))
        assert res.equals(exp)

        # unions are only defined between indexes of equal length
        a = IntIndex(5, np.array([0, 1], dtype=np.int32))
        b = IntIndex(4, np.array([0, 1], dtype=np.int32))
        with pytest.raises(ValueError):
            a.make_union(b)
|
||||
|
||||
|
||||
class TestSparseIndexIntersect(object):
    """Tests for ``intersect`` on BlockIndex and IntIndex."""

    @td.skip_if_windows
    def test_intersect(self):
        """Intersections match the expected blocks for every canonical case,
        agree between block and integer forms, and raise on length mismatch."""
        def _check_correct(a, b, expected):
            result = a.intersect(b)
            assert (result.equals(expected))

        def _check_length_exc(a, longer):
            # mismatched lengths are rejected
            pytest.raises(Exception, a.intersect, longer)

        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
            expected = BlockIndex(TEST_LENGTH, eloc, elen)
            longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen)

            _check_correct(xindex, yindex, expected)
            _check_correct(xindex.to_int_index(), yindex.to_int_index(),
                           expected.to_int_index())

            _check_length_exc(xindex, longer_index)
            _check_length_exc(xindex.to_int_index(),
                              longer_index.to_int_index())

        check_cases(_check_case)

    def test_intersect_empty(self):
        """Intersecting with an empty index yields the empty index,
        for both IntIndex and BlockIndex forms."""
        xindex = IntIndex(4, np.array([], dtype=np.int32))
        yindex = IntIndex(4, np.array([2, 3], dtype=np.int32))
        assert xindex.intersect(yindex).equals(xindex)
        assert yindex.intersect(xindex).equals(xindex)

        xindex = xindex.to_block_index()
        yindex = yindex.to_block_index()
        assert xindex.intersect(yindex).equals(xindex)
        assert yindex.intersect(xindex).equals(xindex)

    def test_intersect_identical(self):
        """An index intersected with itself is itself, in both forms."""
        cases = [IntIndex(5, np.array([1, 2], dtype=np.int32)),
                 IntIndex(5, np.array([0, 2, 4], dtype=np.int32)),
                 IntIndex(0, np.array([], dtype=np.int32)),
                 IntIndex(5, np.array([], dtype=np.int32))]

        for case in cases:
            assert case.intersect(case).equals(case)
            case = case.to_block_index()
            assert case.intersect(case).equals(case)
|
||||
|
||||
|
||||
class TestSparseIndexCommon(object):
    """Behavior shared by IntIndex and BlockIndex, built via ``_make_index``."""

    def test_int_internal(self):
        """kind='integer' produces an IntIndex holding the given indices."""
        idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer')
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 2
        tm.assert_numpy_array_equal(idx.indices,
                                    np.array([2, 3], dtype=np.int32))

        idx = _make_index(4, np.array([], dtype=np.int32), kind='integer')
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 0
        tm.assert_numpy_array_equal(idx.indices,
                                    np.array([], dtype=np.int32))

        idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32),
                          kind='integer')
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 4
        tm.assert_numpy_array_equal(idx.indices,
                                    np.array([0, 1, 2, 3], dtype=np.int32))

    def test_block_internal(self):
        """kind='block' produces a BlockIndex with runs coalesced into
        (blocs, blengths) pairs."""
        idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='block')
        assert isinstance(idx, BlockIndex)
        assert idx.npoints == 2
        tm.assert_numpy_array_equal(idx.blocs,
                                    np.array([2], dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths,
                                    np.array([2], dtype=np.int32))

        idx = _make_index(4, np.array([], dtype=np.int32), kind='block')
        assert isinstance(idx, BlockIndex)
        assert idx.npoints == 0
        tm.assert_numpy_array_equal(idx.blocs,
                                    np.array([], dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths,
                                    np.array([], dtype=np.int32))

        idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32),
                          kind='block')
        assert isinstance(idx, BlockIndex)
        assert idx.npoints == 4
        tm.assert_numpy_array_equal(idx.blocs,
                                    np.array([0], dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths,
                                    np.array([4], dtype=np.int32))

        idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32),
                          kind='block')
        assert isinstance(idx, BlockIndex)
        assert idx.npoints == 3
        tm.assert_numpy_array_equal(idx.blocs,
                                    np.array([0, 2], dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths,
                                    np.array([1, 2], dtype=np.int32))

    def test_lookup(self):
        """lookup maps a dense position to its sparse position (-1 if the
        position is not stored), for both index kinds."""
        for kind in ['integer', 'block']:
            idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind)
            assert idx.lookup(-1) == -1
            assert idx.lookup(0) == -1
            assert idx.lookup(1) == -1
            assert idx.lookup(2) == 0
            assert idx.lookup(3) == 1
            assert idx.lookup(4) == -1

            idx = _make_index(4, np.array([], dtype=np.int32), kind=kind)

            # nothing is stored, so every position misses
            for i in range(-1, 5):
                assert idx.lookup(i) == -1

            idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32),
                              kind=kind)
            assert idx.lookup(-1) == -1
            assert idx.lookup(0) == 0
            assert idx.lookup(1) == 1
            assert idx.lookup(2) == 2
            assert idx.lookup(3) == 3
            assert idx.lookup(4) == -1

            idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32),
                              kind=kind)
            assert idx.lookup(-1) == -1
            assert idx.lookup(0) == 0
            assert idx.lookup(1) == -1
            assert idx.lookup(2) == 1
            assert idx.lookup(3) == 2
            assert idx.lookup(4) == -1

    def test_lookup_array(self):
        """Vectorized lookup over an array of dense positions."""
        for kind in ['integer', 'block']:
            idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind)

            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
            exp = np.array([-1, -1, 0], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
            exp = np.array([-1, 0, -1, 1], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            idx = _make_index(4, np.array([], dtype=np.int32), kind=kind)
            res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32))
            exp = np.array([-1, -1, -1, -1], dtype=np.int32)
            # BUG FIX: `res` and `exp` were computed but never compared,
            # silently skipping the empty-index case.
            tm.assert_numpy_array_equal(res, exp)

            idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32),
                              kind=kind)
            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
            exp = np.array([-1, 0, 2], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
            exp = np.array([-1, 2, 1, 3], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32),
                              kind=kind)
            res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32))
            exp = np.array([1, -1, 2, 0], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

            res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32))
            exp = np.array([-1, -1, 1, -1], dtype=np.int32)
            tm.assert_numpy_array_equal(res, exp)

    def test_lookup_basics(self):
        """Scalar lookup agrees between block and integer representations
        of the same index."""
        def _check(index):
            assert (index.lookup(0) == -1)
            assert (index.lookup(5) == 0)
            assert (index.lookup(7) == 2)
            assert (index.lookup(8) == -1)
            assert (index.lookup(9) == -1)
            assert (index.lookup(10) == -1)
            assert (index.lookup(11) == -1)
            assert (index.lookup(12) == 3)
            assert (index.lookup(17) == 8)
            assert (index.lookup(18) == -1)

        bindex = BlockIndex(20, [5, 12], [3, 6])
        iindex = bindex.to_int_index()

        _check(bindex)
        _check(iindex)

        # corner cases
        # TODO(review): no corner cases are exercised here — flesh out.
|
||||
|
||||
|
||||
class TestBlockIndex(object):
    """BlockIndex-specific construction, integrity, and conversion tests."""

    def test_block_internal(self):
        """_make_index(kind='block') coalesces runs into blocs/blengths.

        NOTE(review): this duplicates
        TestSparseIndexCommon.test_block_internal — candidate for removal.
        """
        idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='block')
        assert isinstance(idx, BlockIndex)
        assert idx.npoints == 2
        tm.assert_numpy_array_equal(idx.blocs,
                                    np.array([2], dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths,
                                    np.array([2], dtype=np.int32))

        idx = _make_index(4, np.array([], dtype=np.int32), kind='block')
        assert isinstance(idx, BlockIndex)
        assert idx.npoints == 0
        tm.assert_numpy_array_equal(idx.blocs,
                                    np.array([], dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths,
                                    np.array([], dtype=np.int32))

        idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32),
                          kind='block')
        assert isinstance(idx, BlockIndex)
        assert idx.npoints == 4
        tm.assert_numpy_array_equal(idx.blocs,
                                    np.array([0], dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths,
                                    np.array([4], dtype=np.int32))

        idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind='block')
        assert isinstance(idx, BlockIndex)
        assert idx.npoints == 3
        tm.assert_numpy_array_equal(idx.blocs,
                                    np.array([0, 2], dtype=np.int32))
        tm.assert_numpy_array_equal(idx.blengths,
                                    np.array([1, 2], dtype=np.int32))

    def test_make_block_boundary(self):
        """Alternating singleton points produce one length-1 block each,
        across lengths around internal buffer boundaries."""
        for i in [5, 10, 100, 101]:
            idx = _make_index(i, np.arange(0, i, 2, dtype=np.int32),
                              kind='block')

            exp = np.arange(0, i, 2, dtype=np.int32)
            tm.assert_numpy_array_equal(idx.blocs, exp)
            tm.assert_numpy_array_equal(idx.blengths,
                                        np.ones(len(exp), dtype=np.int32))

    def test_equals(self):
        """equals is reflexive and sensitive to block lengths."""
        index = BlockIndex(10, [0, 4], [2, 5])

        assert index.equals(index)
        assert not index.equals(BlockIndex(10, [0, 4], [2, 6]))

    def test_check_integrity(self):
        """Construction validates blocks against the index length."""
        locs = []
        lengths = []

        # 0-length OK
        # TODO: index variables are not used...is that right?
        index = BlockIndex(0, locs, lengths)  # noqa

        # also OK even though empty
        index = BlockIndex(1, locs, lengths)  # noqa

        # block extend beyond end
        pytest.raises(Exception, BlockIndex, 10, [5], [10])

        # block overlap
        pytest.raises(Exception, BlockIndex, 10, [2, 5], [5, 3])

    def test_to_int_index(self):
        """to_int_index expands each block into its member positions."""
        locs = [0, 10]
        lengths = [4, 6]
        exp_inds = [0, 1, 2, 3, 10, 11, 12, 13, 14, 15]

        block = BlockIndex(20, locs, lengths)
        dense = block.to_int_index()

        tm.assert_numpy_array_equal(dense.indices,
                                    np.array(exp_inds, dtype=np.int32))

    def test_to_block_index(self):
        """to_block_index on a BlockIndex is the identity (same object)."""
        index = BlockIndex(10, [0, 5], [4, 5])
        assert index.to_block_index() is index
|
||||
|
||||
|
||||
class TestIntIndex(object):
    """IntIndex-specific construction, integrity, and conversion tests."""

    def test_check_integrity(self):
        """Construction validates count, sign, bounds, and ordering of
        the indices."""

        # Too many indices than specified in self.length
        msg = "Too many indices"

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=1, indices=[1, 2, 3])

        # No index can be negative.
        # (an exact duplicate of this check was removed here)
        msg = "No index can be less than zero"

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, -2, 3])

        # All indices must be less than the length.
        msg = "All indices must be less than the length"

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 2, 5])

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 2, 6])

        # Indices must be strictly ascending.
        msg = "Indices must be strictly increasing"

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 3, 2])

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 3, 3])

    def test_int_internal(self):
        """_make_index(kind='integer') stores the indices verbatim.

        NOTE(review): this duplicates
        TestSparseIndexCommon.test_int_internal — candidate for removal.
        """
        idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer')
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 2
        tm.assert_numpy_array_equal(idx.indices,
                                    np.array([2, 3], dtype=np.int32))

        idx = _make_index(4, np.array([], dtype=np.int32), kind='integer')
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 0
        tm.assert_numpy_array_equal(idx.indices,
                                    np.array([], dtype=np.int32))

        idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32),
                          kind='integer')
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 4
        tm.assert_numpy_array_equal(idx.indices,
                                    np.array([0, 1, 2, 3], dtype=np.int32))

    def test_equals(self):
        """equals is reflexive and sensitive to the stored indices."""
        index = IntIndex(10, [0, 1, 2, 3, 4])
        assert index.equals(index)
        assert not index.equals(IntIndex(10, [0, 1, 2, 3]))

    def test_to_block_index(self):
        """Block -> int -> block round trip preserves equality."""

        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

            # see if survive the round trip
            xbindex = xindex.to_int_index().to_block_index()
            ybindex = yindex.to_int_index().to_block_index()
            assert isinstance(xbindex, BlockIndex)
            assert xbindex.equals(xindex)
            assert ybindex.equals(yindex)

        check_cases(_check_case)

    def test_to_int_index(self):
        """to_int_index on an IntIndex is the identity (same object)."""
        index = IntIndex(10, [2, 3, 4, 5, 6])
        assert index.to_int_index() is index
|
||||
|
||||
|
||||
class TestSparseOperators(object):
    """Check the C sparse arithmetic kernels against a dense Series
    computation of the same operation."""

    def _op_tests(self, sparse_op, python_op):
        """Run *sparse_op* over the canonical cases and compare with
        *python_op* applied to dense, fill-value-padded Series."""
        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

            xdindex = xindex.to_int_index()
            ydindex = yindex.to_int_index()

            # distinct value patterns so mixups are detectable
            x = np.arange(xindex.npoints) * 10. + 1
            y = np.arange(yindex.npoints) * 100. + 1

            xfill = 0
            yfill = 2

            result_block_vals, rb_index, bfill = sparse_op(x, xindex, xfill, y,
                                                           yindex, yfill)
            result_int_vals, ri_index, ifill = sparse_op(x, xdindex, xfill, y,
                                                         ydindex, yfill)

            # block-based and integer-based kernels must agree
            assert rb_index.to_int_index().equals(ri_index)
            tm.assert_numpy_array_equal(result_block_vals, result_int_vals)
            assert bfill == ifill

            # check versus Series...
            xseries = Series(x, xdindex.indices)
            xseries = xseries.reindex(np.arange(TEST_LENGTH)).fillna(xfill)

            yseries = Series(y, ydindex.indices)
            yseries = yseries.reindex(np.arange(TEST_LENGTH)).fillna(yfill)

            series_result = python_op(xseries, yseries)
            series_result = series_result.reindex(ri_index.indices)

            tm.assert_numpy_array_equal(result_block_vals,
                                        series_result.values)
            tm.assert_numpy_array_equal(result_int_vals, series_result.values)

        check_cases(_check_case)

    @pytest.mark.parametrize('opname',
                             ['add', 'sub', 'mul', 'truediv', 'floordiv'])
    def test_op(self, opname):
        """Each float64 sparse kernel matches the dense operator result."""
        sparse_op = getattr(splib, 'sparse_%s_float64' % opname)
        python_op = getattr(operator, opname)
        self._op_tests(sparse_op, python_op)
|
||||
@@ -0,0 +1,256 @@
|
||||
import datetime
|
||||
import decimal
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas.core.dtypes.dtypes import registry
|
||||
|
||||
import pandas as pd
|
||||
from pandas.api.extensions import register_extension_dtype
|
||||
from pandas.core.arrays import PandasArray, integer_array, period_array
|
||||
from pandas.tests.extension.decimal import (
|
||||
DecimalArray, DecimalDtype, to_decimal)
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("data, dtype, expected", [
|
||||
# Basic NumPy defaults.
|
||||
([1, 2], None, PandasArray(np.array([1, 2]))),
|
||||
([1, 2], object, PandasArray(np.array([1, 2], dtype=object))),
|
||||
([1, 2], np.dtype('float32'),
|
||||
PandasArray(np.array([1., 2.0], dtype=np.dtype('float32')))),
|
||||
(np.array([1, 2]), None, PandasArray(np.array([1, 2]))),
|
||||
|
||||
# String alias passes through to NumPy
|
||||
([1, 2], 'float32', PandasArray(np.array([1, 2], dtype='float32'))),
|
||||
|
||||
# Period alias
|
||||
([pd.Period('2000', 'D'), pd.Period('2001', 'D')], 'Period[D]',
|
||||
period_array(['2000', '2001'], freq='D')),
|
||||
|
||||
# Period dtype
|
||||
([pd.Period('2000', 'D')], pd.PeriodDtype('D'),
|
||||
period_array(['2000'], freq='D')),
|
||||
|
||||
# Datetime (naive)
|
||||
([1, 2], np.dtype('datetime64[ns]'),
|
||||
pd.arrays.DatetimeArray._from_sequence(
|
||||
np.array([1, 2], dtype='datetime64[ns]'))),
|
||||
|
||||
(np.array([1, 2], dtype='datetime64[ns]'), None,
|
||||
pd.arrays.DatetimeArray._from_sequence(
|
||||
np.array([1, 2], dtype='datetime64[ns]'))),
|
||||
|
||||
(pd.DatetimeIndex(['2000', '2001']), np.dtype('datetime64[ns]'),
|
||||
pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])),
|
||||
|
||||
(pd.DatetimeIndex(['2000', '2001']), None,
|
||||
pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])),
|
||||
|
||||
(['2000', '2001'], np.dtype('datetime64[ns]'),
|
||||
pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])),
|
||||
|
||||
# Datetime (tz-aware)
|
||||
(['2000', '2001'], pd.DatetimeTZDtype(tz="CET"),
|
||||
pd.arrays.DatetimeArray._from_sequence(
|
||||
['2000', '2001'], dtype=pd.DatetimeTZDtype(tz="CET"))),
|
||||
|
||||
# Timedelta
|
||||
(['1H', '2H'], np.dtype('timedelta64[ns]'),
|
||||
pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])),
|
||||
|
||||
(pd.TimedeltaIndex(['1H', '2H']), np.dtype('timedelta64[ns]'),
|
||||
pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])),
|
||||
|
||||
(pd.TimedeltaIndex(['1H', '2H']), None,
|
||||
pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])),
|
||||
|
||||
# Category
|
||||
(['a', 'b'], 'category', pd.Categorical(['a', 'b'])),
|
||||
(['a', 'b'], pd.CategoricalDtype(None, ordered=True),
|
||||
pd.Categorical(['a', 'b'], ordered=True)),
|
||||
|
||||
# Interval
|
||||
([pd.Interval(1, 2), pd.Interval(3, 4)], 'interval',
|
||||
pd.arrays.IntervalArray.from_tuples([(1, 2), (3, 4)])),
|
||||
|
||||
# Sparse
|
||||
([0, 1], 'Sparse[int64]', pd.SparseArray([0, 1], dtype='int64')),
|
||||
|
||||
# IntegerNA
|
||||
([1, None], 'Int16', integer_array([1, None], dtype='Int16')),
|
||||
(pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
|
||||
|
||||
# Index
|
||||
(pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
|
||||
|
||||
# Series[EA] returns the EA
|
||||
(pd.Series(pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])),
|
||||
None,
|
||||
pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])),
|
||||
|
||||
# "3rd party" EAs work
|
||||
([decimal.Decimal(0), decimal.Decimal(1)], 'decimal', to_decimal([0, 1])),
|
||||
|
||||
# pass an ExtensionArray, but a different dtype
|
||||
(period_array(['2000', '2001'], freq='D'),
|
||||
'category',
|
||||
pd.Categorical([pd.Period('2000', 'D'), pd.Period('2001', 'D')])),
|
||||
])
|
||||
def test_array(data, dtype, expected):
|
||||
result = pd.array(data, dtype=dtype)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_array_copy():
    """pd.array copies its input by default; copy=False shares the buffer."""
    src = np.array([1, 2])

    # default is to copy
    out = pd.array(src)
    assert np.shares_memory(src, out._ndarray) is False

    # an explicit copy=True behaves the same as the default
    out = pd.array(src, copy=True)
    assert np.shares_memory(src, out._ndarray) is False

    # copy=False wraps the original buffer
    out = pd.array(src, copy=False)
    assert np.shares_memory(src, out._ndarray) is True
|
||||
|
||||
|
||||
cet = pytz.timezone("CET")
|
||||
|
||||
|
||||
@pytest.mark.parametrize('data, expected', [
    # period
    ([pd.Period("2000", "D"), pd.Period("2001", "D")],
     period_array(["2000", "2001"], freq="D")),

    # interval
    ([pd.Interval(0, 1), pd.Interval(1, 2)],
     pd.arrays.IntervalArray.from_breaks([0, 1, 2])),

    # datetime
    ([pd.Timestamp('2000',), pd.Timestamp('2001')],
     pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])),

    ([datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)],
     pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])),

    (np.array([1, 2], dtype='M8[ns]'),
     pd.arrays.DatetimeArray(np.array([1, 2], dtype='M8[ns]'))),

    # non-nanosecond input is converted to nanoseconds
    (np.array([1, 2], dtype='M8[us]'),
     pd.arrays.DatetimeArray(np.array([1000, 2000], dtype='M8[ns]'))),

    # datetimetz
    ([pd.Timestamp('2000', tz='CET'), pd.Timestamp('2001', tz='CET')],
     pd.arrays.DatetimeArray._from_sequence(
         ['2000', '2001'], dtype=pd.DatetimeTZDtype(tz='CET'))),

    ([datetime.datetime(2000, 1, 1, tzinfo=cet),
      datetime.datetime(2001, 1, 1, tzinfo=cet)],
     pd.arrays.DatetimeArray._from_sequence(['2000', '2001'],
                                            tz=cet)),

    # timedelta
    ([pd.Timedelta('1H'), pd.Timedelta('2H')],
     pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])),

    (np.array([1, 2], dtype='m8[ns]'),
     pd.arrays.TimedeltaArray(np.array([1, 2], dtype='m8[ns]'))),

    (np.array([1, 2], dtype='m8[us]'),
     pd.arrays.TimedeltaArray(np.array([1000, 2000], dtype='m8[ns]'))),

])
def test_array_inference(data, expected):
    """With dtype=None, pd.array infers the extension type from the data."""
    result = pd.array(data)
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('data', [
    # mix of frequencies
    [pd.Period("2000", "D"), pd.Period("2001", "A")],
    # mix of closed
    [pd.Interval(0, 1, closed='left'), pd.Interval(1, 2, closed='right')],
    # Mix of timezones
    [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000", tz="UTC")],
    # Mix of tz-aware and tz-naive
    [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000")],
    np.array([pd.Timestamp('2000'), pd.Timestamp('2000', tz='CET')]),
])
def test_array_inference_fails(data):
    """When inference cannot pick a single extension type, pd.array falls
    back to an object-dtype PandasArray rather than raising."""
    result = pd.array(data)
    expected = PandasArray(np.array(data, dtype=object))
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("data", [
|
||||
np.array([[1, 2], [3, 4]]),
|
||||
[[1, 2], [3, 4]],
|
||||
])
|
||||
def test_nd_raises(data):
|
||||
with pytest.raises(ValueError, match='PandasArray must be 1-dimensional'):
|
||||
pd.array(data)
|
||||
|
||||
|
||||
def test_scalar_raises():
    """A bare scalar is not a valid pd.array input."""
    msg = "Cannot pass scalar '1'"
    with pytest.raises(ValueError, match=msg):
        pd.array(1)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# A couple dummy classes to ensure that Series and Indexes are unboxed before
|
||||
# getting to the EA classes.
|
||||
|
||||
|
||||
@register_extension_dtype
class DecimalDtype2(DecimalDtype):
    """A registered twin of DecimalDtype wired to DecimalArray2, used to
    check that pd.array dispatches through the dtype registry."""

    name = 'decimal2'

    @classmethod
    def construct_array_type(cls):
        """Return the array type associated with this dtype."""
        return DecimalArray2
|
||||
|
||||
|
||||
class DecimalArray2(DecimalArray):
    """DecimalArray whose _from_sequence refuses boxed Series/Index input,
    proving that pd.array unboxes before delegating to the EA."""

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        if isinstance(scalars, (pd.Series, pd.Index)):
            raise TypeError
        return super(DecimalArray2, cls)._from_sequence(
            scalars, dtype=dtype, copy=copy)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [pd.Series, pd.Index])
|
||||
def test_array_unboxes(box):
|
||||
data = box([decimal.Decimal('1'), decimal.Decimal('2')])
|
||||
# make sure it works
|
||||
with pytest.raises(TypeError):
|
||||
DecimalArray2._from_sequence(data)
|
||||
|
||||
result = pd.array(data, dtype='decimal2')
|
||||
expected = DecimalArray2._from_sequence(data.values)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.fixture
def registry_without_decimal():
    """Temporarily remove DecimalDtype from the dtype registry, restoring
    it after the test."""
    registry.dtypes.remove(DecimalDtype)
    yield
    registry.dtypes.append(DecimalDtype)
|
||||
|
||||
|
||||
def test_array_not_registered(registry_without_decimal):
    """Passing the dtype class explicitly works even when it is not in
    the registry."""
    # check we aren't on it
    assert registry.find('decimal') is None
    data = [decimal.Decimal('1'), decimal.Decimal('2')]

    result = pd.array(data, dtype=DecimalDtype)
    expected = DecimalArray._from_sequence(data)
    tm.assert_equal(result, expected)
|
||||
@@ -0,0 +1,657 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.compat as compat
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
# TODO: more freq variants
|
||||
@pytest.fixture(params=['D', 'B', 'W', 'M', 'Q', 'Y'])
def period_index(request):
    """
    A fixture to provide PeriodIndex objects with different frequencies.

    Most PeriodArray behavior is already tested in PeriodIndex tests,
    so here we just test that the PeriodArray behavior matches
    the PeriodIndex behavior.
    """
    # TODO: non-monotone indexes; NaTs, different start dates
    return pd.period_range(start=pd.Timestamp('2000-01-01'),
                           periods=100,
                           freq=request.param)
|
||||
|
||||
|
||||
@pytest.fixture(params=['D', 'B', 'W', 'M', 'Q', 'Y'])
def datetime_index(request):
    """
    A fixture to provide DatetimeIndex objects with different frequencies.

    Most DatetimeArray behavior is already tested in DatetimeIndex tests,
    so here we just test that the DatetimeArray behavior matches
    the DatetimeIndex behavior.
    """
    freqstr = request.param
    # TODO: non-monotone indexes; NaTs, different start dates, timezones
    # NOTE: local renamed from `pi` — a copy-paste leftover from the
    # period_index fixture; this is a DatetimeIndex, not a PeriodIndex.
    dti = pd.date_range(start=pd.Timestamp('2000-01-01'),
                        periods=100,
                        freq=freqstr)
    return dti
|
||||
|
||||
|
||||
@pytest.fixture
def timedelta_index(request):
    """
    A fixture to provide TimedeltaIndex objects with different frequencies.
    Most TimedeltaArray behavior is already tested in TimedeltaIndex tests,
    so here we just test that the TimedeltaArray behavior matches
    the TimedeltaIndex behavior.
    """
    # NOTE(review): `request` is unused — the fixture is not parametrized.
    # TODO: flesh this out
    return pd.TimedeltaIndex(['1 Day', '3 Hours', 'NaT'])
|
||||
|
||||
|
||||
class SharedTests(object):
|
||||
index_cls = None
|
||||
|
||||
    def test_compare_len1_raises(self):
        """Comparing arrays of different lengths raises rather than
        broadcasting, even for length-1 operands."""
        # make sure we raise when comparing with different lengths, specific
        # to the case where one has length-1, which numpy would broadcast
        data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9

        idx = self.index_cls._simple_new(data, freq='D')
        arr = self.array_cls(idx)

        with pytest.raises(ValueError, match="Lengths must match"):
            arr == arr[:1]

        # test the index classes while we're at it, GH#23078
        with pytest.raises(ValueError, match="Lengths must match"):
            idx <= idx[[0]]
|
||||
|
||||
    def test_take(self):
        """Array.take matches Index.take for list and ndarray indexers."""
        data = np.arange(100, dtype='i8') * 24 * 3600 * 10**9
        np.random.shuffle(data)

        idx = self.index_cls._simple_new(data, freq='D')
        arr = self.array_cls(idx)

        takers = [1, 4, 94]
        result = arr.take(takers)
        expected = idx.take(takers)

        tm.assert_index_equal(self.index_cls(result), expected)

        # same positions as an ndarray indexer
        takers = np.array([1, 4, 94])
        result = arr.take(takers)
        expected = idx.take(takers)

        tm.assert_index_equal(self.index_cls(result), expected)
|
||||
|
||||
def test_take_fill(self):
|
||||
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
|
||||
|
||||
idx = self.index_cls._simple_new(data, freq='D')
|
||||
arr = self.array_cls(idx)
|
||||
|
||||
result = arr.take([-1, 1], allow_fill=True, fill_value=None)
|
||||
assert result[0] is pd.NaT
|
||||
|
||||
result = arr.take([-1, 1], allow_fill=True, fill_value=np.nan)
|
||||
assert result[0] is pd.NaT
|
||||
|
||||
result = arr.take([-1, 1], allow_fill=True, fill_value=pd.NaT)
|
||||
assert result[0] is pd.NaT
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
arr.take([0, 1], allow_fill=True, fill_value=2)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
arr.take([0, 1], allow_fill=True, fill_value=2.0)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
arr.take([0, 1], allow_fill=True,
|
||||
fill_value=pd.Timestamp.now().time)
|
||||
|
||||
def test_concat_same_type(self):
|
||||
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
|
||||
|
||||
idx = self.index_cls._simple_new(data, freq='D').insert(0, pd.NaT)
|
||||
arr = self.array_cls(idx)
|
||||
|
||||
result = arr._concat_same_type([arr[:-1], arr[1:], arr])
|
||||
expected = idx._concat_same_dtype([idx[:-1], idx[1:], idx], None)
|
||||
|
||||
tm.assert_index_equal(self.index_cls(result), expected)
|
||||
|
||||
def test_unbox_scalar(self):
|
||||
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
|
||||
arr = self.array_cls(data, freq='D')
|
||||
result = arr._unbox_scalar(arr[0])
|
||||
assert isinstance(result, (int, compat.long))
|
||||
|
||||
result = arr._unbox_scalar(pd.NaT)
|
||||
assert isinstance(result, (int, compat.long))
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
arr._unbox_scalar('foo')
|
||||
|
||||
def test_check_compatible_with(self):
|
||||
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
|
||||
arr = self.array_cls(data, freq='D')
|
||||
|
||||
arr._check_compatible_with(arr[0])
|
||||
arr._check_compatible_with(arr[:1])
|
||||
arr._check_compatible_with(pd.NaT)
|
||||
|
||||
def test_scalar_from_string(self):
|
||||
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
|
||||
arr = self.array_cls(data, freq='D')
|
||||
result = arr._scalar_from_string(str(arr[0]))
|
||||
assert result == arr[0]
|
||||
|
||||
def test_reduce_invalid(self):
|
||||
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
|
||||
arr = self.array_cls(data, freq='D')
|
||||
|
||||
with pytest.raises(TypeError, match='cannot perform'):
|
||||
arr._reduce("not a method")
|
||||
|
||||
@pytest.mark.parametrize('method', ['pad', 'backfill'])
|
||||
def test_fillna_method_doesnt_change_orig(self, method):
|
||||
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
|
||||
arr = self.array_cls(data, freq='D')
|
||||
arr[4] = pd.NaT
|
||||
|
||||
fill_value = arr[3] if method == 'pad' else arr[5]
|
||||
|
||||
result = arr.fillna(method=method)
|
||||
assert result[4] == fill_value
|
||||
|
||||
# check that the original was not changed
|
||||
assert arr[4] is pd.NaT
|
||||
|
||||
def test_searchsorted(self):
|
||||
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
|
||||
arr = self.array_cls(data, freq='D')
|
||||
|
||||
# scalar
|
||||
result = arr.searchsorted(arr[1])
|
||||
assert result == 1
|
||||
|
||||
result = arr.searchsorted(arr[2], side="right")
|
||||
assert result == 3
|
||||
|
||||
# own-type
|
||||
result = arr.searchsorted(arr[1:3])
|
||||
expected = np.array([1, 2], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = arr.searchsorted(arr[1:3], side="right")
|
||||
expected = np.array([2, 3], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# Following numpy convention, NaT goes at the beginning
|
||||
# (unlike NaN which goes at the end)
|
||||
result = arr.searchsorted(pd.NaT)
|
||||
assert result == 0
|
||||
|
||||
def test_setitem(self):
|
||||
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
|
||||
arr = self.array_cls(data, freq='D')
|
||||
|
||||
arr[0] = arr[1]
|
||||
expected = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
|
||||
expected[0] = expected[1]
|
||||
|
||||
tm.assert_numpy_array_equal(arr.asi8, expected)
|
||||
|
||||
arr[:2] = arr[-2:]
|
||||
expected[:2] = expected[-2:]
|
||||
tm.assert_numpy_array_equal(arr.asi8, expected)
|
||||
|
||||
def test_setitem_raises(self):
|
||||
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
|
||||
arr = self.array_cls(data, freq='D')
|
||||
val = arr[0]
|
||||
|
||||
with pytest.raises(IndexError, match="index 12 is out of bounds"):
|
||||
arr[12] = val
|
||||
|
||||
with pytest.raises(TypeError, match="'value' should be a.* 'object'"):
|
||||
arr[0] = object()
|
||||
|
||||
|
||||
class TestDatetimeArray(SharedTests):
    """Run the SharedTests against DatetimeArray/DatetimeIndex, plus
    datetime-specific behavior (timezones, numpy interface, conversions)."""
    index_cls = pd.DatetimeIndex
    array_cls = DatetimeArray

    def test_round(self, tz_naive_fixture):
        # GH#24064
        tz = tz_naive_fixture
        dti = pd.date_range('2016-01-01 01:01:00', periods=3, freq='H', tz=tz)

        result = dti.round(freq='2T')
        expected = dti - pd.Timedelta(minutes=1)
        tm.assert_index_equal(result, expected)

    def test_array_interface(self, datetime_index):
        arr = DatetimeArray(datetime_index)

        # default asarray gives the same underlying data (for tz naive)
        result = np.asarray(arr)
        expected = arr._data
        assert result is expected
        tm.assert_numpy_array_equal(result, expected)
        result = np.array(arr, copy=False)
        assert result is expected
        tm.assert_numpy_array_equal(result, expected)

        # specifying M8[ns] gives the same result as default
        result = np.asarray(arr, dtype='datetime64[ns]')
        expected = arr._data
        assert result is expected
        tm.assert_numpy_array_equal(result, expected)
        result = np.array(arr, dtype='datetime64[ns]', copy=False)
        assert result is expected
        tm.assert_numpy_array_equal(result, expected)
        result = np.array(arr, dtype='datetime64[ns]')
        assert result is not expected
        tm.assert_numpy_array_equal(result, expected)

        # to object dtype
        result = np.asarray(arr, dtype=object)
        expected = np.array(list(arr), dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        # to other dtype always copies
        result = np.asarray(arr, dtype='int64')
        assert result is not arr.asi8
        assert not np.may_share_memory(arr, result)
        expected = arr.asi8.copy()
        tm.assert_numpy_array_equal(result, expected)

        # other dtypes handled by numpy
        for dtype in ['float64', str]:
            result = np.asarray(arr, dtype=dtype)
            expected = np.asarray(arr).astype(dtype)
            tm.assert_numpy_array_equal(result, expected)

    def test_array_object_dtype(self, tz_naive_fixture):
        # GH#23524
        tz = tz_naive_fixture
        dti = pd.date_range('2016-01-01', periods=3, tz=tz)
        arr = DatetimeArray(dti)

        expected = np.array(list(dti))

        result = np.array(arr, dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        # also test the DatetimeIndex method while we're at it
        result = np.array(dti, dtype=object)
        tm.assert_numpy_array_equal(result, expected)

    def test_array_tz(self, tz_naive_fixture):
        # GH#23524
        tz = tz_naive_fixture
        dti = pd.date_range('2016-01-01', periods=3, tz=tz)
        arr = DatetimeArray(dti)

        expected = dti.asi8.view('M8[ns]')
        result = np.array(arr, dtype='M8[ns]')
        tm.assert_numpy_array_equal(result, expected)

        result = np.array(arr, dtype='datetime64[ns]')
        tm.assert_numpy_array_equal(result, expected)

        # check that we are not making copies when setting copy=False
        result = np.array(arr, dtype='M8[ns]', copy=False)
        assert result.base is expected.base
        assert result.base is not None
        result = np.array(arr, dtype='datetime64[ns]', copy=False)
        assert result.base is expected.base
        assert result.base is not None

    def test_array_i8_dtype(self, tz_naive_fixture):
        tz = tz_naive_fixture
        dti = pd.date_range('2016-01-01', periods=3, tz=tz)
        arr = DatetimeArray(dti)

        expected = dti.asi8
        result = np.array(arr, dtype='i8')
        tm.assert_numpy_array_equal(result, expected)

        result = np.array(arr, dtype=np.int64)
        tm.assert_numpy_array_equal(result, expected)

        # check that we are still making copies when setting copy=False
        result = np.array(arr, dtype='i8', copy=False)
        assert result.base is not expected.base
        assert result.base is None

    def test_from_array_keeps_base(self):
        # Ensure that DatetimeArray._data.base isn't lost.
        arr = np.array(['2000-01-01', '2000-01-02'], dtype='M8[ns]')
        dta = DatetimeArray(arr)

        assert dta._data is arr
        dta = DatetimeArray(arr[:0])
        assert dta._data.base is arr

    def test_from_dti(self, tz_naive_fixture):
        tz = tz_naive_fixture
        dti = pd.date_range('2016-01-01', periods=3, tz=tz)
        arr = DatetimeArray(dti)
        assert list(dti) == list(arr)

        # Check that Index.__new__ knows what to do with DatetimeArray
        dti2 = pd.Index(arr)
        assert isinstance(dti2, pd.DatetimeIndex)
        assert list(dti2) == list(arr)

    def test_astype_object(self, tz_naive_fixture):
        tz = tz_naive_fixture
        dti = pd.date_range('2016-01-01', periods=3, tz=tz)
        arr = DatetimeArray(dti)
        asobj = arr.astype('O')
        assert isinstance(asobj, np.ndarray)
        assert asobj.dtype == 'O'
        assert list(asobj) == list(dti)

    @pytest.mark.parametrize('freqstr', ['D', 'B', 'W', 'M', 'Q', 'Y'])
    def test_to_perioddelta(self, datetime_index, freqstr):
        # GH#23113
        dti = datetime_index
        arr = DatetimeArray(dti)

        expected = dti.to_perioddelta(freq=freqstr)
        result = arr.to_perioddelta(freq=freqstr)
        assert isinstance(result, TimedeltaArray)

        # placeholder until these become actual EA subclasses and we can use
        # an EA-specific tm.assert_ function
        tm.assert_index_equal(pd.Index(result), pd.Index(expected))

    @pytest.mark.parametrize('freqstr', ['D', 'B', 'W', 'M', 'Q', 'Y'])
    def test_to_period(self, datetime_index, freqstr):
        dti = datetime_index
        arr = DatetimeArray(dti)

        expected = dti.to_period(freq=freqstr)
        result = arr.to_period(freq=freqstr)
        assert isinstance(result, PeriodArray)

        # placeholder until these become actual EA subclasses and we can use
        # an EA-specific tm.assert_ function
        tm.assert_index_equal(pd.Index(result), pd.Index(expected))

    @pytest.mark.parametrize('propname', pd.DatetimeIndex._bool_ops)
    def test_bool_properties(self, datetime_index, propname):
        # in this case _bool_ops is just `is_leap_year`
        dti = datetime_index
        arr = DatetimeArray(dti)
        assert dti.freq == arr.freq

        result = getattr(arr, propname)
        expected = np.array(getattr(dti, propname), dtype=result.dtype)

        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize('propname', pd.DatetimeIndex._field_ops)
    def test_int_properties(self, datetime_index, propname):
        dti = datetime_index
        arr = DatetimeArray(dti)

        result = getattr(arr, propname)
        expected = np.array(getattr(dti, propname), dtype=result.dtype)

        tm.assert_numpy_array_equal(result, expected)

    def test_take_fill_valid(self, datetime_index, tz_naive_fixture):
        dti = datetime_index.tz_localize(tz_naive_fixture)
        arr = DatetimeArray(dti)

        now = pd.Timestamp.now().tz_localize(dti.tz)
        result = arr.take([-1, 1], allow_fill=True, fill_value=now)
        assert result[0] == now

        with pytest.raises(ValueError):
            # fill_value Timedelta invalid
            arr.take([-1, 1], allow_fill=True, fill_value=now - now)

        with pytest.raises(ValueError):
            # fill_value Period invalid
            arr.take([-1, 1], allow_fill=True, fill_value=pd.Period('2014Q1'))

        tz = None if dti.tz is not None else 'US/Eastern'
        now = pd.Timestamp.now().tz_localize(tz)
        with pytest.raises(TypeError):
            # Timestamp with mismatched tz-awareness
            arr.take([-1, 1], allow_fill=True, fill_value=now)

        with pytest.raises(ValueError):
            # require NaT, not iNaT, as it could be confused with an integer
            arr.take([-1, 1], allow_fill=True, fill_value=pd.NaT.value)

    def test_concat_same_type_invalid(self, datetime_index):
        # different timezones
        dti = datetime_index
        arr = DatetimeArray(dti)

        if arr.tz is None:
            other = arr.tz_localize('UTC')
        else:
            other = arr.tz_localize(None)

        with pytest.raises(AssertionError):
            arr._concat_same_type([arr, other])

    def test_concat_same_type_different_freq(self):
        # we *can* concatentate DTI with different freqs.
        a = DatetimeArray(pd.date_range('2000', periods=2, freq='D',
                                        tz='US/Central'))
        b = DatetimeArray(pd.date_range('2000', periods=2, freq='H',
                                        tz='US/Central'))
        result = DatetimeArray._concat_same_type([a, b])
        expected = DatetimeArray(pd.to_datetime([
            '2000-01-01 00:00:00', '2000-01-02 00:00:00',
            '2000-01-01 00:00:00', '2000-01-01 01:00:00',
        ]).tz_localize("US/Central"))

        tm.assert_datetime_array_equal(result, expected)
|
||||
|
||||
|
||||
class TestTimedeltaArray(SharedTests):
    """Run the SharedTests against TimedeltaArray/TimedeltaIndex, plus
    timedelta-specific behavior (numpy interface, conversions)."""
    index_cls = pd.TimedeltaIndex
    array_cls = TimedeltaArray

    def test_from_tdi(self):
        tdi = pd.TimedeltaIndex(['1 Day', '3 Hours'])
        arr = TimedeltaArray(tdi)
        assert list(arr) == list(tdi)

        # Check that Index.__new__ knows what to do with TimedeltaArray
        tdi2 = pd.Index(arr)
        assert isinstance(tdi2, pd.TimedeltaIndex)
        assert list(tdi2) == list(arr)

    def test_astype_object(self):
        tdi = pd.TimedeltaIndex(['1 Day', '3 Hours'])
        arr = TimedeltaArray(tdi)
        asobj = arr.astype('O')
        assert isinstance(asobj, np.ndarray)
        assert asobj.dtype == 'O'
        assert list(asobj) == list(tdi)

    def test_to_pytimedelta(self, timedelta_index):
        tdi = timedelta_index
        arr = TimedeltaArray(tdi)

        expected = tdi.to_pytimedelta()
        result = arr.to_pytimedelta()

        tm.assert_numpy_array_equal(result, expected)

    def test_total_seconds(self, timedelta_index):
        tdi = timedelta_index
        arr = TimedeltaArray(tdi)

        expected = tdi.total_seconds()
        result = arr.total_seconds()

        tm.assert_numpy_array_equal(result, expected.values)

    @pytest.mark.parametrize('propname', pd.TimedeltaIndex._field_ops)
    def test_int_properties(self, timedelta_index, propname):
        tdi = timedelta_index
        arr = TimedeltaArray(tdi)

        result = getattr(arr, propname)
        expected = np.array(getattr(tdi, propname), dtype=result.dtype)

        tm.assert_numpy_array_equal(result, expected)

    def test_array_interface(self, timedelta_index):
        arr = TimedeltaArray(timedelta_index)

        # default asarray gives the same underlying data
        result = np.asarray(arr)
        expected = arr._data
        assert result is expected
        tm.assert_numpy_array_equal(result, expected)
        result = np.array(arr, copy=False)
        assert result is expected
        tm.assert_numpy_array_equal(result, expected)

        # specifying m8[ns] gives the same result as default
        result = np.asarray(arr, dtype='timedelta64[ns]')
        expected = arr._data
        assert result is expected
        tm.assert_numpy_array_equal(result, expected)
        result = np.array(arr, dtype='timedelta64[ns]', copy=False)
        assert result is expected
        tm.assert_numpy_array_equal(result, expected)
        result = np.array(arr, dtype='timedelta64[ns]')
        assert result is not expected
        tm.assert_numpy_array_equal(result, expected)

        # to object dtype
        result = np.asarray(arr, dtype=object)
        expected = np.array(list(arr), dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        # to other dtype always copies
        result = np.asarray(arr, dtype='int64')
        assert result is not arr.asi8
        assert not np.may_share_memory(arr, result)
        expected = arr.asi8.copy()
        tm.assert_numpy_array_equal(result, expected)

        # other dtypes handled by numpy
        for dtype in ['float64', str]:
            result = np.asarray(arr, dtype=dtype)
            expected = np.asarray(arr).astype(dtype)
            tm.assert_numpy_array_equal(result, expected)

    def test_take_fill_valid(self, timedelta_index):
        tdi = timedelta_index
        arr = TimedeltaArray(tdi)

        td1 = pd.Timedelta(days=1)
        result = arr.take([-1, 1], allow_fill=True, fill_value=td1)
        assert result[0] == td1

        now = pd.Timestamp.now()
        with pytest.raises(ValueError):
            # fill_value Timestamp invalid
            arr.take([0, 1], allow_fill=True, fill_value=now)

        with pytest.raises(ValueError):
            # fill_value Period invalid
            arr.take([0, 1], allow_fill=True, fill_value=now.to_period('D'))
|
||||
|
||||
|
||||
class TestPeriodArray(SharedTests):
    """Run the SharedTests against PeriodArray/PeriodIndex, plus
    period-specific behavior (object-only numpy interface, conversions)."""
    index_cls = pd.PeriodIndex
    array_cls = PeriodArray

    def test_from_pi(self, period_index):
        pi = period_index
        arr = PeriodArray(pi)
        assert list(arr) == list(pi)

        # Check that Index.__new__ knows what to do with PeriodArray
        pi2 = pd.Index(arr)
        assert isinstance(pi2, pd.PeriodIndex)
        assert list(pi2) == list(arr)

    def test_astype_object(self, period_index):
        pi = period_index
        arr = PeriodArray(pi)
        asobj = arr.astype('O')
        assert isinstance(asobj, np.ndarray)
        assert asobj.dtype == 'O'
        assert list(asobj) == list(pi)

    @pytest.mark.parametrize('how', ['S', 'E'])
    def test_to_timestamp(self, how, period_index):
        pi = period_index
        arr = PeriodArray(pi)

        expected = DatetimeArray(pi.to_timestamp(how=how))
        result = arr.to_timestamp(how=how)
        assert isinstance(result, DatetimeArray)

        # placeholder until these become actual EA subclasses and we can use
        # an EA-specific tm.assert_ function
        tm.assert_index_equal(pd.Index(result), pd.Index(expected))

    @pytest.mark.parametrize('propname', PeriodArray._bool_ops)
    def test_bool_properties(self, period_index, propname):
        # in this case _bool_ops is just `is_leap_year`
        pi = period_index
        arr = PeriodArray(pi)

        result = getattr(arr, propname)
        expected = np.array(getattr(pi, propname))

        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize('propname', PeriodArray._field_ops)
    def test_int_properties(self, period_index, propname):
        pi = period_index
        arr = PeriodArray(pi)

        result = getattr(arr, propname)
        expected = np.array(getattr(pi, propname))

        tm.assert_numpy_array_equal(result, expected)

    def test_array_interface(self, period_index):
        arr = PeriodArray(period_index)

        # default asarray gives objects
        result = np.asarray(arr)
        expected = np.array(list(arr), dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        # to object dtype (same as default)
        result = np.asarray(arr, dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        # to other dtypes
        with pytest.raises(TypeError):
            np.asarray(arr, dtype='int64')

        with pytest.raises(TypeError):
            np.asarray(arr, dtype='float64')

        result = np.asarray(arr, dtype='S20')
        expected = np.asarray(arr).astype('S20')
        tm.assert_numpy_array_equal(result, expected)
|
||||
@@ -0,0 +1,292 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Tests for DatetimeArray
|
||||
"""
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import DatetimeArray
|
||||
from pandas.core.arrays.datetimes import sequence_to_dt64ns
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestDatetimeArrayConstructor(object):
    """Validation behavior of the public DatetimeArray constructor."""

    def test_freq_validation(self):
        # GH#24623 check that invalid instances cannot be created with the
        # public constructor
        arr = np.arange(5, dtype=np.int64) * 3600 * 10**9

        msg = ("Inferred frequency H from passed values does not "
               "conform to passed frequency W-SUN")
        with pytest.raises(ValueError, match=msg):
            DatetimeArray(arr, freq="W")

    @pytest.mark.parametrize('meth', [DatetimeArray._from_sequence,
                                      sequence_to_dt64ns,
                                      pd.to_datetime,
                                      pd.DatetimeIndex])
    def test_mixing_naive_tzaware_raises(self, meth):
        # GH#24569
        arr = np.array([pd.Timestamp('2000'), pd.Timestamp('2000', tz='CET')])

        msg = ('Cannot mix tz-aware with tz-naive values|'
               'Tz-aware datetime.datetime cannot be converted '
               'to datetime64 unless utc=True')

        for obj in [arr, arr[::-1]]:
            # check that we raise regardless of whether naive is found
            # before aware or vice-versa
            with pytest.raises(ValueError, match=msg):
                meth(obj)

    def test_from_pandas_array(self):
        arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10**9

        result = DatetimeArray._from_sequence(arr, freq='infer')

        expected = pd.date_range('1970-01-01', periods=5, freq='H')._data
        tm.assert_datetime_array_equal(result, expected)

    def test_mismatched_timezone_raises(self):
        arr = DatetimeArray(np.array(['2000-01-01T06:00:00'], dtype='M8[ns]'),
                            dtype=DatetimeTZDtype(tz='US/Central'))
        dtype = DatetimeTZDtype(tz='US/Eastern')
        with pytest.raises(TypeError, match='Timezone of the array'):
            DatetimeArray(arr, dtype=dtype)

    def test_non_array_raises(self):
        with pytest.raises(ValueError, match='list'):
            DatetimeArray([1, 2, 3])

    def test_other_type_raises(self):
        with pytest.raises(ValueError,
                           match="The dtype of 'values' is incorrect.*bool"):
            DatetimeArray(np.array([1, 2, 3], dtype='bool'))

    def test_incorrect_dtype_raises(self):
        with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
            DatetimeArray(np.array([1, 2, 3], dtype='i8'), dtype='category')

    def test_freq_infer_raises(self):
        with pytest.raises(ValueError, match='Frequency inference'):
            DatetimeArray(np.array([1, 2, 3], dtype='i8'), freq="infer")

    def test_copy(self):
        data = np.array([1, 2, 3], dtype='M8[ns]')
        arr = DatetimeArray(data, copy=False)
        assert arr._data is data

        arr = DatetimeArray(data, copy=True)
        assert arr._data is not data
|
||||
|
||||
|
||||
class TestDatetimeArrayComparisons(object):
    # TODO: merge this into tests/arithmetic/test_datetime64 once it is
    # sufficiently robust

    def test_cmp_dt64_arraylike_tznaive(self, all_compare_operators):
        # arbitrary tz-naive DatetimeIndex
        opname = all_compare_operators.strip('_')
        op = getattr(operator, opname)

        dti = pd.date_range('2016-01-1', freq='MS', periods=9, tz=None)
        arr = DatetimeArray(dti)
        assert arr.freq == dti.freq
        assert arr.tz == dti.tz

        right = dti

        # comparing identical values elementwise: eq/ge/le are all-True
        expected = np.ones(len(arr), dtype=bool)
        if opname in ['ne', 'gt', 'lt']:
            # for these the comparisons should be all-False
            expected = ~expected

        result = op(arr, arr)
        tm.assert_numpy_array_equal(result, expected)
        for other in [right, np.array(right)]:
            # TODO: add list and tuple, and object-dtype once those
            # are fixed in the constructor
            result = op(arr, other)
            tm.assert_numpy_array_equal(result, expected)

            result = op(other, arr)
            tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
class TestDatetimeArray(object):
    """Miscellaneous DatetimeArray behavior: astype, setitem, tz handling."""

    def test_astype_to_same(self):
        arr = DatetimeArray._from_sequence(['2000'], tz='US/Central')
        result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False)
        assert result is arr

    @pytest.mark.parametrize("dtype", [
        int, np.int32, np.int64, 'uint32', 'uint64',
    ])
    def test_astype_int(self, dtype):
        arr = DatetimeArray._from_sequence([pd.Timestamp('2000'),
                                            pd.Timestamp('2001')])
        result = arr.astype(dtype)

        if np.dtype(dtype).kind == 'u':
            expected_dtype = np.dtype('uint64')
        else:
            expected_dtype = np.dtype('int64')
        expected = arr.astype(expected_dtype)

        assert result.dtype == expected_dtype
        tm.assert_numpy_array_equal(result, expected)

    def test_tz_setter_raises(self):
        arr = DatetimeArray._from_sequence(['2000'], tz='US/Central')
        with pytest.raises(AttributeError, match='tz_localize'):
            arr.tz = 'UTC'

    def test_setitem_different_tz_raises(self):
        data = np.array([1, 2, 3], dtype='M8[ns]')
        arr = DatetimeArray(data, copy=False,
                            dtype=DatetimeTZDtype(tz="US/Central"))
        with pytest.raises(ValueError, match="None"):
            arr[0] = pd.Timestamp('2000')

        with pytest.raises(ValueError, match="US/Central"):
            arr[0] = pd.Timestamp('2000', tz="US/Eastern")

    def test_setitem_clears_freq(self):
        a = DatetimeArray(pd.date_range('2000', periods=2, freq='D',
                                        tz='US/Central'))
        a[0] = pd.Timestamp("2000", tz="US/Central")
        assert a.freq is None

    def test_repeat_preserves_tz(self):
        dti = pd.date_range('2000', periods=2, freq='D', tz='US/Central')
        arr = DatetimeArray(dti)

        repeated = arr.repeat([1, 1])

        # preserves tz and values, but not freq
        expected = DatetimeArray(arr.asi8, freq=None, dtype=arr.dtype)
        tm.assert_equal(repeated, expected)

    def test_value_counts_preserves_tz(self):
        dti = pd.date_range('2000', periods=2, freq='D', tz='US/Central')
        arr = DatetimeArray(dti).repeat([4, 3])

        result = arr.value_counts()

        # Note: not tm.assert_index_equal, since `freq`s do not match
        assert result.index.equals(dti)

        arr[-2] = pd.NaT
        result = arr.value_counts()
        expected = pd.Series([1, 4, 2],
                             index=[pd.NaT, dti[0], dti[1]])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize('method', ['pad', 'backfill'])
    def test_fillna_preserves_tz(self, method):
        dti = pd.date_range('2000-01-01', periods=5, freq='D', tz='US/Central')
        arr = DatetimeArray(dti, copy=True)
        arr[2] = pd.NaT

        fill_val = dti[1] if method == 'pad' else dti[3]
        expected = DatetimeArray._from_sequence(
            [dti[0], dti[1], fill_val, dti[3], dti[4]],
            freq=None, tz='US/Central'
        )

        result = arr.fillna(method=method)
        tm.assert_extension_array_equal(result, expected)

        # assert that arr and dti were not modified in-place
        assert arr[2] is pd.NaT
        assert dti[2] == pd.Timestamp('2000-01-03', tz='US/Central')

    def test_array_interface_tz(self):
        tz = "US/Central"
        data = DatetimeArray(pd.date_range('2017', periods=2, tz=tz))
        result = np.asarray(data)

        expected = np.array([pd.Timestamp('2017-01-01T00:00:00', tz=tz),
                             pd.Timestamp('2017-01-02T00:00:00', tz=tz)],
                            dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        result = np.asarray(data, dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        result = np.asarray(data, dtype='M8[ns]')

        # tz-aware values convert to naive UTC when asked for M8[ns]
        expected = np.array(['2017-01-01T06:00:00',
                             '2017-01-02T06:00:00'], dtype="M8[ns]")
        tm.assert_numpy_array_equal(result, expected)

    def test_array_interface(self):
        data = DatetimeArray(pd.date_range('2017', periods=2))
        expected = np.array(['2017-01-01T00:00:00', '2017-01-02T00:00:00'],
                            dtype='datetime64[ns]')

        result = np.asarray(data)
        tm.assert_numpy_array_equal(result, expected)

        result = np.asarray(data, dtype=object)
        expected = np.array([pd.Timestamp('2017-01-01T00:00:00'),
                             pd.Timestamp('2017-01-02T00:00:00')],
                            dtype=object)
        tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
class TestSequenceToDT64NS(object):
    """dtype/tz validation in the sequence_to_dt64ns helper."""

    def test_tz_dtype_mismatch_raises(self):
        arr = DatetimeArray._from_sequence(['2000'], tz='US/Central')
        with pytest.raises(TypeError, match='data is already tz-aware'):
            sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="UTC"))

    def test_tz_dtype_matches(self):
        arr = DatetimeArray._from_sequence(['2000'], tz='US/Central')
        result, _, _ = sequence_to_dt64ns(
            arr, dtype=DatetimeTZDtype(tz="US/Central"))
        tm.assert_numpy_array_equal(arr._data, result)
|
||||
|
||||
|
||||
class TestReductions(object):
    """min/max reductions on DatetimeArray, with and without NaT/tz."""

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    def test_min_max(self, tz):
        arr = DatetimeArray._from_sequence([
            '2000-01-03',
            '2000-01-03',
            'NaT',
            '2000-01-02',
            '2000-01-05',
            '2000-01-04',
        ], tz=tz)

        result = arr.min()
        expected = pd.Timestamp('2000-01-02', tz=tz)
        assert result == expected

        result = arr.max()
        expected = pd.Timestamp('2000-01-05', tz=tz)
        assert result == expected

        # with skipna=False the NaT propagates
        result = arr.min(skipna=False)
        assert result is pd.NaT

        result = arr.max(skipna=False)
        assert result is pd.NaT

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    @pytest.mark.parametrize('skipna', [True, False])
    def test_min_max_empty(self, skipna, tz):
        arr = DatetimeArray._from_sequence([], tz=tz)
        result = arr.min(skipna=skipna)
        assert result is pd.NaT

        result = arr.max(skipna=skipna)
        assert result is pd.NaT
|
||||
@@ -0,0 +1,713 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.generic import ABCIndexClass
|
||||
|
||||
import pandas as pd
|
||||
from pandas.api.types import is_float, is_float_dtype, is_integer, is_scalar
|
||||
from pandas.core.arrays import IntegerArray, integer_array
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, UInt8Dtype, UInt16Dtype,
|
||||
UInt32Dtype, UInt64Dtype)
|
||||
from pandas.tests.extension.base import BaseOpsUtil
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def make_data():
    """Return a 100-element list of ints with two NaN 'holes' (at
    positions 8 and 97), used to build IntegerArray fixtures."""
    return (list(range(8)) +
            [np.nan] +
            list(range(10, 98)) +
            [np.nan] +
            [99, 100])
|
||||
|
||||
|
||||
@pytest.fixture(params=[Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype,
                        UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype])
def dtype(request):
    """Parametrized fixture: an instance of every nullable-integer dtype."""
    return request.param()
|
||||
|
||||
|
||||
@pytest.fixture
def data(dtype):
    """100-element IntegerArray of the requested dtype with two NA holes."""
    return integer_array(make_data(), dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(dtype):
    """Minimal two-element IntegerArray: one NA followed by one value."""
    return integer_array([np.nan, 1], dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture(params=['data', 'data_missing'])
def all_data(request, data, data_missing):
    """Parametrized fixture giving 'data' and 'data_missing'"""
    return data if request.param == 'data' else data_missing
|
||||
|
||||
|
||||
def test_dtypes(dtype):
    """Smoke test: numpy kind matches signedness, and the name is set."""
    expected_kind = 'i' if dtype.is_signed_integer else 'u'
    assert np.dtype(dtype.type).kind == expected_kind
    assert dtype.name is not None
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtype, expected', [
    (Int8Dtype(), 'Int8Dtype()'),
    (Int16Dtype(), 'Int16Dtype()'),
    (Int32Dtype(), 'Int32Dtype()'),
    (Int64Dtype(), 'Int64Dtype()'),
    (UInt8Dtype(), 'UInt8Dtype()'),
    (UInt16Dtype(), 'UInt16Dtype()'),
    (UInt32Dtype(), 'UInt32Dtype()'),
    (UInt64Dtype(), 'UInt64Dtype()'),
])
def test_repr_dtype(dtype, expected):
    """Each dtype repr is its class name followed by parentheses."""
    assert repr(dtype) == expected
|
||||
|
||||
|
||||
def test_repr_array():
    """repr of a short IntegerArray shows values, length and dtype."""
    observed = repr(integer_array([1, None, 3]))
    assert observed == (
        '<IntegerArray>\n'
        '[1, NaN, 3]\n'
        'Length: 3, dtype: Int64'
    )
|
||||
|
||||
|
||||
def test_repr_array_long():
    """Long arrays are abbreviated with an ellipsis row in their repr."""
    arr = integer_array([1, 2, None] * 1000)
    assert repr(arr) == (
        "<IntegerArray>\n"
        "[ 1, 2, NaN, 1, 2, NaN, 1, 2, NaN, 1,\n"
        " ...\n"
        " NaN, 1, 2, NaN, 1, 2, NaN, 1, 2, NaN]\n"
        "Length: 3000, dtype: Int64"
    )
|
||||
|
||||
|
||||
class TestConstructors(object):
    """Constructing Series with a nullable-integer dtype given as a string."""

    def test_from_dtype_from_float(self, data):
        """Round-trip ``data`` through float/list/plain-int inputs using the
        string form of its dtype."""
        dtype_name = str(data.dtype)
        baseline = pd.Series(data)

        # float ndarray -> Series[IntegerDtype]
        tm.assert_series_equal(
            pd.Series(np.array(data).astype('float'), dtype=dtype_name),
            baseline)

        # python list of ints -> Series[IntegerDtype]
        tm.assert_series_equal(
            pd.Series(np.array(data).tolist(), dtype=dtype_name),
            baseline)

        # plain int ndarray (NAs dropped first) -> Series[IntegerDtype]
        no_na = pd.Series(data).dropna().reset_index(drop=True)
        raw = np.array(data.dropna()).astype(np.dtype((data.dtype.type)))
        tm.assert_series_equal(pd.Series(raw, dtype=dtype_name), no_na)
|
||||
|
||||
|
||||
class TestArithmeticOps(BaseOpsUtil):
    """Arithmetic ops on IntegerArray-backed Series/DataFrames.

    Results are checked against the same op applied to the unmasked data,
    with NA positions re-masked afterwards.
    """

    def _check_divmod_op(self, s, op, other, exc=None):
        # divmod on IntegerArray is never expected to raise, so force
        # exc=None regardless of what the caller passed.
        super(TestArithmeticOps, self)._check_divmod_op(s, op, other, None)

    def _check_op(self, s, op_name, other, exc=None):
        """Apply ``op_name`` to ``s``/``other`` and validate the result.

        Dispatches to :meth:`_check_op_float` or :meth:`_check_op_integer`
        depending on the dtype the op is expected to produce.
        """
        op = self.get_op_from_name(op_name)
        result = op(s, other)

        # compute expected
        mask = s.isna()

        # if s is a DataFrame, squeeze to a Series
        # for comparison
        if isinstance(s, pd.DataFrame):
            result = result.squeeze()
            s = s.squeeze()
            mask = mask.squeeze()

        # other array is an Integer
        # NOTE(review): IntegerArray exposes ``_data``/``_mask`` rather than
        # ``data``/``mask``, so both getattr calls appear to hit their
        # defaults (``omask`` stays None and ``mask`` becomes ``other``
        # itself) -- confirm this rebinding of ``mask`` is intended.
        if isinstance(other, IntegerArray):
            omask = getattr(other, 'mask', None)
            mask = getattr(other, 'data', other)
            if omask is not None:
                mask |= omask

        # 1 ** na is na, so need to unmask those
        if op_name == '__pow__':
            mask = np.where(s == 1, False, mask)

        elif op_name == '__rpow__':
            mask = np.where(other == 1, False, mask)

        # float result type or float op
        if ((is_float_dtype(other) or is_float(other) or
             op_name in ['__rtruediv__', '__truediv__',
                         '__rdiv__', '__div__'])):
            rs = s.astype('float')
            expected = op(rs, other)
            self._check_op_float(result, expected, mask, s, op_name, other)

        # integer result type
        else:
            rs = pd.Series(s.values._data)
            expected = op(rs, other)
            self._check_op_integer(result, expected, mask, s, op_name, other)

    def _check_op_float(self, result, expected, mask, s, op_name, other):
        # check ops that result in a float dtype: masked slots become NaN
        expected[mask] = np.nan
        tm.assert_series_equal(result, expected)

    def _check_op_integer(self, result, expected, mask, s, op_name, other):
        # check ops that result in an integer dtype

        # to compare properly, we convert the expected
        # to float, mask to nans and convert infs
        # if we have uints then we process as uints
        # then convert to float
        # and we ultimately want to create a IntArray
        # for comparisons

        fill_value = 0

        # mod/rmod turn floating 0 into NaN while
        # integer works as expected (no nan)
        if op_name in ['__mod__', '__rmod__']:
            if is_scalar(other):
                if other == 0:
                    expected[s.values == 0] = 0
                else:
                    expected = expected.fillna(0)
            else:
                expected[(s.values == 0) &
                         ((expected == 0) | expected.isna())] = 0
        try:
            # replace infs (from division by zero) before the int cast
            expected[(expected == np.inf) | (expected == -np.inf)] = fill_value
            original = expected
            expected = expected.astype(s.dtype)

        except ValueError:
            # the astype to the integer dtype failed; retry through float
            expected = expected.astype(float)
            expected[(expected == np.inf) | (expected == -np.inf)] = fill_value
            original = expected
            expected = expected.astype(s.dtype)

        expected[mask] = np.nan

        # assert that the expected astype is ok
        # (skip for unsigned as they have wrap around)
        if not s.dtype.is_unsigned_integer:
            original = pd.Series(original)

            # we need to fill with 0's to emulate what an astype('int') does
            # (truncation) for certain ops
            if op_name in ['__rtruediv__', '__rdiv__']:
                mask |= original.isna()
                original = original.fillna(0).astype('int')

            original = original.astype('float')
            original[mask] = np.nan
            tm.assert_series_equal(original, expected.astype('float'))

        # assert our expected result
        tm.assert_series_equal(result, expected)

    def test_arith_integer_array(self, data, all_arithmetic_operators):
        """Series <op> IntegerArray-backed rhs (rhs contains one NA)."""
        op = all_arithmetic_operators

        s = pd.Series(data)
        rhs = pd.Series([1] * len(data), dtype=data.dtype)
        rhs.iloc[-1] = np.nan

        self._check_op(s, op, rhs)

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        """Series <op> integer scalar."""
        op = all_arithmetic_operators

        s = pd.Series(data)
        self._check_op(s, op, 1, exc=TypeError)

    def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
        """Single-column DataFrame <op> integer scalar."""
        op = all_arithmetic_operators

        df = pd.DataFrame({'A': data})
        self._check_op(df, op, 1, exc=TypeError)

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """Series <op> plain ndarray of ones with a matching dtype."""
        op = all_arithmetic_operators

        s = pd.Series(data)
        other = np.ones(len(s), dtype=s.dtype.type)
        self._check_op(s, op, other, exc=TypeError)

    def test_arith_coerce_scalar(self, data, all_arithmetic_operators):
        """Series <op> float scalar takes the float code path."""
        op = all_arithmetic_operators
        s = pd.Series(data)

        other = 0.01
        self._check_op(s, op, other)

    @pytest.mark.parametrize("other", [1., 1.0, np.array(1.), np.array([1.])])
    def test_arithmetic_conversion(self, all_arithmetic_operators, other):
        # if we have a float operand we should have a float result
        # if that is equal to an integer
        op = self.get_op_from_name(all_arithmetic_operators)

        s = pd.Series([1, 2, 3], dtype='Int64')
        result = op(s, other)
        # NOTE(review): ``is`` only works here because numpy caches builtin
        # dtype instances; ``==`` would be the safer comparison.
        assert result.dtype is np.dtype('float')

    @pytest.mark.parametrize("other", [0, 0.5])
    def test_arith_zero_dim_ndarray(self, other):
        """A 0-d ndarray operand behaves like the equivalent scalar."""
        arr = integer_array([1, None, 2])
        result = arr + np.array(other)
        expected = arr + other
        tm.assert_equal(result, expected)

    def test_error(self, data, all_arithmetic_operators):
        """Invalid operand types and 2-D operands raise."""
        op = all_arithmetic_operators
        s = pd.Series(data)
        ops = getattr(s, op)
        opa = getattr(data, op)

        # invalid scalars
        with pytest.raises(TypeError):
            ops('foo')
        with pytest.raises(TypeError):
            ops(pd.Timestamp('20180101'))

        # invalid array-likes
        with pytest.raises(TypeError):
            ops(pd.Series('foo', index=s.index))

        if op != '__rpow__':
            # TODO(extension)
            # rpow with a datetimelike coerces the integer array incorrectly
            with pytest.raises(TypeError):
                ops(pd.Series(pd.date_range('20180101', periods=len(s))))

        # 2d
        with pytest.raises(NotImplementedError):
            opa(pd.DataFrame({'A': s}))
        with pytest.raises(NotImplementedError):
            opa(np.arange(len(s)).reshape(-1, len(s)))

    def test_pow(self):
        """1 ** NA and NA ** NA: only a base of 1 unmasks the result.

        See https://github.com/pandas-dev/pandas/issues/22022.
        """
        a = integer_array([1, np.nan, np.nan, 1])
        b = integer_array([1, np.nan, 1, np.nan])
        result = a ** b
        expected = pd.core.arrays.integer_array([1, np.nan, np.nan, 1])
        tm.assert_extension_array_equal(result, expected)

    def test_rpow_one_to_na(self):
        """ndarray ** IntegerArray: 1 ** NA is 1, other bases stay NaN.

        See https://github.com/pandas-dev/pandas/issues/22022.
        """
        arr = integer_array([np.nan, np.nan])
        result = np.array([1.0, 2.0]) ** arr
        expected = np.array([1.0, np.nan])
        tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
class TestComparisonOps(BaseOpsUtil):
    """Comparison ops: NA positions compare False (True only for ``!=``)."""

    def _compare_other(self, data, op_name, other):
        op = self.get_op_from_name(op_name)
        # what NA positions must evaluate to for this operator
        na_result = op_name == '__ne__'

        # raw-array path
        result = pd.Series(op(data, other))
        expected = pd.Series(op(data._data, other))
        expected[data._mask] = na_result
        tm.assert_series_equal(result, expected)

        # Series path
        ser = pd.Series(data)
        result = op(ser, other)
        expected = op(pd.Series(data._data), other)
        expected[data._mask] = na_result
        tm.assert_series_equal(result, expected)

    def test_compare_scalar(self, data, all_compare_operators):
        """Compare against an integer scalar."""
        self._compare_other(data, all_compare_operators, 0)

    def test_compare_array(self, data, all_compare_operators):
        """Compare against a same-length Series of zeros."""
        self._compare_other(data, all_compare_operators,
                            pd.Series([0] * len(data)))
|
||||
|
||||
|
||||
class TestCasting(object):
    """Casting IntegerArray data to/from Index, other integer dtypes, numpy
    dtypes and object."""
    # NOTE(review): leftover ``pass`` from before the methods were added;
    # harmless but could be removed.
    pass

    @pytest.mark.parametrize('dropna', [True, False])
    def test_construct_index(self, all_data, dropna):
        # ensure that we do not coerce to Float64Index, rather
        # keep as Index

        all_data = all_data[:10]
        if dropna:
            other = np.array(all_data[~all_data.isna()])
        else:
            other = all_data

        result = pd.Index(integer_array(other, dtype=all_data.dtype))
        expected = pd.Index(other, dtype=object)

        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('dropna', [True, False])
    def test_astype_index(self, all_data, dropna):
        # as an int/uint index to Index

        all_data = all_data[:10]
        if dropna:
            other = all_data[~all_data.isna()]
        else:
            other = all_data

        dtype = all_data.dtype
        idx = pd.Index(np.array(other))
        assert isinstance(idx, ABCIndexClass)

        # astyping directly must match the two-step route through object
        result = idx.astype(dtype)
        expected = idx.astype(object).astype(dtype)
        tm.assert_index_equal(result, expected)

    def test_astype(self, all_data):
        """astype between Integer dtypes, to numpy dtypes and to object."""
        all_data = all_data[:10]

        ints = all_data[~all_data.isna()]
        mixed = all_data
        dtype = Int8Dtype()

        # coerce to same type - ints
        s = pd.Series(ints)
        result = s.astype(all_data.dtype)
        expected = pd.Series(ints)
        tm.assert_series_equal(result, expected)

        # coerce to same other - ints
        s = pd.Series(ints)
        result = s.astype(dtype)
        expected = pd.Series(ints, dtype=dtype)
        tm.assert_series_equal(result, expected)

        # coerce to same numpy_dtype - ints
        s = pd.Series(ints)
        result = s.astype(all_data.dtype.numpy_dtype)
        expected = pd.Series(ints._data.astype(
            all_data.dtype.numpy_dtype))
        tm.assert_series_equal(result, expected)

        # coerce to same type - mixed
        s = pd.Series(mixed)
        result = s.astype(all_data.dtype)
        expected = pd.Series(mixed)
        tm.assert_series_equal(result, expected)

        # coerce to same other - mixed
        s = pd.Series(mixed)
        result = s.astype(dtype)
        expected = pd.Series(mixed, dtype=dtype)
        tm.assert_series_equal(result, expected)

        # coerce to same numpy_dtype - mixed: must raise, NA cannot be
        # represented in a plain integer ndarray
        s = pd.Series(mixed)
        with pytest.raises(ValueError):
            s.astype(all_data.dtype.numpy_dtype)

        # coerce to object
        s = pd.Series(mixed)
        result = s.astype('object')
        expected = pd.Series(np.asarray(mixed))
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize('dtype', [Int8Dtype(), 'Int8',
                                       UInt32Dtype(), 'UInt32'])
    def test_astype_specific_casting(self, dtype):
        """astype accepts both dtype instances and their string aliases."""
        s = pd.Series([1, 2, 3], dtype='Int64')
        result = s.astype(dtype)
        expected = pd.Series([1, 2, 3], dtype=dtype)
        tm.assert_series_equal(result, expected)

        s = pd.Series([1, 2, 3, None], dtype='Int64')
        result = s.astype(dtype)
        expected = pd.Series([1, 2, 3, None], dtype=dtype)
        tm.assert_series_equal(result, expected)

    def test_construct_cast_invalid(self, dtype):
        """Non-equivalent floats cannot be cast to an integer dtype."""
        msg = "cannot safely"
        arr = [1.2, 2.3, 3.7]
        with pytest.raises(TypeError, match=msg):
            integer_array(arr, dtype=dtype)

        with pytest.raises(TypeError, match=msg):
            pd.Series(arr).astype(dtype)

        arr = [1.2, 2.3, 3.7, np.nan]
        with pytest.raises(TypeError, match=msg):
            integer_array(arr, dtype=dtype)

        with pytest.raises(TypeError, match=msg):
            pd.Series(arr).astype(dtype)
|
||||
|
||||
|
||||
def test_frame_repr(data_missing):
    """DataFrame repr renders the NA slot as ``NaN``."""
    df = pd.DataFrame({'A': data_missing})
    result = repr(df)
    expected = '     A\n0  NaN\n1    1'
    assert result == expected
|
||||
|
||||
|
||||
def test_conversions(data_missing):
    """astype(object) yields nan for NA and plain python ints for values."""
    frame = pd.DataFrame({'A': data_missing})

    # astype to object series
    result = frame['A'].astype('object')
    tm.assert_series_equal(
        result, pd.Series(np.array([np.nan, 1], dtype=object), name='A'))

    # convert to object ndarray; we assert exact equality, including the
    # scalar types produced
    result = frame['A'].astype('object').values
    expected = np.array([np.nan, 1], dtype=object)
    tm.assert_numpy_array_equal(result, expected)

    for got, want in zip(result, expected):
        if pd.isnull(got):
            assert pd.isnull(want)
        elif is_integer(got):
            # PY2 can be int or long
            assert got == want
            assert is_integer(want)
        else:
            assert got == want
            assert type(got) == type(want)
|
||||
|
||||
|
||||
def test_integer_array_constructor():
    """IntegerArray requires an int64 ndarray plus a bool ndarray mask."""
    vals = np.array([1, 2, 3, 4], dtype='int64')
    mask = np.array([False, False, False, True], dtype='bool')

    tm.assert_extension_array_equal(
        IntegerArray(vals, mask),
        integer_array([1, 2, 3, np.nan], dtype='int64'))

    # anything other than the exact ndarray types is rejected
    with pytest.raises(TypeError):
        IntegerArray(vals.tolist(), mask)

    with pytest.raises(TypeError):
        IntegerArray(vals, mask.tolist())

    with pytest.raises(TypeError):
        IntegerArray(vals.astype(float), mask)

    # the mask argument is mandatory
    with pytest.raises(TypeError):
        IntegerArray(vals)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('a, b', [
    ([1, None], [1, np.nan]),
    ([None], [np.nan]),
    ([None, np.nan], [np.nan, np.nan]),
    ([np.nan, np.nan], [np.nan, np.nan]),
])
def test_integer_array_constructor_none_is_nan(a, b):
    """``None`` and ``np.nan`` are interchangeable NA markers on input."""
    result = integer_array(a)
    expected = integer_array(b)
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_integer_array_constructor_copy():
    """By default the input buffers are shared; copy=True duplicates them."""
    vals = np.array([1, 2, 3, 4], dtype='int64')
    mask = np.array([False, False, False, True], dtype='bool')

    shared = IntegerArray(vals, mask)
    assert shared._data is vals
    assert shared._mask is mask

    copied = IntegerArray(vals, mask, copy=True)
    assert copied._data is not vals
    assert copied._mask is not mask
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'values',
    [
        ['foo', 'bar'],
        ['1', '2'],
        'foo',
        1,
        1.0,
        pd.date_range('20130101', periods=2),
        np.array(['foo']),
        [[1, 2], [3, 4]],
        [np.nan, {'a': 1}]])
def test_to_integer_array_error(values):
    """Non-integer-like input (strings, scalars, nested/dict data, dates)
    raises TypeError when converting to an IntegerArray."""
    with pytest.raises(TypeError):
        integer_array(values)
|
||||
|
||||
|
||||
def test_to_integer_array_inferred_dtype():
    """Dtype inference: an ndarray's dtype is respected, plain lists
    default to Int64."""
    # if values has a dtype -> respect it
    assert integer_array(np.array([1, 2], dtype='int8')).dtype == Int8Dtype()
    assert integer_array(np.array([1, 2], dtype='int32')).dtype == Int32Dtype()

    # if values have no dtype -> always int64
    assert integer_array([1, 2]).dtype == Int64Dtype()
|
||||
|
||||
|
||||
def test_to_integer_array_dtype_keyword():
    """An explicit dtype= overrides both the default and the ndarray dtype."""
    result = integer_array([1, 2], dtype='int8')
    assert result.dtype == Int8Dtype()

    # if values has a dtype -> override it
    result = integer_array(np.array([1, 2], dtype='int8'), dtype='int32')
    assert result.dtype == Int32Dtype()
|
||||
|
||||
|
||||
def test_to_integer_array_float():
    """Floats convert when exactly representable as ints; otherwise raise."""
    tm.assert_extension_array_equal(integer_array([1., 2.]),
                                    integer_array([1, 2]))

    with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):
        integer_array([1.5, 2.])

    # for float dtypes, the itemsize is not preserved
    converted = integer_array(np.array([1., 2.], dtype='float32'))
    assert converted.dtype == Int64Dtype()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'values, to_dtype, result_dtype',
    [
        (np.array([1], dtype='int64'), None, Int64Dtype),
        (np.array([1, np.nan]), None, Int64Dtype),
        (np.array([1, np.nan]), 'int8', Int8Dtype)])
def test_to_integer_array(values, to_dtype, result_dtype):
    """Converting existing ndarrays (with or without NaN) produces the
    expected nullable-integer dtype."""
    result = integer_array(values, dtype=to_dtype)
    assert result.dtype == result_dtype()
    expected = integer_array(values, dtype=result_dtype())
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_cross_type_arithmetic():
    """Ops mixing Int64, UInt8 and plain-int columns propagate NA and keep
    a nullable-integer result dtype."""
    df = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'),
                       'B': pd.Series([1, np.nan, 3], dtype='UInt8'),
                       'C': [1, 2, 3]})

    # masked + unmasked -> masked
    tm.assert_series_equal(df.A + df.C,
                           pd.Series([2, 4, np.nan], dtype='Int64'))

    # comparisons on a masked intermediate: NA compares False
    tm.assert_series_equal((df.A + df.C) * 3 == 12,
                           pd.Series([False, True, False]))

    # masked + masked: NA in either operand masks the result
    tm.assert_series_equal(df.A + df.B,
                           pd.Series([2, np.nan, np.nan], dtype='Int64'))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('op', ['sum', 'min', 'max', 'prod'])
def test_preserve_dtypes(op):
    """Reductions return python ints; groupby keeps the Int64 column dtype."""
    # TODO(#22346): preserve Int64 dtype
    # for ops that enable (mean would actually work here
    # but generally it is a float return value)
    df = pd.DataFrame({
        "A": ['a', 'b', 'b'],
        "B": [1, None, 3],
        "C": integer_array([1, None, 3], dtype='Int64'),
    })

    # op
    result = getattr(df.C, op)()
    assert isinstance(result, int)

    # groupby
    result = getattr(df.groupby("A"), op)()

    expected = pd.DataFrame({
        "B": np.array([1.0, 3.0]),
        "C": integer_array([1, 3], dtype="Int64")
    }, index=pd.Index(['a', 'b'], name='A'))
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('op', ['mean'])
def test_reduce_to_float(op):
    # some reduce ops always return float, even if the result
    # is a rounded number
    df = pd.DataFrame({
        "A": ['a', 'b', 'b'],
        "B": [1, None, 3],
        "C": integer_array([1, None, 3], dtype='Int64'),
    })

    # op: scalar result is a float
    result = getattr(df.C, op)()
    assert isinstance(result, float)

    # groupby: the Int64 column stays Int64 after the reduction
    result = getattr(df.groupby("A"), op)()

    expected = pd.DataFrame({
        "B": np.array([1.0, 3.0]),
        "C": integer_array([1, 3], dtype="Int64")
    }, index=pd.Index(['a', 'b'], name='A'))
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_nansafe():
    """Casting an IntegerArray that holds NA to a plain integer dtype must
    raise instead of silently producing garbage.

    See gh-22343.
    """
    arr = integer_array([np.nan, 1, 2], dtype="Int8")
    with pytest.raises(ValueError,
                       match="cannot convert float NaN to integer"):
        arr.astype('uint32')
|
||||
|
||||
|
||||
# TODO(jreback) - these need testing / are broken
|
||||
|
||||
# shift
|
||||
|
||||
# set_index (destroys type)
|
||||
@@ -0,0 +1,206 @@
|
||||
"""
|
||||
Additional tests for PandasArray that aren't covered by
|
||||
the interface tests.
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import compat
|
||||
from pandas.arrays import PandasArray
|
||||
from pandas.core.arrays.numpy_ import PandasDtype
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.fixture(params=[
    np.array(['a', 'b'], dtype=object),
    np.array([0, 1], dtype=float),
    np.array([0, 1], dtype=int),
    np.array([0, 1 + 2j], dtype=complex),
    np.array([True, False], dtype=bool),
    np.array([0, 1], dtype='datetime64[ns]'),
    np.array([0, 1], dtype='timedelta64[ns]'),
])
def any_numpy_array(request):
    """
    Parametrized fixture for NumPy arrays with different dtypes.

    Covers object, float, int, complex, bool, datetime64 and timedelta64.
    This excludes string and bytes.
    """
    return request.param
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# PandasDtype
|
||||
|
||||
@pytest.mark.parametrize('dtype, expected', [
    ('bool', True),
    ('int', True),
    ('uint', True),
    ('float', True),
    ('complex', True),
    ('str', False),
    pytest.param('bytes', False,
                 marks=pytest.mark.skipif(compat.PY2, reason="PY2")),
    ('datetime64[ns]', False),
    ('object', False),
    ('void', False),
])
def test_is_numeric(dtype, expected):
    """_is_numeric is True exactly for bool/int/uint/float/complex."""
    dtype = PandasDtype(dtype)
    assert dtype._is_numeric is expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtype, expected', [
    ('bool', True),
    ('int', False),
    ('uint', False),
    ('float', False),
    ('complex', False),
    ('str', False),
    pytest.param('bytes', False,
                 marks=pytest.mark.skipif(compat.PY2, reason="PY2")),
    ('datetime64[ns]', False),
    ('object', False),
    ('void', False)
])
def test_is_boolean(dtype, expected):
    """_is_boolean is True only for the bool dtype."""
    dtype = PandasDtype(dtype)
    assert dtype._is_boolean is expected
|
||||
|
||||
|
||||
def test_repr():
    """PandasDtype repr wraps the numpy dtype name."""
    dtype = PandasDtype(np.dtype("int64"))
    assert repr(dtype) == "PandasDtype('int64')"
|
||||
|
||||
|
||||
def test_constructor_from_string():
    """construct_from_string round-trips a numpy dtype name."""
    result = PandasDtype.construct_from_string("int64")
    expected = PandasDtype(np.dtype("int64"))
    assert result == expected
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Construction
|
||||
|
||||
def test_constructor_no_coercion():
    """PandasArray only wraps ndarrays; it never coerces lists."""
    with pytest.raises(ValueError, match='NumPy array'):
        PandasArray([1, 2, 3])
|
||||
|
||||
|
||||
def test_series_constructor_with_copy():
    """Series(copy=True) must detach from the backing ndarray."""
    backing = np.array([1, 2, 3])
    ser = pd.Series(PandasArray(backing), copy=True)

    assert ser.values is not backing
|
||||
|
||||
|
||||
def test_series_constructor_with_astype():
    """An explicit dtype coerces the wrapped PandasArray on construction."""
    result = pd.Series(PandasArray(np.array([1, 2, 3])), dtype="float64")
    tm.assert_series_equal(result,
                           pd.Series([1.0, 2.0, 3.0], dtype="float64"))
|
||||
|
||||
|
||||
def test_from_sequence_dtype():
    """_from_sequence honours an explicit target dtype."""
    source = np.array([1, 2, 3], dtype='int64')
    result = PandasArray._from_sequence(source, dtype='uint64')
    tm.assert_extension_array_equal(
        result, PandasArray(np.array([1, 2, 3], dtype='uint64')))
|
||||
|
||||
|
||||
def test_constructor_copy():
    """copy=True must allocate a fresh buffer rather than wrap the input.

    Uses ``assert not np.shares_memory(...)`` instead of the original
    ``... is False``: identity-comparing a return value against ``False``
    only works if the callee returns the bool singleton and would break if
    it ever returned ``np.bool_``; plain truthiness is robust either way.
    """
    arr = np.array([0, 1])
    result = PandasArray(arr, copy=True)

    assert not np.shares_memory(result._ndarray, arr)
|
||||
|
||||
|
||||
def test_constructor_with_data(any_numpy_array):
    """The wrapped ndarray's dtype is exposed via ``.dtype.numpy_dtype``."""
    nparr = any_numpy_array
    arr = PandasArray(nparr)
    assert arr.dtype.numpy_dtype == nparr.dtype
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Conversion
|
||||
|
||||
def test_to_numpy():
    """to_numpy: zero-copy by default, copies on request, casts on demand."""
    arr = PandasArray(np.array([1, 2, 3]))

    # default: the underlying ndarray itself
    assert arr.to_numpy() is arr._ndarray

    # copy=True: a detached array
    assert arr.to_numpy(copy=True) is not arr._ndarray

    # dtype=: a cast copy
    tm.assert_numpy_array_equal(arr.to_numpy(dtype='f8'),
                                np.array([1, 2, 3], dtype='f8'))
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Setitem
|
||||
|
||||
def test_setitem_series():
    """Writing through ``Series.array`` is reflected in the Series itself."""
    ser = pd.Series([1, 2, 3])
    ser.array[0] = 10
    tm.assert_series_equal(ser, pd.Series([10, 2, 3]))
|
||||
|
||||
|
||||
def test_setitem(any_numpy_array):
    """__setitem__ mirrors the equivalent plain-ndarray assignment."""
    baseline = any_numpy_array
    arr = PandasArray(baseline, copy=True)

    # perform the same assignment on both sides
    arr[0] = arr[1]
    baseline[0] = baseline[1]

    tm.assert_numpy_array_equal(arr.to_numpy(), baseline)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Reductions
|
||||
|
||||
def test_bad_reduce_raises():
    """Unknown reduction names raise TypeError."""
    arr = PandasArray(np.array([1, 2, 3], dtype='int64'))
    msg = "cannot perform not_a_method with type int"
    with pytest.raises(TypeError, match=msg):
        # the message string doubles as the (bogus) reduction name
        arr._reduce(msg)
|
||||
|
||||
|
||||
def test_validate_reduction_keyword_args():
    """numpy-compat kwargs like ``keepdims`` are validated and rejected."""
    arr = PandasArray(np.array([1, 2, 3]))
    msg = "the 'keepdims' parameter is not supported .*all"
    with pytest.raises(ValueError, match=msg):
        arr.all(keepdims=True)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Ops
|
||||
|
||||
@td.skip_if_no("numpy", min_version="1.13.0")
def test_ufunc():
    """ufuncs apply elementwise and re-wrap results as PandasArray."""
    arr = PandasArray(np.array([-1.0, 0.0, 1.0]))
    result = np.abs(arr)
    expected = PandasArray(np.abs(arr._ndarray))
    tm.assert_extension_array_equal(result, expected)

    # multi-output ufuncs (divmod) wrap each output separately
    r1, r2 = np.divmod(arr, np.add(arr, 2))
    e1, e2 = np.divmod(arr._ndarray, np.add(arr._ndarray, 2))
    e1 = PandasArray(e1)
    e2 = PandasArray(e2)
    tm.assert_extension_array_equal(r1, e1)
    tm.assert_extension_array_equal(r2, e2)
|
||||
|
||||
|
||||
@td.skip_if_no("numpy", min_version="1.13.0")
def test_basic_binop():
    # Just a basic smoke test. The EA interface tests exercise this
    # more thoroughly.
    x = PandasArray(np.array([1, 2, 3]))
    result = x + x
    expected = PandasArray(np.array([2, 4, 6]))
    tm.assert_extension_array_equal(result, expected)
|
||||
@@ -0,0 +1,301 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import iNaT
|
||||
from pandas._libs.tslibs.period import IncompatibleFrequency
|
||||
|
||||
from pandas.core.dtypes.dtypes import PeriodDtype, registry
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import PeriodArray, period_array
|
||||
import pandas.util.testing as tm
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Dtype
|
||||
|
||||
|
||||
def test_registered():
    """PeriodDtype is discoverable through the extension-dtype registry."""
    assert PeriodDtype in registry.dtypes
    result = registry.find("Period[D]")
    expected = PeriodDtype("D")
    assert result == expected
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# period_array
|
||||
|
||||
|
||||
@pytest.mark.parametrize("data, freq, expected", [
    ([pd.Period("2017", "D")], None, [17167]),
    ([pd.Period("2017", "D")], "D", [17167]),
    ([2017], "D", [17167]),
    (["2017"], "D", [17167]),
    ([pd.Period("2017", "D")], pd.tseries.offsets.Day(), [17167]),
    ([pd.Period("2017", "D"), None], None, [17167, iNaT]),
    (pd.Series(pd.date_range("2017", periods=3)), None,
     [17167, 17168, 17169]),
    (pd.date_range("2017", periods=3), None, [17167, 17168, 17169]),
])
def test_period_array_ok(data, freq, expected):
    """Valid inputs (Periods, ints, strings, datetimes, with/without an
    explicit freq) produce the expected int64 ordinals."""
    result = period_array(data, freq=freq).asi8
    expected = np.asarray(expected, dtype=np.int64)
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_from_datetime64_freq_changes():
    """Converting datetimes to a coarser frequency rebins the periods.

    See https://github.com/pandas-dev/pandas/issues/23438.
    """
    stamps = pd.date_range("2017", periods=3, freq="D")
    result = PeriodArray._from_datetime64(stamps, freq="M")
    expected = period_array(['2017-01-01', '2017-01-01', '2017-01-01'],
                            freq="M")
    tm.assert_period_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("data, freq, msg", [
    ([pd.Period('2017', 'D'),
      pd.Period('2017', 'A')],
     None,
     "Input has different freq"),
    ([pd.Period('2017', 'D')],
     "A",
     "Input has different freq"),
])
def test_period_array_raises(data, freq, msg):
    """Mixed or conflicting frequencies raise IncompatibleFrequency."""
    with pytest.raises(IncompatibleFrequency, match=msg):
        period_array(data, freq)
|
||||
|
||||
|
||||
def test_period_array_non_period_series_raies():
    """PeriodArray refuses a Series that is not period-dtyped."""
    # NOTE(review): "raies" in the function name is a typo for "raises";
    # kept as-is so the collected test id stays stable.
    ser = pd.Series([1, 2, 3])
    with pytest.raises(TypeError, match='dtype'):
        PeriodArray(ser, freq='D')
|
||||
|
||||
|
||||
def test_period_array_freq_mismatch():
    # Re-wrapping an existing PeriodArray with a different freq — given
    # either as a string or as an offset — must raise instead of silently
    # reinterpreting the ordinals.
    daily = period_array(['2000', '2001'], freq='D')

    with pytest.raises(IncompatibleFrequency, match='freq'):
        PeriodArray(daily, freq='M')

    with pytest.raises(IncompatibleFrequency, match='freq'):
        PeriodArray(daily, freq=pd.tseries.offsets.MonthEnd())
|
||||
|
||||
|
||||
def test_asi8():
    # .asi8 exposes the underlying int64 ordinals; missing entries map to
    # the iNaT sentinel.
    expected = np.array([10957, 11323, iNaT])
    result = period_array(['2000', '2001', None], freq='D').asi8
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_take_raises():
    # take(..., allow_fill=True) validates the fill value: a Period with
    # the wrong frequency and an arbitrary object are both rejected.
    arr = period_array(['2000', '2001'], freq='D')

    weekly = pd.Period('2000', freq='W')
    with pytest.raises(IncompatibleFrequency, match='freq'):
        arr.take([0, -1], allow_fill=True, fill_value=weekly)

    with pytest.raises(ValueError, match='foo'):
        arr.take([0, -1], allow_fill=True, fill_value='foo')
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtype', [
    int, np.int32, np.int64, 'uint32', 'uint64',
])
def test_astype(dtype):
    # We choose to ignore the sign and size of integers for
    # Period/Datetime/Timedelta astype: any signed request lands on int64
    # and any unsigned request lands on uint64.
    arr = period_array(['2000', '2001', None], freq='D')
    result = arr.astype(dtype)

    if np.dtype(dtype).kind == 'u':
        target = np.dtype('uint64')
    else:
        target = np.dtype('int64')
    expected = arr.astype(target)

    assert result.dtype == target
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_copies():
    arr = period_array(['2000', '2001', None], freq='D')

    # copy=False: the result is a view over the ordinals.
    # Check `.base`, since we now use `.asi8` which returns a view.
    # We could maybe override it in PeriodArray to return ._data directly.
    view_result = arr.astype(np.int64, copy=False)
    assert view_result.base is arr._data

    # copy=True: a distinct array carrying the same ordinal values.
    copied = arr.astype(np.int64, copy=True)
    assert copied is not arr._data
    tm.assert_numpy_array_equal(copied, arr._data.view('i8'))
|
||||
|
||||
|
||||
def test_astype_categorical():
    # Casting to 'category' keeps the unique Periods as categories and
    # maps the missing entry to code -1.
    arr = period_array(['2000', '2001', '2001', None], freq='D')
    cats = pd.PeriodIndex(['2000', '2001'], freq='D')
    expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=cats)
    result = arr.astype('category')
    tm.assert_categorical_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_period():
    # astype to another PeriodDtype converts to the new frequency while
    # preserving missing values.
    arr = period_array(['2000', '2001', None], freq='D')
    expected = period_array(['2000', '2001', None], freq='M')
    result = arr.astype(PeriodDtype("M"))
    tm.assert_period_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('other', [
    'datetime64[ns]', 'timedelta64[ns]',
])
def test_astype_datetime(other):
    # Periods cannot be cast directly to datetime64 or timedelta64.
    arr = period_array(['2000', '2001', None], freq='D')
    # slice off the [ns] so that the regex matches.
    with pytest.raises(TypeError, match=other[:-4]):
        arr.astype(other)
|
||||
|
||||
|
||||
def test_fillna_raises():
    # A fill array whose length differs from the target is rejected.
    arr = period_array(['2000', '2001', '2002'], freq='D')
    too_short = arr[:2]
    with pytest.raises(ValueError, match='Length'):
        arr.fillna(too_short)
|
||||
|
||||
|
||||
def test_fillna_copies():
    # fillna must not mutate in place — it always hands back a new array.
    arr = period_array(['2000', '2001', '2002'], freq='D')
    filled = arr.fillna(pd.Period("2000", "D"))
    assert filled is not arr
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# setitem
|
||||
|
||||
@pytest.mark.parametrize('key, value, expected', [
    # scalar Period written to one position
    ([0], pd.Period("2000", "D"), [10957, 1, 2]),
    # None and np.nan both write NaT (iNaT ordinal)
    ([0], None, [iNaT, 1, 2]),
    ([0], np.nan, [iNaT, 1, 2]),
    # scalar broadcast over several positions
    ([0, 1, 2], pd.Period("2000", "D"), [10957] * 3),
    # sequence of Periods, one per position
    ([0, 1, 2], [pd.Period("2000", "D"),
                 pd.Period("2001", "D"),
                 pd.Period("2002", "D")],
     [10957, 11323, 11688]),
])
def test_setitem(key, value, expected):
    # `expected` lists the int64 ordinals the array should hold after
    # the assignment.
    arr = PeriodArray(np.arange(3), freq="D")
    expected = PeriodArray(expected, freq="D")
    arr[key] = value
    tm.assert_period_array_equal(arr, expected)
|
||||
|
||||
|
||||
def test_setitem_raises_incompatible_freq():
    arr = PeriodArray(np.arange(3), freq="D")

    # scalar with the wrong frequency
    with pytest.raises(IncompatibleFrequency, match="freq"):
        arr[0] = pd.Period("2000", freq="A")

    # array with the wrong frequency
    annual = period_array(['2000', '2001'], freq='A')
    with pytest.raises(IncompatibleFrequency, match="freq"):
        arr[[0, 1]] = annual
|
||||
|
||||
|
||||
def test_setitem_raises_length():
    # The assigned sequence must match the key length (2 targets, 1 value).
    parr = PeriodArray(np.arange(3), freq="D")
    with pytest.raises(ValueError, match="length"):
        parr[[0, 1]] = [pd.Period("2000", freq="D")]
|
||||
|
||||
|
||||
def test_setitem_raises_type():
    # Raw integers are not valid values for a PeriodArray slot.
    parr = PeriodArray(np.arange(3), freq="D")
    with pytest.raises(TypeError, match="int"):
        parr[0] = 1
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Ops
|
||||
|
||||
def test_sub_period():
    # Subtracting a Period with a different frequency is undefined and
    # must raise.
    arr = period_array(['2000', '2001'], freq='D')
    monthly = pd.Period("2000", freq="M")
    with pytest.raises(IncompatibleFrequency, match="freq"):
        arr - monthly
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Methods
|
||||
|
||||
@pytest.mark.parametrize('other', [
    pd.Period('2000', freq='H'),
    period_array(['2000', '2001', '2000'], freq='H')
])
def test_where_different_freq_raises(other):
    # Series.where with a wrong-frequency replacement — scalar Period or
    # PeriodArray alike — must raise instead of coercing.
    series = pd.Series(period_array(['2000', '2001', '2002'], freq='D'))
    mask = np.array([True, False, True])
    with pytest.raises(IncompatibleFrequency, match="freq"):
        series.where(mask, other)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Printing
|
||||
|
||||
def test_repr_small():
    # Small arrays render every element on a single line between the
    # class header and the length/dtype footer.
    arr = period_array(['2000', '2001'], freq='D')
    result = str(arr)
    expected = (
        "<PeriodArray>\n"
        "['2000-01-01', '2001-01-01']\n"
        "Length: 2, dtype: period[D]"
    )
    assert result == expected
|
||||
|
||||
|
||||
def test_repr_large():
    # Large arrays are truncated: the repr shows the first and last few
    # rows separated by an ellipsis line.
    arr = period_array(['2000', '2001'] * 500, freq='D')
    result = str(arr)
    expected = (
        "<PeriodArray>\n"
        "['2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
        "'2000-01-01',\n"
        " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
        "'2001-01-01',\n"
        " ...\n"
        " '2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
        "'2000-01-01',\n"
        " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
        "'2001-01-01']\n"
        "Length: 1000, dtype: period[D]"
    )
    assert result == expected
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Reductions
|
||||
|
||||
class TestReductions(object):
    # min/max reductions over PeriodArray, covering NaT handling and
    # empty input.

    def test_min_max(self):
        arr = period_array([
            '2000-01-03',
            '2000-01-03',
            'NaT',
            '2000-01-02',
            '2000-01-05',
            '2000-01-04',
        ], freq='D')

        # skipna defaults to True, so the NaT entry is ignored.
        assert arr.min() == pd.Period('2000-01-02', freq='D')
        assert arr.max() == pd.Period('2000-01-05', freq='D')

        # With skipna=False a single NaT poisons the reduction.
        assert arr.min(skipna=False) is pd.NaT
        assert arr.max(skipna=False) is pd.NaT

    @pytest.mark.parametrize('skipna', [True, False])
    def test_min_max_empty(self, skipna):
        # An empty array reduces to NaT regardless of skipna.
        arr = period_array([], freq='D')
        assert arr.min(skipna=skipna) is pd.NaT
        assert arr.max(skipna=skipna) is pd.NaT
|
||||
@@ -0,0 +1,139 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import TimedeltaArray
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestTimedeltaArrayConstructor(object):
    def test_freq_validation(self):
        # ensure that the public constructor cannot create an invalid
        # instance: hourly spacing does not conform to a daily freq.
        hours = np.array([0, 0, 1], dtype=np.int64) * 3600 * 10**9

        msg = ("Inferred frequency None from passed values does not "
               "conform to passed frequency D")
        with pytest.raises(ValueError, match=msg):
            TimedeltaArray(hours.view('timedelta64[ns]'), freq="D")

    def test_non_array_raises(self):
        # A plain list is rejected; an ndarray is required.
        with pytest.raises(ValueError, match='list'):
            TimedeltaArray([1, 2, 3])

    def test_other_type_raises(self):
        # Non-timedelta dtypes cannot be reinterpreted.
        with pytest.raises(ValueError,
                           match="dtype bool cannot be converted"):
            TimedeltaArray(np.array([1, 2, 3], dtype='bool'))

    def test_incorrect_dtype_raises(self):
        # TODO: why TypeError for 'category' but ValueError for i8?
        with pytest.raises(ValueError,
                           match=r'category cannot be converted '
                                 r'to timedelta64\[ns\]'):
            TimedeltaArray(np.array([1, 2, 3], dtype='i8'), dtype='category')

        with pytest.raises(ValueError,
                           match=r"dtype int64 cannot be converted "
                                 r"to timedelta64\[ns\]"):
            TimedeltaArray(np.array([1, 2, 3], dtype='i8'),
                           dtype=np.dtype("int64"))

    def test_copy(self):
        # copy=False must keep the exact buffer; copy=True must not share
        # memory with the input, not even via a view.
        data = np.array([1, 2, 3], dtype='m8[ns]')
        shared = TimedeltaArray(data, copy=False)
        assert shared._data is data

        copied = TimedeltaArray(data, copy=True)
        assert copied._data is not data
        assert copied._data.base is not data
|
||||
|
||||
|
||||
class TestTimedeltaArray(object):
    def test_from_sequence_dtype(self):
        # An explicit object dtype is rejected by _from_sequence.
        msg = "dtype .*object.* cannot be converted to timedelta64"
        with pytest.raises(ValueError, match=msg):
            TimedeltaArray._from_sequence([], dtype=object)

    def test_abs(self):
        # abs() flips negative values and leaves NaT untouched.
        raw = np.array([-3600 * 10**9, 'NaT', 7200 * 10**9], dtype='m8[ns]')
        expected = TimedeltaArray(
            np.array([3600 * 10**9, 'NaT', 7200 * 10**9], dtype='m8[ns]'))
        result = abs(TimedeltaArray(raw))
        tm.assert_timedelta_array_equal(result, expected)

    def test_neg(self):
        # Unary minus negates elementwise; NaT stays NaT.
        raw = np.array([-3600 * 10**9, 'NaT', 7200 * 10**9], dtype='m8[ns]')
        expected = TimedeltaArray(
            np.array([3600 * 10**9, 'NaT', -7200 * 10**9], dtype='m8[ns]'))
        result = -TimedeltaArray(raw)
        tm.assert_timedelta_array_equal(result, expected)

    def test_neg_freq(self):
        # Negating an array that carries a freq also negates the freq.
        tdi = pd.timedelta_range('2 Days', periods=4, freq='H')
        expected = TimedeltaArray(-tdi._data, freq=-tdi.freq)
        result = -TimedeltaArray(tdi, freq=tdi.freq)
        tm.assert_timedelta_array_equal(result, expected)

    @pytest.mark.parametrize("dtype", [
        int, np.int32, np.int64, 'uint32', 'uint64',
    ])
    def test_astype_int(self, dtype):
        # Sign and size of the requested integer dtype are ignored: signed
        # requests land on int64, unsigned on uint64.
        arr = TimedeltaArray._from_sequence([pd.Timedelta('1H'),
                                             pd.Timedelta('2H')])
        result = arr.astype(dtype)

        if np.dtype(dtype).kind == 'u':
            target = np.dtype('uint64')
        else:
            target = np.dtype('int64')
        expected = arr.astype(target)

        assert result.dtype == target
        tm.assert_numpy_array_equal(result, expected)

    def test_setitem_clears_freq(self):
        # Any __setitem__ invalidates the declared frequency.
        arr = TimedeltaArray(pd.timedelta_range('1H', periods=2, freq='H'))
        arr[0] = pd.Timedelta("1H")
        assert arr.freq is None
|
||||
|
||||
|
||||
class TestReductions(object):
    # min/max reductions for TimedeltaArray, covering NaT handling and
    # empty input.

    def test_min_max(self):
        arr = TimedeltaArray._from_sequence([
            '3H', '3H', 'NaT', '2H', '5H', '4H',
        ])

        # NaT is skipped by default.
        assert arr.min() == pd.Timedelta('2H')
        assert arr.max() == pd.Timedelta('5H')

        # skipna=False propagates NaT.
        assert arr.min(skipna=False) is pd.NaT
        assert arr.max(skipna=False) is pd.NaT

    @pytest.mark.parametrize('skipna', [True, False])
    def test_min_max_empty(self, skipna):
        # Empty input reduces to NaT regardless of skipna.
        arr = TimedeltaArray._from_sequence([])
        assert arr.min(skipna=skipna) is pd.NaT
        assert arr.max(skipna=skipna) is pd.NaT
|
||||
Reference in New Issue
Block a user