started work on backend
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,10 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from pandas import Categorical
|
||||
|
||||
|
||||
class TestCategorical(object):
|
||||
|
||||
def setup_method(self, method):
|
||||
self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
|
||||
ordered=True)
|
||||
@@ -0,0 +1,13 @@
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
|
||||
def allow_fill(request):
|
||||
"""Boolean 'allow_fill' parameter for Categorical.take"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
|
||||
def ordered(request):
|
||||
"""Boolean 'ordered' parameter for Categorical."""
|
||||
return request.param
|
||||
@@ -0,0 +1,113 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize('ordered', [True, False])
|
||||
@pytest.mark.parametrize('categories', [
|
||||
['b', 'a', 'c'],
|
||||
['a', 'b', 'c', 'd'],
|
||||
])
|
||||
def test_factorize(categories, ordered):
|
||||
cat = pd.Categorical(['b', 'b', 'a', 'c', None],
|
||||
categories=categories,
|
||||
ordered=ordered)
|
||||
labels, uniques = pd.factorize(cat)
|
||||
expected_labels = np.array([0, 0, 1, 2, -1], dtype=np.intp)
|
||||
expected_uniques = pd.Categorical(['b', 'a', 'c'],
|
||||
categories=categories,
|
||||
ordered=ordered)
|
||||
|
||||
tm.assert_numpy_array_equal(labels, expected_labels)
|
||||
tm.assert_categorical_equal(uniques, expected_uniques)
|
||||
|
||||
|
||||
def test_factorized_sort():
|
||||
cat = pd.Categorical(['b', 'b', None, 'a'])
|
||||
labels, uniques = pd.factorize(cat, sort=True)
|
||||
expected_labels = np.array([1, 1, -1, 0], dtype=np.intp)
|
||||
expected_uniques = pd.Categorical(['a', 'b'])
|
||||
|
||||
tm.assert_numpy_array_equal(labels, expected_labels)
|
||||
tm.assert_categorical_equal(uniques, expected_uniques)
|
||||
|
||||
|
||||
def test_factorized_sort_ordered():
|
||||
cat = pd.Categorical(['b', 'b', None, 'a'],
|
||||
categories=['c', 'b', 'a'],
|
||||
ordered=True)
|
||||
|
||||
labels, uniques = pd.factorize(cat, sort=True)
|
||||
expected_labels = np.array([0, 0, -1, 1], dtype=np.intp)
|
||||
expected_uniques = pd.Categorical(['b', 'a'],
|
||||
categories=['c', 'b', 'a'],
|
||||
ordered=True)
|
||||
|
||||
tm.assert_numpy_array_equal(labels, expected_labels)
|
||||
tm.assert_categorical_equal(uniques, expected_uniques)
|
||||
|
||||
|
||||
def test_isin_cats():
|
||||
# GH2003
|
||||
cat = pd.Categorical(["a", "b", np.nan])
|
||||
|
||||
result = cat.isin(["a", np.nan])
|
||||
expected = np.array([True, False, True], dtype=bool)
|
||||
tm.assert_numpy_array_equal(expected, result)
|
||||
|
||||
result = cat.isin(["a", "c"])
|
||||
expected = np.array([True, False, False], dtype=bool)
|
||||
tm.assert_numpy_array_equal(expected, result)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("empty", [[], pd.Series(), np.array([])])
|
||||
def test_isin_empty(empty):
|
||||
s = pd.Categorical(["a", "b"])
|
||||
expected = np.array([False, False], dtype=bool)
|
||||
|
||||
result = s.isin(empty)
|
||||
tm.assert_numpy_array_equal(expected, result)
|
||||
|
||||
|
||||
class TestTake(object):
|
||||
# https://github.com/pandas-dev/pandas/issues/20664
|
||||
|
||||
def test_take_warns(self):
|
||||
cat = pd.Categorical(['a', 'b'])
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
cat.take([0, -1])
|
||||
|
||||
def test_take_positive_no_warning(self):
|
||||
cat = pd.Categorical(['a', 'b'])
|
||||
with tm.assert_produces_warning(None):
|
||||
cat.take([0, 0])
|
||||
|
||||
def test_take_bounds(self, allow_fill):
|
||||
# https://github.com/pandas-dev/pandas/issues/20664
|
||||
cat = pd.Categorical(['a', 'b', 'a'])
|
||||
with pytest.raises(IndexError):
|
||||
cat.take([4, 5], allow_fill=allow_fill)
|
||||
|
||||
def test_take_empty(self, allow_fill):
|
||||
# https://github.com/pandas-dev/pandas/issues/20664
|
||||
cat = pd.Categorical([], categories=['a', 'b'])
|
||||
with pytest.raises(IndexError):
|
||||
cat.take([0], allow_fill=allow_fill)
|
||||
|
||||
def test_positional_take(self, ordered):
|
||||
cat = pd.Categorical(['a', 'a', 'b', 'b'], categories=['b', 'a'],
|
||||
ordered=ordered)
|
||||
result = cat.take([0, 1, 2], allow_fill=False)
|
||||
expected = pd.Categorical(['a', 'a', 'b'], categories=cat.categories,
|
||||
ordered=ordered)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_positional_take_unobserved(self, ordered):
|
||||
cat = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'],
|
||||
ordered=ordered)
|
||||
result = cat.take([1, 0], allow_fill=False)
|
||||
expected = pd.Categorical(['b', 'a'], categories=cat.categories,
|
||||
ordered=ordered)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
@@ -0,0 +1,320 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pytest
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas.util.testing as tm
|
||||
from pandas import Categorical, Index, Series
|
||||
|
||||
from pandas.compat import PYPY
|
||||
|
||||
|
||||
class TestCategoricalAnalytics(object):
|
||||
|
||||
def test_min_max(self):
|
||||
|
||||
# unordered cats have no min/max
|
||||
cat = Categorical(["a", "b", "c", "d"], ordered=False)
|
||||
pytest.raises(TypeError, lambda: cat.min())
|
||||
pytest.raises(TypeError, lambda: cat.max())
|
||||
|
||||
cat = Categorical(["a", "b", "c", "d"], ordered=True)
|
||||
_min = cat.min()
|
||||
_max = cat.max()
|
||||
assert _min == "a"
|
||||
assert _max == "d"
|
||||
|
||||
cat = Categorical(["a", "b", "c", "d"],
|
||||
categories=['d', 'c', 'b', 'a'], ordered=True)
|
||||
_min = cat.min()
|
||||
_max = cat.max()
|
||||
assert _min == "d"
|
||||
assert _max == "a"
|
||||
|
||||
cat = Categorical([np.nan, "b", "c", np.nan],
|
||||
categories=['d', 'c', 'b', 'a'], ordered=True)
|
||||
_min = cat.min()
|
||||
_max = cat.max()
|
||||
assert np.isnan(_min)
|
||||
assert _max == "b"
|
||||
|
||||
_min = cat.min(numeric_only=True)
|
||||
assert _min == "c"
|
||||
_max = cat.max(numeric_only=True)
|
||||
assert _max == "b"
|
||||
|
||||
cat = Categorical([np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1],
|
||||
ordered=True)
|
||||
_min = cat.min()
|
||||
_max = cat.max()
|
||||
assert np.isnan(_min)
|
||||
assert _max == 1
|
||||
|
||||
_min = cat.min(numeric_only=True)
|
||||
assert _min == 2
|
||||
_max = cat.max(numeric_only=True)
|
||||
assert _max == 1
|
||||
|
||||
@pytest.mark.parametrize("values,categories,exp_mode", [
|
||||
([1, 1, 2, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5]),
|
||||
([1, 1, 1, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5, 1]),
|
||||
([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1]),
|
||||
([np.nan, np.nan, np.nan, 4, 5], [5, 4, 3, 2, 1], [5, 4]),
|
||||
([np.nan, np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]),
|
||||
([np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4])])
|
||||
def test_mode(self, values, categories, exp_mode):
|
||||
s = Categorical(values, categories=categories, ordered=True)
|
||||
res = s.mode()
|
||||
exp = Categorical(exp_mode, categories=categories, ordered=True)
|
||||
tm.assert_categorical_equal(res, exp)
|
||||
|
||||
def test_searchsorted(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/8420
|
||||
# https://github.com/pandas-dev/pandas/issues/14522
|
||||
|
||||
c1 = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
|
||||
categories=['cheese', 'milk', 'apple', 'bread'],
|
||||
ordered=True)
|
||||
s1 = Series(c1)
|
||||
c2 = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
|
||||
categories=['cheese', 'milk', 'apple', 'bread'],
|
||||
ordered=False)
|
||||
s2 = Series(c2)
|
||||
|
||||
# Searching for single item argument, side='left' (default)
|
||||
res_cat = c1.searchsorted('apple')
|
||||
res_ser = s1.searchsorted('apple')
|
||||
exp = np.array([2], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(res_cat, exp)
|
||||
tm.assert_numpy_array_equal(res_ser, exp)
|
||||
|
||||
# Searching for single item array, side='left' (default)
|
||||
res_cat = c1.searchsorted(['bread'])
|
||||
res_ser = s1.searchsorted(['bread'])
|
||||
exp = np.array([3], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(res_cat, exp)
|
||||
tm.assert_numpy_array_equal(res_ser, exp)
|
||||
|
||||
# Searching for several items array, side='right'
|
||||
res_cat = c1.searchsorted(['apple', 'bread'], side='right')
|
||||
res_ser = s1.searchsorted(['apple', 'bread'], side='right')
|
||||
exp = np.array([3, 5], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(res_cat, exp)
|
||||
tm.assert_numpy_array_equal(res_ser, exp)
|
||||
|
||||
# Searching for a single value that is not from the Categorical
|
||||
pytest.raises(ValueError, lambda: c1.searchsorted('cucumber'))
|
||||
pytest.raises(ValueError, lambda: s1.searchsorted('cucumber'))
|
||||
|
||||
# Searching for multiple values one of each is not from the Categorical
|
||||
pytest.raises(ValueError,
|
||||
lambda: c1.searchsorted(['bread', 'cucumber']))
|
||||
pytest.raises(ValueError,
|
||||
lambda: s1.searchsorted(['bread', 'cucumber']))
|
||||
|
||||
# searchsorted call for unordered Categorical
|
||||
pytest.raises(ValueError, lambda: c2.searchsorted('apple'))
|
||||
pytest.raises(ValueError, lambda: s2.searchsorted('apple'))
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
res = c1.searchsorted(v=['bread'])
|
||||
exp = np.array([3], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(res, exp)
|
||||
|
||||
def test_unique(self):
|
||||
# categories are reordered based on value when ordered=False
|
||||
cat = Categorical(["a", "b"])
|
||||
exp = Index(["a", "b"])
|
||||
res = cat.unique()
|
||||
tm.assert_index_equal(res.categories, exp)
|
||||
tm.assert_categorical_equal(res, cat)
|
||||
|
||||
cat = Categorical(["a", "b", "a", "a"], categories=["a", "b", "c"])
|
||||
res = cat.unique()
|
||||
tm.assert_index_equal(res.categories, exp)
|
||||
tm.assert_categorical_equal(res, Categorical(exp))
|
||||
|
||||
cat = Categorical(["c", "a", "b", "a", "a"],
|
||||
categories=["a", "b", "c"])
|
||||
exp = Index(["c", "a", "b"])
|
||||
res = cat.unique()
|
||||
tm.assert_index_equal(res.categories, exp)
|
||||
exp_cat = Categorical(exp, categories=['c', 'a', 'b'])
|
||||
tm.assert_categorical_equal(res, exp_cat)
|
||||
|
||||
# nan must be removed
|
||||
cat = Categorical(["b", np.nan, "b", np.nan, "a"],
|
||||
categories=["a", "b", "c"])
|
||||
res = cat.unique()
|
||||
exp = Index(["b", "a"])
|
||||
tm.assert_index_equal(res.categories, exp)
|
||||
exp_cat = Categorical(["b", np.nan, "a"], categories=["b", "a"])
|
||||
tm.assert_categorical_equal(res, exp_cat)
|
||||
|
||||
def test_unique_ordered(self):
|
||||
# keep categories order when ordered=True
|
||||
cat = Categorical(['b', 'a', 'b'], categories=['a', 'b'], ordered=True)
|
||||
res = cat.unique()
|
||||
exp_cat = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True)
|
||||
tm.assert_categorical_equal(res, exp_cat)
|
||||
|
||||
cat = Categorical(['c', 'b', 'a', 'a'], categories=['a', 'b', 'c'],
|
||||
ordered=True)
|
||||
res = cat.unique()
|
||||
exp_cat = Categorical(['c', 'b', 'a'], categories=['a', 'b', 'c'],
|
||||
ordered=True)
|
||||
tm.assert_categorical_equal(res, exp_cat)
|
||||
|
||||
cat = Categorical(['b', 'a', 'a'], categories=['a', 'b', 'c'],
|
||||
ordered=True)
|
||||
res = cat.unique()
|
||||
exp_cat = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True)
|
||||
tm.assert_categorical_equal(res, exp_cat)
|
||||
|
||||
cat = Categorical(['b', 'b', np.nan, 'a'], categories=['a', 'b', 'c'],
|
||||
ordered=True)
|
||||
res = cat.unique()
|
||||
exp_cat = Categorical(['b', np.nan, 'a'], categories=['a', 'b'],
|
||||
ordered=True)
|
||||
tm.assert_categorical_equal(res, exp_cat)
|
||||
|
||||
def test_unique_index_series(self):
|
||||
c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1])
|
||||
# Categorical.unique sorts categories by appearance order
|
||||
# if ordered=False
|
||||
exp = Categorical([3, 1, 2], categories=[3, 1, 2])
|
||||
tm.assert_categorical_equal(c.unique(), exp)
|
||||
|
||||
tm.assert_index_equal(Index(c).unique(), Index(exp))
|
||||
tm.assert_categorical_equal(Series(c).unique(), exp)
|
||||
|
||||
c = Categorical([1, 1, 2, 2], categories=[3, 2, 1])
|
||||
exp = Categorical([1, 2], categories=[1, 2])
|
||||
tm.assert_categorical_equal(c.unique(), exp)
|
||||
tm.assert_index_equal(Index(c).unique(), Index(exp))
|
||||
tm.assert_categorical_equal(Series(c).unique(), exp)
|
||||
|
||||
c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1], ordered=True)
|
||||
# Categorical.unique keeps categories order if ordered=True
|
||||
exp = Categorical([3, 1, 2], categories=[3, 2, 1], ordered=True)
|
||||
tm.assert_categorical_equal(c.unique(), exp)
|
||||
|
||||
tm.assert_index_equal(Index(c).unique(), Index(exp))
|
||||
tm.assert_categorical_equal(Series(c).unique(), exp)
|
||||
|
||||
def test_shift(self):
|
||||
# GH 9416
|
||||
cat = Categorical(['a', 'b', 'c', 'd', 'a'])
|
||||
|
||||
# shift forward
|
||||
sp1 = cat.shift(1)
|
||||
xp1 = Categorical([np.nan, 'a', 'b', 'c', 'd'])
|
||||
tm.assert_categorical_equal(sp1, xp1)
|
||||
tm.assert_categorical_equal(cat[:-1], sp1[1:])
|
||||
|
||||
# shift back
|
||||
sn2 = cat.shift(-2)
|
||||
xp2 = Categorical(['c', 'd', 'a', np.nan, np.nan],
|
||||
categories=['a', 'b', 'c', 'd'])
|
||||
tm.assert_categorical_equal(sn2, xp2)
|
||||
tm.assert_categorical_equal(cat[2:], sn2[:-2])
|
||||
|
||||
# shift by zero
|
||||
tm.assert_categorical_equal(cat, cat.shift(0))
|
||||
|
||||
def test_nbytes(self):
|
||||
cat = Categorical([1, 2, 3])
|
||||
exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories
|
||||
assert cat.nbytes == exp
|
||||
|
||||
def test_memory_usage(self):
|
||||
cat = Categorical([1, 2, 3])
|
||||
|
||||
# .categories is an index, so we include the hashtable
|
||||
assert 0 < cat.nbytes <= cat.memory_usage()
|
||||
assert 0 < cat.nbytes <= cat.memory_usage(deep=True)
|
||||
|
||||
cat = Categorical(['foo', 'foo', 'bar'])
|
||||
assert cat.memory_usage(deep=True) > cat.nbytes
|
||||
|
||||
if not PYPY:
|
||||
# sys.getsizeof will call the .memory_usage with
|
||||
# deep=True, and add on some GC overhead
|
||||
diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
|
||||
assert abs(diff) < 100
|
||||
|
||||
def test_map(self):
|
||||
c = Categorical(list('ABABC'), categories=list('CBA'), ordered=True)
|
||||
result = c.map(lambda x: x.lower())
|
||||
exp = Categorical(list('ababc'), categories=list('cba'), ordered=True)
|
||||
tm.assert_categorical_equal(result, exp)
|
||||
|
||||
c = Categorical(list('ABABC'), categories=list('ABC'), ordered=False)
|
||||
result = c.map(lambda x: x.lower())
|
||||
exp = Categorical(list('ababc'), categories=list('abc'), ordered=False)
|
||||
tm.assert_categorical_equal(result, exp)
|
||||
|
||||
result = c.map(lambda x: 1)
|
||||
# GH 12766: Return an index not an array
|
||||
tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64)))
|
||||
|
||||
def test_validate_inplace(self):
|
||||
cat = Categorical(['A', 'B', 'B', 'C', 'A'])
|
||||
invalid_values = [1, "True", [1, 2, 3], 5.0]
|
||||
|
||||
for value in invalid_values:
|
||||
with pytest.raises(ValueError):
|
||||
cat.set_ordered(value=True, inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.as_ordered(inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.as_unordered(inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.set_categories(['X', 'Y', 'Z'], rename=True, inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.rename_categories(['X', 'Y', 'Z'], inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.reorder_categories(
|
||||
['X', 'Y', 'Z'], ordered=True, inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.add_categories(
|
||||
new_categories=['D', 'E', 'F'], inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.remove_categories(removals=['D', 'E', 'F'], inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.remove_unused_categories(inplace=value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
cat.sort_values(inplace=value)
|
||||
|
||||
def test_repeat(self):
|
||||
# GH10183
|
||||
cat = Categorical(["a", "b"], categories=["a", "b"])
|
||||
exp = Categorical(["a", "a", "b", "b"], categories=["a", "b"])
|
||||
res = cat.repeat(2)
|
||||
tm.assert_categorical_equal(res, exp)
|
||||
|
||||
def test_numpy_repeat(self):
|
||||
cat = Categorical(["a", "b"], categories=["a", "b"])
|
||||
exp = Categorical(["a", "a", "b", "b"], categories=["a", "b"])
|
||||
tm.assert_categorical_equal(np.repeat(cat, 2), exp)
|
||||
|
||||
msg = "the 'axis' parameter is not supported"
|
||||
tm.assert_raises_regex(ValueError, msg, np.repeat, cat, 2, axis=1)
|
||||
|
||||
def test_isna(self):
|
||||
exp = np.array([False, False, True])
|
||||
c = Categorical(["a", "b", np.nan])
|
||||
res = c.isna()
|
||||
|
||||
tm.assert_numpy_array_equal(res, exp)
|
||||
@@ -0,0 +1,518 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas.util.testing as tm
|
||||
from pandas import Categorical, CategoricalIndex, Index, Series, DataFrame
|
||||
|
||||
from pandas.core.arrays.categorical import _recode_for_categories
|
||||
from pandas.tests.categorical.common import TestCategorical
|
||||
|
||||
|
||||
class TestCategoricalAPI(object):
|
||||
|
||||
def test_ordered_api(self):
|
||||
# GH 9347
|
||||
cat1 = Categorical(list('acb'), ordered=False)
|
||||
tm.assert_index_equal(cat1.categories, Index(['a', 'b', 'c']))
|
||||
assert not cat1.ordered
|
||||
|
||||
cat2 = Categorical(list('acb'), categories=list('bca'), ordered=False)
|
||||
tm.assert_index_equal(cat2.categories, Index(['b', 'c', 'a']))
|
||||
assert not cat2.ordered
|
||||
|
||||
cat3 = Categorical(list('acb'), ordered=True)
|
||||
tm.assert_index_equal(cat3.categories, Index(['a', 'b', 'c']))
|
||||
assert cat3.ordered
|
||||
|
||||
cat4 = Categorical(list('acb'), categories=list('bca'), ordered=True)
|
||||
tm.assert_index_equal(cat4.categories, Index(['b', 'c', 'a']))
|
||||
assert cat4.ordered
|
||||
|
||||
def test_set_ordered(self):
|
||||
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
cat2 = cat.as_unordered()
|
||||
assert not cat2.ordered
|
||||
cat2 = cat.as_ordered()
|
||||
assert cat2.ordered
|
||||
cat2.as_unordered(inplace=True)
|
||||
assert not cat2.ordered
|
||||
cat2.as_ordered(inplace=True)
|
||||
assert cat2.ordered
|
||||
|
||||
assert cat2.set_ordered(True).ordered
|
||||
assert not cat2.set_ordered(False).ordered
|
||||
cat2.set_ordered(True, inplace=True)
|
||||
assert cat2.ordered
|
||||
cat2.set_ordered(False, inplace=True)
|
||||
assert not cat2.ordered
|
||||
|
||||
# removed in 0.19.0
|
||||
msg = "can\'t set attribute"
|
||||
with tm.assert_raises_regex(AttributeError, msg):
|
||||
cat.ordered = True
|
||||
with tm.assert_raises_regex(AttributeError, msg):
|
||||
cat.ordered = False
|
||||
|
||||
def test_rename_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"])
|
||||
|
||||
# inplace=False: the old one must not be changed
|
||||
res = cat.rename_categories([1, 2, 3])
|
||||
tm.assert_numpy_array_equal(res.__array__(), np.array([1, 2, 3, 1],
|
||||
dtype=np.int64))
|
||||
tm.assert_index_equal(res.categories, Index([1, 2, 3]))
|
||||
|
||||
exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_)
|
||||
tm.assert_numpy_array_equal(cat.__array__(), exp_cat)
|
||||
|
||||
exp_cat = Index(["a", "b", "c"])
|
||||
tm.assert_index_equal(cat.categories, exp_cat)
|
||||
|
||||
# GH18862 (let rename_categories take callables)
|
||||
result = cat.rename_categories(lambda x: x.upper())
|
||||
expected = Categorical(["A", "B", "C", "A"])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
# and now inplace
|
||||
res = cat.rename_categories([1, 2, 3], inplace=True)
|
||||
assert res is None
|
||||
tm.assert_numpy_array_equal(cat.__array__(), np.array([1, 2, 3, 1],
|
||||
dtype=np.int64))
|
||||
tm.assert_index_equal(cat.categories, Index([1, 2, 3]))
|
||||
|
||||
# Lengthen
|
||||
with pytest.raises(ValueError):
|
||||
cat.rename_categories([1, 2, 3, 4])
|
||||
|
||||
# Shorten
|
||||
with pytest.raises(ValueError):
|
||||
cat.rename_categories([1, 2])
|
||||
|
||||
def test_rename_categories_series(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/17981
|
||||
c = Categorical(['a', 'b'])
|
||||
xpr = "Treating Series 'new_categories' as a list-like "
|
||||
with tm.assert_produces_warning(FutureWarning) as rec:
|
||||
result = c.rename_categories(Series([0, 1]))
|
||||
|
||||
assert len(rec) == 1
|
||||
assert xpr in str(rec[0].message)
|
||||
expected = Categorical([0, 1])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_rename_categories_dict(self):
|
||||
# GH 17336
|
||||
cat = Categorical(['a', 'b', 'c', 'd'])
|
||||
res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1})
|
||||
expected = Index([4, 3, 2, 1])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
# Test for inplace
|
||||
res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1},
|
||||
inplace=True)
|
||||
assert res is None
|
||||
tm.assert_index_equal(cat.categories, expected)
|
||||
|
||||
# Test for dicts of smaller length
|
||||
cat = Categorical(['a', 'b', 'c', 'd'])
|
||||
res = cat.rename_categories({'a': 1, 'c': 3})
|
||||
|
||||
expected = Index([1, 'b', 3, 'd'])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
# Test for dicts with bigger length
|
||||
cat = Categorical(['a', 'b', 'c', 'd'])
|
||||
res = cat.rename_categories({'a': 1, 'b': 2, 'c': 3,
|
||||
'd': 4, 'e': 5, 'f': 6})
|
||||
expected = Index([1, 2, 3, 4])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
# Test for dicts with no items from old categories
|
||||
cat = Categorical(['a', 'b', 'c', 'd'])
|
||||
res = cat.rename_categories({'f': 1, 'g': 3})
|
||||
|
||||
expected = Index(['a', 'b', 'c', 'd'])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
def test_reorder_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
old = cat.copy()
|
||||
new = Categorical(["a", "b", "c", "a"], categories=["c", "b", "a"],
|
||||
ordered=True)
|
||||
|
||||
# first inplace == False
|
||||
res = cat.reorder_categories(["c", "b", "a"])
|
||||
# cat must be the same as before
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
# only res is changed
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
# inplace == True
|
||||
res = cat.reorder_categories(["c", "b", "a"], inplace=True)
|
||||
assert res is None
|
||||
tm.assert_categorical_equal(cat, new)
|
||||
|
||||
# not all "old" included in "new"
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
|
||||
def f():
|
||||
cat.reorder_categories(["a"])
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
# still not all "old" in "new"
|
||||
def f():
|
||||
cat.reorder_categories(["a", "b", "d"])
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
# all "old" included in "new", but too long
|
||||
def f():
|
||||
cat.reorder_categories(["a", "b", "c", "d"])
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
def test_add_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
old = cat.copy()
|
||||
new = Categorical(["a", "b", "c", "a"],
|
||||
categories=["a", "b", "c", "d"], ordered=True)
|
||||
|
||||
# first inplace == False
|
||||
res = cat.add_categories("d")
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
res = cat.add_categories(["d"])
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
# inplace == True
|
||||
res = cat.add_categories("d", inplace=True)
|
||||
tm.assert_categorical_equal(cat, new)
|
||||
assert res is None
|
||||
|
||||
# new is in old categories
|
||||
def f():
|
||||
cat.add_categories(["d"])
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
# GH 9927
|
||||
cat = Categorical(list("abc"), ordered=True)
|
||||
expected = Categorical(
|
||||
list("abc"), categories=list("abcde"), ordered=True)
|
||||
# test with Series, np.array, index, list
|
||||
res = cat.add_categories(Series(["d", "e"]))
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
res = cat.add_categories(np.array(["d", "e"]))
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
res = cat.add_categories(Index(["d", "e"]))
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
res = cat.add_categories(["d", "e"])
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
|
||||
def test_set_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
exp_categories = Index(["c", "b", "a"])
|
||||
exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
|
||||
|
||||
res = cat.set_categories(["c", "b", "a"], inplace=True)
|
||||
tm.assert_index_equal(cat.categories, exp_categories)
|
||||
tm.assert_numpy_array_equal(cat.__array__(), exp_values)
|
||||
assert res is None
|
||||
|
||||
res = cat.set_categories(["a", "b", "c"])
|
||||
# cat must be the same as before
|
||||
tm.assert_index_equal(cat.categories, exp_categories)
|
||||
tm.assert_numpy_array_equal(cat.__array__(), exp_values)
|
||||
# only res is changed
|
||||
exp_categories_back = Index(["a", "b", "c"])
|
||||
tm.assert_index_equal(res.categories, exp_categories_back)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp_values)
|
||||
|
||||
# not all "old" included in "new" -> all not included ones are now
|
||||
# np.nan
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
res = cat.set_categories(["a"])
|
||||
tm.assert_numpy_array_equal(res.codes, np.array([0, -1, -1, 0],
|
||||
dtype=np.int8))
|
||||
|
||||
# still not all "old" in "new"
|
||||
res = cat.set_categories(["a", "b", "d"])
|
||||
tm.assert_numpy_array_equal(res.codes, np.array([0, 1, -1, 0],
|
||||
dtype=np.int8))
|
||||
tm.assert_index_equal(res.categories, Index(["a", "b", "d"]))
|
||||
|
||||
# all "old" included in "new"
|
||||
cat = cat.set_categories(["a", "b", "c", "d"])
|
||||
exp_categories = Index(["a", "b", "c", "d"])
|
||||
tm.assert_index_equal(cat.categories, exp_categories)
|
||||
|
||||
# internals...
|
||||
c = Categorical([1, 2, 3, 4, 1], categories=[1, 2, 3, 4], ordered=True)
|
||||
tm.assert_numpy_array_equal(c._codes, np.array([0, 1, 2, 3, 0],
|
||||
dtype=np.int8))
|
||||
tm.assert_index_equal(c.categories, Index([1, 2, 3, 4]))
|
||||
|
||||
exp = np.array([1, 2, 3, 4, 1], dtype=np.int64)
|
||||
tm.assert_numpy_array_equal(c.get_values(), exp)
|
||||
|
||||
# all "pointers" to '4' must be changed from 3 to 0,...
|
||||
c = c.set_categories([4, 3, 2, 1])
|
||||
|
||||
# positions are changed
|
||||
tm.assert_numpy_array_equal(c._codes, np.array([3, 2, 1, 0, 3],
|
||||
dtype=np.int8))
|
||||
|
||||
# categories are now in new order
|
||||
tm.assert_index_equal(c.categories, Index([4, 3, 2, 1]))
|
||||
|
||||
# output is the same
|
||||
exp = np.array([1, 2, 3, 4, 1], dtype=np.int64)
|
||||
tm.assert_numpy_array_equal(c.get_values(), exp)
|
||||
assert c.min() == 4
|
||||
assert c.max() == 1
|
||||
|
||||
# set_categories should set the ordering if specified
|
||||
c2 = c.set_categories([4, 3, 2, 1], ordered=False)
|
||||
assert not c2.ordered
|
||||
|
||||
tm.assert_numpy_array_equal(c.get_values(), c2.get_values())
|
||||
|
||||
# set_categories should pass thru the ordering
|
||||
c2 = c.set_ordered(False).set_categories([4, 3, 2, 1])
|
||||
assert not c2.ordered
|
||||
|
||||
tm.assert_numpy_array_equal(c.get_values(), c2.get_values())
|
||||
|
||||
@pytest.mark.parametrize('values, categories, new_categories', [
|
||||
# No NaNs, same cats, same order
|
||||
(['a', 'b', 'a'], ['a', 'b'], ['a', 'b'],),
|
||||
# No NaNs, same cats, different order
|
||||
(['a', 'b', 'a'], ['a', 'b'], ['b', 'a'],),
|
||||
# Same, unsorted
|
||||
(['b', 'a', 'a'], ['a', 'b'], ['a', 'b'],),
|
||||
# No NaNs, same cats, different order
|
||||
(['b', 'a', 'a'], ['a', 'b'], ['b', 'a'],),
|
||||
# NaNs
|
||||
(['a', 'b', 'c'], ['a', 'b'], ['a', 'b']),
|
||||
(['a', 'b', 'c'], ['a', 'b'], ['b', 'a']),
|
||||
(['b', 'a', 'c'], ['a', 'b'], ['a', 'b']),
|
||||
(['b', 'a', 'c'], ['a', 'b'], ['a', 'b']),
|
||||
# Introduce NaNs
|
||||
(['a', 'b', 'c'], ['a', 'b'], ['a']),
|
||||
(['a', 'b', 'c'], ['a', 'b'], ['b']),
|
||||
(['b', 'a', 'c'], ['a', 'b'], ['a']),
|
||||
(['b', 'a', 'c'], ['a', 'b'], ['a']),
|
||||
# No overlap
|
||||
(['a', 'b', 'c'], ['a', 'b'], ['d', 'e']),
|
||||
])
|
||||
@pytest.mark.parametrize('ordered', [True, False])
|
||||
def test_set_categories_many(self, values, categories, new_categories,
|
||||
ordered):
|
||||
c = Categorical(values, categories)
|
||||
expected = Categorical(values, new_categories, ordered)
|
||||
result = c.set_categories(new_categories, ordered=ordered)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_set_categories_private(self):
|
||||
cat = Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c', 'd'])
|
||||
cat._set_categories(['a', 'c', 'd', 'e'])
|
||||
expected = Categorical(['a', 'c', 'd'], categories=list('acde'))
|
||||
tm.assert_categorical_equal(cat, expected)
|
||||
|
||||
# fastpath
|
||||
cat = Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c', 'd'])
|
||||
cat._set_categories(['a', 'c', 'd', 'e'], fastpath=True)
|
||||
expected = Categorical(['a', 'c', 'd'], categories=list('acde'))
|
||||
tm.assert_categorical_equal(cat, expected)
|
||||
|
||||
def test_remove_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
old = cat.copy()
|
||||
new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"],
|
||||
ordered=True)
|
||||
|
||||
# first inplace == False
|
||||
res = cat.remove_categories("c")
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
res = cat.remove_categories(["c"])
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
# inplace == True
|
||||
res = cat.remove_categories("c", inplace=True)
|
||||
tm.assert_categorical_equal(cat, new)
|
||||
assert res is None
|
||||
|
||||
# removal is not in categories
|
||||
def f():
|
||||
cat.remove_categories(["c"])
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
def test_remove_unused_categories(self):
|
||||
c = Categorical(["a", "b", "c", "d", "a"],
|
||||
categories=["a", "b", "c", "d", "e"])
|
||||
exp_categories_all = Index(["a", "b", "c", "d", "e"])
|
||||
exp_categories_dropped = Index(["a", "b", "c", "d"])
|
||||
|
||||
tm.assert_index_equal(c.categories, exp_categories_all)
|
||||
|
||||
res = c.remove_unused_categories()
|
||||
tm.assert_index_equal(res.categories, exp_categories_dropped)
|
||||
tm.assert_index_equal(c.categories, exp_categories_all)
|
||||
|
||||
res = c.remove_unused_categories(inplace=True)
|
||||
tm.assert_index_equal(c.categories, exp_categories_dropped)
|
||||
assert res is None
|
||||
|
||||
# with NaN values (GH11599)
|
||||
c = Categorical(["a", "b", "c", np.nan],
|
||||
categories=["a", "b", "c", "d", "e"])
|
||||
res = c.remove_unused_categories()
|
||||
tm.assert_index_equal(res.categories,
|
||||
Index(np.array(["a", "b", "c"])))
|
||||
exp_codes = np.array([0, 1, 2, -1], dtype=np.int8)
|
||||
tm.assert_numpy_array_equal(res.codes, exp_codes)
|
||||
tm.assert_index_equal(c.categories, exp_categories_all)
|
||||
|
||||
val = ['F', np.nan, 'D', 'B', 'D', 'F', np.nan]
|
||||
cat = Categorical(values=val, categories=list('ABCDEFG'))
|
||||
out = cat.remove_unused_categories()
|
||||
tm.assert_index_equal(out.categories, Index(['B', 'D', 'F']))
|
||||
exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8)
|
||||
tm.assert_numpy_array_equal(out.codes, exp_codes)
|
||||
assert out.get_values().tolist() == val
|
||||
|
||||
alpha = list('abcdefghijklmnopqrstuvwxyz')
|
||||
val = np.random.choice(alpha[::2], 10000).astype('object')
|
||||
val[np.random.choice(len(val), 100)] = np.nan
|
||||
|
||||
cat = Categorical(values=val, categories=alpha)
|
||||
out = cat.remove_unused_categories()
|
||||
assert out.get_values().tolist() == val.tolist()
|
||||
|
||||
|
||||
class TestCategoricalAPIWithFactor(TestCategorical):
    """API tests that read ``self.factor`` (expected from the base class)."""

    def test_describe(self):
        """``describe`` returns a counts/freqs frame indexed by category."""
        factor = self.factor

        # string type
        result = factor.describe()
        assert factor.ordered
        index = CategoricalIndex(['a', 'b', 'c'], name='categories',
                                 ordered=factor.ordered)
        expected = DataFrame({'counts': [3, 2, 3],
                              'freqs': [3 / 8., 2 / 8., 3 / 8.]},
                             index=index)
        tm.assert_frame_equal(result, expected)

        # check unused categories
        widened = factor.copy()
        widened.set_categories(["a", "b", "c", "d"], inplace=True)
        result = widened.describe()

        index = CategoricalIndex(
            list('abcd'), ordered=factor.ordered, name='categories')
        expected = DataFrame({'counts': [3, 2, 3, 0],
                              'freqs': [3 / 8., 2 / 8., 3 / 8., 0]},
                             index=index)
        tm.assert_frame_equal(result, expected)

        # check an integer one
        ints = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1])
        result = ints.describe()
        index = CategoricalIndex([1, 2, 3], ordered=ints.ordered,
                                 name='categories')
        expected = DataFrame({'counts': [5, 3, 3],
                              'freqs': [5 / 11., 3 / 11., 3 / 11.]},
                             index=index)
        tm.assert_frame_equal(result, expected)

        # https://github.com/pandas-dev/pandas/issues/3678
        # describe should work with NaN
        with_nan = Categorical([np.nan, 1, 2, 2])
        result = with_nan.describe()
        index = CategoricalIndex([1, 2, np.nan],
                                 categories=[1, 2],
                                 name='categories')
        expected = DataFrame({'counts': [1, 2, 1],
                              'freqs': [1 / 4., 2 / 4., 1 / 4.]},
                             index=index)
        tm.assert_frame_equal(result, expected)

    def test_set_categories_inplace(self):
        """``set_categories(..., inplace=True)`` updates the categories."""
        widened = self.factor.copy()
        widened.set_categories(['a', 'b', 'c', 'd'], inplace=True)
        expected = Index(['a', 'b', 'c', 'd'])
        tm.assert_index_equal(widened.categories, expected)
|
||||
|
||||
|
||||
class TestPrivateCategoricalAPI(object):
|
||||
|
||||
def test_codes_immutable(self):
|
||||
|
||||
# Codes should be read only
|
||||
c = Categorical(["a", "b", "c", "a", np.nan])
|
||||
exp = np.array([0, 1, 2, 0, -1], dtype='int8')
|
||||
tm.assert_numpy_array_equal(c.codes, exp)
|
||||
|
||||
# Assignments to codes should raise
|
||||
def f():
|
||||
c.codes = np.array([0, 1, 2, 0, 1], dtype='int8')
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
# changes in the codes array should raise
|
||||
# np 1.6.1 raises RuntimeError rather than ValueError
|
||||
codes = c.codes
|
||||
|
||||
def f():
|
||||
codes[4] = 1
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
# But even after getting the codes, the original array should still be
|
||||
# writeable!
|
||||
c[4] = "a"
|
||||
exp = np.array([0, 1, 2, 0, 0], dtype='int8')
|
||||
tm.assert_numpy_array_equal(c.codes, exp)
|
||||
c._codes[4] = 2
|
||||
exp = np.array([0, 1, 2, 0, 2], dtype='int8')
|
||||
tm.assert_numpy_array_equal(c.codes, exp)
|
||||
|
||||
@pytest.mark.parametrize('codes, old, new, expected', [
|
||||
([0, 1], ['a', 'b'], ['a', 'b'], [0, 1]),
|
||||
([0, 1], ['b', 'a'], ['b', 'a'], [0, 1]),
|
||||
([0, 1], ['a', 'b'], ['b', 'a'], [1, 0]),
|
||||
([0, 1], ['b', 'a'], ['a', 'b'], [1, 0]),
|
||||
([0, 1, 0, 1], ['a', 'b'], ['a', 'b', 'c'], [0, 1, 0, 1]),
|
||||
([0, 1, 2, 2], ['a', 'b', 'c'], ['a', 'b'], [0, 1, -1, -1]),
|
||||
([0, 1, -1], ['a', 'b', 'c'], ['a', 'b', 'c'], [0, 1, -1]),
|
||||
([0, 1, -1], ['a', 'b', 'c'], ['b'], [-1, 0, -1]),
|
||||
([0, 1, -1], ['a', 'b', 'c'], ['d'], [-1, -1, -1]),
|
||||
([0, 1, -1], ['a', 'b', 'c'], [], [-1, -1, -1]),
|
||||
([-1, -1], [], ['a', 'b'], [-1, -1]),
|
||||
([1, 0], ['b', 'a'], ['a', 'b'], [0, 1]),
|
||||
])
|
||||
def test_recode_to_categories(self, codes, old, new, expected):
|
||||
codes = np.asanyarray(codes, dtype=np.int8)
|
||||
expected = np.asanyarray(expected, dtype=np.int8)
|
||||
old = Index(old)
|
||||
new = Index(new)
|
||||
result = _recode_for_categories(codes, old, new)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_recode_to_categories_large(self):
|
||||
N = 1000
|
||||
codes = np.arange(N)
|
||||
old = Index(codes)
|
||||
expected = np.arange(N - 1, -1, -1, dtype=np.int16)
|
||||
new = Index(expected)
|
||||
result = _recode_for_categories(codes, old, new)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
+515
@@ -0,0 +1,515 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
from pandas import (Categorical, Index, Series, Timestamp,
|
||||
CategoricalIndex, date_range, DatetimeIndex,
|
||||
period_range, timedelta_range, NaT,
|
||||
Interval, IntervalIndex)
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype
|
||||
|
||||
|
||||
class TestCategoricalConstructors(object):
|
||||
|
||||
def test_validate_ordered(self):
|
||||
# see gh-14058
|
||||
exp_msg = "'ordered' must either be 'True' or 'False'"
|
||||
exp_err = TypeError
|
||||
|
||||
# This should be a boolean.
|
||||
ordered = np.array([0, 1, 2])
|
||||
|
||||
with tm.assert_raises_regex(exp_err, exp_msg):
|
||||
Categorical([1, 2, 3], ordered=ordered)
|
||||
|
||||
with tm.assert_raises_regex(exp_err, exp_msg):
|
||||
Categorical.from_codes([0, 0, 1], categories=['a', 'b', 'c'],
|
||||
ordered=ordered)
|
||||
|
||||
def test_constructor_empty(self):
|
||||
# GH 17248
|
||||
c = Categorical([])
|
||||
expected = Index([])
|
||||
tm.assert_index_equal(c.categories, expected)
|
||||
|
||||
c = Categorical([], categories=[1, 2, 3])
|
||||
expected = pd.Int64Index([1, 2, 3])
|
||||
tm.assert_index_equal(c.categories, expected)
|
||||
|
||||
def test_constructor_tuples(self):
|
||||
values = np.array([(1,), (1, 2), (1,), (1, 2)], dtype=object)
|
||||
result = Categorical(values)
|
||||
expected = Index([(1,), (1, 2)], tupleize_cols=False)
|
||||
tm.assert_index_equal(result.categories, expected)
|
||||
assert result.ordered is False
|
||||
|
||||
def test_constructor_tuples_datetimes(self):
|
||||
# numpy will auto reshape when all of the tuples are the
|
||||
# same len, so add an extra one with 2 items and slice it off
|
||||
values = np.array([(Timestamp('2010-01-01'),),
|
||||
(Timestamp('2010-01-02'),),
|
||||
(Timestamp('2010-01-01'),),
|
||||
(Timestamp('2010-01-02'),),
|
||||
('a', 'b')], dtype=object)[:-1]
|
||||
result = Categorical(values)
|
||||
expected = Index([(Timestamp('2010-01-01'),),
|
||||
(Timestamp('2010-01-02'),)], tupleize_cols=False)
|
||||
tm.assert_index_equal(result.categories, expected)
|
||||
|
||||
def test_constructor_unsortable(self):
|
||||
|
||||
# it works!
|
||||
arr = np.array([1, 2, 3, datetime.now()], dtype='O')
|
||||
factor = Categorical(arr, ordered=False)
|
||||
assert not factor.ordered
|
||||
|
||||
# this however will raise as cannot be sorted
|
||||
pytest.raises(
|
||||
TypeError, lambda: Categorical(arr, ordered=True))
|
||||
|
||||
def test_constructor_interval(self):
|
||||
result = Categorical([Interval(1, 2), Interval(2, 3), Interval(3, 6)],
|
||||
ordered=True)
|
||||
ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)])
|
||||
exp = Categorical(ii, ordered=True)
|
||||
tm.assert_categorical_equal(result, exp)
|
||||
tm.assert_index_equal(result.categories, ii)
|
||||
|
||||
def test_constructor(self):
|
||||
|
||||
exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_)
|
||||
c1 = Categorical(exp_arr)
|
||||
tm.assert_numpy_array_equal(c1.__array__(), exp_arr)
|
||||
c2 = Categorical(exp_arr, categories=["a", "b", "c"])
|
||||
tm.assert_numpy_array_equal(c2.__array__(), exp_arr)
|
||||
c2 = Categorical(exp_arr, categories=["c", "b", "a"])
|
||||
tm.assert_numpy_array_equal(c2.__array__(), exp_arr)
|
||||
|
||||
# categories must be unique
|
||||
def f():
|
||||
Categorical([1, 2], [1, 2, 2])
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
def f():
|
||||
Categorical(["a", "b"], ["a", "b", "b"])
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
# The default should be unordered
|
||||
c1 = Categorical(["a", "b", "c", "a"])
|
||||
assert not c1.ordered
|
||||
|
||||
# Categorical as input
|
||||
c1 = Categorical(["a", "b", "c", "a"])
|
||||
c2 = Categorical(c1)
|
||||
tm.assert_categorical_equal(c1, c2)
|
||||
|
||||
c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
|
||||
c2 = Categorical(c1)
|
||||
tm.assert_categorical_equal(c1, c2)
|
||||
|
||||
c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
|
||||
c2 = Categorical(c1)
|
||||
tm.assert_categorical_equal(c1, c2)
|
||||
|
||||
c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
|
||||
c2 = Categorical(c1, categories=["a", "b", "c"])
|
||||
tm.assert_numpy_array_equal(c1.__array__(), c2.__array__())
|
||||
tm.assert_index_equal(c2.categories, Index(["a", "b", "c"]))
|
||||
|
||||
# Series of dtype category
|
||||
c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
|
||||
c2 = Categorical(Series(c1))
|
||||
tm.assert_categorical_equal(c1, c2)
|
||||
|
||||
c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
|
||||
c2 = Categorical(Series(c1))
|
||||
tm.assert_categorical_equal(c1, c2)
|
||||
|
||||
# Series
|
||||
c1 = Categorical(["a", "b", "c", "a"])
|
||||
c2 = Categorical(Series(["a", "b", "c", "a"]))
|
||||
tm.assert_categorical_equal(c1, c2)
|
||||
|
||||
c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
|
||||
c2 = Categorical(Series(["a", "b", "c", "a"]),
|
||||
categories=["a", "b", "c", "d"])
|
||||
tm.assert_categorical_equal(c1, c2)
|
||||
|
||||
# This should result in integer categories, not float!
|
||||
cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
|
||||
assert is_integer_dtype(cat.categories)
|
||||
|
||||
# https://github.com/pandas-dev/pandas/issues/3678
|
||||
cat = Categorical([np.nan, 1, 2, 3])
|
||||
assert is_integer_dtype(cat.categories)
|
||||
|
||||
# this should result in floats
|
||||
cat = Categorical([np.nan, 1, 2., 3])
|
||||
assert is_float_dtype(cat.categories)
|
||||
|
||||
cat = Categorical([np.nan, 1., 2., 3.])
|
||||
assert is_float_dtype(cat.categories)
|
||||
|
||||
# This doesn't work -> this would probably need some kind of "remember
|
||||
# the original type" feature to try to cast the array interface result
|
||||
# to...
|
||||
|
||||
# vals = np.asarray(cat[cat.notna()])
|
||||
# assert is_integer_dtype(vals)
|
||||
|
||||
# corner cases
|
||||
cat = Categorical([1])
|
||||
assert len(cat.categories) == 1
|
||||
assert cat.categories[0] == 1
|
||||
assert len(cat.codes) == 1
|
||||
assert cat.codes[0] == 0
|
||||
|
||||
cat = Categorical(["a"])
|
||||
assert len(cat.categories) == 1
|
||||
assert cat.categories[0] == "a"
|
||||
assert len(cat.codes) == 1
|
||||
assert cat.codes[0] == 0
|
||||
|
||||
# Scalars should be converted to lists
|
||||
cat = Categorical(1)
|
||||
assert len(cat.categories) == 1
|
||||
assert cat.categories[0] == 1
|
||||
assert len(cat.codes) == 1
|
||||
assert cat.codes[0] == 0
|
||||
|
||||
# two arrays
|
||||
# - when the first is an integer dtype and the second is not
|
||||
# - when the resulting codes are all -1/NaN
|
||||
with tm.assert_produces_warning(None):
|
||||
c_old = Categorical([0, 1, 2, 0, 1, 2],
|
||||
categories=["a", "b", "c"]) # noqa
|
||||
|
||||
with tm.assert_produces_warning(None):
|
||||
c_old = Categorical([0, 1, 2, 0, 1, 2], # noqa
|
||||
categories=[3, 4, 5])
|
||||
|
||||
# the next one are from the old docs
|
||||
with tm.assert_produces_warning(None):
|
||||
c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) # noqa
|
||||
cat = Categorical([1, 2], categories=[1, 2, 3])
|
||||
|
||||
# this is a legitimate constructor
|
||||
with tm.assert_produces_warning(None):
|
||||
c = Categorical(np.array([], dtype='int64'), # noqa
|
||||
categories=[3, 2, 1], ordered=True)
|
||||
|
||||
def test_constructor_not_sequence(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/16022
|
||||
with pytest.raises(TypeError):
|
||||
Categorical(['a', 'b'], categories='a')
|
||||
|
||||
def test_constructor_with_null(self):
|
||||
|
||||
# Cannot have NaN in categories
|
||||
with pytest.raises(ValueError):
|
||||
Categorical([np.nan, "a", "b", "c"],
|
||||
categories=[np.nan, "a", "b", "c"])
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
Categorical([None, "a", "b", "c"],
|
||||
categories=[None, "a", "b", "c"])
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
Categorical(DatetimeIndex(['nat', '20160101']),
|
||||
categories=[NaT, Timestamp('20160101')])
|
||||
|
||||
def test_constructor_with_index(self):
|
||||
ci = CategoricalIndex(list('aabbca'), categories=list('cab'))
|
||||
tm.assert_categorical_equal(ci.values, Categorical(ci))
|
||||
|
||||
ci = CategoricalIndex(list('aabbca'), categories=list('cab'))
|
||||
tm.assert_categorical_equal(ci.values,
|
||||
Categorical(ci.astype(object),
|
||||
categories=ci.categories))
|
||||
|
||||
def test_constructor_with_generator(self):
|
||||
# This was raising an Error in isna(single_val).any() because isna
|
||||
# returned a scalar for a generator
|
||||
xrange = range
|
||||
|
||||
exp = Categorical([0, 1, 2])
|
||||
cat = Categorical((x for x in [0, 1, 2]))
|
||||
tm.assert_categorical_equal(cat, exp)
|
||||
cat = Categorical(xrange(3))
|
||||
tm.assert_categorical_equal(cat, exp)
|
||||
|
||||
# This uses xrange internally
|
||||
from pandas.core.index import MultiIndex
|
||||
MultiIndex.from_product([range(5), ['a', 'b', 'c']])
|
||||
|
||||
# check that categories accept generators and sequences
|
||||
cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2]))
|
||||
tm.assert_categorical_equal(cat, exp)
|
||||
cat = Categorical([0, 1, 2], categories=xrange(3))
|
||||
tm.assert_categorical_equal(cat, exp)
|
||||
|
||||
def test_constructor_with_datetimelike(self):
|
||||
|
||||
# 12077
|
||||
# constructor wwth a datetimelike and NaT
|
||||
|
||||
for dtl in [date_range('1995-01-01 00:00:00', periods=5, freq='s'),
|
||||
date_range('1995-01-01 00:00:00', periods=5,
|
||||
freq='s', tz='US/Eastern'),
|
||||
timedelta_range('1 day', periods=5, freq='s')]:
|
||||
|
||||
s = Series(dtl)
|
||||
c = Categorical(s)
|
||||
expected = type(dtl)(s)
|
||||
expected.freq = None
|
||||
tm.assert_index_equal(c.categories, expected)
|
||||
tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype='int8'))
|
||||
|
||||
# with NaT
|
||||
s2 = s.copy()
|
||||
s2.iloc[-1] = NaT
|
||||
c = Categorical(s2)
|
||||
expected = type(dtl)(s2.dropna())
|
||||
expected.freq = None
|
||||
tm.assert_index_equal(c.categories, expected)
|
||||
|
||||
exp = np.array([0, 1, 2, 3, -1], dtype=np.int8)
|
||||
tm.assert_numpy_array_equal(c.codes, exp)
|
||||
|
||||
result = repr(c)
|
||||
assert 'NaT' in result
|
||||
|
||||
def test_constructor_from_index_series_datetimetz(self):
|
||||
idx = date_range('2015-01-01 10:00', freq='D', periods=3,
|
||||
tz='US/Eastern')
|
||||
result = Categorical(idx)
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
result = Categorical(Series(idx))
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
def test_constructor_from_index_series_timedelta(self):
|
||||
idx = timedelta_range('1 days', freq='D', periods=3)
|
||||
result = Categorical(idx)
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
result = Categorical(Series(idx))
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
def test_constructor_from_index_series_period(self):
|
||||
idx = period_range('2015-01-01', freq='D', periods=3)
|
||||
result = Categorical(idx)
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
result = Categorical(Series(idx))
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
def test_constructor_invariant(self):
|
||||
# GH 14190
|
||||
vals = [
|
||||
np.array([1., 1.2, 1.8, np.nan]),
|
||||
np.array([1, 2, 3], dtype='int64'),
|
||||
['a', 'b', 'c', np.nan],
|
||||
[pd.Period('2014-01'), pd.Period('2014-02'), NaT],
|
||||
[Timestamp('2014-01-01'), Timestamp('2014-01-02'), NaT],
|
||||
[Timestamp('2014-01-01', tz='US/Eastern'),
|
||||
Timestamp('2014-01-02', tz='US/Eastern'), NaT],
|
||||
]
|
||||
for val in vals:
|
||||
c = Categorical(val)
|
||||
c2 = Categorical(c)
|
||||
tm.assert_categorical_equal(c, c2)
|
||||
|
||||
@pytest.mark.parametrize('ordered', [True, False])
|
||||
def test_constructor_with_dtype(self, ordered):
|
||||
categories = ['b', 'a', 'c']
|
||||
dtype = CategoricalDtype(categories, ordered=ordered)
|
||||
result = Categorical(['a', 'b', 'a', 'c'], dtype=dtype)
|
||||
expected = Categorical(['a', 'b', 'a', 'c'], categories=categories,
|
||||
ordered=ordered)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
assert result.ordered is ordered
|
||||
|
||||
def test_constructor_dtype_and_others_raises(self):
|
||||
dtype = CategoricalDtype(['a', 'b'], ordered=True)
|
||||
with tm.assert_raises_regex(ValueError, "Cannot"):
|
||||
Categorical(['a', 'b'], categories=['a', 'b'], dtype=dtype)
|
||||
|
||||
with tm.assert_raises_regex(ValueError, "Cannot"):
|
||||
Categorical(['a', 'b'], ordered=True, dtype=dtype)
|
||||
|
||||
with tm.assert_raises_regex(ValueError, "Cannot"):
|
||||
Categorical(['a', 'b'], ordered=False, dtype=dtype)
|
||||
|
||||
@pytest.mark.parametrize('categories', [
|
||||
None, ['a', 'b'], ['a', 'c'],
|
||||
])
|
||||
@pytest.mark.parametrize('ordered', [True, False])
|
||||
def test_constructor_str_category(self, categories, ordered):
|
||||
result = Categorical(['a', 'b'], categories=categories,
|
||||
ordered=ordered, dtype='category')
|
||||
expected = Categorical(['a', 'b'], categories=categories,
|
||||
ordered=ordered)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_constructor_str_unknown(self):
|
||||
with tm.assert_raises_regex(ValueError, "Unknown `dtype`"):
|
||||
Categorical([1, 2], dtype="foo")
|
||||
|
||||
def test_constructor_from_categorical_with_dtype(self):
|
||||
dtype = CategoricalDtype(['a', 'b', 'c'], ordered=True)
|
||||
values = Categorical(['a', 'b', 'd'])
|
||||
result = Categorical(values, dtype=dtype)
|
||||
# We use dtype.categories, not values.categories
|
||||
expected = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'c'],
|
||||
ordered=True)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_constructor_from_categorical_with_unknown_dtype(self):
|
||||
dtype = CategoricalDtype(None, ordered=True)
|
||||
values = Categorical(['a', 'b', 'd'])
|
||||
result = Categorical(values, dtype=dtype)
|
||||
# We use values.categories, not dtype.categories
|
||||
expected = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'd'],
|
||||
ordered=True)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_constructor_from_categorical_string(self):
|
||||
values = Categorical(['a', 'b', 'd'])
|
||||
# use categories, ordered
|
||||
result = Categorical(values, categories=['a', 'b', 'c'], ordered=True,
|
||||
dtype='category')
|
||||
expected = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'c'],
|
||||
ordered=True)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
# No string
|
||||
result = Categorical(values, categories=['a', 'b', 'c'], ordered=True)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_constructor_with_categorical_categories(self):
|
||||
# GH17884
|
||||
expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
|
||||
|
||||
result = Categorical(
|
||||
['a', 'b'], categories=Categorical(['a', 'b', 'c']))
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
result = Categorical(
|
||||
['a', 'b'], categories=CategoricalIndex(['a', 'b', 'c']))
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_from_codes(self):
|
||||
|
||||
# too few categories
|
||||
def f():
|
||||
Categorical.from_codes([1, 2], [1, 2])
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
# no int codes
|
||||
def f():
|
||||
Categorical.from_codes(["a"], [1, 2])
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
# no unique categories
|
||||
def f():
|
||||
Categorical.from_codes([0, 1, 2], ["a", "a", "b"])
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
# NaN categories included
|
||||
def f():
|
||||
Categorical.from_codes([0, 1, 2], ["a", "b", np.nan])
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
# too negative
|
||||
def f():
|
||||
Categorical.from_codes([-2, 1, 2], ["a", "b", "c"])
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
exp = Categorical(["a", "b", "c"], ordered=False)
|
||||
res = Categorical.from_codes([0, 1, 2], ["a", "b", "c"])
|
||||
tm.assert_categorical_equal(exp, res)
|
||||
|
||||
# Not available in earlier numpy versions
|
||||
if hasattr(np.random, "choice"):
|
||||
codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
|
||||
Categorical.from_codes(codes, categories=["train", "test"])
|
||||
|
||||
def test_from_codes_with_categorical_categories(self):
|
||||
# GH17884
|
||||
expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
|
||||
|
||||
result = Categorical.from_codes(
|
||||
[0, 1], categories=Categorical(['a', 'b', 'c']))
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
result = Categorical.from_codes(
|
||||
[0, 1], categories=CategoricalIndex(['a', 'b', 'c']))
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
# non-unique Categorical still raises
|
||||
with pytest.raises(ValueError):
|
||||
Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))
|
||||
|
||||
@pytest.mark.parametrize('dtype', [None, 'category'])
|
||||
def test_from_inferred_categories(self, dtype):
|
||||
cats = ['a', 'b']
|
||||
codes = np.array([0, 0, 1, 1], dtype='i8')
|
||||
result = Categorical._from_inferred_categories(cats, codes, dtype)
|
||||
expected = Categorical.from_codes(codes, cats)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('dtype', [None, 'category'])
|
||||
def test_from_inferred_categories_sorts(self, dtype):
|
||||
cats = ['b', 'a']
|
||||
codes = np.array([0, 1, 1, 1], dtype='i8')
|
||||
result = Categorical._from_inferred_categories(cats, codes, dtype)
|
||||
expected = Categorical.from_codes([1, 0, 0, 0], ['a', 'b'])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_from_inferred_categories_dtype(self):
|
||||
cats = ['a', 'b', 'd']
|
||||
codes = np.array([0, 1, 0, 2], dtype='i8')
|
||||
dtype = CategoricalDtype(['c', 'b', 'a'], ordered=True)
|
||||
result = Categorical._from_inferred_categories(cats, codes, dtype)
|
||||
expected = Categorical(['a', 'b', 'a', 'd'],
|
||||
categories=['c', 'b', 'a'],
|
||||
ordered=True)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_from_inferred_categories_coerces(self):
|
||||
cats = ['1', '2', 'bad']
|
||||
codes = np.array([0, 0, 1, 2], dtype='i8')
|
||||
dtype = CategoricalDtype([1, 2])
|
||||
result = Categorical._from_inferred_categories(cats, codes, dtype)
|
||||
expected = Categorical([1, 1, 2, np.nan])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_construction_with_ordered(self):
|
||||
# GH 9347, 9190
|
||||
cat = Categorical([0, 1, 2])
|
||||
assert not cat.ordered
|
||||
cat = Categorical([0, 1, 2], ordered=False)
|
||||
assert not cat.ordered
|
||||
cat = Categorical([0, 1, 2], ordered=True)
|
||||
assert cat.ordered
|
||||
|
||||
@pytest.mark.xfail(reason="Imaginary values not supported in Categorical")
|
||||
def test_constructor_imaginary(self):
|
||||
values = [1, 2, 3 + 1j]
|
||||
c1 = Categorical(values)
|
||||
tm.assert_index_equal(c1.categories, Index(values))
|
||||
tm.assert_numpy_array_equal(np.array(c1), np.array(values))
|
||||
@@ -0,0 +1,176 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas.util.testing as tm
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
from pandas.compat import long
|
||||
from pandas import Categorical, Index, CategoricalIndex, Series, Timestamp
|
||||
|
||||
|
||||
class TestCategoricalDtypes(object):
|
||||
|
||||
def test_is_equal_dtype(self):
|
||||
|
||||
# test dtype comparisons between cats
|
||||
|
||||
c1 = Categorical(list('aabca'), categories=list('abc'), ordered=False)
|
||||
c2 = Categorical(list('aabca'), categories=list('cab'), ordered=False)
|
||||
c3 = Categorical(list('aabca'), categories=list('cab'), ordered=True)
|
||||
assert c1.is_dtype_equal(c1)
|
||||
assert c2.is_dtype_equal(c2)
|
||||
assert c3.is_dtype_equal(c3)
|
||||
assert c1.is_dtype_equal(c2)
|
||||
assert not c1.is_dtype_equal(c3)
|
||||
assert not c1.is_dtype_equal(Index(list('aabca')))
|
||||
assert not c1.is_dtype_equal(c1.astype(object))
|
||||
assert c1.is_dtype_equal(CategoricalIndex(c1))
|
||||
assert (c1.is_dtype_equal(
|
||||
CategoricalIndex(c1, categories=list('cab'))))
|
||||
assert not c1.is_dtype_equal(CategoricalIndex(c1, ordered=True))
|
||||
|
||||
# GH 16659
|
||||
s1 = Series(c1)
|
||||
s2 = Series(c2)
|
||||
s3 = Series(c3)
|
||||
assert c1.is_dtype_equal(s1)
|
||||
assert c2.is_dtype_equal(s2)
|
||||
assert c3.is_dtype_equal(s3)
|
||||
assert c1.is_dtype_equal(s2)
|
||||
assert not c1.is_dtype_equal(s3)
|
||||
assert not c1.is_dtype_equal(s1.astype(object))
|
||||
|
||||
def test_set_dtype_same(self):
|
||||
c = Categorical(['a', 'b', 'c'])
|
||||
result = c._set_dtype(CategoricalDtype(['a', 'b', 'c']))
|
||||
tm.assert_categorical_equal(result, c)
|
||||
|
||||
def test_set_dtype_new_categories(self):
|
||||
c = Categorical(['a', 'b', 'c'])
|
||||
result = c._set_dtype(CategoricalDtype(list('abcd')))
|
||||
tm.assert_numpy_array_equal(result.codes, c.codes)
|
||||
tm.assert_index_equal(result.dtype.categories, Index(list('abcd')))
|
||||
|
||||
@pytest.mark.parametrize('values, categories, new_categories', [
|
||||
# No NaNs, same cats, same order
|
||||
(['a', 'b', 'a'], ['a', 'b'], ['a', 'b'],),
|
||||
# No NaNs, same cats, different order
|
||||
(['a', 'b', 'a'], ['a', 'b'], ['b', 'a'],),
|
||||
# Same, unsorted
|
||||
(['b', 'a', 'a'], ['a', 'b'], ['a', 'b'],),
|
||||
# No NaNs, same cats, different order
|
||||
(['b', 'a', 'a'], ['a', 'b'], ['b', 'a'],),
|
||||
# NaNs
|
||||
(['a', 'b', 'c'], ['a', 'b'], ['a', 'b']),
|
||||
(['a', 'b', 'c'], ['a', 'b'], ['b', 'a']),
|
||||
(['b', 'a', 'c'], ['a', 'b'], ['a', 'b']),
|
||||
(['b', 'a', 'c'], ['a', 'b'], ['a', 'b']),
|
||||
# Introduce NaNs
|
||||
(['a', 'b', 'c'], ['a', 'b'], ['a']),
|
||||
(['a', 'b', 'c'], ['a', 'b'], ['b']),
|
||||
(['b', 'a', 'c'], ['a', 'b'], ['a']),
|
||||
(['b', 'a', 'c'], ['a', 'b'], ['a']),
|
||||
# No overlap
|
||||
(['a', 'b', 'c'], ['a', 'b'], ['d', 'e']),
|
||||
])
|
||||
@pytest.mark.parametrize('ordered', [True, False])
|
||||
def test_set_dtype_many(self, values, categories, new_categories,
|
||||
ordered):
|
||||
c = Categorical(values, categories)
|
||||
expected = Categorical(values, new_categories, ordered)
|
||||
result = c._set_dtype(expected.dtype)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_set_dtype_no_overlap(self):
|
||||
c = Categorical(['a', 'b', 'c'], ['d', 'e'])
|
||||
result = c._set_dtype(CategoricalDtype(['a', 'b']))
|
||||
expected = Categorical([None, None, None], categories=['a', 'b'])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_codes_dtypes(self):
|
||||
|
||||
# GH 8453
|
||||
result = Categorical(['foo', 'bar', 'baz'])
|
||||
assert result.codes.dtype == 'int8'
|
||||
|
||||
result = Categorical(['foo%05d' % i for i in range(400)])
|
||||
assert result.codes.dtype == 'int16'
|
||||
|
||||
result = Categorical(['foo%05d' % i for i in range(40000)])
|
||||
assert result.codes.dtype == 'int32'
|
||||
|
||||
# adding cats
|
||||
result = Categorical(['foo', 'bar', 'baz'])
|
||||
assert result.codes.dtype == 'int8'
|
||||
result = result.add_categories(['foo%05d' % i for i in range(400)])
|
||||
assert result.codes.dtype == 'int16'
|
||||
|
||||
# removing cats
|
||||
result = result.remove_categories(['foo%05d' % i for i in range(300)])
|
||||
assert result.codes.dtype == 'int8'
|
||||
|
||||
@pytest.mark.parametrize('ordered', [True, False])
|
||||
def test_astype(self, ordered):
|
||||
# string
|
||||
cat = Categorical(list('abbaaccc'), ordered=ordered)
|
||||
result = cat.astype(object)
|
||||
expected = np.array(cat)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
msg = 'could not convert string to float'
|
||||
with tm.assert_raises_regex(ValueError, msg):
|
||||
cat.astype(float)
|
||||
|
||||
# numeric
|
||||
cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
|
||||
result = cat.astype(object)
|
||||
expected = np.array(cat, dtype=object)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = cat.astype(int)
|
||||
expected = np.array(cat, dtype=np.int)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = cat.astype(float)
|
||||
expected = np.array(cat, dtype=np.float)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('dtype_ordered', [True, False])
|
||||
@pytest.mark.parametrize('cat_ordered', [True, False])
|
||||
def test_astype_category(self, dtype_ordered, cat_ordered):
|
||||
# GH 10696/18593
|
||||
data = list('abcaacbab')
|
||||
cat = Categorical(data, categories=list('bac'), ordered=cat_ordered)
|
||||
|
||||
# standard categories
|
||||
dtype = CategoricalDtype(ordered=dtype_ordered)
|
||||
result = cat.astype(dtype)
|
||||
expected = Categorical(
|
||||
data, categories=cat.categories, ordered=dtype_ordered)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
# non-standard categories
|
||||
dtype = CategoricalDtype(list('adc'), dtype_ordered)
|
||||
result = cat.astype(dtype)
|
||||
expected = Categorical(data, dtype=dtype)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
if dtype_ordered is False:
|
||||
# dtype='category' can't specify ordered, so only test once
|
||||
result = cat.astype('category')
|
||||
expected = cat
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_iter_python_types(self):
|
||||
# GH-19909
|
||||
# TODO(Py2): Remove long
|
||||
cat = Categorical([1, 2])
|
||||
assert isinstance(list(cat)[0], (int, long))
|
||||
assert isinstance(cat.tolist()[0], (int, long))
|
||||
|
||||
def test_iter_python_types_datetime(self):
|
||||
cat = Categorical([Timestamp('2017-01-01'),
|
||||
Timestamp('2017-01-02')])
|
||||
assert isinstance(list(cat)[0], Timestamp)
|
||||
assert isinstance(cat.tolist()[0], Timestamp)
|
||||
@@ -0,0 +1,123 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas.util.testing as tm
|
||||
from pandas import Categorical, Index, CategoricalIndex, PeriodIndex
|
||||
from pandas.tests.categorical.common import TestCategorical
|
||||
|
||||
|
||||
class TestCategoricalIndexingWithFactor(TestCategorical):
    """Get/set item tests that use the ``self.factor`` fixture."""

    def test_getitem(self):
        """Scalar, positional-list and boolean-mask lookups."""
        factor = self.factor
        assert factor[0] == 'a'
        assert factor[-1] == 'c'

        picked = factor[[0, 1, 2]]
        tm.assert_numpy_array_equal(
            picked._codes, np.array([0, 1, 1], dtype=np.int8))

        picked = factor[np.asarray(factor) == 'c']
        tm.assert_numpy_array_equal(
            picked._codes, np.array([2, 2, 2], dtype=np.int8))

    def test_setitem(self):
        """Assign through integer positions and through a boolean mask."""
        # int/positional
        cat = self.factor.copy()
        cat[0] = 'b'
        assert cat[0] == 'b'
        cat[-1] = 'a'
        assert cat[-1] == 'a'

        # boolean
        cat = self.factor.copy()
        mask = np.zeros(len(cat), dtype='bool')
        mask[0] = True
        mask[-1] = True
        cat[mask] = 'c'

        tm.assert_categorical_equal(
            cat,
            Categorical(['c', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
                        ordered=True))
|
||||
|
||||
|
||||
class TestCategoricalIndexing(object):
    """Indexing tests that build their own Categorical inputs."""

    def test_getitem_listlike(self):
        """Indexing with an int64 array must agree with indexing the codes."""
        # GH 9469
        # properly coerce the input indexers
        np.random.seed(1)
        raw = np.random.randint(0, 5, size=150000).astype(np.int8)
        cat = Categorical(raw)
        from_codes = cat.codes[np.array([100000]).astype(np.int64)]
        from_cat = cat[np.array([100000]).astype(np.int64)].codes
        tm.assert_numpy_array_equal(from_codes, from_cat)

    def test_periodindex(self):
        """Categoricals built from a PeriodIndex get sorted period categories."""
        monthly = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02',
                               '2014-03', '2014-03'], freq='M')
        cat = Categorical(monthly)
        str(cat)
        want_codes = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8)
        want_cats = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M')
        tm.assert_numpy_array_equal(cat._codes, want_codes)
        tm.assert_index_equal(cat.categories, want_cats)

        shuffled = PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01',
                                '2014-03', '2014-01'], freq='M')
        cat = Categorical(shuffled, ordered=True)
        str(cat)
        want_codes = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8)
        want_cats = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M')
        tm.assert_numpy_array_equal(cat._codes, want_codes)
        tm.assert_index_equal(cat.categories, want_cats)

        descending = PeriodIndex(['2013-12', '2013-11', '2013-10', '2013-09',
                                  '2013-08', '2013-07', '2013-05'], freq='M')
        cat = Categorical(descending, ordered=True)
        want_codes = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8)
        want_cats = PeriodIndex(['2013-05', '2013-07', '2013-08', '2013-09',
                                 '2013-10', '2013-11', '2013-12'], freq='M')
        tm.assert_numpy_array_equal(cat._codes, want_codes)
        tm.assert_index_equal(cat.categories, want_cats)

    def test_categories_assigments(self):
        """Assigning ``categories`` renames; a length mismatch raises."""
        cat = Categorical(["a", "b", "c", "a"])
        renamed = np.array([1, 2, 3, 1], dtype=np.int64)
        cat.categories = [1, 2, 3]
        tm.assert_numpy_array_equal(cat.__array__(), renamed)
        tm.assert_index_equal(cat.categories, Index([1, 2, 3]))

        # lengthen
        with pytest.raises(ValueError):
            cat.categories = [1, 2, 3, 4]

        # shorten
        with pytest.raises(ValueError):
            cat.categories = [1, 2]

    # Combinations of sorted/unique:
    @pytest.mark.parametrize("idx_values", [[1, 2, 3, 4], [1, 3, 2, 4],
                                            [1, 3, 3, 4], [1, 2, 2, 4]])
    # Combinations of missing/unique
    @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]])
    @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex])
    def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
        """A categorical key must index exactly like its plain list of values."""
        # GH 21448
        key = key_class(key_values, categories=range(1, 5))
        # Test for flat index and CategoricalIndex with same/different cats:
        for dtype in (None, 'category', key.dtype):
            idx = Index(idx_values, dtype=dtype)
            by_list, by_list_missing = idx.get_indexer_non_unique(key_values)
            by_key, by_key_missing = idx.get_indexer_non_unique(key)

            tm.assert_numpy_array_equal(by_list, by_key)
            tm.assert_numpy_array_equal(by_list_missing, by_key_missing)
|
||||
@@ -0,0 +1,85 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import collections
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util.testing as tm
|
||||
from pandas import Categorical, Index, isna
|
||||
from pandas.compat import lrange
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
|
||||
class TestCategoricalMissing(object):
    """Tests for missing-value handling in ``Categorical``."""

    def test_na_flags_int_categories(self):
        """Codes of -1 must be reported as missing by ``isna``."""
        # #1457

        categories = lrange(10)
        labels = np.random.randint(0, 10, 20)
        labels[::5] = -1

        # Build from codes through the public constructor rather than the
        # internal ``fastpath`` flag of ``Categorical.__init__``.
        cat = Categorical.from_codes(labels, categories)
        repr(cat)

        tm.assert_numpy_array_equal(isna(cat), labels == -1)

    def test_nan_handling(self):
        """NaN is stored as code -1 and never becomes a category."""

        # Nans are represented as -1 in codes
        c = Categorical(["a", "b", np.nan, "a"])
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0],
                                                       dtype=np.int8))
        c[1] = np.nan
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0],
                                                       dtype=np.int8))

        # A freshly constructed Categorical is unaffected by the assignment
        # above: NaN is still absent from the categories and coded as -1.
        c = Categorical(["a", "b", np.nan, "a"])
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0],
                                                       dtype=np.int8))

    def test_set_dtype_nans(self):
        """Values missing from the new dtype's categories get code -1."""
        c = Categorical(['a', 'b', np.nan])
        result = c._set_dtype(CategoricalDtype(['a', 'c']))
        tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1],
                                                           dtype='int8'))

    def test_set_item_nan(self):
        """Assigning NaN to an element leaves the categories untouched."""
        cat = Categorical([1, 2, 3])
        cat[1] = np.nan

        exp = Categorical([1, np.nan, 3], categories=[1, 2, 3])
        tm.assert_categorical_equal(cat, exp)

    @pytest.mark.parametrize('fillna_kwargs, msg', [
        (dict(value=1, method='ffill'),
         "Cannot specify both 'value' and 'method'."),
        (dict(),
         "Must specify a fill 'value' or 'method'."),
        (dict(method='bad'),
         "Invalid fill method. Expecting .* bad"),
    ])
    def test_fillna_raises(self, fillna_kwargs, msg):
        """Invalid ``fillna`` argument combinations raise ``ValueError``."""
        # https://github.com/pandas-dev/pandas/issues/19682
        cat = Categorical([1, 2, 3])

        with tm.assert_raises_regex(ValueError, msg):
            cat.fillna(**fillna_kwargs)

    @pytest.mark.parametrize("named", [True, False])
    def test_fillna_iterable_category(self, named):
        """``fillna`` accepts a fill value that is itself a tuple."""
        # https://github.com/pandas-dev/pandas/issues/21097
        if named:
            Point = collections.namedtuple("Point", "x y")
        else:
            def Point(*args):
                # plain tuple
                return args

        cat = Categorical([Point(0, 0), Point(0, 1), None])
        result = cat.fillna(Point(0, 0))
        expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)])

        tm.assert_categorical_equal(result, expected)
|
||||
@@ -0,0 +1,293 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
import pandas.util.testing as tm
|
||||
from pandas import Categorical, Series, DataFrame, date_range
|
||||
from pandas.tests.categorical.common import TestCategorical
|
||||
|
||||
|
||||
class TestCategoricalOpsWithFactor(TestCategorical):
    """Comparison tests that use the ``self.factor`` fixture."""

    def test_categories_none_comparisons(self):
        """A Categorical built with inferred categories equals the fixture."""
        rebuilt = Categorical(['a', 'b', 'b', 'a',
                               'a', 'c', 'c', 'c'], ordered=True)
        tm.assert_categorical_equal(rebuilt, self.factor)

    def test_comparisons(self):
        """Comparisons against scalars, Categoricals, Series and arrays."""
        factor = self.factor

        selected = factor[factor == 'a']
        reference = factor[np.asarray(factor) == 'a']
        tm.assert_categorical_equal(selected, reference)

        selected = factor[factor != 'a']
        reference = factor[np.asarray(factor) != 'a']
        tm.assert_categorical_equal(selected, reference)

        selected = factor[factor < 'c']
        reference = factor[np.asarray(factor) < 'c']
        tm.assert_categorical_equal(selected, reference)

        selected = factor[factor > 'a']
        reference = factor[np.asarray(factor) > 'a']
        tm.assert_categorical_equal(selected, reference)

        selected = factor[factor >= 'b']
        reference = factor[np.asarray(factor) >= 'b']
        tm.assert_categorical_equal(selected, reference)

        selected = factor[factor <= 'b']
        reference = factor[np.asarray(factor) <= 'b']
        tm.assert_categorical_equal(selected, reference)

        size = len(factor)

        shuffled = factor[np.random.permutation(size)]
        outcome = factor == shuffled
        tm.assert_numpy_array_equal(
            outcome, np.asarray(factor) == np.asarray(shuffled))

        outcome = factor == 'd'
        tm.assert_numpy_array_equal(outcome, np.repeat(False, len(factor)))

        # comparisons with categoricals
        cat_rev = Categorical(
            ["a", "b", "c"], categories=["c", "b", "a"], ordered=True)
        cat_rev_base = Categorical(
            ["b", "b", "b"], categories=["c", "b", "a"], ordered=True)
        cat = Categorical(["a", "b", "c"], ordered=True)
        cat_base = Categorical(
            ["b", "b", "b"], categories=cat.categories, ordered=True)

        # comparisons need to take categories ordering into account
        tm.assert_numpy_array_equal(cat_rev > cat_rev_base,
                                    np.array([True, False, False]))
        tm.assert_numpy_array_equal(cat_rev < cat_rev_base,
                                    np.array([False, False, True]))
        tm.assert_numpy_array_equal(cat > cat_base,
                                    np.array([False, False, True]))

        # Only categories with same categories can be compared
        with pytest.raises(TypeError):
            cat > cat_rev

        cat_rev_base2 = Categorical(
            ["b", "b", "b"], categories=["c", "b", "a", "d"])

        with pytest.raises(TypeError):
            cat_rev > cat_rev_base2

        # Only categories with same ordering information can be compared
        cat_unordered = cat.set_ordered(False)
        assert not (cat > cat).any()

        with pytest.raises(TypeError):
            cat > cat_unordered

        # comparison (in both directions) with Series will raise
        series = Series(["b", "b", "b"])
        with pytest.raises(TypeError):
            cat > series
        with pytest.raises(TypeError):
            cat_rev > series
        with pytest.raises(TypeError):
            series < cat
        with pytest.raises(TypeError):
            series < cat_rev

        # comparison with numpy.array will raise in both direction, but only on
        # newer numpy versions
        array = np.array(["b", "b", "b"])
        with pytest.raises(TypeError):
            cat > array
        with pytest.raises(TypeError):
            cat_rev > array

        # Make sure that unequal comparison take the categories order in
        # account
        cat_rev = Categorical(
            list("abc"), categories=list("cba"), ordered=True)
        want = np.array([True, False, False])
        got = cat_rev > "b"
        tm.assert_numpy_array_equal(got, want)
|
||||
|
||||
|
||||
class TestCategoricalOps(object):
    """Comparison and arithmetic tests that build their own inputs."""

    def test_datetime_categorical_comparison(self):
        """Ordered datetime categories compare against a scalar element."""
        dt_cat = Categorical(date_range('2014-01-01', periods=3), ordered=True)
        after_first = np.array([False, True, True])
        tm.assert_numpy_array_equal(dt_cat > dt_cat[0], after_first)
        tm.assert_numpy_array_equal(dt_cat[0] < dt_cat, after_first)

    def test_reflected_comparison_with_scalars(self):
        """Scalar-on-the-left comparisons mirror scalar-on-the-right ones."""
        # GH8658
        cat = Categorical([1, 2, 3], ordered=True)
        after_first = np.array([False, True, True])
        tm.assert_numpy_array_equal(cat > cat[0], after_first)
        tm.assert_numpy_array_equal(cat[0] < cat, after_first)

    def test_comparison_with_unknown_scalars(self):
        """Ordering against an unknown scalar raises; (in)equality does not."""
        # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057
        # and following comparisons with scalars not in categories should raise
        # for unequal comps, but not for equal/not equal
        cat = Categorical([1, 2, 3], ordered=True)

        with pytest.raises(TypeError):
            cat < 4
        with pytest.raises(TypeError):
            cat > 4
        with pytest.raises(TypeError):
            4 < cat
        with pytest.raises(TypeError):
            4 > cat

        tm.assert_numpy_array_equal(cat == 4,
                                    np.array([False, False, False]))
        tm.assert_numpy_array_equal(cat != 4,
                                    np.array([True, True, True]))

    @pytest.mark.parametrize('data,reverse,base', [
        (list("abc"), list("cba"), list("bbb")),
        ([1, 2, 3], [3, 2, 1], [2, 2, 2])]
    )
    def test_comparisons(self, data, reverse, base):
        """Comparisons between categorical Series honour category order."""
        cat_rev = Series(
            Categorical(data, categories=reverse, ordered=True))
        cat_rev_base = Series(
            Categorical(base, categories=reverse, ordered=True))
        cat = Series(Categorical(data, ordered=True))
        cat_base = Series(
            Categorical(base, categories=cat.cat.categories, ordered=True))
        plain_series = Series(base)
        plain_array = np.array(base)

        # comparisons need to take categories ordering into account
        tm.assert_series_equal(cat_rev > cat_rev_base,
                               Series([True, False, False]))
        tm.assert_series_equal(cat_rev < cat_rev_base,
                               Series([False, False, True]))
        tm.assert_series_equal(cat > cat_base,
                               Series([False, False, True]))

        scalar = base[1]
        got = cat > scalar
        want = Series([False, False, True])
        want_values = cat.values > scalar
        tm.assert_series_equal(got, want)
        tm.assert_numpy_array_equal(got.values, want_values)
        got_rev = cat_rev > scalar
        want_rev = Series([True, False, False])
        want_rev_values = cat_rev.values > scalar
        tm.assert_series_equal(got_rev, want_rev)
        tm.assert_numpy_array_equal(got_rev.values, want_rev_values)

        # Only categories with same categories can be compared
        with pytest.raises(TypeError):
            cat > cat_rev

        # categorical cannot be compared to Series or numpy array, and also
        # not the other way around
        with pytest.raises(TypeError):
            cat > plain_series
        with pytest.raises(TypeError):
            cat_rev > plain_series
        with pytest.raises(TypeError):
            cat > plain_array
        with pytest.raises(TypeError):
            cat_rev > plain_array

        with pytest.raises(TypeError):
            plain_series < cat
        with pytest.raises(TypeError):
            plain_series < cat_rev

        with pytest.raises(TypeError):
            plain_array < cat
        with pytest.raises(TypeError):
            plain_array < cat_rev

    @pytest.mark.parametrize('ctor', [
        lambda *args, **kwargs: Categorical(*args, **kwargs),
        lambda *args, **kwargs: Series(Categorical(*args, **kwargs)),
    ])
    def test_unordered_different_order_equal(self, ctor):
        """Unordered equality ignores the order the categories are listed in."""
        # https://github.com/pandas-dev/pandas/issues/16014
        left = ctor(['a', 'b'], categories=['a', 'b'], ordered=False)
        right = ctor(['a', 'b'], categories=['b', 'a'], ordered=False)
        assert (left == right).all()

        left = ctor(['a', 'b'], categories=['a', 'b'], ordered=False)
        right = ctor(['b', 'a'], categories=['b', 'a'], ordered=False)
        assert (left != right).all()

        left = ctor(['a', 'a'], categories=['a', 'b'], ordered=False)
        right = ctor(['b', 'b'], categories=['b', 'a'], ordered=False)
        assert (left != right).all()

        left = ctor(['a', 'a'], categories=['a', 'b'], ordered=False)
        right = ctor(['a', 'b'], categories=['b', 'a'], ordered=False)
        outcome = left == right
        tm.assert_numpy_array_equal(np.array(outcome),
                                    np.array([True, False]))

    def test_unordered_different_categories_raises(self):
        """Equality between different category sets raises ``TypeError``."""
        left = Categorical(['a', 'b'], categories=['a', 'b'], ordered=False)
        right = Categorical(['a', 'c'], categories=['c', 'a'], ordered=False)
        pattern = "Categoricals can only be compared"
        with tm.assert_raises_regex(TypeError, pattern):
            left == right

    def test_compare_different_lengths(self):
        """Equality between category lists of different length raises."""
        two_cats = Categorical([], categories=['a', 'b'])
        one_cat = Categorical([], categories=['a'])
        msg = "Categories are different lengths"
        with tm.assert_raises_regex(TypeError, msg):
            two_cats == one_cat

    def test_compare_unordered_different_order(self):
        """``equals`` is False when the values differ."""
        # https://github.com/pandas-dev/pandas/issues/16603#issuecomment-
        # 349290078
        first = pd.Categorical(['a'], categories=['a', 'b'])
        second = pd.Categorical(['b'], categories=['b', 'a'])
        assert not first.equals(second)

    def test_numeric_like_ops(self):
        """Arithmetic, reductions and ufuncs on categorical data raise."""
        arithmetic = ['__add__', '__sub__', '__mul__', '__truediv__']

        df = DataFrame({'value': np.random.randint(0, 10000, 100)})
        labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
        cat_labels = Categorical(labels, labels)

        df = df.sort_values(by=['value'], ascending=True)
        df['value_group'] = pd.cut(df.value, range(0, 10500, 500),
                                   right=False, labels=cat_labels)

        # numeric ops should not succeed
        for op in arithmetic:
            with pytest.raises(TypeError):
                getattr(df, op)(df)

        # reduction ops should not succeed (unless specifically defined, e.g.
        # min/max)
        grouped = df['value_group']
        for op in ['kurt', 'skew', 'var', 'std', 'mean', 'sum', 'median']:
            with pytest.raises(TypeError):
                getattr(grouped, op)(numeric_only=False)

        # mad technically works because it takes always the numeric data

        # numpy ops
        series = Series(Categorical([1, 2, 3, 4]))
        with pytest.raises(TypeError):
            np.sum(series)

        # numeric ops on a Series
        for op in arithmetic:
            with pytest.raises(TypeError):
                getattr(series, op)(2)

        # invalid ufunc
        with pytest.raises(TypeError):
            np.log(series)
|
||||
@@ -0,0 +1,517 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import (Categorical, Series, CategoricalIndex, date_range,
|
||||
period_range, timedelta_range)
|
||||
from pandas.compat import u, PY3
|
||||
from pandas.core.config import option_context
|
||||
from pandas.tests.categorical.common import TestCategorical
|
||||
|
||||
|
||||
class TestCategoricalReprWithFactor(TestCategorical):
    """Repr tests that use the ``self.factor`` fixture."""

    def test_print(self):
        """The ordered fixture prints its values and ``a < b < c``."""
        wanted = "\n".join(["[a, b, b, a, a, c, c, c]",
                            "Categories (3, object): [a < b < c]"])
        shown = repr(self.factor)
        assert shown == wanted
|
||||
|
||||
|
||||
class TestCategoricalRepr(object):
|
||||
|
||||
def test_big_print(self):
|
||||
factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ['a', 'b', 'c'],
|
||||
fastpath=True)
|
||||
expected = ["[a, b, c, a, b, ..., b, c, a, b, c]", "Length: 600",
|
||||
"Categories (3, object): [a, b, c]"]
|
||||
expected = "\n".join(expected)
|
||||
|
||||
actual = repr(factor)
|
||||
|
||||
assert actual == expected
|
||||
|
||||
def test_empty_print(self):
|
||||
factor = Categorical([], ["a", "b", "c"])
|
||||
expected = ("[], Categories (3, object): [a, b, c]")
|
||||
# hack because array_repr changed in numpy > 1.6.x
|
||||
actual = repr(factor)
|
||||
assert actual == expected
|
||||
|
||||
assert expected == actual
|
||||
factor = Categorical([], ["a", "b", "c"], ordered=True)
|
||||
expected = ("[], Categories (3, object): [a < b < c]")
|
||||
actual = repr(factor)
|
||||
assert expected == actual
|
||||
|
||||
factor = Categorical([], [])
|
||||
expected = ("[], Categories (0, object): []")
|
||||
assert expected == repr(factor)
|
||||
|
||||
def test_print_none_width(self):
|
||||
# GH10087
|
||||
a = Series(Categorical([1, 2, 3, 4]))
|
||||
exp = u("0 1\n1 2\n2 3\n3 4\n" +
|
||||
"dtype: category\nCategories (4, int64): [1, 2, 3, 4]")
|
||||
|
||||
with option_context("display.width", None):
|
||||
assert exp == repr(a)
|
||||
|
||||
def test_unicode_print(self):
|
||||
if PY3:
|
||||
_rep = repr
|
||||
else:
|
||||
_rep = unicode # noqa
|
||||
|
||||
c = Categorical(['aaaaa', 'bb', 'cccc'] * 20)
|
||||
expected = u"""\
|
||||
[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc]
|
||||
Length: 60
|
||||
Categories (3, object): [aaaaa, bb, cccc]"""
|
||||
|
||||
assert _rep(c) == expected
|
||||
|
||||
c = Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20)
|
||||
expected = u"""\
|
||||
[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう]
|
||||
Length: 60
|
||||
Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa
|
||||
|
||||
assert _rep(c) == expected
|
||||
|
||||
# unicode option should not affect to Categorical, as it doesn't care
|
||||
# the repr width
|
||||
with option_context('display.unicode.east_asian_width', True):
|
||||
|
||||
c = Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20)
|
||||
expected = u"""[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう]
|
||||
Length: 60
|
||||
Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa
|
||||
|
||||
assert _rep(c) == expected
|
||||
|
||||
def test_categorical_repr(self):
|
||||
c = Categorical([1, 2, 3])
|
||||
exp = """[1, 2, 3]
|
||||
Categories (3, int64): [1, 2, 3]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3])
|
||||
exp = """[1, 2, 3, 1, 2, 3]
|
||||
Categories (3, int64): [1, 2, 3]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical([1, 2, 3, 4, 5] * 10)
|
||||
exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5]
|
||||
Length: 50
|
||||
Categories (5, int64): [1, 2, 3, 4, 5]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(np.arange(20))
|
||||
exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19]
|
||||
Length: 20
|
||||
Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_ordered(self):
|
||||
c = Categorical([1, 2, 3], ordered=True)
|
||||
exp = """[1, 2, 3]
|
||||
Categories (3, int64): [1 < 2 < 3]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True)
|
||||
exp = """[1, 2, 3, 1, 2, 3]
|
||||
Categories (3, int64): [1 < 2 < 3]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical([1, 2, 3, 4, 5] * 10, ordered=True)
|
||||
exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5]
|
||||
Length: 50
|
||||
Categories (5, int64): [1 < 2 < 3 < 4 < 5]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(np.arange(20), ordered=True)
|
||||
exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19]
|
||||
Length: 20
|
||||
Categories (20, int64): [0 < 1 < 2 < 3 ... 16 < 17 < 18 < 19]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_datetime(self):
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
c = Categorical(idx)
|
||||
|
||||
# TODO(wesm): exceeding 80 characters in the console is not good
|
||||
# behavior
|
||||
exp = (
|
||||
"[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, "
|
||||
"2011-01-01 12:00:00, 2011-01-01 13:00:00]\n"
|
||||
"Categories (5, datetime64[ns]): [2011-01-01 09:00:00, "
|
||||
"2011-01-01 10:00:00, 2011-01-01 11:00:00,\n"
|
||||
" 2011-01-01 12:00:00, "
|
||||
"2011-01-01 13:00:00]""")
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = (
|
||||
"[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, "
|
||||
"2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, "
|
||||
"2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, "
|
||||
"2011-01-01 13:00:00]\n"
|
||||
"Categories (5, datetime64[ns]): [2011-01-01 09:00:00, "
|
||||
"2011-01-01 10:00:00, 2011-01-01 11:00:00,\n"
|
||||
" 2011-01-01 12:00:00, "
|
||||
"2011-01-01 13:00:00]")
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5,
|
||||
tz='US/Eastern')
|
||||
c = Categorical(idx)
|
||||
exp = (
|
||||
"[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, "
|
||||
"2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, "
|
||||
"2011-01-01 13:00:00-05:00]\n"
|
||||
"Categories (5, datetime64[ns, US/Eastern]): "
|
||||
"[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n"
|
||||
" "
|
||||
"2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n"
|
||||
" "
|
||||
"2011-01-01 13:00:00-05:00]")
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = (
|
||||
"[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, "
|
||||
"2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, "
|
||||
"2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, "
|
||||
"2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, "
|
||||
"2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n"
|
||||
"Categories (5, datetime64[ns, US/Eastern]): "
|
||||
"[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n"
|
||||
" "
|
||||
"2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n"
|
||||
" "
|
||||
"2011-01-01 13:00:00-05:00]")
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_datetime_ordered(self):
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00]
|
||||
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
|
||||
2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00]
|
||||
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
|
||||
2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5,
|
||||
tz='US/Eastern')
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]
|
||||
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
|
||||
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
|
||||
2011-01-01 13:00:00-05:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]
|
||||
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
|
||||
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
|
||||
2011-01-01 13:00:00-05:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_period(self):
|
||||
idx = period_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
c = Categorical(idx)
|
||||
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
|
||||
Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
|
||||
2011-01-01 13:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
|
||||
Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
|
||||
2011-01-01 13:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = period_range('2011-01', freq='M', periods=5)
|
||||
c = Categorical(idx)
|
||||
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
|
||||
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
|
||||
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_period_ordered(self):
|
||||
idx = period_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
|
||||
Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
|
||||
2011-01-01 13:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
|
||||
Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
|
||||
2011-01-01 13:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = period_range('2011-01', freq='M', periods=5)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
|
||||
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
|
||||
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_timedelta(self):
|
||||
idx = timedelta_range('1 days', periods=5)
|
||||
c = Categorical(idx)
|
||||
exp = """[1 days, 2 days, 3 days, 4 days, 5 days]
|
||||
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days]
|
||||
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = timedelta_range('1 hours', periods=20)
|
||||
c = Categorical(idx)
|
||||
exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
|
||||
Length: 20
|
||||
Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
|
||||
3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00,
|
||||
18 days 01:00:00, 19 days 01:00:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
|
||||
Length: 40
|
||||
Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
|
||||
3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00,
|
||||
18 days 01:00:00, 19 days 01:00:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_timedelta_ordered(self):
|
||||
idx = timedelta_range('1 days', periods=5)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[1 days, 2 days, 3 days, 4 days, 5 days]
|
||||
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days]
|
||||
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = timedelta_range('1 hours', periods=20)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
|
||||
Length: 20
|
||||
Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
|
||||
3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 <
|
||||
18 days 01:00:00 < 19 days 01:00:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
|
||||
Length: 40
|
||||
Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
|
||||
3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 <
|
||||
18 days 01:00:00 < 19 days 01:00:00]""" # noqa
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_index_repr(self):
|
||||
idx = CategoricalIndex(Categorical([1, 2, 3]))
|
||||
exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" # noqa
|
||||
assert repr(idx) == exp
|
||||
|
||||
i = CategoricalIndex(Categorical(np.arange(10)))
|
||||
exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=False, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_ordered(self):
|
||||
i = CategoricalIndex(Categorical([1, 2, 3], ordered=True))
|
||||
exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
i = CategoricalIndex(Categorical(np.arange(10), ordered=True))
|
||||
exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=True, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_datetime(self):
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00',
|
||||
'2011-01-01 11:00:00', '2011-01-01 12:00:00',
|
||||
'2011-01-01 13:00:00'],
|
||||
categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5,
|
||||
tz='US/Eastern')
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
|
||||
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
|
||||
'2011-01-01 13:00:00-05:00'],
|
||||
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_datetime_ordered(self):
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00',
|
||||
'2011-01-01 11:00:00', '2011-01-01 12:00:00',
|
||||
'2011-01-01 13:00:00'],
|
||||
categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5,
|
||||
tz='US/Eastern')
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
|
||||
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
|
||||
'2011-01-01 13:00:00-05:00'],
|
||||
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
i = CategoricalIndex(Categorical(idx.append(idx), ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
|
||||
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
|
||||
'2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00',
|
||||
'2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00',
|
||||
'2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'],
|
||||
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_period(self):
|
||||
# test all length
|
||||
idx = period_range('2011-01-01 09:00', freq='H', periods=1)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range('2011-01-01 09:00', freq='H', periods=2)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range('2011-01-01 09:00', freq='H', periods=3)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
|
||||
'2011-01-01 12:00', '2011-01-01 13:00'],
|
||||
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
i = CategoricalIndex(Categorical(idx.append(idx)))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
|
||||
'2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00',
|
||||
'2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00',
|
||||
'2011-01-01 13:00'],
|
||||
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range('2011-01', freq='M', periods=5)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_period_ordered(self):
|
||||
idx = period_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
|
||||
'2011-01-01 12:00', '2011-01-01 13:00'],
|
||||
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range('2011-01', freq='M', periods=5)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_timedelta(self):
|
||||
idx = timedelta_range('1 days', periods=5)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=False, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = timedelta_range('1 hours', periods=10)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00',
|
||||
'3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00',
|
||||
'6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00',
|
||||
'9 days 01:00:00'],
|
||||
categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=False, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_timedelta_ordered(self):
|
||||
idx = timedelta_range('1 days', periods=5)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')""" # noqa
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = timedelta_range('1 hours', periods=10)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00',
|
||||
'3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00',
|
||||
'6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00',
|
||||
'9 days 01:00:00'],
|
||||
categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" # noqa
|
||||
|
||||
assert repr(i) == exp
|
||||
@@ -0,0 +1,123 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas.util.testing as tm
|
||||
from pandas import Categorical, Index
|
||||
|
||||
|
||||
class TestCategoricalSort(object):
|
||||
|
||||
def test_argsort(self):
|
||||
c = Categorical([5, 3, 1, 4, 2], ordered=True)
|
||||
|
||||
expected = np.array([2, 4, 1, 3, 0])
|
||||
tm.assert_numpy_array_equal(c.argsort(ascending=True), expected,
|
||||
check_dtype=False)
|
||||
|
||||
expected = expected[::-1]
|
||||
tm.assert_numpy_array_equal(c.argsort(ascending=False), expected,
|
||||
check_dtype=False)
|
||||
|
||||
def test_numpy_argsort(self):
|
||||
c = Categorical([5, 3, 1, 4, 2], ordered=True)
|
||||
|
||||
expected = np.array([2, 4, 1, 3, 0])
|
||||
tm.assert_numpy_array_equal(np.argsort(c), expected,
|
||||
check_dtype=False)
|
||||
|
||||
tm.assert_numpy_array_equal(np.argsort(c, kind='mergesort'), expected,
|
||||
check_dtype=False)
|
||||
|
||||
msg = "the 'axis' parameter is not supported"
|
||||
tm.assert_raises_regex(ValueError, msg, np.argsort,
|
||||
c, axis=0)
|
||||
|
||||
msg = "the 'order' parameter is not supported"
|
||||
tm.assert_raises_regex(ValueError, msg, np.argsort,
|
||||
c, order='C')
|
||||
|
||||
def test_sort_values(self):
|
||||
|
||||
# unordered cats are sortable
|
||||
cat = Categorical(["a", "b", "b", "a"], ordered=False)
|
||||
cat.sort_values()
|
||||
|
||||
cat = Categorical(["a", "c", "b", "d"], ordered=True)
|
||||
|
||||
# sort_values
|
||||
res = cat.sort_values()
|
||||
exp = np.array(["a", "b", "c", "d"], dtype=object)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, cat.categories)
|
||||
|
||||
cat = Categorical(["a", "c", "b", "d"],
|
||||
categories=["a", "b", "c", "d"], ordered=True)
|
||||
res = cat.sort_values()
|
||||
exp = np.array(["a", "b", "c", "d"], dtype=object)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, cat.categories)
|
||||
|
||||
res = cat.sort_values(ascending=False)
|
||||
exp = np.array(["d", "c", "b", "a"], dtype=object)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, cat.categories)
|
||||
|
||||
# sort (inplace order)
|
||||
cat1 = cat.copy()
|
||||
cat1.sort_values(inplace=True)
|
||||
exp = np.array(["a", "b", "c", "d"], dtype=object)
|
||||
tm.assert_numpy_array_equal(cat1.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, cat.categories)
|
||||
|
||||
# reverse
|
||||
cat = Categorical(["a", "c", "c", "b", "d"], ordered=True)
|
||||
res = cat.sort_values(ascending=False)
|
||||
exp_val = np.array(["d", "c", "c", "b", "a"], dtype=object)
|
||||
exp_categories = Index(["a", "b", "c", "d"])
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp_val)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
def test_sort_values_na_position(self):
|
||||
# see gh-12882
|
||||
cat = Categorical([5, 2, np.nan, 2, np.nan], ordered=True)
|
||||
exp_categories = Index([2, 5])
|
||||
|
||||
exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan])
|
||||
res = cat.sort_values() # default arguments
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
exp = np.array([np.nan, np.nan, 2.0, 2.0, 5.0])
|
||||
res = cat.sort_values(ascending=True, na_position='first')
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
exp = np.array([np.nan, np.nan, 5.0, 2.0, 2.0])
|
||||
res = cat.sort_values(ascending=False, na_position='first')
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan])
|
||||
res = cat.sort_values(ascending=True, na_position='last')
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
exp = np.array([5.0, 2.0, 2.0, np.nan, np.nan])
|
||||
res = cat.sort_values(ascending=False, na_position='last')
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True)
|
||||
res = cat.sort_values(ascending=False, na_position='last')
|
||||
exp_val = np.array(["d", "c", "b", "a", np.nan], dtype=object)
|
||||
exp_categories = Index(["a", "b", "c", "d"])
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp_val)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True)
|
||||
res = cat.sort_values(ascending=False, na_position='first')
|
||||
exp_val = np.array([np.nan, "d", "c", "b", "a"], dtype=object)
|
||||
exp_categories = Index(["a", "b", "c", "d"])
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp_val)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
@@ -0,0 +1,26 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from pandas import Categorical
|
||||
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalSubclassing(object):
    """Check that operations on a Categorical subclass keep the subclass."""

    def test_constructor(self):
        """Direct construction yields the subclass with matching contents."""
        letters = ['a', 'b', 'c']
        subclassed = tm.SubclassedCategorical(letters)
        assert isinstance(subclassed, tm.SubclassedCategorical)
        tm.assert_categorical_equal(subclassed, Categorical(letters))

    def test_from_codes(self):
        """``from_codes`` called on the subclass returns the subclass."""
        subclassed = tm.SubclassedCategorical.from_codes([1, 0, 2],
                                                         ['a', 'b', 'c'])
        assert isinstance(subclassed, tm.SubclassedCategorical)
        expected = Categorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
        tm.assert_categorical_equal(subclassed, expected)

    def test_map(self):
        """``map`` returns the subclass with the mapped values."""
        subclassed = tm.SubclassedCategorical(['a', 'b', 'c'])
        mapped = subclassed.map(lambda value: value.upper())
        assert isinstance(mapped, tm.SubclassedCategorical)
        expected = Categorical(['A', 'B', 'C'])
        tm.assert_categorical_equal(mapped, expected)
|
||||
@@ -0,0 +1,18 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCategoricalWarnings(object):
    """Warning-related checks for ``Categorical``."""

    def test_tab_complete_warning(self, ip):
        """Tab-completing on a Categorical in IPython must emit no warning.

        ``ip`` is a fixture defined outside this file; presumably it
        supplies an IPython shell instance (verify against conftest).
        Skipped when IPython >= 6.0.0 is not installed.
        """
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip('IPython', minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        # The snippet imports only ``pd``, so the constructor has to be
        # reached through it. A bare ``Categorical`` is not bound by this
        # snippet; unless the shell already had it, ``c`` would stay
        # undefined and the completion below would check nothing.
        code = "import pandas as pd; c = pd.Categorical([])"
        ip.run_code(code)
        with tm.assert_produces_warning(None):
            with provisionalcompleter('ignore'):
                # Exhaust the generator so the completion code really runs.
                list(ip.Completer.completions('c.', 1))
|
||||
Reference in New Issue
Block a user