pruned venvs

This commit is contained in:
d3m1g0d
2019-03-12 21:56:25 +01:00
parent 8ee094481c
commit 33f0511081
4095 changed files with 0 additions and 748399 deletions
@@ -1,10 +0,0 @@
# -*- coding: utf-8 -*-
from pandas import Categorical
class TestCategorical(object):
    """Base class that equips test cases with an ordered ``factor``."""

    def setup_method(self, method):
        """Store a fresh ordered Categorical on ``self.factor``.

        ``method`` is accepted to match the setup-hook signature and is
        not used.
        """
        letters = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
        self.factor = Categorical(letters, ordered=True)
@@ -1,13 +0,0 @@
import pytest
@pytest.fixture(params=[True, False])
def allow_fill(request):
    """Yield each boolean value of Categorical.take's 'allow_fill' flag."""
    flag = request.param
    return flag
@pytest.fixture(params=[True, False])
def ordered(request):
    """Yield each boolean value of Categorical's 'ordered' flag."""
    flag = request.param
    return flag
@@ -1,113 +0,0 @@
import pytest
import numpy as np
import pandas as pd
import pandas.util.testing as tm
@pytest.mark.parametrize('ordered', [True, False])
@pytest.mark.parametrize('categories', [
    ['b', 'a', 'c'],
    ['a', 'b', 'c', 'd'],
])
def test_factorize(categories, ordered):
    """factorize() numbers values by first appearance and keeps the dtype."""
    dtype_kwargs = dict(categories=categories, ordered=ordered)
    values = pd.Categorical(['b', 'b', 'a', 'c', None], **dtype_kwargs)

    codes, distinct = pd.factorize(values)

    # None maps to the -1 sentinel; the uniques keep the full category set.
    want_codes = np.array([0, 0, 1, 2, -1], dtype=np.intp)
    want_distinct = pd.Categorical(['b', 'a', 'c'], **dtype_kwargs)

    tm.assert_numpy_array_equal(codes, want_codes)
    tm.assert_categorical_equal(distinct, want_distinct)
def test_factorized_sort():
    """factorize(sort=True) orders the uniques and renumbers the codes."""
    values = pd.Categorical(['b', 'b', None, 'a'])

    codes, distinct = pd.factorize(values, sort=True)

    want_codes = np.array([1, 1, -1, 0], dtype=np.intp)
    want_distinct = pd.Categorical(['a', 'b'])

    tm.assert_numpy_array_equal(codes, want_codes)
    tm.assert_categorical_equal(distinct, want_distinct)
def test_factorized_sort_ordered():
    """With ordered categories, sort=True follows category order."""
    dtype_kwargs = dict(categories=['c', 'b', 'a'], ordered=True)
    values = pd.Categorical(['b', 'b', None, 'a'], **dtype_kwargs)

    codes, distinct = pd.factorize(values, sort=True)

    # 'b' precedes 'a' in the declared order, so it receives code 0.
    want_codes = np.array([0, 0, -1, 1], dtype=np.intp)
    want_distinct = pd.Categorical(['b', 'a'], **dtype_kwargs)

    tm.assert_numpy_array_equal(codes, want_codes)
    tm.assert_categorical_equal(distinct, want_distinct)
def test_isin_cats():
    """isin() matches listed values, including NaN when it is listed."""
    # GH2003
    values = pd.Categorical(["a", "b", np.nan])

    mask = values.isin(["a", np.nan])
    want = np.array([True, False, True], dtype=bool)
    tm.assert_numpy_array_equal(want, mask)

    # "c" is not present, and NaN is not asked for this time
    mask = values.isin(["a", "c"])
    want = np.array([True, False, False], dtype=bool)
    tm.assert_numpy_array_equal(want, mask)
@pytest.mark.parametrize("empty", [[], pd.Series(), np.array([])])
def test_isin_empty(empty):
    """isin() against an empty container is False for every element."""
    values = pd.Categorical(["a", "b"])
    want = np.array([False, False], dtype=bool)

    mask = values.isin(empty)

    tm.assert_numpy_array_equal(want, mask)
class TestTake(object):
# https://github.com/pandas-dev/pandas/issues/20664
def test_take_warns(self):
cat = pd.Categorical(['a', 'b'])
with tm.assert_produces_warning(FutureWarning):
cat.take([0, -1])
def test_take_positive_no_warning(self):
cat = pd.Categorical(['a', 'b'])
with tm.assert_produces_warning(None):
cat.take([0, 0])
def test_take_bounds(self, allow_fill):
# https://github.com/pandas-dev/pandas/issues/20664
cat = pd.Categorical(['a', 'b', 'a'])
with pytest.raises(IndexError):
cat.take([4, 5], allow_fill=allow_fill)
def test_take_empty(self, allow_fill):
# https://github.com/pandas-dev/pandas/issues/20664
cat = pd.Categorical([], categories=['a', 'b'])
with pytest.raises(IndexError):
cat.take([0], allow_fill=allow_fill)
def test_positional_take(self, ordered):
cat = pd.Categorical(['a', 'a', 'b', 'b'], categories=['b', 'a'],
ordered=ordered)
result = cat.take([0, 1, 2], allow_fill=False)
expected = pd.Categorical(['a', 'a', 'b'], categories=cat.categories,
ordered=ordered)
tm.assert_categorical_equal(result, expected)
def test_positional_take_unobserved(self, ordered):
cat = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'],
ordered=ordered)
result = cat.take([1, 0], allow_fill=False)
expected = pd.Categorical(['b', 'a'], categories=cat.categories,
ordered=ordered)
tm.assert_categorical_equal(result, expected)
@@ -1,320 +0,0 @@
# -*- coding: utf-8 -*-
import pytest
import sys
import numpy as np
import pandas.util.testing as tm
from pandas import Categorical, Index, Series
from pandas.compat import PYPY
class TestCategoricalAnalytics(object):
    """Reductions, search, uniqueness, shifting, sizing and mapping."""

    def test_min_max(self):
        """min/max are refused when unordered and follow category order."""
        # unordered cats have no min/max
        unordered = Categorical(["a", "b", "c", "d"], ordered=False)
        with pytest.raises(TypeError):
            unordered.min()
        with pytest.raises(TypeError):
            unordered.max()

        # inferred category order
        cat = Categorical(["a", "b", "c", "d"], ordered=True)
        lowest, highest = cat.min(), cat.max()
        assert lowest == "a"
        assert highest == "d"

        # explicitly reversed category order
        cat = Categorical(["a", "b", "c", "d"],
                          categories=['d', 'c', 'b', 'a'], ordered=True)
        lowest, highest = cat.min(), cat.max()
        assert lowest == "d"
        assert highest == "a"

        # with missing values min() is NaN unless numeric_only=True
        cat = Categorical([np.nan, "b", "c", np.nan],
                          categories=['d', 'c', 'b', 'a'], ordered=True)
        lowest, highest = cat.min(), cat.max()
        assert np.isnan(lowest)
        assert highest == "b"

        lowest = cat.min(numeric_only=True)
        assert lowest == "c"
        highest = cat.max(numeric_only=True)
        assert highest == "b"

        # same checks with integer categories
        cat = Categorical([np.nan, 1, 2, np.nan],
                          categories=[5, 4, 3, 2, 1], ordered=True)
        lowest, highest = cat.min(), cat.max()
        assert np.isnan(lowest)
        assert highest == 1

        lowest = cat.min(numeric_only=True)
        assert lowest == 2
        highest = cat.max(numeric_only=True)
        assert highest == 1

    @pytest.mark.parametrize("values,categories,exp_mode", [
        ([1, 1, 2, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5]),
        ([1, 1, 1, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5, 1]),
        ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1]),
        ([np.nan, np.nan, np.nan, 4, 5], [5, 4, 3, 2, 1], [5, 4]),
        ([np.nan, np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]),
        ([np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4])])
    def test_mode(self, values, categories, exp_mode):
        """mode() returns the most frequent values as a Categorical."""
        dtype_kwargs = dict(categories=categories, ordered=True)
        observed = Categorical(values, **dtype_kwargs).mode()
        wanted = Categorical(exp_mode, **dtype_kwargs)
        tm.assert_categorical_equal(observed, wanted)

    def test_searchsorted(self):
        """searchsorted on ordered data, plus its error cases."""
        # https://github.com/pandas-dev/pandas/issues/8420
        # https://github.com/pandas-dev/pandas/issues/14522
        ordered_cat = Categorical(
            ['cheese', 'milk', 'apple', 'bread', 'bread'],
            categories=['cheese', 'milk', 'apple', 'bread'],
            ordered=True)
        ordered_ser = Series(ordered_cat)

        unordered_cat = Categorical(
            ['cheese', 'milk', 'apple', 'bread', 'bread'],
            categories=['cheese', 'milk', 'apple', 'bread'],
            ordered=False)
        unordered_ser = Series(unordered_cat)

        # Searching for single item argument, side='left' (default)
        from_cat = ordered_cat.searchsorted('apple')
        from_ser = ordered_ser.searchsorted('apple')
        want = np.array([2], dtype=np.intp)
        tm.assert_numpy_array_equal(from_cat, want)
        tm.assert_numpy_array_equal(from_ser, want)

        # Searching for single item array, side='left' (default)
        from_cat = ordered_cat.searchsorted(['bread'])
        from_ser = ordered_ser.searchsorted(['bread'])
        want = np.array([3], dtype=np.intp)
        tm.assert_numpy_array_equal(from_cat, want)
        tm.assert_numpy_array_equal(from_ser, want)

        # Searching for several items array, side='right'
        from_cat = ordered_cat.searchsorted(['apple', 'bread'], side='right')
        from_ser = ordered_ser.searchsorted(['apple', 'bread'], side='right')
        want = np.array([3, 5], dtype=np.intp)
        tm.assert_numpy_array_equal(from_cat, want)
        tm.assert_numpy_array_equal(from_ser, want)

        # Searching for a single value that is not from the Categorical
        with pytest.raises(ValueError):
            ordered_cat.searchsorted('cucumber')
        with pytest.raises(ValueError):
            ordered_ser.searchsorted('cucumber')

        # Searching for multiple values, one of which is not from the
        # Categorical
        with pytest.raises(ValueError):
            ordered_cat.searchsorted(['bread', 'cucumber'])
        with pytest.raises(ValueError):
            ordered_ser.searchsorted(['bread', 'cucumber'])

        # searchsorted call for unordered Categorical
        with pytest.raises(ValueError):
            unordered_cat.searchsorted('apple')
        with pytest.raises(ValueError):
            unordered_ser.searchsorted('apple')

        # the 'v' keyword still works but warns
        with tm.assert_produces_warning(FutureWarning):
            from_cat = ordered_cat.searchsorted(v=['bread'])
            want = np.array([3], dtype=np.intp)
            tm.assert_numpy_array_equal(from_cat, want)

    def test_unique(self):
        """unique() on unordered data orders categories by appearance."""
        # categories are reordered based on value when ordered=False
        cat = Categorical(["a", "b"])
        want_index = Index(["a", "b"])
        uniq = cat.unique()
        tm.assert_index_equal(uniq.categories, want_index)
        tm.assert_categorical_equal(uniq, cat)

        # the unused category "c" is dropped
        cat = Categorical(["a", "b", "a", "a"], categories=["a", "b", "c"])
        uniq = cat.unique()
        tm.assert_index_equal(uniq.categories, want_index)
        tm.assert_categorical_equal(uniq, Categorical(want_index))

        cat = Categorical(["c", "a", "b", "a", "a"],
                          categories=["a", "b", "c"])
        want_index = Index(["c", "a", "b"])
        uniq = cat.unique()
        tm.assert_index_equal(uniq.categories, want_index)
        want_cat = Categorical(want_index, categories=['c', 'a', 'b'])
        tm.assert_categorical_equal(uniq, want_cat)

        # nan must be removed
        cat = Categorical(["b", np.nan, "b", np.nan, "a"],
                          categories=["a", "b", "c"])
        uniq = cat.unique()
        want_index = Index(["b", "a"])
        tm.assert_index_equal(uniq.categories, want_index)
        want_cat = Categorical(["b", np.nan, "a"], categories=["b", "a"])
        tm.assert_categorical_equal(uniq, want_cat)

    def test_unique_ordered(self):
        """unique() on ordered data keeps the declared category order."""
        # keep categories order when ordered=True
        cat = Categorical(['b', 'a', 'b'], categories=['a', 'b'],
                          ordered=True)
        want = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True)
        tm.assert_categorical_equal(cat.unique(), want)

        cat = Categorical(['c', 'b', 'a', 'a'], categories=['a', 'b', 'c'],
                          ordered=True)
        want = Categorical(['c', 'b', 'a'], categories=['a', 'b', 'c'],
                           ordered=True)
        tm.assert_categorical_equal(cat.unique(), want)

        cat = Categorical(['b', 'a', 'a'], categories=['a', 'b', 'c'],
                          ordered=True)
        want = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True)
        tm.assert_categorical_equal(cat.unique(), want)

        cat = Categorical(['b', 'b', np.nan, 'a'],
                          categories=['a', 'b', 'c'], ordered=True)
        want = Categorical(['b', np.nan, 'a'], categories=['a', 'b'],
                           ordered=True)
        tm.assert_categorical_equal(cat.unique(), want)

    def test_unique_index_series(self):
        """Categorical, Index and Series agree on unique()."""
        cat = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1])
        # Categorical.unique sorts categories by appearance order
        # if ordered=False
        want = Categorical([3, 1, 2], categories=[3, 1, 2])
        tm.assert_categorical_equal(cat.unique(), want)
        tm.assert_index_equal(Index(cat).unique(), Index(want))
        tm.assert_categorical_equal(Series(cat).unique(), want)

        cat = Categorical([1, 1, 2, 2], categories=[3, 2, 1])
        want = Categorical([1, 2], categories=[1, 2])
        tm.assert_categorical_equal(cat.unique(), want)
        tm.assert_index_equal(Index(cat).unique(), Index(want))
        tm.assert_categorical_equal(Series(cat).unique(), want)

        cat = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1],
                          ordered=True)
        # Categorical.unique keeps categories order if ordered=True
        want = Categorical([3, 1, 2], categories=[3, 2, 1], ordered=True)
        tm.assert_categorical_equal(cat.unique(), want)
        tm.assert_index_equal(Index(cat).unique(), Index(want))
        tm.assert_categorical_equal(Series(cat).unique(), want)

    def test_shift(self):
        """shift() moves values and pads the vacated slots with NaN."""
        # GH 9416
        cat = Categorical(['a', 'b', 'c', 'd', 'a'])

        # shift forward
        forward = cat.shift(1)
        want_forward = Categorical([np.nan, 'a', 'b', 'c', 'd'])
        tm.assert_categorical_equal(forward, want_forward)
        tm.assert_categorical_equal(cat[:-1], forward[1:])

        # shift back
        backward = cat.shift(-2)
        want_backward = Categorical(['c', 'd', 'a', np.nan, np.nan],
                                    categories=['a', 'b', 'c', 'd'])
        tm.assert_categorical_equal(backward, want_backward)
        tm.assert_categorical_equal(cat[2:], backward[:-2])

        # shift by zero
        tm.assert_categorical_equal(cat, cat.shift(0))

    def test_nbytes(self):
        """nbytes adds the codes' and the categories' byte sizes."""
        cat = Categorical([1, 2, 3])
        # 3 int8s for values + 3 int64s for categories
        want = 3 + 3 * 8
        assert cat.nbytes == want

    def test_memory_usage(self):
        """memory_usage() is never smaller than nbytes."""
        cat = Categorical([1, 2, 3])

        # .categories is an index, so we include the hashtable
        assert 0 < cat.nbytes <= cat.memory_usage()
        assert 0 < cat.nbytes <= cat.memory_usage(deep=True)

        cat = Categorical(['foo', 'foo', 'bar'])
        assert cat.memory_usage(deep=True) > cat.nbytes

        if not PYPY:
            # sys.getsizeof will call the .memory_usage with
            # deep=True, and add on some GC overhead
            gap = cat.memory_usage(deep=True) - sys.getsizeof(cat)
            assert abs(gap) < 100

    def test_map(self):
        """map() keeps a Categorical for 1:1 maps, else returns an Index."""
        upper = Categorical(list('ABABC'), categories=list('CBA'),
                            ordered=True)
        mapped = upper.map(lambda x: x.lower())
        want = Categorical(list('ababc'), categories=list('cba'),
                           ordered=True)
        tm.assert_categorical_equal(mapped, want)

        upper = Categorical(list('ABABC'), categories=list('ABC'),
                            ordered=False)
        mapped = upper.map(lambda x: x.lower())
        want = Categorical(list('ababc'), categories=list('abc'),
                           ordered=False)
        tm.assert_categorical_equal(mapped, want)

        mapped = upper.map(lambda x: 1)
        # GH 12766: Return an index not an array
        want_index = Index(np.array([1] * 5, dtype=np.int64))
        tm.assert_index_equal(mapped, want_index)

    def test_validate_inplace(self):
        """Every ``inplace=`` argument rejects non-boolean values."""
        cat = Categorical(['A', 'B', 'B', 'C', 'A'])

        for bad in (1, "True", [1, 2, 3], 5.0):
            with pytest.raises(ValueError):
                cat.set_ordered(value=True, inplace=bad)

            with pytest.raises(ValueError):
                cat.as_ordered(inplace=bad)

            with pytest.raises(ValueError):
                cat.as_unordered(inplace=bad)

            with pytest.raises(ValueError):
                cat.set_categories(['X', 'Y', 'Z'], rename=True,
                                   inplace=bad)

            with pytest.raises(ValueError):
                cat.rename_categories(['X', 'Y', 'Z'], inplace=bad)

            with pytest.raises(ValueError):
                cat.reorder_categories(['X', 'Y', 'Z'], ordered=True,
                                       inplace=bad)

            with pytest.raises(ValueError):
                cat.add_categories(new_categories=['D', 'E', 'F'],
                                   inplace=bad)

            with pytest.raises(ValueError):
                cat.remove_categories(removals=['D', 'E', 'F'],
                                      inplace=bad)

            with pytest.raises(ValueError):
                cat.remove_unused_categories(inplace=bad)

            with pytest.raises(ValueError):
                cat.sort_values(inplace=bad)

    def test_repeat(self):
        """repeat() duplicates each element and keeps the categories."""
        # GH10183
        cat = Categorical(["a", "b"], categories=["a", "b"])
        want = Categorical(["a", "a", "b", "b"], categories=["a", "b"])
        tm.assert_categorical_equal(cat.repeat(2), want)

    def test_numpy_repeat(self):
        """np.repeat works on a Categorical but rejects ``axis``."""
        cat = Categorical(["a", "b"], categories=["a", "b"])
        want = Categorical(["a", "a", "b", "b"], categories=["a", "b"])
        tm.assert_categorical_equal(np.repeat(cat, 2), want)

        msg = "the 'axis' parameter is not supported"
        tm.assert_raises_regex(ValueError, msg, np.repeat, cat, 2, axis=1)

    def test_isna(self):
        """isna() flags only the NaN entries."""
        want = np.array([False, False, True])
        cat = Categorical(["a", "b", np.nan])
        tm.assert_numpy_array_equal(cat.isna(), want)
@@ -1,518 +0,0 @@
# -*- coding: utf-8 -*-
import pytest
import numpy as np
import pandas.util.testing as tm
from pandas import Categorical, CategoricalIndex, Index, Series, DataFrame
from pandas.core.arrays.categorical import _recode_for_categories
from pandas.tests.categorical.common import TestCategorical
class TestCategoricalAPI(object):
    """Public category-manipulation API of ``Categorical``."""

    def test_ordered_api(self):
        """``ordered`` and ``categories`` reflect the constructor input."""
        # GH 9347
        cat1 = Categorical(list('acb'), ordered=False)
        tm.assert_index_equal(cat1.categories, Index(['a', 'b', 'c']))
        assert not cat1.ordered

        cat2 = Categorical(list('acb'), categories=list('bca'),
                           ordered=False)
        tm.assert_index_equal(cat2.categories, Index(['b', 'c', 'a']))
        assert not cat2.ordered

        cat3 = Categorical(list('acb'), ordered=True)
        tm.assert_index_equal(cat3.categories, Index(['a', 'b', 'c']))
        assert cat3.ordered

        cat4 = Categorical(list('acb'), categories=list('bca'),
                           ordered=True)
        tm.assert_index_equal(cat4.categories, Index(['b', 'c', 'a']))
        assert cat4.ordered

    def test_set_ordered(self):
        """as_ordered/as_unordered/set_ordered toggle the flag."""
        cat = Categorical(["a", "b", "c", "a"], ordered=True)
        cat2 = cat.as_unordered()
        assert not cat2.ordered
        cat2 = cat.as_ordered()
        assert cat2.ordered
        cat2.as_unordered(inplace=True)
        assert not cat2.ordered
        cat2.as_ordered(inplace=True)
        assert cat2.ordered

        assert cat2.set_ordered(True).ordered
        assert not cat2.set_ordered(False).ordered
        cat2.set_ordered(True, inplace=True)
        assert cat2.ordered
        cat2.set_ordered(False, inplace=True)
        assert not cat2.ordered

        # removed in 0.19.0: the attribute itself cannot be assigned
        msg = "can't set attribute"
        with tm.assert_raises_regex(AttributeError, msg):
            cat.ordered = True
        with tm.assert_raises_regex(AttributeError, msg):
            cat.ordered = False

    def test_rename_categories(self):
        """rename_categories relabels, in place or on a copy."""
        cat = Categorical(["a", "b", "c", "a"])

        # inplace=False: the old one must not be changed
        res = cat.rename_categories([1, 2, 3])
        tm.assert_numpy_array_equal(res.__array__(),
                                    np.array([1, 2, 3, 1], dtype=np.int64))
        tm.assert_index_equal(res.categories, Index([1, 2, 3]))

        exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_)
        tm.assert_numpy_array_equal(cat.__array__(), exp_cat)

        exp_cat = Index(["a", "b", "c"])
        tm.assert_index_equal(cat.categories, exp_cat)

        # GH18862 (let rename_categories take callables)
        result = cat.rename_categories(lambda x: x.upper())
        expected = Categorical(["A", "B", "C", "A"])
        tm.assert_categorical_equal(result, expected)

        # and now inplace
        res = cat.rename_categories([1, 2, 3], inplace=True)
        assert res is None
        tm.assert_numpy_array_equal(cat.__array__(),
                                    np.array([1, 2, 3, 1], dtype=np.int64))
        tm.assert_index_equal(cat.categories, Index([1, 2, 3]))

        # Lengthen
        with pytest.raises(ValueError):
            cat.rename_categories([1, 2, 3, 4])

        # Shorten
        with pytest.raises(ValueError):
            cat.rename_categories([1, 2])

    def test_rename_categories_series(self):
        """A Series argument is treated as list-like, with one warning."""
        # https://github.com/pandas-dev/pandas/issues/17981
        c = Categorical(['a', 'b'])
        xpr = "Treating Series 'new_categories' as a list-like "
        with tm.assert_produces_warning(FutureWarning) as rec:
            result = c.rename_categories(Series([0, 1]))

        assert len(rec) == 1
        assert xpr in str(rec[0].message)
        expected = Categorical([0, 1])
        tm.assert_categorical_equal(result, expected)

    def test_rename_categories_dict(self):
        """A dict argument renames only the categories it names."""
        # GH 17336
        cat = Categorical(['a', 'b', 'c', 'd'])
        res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1})
        expected = Index([4, 3, 2, 1])
        tm.assert_index_equal(res.categories, expected)

        # Test for inplace
        res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1},
                                    inplace=True)
        assert res is None
        tm.assert_index_equal(cat.categories, expected)

        # Test for dicts of smaller length
        cat = Categorical(['a', 'b', 'c', 'd'])
        res = cat.rename_categories({'a': 1, 'c': 3})
        expected = Index([1, 'b', 3, 'd'])
        tm.assert_index_equal(res.categories, expected)

        # Test for dicts with bigger length
        cat = Categorical(['a', 'b', 'c', 'd'])
        res = cat.rename_categories({'a': 1, 'b': 2, 'c': 3,
                                     'd': 4, 'e': 5, 'f': 6})
        expected = Index([1, 2, 3, 4])
        tm.assert_index_equal(res.categories, expected)

        # Test for dicts with no items from old categories
        cat = Categorical(['a', 'b', 'c', 'd'])
        res = cat.rename_categories({'f': 1, 'g': 3})
        expected = Index(['a', 'b', 'c', 'd'])
        tm.assert_index_equal(res.categories, expected)

    def test_reorder_categories(self):
        """reorder_categories needs exactly the existing categories."""
        cat = Categorical(["a", "b", "c", "a"], ordered=True)
        old = cat.copy()
        new = Categorical(["a", "b", "c", "a"], categories=["c", "b", "a"],
                          ordered=True)

        # first inplace == False
        res = cat.reorder_categories(["c", "b", "a"])
        # cat must be the same as before
        tm.assert_categorical_equal(cat, old)
        # only res is changed
        tm.assert_categorical_equal(res, new)

        # inplace == True
        res = cat.reorder_categories(["c", "b", "a"], inplace=True)
        assert res is None
        tm.assert_categorical_equal(cat, new)

        cat = Categorical(["a", "b", "c", "a"], ordered=True)

        # not all "old" included in "new"
        with pytest.raises(ValueError):
            cat.reorder_categories(["a"])

        # still not all "old" in "new"
        with pytest.raises(ValueError):
            cat.reorder_categories(["a", "b", "d"])

        # all "old" included in "new", but too long
        with pytest.raises(ValueError):
            cat.reorder_categories(["a", "b", "c", "d"])

    def test_add_categories(self):
        """add_categories appends new categories and rejects known ones."""
        cat = Categorical(["a", "b", "c", "a"], ordered=True)
        old = cat.copy()
        new = Categorical(["a", "b", "c", "a"],
                          categories=["a", "b", "c", "d"], ordered=True)

        # first inplace == False
        res = cat.add_categories("d")
        tm.assert_categorical_equal(cat, old)
        tm.assert_categorical_equal(res, new)

        res = cat.add_categories(["d"])
        tm.assert_categorical_equal(cat, old)
        tm.assert_categorical_equal(res, new)

        # inplace == True
        res = cat.add_categories("d", inplace=True)
        tm.assert_categorical_equal(cat, new)
        assert res is None

        # new is in old categories
        with pytest.raises(ValueError):
            cat.add_categories(["d"])

        # GH 9927
        cat = Categorical(list("abc"), ordered=True)
        expected = Categorical(
            list("abc"), categories=list("abcde"), ordered=True)
        # test with Series, np.array, index, list
        res = cat.add_categories(Series(["d", "e"]))
        tm.assert_categorical_equal(res, expected)
        res = cat.add_categories(np.array(["d", "e"]))
        tm.assert_categorical_equal(res, expected)
        res = cat.add_categories(Index(["d", "e"]))
        tm.assert_categorical_equal(res, expected)
        res = cat.add_categories(["d", "e"])
        tm.assert_categorical_equal(res, expected)

    def test_set_categories(self):
        """set_categories recodes values against the new category list."""
        cat = Categorical(["a", "b", "c", "a"], ordered=True)
        exp_categories = Index(["c", "b", "a"])
        exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)

        res = cat.set_categories(["c", "b", "a"], inplace=True)
        tm.assert_index_equal(cat.categories, exp_categories)
        tm.assert_numpy_array_equal(cat.__array__(), exp_values)
        assert res is None

        res = cat.set_categories(["a", "b", "c"])
        # cat must be the same as before
        tm.assert_index_equal(cat.categories, exp_categories)
        tm.assert_numpy_array_equal(cat.__array__(), exp_values)
        # only res is changed
        exp_categories_back = Index(["a", "b", "c"])
        tm.assert_index_equal(res.categories, exp_categories_back)
        tm.assert_numpy_array_equal(res.__array__(), exp_values)

        # not all "old" included in "new" -> all not included ones are now
        # np.nan
        cat = Categorical(["a", "b", "c", "a"], ordered=True)
        res = cat.set_categories(["a"])
        tm.assert_numpy_array_equal(res.codes,
                                    np.array([0, -1, -1, 0], dtype=np.int8))

        # still not all "old" in "new"
        res = cat.set_categories(["a", "b", "d"])
        tm.assert_numpy_array_equal(res.codes,
                                    np.array([0, 1, -1, 0], dtype=np.int8))
        tm.assert_index_equal(res.categories, Index(["a", "b", "d"]))

        # all "old" included in "new"
        cat = cat.set_categories(["a", "b", "c", "d"])
        exp_categories = Index(["a", "b", "c", "d"])
        tm.assert_index_equal(cat.categories, exp_categories)

        # internals...
        c = Categorical([1, 2, 3, 4, 1], categories=[1, 2, 3, 4],
                        ordered=True)
        tm.assert_numpy_array_equal(c._codes,
                                    np.array([0, 1, 2, 3, 0], dtype=np.int8))
        tm.assert_index_equal(c.categories, Index([1, 2, 3, 4]))

        exp = np.array([1, 2, 3, 4, 1], dtype=np.int64)
        tm.assert_numpy_array_equal(c.get_values(), exp)

        # all "pointers" to '4' must be changed from 3 to 0,...
        c = c.set_categories([4, 3, 2, 1])

        # positions are changed
        tm.assert_numpy_array_equal(c._codes,
                                    np.array([3, 2, 1, 0, 3], dtype=np.int8))

        # categories are now in new order
        tm.assert_index_equal(c.categories, Index([4, 3, 2, 1]))

        # output is the same
        exp = np.array([1, 2, 3, 4, 1], dtype=np.int64)
        tm.assert_numpy_array_equal(c.get_values(), exp)
        assert c.min() == 4
        assert c.max() == 1

        # set_categories should set the ordering if specified
        c2 = c.set_categories([4, 3, 2, 1], ordered=False)
        assert not c2.ordered

        tm.assert_numpy_array_equal(c.get_values(), c2.get_values())

        # set_categories should pass thru the ordering
        c2 = c.set_ordered(False).set_categories([4, 3, 2, 1])
        assert not c2.ordered

        tm.assert_numpy_array_equal(c.get_values(), c2.get_values())

    # The last row of the "NaNs" and "Introduce NaNs" groups used to be a
    # byte-for-byte copy of the row above it, so the reversed-order and
    # ['b'] variants for the ['b', 'a', 'c'] input were never exercised.
    @pytest.mark.parametrize('values, categories, new_categories', [
        # No NaNs, same cats, same order
        (['a', 'b', 'a'], ['a', 'b'], ['a', 'b'],),
        # No NaNs, same cats, different order
        (['a', 'b', 'a'], ['a', 'b'], ['b', 'a'],),
        # Same, unsorted
        (['b', 'a', 'a'], ['a', 'b'], ['a', 'b'],),
        # No NaNs, same cats, different order
        (['b', 'a', 'a'], ['a', 'b'], ['b', 'a'],),
        # NaNs
        (['a', 'b', 'c'], ['a', 'b'], ['a', 'b']),
        (['a', 'b', 'c'], ['a', 'b'], ['b', 'a']),
        (['b', 'a', 'c'], ['a', 'b'], ['a', 'b']),
        (['b', 'a', 'c'], ['a', 'b'], ['b', 'a']),
        # Introduce NaNs
        (['a', 'b', 'c'], ['a', 'b'], ['a']),
        (['a', 'b', 'c'], ['a', 'b'], ['b']),
        (['b', 'a', 'c'], ['a', 'b'], ['a']),
        (['b', 'a', 'c'], ['a', 'b'], ['b']),
        # No overlap
        (['a', 'b', 'c'], ['a', 'b'], ['d', 'e']),
    ])
    @pytest.mark.parametrize('ordered', [True, False])
    def test_set_categories_many(self, values, categories, new_categories,
                                 ordered):
        """set_categories equals constructing with the new categories.

        ``values`` is first built against ``categories``; switching to
        ``new_categories`` must give the same result as building
        ``values`` directly against ``new_categories`` with ``ordered``.
        """
        c = Categorical(values, categories)
        expected = Categorical(values, new_categories, ordered)
        result = c.set_categories(new_categories, ordered=ordered)
        tm.assert_categorical_equal(result, expected)

    def test_set_categories_private(self):
        """_set_categories swaps the labels and leaves the codes alone."""
        cat = Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c', 'd'])
        cat._set_categories(['a', 'c', 'd', 'e'])
        expected = Categorical(['a', 'c', 'd'], categories=list('acde'))
        tm.assert_categorical_equal(cat, expected)

        # fastpath
        cat = Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c', 'd'])
        cat._set_categories(['a', 'c', 'd', 'e'], fastpath=True)
        expected = Categorical(['a', 'c', 'd'], categories=list('acde'))
        tm.assert_categorical_equal(cat, expected)

    def test_remove_categories(self):
        """remove_categories turns the removed values into NaN."""
        cat = Categorical(["a", "b", "c", "a"], ordered=True)
        old = cat.copy()
        new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"],
                          ordered=True)

        # first inplace == False
        res = cat.remove_categories("c")
        tm.assert_categorical_equal(cat, old)
        tm.assert_categorical_equal(res, new)

        res = cat.remove_categories(["c"])
        tm.assert_categorical_equal(cat, old)
        tm.assert_categorical_equal(res, new)

        # inplace == True
        res = cat.remove_categories("c", inplace=True)
        tm.assert_categorical_equal(cat, new)
        assert res is None

        # removal is not in categories
        with pytest.raises(ValueError):
            cat.remove_categories(["c"])

    def test_remove_unused_categories(self):
        """remove_unused_categories drops unobserved categories only."""
        c = Categorical(["a", "b", "c", "d", "a"],
                        categories=["a", "b", "c", "d", "e"])
        exp_categories_all = Index(["a", "b", "c", "d", "e"])
        exp_categories_dropped = Index(["a", "b", "c", "d"])

        tm.assert_index_equal(c.categories, exp_categories_all)

        res = c.remove_unused_categories()
        tm.assert_index_equal(res.categories, exp_categories_dropped)
        tm.assert_index_equal(c.categories, exp_categories_all)

        res = c.remove_unused_categories(inplace=True)
        tm.assert_index_equal(c.categories, exp_categories_dropped)
        assert res is None

        # with NaN values (GH11599)
        c = Categorical(["a", "b", "c", np.nan],
                        categories=["a", "b", "c", "d", "e"])
        res = c.remove_unused_categories()
        tm.assert_index_equal(res.categories,
                              Index(np.array(["a", "b", "c"])))
        exp_codes = np.array([0, 1, 2, -1], dtype=np.int8)
        tm.assert_numpy_array_equal(res.codes, exp_codes)
        tm.assert_index_equal(c.categories, exp_categories_all)

        val = ['F', np.nan, 'D', 'B', 'D', 'F', np.nan]
        cat = Categorical(values=val, categories=list('ABCDEFG'))
        out = cat.remove_unused_categories()
        tm.assert_index_equal(out.categories, Index(['B', 'D', 'F']))
        exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8)
        tm.assert_numpy_array_equal(out.codes, exp_codes)
        assert out.get_values().tolist() == val

        # Random data: the values must round-trip whatever was drawn, so
        # the unseeded generator does not affect the outcome.
        alpha = list('abcdefghijklmnopqrstuvwxyz')
        val = np.random.choice(alpha[::2], 10000).astype('object')
        val[np.random.choice(len(val), 100)] = np.nan

        cat = Categorical(values=val, categories=alpha)
        out = cat.remove_unused_categories()
        assert out.get_values().tolist() == val.tolist()
class TestCategoricalAPIWithFactor(TestCategorical):
    """API checks that use the inherited ``self.factor`` Categorical."""

    def test_describe(self):
        """describe() tabulates counts and frequencies per category."""
        # string type
        summary = self.factor.describe()
        assert self.factor.ordered
        want_index = CategoricalIndex(['a', 'b', 'c'], name='categories',
                                      ordered=self.factor.ordered)
        want = DataFrame({'counts': [3, 2, 3],
                          'freqs': [3 / 8., 2 / 8., 3 / 8.]},
                         index=want_index)
        tm.assert_frame_equal(summary, want)

        # check unused categories
        widened = self.factor.copy()
        widened.set_categories(["a", "b", "c", "d"], inplace=True)
        summary = widened.describe()

        want_index = CategoricalIndex(list('abcd'),
                                      ordered=self.factor.ordered,
                                      name='categories')
        want = DataFrame({'counts': [3, 2, 3, 0],
                          'freqs': [3 / 8., 2 / 8., 3 / 8., 0]},
                         index=want_index)
        tm.assert_frame_equal(summary, want)

        # check an integer one
        numbers = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1])
        summary = numbers.describe()
        want_index = CategoricalIndex([1, 2, 3], ordered=numbers.ordered,
                                      name='categories')
        want = DataFrame({'counts': [5, 3, 3],
                          'freqs': [5 / 11., 3 / 11., 3 / 11.]},
                         index=want_index)
        tm.assert_frame_equal(summary, want)

        # https://github.com/pandas-dev/pandas/issues/3678
        # describe should work with NaN
        with_nan = Categorical([np.nan, 1, 2, 2])
        summary = with_nan.describe()
        want_index = CategoricalIndex([1, 2, np.nan], categories=[1, 2],
                                      name='categories')
        want = DataFrame({'counts': [1, 2, 1],
                          'freqs': [1 / 4., 2 / 4., 1 / 4.]},
                         index=want_index)
        tm.assert_frame_equal(summary, want)

    def test_set_categories_inplace(self):
        """set_categories(inplace=True) updates the categories in place."""
        widened = self.factor.copy()
        widened.set_categories(['a', 'b', 'c', 'd'], inplace=True)
        tm.assert_index_equal(widened.categories,
                              Index(['a', 'b', 'c', 'd']))
class TestPrivateCategoricalAPI(object):
    """Checks on ``codes`` and the private recoding helper."""

    def test_codes_immutable(self):
        """``codes`` is read-only while the Categorical stays writable."""
        # Codes should be read only
        cat = Categorical(["a", "b", "c", "a", np.nan])
        want = np.array([0, 1, 2, 0, -1], dtype='int8')
        tm.assert_numpy_array_equal(cat.codes, want)

        # Assignments to codes should raise
        with pytest.raises(ValueError):
            cat.codes = np.array([0, 1, 2, 0, 1], dtype='int8')

        # changes in the codes array should raise
        # (the original notes that np 1.6.1 raised RuntimeError instead)
        view = cat.codes
        with pytest.raises(ValueError):
            view[4] = 1

        # But even after getting the codes, the original array should still
        # be writeable!
        cat[4] = "a"
        want = np.array([0, 1, 2, 0, 0], dtype='int8')
        tm.assert_numpy_array_equal(cat.codes, want)

        cat._codes[4] = 2
        want = np.array([0, 1, 2, 0, 2], dtype='int8')
        tm.assert_numpy_array_equal(cat.codes, want)

    @pytest.mark.parametrize('codes, old, new, expected', [
        ([0, 1], ['a', 'b'], ['a', 'b'], [0, 1]),
        ([0, 1], ['b', 'a'], ['b', 'a'], [0, 1]),
        ([0, 1], ['a', 'b'], ['b', 'a'], [1, 0]),
        ([0, 1], ['b', 'a'], ['a', 'b'], [1, 0]),
        ([0, 1, 0, 1], ['a', 'b'], ['a', 'b', 'c'], [0, 1, 0, 1]),
        ([0, 1, 2, 2], ['a', 'b', 'c'], ['a', 'b'], [0, 1, -1, -1]),
        ([0, 1, -1], ['a', 'b', 'c'], ['a', 'b', 'c'], [0, 1, -1]),
        ([0, 1, -1], ['a', 'b', 'c'], ['b'], [-1, 0, -1]),
        ([0, 1, -1], ['a', 'b', 'c'], ['d'], [-1, -1, -1]),
        ([0, 1, -1], ['a', 'b', 'c'], [], [-1, -1, -1]),
        ([-1, -1], [], ['a', 'b'], [-1, -1]),
        ([1, 0], ['b', 'a'], ['a', 'b'], [0, 1]),
    ])
    def test_recode_to_categories(self, codes, old, new, expected):
        """_recode_for_categories remaps codes from ``old`` to ``new``."""
        code_arr = np.asanyarray(codes, dtype=np.int8)
        want = np.asanyarray(expected, dtype=np.int8)
        recoded = _recode_for_categories(code_arr, Index(old), Index(new))
        tm.assert_numpy_array_equal(recoded, want)

    def test_recode_to_categories_large(self):
        """Recoding 1000 categories yields the reversed int16 codes."""
        size = 1000
        code_arr = np.arange(size)
        old_cats = Index(code_arr)
        want = np.arange(size - 1, -1, -1, dtype=np.int16)
        new_cats = Index(want)
        recoded = _recode_for_categories(code_arr, old_cats, new_cats)
        tm.assert_numpy_array_equal(recoded, want)
@@ -1,515 +0,0 @@
# -*- coding: utf-8 -*-
import pytest
from datetime import datetime
import numpy as np
import pandas as pd
import pandas.util.testing as tm
from pandas import (Categorical, Index, Series, Timestamp,
CategoricalIndex, date_range, DatetimeIndex,
period_range, timedelta_range, NaT,
Interval, IntervalIndex)
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype
class TestCategoricalConstructors(object):
    """Exercise the ways a ``Categorical`` can be built.

    Covers the plain constructor, ``Categorical.from_codes`` and the
    private ``Categorical._from_inferred_categories``.
    """

    def test_validate_ordered(self):
        """A non-boolean ``ordered`` raises TypeError (see gh-14058)."""
        expected_message = "'ordered' must either be 'True' or 'False'"

        # This should be a boolean, so an integer array must be rejected.
        not_a_bool = np.array([0, 1, 2])

        with tm.assert_raises_regex(TypeError, expected_message):
            Categorical([1, 2, 3], ordered=not_a_bool)

        with tm.assert_raises_regex(TypeError, expected_message):
            Categorical.from_codes([0, 0, 1],
                                   categories=['a', 'b', 'c'],
                                   ordered=not_a_bool)

    def test_constructor_empty(self):
        """Categories of an empty Categorical, with and without
        explicit ``categories`` (GH 17248)."""
        without_categories = Categorical([])
        tm.assert_index_equal(without_categories.categories, Index([]))

        with_categories = Categorical([], categories=[1, 2, 3])
        tm.assert_index_equal(with_categories.categories,
                              pd.Int64Index([1, 2, 3]))

    def test_constructor_tuples(self):
        """Tuples of differing length become categories as whole tuples."""
        tuples = np.array([(1,), (1, 2), (1,), (1, 2)], dtype=object)
        cat = Categorical(tuples)

        wanted = Index([(1,), (1, 2)], tupleize_cols=False)
        tm.assert_index_equal(cat.categories, wanted)
        assert cat.ordered is False

    def test_constructor_tuples_datetimes(self):
        """Tuples holding Timestamps become categories as whole tuples."""
        first = Timestamp('2010-01-01')
        second = Timestamp('2010-01-02')

        # numpy will auto reshape when all of the tuples are the same
        # length, so a two-item tuple is appended and sliced off again.
        padded = np.array([(first,), (second,), (first,), (second,),
                           ('a', 'b')], dtype=object)
        cat = Categorical(padded[:-1])

        wanted = Index([(first,), (second,)], tupleize_cols=False)
        tm.assert_index_equal(cat.categories, wanted)

    def test_constructor_unsortable(self):
        """Unsortable values work unordered but raise when ordered."""
        mixed = np.array([1, 2, 3, datetime.now()], dtype='O')

        # it works!
        unordered = Categorical(mixed, ordered=False)
        assert not unordered.ordered

        # this however will raise as cannot be sorted
        with pytest.raises(TypeError):
            Categorical(mixed, ordered=True)

    def test_constructor_interval(self):
        """A list of Intervals and the matching IntervalIndex agree."""
        intervals = [Interval(1, 2), Interval(2, 3), Interval(3, 6)]
        from_list = Categorical(intervals, ordered=True)

        interval_index = IntervalIndex(intervals)
        from_index = Categorical(interval_index, ordered=True)

        tm.assert_categorical_equal(from_list, from_index)
        tm.assert_index_equal(from_list.categories, interval_index)

    def test_constructor(self):
        """Walk through the kinds of input the constructor accepts."""
        letters = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_)

        # The values round-trip however the categories are supplied.
        inferred = Categorical(letters)
        tm.assert_numpy_array_equal(inferred.__array__(), letters)
        for cats in (["a", "b", "c"], ["c", "b", "a"]):
            explicit = Categorical(letters, categories=cats)
            tm.assert_numpy_array_equal(explicit.__array__(), letters)

        # categories must be unique
        with pytest.raises(ValueError):
            Categorical([1, 2], [1, 2, 2])
        with pytest.raises(ValueError):
            Categorical(["a", "b"], ["a", "b", "b"])

        # The default should be unordered
        assert not Categorical(["a", "b", "c", "a"]).ordered

        # Categorical as input
        source = Categorical(["a", "b", "c", "a"])
        tm.assert_categorical_equal(source, Categorical(source))

        source = Categorical(["a", "b", "c", "a"],
                             categories=["a", "b", "c", "d"])
        tm.assert_categorical_equal(source, Categorical(source))

        source = Categorical(["a", "b", "c", "a"],
                             categories=["a", "c", "b"])
        tm.assert_categorical_equal(source, Categorical(source))

        source = Categorical(["a", "b", "c", "a"],
                             categories=["a", "c", "b"])
        recoded = Categorical(source, categories=["a", "b", "c"])
        tm.assert_numpy_array_equal(source.__array__(), recoded.__array__())
        tm.assert_index_equal(recoded.categories, Index(["a", "b", "c"]))

        # Series of dtype category
        source = Categorical(["a", "b", "c", "a"],
                             categories=["a", "b", "c", "d"])
        tm.assert_categorical_equal(source, Categorical(Series(source)))

        source = Categorical(["a", "b", "c", "a"],
                             categories=["a", "c", "b"])
        tm.assert_categorical_equal(source, Categorical(Series(source)))

        # Series
        source = Categorical(["a", "b", "c", "a"])
        from_series = Categorical(Series(["a", "b", "c", "a"]))
        tm.assert_categorical_equal(source, from_series)

        source = Categorical(["a", "b", "c", "a"],
                             categories=["a", "b", "c", "d"])
        from_series = Categorical(Series(["a", "b", "c", "a"]),
                                  categories=["a", "b", "c", "d"])
        tm.assert_categorical_equal(source, from_series)

        # This should result in integer categories, not float!
        numbers = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
        assert is_integer_dtype(numbers.categories)

        # https://github.com/pandas-dev/pandas/issues/3678
        numbers = Categorical([np.nan, 1, 2, 3])
        assert is_integer_dtype(numbers.categories)

        # this should result in floats
        numbers = Categorical([np.nan, 1, 2., 3])
        assert is_float_dtype(numbers.categories)

        numbers = Categorical([np.nan, 1., 2., 3.])
        assert is_float_dtype(numbers.categories)

        # This doesn't work -> this would probably need some kind of "remember
        # the original type" feature to try to cast the array interface result
        # to...

        # vals = np.asarray(cat[cat.notna()])
        # assert is_integer_dtype(vals)

        # corner cases; the last entry is a scalar, which should be
        # converted to a list
        for values, only_category in (([1], 1), (["a"], "a"), (1, 1)):
            single = Categorical(values)
            assert len(single.categories) == 1
            assert single.categories[0] == only_category
            assert len(single.codes) == 1
            assert single.codes[0] == 0

        # two arrays
        #  - when the first is an integer dtype and the second is not
        #  - when the resulting codes are all -1/NaN
        with tm.assert_produces_warning(None):
            Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"])

        with tm.assert_produces_warning(None):
            Categorical([0, 1, 2, 0, 1, 2], categories=[3, 4, 5])

        # the next one are from the old docs
        with tm.assert_produces_warning(None):
            Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3])
            Categorical([1, 2], categories=[1, 2, 3])

        # this is a legitimate constructor
        with tm.assert_produces_warning(None):
            Categorical(np.array([], dtype='int64'),
                        categories=[3, 2, 1], ordered=True)

    def test_constructor_not_sequence(self):
        """A bare string is not accepted as ``categories``."""
        # https://github.com/pandas-dev/pandas/issues/16022
        with pytest.raises(TypeError):
            Categorical(['a', 'b'], categories='a')

    def test_constructor_with_null(self):
        """Null values (NaN, None, NaT) are rejected in ``categories``."""
        # Cannot have NaN in categories
        with pytest.raises(ValueError):
            Categorical([np.nan, "a", "b", "c"],
                        categories=[np.nan, "a", "b", "c"])

        with pytest.raises(ValueError):
            Categorical([None, "a", "b", "c"],
                        categories=[None, "a", "b", "c"])

        with pytest.raises(ValueError):
            Categorical(DatetimeIndex(['nat', '20160101']),
                        categories=[NaT, Timestamp('20160101')])

    def test_constructor_with_index(self):
        """Building from a CategoricalIndex reproduces its values."""
        cat_index = CategoricalIndex(list('aabbca'), categories=list('cab'))
        tm.assert_categorical_equal(cat_index.values, Categorical(cat_index))

        cat_index = CategoricalIndex(list('aabbca'), categories=list('cab'))
        rebuilt = Categorical(cat_index.astype(object),
                              categories=cat_index.categories)
        tm.assert_categorical_equal(cat_index.values, rebuilt)

    def test_constructor_with_generator(self):
        """Generators and ranges work as values and as categories."""
        # This was raising an Error in isna(single_val).any() because isna
        # returned a scalar for a generator
        wanted = Categorical([0, 1, 2])

        from_generator = Categorical((x for x in [0, 1, 2]))
        tm.assert_categorical_equal(from_generator, wanted)

        from_range = Categorical(range(3))
        tm.assert_categorical_equal(from_range, wanted)

        # This uses xrange internally
        from pandas.core.index import MultiIndex
        MultiIndex.from_product([range(5), ['a', 'b', 'c']])

        # check that categories accept generators and sequences
        from_generator = Categorical([0, 1, 2],
                                     categories=(x for x in [0, 1, 2]))
        tm.assert_categorical_equal(from_generator, wanted)

        from_range = Categorical([0, 1, 2], categories=range(3))
        tm.assert_categorical_equal(from_range, wanted)

    def test_constructor_with_datetimelike(self):
        """Datetime-like values, with and without NaT (issue 12077)."""
        # constructor with a datetimelike and NaT
        datetimelikes = [
            date_range('1995-01-01 00:00:00', periods=5, freq='s'),
            date_range('1995-01-01 00:00:00', periods=5,
                       freq='s', tz='US/Eastern'),
            timedelta_range('1 day', periods=5, freq='s'),
        ]
        for dtl in datetimelikes:
            complete = Series(dtl)
            cat = Categorical(complete)

            wanted = type(dtl)(complete)
            wanted.freq = None
            tm.assert_index_equal(cat.categories, wanted)
            tm.assert_numpy_array_equal(cat.codes,
                                        np.arange(5, dtype='int8'))

            # with NaT
            with_nat = complete.copy()
            with_nat.iloc[-1] = NaT
            cat = Categorical(with_nat)

            wanted = type(dtl)(with_nat.dropna())
            wanted.freq = None
            tm.assert_index_equal(cat.categories, wanted)

            wanted_codes = np.array([0, 1, 2, 3, -1], dtype=np.int8)
            tm.assert_numpy_array_equal(cat.codes, wanted_codes)

            assert 'NaT' in repr(cat)

    def test_constructor_from_index_series_datetimetz(self):
        """A tz-aware DatetimeIndex is kept as the categories."""
        tz_index = date_range('2015-01-01 10:00', freq='D', periods=3,
                              tz='US/Eastern')
        tm.assert_index_equal(Categorical(tz_index).categories, tz_index)
        tm.assert_index_equal(Categorical(Series(tz_index)).categories,
                              tz_index)

    def test_constructor_from_index_series_timedelta(self):
        """A TimedeltaIndex is kept as the categories."""
        td_index = timedelta_range('1 days', freq='D', periods=3)
        tm.assert_index_equal(Categorical(td_index).categories, td_index)
        tm.assert_index_equal(Categorical(Series(td_index)).categories,
                              td_index)

    def test_constructor_from_index_series_period(self):
        """A PeriodIndex is kept as the categories."""
        period_index = period_range('2015-01-01', freq='D', periods=3)
        tm.assert_index_equal(Categorical(period_index).categories,
                              period_index)
        tm.assert_index_equal(Categorical(Series(period_index)).categories,
                              period_index)

    def test_constructor_invariant(self):
        """``Categorical(Categorical(x))`` equals ``Categorical(x)``
        (GH 14190)."""
        samples = [
            np.array([1., 1.2, 1.8, np.nan]),
            np.array([1, 2, 3], dtype='int64'),
            ['a', 'b', 'c', np.nan],
            [pd.Period('2014-01'), pd.Period('2014-02'), NaT],
            [Timestamp('2014-01-01'), Timestamp('2014-01-02'), NaT],
            [Timestamp('2014-01-01', tz='US/Eastern'),
             Timestamp('2014-01-02', tz='US/Eastern'), NaT],
        ]
        for sample in samples:
            once = Categorical(sample)
            twice = Categorical(once)
            tm.assert_categorical_equal(once, twice)

    @pytest.mark.parametrize('ordered', [True, False])
    def test_constructor_with_dtype(self, ordered):
        """``dtype=`` is equivalent to passing categories and ordered."""
        categories = ['b', 'a', 'c']
        values = ['a', 'b', 'a', 'c']

        via_dtype = Categorical(
            values, dtype=CategoricalDtype(categories, ordered=ordered))
        via_kwargs = Categorical(values, categories=categories,
                                 ordered=ordered)

        tm.assert_categorical_equal(via_dtype, via_kwargs)
        assert via_dtype.ordered is ordered

    def test_constructor_dtype_and_others_raises(self):
        """A CategoricalDtype cannot be combined with categories/ordered."""
        dtype = CategoricalDtype(['a', 'b'], ordered=True)
        conflicting_kwargs = (
            dict(categories=['a', 'b']),
            dict(ordered=True),
            dict(ordered=False),
        )
        for extra in conflicting_kwargs:
            with tm.assert_raises_regex(ValueError, "Cannot"):
                Categorical(['a', 'b'], dtype=dtype, **extra)

    @pytest.mark.parametrize('categories', [
        None, ['a', 'b'], ['a', 'c'],
    ])
    @pytest.mark.parametrize('ordered', [True, False])
    def test_constructor_str_category(self, categories, ordered):
        """``dtype='category'`` does not alter the result."""
        with_string = Categorical(['a', 'b'], categories=categories,
                                  ordered=ordered, dtype='category')
        without_string = Categorical(['a', 'b'], categories=categories,
                                     ordered=ordered)
        tm.assert_categorical_equal(with_string, without_string)

    def test_constructor_str_unknown(self):
        """An unrecognised dtype string raises ValueError."""
        with tm.assert_raises_regex(ValueError, "Unknown `dtype`"):
            Categorical([1, 2], dtype="foo")

    def test_constructor_from_categorical_with_dtype(self):
        """Categories given by the dtype win over those of the values."""
        dtype = CategoricalDtype(['a', 'b', 'c'], ordered=True)
        source = Categorical(['a', 'b', 'd'])

        converted = Categorical(source, dtype=dtype)

        # We use dtype.categories, not values.categories
        wanted = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'c'],
                             ordered=True)
        tm.assert_categorical_equal(converted, wanted)

    def test_constructor_from_categorical_with_unknown_dtype(self):
        """A dtype without categories falls back to those of the values."""
        dtype = CategoricalDtype(None, ordered=True)
        source = Categorical(['a', 'b', 'd'])

        converted = Categorical(source, dtype=dtype)

        # We use values.categories, not dtype.categories
        wanted = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'd'],
                             ordered=True)
        tm.assert_categorical_equal(converted, wanted)

    def test_constructor_from_categorical_string(self):
        """Explicit categories/ordered apply with or without
        ``dtype='category'``."""
        source = Categorical(['a', 'b', 'd'])
        wanted = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'c'],
                             ordered=True)

        # use categories, ordered
        with_string = Categorical(source, categories=['a', 'b', 'c'],
                                  ordered=True, dtype='category')
        tm.assert_categorical_equal(with_string, wanted)

        # No string
        without_string = Categorical(source, categories=['a', 'b', 'c'],
                                     ordered=True)
        tm.assert_categorical_equal(without_string, wanted)

    def test_constructor_with_categorical_categories(self):
        """``categories`` may be a Categorical or CategoricalIndex
        (GH17884)."""
        wanted = Categorical(['a', 'b'], categories=['a', 'b', 'c'])

        for container in (Categorical, CategoricalIndex):
            built = Categorical(['a', 'b'],
                                categories=container(['a', 'b', 'c']))
            tm.assert_categorical_equal(built, wanted)

    def test_from_codes(self):
        """``from_codes`` rejects the invalid inputs listed below and
        builds from valid ones."""
        invalid_inputs = [
            # too few categories
            ([1, 2], [1, 2]),
            # no int codes
            (["a"], [1, 2]),
            # no unique categories
            ([0, 1, 2], ["a", "a", "b"]),
            # NaN categories included
            ([0, 1, 2], ["a", "b", np.nan]),
            # too negative
            ([-2, 1, 2], ["a", "b", "c"]),
        ]
        for codes, categories in invalid_inputs:
            with pytest.raises(ValueError):
                Categorical.from_codes(codes, categories)

        wanted = Categorical(["a", "b", "c"], ordered=False)
        built = Categorical.from_codes([0, 1, 2], ["a", "b", "c"])
        tm.assert_categorical_equal(wanted, built)

        # Not available in earlier numpy versions
        if hasattr(np.random, "choice"):
            random_codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
            Categorical.from_codes(random_codes,
                                   categories=["train", "test"])

    def test_from_codes_with_categorical_categories(self):
        """``from_codes`` accepts categorical categories (GH17884)."""
        wanted = Categorical(['a', 'b'], categories=['a', 'b', 'c'])

        for container in (Categorical, CategoricalIndex):
            built = Categorical.from_codes(
                [0, 1], categories=container(['a', 'b', 'c']))
            tm.assert_categorical_equal(built, wanted)

        # non-unique Categorical still raises
        with pytest.raises(ValueError):
            Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))

    @pytest.mark.parametrize('dtype', [None, 'category'])
    def test_from_inferred_categories(self, dtype):
        """Already sorted inferred categories match ``from_codes``."""
        cats = ['a', 'b']
        codes = np.array([0, 0, 1, 1], dtype='i8')

        built = Categorical._from_inferred_categories(cats, codes, dtype)
        tm.assert_categorical_equal(built,
                                    Categorical.from_codes(codes, cats))

    @pytest.mark.parametrize('dtype', [None, 'category'])
    def test_from_inferred_categories_sorts(self, dtype):
        """Unsorted inferred categories come back sorted, codes remapped."""
        cats = ['b', 'a']
        codes = np.array([0, 1, 1, 1], dtype='i8')

        built = Categorical._from_inferred_categories(cats, codes, dtype)
        wanted = Categorical.from_codes([1, 0, 0, 0], ['a', 'b'])
        tm.assert_categorical_equal(built, wanted)

    def test_from_inferred_categories_dtype(self):
        """A CategoricalDtype fixes the resulting categories and order."""
        cats = ['a', 'b', 'd']
        codes = np.array([0, 1, 0, 2], dtype='i8')
        dtype = CategoricalDtype(['c', 'b', 'a'], ordered=True)

        built = Categorical._from_inferred_categories(cats, codes, dtype)
        wanted = Categorical(['a', 'b', 'a', 'd'],
                             categories=['c', 'b', 'a'],
                             ordered=True)
        tm.assert_categorical_equal(built, wanted)

    def test_from_inferred_categories_coerces(self):
        """String categories are coerced to match the dtype's categories;
        the unconvertible one ends up as NaN."""
        cats = ['1', '2', 'bad']
        codes = np.array([0, 0, 1, 2], dtype='i8')
        dtype = CategoricalDtype([1, 2])

        built = Categorical._from_inferred_categories(cats, codes, dtype)
        tm.assert_categorical_equal(built, Categorical([1, 1, 2, np.nan]))

    def test_construction_with_ordered(self):
        """``ordered`` defaults to False and honours an explicit value
        (GH 9347, 9190)."""
        assert not Categorical([0, 1, 2]).ordered
        assert not Categorical([0, 1, 2], ordered=False).ordered
        assert Categorical([0, 1, 2], ordered=True).ordered

    @pytest.mark.xfail(reason="Imaginary values not supported in Categorical")
    def test_constructor_imaginary(self):
        """Complex values as categories; marked as an expected failure."""
        values = [1, 2, 3 + 1j]
        cat = Categorical(values)
        tm.assert_index_equal(cat.categories, Index(values))
        tm.assert_numpy_array_equal(np.array(cat), np.array(values))
@@ -1,176 +0,0 @@
# -*- coding: utf-8 -*-
import pytest
import numpy as np
import pandas.util.testing as tm
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.compat import long
from pandas import Categorical, Index, CategoricalIndex, Series, Timestamp
class TestCategoricalDtypes(object):
    """Dtype-related behaviour of ``Categorical``: dtype equality,
    ``_set_dtype``, the integer dtype of the codes, ``astype`` and the
    Python types produced by iteration."""

    def test_is_equal_dtype(self):
        """``is_dtype_equal`` against Categoricals, indexes and Series."""
        # test dtype comparisons between cats

        c1 = Categorical(list('aabca'), categories=list('abc'), ordered=False)
        c2 = Categorical(list('aabca'), categories=list('cab'), ordered=False)
        c3 = Categorical(list('aabca'), categories=list('cab'), ordered=True)
        assert c1.is_dtype_equal(c1)
        assert c2.is_dtype_equal(c2)
        assert c3.is_dtype_equal(c3)
        # same unordered categories in a different order still compare equal
        assert c1.is_dtype_equal(c2)
        # ... but an ordered Categorical does not match an unordered one
        assert not c1.is_dtype_equal(c3)
        assert not c1.is_dtype_equal(Index(list('aabca')))
        assert not c1.is_dtype_equal(c1.astype(object))
        assert c1.is_dtype_equal(CategoricalIndex(c1))
        assert (c1.is_dtype_equal(
            CategoricalIndex(c1, categories=list('cab'))))
        assert not c1.is_dtype_equal(CategoricalIndex(c1, ordered=True))

        # GH 16659
        s1 = Series(c1)
        s2 = Series(c2)
        s3 = Series(c3)
        assert c1.is_dtype_equal(s1)
        assert c2.is_dtype_equal(s2)
        assert c3.is_dtype_equal(s3)
        assert c1.is_dtype_equal(s2)
        assert not c1.is_dtype_equal(s3)
        assert not c1.is_dtype_equal(s1.astype(object))

    def test_set_dtype_same(self):
        """Setting an identical dtype leaves the Categorical unchanged."""
        c = Categorical(['a', 'b', 'c'])
        result = c._set_dtype(CategoricalDtype(['a', 'b', 'c']))
        tm.assert_categorical_equal(result, c)

    def test_set_dtype_new_categories(self):
        """Appending a category keeps the codes and widens the dtype."""
        c = Categorical(['a', 'b', 'c'])
        result = c._set_dtype(CategoricalDtype(list('abcd')))
        tm.assert_numpy_array_equal(result.codes, c.codes)
        tm.assert_index_equal(result.dtype.categories, Index(list('abcd')))

    @pytest.mark.parametrize('values, categories, new_categories', [
        # No NaNs, same cats, same order
        (['a', 'b', 'a'], ['a', 'b'], ['a', 'b'],),
        # No NaNs, same cats, different order
        (['a', 'b', 'a'], ['a', 'b'], ['b', 'a'],),
        # Same, unsorted
        (['b', 'a', 'a'], ['a', 'b'], ['a', 'b'],),
        # No NaNs, same cats, different order
        (['b', 'a', 'a'], ['a', 'b'], ['b', 'a'],),
        # NaNs
        (['a', 'b', 'c'], ['a', 'b'], ['a', 'b']),
        (['a', 'b', 'c'], ['a', 'b'], ['b', 'a']),
        (['b', 'a', 'c'], ['a', 'b'], ['a', 'b']),
        # This row used to repeat the previous one; it now covers the
        # unsorted-values / reversed-categories combination.
        (['b', 'a', 'c'], ['a', 'b'], ['b', 'a']),
        # Introduce NaNs
        (['a', 'b', 'c'], ['a', 'b'], ['a']),
        (['a', 'b', 'c'], ['a', 'b'], ['b']),
        (['b', 'a', 'c'], ['a', 'b'], ['a']),
        # This row used to repeat the previous one; it now covers the
        # unsorted-values / ['b'] combination.
        (['b', 'a', 'c'], ['a', 'b'], ['b']),
        # No overlap
        (['a', 'b', 'c'], ['a', 'b'], ['d', 'e']),
    ])
    @pytest.mark.parametrize('ordered', [True, False])
    def test_set_dtype_many(self, values, categories, new_categories,
                            ordered):
        """``_set_dtype`` matches constructing directly with the new
        categories and orderedness."""
        c = Categorical(values, categories)
        expected = Categorical(values, new_categories, ordered)
        result = c._set_dtype(expected.dtype)
        tm.assert_categorical_equal(result, expected)

    def test_set_dtype_no_overlap(self):
        """With no shared categories the values compare equal to an
        all-None Categorical."""
        c = Categorical(['a', 'b', 'c'], ['d', 'e'])
        result = c._set_dtype(CategoricalDtype(['a', 'b']))
        expected = Categorical([None, None, None], categories=['a', 'b'])
        tm.assert_categorical_equal(result, expected)

    def test_codes_dtypes(self):
        """The codes dtype follows the number of categories (GH 8453)."""
        result = Categorical(['foo', 'bar', 'baz'])
        assert result.codes.dtype == 'int8'

        result = Categorical(['foo%05d' % i for i in range(400)])
        assert result.codes.dtype == 'int16'

        result = Categorical(['foo%05d' % i for i in range(40000)])
        assert result.codes.dtype == 'int32'

        # adding cats
        result = Categorical(['foo', 'bar', 'baz'])
        assert result.codes.dtype == 'int8'
        result = result.add_categories(['foo%05d' % i for i in range(400)])
        assert result.codes.dtype == 'int16'

        # removing cats
        result = result.remove_categories(['foo%05d' % i for i in range(300)])
        assert result.codes.dtype == 'int8'

    @pytest.mark.parametrize('ordered', [True, False])
    def test_astype(self, ordered):
        """``astype`` to object/int/float; strings cannot become floats."""
        # string
        cat = Categorical(list('abbaaccc'), ordered=ordered)
        result = cat.astype(object)
        expected = np.array(cat)
        tm.assert_numpy_array_equal(result, expected)

        msg = 'could not convert string to float'
        with tm.assert_raises_regex(ValueError, msg):
            cat.astype(float)

        # numeric
        cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
        result = cat.astype(object)
        expected = np.array(cat, dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        # The builtins are used instead of ``np.int`` / ``np.float``: those
        # were plain aliases of the builtins and are gone from newer NumPy.
        result = cat.astype(int)
        expected = np.array(cat, dtype=int)
        tm.assert_numpy_array_equal(result, expected)

        result = cat.astype(float)
        expected = np.array(cat, dtype=float)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize('dtype_ordered', [True, False])
    @pytest.mark.parametrize('cat_ordered', [True, False])
    def test_astype_category(self, dtype_ordered, cat_ordered):
        """``astype`` to a CategoricalDtype or ``'category'``
        (GH 10696/18593)."""
        data = list('abcaacbab')
        cat = Categorical(data, categories=list('bac'), ordered=cat_ordered)

        # standard categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = cat.astype(dtype)
        expected = Categorical(
            data, categories=cat.categories, ordered=dtype_ordered)
        tm.assert_categorical_equal(result, expected)

        # non-standard categories
        dtype = CategoricalDtype(list('adc'), dtype_ordered)
        result = cat.astype(dtype)
        expected = Categorical(data, dtype=dtype)
        tm.assert_categorical_equal(result, expected)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            result = cat.astype('category')
            expected = cat
            tm.assert_categorical_equal(result, expected)

    def test_iter_python_types(self):
        """Iteration and ``tolist`` give Python ints (GH-19909)."""
        # TODO(Py2): Remove long
        cat = Categorical([1, 2])
        assert isinstance(list(cat)[0], (int, long))
        assert isinstance(cat.tolist()[0], (int, long))

    def test_iter_python_types_datetime(self):
        """Iteration and ``tolist`` give Timestamp objects."""
        cat = Categorical([Timestamp('2017-01-01'),
                           Timestamp('2017-01-02')])
        assert isinstance(list(cat)[0], Timestamp)
        assert isinstance(cat.tolist()[0], Timestamp)
@@ -1,123 +0,0 @@
# -*- coding: utf-8 -*-
import pytest
import numpy as np
import pandas.util.testing as tm
from pandas import Categorical, Index, CategoricalIndex, PeriodIndex
from pandas.tests.categorical.common import TestCategorical
class TestCategoricalIndexingWithFactor(TestCategorical):
    """Indexing tests that run against ``self.factor``."""

    def test_getitem(self):
        """Scalar, list and boolean-mask ``__getitem__``."""
        factor = self.factor
        assert factor[0] == 'a'
        assert factor[-1] == 'c'

        first_three = factor[[0, 1, 2]]
        tm.assert_numpy_array_equal(first_three._codes,
                                    np.array([0, 1, 1], dtype=np.int8))

        is_c = np.asarray(factor) == 'c'
        only_c = factor[is_c]
        tm.assert_numpy_array_equal(only_c._codes,
                                    np.array([2, 2, 2], dtype=np.int8))

    def test_setitem(self):
        """Positional and boolean-mask ``__setitem__`` on a copy."""
        # int/positional
        working = self.factor.copy()
        working[0] = 'b'
        assert working[0] == 'b'
        working[-1] = 'a'
        assert working[-1] == 'a'

        # boolean: select only the first and the last element
        working = self.factor.copy()
        mask = np.zeros(len(working), dtype='bool')
        mask[0] = True
        mask[-1] = True
        working[mask] = 'c'

        wanted = Categorical(['c', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
                             ordered=True)
        tm.assert_categorical_equal(working, wanted)
class TestCategoricalIndexing(object):
    """Indexing tests that build their own Categoricals."""

    def test_getitem_listlike(self):
        """An int64 array indexer gives the same codes either way round
        (GH 9469)."""
        # properly coerce the input indexers
        np.random.seed(1)
        cat = Categorical(
            np.random.randint(0, 5, size=150000).astype(np.int8))

        codes_then_index = cat.codes[np.array([100000]).astype(np.int64)]
        index_then_codes = cat[np.array([100000]).astype(np.int64)].codes
        tm.assert_numpy_array_equal(codes_then_index, index_then_codes)

    def test_periodindex(self):
        """Codes and categories obtained from PeriodIndex input."""
        repeated = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02',
                                '2014-03', '2014-03'], freq='M')
        cat = Categorical(repeated)
        str(cat)
        wanted_codes = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8)
        wanted_categories = PeriodIndex(['2014-01', '2014-02', '2014-03'],
                                        freq='M')
        tm.assert_numpy_array_equal(cat._codes, wanted_codes)
        tm.assert_index_equal(cat.categories, wanted_categories)

        shuffled = PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01',
                                '2014-03', '2014-01'], freq='M')
        cat = Categorical(shuffled, ordered=True)
        str(cat)
        wanted_codes = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8)
        wanted_categories = PeriodIndex(['2014-01', '2014-02', '2014-03'],
                                        freq='M')
        tm.assert_numpy_array_equal(cat._codes, wanted_codes)
        tm.assert_index_equal(cat.categories, wanted_categories)

        descending = PeriodIndex(['2013-12', '2013-11', '2013-10',
                                  '2013-09', '2013-08', '2013-07',
                                  '2013-05'], freq='M')
        cat = Categorical(descending, ordered=True)
        wanted_codes = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8)
        wanted_categories = PeriodIndex(['2013-05', '2013-07', '2013-08',
                                         '2013-09', '2013-10', '2013-11',
                                         '2013-12'], freq='M')
        tm.assert_numpy_array_equal(cat._codes, wanted_codes)
        tm.assert_index_equal(cat.categories, wanted_categories)

    def test_categories_assigments(self):
        """Assigning ``categories`` renames them; a different length
        raises ValueError."""
        cat = Categorical(["a", "b", "c", "a"])
        renamed_values = np.array([1, 2, 3, 1], dtype=np.int64)

        cat.categories = [1, 2, 3]
        tm.assert_numpy_array_equal(cat.__array__(), renamed_values)
        tm.assert_index_equal(cat.categories, Index([1, 2, 3]))

        # lengthen
        with pytest.raises(ValueError):
            cat.categories = [1, 2, 3, 4]

        # shorten
        with pytest.raises(ValueError):
            cat.categories = [1, 2]

    # Combinations of sorted/unique:
    @pytest.mark.parametrize("idx_values", [[1, 2, 3, 4], [1, 3, 2, 4],
                                            [1, 3, 3, 4], [1, 2, 2, 4]])
    # Combinations of missing/unique
    @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]])
    @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex])
    def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
        """A categorical key and the plain list give the same result from
        ``get_indexer_non_unique`` (GH 21448)."""
        key = key_class(key_values, categories=range(1, 5))

        # Test for flat index and CategoricalIndex with same/different cats:
        for dtype in (None, 'category', key.dtype):
            idx = Index(idx_values, dtype=dtype)
            from_list, missing_from_list = \
                idx.get_indexer_non_unique(key_values)
            from_key, missing_from_key = idx.get_indexer_non_unique(key)

            tm.assert_numpy_array_equal(from_list, from_key)
            tm.assert_numpy_array_equal(missing_from_list, missing_from_key)
@@ -1,85 +0,0 @@
# -*- coding: utf-8 -*-
import collections
import numpy as np
import pytest
import pandas.util.testing as tm
from pandas import Categorical, Index, isna
from pandas.compat import lrange
from pandas.core.dtypes.dtypes import CategoricalDtype
class TestCategoricalMissing(object):
    """Missing-value handling of ``Categorical``."""

    def test_na_flags_int_categories(self):
        """``isna`` flags exactly the positions whose code is -1 (#1457)."""
        categories = lrange(10)
        codes = np.random.randint(0, 10, 20)
        codes[::5] = -1  # mark every fifth position as missing

        cat = Categorical(codes, categories, fastpath=True)
        repr(cat)

        tm.assert_numpy_array_equal(isna(cat), codes == -1)

    def test_nan_handling(self):
        """NaN is stored as code -1 and never becomes a category."""
        ab_index = Index(["a", "b"])

        # Nans are represented as -1 in codes
        cat = Categorical(["a", "b", np.nan, "a"])
        tm.assert_index_equal(cat.categories, ab_index)
        tm.assert_numpy_array_equal(cat._codes,
                                    np.array([0, 1, -1, 0], dtype=np.int8))

        cat[1] = np.nan
        tm.assert_index_equal(cat.categories, ab_index)
        tm.assert_numpy_array_equal(cat._codes,
                                    np.array([0, -1, -1, 0], dtype=np.int8))

        # A freshly built Categorical again has NaN as -1 in the codes and
        # absent from the categories.
        cat = Categorical(["a", "b", np.nan, "a"])
        tm.assert_index_equal(cat.categories, ab_index)
        tm.assert_numpy_array_equal(cat._codes,
                                    np.array([0, 1, -1, 0], dtype=np.int8))

    def test_set_dtype_nans(self):
        """After ``_set_dtype`` dropped categories and NaN both code -1."""
        cat = Categorical(['a', 'b', np.nan])
        converted = cat._set_dtype(CategoricalDtype(['a', 'c']))
        tm.assert_numpy_array_equal(converted.codes,
                                    np.array([0, -1, -1], dtype='int8'))

    def test_set_item_nan(self):
        """Assigning NaN to an element keeps the categories intact."""
        cat = Categorical([1, 2, 3])
        cat[1] = np.nan

        wanted = Categorical([1, np.nan, 3], categories=[1, 2, 3])
        tm.assert_categorical_equal(cat, wanted)

    @pytest.mark.parametrize('fillna_kwargs, msg', [
        (dict(value=1, method='ffill'),
         "Cannot specify both 'value' and 'method'."),
        (dict(),
         "Must specify a fill 'value' or 'method'."),
        (dict(method='bad'),
         "Invalid fill method. Expecting .* bad"),
    ])
    def test_fillna_raises(self, fillna_kwargs, msg):
        """Invalid ``fillna`` arguments raise ValueError with ``msg``."""
        # https://github.com/pandas-dev/pandas/issues/19682
        cat = Categorical([1, 2, 3])

        with tm.assert_raises_regex(ValueError, msg):
            cat.fillna(**fillna_kwargs)

    @pytest.mark.parametrize("named", [True, False])
    def test_fillna_iterable_category(self, named):
        """``fillna`` accepts a fill value that is a tuple or namedtuple."""
        # https://github.com/pandas-dev/pandas/issues/21097
        Point = (collections.namedtuple("Point", "x y") if named
                 else (lambda *args: args))  # tuple

        cat = Categorical([Point(0, 0), Point(0, 1), None])
        filled = cat.fillna(Point(0, 0))
        wanted = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)])

        tm.assert_categorical_equal(filled, wanted)
@@ -1,293 +0,0 @@
# -*- coding: utf-8 -*-
import pytest
import pandas as pd
import numpy as np
import pandas.util.testing as tm
from pandas import Categorical, Series, DataFrame, date_range
from pandas.tests.categorical.common import TestCategorical
class TestCategoricalOpsWithFactor(TestCategorical):
    """Comparison tests that run against ``self.factor``."""

    def test_categories_none_comparisons(self):
        """A Categorical built from the same values equals the fixture."""
        rebuilt = Categorical(['a', 'b', 'b', 'a',
                               'a', 'c', 'c', 'c'], ordered=True)
        tm.assert_categorical_equal(rebuilt, self.factor)

    def test_comparisons(self):
        """Comparisons against scalars, Categoricals, Series and arrays."""
        factor = self.factor

        # Masking with a scalar comparison must select the same elements
        # whether the comparison is done on the Categorical or on its
        # array form.
        scalar_comparisons = (
            lambda values: values == 'a',
            lambda values: values != 'a',
            lambda values: values < 'c',
            lambda values: values > 'a',
            lambda values: values >= 'b',
            lambda values: values <= 'b',
        )
        for compare in scalar_comparisons:
            result = factor[compare(factor)]
            expected = factor[compare(np.asarray(factor))]
            tm.assert_categorical_equal(result, expected)

        shuffled = factor[np.random.permutation(len(factor))]
        result = factor == shuffled
        expected = np.asarray(factor) == np.asarray(shuffled)
        tm.assert_numpy_array_equal(result, expected)

        result = factor == 'd'
        expected = np.repeat(False, len(factor))
        tm.assert_numpy_array_equal(result, expected)

        # comparisons with categoricals
        cat_rev = Categorical(
            ["a", "b", "c"], categories=["c", "b", "a"], ordered=True)
        cat_rev_base = Categorical(
            ["b", "b", "b"], categories=["c", "b", "a"], ordered=True)
        cat = Categorical(["a", "b", "c"], ordered=True)
        cat_base = Categorical(
            ["b", "b", "b"], categories=cat.categories, ordered=True)

        # comparisons need to take categories ordering into account
        tm.assert_numpy_array_equal(cat_rev > cat_rev_base,
                                    np.array([True, False, False]))
        tm.assert_numpy_array_equal(cat_rev < cat_rev_base,
                                    np.array([False, False, True]))
        tm.assert_numpy_array_equal(cat > cat_base,
                                    np.array([False, False, True]))

        # Only categories with same categories can be compared
        with pytest.raises(TypeError):
            cat > cat_rev

        cat_rev_base2 = Categorical(
            ["b", "b", "b"], categories=["c", "b", "a", "d"])

        with pytest.raises(TypeError):
            cat_rev > cat_rev_base2

        # Only categories with same ordering information can be compared
        cat_unordered = cat.set_ordered(False)
        assert not (cat > cat).any()

        with pytest.raises(TypeError):
            cat > cat_unordered

        # comparison (in both directions) with Series will raise
        series = Series(["b", "b", "b"])
        for bad_comparison in (lambda: cat > series,
                               lambda: cat_rev > series,
                               lambda: series < cat,
                               lambda: series < cat_rev):
            pytest.raises(TypeError, bad_comparison)

        # comparison with numpy.array will raise in both direction, but only on
        # newer numpy versions
        array = np.array(["b", "b", "b"])
        pytest.raises(TypeError, lambda: cat > array)
        pytest.raises(TypeError, lambda: cat_rev > array)

        # Make sure that unequal comparison take the categories order in
        # account
        cat_rev = Categorical(
            list("abc"), categories=list("cba"), ordered=True)
        tm.assert_numpy_array_equal(cat_rev > "b",
                                    np.array([True, False, False]))
class TestCategoricalOps(object):
def test_datetime_categorical_comparison(self):
dt_cat = Categorical(date_range('2014-01-01', periods=3), ordered=True)
tm.assert_numpy_array_equal(dt_cat > dt_cat[0],
np.array([False, True, True]))
tm.assert_numpy_array_equal(dt_cat[0] < dt_cat,
np.array([False, True, True]))
def test_reflected_comparison_with_scalars(self):
# GH8658
cat = Categorical([1, 2, 3], ordered=True)
tm.assert_numpy_array_equal(cat > cat[0],
np.array([False, True, True]))
tm.assert_numpy_array_equal(cat[0] < cat,
np.array([False, True, True]))
def test_comparison_with_unknown_scalars(self):
    """Ordering against a scalar that is not a category raises TypeError;
    ``==`` and ``!=`` still return boolean arrays."""
    # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057
    # and following comparisons with scalars not in categories should raise
    # for unequal comps, but not for equal/not equal
    cat = Categorical([1, 2, 3], ordered=True)

    for ordering_comparison in (lambda: cat < 4,
                                lambda: cat > 4,
                                lambda: 4 < cat,
                                lambda: 4 > cat):
        with pytest.raises(TypeError):
            ordering_comparison()

    tm.assert_numpy_array_equal(cat == 4,
                                np.array([False, False, False]))
    tm.assert_numpy_array_equal(cat != 4,
                                np.array([True, True, True]))
@pytest.mark.parametrize('data,reverse,base', [
    (list("abc"), list("cba"), list("bbb")),
    ([1, 2, 3], [3, 2, 1], [2, 2, 2])]
)
def test_comparisons(self, data, reverse, base):
    """Ordered categorical Series compare by category order and refuse
    comparison with differently-typed operands."""
    reversed_series = Series(
        Categorical(data, categories=reverse, ordered=True))
    reversed_base = Series(
        Categorical(base, categories=reverse, ordered=True))
    natural_series = Series(Categorical(data, ordered=True))
    natural_base = Series(
        Categorical(base, categories=natural_series.cat.categories,
                    ordered=True))
    plain_series = Series(base)
    plain_array = np.array(base)

    # comparisons need to take categories ordering into account
    tm.assert_series_equal(reversed_series > reversed_base,
                           Series([True, False, False]))
    tm.assert_series_equal(reversed_series < reversed_base,
                           Series([False, False, True]))
    tm.assert_series_equal(natural_series > natural_base,
                           Series([False, False, True]))

    # Against a scalar the Series result must also agree with comparing
    # the underlying values directly.
    scalar = base[1]
    result = natural_series > scalar
    from_values = natural_series.values > scalar
    tm.assert_series_equal(result, Series([False, False, True]))
    tm.assert_numpy_array_equal(result.values, from_values)

    result = reversed_series > scalar
    from_values = reversed_series.values > scalar
    tm.assert_series_equal(result, Series([True, False, False]))
    tm.assert_numpy_array_equal(result.values, from_values)

    invalid_comparisons = (
        # Only categories with same categories can be compared
        lambda: natural_series > reversed_series,
        # categorical cannot be compared to Series or numpy array, and also
        # not the other way around
        lambda: natural_series > plain_series,
        lambda: reversed_series > plain_series,
        lambda: natural_series > plain_array,
        lambda: reversed_series > plain_array,
        lambda: plain_series < natural_series,
        lambda: plain_series < reversed_series,
        lambda: plain_array < natural_series,
        lambda: plain_array < reversed_series,
    )
    for invalid in invalid_comparisons:
        pytest.raises(TypeError, invalid)
@pytest.mark.parametrize('ctor', [
    lambda *args, **kwargs: Categorical(*args, **kwargs),
    lambda *args, **kwargs: Series(Categorical(*args, **kwargs)),
])
def test_unordered_different_order_equal(self, ctor):
    """Unordered operands whose categories differ only in order compare by value.

    See https://github.com/pandas-dev/pandas/issues/16014
    """
    def unordered(values, categories):
        # every operand in this test is built unordered
        return ctor(values, categories=categories, ordered=False)

    # same values, categories listed in a different order
    left = unordered(['a', 'b'], ['a', 'b'])
    right = unordered(['a', 'b'], ['b', 'a'])
    assert (left == right).all()

    # values swapped along with the categories
    left = unordered(['a', 'b'], ['a', 'b'])
    right = unordered(['b', 'a'], ['b', 'a'])
    assert (left != right).all()

    # constant but different values
    left = unordered(['a', 'a'], ['a', 'b'])
    right = unordered(['b', 'b'], ['b', 'a'])
    assert (left != right).all()

    # partially matching values give an element-wise result
    left = unordered(['a', 'a'], ['a', 'b'])
    right = unordered(['a', 'b'], ['b', 'a'])
    elementwise = left == right
    tm.assert_numpy_array_equal(np.array(elementwise),
                                np.array([True, False]))
def test_unordered_different_categories_raises(self):
c1 = Categorical(['a', 'b'], categories=['a', 'b'], ordered=False)
c2 = Categorical(['a', 'c'], categories=['c', 'a'], ordered=False)
with tm.assert_raises_regex(TypeError,
"Categoricals can only be compared"):
c1 == c2
def test_compare_different_lengths(self):
c1 = Categorical([], categories=['a', 'b'])
c2 = Categorical([], categories=['a'])
msg = "Categories are different lengths"
with tm.assert_raises_regex(TypeError, msg):
c1 == c2
def test_compare_unordered_different_order(self):
# https://github.com/pandas-dev/pandas/issues/16603#issuecomment-
# 349290078
a = pd.Categorical(['a'], categories=['a', 'b'])
b = pd.Categorical(['b'], categories=['b', 'a'])
assert not a.equals(b)
def test_numeric_like_ops(self):
    """Arithmetic, numeric reductions and ufuncs on categorical data raise."""
    arithmetic_ops = ['__add__', '__sub__', '__mul__', '__truediv__']

    # frame with a numeric column and a categorical column derived from it
    frame = DataFrame({'value': np.random.randint(0, 10000, 100)})
    bin_labels = ["{0} - {1}".format(start, start + 499)
                  for start in range(0, 10000, 500)]
    cat_labels = Categorical(bin_labels, bin_labels)

    frame = frame.sort_values(by=['value'], ascending=True)
    frame['value_group'] = pd.cut(frame.value, range(0, 10500, 500),
                                  right=False, labels=cat_labels)

    # numeric ops should not succeed
    for op_name in arithmetic_ops:
        with pytest.raises(TypeError):
            getattr(frame, op_name)(frame)

    # reduction ops should not succeed (unless specifically defined, e.g.
    # min/max)
    grouped = frame['value_group']
    reductions = ['kurt', 'skew', 'var', 'std', 'mean', 'sum', 'median']
    for op_name in reductions:
        with pytest.raises(TypeError):
            getattr(grouped, op_name)(numeric_only=False)

    # 'mad' is deliberately not listed: it technically works because it
    # always takes the numeric data

    # numpy ops
    numbers = Series(Categorical([1, 2, 3, 4]))
    with pytest.raises(TypeError):
        np.sum(numbers)

    # numeric ops on a Series
    for op_name in arithmetic_ops:
        with pytest.raises(TypeError):
            getattr(numbers, op_name)(2)

    # invalid ufunc
    with pytest.raises(TypeError):
        np.log(numbers)
@@ -1,517 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
from pandas import (Categorical, Series, CategoricalIndex, date_range,
period_range, timedelta_range)
from pandas.compat import u, PY3
from pandas.core.config import option_context
from pandas.tests.categorical.common import TestCategorical
class TestCategoricalReprWithFactor(TestCategorical):
def test_print(self):
expected = ["[a, b, b, a, a, c, c, c]",
"Categories (3, object): [a < b < c]"]
expected = "\n".join(expected)
actual = repr(self.factor)
assert actual == expected
class TestCategoricalRepr(object):
def test_big_print(self):
factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ['a', 'b', 'c'],
fastpath=True)
expected = ["[a, b, c, a, b, ..., b, c, a, b, c]", "Length: 600",
"Categories (3, object): [a, b, c]"]
expected = "\n".join(expected)
actual = repr(factor)
assert actual == expected
def test_empty_print(self):
factor = Categorical([], ["a", "b", "c"])
expected = ("[], Categories (3, object): [a, b, c]")
# hack because array_repr changed in numpy > 1.6.x
actual = repr(factor)
assert actual == expected
assert expected == actual
factor = Categorical([], ["a", "b", "c"], ordered=True)
expected = ("[], Categories (3, object): [a < b < c]")
actual = repr(factor)
assert expected == actual
factor = Categorical([], [])
expected = ("[], Categories (0, object): []")
assert expected == repr(factor)
def test_print_none_width(self):
    # GH10087
    # The repr of a categorical Series must match the expected text while
    # the "display.width" option is set to None.
    # NOTE(review): the expected text is whitespace-sensitive and runs of
    # spaces may have been collapsed in this copy -- confirm against upstream.
    a = Series(Categorical([1, 2, 3, 4]))
    exp = u("0 1\n1 2\n2 3\n3 4\n" +
            "dtype: category\nCategories (4, int64): [1, 2, 3, 4]")

    with option_context("display.width", None):
        assert exp == repr(a)
def test_unicode_print(self):
    # Repr of categoricals holding ASCII strings and Japanese strings.
    # On Python 2 the text is obtained with unicode() instead of repr().
    if PY3:
        _rep = repr
    else:
        _rep = unicode # noqa

    # 60 ASCII values: truncated values line, Length line, categories footer
    c = Categorical(['aaaaa', 'bb', 'cccc'] * 20)
    expected = u"""\
[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc]
Length: 60
Categories (3, object): [aaaaa, bb, cccc]"""

    assert _rep(c) == expected

    # same layout with Japanese values
    c = Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20)
    expected = u"""\
[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう]
Length: 60
Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa

    assert _rep(c) == expected

    # the East Asian width option should not affect Categorical, because its
    # repr does not take the display width into account
    with option_context('display.unicode.east_asian_width', True):

        c = Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20)
        expected = u"""[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう]
Length: 60
Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa

        assert _rep(c) == expected
def test_categorical_repr(self):
    # Repr of unordered integer categoricals of increasing size.

    # three distinct values
    c = Categorical([1, 2, 3])
    exp = """[1, 2, 3]
Categories (3, int64): [1, 2, 3]"""

    assert repr(c) == exp

    # repeated values with explicit categories
    c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3])
    exp = """[1, 2, 3, 1, 2, 3]
Categories (3, int64): [1, 2, 3]"""

    assert repr(c) == exp

    # 50 values: the values line is truncated and a Length line appears
    c = Categorical([1, 2, 3, 4, 5] * 10)
    exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5]
Length: 50
Categories (5, int64): [1, 2, 3, 4, 5]"""

    assert repr(c) == exp

    # 20 categories: the categories footer is truncated as well
    c = Categorical(np.arange(20))
    exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19]
Length: 20
Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]"""

    assert repr(c) == exp
def test_categorical_repr_ordered(self):
    # Repr of ordered integer categoricals: the footer joins the categories
    # with "<" instead of ",".

    # three distinct values
    c = Categorical([1, 2, 3], ordered=True)
    exp = """[1, 2, 3]
Categories (3, int64): [1 < 2 < 3]"""

    assert repr(c) == exp

    # repeated values with explicit categories
    c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True)
    exp = """[1, 2, 3, 1, 2, 3]
Categories (3, int64): [1 < 2 < 3]"""

    assert repr(c) == exp

    # 50 values: truncated values line plus a Length line
    c = Categorical([1, 2, 3, 4, 5] * 10, ordered=True)
    exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5]
Length: 50
Categories (5, int64): [1 < 2 < 3 < 4 < 5]"""

    assert repr(c) == exp

    # 20 categories: the footer is elided with "..."
    c = Categorical(np.arange(20), ordered=True)
    exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19]
Length: 20
Categories (20, int64): [0 < 1 < 2 < 3 ... 16 < 17 < 18 < 19]"""

    assert repr(c) == exp
def test_categorical_repr_datetime(self):
    # Repr of unordered datetime categoricals: tz-naive first, then
    # US/Eastern; each once with unique values and once with every value
    # repeated (idx appended to itself, categories=idx).
    # NOTE(review): the expected text is whitespace-sensitive; the alignment
    # padding inside these literals looks collapsed to a single space in this
    # copy -- confirm against upstream.
    idx = date_range('2011-01-01 09:00', freq='H', periods=5)
    c = Categorical(idx)

    # TODO(wesm): exceeding 80 characters in the console is not good
    # behavior
    exp = (
        "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, "
        "2011-01-01 12:00:00, 2011-01-01 13:00:00]\n"
        "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, "
        "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n"
        " 2011-01-01 12:00:00, "
        "2011-01-01 13:00:00]""")
    assert repr(c) == exp

    # tz-naive, every value repeated
    c = Categorical(idx.append(idx), categories=idx)
    exp = (
        "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, "
        "2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, "
        "2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, "
        "2011-01-01 13:00:00]\n"
        "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, "
        "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n"
        " 2011-01-01 12:00:00, "
        "2011-01-01 13:00:00]")

    assert repr(c) == exp

    # tz-aware: every timestamp carries the -05:00 offset
    idx = date_range('2011-01-01 09:00', freq='H', periods=5,
                     tz='US/Eastern')
    c = Categorical(idx)
    exp = (
        "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, "
        "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, "
        "2011-01-01 13:00:00-05:00]\n"
        "Categories (5, datetime64[ns, US/Eastern]): "
        "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n"
        " "
        "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n"
        " "
        "2011-01-01 13:00:00-05:00]")

    assert repr(c) == exp

    # tz-aware, every value repeated
    c = Categorical(idx.append(idx), categories=idx)
    exp = (
        "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, "
        "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, "
        "2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, "
        "2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, "
        "2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n"
        "Categories (5, datetime64[ns, US/Eastern]): "
        "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n"
        " "
        "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n"
        " "
        "2011-01-01 13:00:00-05:00]")

    assert repr(c) == exp
def test_categorical_repr_datetime_ordered(self):
    # Repr of ordered datetime categoricals: tz-naive, then US/Eastern; each
    # once with unique values and once with every value repeated.
    # NOTE(review): the expected text is whitespace-sensitive; continuation
    # lines start at column 0 here and their alignment spaces may have been
    # lost in this copy -- confirm against upstream.
    idx = date_range('2011-01-01 09:00', freq='H', periods=5)
    c = Categorical(idx, ordered=True)
    exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00]
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa

    assert repr(c) == exp

    # tz-naive, every value repeated
    c = Categorical(idx.append(idx), categories=idx, ordered=True)
    exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00]
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa

    assert repr(c) == exp

    # tz-aware
    idx = date_range('2011-01-01 09:00', freq='H', periods=5,
                     tz='US/Eastern')
    c = Categorical(idx, ordered=True)
    exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
2011-01-01 13:00:00-05:00]""" # noqa

    assert repr(c) == exp

    # tz-aware, every value repeated
    c = Categorical(idx.append(idx), categories=idx, ordered=True)
    exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
2011-01-01 13:00:00-05:00]""" # noqa

    assert repr(c) == exp
def test_categorical_repr_period(self):
    # Repr of unordered period categoricals at hourly and monthly frequency,
    # each once with unique values and once with every value repeated.
    # NOTE(review): the expected text is whitespace-sensitive; continuation
    # lines start at column 0 here and their alignment spaces may have been
    # lost in this copy -- confirm against upstream.
    idx = period_range('2011-01-01 09:00', freq='H', periods=5)
    c = Categorical(idx)
    exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
2011-01-01 13:00]""" # noqa

    assert repr(c) == exp

    # hourly, every value repeated
    c = Categorical(idx.append(idx), categories=idx)
    exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
2011-01-01 13:00]""" # noqa

    assert repr(c) == exp

    # monthly periods are short enough for a single-line footer
    idx = period_range('2011-01', freq='M', periods=5)
    c = Categorical(idx)
    exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""

    assert repr(c) == exp

    # monthly, every value repeated
    c = Categorical(idx.append(idx), categories=idx)
    exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" # noqa

    assert repr(c) == exp
def test_categorical_repr_period_ordered(self):
    # Repr of ordered period categoricals at hourly and monthly frequency,
    # each once with unique values and once with every value repeated.
    # NOTE(review): the expected text is whitespace-sensitive; continuation
    # lines start at column 0 here and their alignment spaces may have been
    # lost in this copy -- confirm against upstream.
    idx = period_range('2011-01-01 09:00', freq='H', periods=5)
    c = Categorical(idx, ordered=True)
    exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
2011-01-01 13:00]""" # noqa

    assert repr(c) == exp

    # hourly, every value repeated
    c = Categorical(idx.append(idx), categories=idx, ordered=True)
    exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
2011-01-01 13:00]""" # noqa

    assert repr(c) == exp

    # monthly
    idx = period_range('2011-01', freq='M', periods=5)
    c = Categorical(idx, ordered=True)
    exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""

    assert repr(c) == exp

    # monthly, every value repeated
    c = Categorical(idx.append(idx), categories=idx, ordered=True)
    exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" # noqa

    assert repr(c) == exp
def test_categorical_repr_timedelta(self):
    # Repr of unordered timedelta categoricals: 5 categories, then 20
    # categories; each once with unique values and once with every value
    # repeated.
    # NOTE(review): the expected text is whitespace-sensitive; continuation
    # lines start at column 0 here and their alignment spaces may have been
    # lost in this copy -- confirm against upstream.
    idx = timedelta_range('1 days', periods=5)
    c = Categorical(idx)
    exp = """[1 days, 2 days, 3 days, 4 days, 5 days]
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""

    assert repr(c) == exp

    # 5 categories, every value repeated
    c = Categorical(idx.append(idx), categories=idx)
    exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days]
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" # noqa

    assert repr(c) == exp

    # 20 categories: values line and footer are both truncated
    idx = timedelta_range('1 hours', periods=20)
    c = Categorical(idx)
    exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
Length: 20
Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00,
18 days 01:00:00, 19 days 01:00:00]""" # noqa

    assert repr(c) == exp

    # 20 categories, every value repeated (Length becomes 40)
    c = Categorical(idx.append(idx), categories=idx)
    exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
Length: 40
Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00,
18 days 01:00:00, 19 days 01:00:00]""" # noqa

    assert repr(c) == exp
def test_categorical_repr_timedelta_ordered(self):
    # Repr of ordered timedelta categoricals: 5 categories, then 20
    # categories; each once with unique values and once with every value
    # repeated.
    # NOTE(review): the expected text is whitespace-sensitive; continuation
    # lines start at column 0 here and their alignment spaces may have been
    # lost in this copy -- confirm against upstream.
    idx = timedelta_range('1 days', periods=5)
    c = Categorical(idx, ordered=True)
    exp = """[1 days, 2 days, 3 days, 4 days, 5 days]
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa

    assert repr(c) == exp

    # 5 categories, every value repeated
    c = Categorical(idx.append(idx), categories=idx, ordered=True)
    exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days]
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa

    assert repr(c) == exp

    # 20 categories: values line and footer are both truncated
    idx = timedelta_range('1 hours', periods=20)
    c = Categorical(idx, ordered=True)
    exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
Length: 20
Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 <
18 days 01:00:00 < 19 days 01:00:00]""" # noqa

    assert repr(c) == exp

    # 20 categories, every value repeated (Length becomes 40)
    c = Categorical(idx.append(idx), categories=idx, ordered=True)
    exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
Length: 40
Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 <
18 days 01:00:00 < 19 days 01:00:00]""" # noqa

    assert repr(c) == exp
def test_categorical_index_repr(self):
    # Repr of an unordered integer CategoricalIndex.

    # three categories: everything is listed
    idx = CategoricalIndex(Categorical([1, 2, 3]))
    exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" # noqa
    assert repr(idx) == exp

    # ten categories: the values are listed in full, the categories
    # argument is elided with "..."
    i = CategoricalIndex(Categorical(np.arange(10)))
    exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=False, dtype='category')""" # noqa
    assert repr(i) == exp
def test_categorical_index_repr_ordered(self):
    # Repr of an ordered integer CategoricalIndex (ordered=True is shown).

    # three categories: everything is listed
    i = CategoricalIndex(Categorical([1, 2, 3], ordered=True))
    exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" # noqa
    assert repr(i) == exp

    # ten categories: the categories argument is elided with "..."
    i = CategoricalIndex(Categorical(np.arange(10), ordered=True))
    exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=True, dtype='category')""" # noqa
    assert repr(i) == exp
def test_categorical_index_repr_datetime(self):
    # Repr of an unordered datetime CategoricalIndex, tz-naive then
    # US/Eastern.
    # NOTE(review): the expected text is whitespace-sensitive; continuation
    # lines start at column 0 here and their alignment spaces may have been
    # lost in this copy -- confirm against upstream.
    idx = date_range('2011-01-01 09:00', freq='H', periods=5)
    i = CategoricalIndex(Categorical(idx))
    exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00',
'2011-01-01 11:00:00', '2011-01-01 12:00:00',
'2011-01-01 13:00:00'],
categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" # noqa

    assert repr(i) == exp

    # tz-aware: every timestamp carries the -05:00 offset
    idx = date_range('2011-01-01 09:00', freq='H', periods=5,
                     tz='US/Eastern')
    i = CategoricalIndex(Categorical(idx))
    exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
'2011-01-01 13:00:00-05:00'],
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')""" # noqa

    assert repr(i) == exp
def test_categorical_index_repr_datetime_ordered(self):
    # Repr of an ordered datetime CategoricalIndex: tz-naive, US/Eastern,
    # and US/Eastern with every value repeated.
    # NOTE(review): the expected text is whitespace-sensitive; continuation
    # lines start at column 0 here and their alignment spaces may have been
    # lost in this copy -- confirm against upstream.
    idx = date_range('2011-01-01 09:00', freq='H', periods=5)
    i = CategoricalIndex(Categorical(idx, ordered=True))
    exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00',
'2011-01-01 11:00:00', '2011-01-01 12:00:00',
'2011-01-01 13:00:00'],
categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" # noqa

    assert repr(i) == exp

    # tz-aware
    idx = date_range('2011-01-01 09:00', freq='H', periods=5,
                     tz='US/Eastern')
    i = CategoricalIndex(Categorical(idx, ordered=True))
    exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
'2011-01-01 13:00:00-05:00'],
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa

    assert repr(i) == exp

    # tz-aware, every value repeated: ten entries over the same categories
    i = CategoricalIndex(Categorical(idx.append(idx), ordered=True))
    exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
'2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00',
'2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00',
'2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'],
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa

    assert repr(i) == exp
def test_categorical_index_repr_period(self):
    # Repr of an unordered period CategoricalIndex at several lengths
    # (1, 2, 3 and 5 hourly periods, 5 periods repeated, 5 monthly periods).
    # NOTE(review): the expected text is whitespace-sensitive; continuation
    # lines start at column 0 here and their alignment spaces may have been
    # lost in this copy -- confirm against upstream.

    # test all length
    idx = period_range('2011-01-01 09:00', freq='H', periods=1)
    i = CategoricalIndex(Categorical(idx))
    exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')""" # noqa
    assert repr(i) == exp

    idx = period_range('2011-01-01 09:00', freq='H', periods=2)
    i = CategoricalIndex(Categorical(idx))
    exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" # noqa
    assert repr(i) == exp

    idx = period_range('2011-01-01 09:00', freq='H', periods=3)
    i = CategoricalIndex(Categorical(idx))
    exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" # noqa
    assert repr(i) == exp

    # five periods: the values wrap onto several lines
    idx = period_range('2011-01-01 09:00', freq='H', periods=5)
    i = CategoricalIndex(Categorical(idx))
    exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
'2011-01-01 12:00', '2011-01-01 13:00'],
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa

    assert repr(i) == exp

    # five periods, every value repeated
    i = CategoricalIndex(Categorical(idx.append(idx)))
    exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
'2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00',
'2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00',
'2011-01-01 13:00'],
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa

    assert repr(i) == exp

    # monthly periods fit on a single line
    idx = period_range('2011-01', freq='M', periods=5)
    i = CategoricalIndex(Categorical(idx))
    exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" # noqa
    assert repr(i) == exp
def test_categorical_index_repr_period_ordered(self):
    # Repr of an ordered period CategoricalIndex, hourly then monthly.
    # NOTE(review): the expected text is whitespace-sensitive; continuation
    # lines start at column 0 here and their alignment spaces may have been
    # lost in this copy -- confirm against upstream.
    idx = period_range('2011-01-01 09:00', freq='H', periods=5)
    i = CategoricalIndex(Categorical(idx, ordered=True))
    exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
'2011-01-01 12:00', '2011-01-01 13:00'],
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" # noqa

    assert repr(i) == exp

    # monthly periods fit on a single line
    idx = period_range('2011-01', freq='M', periods=5)
    i = CategoricalIndex(Categorical(idx, ordered=True))
    exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" # noqa
    assert repr(i) == exp
def test_categorical_index_repr_timedelta(self):
    # Repr of an unordered timedelta CategoricalIndex: 5 whole-day values,
    # then 10 values with an hour component.
    # NOTE(review): the expected text is whitespace-sensitive; continuation
    # lines start at column 0 here and their alignment spaces may have been
    # lost in this copy -- confirm against upstream.
    idx = timedelta_range('1 days', periods=5)
    i = CategoricalIndex(Categorical(idx))
    exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=False, dtype='category')""" # noqa
    assert repr(i) == exp

    # ten values: the values wrap, the categories argument is elided
    idx = timedelta_range('1 hours', periods=10)
    i = CategoricalIndex(Categorical(idx))
    exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00',
'3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00',
'6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00',
'9 days 01:00:00'],
categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=False, dtype='category')""" # noqa

    assert repr(i) == exp
def test_categorical_index_repr_timedelta_ordered(self):
    # Repr of an ordered timedelta CategoricalIndex: 5 whole-day values,
    # then 10 values with an hour component.
    # NOTE(review): the expected text is whitespace-sensitive; continuation
    # lines start at column 0 here and their alignment spaces may have been
    # lost in this copy -- confirm against upstream.
    idx = timedelta_range('1 days', periods=5)
    i = CategoricalIndex(Categorical(idx, ordered=True))
    exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')""" # noqa
    assert repr(i) == exp

    # ten values: the values wrap, the categories argument is elided
    idx = timedelta_range('1 hours', periods=10)
    i = CategoricalIndex(Categorical(idx, ordered=True))
    exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00',
'3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00',
'6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00',
'9 days 01:00:00'],
categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" # noqa

    assert repr(i) == exp
@@ -1,123 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pandas.util.testing as tm
from pandas import Categorical, Index
class TestCategoricalSort(object):
def test_argsort(self):
c = Categorical([5, 3, 1, 4, 2], ordered=True)
expected = np.array([2, 4, 1, 3, 0])
tm.assert_numpy_array_equal(c.argsort(ascending=True), expected,
check_dtype=False)
expected = expected[::-1]
tm.assert_numpy_array_equal(c.argsort(ascending=False), expected,
check_dtype=False)
def test_numpy_argsort(self):
c = Categorical([5, 3, 1, 4, 2], ordered=True)
expected = np.array([2, 4, 1, 3, 0])
tm.assert_numpy_array_equal(np.argsort(c), expected,
check_dtype=False)
tm.assert_numpy_array_equal(np.argsort(c, kind='mergesort'), expected,
check_dtype=False)
msg = "the 'axis' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, np.argsort,
c, axis=0)
msg = "the 'order' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, np.argsort,
c, order='C')
def test_sort_values(self):
# unordered cats are sortable
cat = Categorical(["a", "b", "b", "a"], ordered=False)
cat.sort_values()
cat = Categorical(["a", "c", "b", "d"], ordered=True)
# sort_values
res = cat.sort_values()
exp = np.array(["a", "b", "c", "d"], dtype=object)
tm.assert_numpy_array_equal(res.__array__(), exp)
tm.assert_index_equal(res.categories, cat.categories)
cat = Categorical(["a", "c", "b", "d"],
categories=["a", "b", "c", "d"], ordered=True)
res = cat.sort_values()
exp = np.array(["a", "b", "c", "d"], dtype=object)
tm.assert_numpy_array_equal(res.__array__(), exp)
tm.assert_index_equal(res.categories, cat.categories)
res = cat.sort_values(ascending=False)
exp = np.array(["d", "c", "b", "a"], dtype=object)
tm.assert_numpy_array_equal(res.__array__(), exp)
tm.assert_index_equal(res.categories, cat.categories)
# sort (inplace order)
cat1 = cat.copy()
cat1.sort_values(inplace=True)
exp = np.array(["a", "b", "c", "d"], dtype=object)
tm.assert_numpy_array_equal(cat1.__array__(), exp)
tm.assert_index_equal(res.categories, cat.categories)
# reverse
cat = Categorical(["a", "c", "c", "b", "d"], ordered=True)
res = cat.sort_values(ascending=False)
exp_val = np.array(["d", "c", "c", "b", "a"], dtype=object)
exp_categories = Index(["a", "b", "c", "d"])
tm.assert_numpy_array_equal(res.__array__(), exp_val)
tm.assert_index_equal(res.categories, exp_categories)
def test_sort_values_na_position(self):
# see gh-12882
cat = Categorical([5, 2, np.nan, 2, np.nan], ordered=True)
exp_categories = Index([2, 5])
exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan])
res = cat.sort_values() # default arguments
tm.assert_numpy_array_equal(res.__array__(), exp)
tm.assert_index_equal(res.categories, exp_categories)
exp = np.array([np.nan, np.nan, 2.0, 2.0, 5.0])
res = cat.sort_values(ascending=True, na_position='first')
tm.assert_numpy_array_equal(res.__array__(), exp)
tm.assert_index_equal(res.categories, exp_categories)
exp = np.array([np.nan, np.nan, 5.0, 2.0, 2.0])
res = cat.sort_values(ascending=False, na_position='first')
tm.assert_numpy_array_equal(res.__array__(), exp)
tm.assert_index_equal(res.categories, exp_categories)
exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan])
res = cat.sort_values(ascending=True, na_position='last')
tm.assert_numpy_array_equal(res.__array__(), exp)
tm.assert_index_equal(res.categories, exp_categories)
exp = np.array([5.0, 2.0, 2.0, np.nan, np.nan])
res = cat.sort_values(ascending=False, na_position='last')
tm.assert_numpy_array_equal(res.__array__(), exp)
tm.assert_index_equal(res.categories, exp_categories)
cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True)
res = cat.sort_values(ascending=False, na_position='last')
exp_val = np.array(["d", "c", "b", "a", np.nan], dtype=object)
exp_categories = Index(["a", "b", "c", "d"])
tm.assert_numpy_array_equal(res.__array__(), exp_val)
tm.assert_index_equal(res.categories, exp_categories)
cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True)
res = cat.sort_values(ascending=False, na_position='first')
exp_val = np.array([np.nan, "d", "c", "b", "a"], dtype=object)
exp_categories = Index(["a", "b", "c", "d"])
tm.assert_numpy_array_equal(res.__array__(), exp_val)
tm.assert_index_equal(res.categories, exp_categories)
@@ -1,26 +0,0 @@
# -*- coding: utf-8 -*-
from pandas import Categorical
import pandas.util.testing as tm
class TestCategoricalSubclassing(object):
def test_constructor(self):
sc = tm.SubclassedCategorical(['a', 'b', 'c'])
assert isinstance(sc, tm.SubclassedCategorical)
tm.assert_categorical_equal(sc, Categorical(['a', 'b', 'c']))
def test_from_codes(self):
sc = tm.SubclassedCategorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
assert isinstance(sc, tm.SubclassedCategorical)
exp = Categorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
tm.assert_categorical_equal(sc, exp)
def test_map(self):
sc = tm.SubclassedCategorical(['a', 'b', 'c'])
res = sc.map(lambda x: x.upper())
assert isinstance(res, tm.SubclassedCategorical)
exp = Categorical(['A', 'B', 'C'])
tm.assert_categorical_equal(res, exp)
@@ -1,18 +0,0 @@
# -*- coding: utf-8 -*-
import pytest
import pandas.util.testing as tm
class TestCategoricalWarnings(object):
    """Warning behaviour of ``Categorical`` under IPython tab completion."""

    def test_tab_complete_warning(self, ip):
        """Completing attributes of a Categorical must not emit a warning.

        ``ip`` is a pytest fixture; this test only relies on its ``run_code``
        method and its ``Completer.completions``.
        See https://github.com/pandas-dev/pandas/issues/16409
        """
        pytest.importorskip('IPython', minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        # The snippet imports only ``pd``, so the constructor has to be
        # qualified. With a bare ``Categorical`` the snippet itself never
        # binds ``c`` and the completion below has nothing to inspect.
        code = "import pandas as pd; c = pd.Categorical([])"
        ip.run_code(code)
        with tm.assert_produces_warning(None):
            with provisionalcompleter('ignore'):
                # consume the generator so the completions are really computed
                list(ip.Completer.completions('c.', 1))