demo + utils venv
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,56 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Index, MultiIndex
|
||||
|
||||
|
||||
@pytest.fixture
def idx():
    """Return the two-level MultiIndex used by the general-purpose tests.

    The index has six entries, is named ('first', 'second'), and is built
    from explicit levels and codes with ``verify_integrity=False``.
    """
    levels = [Index(['foo', 'bar', 'baz', 'qux']), Index(['one', 'two'])]
    codes = [np.array([0, 0, 1, 2, 3, 3]), np.array([0, 1, 0, 1, 0, 1])]
    return MultiIndex(levels=levels, codes=codes,
                      names=['first', 'second'], verify_integrity=False)
|
||||
|
||||
|
||||
@pytest.fixture
def idx_dup():
    """Return a MultiIndex like ``idx`` whose codes yield repeated entries.

    Compare tests/indexes/multi/conftest.py.
    """
    levels = [Index(['foo', 'bar', 'baz', 'qux']), Index(['one', 'two'])]
    codes = [np.array([0, 0, 1, 0, 1, 1]), np.array([0, 1, 0, 1, 0, 1])]
    return MultiIndex(levels=levels, codes=codes,
                      names=['first', 'second'], verify_integrity=False)
|
||||
|
||||
|
||||
@pytest.fixture
def index_names():
    """Return the level names carried by the ``idx`` fixture.

    Used to test equality of the names assigned to the index.
    """
    names = ['first', 'second']
    return names
|
||||
|
||||
|
||||
@pytest.fixture
def holder():
    """Return the MultiIndex class, used for pickle-compatibility checks."""
    index_class = MultiIndex
    return index_class
|
||||
|
||||
|
||||
@pytest.fixture
def compat_props():
    """Return the attribute names a MultiIndex must expose."""
    required = ['shape', 'ndim', 'size']
    return required
|
||||
@@ -0,0 +1,321 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, MultiIndex, date_range, period_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_shift(idx):
|
||||
|
||||
# GH8083 test the base class for shift
|
||||
pytest.raises(NotImplementedError, idx.shift, 1)
|
||||
pytest.raises(NotImplementedError, idx.shift, 1, 2)
|
||||
|
||||
|
||||
def test_groupby(idx):
|
||||
groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
|
||||
labels = idx.get_values().tolist()
|
||||
exp = {1: labels[:3], 2: labels[3:]}
|
||||
tm.assert_dict_equal(groups, exp)
|
||||
|
||||
# GH5620
|
||||
groups = idx.groupby(idx)
|
||||
exp = {key: [key] for key in idx}
|
||||
tm.assert_dict_equal(groups, exp)
|
||||
|
||||
|
||||
def test_truncate():
    """truncate() trims the first level to the requested label range."""
    major_axis = Index(list(range(4)))
    minor_axis = Index(list(range(2)))

    major_codes = np.array([0, 0, 1, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])

    index = MultiIndex(levels=[major_axis, minor_axis],
                       codes=[major_codes, minor_codes])

    result = index.truncate(before=1)
    # the level holds integers, so check that the label below ``before``
    # (0) is gone; the previous check against 'foo' could never fail
    assert 0 not in result.levels[0]
    assert 1 in result.levels[0]

    result = index.truncate(after=1)
    assert 2 not in result.levels[0]
    assert 1 in result.levels[0]

    result = index.truncate(before=1, after=2)
    assert len(result.levels[0]) == 2

    # after < before
    with pytest.raises(ValueError):
        index.truncate(3, 1)
|
||||
|
||||
|
||||
def test_where():
    """where() with a scalar condition raises NotImplementedError."""
    mi = MultiIndex.from_tuples([('A', 1), ('A', 2)])
    pytest.raises(NotImplementedError, mi.where, True)
|
||||
|
||||
|
||||
def test_where_array_like():
    """where() raises NotImplementedError for each array-like condition."""
    mi = MultiIndex.from_tuples([('A', 1), ('A', 2)])
    mask = [False, True]

    for wrap in (list, tuple, np.array, pd.Series):
        with pytest.raises(NotImplementedError):
            mi.where(wrap(mask))
|
||||
|
||||
|
||||
# TODO: reshape
|
||||
|
||||
|
||||
def test_reorder_levels(idx):
|
||||
# this blows up
|
||||
with pytest.raises(IndexError, match='^Too many levels'):
|
||||
idx.reorder_levels([2, 1, 0])
|
||||
|
||||
|
||||
def test_numpy_repeat():
    """np.repeat works on a MultiIndex but rejects the ``axis`` argument."""
    reps = 2
    numbers = [1, 2, 3]
    names = np.array(['foo', 'bar'])

    m = MultiIndex.from_product([numbers, names], names=names)
    repeated_names = names.repeat(reps)
    expected = MultiIndex.from_product([numbers, repeated_names],
                                       names=names)
    result = np.repeat(m, reps)
    tm.assert_index_equal(result, expected)

    with pytest.raises(ValueError,
                       match="the 'axis' parameter is not supported"):
        np.repeat(m, reps, axis=1)
|
||||
|
||||
|
||||
def test_append_mixed_dtypes():
    """append() concatenates level by level across mixed dtypes."""
    # GH 13660
    naive = date_range('2011-01-01', freq='M', periods=3)
    aware = date_range('2011-01-01', freq='M', periods=3, tz='US/Eastern')
    periods = period_range('2011-01', freq='M', periods=3)
    ints = [1, 2, 3]
    floats = [1.1, np.nan, 3.3]
    strs = ['a', 'b', 'c']

    mi = MultiIndex.from_arrays([ints, floats, strs, naive, aware, periods])
    assert mi.nlevels == 6

    # appending the index to itself
    doubled = MultiIndex.from_arrays([ints * 2, floats * 2, strs * 2,
                                      naive.append(naive),
                                      aware.append(aware),
                                      periods.append(periods)])
    tm.assert_index_equal(mi.append(mi), doubled)

    # appending an index whose six levels are all strings
    xyz = ['x', 'y', 'z']
    other = MultiIndex.from_arrays([xyz] * 6)
    mixed = MultiIndex.from_arrays([ints + xyz, floats + xyz, strs + xyz,
                                    naive.append(pd.Index(xyz)),
                                    aware.append(pd.Index(xyz)),
                                    periods.append(pd.Index(xyz))])
    tm.assert_index_equal(mi.append(other), mixed)
|
||||
|
||||
|
||||
def test_take(idx):
|
||||
indexer = [4, 3, 0, 2]
|
||||
result = idx.take(indexer)
|
||||
expected = idx[indexer]
|
||||
assert result.equals(expected)
|
||||
|
||||
# TODO: Remove Commented Code
|
||||
# if not isinstance(idx,
|
||||
# (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
|
||||
# GH 10791
|
||||
with pytest.raises(AttributeError):
|
||||
idx.freq
|
||||
|
||||
|
||||
def test_take_invalid_kwargs(idx):
|
||||
idx = idx
|
||||
indices = [1, 2]
|
||||
|
||||
msg = r"take\(\) got an unexpected keyword argument 'foo'"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
idx.take(indices, foo=2)
|
||||
|
||||
msg = "the 'out' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
idx.take(indices, out=indices)
|
||||
|
||||
msg = "the 'mode' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
idx.take(indices, mode='clip')
|
||||
|
||||
|
||||
def test_take_fill_value():
    """take() honours fill_value/allow_fill and validates negative indices."""
    # GH 12631
    names = ['str', 'dt']
    day1 = pd.Timestamp('2011-01-01')
    day2 = pd.Timestamp('2011-01-02')
    idx = pd.MultiIndex.from_product([['A', 'B'], [day1, day2]],
                                     names=names)
    positions = np.array([1, 0, -1])

    # without fill_value, -1 selects the last entry
    wrapped = pd.MultiIndex.from_tuples(
        [('A', day2), ('A', day1), ('B', day2)], names=names)
    tm.assert_index_equal(idx.take(positions), wrapped)

    # fill_value: -1 becomes a missing entry
    filled = pd.MultiIndex.from_tuples(
        [('A', day2), ('A', day1), (np.nan, pd.NaT)], names=names)
    tm.assert_index_equal(idx.take(positions, fill_value=True), filled)

    # allow_fill=False: fill_value is ignored
    unfilled = idx.take(positions, allow_fill=False, fill_value=True)
    tm.assert_index_equal(unfilled, wrapped)

    msg = ('When allow_fill=True and fill_value is not None, '
           'all indices must be >= -1')
    for too_negative in (-2, -5):
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, too_negative]), fill_value=True)

    with pytest.raises(IndexError):
        idx.take(np.array([1, -5]))
|
||||
|
||||
|
||||
def test_iter(idx):
|
||||
result = list(idx)
|
||||
expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
|
||||
('baz', 'two'), ('qux', 'one'), ('qux', 'two')]
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_sub(idx):
|
||||
|
||||
first = idx
|
||||
|
||||
# - now raises (previously was set op difference)
|
||||
with pytest.raises(TypeError):
|
||||
first - idx[-3:]
|
||||
with pytest.raises(TypeError):
|
||||
idx[-3:] - first
|
||||
with pytest.raises(TypeError):
|
||||
idx[-3:] - first.tolist()
|
||||
with pytest.raises(TypeError):
|
||||
first.tolist() - idx[-3:]
|
||||
|
||||
|
||||
def test_map(idx):
|
||||
# callable
|
||||
index = idx
|
||||
|
||||
# we don't infer UInt64
|
||||
if isinstance(index, pd.UInt64Index):
|
||||
expected = index.astype('int64')
|
||||
else:
|
||||
expected = index
|
||||
|
||||
result = index.map(lambda x: x)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"mapper",
|
||||
[
|
||||
lambda values, idx: {i: e for e, i in zip(values, idx)},
|
||||
lambda values, idx: pd.Series(values, idx)])
|
||||
def test_map_dictlike(idx, mapper):
|
||||
|
||||
if isinstance(idx, (pd.CategoricalIndex, pd.IntervalIndex)):
|
||||
pytest.skip("skipping tests for {}".format(type(idx)))
|
||||
|
||||
identity = mapper(idx.values, idx)
|
||||
|
||||
# we don't infer to UInt64 for a dict
|
||||
if isinstance(idx, pd.UInt64Index) and isinstance(identity, dict):
|
||||
expected = idx.astype('int64')
|
||||
else:
|
||||
expected = idx
|
||||
|
||||
result = idx.map(identity)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# empty mappable
|
||||
expected = pd.Index([np.nan] * len(idx))
|
||||
result = idx.map(mapper(expected, idx))
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('func', [
    np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10,
    np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin,
    np.arccos, np.arctan, np.sinh, np.cosh, np.tanh,
    np.arcsinh, np.arccosh, np.arctanh, np.deg2rad,
    np.rad2deg
])
def test_numpy_ufuncs(func):
    """Applying a numpy math ufunc to a MultiIndex raises.

    See http://docs.scipy.org/doc/numpy/reference/ufuncs.html
    """
    # same index as the ``idx`` fixture, built inline
    levels = [Index(['foo', 'bar', 'baz', 'qux']), Index(['one', 'two'])]
    codes = [np.array([0, 0, 1, 2, 3, 3]), np.array([0, 1, 0, 1, 0, 1])]
    idx = MultiIndex(levels=levels, codes=codes,
                     names=['first', 'second'], verify_integrity=False)

    with pytest.raises(Exception), np.errstate(all='ignore'):
        func(idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('func', [
    np.isfinite, np.isinf, np.isnan, np.signbit
])
def test_numpy_type_funcs(func):
    """Applying a numpy type-check function to a MultiIndex raises."""
    # same index as the ``idx`` fixture, built inline
    levels = [Index(['foo', 'bar', 'baz', 'qux']), Index(['one', 'two'])]
    codes = [np.array([0, 0, 1, 2, 3, 3]), np.array([0, 1, 0, 1, 0, 1])]
    idx = MultiIndex(levels=levels, codes=codes,
                     names=['first', 'second'], verify_integrity=False)

    pytest.raises(Exception, func, idx)
|
||||
@@ -0,0 +1,32 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
from pandas.util.testing import assert_copy
|
||||
|
||||
|
||||
def test_astype(idx):
|
||||
expected = idx.copy()
|
||||
actual = idx.astype('O')
|
||||
assert_copy(actual.levels, expected.levels)
|
||||
assert_copy(actual.codes, expected.codes)
|
||||
assert [level.name for level in actual.levels] == list(expected.names)
|
||||
|
||||
with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
|
||||
idx.astype(np.dtype(int))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('ordered', [True, False])
|
||||
def test_astype_category(idx, ordered):
|
||||
# GH 18630
|
||||
msg = '> 1 ndim Categorical are not supported at this time'
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx.astype(CategoricalDtype(ordered=ordered))
|
||||
|
||||
if ordered is False:
|
||||
# dtype='category' defaults to ordered=False, so only test once
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx.astype('category')
|
||||
@@ -0,0 +1,131 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PY3, long
|
||||
|
||||
from pandas import MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_numeric_compat(idx):
    """Arithmetic on a MultiIndex raises TypeError naming the operator."""
    # the division dunder differs between Python 2 and Python 3
    div_name = '__truediv__' if PY3 else '__div__'
    rdiv_name = '__r' + div_name[2:]

    cases = [
        ('__mul__', lambda: idx * 1),
        ('__rmul__', lambda: 1 * idx),
        (div_name, lambda: idx / 1),
        (rdiv_name, lambda: 1 / idx),
        ('__floordiv__', lambda: idx // 1),
        ('__rfloordiv__', lambda: 1 // idx),
    ]
    for dunder, operation in cases:
        with pytest.raises(TypeError, match='cannot perform ' + dunder):
            operation()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["all", "any"])
|
||||
def test_logical_compat(idx, method):
|
||||
msg = "cannot perform {method}".format(method=method)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
getattr(idx, method)()
|
||||
|
||||
|
||||
def test_boolean_context_compat(idx):
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
bool(idx)
|
||||
|
||||
|
||||
def test_boolean_context_compat2():
    """An intersection result also raises ValueError in boolean context."""
    # boolean context compat
    # GH7897
    left = MultiIndex.from_tuples([('A', 1), ('A', 2)])
    right = MultiIndex.from_tuples([('A', 1), ('A', 3)])
    common = left.intersection(right)

    pytest.raises(ValueError, bool, common)
|
||||
|
||||
|
||||
def test_inplace_mutation_resets_values():
    """Setting levels/codes in place refreshes ``values``; copies do not."""
    first_levels = [['a', 'b', 'c'], [4]]
    second_levels = [[1, 2, 3], ['a']]
    shared_codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]

    mi1 = MultiIndex(levels=first_levels, codes=shared_codes)
    mi2 = MultiIndex(levels=second_levels, codes=shared_codes)
    snapshot1 = mi1.values.copy()
    snapshot2 = mi2.values.copy()

    assert mi1._tuples is not None

    # Make sure level setting works
    relabelled = mi1.set_levels(second_levels).values
    tm.assert_almost_equal(snapshot2, relabelled)

    # Non-inplace doesn't kill _tuples [implementation detail]
    tm.assert_almost_equal(mi1._tuples, snapshot1)

    # ...and values is still same too
    tm.assert_almost_equal(mi1.values, snapshot1)

    # Inplace should kill _tuples
    mi1.set_levels(second_levels, inplace=True)
    tm.assert_almost_equal(mi1.values, snapshot2)

    # Make sure label setting works too
    zero_codes = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
    exp_values = np.empty((6,), dtype=object)
    exp_values[:] = [(long(1), 'a')] * 6

    # Must be 1d array of tuples
    assert exp_values.shape == (6,)
    recoded = mi2.set_codes(zero_codes).values

    # Not inplace shouldn't change
    tm.assert_almost_equal(mi2._tuples, snapshot2)

    # Should have correct values
    tm.assert_almost_equal(exp_values, recoded)

    # ...and again setting inplace should kill _tuples, etc
    mi2.set_codes(zero_codes, inplace=True)
    tm.assert_almost_equal(mi2.values, recoded)
|
||||
|
||||
|
||||
def test_ndarray_compat_properties(idx, compat_props):
|
||||
assert idx.T.equals(idx)
|
||||
assert idx.transpose().equals(idx)
|
||||
|
||||
values = idx.values
|
||||
for prop in compat_props:
|
||||
assert getattr(idx, prop) == getattr(values, prop)
|
||||
|
||||
# test for validity
|
||||
idx.nbytes
|
||||
idx.values.nbytes
|
||||
|
||||
|
||||
def test_compat(indices):
|
||||
assert indices.tolist() == list(indices)
|
||||
|
||||
|
||||
def test_pickle_compat_construction(holder):
|
||||
# this is testing for pickle compat
|
||||
if holder is None:
|
||||
return
|
||||
|
||||
# need an object to create with
|
||||
pytest.raises(TypeError, holder)
|
||||
+577
@@ -0,0 +1,577 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from collections import OrderedDict
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslib import Timestamp
|
||||
from pandas.compat import lrange, range
|
||||
|
||||
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, MultiIndex, date_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_constructor_single_level():
    """A single level still builds a MultiIndex with a named level."""
    labels = ['foo', 'bar', 'baz', 'qux']
    result = MultiIndex(levels=[labels], codes=[[0, 1, 2, 3]],
                        names=['first'])
    assert isinstance(result, MultiIndex)

    only_level = result.levels[0]
    tm.assert_index_equal(only_level, Index(labels, name='first'))
    assert result.names == ['first']
|
||||
|
||||
|
||||
def test_constructor_no_levels():
    """Empty or missing levels/codes are rejected by the constructor."""
    with pytest.raises(ValueError, match="non-zero number of levels/codes"):
        MultiIndex(levels=[], codes=[])

    # passing only one of the two arguments is a TypeError
    both_re = re.compile('Must pass both levels and codes')
    for partial_kwargs in ({'levels': []}, {'codes': []}):
        with pytest.raises(TypeError, match=both_re):
            MultiIndex(**partial_kwargs)
|
||||
|
||||
|
||||
def test_constructor_nonhashable_names():
    """Unhashable names raise TypeError on construction and renaming."""
    # GH 20527
    levels = [[1, 2], [u'one', u'two']]
    codes = [[0, 0, 1, 1], [0, 1, 0, 1]]
    message = "MultiIndex.name must be a hashable type"

    with pytest.raises(TypeError, match=message):
        MultiIndex(levels=levels, codes=codes, names=(['foo'], ['bar']))

    mi = MultiIndex(levels=[[1, 2], [u'one', u'two']],
                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
                    names=('foo', 'bar'))
    renamed = [['foor'], ['barr']]

    # With .rename(), then with .set_names()
    for renamer in (mi.rename, mi.set_names):
        with pytest.raises(TypeError, match=message):
            renamer(names=renamed)
|
||||
|
||||
|
||||
def test_constructor_mismatched_codes_levels(idx):
|
||||
codes = [np.array([1]), np.array([2]), np.array([3])]
|
||||
levels = ["a"]
|
||||
|
||||
msg = "Length of levels and codes must be the same"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
MultiIndex(levels=levels, codes=codes)
|
||||
|
||||
length_error = re.compile('>= length of level')
|
||||
label_error = re.compile(r'Unequal code lengths: \[4, 2\]')
|
||||
|
||||
# important to check that it's looking at the right thing.
|
||||
with pytest.raises(ValueError, match=length_error):
|
||||
MultiIndex(levels=[['a'], ['b']],
|
||||
codes=[[0, 1, 2, 3], [0, 3, 4, 1]])
|
||||
|
||||
with pytest.raises(ValueError, match=label_error):
|
||||
MultiIndex(levels=[['a'], ['b']], codes=[[0, 0, 0, 0], [0, 0]])
|
||||
|
||||
# external API
|
||||
with pytest.raises(ValueError, match=length_error):
|
||||
idx.copy().set_levels([['a'], ['b']])
|
||||
|
||||
with pytest.raises(ValueError, match=label_error):
|
||||
idx.copy().set_codes([[0, 0, 0, 0], [0, 0]])
|
||||
|
||||
|
||||
def test_labels_deprecated(idx):
|
||||
# GH23752
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
|
||||
labels=[[0, 1, 2, 3]], names=['first'])
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
idx.labels
|
||||
|
||||
|
||||
def test_copy_in_constructor():
    """With copy=True, later edits to the inputs do not reach the index."""
    levels = np.array(["a", "b", "c"])
    codes = np.array([1, 1, 2, 0, 0, 1, 1])
    first_code = codes[0]

    mi = MultiIndex(levels=[levels, levels], codes=[codes, codes],
                    copy=True)
    assert mi.codes[0][0] == first_code

    # mutate the source codes array; the index must be unaffected
    codes[0] = 15
    assert mi.codes[0][0] == first_code

    # same check for the source levels array
    first_level = levels[0]
    levels[0] = "PANDA"
    assert mi.levels[0][0] == first_level
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_arrays
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_arrays(idx):
|
||||
arrays = [np.asarray(lev).take(level_codes)
|
||||
for lev, level_codes in zip(idx.levels, idx.codes)]
|
||||
|
||||
# list of arrays as input
|
||||
result = MultiIndex.from_arrays(arrays, names=idx.names)
|
||||
tm.assert_index_equal(result, idx)
|
||||
|
||||
# infer correctly
|
||||
result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')],
|
||||
['a', 'b']])
|
||||
assert result.levels[0].equals(Index([Timestamp('20130101')]))
|
||||
assert result.levels[1].equals(Index(['a', 'b']))
|
||||
|
||||
|
||||
def test_from_arrays_iterator(idx):
|
||||
# GH 18434
|
||||
arrays = [np.asarray(lev).take(level_codes)
|
||||
for lev, level_codes in zip(idx.levels, idx.codes)]
|
||||
|
||||
# iterator as input
|
||||
result = MultiIndex.from_arrays(iter(arrays), names=idx.names)
|
||||
tm.assert_index_equal(result, idx)
|
||||
|
||||
# invalid iterator input
|
||||
msg = "Input must be a list / sequence of array-likes."
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
MultiIndex.from_arrays(0)
|
||||
|
||||
|
||||
def test_from_arrays_index_series_datetimetz():
    """tz-aware indexes survive from_arrays as Index or Series input."""
    idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3,
                         tz='US/Eastern')
    idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3,
                         tz='Asia/Tokyo')
    sources = [idx1, idx2]

    result = pd.MultiIndex.from_arrays(sources)
    for position, source in enumerate(sources):
        tm.assert_index_equal(result.get_level_values(position), source)

    result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
    for position, source in enumerate(sources):
        tm.assert_index_equal(result2.get_level_values(position), source)

    tm.assert_index_equal(result, result2)
|
||||
|
||||
|
||||
def test_from_arrays_index_series_timedelta():
    """Timedelta indexes survive from_arrays as Index or Series input."""
    idx1 = pd.timedelta_range('1 days', freq='D', periods=3)
    idx2 = pd.timedelta_range('2 hours', freq='H', periods=3)
    sources = [idx1, idx2]

    result = pd.MultiIndex.from_arrays(sources)
    for position, source in enumerate(sources):
        tm.assert_index_equal(result.get_level_values(position), source)

    result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
    for position, source in enumerate(sources):
        tm.assert_index_equal(result2.get_level_values(position), source)

    tm.assert_index_equal(result, result2)
|
||||
|
||||
|
||||
def test_from_arrays_index_series_period():
    """Period indexes survive from_arrays as Index or Series input."""
    idx1 = pd.period_range('2011-01-01', freq='D', periods=3)
    idx2 = pd.period_range('2015-01-01', freq='H', periods=3)
    sources = [idx1, idx2]

    result = pd.MultiIndex.from_arrays(sources)
    for position, source in enumerate(sources):
        tm.assert_index_equal(result.get_level_values(position), source)

    result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
    for position, source in enumerate(sources):
        tm.assert_index_equal(result2.get_level_values(position), source)

    tm.assert_index_equal(result, result2)
|
||||
|
||||
|
||||
def test_from_arrays_index_datetimelike_mixed():
    """Mixed datetime-like indexes each keep their values as a level."""
    idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3,
                         tz='US/Eastern')
    idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3)
    idx3 = pd.timedelta_range('1 days', freq='D', periods=3)
    idx4 = pd.period_range('2011-01-01', freq='D', periods=3)
    sources = [idx1, idx2, idx3, idx4]

    result = pd.MultiIndex.from_arrays(sources)
    for position, source in enumerate(sources):
        tm.assert_index_equal(result.get_level_values(position), source)

    result2 = pd.MultiIndex.from_arrays([pd.Series(idx1),
                                         pd.Series(idx2),
                                         pd.Series(idx3),
                                         pd.Series(idx4)])
    for position, source in enumerate(sources):
        tm.assert_index_equal(result2.get_level_values(position), source)

    tm.assert_index_equal(result, result2)
|
||||
|
||||
|
||||
def test_from_arrays_index_series_categorical():
    """Categorical input keeps its values as Index, Series or ``.values``."""
    # GH13743
    idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
                               ordered=False)
    idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
                               ordered=True)
    sources = [idx1, idx2]

    result = pd.MultiIndex.from_arrays(sources)
    for position, source in enumerate(sources):
        tm.assert_index_equal(result.get_level_values(position), source)

    result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
    for position, source in enumerate(sources):
        tm.assert_index_equal(result2.get_level_values(position), source)

    result3 = pd.MultiIndex.from_arrays([idx1.values, idx2.values])
    for position, source in enumerate(sources):
        tm.assert_index_equal(result3.get_level_values(position), source)
|
||||
|
||||
|
||||
def test_from_arrays_empty():
    """from_arrays rejects zero arrays but accepts empty ones."""
    # 0 levels
    with pytest.raises(ValueError,
                       match="Must pass non-zero number of levels/codes"):
        MultiIndex.from_arrays(arrays=[])

    # 1 level
    one_level = MultiIndex.from_arrays(arrays=[[]], names=['A'])
    assert isinstance(one_level, MultiIndex)
    tm.assert_index_equal(one_level.levels[0], Index([], name='A'))

    # N levels
    for depth, names in [(2, ['A', 'B']), (3, ['A', 'B', 'C'])]:
        result = MultiIndex.from_arrays(arrays=[[]] * depth, names=names)
        expected = MultiIndex(levels=[[]] * depth, codes=[[]] * depth,
                              names=names)
        tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('invalid_array', [
    (1),
    ([1]),
    ([1, 2]),
    ([[1], 2]),
    ('a'),
    (['a']),
    (['a', 'b']),
    ([['a'], 'b']),
])
def test_from_arrays_invalid_input(invalid_array):
    """from_arrays raises TypeError for input that is not a list of arrays.

    Each parametrized case checks only its own ``invalid_array``, so a
    failure identifies the offending input.
    """
    with pytest.raises(TypeError):
        MultiIndex.from_arrays(arrays=invalid_array)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('idx1, idx2', [
    ([1, 2, 3], ['a', 'b']),
    ([], ['a', 'b']),
    ([1, 2, 3], [])
])
def test_from_arrays_different_lengths(idx1, idx2):
    """from_arrays raises ValueError when the arrays differ in length."""
    # see gh-13599
    unequal = [idx1, idx2]
    with pytest.raises(ValueError,
                       match='^all arrays must be same length$'):
        MultiIndex.from_arrays(unequal)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_tuples
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_tuples():
    """from_tuples rejects an unnamed empty list and builds from tuples."""
    with pytest.raises(TypeError,
                       match='Cannot infer number of levels from empty list'):
        MultiIndex.from_tuples([])

    names = ['a', 'b']
    expected = MultiIndex(levels=[[1, 3], [2, 4]],
                          codes=[[0, 1], [0, 1]],
                          names=names)

    # input tuples
    pairs = ((1, 2), (3, 4))
    tm.assert_index_equal(MultiIndex.from_tuples(pairs, names=['a', 'b']),
                          expected)
|
||||
|
||||
|
||||
def test_from_tuples_iterator():
    """from_tuples accepts an iterator of tuples and rejects a scalar."""
    # GH 18434
    # input iterator for tuples
    expected = MultiIndex(levels=[[1, 3], [2, 4]],
                          codes=[[0, 1], [0, 1]],
                          names=['a', 'b'])

    pairs = zip([1, 3], [2, 4])
    tm.assert_index_equal(MultiIndex.from_tuples(pairs, names=['a', 'b']),
                          expected)

    # input non-iterables
    with pytest.raises(
            TypeError,
            match='Input must be a list / sequence of tuple-likes.'):
        MultiIndex.from_tuples(0)
|
||||
|
||||
|
||||
def test_from_tuples_empty():
    """An empty tuple list with names matches from_arrays on empty arrays."""
    # GH 16777
    names = ['a', 'b']
    from_tuples = MultiIndex.from_tuples([], names=names)
    from_arrays = MultiIndex.from_arrays(arrays=[[], []], names=['a', 'b'])
    tm.assert_index_equal(from_tuples, from_arrays)
|
||||
|
||||
|
||||
def test_from_tuples_index_values(idx):
|
||||
result = MultiIndex.from_tuples(idx)
|
||||
assert (result.values == idx.values).all()
|
||||
|
||||
|
||||
def test_tuples_with_name_string():
    """A plain string ``name`` with tuple data raises ValueError."""
    # GH 15110 and GH 14848
    li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
    for bad_name in ('abc', 'a'):
        with pytest.raises(ValueError):
            pd.Index(li, name=bad_name)
|
||||
|
||||
|
||||
def test_from_tuples_with_tuple_label():
    """A tuple may itself be a label inside a from_tuples index."""
    # GH 15457
    frame = pd.DataFrame([[2, 1, 2], [4, (1, 2), 3]],
                         columns=['a', 'b', 'c'])
    expected = frame.set_index(['a', 'b'])

    idx = pd.MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=('a', 'b'))
    result = pd.DataFrame([2, 3], columns=['c'], index=idx)
    tm.assert_frame_equal(expected, result)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_product
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_product_empty_zero_levels():
    """from_product with no iterables raises ValueError."""
    # 0 levels
    with pytest.raises(ValueError,
                       match="Must pass non-zero number of levels/codes"):
        MultiIndex.from_product([])
|
||||
|
||||
|
||||
def test_from_product_empty_one_level():
    """A single empty iterable gives one empty, named level."""
    product = MultiIndex.from_product([[]], names=['A'])
    only_level = product.levels[0]
    tm.assert_index_equal(only_level, pd.Index([], name='A'))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('first, second', [
    ([], []),
    (['foo', 'bar', 'baz'], []),
    ([], ['a', 'b', 'c']),
])
def test_from_product_empty_two_levels(first, second):
    """An empty factor gives an empty product that keeps both levels."""
    names = ['A', 'B']
    product = MultiIndex.from_product([first, second], names=names)
    no_codes = [[], []]
    expected = MultiIndex(levels=[first, second], codes=no_codes,
                          names=names)
    tm.assert_index_equal(product, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('N', [0, 1, 2, 3])
def test_from_product_empty_three_levels(N):
    # GH12258: a middle level of any size between two empty levels still
    # produces an empty index whose levels match the inputs.
    level_names = ['A', 'B', 'C']
    middle = lrange(N)
    expected = MultiIndex(levels=[[], middle, []],
                          codes=[[], [], []], names=level_names)
    result = MultiIndex.from_product([[], middle, []], names=level_names)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('invalid_input', [
    1,
    [1],
    [1, 2],
    [[1], 2],
    'a',
    ['a'],
    ['a', 'b'],
    [['a'], 'b'],
])
def test_from_product_invalid_input(invalid_input):
    # Scalars, flat lists and lists mixing iterables with scalars must all
    # be rejected with TypeError.
    with pytest.raises(TypeError):
        MultiIndex.from_product(iterables=invalid_input)
|
||||
|
||||
|
||||
def test_from_product_datetimeindex():
    # The flattened values of a product with a DatetimeIndex are
    # (int, Timestamp) tuples in row-major order.
    dates = date_range('2000-01-01', periods=2)
    product_index = pd.MultiIndex.from_product([[1, 2], dates])

    day1 = pd.Timestamp('2000-01-01')
    day2 = pd.Timestamp('2000-01-02')
    pairs = [(1, day1), (1, day2), (2, day1), (2, day2)]
    expected_values = construct_1d_object_array_from_listlike(pairs)

    tm.assert_numpy_array_equal(product_index.values, expected_values)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('ordered', [False, True])
@pytest.mark.parametrize('f', [
    lambda x: x,
    lambda x: pd.Series(x),
    lambda x: x.values
])
def test_from_product_index_series_categorical(ordered, f):
    # GH13743: the second level's values must come back as the same
    # categorical (categories and orderedness) however the input is wrapped.
    outer = ['foo', 'bar']
    letters = list("abcaab")
    categories = list("bac")

    cat_index = pd.CategoricalIndex(letters, categories=categories,
                                    ordered=ordered)
    # one repetition of the inner values per outer label
    expected = pd.CategoricalIndex(letters + letters,
                                   categories=categories,
                                   ordered=ordered)

    product_index = pd.MultiIndex.from_product([outer, f(cat_index)])
    tm.assert_index_equal(product_index.get_level_values(1), expected)
|
||||
|
||||
|
||||
def test_from_product():
    # The product must equal the explicit list of (first, second) pairs,
    # with the first level varying slowest.
    first = ['foo', 'bar', 'buz']
    second = ['a', 'b', 'c']
    names = ['first', 'second']

    pairs = [(outer, inner) for outer in first for inner in second]
    expected = MultiIndex.from_tuples(pairs, names=names)

    result = MultiIndex.from_product([first, second], names=names)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_from_product_iterator():
    # GH 18434
    first = ['foo', 'bar', 'buz']
    second = ['a', 'b', 'c']
    names = ['first', 'second']
    pairs = [(outer, inner) for outer in first for inner in second]
    expected = MultiIndex.from_tuples(pairs, names=names)

    # iterator as input
    lazy_levels = iter([first, second])
    result = MultiIndex.from_product(lazy_levels, names=names)
    tm.assert_index_equal(result, expected)

    # Invalid non-iterable input
    with pytest.raises(
            TypeError,
            match="Input must be a list / sequence of iterables."):
        MultiIndex.from_product(0)
|
||||
|
||||
|
||||
def test_create_index_existing_name(idx):
    # GH11193, when an existing index is passed, and a new name is not
    # specified, the new index should inherit the previous object name
    tuples = [
        ('foo', 'one'), ('foo', 'two'),
        ('bar', 'one'), ('baz', 'two'),
        ('qux', 'one'), ('qux', 'two'),
    ]
    idx.names = ['foo', 'bar']

    # no names given: the ones set on the source index are expected
    inherited = pd.Index(idx)
    expected = Index(Index(tuples, dtype='object'), names=['foo', 'bar'])
    tm.assert_index_equal(inherited, expected)

    # explicit names are expected to win over the source index's names
    renamed = pd.Index(idx, names=['A', 'B'])
    expected = Index(Index(tuples, dtype='object'), names=['A', 'B'])
    tm.assert_index_equal(renamed, expected)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_frame
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_frame():
    # GH 22420: rows become the tuples, column labels become level names.
    rows = [['a', 'a'], ['a', 'b'], ['b', 'a'], ['b', 'b']]
    level_names = ['L1', 'L2']

    frame = pd.DataFrame(rows, columns=level_names)
    expected = pd.MultiIndex.from_tuples([tuple(row) for row in rows],
                                         names=level_names)

    result = pd.MultiIndex.from_frame(frame)
    tm.assert_index_equal(expected, result)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('non_frame', [
    pd.Series([1, 2, 3, 4]),
    [1, 2, 3, 4],
    [[1, 2], [3, 4], [5, 6]],
    pd.Index([1, 2, 3, 4]),
    np.array([[1, 2], [3, 4], [5, 6]]),
    27
])
def test_from_frame_error(non_frame):
    # GH 22420: anything that is not a DataFrame is rejected with TypeError.
    expected_message = 'Input must be a DataFrame'
    with pytest.raises(TypeError, match=expected_message):
        pd.MultiIndex.from_frame(non_frame)
|
||||
|
||||
|
||||
def test_from_frame_dtype_fidelity():
    # GH 22420: every level must keep the dtype of the column it came from.
    level_names = ['dates', 'a', 'b', 'c']
    frame = pd.DataFrame(OrderedDict([
        ('dates', pd.date_range('19910905', periods=6, tz='US/Eastern')),
        ('a', [1, 1, 1, 2, 2, 2]),
        ('b', pd.Categorical(['a', 'a', 'b', 'b', 'c', 'c'], ordered=True)),
        ('c', ['x', 'x', 'y', 'z', 'x', 'y'])
    ]))
    column_dtypes = frame.dtypes.to_dict()

    expected_index = pd.MultiIndex.from_arrays([
        pd.date_range('19910905', periods=6, tz='US/Eastern'),
        [1, 1, 1, 2, 2, 2],
        pd.Categorical(['a', 'a', 'b', 'b', 'c', 'c'], ordered=True),
        ['x', 'x', 'y', 'z', 'x', 'y']
    ], names=level_names)

    built = pd.MultiIndex.from_frame(frame)
    level_dtypes = {name: level.dtype
                    for name, level in zip(built.names, built.levels)}

    tm.assert_index_equal(expected_index, built)
    assert column_dtypes == level_dtypes
|
||||
|
||||
|
||||
@pytest.mark.parametrize('names_in,names_out', [
    (None, [('L1', 'x'), ('L2', 'y')]),
    (['x', 'y'], ['x', 'y']),
])
def test_from_frame_valid_names(names_in, names_out):
    # GH 22420: without ``names`` the (tuple) column labels are used;
    # explicit ``names`` replace them.
    columns = pd.MultiIndex.from_tuples([('L1', 'x'), ('L2', 'y')])
    frame = pd.DataFrame([['a', 'a'], ['a', 'b'], ['b', 'a'], ['b', 'b']],
                         columns=columns)
    result_names = pd.MultiIndex.from_frame(frame, names=names_in).names
    assert result_names == names_out
|
||||
|
||||
|
||||
@pytest.mark.parametrize('names_in,names_out', [
    ('bad_input', ValueError("Names should be list-like for a MultiIndex")),
    (['a', 'b', 'c'], ValueError("Length of names must match number of "
                                 "levels in MultiIndex."))
])
def test_from_frame_invalid_names(names_in, names_out):
    # GH 22420: ``names_out`` is an exception instance describing the
    # expected failure (its type and first argument are matched).
    columns = pd.MultiIndex.from_tuples([('L1', 'x'), ('L2', 'y')])
    frame = pd.DataFrame([['a', 'a'], ['a', 'b'], ['b', 'a'], ['b', 'b']],
                         columns=columns)
    error_type = type(names_out)
    message = names_out.args[0]
    with pytest.raises(error_type, match=message):
        pd.MultiIndex.from_frame(frame, names=names_in)
|
||||
@@ -0,0 +1,97 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PYPY
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_contains_top_level():
    # A first-level label counts as contained in the index itself, while
    # the underlying engine is expected not to report it as a member.
    product_index = MultiIndex.from_product([['A', 'B'], [1, 2]])
    label = 'A'
    assert label in product_index
    assert label not in product_index._engine
|
||||
|
||||
|
||||
def test_contains_with_nat():
    # MI with a NaT: code -1 in the date level marks the missing entry.
    dates = pd.date_range('2012-01-01', periods=5)
    mi = MultiIndex(levels=[['C'], dates],
                    codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
                    names=[None, 'B'])

    first_key = ('C', pd.Timestamp('2012-01-01'))
    assert first_key in mi
    # every materialised entry, including the one with NaT, must be found
    assert all(entry in mi for entry in mi.values)
|
||||
|
||||
|
||||
def test_contains(idx):
    # Membership is by complete key: a present tuple is found, an absent
    # tuple and ``None`` are not.
    present = ('foo', 'two')
    absent = ('bar', 'two')
    assert present in idx
    assert absent not in idx
    assert None not in idx
|
||||
|
||||
|
||||
@pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy")
def test_isin_nan_pypy():
    # Only run on PyPy, where a tuple holding NaN is expected to match
    # regardless of which NaN object is used.
    idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]])
    for nan_value in (np.nan, float('nan')):
        tm.assert_numpy_array_equal(idx.isin([('bar', nan_value)]),
                                    np.array([False, True]))
|
||||
|
||||
|
||||
def test_isin():
    # ``isin`` with complete tuples: one boolean per entry.
    candidates = [('foo', 2), ('bar', 3), ('quux', 4)]

    populated = MultiIndex.from_arrays([
        ['qux', 'baz', 'foo', 'bar'],
        np.arange(4)
    ])
    mask = populated.isin(candidates)
    tm.assert_numpy_array_equal(mask,
                                np.array([False, False, True, True]))

    # empty, return dtype bool
    empty = MultiIndex.from_arrays([[], []])
    empty_mask = empty.isin(candidates)
    assert len(empty_mask) == 0
    assert empty_mask.dtype == np.bool_
|
||||
|
||||
|
||||
@pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy")
def test_isin_nan_not_pypy():
    # Skipped on PyPy; elsewhere a tuple holding NaN is expected to match
    # nothing, whichever NaN object is used.
    idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]])
    for nan_value in (np.nan, float('nan')):
        tm.assert_numpy_array_equal(idx.isin([('bar', nan_value)]),
                                    np.array([False, False]))
|
||||
|
||||
|
||||
def test_isin_level_kwarg():
    # ``isin(values, level=...)`` accepts a level by position (from either
    # end) or by name, and raises for anything else.
    idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'],
                                  np.arange(4)])

    vals_0 = ['foo', 'bar', 'quux']
    vals_1 = [2, 3, 10]
    expected = np.array([False, False, True, True])

    # valid positional levels
    for values, level in [(vals_0, 0), (vals_0, -2),
                          (vals_1, 1), (vals_1, -1)]:
        tm.assert_numpy_array_equal(expected, idx.isin(values, level=level))

    # positions outside the two available levels
    for level in (5, -5):
        with pytest.raises(IndexError):
            idx.isin(vals_0, level=level)

    # floats and unknown labels are treated as missing level names
    for values, level in [(vals_0, 1.0), (vals_1, -1.0), (vals_1, 'A')]:
        with pytest.raises(KeyError):
            idx.isin(values, level=level)

    # once the levels are named, the names become valid selectors
    idx.names = ['A', 'B']
    tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level='A'))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B'))

    with pytest.raises(KeyError):
        idx.isin(vals_1, level='C')
|
||||
@@ -0,0 +1,224 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import range
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, MultiIndex, date_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_tolist(idx):
    # ``tolist`` must give the same entries as listing ``values``.
    as_list = idx.tolist()
    from_values = list(idx.values)
    assert as_list == from_values
|
||||
|
||||
|
||||
def test_to_numpy(idx):
    # ``to_numpy`` must produce an array equal to ``values``.
    converted = idx.to_numpy()
    tm.assert_numpy_array_equal(converted, idx.values)
|
||||
|
||||
|
||||
def test_to_frame():
    # Covers ``to_frame`` with and without ``index``/``name`` for an
    # unnamed index, a named index, and a product with a datetime level.
    tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]
    col_names = ['first', 'second']

    # unnamed levels -> default integer column labels
    index = MultiIndex.from_tuples(tuples)
    expected = DataFrame(tuples)
    tm.assert_frame_equal(index.to_frame(index=False), expected)

    expected.index = index
    tm.assert_frame_equal(index.to_frame(), expected)

    # named levels -> level names become the column labels
    tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]
    index = MultiIndex.from_tuples(tuples, names=['first', 'second'])
    expected = DataFrame(tuples)
    expected.columns = col_names
    tm.assert_frame_equal(index.to_frame(index=False), expected)

    expected.index = index
    tm.assert_frame_equal(index.to_frame(), expected)

    # See GH-22580
    index = MultiIndex.from_tuples(tuples)
    expected = DataFrame(tuples)
    expected.columns = col_names
    tm.assert_frame_equal(
        index.to_frame(index=False, name=['first', 'second']), expected)

    framed = index.to_frame(name=['first', 'second'])
    expected.index = index
    expected.columns = col_names
    tm.assert_frame_equal(framed, expected)

    with pytest.raises(
            TypeError,
            match="'name' must be a list / sequence of column names."):
        index.to_frame(name='first')

    with pytest.raises(
            ValueError,
            match="'name' should have same length as number of "
                  "levels on index."):
        index.to_frame(name=['first'])

    # Tests for datetime index
    index = MultiIndex.from_product([range(5),
                                     pd.date_range('20130101', periods=3)])
    expected = DataFrame(
        {0: np.repeat(np.arange(5, dtype='int64'), 3),
         1: np.tile(pd.date_range('20130101', periods=3), 5)})
    tm.assert_frame_equal(index.to_frame(index=False), expected)

    expected.index = index
    tm.assert_frame_equal(index.to_frame(), expected)

    # See GH-22580
    expected = DataFrame(
        {'first': np.repeat(np.arange(5, dtype='int64'), 3),
         'second': np.tile(pd.date_range('20130101', periods=3), 5)})
    tm.assert_frame_equal(
        index.to_frame(index=False, name=['first', 'second']), expected)

    framed = index.to_frame(name=['first', 'second'])
    expected.index = index
    tm.assert_frame_equal(framed, expected)
|
||||
|
||||
|
||||
def test_to_frame_dtype_fidelity():
    # GH 22420: every column must keep the dtype of the level it came from.
    level_names = ['dates', 'a', 'b', 'c']
    mi = pd.MultiIndex.from_arrays([
        pd.date_range('19910905', periods=6, tz='US/Eastern'),
        [1, 1, 1, 2, 2, 2],
        pd.Categorical(['a', 'a', 'b', 'b', 'c', 'c'], ordered=True),
        ['x', 'x', 'y', 'z', 'x', 'y']
    ], names=level_names)
    level_dtypes = {name: level.dtype
                    for name, level in zip(mi.names, mi.levels)}

    expected_frame = pd.DataFrame(OrderedDict([
        ('dates', pd.date_range('19910905', periods=6, tz='US/Eastern')),
        ('a', [1, 1, 1, 2, 2, 2]),
        ('b', pd.Categorical(['a', 'a', 'b', 'b', 'c', 'c'], ordered=True)),
        ('c', ['x', 'x', 'y', 'z', 'x', 'y'])
    ]))

    frame = mi.to_frame(index=False)
    column_dtypes = frame.dtypes.to_dict()

    tm.assert_frame_equal(frame, expected_frame)
    assert level_dtypes == column_dtypes
|
||||
|
||||
|
||||
def test_to_frame_resulting_column_order():
    # GH 22420: columns must follow level order, not a sorted order, even
    # when the level names mix strings and integers.
    level_names = ['z', 0, 'a']
    arrays = [['a', 'b', 'c'], ['x', 'y', 'z'], ['q', 'w', 'e']]
    mi = pd.MultiIndex.from_arrays(arrays, names=level_names)

    frame = mi.to_frame()
    assert frame.columns.tolist() == level_names
|
||||
|
||||
|
||||
def test_to_hierarchical():
    # ``to_hierarchical`` is expected to emit a FutureWarning on every call
    # while still returning the repeated index checked below.
    levels = [[1, 2], ['one', 'two']]
    index = MultiIndex.from_tuples(
        [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')])

    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        repeated = index.to_hierarchical(3)
    expected = MultiIndex(levels=levels,
                          codes=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
                                 [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]])
    tm.assert_index_equal(repeated, expected)
    assert repeated.names == index.names

    # K > 1
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        repeated = index.to_hierarchical(3, 2)
    expected = MultiIndex(levels=levels,
                          codes=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
                                 [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]])
    tm.assert_index_equal(repeated, expected)
    assert repeated.names == index.names

    # non-sorted
    level_names = ['N1', 'N2']
    index = MultiIndex.from_tuples([(2, 'c'), (1, 'b'),
                                    (2, 'a'), (2, 'b')],
                                   names=level_names)

    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        repeated = index.to_hierarchical(2)
    expected = MultiIndex.from_tuples([(2, 'c'), (2, 'c'),
                                       (1, 'b'), (1, 'b'),
                                       (2, 'a'), (2, 'a'),
                                       (2, 'b'), (2, 'b')],
                                      names=level_names)
    tm.assert_index_equal(repeated, expected)
    assert repeated.names == index.names
|
||||
|
||||
|
||||
def test_roundtrip_pickle_with_tz():
    # NOTE(review): the unconditional ``return`` below means none of the
    # following statements execute, so this test always passes without
    # checking anything. The reason it was disabled is not visible here --
    # confirm before removing the ``return`` or converting it to a skip.
    return

    # GH 8367
    # round-trip of timezone
    # (unreachable) intended check: pickling a three-level index whose last
    # level is tz-aware must give back an index with equal levels.
    index = MultiIndex.from_product(
        [[1, 2], ['a', 'b'], date_range('20130101', periods=3,
                                        tz='US/Eastern')
         ], names=['one', 'two', 'three'])
    unpickled = tm.round_trip_pickle(index)
    assert index.equal_levels(unpickled)
|
||||
|
||||
|
||||
def test_pickle(indices):
    # NOTE(review): the unconditional ``return`` below means none of the
    # following statements execute, so this test always passes without
    # checking anything. The reason it was disabled is not visible here --
    # confirm before re-enabling.
    return

    # (unreachable) intended check: the index equals its pickled copy, both
    # as given and after temporarily renaming it to 'foo'; the original
    # name is restored at the end.
    unpickled = tm.round_trip_pickle(indices)
    assert indices.equals(unpickled)
    original_name, indices.name = indices.name, 'foo'
    unpickled = tm.round_trip_pickle(indices)
    assert indices.equals(unpickled)
    indices.name = original_name
|
||||
|
||||
|
||||
def test_to_series(idx):
    # assert that we are creating a copy of the index: neither the values
    # nor the index of the Series may be the original objects.
    series = idx.to_series()
    assert series.values is not idx.values
    assert series.index is not idx
    assert series.name == idx.name
|
||||
|
||||
|
||||
def test_to_series_with_arguments(idx):
    # GH18699: ``to_series`` must honour the ``index`` and ``name`` kwargs.

    # index kwarg: the object passed in is used as-is for the Series index
    s = idx.to_series(index=idx)

    assert s.values is not idx.values
    assert s.index is idx
    assert s.name == idx.name

    # name kwarg: the Series gets the new name and a fresh index object
    s = idx.to_series(name='__test')

    assert s.values is not idx.values
    assert s.index is not idx
    assert s.name != idx.name
|
||||
|
||||
|
||||
def test_to_flat_index(idx):
    # The flat index is a plain Index of tuples (``tupleize_cols=False``
    # keeps the expected value from being turned back into a MultiIndex).
    tuples = (('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
              ('baz', 'two'), ('qux', 'one'), ('qux', 'two'))
    expected = pd.Index(tuples, tupleize_cols=False)
    tm.assert_index_equal(idx.to_flat_index(), expected)
|
||||
@@ -0,0 +1,93 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from copy import copy, deepcopy
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas import MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def assert_multiindex_copied(copy, original):
    """Assert that ``copy`` is an equal but distinct copy of ``original``.

    Parameters
    ----------
    copy : MultiIndex
        The index produced by the copy operation under test.
    original : MultiIndex
        The index it was copied from.

    Raises
    ------
    AssertionError
        If levels, codes, names or sortorder differ, or if ``codes`` /
        ``names`` are the very same container object on both indexes.
    """
    # Levels should be (at least) shallow copied
    tm.assert_copy(copy.levels, original.levels)

    # Codes may be copied either way, but they must be equal and must not
    # be the same container object
    tm.assert_almost_equal(copy.codes, original.codes)
    assert copy.codes is not original.codes

    # Names may be copied either way, with the same two requirements
    assert copy.names == original.names
    assert copy.names is not original.names

    # Sort order should be copied
    assert copy.sortorder == original.sortorder
|
||||
|
||||
|
||||
def test_copy(idx):
    # ``copy()`` must satisfy the shared copy checks.
    assert_multiindex_copied(idx.copy(), idx)
|
||||
|
||||
|
||||
def test_shallow_copy(idx):
    # ``_shallow_copy()`` must satisfy the shared copy checks.
    assert_multiindex_copied(idx._shallow_copy(), idx)
|
||||
|
||||
|
||||
def test_labels_deprecated(idx):
    # GH23752: passing the old ``labels`` keyword must emit a FutureWarning.
    with tm.assert_produces_warning(FutureWarning):
        legacy_kwargs = {'labels': idx.codes}
        idx.copy(**legacy_kwargs)
|
||||
|
||||
|
||||
def test_view(idx):
    # ``view()`` must satisfy the shared copy checks.
    assert_multiindex_copied(idx.view(), idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('func', [copy, deepcopy])
def test_copy_and_deepcopy(func):
    # Both ``copy.copy`` and ``copy.deepcopy`` must return a new object
    # that compares equal to the source index.
    source = MultiIndex(
        levels=[['foo', 'bar'], ['fizz', 'buzz']],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=['first', 'second']
    )
    duplicate = func(source)
    assert duplicate is not source
    assert duplicate.equals(source)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('deep', [True, False])
def test_copy_method(deep):
    # ``MultiIndex.copy`` must give an equal index for deep and shallow
    # copies alike.
    source = MultiIndex(
        levels=[['foo', 'bar'], ['fizz', 'buzz']],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=['first', 'second']
    )
    duplicate = source.copy(deep=deep)
    assert duplicate.equals(source)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('deep', [True, False])
@pytest.mark.parametrize('kwarg, value', [
    ('names', ['thrid', 'fourth']),
    ('levels', [['foo2', 'bar2'], ['fizz2', 'buzz2']]),
    ('codes', [[1, 0, 0, 0], [1, 1, 0, 0]])
])
def test_copy_method_kwargs(deep, kwarg, value):
    # gh-12309: Check that the "name" argument as well other kwargs are honored
    idx = MultiIndex(
        levels=[['foo', 'bar'], ['fizz', 'buzz']],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=['first', 'second']
    )
    # NOTE(review): this unconditional ``return`` stops the test right after
    # the index is built, so the copy and both assertions below never run
    # and every parametrization passes vacuously. The reason is not visible
    # here -- confirm before re-enabling.
    return
    # (unreachable) intended check: the keyword override given to ``copy``
    # shows up on the copy; levels/codes are compared as plain lists.
    idx_copy = idx.copy(**{kwarg: value, 'deep': deep})
    if kwarg == 'names':
        assert getattr(idx_copy, kwarg) == value
    else:
        assert [list(i) for i in getattr(idx_copy, kwarg)] == value
|
||||
@@ -0,0 +1,128 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_drop(idx):
    # ``drop`` with full keys, first-level labels and mixtures of both,
    # with and without ``errors='ignore'``.
    full_keys = [('foo', 'two'), ('qux', 'one')]
    remaining = idx[[0, 2, 3, 5]]
    tm.assert_index_equal(idx.drop(full_keys), remaining)
    tm.assert_index_equal(idx.drop(MultiIndex.from_tuples(full_keys)),
                          remaining)

    # a first-level label drops every entry under it
    tm.assert_index_equal(idx.drop(['bar']), idx[[0, 1, 3, 4, 5]])
    tm.assert_index_equal(idx.drop('foo'), idx[[2, 3, 4, 5]])

    # keys that are not in the index raise by default
    missing = MultiIndex.from_tuples([('bar', 'two')])
    with pytest.raises(KeyError):
        idx.drop([('bar', 'two')])
    with pytest.raises(KeyError):
        idx.drop(missing)
    with pytest.raises(KeyError):
        idx.drop(['foo', 'two'])

    # partially correct argument
    partly_missing = MultiIndex.from_tuples([('qux', 'one'),
                                             ('bar', 'two')])
    with pytest.raises(KeyError):
        idx.drop(partly_missing)

    # error='ignore'
    tm.assert_index_equal(idx.drop(missing, errors='ignore'),
                          idx[[0, 1, 2, 3, 4, 5]])
    tm.assert_index_equal(idx.drop(partly_missing, errors='ignore'),
                          idx[[0, 1, 2, 3, 5]])
    tm.assert_index_equal(idx.drop(['foo', 'two'], errors='ignore'),
                          idx[[2, 3, 4, 5]])

    # mixed partial / full drop
    tm.assert_index_equal(idx.drop(['foo', ('qux', 'one')]),
                          idx[[2, 3, 5]])

    # mixed partial / full drop / error='ignore'
    mixed_keys = ['foo', ('qux', 'one'), 'two']
    with pytest.raises(KeyError):
        idx.drop(mixed_keys)
    tm.assert_index_equal(idx.drop(mixed_keys, errors='ignore'),
                          idx[[2, 3, 5]])
|
||||
|
||||
|
||||
def test_droplevel_with_names(idx):
    # Dropping a level must keep the names of the remaining level(s), and
    # a level can be addressed by name as well as by position.
    foo_rows = idx[idx.get_loc('foo')]
    assert foo_rows.droplevel(0).name == 'second'

    three_level = MultiIndex(
        levels=[Index(lrange(4)) for _ in range(3)],
        codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]),
               np.array([0, 1, 0, 0, 0, 1, 0, 1]),
               np.array([1, 0, 1, 1, 0, 0, 1, 0])],
        names=['one', 'two', 'three'])
    assert three_level.droplevel(0).names == ('two', 'three')

    by_name = three_level.droplevel('two')
    by_position = three_level.droplevel(1)
    assert by_name.equals(by_position)
|
||||
|
||||
|
||||
def test_droplevel_list():
    # ``droplevel`` with a list of level names.
    three_level = MultiIndex(
        levels=[Index(lrange(4)) for _ in range(3)],
        codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]),
               np.array([0, 1, 0, 0, 0, 1, 0, 1]),
               np.array([1, 0, 1, 1, 0, 0, 1, 0])],
        names=['one', 'two', 'three'])

    # several names at once behave like dropping them one after another
    dropped = three_level[:2].droplevel(['three', 'one'])
    expected = three_level[:2].droplevel(2).droplevel(0)
    assert dropped.equals(expected)

    # an empty list leaves the index unchanged
    dropped = three_level[:2].droplevel([])
    expected = three_level[:2]
    assert dropped.equals(expected)

    # naming every level is an error
    with pytest.raises(ValueError):
        three_level[:2].droplevel(['one', 'two', 'three'])

    # so is an unknown level name
    with pytest.raises(KeyError):
        three_level[:2].droplevel(['one', 'four'])
|
||||
|
||||
|
||||
def test_drop_not_lexsorted():
    # GH 12078

    # define the lexsorted version of the multi-index
    lexsorted_mi = MultiIndex.from_tuples(
        [('a', ''), ('b1', 'c1'), ('b2', 'c2')], names=['b', 'c'])
    assert lexsorted_mi.is_lexsorted()

    # and the not-lexsorted version
    frame = pd.DataFrame(columns=['a', 'b', 'c', 'd'],
                         data=[[1, 'b1', 'c1', 3], [1, 'b2', 'c2', 4]])
    pivoted = frame.pivot_table(index='a', columns=['b', 'c'], values='d')
    not_lexsorted_mi = pivoted.reset_index().columns
    assert not not_lexsorted_mi.is_lexsorted()

    # compare the results: same entries, and dropping from the unsorted
    # one is expected to emit a PerformanceWarning
    tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
    with tm.assert_produces_warning(PerformanceWarning):
        tm.assert_index_equal(lexsorted_mi.drop('a'),
                              not_lexsorted_mi.drop('a'))
|
||||
@@ -0,0 +1,266 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from itertools import product
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import hashtable
|
||||
from pandas.compat import range, u
|
||||
|
||||
from pandas import DatetimeIndex, MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize('names', [None, ['first', 'second']])
def test_unique(names):
    # Each case pairs the input arrays with the arrays of the expected
    # de-duplicated index (first occurrences, original order).
    cases = [
        ([[1, 2, 1, 2], [1, 1, 1, 2]], [[1, 2, 2], [1, 1, 2]]),
        ([list('aaaa'), list('abab')], [list('aa'), list('ab')]),
        ([list('aaaa'), list('aaaa')], [['a'], ['a']]),
    ]
    for arrays, unique_arrays in cases:
        mi = MultiIndex.from_arrays(arrays, names=names)
        expected = MultiIndex.from_arrays(unique_arrays, names=mi.names)
        tm.assert_index_equal(mi.unique(), expected)

    # GH #20568 - empty MI
    empty = MultiIndex.from_arrays([[], []], names=names)
    tm.assert_index_equal(empty, empty.unique())
|
||||
|
||||
|
||||
def test_unique_datetimelike():
    # Uniqueness over a naive and a tz-aware datetime level, both with NaT.
    naive = DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
                           '2015-01-01', 'NaT', 'NaT'])
    aware = DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-02',
                           '2015-01-02', 'NaT', '2015-01-01'],
                          tz='Asia/Tokyo')
    result = MultiIndex.from_arrays([naive, aware]).unique()

    unique_naive = DatetimeIndex(['2015-01-01', '2015-01-01', 'NaT', 'NaT'])
    unique_aware = DatetimeIndex(['2015-01-01', '2015-01-02',
                                  'NaT', '2015-01-01'],
                                 tz='Asia/Tokyo')
    expected = MultiIndex.from_arrays([unique_naive, unique_aware])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('level', [0, 'first', 1, 'second'])
def test_unique_level(idx, level):
    # GH #17896 - with level= argument
    result = idx.unique(level=level)
    expected = idx.get_level_values(level).unique()
    tm.assert_index_equal(result, expected)

    # With already unique level
    mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]],
                                names=['first', 'second'])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    tm.assert_index_equal(result, expected)

    # With empty MI
    mi = MultiIndex.from_arrays([[], []], names=['first', 'second'])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    # this comparison was missing, so the empty case was never checked
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dropna', [True, False])
def test_get_unique_index(idx, dropna):
    # Take entries 0 and 1 of the fixture several times over; the unique
    # index is expected to hold each of them once, in first-seen order.
    mi = idx[[0, 1, 0, 1, 1, 0, 0]]
    expected = mi._shallow_copy(mi[[0, 1]])

    result = mi._get_unique_index(dropna=dropna)
    # ``result.unique`` is a bound method and therefore always truthy;
    # ``is_unique`` is the flag that actually reports uniqueness.
    assert result.is_unique
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_duplicate_multiindex_codes():
    # GH 17464
    # Make sure that a MultiIndex with duplicate levels throws a ValueError
    duplicated_levels = [['A'] * 10, range(10)]
    with pytest.raises(ValueError):
        MultiIndex(duplicated_levels, [[0] * 10, range(10)])

    # And that using set_levels with duplicate levels fails
    valid = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'],
                                    [1, 2, 1, 2, 3]])
    with pytest.raises(ValueError):
        valid.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]],
                         inplace=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2],
                                   [1, 'a', 1]])
def test_duplicate_level_names(names):
    # GH18872, GH19029: repeated level names must be accepted everywhere
    # names can be set.
    named_at_creation = MultiIndex.from_product([[0, 1]] * 3, names=names)
    assert named_at_creation.names == names

    # With .rename()
    renamed = MultiIndex.from_product([[0, 1]] * 3).rename(names)
    assert renamed.names == names

    # With .rename(., level=)
    middle, outer = names[1], [names[0], names[2]]
    renamed.rename(middle, level=1, inplace=True)
    renamed = renamed.rename(outer, level=[0, 2])
    assert renamed.names == names
|
||||
|
||||
|
||||
def test_duplicate_meta_data():
    # GH 10115: ``drop_duplicates`` must keep the level names, whether
    # they are unset, partly set or fully set.
    base = MultiIndex(
        levels=[[0, 1], [0, 1, 2]],
        codes=[[0, 0, 0, 0, 1, 1, 1],
               [0, 1, 2, 0, 0, 1, 2]])

    name_sets = [[None, None], [None, 'Num'], ['Upper', 'Num']]
    variants = [base] + [base.set_names(names) for names in name_sets]

    for variant in variants:
        assert variant.has_duplicates
        assert variant.drop_duplicates().names == variant.names
|
||||
|
||||
|
||||
def test_has_duplicates(idx, idx_dup):
    # see fixtures: ``idx`` has no repeated entries, ``idx_dup`` does, and
    # so does the index built here (entry (0, 0) occurs twice).
    repeated = MultiIndex(levels=[[0, 1], [0, 1, 2]],
                          codes=[[0, 0, 0, 0, 1, 1, 1],
                                 [0, 1, 2, 0, 0, 1, 2]])

    for index, unique in [(idx, True), (idx_dup, False), (repeated, False)]:
        # both flags must be real booleans and exact opposites
        assert index.is_unique is unique
        assert index.has_duplicates is (not unique)
|
||||
|
||||
|
||||
def test_has_duplicates_from_tuples():
    # GH 9075: eight-level tuples that differ only in their 4th and 8th
    # fields must not be reported as duplicates.
    varying = [(5, 169), (7, 119), (9, 135), (13, 145), (14, 158),
               (16, 122), (17, 160), (18, 180), (20, 143), (21, 128),
               (22, 129), (25, 111), (28, 114), (29, 121), (31, 126),
               (32, 155), (33, 123), (12, 144)]
    t = [(u('x'), u('out'), u('z'), fourth,
          u('y'), u('in'), u('z'), eighth)
         for fourth, eighth in varying]

    mi = MultiIndex.from_tuples(t)
    assert not mi.has_duplicates
|
||||
|
||||
|
||||
def test_has_duplicates_overflow():
    """Check ``has_duplicates`` for indexes with 4 and 8 wide levels."""
    # handle int64 overflow if possible
    def check(nlevels, with_nulls):
        base_codes = np.tile(np.arange(500), 2)
        level = np.arange(500)

        if not with_nulls:
            codes = [base_codes] * nlevels + [np.arange(2).repeat(500)]
        else:  # inject some null values
            base_codes[500] = -1  # common nan value
            codes = [base_codes.copy() for _ in range(nlevels)]
            for pos, level_codes in enumerate(codes):
                level_codes[500 + pos - nlevels // 2] = -1

            codes.append(np.array([-1, 1]).repeat(500))

        levels = [level] * nlevels + [[0, 1]]

        # no dups
        mi = MultiIndex(levels=levels, codes=codes)
        assert not mi.has_duplicates

        # with a dup
        if with_nulls:
            # append a copy of the first entry to every code array
            codes = [np.insert(arr, 1000, arr[0]) for arr in codes]
            mi = MultiIndex(levels=levels, codes=codes)
        else:
            values = mi.values.tolist()
            mi = MultiIndex.from_tuples(values + [values[0]])

        assert mi.has_duplicates

    # nlevels == 4: no overflow; nlevels == 8: overflow possible
    for nlevels in (4, 8):
        for with_nulls in (False, True):
            check(nlevels, with_nulls)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('keep, expected', [
    ('first', np.array([False, False, False, True, True, False])),
    ('last', np.array([False, True, True, False, False, False])),
    (False, np.array([False, True, True, True, True, False])),
])
def test_duplicated(idx_dup, keep, expected):
    """Compare ``duplicated`` on ``idx_dup`` with the mask for each ``keep``."""
    tm.assert_numpy_array_equal(idx_dup.duplicated(keep=keep), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('keep', ['first', 'last', False])
def test_duplicated_large(keep):
    """Cross-check ``duplicated`` against ``hashtable.duplicated_object``."""
    # GH 9125
    size, repeats = 200, 5000
    levels = [np.arange(size),
              tm.makeStringIndex(size),
              1000 + np.arange(size)]
    # one random code array of length repeats * size per level
    codes = [np.random.choice(size, repeats * size) for _ in levels]
    mi = MultiIndex(levels=levels, codes=codes)

    tm.assert_numpy_array_equal(
        mi.duplicated(keep=keep),
        hashtable.duplicated_object(mi.values, keep=keep))
|
||||
|
||||
|
||||
def test_get_duplicates():
|
||||
# GH5873
|
||||
for a in [101, 102]:
|
||||
mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
|
||||
assert not mi.has_duplicates
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
# Deprecated - see GH20239
|
||||
assert mi.get_duplicates().equals(MultiIndex.from_arrays([[], []]))
|
||||
|
||||
tm.assert_numpy_array_equal(mi.duplicated(),
|
||||
np.zeros(2, dtype='bool'))
|
||||
|
||||
for n in range(1, 6): # 1st level shape
|
||||
for m in range(1, 5): # 2nd level shape
|
||||
# all possible unique combinations, including nan
|
||||
codes = product(range(-1, n), range(-1, m))
|
||||
mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]],
|
||||
codes=np.random.permutation(list(codes)).T)
|
||||
assert len(mi) == (n + 1) * (m + 1)
|
||||
assert not mi.has_duplicates
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
# Deprecated - see GH20239
|
||||
assert mi.get_duplicates().equals(MultiIndex.from_arrays(
|
||||
[[], []]))
|
||||
|
||||
tm.assert_numpy_array_equal(mi.duplicated(),
|
||||
np.zeros(len(mi), dtype='bool'))
|
||||
+221
@@ -0,0 +1,221 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange, lzip, range
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, MultiIndex, Series
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_equals(idx):
|
||||
assert idx.equals(idx)
|
||||
assert idx.equals(idx.copy())
|
||||
assert idx.equals(idx.astype(object))
|
||||
|
||||
assert not idx.equals(list(idx))
|
||||
assert not idx.equals(np.array(idx))
|
||||
|
||||
same_values = Index(idx, dtype=object)
|
||||
assert idx.equals(same_values)
|
||||
assert same_values.equals(idx)
|
||||
|
||||
if idx.nlevels == 1:
|
||||
# do not test MultiIndex
|
||||
assert not idx.equals(pd.Series(idx))
|
||||
|
||||
|
||||
def test_equals_op(idx):
    """Exercise elementwise ``==`` against indexes, arrays and Series."""
    # GH9947, GH10637
    index_a = idx
    length_msg = "Lengths must match"

    n = len(index_a)
    index_b = index_a[0:-1]
    index_c = index_a[0:-1].append(index_a[-2:-1])
    index_d = index_a[0:1]
    with pytest.raises(ValueError, match=length_msg):
        index_a == index_b
    all_true = np.array([True] * n)
    last_false = np.array([True] * (n - 1) + [False])
    tm.assert_numpy_array_equal(index_a == index_a, all_true)
    tm.assert_numpy_array_equal(index_a == index_c, last_false)

    # test comparisons with numpy arrays
    array_a = np.array(index_a)
    array_b = np.array(index_a[0:-1])
    array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
    array_d = np.array(index_a[0:1])
    with pytest.raises(ValueError, match=length_msg):
        index_a == array_b
    tm.assert_numpy_array_equal(index_a == array_a, all_true)
    tm.assert_numpy_array_equal(index_a == array_c, last_false)

    # test comparisons with Series
    series_a, series_b, series_c, series_d = (
        Series(array_a), Series(array_b), Series(array_c), Series(array_d))
    with pytest.raises(ValueError, match=length_msg):
        index_a == series_b

    tm.assert_numpy_array_equal(index_a == series_a, all_true)
    tm.assert_numpy_array_equal(index_a == series_c, last_false)

    # cases where length is 1 for one of them
    for short in (index_d, series_d, array_d):
        with pytest.raises(ValueError, match=length_msg):
            index_a == short
    msg = "Can only compare identically-labeled Series objects"
    with pytest.raises(ValueError, match=msg):
        series_a == series_d
    with pytest.raises(ValueError, match=length_msg):
        series_a == array_d

    # comparing with a scalar should broadcast; note that we are excluding
    # MultiIndex because in this case each item in the index is a tuple of
    # length 2, and therefore is considered an array of length 2 in the
    # comparison instead of a scalar
    if isinstance(index_a, MultiIndex):
        return
    only_second_last = np.array(
        [False] * (len(index_a) - 2) + [True, False])
    # assuming the 2nd to last item is unique in the data
    item = index_a[-2]
    tm.assert_numpy_array_equal(index_a == item, only_second_last)
    tm.assert_series_equal(series_a == item, Series(only_second_last))
|
||||
|
||||
|
||||
def test_equals_multi(idx):
|
||||
assert idx.equals(idx)
|
||||
assert not idx.equals(idx.values)
|
||||
assert idx.equals(Index(idx.values))
|
||||
|
||||
assert idx.equal_levels(idx)
|
||||
assert not idx.equals(idx[:-1])
|
||||
assert not idx.equals(idx[-1])
|
||||
|
||||
# different number of levels
|
||||
index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index(
|
||||
lrange(4))], codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
|
||||
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])])
|
||||
|
||||
index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1])
|
||||
assert not index.equals(index2)
|
||||
assert not index.equal_levels(index2)
|
||||
|
||||
# levels are different
|
||||
major_axis = Index(lrange(4))
|
||||
minor_axis = Index(lrange(2))
|
||||
|
||||
major_codes = np.array([0, 0, 1, 2, 2, 3])
|
||||
minor_codes = np.array([0, 1, 0, 0, 1, 0])
|
||||
|
||||
index = MultiIndex(levels=[major_axis, minor_axis],
|
||||
codes=[major_codes, minor_codes])
|
||||
assert not idx.equals(index)
|
||||
assert not idx.equal_levels(index)
|
||||
|
||||
# some of the labels are different
|
||||
major_axis = Index(['foo', 'bar', 'baz', 'qux'])
|
||||
minor_axis = Index(['one', 'two'])
|
||||
|
||||
major_codes = np.array([0, 0, 2, 2, 3, 3])
|
||||
minor_codes = np.array([0, 1, 0, 1, 0, 1])
|
||||
|
||||
index = MultiIndex(levels=[major_axis, minor_axis],
|
||||
codes=[major_codes, minor_codes])
|
||||
assert not idx.equals(index)
|
||||
|
||||
|
||||
def test_identical(idx):
|
||||
mi = idx.copy()
|
||||
mi2 = idx.copy()
|
||||
assert mi.identical(mi2)
|
||||
|
||||
mi = mi.set_names(['new1', 'new2'])
|
||||
assert mi.equals(mi2)
|
||||
assert not mi.identical(mi2)
|
||||
|
||||
mi2 = mi2.set_names(['new1', 'new2'])
|
||||
assert mi.identical(mi2)
|
||||
|
||||
mi3 = Index(mi.tolist(), names=mi.names)
|
||||
mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False)
|
||||
assert mi.identical(mi3)
|
||||
assert not mi.identical(mi4)
|
||||
assert mi.equals(mi4)
|
||||
|
||||
|
||||
def test_equals_operator(idx):
|
||||
# GH9785
|
||||
assert (idx == idx).all()
|
||||
|
||||
|
||||
def test_equals_missing_values():
    """Check a one-row slice never ``equals`` the bare tuple at that row."""
    # make sure take is not using -1
    mi = pd.MultiIndex.from_tuples([(0, pd.NaT),
                                    (0, pd.Timestamp('20130101'))])
    for pos in (0, 1):
        result = mi[pos:pos + 1].equals(mi[pos])
        assert not result
|
||||
|
||||
|
||||
def test_is_():
|
||||
mi = MultiIndex.from_tuples(lzip(range(10), range(10)))
|
||||
assert mi.is_(mi)
|
||||
assert mi.is_(mi.view())
|
||||
assert mi.is_(mi.view().view().view().view())
|
||||
mi2 = mi.view()
|
||||
# names are metadata, they don't change id
|
||||
mi2.names = ["A", "B"]
|
||||
assert mi2.is_(mi)
|
||||
assert mi.is_(mi2)
|
||||
|
||||
assert mi.is_(mi.set_names(["C", "D"]))
|
||||
mi2 = mi.view()
|
||||
mi2.set_names(["E", "F"], inplace=True)
|
||||
assert mi.is_(mi2)
|
||||
# levels are inherent properties, they change identity
|
||||
mi3 = mi2.set_levels([lrange(10), lrange(10)])
|
||||
assert not mi3.is_(mi2)
|
||||
# shouldn't change
|
||||
assert mi2.is_(mi)
|
||||
mi4 = mi3.view()
|
||||
|
||||
# GH 17464 - Remove duplicate MultiIndex levels
|
||||
mi4.set_levels([lrange(10), lrange(10)], inplace=True)
|
||||
assert not mi4.is_(mi3)
|
||||
mi5 = mi.view()
|
||||
mi5.set_levels(mi5.levels, inplace=True)
|
||||
assert not mi5.is_(mi)
|
||||
|
||||
|
||||
def test_is_all_dates(idx):
|
||||
assert not idx.is_all_dates
|
||||
|
||||
|
||||
def test_is_numeric(idx):
|
||||
# MultiIndex is never numeric
|
||||
assert not idx.is_numeric()
|
||||
|
||||
|
||||
def test_multiindex_compare():
|
||||
# GH 21149
|
||||
# Ensure comparison operations for MultiIndex with nlevels == 1
|
||||
# behave consistently with those for MultiIndex with nlevels > 1
|
||||
|
||||
midx = pd.MultiIndex.from_product([[0, 1]])
|
||||
|
||||
# Equality self-test: MultiIndex object vs self
|
||||
expected = pd.Series([True, True])
|
||||
result = pd.Series(midx == midx)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# Greater than comparison: MultiIndex object vs self
|
||||
expected = pd.Series([False, False])
|
||||
result = pd.Series(midx > midx)
|
||||
tm.assert_series_equal(result, expected)
|
||||
@@ -0,0 +1,132 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
import warnings
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PY3, range, u
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex, compat
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_dtype_str(indices):
|
||||
dtype = indices.dtype_str
|
||||
assert isinstance(dtype, compat.string_types)
|
||||
assert dtype == str(indices.dtype)
|
||||
|
||||
|
||||
def test_format(idx):
|
||||
idx.format()
|
||||
idx[:0].format()
|
||||
|
||||
|
||||
def test_format_integer_names():
    """Smoke test: ``format(names=True)`` accepts integer level names."""
    two_by_two = MultiIndex(levels=[[0, 1], [0, 1]],
                            codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
                            names=[0, 1])
    two_by_two.format(names=True)
|
||||
|
||||
|
||||
def test_format_sparse_config(idx):
|
||||
warn_filters = warnings.filters
|
||||
warnings.filterwarnings('ignore', category=FutureWarning,
|
||||
module=".*format")
|
||||
# GH1538
|
||||
pd.set_option('display.multi_sparse', False)
|
||||
|
||||
result = idx.format()
|
||||
assert result[1] == 'foo two'
|
||||
|
||||
tm.reset_display_options()
|
||||
|
||||
warnings.filters = warn_filters
|
||||
|
||||
|
||||
def test_format_sparse_display():
    """Check the formatted row that starts a new outermost label.

    Row 3 is the first row whose level-0 code changes (0 -> 1), so every
    level is expected to be printed for it.
    """
    index = MultiIndex(levels=[[0, 1], [0, 1], [0, 1], [0]],
                       codes=[[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1],
                              [0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]])

    result = index.format()
    # NOTE(review): columns are expected to be joined with two spaces
    # (``format`` defaults to ``space=2``) - confirm against upstream
    assert result[3] == '1  0  0  0'
|
||||
|
||||
|
||||
def test_repr_with_unicode_data():
|
||||
with pd.core.config.option_context("display.encoding", 'UTF-8'):
|
||||
d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
|
||||
index = pd.DataFrame(d).set_index(["a", "b"]).index
|
||||
assert "\\u" not in repr(index) # we don't want unicode-escaped
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="#22511 will remove this test")
def test_repr_roundtrip():
    """Check ``eval(repr(mi))`` rebuilds an index equal to ``mi``."""

    def check_roundtrip(index):
        # NOTE(review): eval is applied only to the repr of an index built
        # in this test; do not reuse this pattern on external input
        if PY3:
            tm.assert_index_equal(eval(repr(index)), index, exact=True)
            return
        rebuilt = eval(repr(index))
        # string coerces to unicode
        tm.assert_index_equal(rebuilt, index, exact=False)
        assert index.get_level_values('first').inferred_type == 'string'
        assert rebuilt.get_level_values('first').inferred_type == 'unicode'

    def check_unicode_roundtrip(index):
        rebuilt = eval(repr(index))
        tm.assert_index_equal(rebuilt, index, exact=True)

    mi = MultiIndex.from_product([list('ab'), range(3)],
                                 names=['first', 'second'])
    str(mi)
    check_roundtrip(mi)

    mi_u = MultiIndex.from_product(
        [list(u'ab'), range(3)], names=['first', 'second'])
    check_unicode_roundtrip(mi_u)

    # formatting
    to_text = str if PY3 else compat.text_type
    to_text(mi)

    # long format
    mi = MultiIndex.from_product([list('abcdefg'), range(10)],
                                 names=['first', 'second'])
    check_roundtrip(mi)
    check_unicode_roundtrip(mi_u)
|
||||
|
||||
|
||||
def test_unicode_string_with_unicode():
|
||||
d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
|
||||
idx = pd.DataFrame(d).set_index(["a", "b"]).index
|
||||
|
||||
if PY3:
|
||||
str(idx)
|
||||
else:
|
||||
compat.text_type(idx)
|
||||
|
||||
|
||||
def test_bytestring_with_unicode():
|
||||
d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
|
||||
idx = pd.DataFrame(d).set_index(["a", "b"]).index
|
||||
|
||||
if PY3:
|
||||
bytes(idx)
|
||||
else:
|
||||
str(idx)
|
||||
|
||||
|
||||
def test_repr_max_seq_item_setting(idx):
|
||||
# GH10182
|
||||
idx = idx.repeat(50)
|
||||
with pd.option_context("display.max_seq_items", None):
|
||||
repr(idx)
|
||||
assert '...' not in str(idx)
|
||||
@@ -0,0 +1,454 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import range
|
||||
|
||||
import pandas as pd
|
||||
from pandas import CategoricalIndex, Index, MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def assert_matching(actual, expected, check_dtype=False):
|
||||
# avoid specifying internal representation
|
||||
# as much as possible
|
||||
assert len(actual) == len(expected)
|
||||
for act, exp in zip(actual, expected):
|
||||
act = np.asarray(act)
|
||||
exp = np.asarray(exp)
|
||||
tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype)
|
||||
|
||||
|
||||
def test_get_level_number_integer(idx):
    """Check ``_get_level_number`` when the level names are integers."""
    idx.names = [1, 0]
    # integer names take precedence over positions
    assert idx._get_level_number(1) == 0
    assert idx._get_level_number(0) == 1
    with pytest.raises(IndexError):
        idx._get_level_number(2)
    with pytest.raises(KeyError, match='Level fourth not found'):
        idx._get_level_number('fourth')
|
||||
|
||||
|
||||
def test_get_level_values(idx):
|
||||
result = idx.get_level_values(0)
|
||||
expected = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'],
|
||||
name='first')
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.name == 'first'
|
||||
|
||||
result = idx.get_level_values('first')
|
||||
expected = idx.get_level_values(0)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# GH 10460
|
||||
index = MultiIndex(
|
||||
levels=[CategoricalIndex(['A', 'B']),
|
||||
CategoricalIndex([1, 2, 3])],
|
||||
codes=[np.array([0, 0, 0, 1, 1, 1]),
|
||||
np.array([0, 1, 2, 0, 1, 2])])
|
||||
|
||||
exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B'])
|
||||
tm.assert_index_equal(index.get_level_values(0), exp)
|
||||
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
|
||||
tm.assert_index_equal(index.get_level_values(1), exp)
|
||||
|
||||
|
||||
def test_get_value_duplicates():
    """Check ``get_loc`` gives a slice and the engine lookup raises."""
    tags = ['D', 'B', 'C']
    days = [0, 26, 27, 37, 57, 67, 75, 82]
    index = MultiIndex(levels=[tags, days],
                       codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2],
                              [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
                       names=['tag', 'day'])

    assert index.get_loc('D') == slice(0, 3)
    with pytest.raises(KeyError):
        index._engine.get_value(np.array([]), 'D')
|
||||
|
||||
|
||||
def test_get_level_values_all_na():
|
||||
# GH 17924 when level entirely consists of nan
|
||||
arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]]
|
||||
index = pd.MultiIndex.from_arrays(arrays)
|
||||
result = index.get_level_values(0)
|
||||
expected = pd.Index([np.nan, np.nan, np.nan], dtype=np.float64)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = index.get_level_values(1)
|
||||
expected = pd.Index(['a', np.nan, 1], dtype=object)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_get_level_values_int_with_na():
|
||||
# GH 17924
|
||||
arrays = [['a', 'b', 'b'], [1, np.nan, 2]]
|
||||
index = pd.MultiIndex.from_arrays(arrays)
|
||||
result = index.get_level_values(1)
|
||||
expected = Index([1, np.nan, 2])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]]
|
||||
index = pd.MultiIndex.from_arrays(arrays)
|
||||
result = index.get_level_values(1)
|
||||
expected = Index([np.nan, np.nan, 2])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_get_level_values_na():
|
||||
arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]]
|
||||
index = pd.MultiIndex.from_arrays(arrays)
|
||||
result = index.get_level_values(0)
|
||||
expected = pd.Index([np.nan, np.nan, np.nan])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = index.get_level_values(1)
|
||||
expected = pd.Index(['a', np.nan, 1])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])]
|
||||
index = pd.MultiIndex.from_arrays(arrays)
|
||||
result = index.get_level_values(1)
|
||||
expected = pd.DatetimeIndex([0, 1, pd.NaT])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
arrays = [[], []]
|
||||
index = pd.MultiIndex.from_arrays(arrays)
|
||||
result = index.get_level_values(0)
|
||||
expected = pd.Index([], dtype=object)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_set_name_methods(idx, index_names):
    """Check ``set_names`` for all levels, one level and a list of levels."""
    # so long as these are synonyms, we don't need to test set_names
    assert idx.rename == idx.set_names
    suffixed = [name + "SUFFIX" for name in index_names]
    renamed = idx.set_names(suffixed)
    assert idx.names == index_names
    assert renamed.names == suffixed
    with pytest.raises(ValueError, match="^Length"):
        renamed.set_names(suffixed + suffixed)
    suffixed_twice = [name + "SUFFIX2" for name in suffixed]
    returned = renamed.set_names(suffixed_twice, inplace=True)
    assert returned is None
    assert renamed.names == suffixed_twice

    # set names for specific level (# GH7792)
    renamed = idx.set_names(suffixed[0], level=0)
    assert idx.names == index_names
    assert renamed.names == [suffixed[0], index_names[1]]

    returned = renamed.set_names(suffixed_twice[0], level=0, inplace=True)
    assert returned is None
    assert renamed.names == [suffixed_twice[0], index_names[1]]

    # set names for multiple levels
    renamed = idx.set_names(suffixed, level=[0, 1])
    assert idx.names == index_names
    assert renamed.names == suffixed

    returned = renamed.set_names(suffixed_twice, level=[0, 1],
                                 inplace=True)
    assert returned is None
    assert renamed.names == suffixed_twice
|
||||
|
||||
|
||||
def test_set_levels_codes_directly(idx):
    """Check that assigning ``levels`` or ``codes`` raises AttributeError."""
    # setting levels/codes directly raises AttributeError

    new_levels = [[label + 'a' for label in level] for level in idx.levels]

    major_codes, minor_codes = idx.codes
    new_codes = [[(code + 1) % 3 for code in major_codes],
                 [(code + 1) % 1 for code in minor_codes]]

    with pytest.raises(AttributeError):
        idx.levels = new_levels

    with pytest.raises(AttributeError):
        idx.codes = new_codes
|
||||
|
||||
|
||||
def test_set_levels(idx):
    """Check ``set_levels`` with and without ``inplace`` and ``level``."""
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    levels = idx.levels
    new_levels = [[lev + 'a' for lev in level] for level in levels]
    # expected levels after replacing only level 0 / only level 1
    only_first = [new_levels[0], levels[1]]
    only_second = [levels[0], new_levels[1]]

    # level changing [w/o mutation]
    changed = idx.set_levels(new_levels)
    assert_matching(changed.levels, new_levels)
    assert_matching(idx.levels, levels)

    # level changing [w/ mutation]
    changed = idx.copy()
    returned = changed.set_levels(new_levels, inplace=True)
    assert returned is None
    assert_matching(changed.levels, new_levels)

    # level changing specific level [w/o mutation]
    for level, wanted in ((0, only_first), (1, only_second)):
        changed = idx.set_levels(new_levels[level], level=level)
        assert_matching(changed.levels, wanted)
        assert_matching(idx.levels, levels)

    # level changing multiple levels [w/o mutation]
    changed = idx.set_levels(new_levels, level=[0, 1])
    assert_matching(changed.levels, new_levels)
    assert_matching(idx.levels, levels)

    # level changing specific level [w/ mutation]
    for level, wanted in ((0, only_first), (1, only_second)):
        changed = idx.copy()
        returned = changed.set_levels(new_levels[level], level=level,
                                      inplace=True)
        assert returned is None
        assert_matching(changed.levels, wanted)
        assert_matching(idx.levels, levels)

    # level changing multiple levels [w/ mutation]
    changed = idx.copy()
    returned = changed.set_levels(new_levels, level=[0, 1],
                                  inplace=True)
    assert returned is None
    assert_matching(changed.levels, new_levels)
    assert_matching(idx.levels, levels)

    # illegal level changing should not change levels
    # GH 13754
    untouched = idx.copy()
    for inplace in [True, False]:
        with pytest.raises(ValueError, match="^On"):
            idx.set_levels(['c'], level=0, inplace=inplace)
        assert_matching(idx.levels, untouched.levels,
                        check_dtype=True)

        with pytest.raises(ValueError, match="^On"):
            idx.set_codes([0, 1, 2, 3, 4, 5], level=0,
                          inplace=inplace)
        assert_matching(idx.codes, untouched.codes,
                        check_dtype=True)

        with pytest.raises(TypeError, match="^Levels"):
            idx.set_levels('c', level=0, inplace=inplace)
        assert_matching(idx.levels, untouched.levels,
                        check_dtype=True)

        with pytest.raises(TypeError, match="^Codes"):
            idx.set_codes(1, level=0, inplace=inplace)
        assert_matching(idx.codes, untouched.codes,
                        check_dtype=True)
|
||||
|
||||
|
||||
def test_set_codes(idx):
    """Check ``set_codes`` with and without ``inplace`` and ``level``."""
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    codes = idx.codes
    major_codes, minor_codes = codes
    new_codes = [[(x + 1) % 3 for x in major_codes],
                 [(x + 1) % 1 for x in minor_codes]]
    # expected codes after replacing only level 0 / only level 1
    only_first = [new_codes[0], codes[1]]
    only_second = [codes[0], new_codes[1]]

    # changing codes w/o mutation
    changed = idx.set_codes(new_codes)
    assert_matching(changed.codes, new_codes)
    assert_matching(idx.codes, codes)

    # changing label w/ mutation
    changed = idx.copy()
    returned = changed.set_codes(new_codes, inplace=True)
    assert returned is None
    assert_matching(changed.codes, new_codes)

    # codes changing specific level w/o mutation
    for level, wanted in ((0, only_first), (1, only_second)):
        changed = idx.set_codes(new_codes[level], level=level)
        assert_matching(changed.codes, wanted)
        assert_matching(idx.codes, codes)

    # codes changing multiple levels w/o mutation
    changed = idx.set_codes(new_codes, level=[0, 1])
    assert_matching(changed.codes, new_codes)
    assert_matching(idx.codes, codes)

    # label changing specific level w/ mutation
    for level, wanted in ((0, only_first), (1, only_second)):
        changed = idx.copy()
        returned = changed.set_codes(new_codes[level], level=level,
                                     inplace=True)
        assert returned is None
        assert_matching(changed.codes, wanted)
        assert_matching(idx.codes, codes)

    # codes changing multiple levels [w/ mutation]
    changed = idx.copy()
    returned = changed.set_codes(new_codes, level=[0, 1],
                                 inplace=True)
    assert returned is None
    assert_matching(changed.codes, new_codes)
    assert_matching(idx.codes, codes)

    # label changing for levels of different magnitude of categories
    ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)])
    new_codes = range(129, -1, -1)
    expected = pd.MultiIndex.from_tuples(
        [(0, i) for i in new_codes])

    # [w/o mutation]
    result = ind.set_codes(codes=new_codes, level=1)
    assert result.equals(expected)

    # [w/ mutation]
    result = ind.copy()
    result.set_codes(codes=new_codes, level=1, inplace=True)
    assert result.equals(expected)

    # the old ``labels`` keyword is expected to warn
    with tm.assert_produces_warning(FutureWarning):
        ind.set_codes(labels=new_codes, level=1)
|
||||
|
||||
|
||||
def test_set_labels_deprecated():
|
||||
# GH23752
|
||||
ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)])
|
||||
new_labels = range(129, -1, -1)
|
||||
expected = pd.MultiIndex.from_tuples(
|
||||
[(0, i) for i in new_labels])
|
||||
|
||||
# [w/o mutation]
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = ind.set_labels(labels=new_labels, level=1)
|
||||
assert result.equals(expected)
|
||||
|
||||
# [w/ mutation]
|
||||
result = ind.copy()
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result.set_labels(labels=new_labels, level=1, inplace=True)
|
||||
assert result.equals(expected)
|
||||
|
||||
|
||||
def test_set_levels_codes_names_bad_input(idx):
    """Check the errors raised for malformed levels, codes and names."""
    levels, codes = idx.levels, idx.codes
    names = idx.names
    first_level, first_codes, first_name = levels[0], codes[0], names[0]

    # too few entries for the number of levels
    with pytest.raises(ValueError, match='Length of levels'):
        idx.set_levels([first_level])

    with pytest.raises(ValueError, match='Length of codes'):
        idx.set_codes([first_codes])

    with pytest.raises(ValueError, match='Length of names'):
        idx.set_names([first_name])

    # shouldn't scalar data error, instead should demand list-like
    with pytest.raises(TypeError, match='list of lists-like'):
        idx.set_levels(first_level)

    # shouldn't scalar data error, instead should demand list-like
    with pytest.raises(TypeError, match='list of lists-like'):
        idx.set_codes(first_codes)

    # shouldn't scalar data error, instead should demand list-like
    with pytest.raises(TypeError, match='list-like'):
        idx.set_names(first_name)

    # should have equal lengths
    with pytest.raises(TypeError, match='list of lists-like'):
        idx.set_levels(first_level, level=[0, 1])

    with pytest.raises(TypeError, match='list-like'):
        idx.set_levels(levels, level=0)

    # should have equal lengths
    with pytest.raises(TypeError, match='list of lists-like'):
        idx.set_codes(first_codes, level=[0, 1])

    with pytest.raises(TypeError, match='list-like'):
        idx.set_codes(codes, level=0)

    # should have equal lengths
    with pytest.raises(ValueError, match='Length of names'):
        idx.set_names(first_name, level=[0, 1])

    with pytest.raises(TypeError, match='Names must be a'):
        idx.set_names(names, level=0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('inplace', [True, False])
def test_set_names_with_nlevel_1(inplace):
    """Check ``set_names`` on a one-level MultiIndex, in place or not."""
    # GH 21149
    # Ensure that .set_names for MultiIndex with
    # nlevels == 1 does not raise any errors
    expected = pd.MultiIndex(levels=[[0, 1]],
                             codes=[[0, 1]],
                             names=['first'])
    m = pd.MultiIndex.from_product([[0, 1]])
    returned = m.set_names('first', level=0, inplace=inplace)

    # the in-place call's return value is not used; compare ``m`` instead
    result = m if inplace else returned

    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('ordered', [True, False])
def test_set_levels_categorical(ordered):
    """Check ``set_levels`` accepts a CategoricalIndex for one level."""
    # GH13854
    index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])

    cidx = CategoricalIndex(list("bac"), ordered=ordered)
    result = index.set_levels(cidx, 0)
    tm.assert_index_equal(
        result,
        MultiIndex(levels=[cidx, [0, 1, 2, 3]], codes=index.codes))

    # level values keep the categories and ordering of ``cidx``
    expected_lvl = CategoricalIndex(list("bacb"),
                                    categories=cidx.categories,
                                    ordered=cidx.ordered)
    tm.assert_index_equal(result.get_level_values(0), expected_lvl)
|
||||
|
||||
|
||||
def test_set_value_keeps_names():
    """Check ``.at`` enlargement keeps the index names of the frame."""
    # motivating example from #3742
    first_names = ['hans'] * 3 + ['grethe'] * 3
    numbers = ['1', '2', '3'] * 2
    idx = pd.MultiIndex.from_arrays([first_names, numbers],
                                    names=['Name', 'Number'])
    df = pd.DataFrame(
        np.random.randn(6, 4),
        columns=['one', 'two', 'three', 'four'],
        index=idx)
    df = df.sort_index()
    assert df._is_copy is None
    assert df.index.names == ('Name', 'Number')
    # ('grethe', '4') is not among the labels built above
    df.at[('grethe', '4'), 'one'] = 99.34
    assert df._is_copy is None
    assert df.index.names == ('Name', 'Number')
|
||||
|
||||
|
||||
def test_set_levels_with_iterable():
|
||||
# GH23273
|
||||
sizes = [1, 2, 3]
|
||||
colors = ['black'] * 3
|
||||
index = pd.MultiIndex.from_arrays([sizes, colors], names=['size', 'color'])
|
||||
|
||||
result = index.set_levels(map(int, ['3', '2', '1']), level='size')
|
||||
|
||||
expected_sizes = [3, 2, 1]
|
||||
expected = pd.MultiIndex.from_arrays([expected_sizes, colors],
|
||||
names=['size', 'color'])
|
||||
tm.assert_index_equal(result, expected)
|
||||
@@ -0,0 +1,375 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical, CategoricalIndex, Index, IntervalIndex, MultiIndex,
|
||||
date_range)
|
||||
from pandas.core.indexes.base import InvalidIndexError
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_almost_equal
|
||||
|
||||
|
||||
def test_slice_locs_partial(idx):
    """``slice_locs`` with tuple, open-ended and scalar bounds on a sorted index."""
    sorted_idx = idx.sortlevel(0)[0]

    # (start, end) bounds paired with the expected (left, right) positions
    cases = [
        ((('foo', 'two'), ('qux', 'one')), (1, 5)),
        ((None, ('qux', 'one')), (0, 5)),
        ((('foo', 'two'), None), (1, len(sorted_idx))),
        (('bar', 'baz'), (2, 4)),
    ]
    for (start, end), expected in cases:
        result = sorted_idx.slice_locs(start, end)
        assert result == expected
|
||||
|
||||
|
||||
def test_slice_locs():
    """``slice_locs`` bounds on a stacked time frame select the expected rows."""
    df = tm.makeTimeDataFrame()
    stacked = df.stack()
    mi = stacked.index
    dates = df.index
    half_minute = timedelta(seconds=30)

    # bounds that are existing labels: both end points are included
    left, right = mi.slice_locs(dates[5], dates[15])
    tm.assert_almost_equal(stacked[left:right].values,
                           df[5:16].stack().values)

    # bounds shifted 30 seconds inwards from dates[5] and dates[15]
    left, right = mi.slice_locs(dates[5] + half_minute,
                                dates[15] - half_minute)
    tm.assert_almost_equal(stacked[left:right].values,
                           df[6:15].stack().values)
|
||||
|
||||
|
||||
def test_slice_locs_with_type_mismatch():
    """Bounds whose type does not fit the levels raise ``TypeError``."""
    mismatch = '^Level type mismatch'

    time_frame = tm.makeTimeDataFrame()
    mi = time_frame.stack().index
    with pytest.raises(TypeError, match=mismatch):
        mi.slice_locs((1, 3))
    with pytest.raises(TypeError, match=mismatch):
        mi.slice_locs(time_frame.index[5] + timedelta(seconds=30), (5, 2))

    custom_frame = tm.makeCustomDataframe(5, 5)
    mi = custom_frame.stack().index
    with pytest.raises(TypeError, match=mismatch):
        mi.slice_locs(timedelta(seconds=30))
    # TODO: Try creating a UnicodeDecodeError in exception message
    with pytest.raises(TypeError, match=mismatch):
        mi.slice_locs(custom_frame.index[1], (16, "a"))
|
||||
|
||||
|
||||
def test_slice_locs_not_sorted():
    """``slice_locs`` with a full-depth key raises on the unsorted index."""
    levels = [Index(lrange(4)), Index(lrange(4)), Index(lrange(4))]
    codes = [np.array([0, 0, 1, 2, 2, 2, 3, 3]),
             np.array([0, 1, 0, 0, 0, 1, 0, 1]),
             np.array([1, 0, 1, 1, 0, 0, 1, 0])]
    index = MultiIndex(levels=levels, codes=codes)
    lower, upper = (1, 0, 1), (2, 1, 0)

    msg = "[Kk]ey length.*greater than MultiIndex lexsort depth"
    with pytest.raises(KeyError, match=msg):
        index.slice_locs(lower, upper)

    # works
    sorted_index = index.sortlevel(0)[0]
    # should there be a test case here???
    # (the returned locations are not asserted; only "does not raise" is checked)
    sorted_index.slice_locs(lower, upper)
|
||||
|
||||
|
||||
def test_slice_locs_not_contained():
|
||||
# some searchsorted action
|
||||
|
||||
index = MultiIndex(levels=[[0, 2, 4, 6], [0, 2, 4]],
|
||||
codes=[[0, 0, 0, 1, 1, 2, 3, 3, 3],
|
||||
[0, 1, 2, 1, 2, 2, 0, 1, 2]], sortorder=0)
|
||||
|
||||
result = index.slice_locs((1, 0), (5, 2))
|
||||
assert result == (3, 6)
|
||||
|
||||
result = index.slice_locs(1, 5)
|
||||
assert result == (3, 6)
|
||||
|
||||
result = index.slice_locs((2, 2), (5, 2))
|
||||
assert result == (3, 6)
|
||||
|
||||
result = index.slice_locs(2, 5)
|
||||
assert result == (3, 6)
|
||||
|
||||
result = index.slice_locs((1, 0), (6, 3))
|
||||
assert result == (3, 8)
|
||||
|
||||
result = index.slice_locs(-1, 10)
|
||||
assert result == (0, len(index))
|
||||
|
||||
|
||||
def test_putmask_with_wrong_mask(idx):
    """``putmask`` raises ``ValueError`` for masks that do not fit the index.

    Three bad masks are tried: one element too long, one element too short,
    and a plain string.
    """
    # GH18368

    # The builtin ``bool`` is used as dtype: ``np.bool`` was only an alias
    # for it and is no longer available in current NumPy releases.
    with pytest.raises(ValueError):
        idx.putmask(np.ones(len(idx) + 1, bool), 1)

    with pytest.raises(ValueError):
        idx.putmask(np.ones(len(idx) - 1, bool), 1)

    with pytest.raises(ValueError):
        idx.putmask('foo', 1)
|
||||
|
||||
|
||||
def test_get_indexer():
    """``get_indexer`` between two slices of one MultiIndex, with fill methods."""
    major_axis = Index(lrange(4))
    minor_axis = Index(lrange(2))

    major_codes = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp)
    minor_codes = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp)

    index = MultiIndex(levels=[major_axis, minor_axis],
                       codes=[major_codes, minor_codes])
    head = index[:5]
    picked = index[[1, 3, 5]]

    # exact matching: entries missing from ``head`` map to -1
    exact = head.get_indexer(picked)
    assert_almost_equal(exact, np.array([1, 3, -1], dtype=np.intp))

    # 'pad', also on a reversed target, and its alias 'ffill'
    padded = picked.get_indexer(head, method='pad')
    exp_pad = np.array([-1, 0, 0, 1, 1], dtype=np.intp)
    assert_almost_equal(padded, exp_pad)

    padded_rev = picked.get_indexer(head[::-1], method='pad')
    assert_almost_equal(padded_rev, exp_pad[::-1])

    ffilled = picked.get_indexer(head, method='ffill')
    assert_almost_equal(padded, ffilled)

    # 'backfill', also on a reversed target, and its alias 'bfill'
    backfilled = picked.get_indexer(head, method='backfill')
    exp_back = np.array([0, 0, 1, 1, 2], dtype=np.intp)
    assert_almost_equal(backfilled, exp_back)

    backfilled_rev = picked.get_indexer(head[::-1], method='backfill')
    assert_almost_equal(backfilled_rev, exp_back[::-1])

    bfilled = picked.get_indexer(head, method='bfill')
    assert_almost_equal(backfilled, bfilled)

    # pass non-MultiIndex
    from_values = head.get_indexer(picked.values)
    from_index = head.get_indexer(picked)
    assert_almost_equal(from_values, from_index)

    scalars = head.get_indexer([1, 2, 3])
    assert (scalars == [-1, -1, -1]).all()

    # create index with duplicates
    duplicated = Index(lrange(10) + lrange(10))
    target = Index(lrange(20))

    msg = "Reindexing only valid with uniquely valued Index objects"
    with pytest.raises(InvalidIndexError, match=msg):
        duplicated.get_indexer(target)
|
||||
|
||||
|
||||
def test_get_indexer_nearest():
    """``method='nearest'`` and ``tolerance`` raise ``NotImplementedError``."""
    midx = MultiIndex.from_tuples([('a', 1), ('b', 2)])
    unsupported = (
        {'method': 'nearest'},
        {'method': 'pad', 'tolerance': 2},
    )
    for kwargs in unsupported:
        with pytest.raises(NotImplementedError):
            midx.get_indexer(['a'], **kwargs)
|
||||
|
||||
|
||||
def test_getitem(idx):
    """Scalar, slice and boolean ``__getitem__`` on the ``idx`` fixture."""
    # scalar
    assert idx[2] == ('bar', 'one')

    # slice
    sliced = idx[2:5]
    assert sliced.equals(idx[[2, 3, 4]])

    # boolean (list mask and ndarray mask)
    mask = [True, False, True, False, True, True]
    from_list = idx[mask]
    from_array = idx[np.array(mask)]
    kept = idx[[0, 2, 4, 5]]
    assert from_list.equals(kept)
    assert from_array.equals(kept)
|
||||
|
||||
|
||||
def test_getitem_group_select(idx):
    """``get_loc`` with a first-level label returns a slice on the sorted index."""
    sorted_idx = idx.sortlevel(0)[0]
    for label, expected in (('baz', slice(3, 4)), ('foo', slice(0, 2))):
        assert sorted_idx.get_loc(label) == expected
|
||||
|
||||
|
||||
def test_get_indexer_consistency(idx):
    """``get_indexer`` / ``get_indexer_non_unique`` return ``np.intp`` arrays.

    For a unique index ``get_indexer`` must return an ``np.intp`` ndarray;
    otherwise it must raise ``InvalidIndexError``.  In both cases
    ``get_indexer_non_unique`` must return an ``np.intp`` ndarray as its
    first element.
    """
    # See GH 16819
    # (a former ``isinstance(idx, IntervalIndex): pass`` branch had no
    # effect and was dropped)
    if idx.is_unique or isinstance(idx, CategoricalIndex):
        indexer = idx.get_indexer(idx[0:2])
        assert isinstance(indexer, np.ndarray)
        assert indexer.dtype == np.intp
    else:
        e = "Reindexing only valid with uniquely valued Index objects"
        with pytest.raises(InvalidIndexError, match=e):
            idx.get_indexer(idx[0:2])

    indexer, _ = idx.get_indexer_non_unique(idx[0:2])
    assert isinstance(indexer, np.ndarray)
    assert indexer.dtype == np.intp
|
||||
|
||||
|
||||
@pytest.mark.parametrize('ind1', [[True] * 5, pd.Index([True] * 5)])
@pytest.mark.parametrize('ind2', [[True, False, True, False, False],
                                  pd.Index([True, False, True, False,
                                            False])])
def test_getitem_bool_index_all(ind1, ind2):
    """Boolean masks given as a list or as an ``Index`` select the same rows."""
    # GH#22533
    tuples = [(10, 1), (20, 2), (30, 3), (40, 4), (50, 5)]
    idx = MultiIndex.from_tuples(tuples)

    # all-True mask keeps everything
    everything = idx[ind1]
    tm.assert_index_equal(everything, idx)

    # mixed mask keeps positions 0 and 2
    subset = idx[ind2]
    expected = MultiIndex.from_tuples([(10, 1), (30, 3)])
    tm.assert_index_equal(subset, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('ind1', [[True], pd.Index([True])])
@pytest.mark.parametrize('ind2', [[False], pd.Index([False])])
def test_getitem_bool_index_single(ind1, ind2):
    """Single-element boolean masks on a one-row MultiIndex."""
    # GH#22533
    idx = MultiIndex.from_tuples([(10, 1)])

    kept = idx[ind1]
    tm.assert_index_equal(kept, idx)

    # a [False] mask leaves an empty index with empty int64 levels
    empty_levels = [np.array([], dtype=np.int64) for _ in range(2)]
    expected = pd.MultiIndex(levels=empty_levels, codes=[[], []])
    dropped = idx[ind2]
    tm.assert_index_equal(dropped, expected)
|
||||
|
||||
|
||||
def test_get_loc(idx):
    """``get_loc`` for present, absent and partial keys."""
    assert idx.get_loc(('foo', 'two')) == 1
    assert idx.get_loc(('baz', 'two')) == 3

    # keys that are not in the index
    for missing in (('bar', 'two'), 'quux'):
        with pytest.raises(KeyError):
            idx.get_loc(missing)

    with pytest.raises(NotImplementedError):
        idx.get_loc('foo', method='nearest')

    # 3 levels
    levels = [Index(lrange(4)), Index(lrange(4)), Index(lrange(4))]
    codes = [np.array([0, 0, 1, 2, 2, 2, 3, 3]),
             np.array([0, 1, 0, 0, 0, 1, 0, 1]),
             np.array([1, 0, 1, 1, 0, 0, 1, 0])]
    index = MultiIndex(levels=levels, codes=codes)
    with pytest.raises(KeyError):
        index.get_loc((1, 1))
    assert index.get_loc((2, 0)) == slice(3, 5)
|
||||
|
||||
|
||||
def test_get_loc_duplicates():
    """``get_loc`` on flat indexes that contain duplicated labels."""
    # every entry equals the key: a slice over the whole index is returned
    all_same = Index([2, 2, 2, 2])
    assert all_same.get_loc(2) == slice(0, 4)

    # the key itself is unique, other labels are duplicated
    mixed = Index(['c', 'a', 'a', 'b', 'b'])
    assert mixed.get_loc('c') == 0
|
||||
|
||||
|
||||
def test_get_loc_level():
    """``get_loc_level`` for partial, full, missing and slice keys."""
    levels = [Index(lrange(4)), Index(lrange(4)), Index(lrange(4))]
    codes = [np.array([0, 0, 1, 2, 2, 2, 3, 3]),
             np.array([0, 1, 0, 0, 0, 1, 0, 1]),
             np.array([1, 0, 1, 1, 0, 0, 1, 0])]
    index = MultiIndex(levels=levels, codes=codes)

    # partial key: location plus the index of the remaining level
    loc, new_index = index.get_loc_level((0, 1))
    exp_loc = slice(1, 2)
    exp_index = index[exp_loc].droplevel(0).droplevel(0)
    assert loc == exp_loc
    assert new_index.equals(exp_index)

    # full key: integer location, nothing remains
    loc, new_index = index.get_loc_level((0, 1, 0))
    assert loc == 1
    assert new_index is None

    with pytest.raises(KeyError):
        index.get_loc_level((2, 2))

    # GH 22221: unused label
    without_two = index.drop(2)
    with pytest.raises(KeyError):
        without_two.get_loc_level(2)

    # Unused label on unsorted level:
    without_one = index.drop(1, level=2)
    with pytest.raises(KeyError):
        without_one.get_loc_level(2, 2)

    index = MultiIndex(levels=[[2000], lrange(4)],
                       codes=[np.array([0, 0, 0, 0]),
                              np.array([0, 1, 2, 3])])
    result, new_index = index.get_loc_level((2000, slice(None, None)))
    assert result == slice(None, None)
    assert new_index.equals(index.droplevel(0))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtype1', [int, float, bool, str])
@pytest.mark.parametrize('dtype2', [int, float, bool, str])
def test_get_loc_multiple_dtypes(dtype1, dtype2):
    """``get_loc`` finds an existing tuple for every pairing of level dtypes."""
    # GH 18520
    levels = [np.array([0, 1]).astype(dtype)
              for dtype in (dtype1, dtype2)]
    idx = pd.MultiIndex.from_product(levels)
    key = idx[2]
    assert idx.get_loc(key) == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize('level', [0, 1])
@pytest.mark.parametrize('dtypes', [[int, float], [float, int]])
def test_get_loc_implicit_cast(level, dtypes):
    """An int key matches a float level and vice versa."""
    # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa
    lev_dtype, key_dtype = dtypes

    levels = [['a', 'b'], ['c', 'd']]
    lookup = ['b', 'd']
    # make one level numeric and look it up with the *other* numeric type
    levels[level] = np.array([0, 1], dtype=lev_dtype)
    lookup[level] = key_dtype(1)

    idx = MultiIndex.from_product(levels)
    assert idx.get_loc(tuple(lookup)) == 3
|
||||
|
||||
|
||||
def test_get_loc_cast_bool():
    """Int keys match a bool level; bool keys do not match an int level."""
    # GH 19086 : int is casted to bool, but not vice-versa
    idx = MultiIndex.from_product([[False, True],
                                   np.arange(2, dtype='int64')])

    for key, position in (((0, 1), 1), ((1, 0), 2)):
        assert idx.get_loc(key) == position

    for key in ((False, True), (True, False)):
        with pytest.raises(KeyError):
            idx.get_loc(key)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('level', [0, 1])
def test_get_loc_nan(level, nulls_fixture):
    """A null value used as a level entry can be located with ``get_loc``."""
    # GH 18485 : NaN in MultiIndex
    levels = [['a', 'b'], ['c', 'd']]
    lookup = ['b', 'd']
    # replace one level by [0, <null>] and look up the null in that position
    levels[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture))
    lookup[level] = nulls_fixture

    idx = MultiIndex.from_product(levels)
    assert idx.get_loc(tuple(lookup)) == 3
|
||||
|
||||
|
||||
def test_get_loc_missing_nan():
    """Looking up absent keys, including NaN, raises ``KeyError``."""
    # GH 8569
    idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]])
    assert isinstance(idx.get_loc(1), slice)

    for absent in (3, np.nan, [np.nan]):
        with pytest.raises(KeyError):
            idx.get_loc(absent)
|
||||
|
||||
|
||||
def test_get_indexer_categorical_time():
    """``get_indexer`` of a categorical/datetime product index on itself."""
    # https://github.com/pandas-dev/pandas/issues/21390
    letters = Categorical(['a', 'b', 'c'])
    hours = Categorical(date_range("2012-01-01", periods=3, freq='H'))
    midx = MultiIndex.from_product([letters, hours])

    result = midx.get_indexer(midx)
    # every entry must map to its own position
    tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp))
|
||||
@@ -0,0 +1,293 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange, range
|
||||
|
||||
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
||||
|
||||
import pandas as pd
|
||||
from pandas import IntervalIndex, MultiIndex, RangeIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_labels_dtypes():
    """Codes use the integer dtype expected for the number of level values."""
    # GH 8456
    i = MultiIndex.from_tuples([('A', 1), ('A', 2)])
    assert i.codes[0].dtype == 'int8'
    assert i.codes[1].dtype == 'int8'

    # size of the second level paired with the expected dtype of its codes
    for n_values, dtype in ((40, 'int8'), (400, 'int16'), (40000, 'int32')):
        i = MultiIndex.from_product([['a'], range(n_values)])
        assert i.codes[1].dtype == dtype

    i = pd.MultiIndex.from_product([['a'], range(1000)])
    for level_codes in (i.codes[0], i.codes[1]):
        assert (level_codes >= 0).all()
|
||||
|
||||
|
||||
def test_values_boxed():
    """``MultiIndex.values`` equals the object array built from the tuples."""
    stamp = pd.Timestamp
    tuples = [
        (1, stamp('2000-01-01')),
        (2, pd.NaT),
        (3, stamp('2000-01-03')),
        (1, stamp('2000-01-04')),
        (2, stamp('2000-01-02')),
        (3, stamp('2000-01-03')),
    ]
    result = pd.MultiIndex.from_tuples(tuples)
    expected = construct_1d_object_array_from_listlike(tuples)
    tm.assert_numpy_array_equal(result.values, expected)

    # Check that code branches for boxed values produce identical results
    values_then_slice = result.values[:4]
    slice_then_values = result[:4].values
    tm.assert_numpy_array_equal(values_then_slice, slice_then_values)
|
||||
|
||||
|
||||
def test_values_multiindex_datetimeindex():
    """``values`` of a naive/aware datetime MultiIndex round-trips per level."""
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(10 ** 18, 10 ** 18 + 5)
    naive = pd.DatetimeIndex(ints)
    # TODO(GH-24559): Remove the FutureWarning
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        aware = pd.DatetimeIndex(ints, tz='US/Central')

    idx = pd.MultiIndex.from_arrays([naive, aware])

    def check_levels(mi, exp_outer, exp_inner):
        # rebuild each level from the tuples in ``mi.values`` and compare
        rows = mi.values
        outer = pd.DatetimeIndex([row[0] for row in rows])
        tm.assert_index_equal(outer, exp_outer)
        inner = pd.DatetimeIndex([row[1] for row in rows])
        tm.assert_index_equal(inner, exp_inner)

    check_levels(idx, naive, aware)

    # n_lev > n_lab
    check_levels(idx[:2], naive[:2], aware[:2])
|
||||
|
||||
|
||||
def test_values_multiindex_periodindex():
    """``values`` of an int/period MultiIndex round-trips per level."""
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(2007, 2012)
    pidx = pd.PeriodIndex(ints, freq='D')

    idx = pd.MultiIndex.from_arrays([ints, pidx])

    def check_levels(mi, exp_ints, exp_periods):
        # rebuild each level from the tuples in ``mi.values`` and compare
        rows = mi.values
        outer = pd.Int64Index([row[0] for row in rows])
        tm.assert_index_equal(outer, pd.Int64Index(exp_ints))
        inner = pd.PeriodIndex([row[1] for row in rows])
        tm.assert_index_equal(inner, exp_periods)

    check_levels(idx, ints, pidx)

    # n_lev > n_lab
    check_levels(idx[:2], ints[:2], pidx[:2])
|
||||
|
||||
|
||||
def test_consistency():
    """Build a large MultiIndex, then one with repeated code pairs."""
    # need to construct an overflow
    major_axis = lrange(70000)
    minor_axis = lrange(10)

    # the fact that it works means it's consistent
    index = MultiIndex(levels=[major_axis, minor_axis],
                       codes=[np.arange(70000),
                              np.repeat(lrange(10), 7000)])

    # inconsistent
    repeated_major = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3])
    repeated_minor = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1])
    index = MultiIndex(levels=[major_axis, minor_axis],
                       codes=[repeated_major, repeated_minor])

    assert index.is_unique is False
|
||||
|
||||
|
||||
def test_hash_collisions():
    """Every entry of a 1000 x 1000 product index maps back to its own position."""
    # non-smoke test that we don't get hash collisions
    index = MultiIndex.from_product([np.arange(1000), np.arange(1000)],
                                    names=['one', 'two'])
    size = len(index)

    result = index.get_indexer(index.values)
    tm.assert_numpy_array_equal(result, np.arange(size, dtype='intp'))

    # spot-check the first two and the last two positions
    for position in (0, 1, size - 2, size - 1):
        assert index.get_loc(index[position]) == position
|
||||
|
||||
|
||||
def test_dims():
    """Placeholder: this test has no body and asserts nothing."""
|
||||
|
||||
|
||||
def test_take_invalid_kwargs():
    """``MultiIndex.take`` rejects unknown and unsupported keyword arguments.

    The function used to be called ``take_invalid_kwargs``; without the
    ``test_`` prefix pytest never collected it, so the checks never ran.
    """
    vals = [['A', 'B'],
            [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]]
    idx = pd.MultiIndex.from_product(vals, names=['str', 'dt'])
    indices = [1, 2]

    # unknown keyword
    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=msg):
        idx.take(indices, foo=2)

    # numpy-compat keywords that are accepted by name but not supported
    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, out=indices)

    msg = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, mode='clip')


# Backward-compatible alias for the previous (never collected) name.
take_invalid_kwargs = test_take_invalid_kwargs
|
||||
|
||||
|
||||
def test_isna_behavior(idx):
    """``pd.isna`` on a MultiIndex raises ``NotImplementedError``."""
    # should not segfault GH5123
    # NOTE: if MI representation changes, may make sense to allow
    # isna(MI)
    not_implemented = pytest.raises(NotImplementedError)
    with not_implemented:
        pd.isna(idx)
|
||||
|
||||
|
||||
def test_large_multiindex_error():
    """Missing keys raise ``KeyError`` just below and just above 1,000,000 rows."""
    # GH12527
    missing_keys = ((-1, 0), (3, 0))

    # 2 * 499999 rows is below one million, 2 * 500001 rows is above
    for inner_size in (499999, 500001):
        frame = pd.DataFrame(
            1, index=pd.MultiIndex.from_product([[1, 2], range(inner_size)]),
            columns=['dest'])
        for key in missing_keys:
            with pytest.raises(KeyError):
                frame.loc[key, 'dest']
|
||||
|
||||
|
||||
def test_million_record_attribute_error():
    """A missing attribute on a million-row Series raises ``AttributeError``."""
    # GH 18165
    values = list(range(1000000))
    pairs = [(x, x) for x in values]
    df = pd.DataFrame({'a': values, 'b': values},
                      index=pd.MultiIndex.from_tuples(pairs))

    msg = "'Series' object has no attribute 'foo'"
    with pytest.raises(AttributeError, match=msg):
        df['a'].foo()
|
||||
|
||||
|
||||
def test_can_hold_identifiers(idx):
    """``_can_hold_identifiers_and_holds_name`` is True for the first entry."""
    first_entry = idx[0]
    outcome = idx._can_hold_identifiers_and_holds_name(first_entry)
    assert outcome is True
|
||||
|
||||
|
||||
def test_metadata_immutable(idx):
    """Item assignment on levels, codes and names raises ``TypeError``."""
    mutable_regex = re.compile('does not support mutable operations')

    # shouldn't be able to set at either the top level or base level;
    # the same holds for levels and for codes (formerly "labels")
    for container in (idx.levels, idx.codes):
        with pytest.raises(TypeError, match=mutable_regex):
            container[0] = container[0]
        with pytest.raises(TypeError, match=mutable_regex):
            container[0][0] = container[0][0]

    # and for names
    names = idx.names
    with pytest.raises(TypeError, match=mutable_regex):
        names[0] = names[0]
|
||||
|
||||
|
||||
def test_level_setting_resets_attributes():
    """``is_monotonic`` is re-evaluated after levels are replaced in place."""
    letters = ['A', 'A', 'B', 'B', 'B']
    numbers = [1, 2, 1, 2, 3]
    ind = pd.MultiIndex.from_arrays([letters, numbers])
    assert ind.is_monotonic

    ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True)
    # if this fails, probably didn't reset the cache correctly.
    assert not ind.is_monotonic
|
||||
|
||||
|
||||
def test_rangeindex_fallback_coercion_bug():
    """Concatenating stacked frames keeps integer level values."""
    # GH 12893
    level_names = ['fizz', 'buzz']
    foo = pd.DataFrame(np.arange(100).reshape((10, 10)))
    bar = pd.DataFrame(np.arange(100).reshape((10, 10)))
    df = pd.concat({'foo': foo.stack(), 'bar': bar.stack()}, axis=1)
    df.index.names = level_names

    # building the repr must not fail
    str(df)

    expected_index = pd.MultiIndex.from_product([range(10), range(10)],
                                                names=level_names)
    expected = pd.DataFrame({'bar': np.arange(100),
                             'foo': np.arange(100)},
                            index=expected_index)
    tm.assert_frame_equal(df, expected, check_like=True)

    fizz = df.index.get_level_values('fizz')
    expected_fizz = pd.Int64Index(np.arange(10), name='fizz').repeat(10)
    tm.assert_index_equal(fizz, expected_fizz)

    buzz = df.index.get_level_values('buzz')
    expected_buzz = pd.Int64Index(np.tile(np.arange(10), 10), name='buzz')
    tm.assert_index_equal(buzz, expected_buzz)
|
||||
|
||||
|
||||
def test_hash_error(indices):
    """Hashing the index raises ``TypeError`` naming the index class."""
    class_name = type(indices).__name__
    expected_msg = "unhashable type: %r" % class_name
    with pytest.raises(TypeError, match=expected_msg):
        hash(indices)
|
||||
|
||||
|
||||
def test_mutability(indices):
    """Item assignment on a non-empty index raises ``TypeError``."""
    if len(indices) == 0:
        # nothing to assign to
        return
    first = indices[0]
    with pytest.raises(TypeError):
        indices[0] = first
|
||||
|
||||
|
||||
def test_wrong_number_names(indices):
    """Assigning three names raises a ``ValueError`` starting with "Length"."""
    three_names = ["apple", "banana", "carrot"]
    with pytest.raises(ValueError, match="^Length"):
        indices.names = three_names
|
||||
|
||||
|
||||
def test_memory_usage(idx):
    """``memory_usage`` is 0 when empty and grows after a lookup otherwise."""
    baseline = idx.memory_usage()

    if not len(idx):
        # we report 0 for no-length
        assert baseline == 0
        return

    # perform a lookup, then measure again (shallow and deep)
    idx.get_loc(idx[0])
    after_lookup = idx.memory_usage()
    deep = idx.memory_usage(deep=True)

    # RangeIndex, IntervalIndex
    # don't have engines
    if not isinstance(idx, (RangeIndex, IntervalIndex)):
        assert after_lookup > baseline

    if idx.inferred_type == 'object':
        assert deep > after_lookup
|
||||
|
||||
|
||||
def test_nlevels(idx):
    """The ``idx`` fixture has two levels."""
    level_count = idx.nlevels
    assert level_count == 2
|
||||
@@ -0,0 +1,96 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize('other', [
    Index(['three', 'one', 'two']),
    Index(['one']),
    Index(['one', 'three']),
])
def test_join_level(idx, other, join_type):
    """Join a flat index with ``idx`` on level 'second', in both directions."""
    join_kwargs = dict(how=join_type, level='second', return_indexers=True)
    join_index, lidx, ridx = other.join(idx, **join_kwargs)

    exp_level = other.join(idx.levels[1], how=join_type)
    assert join_index.levels[0].equals(idx.levels[0])
    assert join_index.levels[1].equals(exp_level)

    # pare down levels
    mask = np.array([entry[1] in exp_level for entry in idx], dtype=bool)
    exp_values = idx.values[mask]
    tm.assert_numpy_array_equal(join_index.values, exp_values)

    if join_type not in ('outer', 'inner'):
        return

    # flipped operands: the result matches and the indexers swap roles
    join_index2, ridx2, lidx2 = idx.join(other, **join_kwargs)

    assert join_index.equals(join_index2)
    tm.assert_numpy_array_equal(lidx, lidx2)
    tm.assert_numpy_array_equal(ridx, ridx2)
    tm.assert_numpy_array_equal(join_index2.values, exp_values)
|
||||
|
||||
|
||||
def test_join_level_corner_case(idx):
    """Level joins: flat-with-multi works, multi-with-multi raises."""
    # some corner cases
    flat = Index(['three', 'one', 'two'])
    joined = flat.join(idx, level='second')
    assert isinstance(joined, MultiIndex)

    ambiguous = "Join.*MultiIndex.*ambiguous"
    with pytest.raises(TypeError, match=ambiguous):
        idx.join(idx, level=1)
|
||||
|
||||
|
||||
def test_join_self(idx, join_type):
    """Joining an index with itself returns the very same object."""
    assert idx.join(idx, how=join_type) is idx
|
||||
|
||||
|
||||
def test_join_multi():
    """Join a MultiIndex with a flat index that shares the level name 'b'."""
    # GH 10665
    names = ['a', 'b']
    midx = pd.MultiIndex.from_product([np.arange(4), np.arange(4)],
                                      names=names)
    idx = pd.Index([1, 2, 5], name='b')

    # inner
    exp_idx = pd.MultiIndex.from_product([np.arange(4), [1, 2]], names=names)
    exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
    exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)

    jidx, lidx, ridx = midx.join(idx, how='inner', return_indexers=True)
    tm.assert_index_equal(jidx, exp_idx)
    tm.assert_numpy_array_equal(lidx, exp_lidx)
    tm.assert_numpy_array_equal(ridx, exp_ridx)

    # flip
    jidx, ridx, lidx = idx.join(midx, how='inner', return_indexers=True)
    tm.assert_index_equal(jidx, exp_idx)
    tm.assert_numpy_array_equal(lidx, exp_lidx)
    tm.assert_numpy_array_equal(ridx, exp_ridx)

    # keep MultiIndex
    exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0,
                         1, -1], dtype=np.intp)

    jidx, lidx, ridx = midx.join(idx, how='left', return_indexers=True)
    tm.assert_index_equal(jidx, midx)
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, exp_ridx)

    # flip
    jidx, ridx, lidx = idx.join(midx, how='right', return_indexers=True)
    tm.assert_index_equal(jidx, midx)
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, exp_ridx)
|
||||
|
||||
|
||||
def test_join_self_unique(idx, join_type):
    """Self-join of a unique index compares equal element-wise."""
    if not idx.is_unique:
        # nothing is checked for non-unique indexes
        return
    joined = idx.join(idx, how=join_type)
    assert (idx == joined).all()
|
||||
@@ -0,0 +1,129 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslib import iNaT
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Int64Index, MultiIndex, PeriodIndex, UInt64Index
|
||||
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_fillna(idx):
    """Check ``fillna`` on the ``idx`` fixture.

    For a MultiIndex, ``fillna`` must raise ``NotImplementedError``.  The
    ``else`` branch exercises ``fillna``, ``_isnan`` and ``hasnans`` on
    other index classes.
    """
    # GH 11343

    # TODO: Remove or Refactor. Not Implemented for MultiIndex
    # NOTE: the loop runs over a single (name, index) pair; ``name`` is unused
    # and ``idx`` is rebound inside the loop body.
    for name, index in [('idx', idx), ]:
        if len(index) == 0:
            pass
        elif isinstance(index, MultiIndex):
            idx = index.copy()
            msg = "isna is not defined for MultiIndex"
            with pytest.raises(NotImplementedError, match=msg):
                idx.fillna(idx[0])
        else:
            # NOTE(review): indentation is not preserved in the reviewed
            # copy; everything below is placed in this branch because it
            # uses ``continue`` and ``index.__class__(values)`` - confirm
            # against the upstream file.
            idx = index.copy()
            result = idx.fillna(idx[0])
            tm.assert_index_equal(result, idx)
            assert result is not idx

            # a list is not accepted as fill value
            msg = "'value' must be a scalar, passed: "
            with pytest.raises(TypeError, match=msg):
                idx.fillna([idx[0]])

            idx = index.copy()
            values = idx.values

            # put a missing value at position 1, using the marker that
            # fits the index class; integer indexes are skipped
            if isinstance(index, DatetimeIndexOpsMixin):
                values[1] = iNaT
            elif isinstance(index, (Int64Index, UInt64Index)):
                continue
            else:
                values[1] = np.nan

            if isinstance(index, PeriodIndex):
                idx = index.__class__(values, freq=index.freq)
            else:
                idx = index.__class__(values)

            # only position 1 is expected to be flagged as missing
            expected = np.array([False] * len(idx), dtype=bool)
            expected[1] = True
            tm.assert_numpy_array_equal(idx._isnan, expected)
            assert idx.hasnans is True
|
||||
|
||||
|
||||
def test_dropna():
    """``dropna`` with the default, 'any', 'all' and an invalid ``how``."""
    # GH 6194
    nan = np.nan
    idx = pd.MultiIndex.from_arrays([[1, nan, 3, nan, 5],
                                     [1, 2, nan, nan, 5],
                                     ['a', 'b', 'c', nan, 'e']])

    # default and how='any': only rows 0 and 4 have no missing value
    any_dropped = pd.MultiIndex.from_arrays([[1, 5],
                                             [1, 5],
                                             ['a', 'e']])
    tm.assert_index_equal(idx.dropna(), any_dropped)
    tm.assert_index_equal(idx.dropna(how='any'), any_dropped)

    # how='all': only row 3 is missing in every level
    all_dropped = pd.MultiIndex.from_arrays([[1, nan, 3, 5],
                                             [1, 2, nan, 5],
                                             ['a', 'b', 'c', 'e']])
    tm.assert_index_equal(idx.dropna(how='all'), all_dropped)

    msg = "invalid how option: xxx"
    with pytest.raises(ValueError, match=msg):
        idx.dropna(how='xxx')
|
||||
|
||||
|
||||
def test_nulls(idx):
    """``MultiIndex.isna`` raises ``NotImplementedError``."""
    # this is really a smoke test for the methods
    # as these are adequately tested for function elsewhere
    expected_msg = "isna is not defined for MultiIndex"
    with pytest.raises(NotImplementedError, match=expected_msg):
        idx.isna()
|
||||
|
||||
|
||||
@pytest.mark.xfail
def test_hasnans_isnans(idx):
    """``_isnan`` / ``hasnans`` without and with a NaN entry (marked xfail)."""
    # GH 11343, added tests for hasnans / isnans
    clean = idx.copy()

    # cases in indices doesn't include NaN
    no_flags = np.array([False] * len(clean), dtype=bool)
    tm.assert_numpy_array_equal(clean._isnan, no_flags)
    assert clean.hasnans is False

    # rebuild the index from values with NaN written at position 1
    values = idx.copy().values
    values[1] = np.nan
    with_nan = idx.__class__(values)

    one_flag = np.array([False] * len(with_nan), dtype=bool)
    one_flag[1] = True
    tm.assert_numpy_array_equal(with_nan._isnan, one_flag)
    assert with_nan.hasnans is True
|
||||
|
||||
|
||||
def test_nan_stays_float():
    """Missing values in an empty level stay missing after join / arithmetic."""
    # GH 7031
    level_names = [0, 1]
    idx0 = pd.MultiIndex(levels=[["A", "B"], []],
                         codes=[[1, 0], [-1, -1]],
                         names=level_names)
    idx1 = pd.MultiIndex(levels=[["C"], ["D"]],
                         codes=[[0], [0]],
                         names=level_names)

    # index-level join
    idxm = idx0.join(idx1, how='outer')
    assert pd.isna(idx0.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(idxm.get_level_values(1)[:-1]).all()

    # the same through frame arithmetic
    df0 = pd.DataFrame([[1, 2]], index=idx0)
    df1 = pd.DataFrame([[3, 4]], index=idx1)
    dfm = df0 - df1
    assert pd.isna(df0.index.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(dfm.index.get_level_values(1)[:-1]).all()
|
||||
@@ -0,0 +1,213 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, IntervalIndex, MultiIndex
|
||||
from pandas.api.types import is_scalar
|
||||
|
||||
|
||||
def test_is_monotonic_increasing():
    """Check ``is_monotonic_increasing`` and its strict variant.

    Every scenario is asserted twice where possible: once on the MultiIndex
    ``i`` and once on ``Index(i.values)`` built from its tuples, so that both
    must agree.  ``is_monotonic_increasing`` is used rather than the
    ``is_monotonic`` alias, matching ``test_is_strictly_monotonic_increasing``.
    """
    i = MultiIndex.from_product([np.arange(10),
                                 np.arange(10)], names=['one', 'two'])
    assert i.is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    # previously this line re-asserted ``i`` instead of ``Index(i.values)``
    assert Index(i.values)._is_strictly_monotonic_increasing is True

    # first level decreasing
    i = MultiIndex.from_product([np.arange(10, 0, -1),
                                 np.arange(10)], names=['one', 'two'])
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # second level decreasing
    i = MultiIndex.from_product([np.arange(10),
                                 np.arange(10, 0, -1)],
                                names=['one', 'two'])
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # NaN inside the first level
    i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']])
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # string ordering
    i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                           ['one', 'two', 'three']],
                   codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                          [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                   names=['first', 'second'])
    assert i.is_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'],
                           ['mom', 'next', 'zenith']],
                   codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                          [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                   names=['first', 'second'])
    assert i.is_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values)._is_strictly_monotonic_increasing is True

    # mixed levels, hits the TypeError
    i = MultiIndex(
        levels=[[1, 2, 3, 4], ['gb00b03mlx29', 'lu0197800237',
                               'nl0000289783',
                               'nl0000289965', 'nl0000301109']],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=['household_id', 'asset_id'])

    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    assert i.is_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values)._is_strictly_monotonic_increasing is True
|
||||
|
||||
|
||||
def test_is_monotonic_decreasing():
    """Check ``is_monotonic_decreasing`` and its strict variant.

    Mirrors ``test_is_monotonic_increasing``: every scenario is asserted on
    the MultiIndex ``i`` and, where possible, on ``Index(i.values)`` built
    from its tuples.
    """
    i = MultiIndex.from_product([np.arange(9, -1, -1),
                                 np.arange(9, -1, -1)],
                                names=['one', 'two'])
    assert i.is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    # previously this line re-asserted ``i`` instead of ``Index(i.values)``
    assert Index(i.values)._is_strictly_monotonic_decreasing is True

    # first level increasing
    i = MultiIndex.from_product([np.arange(10),
                                 np.arange(10, 0, -1)],
                                names=['one', 'two'])
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # second level increasing
    i = MultiIndex.from_product([np.arange(10, 0, -1),
                                 np.arange(10)], names=['one', 'two'])
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # NaN inside the first level
    i = MultiIndex.from_product([[2.0, np.nan, 1.0], ['c', 'b', 'a']])
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # string ordering
    i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'],
                           ['three', 'two', 'one']],
                   codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                          [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                   names=['first', 'second'])
    assert i.is_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'],
                           ['zenith', 'next', 'mom']],
                   codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                          [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                   names=['first', 'second'])
    assert i.is_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values)._is_strictly_monotonic_decreasing is True

    # mixed levels, hits the TypeError
    i = MultiIndex(
        levels=[[4, 3, 2, 1], ['nl0000301109', 'nl0000289965',
                               'nl0000289783', 'lu0197800237',
                               'gb00b03mlx29']],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=['household_id', 'asset_id'])

    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    assert i.is_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values)._is_strictly_monotonic_decreasing is True
|
||||
|
||||
|
||||
def test_is_strictly_monotonic_increasing():
    """A repeated entry keeps the index monotonic but not strictly so.

    The codes below yield ``('bar', 'mom')`` twice, so the index is
    non-decreasing while containing a duplicate.
    """
    idx = pd.MultiIndex(levels=[['bar', 'baz'], ['mom', 'next']],
                        codes=[[0, 0, 1, 1], [0, 0, 0, 1]])
    assert idx.is_monotonic_increasing is True
    assert idx._is_strictly_monotonic_increasing is False
|
||||
|
||||
|
||||
def test_is_strictly_monotonic_decreasing():
    """A repeated entry keeps the index monotonic but not strictly so.

    The codes below yield ``('baz', 'next')`` twice, so the index is
    non-increasing while containing a duplicate.
    """
    idx = pd.MultiIndex(levels=[['baz', 'bar'], ['next', 'mom']],
                        codes=[[0, 0, 1, 1], [0, 0, 0, 1]])
    assert idx.is_monotonic_decreasing is True
    assert idx._is_strictly_monotonic_decreasing is False
|
||||
|
||||
|
||||
def test_searchsorted_monotonic(indices):
    """Check ``_searchsorted_monotonic`` / ``searchsorted`` on the first value.

    GH17271.  The search is performed for ``indices[0]`` with both
    ``side='left'`` and ``side='right'``.  MultiIndex, IntervalIndex and
    empty indexes return early without asserting anything.

    Parameters
    ----------
    indices : Index
        Index under test, supplied by the ``indices`` fixture (defined
        outside this module; presumably parametrized over index types).
    """
    # GH17271
    # not implemented for tuple searches in MultiIndex
    # or Intervals searches in IntervalIndex
    if isinstance(indices, (MultiIndex, IntervalIndex)):
        return

    # nothing to test if the index is empty
    if indices.empty:
        return
    value = indices[0]

    # determine the expected results (handle dupes for 'right'):
    # argmin finds the first position that differs from ``value``
    expected_left, expected_right = 0, (indices == value).argmin()
    if expected_right == 0:
        # all values are the same, expected_right should be length
        expected_right = len(indices)

    # test _searchsorted_monotonic in all cases
    # test searchsorted only for increasing
    if indices.is_monotonic_increasing:
        ssm_left = indices._searchsorted_monotonic(value, side='left')
        assert is_scalar(ssm_left)
        assert expected_left == ssm_left

        ssm_right = indices._searchsorted_monotonic(value, side='right')
        assert is_scalar(ssm_right)
        assert expected_right == ssm_right

        ss_left = indices.searchsorted(value, side='left')
        assert is_scalar(ss_left)
        assert expected_left == ss_left

        ss_right = indices.searchsorted(value, side='right')
        assert is_scalar(ss_right)
        assert expected_right == ss_right

    elif indices.is_monotonic_decreasing:
        ssm_left = indices._searchsorted_monotonic(value, side='left')
        assert is_scalar(ssm_left)
        assert expected_left == ssm_left

        ssm_right = indices._searchsorted_monotonic(value, side='right')
        assert is_scalar(ssm_right)
        assert expected_right == ssm_right

    else:
        # non-monotonic should raise.
        with pytest.raises(ValueError):
            indices._searchsorted_monotonic(value, side='left')
|
||||
@@ -0,0 +1,124 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def check_level_names(index, names):
    """Assert that the levels of ``index`` are named ``names``, in order.

    Parameters
    ----------
    index : MultiIndex
        Index whose ``levels`` are inspected.
    names : iterable
        Expected level names; converted to a list before comparing.

    Raises
    ------
    AssertionError
        If the level names differ from ``names``.
    """
    assert [level.name for level in index.levels] == list(names)
|
||||
|
||||
|
||||
def test_slice_keep_name():
    """Slicing a MultiIndex must keep its level names."""
    x = MultiIndex.from_tuples([('a', 'b'), (1, 2), ('c', 'd')],
                               names=['x', 'y'])
    assert x[1:].names == x.names
|
||||
|
||||
|
||||
def test_index_name_retained():
    """Adding a row through ``.loc`` must keep the index name (GH9857).

    The frame is indexed by ``'z'``, enlarged with a new label, and then
    compared with a frame built directly from the final data.
    """
    # GH9857
    result = pd.DataFrame({'x': [1, 2, 6],
                           'y': [2, 2, 8],
                           'z': [-5, 0, 5]})
    result = result.set_index('z')
    # setting a label that does not exist yet appends a row
    result.loc[10] = [9, 10]
    df_expected = pd.DataFrame({'x': [1, 2, 6, 9],
                                'y': [2, 2, 8, 10],
                                'z': [-5, 0, 5, 10]})
    df_expected = df_expected.set_index('z')
    tm.assert_frame_equal(result, df_expected)
|
||||
|
||||
|
||||
def test_changing_names(idx):
    """Renaming an index updates its own levels but not those of copies.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture; its names are
        modified by this test.
    """
    # names should be applied to levels
    level_names = [level.name for level in idx.levels]
    check_level_names(idx, idx.names)

    # take the three kinds of copy before renaming the original
    view = idx.view()
    copy = idx.copy()
    shallow_copy = idx._shallow_copy()

    # changing names should change level names on object
    new_names = [name + "a" for name in idx.names]
    idx.names = new_names
    check_level_names(idx, new_names)

    # but not on copies
    check_level_names(view, level_names)
    check_level_names(copy, level_names)
    check_level_names(shallow_copy, level_names)

    # and copies shouldn't change original
    shallow_copy.names = [name + "c" for name in shallow_copy.names]
    check_level_names(idx, new_names)
|
||||
|
||||
|
||||
def test_take_preserve_name(idx):
    """``take`` must return an index carrying the original level names.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.
    """
    taken = idx.take([3, 0, 1])
    assert taken.names == idx.names
|
||||
|
||||
|
||||
def test_copy_names():
|
||||
# Check that adding a "names" parameter to the copy is honored
|
||||
# GH14302
|
||||
multi_idx = pd.Index([(1, 2), (3, 4)], names=['MyName1', 'MyName2'])
|
||||
multi_idx1 = multi_idx.copy()
|
||||
|
||||
assert multi_idx.equals(multi_idx1)
|
||||
assert multi_idx.names == ['MyName1', 'MyName2']
|
||||
assert multi_idx1.names == ['MyName1', 'MyName2']
|
||||
|
||||
multi_idx2 = multi_idx.copy(names=['NewName1', 'NewName2'])
|
||||
|
||||
assert multi_idx.equals(multi_idx2)
|
||||
assert multi_idx.names == ['MyName1', 'MyName2']
|
||||
assert multi_idx2.names == ['NewName1', 'NewName2']
|
||||
|
||||
multi_idx3 = multi_idx.copy(name=['NewName1', 'NewName2'])
|
||||
|
||||
assert multi_idx.equals(multi_idx3)
|
||||
assert multi_idx.names == ['MyName1', 'MyName2']
|
||||
assert multi_idx3.names == ['NewName1', 'NewName2']
|
||||
|
||||
|
||||
def test_names(idx, index_names):
    """Check name validation on assignment and construction.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture; renamed at the end.
    index_names : list
        Expected level names, supplied by the ``index_names`` fixture
        (defined outside this module).
    """
    # names are assigned in setup
    names = index_names
    level_names = [level.name for level in idx.levels]
    assert names == level_names

    # setting bad names on existing
    index = idx
    with pytest.raises(ValueError, match="^Length of names"):
        index.names = list(index.names) + ["third"]
    with pytest.raises(ValueError, match="^Length of names"):
        index.names = []

    # initializing with bad names (should always be equivalent)
    major_axis, minor_axis = idx.levels
    major_codes, minor_codes = idx.codes
    with pytest.raises(ValueError, match="^Length of names"):
        MultiIndex(levels=[major_axis, minor_axis],
                   codes=[major_codes, minor_codes],
                   names=['first'])
    with pytest.raises(ValueError, match="^Length of names"):
        MultiIndex(levels=[major_axis, minor_axis],
                   codes=[major_codes, minor_codes],
                   names=['first', 'second', 'third'])

    # names are assigned
    index.names = ["a", "b"]
    ind_names = list(index.names)
    level_names = [level.name for level in index.levels]
    assert ind_names == level_names
|
||||
|
||||
|
||||
def test_duplicate_level_names_access_raises(idx):
    """Looking up a level by a duplicated name must raise (GH19029).

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture; both levels are
        renamed to ``'foo'`` by this test.
    """
    # GH19029
    idx.names = ['foo', 'foo']
    with pytest.raises(ValueError, match='name foo occurs multiple times'):
        idx._get_level_number('foo')
|
||||
+98
@@ -0,0 +1,98 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, MultiIndex, date_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_partial_string_timestamp_multiindex():
    """Check partial-string date selection on a MultiIndex (GH10331, GH12685).

    The frame has a datetime first level (12-hourly steps) and a string
    second level; selections are made on ``df`` and on a level-swapped,
    sorted copy ``df_swap``.
    """
    # GH10331
    dr = pd.date_range('2016-01-01', '2016-01-03', freq='12H')
    abc = ['a', 'b', 'c']
    ix = pd.MultiIndex.from_product([dr, abc])
    df = pd.DataFrame({'c1': range(0, 15)}, index=ix)
    idx = pd.IndexSlice

    #                        c1
    # 2016-01-01 00:00:00 a   0
    #                     b   1
    #                     c   2
    # 2016-01-01 12:00:00 a   3
    #                     b   4
    #                     c   5
    # 2016-01-02 00:00:00 a   6
    #                     b   7
    #                     c   8
    # 2016-01-02 12:00:00 a   9
    #                     b  10
    #                     c  11
    # 2016-01-03 00:00:00 a  12
    #                     b  13
    #                     c  14

    # partial string matching on a single index
    for df_swap in (df.swaplevel(),
                    df.swaplevel(0),
                    df.swaplevel(0, 1)):
        df_swap = df_swap.sort_index()
        just_a = df_swap.loc['a']
        result = just_a.loc['2016-01-01']
        expected = df.loc[idx[:, 'a'], :].iloc[0:2]
        expected.index = expected.index.droplevel(1)
        tm.assert_frame_equal(result, expected)

    # NOTE: ``df_swap`` is reused below with the value left by the last
    # loop iteration (the sorted ``df.swaplevel(0, 1)``).

    # indexing with IndexSlice
    result = df.loc[idx['2016-01-01':'2016-02-01', :], :]
    expected = df
    tm.assert_frame_equal(result, expected)

    # match on secondary index
    result = df_swap.loc[idx[:, '2016-01-01':'2016-01-01'], :]
    expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]]
    tm.assert_frame_equal(result, expected)

    # Even though this syntax works on a single index, this is somewhat
    # ambiguous and we don't want to extend this behavior forward to work
    # in multi-indexes. This would amount to selecting a scalar from a
    # column.
    with pytest.raises(KeyError):
        df['2016-01-01']

    # partial string match on year only
    result = df.loc['2016']
    expected = df
    tm.assert_frame_equal(result, expected)

    # partial string match on date
    result = df.loc['2016-01-01']
    expected = df.iloc[0:6]
    tm.assert_frame_equal(result, expected)

    # partial string match on date and hour, from middle
    result = df.loc['2016-01-02 12']
    expected = df.iloc[9:12]
    tm.assert_frame_equal(result, expected)

    # partial string match on secondary index
    result = df_swap.loc[idx[:, '2016-01-02'], :]
    expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]]
    tm.assert_frame_equal(result, expected)

    # tuple selector with partial string match on date
    result = df.loc[('2016-01-01', 'a'), :]
    expected = df.iloc[[0, 3]]
    tm.assert_frame_equal(result, expected)

    # Slicing date on first level should break (of course)
    with pytest.raises(KeyError):
        df_swap.loc['2016-01-01']

    # GH12685 (partial string with daily resolution or below)
    dr = date_range('2013-01-01', periods=100, freq='D')
    ix = MultiIndex.from_product([dr, ['a', 'b']])
    df = DataFrame(np.random.randn(200, 1), columns=['A'], index=ix)

    # rows 118..179 are the two entries per day for March 2013
    result = df.loc[idx['2013-03':'2013-03', :], :]
    expected = df.iloc[118:180]
    tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,108 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def check_level_names(index, names):
    """Assert that the levels of ``index`` are named ``names``, in order.

    Parameters
    ----------
    index : MultiIndex
        Index whose ``levels`` are inspected.
    names : iterable
        Expected level names; converted to a list before comparing.

    Raises
    ------
    AssertionError
        If the level names differ from ``names``.
    """
    assert [level.name for level in index.levels] == list(names)
|
||||
|
||||
|
||||
def test_reindex(idx):
    """``reindex`` with a list of tuples returns a named MultiIndex.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.
    """
    # reindex to a subset of the entries
    result, indexer = idx.reindex(list(idx[:4]))
    assert isinstance(result, MultiIndex)
    check_level_names(result, idx[:4].names)

    # reindex to all entries: no indexer is produced
    result, indexer = idx.reindex(list(idx))
    assert isinstance(result, MultiIndex)
    assert indexer is None
    check_level_names(result, idx.names)
|
||||
|
||||
|
||||
def test_reindex_level(idx):
    """Level-wise ``reindex`` must agree with the equivalent ``join``.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.
    """
    index = Index(['one'])

    target, indexer = idx.reindex(index, level='second')
    target2, indexer2 = index.reindex(idx, level='second')

    # the reindex targets are expected to match right / left joins
    exp_index = idx.join(index, level='second', how='right')
    exp_index2 = idx.join(index, level='second', how='left')

    assert target.equals(exp_index)
    exp_indexer = np.array([0, 2, 4])
    tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False)

    assert target2.equals(exp_index2)
    exp_indexer2 = np.array([0, -1, 0, -1, 0, -1])
    tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False)

    # a fill method cannot be combined with ``level``
    with pytest.raises(TypeError, match="Fill method not supported"):
        idx.reindex(idx, method='pad', level='second')

    with pytest.raises(TypeError, match="Fill method not supported"):
        index.reindex(index, method='bfill', level='first')
|
||||
|
||||
|
||||
def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx):
    """``reindex`` keeps the caller's names for list / ndarray targets.

    GH6552.  Checked once with unnamed levels and once with named levels,
    for empty, same-value and different-dtype targets.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture; only copies of it
        are modified.
    """
    # GH6552
    idx = idx.copy()
    target = idx.copy()
    idx.names = target.names = [None, None]

    other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]])

    # list & ndarray cases
    assert idx.reindex([])[0].names == [None, None]
    assert idx.reindex(np.array([]))[0].names == [None, None]
    assert idx.reindex(target.tolist())[0].names == [None, None]
    assert idx.reindex(target.values)[0].names == [None, None]
    assert idx.reindex(other_dtype.tolist())[0].names == [None, None]
    assert idx.reindex(other_dtype.values)[0].names == [None, None]

    # same targets again, now that the index has names
    idx.names = ['foo', 'bar']
    assert idx.reindex([])[0].names == ['foo', 'bar']
    assert idx.reindex(np.array([]))[0].names == ['foo', 'bar']
    assert idx.reindex(target.tolist())[0].names == ['foo', 'bar']
    assert idx.reindex(target.values)[0].names == ['foo', 'bar']
    assert idx.reindex(other_dtype.tolist())[0].names == ['foo', 'bar']
    assert idx.reindex(other_dtype.values)[0].names == ['foo', 'bar']
|
||||
|
||||
|
||||
def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
    """Level-wise ``reindex`` to an empty list keeps the names (GH7774)."""
    # GH7774
    idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']],
                                     names=['foo', 'bar'])
    assert idx.reindex([], level=0)[0].names == ['foo', 'bar']
    assert idx.reindex([], level=1)[0].names == ['foo', 'bar']
|
||||
|
||||
|
||||
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array():
    """Level-wise ``reindex`` to an empty list keeps level dtypes (GH7774).

    The integer level must stay ``int64`` and the string level must stay
    ``object`` even though the reindexed level has no entries.
    """
    # GH7774
    idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']])
    assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64
    assert idx.reindex([], level=1)[0].levels[1].dtype.type == np.object_
|
||||
|
||||
|
||||
def test_reindex_base(idx):
    """``get_indexer`` against itself is the identity; bad methods raise.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.
    """
    expected = np.arange(idx.size, dtype=np.intp)

    actual = idx.get_indexer(idx)
    tm.assert_numpy_array_equal(expected, actual)

    with pytest.raises(ValueError, match='Invalid fill method'):
        idx.get_indexer(idx, method='invalid')
|
||||
|
||||
|
||||
def test_reindex_non_unique():
    """Reindexing a Series with a duplicated MultiIndex must raise."""
    # (1, 1) appears twice, so the source index is non-unique
    idx = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)])
    a = pd.Series(np.arange(4), index=idx)
    new_idx = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])

    msg = 'cannot handle a non-unique multi-index!'
    with pytest.raises(ValueError, match=msg):
        a.reindex(new_idx)
|
||||
@@ -0,0 +1,126 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_insert(idx):
    """Check ``MultiIndex.insert`` and enlargement through ``.loc``.

    Covers inserting a key whose labels already exist, one whose labels are
    new, a key of the wrong length, and setting-with-enlargement on a
    DataFrame / Series (the latter per GH9250).

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.  The name is
        rebound to a plain list of tuples in the GH9250 section.
    """
    # key contained in all levels
    new_index = idx.insert(0, ('bar', 'two'))
    assert new_index.equal_levels(idx)
    assert new_index[0] == ('bar', 'two')

    # key not contained in all levels
    new_index = idx.insert(0, ('abc', 'three'))

    # the new labels are expected at the end of each level
    exp0 = Index(list(idx.levels[0]) + ['abc'], name='first')
    tm.assert_index_equal(new_index.levels[0], exp0)

    exp1 = Index(list(idx.levels[1]) + ['three'], name='second')
    tm.assert_index_equal(new_index.levels[1], exp1)
    assert new_index[0] == ('abc', 'three')

    # key wrong length
    msg = "Item must have length equal to number of levels"
    with pytest.raises(ValueError, match=msg):
        idx.insert(0, ('foo2',))

    left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]],
                        columns=['1st', '2nd', '3rd'])
    left.set_index(['1st', '2nd'], inplace=True)
    ts = left['3rd'].copy(deep=True)

    # enlarge the frame one key at a time
    left.loc[('b', 'x'), '3rd'] = 2
    left.loc[('b', 'a'), '3rd'] = -1
    left.loc[('b', 'b'), '3rd'] = 3
    left.loc[('a', 'x'), '3rd'] = 4
    left.loc[('a', 'w'), '3rd'] = 5
    left.loc[('a', 'a'), '3rd'] = 6

    # same keys on the Series, alternating tuple and bare-pair spelling
    ts.loc[('b', 'x')] = 2
    ts.loc['b', 'a'] = -1
    ts.loc[('b', 'b')] = 3
    ts.loc['a', 'x'] = 4
    ts.loc[('a', 'w')] = 5
    ts.loc['a', 'a'] = 6

    right = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1], ['b', 'x', 2],
                          ['b', 'a', -1], ['b', 'b', 3], ['a', 'x', 4],
                          ['a', 'w', 5], ['a', 'a', 6]],
                         columns=['1st', '2nd', '3rd'])
    right.set_index(['1st', '2nd'], inplace=True)
    # FIXME data types changes to float because
    # of intermediate nan insertion;
    tm.assert_frame_equal(left, right, check_dtype=False)
    tm.assert_series_equal(ts, right['3rd'])

    # GH9250
    idx = ([('test1', i) for i in range(5)] +
           [('test2', i) for i in range(6)] +
           [('test', 17), ('test', 18)])

    # start from all but the last two keys, then add those through .loc
    left = pd.Series(np.linspace(0, 10, 11),
                     pd.MultiIndex.from_tuples(idx[:-2]))

    left.loc[('test', 17)] = 11
    left.loc[('test', 18)] = 12

    right = pd.Series(np.linspace(0, 12, 13),
                      pd.MultiIndex.from_tuples(idx))

    tm.assert_series_equal(left, right)
|
||||
|
||||
|
||||
def test_append(idx):
    """Appending the pieces of a split index must rebuild the original.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.
    """
    # single other index
    result = idx[:3].append(idx[3:])
    assert result.equals(idx)

    # list of other indexes
    foos = [idx[:1], idx[1:3], idx[3:]]
    result = foos[0].append(foos[1:])
    assert result.equals(idx)

    # empty
    result = idx.append([])
    assert result.equals(idx)
|
||||
|
||||
|
||||
def test_repeat():
    """``repeat`` duplicates each entry consecutively.

    The expected index is built by repeating the values of the innermost
    level before taking the product.
    """
    reps = 2
    numbers = [1, 2, 3]
    # the same array serves as second-level values and as level names
    names = np.array(['foo', 'bar'])

    m = MultiIndex.from_product([
        numbers, names], names=names)
    expected = MultiIndex.from_product([
        numbers, names.repeat(reps)], names=names)
    tm.assert_index_equal(m.repeat(reps), expected)
|
||||
|
||||
|
||||
def test_insert_base(idx):
    """Inserting the first entry in front of ``idx[1:4]`` gives ``idx[0:4]``.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.
    """
    result = idx[1:4]

    # test 0th element
    assert idx[0:4].equals(result.insert(0, idx[0]))
|
||||
|
||||
|
||||
def test_delete_base(idx):
    """Check ``delete`` at the first, last and an out-of-range position.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.
    """
    # first entry
    expected = idx[1:]
    result = idx.delete(0)
    assert result.equals(expected)
    assert result.name == expected.name

    # last entry
    expected = idx[:-1]
    result = idx.delete(-1)
    assert result.equals(expected)
    assert result.name == expected.name

    with pytest.raises((IndexError, ValueError)):
        # Exception raised depends on NumPy version.
        idx.delete(len(idx))
|
||||
@@ -0,0 +1,251 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex, Series
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("case", [0.5, "xxx"])
@pytest.mark.parametrize("sort", [True, False])
@pytest.mark.parametrize("method", ["intersection", "union",
                                    "difference", "symmetric_difference"])
def test_set_ops_error_cases(idx, case, sort, method):
    """Every set operation must reject a non-iterable ``other``.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.
    case : float or str
        Invalid ``other`` argument.
    sort : bool
        Forwarded as the ``sort`` keyword.
    method : str
        Name of the set-operation method looked up on ``idx``.
    """
    # non-iterable input
    msg = "Input must be Index or array-like"
    with pytest.raises(TypeError, match=msg):
        getattr(idx, method)(case, sort=sort)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sort", [True, False])
def test_intersection_base(idx, sort):
    """Intersection with a subset returns that subset.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.
    sort : bool
        Forwarded as the ``sort`` keyword; exact order is only asserted
        when it is true.
    """
    first = idx[:5]
    second = idx[:3]
    intersect = first.intersection(second, sort=sort)

    if sort:
        tm.assert_index_equal(intersect, second.sort_values())
    assert tm.equalContents(intersect, second)

    # GH 10149: the same must hold for array-like ``other``
    cases = [klass(second.values)
             for klass in [np.array, Series, list]]
    for case in cases:
        result = first.intersection(case, sort=sort)
        if sort:
            tm.assert_index_equal(result, second.sort_values())
        assert tm.equalContents(result, second)

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        first.intersection([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sort", [True, False])
def test_union_base(idx, sort):
    """Union of two overlapping slices contains every entry of ``idx``.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.
    sort : bool
        Forwarded as the ``sort`` keyword; exact order is only asserted
        when it is true.
    """
    first = idx[3:]
    second = idx[:5]
    everything = idx
    union = first.union(second, sort=sort)
    if sort:
        tm.assert_index_equal(union, everything.sort_values())
    assert tm.equalContents(union, everything)

    # GH 10149: the same must hold for array-like ``other``
    cases = [klass(second.values)
             for klass in [np.array, Series, list]]
    for case in cases:
        result = first.union(case, sort=sort)
        if sort:
            tm.assert_index_equal(result, everything.sort_values())
        assert tm.equalContents(result, everything)

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        first.union([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sort", [True, False])
def test_difference_base(idx, sort):
    """Removing the tail of ``idx`` leaves its first four entries.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.
    sort : bool
        Forwarded as the ``sort`` keyword; the expected answer is sorted
        when it is true.
    """
    second = idx[4:]
    answer = idx[:4]
    result = idx.difference(second, sort=sort)

    if sort:
        answer = answer.sort_values()

    assert result.equals(answer)
    tm.assert_index_equal(result, answer)

    # GH 10149: the same must hold for array-like ``other``
    cases = [klass(second.values)
             for klass in [np.array, Series, list]]
    for case in cases:
        result = idx.difference(case, sort=sort)
        tm.assert_index_equal(result, answer)

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        idx.difference([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sort", [True, False])
def test_symmetric_difference(idx, sort):
    """Two slices offset by one entry differ only in the two end entries.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.
    sort : bool
        Forwarded as the ``sort`` keyword; the expected answer is sorted
        when it is true.
    """
    first = idx[1:]
    second = idx[:-1]
    # last entry (only in ``first``) and first entry (only in ``second``)
    answer = idx[[-1, 0]]
    result = first.symmetric_difference(second, sort=sort)

    if sort:
        answer = answer.sort_values()

    tm.assert_index_equal(result, answer)

    # GH 10149: the same must hold for array-like ``other``
    cases = [klass(second.values)
             for klass in [np.array, Series, list]]
    for case in cases:
        result = first.symmetric_difference(case, sort=sort)
        tm.assert_index_equal(result, answer)

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        first.symmetric_difference([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
def test_empty(idx):
    """``empty`` is false for the fixture and true for a zero-length slice.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.
    """
    # GH 15270
    assert not idx.empty
    assert idx[:0].empty
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sort", [True, False])
def test_difference(idx, sort):
    """Check ``difference`` results, names and rejected inputs.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.
    sort : bool
        Forwarded as the ``sort`` keyword; expected values are sorted when
        it is true.
    """
    first = idx
    result = first.difference(idx[-3:], sort=sort)
    vals = idx[:-3].values

    if sort:
        vals = sorted(vals)

    expected = MultiIndex.from_tuples(vals,
                                      sortorder=0,
                                      names=idx.names)

    assert isinstance(result, MultiIndex)
    assert result.equals(expected)
    assert result.names == idx.names
    tm.assert_index_equal(result, expected)

    # empty difference: reflexive
    result = idx.difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # empty difference: superset
    result = idx[-3:].difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # empty difference: degenerate
    result = idx[:0].difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # names not the same
    chunklet = idx[-3:]
    chunklet.names = ['foo', 'baz']
    result = first.difference(chunklet, sort=sort)
    assert result.names == (None, None)

    # empty, but non-equal
    result = idx.difference(idx.sortlevel(1)[0], sort=sort)
    assert len(result) == 0

    # a plain array of tuples is accepted in place of a MultiIndex
    result = first.difference(first.values, sort=sort)
    assert result.equals(first[:0])

    # name from empty array
    result = first.difference([], sort=sort)
    assert first.equals(result)
    assert first.names == result.names

    # name from non-empty array
    result = first.difference([('foo', 'one')], sort=sort)
    # NOTE(review): ``expected`` is built here but only the names are
    # asserted below; the values of ``result`` are never compared to it.
    expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), (
        'foo', 'two'), ('qux', 'one'), ('qux', 'two')])
    expected.names = first.names
    assert first.names == result.names

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        first.difference([1, 2, 3, 4, 5], sort=sort)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sort", [True, False])
def test_union(idx, sort):
    """Union of overlapping pieces covers ``idx``; trivial unions return it.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.
    sort : bool
        Forwarded as the ``sort`` keyword; exact order is only asserted
        when it is true.
    """
    # first five entries in reverse order, overlapping the tail piece
    piece1 = idx[:5][::-1]
    piece2 = idx[3:]

    the_union = piece1.union(piece2, sort=sort)

    if sort:
        tm.assert_index_equal(the_union, idx.sort_values())

    assert tm.equalContents(the_union, idx)

    # corner case, pass self or empty thing:
    the_union = idx.union(idx, sort=sort)
    assert the_union is idx

    the_union = idx.union(idx[:0], sort=sort)
    assert the_union is idx
|
||||
|
||||
# won't work in python 3
|
||||
# tuples = _index.values
|
||||
# result = _index[:4] | tuples[4:]
|
||||
# assert result.equals(tuples)
|
||||
|
||||
# not valid for python 3
|
||||
# def test_union_with_regular_index(self):
|
||||
# other = Index(['A', 'B', 'C'])
|
||||
|
||||
# result = other.union(idx)
|
||||
# assert ('foo', 'one') in result
|
||||
# assert 'B' in result
|
||||
|
||||
# result2 = _index.union(other)
|
||||
# assert result.equals(result2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sort", [True, False])
def test_intersection(idx, sort):
    """Intersection of overlapping pieces is their shared slice.

    Parameters
    ----------
    idx : MultiIndex
        Two-level index supplied by the ``idx`` fixture.
    sort : bool
        Forwarded as the ``sort`` keyword; exact order is only asserted
        when it is true.
    """
    # first five entries in reverse order, overlapping the tail piece
    piece1 = idx[:5][::-1]
    piece2 = idx[3:]

    the_int = piece1.intersection(piece2, sort=sort)

    if sort:
        tm.assert_index_equal(the_int, idx[3:5])
    assert tm.equalContents(the_int, idx[3:5])

    # corner case, pass self
    the_int = idx.intersection(idx, sort=sort)
    assert the_int is idx

    # empty intersection: disjoint
    empty = idx[:2].intersection(idx[2:], sort=sort)
    expected = idx[:0]
    assert empty.equals(expected)
|
||||
|
||||
# can't do in python 3
|
||||
# tuples = _index.values
|
||||
# result = _index & tuples
|
||||
# assert result.equals(tuples)
|
||||
@@ -0,0 +1,266 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange
|
||||
from pandas.errors import PerformanceWarning, UnsortedIndexError
|
||||
|
||||
import pandas as pd
|
||||
from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, RangeIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_sortlevel(idx):
    """Sort a randomly shuffled copy of ``idx`` by each level.

    For level 0 the reference order is the plain ``sorted`` order of the
    tuples; for level 1 it is the order keyed on ``(second, first)``.
    Descending sorts are compared against the reversed reference.
    """
    import random

    shuffled = list(idx)
    random.shuffle(shuffled)
    unsorted_mi = MultiIndex.from_tuples(shuffled)

    by_level0 = MultiIndex.from_tuples(sorted(shuffled))
    by_level1 = MultiIndex.from_tuples(
        sorted(shuffled, key=lambda pair: (pair[1], pair[0])))

    for level, reference in ((0, by_level0), (1, by_level1)):
        ascending_result = unsorted_mi.sortlevel(level)[0]
        assert ascending_result.equals(reference)

        descending_result = unsorted_mi.sortlevel(level, ascending=False)[0]
        assert descending_result.equals(reference[::-1])
|
||||
|
||||
|
||||
def test_sortlevel_not_sort_remaining():
    """``sortlevel('A', sort_remaining=False)`` must return an index
    equal to the input when every entry shares the same 'A' value.
    """
    rows = [[1, 1, 3], [1, 1, 1]]
    mi = MultiIndex.from_tuples(rows, names=['A', 'B', 'C'])

    result = mi.sortlevel('A', sort_remaining=False)[0]

    assert result.equals(mi)
|
||||
|
||||
|
||||
def test_sortlevel_deterministic():
    """Sort a fixed, unordered set of tuples by each level.

    Level 0 is compared against the plain ``sorted`` order of the tuples,
    level 1 against the order keyed on ``(second, first)``; descending
    sorts are compared against the reversed reference.
    """
    pairs = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'),
             ('foo', 'one'), ('baz', 'two'), ('qux', 'one')]
    unsorted_mi = MultiIndex.from_tuples(pairs)

    by_level0 = MultiIndex.from_tuples(sorted(pairs))
    by_level1 = MultiIndex.from_tuples(
        sorted(pairs, key=lambda pair: (pair[1], pair[0])))

    for level, reference in ((0, by_level0), (1, by_level1)):
        ascending_result = unsorted_mi.sortlevel(level)[0]
        assert ascending_result.equals(reference)

        descending_result = unsorted_mi.sortlevel(level, ascending=False)[0]
        assert descending_result.equals(reference[::-1])
|
||||
|
||||
|
||||
def test_sort(indices):
    """Calling ``sort()`` on the ``indices`` fixture must raise
    ``TypeError``.
    """
    expected_error = TypeError
    with pytest.raises(expected_error):
        indices.sort()
|
||||
|
||||
|
||||
def test_numpy_argsort(idx):
    """``np.argsort(idx)`` must give the same result as ``idx.argsort()``.

    For ``CategoricalIndex`` and ``RangeIndex`` inputs the test also
    checks that the numpy-only keyword arguments ``axis``, ``kind`` and
    ``order`` are rejected with a ``ValueError``.
    """
    result = np.argsort(idx)
    expected = idx.argsort()
    tm.assert_numpy_array_equal(result, expected)

    # these are the only two types that perform
    # pandas compatibility input validation - the
    # rest already perform separate (or no) such
    # validation via their 'values' attribute as
    # defined in pandas.core.indexes/base.py - they
    # cannot be changed at the moment due to
    # backwards compatibility concerns
    #
    # The check must be made on the instance: ``isinstance(type(idx), ...)``
    # inspects the class object itself and is therefore never true, which
    # would silently turn everything below into dead code.
    if isinstance(idx, (CategoricalIndex, RangeIndex)):
        msg = "the 'axis' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, axis=1)

        msg = "the 'kind' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, kind='mergesort')

        msg = "the 'order' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, order=('a', 'b'))
|
||||
|
||||
|
||||
def test_unsortedindex():
    """Label-based access on a frame with an unsorted ``MultiIndex``.

    An exact key lookup must work without sorting, a slice on the second
    level must raise ``UnsortedIndexError`` until the frame is sorted,
    and a missing first-level key must raise ``KeyError``.
    """
    # GH 11897
    keys = [('z', 'a'), ('x', 'a'), ('y', 'b'),
            ('x', 'b'), ('y', 'a'), ('z', 'b')]
    mi = pd.MultiIndex.from_tuples(keys, names=['one', 'two'])
    rows = [[i, 10 * i] for i in lrange(6)]
    df = pd.DataFrame(rows, index=mi, columns=['one', 'two'])

    # GH 16734: not sorted, but no real slicing
    first_row = df.iloc[0]
    looked_up = df.loc(axis=0)['z', 'a']
    tm.assert_series_equal(looked_up, first_row)

    with pytest.raises(UnsortedIndexError):
        df.loc(axis=0)['z', slice('a')]

    # after sorting in place, slicing on 'z' is allowed
    df.sort_index(inplace=True)
    z_rows = df.loc(axis=0)['z', :]
    assert len(z_rows) == 2

    with pytest.raises(KeyError):
        df.loc(axis=0)['q', :]
|
||||
|
||||
|
||||
def test_unsortedindex_doc_examples():
    """Replay the "sorting a MultiIndex" examples from the docs.

    Before sorting, a single-key lookup must emit ``PerformanceWarning``
    and a range slice must raise ``UnsortedIndexError``; after
    ``sort_index`` both lookups are executed without those checks and
    the index must report itself as fully lexsorted.
    """
    # http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex # noqa
    columns = {'jim': [0, 0, 1, 1],
               'joe': ['x', 'x', 'z', 'y'],
               'jolie': np.random.rand(4)}
    dfm = DataFrame(columns)
    dfm = dfm.set_index(['jim', 'joe'])

    single_key = (1, 'z')
    key_range = slice((0, 'y'), (1, 'z'))

    with tm.assert_produces_warning(PerformanceWarning):
        dfm.loc[single_key]

    with pytest.raises(UnsortedIndexError):
        dfm.loc[key_range]

    unsorted_index = dfm.index
    assert not unsorted_index.is_lexsorted()
    assert unsorted_index.lexsort_depth == 1

    # sort it
    dfm = dfm.sort_index()
    dfm.loc[single_key]
    dfm.loc[key_range]

    sorted_index = dfm.index
    assert sorted_index.is_lexsorted()
    assert sorted_index.lexsort_depth == 2
|
||||
|
||||
|
||||
def test_reconstruct_sort():
    """Check ``MultiIndex._sort_levels_monotonic`` on sorted and unsorted
    inputs.

    For an index that is already lexsorted and monotonic the very same
    object must come back.  For the unsorted inputs the result must
    still not be lexsorted or monotonic, yet compare equal to the input.
    """

    def _assert_values_preserved(original, rebuilt):
        # the reconstruction must not change what the index contains
        assert original.equals(rebuilt)
        assert Index(original.values).equals(Index(rebuilt.values))

    def _assert_stays_unsorted(unsorted_mi):
        assert not unsorted_mi.is_lexsorted()
        assert not unsorted_mi.is_monotonic

        rebuilt = unsorted_mi._sort_levels_monotonic()
        assert not rebuilt.is_lexsorted()
        assert not rebuilt.is_monotonic

        _assert_values_preserved(unsorted_mi, rebuilt)

    # starts off lexsorted & monotonic
    sorted_mi = MultiIndex.from_arrays([
        ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
    ])
    assert sorted_mi.is_lexsorted()
    assert sorted_mi.is_monotonic

    recons = sorted_mi._sort_levels_monotonic()
    assert recons.is_lexsorted()
    assert recons.is_monotonic
    assert sorted_mi is recons

    _assert_values_preserved(sorted_mi, recons)

    # cannot convert to lexsorted
    _assert_stays_unsorted(
        pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'),
                                   ('x', 'b'), ('y', 'a'), ('z', 'b')],
                                  names=['one', 'two']))

    # cannot convert to lexsorted
    _assert_stays_unsorted(
        MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]],
                   codes=[[0, 1, 0, 2], [2, 0, 0, 1]],
                   names=['col1', 'col2']))
|
||||
|
||||
|
||||
def test_reconstruct_remove_unused():
    """``remove_unused_levels`` must drop level values that no row uses.

    After filtering rows out of a frame, the index is expected to still
    carry the filtered-out level values; ``remove_unused_levels`` must
    drop them, and a second call must yield an index for which
    ``is_`` the first result holds.
    """
    # xref to GH 2770
    names = ['first', 'second']
    df = DataFrame([['deleteMe', 1, 9],
                    ['keepMe', 2, 9],
                    ['keepMeToo', 3, 9]],
                   columns=names + ['third'])
    indexed = df.set_index(names, drop=False)
    filtered = indexed[indexed['first'] != 'deleteMe']

    # removed levels are there
    with_unused = MultiIndex(levels=[['deleteMe', 'keepMe', 'keepMeToo'],
                                     [1, 2, 3]],
                             codes=[[1, 2], [1, 2]],
                             names=names)
    tm.assert_index_equal(filtered.index, with_unused)

    trimmed = MultiIndex(levels=[['keepMe', 'keepMeToo'],
                                 [2, 3]],
                         codes=[[0, 1], [0, 1]],
                         names=names)
    result = filtered.index.remove_unused_levels()
    tm.assert_index_equal(result, trimmed)

    # idempotent
    result_again = result.remove_unused_levels()
    tm.assert_index_equal(result_again, trimmed)
    assert result_again.is_(result)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('first_type,second_type', [
    ('int64', 'int64'),
    ('datetime64[D]', 'str')
])
def test_remove_unused_levels_large(first_type, second_type):
    """``remove_unused_levels`` on a large, filtered group-by index.

    Both levels must shrink, while the result still compares equal to
    the original index and matches an index rebuilt from scratch via
    ``reset_index().set_index(...)``.
    """
    # GH16556

    # because tests should be deterministic (and this test in particular
    # checks that levels are removed, which is not the case for every
    # random input):
    rng = np.random.RandomState(4)  # seed is arbitrary value that works

    size = 1 << 16
    # draw in the same order as the columns are listed so the seeded
    # stream yields the same data
    first = rng.randint(0, 1 << 13, size).astype(first_type)
    second = rng.randint(0, 1 << 10, size).astype(second_type)
    third = rng.rand(size)
    df = DataFrame({'first': first, 'second': second, 'third': third})

    keys = ['first', 'second']
    grouped = df.groupby(keys).sum()
    df = grouped[grouped.third < 0.1]

    original = df.index
    result = original.remove_unused_levels()
    for level in (0, 1):
        assert len(result.levels[level]) < len(original.levels[level])
    assert result.equals(original)

    expected = df.reset_index().set_index(keys).index
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('level0', [['a', 'd', 'b'],
                                    ['a', 'd', 'b', 'unused']])
@pytest.mark.parametrize('level1', [['w', 'x', 'y', 'z'],
                                    ['w', 'x', 'y', 'z', 'unused']])
def test_remove_unused_nan(level0, level1):
    """``remove_unused_levels`` with ``-1`` codes present in level 0.

    The result must compare equal to the input index and neither level
    may still contain the ``'unused'`` entry.
    """
    # GH 18417
    codes = [[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]
    mi = pd.MultiIndex(levels=[level0, level1], codes=codes)

    result = mi.remove_unused_levels()
    tm.assert_index_equal(result, mi)

    for level in (0, 1):
        assert 'unused' not in result.levels[level]
|
||||
|
||||
|
||||
def test_argsort(idx):
    """``idx.argsort()`` must match ``argsort`` of the underlying
    ``idx.values`` array.
    """
    from_index = idx.argsort()
    from_values = idx.values.argsort()
    tm.assert_numpy_array_equal(from_index, from_values)
|
||||
Reference in New Issue
Block a user