Static code analysis and corrections

This commit is contained in:
Kristjan Komlosi
2019-07-17 16:06:09 +02:00
parent 674692c2fc
commit 21bfae9fbc
10086 changed files with 2102103 additions and 51 deletions
@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas import Index, MultiIndex
@pytest.fixture
def idx():
# a MultiIndex used to test the general functionality of the
# general functionality of this object
major_axis = Index(['foo', 'bar', 'baz', 'qux'])
minor_axis = Index(['one', 'two'])
major_codes = np.array([0, 0, 1, 2, 3, 3])
minor_codes = np.array([0, 1, 0, 1, 0, 1])
index_names = ['first', 'second']
mi = MultiIndex(levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes],
names=index_names, verify_integrity=False)
return mi
@pytest.fixture
def idx_dup():
# compare tests/indexes/multi/conftest.py
major_axis = Index(['foo', 'bar', 'baz', 'qux'])
minor_axis = Index(['one', 'two'])
major_codes = np.array([0, 0, 1, 0, 1, 1])
minor_codes = np.array([0, 1, 0, 1, 0, 1])
index_names = ['first', 'second']
mi = MultiIndex(levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes],
names=index_names, verify_integrity=False)
return mi
@pytest.fixture
def index_names():
# names that match those in the idx fixture for testing equality of
# names assigned to the idx
return ['first', 'second']
@pytest.fixture
def holder():
# the MultiIndex constructor used to base compatibility with pickle
return MultiIndex
@pytest.fixture
def compat_props():
# a MultiIndex must have these properties associated with it
return ['shape', 'ndim', 'size']
@@ -0,0 +1,321 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas.compat import lrange
import pandas as pd
from pandas import Index, MultiIndex, date_range, period_range
import pandas.util.testing as tm
def test_shift(idx):
# GH8083 test the base class for shift
pytest.raises(NotImplementedError, idx.shift, 1)
pytest.raises(NotImplementedError, idx.shift, 1, 2)
def test_groupby(idx):
groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
labels = idx.get_values().tolist()
exp = {1: labels[:3], 2: labels[3:]}
tm.assert_dict_equal(groups, exp)
# GH5620
groups = idx.groupby(idx)
exp = {key: [key] for key in idx}
tm.assert_dict_equal(groups, exp)
def test_truncate():
major_axis = Index(lrange(4))
minor_axis = Index(lrange(2))
major_codes = np.array([0, 0, 1, 2, 3, 3])
minor_codes = np.array([0, 1, 0, 1, 0, 1])
index = MultiIndex(levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes])
result = index.truncate(before=1)
assert 'foo' not in result.levels[0]
assert 1 in result.levels[0]
result = index.truncate(after=1)
assert 2 not in result.levels[0]
assert 1 in result.levels[0]
result = index.truncate(before=1, after=2)
assert len(result.levels[0]) == 2
# after < before
pytest.raises(ValueError, index.truncate, 3, 1)
def test_where():
i = MultiIndex.from_tuples([('A', 1), ('A', 2)])
with pytest.raises(NotImplementedError):
i.where(True)
def test_where_array_like():
i = MultiIndex.from_tuples([('A', 1), ('A', 2)])
klasses = [list, tuple, np.array, pd.Series]
cond = [False, True]
for klass in klasses:
with pytest.raises(NotImplementedError):
i.where(klass(cond))
# TODO: reshape
def test_reorder_levels(idx):
# this blows up
with pytest.raises(IndexError, match='^Too many levels'):
idx.reorder_levels([2, 1, 0])
def test_numpy_repeat():
reps = 2
numbers = [1, 2, 3]
names = np.array(['foo', 'bar'])
m = MultiIndex.from_product([
numbers, names], names=names)
expected = MultiIndex.from_product([
numbers, names.repeat(reps)], names=names)
tm.assert_index_equal(np.repeat(m, reps), expected)
msg = "the 'axis' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.repeat(m, reps, axis=1)
def test_append_mixed_dtypes():
# GH 13660
dti = date_range('2011-01-01', freq='M', periods=3, )
dti_tz = date_range('2011-01-01', freq='M', periods=3, tz='US/Eastern')
pi = period_range('2011-01', freq='M', periods=3)
mi = MultiIndex.from_arrays([[1, 2, 3],
[1.1, np.nan, 3.3],
['a', 'b', 'c'],
dti, dti_tz, pi])
assert mi.nlevels == 6
res = mi.append(mi)
exp = MultiIndex.from_arrays([[1, 2, 3, 1, 2, 3],
[1.1, np.nan, 3.3, 1.1, np.nan, 3.3],
['a', 'b', 'c', 'a', 'b', 'c'],
dti.append(dti),
dti_tz.append(dti_tz),
pi.append(pi)])
tm.assert_index_equal(res, exp)
other = MultiIndex.from_arrays([['x', 'y', 'z'], ['x', 'y', 'z'],
['x', 'y', 'z'], ['x', 'y', 'z'],
['x', 'y', 'z'], ['x', 'y', 'z']])
res = mi.append(other)
exp = MultiIndex.from_arrays([[1, 2, 3, 'x', 'y', 'z'],
[1.1, np.nan, 3.3, 'x', 'y', 'z'],
['a', 'b', 'c', 'x', 'y', 'z'],
dti.append(pd.Index(['x', 'y', 'z'])),
dti_tz.append(pd.Index(['x', 'y', 'z'])),
pi.append(pd.Index(['x', 'y', 'z']))])
tm.assert_index_equal(res, exp)
def test_take(idx):
indexer = [4, 3, 0, 2]
result = idx.take(indexer)
expected = idx[indexer]
assert result.equals(expected)
# TODO: Remove Commented Code
# if not isinstance(idx,
# (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
# GH 10791
with pytest.raises(AttributeError):
idx.freq
def test_take_invalid_kwargs(idx):
idx = idx
indices = [1, 2]
msg = r"take\(\) got an unexpected keyword argument 'foo'"
with pytest.raises(TypeError, match=msg):
idx.take(indices, foo=2)
msg = "the 'out' parameter is not supported"
with pytest.raises(ValueError, match=msg):
idx.take(indices, out=indices)
msg = "the 'mode' parameter is not supported"
with pytest.raises(ValueError, match=msg):
idx.take(indices, mode='clip')
def test_take_fill_value():
# GH 12631
vals = [['A', 'B'],
[pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]]
idx = pd.MultiIndex.from_product(vals, names=['str', 'dt'])
result = idx.take(np.array([1, 0, -1]))
exp_vals = [('A', pd.Timestamp('2011-01-02')),
('A', pd.Timestamp('2011-01-01')),
('B', pd.Timestamp('2011-01-02'))]
expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt'])
tm.assert_index_equal(result, expected)
# fill_value
result = idx.take(np.array([1, 0, -1]), fill_value=True)
exp_vals = [('A', pd.Timestamp('2011-01-02')),
('A', pd.Timestamp('2011-01-01')),
(np.nan, pd.NaT)]
expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt'])
tm.assert_index_equal(result, expected)
# allow_fill=False
result = idx.take(np.array([1, 0, -1]), allow_fill=False,
fill_value=True)
exp_vals = [('A', pd.Timestamp('2011-01-02')),
('A', pd.Timestamp('2011-01-01')),
('B', pd.Timestamp('2011-01-02'))]
expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt'])
tm.assert_index_equal(result, expected)
msg = ('When allow_fill=True and fill_value is not None, '
'all indices must be >= -1')
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -2]), fill_value=True)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -5]), fill_value=True)
with pytest.raises(IndexError):
idx.take(np.array([1, -5]))
def test_iter(idx):
result = list(idx)
expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
('baz', 'two'), ('qux', 'one'), ('qux', 'two')]
assert result == expected
def test_sub(idx):
first = idx
# - now raises (previously was set op difference)
with pytest.raises(TypeError):
first - idx[-3:]
with pytest.raises(TypeError):
idx[-3:] - first
with pytest.raises(TypeError):
idx[-3:] - first.tolist()
with pytest.raises(TypeError):
first.tolist() - idx[-3:]
def test_map(idx):
# callable
index = idx
# we don't infer UInt64
if isinstance(index, pd.UInt64Index):
expected = index.astype('int64')
else:
expected = index
result = index.map(lambda x: x)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"mapper",
[
lambda values, idx: {i: e for e, i in zip(values, idx)},
lambda values, idx: pd.Series(values, idx)])
def test_map_dictlike(idx, mapper):
if isinstance(idx, (pd.CategoricalIndex, pd.IntervalIndex)):
pytest.skip("skipping tests for {}".format(type(idx)))
identity = mapper(idx.values, idx)
# we don't infer to UInt64 for a dict
if isinstance(idx, pd.UInt64Index) and isinstance(identity, dict):
expected = idx.astype('int64')
else:
expected = idx
result = idx.map(identity)
tm.assert_index_equal(result, expected)
# empty mappable
expected = pd.Index([np.nan] * len(idx))
result = idx.map(mapper(expected, idx))
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize('func', [
np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10,
np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin,
np.arccos, np.arctan, np.sinh, np.cosh, np.tanh,
np.arcsinh, np.arccosh, np.arctanh, np.deg2rad,
np.rad2deg
])
def test_numpy_ufuncs(func):
# test ufuncs of numpy. see:
# http://docs.scipy.org/doc/numpy/reference/ufuncs.html
# copy and paste from idx fixture as pytest doesn't support
# parameters and fixtures at the same time.
major_axis = Index(['foo', 'bar', 'baz', 'qux'])
minor_axis = Index(['one', 'two'])
major_codes = np.array([0, 0, 1, 2, 3, 3])
minor_codes = np.array([0, 1, 0, 1, 0, 1])
index_names = ['first', 'second']
idx = MultiIndex(
levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes],
names=index_names,
verify_integrity=False
)
with pytest.raises(Exception):
with np.errstate(all='ignore'):
func(idx)
@pytest.mark.parametrize('func', [
np.isfinite, np.isinf, np.isnan, np.signbit
])
def test_numpy_type_funcs(func):
# for func in [np.isfinite, np.isinf, np.isnan, np.signbit]:
# copy and paste from idx fixture as pytest doesn't support
# parameters and fixtures at the same time.
major_axis = Index(['foo', 'bar', 'baz', 'qux'])
minor_axis = Index(['one', 'two'])
major_codes = np.array([0, 0, 1, 2, 3, 3])
minor_codes = np.array([0, 1, 0, 1, 0, 1])
index_names = ['first', 'second']
idx = MultiIndex(
levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes],
names=index_names,
verify_integrity=False
)
with pytest.raises(Exception):
func(idx)
@@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.util.testing import assert_copy
def test_astype(idx):
expected = idx.copy()
actual = idx.astype('O')
assert_copy(actual.levels, expected.levels)
assert_copy(actual.codes, expected.codes)
assert [level.name for level in actual.levels] == list(expected.names)
with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
idx.astype(np.dtype(int))
@pytest.mark.parametrize('ordered', [True, False])
def test_astype_category(idx, ordered):
# GH 18630
msg = '> 1 ndim Categorical are not supported at this time'
with pytest.raises(NotImplementedError, match=msg):
idx.astype(CategoricalDtype(ordered=ordered))
if ordered is False:
# dtype='category' defaults to ordered=False, so only test once
with pytest.raises(NotImplementedError, match=msg):
idx.astype('category')
@@ -0,0 +1,131 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas.compat import PY3, long
from pandas import MultiIndex
import pandas.util.testing as tm
def test_numeric_compat(idx):
with pytest.raises(TypeError, match="cannot perform __mul__"):
idx * 1
with pytest.raises(TypeError, match="cannot perform __rmul__"):
1 * idx
div_err = ("cannot perform __truediv__" if PY3
else "cannot perform __div__")
with pytest.raises(TypeError, match=div_err):
idx / 1
div_err = div_err.replace(" __", " __r")
with pytest.raises(TypeError, match=div_err):
1 / idx
with pytest.raises(TypeError, match="cannot perform __floordiv__"):
idx // 1
with pytest.raises(TypeError, match="cannot perform __rfloordiv__"):
1 // idx
@pytest.mark.parametrize("method", ["all", "any"])
def test_logical_compat(idx, method):
msg = "cannot perform {method}".format(method=method)
with pytest.raises(TypeError, match=msg):
getattr(idx, method)()
def test_boolean_context_compat(idx):
with pytest.raises(ValueError):
bool(idx)
def test_boolean_context_compat2():
# boolean context compat
# GH7897
i1 = MultiIndex.from_tuples([('A', 1), ('A', 2)])
i2 = MultiIndex.from_tuples([('A', 1), ('A', 3)])
common = i1.intersection(i2)
with pytest.raises(ValueError):
bool(common)
def test_inplace_mutation_resets_values():
levels = [['a', 'b', 'c'], [4]]
levels2 = [[1, 2, 3], ['a']]
codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]
mi1 = MultiIndex(levels=levels, codes=codes)
mi2 = MultiIndex(levels=levels2, codes=codes)
vals = mi1.values.copy()
vals2 = mi2.values.copy()
assert mi1._tuples is not None
# Make sure level setting works
new_vals = mi1.set_levels(levels2).values
tm.assert_almost_equal(vals2, new_vals)
# Non-inplace doesn't kill _tuples [implementation detail]
tm.assert_almost_equal(mi1._tuples, vals)
# ...and values is still same too
tm.assert_almost_equal(mi1.values, vals)
# Inplace should kill _tuples
mi1.set_levels(levels2, inplace=True)
tm.assert_almost_equal(mi1.values, vals2)
# Make sure label setting works too
codes2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
exp_values = np.empty((6,), dtype=object)
exp_values[:] = [(long(1), 'a')] * 6
# Must be 1d array of tuples
assert exp_values.shape == (6,)
new_values = mi2.set_codes(codes2).values
# Not inplace shouldn't change
tm.assert_almost_equal(mi2._tuples, vals2)
# Should have correct values
tm.assert_almost_equal(exp_values, new_values)
# ...and again setting inplace should kill _tuples, etc
mi2.set_codes(codes2, inplace=True)
tm.assert_almost_equal(mi2.values, new_values)
def test_ndarray_compat_properties(idx, compat_props):
assert idx.T.equals(idx)
assert idx.transpose().equals(idx)
values = idx.values
for prop in compat_props:
assert getattr(idx, prop) == getattr(values, prop)
# test for validity
idx.nbytes
idx.values.nbytes
def test_compat(indices):
assert indices.tolist() == list(indices)
def test_pickle_compat_construction(holder):
# this is testing for pickle compat
if holder is None:
return
# need an object to create with
pytest.raises(TypeError, holder)
@@ -0,0 +1,577 @@
# -*- coding: utf-8 -*-
from collections import OrderedDict
import re
import numpy as np
import pytest
from pandas._libs.tslib import Timestamp
from pandas.compat import lrange, range
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
import pandas as pd
from pandas import Index, MultiIndex, date_range
import pandas.util.testing as tm
def test_constructor_single_level():
result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
codes=[[0, 1, 2, 3]], names=['first'])
assert isinstance(result, MultiIndex)
expected = Index(['foo', 'bar', 'baz', 'qux'], name='first')
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ['first']
def test_constructor_no_levels():
msg = "non-zero number of levels/codes"
with pytest.raises(ValueError, match=msg):
MultiIndex(levels=[], codes=[])
both_re = re.compile('Must pass both levels and codes')
with pytest.raises(TypeError, match=both_re):
MultiIndex(levels=[])
with pytest.raises(TypeError, match=both_re):
MultiIndex(codes=[])
def test_constructor_nonhashable_names():
# GH 20527
levels = [[1, 2], [u'one', u'two']]
codes = [[0, 0, 1, 1], [0, 1, 0, 1]]
names = (['foo'], ['bar'])
message = "MultiIndex.name must be a hashable type"
with pytest.raises(TypeError, match=message):
MultiIndex(levels=levels, codes=codes, names=names)
# With .rename()
mi = MultiIndex(levels=[[1, 2], [u'one', u'two']],
codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=('foo', 'bar'))
renamed = [['foor'], ['barr']]
with pytest.raises(TypeError, match=message):
mi.rename(names=renamed)
# With .set_names()
with pytest.raises(TypeError, match=message):
mi.set_names(names=renamed)
def test_constructor_mismatched_codes_levels(idx):
codes = [np.array([1]), np.array([2]), np.array([3])]
levels = ["a"]
msg = "Length of levels and codes must be the same"
with pytest.raises(ValueError, match=msg):
MultiIndex(levels=levels, codes=codes)
length_error = re.compile('>= length of level')
label_error = re.compile(r'Unequal code lengths: \[4, 2\]')
# important to check that it's looking at the right thing.
with pytest.raises(ValueError, match=length_error):
MultiIndex(levels=[['a'], ['b']],
codes=[[0, 1, 2, 3], [0, 3, 4, 1]])
with pytest.raises(ValueError, match=label_error):
MultiIndex(levels=[['a'], ['b']], codes=[[0, 0, 0, 0], [0, 0]])
# external API
with pytest.raises(ValueError, match=length_error):
idx.copy().set_levels([['a'], ['b']])
with pytest.raises(ValueError, match=label_error):
idx.copy().set_codes([[0, 0, 0, 0], [0, 0]])
def test_labels_deprecated(idx):
# GH23752
with tm.assert_produces_warning(FutureWarning):
MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
labels=[[0, 1, 2, 3]], names=['first'])
with tm.assert_produces_warning(FutureWarning):
idx.labels
def test_copy_in_constructor():
levels = np.array(["a", "b", "c"])
codes = np.array([1, 1, 2, 0, 0, 1, 1])
val = codes[0]
mi = MultiIndex(levels=[levels, levels], codes=[codes, codes],
copy=True)
assert mi.codes[0][0] == val
codes[0] = 15
assert mi.codes[0][0] == val
val = levels[0]
levels[0] = "PANDA"
assert mi.levels[0][0] == val
# ----------------------------------------------------------------------------
# from_arrays
# ----------------------------------------------------------------------------
def test_from_arrays(idx):
arrays = [np.asarray(lev).take(level_codes)
for lev, level_codes in zip(idx.levels, idx.codes)]
# list of arrays as input
result = MultiIndex.from_arrays(arrays, names=idx.names)
tm.assert_index_equal(result, idx)
# infer correctly
result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')],
['a', 'b']])
assert result.levels[0].equals(Index([Timestamp('20130101')]))
assert result.levels[1].equals(Index(['a', 'b']))
def test_from_arrays_iterator(idx):
# GH 18434
arrays = [np.asarray(lev).take(level_codes)
for lev, level_codes in zip(idx.levels, idx.codes)]
# iterator as input
result = MultiIndex.from_arrays(iter(arrays), names=idx.names)
tm.assert_index_equal(result, idx)
# invalid iterator input
msg = "Input must be a list / sequence of array-likes."
with pytest.raises(TypeError, match=msg):
MultiIndex.from_arrays(0)
def test_from_arrays_index_series_datetimetz():
idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3,
tz='US/Eastern')
idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3,
tz='Asia/Tokyo')
result = pd.MultiIndex.from_arrays([idx1, idx2])
tm.assert_index_equal(result.get_level_values(0), idx1)
tm.assert_index_equal(result.get_level_values(1), idx2)
result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
tm.assert_index_equal(result2.get_level_values(0), idx1)
tm.assert_index_equal(result2.get_level_values(1), idx2)
tm.assert_index_equal(result, result2)
def test_from_arrays_index_series_timedelta():
idx1 = pd.timedelta_range('1 days', freq='D', periods=3)
idx2 = pd.timedelta_range('2 hours', freq='H', periods=3)
result = pd.MultiIndex.from_arrays([idx1, idx2])
tm.assert_index_equal(result.get_level_values(0), idx1)
tm.assert_index_equal(result.get_level_values(1), idx2)
result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
tm.assert_index_equal(result2.get_level_values(0), idx1)
tm.assert_index_equal(result2.get_level_values(1), idx2)
tm.assert_index_equal(result, result2)
def test_from_arrays_index_series_period():
idx1 = pd.period_range('2011-01-01', freq='D', periods=3)
idx2 = pd.period_range('2015-01-01', freq='H', periods=3)
result = pd.MultiIndex.from_arrays([idx1, idx2])
tm.assert_index_equal(result.get_level_values(0), idx1)
tm.assert_index_equal(result.get_level_values(1), idx2)
result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
tm.assert_index_equal(result2.get_level_values(0), idx1)
tm.assert_index_equal(result2.get_level_values(1), idx2)
tm.assert_index_equal(result, result2)
def test_from_arrays_index_datetimelike_mixed():
idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3,
tz='US/Eastern')
idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3)
idx3 = pd.timedelta_range('1 days', freq='D', periods=3)
idx4 = pd.period_range('2011-01-01', freq='D', periods=3)
result = pd.MultiIndex.from_arrays([idx1, idx2, idx3, idx4])
tm.assert_index_equal(result.get_level_values(0), idx1)
tm.assert_index_equal(result.get_level_values(1), idx2)
tm.assert_index_equal(result.get_level_values(2), idx3)
tm.assert_index_equal(result.get_level_values(3), idx4)
result2 = pd.MultiIndex.from_arrays([pd.Series(idx1),
pd.Series(idx2),
pd.Series(idx3),
pd.Series(idx4)])
tm.assert_index_equal(result2.get_level_values(0), idx1)
tm.assert_index_equal(result2.get_level_values(1), idx2)
tm.assert_index_equal(result2.get_level_values(2), idx3)
tm.assert_index_equal(result2.get_level_values(3), idx4)
tm.assert_index_equal(result, result2)
def test_from_arrays_index_series_categorical():
# GH13743
idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
ordered=False)
idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
ordered=True)
result = pd.MultiIndex.from_arrays([idx1, idx2])
tm.assert_index_equal(result.get_level_values(0), idx1)
tm.assert_index_equal(result.get_level_values(1), idx2)
result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
tm.assert_index_equal(result2.get_level_values(0), idx1)
tm.assert_index_equal(result2.get_level_values(1), idx2)
result3 = pd.MultiIndex.from_arrays([idx1.values, idx2.values])
tm.assert_index_equal(result3.get_level_values(0), idx1)
tm.assert_index_equal(result3.get_level_values(1), idx2)
def test_from_arrays_empty():
# 0 levels
msg = "Must pass non-zero number of levels/codes"
with pytest.raises(ValueError, match=msg):
MultiIndex.from_arrays(arrays=[])
# 1 level
result = MultiIndex.from_arrays(arrays=[[]], names=['A'])
assert isinstance(result, MultiIndex)
expected = Index([], name='A')
tm.assert_index_equal(result.levels[0], expected)
# N levels
for N in [2, 3]:
arrays = [[]] * N
names = list('ABC')[:N]
result = MultiIndex.from_arrays(arrays=arrays, names=names)
expected = MultiIndex(levels=[[]] * N, codes=[[]] * N,
names=names)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize('invalid_array', [
(1),
([1]),
([1, 2]),
([[1], 2]),
('a'),
(['a']),
(['a', 'b']),
([['a'], 'b']),
])
def test_from_arrays_invalid_input(invalid_array):
invalid_inputs = [1, [1], [1, 2], [[1], 2],
'a', ['a'], ['a', 'b'], [['a'], 'b']]
for i in invalid_inputs:
pytest.raises(TypeError, MultiIndex.from_arrays, arrays=i)
@pytest.mark.parametrize('idx1, idx2', [
([1, 2, 3], ['a', 'b']),
([], ['a', 'b']),
([1, 2, 3], [])
])
def test_from_arrays_different_lengths(idx1, idx2):
# see gh-13599
msg = '^all arrays must be same length$'
with pytest.raises(ValueError, match=msg):
MultiIndex.from_arrays([idx1, idx2])
# ----------------------------------------------------------------------------
# from_tuples
# ----------------------------------------------------------------------------
def test_from_tuples():
msg = 'Cannot infer number of levels from empty list'
with pytest.raises(TypeError, match=msg):
MultiIndex.from_tuples([])
expected = MultiIndex(levels=[[1, 3], [2, 4]],
codes=[[0, 1], [0, 1]],
names=['a', 'b'])
# input tuples
result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b'])
tm.assert_index_equal(result, expected)
def test_from_tuples_iterator():
# GH 18434
# input iterator for tuples
expected = MultiIndex(levels=[[1, 3], [2, 4]],
codes=[[0, 1], [0, 1]],
names=['a', 'b'])
result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=['a', 'b'])
tm.assert_index_equal(result, expected)
# input non-iterables
msg = 'Input must be a list / sequence of tuple-likes.'
with pytest.raises(TypeError, match=msg):
MultiIndex.from_tuples(0)
def test_from_tuples_empty():
# GH 16777
result = MultiIndex.from_tuples([], names=['a', 'b'])
expected = MultiIndex.from_arrays(arrays=[[], []],
names=['a', 'b'])
tm.assert_index_equal(result, expected)
def test_from_tuples_index_values(idx):
result = MultiIndex.from_tuples(idx)
assert (result.values == idx.values).all()
def test_tuples_with_name_string():
# GH 15110 and GH 14848
li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
with pytest.raises(ValueError):
pd.Index(li, name='abc')
with pytest.raises(ValueError):
pd.Index(li, name='a')
def test_from_tuples_with_tuple_label():
# GH 15457
expected = pd.DataFrame([[2, 1, 2], [4, (1, 2), 3]],
columns=['a', 'b', 'c']).set_index(['a', 'b'])
idx = pd.MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=('a', 'b'))
result = pd.DataFrame([2, 3], columns=['c'], index=idx)
tm.assert_frame_equal(expected, result)
# ----------------------------------------------------------------------------
# from_product
# ----------------------------------------------------------------------------
def test_from_product_empty_zero_levels():
# 0 levels
msg = "Must pass non-zero number of levels/codes"
with pytest.raises(ValueError, match=msg):
MultiIndex.from_product([])
def test_from_product_empty_one_level():
result = MultiIndex.from_product([[]], names=['A'])
expected = pd.Index([], name='A')
tm.assert_index_equal(result.levels[0], expected)
@pytest.mark.parametrize('first, second', [
([], []),
(['foo', 'bar', 'baz'], []),
([], ['a', 'b', 'c']),
])
def test_from_product_empty_two_levels(first, second):
names = ['A', 'B']
result = MultiIndex.from_product([first, second], names=names)
expected = MultiIndex(levels=[first, second],
codes=[[], []], names=names)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize('N', list(range(4)))
def test_from_product_empty_three_levels(N):
# GH12258
names = ['A', 'B', 'C']
lvl2 = lrange(N)
result = MultiIndex.from_product([[], lvl2, []], names=names)
expected = MultiIndex(levels=[[], lvl2, []],
codes=[[], [], []], names=names)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize('invalid_input', [
1,
[1],
[1, 2],
[[1], 2],
'a',
['a'],
['a', 'b'],
[['a'], 'b'],
])
def test_from_product_invalid_input(invalid_input):
pytest.raises(TypeError, MultiIndex.from_product, iterables=invalid_input)
def test_from_product_datetimeindex():
dt_index = date_range('2000-01-01', periods=2)
mi = pd.MultiIndex.from_product([[1, 2], dt_index])
etalon = construct_1d_object_array_from_listlike([
(1, pd.Timestamp('2000-01-01')),
(1, pd.Timestamp('2000-01-02')),
(2, pd.Timestamp('2000-01-01')),
(2, pd.Timestamp('2000-01-02')),
])
tm.assert_numpy_array_equal(mi.values, etalon)
@pytest.mark.parametrize('ordered', [False, True])
@pytest.mark.parametrize('f', [
lambda x: x,
lambda x: pd.Series(x),
lambda x: x.values
])
def test_from_product_index_series_categorical(ordered, f):
# GH13743
first = ['foo', 'bar']
idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
ordered=ordered)
expected = pd.CategoricalIndex(list("abcaab") + list("abcaab"),
categories=list("bac"),
ordered=ordered)
result = pd.MultiIndex.from_product([first, f(idx)])
tm.assert_index_equal(result.get_level_values(1), expected)
def test_from_product():
first = ['foo', 'bar', 'buz']
second = ['a', 'b', 'c']
names = ['first', 'second']
result = MultiIndex.from_product([first, second], names=names)
tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'),
('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'),
('buz', 'c')]
expected = MultiIndex.from_tuples(tuples, names=names)
tm.assert_index_equal(result, expected)
def test_from_product_iterator():
# GH 18434
first = ['foo', 'bar', 'buz']
second = ['a', 'b', 'c']
names = ['first', 'second']
tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'),
('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'),
('buz', 'c')]
expected = MultiIndex.from_tuples(tuples, names=names)
# iterator as input
result = MultiIndex.from_product(iter([first, second]), names=names)
tm.assert_index_equal(result, expected)
# Invalid non-iterable input
msg = "Input must be a list / sequence of iterables."
with pytest.raises(TypeError, match=msg):
MultiIndex.from_product(0)
def test_create_index_existing_name(idx):
# GH11193, when an existing index is passed, and a new name is not
# specified, the new index should inherit the previous object name
index = idx
index.names = ['foo', 'bar']
result = pd.Index(index)
expected = Index(
Index([
('foo', 'one'), ('foo', 'two'),
('bar', 'one'), ('baz', 'two'),
('qux', 'one'), ('qux', 'two')],
dtype='object'
),
names=['foo', 'bar']
)
tm.assert_index_equal(result, expected)
result = pd.Index(index, names=['A', 'B'])
expected = Index(
Index([
('foo', 'one'), ('foo', 'two'),
('bar', 'one'), ('baz', 'two'),
('qux', 'one'), ('qux', 'two')],
dtype='object'
),
names=['A', 'B']
)
tm.assert_index_equal(result, expected)
# ----------------------------------------------------------------------------
# from_frame
# ----------------------------------------------------------------------------
def test_from_frame():
# GH 22420
df = pd.DataFrame([['a', 'a'], ['a', 'b'], ['b', 'a'], ['b', 'b']],
columns=['L1', 'L2'])
expected = pd.MultiIndex.from_tuples([('a', 'a'), ('a', 'b'),
('b', 'a'), ('b', 'b')],
names=['L1', 'L2'])
result = pd.MultiIndex.from_frame(df)
tm.assert_index_equal(expected, result)
@pytest.mark.parametrize('non_frame', [
pd.Series([1, 2, 3, 4]),
[1, 2, 3, 4],
[[1, 2], [3, 4], [5, 6]],
pd.Index([1, 2, 3, 4]),
np.array([[1, 2], [3, 4], [5, 6]]),
27
])
def test_from_frame_error(non_frame):
# GH 22420
with pytest.raises(TypeError, match='Input must be a DataFrame'):
pd.MultiIndex.from_frame(non_frame)
def test_from_frame_dtype_fidelity():
# GH 22420
df = pd.DataFrame(OrderedDict([
('dates', pd.date_range('19910905', periods=6, tz='US/Eastern')),
('a', [1, 1, 1, 2, 2, 2]),
('b', pd.Categorical(['a', 'a', 'b', 'b', 'c', 'c'], ordered=True)),
('c', ['x', 'x', 'y', 'z', 'x', 'y'])
]))
original_dtypes = df.dtypes.to_dict()
expected_mi = pd.MultiIndex.from_arrays([
pd.date_range('19910905', periods=6, tz='US/Eastern'),
[1, 1, 1, 2, 2, 2],
pd.Categorical(['a', 'a', 'b', 'b', 'c', 'c'], ordered=True),
['x', 'x', 'y', 'z', 'x', 'y']
], names=['dates', 'a', 'b', 'c'])
mi = pd.MultiIndex.from_frame(df)
mi_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}
tm.assert_index_equal(expected_mi, mi)
assert original_dtypes == mi_dtypes
@pytest.mark.parametrize('names_in,names_out', [
(None, [('L1', 'x'), ('L2', 'y')]),
(['x', 'y'], ['x', 'y']),
])
def test_from_frame_valid_names(names_in, names_out):
# GH 22420
df = pd.DataFrame([['a', 'a'], ['a', 'b'], ['b', 'a'], ['b', 'b']],
columns=pd.MultiIndex.from_tuples([('L1', 'x'),
('L2', 'y')]))
mi = pd.MultiIndex.from_frame(df, names=names_in)
assert mi.names == names_out
@pytest.mark.parametrize('names_in,names_out', [
('bad_input', ValueError("Names should be list-like for a MultiIndex")),
(['a', 'b', 'c'], ValueError("Length of names must match number of "
"levels in MultiIndex."))
])
def test_from_frame_invalid_names(names_in, names_out):
# GH 22420
df = pd.DataFrame([['a', 'a'], ['a', 'b'], ['b', 'a'], ['b', 'b']],
columns=pd.MultiIndex.from_tuples([('L1', 'x'),
('L2', 'y')]))
with pytest.raises(type(names_out), match=names_out.args[0]):
pd.MultiIndex.from_frame(df, names=names_in)
@@ -0,0 +1,97 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas.compat import PYPY
import pandas as pd
from pandas import MultiIndex
import pandas.util.testing as tm
def test_contains_top_level():
midx = MultiIndex.from_product([['A', 'B'], [1, 2]])
assert 'A' in midx
assert 'A' not in midx._engine
def test_contains_with_nat():
# MI with a NaT
mi = MultiIndex(levels=[['C'],
pd.date_range('2012-01-01', periods=5)],
codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
names=[None, 'B'])
assert ('C', pd.Timestamp('2012-01-01')) in mi
for val in mi.values:
assert val in mi
def test_contains(idx):
assert ('foo', 'two') in idx
assert ('bar', 'two') not in idx
assert None not in idx
@pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy")
def test_isin_nan_pypy():
idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]])
tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]),
np.array([False, True]))
tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]),
np.array([False, True]))
def test_isin():
values = [('foo', 2), ('bar', 3), ('quux', 4)]
idx = MultiIndex.from_arrays([
['qux', 'baz', 'foo', 'bar'],
np.arange(4)
])
result = idx.isin(values)
expected = np.array([False, False, True, True])
tm.assert_numpy_array_equal(result, expected)
# empty, return dtype bool
idx = MultiIndex.from_arrays([[], []])
result = idx.isin(values)
assert len(result) == 0
assert result.dtype == np.bool_
@pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy")
def test_isin_nan_not_pypy():
idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]])
tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]),
np.array([False, False]))
tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]),
np.array([False, False]))
def test_isin_level_kwarg():
idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange(
4)])
vals_0 = ['foo', 'bar', 'quux']
vals_1 = [2, 3, 10]
expected = np.array([False, False, True, True])
tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0))
tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2))
tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1))
tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1))
pytest.raises(IndexError, idx.isin, vals_0, level=5)
pytest.raises(IndexError, idx.isin, vals_0, level=-5)
pytest.raises(KeyError, idx.isin, vals_0, level=1.0)
pytest.raises(KeyError, idx.isin, vals_1, level=-1.0)
pytest.raises(KeyError, idx.isin, vals_1, level='A')
idx.names = ['A', 'B']
tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level='A'))
tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B'))
pytest.raises(KeyError, idx.isin, vals_1, level='C')
@@ -0,0 +1,224 @@
# -*- coding: utf-8 -*-
from collections import OrderedDict
import numpy as np
import pytest
from pandas.compat import range
import pandas as pd
from pandas import DataFrame, MultiIndex, date_range
import pandas.util.testing as tm
def test_tolist(idx):
result = idx.tolist()
exp = list(idx.values)
assert result == exp
def test_to_numpy(idx):
result = idx.to_numpy()
exp = idx.values
tm.assert_numpy_array_equal(result, exp)
def test_to_frame():
tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]
index = MultiIndex.from_tuples(tuples)
result = index.to_frame(index=False)
expected = DataFrame(tuples)
tm.assert_frame_equal(result, expected)
result = index.to_frame()
expected.index = index
tm.assert_frame_equal(result, expected)
tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]
index = MultiIndex.from_tuples(tuples, names=['first', 'second'])
result = index.to_frame(index=False)
expected = DataFrame(tuples)
expected.columns = ['first', 'second']
tm.assert_frame_equal(result, expected)
result = index.to_frame()
expected.index = index
tm.assert_frame_equal(result, expected)
# See GH-22580
index = MultiIndex.from_tuples(tuples)
result = index.to_frame(index=False, name=['first', 'second'])
expected = DataFrame(tuples)
expected.columns = ['first', 'second']
tm.assert_frame_equal(result, expected)
result = index.to_frame(name=['first', 'second'])
expected.index = index
expected.columns = ['first', 'second']
tm.assert_frame_equal(result, expected)
msg = "'name' must be a list / sequence of column names."
with pytest.raises(TypeError, match=msg):
index.to_frame(name='first')
msg = "'name' should have same length as number of levels on index."
with pytest.raises(ValueError, match=msg):
index.to_frame(name=['first'])
# Tests for datetime index
index = MultiIndex.from_product([range(5),
pd.date_range('20130101', periods=3)])
result = index.to_frame(index=False)
expected = DataFrame(
{0: np.repeat(np.arange(5, dtype='int64'), 3),
1: np.tile(pd.date_range('20130101', periods=3), 5)})
tm.assert_frame_equal(result, expected)
result = index.to_frame()
expected.index = index
tm.assert_frame_equal(result, expected)
# See GH-22580
result = index.to_frame(index=False, name=['first', 'second'])
expected = DataFrame(
{'first': np.repeat(np.arange(5, dtype='int64'), 3),
'second': np.tile(pd.date_range('20130101', periods=3), 5)})
tm.assert_frame_equal(result, expected)
result = index.to_frame(name=['first', 'second'])
expected.index = index
tm.assert_frame_equal(result, expected)
def test_to_frame_dtype_fidelity():
# GH 22420
mi = pd.MultiIndex.from_arrays([
pd.date_range('19910905', periods=6, tz='US/Eastern'),
[1, 1, 1, 2, 2, 2],
pd.Categorical(['a', 'a', 'b', 'b', 'c', 'c'], ordered=True),
['x', 'x', 'y', 'z', 'x', 'y']
], names=['dates', 'a', 'b', 'c'])
original_dtypes = {name: mi.levels[i].dtype
for i, name in enumerate(mi.names)}
expected_df = pd.DataFrame(OrderedDict([
('dates', pd.date_range('19910905', periods=6, tz='US/Eastern')),
('a', [1, 1, 1, 2, 2, 2]),
('b', pd.Categorical(['a', 'a', 'b', 'b', 'c', 'c'], ordered=True)),
('c', ['x', 'x', 'y', 'z', 'x', 'y'])
]))
df = mi.to_frame(index=False)
df_dtypes = df.dtypes.to_dict()
tm.assert_frame_equal(df, expected_df)
assert original_dtypes == df_dtypes
def test_to_frame_resulting_column_order():
# GH 22420
expected = ['z', 0, 'a']
mi = pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['x', 'y', 'z'],
['q', 'w', 'e']], names=expected)
result = mi.to_frame().columns.tolist()
assert result == expected
def test_to_hierarchical():
index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (
2, 'two')])
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = index.to_hierarchical(3)
expected = MultiIndex(levels=[[1, 2], ['one', 'two']],
codes=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
[0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]])
tm.assert_index_equal(result, expected)
assert result.names == index.names
# K > 1
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = index.to_hierarchical(3, 2)
expected = MultiIndex(levels=[[1, 2], ['one', 'two']],
codes=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]])
tm.assert_index_equal(result, expected)
assert result.names == index.names
# non-sorted
index = MultiIndex.from_tuples([(2, 'c'), (1, 'b'),
(2, 'a'), (2, 'b')],
names=['N1', 'N2'])
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = index.to_hierarchical(2)
expected = MultiIndex.from_tuples([(2, 'c'), (2, 'c'), (1, 'b'),
(1, 'b'),
(2, 'a'), (2, 'a'),
(2, 'b'), (2, 'b')],
names=['N1', 'N2'])
tm.assert_index_equal(result, expected)
assert result.names == index.names
def test_roundtrip_pickle_with_tz():
return
# GH 8367
# round-trip of timezone
index = MultiIndex.from_product(
[[1, 2], ['a', 'b'], date_range('20130101', periods=3,
tz='US/Eastern')
], names=['one', 'two', 'three'])
unpickled = tm.round_trip_pickle(index)
assert index.equal_levels(unpickled)
def test_pickle(indices):
return
unpickled = tm.round_trip_pickle(indices)
assert indices.equals(unpickled)
original_name, indices.name = indices.name, 'foo'
unpickled = tm.round_trip_pickle(indices)
assert indices.equals(unpickled)
indices.name = original_name
def test_to_series(idx):
# assert that we are creating a copy of the index
s = idx.to_series()
assert s.values is not idx.values
assert s.index is not idx
assert s.name == idx.name
def test_to_series_with_arguments(idx):
# GH18699
# index kwarg
s = idx.to_series(index=idx)
assert s.values is not idx.values
assert s.index is idx
assert s.name == idx.name
# name kwarg
idx = idx
s = idx.to_series(name='__test')
assert s.values is not idx.values
assert s.index is not idx
assert s.name != idx.name
def test_to_flat_index(idx):
expected = pd.Index((('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
('baz', 'two'), ('qux', 'one'), ('qux', 'two')),
tupleize_cols=False)
result = idx.to_flat_index()
tm.assert_index_equal(result, expected)
@@ -0,0 +1,93 @@
# -*- coding: utf-8 -*-
from copy import copy, deepcopy
import pytest
from pandas import MultiIndex
import pandas.util.testing as tm
def assert_multiindex_copied(copy, original):
# Levels should be (at least, shallow copied)
tm.assert_copy(copy.levels, original.levels)
tm.assert_almost_equal(copy.codes, original.codes)
# Labels doesn't matter which way copied
tm.assert_almost_equal(copy.codes, original.codes)
assert copy.codes is not original.codes
# Names doesn't matter which way copied
assert copy.names == original.names
assert copy.names is not original.names
# Sort order should be copied
assert copy.sortorder == original.sortorder
def test_copy(idx):
i_copy = idx.copy()
assert_multiindex_copied(i_copy, idx)
def test_shallow_copy(idx):
i_copy = idx._shallow_copy()
assert_multiindex_copied(i_copy, idx)
def test_labels_deprecated(idx):
# GH23752
with tm.assert_produces_warning(FutureWarning):
idx.copy(labels=idx.codes)
def test_view(idx):
i_view = idx.view()
assert_multiindex_copied(i_view, idx)
@pytest.mark.parametrize('func', [copy, deepcopy])
def test_copy_and_deepcopy(func):
idx = MultiIndex(
levels=[['foo', 'bar'], ['fizz', 'buzz']],
codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
names=['first', 'second']
)
idx_copy = func(idx)
assert idx_copy is not idx
assert idx_copy.equals(idx)
@pytest.mark.parametrize('deep', [True, False])
def test_copy_method(deep):
idx = MultiIndex(
levels=[['foo', 'bar'], ['fizz', 'buzz']],
codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
names=['first', 'second']
)
idx_copy = idx.copy(deep=deep)
assert idx_copy.equals(idx)
@pytest.mark.parametrize('deep', [True, False])
@pytest.mark.parametrize('kwarg, value', [
('names', ['thrid', 'fourth']),
('levels', [['foo2', 'bar2'], ['fizz2', 'buzz2']]),
('codes', [[1, 0, 0, 0], [1, 1, 0, 0]])
])
def test_copy_method_kwargs(deep, kwarg, value):
# gh-12309: Check that the "name" argument as well other kwargs are honored
idx = MultiIndex(
levels=[['foo', 'bar'], ['fizz', 'buzz']],
codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
names=['first', 'second']
)
return
idx_copy = idx.copy(**{kwarg: value, 'deep': deep})
if kwarg == 'names':
assert getattr(idx_copy, kwarg) == value
else:
assert [list(i) for i in getattr(idx_copy, kwarg)] == value
@@ -0,0 +1,128 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas.compat import lrange
from pandas.errors import PerformanceWarning
import pandas as pd
from pandas import Index, MultiIndex
import pandas.util.testing as tm
def test_drop(idx):
dropped = idx.drop([('foo', 'two'), ('qux', 'one')])
index = MultiIndex.from_tuples([('foo', 'two'), ('qux', 'one')])
dropped2 = idx.drop(index)
expected = idx[[0, 2, 3, 5]]
tm.assert_index_equal(dropped, expected)
tm.assert_index_equal(dropped2, expected)
dropped = idx.drop(['bar'])
expected = idx[[0, 1, 3, 4, 5]]
tm.assert_index_equal(dropped, expected)
dropped = idx.drop('foo')
expected = idx[[2, 3, 4, 5]]
tm.assert_index_equal(dropped, expected)
index = MultiIndex.from_tuples([('bar', 'two')])
pytest.raises(KeyError, idx.drop, [('bar', 'two')])
pytest.raises(KeyError, idx.drop, index)
pytest.raises(KeyError, idx.drop, ['foo', 'two'])
# partially correct argument
mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')])
pytest.raises(KeyError, idx.drop, mixed_index)
# error='ignore'
dropped = idx.drop(index, errors='ignore')
expected = idx[[0, 1, 2, 3, 4, 5]]
tm.assert_index_equal(dropped, expected)
dropped = idx.drop(mixed_index, errors='ignore')
expected = idx[[0, 1, 2, 3, 5]]
tm.assert_index_equal(dropped, expected)
dropped = idx.drop(['foo', 'two'], errors='ignore')
expected = idx[[2, 3, 4, 5]]
tm.assert_index_equal(dropped, expected)
# mixed partial / full drop
dropped = idx.drop(['foo', ('qux', 'one')])
expected = idx[[2, 3, 5]]
tm.assert_index_equal(dropped, expected)
# mixed partial / full drop / error='ignore'
mixed_index = ['foo', ('qux', 'one'), 'two']
pytest.raises(KeyError, idx.drop, mixed_index)
dropped = idx.drop(mixed_index, errors='ignore')
expected = idx[[2, 3, 5]]
tm.assert_index_equal(dropped, expected)
def test_droplevel_with_names(idx):
index = idx[idx.get_loc('foo')]
dropped = index.droplevel(0)
assert dropped.name == 'second'
index = MultiIndex(
levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))],
codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])],
names=['one', 'two', 'three'])
dropped = index.droplevel(0)
assert dropped.names == ('two', 'three')
dropped = index.droplevel('two')
expected = index.droplevel(1)
assert dropped.equals(expected)
def test_droplevel_list():
index = MultiIndex(
levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))],
codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])],
names=['one', 'two', 'three'])
dropped = index[:2].droplevel(['three', 'one'])
expected = index[:2].droplevel(2).droplevel(0)
assert dropped.equals(expected)
dropped = index[:2].droplevel([])
expected = index[:2]
assert dropped.equals(expected)
with pytest.raises(ValueError):
index[:2].droplevel(['one', 'two', 'three'])
with pytest.raises(KeyError):
index[:2].droplevel(['one', 'four'])
def test_drop_not_lexsorted():
# GH 12078
# define the lexsorted version of the multi-index
tuples = [('a', ''), ('b1', 'c1'), ('b2', 'c2')]
lexsorted_mi = MultiIndex.from_tuples(tuples, names=['b', 'c'])
assert lexsorted_mi.is_lexsorted()
# and the not-lexsorted version
df = pd.DataFrame(columns=['a', 'b', 'c', 'd'],
data=[[1, 'b1', 'c1', 3], [1, 'b2', 'c2', 4]])
df = df.pivot_table(index='a', columns=['b', 'c'], values='d')
df = df.reset_index()
not_lexsorted_mi = df.columns
assert not not_lexsorted_mi.is_lexsorted()
# compare the results
tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
with tm.assert_produces_warning(PerformanceWarning):
tm.assert_index_equal(lexsorted_mi.drop('a'),
not_lexsorted_mi.drop('a'))
@@ -0,0 +1,266 @@
# -*- coding: utf-8 -*-
from itertools import product
import numpy as np
import pytest
from pandas._libs import hashtable
from pandas.compat import range, u
from pandas import DatetimeIndex, MultiIndex
import pandas.util.testing as tm
@pytest.mark.parametrize('names', [None, ['first', 'second']])
def test_unique(names):
mi = MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]], names=names)
res = mi.unique()
exp = MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names)
tm.assert_index_equal(res, exp)
mi = MultiIndex.from_arrays([list('aaaa'), list('abab')],
names=names)
res = mi.unique()
exp = MultiIndex.from_arrays([list('aa'), list('ab')], names=mi.names)
tm.assert_index_equal(res, exp)
mi = MultiIndex.from_arrays([list('aaaa'), list('aaaa')], names=names)
res = mi.unique()
exp = MultiIndex.from_arrays([['a'], ['a']], names=mi.names)
tm.assert_index_equal(res, exp)
# GH #20568 - empty MI
mi = MultiIndex.from_arrays([[], []], names=names)
res = mi.unique()
tm.assert_index_equal(mi, res)
def test_unique_datetimelike():
idx1 = DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
'2015-01-01', 'NaT', 'NaT'])
idx2 = DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-02',
'2015-01-02', 'NaT', '2015-01-01'],
tz='Asia/Tokyo')
result = MultiIndex.from_arrays([idx1, idx2]).unique()
eidx1 = DatetimeIndex(['2015-01-01', '2015-01-01', 'NaT', 'NaT'])
eidx2 = DatetimeIndex(['2015-01-01', '2015-01-02',
'NaT', '2015-01-01'],
tz='Asia/Tokyo')
exp = MultiIndex.from_arrays([eidx1, eidx2])
tm.assert_index_equal(result, exp)
@pytest.mark.parametrize('level', [0, 'first', 1, 'second'])
def test_unique_level(idx, level):
# GH #17896 - with level= argument
result = idx.unique(level=level)
expected = idx.get_level_values(level).unique()
tm.assert_index_equal(result, expected)
# With already unique level
mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]],
names=['first', 'second'])
result = mi.unique(level=level)
expected = mi.get_level_values(level)
tm.assert_index_equal(result, expected)
# With empty MI
mi = MultiIndex.from_arrays([[], []], names=['first', 'second'])
result = mi.unique(level=level)
expected = mi.get_level_values(level)
@pytest.mark.parametrize('dropna', [True, False])
def test_get_unique_index(idx, dropna):
mi = idx[[0, 1, 0, 1, 1, 0, 0]]
expected = mi._shallow_copy(mi[[0, 1]])
result = mi._get_unique_index(dropna=dropna)
assert result.unique
tm.assert_index_equal(result, expected)
def test_duplicate_multiindex_codes():
# GH 17464
# Make sure that a MultiIndex with duplicate levels throws a ValueError
with pytest.raises(ValueError):
mi = MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)])
# And that using set_levels with duplicate levels fails
mi = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'],
[1, 2, 1, 2, 3]])
with pytest.raises(ValueError):
mi.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]],
inplace=True)
@pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2],
[1, 'a', 1]])
def test_duplicate_level_names(names):
# GH18872, GH19029
mi = MultiIndex.from_product([[0, 1]] * 3, names=names)
assert mi.names == names
# With .rename()
mi = MultiIndex.from_product([[0, 1]] * 3)
mi = mi.rename(names)
assert mi.names == names
# With .rename(., level=)
mi.rename(names[1], level=1, inplace=True)
mi = mi.rename([names[0], names[2]], level=[0, 2])
assert mi.names == names
def test_duplicate_meta_data():
# GH 10115
mi = MultiIndex(
levels=[[0, 1], [0, 1, 2]],
codes=[[0, 0, 0, 0, 1, 1, 1],
[0, 1, 2, 0, 0, 1, 2]])
for idx in [mi,
mi.set_names([None, None]),
mi.set_names([None, 'Num']),
mi.set_names(['Upper', 'Num']), ]:
assert idx.has_duplicates
assert idx.drop_duplicates().names == idx.names
def test_has_duplicates(idx, idx_dup):
# see fixtures
assert idx.is_unique is True
assert idx.has_duplicates is False
assert idx_dup.is_unique is False
assert idx_dup.has_duplicates is True
mi = MultiIndex(levels=[[0, 1], [0, 1, 2]],
codes=[[0, 0, 0, 0, 1, 1, 1],
[0, 1, 2, 0, 0, 1, 2]])
assert mi.is_unique is False
assert mi.has_duplicates is True
def test_has_duplicates_from_tuples():
# GH 9075
t = [(u('x'), u('out'), u('z'), 5, u('y'), u('in'), u('z'), 169),
(u('x'), u('out'), u('z'), 7, u('y'), u('in'), u('z'), 119),
(u('x'), u('out'), u('z'), 9, u('y'), u('in'), u('z'), 135),
(u('x'), u('out'), u('z'), 13, u('y'), u('in'), u('z'), 145),
(u('x'), u('out'), u('z'), 14, u('y'), u('in'), u('z'), 158),
(u('x'), u('out'), u('z'), 16, u('y'), u('in'), u('z'), 122),
(u('x'), u('out'), u('z'), 17, u('y'), u('in'), u('z'), 160),
(u('x'), u('out'), u('z'), 18, u('y'), u('in'), u('z'), 180),
(u('x'), u('out'), u('z'), 20, u('y'), u('in'), u('z'), 143),
(u('x'), u('out'), u('z'), 21, u('y'), u('in'), u('z'), 128),
(u('x'), u('out'), u('z'), 22, u('y'), u('in'), u('z'), 129),
(u('x'), u('out'), u('z'), 25, u('y'), u('in'), u('z'), 111),
(u('x'), u('out'), u('z'), 28, u('y'), u('in'), u('z'), 114),
(u('x'), u('out'), u('z'), 29, u('y'), u('in'), u('z'), 121),
(u('x'), u('out'), u('z'), 31, u('y'), u('in'), u('z'), 126),
(u('x'), u('out'), u('z'), 32, u('y'), u('in'), u('z'), 155),
(u('x'), u('out'), u('z'), 33, u('y'), u('in'), u('z'), 123),
(u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)]
mi = MultiIndex.from_tuples(t)
assert not mi.has_duplicates
def test_has_duplicates_overflow():
# handle int64 overflow if possible
def check(nlevels, with_nulls):
codes = np.tile(np.arange(500), 2)
level = np.arange(500)
if with_nulls: # inject some null values
codes[500] = -1 # common nan value
codes = [codes.copy() for i in range(nlevels)]
for i in range(nlevels):
codes[i][500 + i - nlevels // 2] = -1
codes += [np.array([-1, 1]).repeat(500)]
else:
codes = [codes] * nlevels + [np.arange(2).repeat(500)]
levels = [level] * nlevels + [[0, 1]]
# no dups
mi = MultiIndex(levels=levels, codes=codes)
assert not mi.has_duplicates
# with a dup
if with_nulls:
def f(a):
return np.insert(a, 1000, a[0])
codes = list(map(f, codes))
mi = MultiIndex(levels=levels, codes=codes)
else:
values = mi.values.tolist()
mi = MultiIndex.from_tuples(values + [values[0]])
assert mi.has_duplicates
# no overflow
check(4, False)
check(4, True)
# overflow possible
check(8, False)
check(8, True)
@pytest.mark.parametrize('keep, expected', [
('first', np.array([False, False, False, True, True, False])),
('last', np.array([False, True, True, False, False, False])),
(False, np.array([False, True, True, True, True, False]))
])
def test_duplicated(idx_dup, keep, expected):
result = idx_dup.duplicated(keep=keep)
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize('keep', ['first', 'last', False])
def test_duplicated_large(keep):
# GH 9125
n, k = 200, 5000
levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)]
codes = [np.random.choice(n, k * n) for lev in levels]
mi = MultiIndex(levels=levels, codes=codes)
result = mi.duplicated(keep=keep)
expected = hashtable.duplicated_object(mi.values, keep=keep)
tm.assert_numpy_array_equal(result, expected)
def test_get_duplicates():
# GH5873
for a in [101, 102]:
mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
assert not mi.has_duplicates
with tm.assert_produces_warning(FutureWarning):
# Deprecated - see GH20239
assert mi.get_duplicates().equals(MultiIndex.from_arrays([[], []]))
tm.assert_numpy_array_equal(mi.duplicated(),
np.zeros(2, dtype='bool'))
for n in range(1, 6): # 1st level shape
for m in range(1, 5): # 2nd level shape
# all possible unique combinations, including nan
codes = product(range(-1, n), range(-1, m))
mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]],
codes=np.random.permutation(list(codes)).T)
assert len(mi) == (n + 1) * (m + 1)
assert not mi.has_duplicates
with tm.assert_produces_warning(FutureWarning):
# Deprecated - see GH20239
assert mi.get_duplicates().equals(MultiIndex.from_arrays(
[[], []]))
tm.assert_numpy_array_equal(mi.duplicated(),
np.zeros(len(mi), dtype='bool'))
@@ -0,0 +1,221 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas.compat import lrange, lzip, range
import pandas as pd
from pandas import Index, MultiIndex, Series
import pandas.util.testing as tm
def test_equals(idx):
assert idx.equals(idx)
assert idx.equals(idx.copy())
assert idx.equals(idx.astype(object))
assert not idx.equals(list(idx))
assert not idx.equals(np.array(idx))
same_values = Index(idx, dtype=object)
assert idx.equals(same_values)
assert same_values.equals(idx)
if idx.nlevels == 1:
# do not test MultiIndex
assert not idx.equals(pd.Series(idx))
def test_equals_op(idx):
# GH9947, GH10637
index_a = idx
n = len(index_a)
index_b = index_a[0:-1]
index_c = index_a[0:-1].append(index_a[-2:-1])
index_d = index_a[0:1]
with pytest.raises(ValueError, match="Lengths must match"):
index_a == index_b
expected1 = np.array([True] * n)
expected2 = np.array([True] * (n - 1) + [False])
tm.assert_numpy_array_equal(index_a == index_a, expected1)
tm.assert_numpy_array_equal(index_a == index_c, expected2)
# test comparisons with numpy arrays
array_a = np.array(index_a)
array_b = np.array(index_a[0:-1])
array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
array_d = np.array(index_a[0:1])
with pytest.raises(ValueError, match="Lengths must match"):
index_a == array_b
tm.assert_numpy_array_equal(index_a == array_a, expected1)
tm.assert_numpy_array_equal(index_a == array_c, expected2)
# test comparisons with Series
series_a = Series(array_a)
series_b = Series(array_b)
series_c = Series(array_c)
series_d = Series(array_d)
with pytest.raises(ValueError, match="Lengths must match"):
index_a == series_b
tm.assert_numpy_array_equal(index_a == series_a, expected1)
tm.assert_numpy_array_equal(index_a == series_c, expected2)
# cases where length is 1 for one of them
with pytest.raises(ValueError, match="Lengths must match"):
index_a == index_d
with pytest.raises(ValueError, match="Lengths must match"):
index_a == series_d
with pytest.raises(ValueError, match="Lengths must match"):
index_a == array_d
msg = "Can only compare identically-labeled Series objects"
with pytest.raises(ValueError, match=msg):
series_a == series_d
with pytest.raises(ValueError, match="Lengths must match"):
series_a == array_d
# comparing with a scalar should broadcast; note that we are excluding
# MultiIndex because in this case each item in the index is a tuple of
# length 2, and therefore is considered an array of length 2 in the
# comparison instead of a scalar
if not isinstance(index_a, MultiIndex):
expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
# assuming the 2nd to last item is unique in the data
item = index_a[-2]
tm.assert_numpy_array_equal(index_a == item, expected3)
tm.assert_series_equal(series_a == item, Series(expected3))
def test_equals_multi(idx):
assert idx.equals(idx)
assert not idx.equals(idx.values)
assert idx.equals(Index(idx.values))
assert idx.equal_levels(idx)
assert not idx.equals(idx[:-1])
assert not idx.equals(idx[-1])
# different number of levels
index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index(
lrange(4))], codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])])
index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1])
assert not index.equals(index2)
assert not index.equal_levels(index2)
# levels are different
major_axis = Index(lrange(4))
minor_axis = Index(lrange(2))
major_codes = np.array([0, 0, 1, 2, 2, 3])
minor_codes = np.array([0, 1, 0, 0, 1, 0])
index = MultiIndex(levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes])
assert not idx.equals(index)
assert not idx.equal_levels(index)
# some of the labels are different
major_axis = Index(['foo', 'bar', 'baz', 'qux'])
minor_axis = Index(['one', 'two'])
major_codes = np.array([0, 0, 2, 2, 3, 3])
minor_codes = np.array([0, 1, 0, 1, 0, 1])
index = MultiIndex(levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes])
assert not idx.equals(index)
def test_identical(idx):
mi = idx.copy()
mi2 = idx.copy()
assert mi.identical(mi2)
mi = mi.set_names(['new1', 'new2'])
assert mi.equals(mi2)
assert not mi.identical(mi2)
mi2 = mi2.set_names(['new1', 'new2'])
assert mi.identical(mi2)
mi3 = Index(mi.tolist(), names=mi.names)
mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False)
assert mi.identical(mi3)
assert not mi.identical(mi4)
assert mi.equals(mi4)
def test_equals_operator(idx):
# GH9785
assert (idx == idx).all()
def test_equals_missing_values():
# make sure take is not using -1
i = pd.MultiIndex.from_tuples([(0, pd.NaT),
(0, pd.Timestamp('20130101'))])
result = i[0:1].equals(i[0])
assert not result
result = i[1:2].equals(i[1])
assert not result
def test_is_():
mi = MultiIndex.from_tuples(lzip(range(10), range(10)))
assert mi.is_(mi)
assert mi.is_(mi.view())
assert mi.is_(mi.view().view().view().view())
mi2 = mi.view()
# names are metadata, they don't change id
mi2.names = ["A", "B"]
assert mi2.is_(mi)
assert mi.is_(mi2)
assert mi.is_(mi.set_names(["C", "D"]))
mi2 = mi.view()
mi2.set_names(["E", "F"], inplace=True)
assert mi.is_(mi2)
# levels are inherent properties, they change identity
mi3 = mi2.set_levels([lrange(10), lrange(10)])
assert not mi3.is_(mi2)
# shouldn't change
assert mi2.is_(mi)
mi4 = mi3.view()
# GH 17464 - Remove duplicate MultiIndex levels
mi4.set_levels([lrange(10), lrange(10)], inplace=True)
assert not mi4.is_(mi3)
mi5 = mi.view()
mi5.set_levels(mi5.levels, inplace=True)
assert not mi5.is_(mi)
def test_is_all_dates(idx):
assert not idx.is_all_dates
def test_is_numeric(idx):
# MultiIndex is never numeric
assert not idx.is_numeric()
def test_multiindex_compare():
# GH 21149
# Ensure comparison operations for MultiIndex with nlevels == 1
# behave consistently with those for MultiIndex with nlevels > 1
midx = pd.MultiIndex.from_product([[0, 1]])
# Equality self-test: MultiIndex object vs self
expected = pd.Series([True, True])
result = pd.Series(midx == midx)
tm.assert_series_equal(result, expected)
# Greater than comparison: MultiIndex object vs self
expected = pd.Series([False, False])
result = pd.Series(midx > midx)
tm.assert_series_equal(result, expected)
@@ -0,0 +1,132 @@
# -*- coding: utf-8 -*-
import warnings
import pytest
from pandas.compat import PY3, range, u
import pandas as pd
from pandas import MultiIndex, compat
import pandas.util.testing as tm
def test_dtype_str(indices):
dtype = indices.dtype_str
assert isinstance(dtype, compat.string_types)
assert dtype == str(indices.dtype)
def test_format(idx):
idx.format()
idx[:0].format()
def test_format_integer_names():
index = MultiIndex(levels=[[0, 1], [0, 1]],
codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1])
index.format(names=True)
def test_format_sparse_config(idx):
warn_filters = warnings.filters
warnings.filterwarnings('ignore', category=FutureWarning,
module=".*format")
# GH1538
pd.set_option('display.multi_sparse', False)
result = idx.format()
assert result[1] == 'foo two'
tm.reset_display_options()
warnings.filters = warn_filters
def test_format_sparse_display():
index = MultiIndex(levels=[[0, 1], [0, 1], [0, 1], [0]],
codes=[[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1],
[0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]])
result = index.format()
assert result[3] == '1 0 0 0'
def test_repr_with_unicode_data():
with pd.core.config.option_context("display.encoding", 'UTF-8'):
d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
index = pd.DataFrame(d).set_index(["a", "b"]).index
assert "\\u" not in repr(index) # we don't want unicode-escaped
@pytest.mark.skip(reason="#22511 will remove this test")
def test_repr_roundtrip():
mi = MultiIndex.from_product([list('ab'), range(3)],
names=['first', 'second'])
str(mi)
if PY3:
tm.assert_index_equal(eval(repr(mi)), mi, exact=True)
else:
result = eval(repr(mi))
# string coerces to unicode
tm.assert_index_equal(result, mi, exact=False)
assert mi.get_level_values('first').inferred_type == 'string'
assert result.get_level_values('first').inferred_type == 'unicode'
mi_u = MultiIndex.from_product(
[list(u'ab'), range(3)], names=['first', 'second'])
result = eval(repr(mi_u))
tm.assert_index_equal(result, mi_u, exact=True)
# formatting
if PY3:
str(mi)
else:
compat.text_type(mi)
# long format
mi = MultiIndex.from_product([list('abcdefg'), range(10)],
names=['first', 'second'])
if PY3:
tm.assert_index_equal(eval(repr(mi)), mi, exact=True)
else:
result = eval(repr(mi))
# string coerces to unicode
tm.assert_index_equal(result, mi, exact=False)
assert mi.get_level_values('first').inferred_type == 'string'
assert result.get_level_values('first').inferred_type == 'unicode'
result = eval(repr(mi_u))
tm.assert_index_equal(result, mi_u, exact=True)
def test_unicode_string_with_unicode():
d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
idx = pd.DataFrame(d).set_index(["a", "b"]).index
if PY3:
str(idx)
else:
compat.text_type(idx)
def test_bytestring_with_unicode():
d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
idx = pd.DataFrame(d).set_index(["a", "b"]).index
if PY3:
bytes(idx)
else:
str(idx)
def test_repr_max_seq_item_setting(idx):
# GH10182
idx = idx.repeat(50)
with pd.option_context("display.max_seq_items", None):
repr(idx)
assert '...' not in str(idx)
@@ -0,0 +1,454 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas.compat import range
import pandas as pd
from pandas import CategoricalIndex, Index, MultiIndex
import pandas.util.testing as tm
def assert_matching(actual, expected, check_dtype=False):
# avoid specifying internal representation
# as much as possible
assert len(actual) == len(expected)
for act, exp in zip(actual, expected):
act = np.asarray(act)
exp = np.asarray(exp)
tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype)
def test_get_level_number_integer(idx):
idx.names = [1, 0]
assert idx._get_level_number(1) == 0
assert idx._get_level_number(0) == 1
pytest.raises(IndexError, idx._get_level_number, 2)
with pytest.raises(KeyError, match='Level fourth not found'):
idx._get_level_number('fourth')
def test_get_level_values(idx):
result = idx.get_level_values(0)
expected = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'],
name='first')
tm.assert_index_equal(result, expected)
assert result.name == 'first'
result = idx.get_level_values('first')
expected = idx.get_level_values(0)
tm.assert_index_equal(result, expected)
# GH 10460
index = MultiIndex(
levels=[CategoricalIndex(['A', 'B']),
CategoricalIndex([1, 2, 3])],
codes=[np.array([0, 0, 0, 1, 1, 1]),
np.array([0, 1, 2, 0, 1, 2])])
exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B'])
tm.assert_index_equal(index.get_level_values(0), exp)
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
tm.assert_index_equal(index.get_level_values(1), exp)
def test_get_value_duplicates():
index = MultiIndex(levels=[['D', 'B', 'C'],
[0, 26, 27, 37, 57, 67, 75, 82]],
codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2],
[1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
names=['tag', 'day'])
assert index.get_loc('D') == slice(0, 3)
with pytest.raises(KeyError):
index._engine.get_value(np.array([]), 'D')
def test_get_level_values_all_na():
# GH 17924 when level entirely consists of nan
arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]]
index = pd.MultiIndex.from_arrays(arrays)
result = index.get_level_values(0)
expected = pd.Index([np.nan, np.nan, np.nan], dtype=np.float64)
tm.assert_index_equal(result, expected)
result = index.get_level_values(1)
expected = pd.Index(['a', np.nan, 1], dtype=object)
tm.assert_index_equal(result, expected)
def test_get_level_values_int_with_na():
# GH 17924
arrays = [['a', 'b', 'b'], [1, np.nan, 2]]
index = pd.MultiIndex.from_arrays(arrays)
result = index.get_level_values(1)
expected = Index([1, np.nan, 2])
tm.assert_index_equal(result, expected)
arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]]
index = pd.MultiIndex.from_arrays(arrays)
result = index.get_level_values(1)
expected = Index([np.nan, np.nan, 2])
tm.assert_index_equal(result, expected)
def test_get_level_values_na():
arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]]
index = pd.MultiIndex.from_arrays(arrays)
result = index.get_level_values(0)
expected = pd.Index([np.nan, np.nan, np.nan])
tm.assert_index_equal(result, expected)
result = index.get_level_values(1)
expected = pd.Index(['a', np.nan, 1])
tm.assert_index_equal(result, expected)
arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])]
index = pd.MultiIndex.from_arrays(arrays)
result = index.get_level_values(1)
expected = pd.DatetimeIndex([0, 1, pd.NaT])
tm.assert_index_equal(result, expected)
arrays = [[], []]
index = pd.MultiIndex.from_arrays(arrays)
result = index.get_level_values(0)
expected = pd.Index([], dtype=object)
tm.assert_index_equal(result, expected)
def test_set_name_methods(idx, index_names):
# so long as these are synonyms, we don't need to test set_names
assert idx.rename == idx.set_names
new_names = [name + "SUFFIX" for name in index_names]
ind = idx.set_names(new_names)
assert idx.names == index_names
assert ind.names == new_names
with pytest.raises(ValueError, match="^Length"):
ind.set_names(new_names + new_names)
new_names2 = [name + "SUFFIX2" for name in new_names]
res = ind.set_names(new_names2, inplace=True)
assert res is None
assert ind.names == new_names2
# set names for specific level (# GH7792)
ind = idx.set_names(new_names[0], level=0)
assert idx.names == index_names
assert ind.names == [new_names[0], index_names[1]]
res = ind.set_names(new_names2[0], level=0, inplace=True)
assert res is None
assert ind.names == [new_names2[0], index_names[1]]
# set names for multiple levels
ind = idx.set_names(new_names, level=[0, 1])
assert idx.names == index_names
assert ind.names == new_names
res = ind.set_names(new_names2, level=[0, 1], inplace=True)
assert res is None
assert ind.names == new_names2
def test_set_levels_codes_directly(idx):
# setting levels/codes directly raises AttributeError
levels = idx.levels
new_levels = [[lev + 'a' for lev in level] for level in levels]
codes = idx.codes
major_codes, minor_codes = codes
major_codes = [(x + 1) % 3 for x in major_codes]
minor_codes = [(x + 1) % 1 for x in minor_codes]
new_codes = [major_codes, minor_codes]
with pytest.raises(AttributeError):
idx.levels = new_levels
with pytest.raises(AttributeError):
idx.codes = new_codes
def test_set_levels(idx):
# side note - you probably wouldn't want to use levels and codes
# directly like this - but it is possible.
levels = idx.levels
new_levels = [[lev + 'a' for lev in level] for level in levels]
# level changing [w/o mutation]
ind2 = idx.set_levels(new_levels)
assert_matching(ind2.levels, new_levels)
assert_matching(idx.levels, levels)
# level changing [w/ mutation]
ind2 = idx.copy()
inplace_return = ind2.set_levels(new_levels, inplace=True)
assert inplace_return is None
assert_matching(ind2.levels, new_levels)
# level changing specific level [w/o mutation]
ind2 = idx.set_levels(new_levels[0], level=0)
assert_matching(ind2.levels, [new_levels[0], levels[1]])
assert_matching(idx.levels, levels)
ind2 = idx.set_levels(new_levels[1], level=1)
assert_matching(ind2.levels, [levels[0], new_levels[1]])
assert_matching(idx.levels, levels)
# level changing multiple levels [w/o mutation]
ind2 = idx.set_levels(new_levels, level=[0, 1])
assert_matching(ind2.levels, new_levels)
assert_matching(idx.levels, levels)
# level changing specific level [w/ mutation]
ind2 = idx.copy()
inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True)
assert inplace_return is None
assert_matching(ind2.levels, [new_levels[0], levels[1]])
assert_matching(idx.levels, levels)
ind2 = idx.copy()
inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True)
assert inplace_return is None
assert_matching(ind2.levels, [levels[0], new_levels[1]])
assert_matching(idx.levels, levels)
# level changing multiple levels [w/ mutation]
ind2 = idx.copy()
inplace_return = ind2.set_levels(new_levels, level=[0, 1],
inplace=True)
assert inplace_return is None
assert_matching(ind2.levels, new_levels)
assert_matching(idx.levels, levels)
# illegal level changing should not change levels
# GH 13754
original_index = idx.copy()
for inplace in [True, False]:
with pytest.raises(ValueError, match="^On"):
idx.set_levels(['c'], level=0, inplace=inplace)
assert_matching(idx.levels, original_index.levels,
check_dtype=True)
with pytest.raises(ValueError, match="^On"):
idx.set_codes([0, 1, 2, 3, 4, 5], level=0,
inplace=inplace)
assert_matching(idx.codes, original_index.codes,
check_dtype=True)
with pytest.raises(TypeError, match="^Levels"):
idx.set_levels('c', level=0, inplace=inplace)
assert_matching(idx.levels, original_index.levels,
check_dtype=True)
with pytest.raises(TypeError, match="^Codes"):
idx.set_codes(1, level=0, inplace=inplace)
assert_matching(idx.codes, original_index.codes,
check_dtype=True)
def test_set_codes(idx):
# side note - you probably wouldn't want to use levels and codes
# directly like this - but it is possible.
codes = idx.codes
major_codes, minor_codes = codes
major_codes = [(x + 1) % 3 for x in major_codes]
minor_codes = [(x + 1) % 1 for x in minor_codes]
new_codes = [major_codes, minor_codes]
# changing codes w/o mutation
ind2 = idx.set_codes(new_codes)
assert_matching(ind2.codes, new_codes)
assert_matching(idx.codes, codes)
# changing label w/ mutation
ind2 = idx.copy()
inplace_return = ind2.set_codes(new_codes, inplace=True)
assert inplace_return is None
assert_matching(ind2.codes, new_codes)
# codes changing specific level w/o mutation
ind2 = idx.set_codes(new_codes[0], level=0)
assert_matching(ind2.codes, [new_codes[0], codes[1]])
assert_matching(idx.codes, codes)
ind2 = idx.set_codes(new_codes[1], level=1)
assert_matching(ind2.codes, [codes[0], new_codes[1]])
assert_matching(idx.codes, codes)
# codes changing multiple levels w/o mutation
ind2 = idx.set_codes(new_codes, level=[0, 1])
assert_matching(ind2.codes, new_codes)
assert_matching(idx.codes, codes)
# label changing specific level w/ mutation
ind2 = idx.copy()
inplace_return = ind2.set_codes(new_codes[0], level=0, inplace=True)
assert inplace_return is None
assert_matching(ind2.codes, [new_codes[0], codes[1]])
assert_matching(idx.codes, codes)
ind2 = idx.copy()
inplace_return = ind2.set_codes(new_codes[1], level=1, inplace=True)
assert inplace_return is None
assert_matching(ind2.codes, [codes[0], new_codes[1]])
assert_matching(idx.codes, codes)
# codes changing multiple levels [w/ mutation]
ind2 = idx.copy()
inplace_return = ind2.set_codes(new_codes, level=[0, 1],
inplace=True)
assert inplace_return is None
assert_matching(ind2.codes, new_codes)
assert_matching(idx.codes, codes)
# label changing for levels of different magnitude of categories
ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)])
new_codes = range(129, -1, -1)
expected = pd.MultiIndex.from_tuples(
[(0, i) for i in new_codes])
# [w/o mutation]
result = ind.set_codes(codes=new_codes, level=1)
assert result.equals(expected)
# [w/ mutation]
result = ind.copy()
result.set_codes(codes=new_codes, level=1, inplace=True)
assert result.equals(expected)
with tm.assert_produces_warning(FutureWarning):
ind.set_codes(labels=new_codes, level=1)
def test_set_labels_deprecated():
# GH23752
ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)])
new_labels = range(129, -1, -1)
expected = pd.MultiIndex.from_tuples(
[(0, i) for i in new_labels])
# [w/o mutation]
with tm.assert_produces_warning(FutureWarning):
result = ind.set_labels(labels=new_labels, level=1)
assert result.equals(expected)
# [w/ mutation]
result = ind.copy()
with tm.assert_produces_warning(FutureWarning):
result.set_labels(labels=new_labels, level=1, inplace=True)
assert result.equals(expected)
def test_set_levels_codes_names_bad_input(idx):
levels, codes = idx.levels, idx.codes
names = idx.names
with pytest.raises(ValueError, match='Length of levels'):
idx.set_levels([levels[0]])
with pytest.raises(ValueError, match='Length of codes'):
idx.set_codes([codes[0]])
with pytest.raises(ValueError, match='Length of names'):
idx.set_names([names[0]])
# shouldn't scalar data error, instead should demand list-like
with pytest.raises(TypeError, match='list of lists-like'):
idx.set_levels(levels[0])
# shouldn't scalar data error, instead should demand list-like
with pytest.raises(TypeError, match='list of lists-like'):
idx.set_codes(codes[0])
# shouldn't scalar data error, instead should demand list-like
with pytest.raises(TypeError, match='list-like'):
idx.set_names(names[0])
# should have equal lengths
with pytest.raises(TypeError, match='list of lists-like'):
idx.set_levels(levels[0], level=[0, 1])
with pytest.raises(TypeError, match='list-like'):
idx.set_levels(levels, level=0)
# should have equal lengths
with pytest.raises(TypeError, match='list of lists-like'):
idx.set_codes(codes[0], level=[0, 1])
with pytest.raises(TypeError, match='list-like'):
idx.set_codes(codes, level=0)
# should have equal lengths
with pytest.raises(ValueError, match='Length of names'):
idx.set_names(names[0], level=[0, 1])
with pytest.raises(TypeError, match='Names must be a'):
idx.set_names(names, level=0)
@pytest.mark.parametrize('inplace', [True, False])
def test_set_names_with_nlevel_1(inplace):
# GH 21149
# Ensure that .set_names for MultiIndex with
# nlevels == 1 does not raise any errors
expected = pd.MultiIndex(levels=[[0, 1]],
codes=[[0, 1]],
names=['first'])
m = pd.MultiIndex.from_product([[0, 1]])
result = m.set_names('first', level=0, inplace=inplace)
if inplace:
result = m
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize('ordered', [True, False])
def test_set_levels_categorical(ordered):
# GH13854
index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])
cidx = CategoricalIndex(list("bac"), ordered=ordered)
result = index.set_levels(cidx, 0)
expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]],
codes=index.codes)
tm.assert_index_equal(result, expected)
result_lvl = result.get_level_values(0)
expected_lvl = CategoricalIndex(list("bacb"),
categories=cidx.categories,
ordered=cidx.ordered)
tm.assert_index_equal(result_lvl, expected_lvl)
def test_set_value_keeps_names():
# motivating example from #3742
lev1 = ['hans', 'hans', 'hans', 'grethe', 'grethe', 'grethe']
lev2 = ['1', '2', '3'] * 2
idx = pd.MultiIndex.from_arrays([lev1, lev2], names=['Name', 'Number'])
df = pd.DataFrame(
np.random.randn(6, 4),
columns=['one', 'two', 'three', 'four'],
index=idx)
df = df.sort_index()
assert df._is_copy is None
assert df.index.names == ('Name', 'Number')
df.at[('grethe', '4'), 'one'] = 99.34
assert df._is_copy is None
assert df.index.names == ('Name', 'Number')
def test_set_levels_with_iterable():
# GH23273
sizes = [1, 2, 3]
colors = ['black'] * 3
index = pd.MultiIndex.from_arrays([sizes, colors], names=['size', 'color'])
result = index.set_levels(map(int, ['3', '2', '1']), level='size')
expected_sizes = [3, 2, 1]
expected = pd.MultiIndex.from_arrays([expected_sizes, colors],
names=['size', 'color'])
tm.assert_index_equal(result, expected)
@@ -0,0 +1,375 @@
# -*- coding: utf-8 -*-
from datetime import timedelta
import numpy as np
import pytest
from pandas.compat import lrange
import pandas as pd
from pandas import (
Categorical, CategoricalIndex, Index, IntervalIndex, MultiIndex,
date_range)
from pandas.core.indexes.base import InvalidIndexError
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal
def test_slice_locs_partial(idx):
sorted_idx, _ = idx.sortlevel(0)
result = sorted_idx.slice_locs(('foo', 'two'), ('qux', 'one'))
assert result == (1, 5)
result = sorted_idx.slice_locs(None, ('qux', 'one'))
assert result == (0, 5)
result = sorted_idx.slice_locs(('foo', 'two'), None)
assert result == (1, len(sorted_idx))
result = sorted_idx.slice_locs('bar', 'baz')
assert result == (2, 4)
def test_slice_locs():
df = tm.makeTimeDataFrame()
stacked = df.stack()
idx = stacked.index
slob = slice(*idx.slice_locs(df.index[5], df.index[15]))
sliced = stacked[slob]
expected = df[5:16].stack()
tm.assert_almost_equal(sliced.values, expected.values)
slob = slice(*idx.slice_locs(df.index[5] + timedelta(seconds=30),
df.index[15] - timedelta(seconds=30)))
sliced = stacked[slob]
expected = df[6:15].stack()
tm.assert_almost_equal(sliced.values, expected.values)
def test_slice_locs_with_type_mismatch():
df = tm.makeTimeDataFrame()
stacked = df.stack()
idx = stacked.index
with pytest.raises(TypeError, match='^Level type mismatch'):
idx.slice_locs((1, 3))
with pytest.raises(TypeError, match='^Level type mismatch'):
idx.slice_locs(df.index[5] + timedelta(seconds=30), (5, 2))
df = tm.makeCustomDataframe(5, 5)
stacked = df.stack()
idx = stacked.index
with pytest.raises(TypeError, match='^Level type mismatch'):
idx.slice_locs(timedelta(seconds=30))
# TODO: Try creating a UnicodeDecodeError in exception message
with pytest.raises(TypeError, match='^Level type mismatch'):
idx.slice_locs(df.index[1], (16, "a"))
def test_slice_locs_not_sorted():
index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index(
lrange(4))], codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])])
msg = "[Kk]ey length.*greater than MultiIndex lexsort depth"
with pytest.raises(KeyError, match=msg):
index.slice_locs((1, 0, 1), (2, 1, 0))
# works
sorted_index, _ = index.sortlevel(0)
# should there be a test case here???
sorted_index.slice_locs((1, 0, 1), (2, 1, 0))
def test_slice_locs_not_contained():
# some searchsorted action
index = MultiIndex(levels=[[0, 2, 4, 6], [0, 2, 4]],
codes=[[0, 0, 0, 1, 1, 2, 3, 3, 3],
[0, 1, 2, 1, 2, 2, 0, 1, 2]], sortorder=0)
result = index.slice_locs((1, 0), (5, 2))
assert result == (3, 6)
result = index.slice_locs(1, 5)
assert result == (3, 6)
result = index.slice_locs((2, 2), (5, 2))
assert result == (3, 6)
result = index.slice_locs(2, 5)
assert result == (3, 6)
result = index.slice_locs((1, 0), (6, 3))
assert result == (3, 8)
result = index.slice_locs(-1, 10)
assert result == (0, len(index))
def test_putmask_with_wrong_mask(idx):
# GH18368
with pytest.raises(ValueError):
idx.putmask(np.ones(len(idx) + 1, np.bool), 1)
with pytest.raises(ValueError):
idx.putmask(np.ones(len(idx) - 1, np.bool), 1)
with pytest.raises(ValueError):
idx.putmask('foo', 1)
def test_get_indexer():
major_axis = Index(lrange(4))
minor_axis = Index(lrange(2))
major_codes = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp)
minor_codes = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp)
index = MultiIndex(levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes])
idx1 = index[:5]
idx2 = index[[1, 3, 5]]
r1 = idx1.get_indexer(idx2)
assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp))
r1 = idx2.get_indexer(idx1, method='pad')
e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp)
assert_almost_equal(r1, e1)
r2 = idx2.get_indexer(idx1[::-1], method='pad')
assert_almost_equal(r2, e1[::-1])
rffill1 = idx2.get_indexer(idx1, method='ffill')
assert_almost_equal(r1, rffill1)
r1 = idx2.get_indexer(idx1, method='backfill')
e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp)
assert_almost_equal(r1, e1)
r2 = idx2.get_indexer(idx1[::-1], method='backfill')
assert_almost_equal(r2, e1[::-1])
rbfill1 = idx2.get_indexer(idx1, method='bfill')
assert_almost_equal(r1, rbfill1)
# pass non-MultiIndex
r1 = idx1.get_indexer(idx2.values)
rexp1 = idx1.get_indexer(idx2)
assert_almost_equal(r1, rexp1)
r1 = idx1.get_indexer([1, 2, 3])
assert (r1 == [-1, -1, -1]).all()
# create index with duplicates
idx1 = Index(lrange(10) + lrange(10))
idx2 = Index(lrange(20))
msg = "Reindexing only valid with uniquely valued Index objects"
with pytest.raises(InvalidIndexError, match=msg):
idx1.get_indexer(idx2)
def test_get_indexer_nearest():
midx = MultiIndex.from_tuples([('a', 1), ('b', 2)])
with pytest.raises(NotImplementedError):
midx.get_indexer(['a'], method='nearest')
with pytest.raises(NotImplementedError):
midx.get_indexer(['a'], method='pad', tolerance=2)
def test_getitem(idx):
# scalar
assert idx[2] == ('bar', 'one')
# slice
result = idx[2:5]
expected = idx[[2, 3, 4]]
assert result.equals(expected)
# boolean
result = idx[[True, False, True, False, True, True]]
result2 = idx[np.array([True, False, True, False, True, True])]
expected = idx[[0, 2, 4, 5]]
assert result.equals(expected)
assert result2.equals(expected)
def test_getitem_group_select(idx):
sorted_idx, _ = idx.sortlevel(0)
assert sorted_idx.get_loc('baz') == slice(3, 4)
assert sorted_idx.get_loc('foo') == slice(0, 2)
def test_get_indexer_consistency(idx):
# See GH 16819
if isinstance(idx, IntervalIndex):
pass
if idx.is_unique or isinstance(idx, CategoricalIndex):
indexer = idx.get_indexer(idx[0:2])
assert isinstance(indexer, np.ndarray)
assert indexer.dtype == np.intp
else:
e = "Reindexing only valid with uniquely valued Index objects"
with pytest.raises(InvalidIndexError, match=e):
idx.get_indexer(idx[0:2])
indexer, _ = idx.get_indexer_non_unique(idx[0:2])
assert isinstance(indexer, np.ndarray)
assert indexer.dtype == np.intp
@pytest.mark.parametrize('ind1', [[True] * 5, pd.Index([True] * 5)])
@pytest.mark.parametrize('ind2', [[True, False, True, False, False],
pd.Index([True, False, True, False,
False])])
def test_getitem_bool_index_all(ind1, ind2):
# GH#22533
idx = MultiIndex.from_tuples([(10, 1), (20, 2), (30, 3),
(40, 4), (50, 5)])
tm.assert_index_equal(idx[ind1], idx)
expected = MultiIndex.from_tuples([(10, 1), (30, 3)])
tm.assert_index_equal(idx[ind2], expected)
@pytest.mark.parametrize('ind1', [[True], pd.Index([True])])
@pytest.mark.parametrize('ind2', [[False], pd.Index([False])])
def test_getitem_bool_index_single(ind1, ind2):
# GH#22533
idx = MultiIndex.from_tuples([(10, 1)])
tm.assert_index_equal(idx[ind1], idx)
expected = pd.MultiIndex(levels=[np.array([], dtype=np.int64),
np.array([], dtype=np.int64)],
codes=[[], []])
tm.assert_index_equal(idx[ind2], expected)
def test_get_loc(idx):
assert idx.get_loc(('foo', 'two')) == 1
assert idx.get_loc(('baz', 'two')) == 3
pytest.raises(KeyError, idx.get_loc, ('bar', 'two'))
pytest.raises(KeyError, idx.get_loc, 'quux')
pytest.raises(NotImplementedError, idx.get_loc, 'foo',
method='nearest')
# 3 levels
index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index(
lrange(4))], codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])])
pytest.raises(KeyError, index.get_loc, (1, 1))
assert index.get_loc((2, 0)) == slice(3, 5)
def test_get_loc_duplicates():
index = Index([2, 2, 2, 2])
result = index.get_loc(2)
expected = slice(0, 4)
assert result == expected
# pytest.raises(Exception, index.get_loc, 2)
index = Index(['c', 'a', 'a', 'b', 'b'])
rs = index.get_loc('c')
xp = 0
assert rs == xp
def test_get_loc_level():
index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index(
lrange(4))], codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])])
loc, new_index = index.get_loc_level((0, 1))
expected = slice(1, 2)
exp_index = index[expected].droplevel(0).droplevel(0)
assert loc == expected
assert new_index.equals(exp_index)
loc, new_index = index.get_loc_level((0, 1, 0))
expected = 1
assert loc == expected
assert new_index is None
pytest.raises(KeyError, index.get_loc_level, (2, 2))
# GH 22221: unused label
pytest.raises(KeyError, index.drop(2).get_loc_level, 2)
# Unused label on unsorted level:
pytest.raises(KeyError, index.drop(1, level=2).get_loc_level, 2, 2)
index = MultiIndex(levels=[[2000], lrange(4)], codes=[np.array(
[0, 0, 0, 0]), np.array([0, 1, 2, 3])])
result, new_index = index.get_loc_level((2000, slice(None, None)))
expected = slice(None, None)
assert result == expected
assert new_index.equals(index.droplevel(0))
@pytest.mark.parametrize('dtype1', [int, float, bool, str])
@pytest.mark.parametrize('dtype2', [int, float, bool, str])
def test_get_loc_multiple_dtypes(dtype1, dtype2):
# GH 18520
levels = [np.array([0, 1]).astype(dtype1),
np.array([0, 1]).astype(dtype2)]
idx = pd.MultiIndex.from_product(levels)
assert idx.get_loc(idx[2]) == 2
@pytest.mark.parametrize('level', [0, 1])
@pytest.mark.parametrize('dtypes', [[int, float], [float, int]])
def test_get_loc_implicit_cast(level, dtypes):
# GH 18818, GH 15994 : as flat index, cast int to float and vice-versa
levels = [['a', 'b'], ['c', 'd']]
key = ['b', 'd']
lev_dtype, key_dtype = dtypes
levels[level] = np.array([0, 1], dtype=lev_dtype)
key[level] = key_dtype(1)
idx = MultiIndex.from_product(levels)
assert idx.get_loc(tuple(key)) == 3
def test_get_loc_cast_bool():
# GH 19086 : int is casted to bool, but not vice-versa
levels = [[False, True], np.arange(2, dtype='int64')]
idx = MultiIndex.from_product(levels)
assert idx.get_loc((0, 1)) == 1
assert idx.get_loc((1, 0)) == 2
pytest.raises(KeyError, idx.get_loc, (False, True))
pytest.raises(KeyError, idx.get_loc, (True, False))
@pytest.mark.parametrize('level', [0, 1])
def test_get_loc_nan(level, nulls_fixture):
# GH 18485 : NaN in MultiIndex
levels = [['a', 'b'], ['c', 'd']]
key = ['b', 'd']
levels[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture))
key[level] = nulls_fixture
idx = MultiIndex.from_product(levels)
assert idx.get_loc(tuple(key)) == 3
def test_get_loc_missing_nan():
# GH 8569
idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]])
assert isinstance(idx.get_loc(1), slice)
pytest.raises(KeyError, idx.get_loc, 3)
pytest.raises(KeyError, idx.get_loc, np.nan)
pytest.raises(KeyError, idx.get_loc, [np.nan])
def test_get_indexer_categorical_time():
# https://github.com/pandas-dev/pandas/issues/21390
midx = MultiIndex.from_product(
[Categorical(['a', 'b', 'c']),
Categorical(date_range("2012-01-01", periods=3, freq='H'))])
result = midx.get_indexer(midx)
tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp))
@@ -0,0 +1,293 @@
# -*- coding: utf-8 -*-
import re
import numpy as np
import pytest
from pandas.compat import lrange, range
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
import pandas as pd
from pandas import IntervalIndex, MultiIndex, RangeIndex
import pandas.util.testing as tm
def test_labels_dtypes():
# GH 8456
i = MultiIndex.from_tuples([('A', 1), ('A', 2)])
assert i.codes[0].dtype == 'int8'
assert i.codes[1].dtype == 'int8'
i = MultiIndex.from_product([['a'], range(40)])
assert i.codes[1].dtype == 'int8'
i = MultiIndex.from_product([['a'], range(400)])
assert i.codes[1].dtype == 'int16'
i = MultiIndex.from_product([['a'], range(40000)])
assert i.codes[1].dtype == 'int32'
i = pd.MultiIndex.from_product([['a'], range(1000)])
assert (i.codes[0] >= 0).all()
assert (i.codes[1] >= 0).all()
def test_values_boxed():
tuples = [(1, pd.Timestamp('2000-01-01')), (2, pd.NaT),
(3, pd.Timestamp('2000-01-03')),
(1, pd.Timestamp('2000-01-04')),
(2, pd.Timestamp('2000-01-02')),
(3, pd.Timestamp('2000-01-03'))]
result = pd.MultiIndex.from_tuples(tuples)
expected = construct_1d_object_array_from_listlike(tuples)
tm.assert_numpy_array_equal(result.values, expected)
# Check that code branches for boxed values produce identical results
tm.assert_numpy_array_equal(result.values[:4], result[:4].values)
def test_values_multiindex_datetimeindex():
# Test to ensure we hit the boxing / nobox part of MI.values
ints = np.arange(10 ** 18, 10 ** 18 + 5)
naive = pd.DatetimeIndex(ints)
# TODO(GH-24559): Remove the FutureWarning
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
aware = pd.DatetimeIndex(ints, tz='US/Central')
idx = pd.MultiIndex.from_arrays([naive, aware])
result = idx.values
outer = pd.DatetimeIndex([x[0] for x in result])
tm.assert_index_equal(outer, naive)
inner = pd.DatetimeIndex([x[1] for x in result])
tm.assert_index_equal(inner, aware)
# n_lev > n_lab
result = idx[:2].values
outer = pd.DatetimeIndex([x[0] for x in result])
tm.assert_index_equal(outer, naive[:2])
inner = pd.DatetimeIndex([x[1] for x in result])
tm.assert_index_equal(inner, aware[:2])
def test_values_multiindex_periodindex():
# Test to ensure we hit the boxing / nobox part of MI.values
ints = np.arange(2007, 2012)
pidx = pd.PeriodIndex(ints, freq='D')
idx = pd.MultiIndex.from_arrays([ints, pidx])
result = idx.values
outer = pd.Int64Index([x[0] for x in result])
tm.assert_index_equal(outer, pd.Int64Index(ints))
inner = pd.PeriodIndex([x[1] for x in result])
tm.assert_index_equal(inner, pidx)
# n_lev > n_lab
result = idx[:2].values
outer = pd.Int64Index([x[0] for x in result])
tm.assert_index_equal(outer, pd.Int64Index(ints[:2]))
inner = pd.PeriodIndex([x[1] for x in result])
tm.assert_index_equal(inner, pidx[:2])
def test_consistency():
# need to construct an overflow
major_axis = lrange(70000)
minor_axis = lrange(10)
major_codes = np.arange(70000)
minor_codes = np.repeat(lrange(10), 7000)
# the fact that is works means it's consistent
index = MultiIndex(levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes])
# inconsistent
major_codes = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3])
minor_codes = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1])
index = MultiIndex(levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes])
assert index.is_unique is False
def test_hash_collisions():
# non-smoke test that we don't get hash collisions
index = MultiIndex.from_product([np.arange(1000), np.arange(1000)],
names=['one', 'two'])
result = index.get_indexer(index.values)
tm.assert_numpy_array_equal(result, np.arange(
len(index), dtype='intp'))
for i in [0, 1, len(index) - 2, len(index) - 1]:
result = index.get_loc(index[i])
assert result == i
def test_dims():
pass
def take_invalid_kwargs():
vals = [['A', 'B'],
[pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]]
idx = pd.MultiIndex.from_product(vals, names=['str', 'dt'])
indices = [1, 2]
msg = r"take\(\) got an unexpected keyword argument 'foo'"
with pytest.raises(TypeError, match=msg):
idx.take(indices, foo=2)
msg = "the 'out' parameter is not supported"
with pytest.raises(ValueError, match=msg):
idx.take(indices, out=indices)
msg = "the 'mode' parameter is not supported"
with pytest.raises(ValueError, match=msg):
idx.take(indices, mode='clip')
def test_isna_behavior(idx):
# should not segfault GH5123
# NOTE: if MI representation changes, may make sense to allow
# isna(MI)
with pytest.raises(NotImplementedError):
pd.isna(idx)
def test_large_multiindex_error():
# GH12527
df_below_1000000 = pd.DataFrame(
1, index=pd.MultiIndex.from_product([[1, 2], range(499999)]),
columns=['dest'])
with pytest.raises(KeyError):
df_below_1000000.loc[(-1, 0), 'dest']
with pytest.raises(KeyError):
df_below_1000000.loc[(3, 0), 'dest']
df_above_1000000 = pd.DataFrame(
1, index=pd.MultiIndex.from_product([[1, 2], range(500001)]),
columns=['dest'])
with pytest.raises(KeyError):
df_above_1000000.loc[(-1, 0), 'dest']
with pytest.raises(KeyError):
df_above_1000000.loc[(3, 0), 'dest']
def test_million_record_attribute_error():
# GH 18165
r = list(range(1000000))
df = pd.DataFrame({'a': r, 'b': r},
index=pd.MultiIndex.from_tuples([(x, x) for x in r]))
msg = "'Series' object has no attribute 'foo'"
with pytest.raises(AttributeError, match=msg):
df['a'].foo()
def test_can_hold_identifiers(idx):
key = idx[0]
assert idx._can_hold_identifiers_and_holds_name(key) is True
def test_metadata_immutable(idx):
levels, codes = idx.levels, idx.codes
# shouldn't be able to set at either the top level or base level
mutable_regex = re.compile('does not support mutable operations')
with pytest.raises(TypeError, match=mutable_regex):
levels[0] = levels[0]
with pytest.raises(TypeError, match=mutable_regex):
levels[0][0] = levels[0][0]
# ditto for labels
with pytest.raises(TypeError, match=mutable_regex):
codes[0] = codes[0]
with pytest.raises(TypeError, match=mutable_regex):
codes[0][0] = codes[0][0]
# and for names
names = idx.names
with pytest.raises(TypeError, match=mutable_regex):
names[0] = names[0]
def test_level_setting_resets_attributes():
ind = pd.MultiIndex.from_arrays([
['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
])
assert ind.is_monotonic
ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True)
# if this fails, probably didn't reset the cache correctly.
assert not ind.is_monotonic
def test_rangeindex_fallback_coercion_bug():
# GH 12893
foo = pd.DataFrame(np.arange(100).reshape((10, 10)))
bar = pd.DataFrame(np.arange(100).reshape((10, 10)))
df = pd.concat({'foo': foo.stack(), 'bar': bar.stack()}, axis=1)
df.index.names = ['fizz', 'buzz']
str(df)
expected = pd.DataFrame({'bar': np.arange(100),
'foo': np.arange(100)},
index=pd.MultiIndex.from_product(
[range(10), range(10)],
names=['fizz', 'buzz']))
tm.assert_frame_equal(df, expected, check_like=True)
result = df.index.get_level_values('fizz')
expected = pd.Int64Index(np.arange(10), name='fizz').repeat(10)
tm.assert_index_equal(result, expected)
result = df.index.get_level_values('buzz')
expected = pd.Int64Index(np.tile(np.arange(10), 10), name='buzz')
tm.assert_index_equal(result, expected)
def test_hash_error(indices):
index = indices
with pytest.raises(TypeError, match=("unhashable type: %r" %
type(index).__name__)):
hash(indices)
def test_mutability(indices):
if not len(indices):
return
pytest.raises(TypeError, indices.__setitem__, 0, indices[0])
def test_wrong_number_names(indices):
with pytest.raises(ValueError, match="^Length"):
indices.names = ["apple", "banana", "carrot"]
def test_memory_usage(idx):
result = idx.memory_usage()
if len(idx):
idx.get_loc(idx[0])
result2 = idx.memory_usage()
result3 = idx.memory_usage(deep=True)
# RangeIndex, IntervalIndex
# don't have engines
if not isinstance(idx, (RangeIndex, IntervalIndex)):
assert result2 > result
if idx.inferred_type == 'object':
assert result3 > result2
else:
# we report 0 for no-length
assert result == 0
def test_nlevels(idx):
assert idx.nlevels == 2
@@ -0,0 +1,96 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
import pandas as pd
from pandas import Index, MultiIndex
import pandas.util.testing as tm
@pytest.mark.parametrize('other', [
Index(['three', 'one', 'two']),
Index(['one']),
Index(['one', 'three']),
])
def test_join_level(idx, other, join_type):
join_index, lidx, ridx = other.join(idx, how=join_type,
level='second',
return_indexers=True)
exp_level = other.join(idx.levels[1], how=join_type)
assert join_index.levels[0].equals(idx.levels[0])
assert join_index.levels[1].equals(exp_level)
# pare down levels
mask = np.array(
[x[1] in exp_level for x in idx], dtype=bool)
exp_values = idx.values[mask]
tm.assert_numpy_array_equal(join_index.values, exp_values)
if join_type in ('outer', 'inner'):
join_index2, ridx2, lidx2 = \
idx.join(other, how=join_type, level='second',
return_indexers=True)
assert join_index.equals(join_index2)
tm.assert_numpy_array_equal(lidx, lidx2)
tm.assert_numpy_array_equal(ridx, ridx2)
tm.assert_numpy_array_equal(join_index2.values, exp_values)
def test_join_level_corner_case(idx):
# some corner cases
index = Index(['three', 'one', 'two'])
result = index.join(idx, level='second')
assert isinstance(result, MultiIndex)
with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"):
idx.join(idx, level=1)
def test_join_self(idx, join_type):
joined = idx.join(idx, how=join_type)
assert idx is joined
def test_join_multi():
# GH 10665
midx = pd.MultiIndex.from_product(
[np.arange(4), np.arange(4)], names=['a', 'b'])
idx = pd.Index([1, 2, 5], name='b')
# inner
jidx, lidx, ridx = midx.join(idx, how='inner', return_indexers=True)
exp_idx = pd.MultiIndex.from_product(
[np.arange(4), [1, 2]], names=['a', 'b'])
exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)
tm.assert_index_equal(jidx, exp_idx)
tm.assert_numpy_array_equal(lidx, exp_lidx)
tm.assert_numpy_array_equal(ridx, exp_ridx)
# flip
jidx, ridx, lidx = idx.join(midx, how='inner', return_indexers=True)
tm.assert_index_equal(jidx, exp_idx)
tm.assert_numpy_array_equal(lidx, exp_lidx)
tm.assert_numpy_array_equal(ridx, exp_ridx)
# keep MultiIndex
jidx, lidx, ridx = midx.join(idx, how='left', return_indexers=True)
exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0,
1, -1], dtype=np.intp)
tm.assert_index_equal(jidx, midx)
assert lidx is None
tm.assert_numpy_array_equal(ridx, exp_ridx)
# flip
jidx, ridx, lidx = idx.join(midx, how='right', return_indexers=True)
tm.assert_index_equal(jidx, midx)
assert lidx is None
tm.assert_numpy_array_equal(ridx, exp_ridx)
def test_join_self_unique(idx, join_type):
if idx.is_unique:
joined = idx.join(idx, how=join_type)
assert (idx == joined).all()
@@ -0,0 +1,129 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas._libs.tslib import iNaT
import pandas as pd
from pandas import Int64Index, MultiIndex, PeriodIndex, UInt64Index
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
import pandas.util.testing as tm
def test_fillna(idx):
# GH 11343
# TODO: Remove or Refactor. Not Implemented for MultiIndex
for name, index in [('idx', idx), ]:
if len(index) == 0:
pass
elif isinstance(index, MultiIndex):
idx = index.copy()
msg = "isna is not defined for MultiIndex"
with pytest.raises(NotImplementedError, match=msg):
idx.fillna(idx[0])
else:
idx = index.copy()
result = idx.fillna(idx[0])
tm.assert_index_equal(result, idx)
assert result is not idx
msg = "'value' must be a scalar, passed: "
with pytest.raises(TypeError, match=msg):
idx.fillna([idx[0]])
idx = index.copy()
values = idx.values
if isinstance(index, DatetimeIndexOpsMixin):
values[1] = iNaT
elif isinstance(index, (Int64Index, UInt64Index)):
continue
else:
values[1] = np.nan
if isinstance(index, PeriodIndex):
idx = index.__class__(values, freq=index.freq)
else:
idx = index.__class__(values)
expected = np.array([False] * len(idx), dtype=bool)
expected[1] = True
tm.assert_numpy_array_equal(idx._isnan, expected)
assert idx.hasnans is True
def test_dropna():
# GH 6194
idx = pd.MultiIndex.from_arrays([[1, np.nan, 3, np.nan, 5],
[1, 2, np.nan, np.nan, 5],
['a', 'b', 'c', np.nan, 'e']])
exp = pd.MultiIndex.from_arrays([[1, 5],
[1, 5],
['a', 'e']])
tm.assert_index_equal(idx.dropna(), exp)
tm.assert_index_equal(idx.dropna(how='any'), exp)
exp = pd.MultiIndex.from_arrays([[1, np.nan, 3, 5],
[1, 2, np.nan, 5],
['a', 'b', 'c', 'e']])
tm.assert_index_equal(idx.dropna(how='all'), exp)
msg = "invalid how option: xxx"
with pytest.raises(ValueError, match=msg):
idx.dropna(how='xxx')
def test_nulls(idx):
# this is really a smoke test for the methods
# as these are adequately tested for function elsewhere
msg = "isna is not defined for MultiIndex"
with pytest.raises(NotImplementedError, match=msg):
idx.isna()
@pytest.mark.xfail
def test_hasnans_isnans(idx):
# GH 11343, added tests for hasnans / isnans
index = idx.copy()
# cases in indices doesn't include NaN
expected = np.array([False] * len(index), dtype=bool)
tm.assert_numpy_array_equal(index._isnan, expected)
assert index.hasnans is False
index = idx.copy()
values = index.values
values[1] = np.nan
index = idx.__class__(values)
expected = np.array([False] * len(index), dtype=bool)
expected[1] = True
tm.assert_numpy_array_equal(index._isnan, expected)
assert index.hasnans is True
def test_nan_stays_float():
# GH 7031
idx0 = pd.MultiIndex(levels=[["A", "B"], []],
codes=[[1, 0], [-1, -1]],
names=[0, 1])
idx1 = pd.MultiIndex(levels=[["C"], ["D"]],
codes=[[0], [0]],
names=[0, 1])
idxm = idx0.join(idx1, how='outer')
assert pd.isna(idx0.get_level_values(1)).all()
# the following failed in 0.14.1
assert pd.isna(idxm.get_level_values(1)[:-1]).all()
df0 = pd.DataFrame([[1, 2]], index=idx0)
df1 = pd.DataFrame([[3, 4]], index=idx1)
dfm = df0 - df1
assert pd.isna(df0.index.get_level_values(1)).all()
# the following failed in 0.14.1
assert pd.isna(dfm.index.get_level_values(1)[:-1]).all()
@@ -0,0 +1,213 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
import pandas as pd
from pandas import Index, IntervalIndex, MultiIndex
from pandas.api.types import is_scalar
def test_is_monotonic_increasing():
i = MultiIndex.from_product([np.arange(10),
np.arange(10)], names=['one', 'two'])
assert i.is_monotonic is True
assert i._is_strictly_monotonic_increasing is True
assert Index(i.values).is_monotonic is True
assert i._is_strictly_monotonic_increasing is True
i = MultiIndex.from_product([np.arange(10, 0, -1),
np.arange(10)], names=['one', 'two'])
assert i.is_monotonic is False
assert i._is_strictly_monotonic_increasing is False
assert Index(i.values).is_monotonic is False
assert Index(i.values)._is_strictly_monotonic_increasing is False
i = MultiIndex.from_product([np.arange(10),
np.arange(10, 0, -1)],
names=['one', 'two'])
assert i.is_monotonic is False
assert i._is_strictly_monotonic_increasing is False
assert Index(i.values).is_monotonic is False
assert Index(i.values)._is_strictly_monotonic_increasing is False
i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']])
assert i.is_monotonic is False
assert i._is_strictly_monotonic_increasing is False
assert Index(i.values).is_monotonic is False
assert Index(i.values)._is_strictly_monotonic_increasing is False
# string ordering
i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
['one', 'two', 'three']],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['first', 'second'])
assert i.is_monotonic is False
assert Index(i.values).is_monotonic is False
assert i._is_strictly_monotonic_increasing is False
assert Index(i.values)._is_strictly_monotonic_increasing is False
i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'],
['mom', 'next', 'zenith']],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['first', 'second'])
assert i.is_monotonic is True
assert Index(i.values).is_monotonic is True
assert i._is_strictly_monotonic_increasing is True
assert Index(i.values)._is_strictly_monotonic_increasing is True
# mixed levels, hits the TypeError
i = MultiIndex(
levels=[[1, 2, 3, 4], ['gb00b03mlx29', 'lu0197800237',
'nl0000289783',
'nl0000289965', 'nl0000301109']],
codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
names=['household_id', 'asset_id'])
assert i.is_monotonic is False
assert i._is_strictly_monotonic_increasing is False
# empty
i = MultiIndex.from_arrays([[], []])
assert i.is_monotonic is True
assert Index(i.values).is_monotonic is True
assert i._is_strictly_monotonic_increasing is True
assert Index(i.values)._is_strictly_monotonic_increasing is True
def test_is_monotonic_decreasing():
i = MultiIndex.from_product([np.arange(9, -1, -1),
np.arange(9, -1, -1)],
names=['one', 'two'])
assert i.is_monotonic_decreasing is True
assert i._is_strictly_monotonic_decreasing is True
assert Index(i.values).is_monotonic_decreasing is True
assert i._is_strictly_monotonic_decreasing is True
i = MultiIndex.from_product([np.arange(10),
np.arange(10, 0, -1)],
names=['one', 'two'])
assert i.is_monotonic_decreasing is False
assert i._is_strictly_monotonic_decreasing is False
assert Index(i.values).is_monotonic_decreasing is False
assert Index(i.values)._is_strictly_monotonic_decreasing is False
i = MultiIndex.from_product([np.arange(10, 0, -1),
np.arange(10)], names=['one', 'two'])
assert i.is_monotonic_decreasing is False
assert i._is_strictly_monotonic_decreasing is False
assert Index(i.values).is_monotonic_decreasing is False
assert Index(i.values)._is_strictly_monotonic_decreasing is False
i = MultiIndex.from_product([[2.0, np.nan, 1.0], ['c', 'b', 'a']])
assert i.is_monotonic_decreasing is False
assert i._is_strictly_monotonic_decreasing is False
assert Index(i.values).is_monotonic_decreasing is False
assert Index(i.values)._is_strictly_monotonic_decreasing is False
# string ordering
i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'],
['three', 'two', 'one']],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['first', 'second'])
assert i.is_monotonic_decreasing is False
assert Index(i.values).is_monotonic_decreasing is False
assert i._is_strictly_monotonic_decreasing is False
assert Index(i.values)._is_strictly_monotonic_decreasing is False
i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'],
['zenith', 'next', 'mom']],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['first', 'second'])
assert i.is_monotonic_decreasing is True
assert Index(i.values).is_monotonic_decreasing is True
assert i._is_strictly_monotonic_decreasing is True
assert Index(i.values)._is_strictly_monotonic_decreasing is True
# mixed levels, hits the TypeError
i = MultiIndex(
levels=[[4, 3, 2, 1], ['nl0000301109', 'nl0000289965',
'nl0000289783', 'lu0197800237',
'gb00b03mlx29']],
codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
names=['household_id', 'asset_id'])
assert i.is_monotonic_decreasing is False
assert i._is_strictly_monotonic_decreasing is False
# empty
i = MultiIndex.from_arrays([[], []])
assert i.is_monotonic_decreasing is True
assert Index(i.values).is_monotonic_decreasing is True
assert i._is_strictly_monotonic_decreasing is True
assert Index(i.values)._is_strictly_monotonic_decreasing is True
def test_is_strictly_monotonic_increasing():
idx = pd.MultiIndex(levels=[['bar', 'baz'], ['mom', 'next']],
codes=[[0, 0, 1, 1], [0, 0, 0, 1]])
assert idx.is_monotonic_increasing is True
assert idx._is_strictly_monotonic_increasing is False
def test_is_strictly_monotonic_decreasing():
idx = pd.MultiIndex(levels=[['baz', 'bar'], ['next', 'mom']],
codes=[[0, 0, 1, 1], [0, 0, 0, 1]])
assert idx.is_monotonic_decreasing is True
assert idx._is_strictly_monotonic_decreasing is False
def test_searchsorted_monotonic(indices):
# GH17271
# not implemented for tuple searches in MultiIndex
# or Intervals searches in IntervalIndex
if isinstance(indices, (MultiIndex, IntervalIndex)):
return
# nothing to test if the index is empty
if indices.empty:
return
value = indices[0]
# determine the expected results (handle dupes for 'right')
expected_left, expected_right = 0, (indices == value).argmin()
if expected_right == 0:
# all values are the same, expected_right should be length
expected_right = len(indices)
# test _searchsorted_monotonic in all cases
# test searchsorted only for increasing
if indices.is_monotonic_increasing:
ssm_left = indices._searchsorted_monotonic(value, side='left')
assert is_scalar(ssm_left)
assert expected_left == ssm_left
ssm_right = indices._searchsorted_monotonic(value, side='right')
assert is_scalar(ssm_right)
assert expected_right == ssm_right
ss_left = indices.searchsorted(value, side='left')
assert is_scalar(ss_left)
assert expected_left == ss_left
ss_right = indices.searchsorted(value, side='right')
assert is_scalar(ss_right)
assert expected_right == ss_right
elif indices.is_monotonic_decreasing:
ssm_left = indices._searchsorted_monotonic(value, side='left')
assert is_scalar(ssm_left)
assert expected_left == ssm_left
ssm_right = indices._searchsorted_monotonic(value, side='right')
assert is_scalar(ssm_right)
assert expected_right == ssm_right
else:
# non-monotonic should raise.
with pytest.raises(ValueError):
indices._searchsorted_monotonic(value, side='left')
@@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-
import pytest
import pandas as pd
from pandas import MultiIndex
import pandas.util.testing as tm
def check_level_names(index, names):
assert [level.name for level in index.levels] == list(names)
def test_slice_keep_name():
x = MultiIndex.from_tuples([('a', 'b'), (1, 2), ('c', 'd')],
names=['x', 'y'])
assert x[1:].names == x.names
def test_index_name_retained():
# GH9857
result = pd.DataFrame({'x': [1, 2, 6],
'y': [2, 2, 8],
'z': [-5, 0, 5]})
result = result.set_index('z')
result.loc[10] = [9, 10]
df_expected = pd.DataFrame({'x': [1, 2, 6, 9],
'y': [2, 2, 8, 10],
'z': [-5, 0, 5, 10]})
df_expected = df_expected.set_index('z')
tm.assert_frame_equal(result, df_expected)
def test_changing_names(idx):
# names should be applied to levels
level_names = [level.name for level in idx.levels]
check_level_names(idx, idx.names)
view = idx.view()
copy = idx.copy()
shallow_copy = idx._shallow_copy()
# changing names should change level names on object
new_names = [name + "a" for name in idx.names]
idx.names = new_names
check_level_names(idx, new_names)
# but not on copies
check_level_names(view, level_names)
check_level_names(copy, level_names)
check_level_names(shallow_copy, level_names)
# and copies shouldn't change original
shallow_copy.names = [name + "c" for name in shallow_copy.names]
check_level_names(idx, new_names)
def test_take_preserve_name(idx):
taken = idx.take([3, 0, 1])
assert taken.names == idx.names
def test_copy_names():
# Check that adding a "names" parameter to the copy is honored
# GH14302
multi_idx = pd.Index([(1, 2), (3, 4)], names=['MyName1', 'MyName2'])
multi_idx1 = multi_idx.copy()
assert multi_idx.equals(multi_idx1)
assert multi_idx.names == ['MyName1', 'MyName2']
assert multi_idx1.names == ['MyName1', 'MyName2']
multi_idx2 = multi_idx.copy(names=['NewName1', 'NewName2'])
assert multi_idx.equals(multi_idx2)
assert multi_idx.names == ['MyName1', 'MyName2']
assert multi_idx2.names == ['NewName1', 'NewName2']
multi_idx3 = multi_idx.copy(name=['NewName1', 'NewName2'])
assert multi_idx.equals(multi_idx3)
assert multi_idx.names == ['MyName1', 'MyName2']
assert multi_idx3.names == ['NewName1', 'NewName2']
def test_names(idx, index_names):
# names are assigned in setup
names = index_names
level_names = [level.name for level in idx.levels]
assert names == level_names
# setting bad names on existing
index = idx
with pytest.raises(ValueError, match="^Length of names"):
setattr(index, "names", list(index.names) + ["third"])
with pytest.raises(ValueError, match="^Length of names"):
setattr(index, "names", [])
# initializing with bad names (should always be equivalent)
major_axis, minor_axis = idx.levels
major_codes, minor_codes = idx.codes
with pytest.raises(ValueError, match="^Length of names"):
MultiIndex(levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes],
names=['first'])
with pytest.raises(ValueError, match="^Length of names"):
MultiIndex(levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes],
names=['first', 'second', 'third'])
# names are assigned
index.names = ["a", "b"]
ind_names = list(index.names)
level_names = [level.name for level in index.levels]
assert ind_names == level_names
def test_duplicate_level_names_access_raises(idx):
# GH19029
idx.names = ['foo', 'foo']
with pytest.raises(ValueError, match='name foo occurs multiple times'):
idx._get_level_number('foo')
@@ -0,0 +1,98 @@
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, MultiIndex, date_range
import pandas.util.testing as tm
def test_partial_string_timestamp_multiindex():
# GH10331
dr = pd.date_range('2016-01-01', '2016-01-03', freq='12H')
abc = ['a', 'b', 'c']
ix = pd.MultiIndex.from_product([dr, abc])
df = pd.DataFrame({'c1': range(0, 15)}, index=ix)
idx = pd.IndexSlice
# c1
# 2016-01-01 00:00:00 a 0
# b 1
# c 2
# 2016-01-01 12:00:00 a 3
# b 4
# c 5
# 2016-01-02 00:00:00 a 6
# b 7
# c 8
# 2016-01-02 12:00:00 a 9
# b 10
# c 11
# 2016-01-03 00:00:00 a 12
# b 13
# c 14
# partial string matching on a single index
for df_swap in (df.swaplevel(),
df.swaplevel(0),
df.swaplevel(0, 1)):
df_swap = df_swap.sort_index()
just_a = df_swap.loc['a']
result = just_a.loc['2016-01-01']
expected = df.loc[idx[:, 'a'], :].iloc[0:2]
expected.index = expected.index.droplevel(1)
tm.assert_frame_equal(result, expected)
# indexing with IndexSlice
result = df.loc[idx['2016-01-01':'2016-02-01', :], :]
expected = df
tm.assert_frame_equal(result, expected)
# match on secondary index
result = df_swap.loc[idx[:, '2016-01-01':'2016-01-01'], :]
expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]]
tm.assert_frame_equal(result, expected)
# Even though this syntax works on a single index, this is somewhat
# ambiguous and we don't want to extend this behavior forward to work
# in multi-indexes. This would amount to selecting a scalar from a
# column.
with pytest.raises(KeyError):
df['2016-01-01']
# partial string match on year only
result = df.loc['2016']
expected = df
tm.assert_frame_equal(result, expected)
# partial string match on date
result = df.loc['2016-01-01']
expected = df.iloc[0:6]
tm.assert_frame_equal(result, expected)
# partial string match on date and hour, from middle
result = df.loc['2016-01-02 12']
expected = df.iloc[9:12]
tm.assert_frame_equal(result, expected)
# partial string match on secondary index
result = df_swap.loc[idx[:, '2016-01-02'], :]
expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]]
tm.assert_frame_equal(result, expected)
# tuple selector with partial string match on date
result = df.loc[('2016-01-01', 'a'), :]
expected = df.iloc[[0, 3]]
tm.assert_frame_equal(result, expected)
# Slicing date on first level should break (of course)
with pytest.raises(KeyError):
df_swap.loc['2016-01-01']
# GH12685 (partial string with daily resolution or below)
dr = date_range('2013-01-01', periods=100, freq='D')
ix = MultiIndex.from_product([dr, ['a', 'b']])
df = DataFrame(np.random.randn(200, 1), columns=['A'], index=ix)
result = df.loc[idx['2013-03':'2013-03', :], :]
expected = df.iloc[118:180]
tm.assert_frame_equal(result, expected)
@@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
import pandas as pd
from pandas import Index, MultiIndex
import pandas.util.testing as tm
def check_level_names(index, names):
assert [level.name for level in index.levels] == list(names)
def test_reindex(idx):
result, indexer = idx.reindex(list(idx[:4]))
assert isinstance(result, MultiIndex)
check_level_names(result, idx[:4].names)
result, indexer = idx.reindex(list(idx))
assert isinstance(result, MultiIndex)
assert indexer is None
check_level_names(result, idx.names)
def test_reindex_level(idx):
index = Index(['one'])
target, indexer = idx.reindex(index, level='second')
target2, indexer2 = index.reindex(idx, level='second')
exp_index = idx.join(index, level='second', how='right')
exp_index2 = idx.join(index, level='second', how='left')
assert target.equals(exp_index)
exp_indexer = np.array([0, 2, 4])
tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False)
assert target2.equals(exp_index2)
exp_indexer2 = np.array([0, -1, 0, -1, 0, -1])
tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False)
with pytest.raises(TypeError, match="Fill method not supported"):
idx.reindex(idx, method='pad', level='second')
with pytest.raises(TypeError, match="Fill method not supported"):
index.reindex(index, method='bfill', level='first')
def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx):
# GH6552
idx = idx.copy()
target = idx.copy()
idx.names = target.names = [None, None]
other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]])
# list & ndarray cases
assert idx.reindex([])[0].names == [None, None]
assert idx.reindex(np.array([]))[0].names == [None, None]
assert idx.reindex(target.tolist())[0].names == [None, None]
assert idx.reindex(target.values)[0].names == [None, None]
assert idx.reindex(other_dtype.tolist())[0].names == [None, None]
assert idx.reindex(other_dtype.values)[0].names == [None, None]
idx.names = ['foo', 'bar']
assert idx.reindex([])[0].names == ['foo', 'bar']
assert idx.reindex(np.array([]))[0].names == ['foo', 'bar']
assert idx.reindex(target.tolist())[0].names == ['foo', 'bar']
assert idx.reindex(target.values)[0].names == ['foo', 'bar']
assert idx.reindex(other_dtype.tolist())[0].names == ['foo', 'bar']
assert idx.reindex(other_dtype.values)[0].names == ['foo', 'bar']
def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
# GH7774
idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']],
names=['foo', 'bar'])
assert idx.reindex([], level=0)[0].names == ['foo', 'bar']
assert idx.reindex([], level=1)[0].names == ['foo', 'bar']
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array():
# GH7774
idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']])
assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64
assert idx.reindex([], level=1)[0].levels[1].dtype.type == np.object_
def test_reindex_base(idx):
idx = idx
expected = np.arange(idx.size, dtype=np.intp)
actual = idx.get_indexer(idx)
tm.assert_numpy_array_equal(expected, actual)
with pytest.raises(ValueError, match='Invalid fill method'):
idx.get_indexer(idx, method='invalid')
def test_reindex_non_unique():
idx = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)])
a = pd.Series(np.arange(4), index=idx)
new_idx = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
msg = 'cannot handle a non-unique multi-index!'
with pytest.raises(ValueError, match=msg):
a.reindex(new_idx)
@@ -0,0 +1,126 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
import pandas as pd
from pandas import Index, MultiIndex
import pandas.util.testing as tm
def test_insert(idx):
# key contained in all levels
new_index = idx.insert(0, ('bar', 'two'))
assert new_index.equal_levels(idx)
assert new_index[0] == ('bar', 'two')
# key not contained in all levels
new_index = idx.insert(0, ('abc', 'three'))
exp0 = Index(list(idx.levels[0]) + ['abc'], name='first')
tm.assert_index_equal(new_index.levels[0], exp0)
exp1 = Index(list(idx.levels[1]) + ['three'], name='second')
tm.assert_index_equal(new_index.levels[1], exp1)
assert new_index[0] == ('abc', 'three')
# key wrong length
msg = "Item must have length equal to number of levels"
with pytest.raises(ValueError, match=msg):
idx.insert(0, ('foo2',))
left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]],
columns=['1st', '2nd', '3rd'])
left.set_index(['1st', '2nd'], inplace=True)
ts = left['3rd'].copy(deep=True)
left.loc[('b', 'x'), '3rd'] = 2
left.loc[('b', 'a'), '3rd'] = -1
left.loc[('b', 'b'), '3rd'] = 3
left.loc[('a', 'x'), '3rd'] = 4
left.loc[('a', 'w'), '3rd'] = 5
left.loc[('a', 'a'), '3rd'] = 6
ts.loc[('b', 'x')] = 2
ts.loc['b', 'a'] = -1
ts.loc[('b', 'b')] = 3
ts.loc['a', 'x'] = 4
ts.loc[('a', 'w')] = 5
ts.loc['a', 'a'] = 6
right = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1], ['b', 'x', 2],
['b', 'a', -1], ['b', 'b', 3], ['a', 'x', 4],
['a', 'w', 5], ['a', 'a', 6]],
columns=['1st', '2nd', '3rd'])
right.set_index(['1st', '2nd'], inplace=True)
# FIXME data types changes to float because
# of intermediate nan insertion;
tm.assert_frame_equal(left, right, check_dtype=False)
tm.assert_series_equal(ts, right['3rd'])
# GH9250
idx = [('test1', i) for i in range(5)] + \
[('test2', i) for i in range(6)] + \
[('test', 17), ('test', 18)]
left = pd.Series(np.linspace(0, 10, 11),
pd.MultiIndex.from_tuples(idx[:-2]))
left.loc[('test', 17)] = 11
left.loc[('test', 18)] = 12
right = pd.Series(np.linspace(0, 12, 13),
pd.MultiIndex.from_tuples(idx))
tm.assert_series_equal(left, right)
def test_append(idx):
result = idx[:3].append(idx[3:])
assert result.equals(idx)
foos = [idx[:1], idx[1:3], idx[3:]]
result = foos[0].append(foos[1:])
assert result.equals(idx)
# empty
result = idx.append([])
assert result.equals(idx)
def test_repeat():
reps = 2
numbers = [1, 2, 3]
names = np.array(['foo', 'bar'])
m = MultiIndex.from_product([
numbers, names], names=names)
expected = MultiIndex.from_product([
numbers, names.repeat(reps)], names=names)
tm.assert_index_equal(m.repeat(reps), expected)
def test_insert_base(idx):
result = idx[1:4]
# test 0th element
assert idx[0:4].equals(result.insert(0, idx[0]))
def test_delete_base(idx):
expected = idx[1:]
result = idx.delete(0)
assert result.equals(expected)
assert result.name == expected.name
expected = idx[:-1]
result = idx.delete(-1)
assert result.equals(expected)
assert result.name == expected.name
with pytest.raises((IndexError, ValueError)):
# Exception raised depends on NumPy version.
idx.delete(len(idx))
@@ -0,0 +1,251 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
import pandas as pd
from pandas import MultiIndex, Series
import pandas.util.testing as tm
@pytest.mark.parametrize("case", [0.5, "xxx"])
@pytest.mark.parametrize("sort", [True, False])
@pytest.mark.parametrize("method", ["intersection", "union",
"difference", "symmetric_difference"])
def test_set_ops_error_cases(idx, case, sort, method):
# non-iterable input
msg = "Input must be Index or array-like"
with pytest.raises(TypeError, match=msg):
getattr(idx, method)(case, sort=sort)
@pytest.mark.parametrize("sort", [True, False])
def test_intersection_base(idx, sort):
first = idx[:5]
second = idx[:3]
intersect = first.intersection(second, sort=sort)
if sort:
tm.assert_index_equal(intersect, second.sort_values())
assert tm.equalContents(intersect, second)
# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
result = first.intersection(case, sort=sort)
if sort:
tm.assert_index_equal(result, second.sort_values())
assert tm.equalContents(result, second)
msg = "other must be a MultiIndex or a list of tuples"
with pytest.raises(TypeError, match=msg):
first.intersection([1, 2, 3], sort=sort)
@pytest.mark.parametrize("sort", [True, False])
def test_union_base(idx, sort):
first = idx[3:]
second = idx[:5]
everything = idx
union = first.union(second, sort=sort)
if sort:
tm.assert_index_equal(union, everything.sort_values())
assert tm.equalContents(union, everything)
# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
result = first.union(case, sort=sort)
if sort:
tm.assert_index_equal(result, everything.sort_values())
assert tm.equalContents(result, everything)
msg = "other must be a MultiIndex or a list of tuples"
with pytest.raises(TypeError, match=msg):
first.union([1, 2, 3], sort=sort)
@pytest.mark.parametrize("sort", [True, False])
def test_difference_base(idx, sort):
second = idx[4:]
answer = idx[:4]
result = idx.difference(second, sort=sort)
if sort:
answer = answer.sort_values()
assert result.equals(answer)
tm.assert_index_equal(result, answer)
# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
result = idx.difference(case, sort=sort)
tm.assert_index_equal(result, answer)
msg = "other must be a MultiIndex or a list of tuples"
with pytest.raises(TypeError, match=msg):
idx.difference([1, 2, 3], sort=sort)
@pytest.mark.parametrize("sort", [True, False])
def test_symmetric_difference(idx, sort):
first = idx[1:]
second = idx[:-1]
answer = idx[[-1, 0]]
result = first.symmetric_difference(second, sort=sort)
if sort:
answer = answer.sort_values()
tm.assert_index_equal(result, answer)
# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
result = first.symmetric_difference(case, sort=sort)
tm.assert_index_equal(result, answer)
msg = "other must be a MultiIndex or a list of tuples"
with pytest.raises(TypeError, match=msg):
first.symmetric_difference([1, 2, 3], sort=sort)
def test_empty(idx):
# GH 15270
assert not idx.empty
assert idx[:0].empty
@pytest.mark.parametrize("sort", [True, False])
def test_difference(idx, sort):
first = idx
result = first.difference(idx[-3:], sort=sort)
vals = idx[:-3].values
if sort:
vals = sorted(vals)
expected = MultiIndex.from_tuples(vals,
sortorder=0,
names=idx.names)
assert isinstance(result, MultiIndex)
assert result.equals(expected)
assert result.names == idx.names
tm.assert_index_equal(result, expected)
# empty difference: reflexive
result = idx.difference(idx, sort=sort)
expected = idx[:0]
assert result.equals(expected)
assert result.names == idx.names
# empty difference: superset
result = idx[-3:].difference(idx, sort=sort)
expected = idx[:0]
assert result.equals(expected)
assert result.names == idx.names
# empty difference: degenerate
result = idx[:0].difference(idx, sort=sort)
expected = idx[:0]
assert result.equals(expected)
assert result.names == idx.names
# names not the same
chunklet = idx[-3:]
chunklet.names = ['foo', 'baz']
result = first.difference(chunklet, sort=sort)
assert result.names == (None, None)
# empty, but non-equal
result = idx.difference(idx.sortlevel(1)[0], sort=sort)
assert len(result) == 0
# raise Exception called with non-MultiIndex
result = first.difference(first.values, sort=sort)
assert result.equals(first[:0])
# name from empty array
result = first.difference([], sort=sort)
assert first.equals(result)
assert first.names == result.names
# name from non-empty array
result = first.difference([('foo', 'one')], sort=sort)
expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), (
'foo', 'two'), ('qux', 'one'), ('qux', 'two')])
expected.names = first.names
assert first.names == result.names
msg = "other must be a MultiIndex or a list of tuples"
with pytest.raises(TypeError, match=msg):
first.difference([1, 2, 3, 4, 5], sort=sort)
@pytest.mark.parametrize("sort", [True, False])
def test_union(idx, sort):
piece1 = idx[:5][::-1]
piece2 = idx[3:]
the_union = piece1.union(piece2, sort=sort)
if sort:
tm.assert_index_equal(the_union, idx.sort_values())
assert tm.equalContents(the_union, idx)
# corner case, pass self or empty thing:
the_union = idx.union(idx, sort=sort)
assert the_union is idx
the_union = idx.union(idx[:0], sort=sort)
assert the_union is idx
# won't work in python 3
# tuples = _index.values
# result = _index[:4] | tuples[4:]
# assert result.equals(tuples)
# not valid for python 3
# def test_union_with_regular_index(self):
# other = Index(['A', 'B', 'C'])
# result = other.union(idx)
# assert ('foo', 'one') in result
# assert 'B' in result
# result2 = _index.union(other)
# assert result.equals(result2)
@pytest.mark.parametrize("sort", [True, False])
def test_intersection(idx, sort):
piece1 = idx[:5][::-1]
piece2 = idx[3:]
the_int = piece1.intersection(piece2, sort=sort)
if sort:
tm.assert_index_equal(the_int, idx[3:5])
assert tm.equalContents(the_int, idx[3:5])
# corner case, pass self
the_int = idx.intersection(idx, sort=sort)
assert the_int is idx
# empty intersection: disjoint
empty = idx[:2].intersection(idx[2:], sort=sort)
expected = idx[:0]
assert empty.equals(expected)
# can't do in python 3
# tuples = _index.values
# result = _index & tuples
# assert result.equals(tuples)
@@ -0,0 +1,266 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas.compat import lrange
from pandas.errors import PerformanceWarning, UnsortedIndexError
import pandas as pd
from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, RangeIndex
import pandas.util.testing as tm
def test_sortlevel(idx):
import random
tuples = list(idx)
random.shuffle(tuples)
index = MultiIndex.from_tuples(tuples)
sorted_idx, _ = index.sortlevel(0)
expected = MultiIndex.from_tuples(sorted(tuples))
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(0, ascending=False)
assert sorted_idx.equals(expected[::-1])
sorted_idx, _ = index.sortlevel(1)
by1 = sorted(tuples, key=lambda x: (x[1], x[0]))
expected = MultiIndex.from_tuples(by1)
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(1, ascending=False)
assert sorted_idx.equals(expected[::-1])
def test_sortlevel_not_sort_remaining():
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
sorted_idx, _ = mi.sortlevel('A', sort_remaining=False)
assert sorted_idx.equals(mi)
def test_sortlevel_deterministic():
tuples = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'),
('foo', 'one'), ('baz', 'two'), ('qux', 'one')]
index = MultiIndex.from_tuples(tuples)
sorted_idx, _ = index.sortlevel(0)
expected = MultiIndex.from_tuples(sorted(tuples))
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(0, ascending=False)
assert sorted_idx.equals(expected[::-1])
sorted_idx, _ = index.sortlevel(1)
by1 = sorted(tuples, key=lambda x: (x[1], x[0]))
expected = MultiIndex.from_tuples(by1)
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(1, ascending=False)
assert sorted_idx.equals(expected[::-1])
def test_sort(indices):
with pytest.raises(TypeError):
indices.sort()
def test_numpy_argsort(idx):
result = np.argsort(idx)
expected = idx.argsort()
tm.assert_numpy_array_equal(result, expected)
# these are the only two types that perform
# pandas compatibility input validation - the
# rest already perform separate (or no) such
# validation via their 'values' attribute as
# defined in pandas.core.indexes/base.py - they
# cannot be changed at the moment due to
# backwards compatibility concerns
if isinstance(type(idx), (CategoricalIndex, RangeIndex)):
msg = "the 'axis' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.argsort(idx, axis=1)
msg = "the 'kind' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.argsort(idx, kind='mergesort')
msg = "the 'order' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.argsort(idx, order=('a', 'b'))
def test_unsortedindex():
# GH 11897
mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'),
('x', 'b'), ('y', 'a'), ('z', 'b')],
names=['one', 'two'])
df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi,
columns=['one', 'two'])
# GH 16734: not sorted, but no real slicing
result = df.loc(axis=0)['z', 'a']
expected = df.iloc[0]
tm.assert_series_equal(result, expected)
with pytest.raises(UnsortedIndexError):
df.loc(axis=0)['z', slice('a')]
df.sort_index(inplace=True)
assert len(df.loc(axis=0)['z', :]) == 2
with pytest.raises(KeyError):
df.loc(axis=0)['q', :]
def test_unsortedindex_doc_examples():
# http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex # noqa
dfm = DataFrame({'jim': [0, 0, 1, 1],
'joe': ['x', 'x', 'z', 'y'],
'jolie': np.random.rand(4)})
dfm = dfm.set_index(['jim', 'joe'])
with tm.assert_produces_warning(PerformanceWarning):
dfm.loc[(1, 'z')]
with pytest.raises(UnsortedIndexError):
dfm.loc[(0, 'y'):(1, 'z')]
assert not dfm.index.is_lexsorted()
assert dfm.index.lexsort_depth == 1
# sort it
dfm = dfm.sort_index()
dfm.loc[(1, 'z')]
dfm.loc[(0, 'y'):(1, 'z')]
assert dfm.index.is_lexsorted()
assert dfm.index.lexsort_depth == 2
def test_reconstruct_sort():
# starts off lexsorted & monotonic
mi = MultiIndex.from_arrays([
['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
])
assert mi.is_lexsorted()
assert mi.is_monotonic
recons = mi._sort_levels_monotonic()
assert recons.is_lexsorted()
assert recons.is_monotonic
assert mi is recons
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
# cannot convert to lexsorted
mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'),
('x', 'b'), ('y', 'a'), ('z', 'b')],
names=['one', 'two'])
assert not mi.is_lexsorted()
assert not mi.is_monotonic
recons = mi._sort_levels_monotonic()
assert not recons.is_lexsorted()
assert not recons.is_monotonic
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
# cannot convert to lexsorted
mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]],
codes=[[0, 1, 0, 2], [2, 0, 0, 1]],
names=['col1', 'col2'])
assert not mi.is_lexsorted()
assert not mi.is_monotonic
recons = mi._sort_levels_monotonic()
assert not recons.is_lexsorted()
assert not recons.is_monotonic
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
def test_reconstruct_remove_unused():
# xref to GH 2770
df = DataFrame([['deleteMe', 1, 9],
['keepMe', 2, 9],
['keepMeToo', 3, 9]],
columns=['first', 'second', 'third'])
df2 = df.set_index(['first', 'second'], drop=False)
df2 = df2[df2['first'] != 'deleteMe']
# removed levels are there
expected = MultiIndex(levels=[['deleteMe', 'keepMe', 'keepMeToo'],
[1, 2, 3]],
codes=[[1, 2], [1, 2]],
names=['first', 'second'])
result = df2.index
tm.assert_index_equal(result, expected)
expected = MultiIndex(levels=[['keepMe', 'keepMeToo'],
[2, 3]],
codes=[[0, 1], [0, 1]],
names=['first', 'second'])
result = df2.index.remove_unused_levels()
tm.assert_index_equal(result, expected)
# idempotent
result2 = result.remove_unused_levels()
tm.assert_index_equal(result2, expected)
assert result2.is_(result)
@pytest.mark.parametrize('first_type,second_type', [
('int64', 'int64'),
('datetime64[D]', 'str')
])
def test_remove_unused_levels_large(first_type, second_type):
# GH16556
# because tests should be deterministic (and this test in particular
# checks that levels are removed, which is not the case for every
# random input):
rng = np.random.RandomState(4) # seed is arbitrary value that works
size = 1 << 16
df = DataFrame(dict(
first=rng.randint(0, 1 << 13, size).astype(first_type),
second=rng.randint(0, 1 << 10, size).astype(second_type),
third=rng.rand(size)))
df = df.groupby(['first', 'second']).sum()
df = df[df.third < 0.1]
result = df.index.remove_unused_levels()
assert len(result.levels[0]) < len(df.index.levels[0])
assert len(result.levels[1]) < len(df.index.levels[1])
assert result.equals(df.index)
expected = df.reset_index().set_index(['first', 'second']).index
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize('level0', [['a', 'd', 'b'],
['a', 'd', 'b', 'unused']])
@pytest.mark.parametrize('level1', [['w', 'x', 'y', 'z'],
['w', 'x', 'y', 'z', 'unused']])
def test_remove_unused_nan(level0, level1):
# GH 18417
mi = pd.MultiIndex(levels=[level0, level1],
codes=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]])
result = mi.remove_unused_levels()
tm.assert_index_equal(result, mi)
for level in 0, 1:
assert('unused' not in result.levels[level])
def test_argsort(idx):
result = idx.argsort()
expected = idx.values.argsort()
tm.assert_numpy_array_equal(result, expected)