demo + utils venv
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,31 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, Index, MultiIndex
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def multiindex_dataframe_random_data():
    """Return a 10 x 3 DataFrame of random values on a 2-level MultiIndex.

    The rows use a MultiIndex named ('first', 'second') built from explicit
    levels and codes; the columns 'A', 'B', 'C' form an Index named 'exp'.
    """
    first_level = ['foo', 'bar', 'baz', 'qux']
    second_level = ['one', 'two', 'three']
    first_codes = [0, 0, 0, 1, 1, 2, 2, 3, 3, 3]
    second_codes = [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]
    row_index = MultiIndex(levels=[first_level, second_level],
                           codes=[first_codes, second_codes],
                           names=['first', 'second'])
    column_index = Index(['A', 'B', 'C'], name='exp')
    values = np.random.randn(10, 3)
    return DataFrame(values, index=row_index, columns=column_index)
|
||||
|
||||
|
||||
@pytest.fixture
def multiindex_year_month_day_dataframe_random_data():
    """DataFrame with 3 level MultiIndex (year, month, day) covering
    first 100 business days from 2000-01-01 with random data"""
    tdf = tm.makeTimeDataFrame(100)
    ymd = tdf.groupby([lambda x: x.year, lambda x: x.month,
                       lambda x: x.day]).sum()
    # use Int64Index, to make sure things work
    # The index is rebuilt and reassigned rather than mutated through the
    # ``inplace=True`` keyword, which is deprecated for these methods.
    int64_levels = [lev.astype('i8') for lev in ymd.index.levels]
    ymd.index = ymd.index.set_levels(int64_levels)
    ymd.index = ymd.index.set_names(['year', 'month', 'day'])
    return ymd
|
||||
+65
@@ -0,0 +1,65 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange, lzip, range
|
||||
|
||||
from pandas import DataFrame, MultiIndex, Series
|
||||
from pandas.core import common as com
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_detect_chained_assignment():
    """Chained in-place fillna on MultiIndex columns must raise.

    Inplace ops, originally from:
    http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
    """
    column_keys = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'),
                   ('ears', 'right')]
    column_data = [[12, 23], [123, None], [1234, 2345], [12345, 23456]]
    events = dict(zip(column_keys, column_data))
    columns = MultiIndex.from_tuples(column_keys, names=['part', 'side'])
    zed = DataFrame(events, index=['a', 'b'], columns=columns)

    with pytest.raises(com.SettingWithCopyError):
        zed['eyes']['right'].fillna(value=555, inplace=True)
|
||||
|
||||
|
||||
def test_cache_updating():
    """Setting a value must not go through a dead cache (GH 5216)."""
    values = np.random.rand(10, 3)
    df = DataFrame(values, columns=['x', 'y', 'z'])
    pairs = [(outer, inner) for outer in range(5) for inner in range(2)]
    df.index = MultiIndex.from_tuples(pairs)

    # setting via chained assignment
    # but actually works, since everything is a view
    df.loc[0]['z'].iloc[0] = 1.
    assert df.loc[(0, 0), 'z'] == 1

    # correct setting
    df.loc[(0, 0), 'z'] = 2
    assert df.loc[(0, 0), 'z'] == 2
|
||||
|
||||
|
||||
def test_indexer_caching():
    """Boolean setitem on a very large MultiIndex Series (GH5727).

    Make sure that indexers are in the _internal_names_set.
    """
    n = 1000001
    index = MultiIndex.from_tuples(lzip(lrange(n), lrange(n)))

    # render the Series once before any assignment takes place
    rendered = Series(np.zeros(n), index=index)
    str(rendered)

    # setitem
    result = Series(np.zeros(n), index=index)
    result[result == 0] = 1
    expected = Series(np.ones(n), index=index)
    tm.assert_series_equal(result, expected)
|
||||
+22
@@ -0,0 +1,22 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import Index, Period, Series, period_range
|
||||
|
||||
|
||||
def test_multiindex_period_datetime():
    """Period and datetime keys select the same element (GH4861).

    Using a datetime against the period level of a MultiIndex used to
    raise an exception.
    """
    labels = Index(['a', 'a', 'a', 'b', 'b'])
    periods = period_range('2012-01', periods=len(labels), freq='M')
    s = Series(np.random.randn(len(labels)), [labels, periods])
    expected = s.iloc[0]

    # try Period as index
    by_period = s.loc['a', Period('2012-01')]
    assert by_period == expected

    # try datetime as index
    by_datetime = s.loc['a', datetime(2012, 1, 1)]
    assert by_datetime == expected
|
||||
+237
@@ -0,0 +1,237 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import u, zip
|
||||
|
||||
from pandas import DataFrame, Index, MultiIndex, Series
|
||||
from pandas.core.indexing import IndexingError
|
||||
from pandas.util import testing as tm
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of Series with multi-level Index
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize('access_method', [lambda s, x: s[:, x],
                                           lambda s, x: s.loc[:, x],
                                           lambda s, x: s.xs(x, level=1)])
@pytest.mark.parametrize('level1_value, expected', [
    (0, Series([1], index=[0])),
    (1, Series([2, 3], index=[1, 2]))
])
def test_series_getitem_multiindex(access_method, level1_value, expected):
    """Select from a Series by a value of the second index level (GH 6018).

    Regression test for Series getitem with a multi-index.
    """
    mi = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)])
    ser = Series([1, 2, 3], index=mi)
    tm.assert_series_equal(access_method(ser, level1_value), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('level0_value', ['D', 'A'])
def test_series_getitem_duplicates_multiindex(level0_value):
    # GH 5725: the 'A' happens to be a valid Timestamp, so it doesn't raise
    # the appropriate error -- only in PY3, of course!
    tag_level = [level0_value, 'B', 'C']
    day_level = [0, 26, 27, 37, 57, 67, 75, 82]
    index = MultiIndex(levels=[tag_level, day_level],
                       codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2],
                              [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
                       names=['tag', 'day'])
    arr = np.random.randn(len(index), 1)
    df = DataFrame(arr, index=index, columns=['val'])

    # confirm indexing on missing value raises KeyError
    # ('A' is only missing when it is not the parametrized first label)
    if level0_value != 'A':
        with pytest.raises(KeyError, match=r"^'A'$"):
            df.val['A']

    with pytest.raises(KeyError, match=r"^'X'$"):
        df.val['X']

    # the first three rows carry the parametrized first-level label
    day_index = Index([26, 37, 57], name='day')
    expected = Series(arr.ravel()[0:3], name='val', index=day_index)
    tm.assert_series_equal(df.val[level0_value], expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer', [
    lambda s: s[2000, 3],
    lambda s: s.loc[2000, 3]
])
def test_series_getitem(
        multiindex_year_month_day_dataframe_random_data, indexer):
    """A (year, month) key returns the matching rows indexed by day only."""
    ser = multiindex_year_month_day_dataframe_random_data['A']

    # rows 42..64 are the ones the (2000, 3) key is expected to select;
    # dropping the two outer levels leaves the innermost level as index
    expected = ser.reindex(ser.index[42:65])
    without_first_level = expected.index.droplevel(0)
    expected.index = without_first_level.droplevel(0)

    tm.assert_series_equal(indexer(ser), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer', [
    lambda s: s[2000, 3, 10],
    lambda s: s.loc[2000, 3, 10]
])
def test_series_getitem_returns_scalar(
        multiindex_year_month_day_dataframe_random_data, indexer):
    """A complete three-part key returns the scalar stored at position 49."""
    ser = multiindex_year_month_day_dataframe_random_data['A']
    assert indexer(ser) == ser.iloc[49]
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer,expected_error,expected_error_msg', [
    (lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^356L?$"),
    (lambda s: s[(2000, 3, 4)], KeyError, r"^356L?$"),
    (lambda s: s.loc[(2000, 3, 4)], IndexingError, 'Too many indexers'),
    (lambda s: s.__getitem__(len(s)), IndexError, 'index out of bounds'),
    (lambda s: s[len(s)], IndexError, 'index out of bounds'),
    (lambda s: s.iloc[len(s)], IndexError,
     'single positional indexer is out-of-bounds')
])
def test_series_getitem_indexing_errors(
        multiindex_year_month_day_dataframe_random_data, indexer,
        expected_error, expected_error_msg):
    """Each invalid lookup raises the parametrized error and message."""
    ser = multiindex_year_month_day_dataframe_random_data['A']
    with pytest.raises(expected_error, match=expected_error_msg):
        indexer(ser)
|
||||
|
||||
|
||||
def test_series_getitem_corner_generator(
        multiindex_year_month_day_dataframe_random_data):
    """A generator of booleans selects the same rows as a boolean mask."""
    ser = multiindex_year_month_day_dataframe_random_data['A']
    positive_flags = (value > 0 for value in ser)
    result = ser[positive_flags]
    tm.assert_series_equal(result, ser[ser > 0])
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of DataFrame with multi-level Index
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
def test_getitem_simple(multiindex_dataframe_random_data):
    """A two-part column key returns the values of the first column."""
    transposed = multiindex_dataframe_random_data.T
    first_column_values = transposed.values[:, 0]
    selected_values = transposed['foo', 'one'].values
    tm.assert_almost_equal(selected_values, first_column_values)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer,expected_error_msg', [
    (lambda df: df[('foo', 'four')], r"^\('foo', 'four'\)$"),
    (lambda df: df['foobar'], r"^'foobar'$")
])
def test_frame_getitem_simple_key_error(
        multiindex_dataframe_random_data, indexer, expected_error_msg):
    """Column keys that are not present raise KeyError naming the key."""
    transposed = multiindex_dataframe_random_data.T
    with pytest.raises(KeyError, match=expected_error_msg):
        indexer(transposed)
|
||||
|
||||
|
||||
def test_frame_getitem_multicolumn_empty_level():
    """Selecting a top-level column whose second-level label is ''."""
    df = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']})
    level1 = ['level1 item1', 'level1 item2']
    level2 = ['', 'level2 item2']
    level3 = ['level3 item1', 'level3 item2']
    df.columns = [level1, level2, level3]

    # the expected frame has only the third-level label as its column
    expected = DataFrame([['1'], ['2'], ['3']], index=df.index,
                         columns=['level3 item1'])
    tm.assert_frame_equal(df['level1 item1'], expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer,expected_slice', [
    (lambda df: df['foo'], slice(3)),
    (lambda df: df['bar'], slice(3, 5)),
    (lambda df: df.loc[:, 'bar'], slice(3, 5))
])
def test_frame_getitem_toplevel(
        multiindex_dataframe_random_data, indexer, expected_slice):
    """A top-level column key returns its columns minus the first level."""
    transposed = multiindex_dataframe_random_data.T
    wanted_columns = transposed.columns[expected_slice]
    expected = transposed.reindex(columns=wanted_columns)
    expected.columns = expected.columns.droplevel(0)
    tm.assert_frame_equal(indexer(transposed), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('unicode_strings', [True, False])
def test_frame_mixed_depth_get(unicode_strings):
    """Partial column keys work when deeper levels hold empty strings.

    If unicode_strings is True, the column labels in dataframe
    construction will use unicode strings in Python 2 (pull request
    #17099).
    """
    level_arrays = [
        ['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
        ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
        ['', 'wx', 'wy', '', '', ''],
    ]
    if unicode_strings:
        level_arrays = [[u(label) for label in labels]
                        for labels in level_arrays]

    columns = MultiIndex.from_tuples(sorted(zip(*level_arrays)))
    df = DataFrame(np.random.randn(4, 6), columns=columns)

    # one-part key: the result is named after that key alone
    expected = df['a', '', ''].rename('a')
    tm.assert_series_equal(df['a'], expected)

    # two-part key: the result is named after the two-part tuple
    expected = df['routine1', 'result1', '']
    expected = expected.rename(('routine1', 'result1'))
    tm.assert_series_equal(df['routine1', 'result1'], expected)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of DataFrame with multi-level Index with duplicates
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture
def dataframe_with_duplicate_index():
    """Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
    rows = [['a', 'd', 'e', 'c', 'f', 'b'],
            [1, 4, 5, 3, 6, 2],
            [1, 4, 5, 3, 6, 2]]
    row_labels = ['h1', 'h3', 'h5']
    # the ('A', 'B2') column occurs twice, so the column index has duplicates
    column_index = MultiIndex(
        levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']],
        codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
        names=['main', 'sub'])
    return DataFrame(rows, index=row_labels, columns=column_index)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer', [
    lambda df: df[('A', 'A1')],
    lambda df: df.loc[:, ('A', 'A1')]
])
def test_frame_mi_access(dataframe_with_duplicate_index, indexer):
    """A full column tuple on duplicated columns yields a frame (GH 4145)."""
    df = dataframe_with_duplicate_index
    row_labels = Index(['h1', 'h3', 'h5'])
    column_labels = MultiIndex.from_tuples([('A', 'A1')],
                                           names=['main', 'sub'])
    # built transposed and flipped back, as in the original expectation
    expected = DataFrame([['a', 1, 1]], index=column_labels,
                         columns=row_labels).T

    tm.assert_frame_equal(indexer(df), expected)
|
||||
|
||||
|
||||
def test_frame_mi_access_returns_series(dataframe_with_duplicate_index):
    """Chained selection of a unique sub-column returns a Series.

    GH 4146, not returning a block manager when selecting a unique index
    from a duplicate index. As of 4879, this returns a Series (which is
    similar to what happens with a non-unique).
    """
    frame = dataframe_with_duplicate_index
    result = frame['A']['A1']
    expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1')
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index):
    """Selecting a non-unique label from the 2nd level returns a frame."""
    frame = dataframe_with_duplicate_index
    result = frame['A']['B2']
    duplicated_columns = Index(['B2', 'B2'], name='sub')
    expected = DataFrame([['d', 4, 4], ['e', 5, 5]],
                         index=duplicated_columns,
                         columns=['h1', 'h3', 'h5']).T
    tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,151 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, MultiIndex, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def simple_multiindex_dataframe():
    """
    Return a factory that builds a simple 3 x 3 dataframe with
    MultiIndex rows and columns, from the supplied data or from
    random data when none is given.
    """
    column_arrays = [[2, 2, 4], [6, 8, 10]]
    row_arrays = [[4, 4, 8], [8, 10, 12]]

    def _simple_multiindex_dataframe(data=None):
        values = np.random.randn(3, 3) if data is None else data
        return DataFrame(values, columns=column_arrays, index=row_arrays)

    return _simple_multiindex_dataframe
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer, expected', [
    (lambda df: df.iloc[0],
     lambda arr: Series(arr[0], index=[[2, 2, 4], [6, 8, 10]], name=(4, 8))),
    (lambda df: df.iloc[2],
     lambda arr: Series(arr[2], index=[[2, 2, 4], [6, 8, 10]], name=(8, 12))),
    (lambda df: df.iloc[:, 2],
     lambda arr: Series(
         arr[:, 2], index=[[4, 4, 8], [8, 10, 12]], name=(4, 10)))
])
def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe):
    """A single positional row or column comes back as a named Series.

    ``expected`` is a callable that builds the expected Series from the
    raw array used to construct the frame.
    """
    raw = np.random.randn(3, 3)
    frame = simple_multiindex_dataframe(raw)
    expected_series = expected(raw)
    tm.assert_series_equal(indexer(frame), expected_series)
|
||||
|
||||
|
||||
def test_iloc_returns_dataframe(simple_multiindex_dataframe):
|
||||
df = simple_multiindex_dataframe()
|
||||
result = df.iloc[[0, 1]]
|
||||
expected = df.xs(4, drop_level=False)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_iloc_returns_scalar(simple_multiindex_dataframe):
|
||||
arr = np.random.randn(3, 3)
|
||||
df = simple_multiindex_dataframe(arr)
|
||||
result = df.iloc[2, 2]
|
||||
expected = arr[2, 2]
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_iloc_getitem_multiple_items():
    """A list of positions returns the rows under label 'b' (GH 5528)."""
    outer_labels = ['a', 'a', 'b', 'b']
    inner_labels = ['x', 'y', 'x', 'y']
    index = MultiIndex.from_tuples(zip(outer_labels, inner_labels))
    df = DataFrame(np.random.randn(4, 4), index=index)

    last_two_rows = df.iloc[[2, 3]]
    rows_labelled_b = df.xs('b', drop_level=False)
    tm.assert_frame_equal(last_two_rows, rows_labelled_b)
|
||||
|
||||
|
||||
def test_iloc_getitem_labels():
    """Positional access on string-labelled, duplicated MultiIndex axes.

    This is basically regular indexing.
    """
    raw = np.random.randn(4, 3)
    column_arrays = [['i', 'i', 'j'], ['A', 'A', 'B']]
    row_arrays = [['i', 'i', 'j', 'k'], ['X', 'X', 'Y', 'Y']]
    df = DataFrame(raw, columns=column_arrays, index=row_arrays)
    assert df.iloc[2, 2] == raw[2, 2]
|
||||
|
||||
|
||||
def test_frame_getitem_slice(multiindex_dataframe_random_data):
|
||||
df = multiindex_dataframe_random_data
|
||||
result = df.iloc[:4]
|
||||
expected = df[:4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_setitem_slice(multiindex_dataframe_random_data):
|
||||
df = multiindex_dataframe_random_data
|
||||
df.iloc[:4] = 0
|
||||
|
||||
assert (df.values[:4] == 0).all()
|
||||
assert (df.values[4:] != 0).all()
|
||||
|
||||
|
||||
def test_indexing_ambiguity_bug_1678():
    """Positional column 1 equals the ('Ohio', 'Red') column (GH 1678)."""
    column_tuples = [('Ohio', 'Green'), ('Ohio', 'Red'),
                     ('Colorado', 'Green')]
    row_tuples = [('a', 1), ('a', 2), ('b', 1), ('b', 2)]
    columns = MultiIndex.from_tuples(column_tuples)
    index = MultiIndex.from_tuples(row_tuples)
    values = np.arange(12).reshape((4, 3))
    df = DataFrame(values, index=index, columns=columns)

    by_position = df.iloc[:, 1]
    by_label = df.loc[:, ('Ohio', 'Red')]
    tm.assert_series_equal(by_position, by_label)
|
||||
|
||||
|
||||
def test_iloc_integer_locations():
    """Cell-by-cell ``iloc`` reads reproduce the original data (GH 13797).

    The row index contains a duplicated tuple and is not sorted.
    """
    data = [['str00', 'str01'], ['str10', 'str11'], ['str20', 'srt21'],
            ['str30', 'str31'], ['str40', 'str41']]
    row_tuples = [('CC', 'A'), ('CC', 'B'), ('CC', 'B'), ('BB', 'a'),
                  ('BB', 'b')]
    df = DataFrame(data, index=MultiIndex.from_tuples(row_tuples))

    cells = [[df.iloc[row, col] for col in range(2)] for row in range(5)]
    result = DataFrame(cells)

    expected = DataFrame(data)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'data, indexes, values, expected_k', [
        # test without indexer value in first level of MultiIndex
        ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
        # test like code sample 1 in the issue
        ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100],
         [755, 1066]),
        # test like code sample 2 in the issue
        ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
        # test like code sample 3 in the issue
        ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10],
         [8, 15, 13])
    ])
def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k):
    """In-place ``iloc`` additions on an integer-MultiIndex Series (GH17148).

    Each ``values`` entry is added at the matching position in ``indexes``;
    the result must equal the 'k' column rewritten with ``expected_k``.
    """
    df = DataFrame(data=data, columns=['i', 'j', 'k']).set_index(['i', 'j'])

    result = df.k.copy()
    for position, increment in zip(indexes, values):
        result.iloc[position] += increment

    df['k'] = expected_k
    tm.assert_series_equal(result, df.k)
|
||||
|
||||
|
||||
def test_getitem_iloc(multiindex_dataframe_random_data):
|
||||
df = multiindex_dataframe_random_data
|
||||
result = df.iloc[2]
|
||||
expected = df.xs(df.index[2])
|
||||
tm.assert_series_equal(result, expected)
|
||||
+89
@@ -0,0 +1,89 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, MultiIndex, Series
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.slow
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_multiindex_get_loc():  # GH7724, GH2646
    """Check full and partial key lookups on randomly built MultiIndexes.

    A random five-column frame is indexed by its first four columns, both
    with duplicated rows and with duplicates dropped, after sorting by
    zero to four leading columns. Every prefix of every test key is then
    looked up with ``.loc`` and compared with a boolean-mask selection on
    the flat frame.
    """

    with warnings.catch_warnings(record=True):

        # test indexing into a multi-index before & past the lexsort depth
        from numpy.random import randint, choice, randn
        cols = ['jim', 'joe', 'jolie', 'joline', 'jolia']

        def validate(mi, df, key):
            # mi is df indexed by cols[:-1]; df is the flat frame
            mask = np.ones(len(df)).astype('bool')

            # test for all partials of this key
            for i, k in enumerate(key):
                # narrow the mask to rows matching the key up to level i
                mask &= df.iloc[:, i] == k

                if not mask.any():
                    assert key[:i + 1] not in mi.index
                    continue

                assert key[:i + 1] in mi.index
                right = df[mask].copy()

                if i + 1 != len(key):  # partial key
                    # drop the matched columns, index by the remaining keys
                    right.drop(cols[:i + 1], axis=1, inplace=True)
                    right.set_index(cols[i + 1:-1], inplace=True)
                    tm.assert_frame_equal(mi.loc[key[:i + 1]], right)

                else:  # full key
                    right.set_index(cols[:-1], inplace=True)
                    if len(right) == 1:  # single hit
                        right = Series(right['jolia'].values,
                                       name=right.index[0],
                                       index=['jolia'])
                        tm.assert_series_equal(mi.loc[key[:i + 1]], right)
                    else:  # multi hit
                        tm.assert_frame_equal(mi.loc[key[:i + 1]], right)

        def loop(mi, df, keys):
            for key in keys:
                validate(mi, df, key)

        # n rows of data, m random test keys
        n, m = 1000, 50

        vals = [randint(0, 10, n), choice(
            list('abcdefghij'), n), choice(
                pd.date_range('20141009', periods=10).tolist(), n), choice(
                    list('ZYXWVUTSRQ'), n), randn(n)]
        vals = list(map(tuple, zip(*vals)))

        # bunch of keys for testing
        # (drawn from ranges one value wider than the data ranges)
        keys = [randint(0, 11, m), choice(
            list('abcdefghijk'), m), choice(
                pd.date_range('20141009', periods=11).tolist(), m), choice(
                    list('ZYXWVUTSRQP'), m)]
        keys = list(map(tuple, zip(*keys)))
        # also add keys taken from every (n // m)-th data row
        keys += list(map(lambda t: t[:-1], vals[::n // m]))

        # covers both unique index and non-unique index
        df = DataFrame(vals, columns=cols)
        a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1])

        for frame in a, b:
            for i in range(5):  # lexsort depth
                df = frame.copy() if i == 0 else frame.sort_values(
                    by=cols[:i])
                mi = df.set_index(cols[:-1])
                assert not mi.index.lexsort_depth < i
                loop(mi, df, keys)
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_large_mi_dataframe_indexing():
    """A key past the end of a 10**6-row MultiIndex is not in it (GH10645)."""
    result = MultiIndex.from_arrays([range(10 ** 6), range(10 ** 6)])
    # both levels only run up to 10**6 - 1, so this tuple must be absent
    assert (10 ** 6, 0) not in result
|
||||
@@ -0,0 +1,56 @@
|
||||
from warnings import catch_warnings, simplefilter
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
from pandas import DataFrame, MultiIndex
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexIx(object):
    """Tests for the deprecated ``.ix`` indexer on MultiIndex frames."""

    def test_frame_setitem_ix(self, multiindex_dataframe_random_data):
        """Set a single cell by (row tuple, column) with .loc and .ix."""
        frame = multiindex_dataframe_random_data
        row_key = ('bar', 'two')
        frame.loc[row_key, 'B'] = 5
        assert frame.loc[row_key, 'B'] == 5

        # with integer labels
        relabelled = frame.copy()
        relabelled.columns = lrange(3)
        relabelled.loc[row_key, 1] = 7
        assert relabelled.loc[row_key, 1] == 7

        # same assignment through .ix, with its deprecation warning ignored
        with catch_warnings(record=True):
            simplefilter("ignore", DeprecationWarning)
            relabelled = frame.copy()
            relabelled.columns = lrange(3)
            relabelled.ix[row_key, 1] = 7
            assert relabelled.loc[row_key, 1] == 7

    def test_ix_general(self):
        """Full-key lookup before and after sorting the index (GH 2817).

        ix general issues.
        """
        data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
                'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
                'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}}
        df = DataFrame(data).set_index(keys=['col', 'year'])
        key = 4.0, 2012

        # emits a PerformanceWarning, ok
        with tm.assert_produces_warning(PerformanceWarning):
            tm.assert_frame_equal(df.loc[key], df.iloc[2:])

        # this is ok
        df.sort_index(inplace=True)
        result = df.loc[key]

        # col has float dtype, result should be Float64Index
        expected_index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3],
                                                names=['col', 'year'])
        expected = DataFrame({'amount': [222, 333, 444]},
                             index=expected_index)
        tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,378 @@
|
||||
import itertools
|
||||
from warnings import catch_warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, MultiIndex, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def single_level_multiindex():
    """Return a MultiIndex that has only one level, named 'first'."""
    labels = ['foo', 'bar', 'baz', 'qux']
    codes = [0, 1, 2, 3]
    return MultiIndex(levels=[labels], codes=[codes], names=['first'])
|
||||
|
||||
|
||||
@pytest.fixture
def frame_random_data_integer_multi_index():
    """Return a 6 x 2 random DataFrame on a 2-level integer MultiIndex."""
    index = MultiIndex(levels=[[0, 1], [0, 1, 2]],
                       codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
    values = np.random.randn(6, 2)
    return DataFrame(values, index=index)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexLoc(object):
    """Tests for ``.loc`` selection on MultiIndex Series and DataFrames."""

    def test_loc_getitem_series(self):
        # GH14730
        # passing a series as a key with a MultiIndex
        index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
        x = Series(index=index, data=range(9), dtype=np.float64)
        y = Series([1, 3])
        expected = Series(
            data=[0, 1, 2, 6, 7, 8],
            index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
            dtype=np.float64)
        result = x.loc[y]
        tm.assert_series_equal(result, expected)

        result = x.loc[[1, 3]]
        tm.assert_series_equal(result, expected)

        # GH15424
        y1 = Series([1, 3], index=[1, 2])
        result = x.loc[y1]
        tm.assert_series_equal(result, expected)

        empty = Series(data=[], dtype=np.float64)
        # dtype belongs to the Series; it used to be passed to MultiIndex
        expected = Series(
            [],
            index=MultiIndex(levels=index.levels, codes=[[], []]),
            dtype=np.float64)
        result = x.loc[empty]
        tm.assert_series_equal(result, expected)

    def test_loc_getitem_array(self):
        # GH15434
        # passing an array as a key with a MultiIndex
        index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
        x = Series(index=index, data=range(9), dtype=np.float64)
        y = np.array([1, 3])
        expected = Series(
            data=[0, 1, 2, 6, 7, 8],
            index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
            dtype=np.float64)
        result = x.loc[y]
        tm.assert_series_equal(result, expected)

        # empty array:
        empty = np.array([])
        # dtype belongs to the Series; it used to be passed to MultiIndex
        expected = Series(
            [],
            index=MultiIndex(levels=index.levels, codes=[[], []]),
            dtype=np.float64)
        result = x.loc[empty]
        tm.assert_series_equal(result, expected)

        # 0-dim array (scalar):
        scalar = np.int64(1)
        expected = Series(
            data=[0, 1, 2],
            index=['A', 'B', 'C'],
            dtype=np.float64)
        result = x.loc[scalar]
        tm.assert_series_equal(result, expected)

    def test_loc_multiindex(self):
        """``.loc`` must agree with ``.ix`` for label-based selections."""

        mi_labels = DataFrame(np.random.randn(3, 3),
                              columns=[['i', 'i', 'j'], ['A', 'A', 'B']],
                              index=[['i', 'i', 'j'], ['X', 'X', 'Y']])

        mi_int = DataFrame(np.random.randn(3, 3),
                           columns=[[2, 2, 4], [6, 8, 10]],
                           index=[[4, 4, 8], [8, 10, 12]])

        # the first row
        rs = mi_labels.loc['i']
        with catch_warnings(record=True):
            xp = mi_labels.ix['i']
        tm.assert_frame_equal(rs, xp)

        # 2nd (last) columns
        rs = mi_labels.loc[:, 'j']
        with catch_warnings(record=True):
            xp = mi_labels.ix[:, 'j']
        tm.assert_frame_equal(rs, xp)

        # corner column
        rs = mi_labels.loc['j'].loc[:, 'j']
        with catch_warnings(record=True):
            xp = mi_labels.ix['j'].ix[:, 'j']
        tm.assert_frame_equal(rs, xp)

        # with a tuple
        rs = mi_labels.loc[('i', 'X')]
        with catch_warnings(record=True):
            xp = mi_labels.ix[('i', 'X')]
        tm.assert_frame_equal(rs, xp)

        rs = mi_int.loc[4]
        with catch_warnings(record=True):
            xp = mi_int.ix[4]
        tm.assert_frame_equal(rs, xp)

        # missing label
        with pytest.raises(KeyError):
            mi_int.loc[2]
        with catch_warnings(record=True):
            # GH 21593
            with pytest.raises(KeyError):
                mi_int.ix[2]

    def test_loc_multiindex_indexer_none(self):

        # GH6788
        # multi-index indexer is None (meaning take all)
        attributes = ['Attribute' + str(i) for i in range(1)]
        attribute_values = ['Value' + str(i) for i in range(5)]

        index = MultiIndex.from_product([attributes, attribute_values])
        df = 0.1 * np.random.randn(10, 1 * 5) + 0.5
        df = DataFrame(df, columns=index)
        result = df[attributes]
        tm.assert_frame_equal(result, df)

        # GH 7349
        # loc with a multi-index seems to be doing fallback
        df = DataFrame(np.arange(12).reshape(-1, 1),
                       index=MultiIndex.from_product([[1, 2, 3, 4],
                                                      [1, 2, 3]]))

        expected = df.loc[([1, 2], ), :]
        result = df.loc[[1, 2]]
        tm.assert_frame_equal(result, expected)

    def test_loc_multiindex_incomplete(self):

        # GH 7399
        # incomplete indexers
        s = Series(np.arange(15, dtype='int64'),
                   MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
        expected = s.loc[:, 'a':'c']

        # each comparison used to be written twice; one check is enough
        result = s.loc[0:4, 'a':'c']
        tm.assert_series_equal(result, expected)

        result = s.loc[:4, 'a':'c']
        tm.assert_series_equal(result, expected)

        result = s.loc[0:, 'a':'c']
        tm.assert_series_equal(result, expected)

        # GH 7400
        # multiindexer gettitem with list of indexers skips wrong element
        s = Series(np.arange(15, dtype='int64'),
                   MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
        expected = s.iloc[[6, 7, 8, 12, 13, 14]]
        result = s.loc[2:4:2, 'a':'c']
        tm.assert_series_equal(result, expected)

    def test_get_loc_single_level(self, single_level_multiindex):
        """Lookups by each key of a one-level MultiIndex must not raise."""
        single_level = single_level_multiindex
        s = Series(np.random.randn(len(single_level)),
                   index=single_level)
        # smoke test only: the looked-up values are not compared
        for k in single_level.values:
            s[k]

    def test_loc_getitem_int_slice(self):
        # GH 3053
        # loc should treat integer slices like label slices

        index = MultiIndex.from_tuples(
            list(itertools.product([6, 7, 8], ['a', 'b'])))
        df = DataFrame(np.random.randn(6, 6), index, index)
        result = df.loc[6:8, :]
        expected = df
        tm.assert_frame_equal(result, expected)

        index = MultiIndex.from_tuples(
            list(itertools.product([10, 20, 30], ['a', 'b'])))
        df = DataFrame(np.random.randn(6, 6), index, index)
        result = df.loc[20:30, :]
        expected = df.iloc[2:]
        tm.assert_frame_equal(result, expected)

        # doc examples
        result = df.loc[10, :]
        expected = df.iloc[0:2]
        expected.index = ['a', 'b']
        tm.assert_frame_equal(result, expected)

        result = df.loc[:, 10]
        # expected = df.ix[:,10] (this fails)
        expected = df[10]
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        'indexer_type_1',
        (list, tuple, set, slice, np.ndarray, Series, Index))
    @pytest.mark.parametrize(
        'indexer_type_2',
        (list, tuple, set, slice, np.ndarray, Series, Index))
    def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
        # GH #19686
        # .loc should work with nested indexers which can be
        # any list-like objects (see `pandas.api.types.is_list_like`) or slices

        def convert_nested_indexer(indexer_type, keys):
            # ndarray and slice cannot be built by calling the type on a list
            if indexer_type == np.ndarray:
                return np.array(keys)
            if indexer_type == slice:
                return slice(*keys)
            return indexer_type(keys)

        a = [10, 20, 30]
        b = [1, 2, 3]
        index = MultiIndex.from_product([a, b])
        df = DataFrame(
            np.arange(len(index), dtype='int64'),
            index=index, columns=['Data'])

        keys = ([10, 20], [2, 3])
        types = (indexer_type_1, indexer_type_2)

        # check indexers with all the combinations of nested objects
        # of all the valid types
        indexer = tuple(
            convert_nested_indexer(indexer_type, k)
            for indexer_type, k in zip(types, keys))

        result = df.loc[indexer, 'Data']
        expected = Series(
            [1, 2, 4, 5], name='Data',
            index=MultiIndex.from_product(keys))

        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer, is_level1, expected_error', [
|
||||
([], False, None), # empty ok
|
||||
(['A'], False, None),
|
||||
(['A', 'D'], False, None),
|
||||
(['D'], False, r"\['D'\] not in index"), # not any values found
|
||||
(pd.IndexSlice[:, ['foo']], True, None),
|
||||
(pd.IndexSlice[:, ['foo', 'bah']], True, None)
|
||||
])
|
||||
def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1,
|
||||
expected_error):
|
||||
# GH 7866
|
||||
# multi-index slicing with missing indexers
|
||||
idx = MultiIndex.from_product([['A', 'B', 'C'],
|
||||
['foo', 'bar', 'baz']],
|
||||
names=['one', 'two'])
|
||||
s = Series(np.arange(9, dtype='int64'), index=idx).sort_index()
|
||||
|
||||
if indexer == []:
|
||||
expected = s.iloc[[]]
|
||||
elif is_level1:
|
||||
expected = Series([0, 3, 6], index=MultiIndex.from_product(
|
||||
[['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index()
|
||||
else:
|
||||
exp_idx = MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']],
|
||||
names=['one', 'two'])
|
||||
expected = Series(np.arange(3, dtype='int64'),
|
||||
index=exp_idx).sort_index()
|
||||
|
||||
if expected_error is not None:
|
||||
with pytest.raises(KeyError, match=expected_error):
|
||||
s.loc[indexer]
|
||||
else:
|
||||
result = s.loc[indexer]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
|
||||
@pytest.mark.parametrize('indexer', [
|
||||
lambda s: s.loc[[(2000, 3, 10), (2000, 3, 13)]],
|
||||
lambda s: s.ix[[(2000, 3, 10), (2000, 3, 13)]]
|
||||
])
|
||||
def test_series_loc_getitem_fancy(
|
||||
multiindex_year_month_day_dataframe_random_data, indexer):
|
||||
s = multiindex_year_month_day_dataframe_random_data['A']
|
||||
expected = s.reindex(s.index[49:51])
|
||||
|
||||
result = indexer(s)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('columns_indexer', [
    ([], slice(None)),
    (['foo'], [])
])
def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer):
    """An empty list in a MultiIndex column indexer selects no columns.

    GH 8737.  Both parametrized indexers contain an empty list for one
    of the two column levels; the expected frame keeps the five rows and
    has the column index reindexed to an empty list.
    """
    columns = MultiIndex.from_product((['foo', 'bar', 'baz'],
                                       ['alpha', 'beta']))
    frame = DataFrame(np.random.randn(5, 6), index=range(5),
                      columns=columns).sort_index(level=0, axis=1)

    # reindex returns (new_index, indexer); only the index is needed
    empty_columns = columns.reindex([])[0]
    expected = DataFrame(index=range(5), columns=empty_columns)
    result = frame.loc[:, columns_indexer]
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_duplicates_multiindex_non_scalar_type_object():
    """A callable stored in a cell is returned unchanged by ``.loc``.

    GH 7914 (regression from < 0.14.0).  The frame stores ``np.mean`` and
    ``np.median`` in its first row under MultiIndex columns.
    """
    columns = MultiIndex.from_tuples([('functs', 'mean'),
                                      ('functs', 'median')])
    rows = [[np.mean, np.median], ['mean', 'median']]
    df = DataFrame(rows, columns=columns, index=['function', 'name'])

    cell = df.loc['function', ('functs', 'mean')]
    assert cell == np.mean
|
||||
|
||||
|
||||
def test_loc_getitem_tuple_plus_slice():
    """``df.loc[(0, 0), :]`` returns the same row as ``df.loc[0, 0]``.

    GH 671.  The frame is indexed by the two integer columns 'a' and 'b'.
    """
    data = {'a': np.arange(10),
            'b': np.arange(10),
            'c': np.random.randn(10),
            'd': np.random.randn(10)}
    df = DataFrame(data).set_index(['a', 'b'])

    without_slice = df.loc[0, 0]
    with_slice = df.loc[(0, 0), :]
    tm.assert_series_equal(with_slice, without_slice)
|
||||
|
||||
|
||||
def test_loc_getitem_int(frame_random_data_integer_multi_index):
|
||||
df = frame_random_data_integer_multi_index
|
||||
result = df.loc[1]
|
||||
expected = df[-3:]
|
||||
expected.index = expected.index.droplevel(0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_int_raises_exception(
|
||||
frame_random_data_integer_multi_index):
|
||||
df = frame_random_data_integer_multi_index
|
||||
with pytest.raises(KeyError, match=r"^3L?$"):
|
||||
df.loc[3]
|
||||
|
||||
|
||||
def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data):
|
||||
df = multiindex_dataframe_random_data
|
||||
|
||||
# test setup - check key not in dataframe
|
||||
with pytest.raises(KeyError, match=r"^11L?$"):
|
||||
df.loc[('bar', 'three'), 'B']
|
||||
|
||||
# in theory should be inserting in a sorted space????
|
||||
df.loc[('bar', 'three'), 'B'] = 0
|
||||
expected = 0
|
||||
result = df.sort_index().loc[('bar', 'three'), 'B']
|
||||
assert result == expected
|
||||
+86
@@ -0,0 +1,86 @@
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._libs.index as _index
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, MultiIndex, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexBasic(object):
|
||||
|
||||
def test_multiindex_perf_warn(self):
|
||||
|
||||
df = DataFrame({'jim': [0, 0, 1, 1],
|
||||
'joe': ['x', 'x', 'z', 'y'],
|
||||
'jolie': np.random.rand(4)}).set_index(['jim', 'joe'])
|
||||
|
||||
with tm.assert_produces_warning(PerformanceWarning,
|
||||
clear=[pd.core.index]):
|
||||
df.loc[(1, 'z')]
|
||||
|
||||
df = df.iloc[[2, 1, 3, 0]]
|
||||
with tm.assert_produces_warning(PerformanceWarning):
|
||||
df.loc[(0, )]
|
||||
|
||||
def test_multiindex_contains_dropped(self):
|
||||
# GH 19027
|
||||
# test that dropped MultiIndex levels are not in the MultiIndex
|
||||
# despite continuing to be in the MultiIndex's levels
|
||||
idx = MultiIndex.from_product([[1, 2], [3, 4]])
|
||||
assert 2 in idx
|
||||
idx = idx.drop(2)
|
||||
|
||||
# drop implementation keeps 2 in the levels
|
||||
assert 2 in idx.levels[0]
|
||||
# but it should no longer be in the index itself
|
||||
assert 2 not in idx
|
||||
|
||||
# also applies to strings
|
||||
idx = MultiIndex.from_product([['a', 'b'], ['c', 'd']])
|
||||
assert 'a' in idx
|
||||
idx = idx.drop('a')
|
||||
assert 'a' in idx.levels[0]
|
||||
assert 'a' not in idx
|
||||
|
||||
@pytest.mark.parametrize("data, expected", [
|
||||
(MultiIndex.from_product([(), ()]), True),
|
||||
(MultiIndex.from_product([(1, 2), (3, 4)]), True),
|
||||
(MultiIndex.from_product([('a', 'b'), (1, 2)]), False),
|
||||
])
|
||||
def test_multiindex_is_homogeneous_type(self, data, expected):
|
||||
assert data._is_homogeneous_type is expected
|
||||
|
||||
def test_indexing_over_hashtable_size_cutoff(self):
|
||||
n = 10000
|
||||
|
||||
old_cutoff = _index._SIZE_CUTOFF
|
||||
_index._SIZE_CUTOFF = 20000
|
||||
|
||||
s = Series(np.arange(n),
|
||||
MultiIndex.from_arrays((["a"] * n, np.arange(n))))
|
||||
|
||||
# hai it works!
|
||||
assert s[("a", 5)] == 5
|
||||
assert s[("a", 6)] == 6
|
||||
assert s[("a", 7)] == 7
|
||||
|
||||
_index._SIZE_CUTOFF = old_cutoff
|
||||
|
||||
def test_multi_nan_indexing(self):
|
||||
|
||||
# GH 3588
|
||||
df = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'],
|
||||
'b': ["C1", "C2", "C3", "C4"],
|
||||
"c": [10, 15, np.nan, 20]})
|
||||
result = df.set_index(['a', 'b'], drop=False)
|
||||
expected = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'],
|
||||
'b': ["C1", "C2", "C3", "C4"],
|
||||
"c": [10, 15, np.nan, 20]},
|
||||
index=[Index(['R1', 'R2', np.nan, 'R4'],
|
||||
name='a'),
|
||||
Index(['C1', 'C2', 'C3', 'C4'], name='b')])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
+103
@@ -0,0 +1,103 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, MultiIndex, Panel, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:\\nPanel:FutureWarning')
|
||||
class TestMultiIndexPanel(object):
|
||||
|
||||
def test_iloc_getitem_panel_multiindex(self):
|
||||
|
||||
# GH 7199
|
||||
# Panel with multi-index
|
||||
multi_index = MultiIndex.from_tuples([('ONE', 'one'),
|
||||
('TWO', 'two'),
|
||||
('THREE', 'three')],
|
||||
names=['UPPER', 'lower'])
|
||||
|
||||
simple_index = [x[0] for x in multi_index]
|
||||
wd1 = Panel(items=['First', 'Second'],
|
||||
major_axis=['a', 'b', 'c', 'd'],
|
||||
minor_axis=multi_index)
|
||||
|
||||
wd2 = Panel(items=['First', 'Second'],
|
||||
major_axis=['a', 'b', 'c', 'd'],
|
||||
minor_axis=simple_index)
|
||||
|
||||
expected1 = wd1['First'].iloc[[True, True, True, False], [0, 2]]
|
||||
result1 = wd1.iloc[0, [True, True, True, False], [0, 2]] # WRONG
|
||||
tm.assert_frame_equal(result1, expected1)
|
||||
|
||||
expected2 = wd2['First'].iloc[[True, True, True, False], [0, 2]]
|
||||
result2 = wd2.iloc[0, [True, True, True, False], [0, 2]]
|
||||
tm.assert_frame_equal(result2, expected2)
|
||||
|
||||
expected1 = DataFrame(index=['a'], columns=multi_index,
|
||||
dtype='float64')
|
||||
result1 = wd1.iloc[0, [0], [0, 1, 2]]
|
||||
tm.assert_frame_equal(result1, expected1)
|
||||
|
||||
expected2 = DataFrame(index=['a'], columns=simple_index,
|
||||
dtype='float64')
|
||||
result2 = wd2.iloc[0, [0], [0, 1, 2]]
|
||||
tm.assert_frame_equal(result2, expected2)
|
||||
|
||||
# GH 7516
|
||||
mi = MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')])
|
||||
p = Panel(np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3),
|
||||
items=['a', 'b', 'c'], major_axis=mi,
|
||||
minor_axis=['u', 'v', 'w'])
|
||||
result = p.iloc[:, 1, 0]
|
||||
expected = Series([3, 12, 21], index=['a', 'b', 'c'], name='u')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = p.loc[:, (1, 'y'), 'u']
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_panel_setitem_with_multiindex(self):
|
||||
|
||||
# 10360
|
||||
# failing with a multi-index
|
||||
arr = np.array([[[1, 2, 3], [0, 0, 0]],
|
||||
[[0, 0, 0], [0, 0, 0]]],
|
||||
dtype=np.float64)
|
||||
|
||||
# reg index
|
||||
axes = dict(items=['A', 'B'], major_axis=[0, 1],
|
||||
minor_axis=['X', 'Y', 'Z'])
|
||||
p1 = Panel(0., **axes)
|
||||
p1.iloc[0, 0, :] = [1, 2, 3]
|
||||
expected = Panel(arr, **axes)
|
||||
tm.assert_panel_equal(p1, expected)
|
||||
|
||||
# multi-indexes
|
||||
axes['items'] = MultiIndex.from_tuples(
|
||||
[('A', 'a'), ('B', 'b')])
|
||||
p2 = Panel(0., **axes)
|
||||
p2.iloc[0, 0, :] = [1, 2, 3]
|
||||
expected = Panel(arr, **axes)
|
||||
tm.assert_panel_equal(p2, expected)
|
||||
|
||||
axes['major_axis'] = MultiIndex.from_tuples(
|
||||
[('A', 1), ('A', 2)])
|
||||
p3 = Panel(0., **axes)
|
||||
p3.iloc[0, 0, :] = [1, 2, 3]
|
||||
expected = Panel(arr, **axes)
|
||||
tm.assert_panel_equal(p3, expected)
|
||||
|
||||
axes['minor_axis'] = MultiIndex.from_product(
|
||||
[['X'], range(3)])
|
||||
p4 = Panel(0., **axes)
|
||||
p4.iloc[0, 0, :] = [1, 2, 3]
|
||||
expected = Panel(arr, **axes)
|
||||
tm.assert_panel_equal(p4, expected)
|
||||
|
||||
arr = np.array(
|
||||
[[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]],
|
||||
dtype=np.float64)
|
||||
p5 = Panel(0., **axes)
|
||||
p5.iloc[0, :, 0] = [1, 2]
|
||||
expected = Panel(arr, **axes)
|
||||
tm.assert_panel_equal(p5, expected)
|
||||
+183
@@ -0,0 +1,183 @@
|
||||
from warnings import catch_warnings, simplefilter
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, MultiIndex
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexPartial(object):
|
||||
|
||||
def test_getitem_partial_int(self):
|
||||
# GH 12416
|
||||
# with single item
|
||||
l1 = [10, 20]
|
||||
l2 = ['a', 'b']
|
||||
df = DataFrame(index=range(2),
|
||||
columns=MultiIndex.from_product([l1, l2]))
|
||||
expected = DataFrame(index=range(2),
|
||||
columns=l2)
|
||||
result = df[20]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# with list
|
||||
expected = DataFrame(index=range(2),
|
||||
columns=MultiIndex.from_product([l1[1:], l2]))
|
||||
result = df[[20]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# missing item:
|
||||
with pytest.raises(KeyError, match='1'):
|
||||
df[1]
|
||||
with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
|
||||
df[[1]]
|
||||
|
||||
def test_series_slice_partial(self):
|
||||
pass
|
||||
|
||||
def test_xs_partial(self, multiindex_dataframe_random_data,
|
||||
multiindex_year_month_day_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
result = frame.xs('foo')
|
||||
result2 = frame.loc['foo']
|
||||
expected = frame.T['foo'].T
|
||||
tm.assert_frame_equal(result, expected)
|
||||
tm.assert_frame_equal(result, result2)
|
||||
|
||||
result = ymd.xs((2000, 4))
|
||||
expected = ymd.loc[2000, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# ex from #1796
|
||||
index = MultiIndex(levels=[['foo', 'bar'], ['one', 'two'], [-1, 1]],
|
||||
codes=[[0, 0, 0, 0, 1, 1, 1, 1],
|
||||
[0, 0, 1, 1, 0, 0, 1, 1], [0, 1, 0, 1, 0, 1,
|
||||
0, 1]])
|
||||
df = DataFrame(np.random.randn(8, 4), index=index,
|
||||
columns=list('abcd'))
|
||||
|
||||
result = df.xs(['foo', 'one'])
|
||||
expected = df.loc['foo', 'one']
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_partial(
|
||||
self, multiindex_year_month_day_dataframe_random_data):
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
ymd = ymd.T
|
||||
result = ymd[2000, 2]
|
||||
|
||||
expected = ymd.reindex(columns=ymd.columns[ymd.columns.codes[1] == 1])
|
||||
expected.columns = expected.columns.droplevel(0).droplevel(0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_fancy_slice_partial(
|
||||
self, multiindex_dataframe_random_data,
|
||||
multiindex_year_month_day_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
result = frame.loc['bar':'baz']
|
||||
expected = frame[3:7]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
result = ymd.loc[(2000, 2):(2000, 4)]
|
||||
lev = ymd.index.codes[1]
|
||||
expected = ymd[(lev >= 1) & (lev <= 3)]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_partial_column_select(self):
|
||||
idx = MultiIndex(codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
|
||||
levels=[['a', 'b'], ['x', 'y'], ['p', 'q']])
|
||||
df = DataFrame(np.random.rand(3, 2), index=idx)
|
||||
|
||||
result = df.loc[('a', 'y'), :]
|
||||
expected = df.loc[('a', 'y')]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[('a', 'y'), [1, 0]]
|
||||
expected = df.loc[('a', 'y')][[1, 0]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
with catch_warnings(record=True):
|
||||
simplefilter("ignore", DeprecationWarning)
|
||||
result = df.ix[('a', 'y'), [1, 0]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
pytest.raises(KeyError, df.loc.__getitem__,
|
||||
(('a', 'foo'), slice(None, None)))
|
||||
|
||||
def test_partial_set(
|
||||
self, multiindex_year_month_day_dataframe_random_data):
|
||||
# GH #397
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
df = ymd.copy()
|
||||
exp = ymd.copy()
|
||||
df.loc[2000, 4] = 0
|
||||
exp.loc[2000, 4].values[:] = 0
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
df['A'].loc[2000, 4] = 1
|
||||
exp['A'].loc[2000, 4].values[:] = 1
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
df.loc[2000] = 5
|
||||
exp.loc[2000].values[:] = 5
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
# this works...for now
|
||||
df['A'].iloc[14] = 5
|
||||
assert df['A'][14] == 5
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AMBIGUOUS CASES!
|
||||
|
||||
def test_partial_ix_missing(
|
||||
self, multiindex_year_month_day_dataframe_random_data):
|
||||
pytest.skip("skipping for now")
|
||||
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
result = ymd.loc[2000, 0]
|
||||
expected = ymd.loc[2000]['A']
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# need to put in some work here
|
||||
|
||||
# self.ymd.loc[2000, 0] = 0
|
||||
# assert (self.ymd.loc[2000]['A'] == 0).all()
|
||||
|
||||
# Pretty sure the second (and maybe even the first) is already wrong.
|
||||
pytest.raises(Exception, ymd.loc.__getitem__, (2000, 6))
|
||||
pytest.raises(Exception, ymd.loc.__getitem__, (2000, 6), 0)
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
expected = frame.copy()
|
||||
result = frame.copy()
|
||||
result.loc[['foo', 'bar']] = 0
|
||||
expected.loc['foo'] = 0
|
||||
expected.loc['bar'] = 0
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = frame.copy()
|
||||
result = frame.copy()
|
||||
result.loc['foo':'bar'] = 0
|
||||
expected.loc['foo'] = 0
|
||||
expected.loc['bar'] = 0
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = frame['A'].copy()
|
||||
result = frame['A'].copy()
|
||||
result.loc[['foo', 'bar']] = 0
|
||||
expected.loc['foo'] = 0
|
||||
expected.loc['bar'] = 0
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = frame['A'].copy()
|
||||
result = frame['A'].copy()
|
||||
result.loc['foo':'bar'] = 0
|
||||
expected.loc['foo'] = 0
|
||||
expected.loc['bar'] = 0
|
||||
tm.assert_series_equal(result, expected)
|
||||
+42
@@ -0,0 +1,42 @@
|
||||
from numpy.random import randn
|
||||
|
||||
from pandas import DataFrame, MultiIndex, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexSetOps(object):
|
||||
|
||||
def test_multiindex_symmetric_difference(self):
|
||||
# GH 13490
|
||||
idx = MultiIndex.from_product([['a', 'b'], ['A', 'B']],
|
||||
names=['a', 'b'])
|
||||
result = idx ^ idx
|
||||
assert result.names == idx.names
|
||||
|
||||
idx2 = idx.copy().rename(['A', 'B'])
|
||||
result = idx ^ idx2
|
||||
assert result.names == [None, None]
|
||||
|
||||
def test_mixed_depth_insert(self):
|
||||
arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
|
||||
['', 'OD', 'OD', 'result1', 'result2', 'result1'],
|
||||
['', 'wx', 'wy', '', '', '']]
|
||||
|
||||
tuples = sorted(zip(*arrays))
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
df = DataFrame(randn(4, 6), columns=index)
|
||||
|
||||
result = df.copy()
|
||||
expected = df.copy()
|
||||
result['b'] = [1, 2, 3, 4]
|
||||
expected['b', '', ''] = [1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_dataframe_insert_column_all_na(self):
|
||||
# GH #1534
|
||||
mix = MultiIndex.from_tuples([('1a', '2a'), ('1a', '2b'), ('1a', '2c')
|
||||
])
|
||||
df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix)
|
||||
s = Series({(1, 1): 1, (1, 2): 2})
|
||||
df['new'] = s
|
||||
assert df['new'].isna().all()
|
||||
+439
@@ -0,0 +1,439 @@
|
||||
from warnings import catch_warnings, simplefilter
|
||||
|
||||
import numpy as np
|
||||
from numpy.random import randn
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame, MultiIndex, Series, Timestamp, date_range, isna, notna)
|
||||
import pandas.core.common as com
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
|
||||
class TestMultiIndexSetItem(object):
|
||||
|
||||
def test_setitem_multiindex(self):
|
||||
with catch_warnings(record=True):
|
||||
|
||||
for index_fn in ('ix', 'loc'):
|
||||
|
||||
def assert_equal(a, b):
|
||||
assert a == b
|
||||
|
||||
def check(target, indexers, value, compare_fn, expected=None):
|
||||
fn = getattr(target, index_fn)
|
||||
fn.__setitem__(indexers, value)
|
||||
result = fn.__getitem__(indexers)
|
||||
if expected is None:
|
||||
expected = value
|
||||
compare_fn(result, expected)
|
||||
# GH7190
|
||||
index = MultiIndex.from_product([np.arange(0, 100),
|
||||
np.arange(0, 80)],
|
||||
names=['time', 'firm'])
|
||||
t, n = 0, 2
|
||||
df = DataFrame(np.nan, columns=['A', 'w', 'l', 'a', 'x',
|
||||
'X', 'd', 'profit'],
|
||||
index=index)
|
||||
check(target=df, indexers=((t, n), 'X'), value=0,
|
||||
compare_fn=assert_equal)
|
||||
|
||||
df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x',
|
||||
'X', 'd', 'profit'],
|
||||
index=index)
|
||||
check(target=df, indexers=((t, n), 'X'), value=1,
|
||||
compare_fn=assert_equal)
|
||||
|
||||
df = DataFrame(columns=['A', 'w', 'l', 'a', 'x',
|
||||
'X', 'd', 'profit'],
|
||||
index=index)
|
||||
check(target=df, indexers=((t, n), 'X'), value=2,
|
||||
compare_fn=assert_equal)
|
||||
|
||||
# gh-7218: assigning with 0-dim arrays
|
||||
df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x',
|
||||
'X', 'd', 'profit'],
|
||||
index=index)
|
||||
check(target=df,
|
||||
indexers=((t, n), 'X'),
|
||||
value=np.array(3),
|
||||
compare_fn=assert_equal,
|
||||
expected=3, )
|
||||
|
||||
# GH5206
|
||||
df = DataFrame(np.arange(25).reshape(5, 5),
|
||||
columns='A,B,C,D,E'.split(','), dtype=float)
|
||||
df['F'] = 99
|
||||
row_selection = df['A'] % 2 == 0
|
||||
col_selection = ['B', 'C']
|
||||
with catch_warnings(record=True):
|
||||
df.ix[row_selection, col_selection] = df['F']
|
||||
output = DataFrame(99., index=[0, 2, 4], columns=['B', 'C'])
|
||||
with catch_warnings(record=True):
|
||||
tm.assert_frame_equal(df.ix[row_selection, col_selection],
|
||||
output)
|
||||
check(target=df,
|
||||
indexers=(row_selection, col_selection),
|
||||
value=df['F'],
|
||||
compare_fn=tm.assert_frame_equal,
|
||||
expected=output, )
|
||||
|
||||
# GH11372
|
||||
idx = MultiIndex.from_product([
|
||||
['A', 'B', 'C'],
|
||||
date_range('2015-01-01', '2015-04-01', freq='MS')])
|
||||
cols = MultiIndex.from_product([
|
||||
['foo', 'bar'],
|
||||
date_range('2016-01-01', '2016-02-01', freq='MS')])
|
||||
|
||||
df = DataFrame(np.random.random((12, 4)),
|
||||
index=idx, columns=cols)
|
||||
|
||||
subidx = MultiIndex.from_tuples(
|
||||
[('A', Timestamp('2015-01-01')),
|
||||
('A', Timestamp('2015-02-01'))])
|
||||
subcols = MultiIndex.from_tuples(
|
||||
[('foo', Timestamp('2016-01-01')),
|
||||
('foo', Timestamp('2016-02-01'))])
|
||||
|
||||
vals = DataFrame(np.random.random((2, 2)),
|
||||
index=subidx, columns=subcols)
|
||||
check(target=df,
|
||||
indexers=(subidx, subcols),
|
||||
value=vals,
|
||||
compare_fn=tm.assert_frame_equal, )
|
||||
# set all columns
|
||||
vals = DataFrame(
|
||||
np.random.random((2, 4)), index=subidx, columns=cols)
|
||||
check(target=df,
|
||||
indexers=(subidx, slice(None, None, None)),
|
||||
value=vals,
|
||||
compare_fn=tm.assert_frame_equal, )
|
||||
# identity
|
||||
copy = df.copy()
|
||||
check(target=df, indexers=(df.index, df.columns), value=df,
|
||||
compare_fn=tm.assert_frame_equal, expected=copy)
|
||||
|
||||
def test_multiindex_setitem(self):
|
||||
|
||||
# GH 3738
|
||||
# setting with a multi-index right hand side
|
||||
arrays = [np.array(['bar', 'bar', 'baz', 'qux', 'qux', 'bar']),
|
||||
np.array(['one', 'two', 'one', 'one', 'two', 'one']),
|
||||
np.arange(0, 6, 1)]
|
||||
|
||||
df_orig = DataFrame(np.random.randn(6, 3), index=arrays,
|
||||
columns=['A', 'B', 'C']).sort_index()
|
||||
|
||||
expected = df_orig.loc[['bar']] * 2
|
||||
df = df_orig.copy()
|
||||
df.loc[['bar']] *= 2
|
||||
tm.assert_frame_equal(df.loc[['bar']], expected)
|
||||
|
||||
# raise because these have differing levels
|
||||
with pytest.raises(TypeError):
|
||||
df.loc['bar'] *= 2
|
||||
|
||||
# from SO
|
||||
# http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
|
||||
df_orig = DataFrame.from_dict({'price': {
|
||||
('DE', 'Coal', 'Stock'): 2,
|
||||
('DE', 'Gas', 'Stock'): 4,
|
||||
('DE', 'Elec', 'Demand'): 1,
|
||||
('FR', 'Gas', 'Stock'): 5,
|
||||
('FR', 'Solar', 'SupIm'): 0,
|
||||
('FR', 'Wind', 'SupIm'): 0
|
||||
}})
|
||||
df_orig.index = MultiIndex.from_tuples(df_orig.index,
|
||||
names=['Sit', 'Com', 'Type'])
|
||||
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 2, 3]] *= 2
|
||||
|
||||
idx = pd.IndexSlice
|
||||
df = df_orig.copy()
|
||||
df.loc[idx[:, :, 'Stock'], :] *= 2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[idx[:, :, 'Stock'], 'price'] *= 2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_multiindex_assignment(self):
|
||||
|
||||
# GH3777 part 2
|
||||
|
||||
# mixed dtype
|
||||
df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3),
|
||||
columns=list('abc'),
|
||||
index=[[4, 4, 8], [8, 10, 12]])
|
||||
df['d'] = np.nan
|
||||
arr = np.array([0., 1.])
|
||||
|
||||
with catch_warnings(record=True):
|
||||
df.ix[4, 'd'] = arr
|
||||
tm.assert_series_equal(df.ix[4, 'd'],
|
||||
Series(arr, index=[8, 10], name='d'))
|
||||
|
||||
# single dtype
|
||||
df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3),
|
||||
columns=list('abc'),
|
||||
index=[[4, 4, 8], [8, 10, 12]])
|
||||
|
||||
with catch_warnings(record=True):
|
||||
df.ix[4, 'c'] = arr
|
||||
exp = Series(arr, index=[8, 10], name='c', dtype='float64')
|
||||
tm.assert_series_equal(df.ix[4, 'c'], exp)
|
||||
|
||||
# scalar ok
|
||||
with catch_warnings(record=True):
|
||||
df.ix[4, 'c'] = 10
|
||||
exp = Series(10, index=[8, 10], name='c', dtype='float64')
|
||||
tm.assert_series_equal(df.ix[4, 'c'], exp)
|
||||
|
||||
# invalid assignments
|
||||
with pytest.raises(ValueError):
|
||||
with catch_warnings(record=True):
|
||||
df.ix[4, 'c'] = [0, 1, 2, 3]
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
with catch_warnings(record=True):
|
||||
df.ix[4, 'c'] = [0]
|
||||
|
||||
# groupby example
|
||||
NUM_ROWS = 100
|
||||
NUM_COLS = 10
|
||||
col_names = ['A' + num for num in
|
||||
map(str, np.arange(NUM_COLS).tolist())]
|
||||
index_cols = col_names[:5]
|
||||
|
||||
df = DataFrame(np.random.randint(5, size=(NUM_ROWS, NUM_COLS)),
|
||||
dtype=np.int64, columns=col_names)
|
||||
df = df.set_index(index_cols).sort_index()
|
||||
grp = df.groupby(level=index_cols[:4])
|
||||
df['new_col'] = np.nan
|
||||
|
||||
f_index = np.arange(5)
|
||||
|
||||
def f(name, df2):
|
||||
return Series(np.arange(df2.shape[0]),
|
||||
name=df2.index.values[0]).reindex(f_index)
|
||||
|
||||
# TODO(wesm): unused?
|
||||
# new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T
|
||||
|
||||
# we are actually operating on a copy here
|
||||
# but in this case, that's ok
|
||||
for name, df2 in grp:
|
||||
new_vals = np.arange(df2.shape[0])
|
||||
with catch_warnings(record=True):
|
||||
df.ix[name, 'new_col'] = new_vals
|
||||
|
||||
def test_series_setitem(
|
||||
self, multiindex_year_month_day_dataframe_random_data):
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
s = ymd['A']
|
||||
|
||||
s[2000, 3] = np.nan
|
||||
assert isna(s.values[42:65]).all()
|
||||
assert notna(s.values[:42]).all()
|
||||
assert notna(s.values[65:]).all()
|
||||
|
||||
s[2000, 3, 10] = np.nan
|
||||
assert isna(s[49])
|
||||
|
||||
def test_frame_getitem_setitem_boolean(
|
||||
self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
df = frame.T.copy()
|
||||
values = df.values
|
||||
|
||||
result = df[df > 0]
|
||||
expected = df.where(df > 0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df[df > 0] = 5
|
||||
values[values > 0] = 5
|
||||
tm.assert_almost_equal(df.values, values)
|
||||
|
||||
df[df == 5] = 0
|
||||
values[values == 5] = 0
|
||||
tm.assert_almost_equal(df.values, values)
|
||||
|
||||
# a df that needs alignment first
|
||||
df[df[:-1] < 0] = 2
|
||||
np.putmask(values[:-1], values[:-1] < 0, 2)
|
||||
tm.assert_almost_equal(df.values, values)
|
||||
|
||||
with pytest.raises(TypeError, match='boolean values only'):
|
||||
df[df * 0] = 2
|
||||
|
||||
def test_frame_getitem_setitem_multislice(self):
|
||||
levels = [['t1', 't2'], ['a', 'b', 'c']]
|
||||
codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
|
||||
midx = MultiIndex(codes=codes, levels=levels, names=[None, 'id'])
|
||||
df = DataFrame({'value': [1, 2, 3, 7, 8]}, index=midx)
|
||||
|
||||
result = df.loc[:, 'value']
|
||||
tm.assert_series_equal(df['value'], result)
|
||||
|
||||
with catch_warnings(record=True):
|
||||
simplefilter("ignore", DeprecationWarning)
|
||||
result = df.ix[:, 'value']
|
||||
tm.assert_series_equal(df['value'], result)
|
||||
|
||||
result = df.loc[df.index[1:3], 'value']
|
||||
tm.assert_series_equal(df['value'][1:3], result)
|
||||
|
||||
result = df.loc[:, :]
|
||||
tm.assert_frame_equal(df, result)
|
||||
|
||||
result = df
|
||||
df.loc[:, 'value'] = 10
|
||||
result['value'] = 10
|
||||
tm.assert_frame_equal(df, result)
|
||||
|
||||
df.loc[:, :] = 10
|
||||
tm.assert_frame_equal(df, result)
|
||||
|
||||
def test_frame_setitem_multi_column(self):
|
||||
df = DataFrame(randn(10, 4), columns=[['a', 'a', 'b', 'b'],
|
||||
[0, 1, 0, 1]])
|
||||
|
||||
cp = df.copy()
|
||||
cp['a'] = cp['b']
|
||||
tm.assert_frame_equal(cp['a'], cp['b'])
|
||||
|
||||
# set with ndarray
|
||||
cp = df.copy()
|
||||
cp['a'] = cp['b'].values
|
||||
tm.assert_frame_equal(cp['a'], cp['b'])
|
||||
|
||||
# ---------------------------------------
|
||||
# #1803
|
||||
columns = MultiIndex.from_tuples([('A', '1'), ('A', '2'), ('B', '1')])
|
||||
df = DataFrame(index=[1, 3, 5], columns=columns)
|
||||
|
||||
# Works, but adds a column instead of updating the two existing ones
|
||||
df['A'] = 0.0 # Doesn't work
|
||||
assert (df['A'].values == 0).all()
|
||||
|
||||
# it broadcasts
|
||||
df['B', '1'] = [1, 2, 3]
|
||||
df['A'] = df['B', '1']
|
||||
|
||||
sliced_a1 = df['A', '1']
|
||||
sliced_a2 = df['A', '2']
|
||||
sliced_b1 = df['B', '1']
|
||||
tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
|
||||
tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
|
||||
assert sliced_a1.name == ('A', '1')
|
||||
assert sliced_a2.name == ('A', '2')
|
||||
assert sliced_b1.name == ('B', '1')
|
||||
|
||||
def test_getitem_setitem_tuple_plus_columns(
|
||||
self, multiindex_year_month_day_dataframe_random_data):
|
||||
# GH #1013
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
df = ymd[:5]
|
||||
|
||||
result = df.loc[(2000, 1, 6), ['A', 'B', 'C']]
|
||||
expected = df.loc[2000, 1, 6][['A', 'B', 'C']]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_getitem_setitem_slice_integers(self):
|
||||
index = MultiIndex(levels=[[0, 1, 2], [0, 2]],
|
||||
codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]])
|
||||
|
||||
frame = DataFrame(np.random.randn(len(index), 4), index=index,
|
||||
columns=['a', 'b', 'c', 'd'])
|
||||
res = frame.loc[1:2]
|
||||
exp = frame.reindex(frame.index[2:])
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
frame.loc[1:2] = 7
|
||||
assert (frame.loc[1:2] == 7).values.all()
|
||||
|
||||
series = Series(np.random.randn(len(index)), index=index)
|
||||
|
||||
res = series.loc[1:2]
|
||||
exp = series.reindex(series.index[2:])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
series.loc[1:2] = 7
|
||||
assert (series.loc[1:2] == 7).values.all()
|
||||
|
||||
def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
    """Replace a MultiIndex column with a boolean mask and read it back.

    The new values must be visible both directly and after reindexing the
    columns down to just that key.
    """
    transposed = multiindex_dataframe_random_data.T
    key = ('foo', 'two')

    original = transposed[key]
    transposed[key] = original > original.median()
    tm.assert_series_equal(transposed[key], original > original.median())

    only_key = transposed.reindex(columns=[key])
    tm.assert_series_equal(only_key[key], original > original.median())
|
||||
|
||||
def test_set_column_scalar_with_ix(self, multiindex_dataframe_random_data):
    """Assign scalars to a subset of MultiIndex rows, whole-row and per-column."""
    frame = multiindex_dataframe_random_data
    picked = frame.index[[1, 4, 5]]

    # scalar broadcast over every column of the picked rows
    frame.loc[picked] = 99
    assert (frame.loc[picked].values == 99).all()

    # scalar written through the 'B' column object, then read via the frame
    column_b = frame['B']
    column_b[picked] = 97
    assert (frame.loc[picked, 'B'] == 97).all()
|
||||
|
||||
def test_nonunique_assignment_1750(self):
    """Assigning via a MultiIndex key hits every row sharing that key."""
    rows = [[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]]
    df = DataFrame(rows, columns=list("ABCD")).set_index(['A', 'B'])

    # (1, 1) appears twice in the index built above
    target = MultiIndex.from_tuples([(1, 1)])
    df.loc[target, "C"] = '_'

    assert (df.xs((1, 1))['C'] == '_').all()
|
||||
|
||||
def test_astype_assignment_with_dups(self):
    """Dtype-changing assignment to a repeated top-level key keeps the index.

    GH 4686: assignment with dups that has a dtype change.
    """
    columns = MultiIndex.from_tuples([('A', '1'), ('B', '1'), ('A', '2')])
    df = DataFrame(np.arange(3).reshape((1, 3)),
                   columns=columns, dtype=object)
    index_before = df.index.copy()

    # 'A' selects two of the three columns; cast them and write them back
    df['A'] = df['A'].astype(np.float64)

    tm.assert_index_equal(df.index, index_before)
|
||||
|
||||
|
||||
def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
    """Zero the ``'foo'`` columns by writing into their ``.values`` array."""
    # Works only because the underlying array is modified in place;
    # really a no-no.
    transposed = multiindex_dataframe_random_data.T
    transposed['foo'].values[:] = 0

    assert (transposed['foo'].values == 0).all()
|
||||
|
||||
|
||||
def test_frame_setitem_copy_raises(multiindex_dataframe_random_data):
    """Chained assignment on a MultiIndex-column frame must raise."""
    transposed = multiindex_dataframe_random_data.T
    expected_message = (
        "A value is trying to be set on a copy of a slice from a DataFrame")

    # df[...][...] = value is chained assignment, so it will raise/warn
    with pytest.raises(com.SettingWithCopyError, match=expected_message):
        transposed['foo']['one'] = 2
|
||||
|
||||
|
||||
def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data):
    """A rejected chained assignment must leave the frame unchanged."""
    expected = multiindex_dataframe_random_data.T
    working = expected.copy()
    expected_message = (
        "A value is trying to be set on a copy of a slice from a DataFrame")

    with pytest.raises(com.SettingWithCopyError, match=expected_message):
        working['foo']['one'] = 2

    # the failed write must not have touched the copy
    tm.assert_frame_equal(working, expected)
|
||||
+576
@@ -0,0 +1,576 @@
|
||||
from warnings import catch_warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import UnsortedIndexError
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, MultiIndex, Series, Timestamp
|
||||
from pandas.core.indexing import _non_reducing_slice
|
||||
from pandas.tests.indexing.common import _mklbl
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
|
||||
class TestMultiIndexSlicers(object):
|
||||
|
||||
def test_per_axis_per_level_getitem(self):
    """Check ``.loc`` getitem with per-axis, per-level indexers (GH6134).

    Covers slice and list selectors on a four-level index, the same on a
    small two-level frame (rows, columns and both), a multi-level Series,
    boolean masks, the ambiguous non-tuple form, and the error raised when
    slicing an index that is not lexsorted.
    """

    def matching_keys(labels, first_in, third_in):
        # Brute-force reference: keep index tuples whose first and third
        # entries are among the given labels.
        return [tuple(key) for key in labels
                if key[0] in first_in and key[2] in third_in]

    a1_to_a3 = ('A1', 'A2', 'A3')

    # example test case
    ix = MultiIndex.from_product([_mklbl('A', 5), _mklbl('B', 7),
                                  _mklbl('C', 4), _mklbl('D', 2)])
    df = DataFrame(np.arange(len(ix.get_values())), index=ix)

    # list selector on the third level
    result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :]
    expected = df.loc[
        matching_keys(df.index.values, a1_to_a3, ('C1', 'C3'))]
    tm.assert_frame_equal(result, expected)

    # slice selector on the third level
    expected = df.loc[
        matching_keys(df.index.values, a1_to_a3, ('C1', 'C2', 'C3'))]
    result = df.loc[(slice('A1', 'A3'), slice(None), slice('C1', 'C3')), :]
    tm.assert_frame_equal(result, expected)

    # test multi-index slicing with per axis and per index controls
    row_index = MultiIndex.from_tuples(
        [('A', 1), ('A', 2), ('A', 3), ('B', 1)],
        names=['one', 'two'])
    col_index = MultiIndex.from_tuples(
        [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
        names=['lvl0', 'lvl1'])
    data = np.arange(16, dtype='int64').reshape(4, 4)
    df = DataFrame(data, index=row_index, columns=col_index)
    df = df.sort_index(axis=0).sort_index(axis=1)

    # identity
    everything = (slice(None), slice(None))
    identity_keys = [(everything, slice(None)),
                     (everything, everything),
                     (slice(None), everything)]
    for key in identity_keys:
        tm.assert_frame_equal(df.loc[key], df)

    # index: list and scalar on the second level select the same rows
    for second_level in ([1], 1):
        result = df.loc[(slice(None), second_level), :]
        tm.assert_frame_equal(result, df.iloc[[0, 3]])

    # columns
    result = df.loc[:, (slice(None), ['foo'])]
    tm.assert_frame_equal(result, df.iloc[:, [1, 3]])

    # both
    result = df.loc[(slice(None), 1), (slice(None), ['foo'])]
    tm.assert_frame_equal(result, df.iloc[[0, 3], [1, 3]])

    result = df.loc['A', 'a']
    expected = DataFrame({'bar': [1, 5, 9], 'foo': [0, 4, 8]},
                         index=Index([1, 2, 3], name='two'),
                         columns=Index(['bar', 'foo'], name='lvl1'))
    tm.assert_frame_equal(result, expected)

    result = df.loc[(slice(None), [1, 2]), :]
    tm.assert_frame_equal(result, df.iloc[[0, 1, 3]])

    # multi-level series
    s = Series(np.arange(len(ix.get_values())), index=ix)
    result = s.loc['A1':'A3', :, ['C1', 'C3']]
    expected = s.loc[
        matching_keys(s.index.values, a1_to_a3, ('C1', 'C3'))]
    tm.assert_series_equal(result, expected)

    # boolean indexers
    result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
    tm.assert_frame_equal(result, df.iloc[[2, 3]])

    with pytest.raises(ValueError):
        df.loc[(slice(None), np.array([True, False])), :]

    # ambiguous cases
    # these can be multiply interpreted (e.g. in this case
    # as df.loc[slice(None),[1]] as well
    with pytest.raises(KeyError):
        df.loc[slice(None), [1]]

    result = df.loc[(slice(None), [1]), :]
    tm.assert_frame_equal(result, df.iloc[[0, 3]])

    # not lexsorted
    assert df.index.lexsort_depth == 2
    df = df.sort_index(level=1, axis=0)
    assert df.index.lexsort_depth == 0

    msg = ('MultiIndex slicing requires the index to be '
           r'lexsorted: slicing on levels \[1\], lexsort depth 0')
    with pytest.raises(UnsortedIndexError, match=msg):
        df.loc[(slice(None), slice('bar')), :]

    # GH 16734: not sorted, but no real slicing
    result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
    tm.assert_frame_equal(result, df.iloc[[1, 3], :])
|
||||
|
||||
def test_multiindex_slicers_non_unique(self):
    """Per-level slicers on a MultiIndex with repeated entries.

    GH 7106 (non-unique mi index support) and GH12896 (numpy-implementation
    dependent bug when slicing a sorted, duplicated integer level).
    """

    def indexed_frame(c_values, d_values):
        # Constant 'A'/'B' levels; 'C' is the third level, 'D' the data.
        count = len(c_values)
        raw = DataFrame({'A': ['foo'] * count,
                         'B': ['a'] * count,
                         'C': c_values,
                         'D': d_values})
        return raw.set_index(['A', 'B', 'C']).sort_index()

    third_level_is_1 = (slice(None), slice(None), 1)

    df = indexed_frame([1, 2, 1, 3], [1, 2, 3, 4])
    assert not df.index.is_unique
    expected = indexed_frame([1, 1], [1, 3])
    result = df.loc[third_level_is_1, :]
    tm.assert_frame_equal(result, expected)

    # this is equivalent of an xs expression
    result = df.xs(1, level=2, drop_level=False)
    tm.assert_frame_equal(result, expected)

    df = indexed_frame([1, 2, 1, 2], [1, 2, 3, 4])
    assert not df.index.is_unique
    expected = indexed_frame([1, 1], [1, 3])
    result = df.loc[third_level_is_1, :]
    assert not result.index.is_unique
    tm.assert_frame_equal(result, expected)

    # GH12896
    # numpy-implementation dependent bug
    ints = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 13, 14, 14, 16,
            17, 18, 19, 200000, 200000]
    n = len(ints)
    idx = MultiIndex.from_arrays([['a'] * n, ints])
    ones = Series([1] * n, index=idx).sort_index()
    result = ones.loc[(slice(None), slice(100000))]
    # only the two trailing 200000 entries lie past the slice bound
    expected = Series([1] * (n - 2), index=idx[:-2]).sort_index()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_multiindex_slicers_datetimelike(self):
    """Slice a (date, frequency) MultiIndex with Timestamps and strings.

    GH 7429: buggy/inconsistent behavior when slicing with datetime-like.
    Every key spelling below must select the same block of the frame.
    """
    import datetime

    first = datetime.datetime(2012, 1, 1, 12, 12, 12)
    dates = [first + datetime.timedelta(days=offset)
             for offset in range(6)]
    index = MultiIndex.from_product(
        [dates, [1, 2]], names=['date', 'frequency'])

    values = np.arange(6 * 2 * 4, dtype='int64').reshape(-1, 4)
    df = DataFrame(values, index=index, columns=list('ABCD'))

    # multi-axis slicing
    idx = pd.IndexSlice
    expected = df.iloc[[0, 2, 4], [0, 1]]

    start_ts = Timestamp('2012-01-01 12:12:12')
    stop_ts = Timestamp('2012-01-03 12:12:12')

    keys = [
        # explicit slice objects built from Timestamps
        ((slice(start_ts, stop_ts), slice(1, 1)), slice('A', 'B')),
        # the same slices spelled through IndexSlice
        ((idx[start_ts:stop_ts], idx[1:1]), slice('A', 'B')),
        # scalar instead of a slice on the frequency level
        ((slice(start_ts, stop_ts), 1), slice('A', 'B')),
        # with strings
        ((slice('2012-01-01 12:12:12', '2012-01-03 12:12:12'),
          slice(1, 1)), slice('A', 'B')),
        ((idx['2012-01-01 12:12:12':'2012-01-03 12:12:12'], 1),
         idx['A', 'B']),
    ]
    for row_key, col_key in keys:
        result = df.loc[row_key, col_key]
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_multiindex_slicers_edges(self):
    """Edge cases for slicers on a sorted (A, B, DATE) index (GH 8132)."""
    date_strings = [
        "2013-06-11", "2013-07-02", "2013-07-09", "2013-07-30",
        "2013-08-06", "2013-06-11", "2013-07-02", "2013-07-09",
        "2013-07-30", "2013-08-06", "2013-09-03", "2013-10-01",
        "2013-07-09", "2013-08-06", "2013-09-03"]
    df = DataFrame(
        {'A': ['A0'] * 5 + ['A1'] * 5 + ['A2'] * 5,
         'B': ['B0', 'B0', 'B1', 'B1', 'B2'] * 3,
         'DATE': date_strings,
         'VALUES': [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2]})

    df['DATE'] = pd.to_datetime(df['DATE'])
    df1 = df.set_index(['A', 'B', 'DATE']).sort_index()

    # Each entry pairs a row indexer with the rows it must select.
    cases = [
        # A1 - Get all values under "A0" and "A1"
        (slice('A1'), df1.iloc[0:10]),
        # A2 - Get all values from the start to "A2"
        (slice('A2'), df1),
        # A3 - Get all values under "B1" or "B2"
        ((slice(None), slice('B1', 'B2')),
         df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]]),
        # A4 - Get all values between 2013-07-02 and 2013-07-09
        ((slice(None), slice(None), slice('20130702', '20130709')),
         df1.iloc[[1, 2, 6, 7, 12]]),
        # B1 - Get all values in B0 that are also under A0, A1 and A2
        ((slice('A2'), slice('B0')),
         df1.iloc[[0, 1, 5, 6, 10, 11]]),
        # B2 - Get all values in B0, B1 and B2 (similar to what #2 is
        # doing for the As)
        ((slice(None), slice('B2')), df1),
        # B3 - Get all values from B1 to B2 and up to 2013-08-06
        ((slice(None), slice('B1', 'B2'), slice('2013-08-06')),
         df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]]),
        # B4 - Same as A4 but the start of the date slice is not a key.
        # shows indexing on a partial selection slice
        ((slice(None), slice(None), slice('20130701', '20130709')),
         df1.iloc[[1, 2, 6, 7, 12]]),
    ]
    for row_key, expected in cases:
        result = df1.loc[row_key, :]
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_per_axis_per_level_doc_examples(self):
    """Run the slicer examples from indexing.rst / advanced.

    Each tuple-of-slices key is checked against a brute-force selection and
    against the equivalent ``pd.IndexSlice`` spelling.
    """
    # test index maker
    idx = pd.IndexSlice

    index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2),
                                     _mklbl('C', 4), _mklbl('D', 2)])
    columns = MultiIndex.from_tuples(
        [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
        names=['lvl0', 'lvl1'])
    shape = (len(index), len(columns))
    values = np.arange(len(index) * len(columns), dtype='int64')
    df = DataFrame(values.reshape(shape), index=index, columns=columns)

    c1_c3 = ('C1', 'C3')

    # first level restricted to A1..A3, third level to C1/C3
    result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :]
    expected = df.loc[[tuple(key) for key in df.index.values
                       if key[0] in ('A1', 'A2', 'A3')
                       and key[2] in c1_c3]]
    tm.assert_frame_equal(result, expected)
    result = df.loc[idx['A1':'A3', :, ['C1', 'C3']], :]
    tm.assert_frame_equal(result, expected)

    # only the third level restricted
    result = df.loc[(slice(None), slice(None), ['C1', 'C3']), :]
    expected = df.loc[[tuple(key) for key in df.index.values
                       if key[2] in c1_c3]]
    tm.assert_frame_equal(result, expected)
    result = df.loc[idx[:, :, ['C1', 'C3']], :]
    tm.assert_frame_equal(result, expected)

    # not sorted
    with pytest.raises(UnsortedIndexError):
        df.loc['A1', ('a', slice('foo'))]

    # GH 16734: not sorted, but no real slicing
    tm.assert_frame_equal(df.loc['A1', (slice(None), 'foo')],
                          df.loc['A1'].iloc[:, [0, 2]])

    df = df.sort_index(axis=1)

    # slicing (only checked for not raising)
    df.loc['A1', (slice(None), 'foo')]
    df.loc[(slice(None), slice(None), ['C1', 'C3']), (slice(None), 'foo')]

    # setitem (only checked for not raising)
    df.loc(axis=0)[:, :, ['C1', 'C3']] = -10
|
||||
|
||||
def test_loc_axis_arguments(self):
    """``df.loc(axis=...)`` accepts 0/'index'/1/'columns' and rejects others."""
    index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2),
                                     _mklbl('C', 4), _mklbl('D', 2)])
    columns = MultiIndex.from_tuples(
        [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
        names=['lvl0', 'lvl1'])
    shape = (len(index), len(columns))
    values = np.arange(len(index) * len(columns), dtype='int64')
    df = DataFrame(values.reshape(shape), index=index, columns=columns)
    df = df.sort_index().sort_index(axis=1)

    c1_c3 = ('C1', 'C3')

    # axis 0
    result = df.loc(axis=0)['A1':'A3', :, ['C1', 'C3']]
    expected = df.loc[[tuple(key) for key in df.index.values
                       if key[0] in ('A1', 'A2', 'A3')
                       and key[2] in c1_c3]]
    tm.assert_frame_equal(result, expected)

    result = df.loc(axis='index')[:, :, ['C1', 'C3']]
    expected = df.loc[[tuple(key) for key in df.index.values
                       if key[2] in c1_c3]]
    tm.assert_frame_equal(result, expected)

    # axis 1, by number and by name
    for axis in (1, 'columns'):
        result = df.loc(axis=axis)[:, 'foo']
        expected = df.loc[:, (slice(None), 'foo')]
        tm.assert_frame_equal(result, expected)

    # invalid axis
    for bad_axis in (-1, 2, 'foo'):
        with pytest.raises(ValueError):
            df.loc(axis=bad_axis)[:, :, ['C1', 'C3']]
|
||||
|
||||
def test_per_axis_per_level_setitem(self):
    """Check ``.loc`` setitem with per-axis, per-level indexers.

    Each label-based assignment is compared with the same assignment made
    positionally through ``.iloc`` on a fresh copy of the frame.
    """
    # test index maker
    idx = pd.IndexSlice

    # test multi-index slicing with per axis and per index controls
    index = MultiIndex.from_tuples(
        [('A', 1), ('A', 2), ('A', 3), ('B', 1)],
        names=['one', 'two'])
    columns = MultiIndex.from_tuples(
        [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
        names=['lvl0', 'lvl1'])

    df_orig = DataFrame(np.arange(16, dtype='int64').reshape(4, 4),
                        index=index, columns=columns)
    df_orig = df_orig.sort_index(axis=0).sort_index(axis=1)

    def check_assign(loc_key, iloc_key, value=100, axis=None):
        # Write `value` through .loc (optionally bound to one axis) and
        # compare with writing 100 through .iloc at `iloc_key`.
        df = df_orig.copy()
        target = df.loc if axis is None else df.loc(axis=axis)
        target[loc_key] = value
        expected = df_orig.copy()
        expected.iloc[iloc_key] = 100
        tm.assert_frame_equal(df, expected)

    everything = (slice(None), slice(None))

    # identity
    check_assign((everything, slice(None)), everything)
    check_assign(everything, everything, axis=0)
    check_assign((everything, everything), everything)
    check_assign((slice(None), everything), everything)

    # index
    check_assign(((slice(None), [1]), slice(None)), [0, 3])
    check_assign(((slice(None), 1), slice(None)), [0, 3])
    check_assign((slice(None), 1), [0, 3], axis=0)

    # columns
    check_assign((slice(None), (slice(None), ['foo'])),
                 (slice(None), [1, 3]))

    # both
    check_assign(((slice(None), 1), (slice(None), ['foo'])),
                 ([0, 3], [1, 3]))
    check_assign((idx[:, 1], idx[:, ['foo']]), ([0, 3], [1, 3]))
    check_assign(('A', 'a'), (slice(0, 3), slice(0, 2)))

    # setting with a list-like
    check_assign(((slice(None), 1), (slice(None), ['foo'])),
                 ([0, 3], [1, 3]),
                 value=np.array([[100, 100], [100, 100]], dtype='int64'))

    # not enough values
    df = df_orig.copy()

    with pytest.raises(ValueError):
        df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array(
            [[100], [100, 100]], dtype='int64')

    with pytest.raises(ValueError):
        df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array(
            [100, 100, 100, 100], dtype='int64')

    # with an alignable rhs
    df = df_orig.copy()
    selected = df.loc[(slice(None), 1), (slice(None), ['foo'])]
    df.loc[(slice(None), 1), (slice(None), ['foo'])] = selected * 5
    expected = df_orig.copy()
    expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5
    tm.assert_frame_equal(df, expected)

    df = df_orig.copy()
    df.loc[(slice(None), 1), (slice(None), ['foo'])] *= df.loc[
        (slice(None), 1), (slice(None), ['foo'])]
    expected = df_orig.copy()
    expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
    tm.assert_frame_equal(df, expected)

    # rhs carries an extra ('c', 'bah') column that the target does not have
    rhs = df_orig.loc[(slice(None), 1), (slice(None), ['foo'])].copy()
    rhs.loc[:, ('c', 'bah')] = 10
    df = df_orig.copy()
    df.loc[(slice(None), 1), (slice(None), ['foo'])] *= rhs
    expected = df_orig.copy()
    expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_multiindex_label_slicing_with_negative_step(self):
    """Label slices with step -1 match the paired positional slices.

    Every pair is checked through ``.loc``, plain ``[]`` and ``.ix``.
    """
    s = Series(np.arange(20),
               MultiIndex.from_product([list('abcde'), np.arange(4)]))
    SLC = pd.IndexSlice

    def assert_slices_equivalent(l_slc, i_slc):
        positional = s.iloc[i_slc]
        tm.assert_series_equal(s.loc[l_slc], positional)
        tm.assert_series_equal(s[l_slc], s.iloc[i_slc])
        # .ix access may emit a warning; record it instead of surfacing it
        with catch_warnings(record=True):
            tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc])

    pairs = [
        (SLC[::-1], SLC[::-1]),

        (SLC['d'::-1], SLC[15::-1]),
        (SLC[('d', )::-1], SLC[15::-1]),

        (SLC[:'d':-1], SLC[:11:-1]),
        (SLC[:('d', ):-1], SLC[:11:-1]),

        (SLC['d':'b':-1], SLC[15:3:-1]),
        (SLC[('d', ):'b':-1], SLC[15:3:-1]),
        (SLC['d':('b', ):-1], SLC[15:3:-1]),
        (SLC[('d', ):('b', ):-1], SLC[15:3:-1]),
        (SLC['b':'d':-1], SLC[:0]),

        (SLC[('c', 2)::-1], SLC[10::-1]),
        (SLC[:('c', 2):-1], SLC[:9:-1]),
        (SLC[('e', 0):('c', 2):-1], SLC[16:9:-1]),
    ]
    for label_slice, positional_slice in pairs:
        assert_slices_equivalent(label_slice, positional_slice)
|
||||
|
||||
def test_multiindex_slice_first_level(self):
    """Select a first-level label after slicing the second level (GH 12697)."""
    letters = ['a', 'b', 'c', 'd']
    mi = MultiIndex.from_product([letters, np.arange(500)])
    df = DataFrame(list(range(2000)), index=mi, columns=['Test'])

    # keep only second-level labels 30..70 (inclusive) for every letter
    window = df.loc[pd.IndexSlice[:, 30:70], :]

    # 'a' is the first group of 500 rows, 'd' the fourth (offset 1500)
    for letter, offset in (('a', 0), ('d', 1500)):
        result = window.loc[letter]
        expected = DataFrame(list(range(offset + 30, offset + 71)),
                             columns=['Test'], index=range(30, 71))
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_int_series_slicing(
        self, multiindex_year_month_day_dataframe_random_data):
    """Integer slices ``[5:]`` on the year/month/day fixture are positional."""
    ymd = multiindex_year_month_day_dataframe_random_data
    col_a = ymd['A']

    # getitem on the Series
    tm.assert_series_equal(col_a[5:], col_a.reindex(col_a.index[5:]))

    # setitem on the Series, compared with writing into a copy's array
    reference = ymd['A'].copy()
    col_a[5:] = 0
    reference.values[5:] = 0
    tm.assert_numpy_array_equal(col_a.values, reference.values)

    # getitem on the DataFrame
    tm.assert_frame_equal(ymd[5:], ymd.reindex(col_a.index[5:]))
|
||||
|
||||
def test_non_reducing_slice_on_multiindex(self):
    """``_non_reducing_slice`` keeps a single MultiIndex column as a frame.

    GH 19861.
    """
    data = {
        ('a', 'd'): [1, 4],
        ('a', 'c'): [2, 3],
        ('b', 'c'): [3, 2],
        ('b', 'd'): [4, 1],
    }
    df = pd.DataFrame(data, index=[0, 1])

    idx = pd.IndexSlice
    # all rows, the single column ('b', 'd')
    converted = _non_reducing_slice(idx[:, idx['b', 'd']])

    result = df.loc[converted]
    expected = pd.DataFrame({('b', 'd'): [4, 1]})
    tm.assert_frame_equal(result, expected)
|
||||
+92
@@ -0,0 +1,92 @@
|
||||
import numpy as np
|
||||
from numpy.random import randn
|
||||
|
||||
from pandas.compat import lzip
|
||||
|
||||
from pandas import DataFrame, MultiIndex, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexSorted(object):
    """Indexing tests on MultiIndexes that are not lexsorted."""

    def test_getitem_multilevel_index_tuple_not_sorted(self):
        """``.loc`` with a one-element MultiIndex key on an unsorted index."""
        level_names = list("abc")
        raw = DataFrame([[0, 1, 0, "x"], [0, 0, 1, "y"]],
                        columns=level_names + ["data"])
        df = raw.set_index(level_names)

        # query with the first index entry only
        rs = df.loc[df.index[:1], "data"]

        xp_index = MultiIndex.from_tuples([(0, 1, 0)],
                                          names=['a', 'b', 'c'])
        xp = Series(['x'], index=xp_index, name='data')
        tm.assert_series_equal(rs, xp)

    def test_getitem_slice_not_sorted(self, multiindex_dataframe_random_data):
        """``iloc`` accepts an ``np.int32`` slice bound."""
        transposed = multiindex_dataframe_random_data.sort_index(level=1).T

        # buglet with int typechecking
        result = transposed.iloc[:, :np.int32(3)]
        expected = transposed.reindex(columns=transposed.columns[:3])
        tm.assert_frame_equal(result, expected)

    def test_frame_getitem_not_sorted2(self):
        """``sort_index`` restores lexsort order after relabelling (13431)."""
        raw = DataFrame({'col1': ['b', 'd', 'b', 'a'],
                         'col2': [3, 1, 1, 2],
                         'data': ['one', 'two', 'three', 'four']})

        df2 = raw.set_index(['col1', 'col2'])
        untouched = df2.copy()

        # replace levels and codes of 'col1' in place
        df2.index.set_levels(['b', 'd', 'a'], level='col1', inplace=True)
        df2.index.set_codes([0, 1, 0, 2], level='col1', inplace=True)
        assert not df2.index.is_lexsorted()
        assert not df2.index.is_monotonic

        # the relabelled index still compares equal to the original one
        assert untouched.index.equals(df2.index)

        expected = df2.sort_index()
        assert expected.index.is_lexsorted()
        assert expected.index.is_monotonic

        result = df2.sort_index(level=0)
        assert result.index.is_lexsorted()
        assert result.index.is_monotonic
        tm.assert_frame_equal(result, expected)

    def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data):
        """Select top-level ``'foo'`` on columns, then on rows after ``.T``."""
        df = multiindex_dataframe_random_data.T
        df['foo', 'four'] = 'foo'

        # one array per column level, taken before the frame is flipped
        level_arrays = [np.array(x) for x in zip(*df.columns.values)]
        is_foo = level_arrays[0] == 'foo'

        by_getitem = df['foo']
        by_loc = df.loc[:, 'foo']
        expected = df.reindex(columns=df.columns[is_foo])
        expected.columns = expected.columns.droplevel(0)
        tm.assert_frame_equal(by_getitem, expected)
        tm.assert_frame_equal(by_loc, expected)

        df = df.T
        by_xs = df.xs('foo')
        by_loc = df.loc['foo']
        expected = df.reindex(df.index[is_foo])
        expected.index = expected.index.droplevel(0)
        tm.assert_frame_equal(by_xs, expected)
        tm.assert_frame_equal(by_loc, expected)

    def test_series_getitem_not_sorted(self):
        """Select a top-level label from a Series with an unsorted index."""
        first = ['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo']
        second = ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']
        index = MultiIndex.from_tuples(lzip(first, second))
        s = Series(randn(8), index=index)

        level_arrays = [np.array(x) for x in zip(*index.values)]

        by_getitem = s['qux']
        by_loc = s.loc['qux']
        expected = s[level_arrays[0] == 'qux']
        expected.index = expected.index.droplevel(0)
        tm.assert_series_equal(by_getitem, expected)
        tm.assert_series_equal(by_loc, expected)
|
||||
@@ -0,0 +1,237 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange, product as cart_product
|
||||
|
||||
from pandas import DataFrame, Index, MultiIndex, Series, concat, date_range
|
||||
import pandas.core.common as com
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def four_level_index_dataframe():
    """3x5 float DataFrame on a four-level index (one, two, three, four)."""
    values = np.array([
        [-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
        [0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
        [-0.6662, -0.5243, -0.358, 0.89145, 2.5838],
    ])
    levels = [['a', 'x'], ['b', 'q'], [10.0032, 20.0, 30.0], [3, 4, 5]]
    codes = [[0, 0, 1], [0, 1, 1], [0, 1, 2], [2, 1, 0]]
    index = MultiIndex(levels=levels, codes=codes,
                       names=['one', 'two', 'three', 'four'])
    return DataFrame(values, index=index, columns=list('ABCDE'))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('key, level, exp_arr, exp_index', [
    ('a', 'lvl0', lambda arr: arr[:, 0:2],
     Index(['bar', 'foo'], name='lvl1')),
    ('foo', 'lvl1', lambda arr: arr[:, 1:2],
     Index(['a'], name='lvl0'))
])
def test_xs_named_levels_axis_eq_1(key, level, exp_arr, exp_index):
    """``xs`` along the columns with a level given by name (see gh-2903).

    ``exp_arr`` slices the raw array down to the expected values and
    ``exp_index`` is the column index expected on the result.
    """
    raw = np.random.randn(4, 4)
    columns = MultiIndex(
        levels=[['a', 'b'], ['bar', 'foo', 'hello', 'world']],
        codes=[[0, 0, 1, 1], [0, 1, 2, 3]],
        names=['lvl0', 'lvl1'])
    df = DataFrame(raw, columns=columns)

    result = df.xs(key, level=level, axis=1)
    expected = DataFrame(exp_arr(raw), columns=exp_index)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_xs_values(multiindex_dataframe_random_data):
    """``xs(('bar', 'two'))`` returns the values of row position 4."""
    frame = multiindex_dataframe_random_data
    via_xs = frame.xs(('bar', 'two')).values
    tm.assert_almost_equal(via_xs, frame.values[4])
|
||||
|
||||
|
||||
def test_xs_loc_equality(multiindex_dataframe_random_data):
    """``xs`` and ``.loc`` agree for a full index tuple."""
    frame = multiindex_dataframe_random_data
    key = ('bar', 'two')
    tm.assert_series_equal(frame.xs(key), frame.loc[key])
|
||||
|
||||
|
||||
def test_xs_missing_values_in_index():
    """Missing values in the returned index should be preserved (gh-6574)."""
    records = [
        ('a', 'abcde', 1),
        ('b', 'bbcde', 2),
        ('y', 'yzcde', 25),
        ('z', 'xbcde', 24),
        ('z', None, 26),
        ('z', 'zbcde', 25),
        ('z', 'ybcde', 26),
    ]
    raw = DataFrame(records, columns=['a1', 'a2', 'cnt'])
    df = raw.set_index(['a1', 'a2'])

    # the None under 'z' must come back as NaN in the remaining level
    remaining = Index(['xbcde', np.nan, 'zbcde', 'ybcde'], name='a2')
    expected = DataFrame({'cnt': [24, 26, 25, 26]}, index=remaining)

    result = df.xs('z', level='a1')
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('key, level', [
    ('one', 'second'),
    (['one'], ['second'])
])
def test_xs_with_duplicates(key, level, multiindex_dataframe_random_data):
    """``xs`` on a duplicated index, scalar and list arguments (gh-13719)."""
    frame = multiindex_dataframe_random_data

    # stacking the frame on itself makes every index entry appear twice
    doubled = concat([frame] * 2)
    assert doubled.index.is_unique is False

    expected = concat([frame.xs('one', level='second')] * 2)
    tm.assert_frame_equal(doubled.xs(key, level=level), expected)
|
||||
|
||||
|
||||
def test_xs_level(multiindex_dataframe_random_data):
    """``xs`` on the ``'second'`` level matches boolean-mask selection."""
    frame = multiindex_dataframe_random_data
    result = frame.xs('two', level='second')

    second_is_two = frame.index.get_level_values(1) == 'two'
    expected = frame[second_is_two]
    # only the first level is left on the expected index
    expected.index = Index(['foo', 'bar', 'baz', 'qux'], name='first')

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_xs_level_eq_2():
    """``xs`` by integer level position on a three-level index."""
    raw = np.random.randn(3, 5)
    same_codes = [2, 0, 1]
    index = MultiIndex(
        levels=[['a', 'p', 'x'], ['b', 'q', 'y'], ['c', 'r', 'z']],
        codes=[same_codes, same_codes, same_codes])
    df = DataFrame(raw, index=index)

    # code 0 sits at position 1, so ('a', 'b', 'c') is the second row
    expected = DataFrame(raw[1:2], index=[['a'], ['b']])
    tm.assert_frame_equal(df.xs('c', level=2), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer', [
    lambda df: df.xs(('a', 4), level=['one', 'four']),
    lambda df: df.xs('a').xs(4, level='four')
])
def test_xs_level_multiple(indexer, four_level_index_dataframe):
    """Selecting on levels 'one' and 'four' leaves levels 'two' and 'three'.

    Both indexers (one multi-level ``xs`` call, and two chained ``xs`` calls)
    are expected to return the same single-row frame.
    """
    frame = four_level_index_dataframe

    remaining_index = MultiIndex(
        levels=[['q'], [20.0]],
        codes=[[0], [0]],
        names=['two', 'three'])
    expected = DataFrame(
        [[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]],
        index=remaining_index,
        columns=list('ABCDE'))

    tm.assert_frame_equal(indexer(frame), expected)
|
||||
|
||||
|
||||
def test_xs_setting_with_copy_error(multiindex_dataframe_random_data):
    """Assigning into a level ``xs`` result raises SettingWithCopyError."""
    frame = multiindex_dataframe_random_data
    # this is a copy in 0.14
    cross_section = frame.xs('two', level='second')

    # writing through the cross-section is expected to be rejected with
    # SettingWithCopyError, as we are trying to write a view
    expected_message = (
        'A value is trying to be set on a copy of a slice from a DataFrame')
    with pytest.raises(com.SettingWithCopyError, match=expected_message):
        cross_section[:] = 10
|
||||
|
||||
|
||||
def test_xs_setting_with_copy_error_multiple(four_level_index_dataframe):
    """Assigning into a multi-level ``xs`` result raises SettingWithCopyError."""
    frame = four_level_index_dataframe
    # this is a copy in 0.14
    cross_section = frame.xs(('a', 4), level=['one', 'four'])

    # writing through the cross-section is expected to be rejected with
    # SettingWithCopyError, as we are trying to write a view
    expected_message = (
        'A value is trying to be set on a copy of a slice from a DataFrame')
    with pytest.raises(com.SettingWithCopyError, match=expected_message):
        cross_section[:] = 10
|
||||
|
||||
|
||||
def test_xs_integer_key():
    """``xs`` with an integer key on a named level matches ``.loc`` lookup.

    See gh-2107. The index is the cartesian product of four integer "dates"
    and the five characters of ``'abcde'``; the data itself is random.
    """
    dates = list(range(20111201, 20111205))
    ids = 'abcde'
    # materialise the (date, id) pairs directly instead of copying them
    # through an identity comprehension
    index = MultiIndex.from_tuples(
        list(cart_product(dates, ids)),
        names=['date', 'secid'])
    df = DataFrame(
        np.random.randn(len(index), 3),
        index=index,
        columns=['X', 'Y', 'Z'])

    result = df.xs(20111201, level='date')
    expected = df.loc[20111201, :]
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer', [
    lambda df: df.xs('a', level=0),
    lambda df: df.xs('a')
])
def test_xs_level0(indexer, four_level_index_dataframe):
    """``xs('a')`` with and without an explicit ``level=0`` agree.

    Both indexers are expected to return the same two rows, indexed by the
    levels 'two', 'three' and 'four'.
    """
    frame = four_level_index_dataframe

    remaining_index = MultiIndex(
        levels=[['b', 'q'], [10.0032, 20.0], [4, 5]],
        codes=[[0, 1], [0, 1], [1, 0]],
        names=['two', 'three', 'four'])
    expected_rows = [
        [-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
        [0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
    ]
    expected = DataFrame(
        expected_rows, index=remaining_index, columns=list('ABCDE'))

    tm.assert_frame_equal(indexer(frame), expected)
|
||||
|
||||
|
||||
def test_xs_level_series(multiindex_dataframe_random_data):
    """Series ``[:, 'two']`` lookup matches column 'A' of a level-1 ``xs``."""
    # this test is not explicitly testing .xs functionality
    # TODO: move to another module or refactor
    frame = multiindex_dataframe_random_data
    column_a = frame['A']

    expected = frame.xs('two', level=1)['A']

    tm.assert_series_equal(column_a[:, 'two'], expected)
|
||||
|
||||
|
||||
def test_xs_level_series_ymd(multiindex_year_month_day_dataframe_random_data):
    """Series ``[2000, 5]`` lookup matches column 'A' of ``.loc[2000, 5]``."""
    # this test is not explicitly testing .xs functionality
    # TODO: move to another module or refactor
    frame = multiindex_year_month_day_dataframe_random_data
    column_a = frame['A']

    expected = frame.loc[2000, 5]['A']

    tm.assert_series_equal(column_a[2000, 5], expected)
|
||||
|
||||
|
||||
def test_xs_level_series_slice_not_implemented(
        multiindex_year_month_day_dataframe_random_data):
    """Series lookup with a key followed by a slice raises ``TypeError``.

    The error message is expected to contain the offending key tuple.
    """
    # this test is not explicitly testing .xs functionality
    # TODO: move to another module or refactor
    # not implementing this for now
    frame = multiindex_year_month_day_dataframe_random_data
    column_a = frame['A']

    expected_message = r'\(2000, slice\(3, 4, None\)\)'
    with pytest.raises(TypeError, match=expected_message):
        column_a[2000, 3:4]
|
||||
|
||||
|
||||
def test_series_getitem_multiindex_xs():
    """``Series.xs`` accepts a date string on a level of ``date_range`` values.

    See GH6258. Selecting the first date on level 1 leaves one value per
    label of level 0.
    """
    timestamps = list(date_range('20130903', periods=3))
    index = MultiIndex.from_product([list('AB'), timestamps])
    series = Series([1, 3, 4, 1, 3, 4], index=index)

    expected = Series([1, 1], index=list('AB'))

    tm.assert_series_equal(series.xs('20130903', level=1), expected)
|
||||
|
||||
|
||||
def test_series_getitem_multiindex_xs_by_label():
    """``Series.xs`` selects by level name and keeps the other level's name.

    See GH5684.
    """
    tuples = [('a', 'one'), ('a', 'two'), ('b', 'one'), ('b', 'two')]
    series = Series([1, 2, 3, 4], index=MultiIndex.from_tuples(tuples))
    series.index.set_names(['L1', 'L2'], inplace=True)

    # after selecting 'one' on 'L2', only level 'L1' is left in the index
    expected = Series([1, 3], index=['a', 'b'])
    expected.index.set_names(['L1'], inplace=True)

    tm.assert_series_equal(series.xs('one', level='L2'), expected)
|
||||
Reference in New Issue
Block a user