Static code analysis and corrections
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,307 @@
|
||||
""" common utilities """
|
||||
|
||||
import itertools
|
||||
from warnings import catch_warnings, filterwarnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange
|
||||
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
|
||||
from pandas import (
|
||||
DataFrame, Float64Index, MultiIndex, Panel, Series, UInt64Index,
|
||||
date_range)
|
||||
from pandas.util import testing as tm
|
||||
|
||||
from pandas.io.formats.printing import pprint_thing
|
||||
|
||||
_verbose = False
|
||||
|
||||
|
||||
def _mklbl(prefix, n):
|
||||
return ["%s%s" % (prefix, i) for i in range(n)]
|
||||
|
||||
|
||||
def _axify(obj, key, axis):
|
||||
# create a tuple accessor
|
||||
axes = [slice(None)] * obj.ndim
|
||||
axes[axis] = key
|
||||
return tuple(axes)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class Base(object):
    """ indexing comprehensive base class

    ``setup_method`` builds one Series, DataFrame and Panel per index
    flavour listed in ``_typs`` and groups them into the ``series``,
    ``frame`` and ``panel`` dicts; ``check_result`` then compares two
    indexers against each other over those objects.
    """

    _objs = {'series', 'frame', 'panel'}
    _typs = {'ints', 'uints', 'labels', 'mixed', 'ts', 'floats', 'empty',
             'ts_rev', 'multi'}

    def setup_method(self, method):
        """Create the ``<obj>_<typ>`` fixtures and the per-object dicts."""

        self.series_ints = Series(np.random.rand(4), index=lrange(0, 8, 2))
        self.frame_ints = DataFrame(np.random.randn(4, 4),
                                    index=lrange(0, 8, 2),
                                    columns=lrange(0, 12, 3))
        with catch_warnings(record=True):
            self.panel_ints = Panel(np.random.rand(4, 4, 4),
                                    items=lrange(0, 8, 2),
                                    major_axis=lrange(0, 12, 3),
                                    minor_axis=lrange(0, 16, 4))

        self.series_uints = Series(np.random.rand(4),
                                   index=UInt64Index(lrange(0, 8, 2)))
        self.frame_uints = DataFrame(np.random.randn(4, 4),
                                     index=UInt64Index(lrange(0, 8, 2)),
                                     columns=UInt64Index(lrange(0, 12, 3)))
        self.panel_uints = Panel(np.random.rand(4, 4, 4),
                                 items=UInt64Index(lrange(0, 8, 2)),
                                 major_axis=UInt64Index(lrange(0, 12, 3)),
                                 minor_axis=UInt64Index(lrange(0, 16, 4)))

        self.series_floats = Series(np.random.rand(4),
                                    index=Float64Index(range(0, 8, 2)))
        self.frame_floats = DataFrame(np.random.randn(4, 4),
                                      index=Float64Index(range(0, 8, 2)),
                                      columns=Float64Index(range(0, 12, 3)))
        self.panel_floats = Panel(np.random.rand(4, 4, 4),
                                  items=Float64Index(range(0, 8, 2)),
                                  major_axis=Float64Index(range(0, 12, 3)),
                                  minor_axis=Float64Index(range(0, 16, 4)))

        m_idces = [MultiIndex.from_product([[1, 2], [3, 4]]),
                   MultiIndex.from_product([[5, 6], [7, 8]]),
                   MultiIndex.from_product([[9, 10], [11, 12]])]

        self.series_multi = Series(np.random.rand(4),
                                   index=m_idces[0])
        self.frame_multi = DataFrame(np.random.randn(4, 4),
                                     index=m_idces[0],
                                     columns=m_idces[1])
        self.panel_multi = Panel(np.random.rand(4, 4, 4),
                                 items=m_idces[0],
                                 major_axis=m_idces[1],
                                 minor_axis=m_idces[2])

        self.series_labels = Series(np.random.randn(4), index=list('abcd'))
        self.frame_labels = DataFrame(np.random.randn(4, 4),
                                      index=list('abcd'), columns=list('ABCD'))
        self.panel_labels = Panel(np.random.randn(4, 4, 4),
                                  items=list('abcd'),
                                  major_axis=list('ABCD'),
                                  minor_axis=list('ZYXW'))

        self.series_mixed = Series(np.random.randn(4), index=[2, 4, 'null', 8])
        self.frame_mixed = DataFrame(np.random.randn(4, 4),
                                     index=[2, 4, 'null', 8])
        self.panel_mixed = Panel(np.random.randn(4, 4, 4),
                                 items=[2, 4, 'null', 8])

        self.series_ts = Series(np.random.randn(4),
                                index=date_range('20130101', periods=4))
        self.frame_ts = DataFrame(np.random.randn(4, 4),
                                  index=date_range('20130101', periods=4))
        self.panel_ts = Panel(np.random.randn(4, 4, 4),
                              items=date_range('20130101', periods=4))

        dates_rev = (date_range('20130101', periods=4)
                     .sort_values(ascending=False))
        self.series_ts_rev = Series(np.random.randn(4),
                                    index=dates_rev)
        self.frame_ts_rev = DataFrame(np.random.randn(4, 4),
                                      index=dates_rev)
        self.panel_ts_rev = Panel(np.random.randn(4, 4, 4),
                                  items=dates_rev)

        self.frame_empty = DataFrame({})
        self.series_empty = Series({})
        self.panel_empty = Panel({})

        # form agglomerates: self.series / self.frame / self.panel map each
        # typ to its fixture (None when no such attribute was created)
        for o in self._objs:
            setattr(self, o, {t: getattr(self, '%s_%s' % (o, t), None)
                              for t in self._typs})

    def generate_indices(self, f, values=False):
        """ generate the indices

        if values is True, use the positional range of every axis
        if values is False, use the axis labels themselves
        """

        axes = f.axes
        if values:
            axes = [lrange(len(a)) for a in axes]

        return itertools.product(*axes)

    def get_result(self, obj, method, key, axis):
        """ return the result for this obj with this key and this axis

        ``key`` may be a dict keyed by axis. The pseudo-method 'indexer'
        translates positional ``key`` into labels and then goes through
        ``ix``.
        """

        if isinstance(key, dict):
            key = key[axis]

        # use an artificial conversion to map the key as integers to the labels
        # so ix can work for comparisons
        if method == 'indexer':
            method = 'ix'
            key = obj._get_axis(axis)[key]

        # in case we actually want 0 index slicing
        with catch_warnings(record=True):
            try:
                xp = getattr(obj, method).__getitem__(_axify(obj, key, axis))
            except AttributeError:
                xp = getattr(obj, method).__getitem__(key)

        return xp

    def get_value(self, f, i, values=False):
        """ return the value for the location i """

        # check against values
        if values:
            return f.values[i]

        # otherwise look the location up through .ix, silencing its
        # deprecation warning
        with catch_warnings(record=True):
            filterwarnings("ignore", "\\n.ix", DeprecationWarning)
            return f.ix[i]

    def check_values(self, f, func, values=False):
        """ compare ``getattr(f, func)[i]`` with the expected value for
        every combination ``i`` of axis labels; a None ``f`` is skipped
        """

        if f is None:
            return
        axes = f.axes
        indices = itertools.product(*axes)

        for i in indices:
            result = getattr(f, func)[i]

            # check against values
            if values:
                expected = f.values[i]
            else:
                # equivalent of f[col][row]...
                expected = f
                for a in reversed(i):
                    expected = expected.__getitem__(a)

            tm.assert_almost_equal(result, expected)

    def check_result(self, name, method1, key1, method2, key2, typs=None,
                     objs=None, axes=None, fails=None):
        """ compare ``method1[key1]`` against ``method2[key2]``

        Parameters
        ----------
        name : label used in the verbose report
        method1, key1 : indexer attribute name and key under test
        method2, key2 : reference indexer and key (see ``get_result``)
        typs, objs : optional subsets of ``_typs`` / ``_objs`` to check
        axes : a single axis or a list of axes, default [0, 1, 2]
        fails : None, True (a failed comparison is expected) or the
            exception class(es) the indexing is expected to raise
        """
        def _eq(t, o, a, obj, k1, k2):
            """ compare equal for these 2 keys """

            if a is not None and a > obj.ndim - 1:
                return

            def _print(result, error=None):
                """ format the report line, show it when verbose, return it """
                if error is not None:
                    error = str(error)
                v = ("%-16.16s [%-16.16s]: [typ->%-8.8s,obj->%-8.8s,"
                     "key1->(%-4.4s),key2->(%-4.4s),axis->%s] %s" %
                     (name, result, t, o, method1, method2, a, error or ''))
                if _verbose:
                    pprint_thing(v)
                # returned so callers can use it as an exception message
                return v

            try:
                rs = getattr(obj, method1).__getitem__(_axify(obj, k1, a))

                try:
                    xp = self.get_result(obj, method2, k2, a)
                except Exception:
                    result = 'no comp'
                    _print(result)
                    return

                detail = None

                try:
                    if is_scalar(rs) and is_scalar(xp):
                        assert rs == xp
                    elif xp.ndim == 1:
                        tm.assert_series_equal(rs, xp)
                    elif xp.ndim == 2:
                        tm.assert_frame_equal(rs, xp)
                    elif xp.ndim == 3:
                        tm.assert_panel_equal(rs, xp)
                    result = 'ok'
                except AssertionError as e:
                    detail = str(e)
                    result = 'fail'

                # reverse the checks
                if fails is True:
                    if result == 'fail':
                        result = 'ok (fail)'

                _print(result)
                if not result.startswith('ok'):
                    raise AssertionError(detail)

            except AssertionError:
                raise
            except Exception as err:

                # if we are in fails, the ok, otherwise raise it
                # (True is not a valid isinstance() target, so skip it)
                if (fails is not None and fails is not True and
                        isinstance(err, fails)):
                    result = 'ok (%s)' % type(err).__name__
                    _print(result)
                    return

                result = type(err).__name__
                raise AssertionError(_print(result, error=err))

        if typs is None:
            typs = self._typs

        if objs is None:
            objs = self._objs

        if axes is not None:
            if not isinstance(axes, (tuple, list)):
                axes = [axes]
            else:
                axes = list(axes)
        else:
            axes = [0, 1, 2]

        # check
        for o in objs:
            if o not in self._objs:
                continue

            d = getattr(self, o)
            for a in axes:
                for t in typs:
                    if t not in self._typs:
                        continue

                    obj = d[t]
                    if obj is None:
                        continue

                    def _call(obj=obj):
                        # work on a copy so fixtures are never mutated
                        obj = obj.copy()

                        k2 = key2
                        _eq(t, o, a, obj, key1, k2)

                    # Panel deprecations
                    if isinstance(obj, Panel):
                        with catch_warnings():
                            filterwarnings("ignore", "\nPanel*", FutureWarning)
                            _call()
                    else:
                        _call()
|
||||
@@ -0,0 +1,20 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import index as libindex
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        (libindex.Int64Engine, np.int64),
        (libindex.Int32Engine, np.int32),
        (libindex.Int16Engine, np.int16),
        (libindex.Int8Engine, np.int8),
        (libindex.UInt64Engine, np.uint64),
        (libindex.UInt32Engine, np.uint32),
        (libindex.UInt16Engine, np.uint16),
        (libindex.UInt8Engine, np.uint8),
        (libindex.Float64Engine, np.float64),
        (libindex.Float32Engine, np.float32),
    ],
    ids=lambda pair: pair[0].__name__,
)
def numeric_indexing_engine_type_and_dtype(request):
    """Yield each (index engine class, numpy dtype) pair in turn.

    The test id of every parametrization is the engine class name.
    """
    engine_and_dtype = request.param
    return engine_and_dtype
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
+267
@@ -0,0 +1,267 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Interval, IntervalIndex, Series
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestIntervalIndex(object):
    """Legacy indexing checks for objects indexed by an IntervalIndex."""

    def setup_method(self, method):
        # shared fixture: values 0..4 labelled by the intervals between the
        # breaks 0..5
        breaks = np.arange(6)
        self.s = Series(np.arange(5), IntervalIndex.from_breaks(breaks))

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_loc_with_scalar(self):
        ser = self.s

        # scalar slices
        first_three = ser.iloc[:3]
        for key in (slice(None, 3), slice(None, 2.5), slice(0.1, 2.5),
                    slice(-1, 3)):
            tm.assert_series_equal(first_three, ser.loc[key])

        # lists of scalars
        middle = ser.iloc[1:4]
        for labels in ([1.5, 2.5, 3.5], [2, 3, 4], [1.5, 3, 4]):
            tm.assert_series_equal(middle, ser.loc[labels])

        # boolean mask
        tm.assert_series_equal(ser.iloc[2:5], ser.loc[ser >= 2])

    # TODO: check this behavior is consistent with test_interval_new.py
    def test_getitem_with_scalar(self):
        ser = self.s

        # scalar slices
        first_three = ser.iloc[:3]
        for key in (slice(None, 3), slice(None, 2.5), slice(0.1, 2.5),
                    slice(-1, 3)):
            tm.assert_series_equal(first_three, ser[key])

        # lists of scalars
        middle = ser.iloc[1:4]
        for labels in ([1.5, 2.5, 3.5], [2, 3, 4], [1.5, 3, 4]):
            tm.assert_series_equal(middle, ser[labels])

        # boolean mask
        tm.assert_series_equal(ser.iloc[2:5], ser[ser >= 2])

    # TODO: check this behavior is consistent with test_interval_new.py
    @pytest.mark.parametrize('direction', ['increasing', 'decreasing'])
    def test_nonoverlapping_monotonic(self, direction, closed):
        bounds = [(0, 1), (2, 3), (4, 5)]
        if direction == 'decreasing':
            bounds = bounds[::-1]

        idx = IntervalIndex.from_tuples(bounds, closed=closed)
        ser = Series(list('abc'), idx)

        # an endpoint is only a valid key when that side is closed
        endpoint_cases = ((idx.left, idx.closed_left),
                          (idx.right, idx.closed_right))
        for endpoints, side_closed in endpoint_cases:
            for key, expected in zip(endpoints, ser):
                if side_closed:
                    assert ser[key] == expected
                    assert ser.loc[key] == expected
                else:
                    with pytest.raises(KeyError):
                        ser[key]
                    with pytest.raises(KeyError):
                        ser.loc[key]

        # midpoints are always contained
        for key, expected in zip(idx.mid, ser):
            assert ser[key] == expected
            assert ser.loc[key] == expected

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_with_interval(self):
        ser = self.s

        # exact interval -> scalar
        assert ser.loc[Interval(0, 1)] == 0
        assert ser[Interval(0, 1)] == 0

        # interval keys selecting the last two rows
        for key in (Interval(3, 6), [Interval(3, 6)], [Interval(3, 5)]):
            expected = ser.iloc[3:5]
            result = ser.loc[key]
            tm.assert_series_equal(expected, result)

        # missing
        for absent in (Interval(-2, 0), Interval(5, 6)):
            with pytest.raises(KeyError):
                ser.loc[absent]

            with pytest.raises(KeyError):
                ser[absent]

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_with_slices(self):
        ser = self.s

        # slice of interval
        open_ended = slice(Interval(3, 6), None)
        with pytest.raises(NotImplementedError):
            ser.loc[open_ended]

        with pytest.raises(NotImplementedError):
            ser[open_ended]

        tm.assert_series_equal(ser.iloc[3:5], ser[[Interval(3, 6)]])

        # slice of scalar with step != 1
        with pytest.raises(ValueError):
            ser[slice(0, 4, 2)]

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_with_overlaps(self):
        ser = self.s
        expected = ser.iloc[[3, 4, 3, 4]]
        result = ser.loc[[Interval(3, 6), Interval(3, 6)]]
        tm.assert_series_equal(expected, result)

        idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
        overlapping = Series(range(len(idx)), index=idx)

        # each of these keys selects the whole series
        selectors = [
            lambda obj: obj[4],
            lambda obj: obj[[4]],
            lambda obj: obj.loc[[4]],
            lambda obj: obj[Interval(3, 5)],
            lambda obj: obj.loc[Interval(3, 5)],
        ]
        for select in selectors:
            result = select(overlapping)
            tm.assert_series_equal(overlapping, result)

        # doesn't intersect unique set of intervals
        with pytest.raises(KeyError):
            overlapping[[Interval(3, 5)]]

        with pytest.raises(KeyError):
            overlapping.loc[[Interval(3, 5)]]

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_non_unique(self):
        idx = IntervalIndex.from_tuples([(1, 3), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        assert ser.loc[Interval(1, 3)] == 0

        result = ser.loc[[Interval(1, 3)]]
        tm.assert_series_equal(ser.iloc[0:1], result)

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_non_unique_moar(self):
        idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        result = ser.loc[Interval(1, 3)]
        tm.assert_series_equal(ser.iloc[[0, 1]], result)

        # non-unique index and slices not allowed
        open_ended = slice(Interval(1, 3), None)
        with pytest.raises(ValueError):
            ser.loc[open_ended]

        with pytest.raises(ValueError):
            ser[open_ended]

        # non-unique
        with pytest.raises(ValueError):
            ser[[Interval(1, 3)]]

    # TODO: check this behavior is consistent with test_interval_new.py
    def test_non_matching(self):
        ser = self.s

        # this is a departure from our current
        # indexing scheme, but simpler
        for labels in ([-1, 3, 4, 5], [-1, 3]):
            with pytest.raises(KeyError):
                ser.loc[labels]

    def test_large_series(self):
        size = 1000000
        ser = Series(np.arange(size),
                     index=IntervalIndex.from_breaks(np.arange(size + 1)))

        # the three spellings of the same slice must agree
        implicit_start = ser.loc[:80000]
        explicit_start = ser.loc[0:80000]
        explicit_step = ser.loc[0:80000:1]
        tm.assert_series_equal(implicit_start, explicit_start)
        tm.assert_series_equal(implicit_start, explicit_step)

    def test_loc_getitem_frame(self):
        df = DataFrame({'A': range(10)})
        bins = pd.cut(df.A, 5)
        df['B'] = bins
        df = df.set_index('B')

        result = df.loc[4]
        expected = df.iloc[4:6]
        tm.assert_frame_equal(result, expected)

        with pytest.raises(KeyError):
            df.loc[10]

        # single list-like
        result = df.loc[[4]]
        expected = df.iloc[4:6]
        tm.assert_frame_equal(result, expected)

        # non-unique
        result = df.loc[[4, 5]]
        expected = df.take([4, 5, 4, 5])
        tm.assert_frame_equal(result, expected)

        with pytest.raises(KeyError):
            df.loc[[10]]

        # partial missing
        with pytest.raises(KeyError):
            df.loc[[10, 4]]
|
||||
+246
@@ -0,0 +1,246 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Interval, IntervalIndex, Series
|
||||
import pandas.util.testing as tm
|
||||
|
||||
pytestmark = pytest.mark.skip(reason="new indexing tests for issue 16316")
|
||||
|
||||
|
||||
class TestIntervalIndex(object):
    """Target indexing semantics for IntervalIndex (issue 16316).

    The module is currently skipped through ``pytestmark``; these tests
    describe the intended behaviour.
    """

    def setup_method(self, method):
        # shared fixture: values 0..4 labelled by the intervals between the
        # breaks 0..5
        self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))

    def test_loc_with_interval(self):

        # loc with single label / list of labels:
        #   - Intervals: only exact matches
        #   - scalars: those that contain it

        s = self.s

        expected = 0
        result = s.loc[Interval(0, 1)]
        assert result == expected
        result = s[Interval(0, 1)]
        assert result == expected

        expected = s.iloc[3:5]
        result = s.loc[[Interval(3, 4), Interval(4, 5)]]
        tm.assert_series_equal(expected, result)
        result = s[[Interval(3, 4), Interval(4, 5)]]
        tm.assert_series_equal(expected, result)

        # missing or not exact
        with pytest.raises(KeyError):
            s.loc[Interval(3, 5, closed='left')]

        with pytest.raises(KeyError):
            s[Interval(3, 5, closed='left')]

        with pytest.raises(KeyError):
            s[Interval(3, 5)]

        with pytest.raises(KeyError):
            s.loc[Interval(3, 5)]

        with pytest.raises(KeyError):
            s.loc[Interval(-2, 0)]

        with pytest.raises(KeyError):
            s[Interval(-2, 0)]

        with pytest.raises(KeyError):
            s.loc[Interval(5, 6)]

        with pytest.raises(KeyError):
            s[Interval(5, 6)]

    def test_loc_with_scalar(self):

        # loc with single label / list of labels:
        #   - Intervals: only exact matches
        #   - scalars: those that contain it

        s = self.s

        assert s.loc[1] == 0
        assert s.loc[1.5] == 1
        assert s.loc[2] == 1

        # TODO with __getitem__ same rules as loc, or positional ?
        # assert s[1] == 0
        # assert s[1.5] == 1
        # assert s[2] == 1

        expected = s.iloc[1:4]
        tm.assert_series_equal(expected, s.loc[[1.5, 2.5, 3.5]])
        tm.assert_series_equal(expected, s.loc[[2, 3, 4]])
        tm.assert_series_equal(expected, s.loc[[1.5, 3, 4]])

        expected = s.iloc[[1, 1, 2, 1]]
        tm.assert_series_equal(expected, s.loc[[1.5, 2, 2.5, 1.5]])

        expected = s.iloc[2:5]
        tm.assert_series_equal(expected, s.loc[s >= 2])

    def test_loc_with_slices(self):

        # loc with slices:
        #   - Interval objects: only works with exact matches
        #   - scalars: only works for non-overlapping, monotonic intervals,
        #     and start/stop select location based on the interval that
        #     contains them:
        #    (slice_loc(start, stop) == (idx.get_loc(start), idx.get_loc(stop))

        s = self.s

        # slice of interval

        expected = s.iloc[:3]
        result = s.loc[Interval(0, 1):Interval(2, 3)]
        tm.assert_series_equal(expected, result)
        result = s[Interval(0, 1):Interval(2, 3)]
        tm.assert_series_equal(expected, result)

        # Interval(3, 4) is the label at position 3 and a label slice
        # includes its start, so the result begins at position 3
        expected = s.iloc[3:]
        result = s.loc[Interval(3, 4):]
        tm.assert_series_equal(expected, result)
        result = s[Interval(3, 4):]
        tm.assert_series_equal(expected, result)

        with pytest.raises(KeyError):
            s.loc[Interval(3, 6):]

        with pytest.raises(KeyError):
            s[Interval(3, 6):]

        with pytest.raises(KeyError):
            s.loc[Interval(3, 4, closed='left'):]

        with pytest.raises(KeyError):
            s[Interval(3, 4, closed='left'):]

        # TODO with non-existing intervals ?
        # s.loc[Interval(-1, 0):Interval(2, 3)]

        # slice of scalar

        expected = s.iloc[:3]
        tm.assert_series_equal(expected, s.loc[:3])
        tm.assert_series_equal(expected, s.loc[:2.5])
        tm.assert_series_equal(expected, s.loc[0.1:2.5])

        # TODO should this work? (-1 is not contained in any of the Intervals)
        # tm.assert_series_equal(expected, s.loc[-1:3])

        # TODO with __getitem__ same rules as loc, or positional ?
        # tm.assert_series_equal(expected, s[:3])
        # tm.assert_series_equal(expected, s[:2.5])
        # tm.assert_series_equal(expected, s[0.1:2.5])

        # slice of scalar with step != 1
        with pytest.raises(NotImplementedError):
            s[0:4:2]

    def test_loc_with_overlap(self):

        idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
        s = Series(range(len(idx)), index=idx)

        # scalar
        expected = s
        result = s.loc[4]
        tm.assert_series_equal(expected, result)

        result = s[4]
        tm.assert_series_equal(expected, result)

        result = s.loc[[4]]
        tm.assert_series_equal(expected, result)

        result = s[[4]]
        tm.assert_series_equal(expected, result)

        # interval: an exact match yields a scalar, so compare with ==
        # (assert_series_equal cannot compare plain scalars)
        expected = 0
        result = s.loc[Interval(1, 5)]
        assert result == expected

        result = s[Interval(1, 5)]
        assert result == expected

        expected = s
        result = s.loc[[Interval(1, 5), Interval(3, 7)]]
        tm.assert_series_equal(expected, result)

        result = s[[Interval(1, 5), Interval(3, 7)]]
        tm.assert_series_equal(expected, result)

        with pytest.raises(KeyError):
            s.loc[Interval(3, 5)]

        with pytest.raises(KeyError):
            s.loc[[Interval(3, 5)]]

        with pytest.raises(KeyError):
            s[Interval(3, 5)]

        with pytest.raises(KeyError):
            s[[Interval(3, 5)]]

        # slices with interval (only exact matches)
        expected = s
        result = s.loc[Interval(1, 5):Interval(3, 7)]
        tm.assert_series_equal(expected, result)

        result = s[Interval(1, 5):Interval(3, 7)]
        tm.assert_series_equal(expected, result)

        with pytest.raises(KeyError):
            s.loc[Interval(1, 6):Interval(3, 8)]

        with pytest.raises(KeyError):
            s[Interval(1, 6):Interval(3, 8)]

        # slices with scalar raise for overlapping intervals
        # TODO KeyError is the appropriate error?
        with pytest.raises(KeyError):
            s.loc[1:4]

    def test_non_unique(self):

        idx = IntervalIndex.from_tuples([(1, 3), (3, 7)])
        s = Series(range(len(idx)), index=idx)

        result = s.loc[Interval(1, 3)]
        assert result == 0

        result = s.loc[[Interval(1, 3)]]
        expected = s.iloc[0:1]
        tm.assert_series_equal(expected, result)

    def test_non_unique_moar(self):

        idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)])
        s = Series(range(len(idx)), index=idx)

        expected = s.iloc[[0, 1]]
        result = s.loc[Interval(1, 3)]
        tm.assert_series_equal(expected, result)

        expected = s
        result = s.loc[Interval(1, 3):]
        tm.assert_series_equal(expected, result)

        expected = s
        result = s[Interval(1, 3):]
        tm.assert_series_equal(expected, result)

        expected = s.iloc[[0, 1]]
        result = s[[Interval(1, 3)]]
        tm.assert_series_equal(expected, result)
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,31 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, Index, MultiIndex
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def multiindex_dataframe_random_data():
    """DataFrame with 2 level MultiIndex with random data"""
    first_labels = ['foo', 'bar', 'baz', 'qux']
    second_labels = ['one', 'two', 'three']
    first_codes = [0, 0, 0, 1, 1, 2, 2, 3, 3, 3]
    second_codes = [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]

    rows = MultiIndex(levels=[first_labels, second_labels],
                      codes=[first_codes, second_codes],
                      names=['first', 'second'])
    data = np.random.randn(10, 3)
    cols = Index(['A', 'B', 'C'], name='exp')
    return DataFrame(data, index=rows, columns=cols)
|
||||
|
||||
|
||||
@pytest.fixture
def multiindex_year_month_day_dataframe_random_data():
    """DataFrame with 3 level MultiIndex (year, month, day) covering
    first 100 business days from 2000-01-01 with random data"""
    time_frame = tm.makeTimeDataFrame(100)
    by_year_month_day = [lambda stamp: stamp.year,
                         lambda stamp: stamp.month,
                         lambda stamp: stamp.day]
    ymd = time_frame.groupby(by_year_month_day).sum()

    # use Int64Index, to make sure things work
    int_levels = [level.astype('i8') for level in ymd.index.levels]
    ymd.index.set_levels(int_levels, inplace=True)
    ymd.index.set_names(['year', 'month', 'day'], inplace=True)
    return ymd
|
||||
+65
@@ -0,0 +1,65 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange, lzip, range
|
||||
|
||||
from pandas import DataFrame, MultiIndex, Series
|
||||
from pandas.core import common as com
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_detect_chained_assignment():
    """An inplace fillna through chained column access must raise."""
    # Inplace ops, originally from:
    # http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
    tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'),
              ('ears', 'right')]
    column_values = [[12, 23], [123, None], [1234, 2345], [12345, 23456]]
    events = dict(zip(tuples, column_values))

    multiind = MultiIndex.from_tuples(tuples, names=['part', 'side'])
    zed = DataFrame(events, index=['a', 'b'], columns=multiind)

    with pytest.raises(com.SettingWithCopyError):
        zed['eyes']['right'].fillna(value=555, inplace=True)
|
||||
|
||||
|
||||
def test_cache_updating():
    """Chained and direct setting must both be visible through ``.loc``."""
    # 5216
    # make sure that we don't try to set a dead cache
    values = np.random.rand(10, 3)
    df = DataFrame(values, columns=['x', 'y', 'z'])
    pairs = [(outer, inner) for outer in range(5) for inner in range(2)]
    df.index = MultiIndex.from_tuples(pairs)

    # setting via chained assignment
    # but actually works, since everything is a view
    df.loc[0]['z'].iloc[0] = 1.
    assert df.loc[(0, 0), 'z'] == 1

    # correct setting
    df.loc[(0, 0), 'z'] = 2
    assert df.loc[(0, 0), 'z'] == 2
|
||||
|
||||
|
||||
def test_indexer_caching():
    """Boolean setitem on a large MultiIndex Series after a repr call."""
    # GH5727
    # make sure that indexers are in the _internal_names_set
    size = 1000001
    level_values = lrange(size)
    index = MultiIndex.from_tuples(lzip(level_values, level_values))

    # rendering the series is part of the scenario under test
    rendered = Series(np.zeros(size), index=index)
    str(rendered)

    # setitem
    expected = Series(np.ones(size), index=index)
    actual = Series(np.zeros(size), index=index)
    actual[actual == 0] = 1
    tm.assert_series_equal(actual, expected)
|
||||
+22
@@ -0,0 +1,22 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import Index, Period, Series, period_range
|
||||
|
||||
|
||||
def test_multiindex_period_datetime():
    """A Period and the matching datetime select the same element."""
    # GH4861, using datetime in period of multiindex raises exception

    letters = Index(['a', 'a', 'a', 'b', 'b'])
    months = period_range('2012-01', periods=len(letters), freq='M')
    ser = Series(np.random.randn(len(letters)), [letters, months])

    expected = ser.iloc[0]

    # try Period as index
    assert ser.loc['a', Period('2012-01')] == expected

    # try datetime as index
    assert ser.loc['a', datetime(2012, 1, 1)] == expected
|
||||
+237
@@ -0,0 +1,237 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import u, zip
|
||||
|
||||
from pandas import DataFrame, Index, MultiIndex, Series
|
||||
from pandas.core.indexing import IndexingError
|
||||
from pandas.util import testing as tm
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of Series with multi-level Index
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize('access_method', [lambda s, x: s[:, x],
                                           lambda s, x: s.loc[:, x],
                                           lambda s, x: s.xs(x, level=1)])
@pytest.mark.parametrize('level1_value, expected', [
    (0, Series([1], index=[0])),
    (1, Series([2, 3], index=[1, 2]))
])
def test_series_getitem_multiindex(access_method, level1_value, expected):
    """Selecting on the second level works through every access method."""
    # GH 6018
    # series regression getitem with a multi-index

    ser = Series([1, 2, 3])
    ser.index = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)])

    selected = access_method(ser, level1_value)
    tm.assert_series_equal(selected, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('level0_value', ['D', 'A'])
def test_series_getitem_duplicates_multiindex(level0_value):
    """Index a column by a level-0 label when the MultiIndex has duplicates."""
    # GH 5725: 'A' happens to be a valid Timestamp, so the lookup does not
    # raise the appropriate error -- only in PY3, of course!
    levels = [[level0_value, 'B', 'C'], [0, 26, 27, 37, 57, 67, 75, 82]]
    codes = [[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]]
    index = MultiIndex(levels=levels, codes=codes, names=['tag', 'day'])
    values = np.random.randn(len(index), 1)
    frame = DataFrame(values, index=index, columns=['val'])

    # confirm indexing on missing value raises KeyError
    if level0_value != 'A':
        with pytest.raises(KeyError, match=r"^'A'$"):
            frame.val['A']

    with pytest.raises(KeyError, match=r"^'X'$"):
        frame.val['X']

    selected = frame.val[level0_value]
    day_index = Index([26, 37, 57], name='day')
    expected = Series(values.ravel()[0:3], name='val', index=day_index)
    tm.assert_series_equal(selected, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer', [
    lambda ser: ser[2000, 3],
    lambda ser: ser.loc[2000, 3],
])
def test_series_getitem(
        multiindex_year_month_day_dataframe_random_data, indexer):
    """A (2000, 3) key returns a Series with the first two levels dropped."""
    ser = multiindex_year_month_day_dataframe_random_data['A']

    # the test treats positions 42..64 of the fixture as the (2000, 3) rows
    wanted = ser.reindex(ser.index[42:65])
    wanted.index = wanted.index.droplevel(0).droplevel(0)

    tm.assert_series_equal(indexer(ser), wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer', [
    lambda ser: ser[2000, 3, 10],
    lambda ser: ser.loc[2000, 3, 10],
])
def test_series_getitem_returns_scalar(
        multiindex_year_month_day_dataframe_random_data, indexer):
    """A key naming all three levels yields the single matching value."""
    ser = multiindex_year_month_day_dataframe_random_data['A']
    value_at_49 = ser.iloc[49]

    assert indexer(ser) == value_at_49
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer,expected_error,expected_error_msg', [
    (lambda ser: ser.__getitem__((2000, 3, 4)), KeyError, r"^356L?$"),
    (lambda ser: ser[(2000, 3, 4)], KeyError, r"^356L?$"),
    (lambda ser: ser.loc[(2000, 3, 4)], IndexingError, 'Too many indexers'),
    (lambda ser: ser.__getitem__(len(ser)), IndexError,
     'index out of bounds'),
    (lambda ser: ser[len(ser)], IndexError, 'index out of bounds'),
    (lambda ser: ser.iloc[len(ser)], IndexError,
     'single positional indexer is out-of-bounds'),
])
def test_series_getitem_indexing_errors(
        multiindex_year_month_day_dataframe_random_data, indexer,
        expected_error, expected_error_msg):
    """Each invalid access raises the listed exception with its message."""
    ser = multiindex_year_month_day_dataframe_random_data['A']

    with pytest.raises(expected_error, match=expected_error_msg):
        indexer(ser)
|
||||
|
||||
|
||||
def test_series_getitem_corner_generator(
        multiindex_year_month_day_dataframe_random_data):
    """Indexing with a generator of booleans matches a boolean-mask lookup."""
    ser = multiindex_year_month_day_dataframe_random_data['A']
    lazy_flags = (value > 0 for value in ser)

    tm.assert_series_equal(ser[lazy_flags], ser[ser > 0])
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of DataFrame with multi-level Index
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
def test_getitem_simple(multiindex_dataframe_random_data):
    """A full column tuple selects the matching column of the transpose."""
    transposed = multiindex_dataframe_random_data.T
    first_column = transposed.values[:, 0]
    selected = transposed['foo', 'one'].values

    tm.assert_almost_equal(selected, first_column)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer,expected_error_msg', [
    (lambda frame: frame[('foo', 'four')], r"^\('foo', 'four'\)$"),
    (lambda frame: frame['foobar'], r"^'foobar'$"),
])
def test_frame_getitem_simple_key_error(
        multiindex_dataframe_random_data, indexer, expected_error_msg):
    """Missing column keys raise KeyError whose message is the key itself."""
    transposed = multiindex_dataframe_random_data.T

    with pytest.raises(KeyError, match=expected_error_msg):
        indexer(transposed)
|
||||
|
||||
|
||||
def test_frame_getitem_multicolumn_empty_level():
    """Selecting a top-level label whose second level is '' keeps level 3."""
    frame = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']})
    frame.columns = [
        ['level1 item1', 'level1 item2'],
        ['', 'level2 item2'],
        ['level3 item1', 'level3 item2'],
    ]

    selected = frame['level1 item1']
    wanted = DataFrame([['1'], ['2'], ['3']], index=frame.index,
                       columns=['level3 item1'])
    tm.assert_frame_equal(selected, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer,expected_slice', [
    (lambda frame: frame['foo'], slice(3)),
    (lambda frame: frame['bar'], slice(3, 5)),
    (lambda frame: frame.loc[:, 'bar'], slice(3, 5)),
])
def test_frame_getitem_toplevel(
        multiindex_dataframe_random_data, indexer, expected_slice):
    """A top-level column label returns its sub-frame minus that level."""
    transposed = multiindex_dataframe_random_data.T

    wanted = transposed.reindex(columns=transposed.columns[expected_slice])
    wanted.columns = wanted.columns.droplevel(0)

    tm.assert_frame_equal(indexer(transposed), wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('unicode_strings', [True, False])
def test_frame_mixed_depth_get(unicode_strings):
    """Partial column keys resolve when the deeper levels are empty strings.

    If unicode_strings is True, the column labels in dataframe
    construction will use unicode strings in Python 2 (pull request
    #17099).
    """
    level_values = [
        ['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
        ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
        ['', 'wx', 'wy', '', '', ''],
    ]
    if unicode_strings:
        level_values = [[u(label) for label in level]
                        for level in level_values]

    columns = MultiIndex.from_tuples(sorted(zip(*level_values)))
    frame = DataFrame(np.random.randn(4, 6), columns=columns)

    # one-level key: same data as the fully padded key, named by the short key
    short = frame['a']
    padded = frame['a', '', ''].rename('a')
    tm.assert_series_equal(short, padded)

    # two-level key: likewise against the three-level padded key
    short = frame['routine1', 'result1']
    padded = frame['routine1', 'result1', ''].rename(('routine1', 'result1'))
    tm.assert_series_equal(short, padded)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of DataFrame with multi-level Index with duplicates
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture
def dataframe_with_duplicate_index():
    """Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
    # ('A', 'B2') appears twice in the codes, so the columns are not unique
    columns = MultiIndex(
        levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']],
        codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
        names=['main', 'sub'])
    rows = [
        ['a', 'd', 'e', 'c', 'f', 'b'],
        [1, 4, 5, 3, 6, 2],
        [1, 4, 5, 3, 6, 2],
    ]
    return DataFrame(rows, index=['h1', 'h3', 'h5'], columns=columns)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer', [
    lambda frame: frame[('A', 'A1')],
    lambda frame: frame.loc[:, ('A', 'A1')],
])
def test_frame_mi_access(dataframe_with_duplicate_index, indexer):
    """A full column tuple on the duplicate-column fixture returns a frame."""
    # GH 4145
    frame = dataframe_with_duplicate_index
    row_labels = Index(['h1', 'h3', 'h5'])
    column_key = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub'])

    # built as a single row and transposed into a single column
    wanted = DataFrame([['a', 1, 1]], index=column_key,
                       columns=row_labels).T

    tm.assert_frame_equal(indexer(frame), wanted)
|
||||
|
||||
|
||||
def test_frame_mi_access_returns_series(dataframe_with_duplicate_index):
    """Chained selection of a unique sub-label returns a Series."""
    # GH 4146, not returning a block manager when selecting a unique index
    # from a duplicate index
    # as of 4879, this returns a Series (which is similar to what happens
    # with a non-unique)
    frame = dataframe_with_duplicate_index
    wanted = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1')

    tm.assert_series_equal(frame['A']['A1'], wanted)
|
||||
|
||||
|
||||
def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index):
    """Chained selection of a repeated sub-label returns a DataFrame."""
    # selecting a non_unique from the 2nd level
    frame = dataframe_with_duplicate_index
    sub_labels = Index(['B2', 'B2'], name='sub')
    wanted = DataFrame([['d', 4, 4], ['e', 5, 5]], index=sub_labels,
                       columns=['h1', 'h3', 'h5']).T

    tm.assert_frame_equal(frame['A']['B2'], wanted)
|
||||
@@ -0,0 +1,151 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, MultiIndex, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def simple_multiindex_dataframe():
    """
    Factory function to create simple 3 x 3 dataframe with
    both columns and row MultiIndex using supplied data or
    random data by default.
    """
    def _simple_multiindex_dataframe(data=None):
        # fall back to random values only when the caller supplies nothing
        values = np.random.randn(3, 3) if data is None else data
        row_arrays = [[4, 4, 8], [8, 10, 12]]
        column_arrays = [[2, 2, 4], [6, 8, 10]]
        return DataFrame(values, columns=column_arrays, index=row_arrays)

    return _simple_multiindex_dataframe
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer, expected', [
    (lambda frame: frame.iloc[0],
     lambda values: Series(
         values[0], index=[[2, 2, 4], [6, 8, 10]], name=(4, 8))),
    (lambda frame: frame.iloc[2],
     lambda values: Series(
         values[2], index=[[2, 2, 4], [6, 8, 10]], name=(8, 12))),
    (lambda frame: frame.iloc[:, 2],
     lambda values: Series(
         values[:, 2], index=[[4, 4, 8], [8, 10, 12]], name=(4, 10))),
])
def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe):
    """A single positional row or column comes back as a named Series.

    ``expected`` is a callable that builds the expected Series from the
    raw array used to construct the frame.
    """
    values = np.random.randn(3, 3)
    frame = simple_multiindex_dataframe(values)

    tm.assert_series_equal(indexer(frame), expected(values))
|
||||
|
||||
|
||||
def test_iloc_returns_dataframe(simple_multiindex_dataframe):
    """A list of positions returns a frame equal to the label-4 group."""
    frame = simple_multiindex_dataframe()
    by_position = frame.iloc[[0, 1]]
    by_label = frame.xs(4, drop_level=False)

    tm.assert_frame_equal(by_position, by_label)
|
||||
|
||||
|
||||
def test_iloc_returns_scalar(simple_multiindex_dataframe):
    """A (row, column) position pair returns the underlying array element."""
    values = np.random.randn(3, 3)
    frame = simple_multiindex_dataframe(values)

    assert frame.iloc[2, 2] == values[2, 2]
|
||||
|
||||
|
||||
def test_iloc_getitem_multiple_items():
    """Positions 2 and 3 match the rows of the 'b' group, levels kept."""
    # GH 5528
    pairs = zip(['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y'])
    frame = DataFrame(np.random.randn(4, 4),
                      index=MultiIndex.from_tuples(pairs))

    by_position = frame.iloc[[2, 3]]
    by_label = frame.xs('b', drop_level=False)
    tm.assert_frame_equal(by_position, by_label)
|
||||
|
||||
|
||||
def test_iloc_getitem_labels():
    """Positional scalar access works with string-labelled MultiIndexes."""
    # this is basically regular indexing
    values = np.random.randn(4, 3)
    row_arrays = [['i', 'i', 'j', 'k'], ['X', 'X', 'Y', 'Y']]
    column_arrays = [['i', 'i', 'j'], ['A', 'A', 'B']]
    frame = DataFrame(values, columns=column_arrays, index=row_arrays)

    assert frame.iloc[2, 2] == values[2, 2]
|
||||
|
||||
|
||||
def test_frame_getitem_slice(multiindex_dataframe_random_data):
    """``.iloc[:4]`` and plain ``[:4]`` slicing return the same rows."""
    frame = multiindex_dataframe_random_data

    tm.assert_frame_equal(frame.iloc[:4], frame[:4])
|
||||
|
||||
|
||||
def test_frame_setitem_slice(multiindex_dataframe_random_data):
    """Assigning through ``.iloc[:4]`` zeroes those rows and no others."""
    frame = multiindex_dataframe_random_data
    frame.iloc[:4] = 0

    head = frame.values[:4]
    tail = frame.values[4:]
    assert (head == 0).all()
    assert (tail != 0).all()
|
||||
|
||||
|
||||
def test_indexing_ambiguity_bug_1678():
    """Column position 1 equals the ('Ohio', 'Red') column selected by label."""
    # GH 1678
    column_tuples = [('Ohio', 'Green'), ('Ohio', 'Red'), ('Colorado', 'Green')]
    row_tuples = [('a', 1), ('a', 2), ('b', 1), ('b', 2)]
    frame = DataFrame(np.arange(12).reshape((4, 3)),
                      index=MultiIndex.from_tuples(row_tuples),
                      columns=MultiIndex.from_tuples(column_tuples))

    by_position = frame.iloc[:, 1]
    by_label = frame.loc[:, ('Ohio', 'Red')]
    tm.assert_series_equal(by_position, by_label)
|
||||
|
||||
|
||||
def test_iloc_integer_locations():
    """Cell-by-cell ``.iloc`` reads rebuild the data despite duplicate labels."""
    # GH 13797
    cells = [['str00', 'str01'], ['str10', 'str11'], ['str20', 'srt21'],
             ['str30', 'str31'], ['str40', 'str41']]
    # ('CC', 'B') is deliberately repeated
    labels = MultiIndex.from_tuples(
        [('CC', 'A'), ('CC', 'B'), ('CC', 'B'), ('BB', 'a'), ('BB', 'b')])

    wanted = DataFrame(cells)
    frame = DataFrame(cells, index=labels)

    rebuilt_rows = []
    for row in range(5):
        rebuilt_rows.append([frame.iloc[row, col] for col in range(2)])

    tm.assert_frame_equal(DataFrame(rebuilt_rows), wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'data, indexes, values, expected_k', [
        # test without indexer value in first level of MultiIndex
        ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
        # test like code sample 1 in the issue
        ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100],
         [755, 1066]),
        # test like code sample 2 in the issue
        ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
        # test like code sample 3 in the issue
        ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10],
         [8, 15, 13]),
    ])
def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k):
    """In-place ``+=`` through ``.iloc`` on an integer-MultiIndex Series."""
    # GH17148
    frame = DataFrame(data=data, columns=['i', 'j', 'k'])
    frame = frame.set_index(['i', 'j'])

    updated = frame.k.copy()
    for position, increment in zip(indexes, values):
        updated.iloc[position] += increment

    # write the expected values back so index and name line up for comparison
    frame['k'] = expected_k
    tm.assert_series_equal(updated, frame.k)
|
||||
|
||||
|
||||
def test_getitem_iloc(multiindex_dataframe_random_data):
    """Row position 2 equals the cross-section taken at that row's label."""
    frame = multiindex_dataframe_random_data
    by_position = frame.iloc[2]
    by_label = frame.xs(frame.index[2])

    tm.assert_series_equal(by_position, by_label)
|
||||
+89
@@ -0,0 +1,89 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, MultiIndex, Series
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.slow
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_multiindex_get_loc():  # GH7724, GH2646
    """Check ``in`` and ``.loc`` on a MultiIndex at every sort depth.

    Random rows are indexed by their first four columns; for random keys
    (plus keys known to be present) every key prefix is checked against a
    boolean-mask selection on the flat frame.
    """

    with warnings.catch_warnings(record=True):

        # test indexing into a multi-index before & past the lexsort depth
        from numpy.random import randint, choice, randn
        cols = ['jim', 'joe', 'jolie', 'joline', 'jolia']

        def validate(mi, df, key):
            # mi: frame indexed by cols[:-1]; df: the same rows, flat;
            # key: tuple of candidate values for the index levels
            mask = np.ones(len(df)).astype('bool')

            # test for all partials of this key
            for i, k in enumerate(key):
                # narrow the mask one level at a time
                mask &= df.iloc[:, i] == k

                if not mask.any():
                    assert key[:i + 1] not in mi.index
                    continue

                assert key[:i + 1] in mi.index
                right = df[mask].copy()

                if i + 1 != len(key):  # partial key
                    # drop the matched levels, index by the remaining ones
                    right.drop(cols[:i + 1], axis=1, inplace=True)
                    right.set_index(cols[i + 1:-1], inplace=True)
                    tm.assert_frame_equal(mi.loc[key[:i + 1]], right)

                else:  # full key
                    right.set_index(cols[:-1], inplace=True)
                    if len(right) == 1:  # single hit
                        right = Series(right['jolia'].values,
                                       name=right.index[0],
                                       index=['jolia'])
                        tm.assert_series_equal(mi.loc[key[:i + 1]], right)
                    else:  # multi hit
                        tm.assert_frame_equal(mi.loc[key[:i + 1]], right)

        def loop(mi, df, keys):
            # run validate for every key
            for key in keys:
                validate(mi, df, key)

        # n rows of data, m random keys
        n, m = 1000, 50

        vals = [randint(0, 10, n), choice(
            list('abcdefghij'), n), choice(
                pd.date_range('20141009', periods=10).tolist(), n), choice(
                    list('ZYXWVUTSRQ'), n), randn(n)]
        vals = list(map(tuple, zip(*vals)))

        # bunch of keys for testing
        # (each key pool has one more candidate than the data pool, so some
        # keys can never match)
        keys = [randint(0, 11, m), choice(
            list('abcdefghijk'), m), choice(
                pd.date_range('20141009', periods=11).tolist(), m), choice(
                    list('ZYXWVUTSRQP'), m)]
        keys = list(map(tuple, zip(*keys)))
        # add keys taken from actual rows so that hits are guaranteed
        keys += list(map(lambda t: t[:-1], vals[::n // m]))

        # covers both unique index and non-unique index
        df = DataFrame(vals, columns=cols)
        a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1])

        for frame in a, b:
            for i in range(5):  # lexsort depth
                # i == 0 leaves the rows unsorted; otherwise sort by the
                # first i columns
                df = frame.copy() if i == 0 else frame.sort_values(
                    by=cols[:i])
                mi = df.set_index(cols[:-1])
                assert not mi.index.lexsort_depth < i
                loop(mi, df, keys)
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_large_mi_dataframe_indexing():
    """Membership test on a million-row MultiIndex for an absent tuple."""
    # GH10645
    size = 10 ** 6
    result = MultiIndex.from_arrays([range(size), range(size)])

    # level 0 only holds 0 .. size - 1, so (size, 0) cannot be present
    assert (size, 0) not in result
|
||||
@@ -0,0 +1,56 @@
|
||||
from warnings import catch_warnings, simplefilter
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
from pandas import DataFrame, MultiIndex
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexIx(object):
    """Setting and getting on MultiIndex frames, including ``.ix``."""

    def test_frame_setitem_ix(self, multiindex_dataframe_random_data):
        """Set one cell by (row tuple, column) via ``.loc`` and ``.ix``."""
        base = multiindex_dataframe_random_data
        row_key = ('bar', 'two')

        base.loc[row_key, 'B'] = 5
        assert base.loc[row_key, 'B'] == 5

        # with integer labels
        relabelled = base.copy()
        relabelled.columns = lrange(3)
        relabelled.loc[row_key, 1] = 7
        assert relabelled.loc[row_key, 1] == 7

        # same assignment through the deprecated .ix accessor
        with catch_warnings(record=True):
            simplefilter("ignore", DeprecationWarning)
            relabelled = base.copy()
            relabelled.columns = lrange(3)
            relabelled.ix[row_key, 1] = 7
            assert relabelled.loc[row_key, 1] == 7

    def test_ix_general(self):
        """Tuple lookup on an unsorted, then a sorted, float/int MultiIndex."""
        # ix general issues

        # GH 2817
        raw = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
               'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
               'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}}
        frame = DataFrame(raw).set_index(keys=['col', 'year'])
        key = 4.0, 2012

        # emits a PerformanceWarning, ok
        with tm.assert_produces_warning(PerformanceWarning):
            tm.assert_frame_equal(frame.loc[key], frame.iloc[2:])

        # this is ok
        frame.sort_index(inplace=True)
        selected = frame.loc[key]

        # col has float dtype, result should be Float64Index
        wanted_index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3],
                                              names=['col', 'year'])
        wanted = DataFrame({'amount': [222, 333, 444]}, index=wanted_index)
        tm.assert_frame_equal(selected, wanted)
|
||||
@@ -0,0 +1,378 @@
|
||||
import itertools
|
||||
from warnings import catch_warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, MultiIndex, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def single_level_multiindex():
    """single level MultiIndex"""
    labels = ['foo', 'bar', 'baz', 'qux']
    return MultiIndex(levels=[labels], codes=[[0, 1, 2, 3]], names=['first'])
|
||||
|
||||
|
||||
@pytest.fixture
def frame_random_data_integer_multi_index():
    """6 x 2 random frame on a two-level integer MultiIndex."""
    index = MultiIndex(levels=[[0, 1], [0, 1, 2]],
                       codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
    return DataFrame(np.random.randn(6, 2), index=index)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexLoc(object):
    """``.loc`` behaviour on Series/DataFrame objects with a MultiIndex."""

    def test_loc_getitem_series(self):
        """A Series used as a key selects on the first level by its values."""
        # GH14730
        # passing a series as a key with a MultiIndex
        index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
        x = Series(index=index, data=range(9), dtype=np.float64)
        y = Series([1, 3])
        expected = Series(
            data=[0, 1, 2, 6, 7, 8],
            index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
            dtype=np.float64)
        result = x.loc[y]
        tm.assert_series_equal(result, expected)

        result = x.loc[[1, 3]]
        tm.assert_series_equal(result, expected)

        # GH15424
        y1 = Series([1, 3], index=[1, 2])
        result = x.loc[y1]
        tm.assert_series_equal(result, expected)

        empty = Series(data=[], dtype=np.float64)
        # dtype is an argument of the Series, not of the MultiIndex
        expected = Series([], index=MultiIndex(
            levels=index.levels, codes=[[], []]), dtype=np.float64)
        result = x.loc[empty]
        tm.assert_series_equal(result, expected)

    def test_loc_getitem_array(self):
        """ndarray, empty ndarray and numpy scalar keys on the first level."""
        # GH15434
        # passing an array as a key with a MultiIndex
        index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
        x = Series(index=index, data=range(9), dtype=np.float64)
        y = np.array([1, 3])
        expected = Series(
            data=[0, 1, 2, 6, 7, 8],
            index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
            dtype=np.float64)
        result = x.loc[y]
        tm.assert_series_equal(result, expected)

        # empty array:
        empty = np.array([])
        # dtype is an argument of the Series, not of the MultiIndex
        expected = Series([], index=MultiIndex(
            levels=index.levels, codes=[[], []]), dtype=np.float64)
        result = x.loc[empty]
        tm.assert_series_equal(result, expected)

        # 0-dim array (scalar):
        scalar = np.int64(1)
        expected = Series(
            data=[0, 1, 2],
            index=['A', 'B', 'C'],
            dtype=np.float64)
        result = x.loc[scalar]
        tm.assert_series_equal(result, expected)

    def test_loc_multiindex(self):
        """``.loc`` agrees with ``.ix`` on label and integer MultiIndexes."""
        mi_labels = DataFrame(np.random.randn(3, 3),
                              columns=[['i', 'i', 'j'], ['A', 'A', 'B']],
                              index=[['i', 'i', 'j'], ['X', 'X', 'Y']])

        mi_int = DataFrame(np.random.randn(3, 3),
                           columns=[[2, 2, 4], [6, 8, 10]],
                           index=[[4, 4, 8], [8, 10, 12]])

        # the first row
        rs = mi_labels.loc['i']
        with catch_warnings(record=True):
            xp = mi_labels.ix['i']
        tm.assert_frame_equal(rs, xp)

        # 2nd (last) columns
        rs = mi_labels.loc[:, 'j']
        with catch_warnings(record=True):
            xp = mi_labels.ix[:, 'j']
        tm.assert_frame_equal(rs, xp)

        # corner column
        rs = mi_labels.loc['j'].loc[:, 'j']
        with catch_warnings(record=True):
            xp = mi_labels.ix['j'].ix[:, 'j']
        tm.assert_frame_equal(rs, xp)

        # with a tuple
        rs = mi_labels.loc[('i', 'X')]
        with catch_warnings(record=True):
            xp = mi_labels.ix[('i', 'X')]
        tm.assert_frame_equal(rs, xp)

        rs = mi_int.loc[4]
        with catch_warnings(record=True):
            xp = mi_int.ix[4]
        tm.assert_frame_equal(rs, xp)

        # missing label
        with pytest.raises(KeyError):
            mi_int.loc[2]
        with catch_warnings(record=True):
            # GH 21593
            with pytest.raises(KeyError):
                mi_int.ix[2]

    def test_loc_multiindex_indexer_none(self):
        """List keys covering whole first-level groups."""
        # GH6788
        # multi-index indexer is None (meaning take all)
        attributes = ['Attribute' + str(i) for i in range(1)]
        attribute_values = ['Value' + str(i) for i in range(5)]

        index = MultiIndex.from_product([attributes, attribute_values])
        df = 0.1 * np.random.randn(10, 1 * 5) + 0.5
        df = DataFrame(df, columns=index)
        result = df[attributes]
        tm.assert_frame_equal(result, df)

        # GH 7349
        # loc with a multi-index seems to be doing fallback
        df = DataFrame(np.arange(12).reshape(-1, 1),
                       index=MultiIndex.from_product([[1, 2, 3, 4],
                                                      [1, 2, 3]]))

        expected = df.loc[([1, 2], ), :]
        result = df.loc[[1, 2]]
        tm.assert_frame_equal(result, expected)

    def test_loc_multiindex_incomplete(self):
        """Open-ended and stepped label slices on the first level."""
        # GH 7399
        # incomplete indexers
        s = Series(np.arange(15, dtype='int64'),
                   MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
        expected = s.loc[:, 'a':'c']

        result = s.loc[0:4, 'a':'c']
        tm.assert_series_equal(result, expected)

        result = s.loc[:4, 'a':'c']
        tm.assert_series_equal(result, expected)

        result = s.loc[0:, 'a':'c']
        tm.assert_series_equal(result, expected)

        # GH 7400
        # multiindexer gettitem with list of indexers skips wrong element
        s = Series(np.arange(15, dtype='int64'),
                   MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
        expected = s.iloc[[6, 7, 8, 12, 13, 14]]
        result = s.loc[2:4:2, 'a':'c']
        tm.assert_series_equal(result, expected)

    def test_get_loc_single_level(self, single_level_multiindex):
        """Every value of a one-level MultiIndex can be used as a key."""
        single_level = single_level_multiindex
        s = Series(np.random.randn(len(single_level)),
                   index=single_level)
        for k in single_level.values:
            # smoke test: the lookup only has to succeed
            s[k]

    def test_loc_getitem_int_slice(self):
        """Integer slices passed to ``.loc`` are label slices."""
        # GH 3053
        # loc should treat integer slices like label slices

        index = MultiIndex.from_tuples(
            list(itertools.product([6, 7, 8], ['a', 'b'])))
        df = DataFrame(np.random.randn(6, 6), index, index)
        result = df.loc[6:8, :]
        expected = df
        tm.assert_frame_equal(result, expected)

        index = MultiIndex.from_tuples(
            list(itertools.product([10, 20, 30], ['a', 'b'])))
        df = DataFrame(np.random.randn(6, 6), index, index)
        result = df.loc[20:30, :]
        expected = df.iloc[2:]
        tm.assert_frame_equal(result, expected)

        # doc examples
        result = df.loc[10, :]
        expected = df.iloc[0:2]
        expected.index = ['a', 'b']
        tm.assert_frame_equal(result, expected)

        result = df.loc[:, 10]
        # expected = df.ix[:,10] (this fails)
        expected = df[10]
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        'indexer_type_1',
        (list, tuple, set, slice, np.ndarray, Series, Index))
    @pytest.mark.parametrize(
        'indexer_type_2',
        (list, tuple, set, slice, np.ndarray, Series, Index))
    def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
        """Every pairing of list-like/slice indexer types selects the same."""
        # GH #19686
        # .loc should work with nested indexers which can be
        # any list-like objects (see `pandas.api.types.is_list_like`) or slices

        def convert_nested_indexer(indexer_type, keys):
            # ndarray and slice cannot be built by calling the type on a list
            if indexer_type is np.ndarray:
                return np.array(keys)
            if indexer_type is slice:
                return slice(*keys)
            return indexer_type(keys)

        a = [10, 20, 30]
        b = [1, 2, 3]
        index = MultiIndex.from_product([a, b])
        df = DataFrame(
            np.arange(len(index), dtype='int64'),
            index=index, columns=['Data'])

        keys = ([10, 20], [2, 3])
        types = (indexer_type_1, indexer_type_2)

        # check indexers with all the combinations of nested objects
        # of all the valid types
        indexer = tuple(
            convert_nested_indexer(indexer_type, k)
            for indexer_type, k in zip(types, keys))

        result = df.loc[indexer, 'Data']
        expected = Series(
            [1, 2, 4, 5], name='Data',
            index=MultiIndex.from_product(keys))

        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer, is_level1, expected_error', [
    ([], False, None),  # empty ok
    (['A'], False, None),
    (['A', 'D'], False, None),
    (['D'], False, r"\['D'\] not in index"),  # not any values found
    (pd.IndexSlice[:, ['foo']], True, None),
    (pd.IndexSlice[:, ['foo', 'bah']], True, None),
])
def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1,
                                                            expected_error):
    """List indexers that are empty or contain labels absent from the index."""
    # GH 7866
    # multi-index slicing with missing indexers
    names = ['one', 'two']
    full_index = MultiIndex.from_product(
        [['A', 'B', 'C'], ['foo', 'bar', 'baz']], names=names)
    ser = Series(np.arange(9, dtype='int64'), index=full_index).sort_index()

    if expected_error is not None:
        with pytest.raises(KeyError, match=expected_error):
            ser.loc[indexer]
        return

    if indexer == []:
        wanted = ser.iloc[[]]
    elif is_level1:
        # only 'foo' exists on the second level
        foo_index = MultiIndex.from_product(
            [['A', 'B', 'C'], ['foo']], names=names)
        wanted = Series([0, 3, 6], index=foo_index).sort_index()
    else:
        # only 'A' exists on the first level
        a_index = MultiIndex.from_product(
            [['A'], ['foo', 'bar', 'baz']], names=names)
        wanted = Series(np.arange(3, dtype='int64'),
                        index=a_index).sort_index()

    tm.assert_series_equal(ser.loc[indexer], wanted)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
@pytest.mark.parametrize('indexer', [
    lambda ser: ser.loc[[(2000, 3, 10), (2000, 3, 13)]],
    lambda ser: ser.ix[[(2000, 3, 10), (2000, 3, 13)]],
])
def test_series_loc_getitem_fancy(
        multiindex_year_month_day_dataframe_random_data, indexer):
    """A list of full tuples selects the matching rows, index kept."""
    ser = multiindex_year_month_day_dataframe_random_data['A']
    wanted = ser.reindex(ser.index[49:51])

    tm.assert_series_equal(indexer(ser), wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('columns_indexer', [
    ([], slice(None)),
    (['foo'], []),
])
def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer):
    """A column indexer with an empty list at any level selects no columns."""
    # GH 8737
    # empty indexer
    columns = MultiIndex.from_product((['foo', 'bar', 'baz'],
                                       ['alpha', 'beta']))
    frame = DataFrame(np.random.randn(5, 6), index=range(5), columns=columns)
    frame = frame.sort_index(level=0, axis=1)

    no_columns = columns.reindex([])[0]
    wanted = DataFrame(index=range(5), columns=no_columns)

    tm.assert_frame_equal(frame.loc[:, columns_indexer], wanted)
|
||||
|
||||
|
||||
def test_loc_getitem_duplicates_multiindex_non_scalar_type_object():
    """A cell holding a function object is returned as that same object."""
    # regression from < 0.14.0
    # GH 7914
    columns = MultiIndex.from_tuples([('functs', 'mean'),
                                      ('functs', 'median')])
    frame = DataFrame([[np.mean, np.median], ['mean', 'median']],
                      columns=columns, index=['function', 'name'])

    assert frame.loc['function', ('functs', 'mean')] == np.mean
|
||||
|
||||
|
||||
def test_loc_getitem_tuple_plus_slice():
    """``.loc[(0, 0), :]`` returns the same row as ``.loc[0, 0]``."""
    # GH 671
    raw = {'a': np.arange(10),
           'b': np.arange(10),
           'c': np.random.randn(10),
           'd': np.random.randn(10)}
    frame = DataFrame(raw).set_index(['a', 'b'])

    bare_key = frame.loc[0, 0]
    tuple_and_slice = frame.loc[(0, 0), :]
    tm.assert_series_equal(tuple_and_slice, bare_key)
|
||||
|
||||
|
||||
def test_loc_getitem_int(frame_random_data_integer_multi_index):
    """An integer first-level label returns its rows minus that level."""
    frame = frame_random_data_integer_multi_index
    selected = frame.loc[1]

    # the last three rows carry first-level label 1 in the fixture
    last_three = frame[-3:]
    last_three.index = last_three.index.droplevel(0)

    tm.assert_frame_equal(selected, last_three)
|
||||
|
||||
|
||||
def test_loc_getitem_int_raises_exception(
        frame_random_data_integer_multi_index):
    """An integer label absent from the first level raises KeyError."""
    frame = frame_random_data_integer_multi_index

    with pytest.raises(KeyError, match=r"^3L?$"):
        frame.loc[3]
|
||||
|
||||
|
||||
def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data):
    """Setting a missing (row tuple, column) cell creates it."""
    frame = multiindex_dataframe_random_data
    row_key = ('bar', 'three')

    # test setup - check key not in dataframe
    with pytest.raises(KeyError, match=r"^11L?$"):
        frame.loc[row_key, 'B']

    # in theory should be inserting in a sorted space????
    frame.loc[row_key, 'B'] = 0

    assert frame.sort_index().loc[row_key, 'B'] == 0
|
||||
+86
@@ -0,0 +1,86 @@
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._libs.index as _index
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, MultiIndex, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexBasic(object):
    """Basic indexing checks for objects that carry a MultiIndex."""

    def test_multiindex_perf_warn(self):
        """Tuple ``.loc`` lookups on these frames emit PerformanceWarning."""
        df = DataFrame({'jim': [0, 0, 1, 1],
                        'joe': ['x', 'x', 'z', 'y'],
                        'jolie': np.random.rand(4)}).set_index(['jim', 'joe'])

        with tm.assert_produces_warning(PerformanceWarning,
                                        clear=[pd.core.index]):
            df.loc[(1, 'z')]

        # reorder the rows, then look up a partial key
        df = df.iloc[[2, 1, 3, 0]]
        with tm.assert_produces_warning(PerformanceWarning):
            df.loc[(0, )]

    def test_multiindex_contains_dropped(self):
        """Dropped labels stay in ``levels`` but leave the index itself."""
        # GH 19027
        # test that dropped MultiIndex levels are not in the MultiIndex
        # despite continuing to be in the MultiIndex's levels
        idx = MultiIndex.from_product([[1, 2], [3, 4]])
        assert 2 in idx
        idx = idx.drop(2)

        # drop implementation keeps 2 in the levels
        assert 2 in idx.levels[0]
        # but it should no longer be in the index itself
        assert 2 not in idx

        # also applies to strings
        idx = MultiIndex.from_product([['a', 'b'], ['c', 'd']])
        assert 'a' in idx
        idx = idx.drop('a')
        assert 'a' in idx.levels[0]
        assert 'a' not in idx

    @pytest.mark.parametrize("data, expected", [
        (MultiIndex.from_product([(), ()]), True),
        (MultiIndex.from_product([(1, 2), (3, 4)]), True),
        (MultiIndex.from_product([('a', 'b'), (1, 2)]), False),
    ])
    def test_multiindex_is_homogeneous_type(self, data, expected):
        """``_is_homogeneous_type`` matches the parametrized expectation."""
        assert data._is_homogeneous_type is expected

    def test_indexing_over_hashtable_size_cutoff(self):
        """Tuple lookups work with ``_SIZE_CUTOFF`` raised above the length.

        The module-level cutoff is patched for the duration of the test and
        restored in a ``finally`` clause, so a failing assertion cannot leak
        the patched value into other tests.
        """
        n = 10000

        old_cutoff = _index._SIZE_CUTOFF
        _index._SIZE_CUTOFF = 20000
        try:
            s = Series(np.arange(n),
                       MultiIndex.from_arrays((["a"] * n, np.arange(n))))

            # hai it works!
            assert s[("a", 5)] == 5
            assert s[("a", 6)] == 6
            assert s[("a", 7)] == 7
        finally:
            _index._SIZE_CUTOFF = old_cutoff

    def test_multi_nan_indexing(self):
        """``set_index(..., drop=False)`` keeps a NaN label in the index."""
        # GH 3588
        df = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'],
                        'b': ["C1", "C2", "C3", "C4"],
                        "c": [10, 15, np.nan, 20]})
        result = df.set_index(['a', 'b'], drop=False)
        expected = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'],
                              'b': ["C1", "C2", "C3", "C4"],
                              "c": [10, 15, np.nan, 20]},
                             index=[Index(['R1', 'R2', np.nan, 'R4'],
                                          name='a'),
                                    Index(['C1', 'C2', 'C3', 'C4'], name='b')])
        tm.assert_frame_equal(result, expected)
|
||||
+103
@@ -0,0 +1,103 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, MultiIndex, Panel, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:\\nPanel:FutureWarning')
class TestMultiIndexPanel(object):
    """Positional and label indexing of Panels that have MultiIndex axes."""

    def test_iloc_getitem_panel_multiindex(self):
        """``Panel.iloc`` gives matching results for MultiIndex/flat axes."""
        # GH 7199
        # Panel with multi-index
        multi_index = MultiIndex.from_tuples(
            [('ONE', 'one'), ('TWO', 'two'), ('THREE', 'three')],
            names=['UPPER', 'lower'])
        simple_index = [x[0] for x in multi_index]

        wd1 = Panel(items=['First', 'Second'],
                    major_axis=['a', 'b', 'c', 'd'],
                    minor_axis=multi_index)
        wd2 = Panel(items=['First', 'Second'],
                    major_axis=['a', 'b', 'c', 'd'],
                    minor_axis=simple_index)

        # boolean row mask combined with positional column selection
        expected1 = wd1['First'].iloc[[True, True, True, False], [0, 2]]
        # NOTE(review): the original source tagged the next line "WRONG";
        # the reason is not visible from this file.
        result1 = wd1.iloc[0, [True, True, True, False], [0, 2]]  # WRONG
        tm.assert_frame_equal(result1, expected1)

        expected2 = wd2['First'].iloc[[True, True, True, False], [0, 2]]
        result2 = wd2.iloc[0, [True, True, True, False], [0, 2]]
        tm.assert_frame_equal(result2, expected2)

        # single row, all three minor-axis positions
        expected1 = DataFrame(index=['a'], columns=multi_index,
                              dtype='float64')
        result1 = wd1.iloc[0, [0], [0, 1, 2]]
        tm.assert_frame_equal(result1, expected1)

        expected2 = DataFrame(index=['a'], columns=simple_index,
                              dtype='float64')
        result2 = wd2.iloc[0, [0], [0, 1, 2]]
        tm.assert_frame_equal(result2, expected2)

        # GH 7516
        mi = MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')])
        p = Panel(np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3),
                  items=['a', 'b', 'c'], major_axis=mi,
                  minor_axis=['u', 'v', 'w'])
        expected = Series([3, 12, 21], index=['a', 'b', 'c'], name='u')

        result = p.iloc[:, 1, 0]
        tm.assert_series_equal(result, expected)

        result = p.loc[:, (1, 'y'), 'u']
        tm.assert_series_equal(result, expected)

    def test_panel_setitem_with_multiindex(self):
        """``iloc`` assignment works as each axis becomes a MultiIndex."""
        # 10360
        # failing with a multi-index
        first_row_filled = np.array([[[1, 2, 3], [0, 0, 0]],
                                     [[0, 0, 0], [0, 0, 0]]],
                                    dtype=np.float64)

        def check_first_row(panel_axes):
            # fill row (0, 0, :) of a zero Panel and compare with the
            # reference values built on the same axes
            panel = Panel(0., **panel_axes)
            panel.iloc[0, 0, :] = [1, 2, 3]
            reference = Panel(first_row_filled, **panel_axes)
            tm.assert_panel_equal(panel, reference)

        # reg index
        axes = dict(items=['A', 'B'], major_axis=[0, 1],
                    minor_axis=['X', 'Y', 'Z'])
        check_first_row(axes)

        # multi-indexes: axes are swapped in one at a time and accumulate
        axes['items'] = MultiIndex.from_tuples(
            [('A', 'a'), ('B', 'b')])
        check_first_row(axes)

        axes['major_axis'] = MultiIndex.from_tuples(
            [('A', 1), ('A', 2)])
        check_first_row(axes)

        axes['minor_axis'] = MultiIndex.from_product(
            [['X'], range(3)])
        check_first_row(axes)

        # assignment along the major axis instead of the minor axis
        first_column_filled = np.array(
            [[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]],
            dtype=np.float64)
        p5 = Panel(0., **axes)
        p5.iloc[0, :, 0] = [1, 2]
        reference = Panel(first_column_filled, **axes)
        tm.assert_panel_equal(p5, reference)
|
||||
+183
@@ -0,0 +1,183 @@
|
||||
from warnings import catch_warnings, simplefilter
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, MultiIndex
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexPartial(object):
|
||||
|
||||
def test_getitem_partial_int(self):
|
||||
# GH 12416
|
||||
# with single item
|
||||
l1 = [10, 20]
|
||||
l2 = ['a', 'b']
|
||||
df = DataFrame(index=range(2),
|
||||
columns=MultiIndex.from_product([l1, l2]))
|
||||
expected = DataFrame(index=range(2),
|
||||
columns=l2)
|
||||
result = df[20]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# with list
|
||||
expected = DataFrame(index=range(2),
|
||||
columns=MultiIndex.from_product([l1[1:], l2]))
|
||||
result = df[[20]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# missing item:
|
||||
with pytest.raises(KeyError, match='1'):
|
||||
df[1]
|
||||
with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
|
||||
df[[1]]
|
||||
|
||||
def test_series_slice_partial(self):
|
||||
pass
|
||||
|
||||
def test_xs_partial(self, multiindex_dataframe_random_data,
|
||||
multiindex_year_month_day_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
result = frame.xs('foo')
|
||||
result2 = frame.loc['foo']
|
||||
expected = frame.T['foo'].T
|
||||
tm.assert_frame_equal(result, expected)
|
||||
tm.assert_frame_equal(result, result2)
|
||||
|
||||
result = ymd.xs((2000, 4))
|
||||
expected = ymd.loc[2000, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# ex from #1796
|
||||
index = MultiIndex(levels=[['foo', 'bar'], ['one', 'two'], [-1, 1]],
|
||||
codes=[[0, 0, 0, 0, 1, 1, 1, 1],
|
||||
[0, 0, 1, 1, 0, 0, 1, 1], [0, 1, 0, 1, 0, 1,
|
||||
0, 1]])
|
||||
df = DataFrame(np.random.randn(8, 4), index=index,
|
||||
columns=list('abcd'))
|
||||
|
||||
result = df.xs(['foo', 'one'])
|
||||
expected = df.loc['foo', 'one']
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_partial(
|
||||
self, multiindex_year_month_day_dataframe_random_data):
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
ymd = ymd.T
|
||||
result = ymd[2000, 2]
|
||||
|
||||
expected = ymd.reindex(columns=ymd.columns[ymd.columns.codes[1] == 1])
|
||||
expected.columns = expected.columns.droplevel(0).droplevel(0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_fancy_slice_partial(
|
||||
self, multiindex_dataframe_random_data,
|
||||
multiindex_year_month_day_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
result = frame.loc['bar':'baz']
|
||||
expected = frame[3:7]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
result = ymd.loc[(2000, 2):(2000, 4)]
|
||||
lev = ymd.index.codes[1]
|
||||
expected = ymd[(lev >= 1) & (lev <= 3)]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_partial_column_select(self):
|
||||
idx = MultiIndex(codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
|
||||
levels=[['a', 'b'], ['x', 'y'], ['p', 'q']])
|
||||
df = DataFrame(np.random.rand(3, 2), index=idx)
|
||||
|
||||
result = df.loc[('a', 'y'), :]
|
||||
expected = df.loc[('a', 'y')]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[('a', 'y'), [1, 0]]
|
||||
expected = df.loc[('a', 'y')][[1, 0]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
with catch_warnings(record=True):
|
||||
simplefilter("ignore", DeprecationWarning)
|
||||
result = df.ix[('a', 'y'), [1, 0]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
pytest.raises(KeyError, df.loc.__getitem__,
|
||||
(('a', 'foo'), slice(None, None)))
|
||||
|
||||
def test_partial_set(
|
||||
self, multiindex_year_month_day_dataframe_random_data):
|
||||
# GH #397
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
df = ymd.copy()
|
||||
exp = ymd.copy()
|
||||
df.loc[2000, 4] = 0
|
||||
exp.loc[2000, 4].values[:] = 0
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
df['A'].loc[2000, 4] = 1
|
||||
exp['A'].loc[2000, 4].values[:] = 1
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
df.loc[2000] = 5
|
||||
exp.loc[2000].values[:] = 5
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
# this works...for now
|
||||
df['A'].iloc[14] = 5
|
||||
assert df['A'][14] == 5
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AMBIGUOUS CASES!
|
||||
|
||||
def test_partial_ix_missing(
|
||||
self, multiindex_year_month_day_dataframe_random_data):
|
||||
pytest.skip("skipping for now")
|
||||
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
result = ymd.loc[2000, 0]
|
||||
expected = ymd.loc[2000]['A']
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# need to put in some work here
|
||||
|
||||
# self.ymd.loc[2000, 0] = 0
|
||||
# assert (self.ymd.loc[2000]['A'] == 0).all()
|
||||
|
||||
# Pretty sure the second (and maybe even the first) is already wrong.
|
||||
pytest.raises(Exception, ymd.loc.__getitem__, (2000, 6))
|
||||
pytest.raises(Exception, ymd.loc.__getitem__, (2000, 6), 0)
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
expected = frame.copy()
|
||||
result = frame.copy()
|
||||
result.loc[['foo', 'bar']] = 0
|
||||
expected.loc['foo'] = 0
|
||||
expected.loc['bar'] = 0
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = frame.copy()
|
||||
result = frame.copy()
|
||||
result.loc['foo':'bar'] = 0
|
||||
expected.loc['foo'] = 0
|
||||
expected.loc['bar'] = 0
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = frame['A'].copy()
|
||||
result = frame['A'].copy()
|
||||
result.loc[['foo', 'bar']] = 0
|
||||
expected.loc['foo'] = 0
|
||||
expected.loc['bar'] = 0
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = frame['A'].copy()
|
||||
result = frame['A'].copy()
|
||||
result.loc['foo':'bar'] = 0
|
||||
expected.loc['foo'] = 0
|
||||
expected.loc['bar'] = 0
|
||||
tm.assert_series_equal(result, expected)
|
||||
+42
@@ -0,0 +1,42 @@
|
||||
from numpy.random import randn
|
||||
|
||||
from pandas import DataFrame, MultiIndex, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexSetOps(object):
|
||||
|
||||
def test_multiindex_symmetric_difference(self):
|
||||
# GH 13490
|
||||
idx = MultiIndex.from_product([['a', 'b'], ['A', 'B']],
|
||||
names=['a', 'b'])
|
||||
result = idx ^ idx
|
||||
assert result.names == idx.names
|
||||
|
||||
idx2 = idx.copy().rename(['A', 'B'])
|
||||
result = idx ^ idx2
|
||||
assert result.names == [None, None]
|
||||
|
||||
def test_mixed_depth_insert(self):
|
||||
arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
|
||||
['', 'OD', 'OD', 'result1', 'result2', 'result1'],
|
||||
['', 'wx', 'wy', '', '', '']]
|
||||
|
||||
tuples = sorted(zip(*arrays))
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
df = DataFrame(randn(4, 6), columns=index)
|
||||
|
||||
result = df.copy()
|
||||
expected = df.copy()
|
||||
result['b'] = [1, 2, 3, 4]
|
||||
expected['b', '', ''] = [1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_dataframe_insert_column_all_na(self):
|
||||
# GH #1534
|
||||
mix = MultiIndex.from_tuples([('1a', '2a'), ('1a', '2b'), ('1a', '2c')
|
||||
])
|
||||
df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix)
|
||||
s = Series({(1, 1): 1, (1, 2): 2})
|
||||
df['new'] = s
|
||||
assert df['new'].isna().all()
|
||||
+439
@@ -0,0 +1,439 @@
|
||||
from warnings import catch_warnings, simplefilter
|
||||
|
||||
import numpy as np
|
||||
from numpy.random import randn
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame, MultiIndex, Series, Timestamp, date_range, isna, notna)
|
||||
import pandas.core.common as com
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexSetItem(object):
    """Assignment through ``[]``, ``.loc`` and ``.ix`` with a MultiIndex."""

    def test_setitem_multiindex(self):
        """Set-then-get round trips through both ``.ix`` and ``.loc``."""

        def assert_equal(a, b):
            assert a == b

        columns = ['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit']

        with catch_warnings(record=True):

            for index_fn in ('ix', 'loc'):

                def check(target, indexers, value, compare_fn,
                          expected=None):
                    # assign through the indexer named by ``index_fn``, read
                    # the same location back and compare it with
                    # ``expected`` (which defaults to the assigned value)
                    fn = getattr(target, index_fn)
                    fn.__setitem__(indexers, value)
                    result = fn.__getitem__(indexers)
                    if expected is None:
                        expected = value
                    compare_fn(result, expected)

                # GH7190
                index = MultiIndex.from_product([np.arange(0, 100),
                                                 np.arange(0, 80)],
                                                names=['time', 'firm'])
                t, n = 0, 2
                df = DataFrame(np.nan, columns=columns, index=index)
                check(target=df, indexers=((t, n), 'X'), value=0,
                      compare_fn=assert_equal)

                df = DataFrame(-999, columns=columns, index=index)
                check(target=df, indexers=((t, n), 'X'), value=1,
                      compare_fn=assert_equal)

                df = DataFrame(columns=columns, index=index)
                check(target=df, indexers=((t, n), 'X'), value=2,
                      compare_fn=assert_equal)

                # gh-7218: assigning with 0-dim arrays
                df = DataFrame(-999, columns=columns, index=index)
                check(target=df,
                      indexers=((t, n), 'X'),
                      value=np.array(3),
                      compare_fn=assert_equal,
                      expected=3, )

                # GH5206
                df = DataFrame(np.arange(25).reshape(5, 5),
                               columns='A,B,C,D,E'.split(','), dtype=float)
                df['F'] = 99
                row_selection = df['A'] % 2 == 0
                col_selection = ['B', 'C']
                with catch_warnings(record=True):
                    df.ix[row_selection, col_selection] = df['F']
                output = DataFrame(99., index=[0, 2, 4], columns=['B', 'C'])
                with catch_warnings(record=True):
                    tm.assert_frame_equal(df.ix[row_selection, col_selection],
                                          output)
                check(target=df,
                      indexers=(row_selection, col_selection),
                      value=df['F'],
                      compare_fn=tm.assert_frame_equal,
                      expected=output, )

                # GH11372
                idx = MultiIndex.from_product([
                    ['A', 'B', 'C'],
                    date_range('2015-01-01', '2015-04-01', freq='MS')])
                cols = MultiIndex.from_product([
                    ['foo', 'bar'],
                    date_range('2016-01-01', '2016-02-01', freq='MS')])

                df = DataFrame(np.random.random((12, 4)),
                               index=idx, columns=cols)

                subidx = MultiIndex.from_tuples(
                    [('A', Timestamp('2015-01-01')),
                     ('A', Timestamp('2015-02-01'))])
                subcols = MultiIndex.from_tuples(
                    [('foo', Timestamp('2016-01-01')),
                     ('foo', Timestamp('2016-02-01'))])

                vals = DataFrame(np.random.random((2, 2)),
                                 index=subidx, columns=subcols)
                check(target=df,
                      indexers=(subidx, subcols),
                      value=vals,
                      compare_fn=tm.assert_frame_equal, )
                # set all columns
                vals = DataFrame(
                    np.random.random((2, 4)), index=subidx, columns=cols)
                check(target=df,
                      indexers=(subidx, slice(None, None, None)),
                      value=vals,
                      compare_fn=tm.assert_frame_equal, )
                # identity
                copy = df.copy()
                check(target=df, indexers=(df.index, df.columns), value=df,
                      compare_fn=tm.assert_frame_equal, expected=copy)

    def test_multiindex_setitem(self):
        """In-place arithmetic through ``.loc`` on MultiIndexed rows."""
        # GH 3738
        # setting with a multi-index right hand side
        arrays = [np.array(['bar', 'bar', 'baz', 'qux', 'qux', 'bar']),
                  np.array(['one', 'two', 'one', 'one', 'two', 'one']),
                  np.arange(0, 6, 1)]

        df_orig = DataFrame(np.random.randn(6, 3), index=arrays,
                            columns=['A', 'B', 'C']).sort_index()

        expected = df_orig.loc[['bar']] * 2
        df = df_orig.copy()
        df.loc[['bar']] *= 2
        tm.assert_frame_equal(df.loc[['bar']], expected)

        # raise because these have differing levels
        with pytest.raises(TypeError):
            df.loc['bar'] *= 2

        # from SO
        # http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
        df_orig = DataFrame.from_dict({'price': {
            ('DE', 'Coal', 'Stock'): 2,
            ('DE', 'Gas', 'Stock'): 4,
            ('DE', 'Elec', 'Demand'): 1,
            ('FR', 'Gas', 'Stock'): 5,
            ('FR', 'Solar', 'SupIm'): 0,
            ('FR', 'Wind', 'SupIm'): 0
        }})
        df_orig.index = MultiIndex.from_tuples(df_orig.index,
                                               names=['Sit', 'Com', 'Type'])

        expected = df_orig.copy()
        expected.iloc[[0, 2, 3]] *= 2

        idx = pd.IndexSlice
        df = df_orig.copy()
        df.loc[idx[:, :, 'Stock'], :] *= 2
        tm.assert_frame_equal(df, expected)

        df = df_orig.copy()
        df.loc[idx[:, :, 'Stock'], 'price'] *= 2
        tm.assert_frame_equal(df, expected)

    def test_multiindex_assignment(self):
        """Array/scalar assignment to a level-0 slice of one column."""
        # GH3777 part 2

        # mixed dtype
        df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3),
                       columns=list('abc'),
                       index=[[4, 4, 8], [8, 10, 12]])
        df['d'] = np.nan
        arr = np.array([0., 1.])

        with catch_warnings(record=True):
            df.ix[4, 'd'] = arr
            tm.assert_series_equal(df.ix[4, 'd'],
                                   Series(arr, index=[8, 10], name='d'))

        # single dtype
        df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3),
                       columns=list('abc'),
                       index=[[4, 4, 8], [8, 10, 12]])

        with catch_warnings(record=True):
            df.ix[4, 'c'] = arr
            exp = Series(arr, index=[8, 10], name='c', dtype='float64')
            tm.assert_series_equal(df.ix[4, 'c'], exp)

        # scalar ok
        with catch_warnings(record=True):
            df.ix[4, 'c'] = 10
            exp = Series(10, index=[8, 10], name='c', dtype='float64')
            tm.assert_series_equal(df.ix[4, 'c'], exp)

        # invalid assignments
        with pytest.raises(ValueError):
            with catch_warnings(record=True):
                df.ix[4, 'c'] = [0, 1, 2, 3]

        with pytest.raises(ValueError):
            with catch_warnings(record=True):
                df.ix[4, 'c'] = [0]

        # groupby example
        NUM_ROWS = 100
        NUM_COLS = 10
        col_names = ['A' + num for num in
                     map(str, np.arange(NUM_COLS).tolist())]
        index_cols = col_names[:5]

        df = DataFrame(np.random.randint(5, size=(NUM_ROWS, NUM_COLS)),
                       dtype=np.int64, columns=col_names)
        df = df.set_index(index_cols).sort_index()
        grp = df.groupby(level=index_cols[:4])
        df['new_col'] = np.nan

        # we are actually operating on a copy here
        # but in this case, that's ok
        for name, df2 in grp:
            new_vals = np.arange(df2.shape[0])
            with catch_warnings(record=True):
                df.ix[name, 'new_col'] = new_vals

    def test_series_setitem(
            self, multiindex_year_month_day_dataframe_random_data):
        """Partial and full tuple keys assign NaN into the expected rows."""
        ymd = multiindex_year_month_day_dataframe_random_data
        s = ymd['A']

        s[2000, 3] = np.nan
        assert isna(s.values[42:65]).all()
        assert notna(s.values[:42]).all()
        assert notna(s.values[65:]).all()

        s[2000, 3, 10] = np.nan
        assert isna(s[49])

    def test_frame_getitem_setitem_boolean(
            self, multiindex_dataframe_random_data):
        """Boolean-frame get/set mirrors the same edits on ``df.values``."""
        frame = multiindex_dataframe_random_data
        df = frame.T.copy()
        values = df.values

        result = df[df > 0]
        expected = df.where(df > 0)
        tm.assert_frame_equal(result, expected)

        df[df > 0] = 5
        values[values > 0] = 5
        tm.assert_almost_equal(df.values, values)

        df[df == 5] = 0
        values[values == 5] = 0
        tm.assert_almost_equal(df.values, values)

        # a df that needs alignment first
        df[df[:-1] < 0] = 2
        np.putmask(values[:-1], values[:-1] < 0, 2)
        tm.assert_almost_equal(df.values, values)

        with pytest.raises(TypeError, match='boolean values only'):
            df[df * 0] = 2

    def test_frame_getitem_setitem_multislice(self):
        """Full-slice ``.loc``/``.ix`` get and set on a MultiIndexed frame."""
        levels = [['t1', 't2'], ['a', 'b', 'c']]
        codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
        midx = MultiIndex(codes=codes, levels=levels, names=[None, 'id'])
        df = DataFrame({'value': [1, 2, 3, 7, 8]}, index=midx)

        result = df.loc[:, 'value']
        tm.assert_series_equal(df['value'], result)

        with catch_warnings(record=True):
            simplefilter("ignore", DeprecationWarning)
            result = df.ix[:, 'value']
        tm.assert_series_equal(df['value'], result)

        result = df.loc[df.index[1:3], 'value']
        tm.assert_series_equal(df['value'][1:3], result)

        result = df.loc[:, :]
        tm.assert_frame_equal(df, result)

        # Compare against an independent copy: aliasing ``df`` here would
        # make the two assertions below compare the frame with itself.
        expected = df.copy()
        df.loc[:, 'value'] = 10
        expected['value'] = 10
        tm.assert_frame_equal(df, expected)

        df.loc[:, :] = 10
        tm.assert_frame_equal(df, expected)

    def test_frame_setitem_multi_column(self):
        """Assigning to a top-level column key updates all its sub-columns."""
        df = DataFrame(randn(10, 4), columns=[['a', 'a', 'b', 'b'],
                                              [0, 1, 0, 1]])

        cp = df.copy()
        cp['a'] = cp['b']
        tm.assert_frame_equal(cp['a'], cp['b'])

        # set with ndarray
        cp = df.copy()
        cp['a'] = cp['b'].values
        tm.assert_frame_equal(cp['a'], cp['b'])

        # ---------------------------------------
        # #1803
        columns = MultiIndex.from_tuples([('A', '1'), ('A', '2'), ('B', '1')])
        df = DataFrame(index=[1, 3, 5], columns=columns)

        # Per the original notes on #1803 this assignment used to add a
        # column instead of updating the two existing 'A' columns; the
        # assertion checks that both are now set.
        df['A'] = 0.0
        assert (df['A'].values == 0).all()

        # it broadcasts
        df['B', '1'] = [1, 2, 3]
        df['A'] = df['B', '1']

        sliced_a1 = df['A', '1']
        sliced_a2 = df['A', '2']
        sliced_b1 = df['B', '1']
        tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
        tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
        assert sliced_a1.name == ('A', '1')
        assert sliced_a2.name == ('A', '2')
        assert sliced_b1.name == ('B', '1')

    def test_getitem_setitem_tuple_plus_columns(
            self, multiindex_year_month_day_dataframe_random_data):
        """A full row tuple plus a column list matches chained selection."""
        # GH #1013
        ymd = multiindex_year_month_day_dataframe_random_data
        df = ymd[:5]

        result = df.loc[(2000, 1, 6), ['A', 'B', 'C']]
        expected = df.loc[2000, 1, 6][['A', 'B', 'C']]
        tm.assert_series_equal(result, expected)

    def test_getitem_setitem_slice_integers(self):
        """Integer label slices on level 0 work for get and set."""
        index = MultiIndex(levels=[[0, 1, 2], [0, 2]],
                           codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]])

        frame = DataFrame(np.random.randn(len(index), 4), index=index,
                          columns=['a', 'b', 'c', 'd'])
        res = frame.loc[1:2]
        exp = frame.reindex(frame.index[2:])
        tm.assert_frame_equal(res, exp)

        frame.loc[1:2] = 7
        assert (frame.loc[1:2] == 7).values.all()

        series = Series(np.random.randn(len(index)), index=index)

        res = series.loc[1:2]
        exp = series.reindex(series.index[2:])
        tm.assert_series_equal(res, exp)

        series.loc[1:2] = 7
        assert (series.loc[1:2] == 7).values.all()

    def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
        """Replacing a column with a boolean Series is read back unchanged."""
        frame = multiindex_dataframe_random_data
        dft = frame.T
        s = dft['foo', 'two']
        dft['foo', 'two'] = s > s.median()
        tm.assert_series_equal(dft['foo', 'two'], s > s.median())
        # assert isinstance(dft._data.blocks[1].items, MultiIndex)

        reindexed = dft.reindex(columns=[('foo', 'two')])
        tm.assert_series_equal(reindexed['foo', 'two'], s > s.median())

    def test_set_column_scalar_with_ix(self, multiindex_dataframe_random_data):
        """Scalar assignment to a subset of MultiIndex rows."""
        frame = multiindex_dataframe_random_data
        subset = frame.index[[1, 4, 5]]

        frame.loc[subset] = 99
        assert (frame.loc[subset].values == 99).all()

        col = frame['B']
        col[subset] = 97
        assert (frame.loc[subset, 'B'] == 97).all()

    def test_nonunique_assignment_1750(self):
        """``.loc`` assignment with a non-unique MultiIndex key sets all hits."""
        df = DataFrame([[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]],
                       columns=list("ABCD"))

        df = df.set_index(['A', 'B'])
        ix = MultiIndex.from_tuples([(1, 1)])

        df.loc[ix, "C"] = '_'

        assert (df.xs((1, 1))['C'] == '_').all()

    def test_astype_assignment_with_dups(self):
        """A dtype-changing column assignment leaves the row index intact."""
        # GH 4686
        # assignment with dups that has a dtype change
        cols = MultiIndex.from_tuples([('A', '1'), ('B', '1'), ('A', '2')])
        df = DataFrame(np.arange(3).reshape((1, 3)),
                       columns=cols, dtype=object)
        index = df.index.copy()

        df['A'] = df['A'].astype(np.float64)
        tm.assert_index_equal(df.index, index)
|
||||
|
||||
|
||||
def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
    """Zeroing ``df['foo'].values`` in place is visible on the next read."""
    # this works because we are modifying the underlying array
    # really a no-no
    transposed = multiindex_dataframe_random_data.T
    transposed['foo'].values[:] = 0
    foo_values = transposed['foo'].values
    assert (foo_values == 0).all()
|
||||
|
||||
|
||||
def test_frame_setitem_copy_raises(multiindex_dataframe_random_data):
    """Chained assignment on the transposed frame raises."""
    # will raise/warn as its chained assignment
    transposed = multiindex_dataframe_random_data.T
    expected_message = (
        "A value is trying to be set on a copy of a slice from a DataFrame")
    with pytest.raises(com.SettingWithCopyError, match=expected_message):
        transposed['foo']['one'] = 2
|
||||
|
||||
|
||||
def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data):
    """A rejected chained assignment leaves the frame's data unchanged."""
    untouched = multiindex_dataframe_random_data.T
    working = untouched.copy()
    expected_message = (
        "A value is trying to be set on a copy of a slice from a DataFrame")
    with pytest.raises(com.SettingWithCopyError, match=expected_message):
        working['foo']['one'] = 2

    # the failed write must not have altered the copy
    tm.assert_frame_equal(working, untouched)
|
||||
+576
@@ -0,0 +1,576 @@
|
||||
from warnings import catch_warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import UnsortedIndexError
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, MultiIndex, Series, Timestamp
|
||||
from pandas.core.indexing import _non_reducing_slice
|
||||
from pandas.tests.indexing.common import _mklbl
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexSlicers(object):
    """Per-axis / per-level slicing of MultiIndex-ed Series and DataFrames."""

    def test_per_axis_per_level_getitem(self):
        """``.loc`` slicers select the same rows as explicitly listed keys."""
        # GH6134
        # example test case
        ix = MultiIndex.from_product(
            [_mklbl('A', 5), _mklbl('B', 7), _mklbl('C', 4), _mklbl('D', 2)])
        n_rows = len(ix.get_values())
        big = DataFrame(np.arange(n_rows), index=ix)
        a_labels = ('A1', 'A2', 'A3')

        # list of labels on the third level
        result = big.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :]
        expected = big.loc[[(a, b, c, d)
                            for a, b, c, d in big.index.values
                            if a in a_labels and c in ('C1', 'C3')]]
        tm.assert_frame_equal(result, expected)

        # label slice on the third level
        expected = big.loc[[(a, b, c, d)
                            for a, b, c, d in big.index.values
                            if a in a_labels and c in ('C1', 'C2', 'C3')]]
        result = big.loc[
            (slice('A1', 'A3'), slice(None), slice('C1', 'C3')), :]
        tm.assert_frame_equal(result, expected)

        # test multi-index slicing with per axis and per index controls
        index = MultiIndex.from_tuples(
            [('A', 1), ('A', 2), ('A', 3), ('B', 1)],
            names=['one', 'two'])
        columns = MultiIndex.from_tuples(
            [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
            names=['lvl0', 'lvl1'])
        df = DataFrame(np.arange(16, dtype='int64').reshape(4, 4),
                       index=index, columns=columns)
        df = df.sort_index(axis=0).sort_index(axis=1)
        everything = slice(None)

        # identity
        identity_keys = [
            ((everything, everything), everything),
            ((everything, everything), (everything, everything)),
            (everything, (everything, everything)),
        ]
        for key in identity_keys:
            tm.assert_frame_equal(df.loc[key], df)

        # index
        result = df.loc[(everything, [1]), :]
        tm.assert_frame_equal(result, df.iloc[[0, 3]])

        result = df.loc[(everything, 1), :]
        tm.assert_frame_equal(result, df.iloc[[0, 3]])

        # columns
        result = df.loc[:, (everything, ['foo'])]
        tm.assert_frame_equal(result, df.iloc[:, [1, 3]])

        # both
        result = df.loc[(everything, 1), (everything, ['foo'])]
        tm.assert_frame_equal(result, df.iloc[[0, 3], [1, 3]])

        result = df.loc['A', 'a']
        expected = DataFrame({'bar': [1, 5, 9], 'foo': [0, 4, 8]},
                             index=Index([1, 2, 3], name='two'),
                             columns=Index(['bar', 'foo'], name='lvl1'))
        tm.assert_frame_equal(result, expected)

        result = df.loc[(everything, [1, 2]), :]
        tm.assert_frame_equal(result, df.iloc[[0, 1, 3]])

        # multi-level series
        s = Series(np.arange(n_rows), index=ix)
        result = s.loc['A1':'A3', :, ['C1', 'C3']]
        expected = s.loc[[(a, b, c, d)
                          for a, b, c, d in s.index.values
                          if a in a_labels and c in ('C1', 'C3')]]
        tm.assert_series_equal(result, expected)

        # boolean indexers
        mask = df.loc[:, ('a', 'bar')] > 5
        result = df.loc[(everything, mask), :]
        tm.assert_frame_equal(result, df.iloc[[2, 3]])

        with pytest.raises(ValueError):
            df.loc[(everything, np.array([True, False])), :]

        # ambiguous cases
        # these can be multiply interpreted (e.g. in this case
        # as df.loc[slice(None),[1]] as well
        with pytest.raises(KeyError):
            df.loc[everything, [1]]

        result = df.loc[(everything, [1]), :]
        tm.assert_frame_equal(result, df.iloc[[0, 3]])

        # not lexsorted
        assert df.index.lexsort_depth == 2
        df = df.sort_index(level=1, axis=0)
        assert df.index.lexsort_depth == 0

        msg = ('MultiIndex slicing requires the index to be '
               r'lexsorted: slicing on levels \[1\], lexsort depth 0')
        with pytest.raises(UnsortedIndexError, match=msg):
            df.loc[(everything, slice('bar')), :]

        # GH 16734: not sorted, but no real slicing
        mask = df.loc[:, ('a', 'bar')] > 5
        result = df.loc[(everything, mask), :]
        tm.assert_frame_equal(result, df.iloc[[1, 3], :])

    def test_multiindex_slicers_non_unique(self):
        """Slicers work on a MultiIndex that contains duplicate keys."""
        # GH 7106
        # non-unique mi index support
        everything = slice(None)

        expected = DataFrame({'A': ['foo', 'foo'], 'B': ['a', 'a'],
                              'C': [1, 1], 'D': [1, 3]})
        expected = expected.set_index(['A', 'B', 'C']).sort_index()

        df = DataFrame({'A': ['foo', 'foo', 'foo', 'foo'],
                        'B': ['a', 'a', 'a', 'a'],
                        'C': [1, 2, 1, 3],
                        'D': [1, 2, 3, 4]})
        df = df.set_index(['A', 'B', 'C']).sort_index()
        assert not df.index.is_unique
        result = df.loc[(everything, everything, 1), :]
        tm.assert_frame_equal(result, expected)

        # this is equivalent of an xs expression
        result = df.xs(1, level=2, drop_level=False)
        tm.assert_frame_equal(result, expected)

        df = DataFrame({'A': ['foo', 'foo', 'foo', 'foo'],
                        'B': ['a', 'a', 'a', 'a'],
                        'C': [1, 2, 1, 2],
                        'D': [1, 2, 3, 4]})
        df = df.set_index(['A', 'B', 'C']).sort_index()
        assert not df.index.is_unique
        expected = DataFrame({'A': ['foo', 'foo'], 'B': ['a', 'a'],
                              'C': [1, 1], 'D': [1, 3]})
        expected = expected.set_index(['A', 'B', 'C']).sort_index()
        result = df.loc[(everything, everything, 1), :]
        assert not result.index.is_unique
        tm.assert_frame_equal(result, expected)

        # GH12896
        # numpy-implementation dependent bug
        ints = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 13, 14, 14, 16,
                17, 18, 19, 200000, 200000]
        count = len(ints)
        idx = MultiIndex.from_arrays([['a'] * count, ints])
        ser = Series([1] * count, index=idx).sort_index()
        result = ser.loc[(everything, slice(100000))]
        expected = Series([1] * (count - 2), index=idx[:-2]).sort_index()
        tm.assert_series_equal(result, expected)

    def test_multiindex_slicers_datetimelike(self):
        """Timestamp and string bounds on a datetime level agree."""
        # GH 7429
        # buggy/inconsistent behavior when slicing with datetime-like
        from datetime import datetime, timedelta
        first_day = datetime(2012, 1, 1, 12, 12, 12)
        dates = [first_day + timedelta(days=offset) for offset in range(6)]
        index = MultiIndex.from_product(
            [dates, [1, 2]], names=['date', 'frequency'])
        values = np.arange(6 * 2 * 4, dtype='int64').reshape(-1, 4)
        df = DataFrame(values, index=index, columns=list('ABCD'))

        # multi-axis slicing
        idx = pd.IndexSlice
        expected = df.iloc[[0, 2, 4], [0, 1]]
        start_ts = Timestamp('2012-01-01 12:12:12')
        stop_ts = Timestamp('2012-01-03 12:12:12')

        result = df.loc[(slice(start_ts, stop_ts), slice(1, 1)),
                        slice('A', 'B')]
        tm.assert_frame_equal(result, expected)

        result = df.loc[(idx[start_ts:stop_ts], idx[1:1]), slice('A', 'B')]
        tm.assert_frame_equal(result, expected)

        result = df.loc[(slice(start_ts, stop_ts), 1), slice('A', 'B')]
        tm.assert_frame_equal(result, expected)

        # with strings
        result = df.loc[(slice('2012-01-01 12:12:12', '2012-01-03 12:12:12'),
                         slice(1, 1)),
                        slice('A', 'B')]
        tm.assert_frame_equal(result, expected)

        result = df.loc[(idx['2012-01-01 12:12:12':'2012-01-03 12:12:12'], 1),
                        idx['A', 'B']]
        tm.assert_frame_equal(result, expected)

    def test_multiindex_slicers_edges(self):
        """Open-ended and partially matching slices on a 3-level index."""
        # GH 8132
        # various edge cases
        df = DataFrame(
            {'A': ['A0'] * 5 + ['A1'] * 5 + ['A2'] * 5,
             'B': ['B0', 'B0', 'B1', 'B1', 'B2'] * 3,
             'DATE': ["2013-06-11", "2013-07-02", "2013-07-09", "2013-07-30",
                      "2013-08-06", "2013-06-11", "2013-07-02", "2013-07-09",
                      "2013-07-30", "2013-08-06", "2013-09-03", "2013-10-01",
                      "2013-07-09", "2013-08-06", "2013-09-03"],
             'VALUES': [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2]})

        df['DATE'] = pd.to_datetime(df['DATE'])
        df1 = df.set_index(['A', 'B', 'DATE']).sort_index()
        everything = slice(None)

        # each entry: (row indexer handed to .loc, expected frame)
        cases = [
            # A1 - Get all values under "A0" and "A1"
            (slice('A1'), df1.iloc[0:10]),
            # A2 - Get all values from the start to "A2"
            (slice('A2'), df1),
            # A3 - Get all values under "B1" or "B2"
            ((everything, slice('B1', 'B2')),
             df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]]),
            # A4 - Get all values between 2013-07-02 and 2013-07-09
            ((everything, everything, slice('20130702', '20130709')),
             df1.iloc[[1, 2, 6, 7, 12]]),
            # B1 - Get all values in B0 that are also under A0, A1 and A2
            ((slice('A2'), slice('B0')),
             df1.iloc[[0, 1, 5, 6, 10, 11]]),
            # B2 - Get all values in B0, B1 and B2 (similar to what #2 is
            # doing for the As)
            ((everything, slice('B2')), df1),
            # B3 - Get all values from B1 to B2 and up to 2013-08-06
            ((everything, slice('B1', 'B2'), slice('2013-08-06')),
             df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]]),
            # B4 - Same as A4 but the start of the date slice is not a key.
            #      shows indexing on a partial selection slice
            ((everything, everything, slice('20130701', '20130709')),
             df1.iloc[[1, 2, 6, 7, 12]]),
        ]
        for row_key, expected in cases:
            result = df1.loc[row_key, :]
            tm.assert_frame_equal(result, expected)

    def test_per_axis_per_level_doc_examples(self):
        """Examples from the advanced-indexing docs, slice() vs IndexSlice."""
        # test index maker
        idx = pd.IndexSlice

        # from indexing.rst / advanced
        index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2),
                                         _mklbl('C', 4), _mklbl('D', 2)])
        columns = MultiIndex.from_tuples(
            [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
            names=['lvl0', 'lvl1'])
        shape = (len(index), len(columns))
        values = np.arange(len(index) * len(columns), dtype='int64')
        df = DataFrame(values.reshape(shape), index=index, columns=columns)
        everything = slice(None)

        result = df.loc[(slice('A1', 'A3'), everything, ['C1', 'C3']), :]
        expected = df.loc[[(a, b, c, d)
                           for a, b, c, d in df.index.values
                           if a in ('A1', 'A2', 'A3') and c in ('C1', 'C3')]]
        tm.assert_frame_equal(result, expected)
        result = df.loc[idx['A1':'A3', :, ['C1', 'C3']], :]
        tm.assert_frame_equal(result, expected)

        result = df.loc[(everything, everything, ['C1', 'C3']), :]
        expected = df.loc[[(a, b, c, d)
                           for a, b, c, d in df.index.values
                           if c in ('C1', 'C3')]]
        tm.assert_frame_equal(result, expected)
        result = df.loc[idx[:, :, ['C1', 'C3']], :]
        tm.assert_frame_equal(result, expected)

        # not sorted
        with pytest.raises(UnsortedIndexError):
            df.loc['A1', ('a', slice('foo'))]

        # GH 16734: not sorted, but no real slicing
        result = df.loc['A1', (everything, 'foo')]
        tm.assert_frame_equal(result, df.loc['A1'].iloc[:, [0, 2]])

        df = df.sort_index(axis=1)

        # slicing (only checked for not raising)
        df.loc['A1', (everything, 'foo')]
        df.loc[(everything, everything, ['C1', 'C3']), (everything, 'foo')]

        # setitem
        df.loc(axis=0)[:, :, ['C1', 'C3']] = -10

    def test_loc_axis_arguments(self):
        """``.loc(axis=...)`` takes ints and names; bad values raise."""
        index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2),
                                         _mklbl('C', 4), _mklbl('D', 2)])
        columns = MultiIndex.from_tuples(
            [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
            names=['lvl0', 'lvl1'])
        shape = (len(index), len(columns))
        values = np.arange(len(index) * len(columns), dtype='int64')
        df = DataFrame(values.reshape(shape), index=index, columns=columns)
        df = df.sort_index().sort_index(axis=1)

        # axis 0
        result = df.loc(axis=0)['A1':'A3', :, ['C1', 'C3']]
        expected = df.loc[[(a, b, c, d)
                           for a, b, c, d in df.index.values
                           if a in ('A1', 'A2', 'A3') and c in ('C1', 'C3')]]
        tm.assert_frame_equal(result, expected)

        result = df.loc(axis='index')[:, :, ['C1', 'C3']]
        expected = df.loc[[(a, b, c, d)
                           for a, b, c, d in df.index.values
                           if c in ('C1', 'C3')]]
        tm.assert_frame_equal(result, expected)

        # axis 1
        for column_axis in (1, 'columns'):
            result = df.loc(axis=column_axis)[:, 'foo']
            expected = df.loc[:, (slice(None), 'foo')]
            tm.assert_frame_equal(result, expected)

        # invalid axis
        for bad_axis in (-1, 2, 'foo'):
            with pytest.raises(ValueError):
                df.loc(axis=bad_axis)[:, :, ['C1', 'C3']]

    def test_per_axis_per_level_setitem(self):
        """Assigning through ``.loc`` slicers matches positional writes."""
        # test index maker
        idx = pd.IndexSlice

        # test multi-index slicing with per axis and per index controls
        index = MultiIndex.from_tuples(
            [('A', 1), ('A', 2), ('A', 3), ('B', 1)],
            names=['one', 'two'])
        columns = MultiIndex.from_tuples(
            [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
            names=['lvl0', 'lvl1'])
        df_orig = DataFrame(np.arange(16, dtype='int64').reshape(4, 4),
                            index=index, columns=columns)
        df_orig = df_orig.sort_index(axis=0).sort_index(axis=1)
        everything = slice(None)

        def filled(iloc_key):
            # copy of df_orig with 100 written at the given positional key
            out = df_orig.copy()
            out.iloc[iloc_key] = 100
            return out

        # identity
        df = df_orig.copy()
        df.loc[(everything, everything), :] = 100
        tm.assert_frame_equal(df, filled((everything, everything)))

        df = df_orig.copy()
        df.loc(axis=0)[:, :] = 100
        tm.assert_frame_equal(df, filled((everything, everything)))

        df = df_orig.copy()
        df.loc[(everything, everything), (everything, everything)] = 100
        tm.assert_frame_equal(df, filled((everything, everything)))

        df = df_orig.copy()
        df.loc[:, (everything, everything)] = 100
        tm.assert_frame_equal(df, filled((everything, everything)))

        # index
        df = df_orig.copy()
        df.loc[(everything, [1]), :] = 100
        tm.assert_frame_equal(df, filled([0, 3]))

        df = df_orig.copy()
        df.loc[(everything, 1), :] = 100
        tm.assert_frame_equal(df, filled([0, 3]))

        df = df_orig.copy()
        df.loc(axis=0)[:, 1] = 100
        tm.assert_frame_equal(df, filled([0, 3]))

        # columns
        df = df_orig.copy()
        df.loc[:, (everything, ['foo'])] = 100
        tm.assert_frame_equal(df, filled((everything, [1, 3])))

        # both
        df = df_orig.copy()
        df.loc[(everything, 1), (everything, ['foo'])] = 100
        tm.assert_frame_equal(df, filled(([0, 3], [1, 3])))

        df = df_orig.copy()
        df.loc[idx[:, 1], idx[:, ['foo']]] = 100
        tm.assert_frame_equal(df, filled(([0, 3], [1, 3])))

        df = df_orig.copy()
        df.loc['A', 'a'] = 100
        tm.assert_frame_equal(df, filled((slice(0, 3), slice(0, 2))))

        # setting with a list-like
        df = df_orig.copy()
        df.loc[(everything, 1), (everything, ['foo'])] = np.array(
            [[100, 100], [100, 100]], dtype='int64')
        tm.assert_frame_equal(df, filled(([0, 3], [1, 3])))

        # not enough values
        df = df_orig.copy()

        with pytest.raises(ValueError):
            df.loc[(everything, 1), (everything, ['foo'])] = np.array(
                [[100], [100, 100]], dtype='int64')

        with pytest.raises(ValueError):
            df.loc[(everything, 1), (everything, ['foo'])] = np.array(
                [100, 100, 100, 100], dtype='int64')

        # with an alignable rhs
        df = df_orig.copy()
        selected = df.loc[(everything, 1), (everything, ['foo'])]
        df.loc[(everything, 1), (everything, ['foo'])] = selected * 5
        expected = df_orig.copy()
        expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5
        tm.assert_frame_equal(df, expected)

        df = df_orig.copy()
        df.loc[(everything, 1), (everything, ['foo'])] *= df.loc[
            (everything, 1), (everything, ['foo'])]
        expected = df_orig.copy()
        expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
        tm.assert_frame_equal(df, expected)

        # rhs carries an extra column that is not part of the target
        rhs = df_orig.loc[(everything, 1), (everything, ['foo'])].copy()
        rhs.loc[:, ('c', 'bah')] = 10
        df = df_orig.copy()
        df.loc[(everything, 1), (everything, ['foo'])] *= rhs
        expected = df_orig.copy()
        expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
        tm.assert_frame_equal(df, expected)

    def test_multiindex_label_slicing_with_negative_step(self):
        """Negative-step label slices match their positional equivalents."""
        s = Series(np.arange(20),
                   MultiIndex.from_product([list('abcde'), np.arange(4)]))
        SLC = pd.IndexSlice

        # each entry: (label slice, equivalent positional slice)
        cases = [
            (SLC[::-1], SLC[::-1]),

            (SLC['d'::-1], SLC[15::-1]),
            (SLC[('d', )::-1], SLC[15::-1]),

            (SLC[:'d':-1], SLC[:11:-1]),
            (SLC[:('d', ):-1], SLC[:11:-1]),

            (SLC['d':'b':-1], SLC[15:3:-1]),
            (SLC[('d', ):'b':-1], SLC[15:3:-1]),
            (SLC['d':('b', ):-1], SLC[15:3:-1]),
            (SLC[('d', ):('b', ):-1], SLC[15:3:-1]),
            (SLC['b':'d':-1], SLC[:0]),

            (SLC[('c', 2)::-1], SLC[10::-1]),
            (SLC[:('c', 2):-1], SLC[:9:-1]),
            (SLC[('e', 0):('c', 2):-1], SLC[16:9:-1]),
        ]
        for label_slc, pos_slc in cases:
            tm.assert_series_equal(s.loc[label_slc], s.iloc[pos_slc])
            tm.assert_series_equal(s[label_slc], s.iloc[pos_slc])
            with catch_warnings(record=True):
                tm.assert_series_equal(s.ix[label_slc], s.iloc[pos_slc])

    def test_multiindex_slice_first_level(self):
        """Selecting a first-level label after slicing the second level."""
        # GH 12697
        freq = ['a', 'b', 'c', 'd']
        idx = MultiIndex.from_product([freq, np.arange(500)])
        df = DataFrame(list(range(2000)), index=idx, columns=['Test'])
        df_slice = df.loc[pd.IndexSlice[:, 30:70], :]

        # each entry: (first-level label, first expected value)
        for label, first_value in (('a', 30), ('d', 1530)):
            result = df_slice.loc[label]
            expected = DataFrame(list(range(first_value, first_value + 41)),
                                 columns=['Test'], index=range(30, 71))
            tm.assert_frame_equal(result, expected)

    def test_int_series_slicing(
            self, multiindex_year_month_day_dataframe_random_data):
        """Integer slices on a MultiIndex-ed object act positionally."""
        ymd = multiindex_year_month_day_dataframe_random_data
        s = ymd['A']
        tail_index = s.index[5:]
        tm.assert_series_equal(s[5:], s.reindex(tail_index))

        # statement order matters: the reference copy is taken before the
        # slice assignment
        exp = ymd['A'].copy()
        s[5:] = 0
        exp.values[5:] = 0
        tm.assert_numpy_array_equal(s.values, exp.values)

        result = ymd[5:]
        expected = ymd.reindex(s.index[5:])
        tm.assert_frame_equal(result, expected)

    def test_non_reducing_slice_on_multiindex(self):
        """``_non_reducing_slice`` output is usable as a ``.loc`` key."""
        # GH 19861
        data = {
            ('a', 'd'): [1, 4],
            ('a', 'c'): [2, 3],
            ('b', 'c'): [3, 2],
            ('b', 'd'): [4, 1]
        }
        df = pd.DataFrame(data, index=[0, 1])
        idx = pd.IndexSlice
        tslice_ = _non_reducing_slice(idx[:, idx['b', 'd']])

        result = df.loc[tslice_]
        expected = pd.DataFrame({('b', 'd'): [4, 1]})
        tm.assert_frame_equal(result, expected)
|
||||
+92
@@ -0,0 +1,92 @@
|
||||
import numpy as np
|
||||
from numpy.random import randn
|
||||
|
||||
from pandas.compat import lzip
|
||||
|
||||
from pandas import DataFrame, MultiIndex, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexSorted(object):
    """Label lookups on MultiIndex objects whose keys are not sorted."""

    def test_getitem_multilevel_index_tuple_not_sorted(self):
        """``.loc`` with a one-element MultiIndex returns that single row."""
        level_names = list("abc")
        df = DataFrame([[0, 1, 0, "x"], [0, 0, 1, "y"]],
                       columns=level_names + ["data"])
        df = df.set_index(level_names)
        rs = df.loc[df.index[:1], "data"]

        xp = Series(
            ['x'],
            index=MultiIndex.from_tuples([(0, 1, 0)],
                                         names=['a', 'b', 'c']),
            name='data')
        tm.assert_series_equal(rs, xp)

    def test_getitem_slice_not_sorted(self, multiindex_dataframe_random_data):
        """A numpy integer is accepted as a positional slice bound."""
        df = multiindex_dataframe_random_data.sort_index(level=1).T

        # buglet with int typechecking
        stop = np.int32(3)
        result = df.iloc[:, :stop]
        expected = df.reindex(columns=df.columns[:3])
        tm.assert_frame_equal(result, expected)

    def test_frame_getitem_not_sorted2(self):
        """``sort_index`` restores lexsort order after levels are rewritten."""
        # 13431
        df = DataFrame({'col1': ['b', 'd', 'b', 'a'],
                        'col2': [3, 1, 1, 2],
                        'data': ['one', 'two', 'three', 'four']})

        df2 = df.set_index(['col1', 'col2'])
        df2_original = df2.copy()

        # rewrite levels and codes in place so that the same labels are
        # stored in an unsorted arrangement
        unsorted_index = df2.index
        unsorted_index.set_levels(['b', 'd', 'a'], level='col1', inplace=True)
        unsorted_index.set_codes([0, 1, 0, 2], level='col1', inplace=True)
        assert not df2.index.is_lexsorted()
        assert not df2.index.is_monotonic

        assert df2_original.index.equals(df2.index)
        expected = df2.sort_index()
        assert expected.index.is_lexsorted()
        assert expected.index.is_monotonic

        result = df2.sort_index(level=0)
        assert result.index.is_lexsorted()
        assert result.index.is_monotonic
        tm.assert_frame_equal(result, expected)

    def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data):
        """First-level label lookup works on columns and on rows."""
        df = multiindex_dataframe_random_data.T
        df['foo', 'four'] = 'foo'

        level_arrays = [np.array(x) for x in zip(*df.columns.values)]
        is_foo = level_arrays[0] == 'foo'

        # columns
        expected = df.reindex(columns=df.columns[is_foo])
        expected.columns = expected.columns.droplevel(0)
        tm.assert_frame_equal(df['foo'], expected)
        tm.assert_frame_equal(df.loc[:, 'foo'], expected)

        # rows, after transposing back
        df = df.T
        expected = df.reindex(df.index[is_foo])
        expected.index = expected.index.droplevel(0)
        tm.assert_frame_equal(df.xs('foo'), expected)
        tm.assert_frame_equal(df.loc['foo'], expected)

    def test_series_getitem_not_sorted(self):
        """First-level label lookup on a Series with unsorted labels."""
        arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'],
                  ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
        index = MultiIndex.from_tuples(lzip(*arrays))
        s = Series(randn(8), index=index)

        level_arrays = [np.array(x) for x in zip(*index.values)]
        is_qux = level_arrays[0] == 'qux'

        expected = s[is_qux]
        expected.index = expected.index.droplevel(0)
        tm.assert_series_equal(s['qux'], expected)
        tm.assert_series_equal(s.loc['qux'], expected)
|
||||
@@ -0,0 +1,237 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange, product as cart_product
|
||||
|
||||
from pandas import DataFrame, Index, MultiIndex, Series, concat, date_range
|
||||
import pandas.core.common as com
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def four_level_index_dataframe():
    """3x5 float frame indexed by levels 'one', 'two', 'three', 'four'."""
    values = np.array([[-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
                       [0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
                       [-0.6662, -0.5243, -0.358, 0.89145, 2.5838]])
    levels = [['a', 'x'], ['b', 'q'], [10.0032, 20.0, 30.0], [3, 4, 5]]
    codes = [[0, 0, 1], [0, 1, 1], [0, 1, 2], [2, 1, 0]]
    index = MultiIndex(levels=levels, codes=codes,
                       names=['one', 'two', 'three', 'four'])
    return DataFrame(values, index=index, columns=list('ABCDE'))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('key, level, exp_arr, exp_index', [
    ('a', 'lvl0', lambda x: x[:, 0:2], Index(['bar', 'foo'], name='lvl1')),
    ('foo', 'lvl1', lambda x: x[:, 1:2], Index(['a'], name='lvl0'))
])
def test_xs_named_levels_axis_eq_1(key, level, exp_arr, exp_index):
    """``xs`` on columns accepts a level given by name."""
    # see gh-2903
    values = np.random.randn(4, 4)
    columns = MultiIndex(
        levels=[['a', 'b'], ['bar', 'foo', 'hello', 'world']],
        codes=[[0, 0, 1, 1], [0, 1, 2, 3]],
        names=['lvl0', 'lvl1'])
    df = DataFrame(values, columns=columns)

    # exp_arr picks the columns of the raw array that should survive
    expected = DataFrame(exp_arr(values), columns=exp_index)
    result = df.xs(key, level=level, axis=1)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_xs_values(multiindex_dataframe_random_data):
    """Values of ``xs(('bar', 'two'))`` equal row 4 of the raw values."""
    frame = multiindex_dataframe_random_data
    cross_section = frame.xs(('bar', 'two'))
    tm.assert_almost_equal(cross_section.values, frame.values[4])
|
||||
|
||||
|
||||
def test_xs_loc_equality(multiindex_dataframe_random_data):
    """``xs`` and ``.loc`` return the same Series for a full key."""
    frame = multiindex_dataframe_random_data
    key = ('bar', 'two')
    result = frame.xs(key)
    expected = frame.loc[key]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_xs_missing_values_in_index():
    """A missing label in the remaining level survives ``xs``."""
    # see gh-6574
    # missing values in returned index should be preserved
    records = [
        ('a', 'abcde', 1),
        ('b', 'bbcde', 2),
        ('y', 'yzcde', 25),
        ('z', 'xbcde', 24),
        ('z', None, 26),
        ('z', 'zbcde', 25),
        ('z', 'ybcde', 26),
    ]
    df = DataFrame(records, columns=['a1', 'a2', 'cnt'])
    df = df.set_index(['a1', 'a2'])

    expected_index = Index(['xbcde', np.nan, 'zbcde', 'ybcde'], name='a2')
    expected = DataFrame({'cnt': [24, 26, 25, 26]}, index=expected_index)

    result = df.xs('z', level='a1')
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('key, level', [
    ('one', 'second'),
    (['one'], ['second'])
])
def test_xs_with_duplicates(key, level, multiindex_dataframe_random_data):
    """``xs`` by level on an index that has duplicated entries."""
    # see gh-13719
    frame = multiindex_dataframe_random_data
    doubled = concat([frame, frame])
    assert doubled.index.is_unique is False

    single = frame.xs('one', level='second')
    expected = concat([single, single])

    result = doubled.xs(key, level=level)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_xs_level(multiindex_dataframe_random_data):
    """``xs`` on the 'second' level drops that level from the result."""
    frame = multiindex_dataframe_random_data
    result = frame.xs('two', level='second')

    # same rows via a boolean mask, then relabelled with the first level
    is_two = frame.index.get_level_values(1) == 'two'
    expected = frame[is_two]
    expected.index = Index(['foo', 'bar', 'baz', 'qux'], name='first')
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_xs_level_eq_2():
    """``xs`` on the last of three levels, given by position."""
    values = np.random.randn(3, 5)
    levels = [['a', 'p', 'x'], ['b', 'q', 'y'], ['c', 'r', 'z']]
    codes = [[2, 0, 1], [2, 0, 1], [2, 0, 1]]
    df = DataFrame(values, index=MultiIndex(levels=levels, codes=codes))

    # the codes place ('a', 'b', 'c') in the second row
    expected = DataFrame(values[1:2], index=[['a'], ['b']])
    result = df.xs('c', level=2)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer', [
    lambda df: df.xs(('a', 4), level=['one', 'four']),
    lambda df: df.xs('a').xs(4, level='four')
])
def test_xs_level_multiple(indexer, four_level_index_dataframe):
    """A two-level ``xs`` and two chained ``xs`` calls give the same frame."""
    frame = four_level_index_dataframe
    remaining_index = MultiIndex(levels=[['q'], [20.0]],
                                 codes=[[0], [0]],
                                 names=['two', 'three'])
    expected = DataFrame([[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]],
                         index=remaining_index,
                         columns=list('ABCDE'))
    result = indexer(frame)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_xs_setting_with_copy_error(multiindex_dataframe_random_data):
    """Writing into the result of a level ``xs`` raises."""
    # this is a copy in 0.14
    frame = multiindex_dataframe_random_data
    section = frame.xs('two', level='second')

    # setting this will give a SettingWithCopyError
    # as we are trying to write a view
    expected_msg = (
        'A value is trying to be set on a copy of a slice from a DataFrame')
    with pytest.raises(com.SettingWithCopyError, match=expected_msg):
        section[:] = 10
|
||||
|
||||
|
||||
def test_xs_setting_with_copy_error_multiple(four_level_index_dataframe):
    """Writing into the result of a two-level ``xs`` raises."""
    # this is a copy in 0.14
    frame = four_level_index_dataframe
    section = frame.xs(('a', 4), level=['one', 'four'])

    # setting this will give a SettingWithCopyError
    # as we are trying to write a view
    expected_msg = (
        'A value is trying to be set on a copy of a slice from a DataFrame')
    with pytest.raises(com.SettingWithCopyError, match=expected_msg):
        section[:] = 10
|
||||
|
||||
|
||||
def test_xs_integer_key():
    """``xs`` with an integer key on a named level matches ``.loc``."""
    # see gh-2107
    dates = lrange(20111201, 20111205)
    ids = 'abcde'
    pairs = list(cart_product(dates, ids))
    index = MultiIndex.from_tuples(pairs, names=['date', 'secid'])
    values = np.random.randn(len(index), 3)
    df = DataFrame(values, index=index, columns=['X', 'Y', 'Z'])

    result = df.xs(20111201, level='date')
    expected = df.loc[20111201, :]
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('indexer', [
    lambda df: df.xs('a', level=0),
    lambda df: df.xs('a')
])
def test_xs_level0(indexer, four_level_index_dataframe):
    """``xs('a')`` with and without ``level=0`` gives the same frame."""
    frame = four_level_index_dataframe
    remaining_index = MultiIndex(
        levels=[['b', 'q'], [10.0032, 20.0], [4, 5]],
        codes=[[0, 1], [0, 1], [1, 0]],
        names=['two', 'three', 'four'])
    expected = DataFrame(
        [[-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
         [0.4473, 1.4152, 0.2834, 1.00661, 0.1744]],
        index=remaining_index,
        columns=list('ABCDE'))

    result = indexer(frame)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_xs_level_series(multiindex_dataframe_random_data):
    """``s[:, 'two']`` equals column 'A' of ``df.xs('two', level=1)``."""
    # this test is not explicitly testing .xs functionality
    # TODO: move to another module or refactor
    frame = multiindex_dataframe_random_data
    column = frame['A']
    expected = frame.xs('two', level=1)['A']
    result = column[:, 'two']
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_xs_level_series_ymd(multiindex_year_month_day_dataframe_random_data):
    """Series ``[2000, 5]`` equals the ``'A'`` column of ``.loc[2000, 5]``."""
    # this test is not explicitly testing .xs functionality
    # TODO: move to another module or refactor
    frame = multiindex_year_month_day_dataframe_random_data
    column = frame['A']

    expected = frame.loc[2000, 5]['A']
    result = column[2000, 5]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_xs_level_series_slice_not_implemented(
        multiindex_year_month_day_dataframe_random_data):
    """A (label, slice) key on the Series is rejected with TypeError."""
    # this test is not explicitly testing .xs functionality
    # TODO: move to another module or refactor
    # not implementing this for now
    column = multiindex_year_month_day_dataframe_random_data['A']

    # The pattern matches the repr of the key used below.
    key_pattern = r'\(2000, slice\(3, 4, None\)\)'
    with pytest.raises(TypeError, match=key_pattern):
        column[2000, 3:4]
|
||||
|
||||
|
||||
def test_series_getitem_multiindex_xs():
    """``xs`` with a date string on the datetime level of a MultiIndex."""
    # GH6258
    stamps = list(date_range('20130903', periods=3))
    index = MultiIndex.from_product([list('AB'), stamps])
    ser = Series([1, 3, 4, 1, 3, 4], index=index)

    result = ser.xs('20130903', level=1)

    # The first date carries the value 1 under both outer labels.
    expected = Series([1, 1], index=list('AB'))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_series_getitem_multiindex_xs_by_label():
    """``xs`` addressed by level name keeps the remaining level's name."""
    # GH5684
    index = MultiIndex.from_tuples(
        [('a', 'one'), ('a', 'two'), ('b', 'one'), ('b', 'two')],
        names=['L1', 'L2'])
    ser = Series([1, 2, 3, 4], index=index)

    result = ser.xs('one', level='L2')

    expected = Series([1, 3], index=['a', 'b'])
    expected.index.name = 'L1'
    tm.assert_series_equal(result, expected)
|
||||
@@ -0,0 +1,268 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable-msg=W0612,E1101
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestIndexingCallable(object):
    """Check that ``.loc`` / ``.iloc`` accept callables as indexers.

    Every check indexes once with a callable (or a mix of callable and
    plain indexer) and once with the value that callable returns, and
    asserts that both give the same result. See GH 11485.
    """

    def test_frame_loc_ix_callable(self):
        """``.loc`` getitem where every indexer is a callable."""
        # GH 11485
        df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': list('aabb'),
                           'C': [1, 2, 3, 4]})
        # iloc cannot use boolean Series (see GH3635)

        # return bool indexer
        res = df.loc[lambda x: x.A > 2]
        tm.assert_frame_equal(res, df.loc[df.A > 2])

        # the trailing comma makes the key a one-element tuple
        res = df.loc[lambda x: x.A > 2, ]
        tm.assert_frame_equal(res, df.loc[df.A > 2, ])

        res = df.loc[lambda x: x.B == 'b', :]
        tm.assert_frame_equal(res, df.loc[df.B == 'b', :])

        res = df.loc[lambda x: x.A > 2, lambda x: x.columns == 'B']
        tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]])

        res = df.loc[lambda x: x.A > 2, lambda x: 'B']
        tm.assert_series_equal(res, df.loc[df.A > 2, 'B'])

        res = df.loc[lambda x: x.A > 2, lambda x: ['A', 'B']]
        tm.assert_frame_equal(res, df.loc[df.A > 2, ['A', 'B']])

        res = df.loc[lambda x: x.A == 2, lambda x: ['A', 'B']]
        tm.assert_frame_equal(res, df.loc[df.A == 2, ['A', 'B']])

        # scalar
        res = df.loc[lambda x: 1, lambda x: 'A']
        assert res == df.loc[1, 'A']

    def test_frame_loc_ix_callable_mixture(self):
        """``.loc`` getitem mixing one callable with one plain indexer."""
        # GH 11485
        df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': list('aabb'),
                           'C': [1, 2, 3, 4]})

        res = df.loc[lambda x: x.A > 2, ['A', 'B']]
        tm.assert_frame_equal(res, df.loc[df.A > 2, ['A', 'B']])

        res = df.loc[[2, 3], lambda x: ['A', 'B']]
        tm.assert_frame_equal(res, df.loc[[2, 3], ['A', 'B']])

        res = df.loc[3, lambda x: ['A', 'B']]
        tm.assert_series_equal(res, df.loc[3, ['A', 'B']])

    def test_frame_loc_callable(self):
        """``.loc`` getitem with callables that return labels."""
        # GH 11485
        df = pd.DataFrame({'X': [1, 2, 3, 4],
                           'Y': list('aabb')},
                          index=list('ABCD'))

        # return label
        res = df.loc[lambda x: ['A', 'C']]
        tm.assert_frame_equal(res, df.loc[['A', 'C']])

        res = df.loc[lambda x: ['A', 'C'], ]
        tm.assert_frame_equal(res, df.loc[['A', 'C'], ])

        res = df.loc[lambda x: ['A', 'C'], :]
        tm.assert_frame_equal(res, df.loc[['A', 'C'], :])

        res = df.loc[lambda x: ['A', 'C'], lambda x: 'X']
        tm.assert_series_equal(res, df.loc[['A', 'C'], 'X'])

        res = df.loc[lambda x: ['A', 'C'], lambda x: ['X']]
        tm.assert_frame_equal(res, df.loc[['A', 'C'], ['X']])

        # mixture
        res = df.loc[['A', 'C'], lambda x: 'X']
        tm.assert_series_equal(res, df.loc[['A', 'C'], 'X'])

        res = df.loc[['A', 'C'], lambda x: ['X']]
        tm.assert_frame_equal(res, df.loc[['A', 'C'], ['X']])

        res = df.loc[lambda x: ['A', 'C'], 'X']
        tm.assert_series_equal(res, df.loc[['A', 'C'], 'X'])

        res = df.loc[lambda x: ['A', 'C'], ['X']]
        tm.assert_frame_equal(res, df.loc[['A', 'C'], ['X']])

    def test_frame_loc_callable_setitem(self):
        """``.loc`` setitem through callables matches plain-label setitem."""
        # GH 11485
        df = pd.DataFrame({'X': [1, 2, 3, 4],
                           'Y': list('aabb')},
                          index=list('ABCD'))

        # return label
        res = df.copy()
        res.loc[lambda x: ['A', 'C']] = -20
        exp = df.copy()
        exp.loc[['A', 'C']] = -20
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.loc[lambda x: ['A', 'C'], :] = 20
        exp = df.copy()
        exp.loc[['A', 'C'], :] = 20
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.loc[lambda x: ['A', 'C'], lambda x: 'X'] = -1
        exp = df.copy()
        exp.loc[['A', 'C'], 'X'] = -1
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.loc[lambda x: ['A', 'C'], lambda x: ['X']] = [5, 10]
        exp = df.copy()
        exp.loc[['A', 'C'], ['X']] = [5, 10]
        tm.assert_frame_equal(res, exp)

        # mixture
        res = df.copy()
        res.loc[['A', 'C'], lambda x: 'X'] = np.array([-1, -2])
        exp = df.copy()
        exp.loc[['A', 'C'], 'X'] = np.array([-1, -2])
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.loc[['A', 'C'], lambda x: ['X']] = 10
        exp = df.copy()
        exp.loc[['A', 'C'], ['X']] = 10
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.loc[lambda x: ['A', 'C'], 'X'] = -2
        exp = df.copy()
        exp.loc[['A', 'C'], 'X'] = -2
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.loc[lambda x: ['A', 'C'], ['X']] = -4
        exp = df.copy()
        exp.loc[['A', 'C'], ['X']] = -4
        tm.assert_frame_equal(res, exp)

    def test_frame_iloc_callable(self):
        """``.iloc`` getitem with callables that return positions."""
        # GH 11485
        df = pd.DataFrame({'X': [1, 2, 3, 4],
                           'Y': list('aabb')},
                          index=list('ABCD'))

        # return location
        res = df.iloc[lambda x: [1, 3]]
        tm.assert_frame_equal(res, df.iloc[[1, 3]])

        res = df.iloc[lambda x: [1, 3], :]
        tm.assert_frame_equal(res, df.iloc[[1, 3], :])

        res = df.iloc[lambda x: [1, 3], lambda x: 0]
        tm.assert_series_equal(res, df.iloc[[1, 3], 0])

        res = df.iloc[lambda x: [1, 3], lambda x: [0]]
        tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])

        # mixture
        res = df.iloc[[1, 3], lambda x: 0]
        tm.assert_series_equal(res, df.iloc[[1, 3], 0])

        res = df.iloc[[1, 3], lambda x: [0]]
        tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])

        res = df.iloc[lambda x: [1, 3], 0]
        tm.assert_series_equal(res, df.iloc[[1, 3], 0])

        res = df.iloc[lambda x: [1, 3], [0]]
        tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])

    def test_frame_iloc_callable_setitem(self):
        """``.iloc`` setitem through callables matches positional setitem."""
        # GH 11485
        df = pd.DataFrame({'X': [1, 2, 3, 4],
                           'Y': list('aabb')},
                          index=list('ABCD'))

        # return location
        res = df.copy()
        res.iloc[lambda x: [1, 3]] = 0
        exp = df.copy()
        exp.iloc[[1, 3]] = 0
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[lambda x: [1, 3], :] = -1
        exp = df.copy()
        exp.iloc[[1, 3], :] = -1
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[lambda x: [1, 3], lambda x: 0] = 5
        exp = df.copy()
        exp.iloc[[1, 3], 0] = 5
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[lambda x: [1, 3], lambda x: [0]] = 25
        exp = df.copy()
        exp.iloc[[1, 3], [0]] = 25
        tm.assert_frame_equal(res, exp)

        # mixture
        res = df.copy()
        res.iloc[[1, 3], lambda x: 0] = -3
        exp = df.copy()
        exp.iloc[[1, 3], 0] = -3
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[[1, 3], lambda x: [0]] = -5
        exp = df.copy()
        exp.iloc[[1, 3], [0]] = -5
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[lambda x: [1, 3], 0] = 10
        exp = df.copy()
        exp.iloc[[1, 3], 0] = 10
        tm.assert_frame_equal(res, exp)

        res = df.copy()
        res.iloc[lambda x: [1, 3], [0]] = [-5, -5]
        exp = df.copy()
        exp.iloc[[1, 3], [0]] = [-5, -5]
        tm.assert_frame_equal(res, exp)
|
||||
@@ -0,0 +1,717 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.compat as compat
|
||||
|
||||
from pandas.core.dtypes.common import is_categorical_dtype
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical, CategoricalIndex, DataFrame, Index, Interval, Series,
|
||||
Timestamp)
|
||||
from pandas.api.types import CategoricalDtype as CDT
|
||||
from pandas.util import testing as tm
|
||||
from pandas.util.testing import assert_frame_equal, assert_series_equal
|
||||
|
||||
|
||||
class TestCategoricalIndex(object):
|
||||
|
||||
def setup_method(self, method):
    """Create the four frames, indexed by a categorical 'B', used below."""

    def build(labels, dtype):
        # Column 'A' is always 0..5; 'B' becomes the categorical index.
        return DataFrame({'A': np.arange(6, dtype='int64'),
                          'B': Series(labels).astype(dtype)}).set_index('B')

    # string labels; df2 carries an extra category 'e' with no rows
    self.df = build(list('aabbca'), CDT(list('cab')))
    self.df2 = build(list('aabbca'), CDT(list('cabe')))

    # integer labels with categories in the order [3, 2, 1],
    # once ordered and once unordered
    self.df3 = build([1, 1, 2, 1, 3, 2], CDT([3, 2, 1], ordered=True))
    self.df4 = build([1, 1, 2, 1, 3, 2], CDT([3, 2, 1], ordered=False))
|
||||
|
||||
def test_loc_scalar(self):
    """Scalar ``.loc`` get/set on a CategoricalIndex-ed frame."""
    cat_dtype = CDT(list('cab'))

    # getitem: every row labelled 'a'
    expected = DataFrame({'A': [0, 1, 5],
                          'B': Series(list('aaa')).astype(cat_dtype)})
    expected = expected.set_index('B')
    assert_frame_equal(self.df.loc['a'], expected)

    # setitem: overwrite every row labelled 'a'
    df = self.df.copy()
    df.loc['a'] = 20
    expected = DataFrame({'A': [20, 20, 2, 3, 4, 20],
                          'B': Series(list('aabbca')).astype(cat_dtype)})
    expected = expected.set_index('B')
    assert_frame_equal(df, expected)

    # value not in the categories
    with pytest.raises(KeyError):
        df.loc['d']

    with pytest.raises(TypeError):
        df.loc['d'] = 10

    with pytest.raises(TypeError):
        df.loc['d', 'A'] = 10

    with pytest.raises(TypeError):
        df.loc['d', 'C'] = 10
|
||||
|
||||
def test_getitem_scalar(self):
|
||||
|
||||
cats = Categorical([Timestamp('12-31-1999'),
|
||||
Timestamp('12-31-2000')])
|
||||
|
||||
s = Series([1, 2], index=cats)
|
||||
|
||||
expected = s.iloc[0]
|
||||
result = s[cats[0]]
|
||||
assert result == expected
|
||||
|
||||
def test_slicing_directly(self):
|
||||
cat = Categorical(["a", "b", "c", "d", "a", "b", "c"])
|
||||
sliced = cat[3]
|
||||
assert sliced == "d"
|
||||
sliced = cat[3:5]
|
||||
expected = Categorical(["d", "a"], categories=['a', 'b', 'c', 'd'])
|
||||
tm.assert_numpy_array_equal(sliced._codes, expected._codes)
|
||||
tm.assert_index_equal(sliced.categories, expected.categories)
|
||||
|
||||
def test_slicing(self):
|
||||
cat = Series(Categorical([1, 2, 3, 4]))
|
||||
reversed = cat[::-1]
|
||||
exp = np.array([4, 3, 2, 1], dtype=np.int64)
|
||||
tm.assert_numpy_array_equal(reversed.__array__(), exp)
|
||||
|
||||
df = DataFrame({'value': (np.arange(100) + 1).astype('int64')})
|
||||
df['D'] = pd.cut(df.value, bins=[0, 25, 50, 75, 100])
|
||||
|
||||
expected = Series([11, Interval(0, 25)], index=['value', 'D'], name=10)
|
||||
result = df.iloc[10]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = DataFrame({'value': np.arange(11, 21).astype('int64')},
|
||||
index=np.arange(10, 20).astype('int64'))
|
||||
expected['D'] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100])
|
||||
result = df.iloc[10:20]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = Series([9, Interval(0, 25)], index=['value', 'D'], name=8)
|
||||
result = df.loc[8]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_slicing_and_getting_ops(self):
    """Run each getter on a frame with a categorical column.

    For every slicing op the test covers the four result shapes:
    a dataframe, a column, a row and a single value.
    """
    cats = Categorical(
        ["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"])
    idx = Index(["h", "i", "j", "k", "l", "m", "n"])
    values = [1, 2, 3, 4, 5, 6, 7]
    df = DataFrame({"cats": cats, "values": values}, index=idx)

    # the expected values
    # 2:4,: | "j":"k",:
    exp_df = DataFrame(
        {"cats": Categorical(["b", "c"], categories=["a", "b", "c"]),
         "values": [3, 4]},
        index=Index(["j", "k"]))
    # :,"cats" | :,0
    exp_col = Series(cats, index=idx, name='cats')
    # "j",: | 2,:
    exp_row = Series(["b", 3], index=["cats", "values"], dtype="object",
                     name="j")
    # "j","cats | 2,0
    exp_val = "b"

    def check_frame(res, expected=exp_df):
        tm.assert_frame_equal(res, expected)
        assert is_categorical_dtype(res["cats"])

    def check_row(res):
        # a row mixes dtypes, so the category comes back as a plain string
        tm.assert_series_equal(res, exp_row)
        assert isinstance(res["cats"], compat.string_types)

    def check_col(res):
        tm.assert_series_equal(res, exp_col)
        assert is_categorical_dtype(res)

    # iloc
    check_frame(df.iloc[2:4, :])
    check_row(df.iloc[2, :])
    check_col(df.iloc[:, 0])
    assert df.iloc[2, 0] == exp_val

    # loc
    check_frame(df.loc["j":"k", :])
    check_row(df.loc["j", :])
    check_col(df.loc[:, "cats"])
    assert df.loc["j", "cats"] == exp_val

    # ix
    # res_df = df.loc["j":"k",[0,1]] # doesn't work?
    check_frame(df.loc["j":"k", :])
    check_row(df.loc["j", :])
    check_col(df.loc[:, "cats"])
    assert df.loc["j", df.columns[0]] == exp_val

    # iat
    assert df.iat[2, 0] == exp_val

    # at
    assert df.at["j", "cats"] == exp_val

    # fancy indexing
    exp_fancy = df.iloc[[2]]
    tm.assert_frame_equal(df[df["cats"] == "b"], exp_fancy)
    tm.assert_frame_equal(df[df["values"] == 3], exp_fancy)

    # get_value
    assert df.at["j", "cats"] == exp_val

    # i : int, slice, or sequence of integers
    check_row(df.iloc[2])
    check_frame(df.iloc[slice(2, 4)])
    check_frame(df.iloc[[2, 3]])
    check_col(df.iloc[:, 0])

    # selecting both columns gives back the whole frame
    check_frame(df.iloc[:, slice(0, 2)], expected=df)
    check_frame(df.iloc[:, [0, 1]], expected=df)
|
||||
|
||||
def test_slicing_doc_examples(self):
    """Slices of a frame with a categorical column keep the categories."""
    # GH 7918
    categories = ["a", "b", "c"]
    cats = Categorical(["a", "b", "b", "b", "c", "c", "c"],
                       categories=categories)
    idx = Index(["h", "i", "j", "k", "l", "m", "n"])
    df = DataFrame({"cats": cats, "values": [1, 2, 2, 2, 3, 4, 5]},
                   index=idx)

    # positional row slice
    expected = DataFrame(
        {"cats": Categorical(['b', 'b'], categories=categories),
         "values": [2, 2]},
        index=['j', 'k'])
    tm.assert_frame_equal(df.iloc[2:4, :], expected)

    # dtypes of that slice
    expected = Series(['category', 'int64'], ['cats', 'values'])
    tm.assert_series_equal(df.iloc[2:4, :].dtypes, expected)

    # label row slice, single column -> Series
    head = Categorical(['a', 'b', 'b'], categories=categories)
    expected = Series(head, index=['h', 'i', 'j'], name='cats')
    tm.assert_series_equal(df.loc["h":"j", "cats"], expected)

    # label row slice, column slice -> DataFrame
    expected = DataFrame(
        {'cats': Categorical(['a', 'b', 'b'], categories=categories)},
        index=['h', 'i', 'j'])
    tm.assert_frame_equal(df.loc["h":"j", df.columns[0:1]], expected)
|
||||
|
||||
def test_getitem_category_type(self):
|
||||
# GH 14580
|
||||
# test iloc() on Series with Categorical data
|
||||
|
||||
s = Series([1, 2, 3]).astype('category')
|
||||
|
||||
# get slice
|
||||
result = s.iloc[0:2]
|
||||
expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# get list of indexes
|
||||
result = s.iloc[[0, 1]]
|
||||
expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# get boolean array
|
||||
result = s.iloc[[True, False, False]]
|
||||
expected = Series([1]).astype(CategoricalDtype([1, 2, 3]))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_loc_listlike(self):
    """List-like ``.loc`` on a CategoricalIndex-ed frame."""

    def abe_index():
        # index expected when selecting ['a', 'b', 'e'] from df2
        return CategoricalIndex(
            list('aaabbe'), categories=list('cabe'), name='B')

    # list of labels
    result = self.df.loc[['c', 'a']]
    expected = self.df.iloc[[4, 0, 1, 5]]
    assert_frame_equal(result, expected, check_index_type=True)

    # 'e' is a category without rows, so its row is expected to be NaN
    result = self.df2.loc[['a', 'b', 'e']]
    expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=abe_index())
    assert_frame_equal(result, expected, check_index_type=True)

    # element in the categories but not in the values
    with pytest.raises(KeyError):
        self.df2.loc['e']

    # assign is ok
    df = self.df2.copy()
    df.loc['e'] = 20
    result = df.loc[['a', 'b', 'e']]
    expected = DataFrame({'A': [0, 1, 5, 2, 3, 20]}, index=abe_index())
    assert_frame_equal(result, expected)

    df = self.df2.copy()
    result = df.loc[['a', 'b', 'e']]
    expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=abe_index())
    assert_frame_equal(result, expected, check_index_type=True)

    # not all labels in the categories
    with pytest.raises(KeyError):
        self.df2.loc[['a', 'd']]
|
||||
|
||||
def test_loc_listlike_dtypes(self):
    """List-like ``.loc`` keeps a CategoricalIndex with its categories."""
    # GH 11586
    msg = ('a list-indexer must only include values '
           'that are in the categories')

    def check(frame, labels, columns, exp_index):
        expected = DataFrame(columns, index=exp_index)
        tm.assert_frame_equal(frame.loc[labels], expected,
                              check_index_type=True)

    def check_unknown_label(frame):
        # 'x' is not one of the categories
        with pytest.raises(KeyError, match=msg):
            frame.loc[['a', 'x']]

    # unique categories and codes
    index = CategoricalIndex(['a', 'b', 'c'])
    df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index)

    # unique slice
    check(df, ['a', 'b'], {'A': [1, 2], 'B': [4, 5]},
          CategoricalIndex(['a', 'b'], categories=index.categories))

    # duplicated slice
    check(df, ['a', 'a', 'b'], {'A': [1, 1, 2], 'B': [4, 4, 5]},
          CategoricalIndex(['a', 'a', 'b'], categories=index.categories))

    check_unknown_label(df)

    # duplicated categories and codes
    index = CategoricalIndex(['a', 'b', 'a'])
    df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index)

    # unique slice
    check(df, ['a', 'b'], {'A': [1, 3, 2], 'B': [4, 6, 5]},
          CategoricalIndex(['a', 'a', 'b']))

    # duplicated slice
    check(df, ['a', 'a', 'b'],
          {'A': [1, 3, 1, 3, 2], 'B': [4, 6, 4, 6, 5]},
          CategoricalIndex(['a', 'a', 'a', 'a', 'b']))

    check_unknown_label(df)

    # contains unused category
    all_cats = list('abcde')
    index = CategoricalIndex(['a', 'b', 'a', 'c'], categories=all_cats)
    df = DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=index)

    check(df, ['a', 'b'], {'A': [1, 3, 2], 'B': [5, 7, 6]},
          CategoricalIndex(['a', 'a', 'b'], categories=all_cats))

    check(df, ['a', 'e'], {'A': [1, 3, np.nan], 'B': [5, 7, np.nan]},
          CategoricalIndex(['a', 'a', 'e'], categories=all_cats))

    # duplicated slice
    check(df, ['a', 'a', 'b'],
          {'A': [1, 3, 1, 3, 2], 'B': [5, 7, 5, 7, 6]},
          CategoricalIndex(['a', 'a', 'a', 'a', 'b'], categories=all_cats))

    check_unknown_label(df)
|
||||
|
||||
def test_get_indexer_array(self):
    """``get_indexer`` accepts an object array of Timestamps."""
    first = Timestamp('1999-12-31 00:00:00')
    second = Timestamp('2000-12-31 00:00:00')
    target = np.array([first, second], dtype=object)
    cats = [Timestamp('1999-12-31 00:00:00'),
            Timestamp('2000-12-31 00:00:00')]
    ci = CategoricalIndex(cats,
                          categories=cats,
                          ordered=False, dtype='category')

    positions = ci.get_indexer(target)
    tm.assert_numpy_array_equal(positions, np.array([0, 1], dtype='intp'))
|
||||
|
||||
def test_get_indexer_same_categories_same_order(self):
|
||||
ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'])
|
||||
|
||||
result = ci.get_indexer(CategoricalIndex(['b', 'b'],
|
||||
categories=['a', 'b']))
|
||||
expected = np.array([1, 1], dtype='intp')
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_get_indexer_same_categories_different_order(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/19551
|
||||
ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'])
|
||||
|
||||
result = ci.get_indexer(CategoricalIndex(['b', 'b'],
|
||||
categories=['b', 'a']))
|
||||
expected = np.array([1, 1], dtype='intp')
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_getitem_with_listlike(self):
    """Selecting all categorical columns by list returns the same frame."""
    # GH 16115
    cats = Categorical([Timestamp('12-31-1999'),
                        Timestamp('12-31-2000')])
    dummies = pd.get_dummies(cats)

    result = dummies[list(dummies.columns)]

    expected = DataFrame([[1, 0], [0, 1]], dtype='uint8',
                         index=[0, 1], columns=cats)
    assert_frame_equal(result, expected)
|
||||
|
||||
def test_setitem_listlike(self):
|
||||
|
||||
# GH 9469
|
||||
# properly coerce the input indexers
|
||||
np.random.seed(1)
|
||||
c = Categorical(np.random.randint(0, 5, size=150000).astype(
|
||||
np.int8)).add_categories([-1000])
|
||||
indexer = np.array([100000]).astype(np.int64)
|
||||
c[indexer] = -1000
|
||||
|
||||
# we are asserting the code result here
|
||||
# which maps to the -1000 category
|
||||
result = c.codes[np.array([100000]).astype(np.int64)]
|
||||
tm.assert_numpy_array_equal(result, np.array([5], dtype='int8'))
|
||||
|
||||
def test_ix_categorical_index(self):
    """``.loc`` on categorical axes agrees with ``.loc`` on plain axes."""
    # GH 12531

    def with_categorical_axes(frame):
        # same data, both axes converted to CategoricalIndex
        twin = frame.copy()
        twin.index = CategoricalIndex(frame.index)
        twin.columns = CategoricalIndex(frame.columns)
        return twin

    df = DataFrame(np.random.randn(3, 3),
                   index=list('ABC'), columns=list('XYZ'))
    cdf = with_categorical_axes(df)

    # single row / single column -> Series
    expect = Series(df.loc['A', :], index=cdf.columns, name='A')
    assert_series_equal(cdf.loc['A', :], expect)

    expect = Series(df.loc[:, 'X'], index=cdf.index, name='X')
    assert_series_equal(cdf.loc[:, 'X'], expect)

    # list of rows / list of columns -> DataFrame with all categories kept
    expect = DataFrame(
        df.loc[['A', 'B'], :], columns=cdf.columns,
        index=CategoricalIndex(list('AB'), categories=['A', 'B', 'C']))
    assert_frame_equal(cdf.loc[['A', 'B'], :], expect)

    expect = DataFrame(
        df.loc[:, ['X', 'Y']], index=cdf.index,
        columns=CategoricalIndex(list('XY'), categories=['X', 'Y', 'Z']))
    assert_frame_equal(cdf.loc[:, ['X', 'Y']], expect)

    # non-unique
    df = DataFrame(np.random.randn(3, 3),
                   index=list('ABA'), columns=list('XYX'))
    cdf = with_categorical_axes(df)

    # a duplicated label now selects a DataFrame
    expect = DataFrame(
        df.loc['A', :], columns=cdf.columns,
        index=CategoricalIndex(list('AA'), categories=['A', 'B']))
    assert_frame_equal(cdf.loc['A', :], expect)

    expect = DataFrame(
        df.loc[:, 'X'], index=cdf.index,
        columns=CategoricalIndex(list('XX'), categories=['X', 'Y']))
    assert_frame_equal(cdf.loc[:, 'X'], expect)

    expect = DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns,
                       index=CategoricalIndex(list('AAB')))
    assert_frame_equal(cdf.loc[['A', 'B'], :], expect)

    expect = DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index,
                       columns=CategoricalIndex(list('XXY')))
    assert_frame_equal(cdf.loc[:, ['X', 'Y']], expect)
|
||||
|
||||
def test_read_only_source(self):
|
||||
# GH 10043
|
||||
rw_array = np.eye(10)
|
||||
rw_df = DataFrame(rw_array)
|
||||
|
||||
ro_array = np.eye(10)
|
||||
ro_array.setflags(write=False)
|
||||
ro_df = DataFrame(ro_array)
|
||||
|
||||
assert_frame_equal(rw_df.iloc[[1, 2, 3]], ro_df.iloc[[1, 2, 3]])
|
||||
assert_frame_equal(rw_df.iloc[[1]], ro_df.iloc[[1]])
|
||||
assert_series_equal(rw_df.iloc[1], ro_df.iloc[1])
|
||||
assert_frame_equal(rw_df.iloc[1:3], ro_df.iloc[1:3])
|
||||
|
||||
assert_frame_equal(rw_df.loc[[1, 2, 3]], ro_df.loc[[1, 2, 3]])
|
||||
assert_frame_equal(rw_df.loc[[1]], ro_df.loc[[1]])
|
||||
assert_series_equal(rw_df.loc[1], ro_df.loc[1])
|
||||
assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3])
|
||||
|
||||
def test_reindexing(self):
    """Reindex a CategoricalIndex-ed frame with lists and Categoricals.

    A plain list target is expected to give back a regular index; a
    Categorical target is expected to give back a categorical index
    with the categories and orderedness of that target.
    """

    # reindexing
    # convert to a regular index
    result = self.df2.reindex(['a', 'b', 'e'])
    expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan],
                          'B': Series(list('aaabbe'))}).set_index('B')
    assert_frame_equal(result, expected, check_index_type=True)

    result = self.df2.reindex(['a', 'b'])
    expected = DataFrame({'A': [0, 1, 5, 2, 3],
                          'B': Series(list('aaabb'))}).set_index('B')
    assert_frame_equal(result, expected, check_index_type=True)

    # 'e' is a category without rows
    result = self.df2.reindex(['e'])
    expected = DataFrame({'A': [np.nan],
                          'B': Series(['e'])}).set_index('B')
    assert_frame_equal(result, expected, check_index_type=True)

    # 'd' is not a category at all
    result = self.df2.reindex(['d'])
    expected = DataFrame({'A': [np.nan],
                          'B': Series(['d'])}).set_index('B')
    assert_frame_equal(result, expected, check_index_type=True)

    # since we are actually reindexing with a Categorical
    # then return a Categorical
    cats = list('cabe')

    result = self.df2.reindex(Categorical(['a', 'd'], categories=cats))
    expected = DataFrame({'A': [0, 1, 5, np.nan],
                          'B': Series(list('aaad')).astype(
                              CDT(cats))}).set_index('B')
    assert_frame_equal(result, expected, check_index_type=True)

    result = self.df2.reindex(Categorical(['a'], categories=cats))
    expected = DataFrame({'A': [0, 1, 5],
                          'B': Series(list('aaa')).astype(
                              CDT(cats))}).set_index('B')
    assert_frame_equal(result, expected, check_index_type=True)

    # give back the type of categorical that we received
    result = self.df2.reindex(Categorical(
        ['a', 'd'], categories=cats, ordered=True))
    expected = DataFrame(
        {'A': [0, 1, 5, np.nan],
         'B': Series(list('aaad')).astype(
             CDT(cats, ordered=True))}).set_index('B')
    assert_frame_equal(result, expected, check_index_type=True)

    result = self.df2.reindex(Categorical(
        ['a', 'd'], categories=['a', 'd']))
    expected = DataFrame({'A': [0, 1, 5, np.nan],
                          'B': Series(list('aaad')).astype(
                              CDT(['a', 'd']))}).set_index('B')
    assert_frame_equal(result, expected, check_index_type=True)

    # passed duplicate indexers are not allowed
    with pytest.raises(ValueError):
        self.df2.reindex(['a', 'a'])

    # args NotImplemented ATM
    with pytest.raises(NotImplementedError):
        self.df2.reindex(['a'], method='ffill')
    with pytest.raises(NotImplementedError):
        self.df2.reindex(['a'], level=1)
    with pytest.raises(NotImplementedError):
        self.df2.reindex(['a'], limit=2)
|
||||
|
||||
def test_loc_slice(self):
    """Label-slicing ``self.df`` through ``.loc`` must raise ``TypeError``.

    GH9748: slicing is not implemented at the moment.
    """
    with pytest.raises(TypeError):
        self.df.loc[1:5]

    # Disabled expectation kept from the original test:
    #   result = df.loc[1:5]
    #   expected = df.iloc[[1,2,3,4]]
    #   assert_frame_equal(result, expected)
|
||||
|
||||
def test_boolean_selection(self):
    """Select rows with masks built by comparing the index to a scalar."""
    ordered, unordered = self.df3, self.df4

    # 'a' never matches, so both frames come back empty
    for frame in (ordered, unordered):
        assert_frame_equal(frame[frame.index == 'a'], frame.iloc[[]])

    # equality works the same way on both frames
    for frame in (ordered, unordered):
        assert_frame_equal(frame[frame.index == 1],
                           frame.iloc[[0, 1, 3]])

    # since we have an ordered categorical
    #
    # CategoricalIndex([1, 1, 2, 1, 3, 2],
    #                  categories=[3, 2, 1],
    #                  ordered=True,
    #                  name=u'B')
    below_two = ordered[ordered.index < 2]
    assert_frame_equal(below_two, ordered.iloc[[4]])

    above_one = ordered[ordered.index > 1]
    assert_frame_equal(above_one, ordered.iloc[[]])

    # unordered
    # cannot be compared
    #
    # CategoricalIndex([1, 1, 2, 1, 3, 2],
    #                  categories=[3, 2, 1],
    #                  ordered=False,
    #                  name=u'B')
    with pytest.raises(TypeError):
        unordered[unordered.index < 2]
    with pytest.raises(TypeError):
        unordered[unordered.index > 1]
|
||||
|
||||
def test_indexing_with_category(self):
|
||||
|
||||
# https://github.com/pandas-dev/pandas/issues/12564
|
||||
# consistent result if comparing as Dataframe
|
||||
|
||||
cat = DataFrame({'A': ['foo', 'bar', 'baz']})
|
||||
exp = DataFrame({'A': [True, False, False]})
|
||||
|
||||
res = (cat[['A']] == 'foo')
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
cat['A'] = cat['A'].astype('category')
|
||||
|
||||
res = (cat[['A']] == 'foo')
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
def test_map_with_dict_or_series(self):
|
||||
orig_values = ['a', 'B', 1, 'a']
|
||||
new_values = ['one', 2, 3.0, 'one']
|
||||
cur_index = pd.CategoricalIndex(orig_values, name='XXX')
|
||||
expected = pd.CategoricalIndex(new_values,
|
||||
name='XXX', categories=[3.0, 2, 'one'])
|
||||
|
||||
mapper = pd.Series(new_values[:-1], index=orig_values[:-1])
|
||||
output = cur_index.map(mapper)
|
||||
# Order of categories in output can be different
|
||||
tm.assert_index_equal(expected, output)
|
||||
|
||||
mapper = {o: n for o, n in
|
||||
zip(orig_values[:-1], new_values[:-1])}
|
||||
output = cur_index.map(mapper)
|
||||
# Order of categories in output can be different
|
||||
tm.assert_index_equal(expected, output)
|
||||
+402
@@ -0,0 +1,402 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame, Series, Timestamp, compat, date_range, option_context)
|
||||
from pandas.core import common as com
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestCaching(object):
|
||||
|
||||
def test_slice_consolidate_invalidate_item_cache(self):
    """Chained ``df['bb'].iloc[0] = ...`` must stay visible (#3970).

    The statement order is the scenario: a column is cached, ``repr`` is
    called (which, per the original comment, triggers consolidation), and
    only then is the chained assignment made.
    """

    # this is chained assignment, but will 'work'
    with option_context('chained_assignment', None):

        # #3970
        df = DataFrame({"aa": compat.lrange(5), "bb": [2.2] * 5})

        # Creates a second float block
        df["cc"] = 0.0

        # caches a reference to the 'bb' series
        df["bb"]

        # repr machinery triggers consolidation
        repr(df)

        # Assignment to wrong series
        df['bb'].iloc[0] = 0.17
        # drop the cached column so the check below re-reads it from df
        df._clear_item_cache()
        tm.assert_almost_equal(df['bb'][0], 0.17)
|
||||
|
||||
def test_setitem_cache_updating(self):
|
||||
# GH 5424
|
||||
cont = ['one', 'two', 'three', 'four', 'five', 'six', 'seven']
|
||||
|
||||
for do_ref in [False, False]:
|
||||
df = DataFrame({'a': cont,
|
||||
"b": cont[3:] + cont[:3],
|
||||
'c': np.arange(7)})
|
||||
|
||||
# ref the cache
|
||||
if do_ref:
|
||||
df.loc[0, "c"]
|
||||
|
||||
# set it
|
||||
df.loc[7, 'c'] = 1
|
||||
|
||||
assert df.loc[0, 'c'] == 0.0
|
||||
assert df.loc[7, 'c'] == 1.0
|
||||
|
||||
# GH 7084
|
||||
# not updating cache on series setting with slices
|
||||
expected = DataFrame({'A': [600, 600, 600]},
|
||||
index=date_range('5/7/2014', '5/9/2014'))
|
||||
out = DataFrame({'A': [0, 0, 0]},
|
||||
index=date_range('5/7/2014', '5/9/2014'))
|
||||
df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]})
|
||||
|
||||
# loop through df to update out
|
||||
six = Timestamp('5/7/2014')
|
||||
eix = Timestamp('5/9/2014')
|
||||
for ix, row in df.iterrows():
|
||||
out.loc[six:eix, row['C']] = out.loc[six:eix, row['C']] + row['D']
|
||||
|
||||
tm.assert_frame_equal(out, expected)
|
||||
tm.assert_series_equal(out['A'], expected['A'])
|
||||
|
||||
# try via a chain indexing
|
||||
# this actually works
|
||||
out = DataFrame({'A': [0, 0, 0]},
|
||||
index=date_range('5/7/2014', '5/9/2014'))
|
||||
for ix, row in df.iterrows():
|
||||
v = out[row['C']][six:eix] + row['D']
|
||||
out[row['C']][six:eix] = v
|
||||
|
||||
tm.assert_frame_equal(out, expected)
|
||||
tm.assert_series_equal(out['A'], expected['A'])
|
||||
|
||||
out = DataFrame({'A': [0, 0, 0]},
|
||||
index=date_range('5/7/2014', '5/9/2014'))
|
||||
for ix, row in df.iterrows():
|
||||
out.loc[six:eix, row['C']] += row['D']
|
||||
|
||||
tm.assert_frame_equal(out, expected)
|
||||
tm.assert_series_equal(out['A'], expected['A'])
|
||||
|
||||
|
||||
class TestChaining(object):
|
||||
|
||||
def test_setitem_chained_setfault(self):
    """Chained assignment through a column must update the frame.

    GH6026 covers ``df.response[mask] = ...`` for frames built from an
    ndarray, a recarray and plain lists; GH 6056 covers setting NaN via
    ``df['A'].iloc[0]`` and ``df.A.iloc[0]``.
    """

    # GH6026
    data = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout']
    # same as ``data`` with the trailing 'timeout' replaced by 'none'
    mdata = ['right', 'left', 'left', 'left', 'right', 'left', 'none']

    df = DataFrame({'response': np.array(data)})
    mask = df.response == 'timeout'
    df.response[mask] = 'none'
    tm.assert_frame_equal(df, DataFrame({'response': mdata}))

    recarray = np.rec.fromarrays([data], names=['response'])
    df = DataFrame(recarray)
    mask = df.response == 'timeout'
    df.response[mask] = 'none'
    tm.assert_frame_equal(df, DataFrame({'response': mdata}))

    # two columns: only 'response' may change
    df = DataFrame({'response': data, 'response1': data})
    mask = df.response == 'timeout'
    df.response[mask] = 'none'
    tm.assert_frame_equal(df, DataFrame({'response': mdata,
                                         'response1': data}))

    # GH 6056
    expected = DataFrame(dict(A=[np.nan, 'bar', 'bah', 'foo', 'bar']))
    df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar'])))
    df['A'].iloc[0] = np.nan
    result = df.head()
    tm.assert_frame_equal(result, expected)

    # same check with attribute access instead of item access
    df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar'])))
    df.A.iloc[0] = np.nan
    result = df.head()
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_detect_chained_assignment(self):
    """Walk through chained-assignment detection in 'raise' mode.

    Each section rebuilds ``df`` and checks either that a chained write
    raises ``SettingWithCopyError`` or that a legitimate write does not,
    tracking ``_is_copy`` along the way.
    """

    # NOTE(review): the option is set globally and not restored here, so
    # it stays in effect after this test unless something else resets it.
    pd.set_option('chained_assignment', 'raise')

    # work with the chain
    expected = DataFrame([[-5, 1], [-6, 3]], columns=list('AB'))
    df = DataFrame(np.arange(4).reshape(2, 2),
                   columns=list('AB'), dtype='int64')
    assert df._is_copy is None

    df['A'][0] = -5
    df['A'][1] = -6
    tm.assert_frame_equal(df, expected)

    # test with the chaining
    df = DataFrame({'A': Series(range(2), dtype='int64'),
                    'B': np.array(np.arange(2, 4), dtype=np.float64)})
    assert df._is_copy is None

    with pytest.raises(com.SettingWithCopyError):
        df['A'][0] = -5

    with pytest.raises(com.SettingWithCopyError):
        df['A'][1] = np.nan

    assert df['A']._is_copy is None

    # Using a copy (the chain), fails
    df = DataFrame({'A': Series(range(2), dtype='int64'),
                    'B': np.array(np.arange(2, 4), dtype=np.float64)})

    with pytest.raises(com.SettingWithCopyError):
        df.loc[0]['A'] = -5

    # Doc example
    df = DataFrame({'a': ['one', 'one', 'two', 'three',
                          'two', 'one', 'six'],
                    'c': Series(range(7), dtype='int64')})
    assert df._is_copy is None

    with pytest.raises(com.SettingWithCopyError):
        indexer = df.a.str.startswith('o')
        df[indexer]['c'] = 42

    expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]})
    df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]})

    with pytest.raises(com.SettingWithCopyError):
        df['A'][0] = 111

    with pytest.raises(com.SettingWithCopyError):
        df.loc[0]['A'] = 111

    # the single-step .loc write is the accepted spelling
    df.loc[0, 'A'] = 111
    tm.assert_frame_equal(df, expected)

    # gh-5475: Make sure that is_copy is picked up reconstruction
    df = DataFrame({"A": [1, 2]})
    assert df._is_copy is None

    with tm.ensure_clean('__tmp__pickle') as path:
        df.to_pickle(path)
        df2 = pd.read_pickle(path)
        df2["B"] = df2["A"]
        df2["B"] = df2["A"]

    # gh-5597: a spurious raise as we are setting the entire column here
    from string import ascii_letters as letters

    def random_text(nobs=100):
        # one-column frame of random slices of the ASCII letters
        df = []
        for i in range(nobs):
            idx = np.random.randint(len(letters), size=2)
            idx.sort()

            df.append([letters[idx[0]:idx[1]]])

        return DataFrame(df, columns=['letters'])

    df = random_text(100000)

    # Always a copy
    x = df.iloc[[0, 1, 2]]
    assert x._is_copy is not None

    x = df.iloc[[0, 1, 2, 4]]
    assert x._is_copy is not None

    # Explicitly copy
    indexer = df.letters.apply(lambda x: len(x) > 10)
    df = df.loc[indexer].copy()

    assert df._is_copy is None
    df['letters'] = df['letters'].apply(str.lower)

    # Implicitly take
    df = random_text(100000)
    indexer = df.letters.apply(lambda x: len(x) > 10)
    df = df.loc[indexer]

    assert df._is_copy is not None
    df['letters'] = df['letters'].apply(str.lower)

    # Implicitly take 2
    df = random_text(100000)
    indexer = df.letters.apply(lambda x: len(x) > 10)

    df = df.loc[indexer]
    assert df._is_copy is not None
    df.loc[:, 'letters'] = df['letters'].apply(str.lower)

    # Should be ok even though it's a copy!
    assert df._is_copy is None

    df['letters'] = df['letters'].apply(str.lower)
    assert df._is_copy is None

    df = random_text(100000)
    indexer = df.letters.apply(lambda x: len(x) > 10)
    df.loc[indexer, 'letters'] = (
        df.loc[indexer, 'letters'].apply(str.lower))

    # an identical take, so no copy
    df = DataFrame({'a': [1]}).dropna()
    assert df._is_copy is None
    df['a'] += 1

    df = DataFrame(np.random.randn(10, 4))
    s = df.iloc[:, 0].sort_values()

    tm.assert_series_equal(s, df.iloc[:, 0].sort_values())
    tm.assert_series_equal(s, df[0].sort_values())

    # see gh-6025: false positives
    df = DataFrame({'column1': ['a', 'a', 'a'], 'column2': [4, 8, 9]})
    str(df)

    df['column1'] = df['column1'] + 'b'
    str(df)

    df = df[df['column2'] != 8]
    str(df)

    df['column1'] = df['column1'] + 'c'
    str(df)

    # from SO:
    # http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
    df = DataFrame(np.arange(0, 9), columns=['count'])
    df['group'] = 'b'

    with pytest.raises(com.SettingWithCopyError):
        df.iloc[0:5]['group'] = 'a'

    # Mixed type setting but same dtype & changing dtype
    df = DataFrame(dict(A=date_range('20130101', periods=5),
                        B=np.random.randn(5),
                        C=np.arange(5, dtype='int64'),
                        D=list('abcde')))

    with pytest.raises(com.SettingWithCopyError):
        df.loc[2]['D'] = 'foo'

    with pytest.raises(com.SettingWithCopyError):
        df.loc[2]['C'] = 'foo'

    with pytest.raises(com.SettingWithCopyError):
        df['C'][2] = 'foo'
|
||||
|
||||
def test_setting_with_copy_bug(self):
    """Writing into a copy raises; extending a fresh sub-frame does not.

    The first check expects ``SettingWithCopyError``. The mode is pinned to
    'raise' for the whole test so the outcome does not depend on what
    other tests or global configuration left in ``chained_assignment``.
    """
    with option_context('chained_assignment', 'raise'):
        # operating on a copy
        df = DataFrame({'a': list(range(4)),
                        'b': list('ab..'),
                        'c': ['a', 'b', np.nan, 'd']})
        mask = pd.isna(df.c)

        with pytest.raises(com.SettingWithCopyError):
            df[['c']][mask] = df[['b']][mask]

        # invalid warning as we are returning a new object
        # GH 8730
        df1 = DataFrame({'x': Series(['a', 'b', 'c']),
                         'y': Series(['d', 'e', 'f'])})
        df2 = df1[['x']]

        # this should not raise
        df2['y'] = ['g', 'h', 'i']
|
||||
|
||||
def test_detect_chained_assignment_warnings(self):
    """In 'warn' mode a chained ``.loc[0]["A"]`` write emits a warning."""
    expected_warning = com.SettingWithCopyWarning

    with option_context("chained_assignment", "warn"):
        frame = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})

        with tm.assert_produces_warning(expected_warning):
            frame.loc[0]["A"] = 111
|
||||
|
||||
def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self):
    """Chained filtered write warns with duplicate columns (gh-13017).

    The frame is expected to equal a freshly built copy of the same data
    after the warned assignment.
    """
    # xref gh-13017.
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, -9]]
    labels = ["a", "a", "c"]

    with option_context("chained_assignment", "warn"):
        df = pd.DataFrame(rows, columns=labels)

        with tm.assert_produces_warning(com.SettingWithCopyWarning):
            df.c.loc[df.c > 0] = None

        untouched = pd.DataFrame(rows, columns=labels)
        tm.assert_frame_equal(df, untouched)
|
||||
|
||||
def test_chained_getitem_with_lists(self):
|
||||
|
||||
# GH6394
|
||||
# Regression in chained getitem indexing with embedded list-like from
|
||||
# 0.12
|
||||
def check(result, expected):
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
assert isinstance(result, np.ndarray)
|
||||
|
||||
df = DataFrame({'A': 5 * [np.zeros(3)], 'B': 5 * [np.ones(3)]})
|
||||
expected = df['A'].iloc[2]
|
||||
result = df.loc[2, 'A']
|
||||
check(result, expected)
|
||||
result2 = df.iloc[2]['A']
|
||||
check(result2, expected)
|
||||
result3 = df['A'].loc[2]
|
||||
check(result3, expected)
|
||||
result4 = df['A'].iloc[2]
|
||||
check(result4, expected)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_cache_updating(self):
    """Cached items must reflect later writes (GH 4939, GH 10264).

    Covers a row added through ``.ix`` on a DataFrame, a new minor-axis
    label added through ``.ix`` on a Panel, and direct writes into the
    ``.values`` of a column.
    """
    # GH 4939, make sure to update the cache on setitem

    df = tm.makeDataFrame()
    df['A']  # cache series
    df.ix["Hello Friend"] = df.ix[0]
    assert "Hello Friend" in df['A'].index
    assert "Hello Friend" in df['B'].index

    panel = tm.makePanel()
    panel.ix[0]  # get first item into cache
    panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1
    assert "A+1" in panel.ix[0].columns
    assert "A+1" in panel.ix[1].columns

    # 10264
    df = DataFrame(np.zeros((5, 5), dtype='int64'), columns=[
        'a', 'b', 'c', 'd', 'e'], index=range(5))
    df['f'] = 0
    df.f.values[3] = 1

    # TODO(wesm): unused?
    # y = df.iloc[np.arange(2, len(df))]

    # second write through .values; the frame must show the final value
    df.f.values[3] = 2
    expected = DataFrame(np.zeros((5, 6), dtype='int64'), columns=[
        'a', 'b', 'c', 'd', 'e', 'f'], index=range(5))
    expected.at[3, 'f'] = 2
    tm.assert_frame_equal(df, expected)
    expected = Series([0, 0, 0, 2, 0], name='f')
    tm.assert_series_equal(df.f, expected)
|
||||
|
||||
def test_deprecate_is_copy(self):
    """Reading or writing ``DataFrame.is_copy`` warns (GH18801)."""
    # GH18801
    df = DataFrame({"A": [1, 2, 3]})

    def expect_future_warning():
        return tm.assert_produces_warning(FutureWarning,
                                          check_stacklevel=False)

    # getter
    with expect_future_warning():
        df.is_copy

    # setter
    with expect_future_warning():
        df.is_copy = "test deprecated is_copy"
|
||||
@@ -0,0 +1,939 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.compat as compat
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
###############################################################
|
||||
# Index / Series common tests which may trigger dtype coercions
|
||||
###############################################################
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True, scope='class')
def check_comprehensiveness(request):
    """Fail a test class that lacks a test for some klass/dtype/method.

    Every combination of the class's ``klasses`` and ``dtypes`` with its
    ``method`` must appear, as substrings, in the name of at least one
    item collected in the session. Otherwise ``AssertionError`` is raised.
    """
    cls = request.cls

    def is_covered(klass, dtype, method):
        # covered when one collected item name contains all three words
        for item in request.node.session.items:
            name = item.name
            if klass in name and dtype in name and method in name:
                return True
        return False

    msg = 'test method is not defined: {0}, {1}'
    for combo in itertools.product(cls.klasses, cls.dtypes, [cls.method]):
        if not is_covered(*combo):
            raise AssertionError(msg.format(cls.__name__, combo))

    yield
|
||||
|
||||
|
||||
class CoercionBase(object):
    """Common pieces of the coercion test classes.

    ``klasses`` and ``dtypes`` list the name fragments each subclass is
    expected to cover. ``method`` must be overridden by subclasses.
    """

    klasses = ['index', 'series']
    dtypes = ['object', 'int64', 'float64', 'complex128', 'bool',
              'datetime64', 'datetime64tz', 'timedelta64', 'period']

    @property
    def method(self):
        """Name fragment of the tested method; subclasses override it."""
        raise NotImplementedError(self)

    def _assert(self, left, right, dtype):
        """Assert ``left`` equals ``right`` and that both have ``dtype``.

        Raises ``NotImplementedError`` when ``left`` is neither a Series
        nor an Index.
        """
        if isinstance(left, pd.Series):
            compare = tm.assert_series_equal
        elif isinstance(left, pd.Index):
            compare = tm.assert_index_equal
        else:
            raise NotImplementedError
        compare(left, right)

        # explicitly check dtype to avoid any unexpected result
        assert left.dtype == dtype
        assert right.dtype == dtype
|
||||
|
||||
|
||||
class TestSetitemCoercion(CoercionBase):
|
||||
|
||||
method = 'setitem'
|
||||
|
||||
def _assert_setitem_series_conversion(self, original_series, loc_value,
|
||||
expected_series, expected_dtype):
|
||||
""" test series value's coercion triggered by assignment """
|
||||
temp = original_series.copy()
|
||||
temp[1] = loc_value
|
||||
tm.assert_series_equal(temp, expected_series)
|
||||
# check dtype explicitly for sure
|
||||
assert temp.dtype == expected_dtype
|
||||
|
||||
# .loc works different rule, temporary disable
|
||||
# temp = original_series.copy()
|
||||
# temp.loc[1] = loc_value
|
||||
# tm.assert_series_equal(temp, expected_series)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
    (1, object),
    (1.1, object),
    (1 + 1j, object),
    (True, object)])
def test_setitem_series_object(self, val, exp_dtype):
    """Setting any scalar into an object Series keeps it object.

    The builtin ``object`` replaces the deprecated ``np.object`` alias,
    which referred to the same type.
    """
    obj = pd.Series(list('abcd'))
    assert obj.dtype == object

    exp = pd.Series(['a', val, 'c', 'd'])
    self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
    (1, np.int64),
    (1.1, np.float64),
    (1 + 1j, np.complex128),
    (True, object)])
def test_setitem_series_int64(self, val, exp_dtype):
    """Check the dtype an int64 Series takes after a scalar is set.

    The float case is pinned to current behaviour (value truncated, dtype
    stays int64) and then reported as xfail, see GH12747. The builtin
    ``object`` replaces the deprecated ``np.object`` alias.
    """
    obj = pd.Series([1, 2, 3, 4])
    assert obj.dtype == np.int64

    if exp_dtype is np.float64:
        exp = pd.Series([1, 1, 3, 4])
        self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64)
        # xfail raises, so the generic check below is skipped here
        pytest.xfail("GH12747 The result must be float")

    exp = pd.Series([1, val, 3, 4])
    self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
    (np.int32(1), np.int8),
    (np.int16(2**9), np.int16)])
def test_setitem_series_int8(self, val, exp_dtype):
    """Check setting wider integer scalars into an int8 Series.

    For the value that does not fit int8, the current result (0 stored,
    dtype unchanged) is asserted and the case is then marked xfail.
    """
    obj = pd.Series([1, 2, 3, 4], dtype=np.int8)
    assert obj.dtype == np.int8

    needs_upcast = exp_dtype is np.int16
    if not needs_upcast:
        fitted = pd.Series([1, val, 3, 4], dtype=np.int8)
        self._assert_setitem_series_conversion(obj, val, fitted, exp_dtype)
    else:
        wrapped = pd.Series([1, 0, 3, 4], dtype=np.int8)
        self._assert_setitem_series_conversion(obj, val, wrapped, np.int8)
        pytest.xfail("BUG: it must be Series([1, 1, 3, 4], dtype=np.int16")
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
    (1, np.float64),
    (1.1, np.float64),
    (1 + 1j, np.complex128),
    (True, object)])
def test_setitem_series_float64(self, val, exp_dtype):
    """Check the dtype a float64 Series takes after a scalar is set.

    The builtin ``object`` replaces the deprecated ``np.object`` alias.
    """
    obj = pd.Series([1.1, 2.2, 3.3, 4.4])
    assert obj.dtype == np.float64

    exp = pd.Series([1.1, val, 3.3, 4.4])
    self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
    (1, np.complex128),
    (1.1, np.complex128),
    (1 + 1j, np.complex128),
    (True, object)])
def test_setitem_series_complex128(self, val, exp_dtype):
    """Check the dtype a complex128 Series takes after a scalar is set.

    The builtin ``object`` replaces the deprecated ``np.object`` alias.
    """
    obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j])
    assert obj.dtype == np.complex128

    exp = pd.Series([1 + 1j, val, 3 + 3j, 4 + 4j])
    self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
    (1, np.int64),
    (3, np.int64),
    (1.1, np.float64),
    (1 + 1j, np.complex128),
    (True, bool)])
def test_setitem_series_bool(self, val, exp_dtype):
    """Check setting scalars into a bool Series.

    Numeric values are pinned to current behaviour (stored as ``True``,
    dtype stays bool) and then reported as xfail with the reason for that
    dtype. The builtin ``bool`` replaces the deprecated ``np.bool`` alias.
    """
    obj = pd.Series([True, False, True, False])
    assert obj.dtype == bool

    # expected-but-not-yet-produced dtypes and their xfail reasons
    xfail_reasons = {
        np.int64: "TODO_GH12747 The result must be int",
        np.float64: "TODO_GH12747 The result must be float",
        np.complex128: "TODO_GH12747 The result must be complex",
    }
    if exp_dtype in xfail_reasons:
        exp = pd.Series([True, True, True, False])
        self._assert_setitem_series_conversion(obj, val, exp, bool)
        # xfail raises, so the generic check below is skipped here
        pytest.xfail(xfail_reasons[exp_dtype])

    exp = pd.Series([True, val, True, False])
    self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
    (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
    (1, object),
    ('x', object)])
def test_setitem_series_datetime64(self, val, exp_dtype):
    """Check setting scalars into a naive datetime64 Series.

    A Timestamp keeps the dtype; an int or str is expected to give object.
    The builtin ``object`` replaces the deprecated ``np.object`` alias.
    """
    obj = pd.Series([pd.Timestamp('2011-01-01'),
                     pd.Timestamp('2011-01-02'),
                     pd.Timestamp('2011-01-03'),
                     pd.Timestamp('2011-01-04')])
    assert obj.dtype == 'datetime64[ns]'

    exp = pd.Series([pd.Timestamp('2011-01-01'),
                     val,
                     pd.Timestamp('2011-01-03'),
                     pd.Timestamp('2011-01-04')])
    self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
    (pd.Timestamp('2012-01-01', tz='US/Eastern'),
     'datetime64[ns, US/Eastern]'),
    (pd.Timestamp('2012-01-01', tz='US/Pacific'), object),
    (pd.Timestamp('2012-01-01'), object),
    (1, object)])
def test_setitem_series_datetime64tz(self, val, exp_dtype):
    """Check setting scalars into a US/Eastern datetime Series.

    Only a Timestamp in the same timezone keeps the dtype; the other
    values are expected to give object. The builtin ``object`` replaces
    the deprecated ``np.object`` alias.
    """
    tz = 'US/Eastern'
    obj = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
                     pd.Timestamp('2011-01-02', tz=tz),
                     pd.Timestamp('2011-01-03', tz=tz),
                     pd.Timestamp('2011-01-04', tz=tz)])
    assert obj.dtype == 'datetime64[ns, US/Eastern]'

    exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
                     val,
                     pd.Timestamp('2011-01-03', tz=tz),
                     pd.Timestamp('2011-01-04', tz=tz)])
    self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
    (pd.Timedelta('12 day'), 'timedelta64[ns]'),
    (1, object),
    ('x', object)])
def test_setitem_series_timedelta64(self, val, exp_dtype):
    """Check setting scalars into a timedelta64 Series.

    A Timedelta keeps the dtype; an int or str is expected to give object.
    The builtin ``object`` replaces the deprecated ``np.object`` alias.
    """
    obj = pd.Series([pd.Timedelta('1 day'),
                     pd.Timedelta('2 day'),
                     pd.Timedelta('3 day'),
                     pd.Timedelta('4 day')])
    assert obj.dtype == 'timedelta64[ns]'

    exp = pd.Series([pd.Timedelta('1 day'),
                     val,
                     pd.Timedelta('3 day'),
                     pd.Timedelta('4 day')])
    self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
def _assert_setitem_index_conversion(self, original_series, loc_key,
|
||||
expected_index, expected_dtype):
|
||||
""" test index's coercion triggered by assign key """
|
||||
temp = original_series.copy()
|
||||
temp[loc_key] = 5
|
||||
exp = pd.Series([1, 2, 3, 4, 5], index=expected_index)
|
||||
tm.assert_series_equal(temp, exp)
|
||||
# check dtype explicitly for sure
|
||||
assert temp.index.dtype == expected_dtype
|
||||
|
||||
temp = original_series.copy()
|
||||
temp.loc[loc_key] = 5
|
||||
exp = pd.Series([1, 2, 3, 4, 5], index=expected_index)
|
||||
tm.assert_series_equal(temp, exp)
|
||||
# check dtype explicitly for sure
|
||||
assert temp.index.dtype == expected_dtype
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
    ('x', object),
    (5, IndexError),
    (1.1, object)])
def test_setitem_index_object(self, val, exp_dtype):
    """Check enlarging a Series that has an object index.

    A str or float key is appended and the index stays object; the int key
    5 is expected to raise ``IndexError``. The builtin ``object`` replaces
    the deprecated ``np.object`` alias.
    """
    obj = pd.Series([1, 2, 3, 4], index=list('abcd'))
    assert obj.index.dtype == object

    if exp_dtype is IndexError:
        temp = obj.copy()
        with pytest.raises(exp_dtype):
            temp[5] = 5
    else:
        exp_index = pd.Index(list('abcd') + [val])
        self._assert_setitem_index_conversion(obj, val, exp_index,
                                              exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
    (5, np.int64),
    (1.1, np.float64),
    ('x', object)])
def test_setitem_index_int64(self, val, exp_dtype):
    """Check the index dtype after enlarging an int64-indexed Series.

    The builtin ``object`` replaces the deprecated ``np.object`` alias.
    """
    obj = pd.Series([1, 2, 3, 4])
    assert obj.index.dtype == np.int64

    exp_index = pd.Index([0, 1, 2, 3, val])
    self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
    (5, IndexError),
    (5.1, np.float64),
    ('x', object)])
def test_setitem_index_float64(self, val, exp_dtype):
    """Check the index dtype after enlarging a float64-indexed Series.

    The int key is pinned to current behaviour (``IndexError``) and then
    reported as xfail, see GH12747. The builtin ``object`` replaces the
    deprecated ``np.object`` alias.
    """
    obj = pd.Series([1, 2, 3, 4], index=[1.1, 2.1, 3.1, 4.1])
    assert obj.index.dtype == np.float64

    if exp_dtype is IndexError:
        # float + int -> int
        temp = obj.copy()
        with pytest.raises(exp_dtype):
            temp[5] = 5
        # xfail raises, so the generic check below is skipped here
        pytest.xfail("TODO_GH12747 The result must be float")

    exp_index = pd.Index([1.1, 2.1, 3.1, 4.1, val])
    self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
|
||||
|
||||
def test_setitem_series_period(self):
    # Placeholder with no assertions. Its name contains 'setitem',
    # 'series' and 'period', which is what check_comprehensiveness looks
    # for among the collected tests.
    pass
|
||||
|
||||
def test_setitem_index_complex128(self):
    # Placeholder with no assertions. Its name contains 'setitem',
    # 'index' and 'complex128', which is what check_comprehensiveness
    # looks for among the collected tests.
    pass
|
||||
|
||||
def test_setitem_index_bool(self):
    # Placeholder with no assertions. Its name contains 'setitem',
    # 'index' and 'bool', which is what check_comprehensiveness looks for
    # among the collected tests.
    pass
|
||||
|
||||
def test_setitem_index_datetime64(self):
    # Placeholder with no assertions. Its name contains 'setitem',
    # 'index' and 'datetime64', which is what check_comprehensiveness
    # looks for among the collected tests.
    pass
|
||||
|
||||
def test_setitem_index_datetime64tz(self):
    # Placeholder with no assertions. Its name contains 'setitem',
    # 'index' and 'datetime64tz', which is what check_comprehensiveness
    # looks for among the collected tests.
    pass
|
||||
|
||||
def test_setitem_index_timedelta64(self):
    # Placeholder with no assertions. Its name contains 'setitem',
    # 'index' and 'timedelta64', which is what check_comprehensiveness
    # looks for among the collected tests.
    pass
|
||||
|
||||
def test_setitem_index_period(self):
    # Placeholder with no assertions. Its name contains 'setitem',
    # 'index' and 'period', which is what check_comprehensiveness looks
    # for among the collected tests.
    pass
|
||||
|
||||
|
||||
class TestInsertIndexCoercion(CoercionBase):
|
||||
|
||||
klasses = ['index']
|
||||
method = 'insert'
|
||||
|
||||
def _assert_insert_conversion(self, original, value,
|
||||
expected, expected_dtype):
|
||||
""" test coercion triggered by insert """
|
||||
target = original.copy()
|
||||
res = target.insert(1, value)
|
||||
tm.assert_index_equal(res, expected)
|
||||
assert res.dtype == expected_dtype
|
||||
|
||||
@pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
    (1, 1, object),
    (1.1, 1.1, object),
    (False, False, object),
    ('x', 'x', object)])
def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
    """Inserting any scalar into an object Index keeps it object.

    The builtin ``object`` replaces the deprecated ``np.object`` alias.
    """
    obj = pd.Index(list('abcd'))
    assert obj.dtype == object

    exp = pd.Index(['a', coerced_val, 'b', 'c', 'd'])
    self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
|
||||
|
||||
@pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
    (1, 1, np.int64),
    (1.1, 1.1, np.float64),
    (False, 0, np.int64),
    ('x', 'x', object)])
def test_insert_index_int64(self, insert, coerced_val, coerced_dtype):
    """Check value and dtype coercion when inserting into an Int64Index.

    The builtin ``object`` replaces the deprecated ``np.object`` alias.
    """
    obj = pd.Int64Index([1, 2, 3, 4])
    assert obj.dtype == np.int64

    exp = pd.Index([1, coerced_val, 2, 3, 4])
    self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
|
||||
|
||||
@pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
    (1, 1., np.float64),
    (1.1, 1.1, np.float64),
    (False, 0., np.float64),
    ('x', 'x', object)])
def test_insert_index_float64(self, insert, coerced_val, coerced_dtype):
    """Check value and dtype coercion when inserting into a Float64Index.

    The builtin ``object`` replaces the deprecated ``np.object`` alias.
    """
    obj = pd.Float64Index([1., 2., 3., 4.])
    assert obj.dtype == np.float64

    exp = pd.Index([1., coerced_val, 2., 3., 4.])
    self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
|
||||
|
||||
@pytest.mark.parametrize('fill_val,exp_dtype', [
    (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
    (pd.Timestamp('2012-01-01', tz='US/Eastern'),
     'datetime64[ns, US/Eastern]')],
    ids=['datetime64', 'datetime64tz'])
def test_insert_index_datetimes(self, fill_val, exp_dtype):
    """Check inserting into a DatetimeIndex, naive and tz-aware.

    A matching Timestamp keeps the dtype; timezone mismatches raise
    ``ValueError`` and an int raises ``TypeError``. The test always ends
    in xfail because those cases are meant to coerce to object.
    """
    tz = fill_val.tz
    obj = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03',
                            '2011-01-04'], tz=tz)
    assert obj.dtype == exp_dtype

    exp = pd.DatetimeIndex(['2011-01-01', fill_val.date(), '2011-01-02',
                            '2011-01-03', '2011-01-04'], tz=tz)
    self._assert_insert_conversion(obj, fill_val, exp, exp_dtype)

    tz_msg = "Passed item and index have different timezone"
    if tz:
        # a naive Timestamp only conflicts with a tz-aware index
        with pytest.raises(ValueError, match=tz_msg):
            obj.insert(1, pd.Timestamp('2012-01-01'))

    with pytest.raises(ValueError, match=tz_msg):
        obj.insert(1, pd.Timestamp('2012-01-01', tz='Asia/Tokyo'))

    label_msg = "cannot insert DatetimeIndex with incompatible label"
    with pytest.raises(TypeError, match=label_msg):
        obj.insert(1, 1)

    pytest.xfail("ToDo: must coerce to object")
|
||||
|
||||
def test_insert_index_timedelta64(self):
    """Check inserting into a TimedeltaIndex.

    A Timedelta keeps the dtype; a Timestamp or an int currently raises
    ``TypeError``.
    """
    obj = pd.TimedeltaIndex(['1 day', '2 day', '3 day', '4 day'])
    assert obj.dtype == 'timedelta64[ns]'

    # timedelta64 + timedelta64 => timedelta64
    exp = pd.TimedeltaIndex(['1 day', '10 day', '2 day', '3 day', '4 day'])
    self._assert_insert_conversion(obj, pd.Timedelta('10 day'),
                                   exp, 'timedelta64[ns]')

    # ToDo: must coerce to object (applies to both values below)
    msg = "cannot insert TimedeltaIndex with incompatible label"
    for incompatible in (pd.Timestamp('2012-01-01'), 1):
        with pytest.raises(TypeError, match=msg):
            obj.insert(1, incompatible)
|
||||
|
||||
@pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
    (pd.Period('2012-01', freq='M'), '2012-01', 'period[M]'),
    (pd.Timestamp('2012-01-01'), pd.Timestamp('2012-01-01'), object),
    (1, 1, object),
    ('x', 'x', object)])
def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
    """Check inserting into a monthly PeriodIndex.

    A Period keeps the PeriodIndex; any other value is expected to give a
    plain object Index. The builtin ``object`` replaces the deprecated
    ``np.object`` alias.
    """
    obj = pd.PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
                         freq='M')
    assert obj.dtype == 'period[M]'

    # only a Period insert is expected to keep the index a PeriodIndex
    if isinstance(insert, pd.Period):
        index_type = pd.PeriodIndex
    else:
        index_type = pd.Index

    exp = index_type([pd.Period('2011-01', freq='M'),
                      coerced_val,
                      pd.Period('2011-02', freq='M'),
                      pd.Period('2011-03', freq='M'),
                      pd.Period('2011-04', freq='M')], freq='M')
    self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
|
||||
|
||||
def test_insert_index_complex128(self):
    # Empty placeholder: no assertions are made for this combination.
    # NOTE(review): presumably kept so the name exists for a completeness
    # check in the base class -- confirm against CoercionBase.
    pass
|
||||
|
||||
def test_insert_index_bool(self):
    # Empty placeholder: no assertions are made for this combination.
    # NOTE(review): presumably kept so the name exists for a completeness
    # check in the base class -- confirm against CoercionBase.
    pass
|
||||
|
||||
|
||||
class TestWhereCoercion(CoercionBase):
    """Dtype coercion triggered by ``where`` on Series and Index objects.

    Dtype expectations use the builtin ``object`` / ``bool`` types rather
    than the ``np.object`` / ``np.bool`` aliases, which NumPy deprecated in
    1.20 and removed in 1.24.
    """

    method = 'where'

    def _assert_where_conversion(self, original, cond, values,
                                 expected, expected_dtype):
        """ test coercion triggered by where """
        # work on a copy so the caller's object can be reused afterwards
        target = original.copy()
        res = target.where(cond, values)
        self._assert(res, expected, expected_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
                             ids=['series', 'index'])
    @pytest.mark.parametrize("fill_val,exp_dtype", [
        (1, object),
        (1.1, object),
        (1 + 1j, object),
        (True, object)])
    def test_where_object(self, klass, fill_val, exp_dtype):
        """object dtype stays object for every parametrized fill value."""
        obj = klass(list('abcd'))
        assert obj.dtype == object
        cond = klass([True, False, True, False])

        # for a Series the expected replacement of True is the integer 1
        if fill_val is True and klass is pd.Series:
            ret_val = 1
        else:
            ret_val = fill_val

        # scalar replacement
        exp = klass(['a', ret_val, 'c', ret_val])
        self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)

        # array-like replacement
        if fill_val is True:
            values = klass([True, False, True, True])
        else:
            values = klass(fill_val * x for x in [5, 6, 7, 8])

        exp = klass(['a', values[1], 'c', values[3]])
        self._assert_where_conversion(obj, cond, values, exp, exp_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
                             ids=['series', 'index'])
    @pytest.mark.parametrize("fill_val,exp_dtype", [
        (1, np.int64),
        (1.1, np.float64),
        (1 + 1j, np.complex128),
        (True, object)])
    def test_where_int64(self, klass, fill_val, exp_dtype):
        """int64 data combined with int/float/complex/bool replacements."""
        if klass is pd.Index and exp_dtype is np.complex128:
            pytest.skip("Complex Index not supported")
        obj = klass([1, 2, 3, 4])
        assert obj.dtype == np.int64
        cond = klass([True, False, True, False])

        # scalar replacement
        exp = klass([1, fill_val, 3, fill_val])
        self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)

        # array-like replacement
        if fill_val is True:
            values = klass([True, False, True, True])
        else:
            values = klass(x * fill_val for x in [5, 6, 7, 8])
        exp = klass([1, values[1], 3, values[3]])
        self._assert_where_conversion(obj, cond, values, exp, exp_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
                             ids=['series', 'index'])
    @pytest.mark.parametrize("fill_val, exp_dtype", [
        (1, np.float64),
        (1.1, np.float64),
        (1 + 1j, np.complex128),
        (True, object)])
    def test_where_float64(self, klass, fill_val, exp_dtype):
        """float64 data combined with int/float/complex/bool replacements."""
        if klass is pd.Index and exp_dtype is np.complex128:
            pytest.skip("Complex Index not supported")
        obj = klass([1.1, 2.2, 3.3, 4.4])
        assert obj.dtype == np.float64
        cond = klass([True, False, True, False])

        # scalar replacement
        exp = klass([1.1, fill_val, 3.3, fill_val])
        self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)

        # array-like replacement
        if fill_val is True:
            values = klass([True, False, True, True])
        else:
            values = klass(x * fill_val for x in [5, 6, 7, 8])
        exp = klass([1.1, values[1], 3.3, values[3]])
        self._assert_where_conversion(obj, cond, values, exp, exp_dtype)

    @pytest.mark.parametrize("fill_val,exp_dtype", [
        (1, np.complex128),
        (1.1, np.complex128),
        (1 + 1j, np.complex128),
        (True, object)])
    def test_where_series_complex128(self, fill_val, exp_dtype):
        """complex128 Series combined with numeric and bool replacements."""
        obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j])
        assert obj.dtype == np.complex128
        cond = pd.Series([True, False, True, False])

        # scalar replacement
        exp = pd.Series([1 + 1j, fill_val, 3 + 3j, fill_val])
        self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)

        # array-like replacement
        if fill_val is True:
            values = pd.Series([True, False, True, True])
        else:
            values = pd.Series(x * fill_val for x in [5, 6, 7, 8])
        exp = pd.Series([1 + 1j, values[1], 3 + 3j, values[3]])
        self._assert_where_conversion(obj, cond, values, exp, exp_dtype)

    @pytest.mark.parametrize("fill_val,exp_dtype", [
        (1, object),
        (1.1, object),
        (1 + 1j, object),
        (True, bool)])
    def test_where_series_bool(self, fill_val, exp_dtype):
        """bool Series stays bool only when the replacement is bool."""

        obj = pd.Series([True, False, True, False])
        assert obj.dtype == bool
        cond = pd.Series([True, False, True, False])

        # scalar replacement
        exp = pd.Series([True, fill_val, True, fill_val])
        self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)

        # array-like replacement
        if fill_val is True:
            values = pd.Series([True, False, True, True])
        else:
            values = pd.Series(x * fill_val for x in [5, 6, 7, 8])
        exp = pd.Series([True, values[1], True, values[3]])
        self._assert_where_conversion(obj, cond, values, exp, exp_dtype)

    @pytest.mark.parametrize("fill_val,exp_dtype", [
        (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
        (pd.Timestamp('2012-01-01', tz='US/Eastern'), object)],
        ids=['datetime64', 'datetime64tz'])
    def test_where_series_datetime64(self, fill_val, exp_dtype):
        """naive datetime64 Series with naive and tz-aware replacements."""
        obj = pd.Series([pd.Timestamp('2011-01-01'),
                         pd.Timestamp('2011-01-02'),
                         pd.Timestamp('2011-01-03'),
                         pd.Timestamp('2011-01-04')])
        assert obj.dtype == 'datetime64[ns]'
        cond = pd.Series([True, False, True, False])

        # scalar replacement
        exp = pd.Series([pd.Timestamp('2011-01-01'), fill_val,
                         pd.Timestamp('2011-01-03'), fill_val])
        self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)

        # array-like replacement
        values = pd.Series(pd.date_range(fill_val, periods=4))
        if fill_val.tz:
            # spell the tz-aware expectation out explicitly as well
            exp = pd.Series([pd.Timestamp('2011-01-01'),
                             pd.Timestamp('2012-01-02 00:00', tz='US/Eastern'),
                             pd.Timestamp('2011-01-03'),
                             pd.Timestamp('2012-01-04 00:00',
                                          tz='US/Eastern')])
            self._assert_where_conversion(obj, cond, values, exp, exp_dtype)

        exp = pd.Series([pd.Timestamp('2011-01-01'), values[1],
                         pd.Timestamp('2011-01-03'), values[3]])
        self._assert_where_conversion(obj, cond, values, exp, exp_dtype)

    def test_where_index_datetime(self):
        """naive datetime Index: scalar other raises, Index other works."""
        fill_val = pd.Timestamp('2012-01-01')
        exp_dtype = 'datetime64[ns]'
        obj = pd.Index([pd.Timestamp('2011-01-01'),
                        pd.Timestamp('2011-01-02'),
                        pd.Timestamp('2011-01-03'),
                        pd.Timestamp('2011-01-04')])
        assert obj.dtype == 'datetime64[ns]'
        cond = pd.Index([True, False, True, False])

        # a scalar replacement is rejected
        msg = ("Index\\(\\.\\.\\.\\) must be called with a collection "
               "of some kind")
        with pytest.raises(TypeError, match=msg):
            obj.where(cond, fill_val)

        values = pd.Index(pd.date_range(fill_val, periods=4))
        exp = pd.Index([pd.Timestamp('2011-01-01'),
                        pd.Timestamp('2012-01-02'),
                        pd.Timestamp('2011-01-03'),
                        pd.Timestamp('2012-01-04')])

        self._assert_where_conversion(obj, cond, values, exp, exp_dtype)

    @pytest.mark.xfail(
        reason="GH 22839: do not ignore timezone, must be object")
    def test_where_index_datetimetz(self):
        """naive datetime Index with tz-aware replacements (xfail)."""
        fill_val = pd.Timestamp('2012-01-01', tz='US/Eastern')
        exp_dtype = object
        obj = pd.Index([pd.Timestamp('2011-01-01'),
                        pd.Timestamp('2011-01-02'),
                        pd.Timestamp('2011-01-03'),
                        pd.Timestamp('2011-01-04')])
        assert obj.dtype == 'datetime64[ns]'
        cond = pd.Index([True, False, True, False])

        # a scalar replacement is rejected
        msg = ("Index\\(\\.\\.\\.\\) must be called with a collection "
               "of some kind")
        with pytest.raises(TypeError, match=msg):
            obj.where(cond, fill_val)

        values = pd.Index(pd.date_range(fill_val, periods=4))
        exp = pd.Index([pd.Timestamp('2011-01-01'),
                        pd.Timestamp('2012-01-02', tz='US/Eastern'),
                        pd.Timestamp('2011-01-03'),
                        pd.Timestamp('2012-01-04', tz='US/Eastern')],
                       dtype=exp_dtype)

        self._assert_where_conversion(obj, cond, values, exp, exp_dtype)

    # The methods below make no assertions; they only define the names.

    def test_where_index_complex128(self):
        pass

    def test_where_index_bool(self):
        pass

    def test_where_series_datetime64tz(self):
        pass

    def test_where_series_timedelta64(self):
        pass

    def test_where_series_period(self):
        pass

    def test_where_index_datetime64tz(self):
        pass

    def test_where_index_timedelta64(self):
        pass

    def test_where_index_period(self):
        pass
|
||||
|
||||
|
||||
class TestFillnaSeriesCoercion(CoercionBase):
    """Dtype coercion triggered by ``fillna`` on Series and Index objects.

    Dtype expectations use the builtin ``object`` rather than the
    ``np.object`` alias, which NumPy deprecated in 1.20 and removed in 1.24.
    """

    # not indexing, but place here for consistency

    method = 'fillna'

    def test_has_comprehensive_tests(self):
        # deliberately a no-op in this class
        pass

    def _assert_fillna_conversion(self, original, value,
                                  expected, expected_dtype):
        """ test coercion triggered by fillna """
        # work on a copy so the caller's object can be reused afterwards
        target = original.copy()
        res = target.fillna(value)
        self._assert(res, expected, expected_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
                             ids=['series', 'index'])
    @pytest.mark.parametrize("fill_val, fill_dtype", [
        (1, object),
        (1.1, object),
        (1 + 1j, object),
        (True, object)])
    def test_fillna_object(self, klass, fill_val, fill_dtype):
        """object dtype stays object for every parametrized fill value."""
        obj = klass(['a', np.nan, 'c', 'd'])
        assert obj.dtype == object

        exp = klass(['a', fill_val, 'c', 'd'])
        self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
                             ids=['series', 'index'])
    @pytest.mark.parametrize("fill_val,fill_dtype", [
        (1, np.float64),
        (1.1, np.float64),
        (1 + 1j, np.complex128),
        (True, object)])
    def test_fillna_float64(self, klass, fill_val, fill_dtype):
        """float64 data filled with int/float/complex/bool values."""
        obj = klass([1.1, np.nan, 3.3, 4.4])
        assert obj.dtype == np.float64

        exp = klass([1.1, fill_val, 3.3, 4.4])
        # float + complex -> we don't support a complex Index
        # complex for Series,
        # object for Index
        if fill_dtype == np.complex128 and klass == pd.Index:
            fill_dtype = object
        self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)

    @pytest.mark.parametrize("fill_val,fill_dtype", [
        (1, np.complex128),
        (1.1, np.complex128),
        (1 + 1j, np.complex128),
        (True, object)])
    def test_fillna_series_complex128(self, fill_val, fill_dtype):
        """complex128 Series filled with numeric and bool values."""
        obj = pd.Series([1 + 1j, np.nan, 3 + 3j, 4 + 4j])
        assert obj.dtype == np.complex128

        exp = pd.Series([1 + 1j, fill_val, 3 + 3j, 4 + 4j])
        self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
                             ids=['series', 'index'])
    @pytest.mark.parametrize("fill_val,fill_dtype", [
        (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
        (pd.Timestamp('2012-01-01', tz='US/Eastern'), object),
        (1, object), ('x', object)],
        ids=['datetime64', 'datetime64tz', 'object', 'object'])
    def test_fillna_datetime(self, klass, fill_val, fill_dtype):
        """naive datetime64 data: only a naive Timestamp keeps the dtype."""
        obj = klass([pd.Timestamp('2011-01-01'),
                     pd.NaT,
                     pd.Timestamp('2011-01-03'),
                     pd.Timestamp('2011-01-04')])
        assert obj.dtype == 'datetime64[ns]'

        exp = klass([pd.Timestamp('2011-01-01'),
                     fill_val,
                     pd.Timestamp('2011-01-03'),
                     pd.Timestamp('2011-01-04')])
        self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index])
    @pytest.mark.parametrize("fill_val,fill_dtype", [
        (pd.Timestamp('2012-01-01', tz='US/Eastern'),
         'datetime64[ns, US/Eastern]'),
        (pd.Timestamp('2012-01-01'), object),
        (pd.Timestamp('2012-01-01', tz='Asia/Tokyo'), object),
        (1, object),
        ('x', object)])
    def test_fillna_datetime64tz(self, klass, fill_val, fill_dtype):
        """tz-aware data: only a same-tz Timestamp keeps the dtype."""
        tz = 'US/Eastern'

        obj = klass([pd.Timestamp('2011-01-01', tz=tz),
                     pd.NaT,
                     pd.Timestamp('2011-01-03', tz=tz),
                     pd.Timestamp('2011-01-04', tz=tz)])
        assert obj.dtype == 'datetime64[ns, US/Eastern]'

        exp = klass([pd.Timestamp('2011-01-01', tz=tz),
                     fill_val,
                     pd.Timestamp('2011-01-03', tz=tz),
                     pd.Timestamp('2011-01-04', tz=tz)])
        self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)

    # The methods below make no assertions; they only define the names.

    def test_fillna_series_int64(self):
        pass

    def test_fillna_index_int64(self):
        pass

    def test_fillna_series_bool(self):
        pass

    def test_fillna_index_bool(self):
        pass

    def test_fillna_series_timedelta64(self):
        pass

    def test_fillna_series_period(self):
        pass

    def test_fillna_index_timedelta64(self):
        pass

    def test_fillna_index_period(self):
        pass
|
||||
|
||||
|
||||
class TestReplaceSeriesCoercion(CoercionBase):
    """Dtype coercion triggered by ``Series.replace``.

    ``rep`` maps a dtype name to a two-element list of sample values of that
    dtype; the tests replace the values of one dtype with those of another.
    """

    klasses = ['series']
    method = 'replace'

    rep = {}
    rep['object'] = ['a', 'b']
    rep['int64'] = [4, 5]
    rep['float64'] = [1.1, 2.2]
    rep['complex128'] = [1 + 1j, 2 + 2j]
    rep['bool'] = [True, False]
    rep['datetime64[ns]'] = [pd.Timestamp('2011-01-01'),
                             pd.Timestamp('2011-01-03')]

    for tz in ['UTC', 'US/Eastern']:
        # to test tz => different tz replacement
        key = 'datetime64[ns, {0}]'.format(tz)
        rep[key] = [pd.Timestamp('2011-01-01', tz=tz),
                    pd.Timestamp('2011-01-03', tz=tz)]

    rep['timedelta64[ns]'] = [pd.Timedelta('1 day'),
                              pd.Timedelta('2 day')]

    @pytest.mark.parametrize('how', ['dict', 'series'])
    @pytest.mark.parametrize('to_key', [
        'object', 'int64', 'float64', 'complex128', 'bool', 'datetime64[ns]',
        'datetime64[ns, UTC]', 'datetime64[ns, US/Eastern]', 'timedelta64[ns]'
    ], ids=['object', 'int64', 'float64', 'complex128', 'bool',
            'datetime64', 'datetime64tz', 'datetime64tz', 'timedelta64'])
    @pytest.mark.parametrize('from_key', [
        'object', 'int64', 'float64', 'complex128', 'bool', 'datetime64[ns]',
        'datetime64[ns, UTC]', 'datetime64[ns, US/Eastern]', 'timedelta64[ns]']
    )
    def test_replace_series(self, how, to_key, from_key):
        """Replace every value of dtype ``from_key`` by one of ``to_key``.

        ``how`` selects whether the mapping is passed as a dict or a Series.
        """
        if from_key == 'bool' and how == 'series' and compat.PY3:
            # doesn't work in PY3, though ...dict_from_bool works fine
            pytest.skip("doesn't work as in PY3")

        index = pd.Index([3, 4], name='xxx')
        obj = pd.Series(self.rep[from_key], index=index, name='yyy')
        assert obj.dtype == from_key

        if (from_key.startswith('datetime') and to_key.startswith('datetime')):
            # tested below
            return
        elif from_key in ['datetime64[ns, US/Eastern]', 'datetime64[ns, UTC]']:
            # tested below
            return

        if how == 'dict':
            replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
        elif how == 'series':
            replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
        else:
            raise ValueError

        result = obj.replace(replacer)

        # NOTE: the original condition read ``to_key in ('int64')``, which is
        # a substring test against the plain string 'int64' rather than tuple
        # membership; an explicit equality states the intent.
        if ((from_key == 'float64' and to_key == 'int64') or
            (from_key == 'complex128' and
             to_key in ('int64', 'float64'))):

            if compat.is_platform_32bit() or compat.is_platform_windows():
                pytest.skip("32-bit platform buggy: {0} -> {1}".format
                            (from_key, to_key))

            # Expected: do not downcast by replacement
            exp = pd.Series(self.rep[to_key], index=index,
                            name='yyy', dtype=from_key)

        else:
            exp = pd.Series(self.rep[to_key], index=index, name='yyy')
            assert exp.dtype == to_key

        tm.assert_series_equal(result, exp)

    # TODO(jbrockmendel) commented out to only have a single xfail printed
    @pytest.mark.xfail(reason='GH #18376, tzawareness-compat bug '
                              'in BlockManager.replace_list')
    # @pytest.mark.parametrize('how', ['dict', 'series'])
    # @pytest.mark.parametrize('to_key', ['timedelta64[ns]', 'bool', 'object',
    #                                     'complex128', 'float64', 'int64'])
    # @pytest.mark.parametrize('from_key', ['datetime64[ns, UTC]',
    #                                       'datetime64[ns, US/Eastern]'])
    # def test_replace_series_datetime_tz(self, how, to_key, from_key):
    def test_replace_series_datetime_tz(self):
        """tz-aware datetimes replaced by timedeltas (single xfail case)."""
        how = 'series'
        from_key = 'datetime64[ns, US/Eastern]'
        to_key = 'timedelta64[ns]'

        index = pd.Index([3, 4], name='xxx')
        obj = pd.Series(self.rep[from_key], index=index, name='yyy')
        assert obj.dtype == from_key

        if how == 'dict':
            replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
        elif how == 'series':
            replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
        else:
            raise ValueError

        result = obj.replace(replacer)
        exp = pd.Series(self.rep[to_key], index=index, name='yyy')
        assert exp.dtype == to_key

        tm.assert_series_equal(result, exp)

    # TODO(jreback) commented out to only have a single xfail printed
    @pytest.mark.xfail(reason="different tz, "
                       "currently mask_missing raises SystemError",
                       strict=False)
    # @pytest.mark.parametrize('how', ['dict', 'series'])
    # @pytest.mark.parametrize('to_key', [
    #     'datetime64[ns]', 'datetime64[ns, UTC]',
    #     'datetime64[ns, US/Eastern]'])
    # @pytest.mark.parametrize('from_key', [
    #    'datetime64[ns]', 'datetime64[ns, UTC]',
    #    'datetime64[ns, US/Eastern]'])
    # def test_replace_series_datetime_datetime(self, how, to_key, from_key):
    def test_replace_series_datetime_datetime(self):
        """naive datetimes replaced by naive datetimes (single xfail case)."""
        how = 'dict'
        to_key = 'datetime64[ns]'
        from_key = 'datetime64[ns]'

        index = pd.Index([3, 4], name='xxx')
        obj = pd.Series(self.rep[from_key], index=index, name='yyy')
        assert obj.dtype == from_key

        if how == 'dict':
            replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
        elif how == 'series':
            replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
        else:
            raise ValueError

        result = obj.replace(replacer)
        exp = pd.Series(self.rep[to_key], index=index, name='yyy')
        assert exp.dtype == to_key

        tm.assert_series_equal(result, exp)

    def test_replace_series_period(self):
        # no assertions; only defines the name
        pass
|
||||
@@ -0,0 +1,315 @@
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from dateutil import tz
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, Series, Timestamp, date_range
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestDatetimeIndex(object):
    """Indexing and assignment involving datetime-like indexes and values."""

    def test_setitem_with_datetime_tz(self):
        """Boolean-mask ``.loc`` assignment leaves the frame unchanged."""
        # 16889
        # support .loc with alignment and tz-aware DatetimeIndex
        mask = np.array([True, False, True, False])

        # the tz-aware index is exercised first, then the naive one
        for zone in ('UTC', None):
            idx = date_range('20010101', periods=4, tz=zone)
            df = DataFrame({'a': np.arange(4)}, index=idx).astype('float64')

            result = df.copy()
            result.loc[mask, :] = df.loc[mask, :]
            tm.assert_frame_equal(result, df)

            result = df.copy()
            result.loc[mask] = df.loc[mask]
            tm.assert_frame_equal(result, df)

    def test_indexing_with_datetime_tz(self):
        """Row, boolean and element access on tz-aware datetime columns."""
        # 8260
        # support datetime64 with tz
        aware = Index(date_range('20130101', periods=3, tz='US/Eastern'),
                      name='foo')
        naive = date_range('20130110', periods=3)
        df = DataFrame({'A': aware, 'B': naive})
        df['C'] = aware
        df.iloc[1, 1] = pd.NaT
        df.iloc[1, 2] = pd.NaT

        # indexing
        for indexer in (df.iloc, df.loc):
            result = indexer[1]
            expected = Series([Timestamp('2013-01-02 00:00:00-0500',
                                         tz='US/Eastern'), np.nan, np.nan],
                              index=list('ABC'), dtype='object', name=1)
            tm.assert_series_equal(result, expected)

        # indexing - fast_xs
        df = DataFrame({'a': date_range('2014-01-01', periods=10, tz='UTC')})
        expected = Timestamp('2014-01-06 00:00:00+0000', tz='UTC', freq='D')
        for indexer in (df.iloc, df.loc):
            result = indexer[5]
            assert result == expected

        # indexing - boolean
        result = df[df.a > df.a[3]]
        expected = df.iloc[4:]
        tm.assert_frame_equal(result, expected)

        # indexing - setting an element
        stamps = pd.to_datetime(
            ['2015-03-30 20:12:32', '2015-03-12 00:11:11'])
        df = DataFrame(data=stamps, columns=['time'])
        df['new_col'] = ['new', 'old']
        df.time = df.set_index('time').index.tz_localize('UTC')
        converted = df[df.new_col == 'new'].set_index(
            'time').index.tz_convert('US/Pacific')

        # trying to set a single element on a part of a different timezone
        # this converts to object
        df2 = df.copy()
        df2.loc[df2.new_col == 'new', 'time'] = converted

        expected = Series([converted[0], df.loc[1, 'time']], name='time')
        tm.assert_series_equal(df2.time, expected)

        shifted = df.loc[df.new_col == 'new', 'time'] + pd.Timedelta('1s')
        df.loc[df.new_col == 'new', 'time'] = shifted
        tm.assert_series_equal(df.loc[df.new_col == 'new', 'time'], shifted)

    def test_consistency_with_tz_aware_scalar(self):
        """Every accessor returns the same tz-aware scalar."""
        # xref gh-12938
        # various ways of indexing the same tz-aware scalar
        single = Series([Timestamp('2016-03-30 14:35:25',
                                   tz='Europe/Brussels')]).to_frame()

        df = pd.concat([single, single]).reset_index(drop=True)
        expected = Timestamp('2016-03-30 14:35:25+0200',
                             tz='Europe/Brussels')

        accessors = (
            lambda frame: frame[0][0],
            lambda frame: frame.iloc[0, 0],
            lambda frame: frame.loc[0, 0],
            lambda frame: frame.iat[0, 0],
            lambda frame: frame.at[0, 0],
            lambda frame: frame[0].loc[0],
            lambda frame: frame[0].at[0],
        )
        for access in accessors:
            result = access(df)
            assert result == expected

    def test_indexing_with_datetimeindex_tz(self):
        """getitem/setitem on a Series indexed by a tz-aware DatetimeIndex."""

        # GH 12050
        # indexing on a series with a datetimeindex with tz
        index = date_range('2015-01-01', periods=2, tz='utc')

        ser = Series(range(2), index=index, dtype='int64')

        # list-like indexing

        for selector in (index, list(index)):
            # getitem
            tm.assert_series_equal(ser[selector], ser)

            # setitem
            modified = ser.copy()
            modified[selector] = 1
            tm.assert_series_equal(modified, Series(1, index=index))

            # .loc getitem
            tm.assert_series_equal(ser.loc[selector], ser)

            # .loc setitem
            modified = ser.copy()
            modified.loc[selector] = 1
            tm.assert_series_equal(modified, Series(1, index=index))

        # single element indexing

        # getitem
        assert ser[index[1]] == 1

        # setitem
        modified = ser.copy()
        modified[index[1]] = 5
        tm.assert_series_equal(modified, Series([0, 5], index=index))

        # .loc getitem
        assert ser.loc[index[1]] == 1

        # .loc setitem
        modified = ser.copy()
        modified.loc[index[1]] = 5
        tm.assert_series_equal(modified, Series([0, 5], index=index))

    def test_partial_setting_with_datetimelike_dtype(self):
        """Masked ``.loc`` assignment of index values into a new column."""

        # GH9478
        # a datetimeindex alignment issue with partial setting
        hourly = date_range('1/1/2000', periods=3, freq='1H')
        df = DataFrame(np.arange(6.).reshape(3, 2), columns=list('AB'),
                       index=hourly)
        expected = df.copy()
        expected['C'] = [expected.index[0], pd.NaT, pd.NaT]

        mask = df.A < 1
        df.loc[mask, 'C'] = df.loc[mask].index
        tm.assert_frame_equal(df, expected)

    def test_loc_setitem_datetime(self):
        """Enlarging an empty frame with different datetime key types."""

        # GH 9516
        dt1 = Timestamp('20130101 09:00:00')
        dt2 = Timestamp('20130101 10:00:00')

        converters = (
            lambda stamp: stamp,
            lambda stamp: stamp.to_datetime64(),
            lambda stamp: stamp.to_pydatetime(),
            lambda stamp: np.datetime64(stamp),
        )
        for convert in converters:

            df = DataFrame()
            df.loc[convert(dt1), 'one'] = 100
            df.loc[convert(dt2), 'one'] = 200

            expected = DataFrame({'one': [100.0, 200.0]}, index=[dt1, dt2])
            tm.assert_frame_equal(df, expected)

    def test_series_partial_set_datetime(self):
        """List-of-Timestamp ``.loc`` lookups, including a missing key."""
        # GH 11497

        idx = date_range('2011-01-01', '2011-01-02', freq='D', name='idx')
        ser = Series([0.1, 0.2], index=idx, name='s')

        day1 = Timestamp('2011-01-01')
        day2 = Timestamp('2011-01-02')
        day3 = Timestamp('2011-01-03')

        result = ser.loc[[day1, day2]]
        expected = Series([0.1, 0.2], index=idx, name='s')
        tm.assert_series_equal(result, expected, check_index_type=True)

        keys = [day2, day2, day1]
        expected = Series([0.2, 0.2, 0.1],
                          index=pd.DatetimeIndex(keys, name='idx'),
                          name='s')
        tm.assert_series_equal(ser.loc[keys], expected,
                               check_index_type=True)

        # day3 is not in the index
        keys = [day3, day2, day3]
        expected = Series([np.nan, 0.2, np.nan],
                          index=pd.DatetimeIndex(keys, name='idx'), name='s')
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            tm.assert_series_equal(ser.loc[keys], expected,
                                   check_index_type=True)

    def test_series_partial_set_period(self):
        """List-of-Period ``.loc`` lookups, including a missing key."""
        # GH 11497

        idx = pd.period_range('2011-01-01', '2011-01-02', freq='D', name='idx')
        ser = Series([0.1, 0.2], index=idx, name='s')

        day1 = pd.Period('2011-01-01', freq='D')
        day2 = pd.Period('2011-01-02', freq='D')
        day3 = pd.Period('2011-01-03', freq='D')

        result = ser.loc[[day1, day2]]
        expected = Series([0.1, 0.2], index=idx, name='s')
        tm.assert_series_equal(result, expected, check_index_type=True)

        keys = [day2, day2, day1]
        expected = Series([0.2, 0.2, 0.1],
                          index=pd.PeriodIndex(keys, name='idx'),
                          name='s')
        tm.assert_series_equal(ser.loc[keys], expected,
                               check_index_type=True)

        # day3 is not in the index
        keys = [day3, day2, day3]
        expected = Series([np.nan, 0.2, np.nan],
                          index=pd.PeriodIndex(keys, name='idx'), name='s')
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = ser.loc[keys]
        tm.assert_series_equal(result, expected)

    def test_nanosecond_getitem_setitem_with_tz(self):
        """Label get/set with a nanosecond-precision tz-aware index."""
        # GH 11679
        data = ['2016-06-28 08:30:00.123456789']
        index = pd.DatetimeIndex(data, dtype='datetime64[ns, America/Chicago]')
        df = DataFrame({'a': [10]}, index=index)
        label = df.index[0]

        result = df.loc[label]
        expected = Series(10, index=['a'], name=label)
        tm.assert_series_equal(result, expected)

        result = df.copy()
        result.loc[label, 'a'] = -1
        expected = DataFrame(-1, index=index, columns=['a'])
        tm.assert_frame_equal(result, expected)

    def test_loc_getitem_across_dst(self):
        """Slice and scalar lookup with keys on either side of a UTC offset."""
        # GH 21846
        idx = pd.date_range('2017-10-29 01:30:00',
                            tz='Europe/Berlin', periods=5, freq='30 min')
        series2 = pd.Series([0, 1, 2, 3, 4],
                            index=idx)

        start = pd.Timestamp('2017-10-29 02:30:00+02:00', tz='Europe/Berlin',
                             freq='30min')
        stop = pd.Timestamp('2017-10-29 02:00:00+01:00', tz='Europe/Berlin',
                            freq='30min')
        result = series2.loc[start:stop]
        expected = pd.Series([2, 3], index=idx[2:4])
        tm.assert_series_equal(result, expected)

        result = series2[start]
        expected = 2
        assert result == expected

    def test_loc_incremental_setitem_with_dst(self):
        """Grow a Series one tz-aware key at a time via ``.loc``."""
        # GH 20724
        base = datetime(2015, 11, 1, tzinfo=tz.gettz("US/Pacific"))
        idxs = [base + timedelta(seconds=i * 900) for i in range(16)]
        result = pd.Series([0], index=[idxs[0]])
        for stamp in idxs:
            result.loc[stamp] = 1
        expected = pd.Series(1, index=idxs)
        tm.assert_series_equal(result, expected)

    def test_loc_setitem_with_existing_dst(self):
        """Enlarge a frame indexed by tz-aware hours with a new label."""
        # GH 18308
        start = pd.Timestamp('2017-10-29 00:00:00+0200', tz='Europe/Madrid')
        end = pd.Timestamp('2017-10-29 03:00:00+0100', tz='Europe/Madrid')
        ts = pd.Timestamp('2016-10-10 03:00:00', tz='Europe/Madrid')
        idx = pd.date_range(start, end, closed='left', freq="H")
        result = pd.DataFrame(index=idx, columns=['value'])
        result.loc[ts, 'value'] = 12

        enlarged_index = idx.append(pd.DatetimeIndex([ts]))
        expected = pd.DataFrame([np.nan] * len(idx) + [12],
                                index=enlarged_index,
                                columns=['value'],
                                dtype=object)
        tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,898 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from warnings import catch_warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame, Float64Index, Index, Int64Index, RangeIndex, Series)
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_almost_equal, assert_series_equal
|
||||
|
||||
# Reusable pytest mark: ignores DeprecationWarnings whose message matches
# the pattern "\n.ix"; applied as a decorator to tests in this module.
ignore_ix = pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
|
||||
|
||||
|
||||
class TestFloatIndexers(object):
|
||||
|
||||
def check(self, result, original, indexer, getitem):
    """
    Compare ``result`` with the positional selection of ``original``.

    A Series is always selected along its only axis.  Any other object
    is selected by column when ``getitem`` is truthy and by row otherwise.
    """
    select_columns = getitem and not isinstance(original, Series)
    if select_columns:
        expected = original.iloc[:, indexer]
    else:
        expected = original.iloc[indexer]

    assert_almost_equal(result, expected)
|
||||
|
||||
def test_scalar_error(self):
    """A float scalar passed to ``.iloc`` raises TypeError on get and set."""

    # GH 4892
    # float_indexers should raise exceptions
    # on appropriate Index types & accessors
    # this duplicates the code below
    # but is specifically testing for the error
    # message

    index_makers = [tm.makeStringIndex, tm.makeUnicodeIndex,
                    tm.makeCategoricalIndex,
                    tm.makeDateIndex, tm.makeTimedeltaIndex,
                    tm.makePeriodIndex, tm.makeIntIndex,
                    tm.makeRangeIndex]

    for make_index in index_makers:

        labels = make_index(5)

        s = Series(np.arange(len(labels)), index=labels)

        # getting: the error message is checked as well
        msg = 'Cannot index by location index'
        with pytest.raises(TypeError, match=msg):
            s.iloc[3.0]

        # setting: only the exception type is checked
        with pytest.raises(TypeError):
            s.iloc[3.0] = 0
|
||||
|
||||
@ignore_ix
def test_scalar_non_numeric(self):
    """
    Float scalar indexers on non-numeric indexes.

    GH 4892. Lookups with ``3.0`` must raise; label-based assignment is only
    checked for index kinds where the behaviour is settled.
    """
    index_makers = [tm.makeStringIndex, tm.makeUnicodeIndex,
                    tm.makeCategoricalIndex,
                    tm.makeDateIndex, tm.makeTimedeltaIndex,
                    tm.makePeriodIndex]

    # Label-based setting is deliberately not checked for these kinds:
    # categorical may raise either ValueError or TypeError, and the
    # datetime-like kinds are inconsistent between Series and DataFrame.
    unchecked_setitem = ['categorical', 'datetime64', 'timedelta64',
                         'period']

    for make_index in index_makers:
        i = make_index(5)
        objs = [Series(np.arange(len(i)), index=i),
                DataFrame(np.random.randn(len(i), len(i)),
                          index=i, columns=i)]

        for s in objs:

            # getting
            for idxr, getitem in [(lambda x: x.ix, False),
                                  (lambda x: x.iloc, False),
                                  (lambda x: x, True)]:

                # getitem on a DataFrame is a KeyError as it is indexing
                # via labels on the columns
                if getitem and isinstance(s, DataFrame):
                    error = KeyError
                else:
                    error = TypeError

                with pytest.raises(error):
                    with catch_warnings(record=True):
                        idxr(s)[3.0]

            # label based can be a TypeError or KeyError
            if s.index.inferred_type in ['string', 'unicode', 'mixed']:
                error = KeyError
            else:
                error = TypeError
            with pytest.raises(error):
                s.loc[3.0]

            # contains
            assert 3.0 not in s

            # setting with a float fails with iloc
            with pytest.raises(TypeError):
                s.iloc[3.0] = 0

            # setting with an indexer
            if s.index.inferred_type in unchecked_setitem:
                continue

            s2 = s.copy()
            s2.loc[3.0] = 10
            assert s2.index.is_object()

            for idxr in [lambda x: x.ix,
                         lambda x: x]:
                s2 = s.copy()
                with catch_warnings(record=True):
                    idxr(s2)[3.0] = 0
                assert s2.index.is_object()

        # falls back to position selection, series only
        s = Series(np.arange(len(i)), index=i)
        s[3]
        with pytest.raises(TypeError):
            s[3.0]
|
||||
|
||||
@ignore_ix
def test_scalar_with_mixed(self):
    """
    Float scalar lookups on a pure string index and on a mixed object index.
    """
    s2 = Series([1, 2, 3], index=['a', 'b', 'c'])
    s3 = Series([1, 2, 3], index=['a', 'b', 1.5])

    # lookup in a pure string index with an invalid indexer
    for idxr in [lambda x: x.ix,
                 lambda x: x,
                 lambda x: x.iloc]:
        with pytest.raises(TypeError):
            with catch_warnings(record=True):
                idxr(s2)[1.0]

    with pytest.raises(KeyError):
        s2.loc[1.0]

    assert s2.loc['b'] == 2

    # mixed index so we have label indexing: the float 1.0 is rejected
    # while the integer 1 selects the second element
    for idxr in [lambda x: x,
                 lambda x: x.ix]:
        with catch_warnings(record=True):
            with pytest.raises(TypeError):
                idxr(s3)[1.0]

            assert idxr(s3)[1] == 2

    with pytest.raises(TypeError):
        s3.iloc[1.0]
    with pytest.raises(KeyError):
        s3.loc[1.0]

    assert s3.loc[1.5] == 3
|
||||
|
||||
@ignore_ix
def test_scalar_integer(self):
    """
    Scalar float indexers that equal an integer work on integer indexes.

    The key ``3.0`` is expected to behave like ``3`` for ``.ix``, ``.loc``
    and plain ``[]`` on both Series and DataFrame.
    """
    def assert_scalar_equal(left, right):
        assert left == right

    accessors = [(lambda x: x.ix, False),
                 (lambda x: x.loc, False),
                 (lambda x: x, True)]

    for i in [Int64Index(range(5)), RangeIndex(5)]:

        # the Series is built on ``i`` as well, so that both index classes
        # are exercised for Series and not only for DataFrame
        for s in [Series(np.arange(len(i)), index=i),
                  DataFrame(np.random.randn(len(i), len(i)),
                            index=i, columns=i)]:

            # getting: coerce to equal int
            for idxr, getitem in accessors:
                with catch_warnings(record=True):
                    result = idxr(s)[3.0]
                self.check(result, s, 3, getitem)

            # setting: coerce to equal int
            for idxr, getitem in accessors:

                if isinstance(s, Series):
                    compare = assert_scalar_equal
                    expected = 100
                else:
                    compare = tm.assert_series_equal
                    # the expected dtype differs by access path: integer
                    # for plain [] and float for .ix/.loc
                    fill = 100 if getitem else 100.
                    expected = Series(fill, index=range(len(s)), name=3)

                s2 = s.copy()
                with catch_warnings(record=True):
                    idxr(s2)[3.0] = 100

                    result = idxr(s2)[3.0]
                    compare(result, expected)

                    result = idxr(s2)[3]
                    compare(result, expected)

            # contains: coerce to equal int
            assert 3.0 in s
|
||||
|
||||
@ignore_ix
def test_scalar_float(self):
    """
    Scalar float indexers work on a float index for everything but ``.iloc``.
    """
    index = Index(np.arange(5.))
    for s in [Series(np.arange(len(index)), index=index),
              DataFrame(np.random.randn(len(index), len(index)),
                        index=index, columns=index)]:

        # assert all operations except for iloc are ok
        indexer = index[3]
        for idxr, getitem in [(lambda x: x.ix, False),
                              (lambda x: x.loc, False),
                              (lambda x: x, True)]:

            # getting
            with catch_warnings(record=True):
                result = idxr(s)[indexer]
            self.check(result, s, 3, getitem)

            # setting: write the value just read back into a copy and
            # confirm the selection still matches the original. The
            # previous version wrapped the assignment in a closure that
            # was never called, so no assignment was ever performed.
            s2 = s.copy()
            with catch_warnings(record=True):
                idxr(s2)[indexer] = result
                result = idxr(s2)[indexer]
            self.check(result, s, 3, getitem)

            # a float that is not a label of the index is a KeyError
            with catch_warnings(record=True):
                with pytest.raises(KeyError):
                    idxr(s)[3.5]

        # contains
        assert 3.0 in s

        # iloc succeeds with an integer
        expected = s.iloc[3]
        s2 = s.copy()

        s2.iloc[3] = expected
        result = s2.iloc[3]
        self.check(result, s, 3, False)

        # iloc raises with a float, for both lookup and assignment
        with pytest.raises(TypeError):
            s.iloc[3.0]
        with pytest.raises(TypeError):
            s2.iloc[3.0] = 0
|
||||
|
||||
@ignore_ix
def test_slice_non_numeric(self):
    """
    Slices with float bounds raise ``TypeError`` on non-numeric indexes.

    GH 4892. Every accessor (``.ix``, ``.loc``, ``.iloc`` and plain ``[]``)
    is checked for both lookup and assignment.
    """
    index_makers = [tm.makeStringIndex, tm.makeUnicodeIndex,
                    tm.makeDateIndex, tm.makeTimedeltaIndex,
                    tm.makePeriodIndex]
    float_slices = [slice(3.0, 4),
                    slice(3, 4.0),
                    slice(3.0, 4.0)]
    accessors = [lambda x: x.ix,
                 lambda x: x.loc,
                 lambda x: x.iloc,
                 lambda x: x]

    for make_index in index_makers:
        index = make_index(5)

        for s in [Series(range(5), index=index),
                  DataFrame(np.random.randn(5, 2), index=index)]:

            for sl in float_slices:
                for idxr in accessors:

                    # getitem
                    with pytest.raises(TypeError):
                        with catch_warnings(record=True):
                            idxr(s)[sl]

                    # setitem
                    with pytest.raises(TypeError):
                        with catch_warnings(record=True):
                            idxr(s)[sl] = 0
|
||||
|
||||
@ignore_ix
def test_slice_integer(self):
    """
    Float-bounded slices on integer indexes.

    ``.loc`` / ``.ix`` treat the bounds as labels and coerce them to like
    integers; plain ``[]`` is positional and must raise ``TypeError``.
    ``oob`` marks the index whose labels lie outside the sliced range, for
    which every label slice is expected to be empty.
    """
    label_accessors = [lambda x: x.loc,
                       lambda x: x.ix]
    float_slices = [slice(3.0, 4),
                    slice(3, 4.0),
                    slice(3.0, 4.0)]
    empty = slice(0, 0)

    for index, oob in [(Int64Index(range(5)), False),
                       (RangeIndex(5), False),
                       (Int64Index(range(5)) + 10, True)]:

        s = Series(range(5), index=index)

        # getitem
        for sl in float_slices:
            for idxr in label_accessors:
                with catch_warnings(record=True):
                    result = idxr(s)[sl]

                # label slicing includes the end point
                indexer = empty if oob else slice(3, 5)
                self.check(result, s, indexer, False)

            # positional indexing
            with pytest.raises(TypeError):
                s[sl]

        # getitem out-of-bounds
        for sl in [slice(-6, 6),
                   slice(-6.0, 6.0)]:
            for idxr in label_accessors:
                with catch_warnings(record=True):
                    result = idxr(s)[sl]

                indexer = empty if oob else slice(-6, 6)
                self.check(result, s, indexer, False)

        # positional indexing
        with pytest.raises(TypeError):
            s[slice(-6.0, 6.0)]

        # getitem odd floats
        for sl, in_bounds in [(slice(2.5, 4), slice(3, 5)),
                              (slice(2, 3.5), slice(2, 4)),
                              (slice(2.5, 3.5), slice(3, 4))]:
            for idxr in label_accessors:
                with catch_warnings(record=True):
                    result = idxr(s)[sl]

                res = empty if oob else in_bounds
                self.check(result, s, res, False)

            # positional indexing
            with pytest.raises(TypeError):
                s[sl]

        # setitem
        for sl in float_slices:
            for idxr in label_accessors:
                sc = s.copy()
                with catch_warnings(record=True):
                    idxr(sc)[sl] = 0
                    result = idxr(sc)[sl].values.ravel()
                assert (result == 0).all()

            # positional indexing
            with pytest.raises(TypeError):
                s[sl] = 0
|
||||
|
||||
def test_integer_positional_indexing(self):
    """
    Positional slicing on an integer index raises for float slice bounds.

    Integer bounds select by position through plain ``[]``; any float bound
    must raise ``TypeError`` for both ``[]`` and ``.iloc``.
    """
    s = Series(range(2, 6), index=range(2, 6))

    # integer bounds: [] agrees with .iloc
    assert_series_equal(s[2:4], s.iloc[2:4])

    float_slices = [slice(2, 4.0),
                    slice(2.0, 4),
                    slice(2.0, 4.0)]

    for idxr in [lambda x: x,
                 lambda x: x.iloc]:
        for sl in float_slices:
            with pytest.raises(TypeError):
                idxr(s)[sl]
|
||||
|
||||
@ignore_ix
def test_slice_integer_frame_getitem(self):
    """
    Float-bounded row slices on a DataFrame with an integer index.

    Mirrors ``test_slice_integer`` for the row dimension of a DataFrame:
    ``.loc`` / ``.ix`` accept the float bounds, plain ``[]`` raises
    ``TypeError``.
    """
    for index in [Int64Index(range(5)), RangeIndex(5)]:

        s = DataFrame(np.random.randn(5, 2), index=index)

        def run_checks(idxr):
            # getitem
            for sl in [slice(0.0, 1),
                       slice(0, 1.0),
                       slice(0.0, 1.0)]:

                result = idxr(s)[sl]
                self.check(result, s, slice(0, 2), False)

                # positional indexing
                with pytest.raises(TypeError):
                    s[sl]

            # getitem out-of-bounds
            for sl in [slice(-10, 10),
                       slice(-10.0, 10.0)]:

                result = idxr(s)[sl]
                # Rows were selected, so compare along the rows. The
                # previous call passed getitem=True and sliced the columns
                # instead, which only matched because both selections
                # return the whole frame.
                self.check(result, s, slice(-10, 10), False)

            # positional indexing
            with pytest.raises(TypeError):
                s[slice(-10.0, 10.0)]

            # getitem odd floats
            for sl, res in [(slice(0.5, 1), slice(1, 2)),
                            (slice(0, 0.5), slice(0, 1)),
                            (slice(0.5, 1.5), slice(1, 2))]:

                result = idxr(s)[sl]
                self.check(result, s, res, False)

                # positional indexing
                with pytest.raises(TypeError):
                    s[sl]

            # setitem
            for sl in [slice(3.0, 4),
                       slice(3, 4.0),
                       slice(3.0, 4.0)]:

                sc = s.copy()
                idxr(sc)[sl] = 0
                result = idxr(sc)[sl].values.ravel()
                assert (result == 0).all()

                # positional indexing
                with pytest.raises(TypeError):
                    s[sl] = 0

        run_checks(lambda x: x.loc)
        with catch_warnings(record=True):
            run_checks(lambda x: x.ix)
|
||||
|
||||
@ignore_ix
def test_slice_float(self):
    """
    Slices with float and/or integer bounds on a float index.

    With labels ``0.1, 1.1, ... 4.1`` each tested slice is expected to
    select the same rows as ``iloc[3:4]`` through ``.ix``, ``.loc`` and
    plain ``[]``, for both lookup and assignment.
    """
    index = Index(np.arange(5.)) + 0.1

    for s in [Series(range(5), index=index),
              DataFrame(np.random.randn(5, 2), index=index)]:

        # pick the comparator once per object instead of once per lookup
        if isinstance(s, Series):
            assert_same = tm.assert_series_equal
        else:
            assert_same = tm.assert_frame_equal

        for sl in [slice(3.0, 4),
                   slice(3, 4.0),
                   slice(3.0, 4.0)]:

            expected = s.iloc[3:4]
            for idxr in [lambda x: x.ix,
                         lambda x: x.loc,
                         lambda x: x]:

                # getitem
                with catch_warnings(record=True):
                    result = idxr(s)[sl]
                assert_same(result, expected)

                # setitem
                s2 = s.copy()
                with catch_warnings(record=True):
                    idxr(s2)[sl] = 0
                    result = idxr(s2)[sl].values.ravel()
                assert (result == 0).all()
|
||||
|
||||
def test_floating_index_doc_example(self):
    """
    Label versus positional lookup on a float index.

    The integer key ``3`` is a label for ``[]`` and ``.loc`` (third
    element, value 2) and a position for ``.iloc`` (fourth element,
    value 3).
    """
    index = Index([1.5, 2, 3, 4.5, 5])
    s = Series(range(5), index=index)

    assert s[3] == 2
    assert s.loc[3] == 2
    assert s.iloc[3] == 3
|
||||
|
||||
def test_floating_misc(self):
    """
    Scalar, slice and list indexing on a float index (related to GH 236).

    Plain ``[]`` and ``.loc`` are expected to agree on a float index. Each
    lookup is asserted once; the earlier version repeated identical
    ``.loc`` lookups and assertions several times.
    """
    s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64)

    # label based slicing
    assert_series_equal(s[1.0:3.0], s.loc[1.0:3.0])

    # exact indexing when found
    assert s[5.0] == s.loc[5.0]
    assert s[5] == s.loc[5]
    assert s[5.0] == s[5]

    # value not found (and no fallbacking at all)
    # scalar integers
    with pytest.raises(KeyError):
        s.loc[4]
    with pytest.raises(KeyError):
        s[4]

    # fancy floats/integers create the correct entry (as nan)
    # fancy tests
    expected = Series([2, 0], index=Float64Index([5.0, 0.0]))
    for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]:  # float
        assert_series_equal(s[fancy_idx], expected)
        assert_series_equal(s.loc[fancy_idx], expected)

    expected = Series([2, 0], index=Index([5, 0], dtype='int64'))
    for fancy_idx in [[5, 0], np.array([5, 0])]:  # int
        assert_series_equal(s[fancy_idx], expected)
        assert_series_equal(s.loc[fancy_idx], expected)

    # all should return the same as we are slicing 'the same';
    # plain [] previously did fallback indexing here
    for obj in [s.loc, s]:
        base = obj[2:5]
        assert_series_equal(base, obj[2.0:5.0])
        assert_series_equal(base, obj[2.0:5])
        assert_series_equal(base, obj[2.1:5])

    # combined test
    assert_series_equal(s.loc[2:5], s[2:5])

    # list selection
    result1 = s[[0.0, 5, 10]]
    assert_series_equal(result1, s.loc[[0.0, 5, 10]])
    assert_series_equal(result1, s.iloc[[0, 2, 4]])

    # lists containing missing labels warn and produce NaN entries
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result1 = s[[1.6, 5, 10]]
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result2 = s.loc[[1.6, 5, 10]]
    assert_series_equal(result1, result2)
    assert_series_equal(result1, Series(
        [np.nan, 2, 4], index=[1.6, 5, 10]))

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result1 = s[[0, 1, 2]]
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result2 = s.loc[[0, 1, 2]]
    assert_series_equal(result1, result2)
    assert_series_equal(result1, Series(
        [0.0, np.nan, np.nan], index=[0, 1, 2]))

    result1 = s.loc[[2.5, 5]]
    assert_series_equal(result1, Series([1, 2], index=[2.5, 5.0]))

    result1 = s[[2.5]]
    assert_series_equal(result1, s.loc[[2.5]])
    assert_series_equal(result1, Series([1], index=[2.5]))
|
||||
|
||||
def test_floating_tuples(self):
    """
    Tuple values are returned intact when looked up by a float label.

    See gh-13509.
    """
    # unique label: the stored tuple itself comes back
    s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.1, 0.2], name='foo')
    assert s[0.0] == (1, 1)

    # duplicated label: a Series holding every matching tuple comes back
    s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.0, 0.2], name='foo')
    expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name='foo')
    tm.assert_series_equal(s[0.0], expected)
|
||||
|
||||
def test_float64index_slicing_bug(self):
    """
    Smoke test: ``str()`` of ``value_counts()`` on the GH 5557 data.

    GH 5557, related to slicing a float index. The test only checks that
    building the string does not raise; no value is asserted.
    """
    # keys are deliberately not in sorted order; the order is kept as is
    ser = {
        256: 2321.0, 1: 78.0, 2: 2716.0, 3: 0.0, 4: 369.0, 5: 0.0,
        6: 269.0, 7: 0.0, 8: 0.0, 9: 0.0, 10: 3536.0, 11: 0.0,
        12: 24.0, 13: 0.0, 14: 931.0, 15: 0.0, 16: 101.0, 17: 78.0,
        18: 9643.0, 19: 0.0, 20: 0.0, 21: 0.0, 22: 63761.0, 23: 0.0,
        24: 446.0, 25: 0.0, 26: 34773.0, 27: 0.0, 28: 729.0, 29: 78.0,
        30: 0.0, 31: 0.0, 32: 3374.0, 33: 0.0, 34: 1391.0, 35: 0.0,
        36: 361.0, 37: 0.0, 38: 61808.0, 39: 0.0, 40: 0.0, 41: 0.0,
        42: 6677.0, 43: 0.0, 44: 802.0, 45: 0.0, 46: 2691.0, 47: 0.0,
        48: 3582.0, 49: 0.0, 50: 734.0, 51: 0.0, 52: 627.0, 53: 70.0,
        54: 2584.0, 55: 0.0, 56: 324.0, 57: 0.0, 58: 605.0, 59: 0.0,
        60: 0.0, 61: 0.0, 62: 3989.0, 63: 10.0, 64: 42.0, 65: 0.0,
        66: 904.0, 67: 0.0, 68: 88.0, 69: 70.0, 70: 8172.0, 71: 0.0,
        72: 0.0, 73: 0.0, 74: 64902.0, 75: 0.0, 76: 347.0, 77: 0.0,
        78: 36605.0, 79: 0.0, 80: 379.0, 81: 70.0, 82: 0.0, 83: 0.0,
        84: 3001.0, 85: 0.0, 86: 1630.0, 87: 7.0, 88: 364.0, 89: 0.0,
        90: 67404.0, 91: 9.0, 92: 0.0, 93: 0.0, 94: 7685.0, 95: 0.0,
        96: 1017.0, 97: 0.0, 98: 2831.0, 99: 0.0, 100: 2963.0, 101: 0.0,
        102: 854.0, 103: 0.0, 104: 0.0, 105: 0.0, 106: 0.0, 107: 0.0,
        108: 0.0, 109: 0.0, 110: 0.0, 111: 0.0, 112: 0.0, 113: 0.0,
        114: 0.0, 115: 0.0, 116: 0.0, 117: 0.0, 118: 0.0, 119: 0.0,
        120: 0.0, 121: 0.0, 122: 0.0, 123: 0.0, 124: 0.0, 125: 0.0,
        126: 67744.0, 127: 22.0, 128: 264.0, 129: 0.0,
        260: 197.0, 268: 0.0, 265: 0.0, 269: 0.0, 261: 0.0,
        266: 1198.0, 267: 0.0, 262: 2629.0, 258: 775.0, 257: 0.0,
        263: 0.0, 259: 0.0, 264: 163.0,
        250: 10326.0, 251: 0.0, 252: 1228.0, 253: 0.0, 254: 2769.0,
        255: 0.0,
    }

    # smoke test for the repr
    s = Series(ser)
    result = s.value_counts()
    str(result)
|
||||
@@ -0,0 +1,677 @@
|
||||
""" test positional based indexing with iloc """
|
||||
|
||||
from warnings import catch_warnings, filterwarnings, simplefilter
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lmap, lrange
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series, concat, date_range, isna
|
||||
from pandas.api.types import is_scalar
|
||||
from pandas.tests.indexing.common import Base
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestiLoc(Base):
|
||||
|
||||
def test_iloc_exceeds_bounds(self):
    """
    Out-of-bounds positions raise for scalars and lists; slices are clipped.

    GH6296: ``.iloc`` slices may exceed the bounds. GH10779: a single
    out-of-bounds position on a Series raises ``IndexError``.
    """
    df = DataFrame(np.random.random_sample((20, 5)), columns=list('ABCDE'))
    s = df['A']

    # lists of positions should raise IndexError!
    msg = 'positional indexers are out-of-bounds'
    with pytest.raises(IndexError, match=msg):
        df.iloc[:, [0, 1, 2, 3, 4, 5]]
    for positions in ([1, 30], [1, -30], [100]):
        with pytest.raises(IndexError):
            df.iloc[positions]
    for positions in ([100], [-100]):
        with pytest.raises(IndexError):
            s.iloc[positions]

    # still raise on a single indexer; the message is pinned for the
    # positive position only, as before
    msg = 'single positional indexer is out-of-bounds'
    for obj in (df, s):
        with pytest.raises(IndexError, match=msg):
            obj.iloc[30]
        with pytest.raises(IndexError):
            obj.iloc[-30]

    # slices are ok: (slice under test, equivalent in-bounds slice)
    column_cases = [
        (slice(4, 10), slice(4, None)),          # 0 < start < len < stop
        (slice(-4, -10), slice(None, 0)),        # stop < 0 < start < len
        (slice(10, 4, -1), slice(None, 4, -1)),  # 0 < stop < len < start (down)
        (slice(4, -10, -1), slice(4, None, -1)),  # stop < 0 < start < len (down)
        (slice(-10, 4), slice(None, 4)),         # start < 0 < stop < len
        (slice(10, 4), slice(None, 0)),          # 0 < stop < len < start
        (slice(-10, -11, -1), slice(None, 0)),   # stop < start < 0 < len (down)
        (slice(10, 11), slice(None, 0)),         # 0 < len < start < stop
    ]
    for tested, equivalent in column_cases:
        tm.assert_frame_equal(df.iloc[:, tested], df.iloc[:, equivalent])

    # slice bounds exceeding is ok
    series_cases = [
        (slice(18, 30), slice(18, None)),
        (slice(30, None), slice(None, 0)),
        (slice(30, None, -1), slice(None, None, -1)),
    ]
    for tested, equivalent in series_cases:
        tm.assert_series_equal(s.iloc[tested], s.iloc[equivalent])

    # doc example
    def check(result, expected):
        # touch the repr and dtypes as well, so an inconsistent empty
        # result would surface as an exception
        str(result)
        result.dtypes
        tm.assert_frame_equal(result, expected)

    dfl = DataFrame(np.random.randn(5, 2), columns=list('AB'))
    check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index))
    check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]])
    check(dfl.iloc[4:6], dfl.iloc[[4]])

    with pytest.raises(IndexError):
        dfl.iloc[[4, 5, 6]]
    with pytest.raises(IndexError):
        dfl.iloc[:, 4]
|
||||
|
||||
def test_iloc_getitem_int(self):
    """
    ``.iloc`` with a single non-negative integer.

    Both calls are forwarded to ``self.check_result``, which is defined on
    the base class outside this block.
    """
    # integer
    self.check_result('integer', 'iloc', 2, 'ix',
                      {0: 4, 1: 6, 2: 8}, typs=['ints', 'uints'])

    other_typs = ['labels', 'mixed', 'ts', 'floats', 'empty']
    self.check_result('integer', 'iloc', 2, 'indexer', 2,
                      typs=other_typs, fails=IndexError)
|
||||
|
||||
def test_iloc_getitem_neg_int(self):
    """
    ``.iloc`` with a single negative integer.

    Both calls are forwarded to ``self.check_result``, which is defined on
    the base class outside this block.
    """
    # neg integer
    self.check_result('neg int', 'iloc', -1, 'ix',
                      {0: 6, 1: 9, 2: 12}, typs=['ints', 'uints'])

    other_typs = ['labels', 'mixed', 'ts', 'floats', 'empty']
    self.check_result('neg int', 'iloc', -1, 'indexer', -1,
                      typs=other_typs, fails=IndexError)
|
||||
|
||||
@pytest.mark.parametrize('dims', [1, 2])
def test_iloc_getitem_invalid_scalar(self, dims):
    """
    A string scalar passed to ``.iloc`` raises ``TypeError`` (GH 21982).

    ``dims`` selects the object under test: 1 for a Series, 2 for a
    DataFrame.
    """
    if dims == 1:
        obj = Series(np.arange(10))
    else:
        obj = DataFrame(np.arange(100).reshape(10, 10))

    with pytest.raises(TypeError, match='Cannot index by location index'):
        obj.iloc['a']
|
||||
|
||||
def test_iloc_array_not_mutating_negative_indices(self):
    """
    ``.iloc`` must leave a caller-supplied indexer array unchanged.

    GH 21867. The indexer contains a negative position and is compared
    with a pristine copy after indexing rows and after indexing columns.
    """
    array_with_neg_numbers = np.array([1, 2, -1])
    array_copy = array_with_neg_numbers.copy()
    df = pd.DataFrame({
        'A': [100, 101, 102],
        'B': [103, 104, 105],
        'C': [106, 107, 108]},
        index=[1, 2, 3])

    # rows
    df.iloc[array_with_neg_numbers]
    tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy)

    # columns
    df.iloc[:, array_with_neg_numbers]
    tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy)
|
||||
|
||||
def test_iloc_getitem_list_int(self):
    """
    ``.iloc`` with a list or an ndarray of integer positions.

    All calls are forwarded to ``self.check_result``, which is defined on
    the base class outside this block.
    """
    int_typs = ['ints', 'uints']
    other_typs = ['labels', 'mixed', 'ts', 'floats', 'empty']
    three_labels = {0: [0, 2, 4], 1: [0, 3, 6], 2: [0, 4, 8]}
    one_label = {0: [4], 1: [6], 2: [8]}

    # list of ints
    self.check_result('list int', 'iloc', [0, 1, 2], 'ix',
                      three_labels, typs=int_typs)
    self.check_result('list int', 'iloc', [2], 'ix',
                      one_label, typs=int_typs)
    self.check_result('list int', 'iloc', [0, 1, 2], 'indexer', [0, 1, 2],
                      typs=other_typs, fails=IndexError)

    # array of ints (GH5006), make sure that a single indexer is returning
    # the correct type
    self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'ix',
                      three_labels, typs=int_typs)
    self.check_result('array int', 'iloc', np.array([2]), 'ix',
                      one_label, typs=int_typs)
    self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'indexer',
                      [0, 1, 2],
                      typs=other_typs, fails=IndexError)
|
||||
|
||||
def test_iloc_getitem_neg_int_can_reach_first_index(self):
    """
    A negative position equal to ``-len(obj)`` selects the first element.

    GH10547 and GH10779.
    """
    df = DataFrame({'A': [2, 3, 5], 'B': [7, 11, 13]})
    s = df['A']

    # scalar and list positions on a frame
    tm.assert_series_equal(df.iloc[-3], df.iloc[0])
    tm.assert_frame_equal(df.iloc[[-3]], df.iloc[[0]])

    # scalar and list positions on a series
    assert s.iloc[-3] == s.iloc[0]
    tm.assert_series_equal(s.iloc[[-3]], s.iloc[[0]])

    # check the length 1 Series case highlighted in GH10547
    expected = Series(['a'], index=['A'])
    result = expected.iloc[[-1]]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_iloc_getitem_dups(self):
    """
    ``.iloc`` with repeated positions, and on a frame with duplicate columns.

    The first check is forwarded to ``self.check_result``, which is defined
    on the base class outside this block. The second part covers GH 6766.
    """
    # no dups in panel (bug?)
    self.check_result('list int (dups)', 'iloc', [0, 1, 1, 3], 'ix',
                      {0: [0, 2, 2, 6], 1: [0, 3, 3, 9]},
                      objs=['series', 'frame'], typs=['ints', 'uints'])

    # GH 6766: side-by-side concat yields the column labels A, B, A, B
    df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
    df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
    df = concat([df1, df2], axis=1)

    # cross-sectional indexing
    assert isna(df.iloc[0, 0])

    result = df.iloc[0, :]
    expected = Series([np.nan, 1, 3, 3], index=['A', 'B', 'A', 'B'],
                      name=0)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_iloc_getitem_array(self):
    """
    ``.iloc`` with an array-like indexer (the index of a Series).

    The call is forwarded to ``self.check_result``, which is defined on the
    base class outside this block.
    """
    # array like: only the index (1, 2, 3) of this Series is used
    s = Series(index=lrange(1, 4))
    self.check_result('array like', 'iloc', s.index, 'ix',
                      {0: [2, 4, 6], 1: [3, 6, 9], 2: [4, 8, 12]},
                      typs=['ints', 'uints'])
|
||||
|
||||
def test_iloc_getitem_bool(self):
    """
    ``.iloc`` with a boolean list indexer.

    Both calls are forwarded to ``self.check_result``, which is defined on
    the base class outside this block.
    """
    # boolean indexers
    b = [True, False, True, False]
    self.check_result('bool', 'iloc', b, 'ix', b, typs=['ints', 'uints'])

    other_typs = ['labels', 'mixed', 'ts', 'floats', 'empty']
    self.check_result('bool', 'iloc', b, 'ix', b,
                      typs=other_typs, fails=IndexError)
|
||||
|
||||
def test_iloc_getitem_slice(self):
    """
    ``.iloc`` with an integer slice.

    Both calls are forwarded to ``self.check_result``, which is defined on
    the base class outside this block.
    """
    # slices
    self.check_result('slice', 'iloc', slice(1, 3), 'ix',
                      {0: [2, 4], 1: [3, 6], 2: [4, 8]},
                      typs=['ints', 'uints'])

    other_typs = ['labels', 'mixed', 'ts', 'floats', 'empty']
    self.check_result('slice', 'iloc', slice(1, 3), 'indexer',
                      slice(1, 3),
                      typs=other_typs, fails=IndexError)
|
||||
|
||||
def test_iloc_getitem_slice_dups(self):
    """
    Positional slices recover the pieces of a concat with duplicate labels.
    """
    df1 = DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B'])
    df2 = DataFrame(np.random.randint(0, 10, size=20).reshape(10, 2),
                    columns=['A', 'C'])

    # axis=1
    df = concat([df1, df2], axis=1)
    tm.assert_frame_equal(df.iloc[:, :4], df1)
    tm.assert_frame_equal(df.iloc[:, 4:], df2)

    df = concat([df2, df1], axis=1)
    tm.assert_frame_equal(df.iloc[:, :2], df2)
    tm.assert_frame_equal(df.iloc[:, 2:], df1)

    # a slice that ends inside the duplicated 'A' columns
    exp = concat([df2, df1.iloc[:, [0]]], axis=1)
    tm.assert_frame_equal(df.iloc[:, 0:3], exp)

    # axis=0: stack the frame on itself, then check each half
    df = concat([df, df], axis=0)
    for rows in (slice(0, 10), slice(10, None)):
        tm.assert_frame_equal(df.iloc[rows, :2], df2)
        tm.assert_frame_equal(df.iloc[rows, 2:], df1)
|
||||
|
||||
def test_iloc_setitem(self):
    """
    Scalar, column-slice and in-place-add assignment through ``.iloc``.
    """
    df = self.frame_ints

    # scalar position
    df.iloc[1, 1] = 1
    assert df.iloc[1, 1] == 1

    # column slice
    df.iloc[:, 2:3] = 0
    expected = df.iloc[:, 2:3]
    result = df.iloc[:, 2:3]
    tm.assert_frame_equal(result, expected)
    # The comparison above reads the same selection twice and so cannot
    # fail on its own; also confirm the assigned value is really stored.
    assert (result.values == 0).all()

    # GH5771: augmented assignment on a slice
    s = Series(0, index=[4, 5, 6])
    s.iloc[1:2] += 1
    expected = Series([0, 1, 0], index=[4, 5, 6])
    tm.assert_series_equal(s, expected)
|
||||
|
||||
def test_iloc_setitem_list(self):
    """In-place arithmetic through a list-based ``iloc`` selection."""
    # setitem with an iloc list
    labels = ["A", "B", "C"]
    rows, cols = [0, 1], [1, 2]

    df = DataFrame(np.arange(9).reshape((3, 3)), index=labels,
                   columns=labels)
    # plain read with the same selection first (result unused)
    df.iloc[rows, cols]
    df.iloc[rows, cols] += 100

    expected = DataFrame(
        np.array([[0, 101, 102],
                  [3, 104, 105],
                  [6, 7, 8]]),
        index=labels, columns=labels)
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_iloc_setitem_pandas_object(self):
    """``iloc`` assignment accepts a Series or an Index as the indexer."""
    # GH 17193
    s_orig = Series([0, 1, 2, 3])
    expected = Series([0, -1, -2, 3])

    # same positions, wrapped once as a Series and once as an Index
    for wrap in (Series, pd.Index):
        s = s_orig.copy()
        s.iloc[wrap([1, 2])] = [-1, -2]
        tm.assert_series_equal(s, expected)
|
||||
|
||||
def test_iloc_setitem_dups(self):
    """``iloc`` assignment on a frame whose column labels repeat."""
    # GH 6766
    # iloc with a mask aligning from another iloc
    left = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
    right = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
    df = concat([left, right], axis=1)

    expected = df.fillna(3)
    expected['A'] = expected['A'].astype('float64')
    is_missing = np.isnan(df.iloc[:, 0])
    rows = is_missing[is_missing].index
    df.iloc[rows, 0] = df.iloc[rows, 2]
    tm.assert_frame_equal(df, expected)

    # del a dup column across blocks
    expected = DataFrame({0: [1, 2], 1: [3, 4]})
    expected.columns = ['B', 'B']
    del df['A']
    tm.assert_frame_equal(df, expected)

    # assign back to self
    df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]]
    tm.assert_frame_equal(df, expected)

    # reversed x 2
    for _ in range(2):
        df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(
            drop=True)
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_iloc_getitem_frame(self):
    """Compare positional ``iloc`` reads with the matching ``.ix`` reads."""
    df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2),
                   columns=lrange(0, 8, 2))

    def via_ix(key):
        # fetch the reference value through .ix with its
        # DeprecationWarning filtered out
        with catch_warnings(record=True):
            filterwarnings("ignore", "\\n.ix", DeprecationWarning)
            return df.ix[key]

    # single row
    result = df.iloc[2]
    tm.assert_series_equal(result, via_ix(4))

    # single cell
    result = df.iloc[2, 2]
    assert result == via_ix((4, 4))

    # slice
    result = df.iloc[4:8]
    tm.assert_frame_equal(result, via_ix(slice(8, 14)))

    result = df.iloc[:, 2:3]
    tm.assert_frame_equal(result, via_ix((slice(None), slice(4, 5))))

    # list of integers
    result = df.iloc[[0, 1, 3]]
    tm.assert_frame_equal(result, via_ix([0, 2, 6]))

    result = df.iloc[[0, 1, 3], [0, 1]]
    tm.assert_frame_equal(result, via_ix(([0, 2, 6], [0, 2])))

    # neg indices
    result = df.iloc[[-1, 1, 3], [-1, 1]]
    tm.assert_frame_equal(result, via_ix(([18, 2, 6], [6, 2])))

    # dups indices
    result = df.iloc[[-1, -1, 1, 3], [-1, 1]]
    tm.assert_frame_equal(result, via_ix(([18, 18, 2, 6], [6, 2])))

    # with index-like
    s = Series(index=lrange(1, 5))
    result = df.iloc[s.index]
    tm.assert_frame_equal(result, via_ix([2, 4, 6, 8]))
|
||||
|
||||
def test_iloc_getitem_labelled_frame(self):
    """Positional reads on a string-labelled frame match ``loc`` reads."""
    # try with labelled frame
    df = DataFrame(np.random.randn(10, 4),
                   index=list('abcdefghij'), columns=list('ABCD'))

    assert df.iloc[1, 1] == df.loc['b', 'B']

    tm.assert_frame_equal(df.iloc[:, 2:3], df.loc[:, ['C']])

    # negative indexing
    assert df.iloc[-1, -1] == df.loc['j', 'D']

    # out-of-bounds exception
    with pytest.raises(IndexError):
        df.iloc[10, 5]

    # trying to use a label
    with pytest.raises(ValueError):
        df.iloc['j', 'D']
|
||||
|
||||
def test_iloc_getitem_doc_issue(self):
    """Two-axis ``iloc`` slices return the matching sub-array and labels."""
    # multi axis slicing issue with single block
    # surfaced in GH 6059

    def touch(obj):
        # rendering and dtype access are exercised only for side effects
        str(obj)
        obj.dtypes

    values = np.random.randn(6, 4)
    dates = date_range('20130101', periods=6)
    letters = list('ABCD')
    df = DataFrame(values, index=dates, columns=letters)

    # defines ref_locs
    df.describe()

    result = df.iloc[3:5, 0:2]
    touch(result)

    expected = DataFrame(values[3:5, 0:2], index=dates[3:5],
                         columns=letters[0:2])
    tm.assert_frame_equal(result, expected)

    # for dups
    df.columns = list('aaaa')
    result = df.iloc[3:5, 0:2]
    touch(result)

    expected = DataFrame(values[3:5, 0:2], index=dates[3:5],
                         columns=list('aa'))
    tm.assert_frame_equal(result, expected)

    # related
    values = np.random.randn(6, 4)
    row_labels = list(range(0, 12, 2))
    col_labels = list(range(0, 8, 2))
    df = DataFrame(values, index=row_labels, columns=col_labels)

    df._data.blocks[0].mgr_locs
    result = df.iloc[1:5, 2:4]
    touch(result)
    expected = DataFrame(values[1:5, 2:4], index=row_labels[1:5],
                         columns=col_labels[2:4])
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_iloc_setitem_series(self):
    """``iloc`` assignment on a labelled frame and on Series objects."""
    df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'),
                   columns=list('ABCD'))

    df.iloc[1, 1] = 1
    result = df.iloc[1, 1]
    assert result == 1

    # Check the stored values directly: comparing two reads of the same
    # slice with each other cannot detect a wrong assignment.
    df.iloc[:, 2:3] = 0
    result = df.iloc[:, 2:3]
    assert result.shape == (len(df), 1)
    assert (result.values == 0).all()

    s = Series(np.random.randn(10), index=lrange(0, 20, 2))

    s.iloc[1] = 1
    result = s.iloc[1]
    assert result == 1

    # same reasoning as for the frame slice above
    s.iloc[:4] = 0
    result = s.iloc[:4]
    assert len(result) == 4
    assert (result.values == 0).all()

    # interleaved strided assignments
    s = Series([-1] * 6)
    s.iloc[0::2] = [0, 2, 4]
    s.iloc[1::2] = [1, 3, 5]
    result = s
    expected = Series([0, 1, 2, 3, 4, 5])
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_iloc_setitem_list_of_lists(self):
    """Row-slice assignment from a list of lists, single and mixed dtype."""
    # GH 7551
    # list-of-list is set incorrectly in mixed vs. single dtyped frames
    second_col = np.arange(5, 10, dtype='int64')

    # single dtype
    df = DataFrame({'A': np.arange(5, dtype='int64'), 'B': second_col})
    df.iloc[2:4] = [[10, 11], [12, 13]]
    expected = DataFrame({'A': [0, 1, 10, 12, 4],
                          'B': [5, 6, 11, 13, 9]})
    tm.assert_frame_equal(df, expected)

    # mixed dtype
    df = DataFrame({'A': list('abcde'),
                    'B': np.arange(5, 10, dtype='int64')})
    df.iloc[2:4] = [['x', 11], ['y', 13]]
    expected = DataFrame({'A': ['a', 'b', 'x', 'y', 'e'],
                          'B': [5, 6, 11, 13, 9]})
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    'indexer', [[0], slice(None, 1, None), np.array([0])])
@pytest.mark.parametrize(
    'value', [['Z'], np.array(['Z'])])
def test_iloc_setitem_with_scalar_index(self, indexer, value):
    """A one-element assignment leaves a scalar, not a list, in the cell."""
    # GH #19474
    # assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated
    # elementwisely, not using "setter('A', ['Z'])".
    frame = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
    frame.iloc[0, indexer] = value
    cell = frame.iloc[0, 0]

    assert is_scalar(cell)
    assert cell == 'Z'
|
||||
|
||||
def test_iloc_mask(self):
    """Boolean masks through ``[]``, ``.loc`` and ``.iloc``.

    For every combination of mask index and accessor the outcome is
    either the binary representation of the selected ``nums`` sum or the
    text of the exception that was raised; both are compared against a
    table of expected strings.
    """
    # GH 3631, iloc with a mask (of a series) should raise
    df = DataFrame(lrange(5), list('ABCDE'), columns=['a'])
    mask = (df.a % 2 == 0)
    pytest.raises(ValueError, df.iloc.__getitem__, tuple([mask]))
    mask.index = lrange(len(mask))
    pytest.raises(NotImplementedError, df.iloc.__getitem__,
                  tuple([mask]))

    # ndarray ok
    result = df.iloc[np.array([True] * len(mask), dtype=bool)]
    tm.assert_frame_equal(result, df)

    # the possibilities
    locs = np.arange(4)
    nums = 2 ** locs
    reps = lmap(bin, nums)
    df = DataFrame({'locs': locs, 'nums': nums}, reps)

    expected = {
        (None, ''): '0b1100',
        (None, '.loc'): '0b1100',
        (None, '.iloc'): '0b1100',
        ('index', ''): '0b11',
        ('index', '.loc'): '0b11',
        ('index', '.iloc'): ('iLocation based boolean indexing '
                             'cannot use an indexable as a mask'),
        ('locs', ''): 'Unalignable boolean Series provided as indexer '
                      '(index of the boolean Series and of the indexed '
                      'object do not match',
        ('locs', '.loc'): 'Unalignable boolean Series provided as indexer '
                          '(index of the boolean Series and of the '
                          'indexed object do not match',
        ('locs', '.iloc'): ('iLocation based boolean indexing on an '
                            'integer type is not available'),
    }

    # UserWarnings from reindex of a boolean mask
    with catch_warnings(record=True):
        simplefilter("ignore", UserWarning)
        for idx in [None, 'index', 'locs']:
            mask = (df.nums > 2).values
            if idx:
                mask = Series(mask, list(reversed(getattr(df, idx))))
            for method in ['', '.loc', '.iloc']:
                try:
                    if method:
                        accessor = getattr(df, method[1:])
                    else:
                        accessor = df
                    ans = str(bin(accessor[mask]['nums'].sum()))
                except Exception as e:
                    # deliberate catch-all: the exception text is the
                    # value that gets compared against the table
                    ans = str(e)

                key = tuple([idx, method])
                r = expected.get(key)
                if r != ans:
                    # report the expected value (r) first and the
                    # received value (ans) second, matching the wording
                    raise AssertionError(
                        "[%s] does not match [%s], received [%s]"
                        % (key, r, ans))
|
||||
|
||||
def test_iloc_non_unique_indexing(self):
    """Positional and label lookups on a frame with a repeated index."""
    # GH 4017, non-unique indexing (on the axis)
    base = DataFrame({'A': [0.1] * 3000, 'B': [1] * 3000})
    idx = np.array(lrange(30)) * 99
    expected = base.iloc[idx]

    tripled = concat([base, 2 * base, 3 * base])
    result = tripled.iloc[idx]

    tm.assert_frame_equal(result, expected)

    df2 = DataFrame({'A': [0.1] * 1000, 'B': [1] * 1000})
    df2 = concat([df2, 2 * df2, 3 * df2])

    sidx = df2.index.to_series()
    expected = df2.iloc[idx[idx <= sidx.max()]]

    # each selected row appears once per concatenated copy (x1, x2, x3)
    pieces = []
    for _, row in expected.iterrows():
        pieces.extend([row, row * 2, row * 3])

    expected = DataFrame(pieces)
    beyond = DataFrame(index=idx[idx > sidx.max()])
    expected = concat([expected, beyond], sort=True)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = df2.loc[idx]
    tm.assert_frame_equal(result, expected, check_index_type=False)
|
||||
|
||||
def test_iloc_empty_list_indexer_is_ok(self):
    """An empty list indexer behaves like the equivalent empty slice."""
    from pandas.util.testing import makeCustomDataframe as mkdf
    frame = mkdf(5, 2)
    strict = dict(check_index_type=True, check_column_type=True)

    # no columns selected
    tm.assert_frame_equal(frame.iloc[:, []], frame.iloc[:, :0], **strict)
    # no rows selected, explicit column slice
    tm.assert_frame_equal(frame.iloc[[], :], frame.iloc[:0, :], **strict)
    # no rows selected, row indexer only
    tm.assert_frame_equal(frame.iloc[[]], frame.iloc[:0, :], **strict)
|
||||
|
||||
def test_identity_slice_returns_new_object(self):
    """``iloc[:]`` yields a distinct object that still reflects later writes."""
    # GH13873
    frame = DataFrame({'a': [1, 2, 3]})
    frame_view = frame.iloc[:]
    assert frame_view is not frame

    # should be a shallow copy
    frame['a'] = [4, 4, 4]
    assert (frame_view['a'] == 4).all()

    series = Series([1, 2, 3, 4, 5, 6])
    series_view = series.iloc[:]
    assert series_view is not series

    # should also be a shallow copy
    series[:3] = [7, 8, 9]
    assert all(series_view[:3] == [7, 8, 9])
|
||||
File diff suppressed because it is too large
Load Diff
+169
@@ -0,0 +1,169 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import algos as libalgos, index as libindex
|
||||
|
||||
from pandas import compat
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestNumericEngine(object):
    """Checks for the numeric index engines.

    Each test takes the ``numeric_indexing_engine_type_and_dtype`` fixture
    and unpacks it as ``(engine_type, dtype)``.  Engines are built from a
    zero-argument callable returning the values plus the value count.
    """

    def test_is_monotonic(self, numeric_indexing_engine_type_and_dtype):
        engine_type, dtype = numeric_indexing_engine_type_and_dtype
        run = 1000
        ascending = np.array([1] * run + [2] * run + [3] * run,
                             dtype=dtype)

        # monotonic increasing
        engine = engine_type(lambda: ascending, len(ascending))
        assert engine.is_monotonic_increasing is True
        assert engine.is_monotonic_decreasing is False

        # monotonic decreasing
        engine = engine_type(lambda: ascending[::-1], len(ascending))
        assert engine.is_monotonic_increasing is False
        assert engine.is_monotonic_decreasing is True

        # neither monotonic increasing or decreasing
        up_down = np.array([1] * run + [2] * run + [1] * run, dtype=dtype)
        engine = engine_type(lambda: up_down[::-1], len(up_down))
        assert engine.is_monotonic_increasing is False
        assert engine.is_monotonic_decreasing is False

    def test_is_unique(self, numeric_indexing_engine_type_and_dtype):
        engine_type, dtype = numeric_indexing_engine_type_and_dtype

        # unique
        distinct = np.array([1, 3, 2], dtype=dtype)
        engine = engine_type(lambda: distinct, len(distinct))
        assert engine.is_unique is True

        # not unique
        repeated = np.array([1, 2, 1], dtype=dtype)
        engine = engine_type(lambda: repeated, len(repeated))
        assert engine.is_unique is False

    def test_get_loc(self, numeric_indexing_engine_type_and_dtype):
        engine_type, dtype = numeric_indexing_engine_type_and_dtype

        # unique
        distinct = np.array([1, 2, 3], dtype=dtype)
        engine = engine_type(lambda: distinct, len(distinct))
        assert engine.get_loc(2) == 1

        # monotonic
        run = 1000
        blocks = np.array([1] * run + [2] * run + [3] * run, dtype=dtype)
        engine = engine_type(lambda: blocks, len(blocks))
        assert engine.get_loc(2) == slice(1000, 2000)

        # not monotonic
        cycled = np.array([1, 2, 3] * run, dtype=dtype)
        engine = engine_type(lambda: cycled, len(cycled))
        expected = np.array([False, True, False] * run, dtype=bool)
        result = engine.get_loc(2)
        assert (result == expected).all()

    def test_get_backfill_indexer(
            self, numeric_indexing_engine_type_and_dtype):
        engine_type, dtype = numeric_indexing_engine_type_and_dtype

        known = np.array([1, 5, 10], dtype=dtype)
        engine = engine_type(lambda: known, len(known))

        targets = np.array(compat.range(12), dtype=dtype)
        result = engine.get_backfill_indexer(targets)

        # reference result comes straight from libalgos
        expected = libalgos.backfill(known, targets)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_pad_indexer(
            self, numeric_indexing_engine_type_and_dtype):
        engine_type, dtype = numeric_indexing_engine_type_and_dtype

        known = np.array([1, 5, 10], dtype=dtype)
        engine = engine_type(lambda: known, len(known))

        targets = np.array(compat.range(12), dtype=dtype)
        result = engine.get_pad_indexer(targets)

        # reference result comes straight from libalgos
        expected = libalgos.pad(known, targets)
        tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
class TestObjectEngine(object):
    """Checks for ``libindex.ObjectEngine`` using single-letter strings.

    Engines are built from a zero-argument callable returning the values
    plus the value count.
    """

    engine_type = libindex.ObjectEngine
    dtype = np.object_
    values = list('abc')

    def test_is_monotonic(self):

        num = 1000
        # three distinct runs ('a', 'b', 'c'), consistent with the data
        # used by test_get_loc in this class
        arr = np.array(['a'] * num + ['b'] * num + ['c'] * num,
                       dtype=self.dtype)

        # monotonic increasing
        engine = self.engine_type(lambda: arr, len(arr))
        assert engine.is_monotonic_increasing is True
        assert engine.is_monotonic_decreasing is False

        # monotonic decreasing
        engine = self.engine_type(lambda: arr[::-1], len(arr))
        assert engine.is_monotonic_increasing is False
        assert engine.is_monotonic_decreasing is True

        # neither monotonic increasing or decreasing
        arr = np.array(['a'] * num + ['b'] * num + ['a'] * num,
                       dtype=self.dtype)
        engine = self.engine_type(lambda: arr[::-1], len(arr))
        assert engine.is_monotonic_increasing is False
        assert engine.is_monotonic_decreasing is False

    def test_is_unique(self):
        # unique
        arr = np.array(self.values, dtype=self.dtype)
        engine = self.engine_type(lambda: arr, len(arr))
        assert engine.is_unique is True

        # not unique
        arr = np.array(['a', 'b', 'a'], dtype=self.dtype)
        engine = self.engine_type(lambda: arr, len(arr))
        assert engine.is_unique is False

    def test_get_loc(self):
        # unique
        arr = np.array(self.values, dtype=self.dtype)
        engine = self.engine_type(lambda: arr, len(arr))
        assert engine.get_loc('b') == 1

        # monotonic
        num = 1000
        arr = np.array(['a'] * num + ['b'] * num + ['c'] * num,
                       dtype=self.dtype)
        engine = self.engine_type(lambda: arr, len(arr))
        assert engine.get_loc('b') == slice(1000, 2000)

        # not monotonic
        arr = np.array(self.values * num, dtype=self.dtype)
        engine = self.engine_type(lambda: arr, len(arr))
        expected = np.array([False, True, False] * num, dtype=bool)
        result = engine.get_loc('b')
        assert (result == expected).all()

    def test_get_backfill_indexer(self):
        arr = np.array(['a', 'e', 'j'], dtype=self.dtype)
        engine = self.engine_type(lambda: arr, len(arr))

        new = np.array(list('abcdefghij'), dtype=self.dtype)
        result = engine.get_backfill_indexer(new)

        # reference result from the "object" variant in libalgos
        expected = libalgos.backfill["object"](arr, new)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_pad_indexer(self):
        arr = np.array(['a', 'e', 'j'], dtype=self.dtype)
        engine = self.engine_type(lambda: arr, len(arr))

        new = np.array(list('abcdefghij'), dtype=self.dtype)
        result = engine.get_pad_indexer(new)

        # reference result from the "object" variant in libalgos
        expected = libalgos.pad["object"](arr, new)
        tm.assert_numpy_array_equal(result, expected)
|
||||
@@ -0,0 +1,17 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestIndexingSlow(object):
    """Indexing checks carrying the ``slow`` pytest marker."""

    @pytest.mark.slow
    def test_large_dataframe_indexing(self):
        """Appending one row via ``loc`` to a million-row frame."""
        # GH10692
        n_rows = 10 ** 6
        result = DataFrame({'x': range(n_rows)}, dtype='int64')
        result.loc[len(result)] = len(result) + 1

        # The appended value is n_rows + 1, so the expected column is
        # 0 .. n_rows - 1 followed by n_rows + 1.  Compare exactly: with
        # an approximate comparison, a final value that is off by one
        # at this magnitude could still be accepted.
        expected = DataFrame({'x': list(range(n_rows)) + [n_rows + 1]},
                             dtype='int64')
        tm.assert_frame_equal(result, expected, check_exact=True)
|
||||
@@ -0,0 +1,314 @@
|
||||
""" test indexing with ix """
|
||||
|
||||
from warnings import catch_warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange
|
||||
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series, option_context
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
def test_ix_deprecation():
    """Reading through ``.ix`` emits a ``DeprecationWarning``."""
    # GH 15114
    frame = DataFrame({'A': [1, 2, 3]})
    expect_warning = tm.assert_produces_warning(DeprecationWarning,
                                                check_stacklevel=False)
    with expect_warning:
        frame.ix[1, 'A']
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestIX(object):
    """Tests comparing the deprecated ``.ix`` accessor with ``.loc``."""

    def test_ix_loc_setitem_consistency(self):
        # GH 5771
        # loc with slice and series
        s = Series(0, index=[4, 5, 6])
        s.loc[4:5] += 1
        expected = Series([1, 1, 0], index=[4, 5, 6])
        tm.assert_series_equal(s, expected)

        # GH 5928
        # chained indexing assignment
        df = DataFrame({'a': [0, 1, 2]})
        expected = df.copy()
        with catch_warnings(record=True):
            expected.ix[[0, 1, 2], 'a'] = -expected.ix[[0, 1, 2], 'a']

        with catch_warnings(record=True):
            df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]]
        tm.assert_frame_equal(df, expected)

        df = DataFrame({'a': [0, 1, 2], 'b': [0, 1, 2]})
        with catch_warnings(record=True):
            df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]].astype(
                'float64') + 0.5
        expected = DataFrame({'a': [0.5, -0.5, -1.5], 'b': [0, 1, 2]})
        tm.assert_frame_equal(df, expected)

        # GH 8607
        # ix setitem consistency
        deltas = [1174, 904, 161]
        elapsed = [7673, 9277, 1470]
        epochs = [1413840976, 1413842580, 1413760580]
        df = DataFrame({'delta': deltas,
                        'elapsed': elapsed,
                        'timestamp': epochs})
        expected = DataFrame({'delta': [1174, 904, 161],
                              'elapsed': [7673, 9277, 1470],
                              'timestamp': pd.to_datetime(
                                  [1413840976, 1413842580, 1413760580],
                                  unit='s')
                              })

        # plain column assignment
        via_getitem = df.copy()
        via_getitem['timestamp'] = pd.to_datetime(df['timestamp'],
                                                  unit='s')
        tm.assert_frame_equal(via_getitem, expected)

        # .loc assignment
        via_loc = df.copy()
        via_loc.loc[:, 'timestamp'] = pd.to_datetime(df['timestamp'],
                                                     unit='s')
        tm.assert_frame_equal(via_loc, expected)

        # .ix assignment (column addressed as 2)
        via_ix = df.copy()
        with catch_warnings(record=True):
            via_ix.ix[:, 2] = pd.to_datetime(df['timestamp'], unit='s')
        tm.assert_frame_equal(via_ix, expected)

    def test_ix_loc_consistency(self):
        # GH 8613
        # some edge cases where ix/loc should return the same
        # this is not an exhaustive case

        def compare(result, expected):
            if not is_scalar(expected):
                assert expected.equals(result)
            else:
                assert result == expected

        # failure cases for .loc, but these work for .ix
        df = DataFrame(np.random.randn(5, 4), columns=list('ABCD'))
        ix_only_keys = [slice(1, 3),
                        (slice(0, 2), slice(0, 2)),
                        (slice(0, 2), df.columns[0:2])]
        index_makers = [tm.makeStringIndex, tm.makeUnicodeIndex,
                        tm.makeDateIndex, tm.makePeriodIndex,
                        tm.makeTimedeltaIndex]
        for key in ix_only_keys:
            for make_index in index_makers:
                df.index = make_index(len(df.index))
                with catch_warnings(record=True):
                    df.ix[key]

                with pytest.raises(TypeError):
                    df.loc[key]

        df = DataFrame(np.random.randn(5, 4), columns=list('ABCD'),
                       index=pd.date_range('2012-01-01', periods=5))

        date_keys = [
            '2012-01-03',
            '2012-01-31',
            slice('2012-01-03', '2012-01-03'),
            slice('2012-01-03', '2012-01-04'),
            slice('2012-01-03', '2012-01-06', 2),
            slice('2012-01-03', '2012-01-31'),
            ([True, True, True, False, True],),
        ]
        for key in date_keys:

            # getitem

            # if the expected raises, then compare the exceptions
            try:
                with catch_warnings(record=True):
                    expected = df.ix[key]
            except KeyError:
                with pytest.raises(KeyError):
                    df.loc[key]
                continue

            result = df.loc[key]
            compare(result, expected)

            # setitem
            set_by_ix = df.copy()
            set_by_loc = df.copy()

            with catch_warnings(record=True):
                set_by_ix.ix[key] = 10
            set_by_loc.loc[key] = 10
            compare(set_by_loc, set_by_ix)

        # edge cases
        s = Series([1, 2, 3, 4], index=list('abde'))

        plain = s['a':'c']
        with catch_warnings(record=True):
            by_ix = s.ix['a':'c']
        by_loc = s.loc['a':'c']
        tm.assert_series_equal(plain, by_ix)
        tm.assert_series_equal(plain, by_loc)

        # now work rather than raising KeyError
        s = Series(range(5), [-2, -1, 1, 2, 3])

        with catch_warnings(record=True):
            by_ix = s.ix[-10:3]
        by_loc = s.loc[-10:3]
        tm.assert_series_equal(by_ix, by_loc)

        with catch_warnings(record=True):
            by_ix = s.ix[0:3]
        by_loc = s.loc[0:3]
        tm.assert_series_equal(by_ix, by_loc)

    def test_ix_weird_slicing(self):
        # http://stackoverflow.com/q/17056560/1240268
        df = DataFrame({'one': [1, 2, 3, np.nan, np.nan],
                        'two': [1, 2, 3, 4, 5]})
        df.loc[df['one'] > 1, 'two'] = -df['two']

        expected_one = {0: 1.0, 1: 2.0, 2: 3.0, 3: np.nan, 4: np.nan}
        expected_two = {0: 1, 1: -2, 2: -3, 3: 4, 4: 5}
        expected = DataFrame({'one': expected_one, 'two': expected_two})
        tm.assert_frame_equal(df, expected)

    def test_ix_assign_column_mixed(self):
        # GH #1142
        df = DataFrame(tm.getSeriesData())
        df['foo'] = 'bar'

        orig = df.loc[:, 'B'].copy()
        df.loc[:, 'B'] = df.loc[:, 'B'] + 1
        tm.assert_series_equal(df.B, orig + 1)

        # GH 3668, mixed frame with series value
        df = DataFrame({'x': lrange(10), 'y': lrange(10, 20), 'z': 'bar'})
        expected = df.copy()

        # even row labels 0..8 paired with 1000, 1200, ..., 1800
        for indexer, v in zip(range(0, 10, 2), range(1000, 2000, 200)):
            expected.loc[indexer, 'y'] = v
            assert expected.loc[indexer, 'y'] == v

        df.loc[df.x % 2 == 0, 'y'] = df.loc[df.x % 2 == 0, 'y'] * 100
        tm.assert_frame_equal(df, expected)

        # GH 4508, making sure consistency of assignments
        df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]})
        df.loc[[0, 2], 'b'] = [100, -100]
        expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]})
        tm.assert_frame_equal(df, expected)

        df = DataFrame({'a': lrange(4)})
        df['b'] = np.nan
        df.loc[[1, 3], 'b'] = [100, -100]
        expected = DataFrame({'a': [0, 1, 2, 3],
                              'b': [np.nan, 100, np.nan, -100]})
        tm.assert_frame_equal(df, expected)

        # ok, but chained assignments are dangerous
        # if we turn off chained assignment it will work
        with option_context('chained_assignment', None):
            df = DataFrame({'a': lrange(4)})
            df['b'] = np.nan
            df['b'].loc[[1, 3]] = [100, -100]
            tm.assert_frame_equal(df, expected)

    def test_ix_get_set_consistency(self):
        # GH 4544
        # ix/loc get/set not consistent when
        # a mixed int/string index
        df = DataFrame(np.arange(16).reshape((4, 4)),
                       columns=['a', 'b', 8, 'c'],
                       index=['e', 7, 'f', 'g'])

        # initial value
        with catch_warnings(record=True):
            assert df.ix['e', 8] == 2
        assert df.loc['e', 8] == 2

        # written through .ix
        with catch_warnings(record=True):
            df.ix['e', 8] = 42
            assert df.ix['e', 8] == 42
        assert df.loc['e', 8] == 42

        # written through .loc
        df.loc['e', 8] = 45
        with catch_warnings(record=True):
            assert df.ix['e', 8] == 45
        assert df.loc['e', 8] == 45

    def test_ix_slicing_strings(self):
        # see gh-3836
        data = {'Classification':
                ['SA EQUITY CFD', 'bbb', 'SA EQUITY', 'SA SSF', 'aaa'],
                'Random': [1, 2, 3, 4, 5],
                'X': ['correct', 'wrong', 'correct', 'correct', 'wrong']}
        df = DataFrame(data)
        known = ['SA EQUITY CFD', 'SA EQUITY', 'SA SSF']
        x = df[~df.Classification.isin(known)]
        with catch_warnings(record=True):
            df.ix[x.index, 'X'] = df['Classification']

        expected = DataFrame({'Classification': {0: 'SA EQUITY CFD',
                                                 1: 'bbb',
                                                 2: 'SA EQUITY',
                                                 3: 'SA SSF',
                                                 4: 'aaa'},
                              'Random': {0: 1,
                                         1: 2,
                                         2: 3,
                                         3: 4,
                                         4: 5},
                              'X': {0: 'correct',
                                    1: 'bbb',
                                    2: 'correct',
                                    3: 'correct',
                                    4: 'aaa'}})  # bug was 4: 'bbb'

        tm.assert_frame_equal(df, expected)

    def test_ix_setitem_out_of_bounds_axis_0(self):
        row_names = ["row%s" % i for i in range(2)]
        col_names = ["col%s" % i for i in range(5)]
        df = DataFrame(np.random.randn(2, 5), index=row_names,
                       columns=col_names)
        with catch_warnings(record=True):
            with pytest.raises(ValueError):
                df.ix[2, 0] = 100

    def test_ix_setitem_out_of_bounds_axis_1(self):
        row_names = ["row%s" % i for i in range(5)]
        col_names = ["col%s" % i for i in range(2)]
        df = DataFrame(np.random.randn(5, 2), index=row_names,
                       columns=col_names)
        with catch_warnings(record=True):
            with pytest.raises(ValueError):
                df.ix[0, 2] = 100

    def test_ix_empty_list_indexer_is_ok(self):
        with catch_warnings(record=True):
            from pandas.util.testing import makeCustomDataframe as mkdf
            df = mkdf(5, 2)
            strict = dict(check_index_type=True, check_column_type=True)
            # no columns selected
            tm.assert_frame_equal(df.ix[:, []], df.iloc[:, :0], **strict)
            # no rows selected, explicit column slice
            tm.assert_frame_equal(df.ix[[], :], df.iloc[:0, :], **strict)
            # no rows selected, row indexer only
            tm.assert_frame_equal(df.ix[[]], df.iloc[:0, :], **strict)

    def test_ix_duplicate_returns_series(self):
        df = DataFrame(np.random.randn(3, 3), index=[0.1, 0.2, 0.2],
                       columns=list('abc'))
        with catch_warnings(record=True):
            from_ix = df.ix[0.2, 'a']
        from_loc = df.loc[0.2, 'a']
        tm.assert_series_equal(from_ix, from_loc)
|
||||
@@ -0,0 +1,767 @@
|
||||
""" test label based indexing with loc """
|
||||
|
||||
from warnings import catch_warnings, filterwarnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PY2, StringIO, lrange
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series, Timestamp, date_range
|
||||
from pandas.api.types import is_scalar
|
||||
from pandas.tests.indexing.common import Base
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestLoc(Base):
|
||||
|
||||
def test_loc_getitem_dups(self):
    """Chained and direct ``loc`` reads agree on a repeated index."""
    # GH 5678
    # repeated getitems on a dup index returning a ndarray
    labels = list('ABCDE') * 4
    df = DataFrame(np.random.random_sample((20, 5)), index=labels)
    expected = df.loc['A', 0]
    result = df.loc[:, 0].loc['A']
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_loc_getitem_dups2(self):
    """Row access on a frame whose column labels are all identical."""
    # GH4726
    # dup indexing with iloc/loc
    row = [1, 2, 'foo', 'bar', Timestamp('20130101')]
    names = ['a', 'a', 'a', 'a', 'a']
    df = DataFrame([row], columns=names, index=[1])
    expected = Series([1, 2, 'foo', 'bar', Timestamp('20130101')],
                      index=['a', 'a', 'a', 'a', 'a'], name=1)

    # positional
    tm.assert_series_equal(df.iloc[0], expected)

    # by label
    tm.assert_series_equal(df.loc[1], expected)
|
||||
|
||||
def test_loc_setitem_dups(self):
    """In-place multiplication through ``loc`` with a repeated index."""
    # GH 6541
    df_orig = DataFrame(
        {'me': list('rttti'),
         'foo': list('aaade'),
         'bar': np.arange(5, dtype='float64') * 1.34 + 2,
         'bar2': np.arange(5, dtype='float64') * -.34 + 2}).set_index('me')

    def assert_scalar_equal(left, right):
        assert left == right

    # (indexer, comparison matching the kind of object it selects)
    cases = [
        (('r', ['bar', 'bar2']), tm.assert_series_equal),
        (('r', 'bar'), assert_scalar_equal),
        (('t', ['bar', 'bar2']), tm.assert_frame_equal),
    ]
    for indexer, check in cases:
        df = df_orig.copy()
        df.loc[indexer] *= 2.0
        check(df.loc[indexer], 2.0 * df_orig.loc[indexer])
|
||||
|
||||
def test_loc_setitem_slice(self):
    """Assign through ``.loc`` with a boolean mask and compare dtypes."""
    # GH10503

    # assigning the same type should not change the type
    df1 = DataFrame({'a': [0, 1, 1],
                     'b': Series([100, 200, 300], dtype='uint32')})
    mask = df1['a'] == 1
    bumped = df1.loc[mask, 'b'] + 1
    df1.loc[mask, 'b'] = bumped
    same_type = DataFrame({'a': [0, 1, 1],
                           'b': Series([100, 201, 301], dtype='uint32')})
    tm.assert_frame_equal(df1, same_type)

    # assigning a new type should get the inferred type
    # NOTE(review): the values are written into df1 while the assertion
    # below inspects df2 -- confirm this is the intended check.
    df2 = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
                    dtype='uint64')
    mask = df1['a'] == 1
    replacement = df2.loc[mask, 'b']
    df1.loc[mask, 'b'] = replacement
    new_type = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
                         dtype='uint64')
    tm.assert_frame_equal(df2, new_type)
|
||||
|
||||
def test_loc_getitem_int(self):
    """Run check_result with integer labels, 'loc' against 'ix'."""
    # int label: one key per axis (0, 1, 2)
    for axis, label in enumerate((2, 3, 4)):
        self.check_result('int label', 'loc', label, 'ix', label,
                          typs=['ints', 'uints'], axes=axis)

    # NOTE(review): 'label' is not one of Base._typs ('labels' is) --
    # confirm whether this case is ever exercised.
    self.check_result('int label', 'loc', 2, 'ix', 2,
                      typs=['label'], fails=KeyError)
|
||||
|
||||
def test_loc_getitem_label(self):
    """Run check_result with single labels, 'loc' against 'ix'."""
    # label
    cases = [('c', 'c', 'labels'),
             ('null', 'null', 'mixed'),
             (8, 8, 'mixed'),
             (Timestamp('20130102'), 1, 'ts')]
    for loc_key, ix_key, typ in cases:
        self.check_result('label', 'loc', loc_key, 'ix', ix_key,
                          typs=[typ], axes=0)

    self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['empty'],
                      fails=KeyError)
|
||||
|
||||
def test_loc_getitem_label_out_of_range(self):
    """Run check_result with out-of-range labels and expected errors."""

    def check(label, typs, error, **extra):
        # the same key is handed to both the 'loc' and the 'ix' side
        self.check_result('label range', 'loc', label, 'ix', label,
                          typs=typs, fails=error, **extra)

    # out of range label
    check('f', ['ints', 'uints', 'labels', 'mixed', 'ts'], KeyError)
    check('f', ['floats'], KeyError)
    check(20, ['ints', 'uints', 'mixed'], KeyError)
    check(20, ['labels'], TypeError)
    check(20, ['ts'], TypeError, axes=0)
    check(20, ['floats'], KeyError, axes=0)
|
||||
|
||||
def test_loc_getitem_label_list(self):
    """Run check_result with lists of labels, 'loc' against 'ix'."""
    # list of labels
    cases = [
        ([0, 2, 4], ['ints', 'uints'], 0),
        ([3, 6, 9], ['ints', 'uints'], 1),
        ([4, 8, 12], ['ints', 'uints'], 2),
        (['a', 'b', 'd'], ['labels'], 0),
        (['A', 'B', 'C'], ['labels'], 1),
        (['Z', 'Y', 'W'], ['labels'], 2),
        ([2, 8, 'null'], ['mixed'], 0),
        ([Timestamp('20130102'), Timestamp('20130103')], ['ts'], 0),
    ]
    for labels, typs, axis in cases:
        # hand each side its own list object, as separate literals would
        self.check_result('list lbl', 'loc', list(labels), 'ix',
                          list(labels), typs=typs, axes=axis)
|
||||
|
||||
@pytest.mark.skipif(PY2, reason=("Catching warnings unreliable with "
                                 "Python 2 (GH #20770)"))
def test_loc_getitem_label_list_with_missing(self):
    """Run check_result with label lists under FutureWarning checks."""
    self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2],
                      typs=['empty'], fails=KeyError)

    def expect_future_warning():
        return tm.assert_produces_warning(FutureWarning,
                                          check_stacklevel=False)

    # one label list per axis (0, 1, 2)
    per_axis = ([0, 2, 10], [3, 6, 7], [4, 8, 10])
    for axis, labels in enumerate(per_axis):
        with expect_future_warning():
            self.check_result('list lbl', 'loc', list(labels), 'ix',
                              list(labels),
                              typs=['ints', 'uints', 'floats'],
                              axes=axis, fails=KeyError)

    # GH 17758 - MultiIndex and missing keys
    pairs = [(1, 3), (1, 4), (2, 5)]
    with expect_future_warning():
        self.check_result('list lbl', 'loc', list(pairs),
                          'ix', list(pairs),
                          typs=['multi'],
                          axes=0)
|
||||
|
||||
def test_getitem_label_list_with_missing(self):
    """Expect a FutureWarning from ``Series[...]`` with these label lists."""
    # consistency
    cases = [(Series(range(3), index=['a', 'b', 'c']), ['a', 'd']),
             (Series(range(3)), [0, 3])]
    for ser, labels in cases:
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            ser[labels]
|
||||
|
||||
def test_loc_getitem_label_list_fails(self):
    """Run check_result with a label list, expecting KeyError."""
    # fails
    for axis in (1, 2):
        self.check_result('list lbl', 'loc', [20, 30, 40], 'ix',
                          [20, 30, 40], typs=['ints', 'uints'],
                          axes=axis, fails=KeyError)
|
||||
|
||||
def test_loc_getitem_label_array_like(self):
    """Run check_result with an Index object as the 'loc' key."""
    # array like
    for axis, labels in enumerate(([0, 2, 4], [3, 6, 9], [4, 8, 12])):
        index_key = Series(index=labels).index
        self.check_result('array like', 'loc', index_key,
                          'ix', list(labels), typs=['ints', 'uints'],
                          axes=axis)
|
||||
|
||||
def test_loc_getitem_bool(self):
    """Run check_result with a boolean list as the key."""
    # boolean indexers
    mask = [True, False, True, False]
    typs = ['ints', 'uints', 'labels', 'mixed', 'ts', 'floats']
    self.check_result('bool', 'loc', mask, 'ix', mask, typs=typs)
    self.check_result('bool', 'loc', mask, 'ix', mask, typs=['empty'],
                      fails=KeyError)
|
||||
|
||||
def test_loc_getitem_int_slice(self):
    """Run check_result with an int slice for 'loc' and a list for 'ix'."""
    # ok
    bounds_per_axis = [(2, 4), (3, 6), (4, 8)]
    for axis, (start, stop) in enumerate(bounds_per_axis):
        self.check_result('int slice2', 'loc', slice(start, stop), 'ix',
                          [start, stop], typs=['ints', 'uints'],
                          axes=axis)
|
||||
|
||||
def test_loc_to_fail(self):
    """Check the errors raised by ``.loc`` for labels that are missing."""
    # GH3449
    frame = DataFrame(np.random.random((3, 3)),
                      index=['a', 'b', 'c'],
                      columns=['e', 'f', 'g'])

    # raise a KeyError?
    with pytest.raises(KeyError):
        frame.loc[[1, 2], [1, 2]]

    # GH 7496
    # loc should not fallback
    ser = Series()
    ser.loc[1] = 1
    ser.loc['a'] = 2

    with pytest.raises(KeyError):
        ser.loc[-1]
    with pytest.raises(KeyError):
        ser.loc[[-1, -2]]

    with pytest.raises(KeyError):
        ser.loc[['4']]

    ser.loc[-1] = 3
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        partial = ser.loc[[-1, -2]]
    expected = Series([3, np.nan], index=[-1, -2])
    tm.assert_series_equal(partial, expected)

    ser['a'] = 2
    with pytest.raises(KeyError):
        ser.loc[[-2]]

    del ser['a']

    with pytest.raises(KeyError):
        ser.loc[[-2]] = 0

    # inconsistency between .loc[values] and .loc[values,:]
    # GH 7999
    frame = DataFrame([['a'], ['b']], index=[1, 2], columns=['value'])

    with pytest.raises(KeyError):
        frame.loc[[3], :]

    with pytest.raises(KeyError):
        frame.loc[[3]]
|
||||
|
||||
def test_loc_getitem_list_with_fail(self):
    """List lookups through ``.loc`` with present and missing labels."""
    # 15747
    # should KeyError if *any* missing labels
    ser = Series([1, 2, 3])

    # label 2 is present
    ser.loc[[2]]

    # label 3 is not
    with pytest.raises(KeyError):
        ser.loc[[3]]

    # a non-match and a match
    mixed = [2, 3]
    with tm.assert_produces_warning(FutureWarning):
        expected = ser.loc[mixed]
    reindexed = ser.reindex(mixed)
    tm.assert_series_equal(reindexed, expected)
|
||||
|
||||
def test_loc_getitem_label_slice(self):
    """Run check_result with label, timestamp-string and int slices."""

    def check(name, key, **options):
        # the same slice is handed to both the 'loc' and the 'ix' side
        self.check_result(name, 'loc', key, 'ix', key, **options)

    # label slices (with ints)
    check('lab slice', slice(1, 3),
          typs=['labels', 'mixed', 'empty', 'ts', 'floats'],
          fails=TypeError)

    # real label slices
    bounds_per_axis = [('a', 'c'), ('A', 'C'), ('W', 'Z')]
    for axis, (first, last) in enumerate(bounds_per_axis):
        check('lab slice', slice(first, last), typs=['labels'], axes=axis)

    dates = slice('20130102', '20130104')
    check('ts slice', dates, typs=['ts'], axes=0)
    check('ts slice', dates, typs=['ts'], axes=1, fails=TypeError)
    check('ts slice', dates, typs=['ts'], axes=2, fails=TypeError)

    # GH 14316
    self.check_result('ts slice rev', 'loc', slice('20130104', '20130102'),
                      'indexer', [0, 1, 2], typs=['ts_rev'], axes=0)

    ints = slice(2, 8)
    check('mixed slice', ints, typs=['mixed'], axes=0, fails=TypeError)
    check('mixed slice', ints, typs=['mixed'], axes=1, fails=KeyError)
    check('mixed slice', ints, typs=['mixed'], axes=2, fails=KeyError)

    check('mixed slice', slice(2, 4, 2), typs=['mixed'], axes=0,
          fails=TypeError)
|
||||
|
||||
def test_loc_index(self):
|
||||
# gh-17131
|
||||
# a boolean index should index like a boolean numpy array
|
||||
|
||||
df = DataFrame(
|
||||
np.random.random(size=(5, 10)),
|
||||
index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"])
|
||||
|
||||
mask = df.index.map(lambda x: "alpha" in x)
|
||||
expected = df.loc[np.array(mask)]
|
||||
|
||||
result = df.loc[mask]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[mask.values]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_general(self):
|
||||
|
||||
df = DataFrame(
|
||||
np.random.rand(4, 4), columns=['A', 'B', 'C', 'D'],
|
||||
index=['A', 'B', 'C', 'D'])
|
||||
|
||||
# want this to work
|
||||
result = df.loc[:, "A":"B"].iloc[0:2, :]
|
||||
assert (result.columns == ['A', 'B']).all()
|
||||
assert (result.index == ['A', 'B']).all()
|
||||
|
||||
# mixed type
|
||||
result = DataFrame({'a': [Timestamp('20130101')], 'b': [1]}).iloc[0]
|
||||
expected = Series([Timestamp('20130101'), 1], index=['a', 'b'], name=0)
|
||||
tm.assert_series_equal(result, expected)
|
||||
assert result.dtype == object
|
||||
|
||||
def test_loc_setitem_consistency(self):
    """Overwrite a datetime column via ``.loc[:, col]`` with other types."""
    # GH 6149
    # coerce similarly for setitem and loc when rows have a null-slice

    def fresh_frame():
        return DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
                          'val': Series(range(5), dtype=np.int64)})

    def expected_frame(date_column):
        return DataFrame({'date': date_column,
                          'val': Series(range(5), dtype=np.int64)})

    def int_zeros():
        return Series(0, index=range(5), dtype=np.int64)

    scenarios = [
        (0, int_zeros()),
        (np.array(0, dtype=np.int64), int_zeros()),
        (np.array([0, 0, 0, 0, 0], dtype=np.int64), int_zeros()),
        ('foo', Series('foo', index=range(5))),
        (1.0, Series(1.0, index=range(5))),
    ]
    for value, date_column in scenarios:
        df = fresh_frame()
        df.loc[:, 'date'] = value
        tm.assert_frame_equal(df, expected_frame(date_column))

    # GH 15494
    # setting on frame with single row
    df = DataFrame({'date': Series([Timestamp('20180101')])})
    df.loc[:, 'date'] = 'string'
    expected = DataFrame({'date': Series(['string'])})
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_loc_setitem_consistency_empty(self):
    """Set a scalar on a column of an empty frame via ``.loc`` and ``[]``."""
    # empty (essentially noops)
    expected = DataFrame(columns=['x', 'y'])
    expected['x'] = expected['x'].astype(np.int64)

    def via_loc(frame):
        frame.loc[:, 'x'] = 1

    def via_setitem(frame):
        frame['x'] = 1

    for assign in (via_loc, via_setitem):
        df = DataFrame(columns=['x', 'y'])
        assign(df)
        tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_loc_setitem_consistency_slice_column_len(self):
    """Replace whole MultiIndex columns through ``.loc[:, column]``."""
    # .loc[:,column] setting with slice == len of the column
    # GH10408
    data = "\n".join([
        "Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat",
        "Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse",
        "Region,Site,RespondentID,,,,,",
        "Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes,",
        "Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes",
        "Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes,",
        "Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No",
    ])

    df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2])

    start = ('Respondent', 'StartDate')
    end = ('Respondent', 'EndDate')
    duration = ('Respondent', 'Duration')

    # parse both timestamp columns in place
    for stamp_col in (start, end):
        df.loc[:, stamp_col] = pd.to_datetime(df.loc[:, stamp_col])

    df.loc[:, duration] = df.loc[:, end] - df.loc[:, start]
    df.loc[:, duration] = df.loc[:, duration].astype('timedelta64[s]')

    expected = Series([1380, 720, 840, 2160.], index=df.index,
                      name=duration)
    tm.assert_series_equal(df[duration], expected)
|
||||
|
||||
def test_loc_setitem_frame(self):
    """Set values on frames through ``.loc`` in several configurations."""
    frame = self.frame_labels

    result = frame.iloc[0, 0]

    frame.loc['a', 'A'] = 1
    result = frame.loc['a', 'A']
    assert result == 1

    result = frame.iloc[0, 0]
    assert result == 1

    frame.loc[:, 'B':'D'] = 0
    by_label = frame.loc[:, 'B':'D']
    by_position = frame.iloc[:, 1:]
    tm.assert_frame_equal(by_position, by_label)

    # GH 6254
    # setting issue
    frame = DataFrame(index=[3, 5, 4], columns=['A'])
    frame.loc[[4, 3, 5], 'A'] = np.array([1, 2, 3], dtype='int64')
    column = Series([1, 2, 3], index=[4, 3, 5])
    expected = DataFrame({'A': column}).reindex(index=[3, 5, 4])
    tm.assert_frame_equal(frame, expected)

    # GH 6252
    # setting with an empty frame
    long_keys = ['@' + str(num) for num in range(5)]
    long_vals = np.arange(5, dtype='int64')

    short_keys = ['@' + str(num) for num in range(4)]
    short_vals = np.arange(4, dtype='int64')

    index = list(set(long_keys).union(short_keys))
    frame = DataFrame(index=index)
    frame['A'] = np.nan
    frame.loc[long_keys, 'A'] = long_vals

    frame['B'] = np.nan
    frame.loc[short_keys, 'B'] = short_vals

    expected = DataFrame({'A': Series(long_vals, index=long_keys),
                          'B': Series(short_vals, index=short_keys)})
    expected = expected.reindex(index=index)
    tm.assert_frame_equal(frame, expected)

    # GH 8669
    # invalid coercion of nan -> int
    frame = DataFrame({'A': [1, 2, 3], 'B': np.nan})
    frame.loc[frame.B > frame.A, 'B'] = frame.A
    expected = DataFrame({'A': [1, 2, 3], 'B': np.nan})
    tm.assert_frame_equal(frame, expected)

    # GH 6546
    # setting with mixed labels
    frame = DataFrame({1: [1, 2], 2: [3, 4], 'a': ['a', 'b']})

    row = frame.loc[0, [1, 2]]
    expected = Series([1, 3], index=[1, 2], dtype=object, name=0)
    tm.assert_series_equal(row, expected)

    expected = DataFrame({1: [5, 2], 2: [6, 4], 'a': ['a', 'b']})
    frame.loc[0, [1, 2]] = [5, 6]
    tm.assert_frame_equal(frame, expected)
|
||||
|
||||
def test_loc_setitem_frame_multiples(self):
    """Assign a re-indexed slice of a frame back through ``.loc``."""
    # multiple setting
    df = DataFrame({'A': ['foo', 'bar', 'baz'],
                    'B': Series(range(3), dtype=np.int64)})
    donor = df.loc[1:2]
    donor.index = df.index[0:2]
    df.loc[0:1] = donor
    expected = DataFrame({'A': ['bar', 'baz', 'baz'],
                          'B': Series([1, 2, 2], dtype=np.int64)})
    tm.assert_frame_equal(df, expected)

    # multiple setting with frame on rhs (with M8)
    df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
                    'val': Series(range(5), dtype=np.int64)})
    days = ('20000101', '20000102', '20000101', '20000102', '20000103')
    expected = DataFrame({'date': [Timestamp(day) for day in days],
                          'val': Series([0, 1, 0, 1, 2], dtype=np.int64)})
    donor = df.loc[0:2]
    donor.index = df.index[2:5]
    df.loc[2:4] = donor
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    'indexer', [['A'], slice(None, 'A', None), np.array(['A'])])
@pytest.mark.parametrize(
    'value', [['Z'], np.array(['Z'])])
def test_loc_setitem_with_scalar_index(self, indexer, value):
    """Set one cell via a scalar row label and a list-like column key."""
    # GH #19474
    # assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated
    # elementwisely, not using "setter('A', ['Z'])".
    frame = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
    frame.loc[0, indexer] = value
    cell = frame.loc[0, 'A']

    assert is_scalar(cell)
    assert cell == 'Z'
|
||||
|
||||
def test_loc_coerceion(self):
|
||||
|
||||
# 12411
|
||||
df = DataFrame({'date': [Timestamp('20130101').tz_localize('UTC'),
|
||||
pd.NaT]})
|
||||
expected = df.dtypes
|
||||
|
||||
result = df.iloc[[0]]
|
||||
tm.assert_series_equal(result.dtypes, expected)
|
||||
|
||||
result = df.iloc[[1]]
|
||||
tm.assert_series_equal(result.dtypes, expected)
|
||||
|
||||
# 12045
|
||||
import datetime
|
||||
df = DataFrame({'date': [datetime.datetime(2012, 1, 1),
|
||||
datetime.datetime(1012, 1, 2)]})
|
||||
expected = df.dtypes
|
||||
|
||||
result = df.iloc[[0]]
|
||||
tm.assert_series_equal(result.dtypes, expected)
|
||||
|
||||
result = df.iloc[[1]]
|
||||
tm.assert_series_equal(result.dtypes, expected)
|
||||
|
||||
# 11594
|
||||
df = DataFrame({'text': ['some words'] + [None] * 9})
|
||||
expected = df.dtypes
|
||||
|
||||
result = df.iloc[0:2]
|
||||
tm.assert_series_equal(result.dtypes, expected)
|
||||
|
||||
result = df.iloc[3:]
|
||||
tm.assert_series_equal(result.dtypes, expected)
|
||||
|
||||
def test_loc_non_unique(self):
    """Slice ``.loc`` on a non-unique index, unsorted and then sorted."""
    # GH3659
    # non-unique indexer with loc slice
    # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs
    columns = {'A': [1, 2, 3, 4, 5, 6],
               'B': [3, 4, 5, 6, 7, 8]}
    labels = [0, 1, 0, 1, 2, 3]

    # these are going to raise because the we are non monotonic
    df = DataFrame(columns, index=labels)
    for bad_slice in (slice(1, None), slice(0, None), slice(1, 2)):
        with pytest.raises(KeyError):
            df.loc[(bad_slice,)]

    # monotonic are ok
    df = DataFrame(columns, index=labels).sort_index(axis=0)

    from_one = df.loc[1:]
    expected = DataFrame({'A': [2, 4, 5, 6], 'B': [4, 6, 7, 8]},
                         index=[1, 1, 2, 3])
    tm.assert_frame_equal(from_one, expected)

    from_zero = df.loc[0:]
    tm.assert_frame_equal(from_zero, df)

    one_to_two = df.loc[1:2]
    expected = DataFrame({'A': [2, 4, 5], 'B': [4, 6, 7]},
                         index=[1, 1, 2])
    tm.assert_frame_equal(one_to_two, expected)
|
||||
|
||||
def test_loc_non_unique_memory_error(self):
    """Select many labels from a non-unique index, small and large."""
    # GH 4280
    # non_unique index with a large selection triggers a memory error
    columns = list('ABCDEFG')
    width = len(columns)

    def gen_test(n_unique, n_dups):
        # n_unique random rows labelled 0..n_unique-1, then n_dups rows of
        # ones that are all labelled 0
        unique_part = DataFrame(np.random.randn(n_unique, width),
                                index=lrange(n_unique), columns=columns)
        dup_part = DataFrame(np.ones((n_dups, width)),
                             index=[0] * n_dups, columns=columns)
        return pd.concat([unique_part, dup_part])

    def gen_expected(df, mask):
        len_mask = len(mask)
        ones_block = DataFrame(np.ones((len_mask, width)),
                               index=[0] * len_mask,
                               columns=columns)
        return pd.concat([df.take([0]), ones_block, df.take(mask[1:])])

    for n_unique, n_dups in ((900, 100), (900000, 100000)):
        df = gen_test(n_unique, n_dups)
        assert df.index.is_unique is False

        mask = np.arange(n_dups)
        result = df.loc[mask]
        expected = gen_expected(df, mask)
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_name(self):
    """Read the index name after list selection via iloc, ix and loc."""
    # GH 3880
    frame = DataFrame([[1, 1], [1, 1]])
    frame.index.name = 'index_name'

    via_iloc = frame.iloc[[0, 1]].index.name
    assert via_iloc == 'index_name'

    with catch_warnings(record=True):
        filterwarnings("ignore", "\\n.ix", DeprecationWarning)
        via_ix = frame.ix[[0, 1]].index.name
    assert via_ix == 'index_name'

    via_loc = frame.loc[[0, 1]].index.name
    assert via_loc == 'index_name'
|
||||
|
||||
def test_loc_empty_list_indexer_is_ok(self):
    """Compare ``.loc`` with empty lists against empty ``.iloc`` slices."""
    from pandas.util.testing import makeCustomDataframe as mkdf
    frame = mkdf(5, 2)
    strict = dict(check_index_type=True, check_column_type=True)

    # vertical empty
    tm.assert_frame_equal(frame.loc[:, []], frame.iloc[:, :0], **strict)
    # horizontal empty
    tm.assert_frame_equal(frame.loc[[], :], frame.iloc[:0, :], **strict)
    # horizontal empty
    tm.assert_frame_equal(frame.loc[[]], frame.iloc[:0, :], **strict)
|
||||
|
||||
def test_identity_slice_returns_new_object(self):
    """Check object identity of full slices on frames and series."""
    # GH13873
    source_frame = DataFrame({'a': [1, 2, 3]})
    frame_view = source_frame.loc[:]
    assert frame_view is not source_frame
    assert source_frame[:] is not source_frame

    # should be a shallow copy
    source_frame['a'] = [4, 4, 4]
    assert (frame_view['a'] == 4).all()

    # These should not return copies
    assert source_frame is source_frame.loc[:, :]
    random_frame = DataFrame(np.random.randn(10, 4))
    assert random_frame[0] is random_frame.loc[:, 0]

    # Same tests for Series
    source_series = Series([1, 2, 3, 4, 5, 6])
    series_view = source_series.loc[:]
    assert series_view is not source_series
    assert source_series[:] is not source_series

    source_series[:3] = [7, 8, 9]
    assert all(series_view[:3] == [7, 8, 9])
|
||||
|
||||
def test_loc_uint64(self):
|
||||
# GH20722
|
||||
# Test whether loc accept uint64 max value as index.
|
||||
s = pd.Series([1, 2],
|
||||
index=[np.iinfo('uint64').max - 1,
|
||||
np.iinfo('uint64').max])
|
||||
|
||||
result = s.loc[np.iinfo('uint64').max - 1]
|
||||
expected = s.iloc[0]
|
||||
assert result == expected
|
||||
|
||||
result = s.loc[[np.iinfo('uint64').max - 1]]
|
||||
expected = s.iloc[[0]]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s.loc[[np.iinfo('uint64').max - 1,
|
||||
np.iinfo('uint64').max]]
|
||||
tm.assert_series_equal(result, s)
|
||||
|
||||
def test_loc_setitem_empty_append(self):
    """Fill an empty frame by setting a column through ``.loc``.

    GH6173, various appends to an empty dataframe.
    """
    data = [1, 2, 3]
    expected = DataFrame({'x': data, 'y': [None] * len(data)})

    # appends to fit length of data
    df = DataFrame(columns=['x', 'y'])
    df.loc[:, 'x'] = data
    tm.assert_frame_equal(df, expected)

    # only appends one value
    expected = DataFrame({'x': [1.0], 'y': [np.nan]})
    # ``np.float`` was only an alias of the builtin ``float``; the builtin
    # is used directly because newer NumPy releases no longer provide the
    # alias.
    df = DataFrame(columns=['x', 'y'],
                   dtype=float)
    df.loc[0, 'x'] = expected.loc[0, 'x']
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_loc_setitem_empty_append_raises(self):
    """Check the errors raised when list/slice-setting on an empty frame."""
    # GH6173, various appends to an empty dataframe
    values = [1, 2]
    frame = DataFrame(columns=['x', 'y'])

    missing_msg = (r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] "
                   r"are in the \[index\]")
    with pytest.raises(KeyError, match=missing_msg):
        frame.loc[[0, 1], 'x'] = values

    size_msg = ("cannot copy sequence with size 2 to array axis "
                "with dimension 0")
    with pytest.raises(ValueError, match=size_msg):
        frame.loc[0:2, 'x'] = values
|
||||
@@ -0,0 +1,214 @@
|
||||
from warnings import catch_warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, Panel, date_range
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
|
||||
class TestPanel(object):
|
||||
|
||||
def test_iloc_getitem_panel(self):
    """Compare ``Panel.iloc`` selections with their ``.loc`` equivalents."""
    with catch_warnings(record=True):
        # GH 7189
        panel = Panel(np.arange(4 * 3 * 2).reshape(4, 3, 2),
                      items=['A', 'B', 'C', 'D'],
                      major_axis=['a', 'b', 'c'],
                      minor_axis=['one', 'two'])

        # one, two and three scalar positions
        tm.assert_frame_equal(panel.iloc[1], panel.loc['B'])
        tm.assert_series_equal(panel.iloc[1, 1], panel.loc['B', 'b'])
        assert panel.iloc[1, 1, 1] == panel.loc['B', 'b', 'two']

        everything = slice(None)
        panel_cases = [
            # slice
            (slice(1, 3), ['B', 'C']),
            ((everything, slice(0, 2)), (everything, ['a', 'b'])),
            # list of integers
            ([0, 2], ['A', 'C']),
            # neg indices
            (([-1, 1], [-1, 1]), (['D', 'B'], ['c', 'b'])),
            # dups indices
            (([-1, -1, 1], [-1, 1]), (['D', 'D', 'B'], ['c', 'b'])),
        ]
        for positions, labels in panel_cases:
            tm.assert_panel_equal(panel.iloc[positions], panel.loc[labels])

        # combined
        combined = panel.iloc[0, [True, True], [0, 1]]
        expected = panel.loc['A', ['a', 'b'], ['one', 'two']]
        tm.assert_frame_equal(combined, expected)

        # out-of-bounds exception
        with pytest.raises(IndexError):
            panel.iloc[(10, 5)]

        with pytest.raises(IndexError):
            panel.iloc[0, [True, True], [0, 1, 2]]

        # trying to use a label
        with pytest.raises(ValueError):
            panel.iloc[('j', 'D')]

        # GH
        panel = Panel(
            np.random.rand(4, 3, 2), items=['A', 'B', 'C', 'D'],
            major_axis=['U', 'V', 'W'], minor_axis=['X', 'Y'])
        expected = panel['A']
        all_rows = [True, True, True]

        tm.assert_frame_equal(panel.iloc[0, :, :], expected)
        tm.assert_frame_equal(panel.iloc[0, all_rows, :], expected)
        tm.assert_frame_equal(panel.iloc[0, all_rows, [0, 1]], expected)

        with pytest.raises(IndexError):
            panel.iloc[0, all_rows, [0, 1, 2]]

        with pytest.raises(IndexError):
            panel.iloc[0, all_rows, [2]]
|
||||
|
||||
def test_iloc_panel_issue(self):
    """Check result shapes of mixed scalar/slice ``Panel.iloc`` keys."""
    with catch_warnings(record=True):
        # see gh-3617
        panel = Panel(np.random.randn(4, 4, 4))

        head = slice(None, 3)
        shape_by_key = [
            ((head, head, head), (3, 3, 3)),
            ((1, head, head), (3, 3)),
            ((head, 1, head), (3, 3)),
            ((head, head, 1), (3, 3)),
            ((1, 1, head), (3, )),
            ((1, head, 1), (3, )),
            ((head, 1, 1), (3, )),
        ]
        for key, shape in shape_by_key:
            assert panel.iloc[key].shape == shape
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
def test_panel_getitem(self):
    """Select from a Panel by date string, item list and object key."""
    with catch_warnings(record=True):
        # GH4016, date selection returns a frame when a partial string
        # selection
        days = date_range(start="2000", freq="D", periods=1000)
        daily = DataFrame(np.random.randn(len(days), 5),
                          index=days, columns=list('ABCDE'))
        panel = Panel({'frame_' + letter: daily for letter in list('ABC')})

        by_range = panel.loc[:, "2002":"2002-12-31"]
        by_year = panel.loc[:, "2002"]
        tm.assert_panel_equal(by_year, by_range)

        # GH8710
        # multi-element getting with a list
        panel = tm.makePanel()
        wanted = ['ItemA', 'ItemB']

        expected = panel.iloc[[0, 1]]

        tm.assert_panel_equal(panel.loc[wanted], expected)
        tm.assert_panel_equal(panel.loc[wanted, :, :], expected)
        tm.assert_panel_equal(panel[wanted], expected)
        tm.assert_panel_equal(panel.loc['ItemA':'ItemB'], expected)

        with catch_warnings(record=True):
            via_ix = panel.ix[wanted]
            tm.assert_panel_equal(via_ix, expected)

        # with an object-like
        # GH 9140
        class TestObject(object):

            def __str__(self):
                return "TestObject"

        key = TestObject()

        keyed = Panel(np.random.randn(1, 5, 4), items=[key],
                      major_axis=date_range('1/1/2000', periods=5),
                      minor_axis=['A', 'B', 'C', 'D'])

        expected = keyed.iloc[0]
        tm.assert_frame_equal(keyed[key], expected)
|
||||
|
||||
def test_panel_setitem(self):
    """Replace every Panel item via ``[]`` and via ``.loc``."""
    with catch_warnings(record=True):
        # GH 7763
        # loc and setitem have setting differences
        np.random.seed(0)
        index = range(3)
        columns = list('abc')

        # frames are drawn in item order A, B, C
        panel = Panel({item: DataFrame(np.random.randn(3, 3),
                                       index=index, columns=columns)
                       for item in 'ABC'})

        replace = DataFrame(np.eye(3, 3), index=range(3), columns=columns)
        expected = Panel({'A': replace, 'B': replace, 'C': replace})

        def via_setitem(target, item):
            target[item] = replace

        def via_loc(target, item):
            target.loc[item, :, :] = replace

        for assign in (via_setitem, via_loc):
            updated = panel.copy()
            for item in list('ABC'):
                assign(updated, item)
            tm.assert_panel_equal(updated, expected)
|
||||
|
||||
def test_panel_assignment(self):
    """Assigning a Panel selection through a multi-axis ``.loc`` key raises.

    GH3777: the assignment below is expected to raise
    ``NotImplementedError``.
    """
    with catch_warnings(record=True):
        def _random_panel():
            # 2 items x 5 dates x 4 minor labels of random data
            return Panel(np.random.randn(2, 5, 4),
                         items=['Item1', 'Item2'],
                         major_axis=date_range('1/1/2000', periods=5),
                         minor_axis=['A', 'B', 'C', 'D'])

        wp = _random_panel()
        wp2 = _random_panel()

        # Equivalent to indexing with [['Item1', 'Item2'], :, ['A', 'B']]
        key = (['Item1', 'Item2'], slice(None), ['A', 'B'])

        with pytest.raises(NotImplementedError):
            wp.loc[key] = wp2.loc[key]

        # TODO: unused? Checks kept for when the assignment is supported:
        # expected = wp.loc[['Item1', 'Item2'], :, ['A', 'B']]
        # to_assign = wp2.loc[['Item1', 'Item2'], :, ['A', 'B']]
        # wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = to_assign
        # result = wp.loc[['Item1', 'Item2'], :, ['A', 'B']]
        # tm.assert_panel_equal(result,expected)
|
||||
@@ -0,0 +1,620 @@
|
||||
"""
|
||||
test setting *parts* of objects both positionally and label based
|
||||
|
||||
TODO: these should be split among the indexer tests
|
||||
"""
|
||||
|
||||
from warnings import catch_warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, Panel, Series, date_range
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestPartialSetting(object):
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
|
||||
def test_partial_setting(self):
    """Exercise setting-with-enlargement on Series, DataFrame and Panel.

    GH2578: ``[]``, ``.loc`` and ``.ix`` are allowed to add new labels,
    while the positional ``.iloc``/``.iat`` setters are expected to raise
    ``IndexError`` for out-of-bounds positions.  GH 8473 covers enlarging
    a datetime-indexed frame through ``.loc`` and ``.at``.
    """
    # ## series ##
    s_orig = Series([1, 2, 3])

    # An int and a float value are each appended through [] and .loc.
    for value in (5, 5.):
        expected = Series([1, 2, 3, value], index=[0, 1, 2, 5])

        s = s_orig.copy()
        s[5] = value
        tm.assert_series_equal(s, expected)

        s = s_orig.copy()
        s.loc[5] = value
        tm.assert_series_equal(s, expected)

    # iloc/iat raise
    s = s_orig.copy()

    with pytest.raises(IndexError):
        s.iloc[3] = 5.

    with pytest.raises(IndexError):
        s.iat[3] = 5.

    # ## frame ##
    df_orig = DataFrame(
        np.arange(6).reshape(3, 2), columns=['A', 'B'], dtype='int64')

    # iloc/iat raise
    df = df_orig.copy()

    with pytest.raises(IndexError):
        df.iloc[4, 2] = 5.

    with pytest.raises(IndexError):
        df.iat[4, 2] = 5.

    # row setting where it exists
    expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]}))
    df = df_orig.copy()
    df.iloc[1] = df.iloc[2]
    tm.assert_frame_equal(df, expected)

    expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]}))
    df = df_orig.copy()
    df.loc[1] = df.loc[2]
    tm.assert_frame_equal(df, expected)

    # like 2578, partial setting with dtype preservation
    expected = DataFrame(dict({'A': [0, 2, 4, 4], 'B': [1, 3, 5, 5]}))
    df = df_orig.copy()
    df.loc[3] = df.loc[2]
    tm.assert_frame_equal(df, expected)

    # single dtype frame, overwrite
    expected = DataFrame(dict({'A': [0, 2, 4], 'B': [0, 2, 4]}))
    df = df_orig.copy()
    with catch_warnings(record=True):
        df.ix[:, 'B'] = df.ix[:, 'A']
    tm.assert_frame_equal(df, expected)

    # mixed dtype frame, overwrite
    expected = DataFrame(dict({'A': [0, 2, 4], 'B': Series([0, 2, 4])}))
    df = df_orig.copy()
    df['B'] = df['B'].astype(np.float64)
    with catch_warnings(record=True):
        df.ix[:, 'B'] = df.ix[:, 'A']
    tm.assert_frame_equal(df, expected)

    # single dtype frame, partial setting
    # (``df`` is still the frame from the previous step; only 'A' is read)
    expected = df_orig.copy()
    expected['C'] = df['A']
    df = df_orig.copy()
    with catch_warnings(record=True):
        df.ix[:, 'C'] = df.ix[:, 'A']
    tm.assert_frame_equal(df, expected)

    # mixed frame, partial setting
    expected = df_orig.copy()
    expected['C'] = df['A']
    df = df_orig.copy()
    with catch_warnings(record=True):
        df.ix[:, 'C'] = df.ix[:, 'A']
    tm.assert_frame_equal(df, expected)

    with catch_warnings(record=True):
        # ## panel ##
        # Built once; the original constructed this identical Panel twice.
        p_orig = Panel(np.arange(16).reshape(2, 4, 2),
                       items=['Item1', 'Item2'],
                       major_axis=pd.date_range('2001/1/12', periods=4),
                       minor_axis=['A', 'B'], dtype='float64')

        # panel setting via item
        expected = p_orig.copy()
        expected['Item3'] = expected['Item1']
        p = p_orig.copy()
        p.loc['Item3'] = p['Item1']
        tm.assert_panel_equal(p, expected)

        # panel with aligned series
        expected = p_orig.copy()
        expected = expected.transpose(2, 1, 0)
        expected['C'] = DataFrame({'Item1': [30, 30, 30, 30],
                                   'Item2': [32, 32, 32, 32]},
                                  index=p_orig.major_axis)
        expected = expected.transpose(2, 1, 0)
        p = p_orig.copy()
        p.loc[:, :, 'C'] = Series([30, 32], index=p_orig.items)
        tm.assert_panel_equal(p, expected)

    # GH 8473
    dates = date_range('1/1/2000', periods=8)
    df_orig = DataFrame(np.random.randn(8, 4), index=dates,
                        columns=['A', 'B', 'C', 'D'])
    # first timestamp past the end of the existing index
    new_label = dates[-1] + dates.freq

    expected = pd.concat([df_orig,
                          DataFrame({'A': 7}, index=[new_label])],
                         sort=True)
    df = df_orig.copy()
    df.loc[new_label, 'A'] = 7
    tm.assert_frame_equal(df, expected)
    df = df_orig.copy()
    df.at[new_label, 'A'] = 7
    tm.assert_frame_equal(df, expected)

    exp_other = DataFrame({0: 7}, index=[new_label])
    expected = pd.concat([df_orig, exp_other], axis=1)

    df = df_orig.copy()
    df.loc[new_label, 0] = 7
    tm.assert_frame_equal(df, expected)
    df = df_orig.copy()
    df.at[new_label, 0] = 7
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_setting_mixed_dtype(self):
    """Row enlargement via ``.loc`` on mixed-dtype and empty frames.

    In a mixed dtype environment the new row should preserve dtypes, as
    if it had been appended.  Series values align on the columns, and a
    list-like of the wrong length raises ``ValueError``.
    """
    def _empty_ab():
        return DataFrame(columns=['A', 'B'])

    mixed = DataFrame([[True, 1], [False, 2]],
                      columns=["female", "fitness"])

    new_row = mixed.loc[1].copy()
    new_row.name = 2
    expected = mixed.append(new_row)

    mixed.loc[2] = mixed.loc[1]
    tm.assert_frame_equal(mixed, expected)

    # columns will align: none of the Series labels match 'A'/'B'
    frame = _empty_ab()
    frame.loc[0] = Series(1, index=range(4))
    tm.assert_frame_equal(frame, DataFrame(columns=['A', 'B'], index=[0]))

    # columns will align: only 'B' matches
    frame = _empty_ab()
    frame.loc[0] = Series(1, index=['B'])
    expected = DataFrame([[np.nan, 1]], columns=['A', 'B'],
                         index=[0], dtype='float64')
    tm.assert_frame_equal(frame, expected)

    # list-like must conform
    frame = _empty_ab()
    with pytest.raises(ValueError):
        frame.loc[0] = [1, 2, 3]

    # TODO: #15657, these are left as object and not coerced
    frame = _empty_ab()
    frame.loc[3] = [6, 7]
    expected = DataFrame([[6, 7]], index=[3], columns=['A', 'B'],
                         dtype='object')
    tm.assert_frame_equal(frame, expected)
|
||||
|
||||
def test_series_partial_set(self):
    """``Series.loc`` with list keys that are only partly in the index.

    Regression from GH4825.  A key list with at least one missing label
    is expected to emit ``FutureWarning`` and to match ``reindex``; a key
    list with no label present raises ``KeyError``.
    """
    def _check_like_reindex(obj, keys, values):
        # loc (with warning) and reindex must both give ``values``
        expected = Series(values, index=keys)
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = obj.loc[keys]
        tm.assert_series_equal(result, expected, check_index_type=True)

        result = obj.reindex(keys)
        tm.assert_series_equal(result, expected, check_index_type=True)

    nan = np.nan
    ser = Series([0.1, 0.2], index=[1, 2])

    # loc equiv to .reindex
    _check_like_reindex(ser, [3, 2, 3], [nan, 0.2, nan])
    _check_like_reindex(ser, [3, 2, 3, 'x'], [nan, 0.2, nan, nan])

    # every requested label exists: plain selection
    expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1])
    result = ser.loc[[2, 2, 1]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    _check_like_reindex(ser, [2, 2, 'x', 1], [0.2, 0.2, nan, 0.1])

    # raises as nothing is in the index
    with pytest.raises(KeyError):
        ser.loc[[3, 3, 3]]

    _check_like_reindex(ser, [2, 2, 3], [0.2, 0.2, nan])

    three = Series([0.1, 0.2, 0.3], index=[1, 2, 3])
    _check_like_reindex(three, [3, 4, 4], [0.3, nan, nan])

    # (index of the 4-element source, requested keys, expected values)
    four_element_cases = [
        ([1, 2, 3, 4], [5, 3, 3], [nan, 0.3, 0.3]),
        ([1, 2, 3, 4], [5, 4, 4], [nan, 0.4, 0.4]),
        ([4, 5, 6, 7], [7, 2, 2], [0.4, nan, nan]),
        ([1, 2, 3, 4], [4, 5, 5], [0.4, nan, nan]),
    ]
    for index, keys, values in four_element_cases:
        source = Series([0.1, 0.2, 0.3, 0.4], index=index)
        _check_like_reindex(source, keys, values)

    # iloc
    expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1])
    result = ser.iloc[[1, 1, 0, 0]]
    tm.assert_series_equal(result, expected, check_index_type=True)
|
||||
|
||||
def test_series_partial_set_with_name(self):
    """Partly-missing list keys on a named Series with a named index.

    GH 11497: the result of ``.loc``/``.iloc`` must carry the series name
    ``'s'`` and the index name ``'idx'``.
    """
    def _named_index(labels, dtype='int64'):
        return Index(labels, dtype=dtype, name='idx')

    def _named_series(values, labels, dtype='int64'):
        return Series(values, index=_named_index(labels, dtype), name='s')

    def _check_warns(obj, keys, values, dtype='int64'):
        # selection with a missing label must warn and keep both names
        expected = _named_series(values, keys, dtype)
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = obj.loc[keys]
        tm.assert_series_equal(result, expected, check_index_type=True)

    nan = np.nan
    ser = _named_series([0.1, 0.2], [1, 2])

    # loc
    _check_warns(ser, [3, 2, 3], [nan, 0.2, nan])
    _check_warns(ser, [3, 2, 3, 'x'], [nan, 0.2, nan, nan], dtype='object')

    # every requested label exists: no warning expected
    expected = _named_series([0.2, 0.2, 0.1], [2, 2, 1])
    result = ser.loc[[2, 2, 1]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    _check_warns(ser, [2, 2, 'x', 1], [0.2, 0.2, nan, 0.1], dtype='object')

    # raises as nothing is in the index
    with pytest.raises(KeyError):
        ser.loc[[3, 3, 3]]

    _check_warns(ser, [2, 2, 3], [0.2, 0.2, nan])

    _check_warns(_named_series([0.1, 0.2, 0.3], [1, 2, 3]),
                 [3, 4, 4], [0.3, nan, nan])

    # (index of the 4-element source, requested keys, expected values)
    four_element_cases = [
        ([1, 2, 3, 4], [5, 3, 3], [nan, 0.3, 0.3]),
        ([1, 2, 3, 4], [5, 4, 4], [nan, 0.4, 0.4]),
        ([4, 5, 6, 7], [7, 2, 2], [0.4, nan, nan]),
        ([1, 2, 3, 4], [4, 5, 5], [0.4, nan, nan]),
    ]
    for labels, keys, values in four_element_cases:
        source = _named_series([0.1, 0.2, 0.3, 0.4], labels)
        _check_warns(source, keys, values)

    # iloc
    expected = _named_series([0.2, 0.2, 0.1, 0.1], [2, 2, 1, 1])
    result = ser.iloc[[1, 1, 0, 0]]
    tm.assert_series_equal(result, expected, check_index_type=True)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\n.ix")
|
||||
def test_partial_set_invalid(self):
    """Only 'valid' new labels may enlarge a frame (GH 4940).

    Numeric labels are rejected on the frame built by
    ``tm.makeTimeDataFrame()``, while a string label is accepted and
    turns the index into object dtype.
    """
    orig = tm.makeTimeDataFrame()
    df = orig.copy()

    # don't allow not string inserts
    with pytest.raises(TypeError), catch_warnings(record=True):
        df.loc[100.0, :] = df.ix[0]

    with pytest.raises(TypeError), catch_warnings(record=True):
        df.loc[100, :] = df.ix[0]

    with pytest.raises(TypeError), catch_warnings(record=True):
        df.ix[100.0, :] = df.ix[0]

    with pytest.raises(ValueError), catch_warnings(record=True):
        df.ix[100, :] = df.ix[0]

    # allow object conversion here
    df = orig.copy()
    with catch_warnings(record=True):
        df.loc['a', :] = df.ix[0]
        exp = orig.append(Series(df.ix[0], name='a'))
    tm.assert_frame_equal(df, exp)

    enlarged_index = Index(orig.index.tolist() + ['a'])
    tm.assert_index_equal(df.index, enlarged_index)
    assert df.index.dtype == 'object'
|
||||
|
||||
def test_partial_set_empty_series(self):
    """Grow an empty Series one label at a time through ``.loc`` (GH5226).

    After every assignment the Series must equal one built directly from
    the labels and values set so far.
    """
    # each scenario is a sequence of (label, value) assignments
    scenarios = [
        [(1, 1), (3, 3)],
        [(1, 1.), (3, 3.)],
        [('foo', 1), ('bar', 3), (3, 4)],
    ]

    for steps in scenarios:
        ser = Series()
        labels = []
        values = []
        for label, value in steps:
            ser.loc[label] = value
            labels.append(label)
            values.append(value)
            tm.assert_series_equal(ser, Series(values, index=labels))
|
||||
|
||||
def test_partial_set_empty_frame(self):
    """Setting rows and columns on a completely empty DataFrame.

    Row-wise and scalar-column ``.loc`` assignments raise ``ValueError``.
    Assigning empty column values works (GH5632), as does assigning an
    indexed Series as a new column.
    """
    def _with_foo(make_value):
        # fresh empty frame whose 'foo' column is set from make_value(frame)
        frame = DataFrame()
        frame['foo'] = make_value(frame)
        return frame

    # partially set with an empty object frame
    df = DataFrame()

    with pytest.raises(ValueError):
        df.loc[1] = 1

    with pytest.raises(ValueError):
        df.loc[1] = Series([1], index=['foo'])

    with pytest.raises(ValueError):
        df.loc[:, 1] = 1

    # these work as they don't really change
    # anything but the index
    # GH5632
    expected = DataFrame(columns=['foo'], index=Index([], dtype='int64'))
    object_values = (
        lambda frame: Series([], dtype='object'),
        lambda frame: Series(frame.index),
        lambda frame: frame.index,
    )
    for make_value in object_values:
        tm.assert_frame_equal(_with_foo(make_value), expected)

    expected = DataFrame(columns=['foo'], index=Index([], dtype='int64'))
    expected['foo'] = expected['foo'].astype('float64')
    float_values = (
        lambda frame: [],
        lambda frame: Series(np.arange(len(frame)), dtype='float64'),
    )
    for make_value in float_values:
        tm.assert_frame_equal(_with_foo(make_value), expected)

    # an empty range, after confirming the starting index is object dtype
    expected = DataFrame(columns=['foo'], index=Index([], dtype='int64'))
    expected['foo'] = expected['foo'].astype('float64')
    frame = DataFrame()
    tm.assert_index_equal(frame.index, Index([], dtype='object'))
    frame['foo'] = range(len(frame))
    tm.assert_frame_equal(frame, expected)

    # new column from an indexed Series: [] and .loc must agree
    df = DataFrame()
    tm.assert_index_equal(df.columns, Index([], dtype=object))
    df2 = DataFrame()
    df2[1] = Series([1], index=['foo'])
    df.loc[:, 1] = Series([1], index=['foo'])
    tm.assert_frame_equal(df, DataFrame([[1]], index=['foo'], columns=[1]))
    tm.assert_frame_equal(df, df2)

    # no index to start
    expected = DataFrame({0: Series(1, index=range(4))},
                         columns=['A', 'B', 0])

    df = DataFrame(columns=['A', 'B'])
    df[0] = Series(1, index=range(4))
    # smoke checks: dtypes and repr must be computable
    df.dtypes
    str(df)
    tm.assert_frame_equal(df, expected)

    df = DataFrame(columns=['A', 'B'])
    df.loc[:, 0] = Series(1, index=range(4))
    df.dtypes
    str(df)
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame_row(self):
    """Adding a column to a frame with no rows must not create rows.

    GH5720, GH5744.
    """
    def _typed_empty(columns, dtypes):
        # empty int64-indexed frame with one explicit dtype per column
        frame = DataFrame(columns=columns, index=Index([], dtype='int64'))
        for column, dtype in zip(columns, dtypes):
            frame[column] = frame[column].astype(dtype)
        return frame

    # don't create rows when empty
    expected = _typed_empty(['A', 'B', 'New'],
                            ['int64', 'float64', 'float64'])

    source = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
    no_rows = source[source.A > 5]
    no_rows['New'] = np.nan
    tm.assert_frame_equal(no_rows, expected)

    # scalar column on a frame that has columns but no index
    expected = DataFrame(columns=['a', 'b', 'c c', 'd'])
    expected['d'] = expected['d'].astype('int64')
    frame = DataFrame(columns=['a', 'b', 'c c'])
    frame['d'] = 3
    tm.assert_frame_equal(frame, expected)
    tm.assert_series_equal(frame['c c'], Series(name='c c', dtype=object))

    # reindex columns is ok
    source = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
    no_rows = source[source.A > 5]
    result = no_rows.reindex(columns=['A', 'B', 'C'])
    expected = _typed_empty(['A', 'B', 'C'],
                            ['int64', 'float64', 'float64'])
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_partial_set_empty_frame_set_series(self):
|
||||
# GH 5756
|
||||
# setting with empty Series
|
||||
df = DataFrame(Series())
|
||||
tm.assert_frame_equal(df, DataFrame({0: Series()}))
|
||||
|
||||
df = DataFrame(Series(name='foo'))
|
||||
tm.assert_frame_equal(df, DataFrame({'foo': Series()}))
|
||||
|
||||
def test_partial_set_empty_frame_empty_copy_assignment(self):
|
||||
# GH 5932
|
||||
# copy on empty with assignment fails
|
||||
df = DataFrame(index=[0])
|
||||
df = df.copy()
|
||||
df['a'] = 0
|
||||
expected = DataFrame(0, index=[0], columns=['a'])
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame_empty_consistencies(self):
    """Consistency of column/cell assignment on empty frames (GH 6171).

    The untouched column ``'y'`` must be filled with NaN in every case.
    """
    nan = np.nan

    # numeric list into one column (dtypes are not compared)
    frame = DataFrame(columns=['x', 'y'])
    frame['x'] = [1, 2]
    expected = DataFrame({'x': [1, 2], 'y': [nan, nan]})
    tm.assert_frame_equal(frame, expected, check_dtype=False)

    # string list into one column: everything stays object
    frame = DataFrame(columns=['x', 'y'])
    frame['x'] = ['1', '2']
    expected = DataFrame({'x': ['1', '2'], 'y': [nan, nan]}, dtype=object)
    tm.assert_frame_equal(frame, expected)

    # single cell through .loc (dtypes are not compared)
    frame = DataFrame(columns=['x', 'y'])
    frame.loc[0, 'x'] = 1
    expected = DataFrame({'x': [1], 'y': [nan]})
    tm.assert_frame_equal(frame, expected, check_dtype=False)
|
||||
@@ -0,0 +1,207 @@
|
||||
""" test scalar indexing, including at and iat """
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, Series, Timedelta, Timestamp, date_range
|
||||
from pandas.tests.indexing.common import Base
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestScalar(Base):
|
||||
|
||||
def test_at_and_iat_get(self):
    """Read scalars through ``iat``/``at`` on every fixture object.

    For each object kind in ``self._objs``: ``iat`` is compared with
    ``self.get_value`` on the 'ints'/'uints' fixtures and is expected to
    raise ``ValueError`` via ``self.check_values`` on the
    'labels'/'ts'/'floats' fixtures; ``at`` is compared on all five.
    """
    def _assert_matches(obj, accessor, values=False):
        # fixtures may be None; those are skipped
        if obj is None:
            return
        for key in self.generate_indices(obj, values):
            result = getattr(obj, accessor)[key]
            expected = self.get_value(obj, key, values)
            tm.assert_almost_equal(result, expected)

    for kind in self._objs:
        fixtures = getattr(self, kind)

        # iat
        for name in ('ints', 'uints'):
            _assert_matches(fixtures[name], 'iat', values=True)

        for name in ('labels', 'ts', 'floats'):
            obj = fixtures[name]
            if obj is not None:
                with pytest.raises(ValueError):
                    self.check_values(obj, 'iat')

        # at
        for name in ('ints', 'uints', 'labels', 'ts', 'floats'):
            _assert_matches(fixtures[name], 'at')
|
||||
|
||||
def test_at_and_iat_set(self):
    """Write the scalar ``1`` through ``iat``/``at`` on every fixture.

    After each write the value read back with ``self.get_value`` must be
    1.  ``iat`` on the 'labels'/'ts'/'floats' fixtures is expected to
    raise ``ValueError``.
    """
    def _set_and_verify(obj, accessor, values=False):
        # fixtures may be None; those are skipped
        if obj is None:
            return
        for key in self.generate_indices(obj, values):
            getattr(obj, accessor)[key] = 1
            stored = self.get_value(obj, key, values)
            tm.assert_almost_equal(stored, 1)

    for kind in self._objs:
        fixtures = getattr(self, kind)

        # iat
        for name in ('ints', 'uints'):
            _set_and_verify(fixtures[name], 'iat', values=True)

        for name in ('labels', 'ts', 'floats'):
            obj = fixtures[name]
            if obj is not None:
                with pytest.raises(ValueError):
                    _set_and_verify(obj, 'iat')

        # at
        for name in ('ints', 'uints', 'labels', 'ts', 'floats'):
            _set_and_verify(fixtures[name], 'at')
|
||||
|
||||
def test_at_iat_coercion(self):
|
||||
|
||||
# as timestamp is not a tuple!
|
||||
dates = date_range('1/1/2000', periods=8)
|
||||
df = DataFrame(np.random.randn(8, 4),
|
||||
index=dates,
|
||||
columns=['A', 'B', 'C', 'D'])
|
||||
s = df['A']
|
||||
|
||||
result = s.at[dates[5]]
|
||||
xp = s.values[5]
|
||||
assert result == xp
|
||||
|
||||
# GH 7729
|
||||
# make sure we are boxing the returns
|
||||
s = Series(['2014-01-01', '2014-02-02'], dtype='datetime64[ns]')
|
||||
expected = Timestamp('2014-02-02')
|
||||
|
||||
for r in [lambda: s.iat[1], lambda: s.iloc[1]]:
|
||||
result = r()
|
||||
assert result == expected
|
||||
|
||||
s = Series(['1 days', '2 days'], dtype='timedelta64[ns]')
|
||||
expected = Timedelta('2 days')
|
||||
|
||||
for r in [lambda: s.iat[1], lambda: s.iloc[1]]:
|
||||
result = r()
|
||||
assert result == expected
|
||||
|
||||
def test_iat_invalid_args(self):
|
||||
pass
|
||||
|
||||
def test_imethods_with_dups(self):
    """Positional access on an index with duplicate labels (GH6493).

    ``iat``/``iloc`` address by position, so duplicates in the index do
    not matter; out-of-range positions raise ``IndexError``.
    """
    ser = Series(range(5), index=[1, 1, 2, 2, 3], dtype='int64')

    assert ser.iloc[2] == 2
    assert ser.iat[2] == 2

    for out_of_range in (10, -10):
        with pytest.raises(IndexError):
            ser.iat[out_of_range]

    # positions 2 and 3 both carry the label 2
    expected = Series([2, 3], [2, 2], dtype='int64')
    tm.assert_series_equal(ser.iloc[[2, 3]], expected)

    frame = ser.to_frame()
    expected = Series(2, index=[0], name=2)
    tm.assert_series_equal(frame.iloc[2], expected)

    assert frame.iat[2, 0] == 2
|
||||
|
||||
def test_at_to_fail(self):
    """``at`` must not fall back to another kind of lookup (GH 7814).

    A key of the wrong type raises ``ValueError``.  GH 13822 additionally
    checks the ``KeyError`` message when a missing column is requested
    from a frame with non-unique columns.
    """
    # (object, valid key, expected value, key of the wrong type)
    cases = [
        (Series([1, 2, 3], index=list('abc')), 'a', 1, 0),
        (DataFrame({'A': [1, 2, 3]}, index=list('abc')),
         ('a', 'A'), 1, ('a', 0)),
        (Series([1, 2, 3], index=[3, 2, 1]), 1, 3, 'a'),
        (DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]),
         (1, 0), 3, ('a', 0)),
    ]
    for obj, good_key, value, bad_key in cases:
        assert obj.at[good_key] == value
        with pytest.raises(ValueError):
            obj.at[bad_key]

    # GH 13822, incorrect error string with non-unique columns when missing
    # column is accessed
    dup_cols = DataFrame({'x': [1.], 'y': [2.], 'z': [3.]})
    dup_cols.columns = ['x', 'x', 'z']

    # Check that we get the correct value in the KeyError
    with pytest.raises(KeyError, match=r"\['y'\] not in index"):
        dup_cols[['x', 'y', 'z']]
|
||||
|
||||
def test_at_with_tz(self):
|
||||
# gh-15822
|
||||
df = DataFrame({'name': ['John', 'Anderson'],
|
||||
'date': [Timestamp(2017, 3, 13, 13, 32, 56),
|
||||
Timestamp(2017, 2, 16, 12, 10, 3)]})
|
||||
df['date'] = df['date'].dt.tz_localize('Asia/Shanghai')
|
||||
|
||||
expected = Timestamp('2017-03-13 13:32:56+0800', tz='Asia/Shanghai')
|
||||
|
||||
result = df.loc[0, 'date']
|
||||
assert result == expected
|
||||
|
||||
result = df.at[0, 'date']
|
||||
assert result == expected
|
||||
|
||||
def test_mixed_index_at_iat_loc_iloc_series(self):
    """Scalar access on a Series whose index mixes strings and ints.

    GH 19860: ``at``/``loc`` must agree for every label and
    ``iat``/``iloc`` for every position; a label that is not in the index
    raises ``KeyError`` instead of being treated as a position.
    """
    s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 1, 2])

    # ``items`` is the supported spelling of the older ``iteritems`` alias
    for label, value in s.items():
        assert s.at[label] == s.loc[label] == value

    # values were chosen so that position i holds i + 1
    for pos in range(len(s)):
        assert s.iat[pos] == s.iloc[pos] == pos + 1

    # 4 is a valid position but not a label
    with pytest.raises(KeyError):
        s.at[4]
    with pytest.raises(KeyError):
        s.loc[4]
|
||||
|
||||
def test_mixed_index_at_iat_loc_iloc_dataframe(self):
    """Scalar access on a frame whose columns mix strings and ints.

    GH 19860: ``at``/``loc`` must agree for every (row, column) label
    pair and ``iat``/``iloc`` for every position pair; a column label
    that is not present raises ``KeyError``.
    """
    df = DataFrame([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]],
                   columns=['a', 'b', 'c', 1, 2])

    # ``items`` is the supported spelling of the older ``iteritems`` alias
    for row_label, row in df.iterrows():
        for col_label, value in row.items():
            assert (df.at[row_label, col_label]
                    == df.loc[row_label, col_label]
                    == value)

    # cell (r, c) holds r * 5 + c by construction
    for r in range(2):
        for c in range(5):
            assert df.iat[r, c] == df.iloc[r, c] == r * 5 + c

    # 3 is a valid column position but not a column label
    with pytest.raises(KeyError):
        df.at[0, 3]
    with pytest.raises(KeyError):
        df.loc[0, 3]
|
||||
|
||||
def test_iat_setter_incompatible_assignment(self):
    """``iat`` may store ``None`` in an integer column (GH 23236).

    The frame must then equal one constructed with ``None`` in that cell.
    """
    frame = DataFrame({'a': [0, 1], 'b': [4, 5]})
    frame.iat[0, 0] = None

    tm.assert_frame_equal(frame,
                          DataFrame({"a": [None, 1], "b": [4, 5]}))
|
||||
@@ -0,0 +1,97 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestTimedeltaIndexing(object):
|
||||
def test_boolean_indexing(self):
    """``DataFrame.mask`` with boolean conditions on a timedelta index.

    GH 14946: rows selected by each condition are replaced by 10 and the
    result is compared with a hand-written frame.
    """
    seconds = pd.to_timedelta(range(10), unit='s')
    frame = pd.DataFrame({'x': range(10)})
    frame.index = seconds
    column = frame['x']

    # (condition, column values expected after masking with 10)
    cases = [
        (column > 3, [0, 1, 2, 3, 10, 10, 10, 10, 10, 10]),
        (column == 3, [0, 1, 2, 10, 4, 5, 6, 7, 8, 9]),
        (column < 3, [10, 10, 10, 3, 4, 5, 6, 7, 8, 9]),
    ]
    for cond, data in cases:
        masked = frame.mask(cond, 10).astype('int64')
        result = frame.assign(x=masked)
        expected = pd.DataFrame(data,
                                index=seconds,
                                columns=['x'],
                                dtype='int64')
        tm.assert_frame_equal(expected, result)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer, expected",
|
||||
[(0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
|
||||
(slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]),
|
||||
([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9])])
|
||||
def test_list_like_indexing(self, indexer, expected):
|
||||
# GH 16637
|
||||
df = pd.DataFrame({'x': range(10)}, dtype="int64")
|
||||
df.index = pd.to_timedelta(range(10), unit='s')
|
||||
|
||||
df.loc[df.index[indexer], 'x'] = 20
|
||||
|
||||
expected = pd.DataFrame(expected,
|
||||
index=pd.to_timedelta(range(10), unit='s'),
|
||||
columns=['x'],
|
||||
dtype="int64")
|
||||
|
||||
tm.assert_frame_equal(expected, df)
|
||||
|
||||
def test_string_indexing(self):
|
||||
# GH 16896
|
||||
df = pd.DataFrame({'x': range(3)},
|
||||
index=pd.to_timedelta(range(3), unit='days'))
|
||||
expected = df.iloc[0]
|
||||
sliced = df.loc['0 days']
|
||||
tm.assert_series_equal(sliced, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"value",
|
||||
[None, pd.NaT, np.nan])
|
||||
def test_masked_setitem(self, value):
|
||||
# issue (#18586)
|
||||
series = pd.Series([0, 1, 2], dtype='timedelta64[ns]')
|
||||
series[series == series[0]] = value
|
||||
expected = pd.Series([pd.NaT, 1, 2], dtype='timedelta64[ns]')
|
||||
tm.assert_series_equal(series, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"value",
|
||||
[None, pd.NaT, np.nan])
|
||||
def test_listlike_setitem(self, value):
|
||||
# issue (#18586)
|
||||
series = pd.Series([0, 1, 2], dtype='timedelta64[ns]')
|
||||
series.iloc[0] = value
|
||||
expected = pd.Series([pd.NaT, 1, 2], dtype='timedelta64[ns]')
|
||||
tm.assert_series_equal(series, expected)
|
||||
|
||||
@pytest.mark.parametrize('start,stop, expected_slice', [
|
||||
[np.timedelta64(0, 'ns'), None, slice(0, 11)],
|
||||
[np.timedelta64(1, 'D'), np.timedelta64(6, 'D'), slice(1, 7)],
|
||||
[None, np.timedelta64(4, 'D'), slice(0, 5)]])
|
||||
def test_numpy_timedelta_scalar_indexing(self, start, stop,
|
||||
expected_slice):
|
||||
# GH 20393
|
||||
s = pd.Series(range(11), pd.timedelta_range('0 days', '10 days'))
|
||||
result = s.loc[slice(start, stop)]
|
||||
expected = s.iloc[expected_slice]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_roundtrip_thru_setitem(self):
|
||||
# PR 23462
|
||||
dt1 = pd.Timedelta(0)
|
||||
dt2 = pd.Timedelta(28767471428571405)
|
||||
df = pd.DataFrame({'dt': pd.Series([dt1, dt2])})
|
||||
df_copy = df.copy()
|
||||
s = pd.Series([dt1])
|
||||
|
||||
expected = df['dt'].iloc[1].value
|
||||
df.loc[[True, False]] = s
|
||||
result = df['dt'].iloc[1].value
|
||||
|
||||
assert expected == result
|
||||
tm.assert_frame_equal(df, df_copy)
|
||||
Reference in New Issue
Block a user