Static code analysis and corrections

This commit is contained in:
Kristjan Komlosi
2019-07-17 16:06:09 +02:00
parent 674692c2fc
commit 21bfae9fbc
10086 changed files with 2102103 additions and 51 deletions
@@ -0,0 +1,307 @@
""" common utilities """
import itertools
from warnings import catch_warnings, filterwarnings
import numpy as np
import pytest
from pandas.compat import lrange
from pandas.core.dtypes.common import is_scalar
from pandas import (
DataFrame, Float64Index, MultiIndex, Panel, Series, UInt64Index,
date_range)
from pandas.util import testing as tm
from pandas.io.formats.printing import pprint_thing
_verbose = False
def _mklbl(prefix, n):
return ["%s%s" % (prefix, i) for i in range(n)]
def _axify(obj, key, axis):
# create a tuple accessor
axes = [slice(None)] * obj.ndim
axes[axis] = key
return tuple(axes)
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class Base(object):
""" indexing comprehensive base class """
_objs = {'series', 'frame', 'panel'}
_typs = {'ints', 'uints', 'labels', 'mixed', 'ts', 'floats', 'empty',
'ts_rev', 'multi'}
def setup_method(self, method):
self.series_ints = Series(np.random.rand(4), index=lrange(0, 8, 2))
self.frame_ints = DataFrame(np.random.randn(4, 4),
index=lrange(0, 8, 2),
columns=lrange(0, 12, 3))
with catch_warnings(record=True):
self.panel_ints = Panel(np.random.rand(4, 4, 4),
items=lrange(0, 8, 2),
major_axis=lrange(0, 12, 3),
minor_axis=lrange(0, 16, 4))
self.series_uints = Series(np.random.rand(4),
index=UInt64Index(lrange(0, 8, 2)))
self.frame_uints = DataFrame(np.random.randn(4, 4),
index=UInt64Index(lrange(0, 8, 2)),
columns=UInt64Index(lrange(0, 12, 3)))
self.panel_uints = Panel(np.random.rand(4, 4, 4),
items=UInt64Index(lrange(0, 8, 2)),
major_axis=UInt64Index(lrange(0, 12, 3)),
minor_axis=UInt64Index(lrange(0, 16, 4)))
self.series_floats = Series(np.random.rand(4),
index=Float64Index(range(0, 8, 2)))
self.frame_floats = DataFrame(np.random.randn(4, 4),
index=Float64Index(range(0, 8, 2)),
columns=Float64Index(range(0, 12, 3)))
self.panel_floats = Panel(np.random.rand(4, 4, 4),
items=Float64Index(range(0, 8, 2)),
major_axis=Float64Index(range(0, 12, 3)),
minor_axis=Float64Index(range(0, 16, 4)))
m_idces = [MultiIndex.from_product([[1, 2], [3, 4]]),
MultiIndex.from_product([[5, 6], [7, 8]]),
MultiIndex.from_product([[9, 10], [11, 12]])]
self.series_multi = Series(np.random.rand(4),
index=m_idces[0])
self.frame_multi = DataFrame(np.random.randn(4, 4),
index=m_idces[0],
columns=m_idces[1])
self.panel_multi = Panel(np.random.rand(4, 4, 4),
items=m_idces[0],
major_axis=m_idces[1],
minor_axis=m_idces[2])
self.series_labels = Series(np.random.randn(4), index=list('abcd'))
self.frame_labels = DataFrame(np.random.randn(4, 4),
index=list('abcd'), columns=list('ABCD'))
self.panel_labels = Panel(np.random.randn(4, 4, 4),
items=list('abcd'),
major_axis=list('ABCD'),
minor_axis=list('ZYXW'))
self.series_mixed = Series(np.random.randn(4), index=[2, 4, 'null', 8])
self.frame_mixed = DataFrame(np.random.randn(4, 4),
index=[2, 4, 'null', 8])
self.panel_mixed = Panel(np.random.randn(4, 4, 4),
items=[2, 4, 'null', 8])
self.series_ts = Series(np.random.randn(4),
index=date_range('20130101', periods=4))
self.frame_ts = DataFrame(np.random.randn(4, 4),
index=date_range('20130101', periods=4))
self.panel_ts = Panel(np.random.randn(4, 4, 4),
items=date_range('20130101', periods=4))
dates_rev = (date_range('20130101', periods=4)
.sort_values(ascending=False))
self.series_ts_rev = Series(np.random.randn(4),
index=dates_rev)
self.frame_ts_rev = DataFrame(np.random.randn(4, 4),
index=dates_rev)
self.panel_ts_rev = Panel(np.random.randn(4, 4, 4),
items=dates_rev)
self.frame_empty = DataFrame({})
self.series_empty = Series({})
self.panel_empty = Panel({})
# form agglomerates
for o in self._objs:
d = dict()
for t in self._typs:
d[t] = getattr(self, '%s_%s' % (o, t), None)
setattr(self, o, d)
def generate_indices(self, f, values=False):
""" generate the indices
if values is True , use the axis values
is False, use the range
"""
axes = f.axes
if values:
axes = [lrange(len(a)) for a in axes]
return itertools.product(*axes)
def get_result(self, obj, method, key, axis):
""" return the result for this obj with this key and this axis """
if isinstance(key, dict):
key = key[axis]
# use an artificial conversion to map the key as integers to the labels
# so ix can work for comparisons
if method == 'indexer':
method = 'ix'
key = obj._get_axis(axis)[key]
# in case we actually want 0 index slicing
with catch_warnings(record=True):
try:
xp = getattr(obj, method).__getitem__(_axify(obj, key, axis))
except AttributeError:
xp = getattr(obj, method).__getitem__(key)
return xp
def get_value(self, f, i, values=False):
""" return the value for the location i """
# check against values
if values:
return f.values[i]
# this is equiv of f[col][row].....
# v = f
# for a in reversed(i):
# v = v.__getitem__(a)
# return v
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", DeprecationWarning)
return f.ix[i]
def check_values(self, f, func, values=False):
if f is None:
return
axes = f.axes
indicies = itertools.product(*axes)
for i in indicies:
result = getattr(f, func)[i]
# check against values
if values:
expected = f.values[i]
else:
expected = f
for a in reversed(i):
expected = expected.__getitem__(a)
tm.assert_almost_equal(result, expected)
def check_result(self, name, method1, key1, method2, key2, typs=None,
objs=None, axes=None, fails=None):
def _eq(t, o, a, obj, k1, k2):
""" compare equal for these 2 keys """
if a is not None and a > obj.ndim - 1:
return
def _print(result, error=None):
if error is not None:
error = str(error)
v = ("%-16.16s [%-16.16s]: [typ->%-8.8s,obj->%-8.8s,"
"key1->(%-4.4s),key2->(%-4.4s),axis->%s] %s" %
(name, result, t, o, method1, method2, a, error or ''))
if _verbose:
pprint_thing(v)
try:
rs = getattr(obj, method1).__getitem__(_axify(obj, k1, a))
try:
xp = self.get_result(obj, method2, k2, a)
except Exception:
result = 'no comp'
_print(result)
return
detail = None
try:
if is_scalar(rs) and is_scalar(xp):
assert rs == xp
elif xp.ndim == 1:
tm.assert_series_equal(rs, xp)
elif xp.ndim == 2:
tm.assert_frame_equal(rs, xp)
elif xp.ndim == 3:
tm.assert_panel_equal(rs, xp)
result = 'ok'
except AssertionError as e:
detail = str(e)
result = 'fail'
# reverse the checks
if fails is True:
if result == 'fail':
result = 'ok (fail)'
_print(result)
if not result.startswith('ok'):
raise AssertionError(detail)
except AssertionError:
raise
except Exception as detail:
# if we are in fails, the ok, otherwise raise it
if fails is not None:
if isinstance(detail, fails):
result = 'ok (%s)' % type(detail).__name__
_print(result)
return
result = type(detail).__name__
raise AssertionError(_print(result, error=detail))
if typs is None:
typs = self._typs
if objs is None:
objs = self._objs
if axes is not None:
if not isinstance(axes, (tuple, list)):
axes = [axes]
else:
axes = list(axes)
else:
axes = [0, 1, 2]
# check
for o in objs:
if o not in self._objs:
continue
d = getattr(self, o)
for a in axes:
for t in typs:
if t not in self._typs:
continue
obj = d[t]
if obj is None:
continue
def _call(obj=obj):
obj = obj.copy()
k2 = key2
_eq(t, o, a, obj, key1, k2)
# Panel deprecations
if isinstance(obj, Panel):
with catch_warnings():
filterwarnings("ignore", "\nPanel*", FutureWarning)
_call()
else:
_call()
@@ -0,0 +1,20 @@
import numpy as np
import pytest
from pandas._libs import index as libindex
@pytest.fixture(params=[
(libindex.Int64Engine, np.int64),
(libindex.Int32Engine, np.int32),
(libindex.Int16Engine, np.int16),
(libindex.Int8Engine, np.int8),
(libindex.UInt64Engine, np.uint64),
(libindex.UInt32Engine, np.uint32),
(libindex.UInt16Engine, np.uint16),
(libindex.UInt8Engine, np.uint8),
(libindex.Float64Engine, np.float64),
(libindex.Float32Engine, np.float32),
], ids=lambda x: x[0].__name__)
def numeric_indexing_engine_type_and_dtype(request):
return request.param
@@ -0,0 +1,267 @@
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Interval, IntervalIndex, Series
import pandas.util.testing as tm
class TestIntervalIndex(object):
def setup_method(self, method):
self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))
# To be removed, replaced by test_interval_new.py (see #16316, #16386)
def test_loc_with_scalar(self):
s = self.s
expected = s.iloc[:3]
tm.assert_series_equal(expected, s.loc[:3])
tm.assert_series_equal(expected, s.loc[:2.5])
tm.assert_series_equal(expected, s.loc[0.1:2.5])
tm.assert_series_equal(expected, s.loc[-1:3])
expected = s.iloc[1:4]
tm.assert_series_equal(expected, s.loc[[1.5, 2.5, 3.5]])
tm.assert_series_equal(expected, s.loc[[2, 3, 4]])
tm.assert_series_equal(expected, s.loc[[1.5, 3, 4]])
expected = s.iloc[2:5]
tm.assert_series_equal(expected, s.loc[s >= 2])
# TODO: check this behavior is consistent with test_interval_new.py
def test_getitem_with_scalar(self):
s = self.s
expected = s.iloc[:3]
tm.assert_series_equal(expected, s[:3])
tm.assert_series_equal(expected, s[:2.5])
tm.assert_series_equal(expected, s[0.1:2.5])
tm.assert_series_equal(expected, s[-1:3])
expected = s.iloc[1:4]
tm.assert_series_equal(expected, s[[1.5, 2.5, 3.5]])
tm.assert_series_equal(expected, s[[2, 3, 4]])
tm.assert_series_equal(expected, s[[1.5, 3, 4]])
expected = s.iloc[2:5]
tm.assert_series_equal(expected, s[s >= 2])
# TODO: check this behavior is consistent with test_interval_new.py
@pytest.mark.parametrize('direction', ['increasing', 'decreasing'])
def test_nonoverlapping_monotonic(self, direction, closed):
tpls = [(0, 1), (2, 3), (4, 5)]
if direction == 'decreasing':
tpls = tpls[::-1]
idx = IntervalIndex.from_tuples(tpls, closed=closed)
s = Series(list('abc'), idx)
for key, expected in zip(idx.left, s):
if idx.closed_left:
assert s[key] == expected
assert s.loc[key] == expected
else:
with pytest.raises(KeyError):
s[key]
with pytest.raises(KeyError):
s.loc[key]
for key, expected in zip(idx.right, s):
if idx.closed_right:
assert s[key] == expected
assert s.loc[key] == expected
else:
with pytest.raises(KeyError):
s[key]
with pytest.raises(KeyError):
s.loc[key]
for key, expected in zip(idx.mid, s):
assert s[key] == expected
assert s.loc[key] == expected
# To be removed, replaced by test_interval_new.py (see #16316, #16386)
def test_with_interval(self):
s = self.s
expected = 0
result = s.loc[Interval(0, 1)]
assert result == expected
result = s[Interval(0, 1)]
assert result == expected
expected = s.iloc[3:5]
result = s.loc[Interval(3, 6)]
tm.assert_series_equal(expected, result)
expected = s.iloc[3:5]
result = s.loc[[Interval(3, 6)]]
tm.assert_series_equal(expected, result)
expected = s.iloc[3:5]
result = s.loc[[Interval(3, 5)]]
tm.assert_series_equal(expected, result)
# missing
with pytest.raises(KeyError):
s.loc[Interval(-2, 0)]
with pytest.raises(KeyError):
s[Interval(-2, 0)]
with pytest.raises(KeyError):
s.loc[Interval(5, 6)]
with pytest.raises(KeyError):
s[Interval(5, 6)]
# To be removed, replaced by test_interval_new.py (see #16316, #16386)
def test_with_slices(self):
s = self.s
# slice of interval
with pytest.raises(NotImplementedError):
s.loc[Interval(3, 6):]
with pytest.raises(NotImplementedError):
s[Interval(3, 6):]
expected = s.iloc[3:5]
result = s[[Interval(3, 6)]]
tm.assert_series_equal(expected, result)
# slice of scalar with step != 1
with pytest.raises(ValueError):
s[0:4:2]
# To be removed, replaced by test_interval_new.py (see #16316, #16386)
def test_with_overlaps(self):
s = self.s
expected = s.iloc[[3, 4, 3, 4]]
result = s.loc[[Interval(3, 6), Interval(3, 6)]]
tm.assert_series_equal(expected, result)
idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
s = Series(range(len(idx)), index=idx)
result = s[4]
expected = s
tm.assert_series_equal(expected, result)
result = s[[4]]
expected = s
tm.assert_series_equal(expected, result)
result = s.loc[[4]]
expected = s
tm.assert_series_equal(expected, result)
result = s[Interval(3, 5)]
expected = s
tm.assert_series_equal(expected, result)
result = s.loc[Interval(3, 5)]
expected = s
tm.assert_series_equal(expected, result)
# doesn't intersect unique set of intervals
with pytest.raises(KeyError):
s[[Interval(3, 5)]]
with pytest.raises(KeyError):
s.loc[[Interval(3, 5)]]
# To be removed, replaced by test_interval_new.py (see #16316, #16386)
def test_non_unique(self):
idx = IntervalIndex.from_tuples([(1, 3), (3, 7)])
s = Series(range(len(idx)), index=idx)
result = s.loc[Interval(1, 3)]
assert result == 0
result = s.loc[[Interval(1, 3)]]
expected = s.iloc[0:1]
tm.assert_series_equal(expected, result)
# To be removed, replaced by test_interval_new.py (see #16316, #16386)
def test_non_unique_moar(self):
idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)])
s = Series(range(len(idx)), index=idx)
result = s.loc[Interval(1, 3)]
expected = s.iloc[[0, 1]]
tm.assert_series_equal(expected, result)
# non-unique index and slices not allowed
with pytest.raises(ValueError):
s.loc[Interval(1, 3):]
with pytest.raises(ValueError):
s[Interval(1, 3):]
# non-unique
with pytest.raises(ValueError):
s[[Interval(1, 3)]]
# TODO: check this behavior is consistent with test_interval_new.py
def test_non_matching(self):
s = self.s
# this is a departure from our current
# indexin scheme, but simpler
with pytest.raises(KeyError):
s.loc[[-1, 3, 4, 5]]
with pytest.raises(KeyError):
s.loc[[-1, 3]]
def test_large_series(self):
s = Series(np.arange(1000000),
index=IntervalIndex.from_breaks(np.arange(1000001)))
result1 = s.loc[:80000]
result2 = s.loc[0:80000]
result3 = s.loc[0:80000:1]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
def test_loc_getitem_frame(self):
df = DataFrame({'A': range(10)})
s = pd.cut(df.A, 5)
df['B'] = s
df = df.set_index('B')
result = df.loc[4]
expected = df.iloc[4:6]
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError):
df.loc[10]
# single list-like
result = df.loc[[4]]
expected = df.iloc[4:6]
tm.assert_frame_equal(result, expected)
# non-unique
result = df.loc[[4, 5]]
expected = df.take([4, 5, 4, 5])
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError):
df.loc[[10]]
# partial missing
with pytest.raises(KeyError):
df.loc[[10, 4]]
@@ -0,0 +1,246 @@
import numpy as np
import pytest
from pandas import Interval, IntervalIndex, Series
import pandas.util.testing as tm
pytestmark = pytest.mark.skip(reason="new indexing tests for issue 16316")
class TestIntervalIndex(object):
def setup_method(self, method):
self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))
def test_loc_with_interval(self):
# loc with single label / list of labels:
# - Intervals: only exact matches
# - scalars: those that contain it
s = self.s
expected = 0
result = s.loc[Interval(0, 1)]
assert result == expected
result = s[Interval(0, 1)]
assert result == expected
expected = s.iloc[3:5]
result = s.loc[[Interval(3, 4), Interval(4, 5)]]
tm.assert_series_equal(expected, result)
result = s[[Interval(3, 4), Interval(4, 5)]]
tm.assert_series_equal(expected, result)
# missing or not exact
with pytest.raises(KeyError):
s.loc[Interval(3, 5, closed='left')]
with pytest.raises(KeyError):
s[Interval(3, 5, closed='left')]
with pytest.raises(KeyError):
s[Interval(3, 5)]
with pytest.raises(KeyError):
s.loc[Interval(3, 5)]
with pytest.raises(KeyError):
s[Interval(3, 5)]
with pytest.raises(KeyError):
s.loc[Interval(-2, 0)]
with pytest.raises(KeyError):
s[Interval(-2, 0)]
with pytest.raises(KeyError):
s.loc[Interval(5, 6)]
with pytest.raises(KeyError):
s[Interval(5, 6)]
def test_loc_with_scalar(self):
# loc with single label / list of labels:
# - Intervals: only exact matches
# - scalars: those that contain it
s = self.s
assert s.loc[1] == 0
assert s.loc[1.5] == 1
assert s.loc[2] == 1
# TODO with __getitem__ same rules as loc, or positional ?
# assert s[1] == 0
# assert s[1.5] == 1
# assert s[2] == 1
expected = s.iloc[1:4]
tm.assert_series_equal(expected, s.loc[[1.5, 2.5, 3.5]])
tm.assert_series_equal(expected, s.loc[[2, 3, 4]])
tm.assert_series_equal(expected, s.loc[[1.5, 3, 4]])
expected = s.iloc[[1, 1, 2, 1]]
tm.assert_series_equal(expected, s.loc[[1.5, 2, 2.5, 1.5]])
expected = s.iloc[2:5]
tm.assert_series_equal(expected, s.loc[s >= 2])
def test_loc_with_slices(self):
# loc with slices:
# - Interval objects: only works with exact matches
# - scalars: only works for non-overlapping, monotonic intervals,
# and start/stop select location based on the interval that
# contains them:
# (slice_loc(start, stop) == (idx.get_loc(start), idx.get_loc(stop))
s = self.s
# slice of interval
expected = s.iloc[:3]
result = s.loc[Interval(0, 1):Interval(2, 3)]
tm.assert_series_equal(expected, result)
result = s[Interval(0, 1):Interval(2, 3)]
tm.assert_series_equal(expected, result)
expected = s.iloc[4:]
result = s.loc[Interval(3, 4):]
tm.assert_series_equal(expected, result)
result = s[Interval(3, 4):]
tm.assert_series_equal(expected, result)
with pytest.raises(KeyError):
s.loc[Interval(3, 6):]
with pytest.raises(KeyError):
s[Interval(3, 6):]
with pytest.raises(KeyError):
s.loc[Interval(3, 4, closed='left'):]
with pytest.raises(KeyError):
s[Interval(3, 4, closed='left'):]
# TODO with non-existing intervals ?
# s.loc[Interval(-1, 0):Interval(2, 3)]
# slice of scalar
expected = s.iloc[:3]
tm.assert_series_equal(expected, s.loc[:3])
tm.assert_series_equal(expected, s.loc[:2.5])
tm.assert_series_equal(expected, s.loc[0.1:2.5])
# TODO should this work? (-1 is not contained in any of the Intervals)
# tm.assert_series_equal(expected, s.loc[-1:3])
# TODO with __getitem__ same rules as loc, or positional ?
# tm.assert_series_equal(expected, s[:3])
# tm.assert_series_equal(expected, s[:2.5])
# tm.assert_series_equal(expected, s[0.1:2.5])
# slice of scalar with step != 1
with pytest.raises(NotImplementedError):
s[0:4:2]
def test_loc_with_overlap(self):
idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
s = Series(range(len(idx)), index=idx)
# scalar
expected = s
result = s.loc[4]
tm.assert_series_equal(expected, result)
result = s[4]
tm.assert_series_equal(expected, result)
result = s.loc[[4]]
tm.assert_series_equal(expected, result)
result = s[[4]]
tm.assert_series_equal(expected, result)
# interval
expected = 0
result = s.loc[Interval(1, 5)]
tm.assert_series_equal(expected, result)
result = s[Interval(1, 5)]
tm.assert_series_equal(expected, result)
expected = s
result = s.loc[[Interval(1, 5), Interval(3, 7)]]
tm.assert_series_equal(expected, result)
result = s[[Interval(1, 5), Interval(3, 7)]]
tm.assert_series_equal(expected, result)
with pytest.raises(KeyError):
s.loc[Interval(3, 5)]
with pytest.raises(KeyError):
s.loc[[Interval(3, 5)]]
with pytest.raises(KeyError):
s[Interval(3, 5)]
with pytest.raises(KeyError):
s[[Interval(3, 5)]]
# slices with interval (only exact matches)
expected = s
result = s.loc[Interval(1, 5):Interval(3, 7)]
tm.assert_series_equal(expected, result)
result = s[Interval(1, 5):Interval(3, 7)]
tm.assert_series_equal(expected, result)
with pytest.raises(KeyError):
s.loc[Interval(1, 6):Interval(3, 8)]
with pytest.raises(KeyError):
s[Interval(1, 6):Interval(3, 8)]
# slices with scalar raise for overlapping intervals
# TODO KeyError is the appropriate error?
with pytest.raises(KeyError):
s.loc[1:4]
def test_non_unique(self):
idx = IntervalIndex.from_tuples([(1, 3), (3, 7)])
s = Series(range(len(idx)), index=idx)
result = s.loc[Interval(1, 3)]
assert result == 0
result = s.loc[[Interval(1, 3)]]
expected = s.iloc[0:1]
tm.assert_series_equal(expected, result)
def test_non_unique_moar(self):
idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)])
s = Series(range(len(idx)), index=idx)
expected = s.iloc[[0, 1]]
result = s.loc[Interval(1, 3)]
tm.assert_series_equal(expected, result)
expected = s
result = s.loc[Interval(1, 3):]
tm.assert_series_equal(expected, result)
expected = s
result = s[Interval(1, 3):]
tm.assert_series_equal(expected, result)
expected = s.iloc[[0, 1]]
result = s[[Interval(1, 3)]]
tm.assert_series_equal(expected, result)
@@ -0,0 +1,31 @@
import numpy as np
import pytest
from pandas import DataFrame, Index, MultiIndex
from pandas.util import testing as tm
@pytest.fixture
def multiindex_dataframe_random_data():
"""DataFrame with 2 level MultiIndex with random data"""
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
'three']],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['first', 'second'])
return DataFrame(np.random.randn(10, 3), index=index,
columns=Index(['A', 'B', 'C'], name='exp'))
@pytest.fixture
def multiindex_year_month_day_dataframe_random_data():
"""DataFrame with 3 level MultiIndex (year, month, day) covering
first 100 business days from 2000-01-01 with random data"""
tdf = tm.makeTimeDataFrame(100)
ymd = tdf.groupby([lambda x: x.year, lambda x: x.month,
lambda x: x.day]).sum()
# use Int64Index, to make sure things work
ymd.index.set_levels([lev.astype('i8') for lev in ymd.index.levels],
inplace=True)
ymd.index.set_names(['year', 'month', 'day'], inplace=True)
return ymd
@@ -0,0 +1,65 @@
import numpy as np
import pytest
from pandas.compat import lrange, lzip, range
from pandas import DataFrame, MultiIndex, Series
from pandas.core import common as com
import pandas.util.testing as tm
def test_detect_chained_assignment():
# Inplace ops, originally from:
# http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
a = [12, 23]
b = [123, None]
c = [1234, 2345]
d = [12345, 23456]
tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'),
('ears', 'right')]
events = {('eyes', 'left'): a,
('eyes', 'right'): b,
('ears', 'left'): c,
('ears', 'right'): d}
multiind = MultiIndex.from_tuples(tuples, names=['part', 'side'])
zed = DataFrame(events, index=['a', 'b'], columns=multiind)
with pytest.raises(com.SettingWithCopyError):
zed['eyes']['right'].fillna(value=555, inplace=True)
def test_cache_updating():
# 5216
# make sure that we don't try to set a dead cache
a = np.random.rand(10, 3)
df = DataFrame(a, columns=['x', 'y', 'z'])
tuples = [(i, j) for i in range(5) for j in range(2)]
index = MultiIndex.from_tuples(tuples)
df.index = index
# setting via chained assignment
# but actually works, since everything is a view
df.loc[0]['z'].iloc[0] = 1.
result = df.loc[(0, 0), 'z']
assert result == 1
# correct setting
df.loc[(0, 0), 'z'] = 2
result = df.loc[(0, 0), 'z']
assert result == 2
def test_indexer_caching():
# GH5727
# make sure that indexers are in the _internal_names_set
n = 1000001
arrays = [lrange(n), lrange(n)]
index = MultiIndex.from_tuples(lzip(*arrays))
s = Series(np.zeros(n), index=index)
str(s)
# setitem
expected = Series(np.ones(n), index=index)
s = Series(np.zeros(n), index=index)
s[s == 0] = 1
tm.assert_series_equal(s, expected)
@@ -0,0 +1,22 @@
from datetime import datetime
import numpy as np
from pandas import Index, Period, Series, period_range
def test_multiindex_period_datetime():
# GH4861, using datetime in period of multiindex raises exception
idx1 = Index(['a', 'a', 'a', 'b', 'b'])
idx2 = period_range('2012-01', periods=len(idx1), freq='M')
s = Series(np.random.randn(len(idx1)), [idx1, idx2])
# try Period as index
expected = s.iloc[0]
result = s.loc['a', Period('2012-01')]
assert result == expected
# try datetime as index
result = s.loc['a', datetime(2012, 1, 1)]
assert result == expected
@@ -0,0 +1,237 @@
import numpy as np
import pytest
from pandas.compat import u, zip
from pandas import DataFrame, Index, MultiIndex, Series
from pandas.core.indexing import IndexingError
from pandas.util import testing as tm
# ----------------------------------------------------------------------------
# test indexing of Series with multi-level Index
# ----------------------------------------------------------------------------
@pytest.mark.parametrize('access_method', [lambda s, x: s[:, x],
lambda s, x: s.loc[:, x],
lambda s, x: s.xs(x, level=1)])
@pytest.mark.parametrize('level1_value, expected', [
(0, Series([1], index=[0])),
(1, Series([2, 3], index=[1, 2]))
])
def test_series_getitem_multiindex(access_method, level1_value, expected):
# GH 6018
# series regression getitem with a multi-index
s = Series([1, 2, 3])
s.index = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)])
result = access_method(s, level1_value)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize('level0_value', ['D', 'A'])
def test_series_getitem_duplicates_multiindex(level0_value):
# GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise
# the appropriate error, only in PY3 of course!
index = MultiIndex(levels=[[level0_value, 'B', 'C'],
[0, 26, 27, 37, 57, 67, 75, 82]],
codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2],
[1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
names=['tag', 'day'])
arr = np.random.randn(len(index), 1)
df = DataFrame(arr, index=index, columns=['val'])
# confirm indexing on missing value raises KeyError
if level0_value != 'A':
with pytest.raises(KeyError, match=r"^'A'$"):
df.val['A']
with pytest.raises(KeyError, match=r"^'X'$"):
df.val['X']
result = df.val[level0_value]
expected = Series(arr.ravel()[0:3], name='val', index=Index(
[26, 37, 57], name='day'))
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize('indexer', [
lambda s: s[2000, 3],
lambda s: s.loc[2000, 3]
])
def test_series_getitem(
multiindex_year_month_day_dataframe_random_data, indexer):
s = multiindex_year_month_day_dataframe_random_data['A']
expected = s.reindex(s.index[42:65])
expected.index = expected.index.droplevel(0).droplevel(0)
result = indexer(s)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize('indexer', [
lambda s: s[2000, 3, 10],
lambda s: s.loc[2000, 3, 10]
])
def test_series_getitem_returns_scalar(
multiindex_year_month_day_dataframe_random_data, indexer):
s = multiindex_year_month_day_dataframe_random_data['A']
expected = s.iloc[49]
result = indexer(s)
assert result == expected
@pytest.mark.parametrize('indexer,expected_error,expected_error_msg', [
(lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^356L?$"),
(lambda s: s[(2000, 3, 4)], KeyError, r"^356L?$"),
(lambda s: s.loc[(2000, 3, 4)], IndexingError, 'Too many indexers'),
(lambda s: s.__getitem__(len(s)), IndexError, 'index out of bounds'),
(lambda s: s[len(s)], IndexError, 'index out of bounds'),
(lambda s: s.iloc[len(s)], IndexError,
'single positional indexer is out-of-bounds')
])
def test_series_getitem_indexing_errors(
multiindex_year_month_day_dataframe_random_data, indexer,
expected_error, expected_error_msg):
s = multiindex_year_month_day_dataframe_random_data['A']
with pytest.raises(expected_error, match=expected_error_msg):
indexer(s)
def test_series_getitem_corner_generator(
multiindex_year_month_day_dataframe_random_data):
s = multiindex_year_month_day_dataframe_random_data['A']
result = s[(x > 0 for x in s)]
expected = s[s > 0]
tm.assert_series_equal(result, expected)
# ----------------------------------------------------------------------------
# test indexing of DataFrame with multi-level Index
# ----------------------------------------------------------------------------
def test_getitem_simple(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data.T
expected = df.values[:, 0]
result = df['foo', 'one'].values
tm.assert_almost_equal(result, expected)
@pytest.mark.parametrize('indexer,expected_error_msg', [
(lambda df: df[('foo', 'four')], r"^\('foo', 'four'\)$"),
(lambda df: df['foobar'], r"^'foobar'$")
])
def test_frame_getitem_simple_key_error(
multiindex_dataframe_random_data, indexer, expected_error_msg):
df = multiindex_dataframe_random_data.T
with pytest.raises(KeyError, match=expected_error_msg):
indexer(df)
def test_frame_getitem_multicolumn_empty_level():
df = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']})
df.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'],
['level3 item1', 'level3 item2']]
result = df['level1 item1']
expected = DataFrame([['1'], ['2'], ['3']], index=df.index,
columns=['level3 item1'])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize('indexer,expected_slice', [
(lambda df: df['foo'], slice(3)),
(lambda df: df['bar'], slice(3, 5)),
(lambda df: df.loc[:, 'bar'], slice(3, 5))
])
def test_frame_getitem_toplevel(
multiindex_dataframe_random_data, indexer, expected_slice):
df = multiindex_dataframe_random_data.T
expected = df.reindex(columns=df.columns[expected_slice])
expected.columns = expected.columns.droplevel(0)
result = indexer(df)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize('unicode_strings', [True, False])
def test_frame_mixed_depth_get(unicode_strings):
# If unicode_strings is True, the column labels in dataframe
# construction will use unicode strings in Python 2 (pull request
# #17099).
arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
['', 'OD', 'OD', 'result1', 'result2', 'result1'],
['', 'wx', 'wy', '', '', '']]
if unicode_strings:
arrays = [[u(s) for s in arr] for arr in arrays]
tuples = sorted(zip(*arrays))
index = MultiIndex.from_tuples(tuples)
df = DataFrame(np.random.randn(4, 6), columns=index)
result = df['a']
expected = df['a', '', ''].rename('a')
tm.assert_series_equal(result, expected)
result = df['routine1', 'result1']
expected = df['routine1', 'result1', '']
expected = expected.rename(('routine1', 'result1'))
tm.assert_series_equal(result, expected)
# ----------------------------------------------------------------------------
# test indexing of DataFrame with multi-level Index with duplicates
# ----------------------------------------------------------------------------
@pytest.fixture
def dataframe_with_duplicate_index():
"""Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
data = [['a', 'd', 'e', 'c', 'f', 'b'],
[1, 4, 5, 3, 6, 2],
[1, 4, 5, 3, 6, 2]]
index = ['h1', 'h3', 'h5']
columns = MultiIndex(
levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']],
codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
names=['main', 'sub'])
return DataFrame(data, index=index, columns=columns)
@pytest.mark.parametrize('indexer', [
lambda df: df[('A', 'A1')],
lambda df: df.loc[:, ('A', 'A1')]
])
def test_frame_mi_access(dataframe_with_duplicate_index, indexer):
# GH 4145
df = dataframe_with_duplicate_index
index = Index(['h1', 'h3', 'h5'])
columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub'])
expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T
result = indexer(df)
tm.assert_frame_equal(result, expected)
def test_frame_mi_access_returns_series(dataframe_with_duplicate_index):
# GH 4146, not returning a block manager when selecting a unique index
# from a duplicate index
# as of 4879, this returns a Series (which is similar to what happens
# with a non-unique)
df = dataframe_with_duplicate_index
expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1')
result = df['A']['A1']
tm.assert_series_equal(result, expected)
def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index):
# selecting a non_unique from the 2nd level
df = dataframe_with_duplicate_index
expected = DataFrame([['d', 4, 4], ['e', 5, 5]],
index=Index(['B2', 'B2'], name='sub'),
columns=['h1', 'h3', 'h5'], ).T
result = df['A']['B2']
tm.assert_frame_equal(result, expected)
@@ -0,0 +1,151 @@
import numpy as np
import pytest
from pandas import DataFrame, MultiIndex, Series
from pandas.util import testing as tm
@pytest.fixture
def simple_multiindex_dataframe():
"""
Factory function to create simple 3 x 3 dataframe with
both columns and row MultiIndex using supplied data or
random data by default.
"""
def _simple_multiindex_dataframe(data=None):
if data is None:
data = np.random.randn(3, 3)
return DataFrame(data, columns=[[2, 2, 4], [6, 8, 10]],
index=[[4, 4, 8], [8, 10, 12]])
return _simple_multiindex_dataframe
@pytest.mark.parametrize('indexer, expected', [
(lambda df: df.iloc[0],
lambda arr: Series(arr[0], index=[[2, 2, 4], [6, 8, 10]], name=(4, 8))),
(lambda df: df.iloc[2],
lambda arr: Series(arr[2], index=[[2, 2, 4], [6, 8, 10]], name=(8, 12))),
(lambda df: df.iloc[:, 2],
lambda arr: Series(
arr[:, 2], index=[[4, 4, 8], [8, 10, 12]], name=(4, 10)))
])
def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe):
arr = np.random.randn(3, 3)
df = simple_multiindex_dataframe(arr)
result = indexer(df)
expected = expected(arr)
tm.assert_series_equal(result, expected)
def test_iloc_returns_dataframe(simple_multiindex_dataframe):
df = simple_multiindex_dataframe()
result = df.iloc[[0, 1]]
expected = df.xs(4, drop_level=False)
tm.assert_frame_equal(result, expected)
def test_iloc_returns_scalar(simple_multiindex_dataframe):
arr = np.random.randn(3, 3)
df = simple_multiindex_dataframe(arr)
result = df.iloc[2, 2]
expected = arr[2, 2]
assert result == expected
def test_iloc_getitem_multiple_items():
# GH 5528
tup = zip(*[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']])
index = MultiIndex.from_tuples(tup)
df = DataFrame(np.random.randn(4, 4), index=index)
result = df.iloc[[2, 3]]
expected = df.xs('b', drop_level=False)
tm.assert_frame_equal(result, expected)
def test_iloc_getitem_labels():
# this is basically regular indexing
arr = np.random.randn(4, 3)
df = DataFrame(arr,
columns=[['i', 'i', 'j'], ['A', 'A', 'B']],
index=[['i', 'i', 'j', 'k'], ['X', 'X', 'Y', 'Y']])
result = df.iloc[2, 2]
expected = arr[2, 2]
assert result == expected
def test_frame_getitem_slice(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
result = df.iloc[:4]
expected = df[:4]
tm.assert_frame_equal(result, expected)
def test_frame_setitem_slice(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
df.iloc[:4] = 0
assert (df.values[:4] == 0).all()
assert (df.values[4:] != 0).all()
def test_indexing_ambiguity_bug_1678():
# GH 1678
columns = MultiIndex.from_tuples(
[('Ohio', 'Green'), ('Ohio', 'Red'), ('Colorado', 'Green')])
index = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1), ('b', 2)])
df = DataFrame(np.arange(12).reshape((4, 3)), index=index, columns=columns)
result = df.iloc[:, 1]
expected = df.loc[:, ('Ohio', 'Red')]
tm.assert_series_equal(result, expected)
def test_iloc_integer_locations():
# GH 13797
data = [['str00', 'str01'], ['str10', 'str11'], ['str20', 'srt21'],
['str30', 'str31'], ['str40', 'str41']]
index = MultiIndex.from_tuples(
[('CC', 'A'), ('CC', 'B'), ('CC', 'B'), ('BB', 'a'), ('BB', 'b')])
expected = DataFrame(data)
df = DataFrame(data, index=index)
result = DataFrame([[df.iloc[r, c] for c in range(2)] for r in range(5)])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
'data, indexes, values, expected_k', [
# test without indexer value in first level of MultiIndex
([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
# test like code sample 1 in the issue
([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100],
[755, 1066]),
# test like code sample 2 in the issue
([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
# test like code sample 3 in the issue
([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10],
[8, 15, 13])
])
def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k):
# GH17148
df = DataFrame(data=data, columns=['i', 'j', 'k'])
df = df.set_index(['i', 'j'])
series = df.k.copy()
for i, v in zip(indexes, values):
series.iloc[i] += v
df['k'] = expected_k
expected = df.k
tm.assert_series_equal(series, expected)
def test_getitem_iloc(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
result = df.iloc[2]
expected = df.xs(df.index[2])
tm.assert_series_equal(result, expected)
@@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
import warnings
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, MultiIndex, Series
import pandas.util.testing as tm
@pytest.mark.slow
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_multiindex_get_loc(): # GH7724, GH2646
with warnings.catch_warnings(record=True):
# test indexing into a multi-index before & past the lexsort depth
from numpy.random import randint, choice, randn
cols = ['jim', 'joe', 'jolie', 'joline', 'jolia']
def validate(mi, df, key):
mask = np.ones(len(df)).astype('bool')
# test for all partials of this key
for i, k in enumerate(key):
mask &= df.iloc[:, i] == k
if not mask.any():
assert key[:i + 1] not in mi.index
continue
assert key[:i + 1] in mi.index
right = df[mask].copy()
if i + 1 != len(key): # partial key
right.drop(cols[:i + 1], axis=1, inplace=True)
right.set_index(cols[i + 1:-1], inplace=True)
tm.assert_frame_equal(mi.loc[key[:i + 1]], right)
else: # full key
right.set_index(cols[:-1], inplace=True)
if len(right) == 1: # single hit
right = Series(right['jolia'].values,
name=right.index[0],
index=['jolia'])
tm.assert_series_equal(mi.loc[key[:i + 1]], right)
else: # multi hit
tm.assert_frame_equal(mi.loc[key[:i + 1]], right)
def loop(mi, df, keys):
for key in keys:
validate(mi, df, key)
n, m = 1000, 50
vals = [randint(0, 10, n), choice(
list('abcdefghij'), n), choice(
pd.date_range('20141009', periods=10).tolist(), n), choice(
list('ZYXWVUTSRQ'), n), randn(n)]
vals = list(map(tuple, zip(*vals)))
# bunch of keys for testing
keys = [randint(0, 11, m), choice(
list('abcdefghijk'), m), choice(
pd.date_range('20141009', periods=11).tolist(), m), choice(
list('ZYXWVUTSRQP'), m)]
keys = list(map(tuple, zip(*keys)))
keys += list(map(lambda t: t[:-1], vals[::n // m]))
# covers both unique index and non-unique index
df = DataFrame(vals, columns=cols)
a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1])
for frame in a, b:
for i in range(5): # lexsort depth
df = frame.copy() if i == 0 else frame.sort_values(
by=cols[:i])
mi = df.set_index(cols[:-1])
assert not mi.index.lexsort_depth < i
loop(mi, df, keys)
@pytest.mark.slow
def test_large_mi_dataframe_indexing():
# GH10645
result = MultiIndex.from_arrays([range(10 ** 6), range(10 ** 6)])
assert (not (10 ** 6, 0) in result)
@@ -0,0 +1,56 @@
from warnings import catch_warnings, simplefilter
import pytest
from pandas.compat import lrange
from pandas.errors import PerformanceWarning
from pandas import DataFrame, MultiIndex
from pandas.util import testing as tm
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexIx(object):
def test_frame_setitem_ix(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
frame.loc[('bar', 'two'), 'B'] = 5
assert frame.loc[('bar', 'two'), 'B'] == 5
# with integer labels
df = frame.copy()
df.columns = lrange(3)
df.loc[('bar', 'two'), 1] = 7
assert df.loc[('bar', 'two'), 1] == 7
with catch_warnings(record=True):
simplefilter("ignore", DeprecationWarning)
df = frame.copy()
df.columns = lrange(3)
df.ix[('bar', 'two'), 1] = 7
assert df.loc[('bar', 'two'), 1] == 7
def test_ix_general(self):
# ix general issues
# GH 2817
data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}}
df = DataFrame(data).set_index(keys=['col', 'year'])
key = 4.0, 2012
# emits a PerformanceWarning, ok
with tm.assert_produces_warning(PerformanceWarning):
tm.assert_frame_equal(df.loc[key], df.iloc[2:])
# this is ok
df.sort_index(inplace=True)
res = df.loc[key]
# col has float dtype, result should be Float64Index
index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3],
names=['col', 'year'])
expected = DataFrame({'amount': [222, 333, 444]}, index=index)
tm.assert_frame_equal(res, expected)
@@ -0,0 +1,378 @@
import itertools
from warnings import catch_warnings
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series
from pandas.util import testing as tm
@pytest.fixture
def single_level_multiindex():
"""single level MultiIndex"""
return MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
codes=[[0, 1, 2, 3]], names=['first'])
@pytest.fixture
def frame_random_data_integer_multi_index():
levels = [[0, 1], [0, 1, 2]]
codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
index = MultiIndex(levels=levels, codes=codes)
return DataFrame(np.random.randn(6, 2), index=index)
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexLoc(object):
def test_loc_getitem_series(self):
# GH14730
# passing a series as a key with a MultiIndex
index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
x = Series(index=index, data=range(9), dtype=np.float64)
y = Series([1, 3])
expected = Series(
data=[0, 1, 2, 6, 7, 8],
index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
dtype=np.float64)
result = x.loc[y]
tm.assert_series_equal(result, expected)
result = x.loc[[1, 3]]
tm.assert_series_equal(result, expected)
# GH15424
y1 = Series([1, 3], index=[1, 2])
result = x.loc[y1]
tm.assert_series_equal(result, expected)
empty = Series(data=[], dtype=np.float64)
expected = Series([], index=MultiIndex(
levels=index.levels, codes=[[], []], dtype=np.float64))
result = x.loc[empty]
tm.assert_series_equal(result, expected)
def test_loc_getitem_array(self):
# GH15434
# passing an array as a key with a MultiIndex
index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
x = Series(index=index, data=range(9), dtype=np.float64)
y = np.array([1, 3])
expected = Series(
data=[0, 1, 2, 6, 7, 8],
index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
dtype=np.float64)
result = x.loc[y]
tm.assert_series_equal(result, expected)
# empty array:
empty = np.array([])
expected = Series([], index=MultiIndex(
levels=index.levels, codes=[[], []], dtype=np.float64))
result = x.loc[empty]
tm.assert_series_equal(result, expected)
# 0-dim array (scalar):
scalar = np.int64(1)
expected = Series(
data=[0, 1, 2],
index=['A', 'B', 'C'],
dtype=np.float64)
result = x.loc[scalar]
tm.assert_series_equal(result, expected)
def test_loc_multiindex(self):
mi_labels = DataFrame(np.random.randn(3, 3),
columns=[['i', 'i', 'j'], ['A', 'A', 'B']],
index=[['i', 'i', 'j'], ['X', 'X', 'Y']])
mi_int = DataFrame(np.random.randn(3, 3),
columns=[[2, 2, 4], [6, 8, 10]],
index=[[4, 4, 8], [8, 10, 12]])
# the first row
rs = mi_labels.loc['i']
with catch_warnings(record=True):
xp = mi_labels.ix['i']
tm.assert_frame_equal(rs, xp)
# 2nd (last) columns
rs = mi_labels.loc[:, 'j']
with catch_warnings(record=True):
xp = mi_labels.ix[:, 'j']
tm.assert_frame_equal(rs, xp)
# corner column
rs = mi_labels.loc['j'].loc[:, 'j']
with catch_warnings(record=True):
xp = mi_labels.ix['j'].ix[:, 'j']
tm.assert_frame_equal(rs, xp)
# with a tuple
rs = mi_labels.loc[('i', 'X')]
with catch_warnings(record=True):
xp = mi_labels.ix[('i', 'X')]
tm.assert_frame_equal(rs, xp)
rs = mi_int.loc[4]
with catch_warnings(record=True):
xp = mi_int.ix[4]
tm.assert_frame_equal(rs, xp)
# missing label
pytest.raises(KeyError, lambda: mi_int.loc[2])
with catch_warnings(record=True):
# GH 21593
pytest.raises(KeyError, lambda: mi_int.ix[2])
def test_loc_multiindex_indexer_none(self):
# GH6788
# multi-index indexer is None (meaning take all)
attributes = ['Attribute' + str(i) for i in range(1)]
attribute_values = ['Value' + str(i) for i in range(5)]
index = MultiIndex.from_product([attributes, attribute_values])
df = 0.1 * np.random.randn(10, 1 * 5) + 0.5
df = DataFrame(df, columns=index)
result = df[attributes]
tm.assert_frame_equal(result, df)
# GH 7349
# loc with a multi-index seems to be doing fallback
df = DataFrame(np.arange(12).reshape(-1, 1),
index=MultiIndex.from_product([[1, 2, 3, 4],
[1, 2, 3]]))
expected = df.loc[([1, 2], ), :]
result = df.loc[[1, 2]]
tm.assert_frame_equal(result, expected)
def test_loc_multiindex_incomplete(self):
# GH 7399
# incomplete indexers
s = Series(np.arange(15, dtype='int64'),
MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
expected = s.loc[:, 'a':'c']
result = s.loc[0:4, 'a':'c']
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result, expected)
result = s.loc[:4, 'a':'c']
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result, expected)
result = s.loc[0:, 'a':'c']
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result, expected)
# GH 7400
# multiindexer gettitem with list of indexers skips wrong element
s = Series(np.arange(15, dtype='int64'),
MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
expected = s.iloc[[6, 7, 8, 12, 13, 14]]
result = s.loc[2:4:2, 'a':'c']
tm.assert_series_equal(result, expected)
def test_get_loc_single_level(self, single_level_multiindex):
single_level = single_level_multiindex
s = Series(np.random.randn(len(single_level)),
index=single_level)
for k in single_level.values:
s[k]
def test_loc_getitem_int_slice(self):
# GH 3053
# loc should treat integer slices like label slices
index = MultiIndex.from_tuples([t for t in itertools.product(
[6, 7, 8], ['a', 'b'])])
df = DataFrame(np.random.randn(6, 6), index, index)
result = df.loc[6:8, :]
expected = df
tm.assert_frame_equal(result, expected)
index = MultiIndex.from_tuples([t
for t in itertools.product(
[10, 20, 30], ['a', 'b'])])
df = DataFrame(np.random.randn(6, 6), index, index)
result = df.loc[20:30, :]
expected = df.iloc[2:]
tm.assert_frame_equal(result, expected)
# doc examples
result = df.loc[10, :]
expected = df.iloc[0:2]
expected.index = ['a', 'b']
tm.assert_frame_equal(result, expected)
result = df.loc[:, 10]
# expected = df.ix[:,10] (this fails)
expected = df[10]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
'indexer_type_1',
(list, tuple, set, slice, np.ndarray, Series, Index))
@pytest.mark.parametrize(
'indexer_type_2',
(list, tuple, set, slice, np.ndarray, Series, Index))
def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
# GH #19686
# .loc should work with nested indexers which can be
# any list-like objects (see `pandas.api.types.is_list_like`) or slices
def convert_nested_indexer(indexer_type, keys):
if indexer_type == np.ndarray:
return np.array(keys)
if indexer_type == slice:
return slice(*keys)
return indexer_type(keys)
a = [10, 20, 30]
b = [1, 2, 3]
index = MultiIndex.from_product([a, b])
df = DataFrame(
np.arange(len(index), dtype='int64'),
index=index, columns=['Data'])
keys = ([10, 20], [2, 3])
types = (indexer_type_1, indexer_type_2)
# check indexers with all the combinations of nested objects
# of all the valid types
indexer = tuple(
convert_nested_indexer(indexer_type, k)
for indexer_type, k in zip(types, keys))
result = df.loc[indexer, 'Data']
expected = Series(
[1, 2, 4, 5], name='Data',
index=MultiIndex.from_product(keys))
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize('indexer, is_level1, expected_error', [
([], False, None), # empty ok
(['A'], False, None),
(['A', 'D'], False, None),
(['D'], False, r"\['D'\] not in index"), # not any values found
(pd.IndexSlice[:, ['foo']], True, None),
(pd.IndexSlice[:, ['foo', 'bah']], True, None)
])
def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1,
expected_error):
# GH 7866
# multi-index slicing with missing indexers
idx = MultiIndex.from_product([['A', 'B', 'C'],
['foo', 'bar', 'baz']],
names=['one', 'two'])
s = Series(np.arange(9, dtype='int64'), index=idx).sort_index()
if indexer == []:
expected = s.iloc[[]]
elif is_level1:
expected = Series([0, 3, 6], index=MultiIndex.from_product(
[['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index()
else:
exp_idx = MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']],
names=['one', 'two'])
expected = Series(np.arange(3, dtype='int64'),
index=exp_idx).sort_index()
if expected_error is not None:
with pytest.raises(KeyError, match=expected_error):
s.loc[indexer]
else:
result = s.loc[indexer]
tm.assert_series_equal(result, expected)
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
@pytest.mark.parametrize('indexer', [
lambda s: s.loc[[(2000, 3, 10), (2000, 3, 13)]],
lambda s: s.ix[[(2000, 3, 10), (2000, 3, 13)]]
])
def test_series_loc_getitem_fancy(
multiindex_year_month_day_dataframe_random_data, indexer):
s = multiindex_year_month_day_dataframe_random_data['A']
expected = s.reindex(s.index[49:51])
result = indexer(s)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize('columns_indexer', [
([], slice(None)),
(['foo'], [])
])
def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer):
# GH 8737
# empty indexer
multi_index = MultiIndex.from_product((['foo', 'bar', 'baz'],
['alpha', 'beta']))
df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index)
df = df.sort_index(level=0, axis=1)
expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0])
result = df.loc[:, columns_indexer]
tm.assert_frame_equal(result, expected)
def test_loc_getitem_duplicates_multiindex_non_scalar_type_object():
# regression from < 0.14.0
# GH 7914
df = DataFrame([[np.mean, np.median], ['mean', 'median']],
columns=MultiIndex.from_tuples([('functs', 'mean'),
('functs', 'median')]),
index=['function', 'name'])
result = df.loc['function', ('functs', 'mean')]
expected = np.mean
assert result == expected
def test_loc_getitem_tuple_plus_slice():
# GH 671
df = DataFrame({'a': np.arange(10),
'b': np.arange(10),
'c': np.random.randn(10),
'd': np.random.randn(10)}
).set_index(['a', 'b'])
expected = df.loc[0, 0]
result = df.loc[(0, 0), :]
tm.assert_series_equal(result, expected)
def test_loc_getitem_int(frame_random_data_integer_multi_index):
df = frame_random_data_integer_multi_index
result = df.loc[1]
expected = df[-3:]
expected.index = expected.index.droplevel(0)
tm.assert_frame_equal(result, expected)
def test_loc_getitem_int_raises_exception(
frame_random_data_integer_multi_index):
df = frame_random_data_integer_multi_index
with pytest.raises(KeyError, match=r"^3L?$"):
df.loc[3]
def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
# test setup - check key not in dataframe
with pytest.raises(KeyError, match=r"^11L?$"):
df.loc[('bar', 'three'), 'B']
# in theory should be inserting in a sorted space????
df.loc[('bar', 'three'), 'B'] = 0
expected = 0
result = df.sort_index().loc[('bar', 'three'), 'B']
assert result == expected
@@ -0,0 +1,86 @@
import numpy as np
import pytest
import pandas._libs.index as _index
from pandas.errors import PerformanceWarning
import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series
from pandas.util import testing as tm
class TestMultiIndexBasic(object):
def test_multiindex_perf_warn(self):
df = DataFrame({'jim': [0, 0, 1, 1],
'joe': ['x', 'x', 'z', 'y'],
'jolie': np.random.rand(4)}).set_index(['jim', 'joe'])
with tm.assert_produces_warning(PerformanceWarning,
clear=[pd.core.index]):
df.loc[(1, 'z')]
df = df.iloc[[2, 1, 3, 0]]
with tm.assert_produces_warning(PerformanceWarning):
df.loc[(0, )]
def test_multiindex_contains_dropped(self):
# GH 19027
# test that dropped MultiIndex levels are not in the MultiIndex
# despite continuing to be in the MultiIndex's levels
idx = MultiIndex.from_product([[1, 2], [3, 4]])
assert 2 in idx
idx = idx.drop(2)
# drop implementation keeps 2 in the levels
assert 2 in idx.levels[0]
# but it should no longer be in the index itself
assert 2 not in idx
# also applies to strings
idx = MultiIndex.from_product([['a', 'b'], ['c', 'd']])
assert 'a' in idx
idx = idx.drop('a')
assert 'a' in idx.levels[0]
assert 'a' not in idx
@pytest.mark.parametrize("data, expected", [
(MultiIndex.from_product([(), ()]), True),
(MultiIndex.from_product([(1, 2), (3, 4)]), True),
(MultiIndex.from_product([('a', 'b'), (1, 2)]), False),
])
def test_multiindex_is_homogeneous_type(self, data, expected):
assert data._is_homogeneous_type is expected
def test_indexing_over_hashtable_size_cutoff(self):
n = 10000
old_cutoff = _index._SIZE_CUTOFF
_index._SIZE_CUTOFF = 20000
s = Series(np.arange(n),
MultiIndex.from_arrays((["a"] * n, np.arange(n))))
# hai it works!
assert s[("a", 5)] == 5
assert s[("a", 6)] == 6
assert s[("a", 7)] == 7
_index._SIZE_CUTOFF = old_cutoff
def test_multi_nan_indexing(self):
# GH 3588
df = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'],
'b': ["C1", "C2", "C3", "C4"],
"c": [10, 15, np.nan, 20]})
result = df.set_index(['a', 'b'], drop=False)
expected = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'],
'b': ["C1", "C2", "C3", "C4"],
"c": [10, 15, np.nan, 20]},
index=[Index(['R1', 'R2', np.nan, 'R4'],
name='a'),
Index(['C1', 'C2', 'C3', 'C4'], name='b')])
tm.assert_frame_equal(result, expected)
@@ -0,0 +1,103 @@
import numpy as np
import pytest
from pandas import DataFrame, MultiIndex, Panel, Series
from pandas.util import testing as tm
@pytest.mark.filterwarnings('ignore:\\nPanel:FutureWarning')
class TestMultiIndexPanel(object):
def test_iloc_getitem_panel_multiindex(self):
# GH 7199
# Panel with multi-index
multi_index = MultiIndex.from_tuples([('ONE', 'one'),
('TWO', 'two'),
('THREE', 'three')],
names=['UPPER', 'lower'])
simple_index = [x[0] for x in multi_index]
wd1 = Panel(items=['First', 'Second'],
major_axis=['a', 'b', 'c', 'd'],
minor_axis=multi_index)
wd2 = Panel(items=['First', 'Second'],
major_axis=['a', 'b', 'c', 'd'],
minor_axis=simple_index)
expected1 = wd1['First'].iloc[[True, True, True, False], [0, 2]]
result1 = wd1.iloc[0, [True, True, True, False], [0, 2]] # WRONG
tm.assert_frame_equal(result1, expected1)
expected2 = wd2['First'].iloc[[True, True, True, False], [0, 2]]
result2 = wd2.iloc[0, [True, True, True, False], [0, 2]]
tm.assert_frame_equal(result2, expected2)
expected1 = DataFrame(index=['a'], columns=multi_index,
dtype='float64')
result1 = wd1.iloc[0, [0], [0, 1, 2]]
tm.assert_frame_equal(result1, expected1)
expected2 = DataFrame(index=['a'], columns=simple_index,
dtype='float64')
result2 = wd2.iloc[0, [0], [0, 1, 2]]
tm.assert_frame_equal(result2, expected2)
# GH 7516
mi = MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')])
p = Panel(np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3),
items=['a', 'b', 'c'], major_axis=mi,
minor_axis=['u', 'v', 'w'])
result = p.iloc[:, 1, 0]
expected = Series([3, 12, 21], index=['a', 'b', 'c'], name='u')
tm.assert_series_equal(result, expected)
result = p.loc[:, (1, 'y'), 'u']
tm.assert_series_equal(result, expected)
def test_panel_setitem_with_multiindex(self):
# 10360
# failing with a multi-index
arr = np.array([[[1, 2, 3], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0]]],
dtype=np.float64)
# reg index
axes = dict(items=['A', 'B'], major_axis=[0, 1],
minor_axis=['X', 'Y', 'Z'])
p1 = Panel(0., **axes)
p1.iloc[0, 0, :] = [1, 2, 3]
expected = Panel(arr, **axes)
tm.assert_panel_equal(p1, expected)
# multi-indexes
axes['items'] = MultiIndex.from_tuples(
[('A', 'a'), ('B', 'b')])
p2 = Panel(0., **axes)
p2.iloc[0, 0, :] = [1, 2, 3]
expected = Panel(arr, **axes)
tm.assert_panel_equal(p2, expected)
axes['major_axis'] = MultiIndex.from_tuples(
[('A', 1), ('A', 2)])
p3 = Panel(0., **axes)
p3.iloc[0, 0, :] = [1, 2, 3]
expected = Panel(arr, **axes)
tm.assert_panel_equal(p3, expected)
axes['minor_axis'] = MultiIndex.from_product(
[['X'], range(3)])
p4 = Panel(0., **axes)
p4.iloc[0, 0, :] = [1, 2, 3]
expected = Panel(arr, **axes)
tm.assert_panel_equal(p4, expected)
arr = np.array(
[[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]],
dtype=np.float64)
p5 = Panel(0., **axes)
p5.iloc[0, :, 0] = [1, 2]
expected = Panel(arr, **axes)
tm.assert_panel_equal(p5, expected)
@@ -0,0 +1,183 @@
from warnings import catch_warnings, simplefilter
import numpy as np
import pytest
from pandas import DataFrame, MultiIndex
from pandas.util import testing as tm
class TestMultiIndexPartial(object):
def test_getitem_partial_int(self):
# GH 12416
# with single item
l1 = [10, 20]
l2 = ['a', 'b']
df = DataFrame(index=range(2),
columns=MultiIndex.from_product([l1, l2]))
expected = DataFrame(index=range(2),
columns=l2)
result = df[20]
tm.assert_frame_equal(result, expected)
# with list
expected = DataFrame(index=range(2),
columns=MultiIndex.from_product([l1[1:], l2]))
result = df[[20]]
tm.assert_frame_equal(result, expected)
# missing item:
with pytest.raises(KeyError, match='1'):
df[1]
with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
df[[1]]
def test_series_slice_partial(self):
pass
def test_xs_partial(self, multiindex_dataframe_random_data,
multiindex_year_month_day_dataframe_random_data):
frame = multiindex_dataframe_random_data
ymd = multiindex_year_month_day_dataframe_random_data
result = frame.xs('foo')
result2 = frame.loc['foo']
expected = frame.T['foo'].T
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, result2)
result = ymd.xs((2000, 4))
expected = ymd.loc[2000, 4]
tm.assert_frame_equal(result, expected)
# ex from #1796
index = MultiIndex(levels=[['foo', 'bar'], ['one', 'two'], [-1, 1]],
codes=[[0, 0, 0, 0, 1, 1, 1, 1],
[0, 0, 1, 1, 0, 0, 1, 1], [0, 1, 0, 1, 0, 1,
0, 1]])
df = DataFrame(np.random.randn(8, 4), index=index,
columns=list('abcd'))
result = df.xs(['foo', 'one'])
expected = df.loc['foo', 'one']
tm.assert_frame_equal(result, expected)
def test_getitem_partial(
self, multiindex_year_month_day_dataframe_random_data):
ymd = multiindex_year_month_day_dataframe_random_data
ymd = ymd.T
result = ymd[2000, 2]
expected = ymd.reindex(columns=ymd.columns[ymd.columns.codes[1] == 1])
expected.columns = expected.columns.droplevel(0).droplevel(0)
tm.assert_frame_equal(result, expected)
def test_fancy_slice_partial(
self, multiindex_dataframe_random_data,
multiindex_year_month_day_dataframe_random_data):
frame = multiindex_dataframe_random_data
result = frame.loc['bar':'baz']
expected = frame[3:7]
tm.assert_frame_equal(result, expected)
ymd = multiindex_year_month_day_dataframe_random_data
result = ymd.loc[(2000, 2):(2000, 4)]
lev = ymd.index.codes[1]
expected = ymd[(lev >= 1) & (lev <= 3)]
tm.assert_frame_equal(result, expected)
def test_getitem_partial_column_select(self):
idx = MultiIndex(codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
levels=[['a', 'b'], ['x', 'y'], ['p', 'q']])
df = DataFrame(np.random.rand(3, 2), index=idx)
result = df.loc[('a', 'y'), :]
expected = df.loc[('a', 'y')]
tm.assert_frame_equal(result, expected)
result = df.loc[('a', 'y'), [1, 0]]
expected = df.loc[('a', 'y')][[1, 0]]
tm.assert_frame_equal(result, expected)
with catch_warnings(record=True):
simplefilter("ignore", DeprecationWarning)
result = df.ix[('a', 'y'), [1, 0]]
tm.assert_frame_equal(result, expected)
pytest.raises(KeyError, df.loc.__getitem__,
(('a', 'foo'), slice(None, None)))
def test_partial_set(
self, multiindex_year_month_day_dataframe_random_data):
# GH #397
ymd = multiindex_year_month_day_dataframe_random_data
df = ymd.copy()
exp = ymd.copy()
df.loc[2000, 4] = 0
exp.loc[2000, 4].values[:] = 0
tm.assert_frame_equal(df, exp)
df['A'].loc[2000, 4] = 1
exp['A'].loc[2000, 4].values[:] = 1
tm.assert_frame_equal(df, exp)
df.loc[2000] = 5
exp.loc[2000].values[:] = 5
tm.assert_frame_equal(df, exp)
# this works...for now
df['A'].iloc[14] = 5
assert df['A'][14] == 5
# ---------------------------------------------------------------------
# AMBIGUOUS CASES!
def test_partial_ix_missing(
self, multiindex_year_month_day_dataframe_random_data):
pytest.skip("skipping for now")
ymd = multiindex_year_month_day_dataframe_random_data
result = ymd.loc[2000, 0]
expected = ymd.loc[2000]['A']
tm.assert_series_equal(result, expected)
# need to put in some work here
# self.ymd.loc[2000, 0] = 0
# assert (self.ymd.loc[2000]['A'] == 0).all()
# Pretty sure the second (and maybe even the first) is already wrong.
pytest.raises(Exception, ymd.loc.__getitem__, (2000, 6))
pytest.raises(Exception, ymd.loc.__getitem__, (2000, 6), 0)
# ---------------------------------------------------------------------
def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
expected = frame.copy()
result = frame.copy()
result.loc[['foo', 'bar']] = 0
expected.loc['foo'] = 0
expected.loc['bar'] = 0
tm.assert_frame_equal(result, expected)
expected = frame.copy()
result = frame.copy()
result.loc['foo':'bar'] = 0
expected.loc['foo'] = 0
expected.loc['bar'] = 0
tm.assert_frame_equal(result, expected)
expected = frame['A'].copy()
result = frame['A'].copy()
result.loc[['foo', 'bar']] = 0
expected.loc['foo'] = 0
expected.loc['bar'] = 0
tm.assert_series_equal(result, expected)
expected = frame['A'].copy()
result = frame['A'].copy()
result.loc['foo':'bar'] = 0
expected.loc['foo'] = 0
expected.loc['bar'] = 0
tm.assert_series_equal(result, expected)
@@ -0,0 +1,42 @@
from numpy.random import randn
from pandas import DataFrame, MultiIndex, Series
from pandas.util import testing as tm
class TestMultiIndexSetOps(object):
def test_multiindex_symmetric_difference(self):
# GH 13490
idx = MultiIndex.from_product([['a', 'b'], ['A', 'B']],
names=['a', 'b'])
result = idx ^ idx
assert result.names == idx.names
idx2 = idx.copy().rename(['A', 'B'])
result = idx ^ idx2
assert result.names == [None, None]
def test_mixed_depth_insert(self):
arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
['', 'OD', 'OD', 'result1', 'result2', 'result1'],
['', 'wx', 'wy', '', '', '']]
tuples = sorted(zip(*arrays))
index = MultiIndex.from_tuples(tuples)
df = DataFrame(randn(4, 6), columns=index)
result = df.copy()
expected = df.copy()
result['b'] = [1, 2, 3, 4]
expected['b', '', ''] = [1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
def test_dataframe_insert_column_all_na(self):
# GH #1534
mix = MultiIndex.from_tuples([('1a', '2a'), ('1a', '2b'), ('1a', '2c')
])
df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix)
s = Series({(1, 1): 1, (1, 2): 2})
df['new'] = s
assert df['new'].isna().all()
@@ -0,0 +1,439 @@
from warnings import catch_warnings, simplefilter
import numpy as np
from numpy.random import randn
import pytest
import pandas as pd
from pandas import (
DataFrame, MultiIndex, Series, Timestamp, date_range, isna, notna)
import pandas.core.common as com
from pandas.util import testing as tm
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexSetItem(object):
def test_setitem_multiindex(self):
with catch_warnings(record=True):
for index_fn in ('ix', 'loc'):
def assert_equal(a, b):
assert a == b
def check(target, indexers, value, compare_fn, expected=None):
fn = getattr(target, index_fn)
fn.__setitem__(indexers, value)
result = fn.__getitem__(indexers)
if expected is None:
expected = value
compare_fn(result, expected)
# GH7190
index = MultiIndex.from_product([np.arange(0, 100),
np.arange(0, 80)],
names=['time', 'firm'])
t, n = 0, 2
df = DataFrame(np.nan, columns=['A', 'w', 'l', 'a', 'x',
'X', 'd', 'profit'],
index=index)
check(target=df, indexers=((t, n), 'X'), value=0,
compare_fn=assert_equal)
df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x',
'X', 'd', 'profit'],
index=index)
check(target=df, indexers=((t, n), 'X'), value=1,
compare_fn=assert_equal)
df = DataFrame(columns=['A', 'w', 'l', 'a', 'x',
'X', 'd', 'profit'],
index=index)
check(target=df, indexers=((t, n), 'X'), value=2,
compare_fn=assert_equal)
# gh-7218: assigning with 0-dim arrays
df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x',
'X', 'd', 'profit'],
index=index)
check(target=df,
indexers=((t, n), 'X'),
value=np.array(3),
compare_fn=assert_equal,
expected=3, )
# GH5206
df = DataFrame(np.arange(25).reshape(5, 5),
columns='A,B,C,D,E'.split(','), dtype=float)
df['F'] = 99
row_selection = df['A'] % 2 == 0
col_selection = ['B', 'C']
with catch_warnings(record=True):
df.ix[row_selection, col_selection] = df['F']
output = DataFrame(99., index=[0, 2, 4], columns=['B', 'C'])
with catch_warnings(record=True):
tm.assert_frame_equal(df.ix[row_selection, col_selection],
output)
check(target=df,
indexers=(row_selection, col_selection),
value=df['F'],
compare_fn=tm.assert_frame_equal,
expected=output, )
# GH11372
idx = MultiIndex.from_product([
['A', 'B', 'C'],
date_range('2015-01-01', '2015-04-01', freq='MS')])
cols = MultiIndex.from_product([
['foo', 'bar'],
date_range('2016-01-01', '2016-02-01', freq='MS')])
df = DataFrame(np.random.random((12, 4)),
index=idx, columns=cols)
subidx = MultiIndex.from_tuples(
[('A', Timestamp('2015-01-01')),
('A', Timestamp('2015-02-01'))])
subcols = MultiIndex.from_tuples(
[('foo', Timestamp('2016-01-01')),
('foo', Timestamp('2016-02-01'))])
vals = DataFrame(np.random.random((2, 2)),
index=subidx, columns=subcols)
check(target=df,
indexers=(subidx, subcols),
value=vals,
compare_fn=tm.assert_frame_equal, )
# set all columns
vals = DataFrame(
np.random.random((2, 4)), index=subidx, columns=cols)
check(target=df,
indexers=(subidx, slice(None, None, None)),
value=vals,
compare_fn=tm.assert_frame_equal, )
# identity
copy = df.copy()
check(target=df, indexers=(df.index, df.columns), value=df,
compare_fn=tm.assert_frame_equal, expected=copy)
def test_multiindex_setitem(self):
# GH 3738
# setting with a multi-index right hand side
arrays = [np.array(['bar', 'bar', 'baz', 'qux', 'qux', 'bar']),
np.array(['one', 'two', 'one', 'one', 'two', 'one']),
np.arange(0, 6, 1)]
df_orig = DataFrame(np.random.randn(6, 3), index=arrays,
columns=['A', 'B', 'C']).sort_index()
expected = df_orig.loc[['bar']] * 2
df = df_orig.copy()
df.loc[['bar']] *= 2
tm.assert_frame_equal(df.loc[['bar']], expected)
# raise because these have differing levels
with pytest.raises(TypeError):
df.loc['bar'] *= 2
# from SO
# http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
df_orig = DataFrame.from_dict({'price': {
('DE', 'Coal', 'Stock'): 2,
('DE', 'Gas', 'Stock'): 4,
('DE', 'Elec', 'Demand'): 1,
('FR', 'Gas', 'Stock'): 5,
('FR', 'Solar', 'SupIm'): 0,
('FR', 'Wind', 'SupIm'): 0
}})
df_orig.index = MultiIndex.from_tuples(df_orig.index,
names=['Sit', 'Com', 'Type'])
expected = df_orig.copy()
expected.iloc[[0, 2, 3]] *= 2
idx = pd.IndexSlice
df = df_orig.copy()
df.loc[idx[:, :, 'Stock'], :] *= 2
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[idx[:, :, 'Stock'], 'price'] *= 2
tm.assert_frame_equal(df, expected)
def test_multiindex_assignment(self):
# GH3777 part 2
# mixed dtype
df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3),
columns=list('abc'),
index=[[4, 4, 8], [8, 10, 12]])
df['d'] = np.nan
arr = np.array([0., 1.])
with catch_warnings(record=True):
df.ix[4, 'd'] = arr
tm.assert_series_equal(df.ix[4, 'd'],
Series(arr, index=[8, 10], name='d'))
# single dtype
df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3),
columns=list('abc'),
index=[[4, 4, 8], [8, 10, 12]])
with catch_warnings(record=True):
df.ix[4, 'c'] = arr
exp = Series(arr, index=[8, 10], name='c', dtype='float64')
tm.assert_series_equal(df.ix[4, 'c'], exp)
# scalar ok
with catch_warnings(record=True):
df.ix[4, 'c'] = 10
exp = Series(10, index=[8, 10], name='c', dtype='float64')
tm.assert_series_equal(df.ix[4, 'c'], exp)
# invalid assignments
with pytest.raises(ValueError):
with catch_warnings(record=True):
df.ix[4, 'c'] = [0, 1, 2, 3]
with pytest.raises(ValueError):
with catch_warnings(record=True):
df.ix[4, 'c'] = [0]
# groupby example
NUM_ROWS = 100
NUM_COLS = 10
col_names = ['A' + num for num in
map(str, np.arange(NUM_COLS).tolist())]
index_cols = col_names[:5]
df = DataFrame(np.random.randint(5, size=(NUM_ROWS, NUM_COLS)),
dtype=np.int64, columns=col_names)
df = df.set_index(index_cols).sort_index()
grp = df.groupby(level=index_cols[:4])
df['new_col'] = np.nan
f_index = np.arange(5)
def f(name, df2):
return Series(np.arange(df2.shape[0]),
name=df2.index.values[0]).reindex(f_index)
# TODO(wesm): unused?
# new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T
# we are actually operating on a copy here
# but in this case, that's ok
for name, df2 in grp:
new_vals = np.arange(df2.shape[0])
with catch_warnings(record=True):
df.ix[name, 'new_col'] = new_vals
def test_series_setitem(
self, multiindex_year_month_day_dataframe_random_data):
ymd = multiindex_year_month_day_dataframe_random_data
s = ymd['A']
s[2000, 3] = np.nan
assert isna(s.values[42:65]).all()
assert notna(s.values[:42]).all()
assert notna(s.values[65:]).all()
s[2000, 3, 10] = np.nan
assert isna(s[49])
def test_frame_getitem_setitem_boolean(
self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
df = frame.T.copy()
values = df.values
result = df[df > 0]
expected = df.where(df > 0)
tm.assert_frame_equal(result, expected)
df[df > 0] = 5
values[values > 0] = 5
tm.assert_almost_equal(df.values, values)
df[df == 5] = 0
values[values == 5] = 0
tm.assert_almost_equal(df.values, values)
# a df that needs alignment first
df[df[:-1] < 0] = 2
np.putmask(values[:-1], values[:-1] < 0, 2)
tm.assert_almost_equal(df.values, values)
with pytest.raises(TypeError, match='boolean values only'):
df[df * 0] = 2
def test_frame_getitem_setitem_multislice(self):
levels = [['t1', 't2'], ['a', 'b', 'c']]
codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
midx = MultiIndex(codes=codes, levels=levels, names=[None, 'id'])
df = DataFrame({'value': [1, 2, 3, 7, 8]}, index=midx)
result = df.loc[:, 'value']
tm.assert_series_equal(df['value'], result)
with catch_warnings(record=True):
simplefilter("ignore", DeprecationWarning)
result = df.ix[:, 'value']
tm.assert_series_equal(df['value'], result)
result = df.loc[df.index[1:3], 'value']
tm.assert_series_equal(df['value'][1:3], result)
result = df.loc[:, :]
tm.assert_frame_equal(df, result)
result = df
df.loc[:, 'value'] = 10
result['value'] = 10
tm.assert_frame_equal(df, result)
df.loc[:, :] = 10
tm.assert_frame_equal(df, result)
def test_frame_setitem_multi_column(self):
df = DataFrame(randn(10, 4), columns=[['a', 'a', 'b', 'b'],
[0, 1, 0, 1]])
cp = df.copy()
cp['a'] = cp['b']
tm.assert_frame_equal(cp['a'], cp['b'])
# set with ndarray
cp = df.copy()
cp['a'] = cp['b'].values
tm.assert_frame_equal(cp['a'], cp['b'])
# ---------------------------------------
# #1803
columns = MultiIndex.from_tuples([('A', '1'), ('A', '2'), ('B', '1')])
df = DataFrame(index=[1, 3, 5], columns=columns)
# Works, but adds a column instead of updating the two existing ones
df['A'] = 0.0 # Doesn't work
assert (df['A'].values == 0).all()
# it broadcasts
df['B', '1'] = [1, 2, 3]
df['A'] = df['B', '1']
sliced_a1 = df['A', '1']
sliced_a2 = df['A', '2']
sliced_b1 = df['B', '1']
tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
assert sliced_a1.name == ('A', '1')
assert sliced_a2.name == ('A', '2')
assert sliced_b1.name == ('B', '1')
def test_getitem_setitem_tuple_plus_columns(
self, multiindex_year_month_day_dataframe_random_data):
# GH #1013
ymd = multiindex_year_month_day_dataframe_random_data
df = ymd[:5]
result = df.loc[(2000, 1, 6), ['A', 'B', 'C']]
expected = df.loc[2000, 1, 6][['A', 'B', 'C']]
tm.assert_series_equal(result, expected)
def test_getitem_setitem_slice_integers(self):
index = MultiIndex(levels=[[0, 1, 2], [0, 2]],
codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]])
frame = DataFrame(np.random.randn(len(index), 4), index=index,
columns=['a', 'b', 'c', 'd'])
res = frame.loc[1:2]
exp = frame.reindex(frame.index[2:])
tm.assert_frame_equal(res, exp)
frame.loc[1:2] = 7
assert (frame.loc[1:2] == 7).values.all()
series = Series(np.random.randn(len(index)), index=index)
res = series.loc[1:2]
exp = series.reindex(series.index[2:])
tm.assert_series_equal(res, exp)
series.loc[1:2] = 7
assert (series.loc[1:2] == 7).values.all()
def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
dft = frame.T
s = dft['foo', 'two']
dft['foo', 'two'] = s > s.median()
tm.assert_series_equal(dft['foo', 'two'], s > s.median())
# assert isinstance(dft._data.blocks[1].items, MultiIndex)
reindexed = dft.reindex(columns=[('foo', 'two')])
tm.assert_series_equal(reindexed['foo', 'two'], s > s.median())
def test_set_column_scalar_with_ix(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
subset = frame.index[[1, 4, 5]]
frame.loc[subset] = 99
assert (frame.loc[subset].values == 99).all()
col = frame['B']
col[subset] = 97
assert (frame.loc[subset, 'B'] == 97).all()
def test_nonunique_assignment_1750(self):
df = DataFrame([[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]],
columns=list("ABCD"))
df = df.set_index(['A', 'B'])
ix = MultiIndex.from_tuples([(1, 1)])
df.loc[ix, "C"] = '_'
assert (df.xs((1, 1))['C'] == '_').all()
def test_astype_assignment_with_dups(self):
# GH 4686
# assignment with dups that has a dtype change
cols = MultiIndex.from_tuples([('A', '1'), ('B', '1'), ('A', '2')])
df = DataFrame(np.arange(3).reshape((1, 3)),
columns=cols, dtype=object)
index = df.index.copy()
df['A'] = df['A'].astype(np.float64)
tm.assert_index_equal(df.index, index)
def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
# this works because we are modifying the underlying array
# really a no-no
df = multiindex_dataframe_random_data.T
df['foo'].values[:] = 0
assert (df['foo'].values == 0).all()
def test_frame_setitem_copy_raises(multiindex_dataframe_random_data):
# will raise/warn as its chained assignment
df = multiindex_dataframe_random_data.T
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
df['foo']['one'] = 2
def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data.T
expected = frame
df = frame.copy()
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
df['foo']['one'] = 2
result = df
tm.assert_frame_equal(result, expected)
@@ -0,0 +1,576 @@
from warnings import catch_warnings
import numpy as np
import pytest
from pandas.errors import UnsortedIndexError
import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series, Timestamp
from pandas.core.indexing import _non_reducing_slice
from pandas.tests.indexing.common import _mklbl
from pandas.util import testing as tm
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexSlicers(object):
def test_per_axis_per_level_getitem(self):
# GH6134
# example test case
ix = MultiIndex.from_product([_mklbl('A', 5), _mklbl('B', 7), _mklbl(
'C', 4), _mklbl('D', 2)])
df = DataFrame(np.arange(len(ix.get_values())), index=ix)
result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :]
expected = df.loc[[tuple([a, b, c, d])
for a, b, c, d in df.index.values
if (a == 'A1' or a == 'A2' or a == 'A3') and (
c == 'C1' or c == 'C3')]]
tm.assert_frame_equal(result, expected)
expected = df.loc[[tuple([a, b, c, d])
for a, b, c, d in df.index.values
if (a == 'A1' or a == 'A2' or a == 'A3') and (
c == 'C1' or c == 'C2' or c == 'C3')]]
result = df.loc[(slice('A1', 'A3'), slice(None), slice('C1', 'C3')), :]
tm.assert_frame_equal(result, expected)
# test multi-index slicing with per axis and per index controls
index = MultiIndex.from_tuples([('A', 1), ('A', 2),
('A', 3), ('B', 1)],
names=['one', 'two'])
columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
('b', 'foo'), ('b', 'bah')],
names=['lvl0', 'lvl1'])
df = DataFrame(
np.arange(16, dtype='int64').reshape(
4, 4), index=index, columns=columns)
df = df.sort_index(axis=0).sort_index(axis=1)
# identity
result = df.loc[(slice(None), slice(None)), :]
tm.assert_frame_equal(result, df)
result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))]
tm.assert_frame_equal(result, df)
result = df.loc[:, (slice(None), slice(None))]
tm.assert_frame_equal(result, df)
# index
result = df.loc[(slice(None), [1]), :]
expected = df.iloc[[0, 3]]
tm.assert_frame_equal(result, expected)
result = df.loc[(slice(None), 1), :]
expected = df.iloc[[0, 3]]
tm.assert_frame_equal(result, expected)
# columns
result = df.loc[:, (slice(None), ['foo'])]
expected = df.iloc[:, [1, 3]]
tm.assert_frame_equal(result, expected)
# both
result = df.loc[(slice(None), 1), (slice(None), ['foo'])]
expected = df.iloc[[0, 3], [1, 3]]
tm.assert_frame_equal(result, expected)
result = df.loc['A', 'a']
expected = DataFrame(dict(bar=[1, 5, 9], foo=[0, 4, 8]),
index=Index([1, 2, 3], name='two'),
columns=Index(['bar', 'foo'], name='lvl1'))
tm.assert_frame_equal(result, expected)
result = df.loc[(slice(None), [1, 2]), :]
expected = df.iloc[[0, 1, 3]]
tm.assert_frame_equal(result, expected)
# multi-level series
s = Series(np.arange(len(ix.get_values())), index=ix)
result = s.loc['A1':'A3', :, ['C1', 'C3']]
expected = s.loc[[tuple([a, b, c, d])
for a, b, c, d in s.index.values
if (a == 'A1' or a == 'A2' or a == 'A3') and (
c == 'C1' or c == 'C3')]]
tm.assert_series_equal(result, expected)
# boolean indexers
result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
expected = df.iloc[[2, 3]]
tm.assert_frame_equal(result, expected)
with pytest.raises(ValueError):
df.loc[(slice(None), np.array([True, False])), :]
# ambiguous cases
# these can be multiply interpreted (e.g. in this case
# as df.loc[slice(None),[1]] as well
pytest.raises(KeyError, lambda: df.loc[slice(None), [1]])
result = df.loc[(slice(None), [1]), :]
expected = df.iloc[[0, 3]]
tm.assert_frame_equal(result, expected)
# not lexsorted
assert df.index.lexsort_depth == 2
df = df.sort_index(level=1, axis=0)
assert df.index.lexsort_depth == 0
msg = ('MultiIndex slicing requires the index to be '
r'lexsorted: slicing on levels \[1\], lexsort depth 0')
with pytest.raises(UnsortedIndexError, match=msg):
df.loc[(slice(None), slice('bar')), :]
# GH 16734: not sorted, but no real slicing
result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
tm.assert_frame_equal(result, df.iloc[[1, 3], :])
def test_multiindex_slicers_non_unique(self):
# GH 7106
# non-unique mi index support
df = (DataFrame(dict(A=['foo', 'foo', 'foo', 'foo'],
B=['a', 'a', 'a', 'a'],
C=[1, 2, 1, 3],
D=[1, 2, 3, 4]))
.set_index(['A', 'B', 'C']).sort_index())
assert not df.index.is_unique
expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'],
C=[1, 1], D=[1, 3]))
.set_index(['A', 'B', 'C']).sort_index())
result = df.loc[(slice(None), slice(None), 1), :]
tm.assert_frame_equal(result, expected)
# this is equivalent of an xs expression
result = df.xs(1, level=2, drop_level=False)
tm.assert_frame_equal(result, expected)
df = (DataFrame(dict(A=['foo', 'foo', 'foo', 'foo'],
B=['a', 'a', 'a', 'a'],
C=[1, 2, 1, 2],
D=[1, 2, 3, 4]))
.set_index(['A', 'B', 'C']).sort_index())
assert not df.index.is_unique
expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'],
C=[1, 1], D=[1, 3]))
.set_index(['A', 'B', 'C']).sort_index())
result = df.loc[(slice(None), slice(None), 1), :]
assert not result.index.is_unique
tm.assert_frame_equal(result, expected)
# GH12896
# numpy-implementation dependent bug
ints = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 13, 14, 14, 16,
17, 18, 19, 200000, 200000]
n = len(ints)
idx = MultiIndex.from_arrays([['a'] * n, ints])
result = Series([1] * n, index=idx)
result = result.sort_index()
result = result.loc[(slice(None), slice(100000))]
expected = Series([1] * (n - 2), index=idx[:-2]).sort_index()
tm.assert_series_equal(result, expected)
def test_multiindex_slicers_datetimelike(self):
# GH 7429
# buggy/inconsistent behavior when slicing with datetime-like
import datetime
dates = [datetime.datetime(2012, 1, 1, 12, 12, 12) +
datetime.timedelta(days=i) for i in range(6)]
freq = [1, 2]
index = MultiIndex.from_product(
[dates, freq], names=['date', 'frequency'])
df = DataFrame(
np.arange(6 * 2 * 4, dtype='int64').reshape(
-1, 4), index=index, columns=list('ABCD'))
# multi-axis slicing
idx = pd.IndexSlice
expected = df.iloc[[0, 2, 4], [0, 1]]
result = df.loc[(slice(Timestamp('2012-01-01 12:12:12'),
Timestamp('2012-01-03 12:12:12')),
slice(1, 1)), slice('A', 'B')]
tm.assert_frame_equal(result, expected)
result = df.loc[(idx[Timestamp('2012-01-01 12:12:12'):Timestamp(
'2012-01-03 12:12:12')], idx[1:1]), slice('A', 'B')]
tm.assert_frame_equal(result, expected)
result = df.loc[(slice(Timestamp('2012-01-01 12:12:12'),
Timestamp('2012-01-03 12:12:12')), 1),
slice('A', 'B')]
tm.assert_frame_equal(result, expected)
# with strings
result = df.loc[(slice('2012-01-01 12:12:12', '2012-01-03 12:12:12'),
slice(1, 1)), slice('A', 'B')]
tm.assert_frame_equal(result, expected)
result = df.loc[(idx['2012-01-01 12:12:12':'2012-01-03 12:12:12'], 1),
idx['A', 'B']]
tm.assert_frame_equal(result, expected)
def test_multiindex_slicers_edges(self):
# GH 8132
# various edge cases
df = DataFrame(
{'A': ['A0'] * 5 + ['A1'] * 5 + ['A2'] * 5,
'B': ['B0', 'B0', 'B1', 'B1', 'B2'] * 3,
'DATE': ["2013-06-11", "2013-07-02", "2013-07-09", "2013-07-30",
"2013-08-06", "2013-06-11", "2013-07-02", "2013-07-09",
"2013-07-30", "2013-08-06", "2013-09-03", "2013-10-01",
"2013-07-09", "2013-08-06", "2013-09-03"],
'VALUES': [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2]})
df['DATE'] = pd.to_datetime(df['DATE'])
df1 = df.set_index(['A', 'B', 'DATE'])
df1 = df1.sort_index()
# A1 - Get all values under "A0" and "A1"
result = df1.loc[(slice('A1')), :]
expected = df1.iloc[0:10]
tm.assert_frame_equal(result, expected)
# A2 - Get all values from the start to "A2"
result = df1.loc[(slice('A2')), :]
expected = df1
tm.assert_frame_equal(result, expected)
# A3 - Get all values under "B1" or "B2"
result = df1.loc[(slice(None), slice('B1', 'B2')), :]
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]]
tm.assert_frame_equal(result, expected)
# A4 - Get all values between 2013-07-02 and 2013-07-09
result = df1.loc[(slice(None), slice(None),
slice('20130702', '20130709')), :]
expected = df1.iloc[[1, 2, 6, 7, 12]]
tm.assert_frame_equal(result, expected)
# B1 - Get all values in B0 that are also under A0, A1 and A2
result = df1.loc[(slice('A2'), slice('B0')), :]
expected = df1.iloc[[0, 1, 5, 6, 10, 11]]
tm.assert_frame_equal(result, expected)
# B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for
# the As)
result = df1.loc[(slice(None), slice('B2')), :]
expected = df1
tm.assert_frame_equal(result, expected)
# B3 - Get all values from B1 to B2 and up to 2013-08-06
result = df1.loc[(slice(None), slice('B1', 'B2'),
slice('2013-08-06')), :]
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]]
tm.assert_frame_equal(result, expected)
# B4 - Same as A4 but the start of the date slice is not a key.
# shows indexing on a partial selection slice
result = df1.loc[(slice(None), slice(None),
slice('20130701', '20130709')), :]
expected = df1.iloc[[1, 2, 6, 7, 12]]
tm.assert_frame_equal(result, expected)
def test_per_axis_per_level_doc_examples(self):
# test index maker
idx = pd.IndexSlice
# from indexing.rst / advanced
index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2),
_mklbl('C', 4), _mklbl('D', 2)])
columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
('b', 'foo'), ('b', 'bah')],
names=['lvl0', 'lvl1'])
df = DataFrame(np.arange(len(index) * len(columns), dtype='int64')
.reshape((len(index), len(columns))),
index=index, columns=columns)
result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :]
expected = df.loc[[tuple([a, b, c, d])
for a, b, c, d in df.index.values
if (a == 'A1' or a == 'A2' or a == 'A3') and (
c == 'C1' or c == 'C3')]]
tm.assert_frame_equal(result, expected)
result = df.loc[idx['A1':'A3', :, ['C1', 'C3']], :]
tm.assert_frame_equal(result, expected)
result = df.loc[(slice(None), slice(None), ['C1', 'C3']), :]
expected = df.loc[[tuple([a, b, c, d])
for a, b, c, d in df.index.values
if (c == 'C1' or c == 'C3')]]
tm.assert_frame_equal(result, expected)
result = df.loc[idx[:, :, ['C1', 'C3']], :]
tm.assert_frame_equal(result, expected)
# not sorted
with pytest.raises(UnsortedIndexError):
df.loc['A1', ('a', slice('foo'))]
# GH 16734: not sorted, but no real slicing
tm.assert_frame_equal(df.loc['A1', (slice(None), 'foo')],
df.loc['A1'].iloc[:, [0, 2]])
df = df.sort_index(axis=1)
# slicing
df.loc['A1', (slice(None), 'foo')]
df.loc[(slice(None), slice(None), ['C1', 'C3']), (slice(None), 'foo')]
# setitem
df.loc(axis=0)[:, :, ['C1', 'C3']] = -10
def test_loc_axis_arguments(self):
index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2),
_mklbl('C', 4), _mklbl('D', 2)])
columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
('b', 'foo'), ('b', 'bah')],
names=['lvl0', 'lvl1'])
df = DataFrame(np.arange(len(index) * len(columns), dtype='int64')
.reshape((len(index), len(columns))),
index=index,
columns=columns).sort_index().sort_index(axis=1)
# axis 0
result = df.loc(axis=0)['A1':'A3', :, ['C1', 'C3']]
expected = df.loc[[tuple([a, b, c, d])
for a, b, c, d in df.index.values
if (a == 'A1' or a == 'A2' or a == 'A3') and (
c == 'C1' or c == 'C3')]]
tm.assert_frame_equal(result, expected)
result = df.loc(axis='index')[:, :, ['C1', 'C3']]
expected = df.loc[[tuple([a, b, c, d])
for a, b, c, d in df.index.values
if (c == 'C1' or c == 'C3')]]
tm.assert_frame_equal(result, expected)
# axis 1
result = df.loc(axis=1)[:, 'foo']
expected = df.loc[:, (slice(None), 'foo')]
tm.assert_frame_equal(result, expected)
result = df.loc(axis='columns')[:, 'foo']
expected = df.loc[:, (slice(None), 'foo')]
tm.assert_frame_equal(result, expected)
# invalid axis
with pytest.raises(ValueError):
df.loc(axis=-1)[:, :, ['C1', 'C3']]
with pytest.raises(ValueError):
df.loc(axis=2)[:, :, ['C1', 'C3']]
with pytest.raises(ValueError):
df.loc(axis='foo')[:, :, ['C1', 'C3']]
def test_per_axis_per_level_setitem(self):
# test index maker
idx = pd.IndexSlice
# test multi-index slicing with per axis and per index controls
index = MultiIndex.from_tuples([('A', 1), ('A', 2),
('A', 3), ('B', 1)],
names=['one', 'two'])
columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
('b', 'foo'), ('b', 'bah')],
names=['lvl0', 'lvl1'])
df_orig = DataFrame(
np.arange(16, dtype='int64').reshape(
4, 4), index=index, columns=columns)
df_orig = df_orig.sort_index(axis=0).sort_index(axis=1)
# identity
df = df_orig.copy()
df.loc[(slice(None), slice(None)), :] = 100
expected = df_orig.copy()
expected.iloc[:, :] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc(axis=0)[:, :] = 100
expected = df_orig.copy()
expected.iloc[:, :] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100
expected = df_orig.copy()
expected.iloc[:, :] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[:, (slice(None), slice(None))] = 100
expected = df_orig.copy()
expected.iloc[:, :] = 100
tm.assert_frame_equal(df, expected)
# index
df = df_orig.copy()
df.loc[(slice(None), [1]), :] = 100
expected = df_orig.copy()
expected.iloc[[0, 3]] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[(slice(None), 1), :] = 100
expected = df_orig.copy()
expected.iloc[[0, 3]] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc(axis=0)[:, 1] = 100
expected = df_orig.copy()
expected.iloc[[0, 3]] = 100
tm.assert_frame_equal(df, expected)
# columns
df = df_orig.copy()
df.loc[:, (slice(None), ['foo'])] = 100
expected = df_orig.copy()
expected.iloc[:, [1, 3]] = 100
tm.assert_frame_equal(df, expected)
# both
df = df_orig.copy()
df.loc[(slice(None), 1), (slice(None), ['foo'])] = 100
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[idx[:, 1], idx[:, ['foo']]] = 100
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc['A', 'a'] = 100
expected = df_orig.copy()
expected.iloc[0:3, 0:2] = 100
tm.assert_frame_equal(df, expected)
# setting with a list-like
df = df_orig.copy()
df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array(
[[100, 100], [100, 100]], dtype='int64')
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] = 100
tm.assert_frame_equal(df, expected)
# not enough values
df = df_orig.copy()
with pytest.raises(ValueError):
df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array(
[[100], [100, 100]], dtype='int64')
with pytest.raises(ValueError):
df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array(
[100, 100, 100, 100], dtype='int64')
# with an alignable rhs
df = df_orig.copy()
df.loc[(slice(None), 1), (slice(None), ['foo'])] = df.loc[(slice(
None), 1), (slice(None), ['foo'])] * 5
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[(slice(None), 1), (slice(None), ['foo'])] *= df.loc[(slice(
None), 1), (slice(None), ['foo'])]
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
tm.assert_frame_equal(df, expected)
rhs = df_orig.loc[(slice(None), 1), (slice(None), ['foo'])].copy()
rhs.loc[:, ('c', 'bah')] = 10
df = df_orig.copy()
df.loc[(slice(None), 1), (slice(None), ['foo'])] *= rhs
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
tm.assert_frame_equal(df, expected)
def test_multiindex_label_slicing_with_negative_step(self):
s = Series(np.arange(20),
MultiIndex.from_product([list('abcde'), np.arange(4)]))
SLC = pd.IndexSlice
def assert_slices_equivalent(l_slc, i_slc):
tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc])
tm.assert_series_equal(s[l_slc], s.iloc[i_slc])
with catch_warnings(record=True):
tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc])
assert_slices_equivalent(SLC[::-1], SLC[::-1])
assert_slices_equivalent(SLC['d'::-1], SLC[15::-1])
assert_slices_equivalent(SLC[('d', )::-1], SLC[15::-1])
assert_slices_equivalent(SLC[:'d':-1], SLC[:11:-1])
assert_slices_equivalent(SLC[:('d', ):-1], SLC[:11:-1])
assert_slices_equivalent(SLC['d':'b':-1], SLC[15:3:-1])
assert_slices_equivalent(SLC[('d', ):'b':-1], SLC[15:3:-1])
assert_slices_equivalent(SLC['d':('b', ):-1], SLC[15:3:-1])
assert_slices_equivalent(SLC[('d', ):('b', ):-1], SLC[15:3:-1])
assert_slices_equivalent(SLC['b':'d':-1], SLC[:0])
assert_slices_equivalent(SLC[('c', 2)::-1], SLC[10::-1])
assert_slices_equivalent(SLC[:('c', 2):-1], SLC[:9:-1])
assert_slices_equivalent(SLC[('e', 0):('c', 2):-1], SLC[16:9:-1])
def test_multiindex_slice_first_level(self):
# GH 12697
freq = ['a', 'b', 'c', 'd']
idx = MultiIndex.from_product([freq, np.arange(500)])
df = DataFrame(list(range(2000)), index=idx, columns=['Test'])
df_slice = df.loc[pd.IndexSlice[:, 30:70], :]
result = df_slice.loc['a']
expected = DataFrame(list(range(30, 71)),
columns=['Test'], index=range(30, 71))
tm.assert_frame_equal(result, expected)
result = df_slice.loc['d']
expected = DataFrame(list(range(1530, 1571)),
columns=['Test'], index=range(30, 71))
tm.assert_frame_equal(result, expected)
def test_int_series_slicing(
self, multiindex_year_month_day_dataframe_random_data):
ymd = multiindex_year_month_day_dataframe_random_data
s = ymd['A']
result = s[5:]
expected = s.reindex(s.index[5:])
tm.assert_series_equal(result, expected)
exp = ymd['A'].copy()
s[5:] = 0
exp.values[5:] = 0
tm.assert_numpy_array_equal(s.values, exp.values)
result = ymd[5:]
expected = ymd.reindex(s.index[5:])
tm.assert_frame_equal(result, expected)
def test_non_reducing_slice_on_multiindex(self):
# GH 19861
dic = {
('a', 'd'): [1, 4],
('a', 'c'): [2, 3],
('b', 'c'): [3, 2],
('b', 'd'): [4, 1]
}
df = pd.DataFrame(dic, index=[0, 1])
idx = pd.IndexSlice
slice_ = idx[:, idx['b', 'd']]
tslice_ = _non_reducing_slice(slice_)
result = df.loc[tslice_]
expected = pd.DataFrame({('b', 'd'): [4, 1]})
tm.assert_frame_equal(result, expected)
@@ -0,0 +1,92 @@
import numpy as np
from numpy.random import randn
from pandas.compat import lzip
from pandas import DataFrame, MultiIndex, Series
from pandas.util import testing as tm
class TestMultiIndexSorted(object):
def test_getitem_multilevel_index_tuple_not_sorted(self):
index_columns = list("abc")
df = DataFrame([[0, 1, 0, "x"], [0, 0, 1, "y"]],
columns=index_columns + ["data"])
df = df.set_index(index_columns)
query_index = df.index[:1]
rs = df.loc[query_index, "data"]
xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=['a', 'b', 'c'])
xp = Series(['x'], index=xp_idx, name='data')
tm.assert_series_equal(rs, xp)
def test_getitem_slice_not_sorted(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
df = frame.sort_index(level=1).T
# buglet with int typechecking
result = df.iloc[:, :np.int32(3)]
expected = df.reindex(columns=df.columns[:3])
tm.assert_frame_equal(result, expected)
def test_frame_getitem_not_sorted2(self):
# 13431
df = DataFrame({'col1': ['b', 'd', 'b', 'a'],
'col2': [3, 1, 1, 2],
'data': ['one', 'two', 'three', 'four']})
df2 = df.set_index(['col1', 'col2'])
df2_original = df2.copy()
df2.index.set_levels(['b', 'd', 'a'], level='col1', inplace=True)
df2.index.set_codes([0, 1, 0, 2], level='col1', inplace=True)
assert not df2.index.is_lexsorted()
assert not df2.index.is_monotonic
assert df2_original.index.equals(df2.index)
expected = df2.sort_index()
assert expected.index.is_lexsorted()
assert expected.index.is_monotonic
result = df2.sort_index(level=0)
assert result.index.is_lexsorted()
assert result.index.is_monotonic
tm.assert_frame_equal(result, expected)
def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
df = frame.T
df['foo', 'four'] = 'foo'
arrays = [np.array(x) for x in zip(*df.columns.values)]
result = df['foo']
result2 = df.loc[:, 'foo']
expected = df.reindex(columns=df.columns[arrays[0] == 'foo'])
expected.columns = expected.columns.droplevel(0)
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result2, expected)
df = df.T
result = df.xs('foo')
result2 = df.loc['foo']
expected = df.reindex(df.index[arrays[0] == 'foo'])
expected.index = expected.index.droplevel(0)
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result2, expected)
def test_series_getitem_not_sorted(self):
arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'],
['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
tuples = lzip(*arrays)
index = MultiIndex.from_tuples(tuples)
s = Series(randn(8), index=index)
arrays = [np.array(x) for x in zip(*index.values)]
result = s['qux']
result2 = s.loc['qux']
expected = s[arrays[0] == 'qux']
expected.index = expected.index.droplevel(0)
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result2, expected)
@@ -0,0 +1,237 @@
import numpy as np
import pytest
from pandas.compat import lrange, product as cart_product
from pandas import DataFrame, Index, MultiIndex, Series, concat, date_range
import pandas.core.common as com
from pandas.util import testing as tm
@pytest.fixture
def four_level_index_dataframe():
arr = np.array([[-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
[0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
[-0.6662, -0.5243, -0.358, 0.89145, 2.5838]])
index = MultiIndex(
levels=[['a', 'x'], ['b', 'q'], [10.0032, 20.0, 30.0], [3, 4, 5]],
codes=[[0, 0, 1], [0, 1, 1], [0, 1, 2], [2, 1, 0]],
names=['one', 'two', 'three', 'four'])
return DataFrame(arr, index=index, columns=list('ABCDE'))
@pytest.mark.parametrize('key, level, exp_arr, exp_index', [
('a', 'lvl0', lambda x: x[:, 0:2], Index(['bar', 'foo'], name='lvl1')),
('foo', 'lvl1', lambda x: x[:, 1:2], Index(['a'], name='lvl0'))
])
def test_xs_named_levels_axis_eq_1(key, level, exp_arr, exp_index):
# see gh-2903
arr = np.random.randn(4, 4)
index = MultiIndex(levels=[['a', 'b'], ['bar', 'foo', 'hello', 'world']],
codes=[[0, 0, 1, 1], [0, 1, 2, 3]],
names=['lvl0', 'lvl1'])
df = DataFrame(arr, columns=index)
result = df.xs(key, level=level, axis=1)
expected = DataFrame(exp_arr(arr), columns=exp_index)
tm.assert_frame_equal(result, expected)
def test_xs_values(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
result = df.xs(('bar', 'two')).values
expected = df.values[4]
tm.assert_almost_equal(result, expected)
def test_xs_loc_equality(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
result = df.xs(('bar', 'two'))
expected = df.loc[('bar', 'two')]
tm.assert_series_equal(result, expected)
def test_xs_missing_values_in_index():
# see gh-6574
# missing values in returned index should be preserrved
acc = [
('a', 'abcde', 1),
('b', 'bbcde', 2),
('y', 'yzcde', 25),
('z', 'xbcde', 24),
('z', None, 26),
('z', 'zbcde', 25),
('z', 'ybcde', 26),
]
df = DataFrame(acc,
columns=['a1', 'a2', 'cnt']).set_index(['a1', 'a2'])
expected = DataFrame({'cnt': [24, 26, 25, 26]}, index=Index(
['xbcde', np.nan, 'zbcde', 'ybcde'], name='a2'))
result = df.xs('z', level='a1')
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize('key, level', [
('one', 'second'),
(['one'], ['second'])
])
def test_xs_with_duplicates(key, level, multiindex_dataframe_random_data):
# see gh-13719
frame = multiindex_dataframe_random_data
df = concat([frame] * 2)
assert df.index.is_unique is False
expected = concat([frame.xs('one', level='second')] * 2)
result = df.xs(key, level=level)
tm.assert_frame_equal(result, expected)
def test_xs_level(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
result = df.xs('two', level='second')
expected = df[df.index.get_level_values(1) == 'two']
expected.index = Index(['foo', 'bar', 'baz', 'qux'], name='first')
tm.assert_frame_equal(result, expected)
def test_xs_level_eq_2():
arr = np.random.randn(3, 5)
index = MultiIndex(
levels=[['a', 'p', 'x'], ['b', 'q', 'y'], ['c', 'r', 'z']],
codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]])
df = DataFrame(arr, index=index)
expected = DataFrame(arr[1:2], index=[['a'], ['b']])
result = df.xs('c', level=2)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize('indexer', [
lambda df: df.xs(('a', 4), level=['one', 'four']),
lambda df: df.xs('a').xs(4, level='four')
])
def test_xs_level_multiple(indexer, four_level_index_dataframe):
df = four_level_index_dataframe
expected_values = [[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]]
expected_index = MultiIndex(
levels=[['q'], [20.0]],
codes=[[0], [0]],
names=['two', 'three'])
expected = DataFrame(
expected_values, index=expected_index, columns=list('ABCDE'))
result = indexer(df)
tm.assert_frame_equal(result, expected)
def test_xs_setting_with_copy_error(multiindex_dataframe_random_data):
# this is a copy in 0.14
df = multiindex_dataframe_random_data
result = df.xs('two', level='second')
# setting this will give a SettingWithCopyError
# as we are trying to write a view
msg = 'A value is trying to be set on a copy of a slice from a DataFrame'
with pytest.raises(com.SettingWithCopyError, match=msg):
result[:] = 10
def test_xs_setting_with_copy_error_multiple(four_level_index_dataframe):
# this is a copy in 0.14
df = four_level_index_dataframe
result = df.xs(('a', 4), level=['one', 'four'])
# setting this will give a SettingWithCopyError
# as we are trying to write a view
msg = 'A value is trying to be set on a copy of a slice from a DataFrame'
with pytest.raises(com.SettingWithCopyError, match=msg):
result[:] = 10
def test_xs_integer_key():
# see gh-2107
dates = lrange(20111201, 20111205)
ids = 'abcde'
index = MultiIndex.from_tuples(
[x for x in cart_product(dates, ids)],
names=['date', 'secid'])
df = DataFrame(
np.random.randn(len(index), 3), index, ['X', 'Y', 'Z'])
result = df.xs(20111201, level='date')
expected = df.loc[20111201, :]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize('indexer', [
lambda df: df.xs('a', level=0),
lambda df: df.xs('a')
])
def test_xs_level0(indexer, four_level_index_dataframe):
df = four_level_index_dataframe
expected_values = [[-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]]
expected_index = MultiIndex(
levels=[['b', 'q'], [10.0032, 20.0], [4, 5]],
codes=[[0, 1], [0, 1], [1, 0]],
names=['two', 'three', 'four'])
expected = DataFrame(
expected_values, index=expected_index, columns=list('ABCDE'))
result = indexer(df)
tm.assert_frame_equal(result, expected)
def test_xs_level_series(multiindex_dataframe_random_data):
# this test is not explicitly testing .xs functionality
# TODO: move to another module or refactor
df = multiindex_dataframe_random_data
s = df['A']
result = s[:, 'two']
expected = df.xs('two', level=1)['A']
tm.assert_series_equal(result, expected)
def test_xs_level_series_ymd(multiindex_year_month_day_dataframe_random_data):
# this test is not explicitly testing .xs functionality
# TODO: move to another module or refactor
df = multiindex_year_month_day_dataframe_random_data
s = df['A']
result = s[2000, 5]
expected = df.loc[2000, 5]['A']
tm.assert_series_equal(result, expected)
def test_xs_level_series_slice_not_implemented(
multiindex_year_month_day_dataframe_random_data):
# this test is not explicitly testing .xs functionality
# TODO: move to another module or refactor
# not implementing this for now
df = multiindex_year_month_day_dataframe_random_data
s = df['A']
msg = r'\(2000, slice\(3, 4, None\)\)'
with pytest.raises(TypeError, match=msg):
s[2000, 3:4]
def test_series_getitem_multiindex_xs():
# GH6258
dt = list(date_range('20130903', periods=3))
idx = MultiIndex.from_product([list('AB'), dt])
s = Series([1, 3, 4, 1, 3, 4], index=idx)
expected = Series([1, 1], index=list('AB'))
result = s.xs('20130903', level=1)
tm.assert_series_equal(result, expected)
def test_series_getitem_multiindex_xs_by_label():
# GH5684
idx = MultiIndex.from_tuples([('a', 'one'), ('a', 'two'), ('b', 'one'),
('b', 'two')])
s = Series([1, 2, 3, 4], index=idx)
s.index.set_names(['L1', 'L2'], inplace=True)
expected = Series([1, 3], index=['a', 'b'])
expected.index.set_names(['L1'], inplace=True)
result = s.xs('one', level='L2')
tm.assert_series_equal(result, expected)
@@ -0,0 +1,268 @@
# -*- coding: utf-8 -*-
# pylint: disable-msg=W0612,E1101
import numpy as np
import pandas as pd
import pandas.util.testing as tm
class TestIndexingCallable(object):
def test_frame_loc_ix_callable(self):
# GH 11485
df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': list('aabb'),
'C': [1, 2, 3, 4]})
# iloc cannot use boolean Series (see GH3635)
# return bool indexer
res = df.loc[lambda x: x.A > 2]
tm.assert_frame_equal(res, df.loc[df.A > 2])
res = df.loc[lambda x: x.A > 2]
tm.assert_frame_equal(res, df.loc[df.A > 2])
res = df.loc[lambda x: x.A > 2, ]
tm.assert_frame_equal(res, df.loc[df.A > 2, ])
res = df.loc[lambda x: x.A > 2, ]
tm.assert_frame_equal(res, df.loc[df.A > 2, ])
res = df.loc[lambda x: x.B == 'b', :]
tm.assert_frame_equal(res, df.loc[df.B == 'b', :])
res = df.loc[lambda x: x.B == 'b', :]
tm.assert_frame_equal(res, df.loc[df.B == 'b', :])
res = df.loc[lambda x: x.A > 2, lambda x: x.columns == 'B']
tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]])
res = df.loc[lambda x: x.A > 2, lambda x: x.columns == 'B']
tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]])
res = df.loc[lambda x: x.A > 2, lambda x: 'B']
tm.assert_series_equal(res, df.loc[df.A > 2, 'B'])
res = df.loc[lambda x: x.A > 2, lambda x: 'B']
tm.assert_series_equal(res, df.loc[df.A > 2, 'B'])
res = df.loc[lambda x: x.A > 2, lambda x: ['A', 'B']]
tm.assert_frame_equal(res, df.loc[df.A > 2, ['A', 'B']])
res = df.loc[lambda x: x.A > 2, lambda x: ['A', 'B']]
tm.assert_frame_equal(res, df.loc[df.A > 2, ['A', 'B']])
res = df.loc[lambda x: x.A == 2, lambda x: ['A', 'B']]
tm.assert_frame_equal(res, df.loc[df.A == 2, ['A', 'B']])
res = df.loc[lambda x: x.A == 2, lambda x: ['A', 'B']]
tm.assert_frame_equal(res, df.loc[df.A == 2, ['A', 'B']])
# scalar
res = df.loc[lambda x: 1, lambda x: 'A']
assert res == df.loc[1, 'A']
res = df.loc[lambda x: 1, lambda x: 'A']
assert res == df.loc[1, 'A']
def test_frame_loc_ix_callable_mixture(self):
# GH 11485
df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': list('aabb'),
'C': [1, 2, 3, 4]})
res = df.loc[lambda x: x.A > 2, ['A', 'B']]
tm.assert_frame_equal(res, df.loc[df.A > 2, ['A', 'B']])
res = df.loc[lambda x: x.A > 2, ['A', 'B']]
tm.assert_frame_equal(res, df.loc[df.A > 2, ['A', 'B']])
res = df.loc[[2, 3], lambda x: ['A', 'B']]
tm.assert_frame_equal(res, df.loc[[2, 3], ['A', 'B']])
res = df.loc[[2, 3], lambda x: ['A', 'B']]
tm.assert_frame_equal(res, df.loc[[2, 3], ['A', 'B']])
res = df.loc[3, lambda x: ['A', 'B']]
tm.assert_series_equal(res, df.loc[3, ['A', 'B']])
res = df.loc[3, lambda x: ['A', 'B']]
tm.assert_series_equal(res, df.loc[3, ['A', 'B']])
def test_frame_loc_callable(self):
# GH 11485
df = pd.DataFrame({'X': [1, 2, 3, 4],
'Y': list('aabb')},
index=list('ABCD'))
# return label
res = df.loc[lambda x: ['A', 'C']]
tm.assert_frame_equal(res, df.loc[['A', 'C']])
res = df.loc[lambda x: ['A', 'C'], ]
tm.assert_frame_equal(res, df.loc[['A', 'C'], ])
res = df.loc[lambda x: ['A', 'C'], :]
tm.assert_frame_equal(res, df.loc[['A', 'C'], :])
res = df.loc[lambda x: ['A', 'C'], lambda x: 'X']
tm.assert_series_equal(res, df.loc[['A', 'C'], 'X'])
res = df.loc[lambda x: ['A', 'C'], lambda x: ['X']]
tm.assert_frame_equal(res, df.loc[['A', 'C'], ['X']])
# mixture
res = df.loc[['A', 'C'], lambda x: 'X']
tm.assert_series_equal(res, df.loc[['A', 'C'], 'X'])
res = df.loc[['A', 'C'], lambda x: ['X']]
tm.assert_frame_equal(res, df.loc[['A', 'C'], ['X']])
res = df.loc[lambda x: ['A', 'C'], 'X']
tm.assert_series_equal(res, df.loc[['A', 'C'], 'X'])
res = df.loc[lambda x: ['A', 'C'], ['X']]
tm.assert_frame_equal(res, df.loc[['A', 'C'], ['X']])
def test_frame_loc_callable_setitem(self):
# GH 11485
df = pd.DataFrame({'X': [1, 2, 3, 4],
'Y': list('aabb')},
index=list('ABCD'))
# return label
res = df.copy()
res.loc[lambda x: ['A', 'C']] = -20
exp = df.copy()
exp.loc[['A', 'C']] = -20
tm.assert_frame_equal(res, exp)
res = df.copy()
res.loc[lambda x: ['A', 'C'], :] = 20
exp = df.copy()
exp.loc[['A', 'C'], :] = 20
tm.assert_frame_equal(res, exp)
res = df.copy()
res.loc[lambda x: ['A', 'C'], lambda x: 'X'] = -1
exp = df.copy()
exp.loc[['A', 'C'], 'X'] = -1
tm.assert_frame_equal(res, exp)
res = df.copy()
res.loc[lambda x: ['A', 'C'], lambda x: ['X']] = [5, 10]
exp = df.copy()
exp.loc[['A', 'C'], ['X']] = [5, 10]
tm.assert_frame_equal(res, exp)
# mixture
res = df.copy()
res.loc[['A', 'C'], lambda x: 'X'] = np.array([-1, -2])
exp = df.copy()
exp.loc[['A', 'C'], 'X'] = np.array([-1, -2])
tm.assert_frame_equal(res, exp)
res = df.copy()
res.loc[['A', 'C'], lambda x: ['X']] = 10
exp = df.copy()
exp.loc[['A', 'C'], ['X']] = 10
tm.assert_frame_equal(res, exp)
res = df.copy()
res.loc[lambda x: ['A', 'C'], 'X'] = -2
exp = df.copy()
exp.loc[['A', 'C'], 'X'] = -2
tm.assert_frame_equal(res, exp)
res = df.copy()
res.loc[lambda x: ['A', 'C'], ['X']] = -4
exp = df.copy()
exp.loc[['A', 'C'], ['X']] = -4
tm.assert_frame_equal(res, exp)
def test_frame_iloc_callable(self):
# GH 11485
df = pd.DataFrame({'X': [1, 2, 3, 4],
'Y': list('aabb')},
index=list('ABCD'))
# return location
res = df.iloc[lambda x: [1, 3]]
tm.assert_frame_equal(res, df.iloc[[1, 3]])
res = df.iloc[lambda x: [1, 3], :]
tm.assert_frame_equal(res, df.iloc[[1, 3], :])
res = df.iloc[lambda x: [1, 3], lambda x: 0]
tm.assert_series_equal(res, df.iloc[[1, 3], 0])
res = df.iloc[lambda x: [1, 3], lambda x: [0]]
tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])
# mixture
res = df.iloc[[1, 3], lambda x: 0]
tm.assert_series_equal(res, df.iloc[[1, 3], 0])
res = df.iloc[[1, 3], lambda x: [0]]
tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])
res = df.iloc[lambda x: [1, 3], 0]
tm.assert_series_equal(res, df.iloc[[1, 3], 0])
res = df.iloc[lambda x: [1, 3], [0]]
tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])
def test_frame_iloc_callable_setitem(self):
# GH 11485
df = pd.DataFrame({'X': [1, 2, 3, 4],
'Y': list('aabb')},
index=list('ABCD'))
# return location
res = df.copy()
res.iloc[lambda x: [1, 3]] = 0
exp = df.copy()
exp.iloc[[1, 3]] = 0
tm.assert_frame_equal(res, exp)
res = df.copy()
res.iloc[lambda x: [1, 3], :] = -1
exp = df.copy()
exp.iloc[[1, 3], :] = -1
tm.assert_frame_equal(res, exp)
res = df.copy()
res.iloc[lambda x: [1, 3], lambda x: 0] = 5
exp = df.copy()
exp.iloc[[1, 3], 0] = 5
tm.assert_frame_equal(res, exp)
res = df.copy()
res.iloc[lambda x: [1, 3], lambda x: [0]] = 25
exp = df.copy()
exp.iloc[[1, 3], [0]] = 25
tm.assert_frame_equal(res, exp)
# mixture
res = df.copy()
res.iloc[[1, 3], lambda x: 0] = -3
exp = df.copy()
exp.iloc[[1, 3], 0] = -3
tm.assert_frame_equal(res, exp)
res = df.copy()
res.iloc[[1, 3], lambda x: [0]] = -5
exp = df.copy()
exp.iloc[[1, 3], [0]] = -5
tm.assert_frame_equal(res, exp)
res = df.copy()
res.iloc[lambda x: [1, 3], 0] = 10
exp = df.copy()
exp.iloc[[1, 3], 0] = 10
tm.assert_frame_equal(res, exp)
res = df.copy()
res.iloc[lambda x: [1, 3], [0]] = [-5, -5]
exp = df.copy()
exp.iloc[[1, 3], [0]] = [-5, -5]
tm.assert_frame_equal(res, exp)
@@ -0,0 +1,717 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
import pandas.compat as compat
from pandas.core.dtypes.common import is_categorical_dtype
from pandas.core.dtypes.dtypes import CategoricalDtype
import pandas as pd
from pandas import (
Categorical, CategoricalIndex, DataFrame, Index, Interval, Series,
Timestamp)
from pandas.api.types import CategoricalDtype as CDT
from pandas.util import testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal
class TestCategoricalIndex(object):
def setup_method(self, method):
self.df = DataFrame({'A': np.arange(6, dtype='int64'),
'B': Series(list('aabbca')).astype(
CDT(list('cab')))}).set_index('B')
self.df2 = DataFrame({'A': np.arange(6, dtype='int64'),
'B': Series(list('aabbca')).astype(
CDT(list('cabe')))}).set_index('B')
self.df3 = DataFrame({'A': np.arange(6, dtype='int64'),
'B': (Series([1, 1, 2, 1, 3, 2])
.astype(CDT([3, 2, 1], ordered=True)))
}).set_index('B')
self.df4 = DataFrame({'A': np.arange(6, dtype='int64'),
'B': (Series([1, 1, 2, 1, 3, 2])
.astype(CDT([3, 2, 1], ordered=False)))
}).set_index('B')
def test_loc_scalar(self):
result = self.df.loc['a']
expected = (DataFrame({'A': [0, 1, 5],
'B': (Series(list('aaa'))
.astype(CDT(list('cab'))))})
.set_index('B'))
assert_frame_equal(result, expected)
df = self.df.copy()
df.loc['a'] = 20
expected = (DataFrame({'A': [20, 20, 2, 3, 4, 20],
'B': (Series(list('aabbca'))
.astype(CDT(list('cab'))))})
.set_index('B'))
assert_frame_equal(df, expected)
# value not in the categories
pytest.raises(KeyError, lambda: df.loc['d'])
def f():
df.loc['d'] = 10
pytest.raises(TypeError, f)
def f():
df.loc['d', 'A'] = 10
pytest.raises(TypeError, f)
def f():
df.loc['d', 'C'] = 10
pytest.raises(TypeError, f)
def test_getitem_scalar(self):
cats = Categorical([Timestamp('12-31-1999'),
Timestamp('12-31-2000')])
s = Series([1, 2], index=cats)
expected = s.iloc[0]
result = s[cats[0]]
assert result == expected
def test_slicing_directly(self):
cat = Categorical(["a", "b", "c", "d", "a", "b", "c"])
sliced = cat[3]
assert sliced == "d"
sliced = cat[3:5]
expected = Categorical(["d", "a"], categories=['a', 'b', 'c', 'd'])
tm.assert_numpy_array_equal(sliced._codes, expected._codes)
tm.assert_index_equal(sliced.categories, expected.categories)
def test_slicing(self):
cat = Series(Categorical([1, 2, 3, 4]))
reversed = cat[::-1]
exp = np.array([4, 3, 2, 1], dtype=np.int64)
tm.assert_numpy_array_equal(reversed.__array__(), exp)
df = DataFrame({'value': (np.arange(100) + 1).astype('int64')})
df['D'] = pd.cut(df.value, bins=[0, 25, 50, 75, 100])
expected = Series([11, Interval(0, 25)], index=['value', 'D'], name=10)
result = df.iloc[10]
tm.assert_series_equal(result, expected)
expected = DataFrame({'value': np.arange(11, 21).astype('int64')},
index=np.arange(10, 20).astype('int64'))
expected['D'] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100])
result = df.iloc[10:20]
tm.assert_frame_equal(result, expected)
expected = Series([9, Interval(0, 25)], index=['value', 'D'], name=8)
result = df.loc[8]
tm.assert_series_equal(result, expected)
def test_slicing_and_getting_ops(self):
# systematically test the slicing operations:
# for all slicing ops:
# - returning a dataframe
# - returning a column
# - returning a row
# - returning a single value
cats = Categorical(
["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"])
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
values = [1, 2, 3, 4, 5, 6, 7]
df = DataFrame({"cats": cats, "values": values}, index=idx)
# the expected values
cats2 = Categorical(["b", "c"], categories=["a", "b", "c"])
idx2 = Index(["j", "k"])
values2 = [3, 4]
# 2:4,: | "j":"k",:
exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2)
# :,"cats" | :,0
exp_col = Series(cats, index=idx, name='cats')
# "j",: | 2,:
exp_row = Series(["b", 3], index=["cats", "values"], dtype="object",
name="j")
# "j","cats | 2,0
exp_val = "b"
# iloc
# frame
res_df = df.iloc[2:4, :]
tm.assert_frame_equal(res_df, exp_df)
assert is_categorical_dtype(res_df["cats"])
# row
res_row = df.iloc[2, :]
tm.assert_series_equal(res_row, exp_row)
assert isinstance(res_row["cats"], compat.string_types)
# col
res_col = df.iloc[:, 0]
tm.assert_series_equal(res_col, exp_col)
assert is_categorical_dtype(res_col)
# single value
res_val = df.iloc[2, 0]
assert res_val == exp_val
# loc
# frame
res_df = df.loc["j":"k", :]
tm.assert_frame_equal(res_df, exp_df)
assert is_categorical_dtype(res_df["cats"])
# row
res_row = df.loc["j", :]
tm.assert_series_equal(res_row, exp_row)
assert isinstance(res_row["cats"], compat.string_types)
# col
res_col = df.loc[:, "cats"]
tm.assert_series_equal(res_col, exp_col)
assert is_categorical_dtype(res_col)
# single value
res_val = df.loc["j", "cats"]
assert res_val == exp_val
# ix
# frame
# res_df = df.loc["j":"k",[0,1]] # doesn't work?
res_df = df.loc["j":"k", :]
tm.assert_frame_equal(res_df, exp_df)
assert is_categorical_dtype(res_df["cats"])
# row
res_row = df.loc["j", :]
tm.assert_series_equal(res_row, exp_row)
assert isinstance(res_row["cats"], compat.string_types)
# col
res_col = df.loc[:, "cats"]
tm.assert_series_equal(res_col, exp_col)
assert is_categorical_dtype(res_col)
# single value
res_val = df.loc["j", df.columns[0]]
assert res_val == exp_val
# iat
res_val = df.iat[2, 0]
assert res_val == exp_val
# at
res_val = df.at["j", "cats"]
assert res_val == exp_val
# fancy indexing
exp_fancy = df.iloc[[2]]
res_fancy = df[df["cats"] == "b"]
tm.assert_frame_equal(res_fancy, exp_fancy)
res_fancy = df[df["values"] == 3]
tm.assert_frame_equal(res_fancy, exp_fancy)
# get_value
res_val = df.at["j", "cats"]
assert res_val == exp_val
# i : int, slice, or sequence of integers
res_row = df.iloc[2]
tm.assert_series_equal(res_row, exp_row)
assert isinstance(res_row["cats"], compat.string_types)
res_df = df.iloc[slice(2, 4)]
tm.assert_frame_equal(res_df, exp_df)
assert is_categorical_dtype(res_df["cats"])
res_df = df.iloc[[2, 3]]
tm.assert_frame_equal(res_df, exp_df)
assert is_categorical_dtype(res_df["cats"])
res_col = df.iloc[:, 0]
tm.assert_series_equal(res_col, exp_col)
assert is_categorical_dtype(res_col)
res_df = df.iloc[:, slice(0, 2)]
tm.assert_frame_equal(res_df, df)
assert is_categorical_dtype(res_df["cats"])
res_df = df.iloc[:, [0, 1]]
tm.assert_frame_equal(res_df, df)
assert is_categorical_dtype(res_df["cats"])
def test_slicing_doc_examples(self):
# GH 7918
cats = Categorical(["a", "b", "b", "b", "c", "c", "c"],
categories=["a", "b", "c"])
idx = Index(["h", "i", "j", "k", "l", "m", "n", ])
values = [1, 2, 2, 2, 3, 4, 5]
df = DataFrame({"cats": cats, "values": values}, index=idx)
result = df.iloc[2:4, :]
expected = DataFrame(
{"cats": Categorical(['b', 'b'], categories=['a', 'b', 'c']),
"values": [2, 2]}, index=['j', 'k'])
tm.assert_frame_equal(result, expected)
result = df.iloc[2:4, :].dtypes
expected = Series(['category', 'int64'], ['cats', 'values'])
tm.assert_series_equal(result, expected)
result = df.loc["h":"j", "cats"]
expected = Series(Categorical(['a', 'b', 'b'],
categories=['a', 'b', 'c']),
index=['h', 'i', 'j'], name='cats')
tm.assert_series_equal(result, expected)
result = df.loc["h":"j", df.columns[0:1]]
expected = DataFrame({'cats': Categorical(['a', 'b', 'b'],
categories=['a', 'b', 'c'])},
index=['h', 'i', 'j'])
tm.assert_frame_equal(result, expected)
def test_getitem_category_type(self):
# GH 14580
# test iloc() on Series with Categorical data
s = Series([1, 2, 3]).astype('category')
# get slice
result = s.iloc[0:2]
expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
tm.assert_series_equal(result, expected)
# get list of indexes
result = s.iloc[[0, 1]]
expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
tm.assert_series_equal(result, expected)
# get boolean array
result = s.iloc[[True, False, False]]
expected = Series([1]).astype(CategoricalDtype([1, 2, 3]))
tm.assert_series_equal(result, expected)
def test_loc_listlike(self):
# list of labels
result = self.df.loc[['c', 'a']]
expected = self.df.iloc[[4, 0, 1, 5]]
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.loc[['a', 'b', 'e']]
exp_index = CategoricalIndex(
list('aaabbe'), categories=list('cabe'), name='B')
expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index)
assert_frame_equal(result, expected, check_index_type=True)
# element in the categories but not in the values
pytest.raises(KeyError, lambda: self.df2.loc['e'])
# assign is ok
df = self.df2.copy()
df.loc['e'] = 20
result = df.loc[['a', 'b', 'e']]
exp_index = CategoricalIndex(
list('aaabbe'), categories=list('cabe'), name='B')
expected = DataFrame({'A': [0, 1, 5, 2, 3, 20]}, index=exp_index)
assert_frame_equal(result, expected)
df = self.df2.copy()
result = df.loc[['a', 'b', 'e']]
exp_index = CategoricalIndex(
list('aaabbe'), categories=list('cabe'), name='B')
expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index)
assert_frame_equal(result, expected, check_index_type=True)
# not all labels in the categories
with pytest.raises(KeyError):
self.df2.loc[['a', 'd']]
def test_loc_listlike_dtypes(self):
# GH 11586
# unique categories and codes
index = CategoricalIndex(['a', 'b', 'c'])
df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index)
# unique slice
res = df.loc[['a', 'b']]
exp_index = CategoricalIndex(['a', 'b'],
categories=index.categories)
exp = DataFrame({'A': [1, 2], 'B': [4, 5]}, index=exp_index)
tm.assert_frame_equal(res, exp, check_index_type=True)
# duplicated slice
res = df.loc[['a', 'a', 'b']]
exp_index = CategoricalIndex(['a', 'a', 'b'],
categories=index.categories)
exp = DataFrame({'A': [1, 1, 2], 'B': [4, 4, 5]}, index=exp_index)
tm.assert_frame_equal(res, exp, check_index_type=True)
msg = ('a list-indexer must only include '
'values that are in the categories')
with pytest.raises(KeyError, match=msg):
df.loc[['a', 'x']]
# duplicated categories and codes
index = CategoricalIndex(['a', 'b', 'a'])
df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index)
# unique slice
res = df.loc[['a', 'b']]
exp = DataFrame({'A': [1, 3, 2],
'B': [4, 6, 5]},
index=CategoricalIndex(['a', 'a', 'b']))
tm.assert_frame_equal(res, exp, check_index_type=True)
# duplicated slice
res = df.loc[['a', 'a', 'b']]
exp = DataFrame(
{'A': [1, 3, 1, 3, 2],
'B': [4, 6, 4, 6, 5
]}, index=CategoricalIndex(['a', 'a', 'a', 'a', 'b']))
tm.assert_frame_equal(res, exp, check_index_type=True)
msg = ('a list-indexer must only include values '
'that are in the categories')
with pytest.raises(KeyError, match=msg):
df.loc[['a', 'x']]
# contains unused category
index = CategoricalIndex(
['a', 'b', 'a', 'c'], categories=list('abcde'))
df = DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=index)
res = df.loc[['a', 'b']]
exp = DataFrame({'A': [1, 3, 2], 'B': [5, 7, 6]},
index=CategoricalIndex(['a', 'a', 'b'],
categories=list('abcde')))
tm.assert_frame_equal(res, exp, check_index_type=True)
res = df.loc[['a', 'e']]
exp = DataFrame({'A': [1, 3, np.nan], 'B': [5, 7, np.nan]},
index=CategoricalIndex(['a', 'a', 'e'],
categories=list('abcde')))
tm.assert_frame_equal(res, exp, check_index_type=True)
# duplicated slice
res = df.loc[['a', 'a', 'b']]
exp = DataFrame({'A': [1, 3, 1, 3, 2], 'B': [5, 7, 5, 7, 6]},
index=CategoricalIndex(['a', 'a', 'a', 'a', 'b'],
categories=list('abcde')))
tm.assert_frame_equal(res, exp, check_index_type=True)
msg = ('a list-indexer must only include values '
'that are in the categories')
with pytest.raises(KeyError, match=msg):
df.loc[['a', 'x']]
def test_get_indexer_array(self):
arr = np.array([Timestamp('1999-12-31 00:00:00'),
Timestamp('2000-12-31 00:00:00')], dtype=object)
cats = [Timestamp('1999-12-31 00:00:00'),
Timestamp('2000-12-31 00:00:00')]
ci = CategoricalIndex(cats,
categories=cats,
ordered=False, dtype='category')
result = ci.get_indexer(arr)
expected = np.array([0, 1], dtype='intp')
tm.assert_numpy_array_equal(result, expected)
def test_get_indexer_same_categories_same_order(self):
ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'])
result = ci.get_indexer(CategoricalIndex(['b', 'b'],
categories=['a', 'b']))
expected = np.array([1, 1], dtype='intp')
tm.assert_numpy_array_equal(result, expected)
def test_get_indexer_same_categories_different_order(self):
# https://github.com/pandas-dev/pandas/issues/19551
ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'])
result = ci.get_indexer(CategoricalIndex(['b', 'b'],
categories=['b', 'a']))
expected = np.array([1, 1], dtype='intp')
tm.assert_numpy_array_equal(result, expected)
def test_getitem_with_listlike(self):
# GH 16115
cats = Categorical([Timestamp('12-31-1999'),
Timestamp('12-31-2000')])
expected = DataFrame([[1, 0], [0, 1]], dtype='uint8',
index=[0, 1], columns=cats)
dummies = pd.get_dummies(cats)
result = dummies[[c for c in dummies.columns]]
assert_frame_equal(result, expected)
def test_setitem_listlike(self):
# GH 9469
# properly coerce the input indexers
np.random.seed(1)
c = Categorical(np.random.randint(0, 5, size=150000).astype(
np.int8)).add_categories([-1000])
indexer = np.array([100000]).astype(np.int64)
c[indexer] = -1000
# we are asserting the code result here
# which maps to the -1000 category
result = c.codes[np.array([100000]).astype(np.int64)]
tm.assert_numpy_array_equal(result, np.array([5], dtype='int8'))
def test_ix_categorical_index(self):
# GH 12531
df = DataFrame(np.random.randn(3, 3),
index=list('ABC'), columns=list('XYZ'))
cdf = df.copy()
cdf.index = CategoricalIndex(df.index)
cdf.columns = CategoricalIndex(df.columns)
expect = Series(df.loc['A', :], index=cdf.columns, name='A')
assert_series_equal(cdf.loc['A', :], expect)
expect = Series(df.loc[:, 'X'], index=cdf.index, name='X')
assert_series_equal(cdf.loc[:, 'X'], expect)
exp_index = CategoricalIndex(list('AB'), categories=['A', 'B', 'C'])
expect = DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns,
index=exp_index)
assert_frame_equal(cdf.loc[['A', 'B'], :], expect)
exp_columns = CategoricalIndex(list('XY'),
categories=['X', 'Y', 'Z'])
expect = DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index,
columns=exp_columns)
assert_frame_equal(cdf.loc[:, ['X', 'Y']], expect)
# non-unique
df = DataFrame(np.random.randn(3, 3),
index=list('ABA'), columns=list('XYX'))
cdf = df.copy()
cdf.index = CategoricalIndex(df.index)
cdf.columns = CategoricalIndex(df.columns)
exp_index = CategoricalIndex(list('AA'), categories=['A', 'B'])
expect = DataFrame(df.loc['A', :], columns=cdf.columns,
index=exp_index)
assert_frame_equal(cdf.loc['A', :], expect)
exp_columns = CategoricalIndex(list('XX'), categories=['X', 'Y'])
expect = DataFrame(df.loc[:, 'X'], index=cdf.index,
columns=exp_columns)
assert_frame_equal(cdf.loc[:, 'X'], expect)
expect = DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns,
index=CategoricalIndex(list('AAB')))
assert_frame_equal(cdf.loc[['A', 'B'], :], expect)
expect = DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index,
columns=CategoricalIndex(list('XXY')))
assert_frame_equal(cdf.loc[:, ['X', 'Y']], expect)
def test_read_only_source(self):
# GH 10043
rw_array = np.eye(10)
rw_df = DataFrame(rw_array)
ro_array = np.eye(10)
ro_array.setflags(write=False)
ro_df = DataFrame(ro_array)
assert_frame_equal(rw_df.iloc[[1, 2, 3]], ro_df.iloc[[1, 2, 3]])
assert_frame_equal(rw_df.iloc[[1]], ro_df.iloc[[1]])
assert_series_equal(rw_df.iloc[1], ro_df.iloc[1])
assert_frame_equal(rw_df.iloc[1:3], ro_df.iloc[1:3])
assert_frame_equal(rw_df.loc[[1, 2, 3]], ro_df.loc[[1, 2, 3]])
assert_frame_equal(rw_df.loc[[1]], ro_df.loc[[1]])
assert_series_equal(rw_df.loc[1], ro_df.loc[1])
assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3])
def test_reindexing(self):
# reindexing
# convert to a regular index
result = self.df2.reindex(['a', 'b', 'e'])
expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan],
'B': Series(list('aaabbe'))}).set_index('B')
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.reindex(['a', 'b'])
expected = DataFrame({'A': [0, 1, 5, 2, 3],
'B': Series(list('aaabb'))}).set_index('B')
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.reindex(['e'])
expected = DataFrame({'A': [np.nan],
'B': Series(['e'])}).set_index('B')
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.reindex(['d'])
expected = DataFrame({'A': [np.nan],
'B': Series(['d'])}).set_index('B')
assert_frame_equal(result, expected, check_index_type=True)
# since we are actually reindexing with a Categorical
# then return a Categorical
cats = list('cabe')
result = self.df2.reindex(Categorical(['a', 'd'], categories=cats))
expected = DataFrame({'A': [0, 1, 5, np.nan],
'B': Series(list('aaad')).astype(
CDT(cats))}).set_index('B')
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.reindex(Categorical(['a'], categories=cats))
expected = DataFrame({'A': [0, 1, 5],
'B': Series(list('aaa')).astype(
CDT(cats))}).set_index('B')
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.reindex(['a', 'b', 'e'])
expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan],
'B': Series(list('aaabbe'))}).set_index('B')
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.reindex(['a', 'b'])
expected = DataFrame({'A': [0, 1, 5, 2, 3],
'B': Series(list('aaabb'))}).set_index('B')
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.reindex(['e'])
expected = DataFrame({'A': [np.nan],
'B': Series(['e'])}).set_index('B')
assert_frame_equal(result, expected, check_index_type=True)
# give back the type of categorical that we received
result = self.df2.reindex(Categorical(
['a', 'd'], categories=cats, ordered=True))
expected = DataFrame(
{'A': [0, 1, 5, np.nan],
'B': Series(list('aaad')).astype(
CDT(cats, ordered=True))}).set_index('B')
assert_frame_equal(result, expected, check_index_type=True)
result = self.df2.reindex(Categorical(
['a', 'd'], categories=['a', 'd']))
expected = DataFrame({'A': [0, 1, 5, np.nan],
'B': Series(list('aaad')).astype(
CDT(['a', 'd']))}).set_index('B')
assert_frame_equal(result, expected, check_index_type=True)
# passed duplicate indexers are not allowed
pytest.raises(ValueError, lambda: self.df2.reindex(['a', 'a']))
# args NotImplemented ATM
pytest.raises(NotImplementedError,
lambda: self.df2.reindex(['a'], method='ffill'))
pytest.raises(NotImplementedError,
lambda: self.df2.reindex(['a'], level=1))
pytest.raises(NotImplementedError,
lambda: self.df2.reindex(['a'], limit=2))
def test_loc_slice(self):
# slicing
# not implemented ATM
# GH9748
pytest.raises(TypeError, lambda: self.df.loc[1:5])
# result = df.loc[1:5]
# expected = df.iloc[[1,2,3,4]]
# assert_frame_equal(result, expected)
def test_boolean_selection(self):
df3 = self.df3
df4 = self.df4
result = df3[df3.index == 'a']
expected = df3.iloc[[]]
assert_frame_equal(result, expected)
result = df4[df4.index == 'a']
expected = df4.iloc[[]]
assert_frame_equal(result, expected)
result = df3[df3.index == 1]
expected = df3.iloc[[0, 1, 3]]
assert_frame_equal(result, expected)
result = df4[df4.index == 1]
expected = df4.iloc[[0, 1, 3]]
assert_frame_equal(result, expected)
# since we have an ordered categorical
# CategoricalIndex([1, 1, 2, 1, 3, 2],
# categories=[3, 2, 1],
# ordered=True,
# name=u'B')
result = df3[df3.index < 2]
expected = df3.iloc[[4]]
assert_frame_equal(result, expected)
result = df3[df3.index > 1]
expected = df3.iloc[[]]
assert_frame_equal(result, expected)
# unordered
# cannot be compared
# CategoricalIndex([1, 1, 2, 1, 3, 2],
# categories=[3, 2, 1],
# ordered=False,
# name=u'B')
pytest.raises(TypeError, lambda: df4[df4.index < 2])
pytest.raises(TypeError, lambda: df4[df4.index > 1])
def test_indexing_with_category(self):
# https://github.com/pandas-dev/pandas/issues/12564
# consistent result if comparing as Dataframe
cat = DataFrame({'A': ['foo', 'bar', 'baz']})
exp = DataFrame({'A': [True, False, False]})
res = (cat[['A']] == 'foo')
tm.assert_frame_equal(res, exp)
cat['A'] = cat['A'].astype('category')
res = (cat[['A']] == 'foo')
tm.assert_frame_equal(res, exp)
def test_map_with_dict_or_series(self):
orig_values = ['a', 'B', 1, 'a']
new_values = ['one', 2, 3.0, 'one']
cur_index = pd.CategoricalIndex(orig_values, name='XXX')
expected = pd.CategoricalIndex(new_values,
name='XXX', categories=[3.0, 2, 'one'])
mapper = pd.Series(new_values[:-1], index=orig_values[:-1])
output = cur_index.map(mapper)
# Order of categories in output can be different
tm.assert_index_equal(expected, output)
mapper = {o: n for o, n in
zip(orig_values[:-1], new_values[:-1])}
output = cur_index.map(mapper)
# Order of categories in output can be different
tm.assert_index_equal(expected, output)
@@ -0,0 +1,402 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame, Series, Timestamp, compat, date_range, option_context)
from pandas.core import common as com
from pandas.util import testing as tm
class TestCaching(object):
def test_slice_consolidate_invalidate_item_cache(self):
# this is chained assignment, but will 'work'
with option_context('chained_assignment', None):
# #3970
df = DataFrame({"aa": compat.lrange(5), "bb": [2.2] * 5})
# Creates a second float block
df["cc"] = 0.0
# caches a reference to the 'bb' series
df["bb"]
# repr machinery triggers consolidation
repr(df)
# Assignment to wrong series
df['bb'].iloc[0] = 0.17
df._clear_item_cache()
tm.assert_almost_equal(df['bb'][0], 0.17)
def test_setitem_cache_updating(self):
# GH 5424
cont = ['one', 'two', 'three', 'four', 'five', 'six', 'seven']
for do_ref in [False, False]:
df = DataFrame({'a': cont,
"b": cont[3:] + cont[:3],
'c': np.arange(7)})
# ref the cache
if do_ref:
df.loc[0, "c"]
# set it
df.loc[7, 'c'] = 1
assert df.loc[0, 'c'] == 0.0
assert df.loc[7, 'c'] == 1.0
# GH 7084
# not updating cache on series setting with slices
expected = DataFrame({'A': [600, 600, 600]},
index=date_range('5/7/2014', '5/9/2014'))
out = DataFrame({'A': [0, 0, 0]},
index=date_range('5/7/2014', '5/9/2014'))
df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]})
# loop through df to update out
six = Timestamp('5/7/2014')
eix = Timestamp('5/9/2014')
for ix, row in df.iterrows():
out.loc[six:eix, row['C']] = out.loc[six:eix, row['C']] + row['D']
tm.assert_frame_equal(out, expected)
tm.assert_series_equal(out['A'], expected['A'])
# try via a chain indexing
# this actually works
out = DataFrame({'A': [0, 0, 0]},
index=date_range('5/7/2014', '5/9/2014'))
for ix, row in df.iterrows():
v = out[row['C']][six:eix] + row['D']
out[row['C']][six:eix] = v
tm.assert_frame_equal(out, expected)
tm.assert_series_equal(out['A'], expected['A'])
out = DataFrame({'A': [0, 0, 0]},
index=date_range('5/7/2014', '5/9/2014'))
for ix, row in df.iterrows():
out.loc[six:eix, row['C']] += row['D']
tm.assert_frame_equal(out, expected)
tm.assert_series_equal(out['A'], expected['A'])
class TestChaining(object):
def test_setitem_chained_setfault(self):
# GH6026
data = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout']
mdata = ['right', 'left', 'left', 'left', 'right', 'left', 'none']
df = DataFrame({'response': np.array(data)})
mask = df.response == 'timeout'
df.response[mask] = 'none'
tm.assert_frame_equal(df, DataFrame({'response': mdata}))
recarray = np.rec.fromarrays([data], names=['response'])
df = DataFrame(recarray)
mask = df.response == 'timeout'
df.response[mask] = 'none'
tm.assert_frame_equal(df, DataFrame({'response': mdata}))
df = DataFrame({'response': data, 'response1': data})
mask = df.response == 'timeout'
df.response[mask] = 'none'
tm.assert_frame_equal(df, DataFrame({'response': mdata,
'response1': data}))
# GH 6056
expected = DataFrame(dict(A=[np.nan, 'bar', 'bah', 'foo', 'bar']))
df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar'])))
df['A'].iloc[0] = np.nan
result = df.head()
tm.assert_frame_equal(result, expected)
df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar'])))
df.A.iloc[0] = np.nan
result = df.head()
tm.assert_frame_equal(result, expected)
def test_detect_chained_assignment(self):
pd.set_option('chained_assignment', 'raise')
# work with the chain
expected = DataFrame([[-5, 1], [-6, 3]], columns=list('AB'))
df = DataFrame(np.arange(4).reshape(2, 2),
columns=list('AB'), dtype='int64')
assert df._is_copy is None
df['A'][0] = -5
df['A'][1] = -6
tm.assert_frame_equal(df, expected)
# test with the chaining
df = DataFrame({'A': Series(range(2), dtype='int64'),
'B': np.array(np.arange(2, 4), dtype=np.float64)})
assert df._is_copy is None
with pytest.raises(com.SettingWithCopyError):
df['A'][0] = -5
with pytest.raises(com.SettingWithCopyError):
df['A'][1] = np.nan
assert df['A']._is_copy is None
# Using a copy (the chain), fails
df = DataFrame({'A': Series(range(2), dtype='int64'),
'B': np.array(np.arange(2, 4), dtype=np.float64)})
with pytest.raises(com.SettingWithCopyError):
df.loc[0]['A'] = -5
# Doc example
df = DataFrame({'a': ['one', 'one', 'two', 'three',
'two', 'one', 'six'],
'c': Series(range(7), dtype='int64')})
assert df._is_copy is None
with pytest.raises(com.SettingWithCopyError):
indexer = df.a.str.startswith('o')
df[indexer]['c'] = 42
expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]})
df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]})
with pytest.raises(com.SettingWithCopyError):
df['A'][0] = 111
with pytest.raises(com.SettingWithCopyError):
df.loc[0]['A'] = 111
df.loc[0, 'A'] = 111
tm.assert_frame_equal(df, expected)
# gh-5475: Make sure that is_copy is picked up reconstruction
df = DataFrame({"A": [1, 2]})
assert df._is_copy is None
with tm.ensure_clean('__tmp__pickle') as path:
df.to_pickle(path)
df2 = pd.read_pickle(path)
df2["B"] = df2["A"]
df2["B"] = df2["A"]
# gh-5597: a spurious raise as we are setting the entire column here
from string import ascii_letters as letters
def random_text(nobs=100):
df = []
for i in range(nobs):
idx = np.random.randint(len(letters), size=2)
idx.sort()
df.append([letters[idx[0]:idx[1]]])
return DataFrame(df, columns=['letters'])
df = random_text(100000)
# Always a copy
x = df.iloc[[0, 1, 2]]
assert x._is_copy is not None
x = df.iloc[[0, 1, 2, 4]]
assert x._is_copy is not None
# Explicitly copy
indexer = df.letters.apply(lambda x: len(x) > 10)
df = df.loc[indexer].copy()
assert df._is_copy is None
df['letters'] = df['letters'].apply(str.lower)
# Implicitly take
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
df = df.loc[indexer]
assert df._is_copy is not None
df['letters'] = df['letters'].apply(str.lower)
# Implicitly take 2
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
df = df.loc[indexer]
assert df._is_copy is not None
df.loc[:, 'letters'] = df['letters'].apply(str.lower)
# Should be ok even though it's a copy!
assert df._is_copy is None
df['letters'] = df['letters'].apply(str.lower)
assert df._is_copy is None
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
df.loc[indexer, 'letters'] = (
df.loc[indexer, 'letters'].apply(str.lower))
# an identical take, so no copy
df = DataFrame({'a': [1]}).dropna()
assert df._is_copy is None
df['a'] += 1
df = DataFrame(np.random.randn(10, 4))
s = df.iloc[:, 0].sort_values()
tm.assert_series_equal(s, df.iloc[:, 0].sort_values())
tm.assert_series_equal(s, df[0].sort_values())
# see gh-6025: false positives
df = DataFrame({'column1': ['a', 'a', 'a'], 'column2': [4, 8, 9]})
str(df)
df['column1'] = df['column1'] + 'b'
str(df)
df = df[df['column2'] != 8]
str(df)
df['column1'] = df['column1'] + 'c'
str(df)
# from SO:
# http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
df = DataFrame(np.arange(0, 9), columns=['count'])
df['group'] = 'b'
with pytest.raises(com.SettingWithCopyError):
df.iloc[0:5]['group'] = 'a'
# Mixed type setting but same dtype & changing dtype
df = DataFrame(dict(A=date_range('20130101', periods=5),
B=np.random.randn(5),
C=np.arange(5, dtype='int64'),
D=list('abcde')))
with pytest.raises(com.SettingWithCopyError):
df.loc[2]['D'] = 'foo'
with pytest.raises(com.SettingWithCopyError):
df.loc[2]['C'] = 'foo'
with pytest.raises(com.SettingWithCopyError):
df['C'][2] = 'foo'
def test_setting_with_copy_bug(self):
# operating on a copy
df = DataFrame({'a': list(range(4)),
'b': list('ab..'),
'c': ['a', 'b', np.nan, 'd']})
mask = pd.isna(df.c)
def f():
df[['c']][mask] = df[['b']][mask]
pytest.raises(com.SettingWithCopyError, f)
# invalid warning as we are returning a new object
# GH 8730
df1 = DataFrame({'x': Series(['a', 'b', 'c']),
'y': Series(['d', 'e', 'f'])})
df2 = df1[['x']]
# this should not raise
df2['y'] = ['g', 'h', 'i']
def test_detect_chained_assignment_warnings(self):
with option_context("chained_assignment", "warn"):
df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
with tm.assert_produces_warning(com.SettingWithCopyWarning):
df.loc[0]["A"] = 111
def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self):
# xref gh-13017.
with option_context("chained_assignment", "warn"):
df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, -9]],
columns=["a", "a", "c"])
with tm.assert_produces_warning(com.SettingWithCopyWarning):
df.c.loc[df.c > 0] = None
expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, -9]],
columns=["a", "a", "c"])
tm.assert_frame_equal(df, expected)
def test_chained_getitem_with_lists(self):
# GH6394
# Regression in chained getitem indexing with embedded list-like from
# 0.12
def check(result, expected):
tm.assert_numpy_array_equal(result, expected)
assert isinstance(result, np.ndarray)
df = DataFrame({'A': 5 * [np.zeros(3)], 'B': 5 * [np.ones(3)]})
expected = df['A'].iloc[2]
result = df.loc[2, 'A']
check(result, expected)
result2 = df.iloc[2]['A']
check(result2, expected)
result3 = df['A'].loc[2]
check(result3, expected)
result4 = df['A'].iloc[2]
check(result4, expected)
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_cache_updating(self):
# GH 4939, make sure to update the cache on setitem
df = tm.makeDataFrame()
df['A'] # cache series
df.ix["Hello Friend"] = df.ix[0]
assert "Hello Friend" in df['A'].index
assert "Hello Friend" in df['B'].index
panel = tm.makePanel()
panel.ix[0] # get first item into cache
panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1
assert "A+1" in panel.ix[0].columns
assert "A+1" in panel.ix[1].columns
# 10264
df = DataFrame(np.zeros((5, 5), dtype='int64'), columns=[
'a', 'b', 'c', 'd', 'e'], index=range(5))
df['f'] = 0
df.f.values[3] = 1
# TODO(wesm): unused?
# y = df.iloc[np.arange(2, len(df))]
df.f.values[3] = 2
expected = DataFrame(np.zeros((5, 6), dtype='int64'), columns=[
'a', 'b', 'c', 'd', 'e', 'f'], index=range(5))
expected.at[3, 'f'] = 2
tm.assert_frame_equal(df, expected)
expected = Series([0, 0, 0, 2, 0], name='f')
tm.assert_series_equal(df.f, expected)
def test_deprecate_is_copy(self):
# GH18801
df = DataFrame({"A": [1, 2, 3]})
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
# getter
df.is_copy
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
# setter
df.is_copy = "test deprecated is_copy"
@@ -0,0 +1,939 @@
# -*- coding: utf-8 -*-
import itertools
import numpy as np
import pytest
import pandas.compat as compat
import pandas as pd
import pandas.util.testing as tm
###############################################################
# Index / Series common tests which may trigger dtype coercions
###############################################################
@pytest.fixture(autouse=True, scope='class')
def check_comprehensiveness(request):
# Iterate over combination of dtype, method and klass
# and ensure that each are contained within a collected test
cls = request.cls
combos = itertools.product(cls.klasses, cls.dtypes, [cls.method])
def has_test(combo):
klass, dtype, method = combo
cls_funcs = request.node.session.items
return any(klass in x.name and dtype in x.name and
method in x.name for x in cls_funcs)
for combo in combos:
if not has_test(combo):
msg = 'test method is not defined: {0}, {1}'
raise AssertionError(msg.format(cls.__name__, combo))
yield
class CoercionBase(object):
klasses = ['index', 'series']
dtypes = ['object', 'int64', 'float64', 'complex128', 'bool',
'datetime64', 'datetime64tz', 'timedelta64', 'period']
@property
def method(self):
raise NotImplementedError(self)
def _assert(self, left, right, dtype):
# explicitly check dtype to avoid any unexpected result
if isinstance(left, pd.Series):
tm.assert_series_equal(left, right)
elif isinstance(left, pd.Index):
tm.assert_index_equal(left, right)
else:
raise NotImplementedError
assert left.dtype == dtype
assert right.dtype == dtype
class TestSetitemCoercion(CoercionBase):
method = 'setitem'
def _assert_setitem_series_conversion(self, original_series, loc_value,
expected_series, expected_dtype):
""" test series value's coercion triggered by assignment """
temp = original_series.copy()
temp[1] = loc_value
tm.assert_series_equal(temp, expected_series)
# check dtype explicitly for sure
assert temp.dtype == expected_dtype
# .loc works different rule, temporary disable
# temp = original_series.copy()
# temp.loc[1] = loc_value
# tm.assert_series_equal(temp, expected_series)
@pytest.mark.parametrize("val,exp_dtype", [
(1, np.object),
(1.1, np.object),
(1 + 1j, np.object),
(True, np.object)])
def test_setitem_series_object(self, val, exp_dtype):
obj = pd.Series(list('abcd'))
assert obj.dtype == np.object
exp = pd.Series(['a', val, 'c', 'd'])
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
@pytest.mark.parametrize("val,exp_dtype", [
(1, np.int64),
(1.1, np.float64),
(1 + 1j, np.complex128),
(True, np.object)])
def test_setitem_series_int64(self, val, exp_dtype):
obj = pd.Series([1, 2, 3, 4])
assert obj.dtype == np.int64
if exp_dtype is np.float64:
exp = pd.Series([1, 1, 3, 4])
self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64)
pytest.xfail("GH12747 The result must be float")
exp = pd.Series([1, val, 3, 4])
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
@pytest.mark.parametrize("val,exp_dtype", [
(np.int32(1), np.int8),
(np.int16(2**9), np.int16)])
def test_setitem_series_int8(self, val, exp_dtype):
obj = pd.Series([1, 2, 3, 4], dtype=np.int8)
assert obj.dtype == np.int8
if exp_dtype is np.int16:
exp = pd.Series([1, 0, 3, 4], dtype=np.int8)
self._assert_setitem_series_conversion(obj, val, exp, np.int8)
pytest.xfail("BUG: it must be Series([1, 1, 3, 4], dtype=np.int16")
exp = pd.Series([1, val, 3, 4], dtype=np.int8)
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
@pytest.mark.parametrize("val,exp_dtype", [
(1, np.float64),
(1.1, np.float64),
(1 + 1j, np.complex128),
(True, np.object)])
def test_setitem_series_float64(self, val, exp_dtype):
obj = pd.Series([1.1, 2.2, 3.3, 4.4])
assert obj.dtype == np.float64
exp = pd.Series([1.1, val, 3.3, 4.4])
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
@pytest.mark.parametrize("val,exp_dtype", [
(1, np.complex128),
(1.1, np.complex128),
(1 + 1j, np.complex128),
(True, np.object)])
def test_setitem_series_complex128(self, val, exp_dtype):
obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j])
assert obj.dtype == np.complex128
exp = pd.Series([1 + 1j, val, 3 + 3j, 4 + 4j])
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
@pytest.mark.parametrize("val,exp_dtype", [
(1, np.int64),
(3, np.int64),
(1.1, np.float64),
(1 + 1j, np.complex128),
(True, np.bool)])
def test_setitem_series_bool(self, val, exp_dtype):
obj = pd.Series([True, False, True, False])
assert obj.dtype == np.bool
if exp_dtype is np.int64:
exp = pd.Series([True, True, True, False])
self._assert_setitem_series_conversion(obj, val, exp, np.bool)
pytest.xfail("TODO_GH12747 The result must be int")
elif exp_dtype is np.float64:
exp = pd.Series([True, True, True, False])
self._assert_setitem_series_conversion(obj, val, exp, np.bool)
pytest.xfail("TODO_GH12747 The result must be float")
elif exp_dtype is np.complex128:
exp = pd.Series([True, True, True, False])
self._assert_setitem_series_conversion(obj, val, exp, np.bool)
pytest.xfail("TODO_GH12747 The result must be complex")
exp = pd.Series([True, val, True, False])
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
@pytest.mark.parametrize("val,exp_dtype", [
(pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
(1, np.object),
('x', np.object)])
def test_setitem_series_datetime64(self, val, exp_dtype):
obj = pd.Series([pd.Timestamp('2011-01-01'),
pd.Timestamp('2011-01-02'),
pd.Timestamp('2011-01-03'),
pd.Timestamp('2011-01-04')])
assert obj.dtype == 'datetime64[ns]'
exp = pd.Series([pd.Timestamp('2011-01-01'),
val,
pd.Timestamp('2011-01-03'),
pd.Timestamp('2011-01-04')])
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
@pytest.mark.parametrize("val,exp_dtype", [
(pd.Timestamp('2012-01-01', tz='US/Eastern'),
'datetime64[ns, US/Eastern]'),
(pd.Timestamp('2012-01-01', tz='US/Pacific'), np.object),
(pd.Timestamp('2012-01-01'), np.object),
(1, np.object)])
def test_setitem_series_datetime64tz(self, val, exp_dtype):
tz = 'US/Eastern'
obj = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
pd.Timestamp('2011-01-02', tz=tz),
pd.Timestamp('2011-01-03', tz=tz),
pd.Timestamp('2011-01-04', tz=tz)])
assert obj.dtype == 'datetime64[ns, US/Eastern]'
exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
val,
pd.Timestamp('2011-01-03', tz=tz),
pd.Timestamp('2011-01-04', tz=tz)])
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
@pytest.mark.parametrize("val,exp_dtype", [
(pd.Timedelta('12 day'), 'timedelta64[ns]'),
(1, np.object),
('x', np.object)])
def test_setitem_series_timedelta64(self, val, exp_dtype):
obj = pd.Series([pd.Timedelta('1 day'),
pd.Timedelta('2 day'),
pd.Timedelta('3 day'),
pd.Timedelta('4 day')])
assert obj.dtype == 'timedelta64[ns]'
exp = pd.Series([pd.Timedelta('1 day'),
val,
pd.Timedelta('3 day'),
pd.Timedelta('4 day')])
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
def _assert_setitem_index_conversion(self, original_series, loc_key,
expected_index, expected_dtype):
""" test index's coercion triggered by assign key """
temp = original_series.copy()
temp[loc_key] = 5
exp = pd.Series([1, 2, 3, 4, 5], index=expected_index)
tm.assert_series_equal(temp, exp)
# check dtype explicitly for sure
assert temp.index.dtype == expected_dtype
temp = original_series.copy()
temp.loc[loc_key] = 5
exp = pd.Series([1, 2, 3, 4, 5], index=expected_index)
tm.assert_series_equal(temp, exp)
# check dtype explicitly for sure
assert temp.index.dtype == expected_dtype
@pytest.mark.parametrize("val,exp_dtype", [
('x', np.object),
(5, IndexError),
(1.1, np.object)])
def test_setitem_index_object(self, val, exp_dtype):
obj = pd.Series([1, 2, 3, 4], index=list('abcd'))
assert obj.index.dtype == np.object
if exp_dtype is IndexError:
temp = obj.copy()
with pytest.raises(exp_dtype):
temp[5] = 5
else:
exp_index = pd.Index(list('abcd') + [val])
self._assert_setitem_index_conversion(obj, val, exp_index,
exp_dtype)
@pytest.mark.parametrize("val,exp_dtype", [
(5, np.int64),
(1.1, np.float64),
('x', np.object)])
def test_setitem_index_int64(self, val, exp_dtype):
obj = pd.Series([1, 2, 3, 4])
assert obj.index.dtype == np.int64
exp_index = pd.Index([0, 1, 2, 3, val])
self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
@pytest.mark.parametrize("val,exp_dtype", [
(5, IndexError),
(5.1, np.float64),
('x', np.object)])
def test_setitem_index_float64(self, val, exp_dtype):
obj = pd.Series([1, 2, 3, 4], index=[1.1, 2.1, 3.1, 4.1])
assert obj.index.dtype == np.float64
if exp_dtype is IndexError:
# float + int -> int
temp = obj.copy()
with pytest.raises(exp_dtype):
temp[5] = 5
pytest.xfail("TODO_GH12747 The result must be float")
exp_index = pd.Index([1.1, 2.1, 3.1, 4.1, val])
self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
def test_setitem_series_period(self):
pass
def test_setitem_index_complex128(self):
pass
def test_setitem_index_bool(self):
pass
def test_setitem_index_datetime64(self):
pass
def test_setitem_index_datetime64tz(self):
pass
def test_setitem_index_timedelta64(self):
pass
def test_setitem_index_period(self):
pass
class TestInsertIndexCoercion(CoercionBase):
klasses = ['index']
method = 'insert'
def _assert_insert_conversion(self, original, value,
expected, expected_dtype):
""" test coercion triggered by insert """
target = original.copy()
res = target.insert(1, value)
tm.assert_index_equal(res, expected)
assert res.dtype == expected_dtype
@pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
(1, 1, np.object),
(1.1, 1.1, np.object),
(False, False, np.object),
('x', 'x', np.object)])
def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
obj = pd.Index(list('abcd'))
assert obj.dtype == np.object
exp = pd.Index(['a', coerced_val, 'b', 'c', 'd'])
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
@pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
(1, 1, np.int64),
(1.1, 1.1, np.float64),
(False, 0, np.int64),
('x', 'x', np.object)])
def test_insert_index_int64(self, insert, coerced_val, coerced_dtype):
obj = pd.Int64Index([1, 2, 3, 4])
assert obj.dtype == np.int64
exp = pd.Index([1, coerced_val, 2, 3, 4])
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
@pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
(1, 1., np.float64),
(1.1, 1.1, np.float64),
(False, 0., np.float64),
('x', 'x', np.object)])
def test_insert_index_float64(self, insert, coerced_val, coerced_dtype):
obj = pd.Float64Index([1., 2., 3., 4.])
assert obj.dtype == np.float64
exp = pd.Index([1., coerced_val, 2., 3., 4.])
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
@pytest.mark.parametrize('fill_val,exp_dtype', [
(pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
(pd.Timestamp('2012-01-01', tz='US/Eastern'),
'datetime64[ns, US/Eastern]')],
ids=['datetime64', 'datetime64tz'])
def test_insert_index_datetimes(self, fill_val, exp_dtype):
obj = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03',
'2011-01-04'], tz=fill_val.tz)
assert obj.dtype == exp_dtype
exp = pd.DatetimeIndex(['2011-01-01', fill_val.date(), '2011-01-02',
'2011-01-03', '2011-01-04'], tz=fill_val.tz)
self._assert_insert_conversion(obj, fill_val, exp, exp_dtype)
msg = "Passed item and index have different timezone"
if fill_val.tz:
with pytest.raises(ValueError, match=msg):
obj.insert(1, pd.Timestamp('2012-01-01'))
with pytest.raises(ValueError, match=msg):
obj.insert(1, pd.Timestamp('2012-01-01', tz='Asia/Tokyo'))
msg = "cannot insert DatetimeIndex with incompatible label"
with pytest.raises(TypeError, match=msg):
obj.insert(1, 1)
pytest.xfail("ToDo: must coerce to object")
def test_insert_index_timedelta64(self):
obj = pd.TimedeltaIndex(['1 day', '2 day', '3 day', '4 day'])
assert obj.dtype == 'timedelta64[ns]'
# timedelta64 + timedelta64 => timedelta64
exp = pd.TimedeltaIndex(['1 day', '10 day', '2 day', '3 day', '4 day'])
self._assert_insert_conversion(obj, pd.Timedelta('10 day'),
exp, 'timedelta64[ns]')
# ToDo: must coerce to object
msg = "cannot insert TimedeltaIndex with incompatible label"
with pytest.raises(TypeError, match=msg):
obj.insert(1, pd.Timestamp('2012-01-01'))
# ToDo: must coerce to object
msg = "cannot insert TimedeltaIndex with incompatible label"
with pytest.raises(TypeError, match=msg):
obj.insert(1, 1)
@pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
(pd.Period('2012-01', freq='M'), '2012-01', 'period[M]'),
(pd.Timestamp('2012-01-01'), pd.Timestamp('2012-01-01'), np.object),
(1, 1, np.object),
('x', 'x', np.object)])
def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
obj = pd.PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
freq='M')
assert obj.dtype == 'period[M]'
if isinstance(insert, pd.Period):
index_type = pd.PeriodIndex
else:
index_type = pd.Index
exp = index_type([pd.Period('2011-01', freq='M'),
coerced_val,
pd.Period('2011-02', freq='M'),
pd.Period('2011-03', freq='M'),
pd.Period('2011-04', freq='M')], freq='M')
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
def test_insert_index_complex128(self):
pass
def test_insert_index_bool(self):
pass
class TestWhereCoercion(CoercionBase):
method = 'where'
def _assert_where_conversion(self, original, cond, values,
expected, expected_dtype):
""" test coercion triggered by where """
target = original.copy()
res = target.where(cond, values)
self._assert(res, expected, expected_dtype)
@pytest.mark.parametrize("klass", [pd.Series, pd.Index],
ids=['series', 'index'])
@pytest.mark.parametrize("fill_val,exp_dtype", [
(1, np.object),
(1.1, np.object),
(1 + 1j, np.object),
(True, np.object)])
def test_where_object(self, klass, fill_val, exp_dtype):
obj = klass(list('abcd'))
assert obj.dtype == np.object
cond = klass([True, False, True, False])
if fill_val is True and klass is pd.Series:
ret_val = 1
else:
ret_val = fill_val
exp = klass(['a', ret_val, 'c', ret_val])
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
if fill_val is True:
values = klass([True, False, True, True])
else:
values = klass(fill_val * x for x in [5, 6, 7, 8])
exp = klass(['a', values[1], 'c', values[3]])
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
@pytest.mark.parametrize("klass", [pd.Series, pd.Index],
ids=['series', 'index'])
@pytest.mark.parametrize("fill_val,exp_dtype", [
(1, np.int64),
(1.1, np.float64),
(1 + 1j, np.complex128),
(True, np.object)])
def test_where_int64(self, klass, fill_val, exp_dtype):
if klass is pd.Index and exp_dtype is np.complex128:
pytest.skip("Complex Index not supported")
obj = klass([1, 2, 3, 4])
assert obj.dtype == np.int64
cond = klass([True, False, True, False])
exp = klass([1, fill_val, 3, fill_val])
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
if fill_val is True:
values = klass([True, False, True, True])
else:
values = klass(x * fill_val for x in [5, 6, 7, 8])
exp = klass([1, values[1], 3, values[3]])
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
@pytest.mark.parametrize("klass", [pd.Series, pd.Index],
ids=['series', 'index'])
@pytest.mark.parametrize("fill_val, exp_dtype", [
(1, np.float64),
(1.1, np.float64),
(1 + 1j, np.complex128),
(True, np.object)])
def test_where_float64(self, klass, fill_val, exp_dtype):
if klass is pd.Index and exp_dtype is np.complex128:
pytest.skip("Complex Index not supported")
obj = klass([1.1, 2.2, 3.3, 4.4])
assert obj.dtype == np.float64
cond = klass([True, False, True, False])
exp = klass([1.1, fill_val, 3.3, fill_val])
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
if fill_val is True:
values = klass([True, False, True, True])
else:
values = klass(x * fill_val for x in [5, 6, 7, 8])
exp = klass([1.1, values[1], 3.3, values[3]])
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
@pytest.mark.parametrize("fill_val,exp_dtype", [
(1, np.complex128),
(1.1, np.complex128),
(1 + 1j, np.complex128),
(True, np.object)])
def test_where_series_complex128(self, fill_val, exp_dtype):
obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j])
assert obj.dtype == np.complex128
cond = pd.Series([True, False, True, False])
exp = pd.Series([1 + 1j, fill_val, 3 + 3j, fill_val])
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
if fill_val is True:
values = pd.Series([True, False, True, True])
else:
values = pd.Series(x * fill_val for x in [5, 6, 7, 8])
exp = pd.Series([1 + 1j, values[1], 3 + 3j, values[3]])
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
@pytest.mark.parametrize("fill_val,exp_dtype", [
(1, np.object),
(1.1, np.object),
(1 + 1j, np.object),
(True, np.bool)])
def test_where_series_bool(self, fill_val, exp_dtype):
obj = pd.Series([True, False, True, False])
assert obj.dtype == np.bool
cond = pd.Series([True, False, True, False])
exp = pd.Series([True, fill_val, True, fill_val])
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
if fill_val is True:
values = pd.Series([True, False, True, True])
else:
values = pd.Series(x * fill_val for x in [5, 6, 7, 8])
exp = pd.Series([True, values[1], True, values[3]])
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
@pytest.mark.parametrize("fill_val,exp_dtype", [
(pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
(pd.Timestamp('2012-01-01', tz='US/Eastern'), np.object)],
ids=['datetime64', 'datetime64tz'])
def test_where_series_datetime64(self, fill_val, exp_dtype):
obj = pd.Series([pd.Timestamp('2011-01-01'),
pd.Timestamp('2011-01-02'),
pd.Timestamp('2011-01-03'),
pd.Timestamp('2011-01-04')])
assert obj.dtype == 'datetime64[ns]'
cond = pd.Series([True, False, True, False])
exp = pd.Series([pd.Timestamp('2011-01-01'), fill_val,
pd.Timestamp('2011-01-03'), fill_val])
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
values = pd.Series(pd.date_range(fill_val, periods=4))
if fill_val.tz:
exp = pd.Series([pd.Timestamp('2011-01-01'),
pd.Timestamp('2012-01-02 00:00', tz='US/Eastern'),
pd.Timestamp('2011-01-03'),
pd.Timestamp('2012-01-04 00:00',
tz='US/Eastern')])
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
exp = pd.Series([pd.Timestamp('2011-01-01'), values[1],
pd.Timestamp('2011-01-03'), values[3]])
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
def test_where_index_datetime(self):
fill_val = pd.Timestamp('2012-01-01')
exp_dtype = 'datetime64[ns]'
obj = pd.Index([pd.Timestamp('2011-01-01'),
pd.Timestamp('2011-01-02'),
pd.Timestamp('2011-01-03'),
pd.Timestamp('2011-01-04')])
assert obj.dtype == 'datetime64[ns]'
cond = pd.Index([True, False, True, False])
msg = ("Index\\(\\.\\.\\.\\) must be called with a collection "
"of some kind")
with pytest.raises(TypeError, match=msg):
obj.where(cond, fill_val)
values = pd.Index(pd.date_range(fill_val, periods=4))
exp = pd.Index([pd.Timestamp('2011-01-01'),
pd.Timestamp('2012-01-02'),
pd.Timestamp('2011-01-03'),
pd.Timestamp('2012-01-04')])
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
@pytest.mark.xfail(
reason="GH 22839: do not ignore timezone, must be object")
def test_where_index_datetimetz(self):
fill_val = pd.Timestamp('2012-01-01', tz='US/Eastern')
exp_dtype = np.object
obj = pd.Index([pd.Timestamp('2011-01-01'),
pd.Timestamp('2011-01-02'),
pd.Timestamp('2011-01-03'),
pd.Timestamp('2011-01-04')])
assert obj.dtype == 'datetime64[ns]'
cond = pd.Index([True, False, True, False])
msg = ("Index\\(\\.\\.\\.\\) must be called with a collection "
"of some kind")
with pytest.raises(TypeError, match=msg):
obj.where(cond, fill_val)
values = pd.Index(pd.date_range(fill_val, periods=4))
exp = pd.Index([pd.Timestamp('2011-01-01'),
pd.Timestamp('2012-01-02', tz='US/Eastern'),
pd.Timestamp('2011-01-03'),
pd.Timestamp('2012-01-04', tz='US/Eastern')],
dtype=exp_dtype)
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
def test_where_index_complex128(self):
pass
def test_where_index_bool(self):
pass
def test_where_series_datetime64tz(self):
pass
def test_where_series_timedelta64(self):
pass
def test_where_series_period(self):
pass
def test_where_index_datetime64tz(self):
pass
def test_where_index_timedelta64(self):
pass
def test_where_index_period(self):
pass
class TestFillnaSeriesCoercion(CoercionBase):
# not indexing, but place here for consisntency
method = 'fillna'
def test_has_comprehensive_tests(self):
pass
def _assert_fillna_conversion(self, original, value,
expected, expected_dtype):
""" test coercion triggered by fillna """
target = original.copy()
res = target.fillna(value)
self._assert(res, expected, expected_dtype)
@pytest.mark.parametrize("klass", [pd.Series, pd.Index],
ids=['series', 'index'])
@pytest.mark.parametrize("fill_val, fill_dtype", [
(1, np.object),
(1.1, np.object),
(1 + 1j, np.object),
(True, np.object)])
def test_fillna_object(self, klass, fill_val, fill_dtype):
obj = klass(['a', np.nan, 'c', 'd'])
assert obj.dtype == np.object
exp = klass(['a', fill_val, 'c', 'd'])
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
@pytest.mark.parametrize("klass", [pd.Series, pd.Index],
ids=['series', 'index'])
@pytest.mark.parametrize("fill_val,fill_dtype", [
(1, np.float64),
(1.1, np.float64),
(1 + 1j, np.complex128),
(True, np.object)])
def test_fillna_float64(self, klass, fill_val, fill_dtype):
obj = klass([1.1, np.nan, 3.3, 4.4])
assert obj.dtype == np.float64
exp = klass([1.1, fill_val, 3.3, 4.4])
# float + complex -> we don't support a complex Index
# complex for Series,
# object for Index
if fill_dtype == np.complex128 and klass == pd.Index:
fill_dtype = np.object
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
@pytest.mark.parametrize("fill_val,fill_dtype", [
(1, np.complex128),
(1.1, np.complex128),
(1 + 1j, np.complex128),
(True, np.object)])
def test_fillna_series_complex128(self, fill_val, fill_dtype):
obj = pd.Series([1 + 1j, np.nan, 3 + 3j, 4 + 4j])
assert obj.dtype == np.complex128
exp = pd.Series([1 + 1j, fill_val, 3 + 3j, 4 + 4j])
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
@pytest.mark.parametrize("klass", [pd.Series, pd.Index],
ids=['series', 'index'])
@pytest.mark.parametrize("fill_val,fill_dtype", [
(pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
(pd.Timestamp('2012-01-01', tz='US/Eastern'), np.object),
(1, np.object), ('x', np.object)],
ids=['datetime64', 'datetime64tz', 'object', 'object'])
def test_fillna_datetime(self, klass, fill_val, fill_dtype):
obj = klass([pd.Timestamp('2011-01-01'),
pd.NaT,
pd.Timestamp('2011-01-03'),
pd.Timestamp('2011-01-04')])
assert obj.dtype == 'datetime64[ns]'
exp = klass([pd.Timestamp('2011-01-01'),
fill_val,
pd.Timestamp('2011-01-03'),
pd.Timestamp('2011-01-04')])
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
@pytest.mark.parametrize("klass", [pd.Series, pd.Index])
@pytest.mark.parametrize("fill_val,fill_dtype", [
(pd.Timestamp('2012-01-01', tz='US/Eastern'),
'datetime64[ns, US/Eastern]'),
(pd.Timestamp('2012-01-01'), np.object),
(pd.Timestamp('2012-01-01', tz='Asia/Tokyo'), np.object),
(1, np.object),
('x', np.object)])
def test_fillna_datetime64tz(self, klass, fill_val, fill_dtype):
tz = 'US/Eastern'
obj = klass([pd.Timestamp('2011-01-01', tz=tz),
pd.NaT,
pd.Timestamp('2011-01-03', tz=tz),
pd.Timestamp('2011-01-04', tz=tz)])
assert obj.dtype == 'datetime64[ns, US/Eastern]'
exp = klass([pd.Timestamp('2011-01-01', tz=tz),
fill_val,
pd.Timestamp('2011-01-03', tz=tz),
pd.Timestamp('2011-01-04', tz=tz)])
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
def test_fillna_series_int64(self):
pass
def test_fillna_index_int64(self):
pass
def test_fillna_series_bool(self):
pass
def test_fillna_index_bool(self):
pass
def test_fillna_series_timedelta64(self):
pass
def test_fillna_series_period(self):
pass
def test_fillna_index_timedelta64(self):
pass
def test_fillna_index_period(self):
pass
class TestReplaceSeriesCoercion(CoercionBase):
klasses = ['series']
method = 'replace'
rep = {}
rep['object'] = ['a', 'b']
rep['int64'] = [4, 5]
rep['float64'] = [1.1, 2.2]
rep['complex128'] = [1 + 1j, 2 + 2j]
rep['bool'] = [True, False]
rep['datetime64[ns]'] = [pd.Timestamp('2011-01-01'),
pd.Timestamp('2011-01-03')]
for tz in ['UTC', 'US/Eastern']:
# to test tz => different tz replacement
key = 'datetime64[ns, {0}]'.format(tz)
rep[key] = [pd.Timestamp('2011-01-01', tz=tz),
pd.Timestamp('2011-01-03', tz=tz)]
rep['timedelta64[ns]'] = [pd.Timedelta('1 day'),
pd.Timedelta('2 day')]
@pytest.mark.parametrize('how', ['dict', 'series'])
@pytest.mark.parametrize('to_key', [
'object', 'int64', 'float64', 'complex128', 'bool', 'datetime64[ns]',
'datetime64[ns, UTC]', 'datetime64[ns, US/Eastern]', 'timedelta64[ns]'
], ids=['object', 'int64', 'float64', 'complex128', 'bool',
'datetime64', 'datetime64tz', 'datetime64tz', 'timedelta64'])
@pytest.mark.parametrize('from_key', [
'object', 'int64', 'float64', 'complex128', 'bool', 'datetime64[ns]',
'datetime64[ns, UTC]', 'datetime64[ns, US/Eastern]', 'timedelta64[ns]']
)
def test_replace_series(self, how, to_key, from_key):
if from_key == 'bool' and how == 'series' and compat.PY3:
# doesn't work in PY3, though ...dict_from_bool works fine
pytest.skip("doesn't work as in PY3")
index = pd.Index([3, 4], name='xxx')
obj = pd.Series(self.rep[from_key], index=index, name='yyy')
assert obj.dtype == from_key
if (from_key.startswith('datetime') and to_key.startswith('datetime')):
# tested below
return
elif from_key in ['datetime64[ns, US/Eastern]', 'datetime64[ns, UTC]']:
# tested below
return
if how == 'dict':
replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
elif how == 'series':
replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
else:
raise ValueError
result = obj.replace(replacer)
if ((from_key == 'float64' and to_key in ('int64')) or
(from_key == 'complex128' and
to_key in ('int64', 'float64'))):
if compat.is_platform_32bit() or compat.is_platform_windows():
pytest.skip("32-bit platform buggy: {0} -> {1}".format
(from_key, to_key))
# Expected: do not downcast by replacement
exp = pd.Series(self.rep[to_key], index=index,
name='yyy', dtype=from_key)
else:
exp = pd.Series(self.rep[to_key], index=index, name='yyy')
assert exp.dtype == to_key
tm.assert_series_equal(result, exp)
# TODO(jbrockmendel) commented out to only have a single xfail printed
@pytest.mark.xfail(reason='GH #18376, tzawareness-compat bug '
'in BlockManager.replace_list')
# @pytest.mark.parametrize('how', ['dict', 'series'])
# @pytest.mark.parametrize('to_key', ['timedelta64[ns]', 'bool', 'object',
# 'complex128', 'float64', 'int64'])
# @pytest.mark.parametrize('from_key', ['datetime64[ns, UTC]',
# 'datetime64[ns, US/Eastern]'])
# def test_replace_series_datetime_tz(self, how, to_key, from_key):
def test_replace_series_datetime_tz(self):
how = 'series'
from_key = 'datetime64[ns, US/Eastern]'
to_key = 'timedelta64[ns]'
index = pd.Index([3, 4], name='xxx')
obj = pd.Series(self.rep[from_key], index=index, name='yyy')
assert obj.dtype == from_key
if how == 'dict':
replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
elif how == 'series':
replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
else:
raise ValueError
result = obj.replace(replacer)
exp = pd.Series(self.rep[to_key], index=index, name='yyy')
assert exp.dtype == to_key
tm.assert_series_equal(result, exp)
# TODO(jreback) commented out to only have a single xfail printed
@pytest.mark.xfail(reason="different tz, "
"currently mask_missing raises SystemError",
strict=False)
# @pytest.mark.parametrize('how', ['dict', 'series'])
# @pytest.mark.parametrize('to_key', [
# 'datetime64[ns]', 'datetime64[ns, UTC]',
# 'datetime64[ns, US/Eastern]'])
# @pytest.mark.parametrize('from_key', [
# 'datetime64[ns]', 'datetime64[ns, UTC]',
# 'datetime64[ns, US/Eastern]'])
# def test_replace_series_datetime_datetime(self, how, to_key, from_key):
def test_replace_series_datetime_datetime(self):
how = 'dict'
to_key = 'datetime64[ns]'
from_key = 'datetime64[ns]'
index = pd.Index([3, 4], name='xxx')
obj = pd.Series(self.rep[from_key], index=index, name='yyy')
assert obj.dtype == from_key
if how == 'dict':
replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
elif how == 'series':
replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
else:
raise ValueError
result = obj.replace(replacer)
exp = pd.Series(self.rep[to_key], index=index, name='yyy')
assert exp.dtype == to_key
tm.assert_series_equal(result, exp)
def test_replace_series_period(self):
pass
@@ -0,0 +1,315 @@
from datetime import datetime, timedelta
from dateutil import tz
import numpy as np
import pandas as pd
from pandas import DataFrame, Index, Series, Timestamp, date_range
from pandas.util import testing as tm
class TestDatetimeIndex(object):
def test_setitem_with_datetime_tz(self):
# 16889
# support .loc with alignment and tz-aware DatetimeIndex
mask = np.array([True, False, True, False])
idx = date_range('20010101', periods=4, tz='UTC')
df = DataFrame({'a': np.arange(4)}, index=idx).astype('float64')
result = df.copy()
result.loc[mask, :] = df.loc[mask, :]
tm.assert_frame_equal(result, df)
result = df.copy()
result.loc[mask] = df.loc[mask]
tm.assert_frame_equal(result, df)
idx = date_range('20010101', periods=4)
df = DataFrame({'a': np.arange(4)}, index=idx).astype('float64')
result = df.copy()
result.loc[mask, :] = df.loc[mask, :]
tm.assert_frame_equal(result, df)
result = df.copy()
result.loc[mask] = df.loc[mask]
tm.assert_frame_equal(result, df)
def test_indexing_with_datetime_tz(self):
# 8260
# support datetime64 with tz
idx = Index(date_range('20130101', periods=3, tz='US/Eastern'),
name='foo')
dr = date_range('20130110', periods=3)
df = DataFrame({'A': idx, 'B': dr})
df['C'] = idx
df.iloc[1, 1] = pd.NaT
df.iloc[1, 2] = pd.NaT
# indexing
result = df.iloc[1]
expected = Series([Timestamp('2013-01-02 00:00:00-0500',
tz='US/Eastern'), np.nan, np.nan],
index=list('ABC'), dtype='object', name=1)
tm.assert_series_equal(result, expected)
result = df.loc[1]
expected = Series([Timestamp('2013-01-02 00:00:00-0500',
tz='US/Eastern'), np.nan, np.nan],
index=list('ABC'), dtype='object', name=1)
tm.assert_series_equal(result, expected)
# indexing - fast_xs
df = DataFrame({'a': date_range('2014-01-01', periods=10, tz='UTC')})
result = df.iloc[5]
expected = Timestamp('2014-01-06 00:00:00+0000', tz='UTC', freq='D')
assert result == expected
result = df.loc[5]
assert result == expected
# indexing - boolean
result = df[df.a > df.a[3]]
expected = df.iloc[4:]
tm.assert_frame_equal(result, expected)
# indexing - setting an element
df = DataFrame(data=pd.to_datetime(
['2015-03-30 20:12:32', '2015-03-12 00:11:11']), columns=['time'])
df['new_col'] = ['new', 'old']
df.time = df.set_index('time').index.tz_localize('UTC')
v = df[df.new_col == 'new'].set_index('time').index.tz_convert(
'US/Pacific')
# trying to set a single element on a part of a different timezone
# this converts to object
df2 = df.copy()
df2.loc[df2.new_col == 'new', 'time'] = v
expected = Series([v[0], df.loc[1, 'time']], name='time')
tm.assert_series_equal(df2.time, expected)
v = df.loc[df.new_col == 'new', 'time'] + pd.Timedelta('1s')
df.loc[df.new_col == 'new', 'time'] = v
tm.assert_series_equal(df.loc[df.new_col == 'new', 'time'], v)
def test_consistency_with_tz_aware_scalar(self):
# xef gh-12938
# various ways of indexing the same tz-aware scalar
df = Series([Timestamp('2016-03-30 14:35:25',
tz='Europe/Brussels')]).to_frame()
df = pd.concat([df, df]).reset_index(drop=True)
expected = Timestamp('2016-03-30 14:35:25+0200',
tz='Europe/Brussels')
result = df[0][0]
assert result == expected
result = df.iloc[0, 0]
assert result == expected
result = df.loc[0, 0]
assert result == expected
result = df.iat[0, 0]
assert result == expected
result = df.at[0, 0]
assert result == expected
result = df[0].loc[0]
assert result == expected
result = df[0].at[0]
assert result == expected
def test_indexing_with_datetimeindex_tz(self):
# GH 12050
# indexing on a series with a datetimeindex with tz
index = date_range('2015-01-01', periods=2, tz='utc')
ser = Series(range(2), index=index, dtype='int64')
# list-like indexing
for sel in (index, list(index)):
# getitem
tm.assert_series_equal(ser[sel], ser)
# setitem
result = ser.copy()
result[sel] = 1
expected = Series(1, index=index)
tm.assert_series_equal(result, expected)
# .loc getitem
tm.assert_series_equal(ser.loc[sel], ser)
# .loc setitem
result = ser.copy()
result.loc[sel] = 1
expected = Series(1, index=index)
tm.assert_series_equal(result, expected)
# single element indexing
# getitem
assert ser[index[1]] == 1
# setitem
result = ser.copy()
result[index[1]] = 5
expected = Series([0, 5], index=index)
tm.assert_series_equal(result, expected)
# .loc getitem
assert ser.loc[index[1]] == 1
# .loc setitem
result = ser.copy()
result.loc[index[1]] = 5
expected = Series([0, 5], index=index)
tm.assert_series_equal(result, expected)
def test_partial_setting_with_datetimelike_dtype(self):
# GH9478
# a datetimeindex alignment issue with partial setting
df = DataFrame(np.arange(6.).reshape(3, 2), columns=list('AB'),
index=date_range('1/1/2000', periods=3, freq='1H'))
expected = df.copy()
expected['C'] = [expected.index[0]] + [pd.NaT, pd.NaT]
mask = df.A < 1
df.loc[mask, 'C'] = df.loc[mask].index
tm.assert_frame_equal(df, expected)
def test_loc_setitem_datetime(self):
# GH 9516
dt1 = Timestamp('20130101 09:00:00')
dt2 = Timestamp('20130101 10:00:00')
for conv in [lambda x: x, lambda x: x.to_datetime64(),
lambda x: x.to_pydatetime(), lambda x: np.datetime64(x)]:
df = DataFrame()
df.loc[conv(dt1), 'one'] = 100
df.loc[conv(dt2), 'one'] = 200
expected = DataFrame({'one': [100.0, 200.0]}, index=[dt1, dt2])
tm.assert_frame_equal(df, expected)
def test_series_partial_set_datetime(self):
# GH 11497
idx = date_range('2011-01-01', '2011-01-02', freq='D', name='idx')
ser = Series([0.1, 0.2], index=idx, name='s')
result = ser.loc[[Timestamp('2011-01-01'), Timestamp('2011-01-02')]]
exp = Series([0.1, 0.2], index=idx, name='s')
tm.assert_series_equal(result, exp, check_index_type=True)
keys = [Timestamp('2011-01-02'), Timestamp('2011-01-02'),
Timestamp('2011-01-01')]
exp = Series([0.2, 0.2, 0.1], index=pd.DatetimeIndex(keys, name='idx'),
name='s')
tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
keys = [Timestamp('2011-01-03'), Timestamp('2011-01-02'),
Timestamp('2011-01-03')]
exp = Series([np.nan, 0.2, np.nan],
index=pd.DatetimeIndex(keys, name='idx'), name='s')
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
def test_series_partial_set_period(self):
# GH 11497
idx = pd.period_range('2011-01-01', '2011-01-02', freq='D', name='idx')
ser = Series([0.1, 0.2], index=idx, name='s')
result = ser.loc[[pd.Period('2011-01-01', freq='D'),
pd.Period('2011-01-02', freq='D')]]
exp = Series([0.1, 0.2], index=idx, name='s')
tm.assert_series_equal(result, exp, check_index_type=True)
keys = [pd.Period('2011-01-02', freq='D'),
pd.Period('2011-01-02', freq='D'),
pd.Period('2011-01-01', freq='D')]
exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name='idx'),
name='s')
tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
keys = [pd.Period('2011-01-03', freq='D'),
pd.Period('2011-01-02', freq='D'),
pd.Period('2011-01-03', freq='D')]
exp = Series([np.nan, 0.2, np.nan],
index=pd.PeriodIndex(keys, name='idx'), name='s')
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = ser.loc[keys]
tm.assert_series_equal(result, exp)
def test_nanosecond_getitem_setitem_with_tz(self):
# GH 11679
data = ['2016-06-28 08:30:00.123456789']
index = pd.DatetimeIndex(data, dtype='datetime64[ns, America/Chicago]')
df = DataFrame({'a': [10]}, index=index)
result = df.loc[df.index[0]]
expected = Series(10, index=['a'], name=df.index[0])
tm.assert_series_equal(result, expected)
result = df.copy()
result.loc[df.index[0], 'a'] = -1
expected = DataFrame(-1, index=index, columns=['a'])
tm.assert_frame_equal(result, expected)
def test_loc_getitem_across_dst(self):
# GH 21846
idx = pd.date_range('2017-10-29 01:30:00',
tz='Europe/Berlin', periods=5, freq='30 min')
series2 = pd.Series([0, 1, 2, 3, 4],
index=idx)
t_1 = pd.Timestamp('2017-10-29 02:30:00+02:00', tz='Europe/Berlin',
freq='30min')
t_2 = pd.Timestamp('2017-10-29 02:00:00+01:00', tz='Europe/Berlin',
freq='30min')
result = series2.loc[t_1:t_2]
expected = pd.Series([2, 3], index=idx[2:4])
tm.assert_series_equal(result, expected)
result = series2[t_1]
expected = 2
assert result == expected
def test_loc_incremental_setitem_with_dst(self):
# GH 20724
base = datetime(2015, 11, 1, tzinfo=tz.gettz("US/Pacific"))
idxs = [base + timedelta(seconds=i * 900) for i in range(16)]
result = pd.Series([0], index=[idxs[0]])
for ts in idxs:
result.loc[ts] = 1
expected = pd.Series(1, index=idxs)
tm.assert_series_equal(result, expected)
def test_loc_setitem_with_existing_dst(self):
# GH 18308
start = pd.Timestamp('2017-10-29 00:00:00+0200', tz='Europe/Madrid')
end = pd.Timestamp('2017-10-29 03:00:00+0100', tz='Europe/Madrid')
ts = pd.Timestamp('2016-10-10 03:00:00', tz='Europe/Madrid')
idx = pd.date_range(start, end, closed='left', freq="H")
result = pd.DataFrame(index=idx, columns=['value'])
result.loc[ts, 'value'] = 12
expected = pd.DataFrame([np.nan] * len(idx) + [12],
index=idx.append(pd.DatetimeIndex([ts])),
columns=['value'],
dtype=object)
tm.assert_frame_equal(result, expected)
@@ -0,0 +1,898 @@
# -*- coding: utf-8 -*-
from warnings import catch_warnings
import numpy as np
import pytest
from pandas import (
DataFrame, Float64Index, Index, Int64Index, RangeIndex, Series)
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal, assert_series_equal
ignore_ix = pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestFloatIndexers(object):
def check(self, result, original, indexer, getitem):
"""
comparator for results
we need to take care if we are indexing on a
Series or a frame
"""
if isinstance(original, Series):
expected = original.iloc[indexer]
else:
if getitem:
expected = original.iloc[:, indexer]
else:
expected = original.iloc[indexer]
assert_almost_equal(result, expected)
def test_scalar_error(self):
# GH 4892
# float_indexers should raise exceptions
# on appropriate Index types & accessors
# this duplicates the code below
# but is spefically testing for the error
# message
for index in [tm.makeStringIndex, tm.makeUnicodeIndex,
tm.makeCategoricalIndex,
tm.makeDateIndex, tm.makeTimedeltaIndex,
tm.makePeriodIndex, tm.makeIntIndex,
tm.makeRangeIndex]:
i = index(5)
s = Series(np.arange(len(i)), index=i)
msg = 'Cannot index by location index'
with pytest.raises(TypeError, match=msg):
s.iloc[3.0]
def f():
s.iloc[3.0] = 0
pytest.raises(TypeError, f)
@ignore_ix
def test_scalar_non_numeric(self):
# GH 4892
# float_indexers should raise exceptions
# on appropriate Index types & accessors
for index in [tm.makeStringIndex, tm.makeUnicodeIndex,
tm.makeCategoricalIndex,
tm.makeDateIndex, tm.makeTimedeltaIndex,
tm.makePeriodIndex]:
i = index(5)
for s in [Series(
np.arange(len(i)), index=i), DataFrame(
np.random.randn(
len(i), len(i)), index=i, columns=i)]:
# getting
for idxr, getitem in [(lambda x: x.ix, False),
(lambda x: x.iloc, False),
(lambda x: x, True)]:
def f():
with catch_warnings(record=True):
idxr(s)[3.0]
# gettitem on a DataFrame is a KeyError as it is indexing
# via labels on the columns
if getitem and isinstance(s, DataFrame):
error = KeyError
else:
error = TypeError
pytest.raises(error, f)
# label based can be a TypeError or KeyError
def f():
s.loc[3.0]
if s.index.inferred_type in ['string', 'unicode', 'mixed']:
error = KeyError
else:
error = TypeError
pytest.raises(error, f)
# contains
assert 3.0 not in s
# setting with a float fails with iloc
def f():
s.iloc[3.0] = 0
pytest.raises(TypeError, f)
# setting with an indexer
if s.index.inferred_type in ['categorical']:
# Value or Type Error
pass
elif s.index.inferred_type in ['datetime64', 'timedelta64',
'period']:
# these should prob work
# and are inconsisten between series/dataframe ATM
# for idxr in [lambda x: x.ix,
# lambda x: x]:
# s2 = s.copy()
# def f():
# idxr(s2)[3.0] = 0
# pytest.raises(TypeError, f)
pass
else:
s2 = s.copy()
s2.loc[3.0] = 10
assert s2.index.is_object()
for idxr in [lambda x: x.ix,
lambda x: x]:
s2 = s.copy()
with catch_warnings(record=True):
idxr(s2)[3.0] = 0
assert s2.index.is_object()
# fallsback to position selection, series only
s = Series(np.arange(len(i)), index=i)
s[3]
pytest.raises(TypeError, lambda: s[3.0])
@ignore_ix
def test_scalar_with_mixed(self):
s2 = Series([1, 2, 3], index=['a', 'b', 'c'])
s3 = Series([1, 2, 3], index=['a', 'b', 1.5])
# lookup in a pure string index
# with an invalid indexer
for idxr in [lambda x: x.ix,
lambda x: x,
lambda x: x.iloc]:
def f():
with catch_warnings(record=True):
idxr(s2)[1.0]
pytest.raises(TypeError, f)
pytest.raises(KeyError, lambda: s2.loc[1.0])
result = s2.loc['b']
expected = 2
assert result == expected
# mixed index so we have label
# indexing
for idxr in [lambda x: x]:
def f():
idxr(s3)[1.0]
pytest.raises(TypeError, f)
result = idxr(s3)[1]
expected = 2
assert result == expected
# mixed index so we have label
# indexing
for idxr in [lambda x: x.ix]:
with catch_warnings(record=True):
def f():
idxr(s3)[1.0]
pytest.raises(TypeError, f)
result = idxr(s3)[1]
expected = 2
assert result == expected
pytest.raises(TypeError, lambda: s3.iloc[1.0])
pytest.raises(KeyError, lambda: s3.loc[1.0])
result = s3.loc[1.5]
expected = 3
assert result == expected
@ignore_ix
def test_scalar_integer(self):
# test how scalar float indexers work on int indexes
# integer index
for i in [Int64Index(range(5)), RangeIndex(5)]:
for s in [Series(np.arange(len(i))),
DataFrame(np.random.randn(len(i), len(i)),
index=i, columns=i)]:
# coerce to equal int
for idxr, getitem in [(lambda x: x.ix, False),
(lambda x: x.loc, False),
(lambda x: x, True)]:
with catch_warnings(record=True):
result = idxr(s)[3.0]
self.check(result, s, 3, getitem)
# coerce to equal int
for idxr, getitem in [(lambda x: x.ix, False),
(lambda x: x.loc, False),
(lambda x: x, True)]:
if isinstance(s, Series):
def compare(x, y):
assert x == y
expected = 100
else:
compare = tm.assert_series_equal
if getitem:
expected = Series(100,
index=range(len(s)), name=3)
else:
expected = Series(100.,
index=range(len(s)), name=3)
s2 = s.copy()
with catch_warnings(record=True):
idxr(s2)[3.0] = 100
result = idxr(s2)[3.0]
compare(result, expected)
result = idxr(s2)[3]
compare(result, expected)
# contains
# coerce to equal int
assert 3.0 in s
@ignore_ix
def test_scalar_float(self):
# scalar float indexers work on a float index
index = Index(np.arange(5.))
for s in [Series(np.arange(len(index)), index=index),
DataFrame(np.random.randn(len(index), len(index)),
index=index, columns=index)]:
# assert all operations except for iloc are ok
indexer = index[3]
for idxr, getitem in [(lambda x: x.ix, False),
(lambda x: x.loc, False),
(lambda x: x, True)]:
# getting
result = idxr(s)[indexer]
self.check(result, s, 3, getitem)
# setting
s2 = s.copy()
def f():
with catch_warnings(record=True):
idxr(s2)[indexer] = expected
with catch_warnings(record=True):
result = idxr(s2)[indexer]
self.check(result, s, 3, getitem)
# random integer is a KeyError
with catch_warnings(record=True):
pytest.raises(KeyError, lambda: idxr(s)[3.5])
# contains
assert 3.0 in s
# iloc succeeds with an integer
expected = s.iloc[3]
s2 = s.copy()
s2.iloc[3] = expected
result = s2.iloc[3]
self.check(result, s, 3, False)
# iloc raises with a float
pytest.raises(TypeError, lambda: s.iloc[3.0])
def g():
s2.iloc[3.0] = 0
pytest.raises(TypeError, g)
@ignore_ix
def test_slice_non_numeric(self):
# GH 4892
# float_indexers should raise exceptions
# on appropriate Index types & accessors
for index in [tm.makeStringIndex, tm.makeUnicodeIndex,
tm.makeDateIndex, tm.makeTimedeltaIndex,
tm.makePeriodIndex]:
index = index(5)
for s in [Series(range(5), index=index),
DataFrame(np.random.randn(5, 2), index=index)]:
# getitem
for l in [slice(3.0, 4),
slice(3, 4.0),
slice(3.0, 4.0)]:
def f():
s.iloc[l]
pytest.raises(TypeError, f)
for idxr in [lambda x: x.ix,
lambda x: x.loc,
lambda x: x.iloc,
lambda x: x]:
def f():
with catch_warnings(record=True):
idxr(s)[l]
pytest.raises(TypeError, f)
# setitem
for l in [slice(3.0, 4),
slice(3, 4.0),
slice(3.0, 4.0)]:
def f():
s.iloc[l] = 0
pytest.raises(TypeError, f)
for idxr in [lambda x: x.ix,
lambda x: x.loc,
lambda x: x.iloc,
lambda x: x]:
def f():
with catch_warnings(record=True):
idxr(s)[l] = 0
pytest.raises(TypeError, f)
@ignore_ix
def test_slice_integer(self):
# same as above, but for Integer based indexes
# these coerce to a like integer
# oob indicates if we are out of bounds
# of positional indexing
for index, oob in [(Int64Index(range(5)), False),
(RangeIndex(5), False),
(Int64Index(range(5)) + 10, True)]:
# s is an in-range index
s = Series(range(5), index=index)
# getitem
for l in [slice(3.0, 4),
slice(3, 4.0),
slice(3.0, 4.0)]:
for idxr in [lambda x: x.loc,
lambda x: x.ix]:
with catch_warnings(record=True):
result = idxr(s)[l]
# these are all label indexing
# except getitem which is positional
# empty
if oob:
indexer = slice(0, 0)
else:
indexer = slice(3, 5)
self.check(result, s, indexer, False)
# positional indexing
def f():
s[l]
pytest.raises(TypeError, f)
# getitem out-of-bounds
for l in [slice(-6, 6),
slice(-6.0, 6.0)]:
for idxr in [lambda x: x.loc,
lambda x: x.ix]:
with catch_warnings(record=True):
result = idxr(s)[l]
# these are all label indexing
# except getitem which is positional
# empty
if oob:
indexer = slice(0, 0)
else:
indexer = slice(-6, 6)
self.check(result, s, indexer, False)
# positional indexing
def f():
s[slice(-6.0, 6.0)]
pytest.raises(TypeError, f)
# getitem odd floats
for l, res1 in [(slice(2.5, 4), slice(3, 5)),
(slice(2, 3.5), slice(2, 4)),
(slice(2.5, 3.5), slice(3, 4))]:
for idxr in [lambda x: x.loc,
lambda x: x.ix]:
with catch_warnings(record=True):
result = idxr(s)[l]
if oob:
res = slice(0, 0)
else:
res = res1
self.check(result, s, res, False)
# positional indexing
def f():
s[l]
pytest.raises(TypeError, f)
# setitem
for l in [slice(3.0, 4),
slice(3, 4.0),
slice(3.0, 4.0)]:
for idxr in [lambda x: x.loc,
lambda x: x.ix]:
sc = s.copy()
with catch_warnings(record=True):
idxr(sc)[l] = 0
result = idxr(sc)[l].values.ravel()
assert (result == 0).all()
# positional indexing
def f():
s[l] = 0
pytest.raises(TypeError, f)
def test_integer_positional_indexing(self):
""" make sure that we are raising on positional indexing
w.r.t. an integer index """
s = Series(range(2, 6), index=range(2, 6))
result = s[2:4]
expected = s.iloc[2:4]
assert_series_equal(result, expected)
for idxr in [lambda x: x,
lambda x: x.iloc]:
for l in [slice(2, 4.0),
slice(2.0, 4),
slice(2.0, 4.0)]:
def f():
idxr(s)[l]
pytest.raises(TypeError, f)
@ignore_ix
def test_slice_integer_frame_getitem(self):
# similar to above, but on the getitem dim (of a DataFrame)
for index in [Int64Index(range(5)), RangeIndex(5)]:
s = DataFrame(np.random.randn(5, 2), index=index)
def f(idxr):
# getitem
for l in [slice(0.0, 1),
slice(0, 1.0),
slice(0.0, 1.0)]:
result = idxr(s)[l]
indexer = slice(0, 2)
self.check(result, s, indexer, False)
# positional indexing
def f():
s[l]
pytest.raises(TypeError, f)
# getitem out-of-bounds
for l in [slice(-10, 10),
slice(-10.0, 10.0)]:
result = idxr(s)[l]
self.check(result, s, slice(-10, 10), True)
# positional indexing
def f():
s[slice(-10.0, 10.0)]
pytest.raises(TypeError, f)
# getitem odd floats
for l, res in [(slice(0.5, 1), slice(1, 2)),
(slice(0, 0.5), slice(0, 1)),
(slice(0.5, 1.5), slice(1, 2))]:
result = idxr(s)[l]
self.check(result, s, res, False)
# positional indexing
def f():
s[l]
pytest.raises(TypeError, f)
# setitem
for l in [slice(3.0, 4),
slice(3, 4.0),
slice(3.0, 4.0)]:
sc = s.copy()
idxr(sc)[l] = 0
result = idxr(sc)[l].values.ravel()
assert (result == 0).all()
# positional indexing
def f():
s[l] = 0
pytest.raises(TypeError, f)
f(lambda x: x.loc)
with catch_warnings(record=True):
f(lambda x: x.ix)
@ignore_ix
def test_slice_float(self):
# same as above, but for floats
index = Index(np.arange(5.)) + 0.1
for s in [Series(range(5), index=index),
DataFrame(np.random.randn(5, 2), index=index)]:
for l in [slice(3.0, 4),
slice(3, 4.0),
slice(3.0, 4.0)]:
expected = s.iloc[3:4]
for idxr in [lambda x: x.ix,
lambda x: x.loc,
lambda x: x]:
# getitem
with catch_warnings(record=True):
result = idxr(s)[l]
if isinstance(s, Series):
tm.assert_series_equal(result, expected)
else:
tm.assert_frame_equal(result, expected)
# setitem
s2 = s.copy()
with catch_warnings(record=True):
idxr(s2)[l] = 0
result = idxr(s2)[l].values.ravel()
assert (result == 0).all()
def test_floating_index_doc_example(self):
index = Index([1.5, 2, 3, 4.5, 5])
s = Series(range(5), index=index)
assert s[3] == 2
assert s.loc[3] == 2
assert s.loc[3] == 2
assert s.iloc[3] == 3
def test_floating_misc(self):
# related 236
# scalar/slicing of a float index
s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64)
# label based slicing
result1 = s[1.0:3.0]
result2 = s.loc[1.0:3.0]
result3 = s.loc[1.0:3.0]
assert_series_equal(result1, result2)
assert_series_equal(result1, result3)
# exact indexing when found
result1 = s[5.0]
result2 = s.loc[5.0]
result3 = s.loc[5.0]
assert result1 == result2
assert result1 == result3
result1 = s[5]
result2 = s.loc[5]
result3 = s.loc[5]
assert result1 == result2
assert result1 == result3
assert s[5.0] == s[5]
# value not found (and no fallbacking at all)
# scalar integers
pytest.raises(KeyError, lambda: s.loc[4])
pytest.raises(KeyError, lambda: s.loc[4])
pytest.raises(KeyError, lambda: s[4])
# fancy floats/integers create the correct entry (as nan)
# fancy tests
expected = Series([2, 0], index=Float64Index([5.0, 0.0]))
for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float
assert_series_equal(s[fancy_idx], expected)
assert_series_equal(s.loc[fancy_idx], expected)
assert_series_equal(s.loc[fancy_idx], expected)
expected = Series([2, 0], index=Index([5, 0], dtype='int64'))
for fancy_idx in [[5, 0], np.array([5, 0])]: # int
assert_series_equal(s[fancy_idx], expected)
assert_series_equal(s.loc[fancy_idx], expected)
assert_series_equal(s.loc[fancy_idx], expected)
# all should return the same as we are slicing 'the same'
result1 = s.loc[2:5]
result2 = s.loc[2.0:5.0]
result3 = s.loc[2.0:5]
result4 = s.loc[2.1:5]
assert_series_equal(result1, result2)
assert_series_equal(result1, result3)
assert_series_equal(result1, result4)
# previously this did fallback indexing
result1 = s[2:5]
result2 = s[2.0:5.0]
result3 = s[2.0:5]
result4 = s[2.1:5]
assert_series_equal(result1, result2)
assert_series_equal(result1, result3)
assert_series_equal(result1, result4)
result1 = s.loc[2:5]
result2 = s.loc[2.0:5.0]
result3 = s.loc[2.0:5]
result4 = s.loc[2.1:5]
assert_series_equal(result1, result2)
assert_series_equal(result1, result3)
assert_series_equal(result1, result4)
# combined test
result1 = s.loc[2:5]
result2 = s.loc[2:5]
result3 = s[2:5]
assert_series_equal(result1, result2)
assert_series_equal(result1, result3)
# list selection
result1 = s[[0.0, 5, 10]]
result2 = s.loc[[0.0, 5, 10]]
result3 = s.loc[[0.0, 5, 10]]
result4 = s.iloc[[0, 2, 4]]
assert_series_equal(result1, result2)
assert_series_equal(result1, result3)
assert_series_equal(result1, result4)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result1 = s[[1.6, 5, 10]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result2 = s.loc[[1.6, 5, 10]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result3 = s.loc[[1.6, 5, 10]]
assert_series_equal(result1, result2)
assert_series_equal(result1, result3)
assert_series_equal(result1, Series(
[np.nan, 2, 4], index=[1.6, 5, 10]))
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result1 = s[[0, 1, 2]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result2 = s.loc[[0, 1, 2]]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result3 = s.loc[[0, 1, 2]]
assert_series_equal(result1, result2)
assert_series_equal(result1, result3)
assert_series_equal(result1, Series(
[0.0, np.nan, np.nan], index=[0, 1, 2]))
result1 = s.loc[[2.5, 5]]
result2 = s.loc[[2.5, 5]]
assert_series_equal(result1, result2)
assert_series_equal(result1, Series([1, 2], index=[2.5, 5.0]))
result1 = s[[2.5]]
result2 = s.loc[[2.5]]
result3 = s.loc[[2.5]]
assert_series_equal(result1, result2)
assert_series_equal(result1, result3)
assert_series_equal(result1, Series([1], index=[2.5]))
def test_floating_tuples(self):
# see gh-13509
s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.1, 0.2], name='foo')
result = s[0.0]
assert result == (1, 1)
expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name='foo')
s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.0, 0.2], name='foo')
result = s[0.0]
tm.assert_series_equal(result, expected)
def test_float64index_slicing_bug(self):
# GH 5557, related to slicing a float index
ser = {256: 2321.0,
1: 78.0,
2: 2716.0,
3: 0.0,
4: 369.0,
5: 0.0,
6: 269.0,
7: 0.0,
8: 0.0,
9: 0.0,
10: 3536.0,
11: 0.0,
12: 24.0,
13: 0.0,
14: 931.0,
15: 0.0,
16: 101.0,
17: 78.0,
18: 9643.0,
19: 0.0,
20: 0.0,
21: 0.0,
22: 63761.0,
23: 0.0,
24: 446.0,
25: 0.0,
26: 34773.0,
27: 0.0,
28: 729.0,
29: 78.0,
30: 0.0,
31: 0.0,
32: 3374.0,
33: 0.0,
34: 1391.0,
35: 0.0,
36: 361.0,
37: 0.0,
38: 61808.0,
39: 0.0,
40: 0.0,
41: 0.0,
42: 6677.0,
43: 0.0,
44: 802.0,
45: 0.0,
46: 2691.0,
47: 0.0,
48: 3582.0,
49: 0.0,
50: 734.0,
51: 0.0,
52: 627.0,
53: 70.0,
54: 2584.0,
55: 0.0,
56: 324.0,
57: 0.0,
58: 605.0,
59: 0.0,
60: 0.0,
61: 0.0,
62: 3989.0,
63: 10.0,
64: 42.0,
65: 0.0,
66: 904.0,
67: 0.0,
68: 88.0,
69: 70.0,
70: 8172.0,
71: 0.0,
72: 0.0,
73: 0.0,
74: 64902.0,
75: 0.0,
76: 347.0,
77: 0.0,
78: 36605.0,
79: 0.0,
80: 379.0,
81: 70.0,
82: 0.0,
83: 0.0,
84: 3001.0,
85: 0.0,
86: 1630.0,
87: 7.0,
88: 364.0,
89: 0.0,
90: 67404.0,
91: 9.0,
92: 0.0,
93: 0.0,
94: 7685.0,
95: 0.0,
96: 1017.0,
97: 0.0,
98: 2831.0,
99: 0.0,
100: 2963.0,
101: 0.0,
102: 854.0,
103: 0.0,
104: 0.0,
105: 0.0,
106: 0.0,
107: 0.0,
108: 0.0,
109: 0.0,
110: 0.0,
111: 0.0,
112: 0.0,
113: 0.0,
114: 0.0,
115: 0.0,
116: 0.0,
117: 0.0,
118: 0.0,
119: 0.0,
120: 0.0,
121: 0.0,
122: 0.0,
123: 0.0,
124: 0.0,
125: 0.0,
126: 67744.0,
127: 22.0,
128: 264.0,
129: 0.0,
260: 197.0,
268: 0.0,
265: 0.0,
269: 0.0,
261: 0.0,
266: 1198.0,
267: 0.0,
262: 2629.0,
258: 775.0,
257: 0.0,
263: 0.0,
259: 0.0,
264: 163.0,
250: 10326.0,
251: 0.0,
252: 1228.0,
253: 0.0,
254: 2769.0,
255: 0.0}
# smoke test for the repr
s = Series(ser)
result = s.value_counts()
str(result)
@@ -0,0 +1,677 @@
""" test positional based indexing with iloc """
from warnings import catch_warnings, filterwarnings, simplefilter
import numpy as np
import pytest
from pandas.compat import lmap, lrange
import pandas as pd
from pandas import DataFrame, Series, concat, date_range, isna
from pandas.api.types import is_scalar
from pandas.tests.indexing.common import Base
from pandas.util import testing as tm
class TestiLoc(Base):
def test_iloc_exceeds_bounds(self):
# GH6296
# iloc should allow indexers that exceed the bounds
df = DataFrame(np.random.random_sample((20, 5)), columns=list('ABCDE'))
# lists of positions should raise IndexErrror!
msg = 'positional indexers are out-of-bounds'
with pytest.raises(IndexError, match=msg):
df.iloc[:, [0, 1, 2, 3, 4, 5]]
pytest.raises(IndexError, lambda: df.iloc[[1, 30]])
pytest.raises(IndexError, lambda: df.iloc[[1, -30]])
pytest.raises(IndexError, lambda: df.iloc[[100]])
s = df['A']
pytest.raises(IndexError, lambda: s.iloc[[100]])
pytest.raises(IndexError, lambda: s.iloc[[-100]])
# still raise on a single indexer
msg = 'single positional indexer is out-of-bounds'
with pytest.raises(IndexError, match=msg):
df.iloc[30]
pytest.raises(IndexError, lambda: df.iloc[-30])
# GH10779
# single positive/negative indexer exceeding Series bounds should raise
# an IndexError
with pytest.raises(IndexError, match=msg):
s.iloc[30]
pytest.raises(IndexError, lambda: s.iloc[-30])
# slices are ok
result = df.iloc[:, 4:10] # 0 < start < len < stop
expected = df.iloc[:, 4:]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, -4:-10] # stop < 0 < start < len
expected = df.iloc[:, :0]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, 10:4:-1] # 0 < stop < len < start (down)
expected = df.iloc[:, :4:-1]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, 4:-10:-1] # stop < 0 < start < len (down)
expected = df.iloc[:, 4::-1]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, -10:4] # start < 0 < stop < len
expected = df.iloc[:, :4]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, 10:4] # 0 < stop < len < start
expected = df.iloc[:, :0]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, -10:-11:-1] # stop < start < 0 < len (down)
expected = df.iloc[:, :0]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, 10:11] # 0 < len < start < stop
expected = df.iloc[:, :0]
tm.assert_frame_equal(result, expected)
# slice bounds exceeding is ok
result = s.iloc[18:30]
expected = s.iloc[18:]
tm.assert_series_equal(result, expected)
result = s.iloc[30:]
expected = s.iloc[:0]
tm.assert_series_equal(result, expected)
result = s.iloc[30::-1]
expected = s.iloc[::-1]
tm.assert_series_equal(result, expected)
# doc example
def check(result, expected):
str(result)
result.dtypes
tm.assert_frame_equal(result, expected)
dfl = DataFrame(np.random.randn(5, 2), columns=list('AB'))
check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index))
check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]])
check(dfl.iloc[4:6], dfl.iloc[[4]])
pytest.raises(IndexError, lambda: dfl.iloc[[4, 5, 6]])
pytest.raises(IndexError, lambda: dfl.iloc[:, 4])
def test_iloc_getitem_int(self):
# integer
self.check_result('integer', 'iloc', 2, 'ix',
{0: 4, 1: 6, 2: 8}, typs=['ints', 'uints'])
self.check_result('integer', 'iloc', 2, 'indexer', 2,
typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
fails=IndexError)
def test_iloc_getitem_neg_int(self):
# neg integer
self.check_result('neg int', 'iloc', -1, 'ix',
{0: 6, 1: 9, 2: 12}, typs=['ints', 'uints'])
self.check_result('neg int', 'iloc', -1, 'indexer', -1,
typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
fails=IndexError)
@pytest.mark.parametrize('dims', [1, 2])
def test_iloc_getitem_invalid_scalar(self, dims):
# GH 21982
if dims == 1:
s = Series(np.arange(10))
else:
s = DataFrame(np.arange(100).reshape(10, 10))
with pytest.raises(TypeError, match='Cannot index by location index'):
s.iloc['a']
def test_iloc_array_not_mutating_negative_indices(self):
# GH 21867
array_with_neg_numbers = np.array([1, 2, -1])
array_copy = array_with_neg_numbers.copy()
df = pd.DataFrame({
'A': [100, 101, 102],
'B': [103, 104, 105],
'C': [106, 107, 108]},
index=[1, 2, 3])
df.iloc[array_with_neg_numbers]
tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy)
df.iloc[:, array_with_neg_numbers]
tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy)
def test_iloc_getitem_list_int(self):
# list of ints
self.check_result('list int', 'iloc', [0, 1, 2], 'ix',
{0: [0, 2, 4], 1: [0, 3, 6], 2: [0, 4, 8]},
typs=['ints', 'uints'])
self.check_result('list int', 'iloc', [2], 'ix',
{0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints'])
self.check_result('list int', 'iloc', [0, 1, 2], 'indexer', [0, 1, 2],
typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
fails=IndexError)
# array of ints (GH5006), make sure that a single indexer is returning
# the correct type
self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'ix',
{0: [0, 2, 4],
1: [0, 3, 6],
2: [0, 4, 8]}, typs=['ints', 'uints'])
self.check_result('array int', 'iloc', np.array([2]), 'ix',
{0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints'])
self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'indexer',
[0, 1, 2],
typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
fails=IndexError)
def test_iloc_getitem_neg_int_can_reach_first_index(self):
# GH10547 and GH10779
# negative integers should be able to reach index 0
df = DataFrame({'A': [2, 3, 5], 'B': [7, 11, 13]})
s = df['A']
expected = df.iloc[0]
result = df.iloc[-3]
tm.assert_series_equal(result, expected)
expected = df.iloc[[0]]
result = df.iloc[[-3]]
tm.assert_frame_equal(result, expected)
expected = s.iloc[0]
result = s.iloc[-3]
assert result == expected
expected = s.iloc[[0]]
result = s.iloc[[-3]]
tm.assert_series_equal(result, expected)
# check the length 1 Series case highlighted in GH10547
expected = Series(['a'], index=['A'])
result = expected.iloc[[-1]]
tm.assert_series_equal(result, expected)
def test_iloc_getitem_dups(self):
# no dups in panel (bug?)
self.check_result('list int (dups)', 'iloc', [0, 1, 1, 3], 'ix',
{0: [0, 2, 2, 6], 1: [0, 3, 3, 9]},
objs=['series', 'frame'], typs=['ints', 'uints'])
# GH 6766
df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
df = concat([df1, df2], axis=1)
# cross-sectional indexing
result = df.iloc[0, 0]
assert isna(result)
result = df.iloc[0, :]
expected = Series([np.nan, 1, 3, 3], index=['A', 'B', 'A', 'B'],
name=0)
tm.assert_series_equal(result, expected)
def test_iloc_getitem_array(self):
# array like
s = Series(index=lrange(1, 4))
self.check_result('array like', 'iloc', s.index, 'ix',
{0: [2, 4, 6], 1: [3, 6, 9], 2: [4, 8, 12]},
typs=['ints', 'uints'])
def test_iloc_getitem_bool(self):
# boolean indexers
b = [True, False, True, False, ]
self.check_result('bool', 'iloc', b, 'ix', b, typs=['ints', 'uints'])
self.check_result('bool', 'iloc', b, 'ix', b,
typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
fails=IndexError)
def test_iloc_getitem_slice(self):
# slices
self.check_result('slice', 'iloc', slice(1, 3), 'ix',
{0: [2, 4], 1: [3, 6], 2: [4, 8]},
typs=['ints', 'uints'])
self.check_result('slice', 'iloc', slice(1, 3), 'indexer',
slice(1, 3),
typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
fails=IndexError)
def test_iloc_getitem_slice_dups(self):
df1 = DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B'])
df2 = DataFrame(np.random.randint(0, 10, size=20).reshape(10, 2),
columns=['A', 'C'])
# axis=1
df = concat([df1, df2], axis=1)
tm.assert_frame_equal(df.iloc[:, :4], df1)
tm.assert_frame_equal(df.iloc[:, 4:], df2)
df = concat([df2, df1], axis=1)
tm.assert_frame_equal(df.iloc[:, :2], df2)
tm.assert_frame_equal(df.iloc[:, 2:], df1)
exp = concat([df2, df1.iloc[:, [0]]], axis=1)
tm.assert_frame_equal(df.iloc[:, 0:3], exp)
# axis=0
df = concat([df, df], axis=0)
tm.assert_frame_equal(df.iloc[0:10, :2], df2)
tm.assert_frame_equal(df.iloc[0:10, 2:], df1)
tm.assert_frame_equal(df.iloc[10:, :2], df2)
tm.assert_frame_equal(df.iloc[10:, 2:], df1)
def test_iloc_setitem(self):
df = self.frame_ints
df.iloc[1, 1] = 1
result = df.iloc[1, 1]
assert result == 1
df.iloc[:, 2:3] = 0
expected = df.iloc[:, 2:3]
result = df.iloc[:, 2:3]
tm.assert_frame_equal(result, expected)
# GH5771
s = Series(0, index=[4, 5, 6])
s.iloc[1:2] += 1
expected = Series([0, 1, 0], index=[4, 5, 6])
tm.assert_series_equal(s, expected)
def test_iloc_setitem_list(self):
# setitem with an iloc list
df = DataFrame(np.arange(9).reshape((3, 3)), index=["A", "B", "C"],
columns=["A", "B", "C"])
df.iloc[[0, 1], [1, 2]]
df.iloc[[0, 1], [1, 2]] += 100
expected = DataFrame(
np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)),
index=["A", "B", "C"], columns=["A", "B", "C"])
tm.assert_frame_equal(df, expected)
def test_iloc_setitem_pandas_object(self):
# GH 17193
s_orig = Series([0, 1, 2, 3])
expected = Series([0, -1, -2, 3])
s = s_orig.copy()
s.iloc[Series([1, 2])] = [-1, -2]
tm.assert_series_equal(s, expected)
s = s_orig.copy()
s.iloc[pd.Index([1, 2])] = [-1, -2]
tm.assert_series_equal(s, expected)
def test_iloc_setitem_dups(self):
# GH 6766
# iloc with a mask aligning from another iloc
df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
df = concat([df1, df2], axis=1)
expected = df.fillna(3)
expected['A'] = expected['A'].astype('float64')
inds = np.isnan(df.iloc[:, 0])
mask = inds[inds].index
df.iloc[mask, 0] = df.iloc[mask, 2]
tm.assert_frame_equal(df, expected)
# del a dup column across blocks
expected = DataFrame({0: [1, 2], 1: [3, 4]})
expected.columns = ['B', 'B']
del df['A']
tm.assert_frame_equal(df, expected)
# assign back to self
df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]]
tm.assert_frame_equal(df, expected)
# reversed x 2
df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(
drop=True)
df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(
drop=True)
tm.assert_frame_equal(df, expected)
def test_iloc_getitem_frame(self):
df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2),
columns=lrange(0, 8, 2))
result = df.iloc[2]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", DeprecationWarning)
exp = df.ix[4]
tm.assert_series_equal(result, exp)
result = df.iloc[2, 2]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", DeprecationWarning)
exp = df.ix[4, 4]
assert result == exp
# slice
result = df.iloc[4:8]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", DeprecationWarning)
expected = df.ix[8:14]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, 2:3]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", DeprecationWarning)
expected = df.ix[:, 4:5]
tm.assert_frame_equal(result, expected)
# list of integers
result = df.iloc[[0, 1, 3]]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", DeprecationWarning)
expected = df.ix[[0, 2, 6]]
tm.assert_frame_equal(result, expected)
result = df.iloc[[0, 1, 3], [0, 1]]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", DeprecationWarning)
expected = df.ix[[0, 2, 6], [0, 2]]
tm.assert_frame_equal(result, expected)
# neg indices
result = df.iloc[[-1, 1, 3], [-1, 1]]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", DeprecationWarning)
expected = df.ix[[18, 2, 6], [6, 2]]
tm.assert_frame_equal(result, expected)
# dups indices
result = df.iloc[[-1, -1, 1, 3], [-1, 1]]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", DeprecationWarning)
expected = df.ix[[18, 18, 2, 6], [6, 2]]
tm.assert_frame_equal(result, expected)
# with index-like
s = Series(index=lrange(1, 5))
result = df.iloc[s.index]
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", DeprecationWarning)
expected = df.ix[[2, 4, 6, 8]]
tm.assert_frame_equal(result, expected)
def test_iloc_getitem_labelled_frame(self):
# try with labelled frame
df = DataFrame(np.random.randn(10, 4),
index=list('abcdefghij'), columns=list('ABCD'))
result = df.iloc[1, 1]
exp = df.loc['b', 'B']
assert result == exp
result = df.iloc[:, 2:3]
expected = df.loc[:, ['C']]
tm.assert_frame_equal(result, expected)
# negative indexing
result = df.iloc[-1, -1]
exp = df.loc['j', 'D']
assert result == exp
# out-of-bounds exception
pytest.raises(IndexError, df.iloc.__getitem__, tuple([10, 5]))
# trying to use a label
pytest.raises(ValueError, df.iloc.__getitem__, tuple(['j', 'D']))
def test_iloc_getitem_doc_issue(self):
# multi axis slicing issue with single block
# surfaced in GH 6059
arr = np.random.randn(6, 4)
index = date_range('20130101', periods=6)
columns = list('ABCD')
df = DataFrame(arr, index=index, columns=columns)
# defines ref_locs
df.describe()
result = df.iloc[3:5, 0:2]
str(result)
result.dtypes
expected = DataFrame(arr[3:5, 0:2], index=index[3:5],
columns=columns[0:2])
tm.assert_frame_equal(result, expected)
# for dups
df.columns = list('aaaa')
result = df.iloc[3:5, 0:2]
str(result)
result.dtypes
expected = DataFrame(arr[3:5, 0:2], index=index[3:5],
columns=list('aa'))
tm.assert_frame_equal(result, expected)
# related
arr = np.random.randn(6, 4)
index = list(range(0, 12, 2))
columns = list(range(0, 8, 2))
df = DataFrame(arr, index=index, columns=columns)
df._data.blocks[0].mgr_locs
result = df.iloc[1:5, 2:4]
str(result)
result.dtypes
expected = DataFrame(arr[1:5, 2:4], index=index[1:5],
columns=columns[2:4])
tm.assert_frame_equal(result, expected)
def test_iloc_setitem_series(self):
df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'),
columns=list('ABCD'))
df.iloc[1, 1] = 1
result = df.iloc[1, 1]
assert result == 1
df.iloc[:, 2:3] = 0
expected = df.iloc[:, 2:3]
result = df.iloc[:, 2:3]
tm.assert_frame_equal(result, expected)
s = Series(np.random.randn(10), index=lrange(0, 20, 2))
s.iloc[1] = 1
result = s.iloc[1]
assert result == 1
s.iloc[:4] = 0
expected = s.iloc[:4]
result = s.iloc[:4]
tm.assert_series_equal(result, expected)
s = Series([-1] * 6)
s.iloc[0::2] = [0, 2, 4]
s.iloc[1::2] = [1, 3, 5]
result = s
expected = Series([0, 1, 2, 3, 4, 5])
tm.assert_series_equal(result, expected)
def test_iloc_setitem_list_of_lists(self):
# GH 7551
# list-of-list is set incorrectly in mixed vs. single dtyped frames
df = DataFrame(dict(A=np.arange(5, dtype='int64'),
B=np.arange(5, 10, dtype='int64')))
df.iloc[2:4] = [[10, 11], [12, 13]]
expected = DataFrame(dict(A=[0, 1, 10, 12, 4], B=[5, 6, 11, 13, 9]))
tm.assert_frame_equal(df, expected)
df = DataFrame(
dict(A=list('abcde'), B=np.arange(5, 10, dtype='int64')))
df.iloc[2:4] = [['x', 11], ['y', 13]]
expected = DataFrame(dict(A=['a', 'b', 'x', 'y', 'e'],
B=[5, 6, 11, 13, 9]))
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize(
'indexer', [[0], slice(None, 1, None), np.array([0])])
@pytest.mark.parametrize(
'value', [['Z'], np.array(['Z'])])
def test_iloc_setitem_with_scalar_index(self, indexer, value):
# GH #19474
# assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated
# elementwisely, not using "setter('A', ['Z'])".
df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
df.iloc[0, indexer] = value
result = df.iloc[0, 0]
assert is_scalar(result) and result == 'Z'
def test_iloc_mask(self):
# GH 3631, iloc with a mask (of a series) should raise
df = DataFrame(lrange(5), list('ABCDE'), columns=['a'])
mask = (df.a % 2 == 0)
pytest.raises(ValueError, df.iloc.__getitem__, tuple([mask]))
mask.index = lrange(len(mask))
pytest.raises(NotImplementedError, df.iloc.__getitem__,
tuple([mask]))
# ndarray ok
result = df.iloc[np.array([True] * len(mask), dtype=bool)]
tm.assert_frame_equal(result, df)
# the possibilities
locs = np.arange(4)
nums = 2 ** locs
reps = lmap(bin, nums)
df = DataFrame({'locs': locs, 'nums': nums}, reps)
expected = {
(None, ''): '0b1100',
(None, '.loc'): '0b1100',
(None, '.iloc'): '0b1100',
('index', ''): '0b11',
('index', '.loc'): '0b11',
('index', '.iloc'): ('iLocation based boolean indexing '
'cannot use an indexable as a mask'),
('locs', ''): 'Unalignable boolean Series provided as indexer '
'(index of the boolean Series and of the indexed '
'object do not match',
('locs', '.loc'): 'Unalignable boolean Series provided as indexer '
'(index of the boolean Series and of the '
'indexed object do not match',
('locs', '.iloc'): ('iLocation based boolean indexing on an '
'integer type is not available'),
}
# UserWarnings from reindex of a boolean mask
with catch_warnings(record=True):
simplefilter("ignore", UserWarning)
result = dict()
for idx in [None, 'index', 'locs']:
mask = (df.nums > 2).values
if idx:
mask = Series(mask, list(reversed(getattr(df, idx))))
for method in ['', '.loc', '.iloc']:
try:
if method:
accessor = getattr(df, method[1:])
else:
accessor = df
ans = str(bin(accessor[mask]['nums'].sum()))
except Exception as e:
ans = str(e)
key = tuple([idx, method])
r = expected.get(key)
if r != ans:
raise AssertionError(
"[%s] does not match [%s], received [%s]"
% (key, ans, r))
def test_iloc_non_unique_indexing(self):
# GH 4017, non-unique indexing (on the axis)
df = DataFrame({'A': [0.1] * 3000, 'B': [1] * 3000})
idx = np.array(lrange(30)) * 99
expected = df.iloc[idx]
df3 = concat([df, 2 * df, 3 * df])
result = df3.iloc[idx]
tm.assert_frame_equal(result, expected)
df2 = DataFrame({'A': [0.1] * 1000, 'B': [1] * 1000})
df2 = concat([df2, 2 * df2, 3 * df2])
sidx = df2.index.to_series()
expected = df2.iloc[idx[idx <= sidx.max()]]
new_list = []
for r, s in expected.iterrows():
new_list.append(s)
new_list.append(s * 2)
new_list.append(s * 3)
expected = DataFrame(new_list)
expected = concat([expected, DataFrame(index=idx[idx > sidx.max()])],
sort=True)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df2.loc[idx]
tm.assert_frame_equal(result, expected, check_index_type=False)
def test_iloc_empty_list_indexer_is_ok(self):
from pandas.util.testing import makeCustomDataframe as mkdf
df = mkdf(5, 2)
# vertical empty
tm.assert_frame_equal(df.iloc[:, []], df.iloc[:, :0],
check_index_type=True, check_column_type=True)
# horizontal empty
tm.assert_frame_equal(df.iloc[[], :], df.iloc[:0, :],
check_index_type=True, check_column_type=True)
# horizontal empty
tm.assert_frame_equal(df.iloc[[]], df.iloc[:0, :],
check_index_type=True,
check_column_type=True)
def test_identity_slice_returns_new_object(self):
# GH13873
original_df = DataFrame({'a': [1, 2, 3]})
sliced_df = original_df.iloc[:]
assert sliced_df is not original_df
# should be a shallow copy
original_df['a'] = [4, 4, 4]
assert (sliced_df['a'] == 4).all()
original_series = Series([1, 2, 3, 4, 5, 6])
sliced_series = original_series.iloc[:]
assert sliced_series is not original_series
# should also be a shallow copy
original_series[:3] = [7, 8, 9]
assert all(sliced_series[:3] == [7, 8, 9])
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,169 @@
import numpy as np
from pandas._libs import algos as libalgos, index as libindex
from pandas import compat
import pandas.util.testing as tm
class TestNumericEngine(object):
def test_is_monotonic(self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype
num = 1000
arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)
# monotonic increasing
engine = engine_type(lambda: arr, len(arr))
assert engine.is_monotonic_increasing is True
assert engine.is_monotonic_decreasing is False
# monotonic decreasing
engine = engine_type(lambda: arr[::-1], len(arr))
assert engine.is_monotonic_increasing is False
assert engine.is_monotonic_decreasing is True
# neither monotonic increasing or decreasing
arr = np.array([1] * num + [2] * num + [1] * num, dtype=dtype)
engine = engine_type(lambda: arr[::-1], len(arr))
assert engine.is_monotonic_increasing is False
assert engine.is_monotonic_decreasing is False
def test_is_unique(self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype
# unique
arr = np.array([1, 3, 2], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
assert engine.is_unique is True
# not unique
arr = np.array([1, 2, 1], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
assert engine.is_unique is False
def test_get_loc(self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype
# unique
arr = np.array([1, 2, 3], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
assert engine.get_loc(2) == 1
# monotonic
num = 1000
arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
assert engine.get_loc(2) == slice(1000, 2000)
# not monotonic
arr = np.array([1, 2, 3] * num, dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
expected = np.array([False, True, False] * num, dtype=bool)
result = engine.get_loc(2)
assert (result == expected).all()
def test_get_backfill_indexer(
self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype
arr = np.array([1, 5, 10], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
new = np.array(compat.range(12), dtype=dtype)
result = engine.get_backfill_indexer(new)
expected = libalgos.backfill(arr, new)
tm.assert_numpy_array_equal(result, expected)
def test_get_pad_indexer(
self, numeric_indexing_engine_type_and_dtype):
engine_type, dtype = numeric_indexing_engine_type_and_dtype
arr = np.array([1, 5, 10], dtype=dtype)
engine = engine_type(lambda: arr, len(arr))
new = np.array(compat.range(12), dtype=dtype)
result = engine.get_pad_indexer(new)
expected = libalgos.pad(arr, new)
tm.assert_numpy_array_equal(result, expected)
class TestObjectEngine(object):
engine_type = libindex.ObjectEngine
dtype = np.object_
values = list('abc')
def test_is_monotonic(self):
num = 1000
arr = np.array(['a'] * num + ['a'] * num + ['c'] * num,
dtype=self.dtype)
# monotonic increasing
engine = self.engine_type(lambda: arr, len(arr))
assert engine.is_monotonic_increasing is True
assert engine.is_monotonic_decreasing is False
# monotonic decreasing
engine = self.engine_type(lambda: arr[::-1], len(arr))
assert engine.is_monotonic_increasing is False
assert engine.is_monotonic_decreasing is True
# neither monotonic increasing or decreasing
arr = np.array(['a'] * num + ['b'] * num + ['a'] * num,
dtype=self.dtype)
engine = self.engine_type(lambda: arr[::-1], len(arr))
assert engine.is_monotonic_increasing is False
assert engine.is_monotonic_decreasing is False
def test_is_unique(self):
# unique
arr = np.array(self.values, dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
assert engine.is_unique is True
# not unique
arr = np.array(['a', 'b', 'a'], dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
assert engine.is_unique is False
def test_get_loc(self):
# unique
arr = np.array(self.values, dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
assert engine.get_loc('b') == 1
# monotonic
num = 1000
arr = np.array(['a'] * num + ['b'] * num + ['c'] * num,
dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
assert engine.get_loc('b') == slice(1000, 2000)
# not monotonic
arr = np.array(self.values * num, dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
expected = np.array([False, True, False] * num, dtype=bool)
result = engine.get_loc('b')
assert (result == expected).all()
def test_get_backfill_indexer(self):
arr = np.array(['a', 'e', 'j'], dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
new = np.array(list('abcdefghij'), dtype=self.dtype)
result = engine.get_backfill_indexer(new)
expected = libalgos.backfill["object"](arr, new)
tm.assert_numpy_array_equal(result, expected)
def test_get_pad_indexer(self):
arr = np.array(['a', 'e', 'j'], dtype=self.dtype)
engine = self.engine_type(lambda: arr, len(arr))
new = np.array(list('abcdefghij'), dtype=self.dtype)
result = engine.get_pad_indexer(new)
expected = libalgos.pad["object"](arr, new)
tm.assert_numpy_array_equal(result, expected)
@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
import pytest
from pandas import DataFrame
import pandas.util.testing as tm
class TestIndexingSlow(object):
@pytest.mark.slow
def test_large_dataframe_indexing(self):
# GH10692
result = DataFrame({'x': range(10 ** 6)}, dtype='int64')
result.loc[len(result)] = len(result) + 1
expected = DataFrame({'x': range(10 ** 6 + 1)}, dtype='int64')
tm.assert_frame_equal(result, expected)
@@ -0,0 +1,314 @@
""" test indexing with ix """
from warnings import catch_warnings
import numpy as np
import pytest
from pandas.compat import lrange
from pandas.core.dtypes.common import is_scalar
import pandas as pd
from pandas import DataFrame, Series, option_context
from pandas.util import testing as tm
def test_ix_deprecation():
# GH 15114
df = DataFrame({'A': [1, 2, 3]})
with tm.assert_produces_warning(DeprecationWarning,
check_stacklevel=False):
df.ix[1, 'A']
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestIX(object):
def test_ix_loc_setitem_consistency(self):
# GH 5771
# loc with slice and series
s = Series(0, index=[4, 5, 6])
s.loc[4:5] += 1
expected = Series([1, 1, 0], index=[4, 5, 6])
tm.assert_series_equal(s, expected)
# GH 5928
# chained indexing assignment
df = DataFrame({'a': [0, 1, 2]})
expected = df.copy()
with catch_warnings(record=True):
expected.ix[[0, 1, 2], 'a'] = -expected.ix[[0, 1, 2], 'a']
with catch_warnings(record=True):
df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]]
tm.assert_frame_equal(df, expected)
df = DataFrame({'a': [0, 1, 2], 'b': [0, 1, 2]})
with catch_warnings(record=True):
df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]].astype(
'float64') + 0.5
expected = DataFrame({'a': [0.5, -0.5, -1.5], 'b': [0, 1, 2]})
tm.assert_frame_equal(df, expected)
# GH 8607
# ix setitem consistency
df = DataFrame({'delta': [1174, 904, 161],
'elapsed': [7673, 9277, 1470],
'timestamp': [1413840976, 1413842580, 1413760580]})
expected = DataFrame({'delta': [1174, 904, 161],
'elapsed': [7673, 9277, 1470],
'timestamp': pd.to_datetime(
[1413840976, 1413842580, 1413760580],
unit='s')
})
df2 = df.copy()
df2['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
tm.assert_frame_equal(df2, expected)
df2 = df.copy()
df2.loc[:, 'timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
tm.assert_frame_equal(df2, expected)
df2 = df.copy()
with catch_warnings(record=True):
df2.ix[:, 2] = pd.to_datetime(df['timestamp'], unit='s')
tm.assert_frame_equal(df2, expected)
def test_ix_loc_consistency(self):
# GH 8613
# some edge cases where ix/loc should return the same
# this is not an exhaustive case
def compare(result, expected):
if is_scalar(expected):
assert result == expected
else:
assert expected.equals(result)
# failure cases for .loc, but these work for .ix
df = DataFrame(np.random.randn(5, 4), columns=list('ABCD'))
for key in [slice(1, 3), tuple([slice(0, 2), slice(0, 2)]),
tuple([slice(0, 2), df.columns[0:2]])]:
for index in [tm.makeStringIndex, tm.makeUnicodeIndex,
tm.makeDateIndex, tm.makePeriodIndex,
tm.makeTimedeltaIndex]:
df.index = index(len(df.index))
with catch_warnings(record=True):
df.ix[key]
pytest.raises(TypeError, lambda: df.loc[key])
df = DataFrame(np.random.randn(5, 4), columns=list('ABCD'),
index=pd.date_range('2012-01-01', periods=5))
for key in ['2012-01-03',
'2012-01-31',
slice('2012-01-03', '2012-01-03'),
slice('2012-01-03', '2012-01-04'),
slice('2012-01-03', '2012-01-06', 2),
slice('2012-01-03', '2012-01-31'),
tuple([[True, True, True, False, True]]), ]:
# getitem
# if the expected raises, then compare the exceptions
try:
with catch_warnings(record=True):
expected = df.ix[key]
except KeyError:
pytest.raises(KeyError, lambda: df.loc[key])
continue
result = df.loc[key]
compare(result, expected)
# setitem
df1 = df.copy()
df2 = df.copy()
with catch_warnings(record=True):
df1.ix[key] = 10
df2.loc[key] = 10
compare(df2, df1)
# edge cases
s = Series([1, 2, 3, 4], index=list('abde'))
result1 = s['a':'c']
with catch_warnings(record=True):
result2 = s.ix['a':'c']
result3 = s.loc['a':'c']
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
# now work rather than raising KeyError
s = Series(range(5), [-2, -1, 1, 2, 3])
with catch_warnings(record=True):
result1 = s.ix[-10:3]
result2 = s.loc[-10:3]
tm.assert_series_equal(result1, result2)
with catch_warnings(record=True):
result1 = s.ix[0:3]
result2 = s.loc[0:3]
tm.assert_series_equal(result1, result2)
def test_ix_weird_slicing(self):
# http://stackoverflow.com/q/17056560/1240268
df = DataFrame({'one': [1, 2, 3, np.nan, np.nan],
'two': [1, 2, 3, 4, 5]})
df.loc[df['one'] > 1, 'two'] = -df['two']
expected = DataFrame({'one': {0: 1.0,
1: 2.0,
2: 3.0,
3: np.nan,
4: np.nan},
'two': {0: 1,
1: -2,
2: -3,
3: 4,
4: 5}})
tm.assert_frame_equal(df, expected)
def test_ix_assign_column_mixed(self):
# GH #1142
df = DataFrame(tm.getSeriesData())
df['foo'] = 'bar'
orig = df.loc[:, 'B'].copy()
df.loc[:, 'B'] = df.loc[:, 'B'] + 1
tm.assert_series_equal(df.B, orig + 1)
# GH 3668, mixed frame with series value
df = DataFrame({'x': lrange(10), 'y': lrange(10, 20), 'z': 'bar'})
expected = df.copy()
for i in range(5):
indexer = i * 2
v = 1000 + i * 200
expected.loc[indexer, 'y'] = v
assert expected.loc[indexer, 'y'] == v
df.loc[df.x % 2 == 0, 'y'] = df.loc[df.x % 2 == 0, 'y'] * 100
tm.assert_frame_equal(df, expected)
# GH 4508, making sure consistency of assignments
df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]})
df.loc[[0, 2, ], 'b'] = [100, -100]
expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]})
tm.assert_frame_equal(df, expected)
df = DataFrame({'a': lrange(4)})
df['b'] = np.nan
df.loc[[1, 3], 'b'] = [100, -100]
expected = DataFrame({'a': [0, 1, 2, 3],
'b': [np.nan, 100, np.nan, -100]})
tm.assert_frame_equal(df, expected)
# ok, but chained assignments are dangerous
# if we turn off chained assignment it will work
with option_context('chained_assignment', None):
df = DataFrame({'a': lrange(4)})
df['b'] = np.nan
df['b'].loc[[1, 3]] = [100, -100]
tm.assert_frame_equal(df, expected)
def test_ix_get_set_consistency(self):
# GH 4544
# ix/loc get/set not consistent when
# a mixed int/string index
df = DataFrame(np.arange(16).reshape((4, 4)),
columns=['a', 'b', 8, 'c'],
index=['e', 7, 'f', 'g'])
with catch_warnings(record=True):
assert df.ix['e', 8] == 2
assert df.loc['e', 8] == 2
with catch_warnings(record=True):
df.ix['e', 8] = 42
assert df.ix['e', 8] == 42
assert df.loc['e', 8] == 42
df.loc['e', 8] = 45
with catch_warnings(record=True):
assert df.ix['e', 8] == 45
assert df.loc['e', 8] == 45
def test_ix_slicing_strings(self):
# see gh-3836
data = {'Classification':
['SA EQUITY CFD', 'bbb', 'SA EQUITY', 'SA SSF', 'aaa'],
'Random': [1, 2, 3, 4, 5],
'X': ['correct', 'wrong', 'correct', 'correct', 'wrong']}
df = DataFrame(data)
x = df[~df.Classification.isin(['SA EQUITY CFD', 'SA EQUITY', 'SA SSF'
])]
with catch_warnings(record=True):
df.ix[x.index, 'X'] = df['Classification']
expected = DataFrame({'Classification': {0: 'SA EQUITY CFD',
1: 'bbb',
2: 'SA EQUITY',
3: 'SA SSF',
4: 'aaa'},
'Random': {0: 1,
1: 2,
2: 3,
3: 4,
4: 5},
'X': {0: 'correct',
1: 'bbb',
2: 'correct',
3: 'correct',
4: 'aaa'}}) # bug was 4: 'bbb'
tm.assert_frame_equal(df, expected)
def test_ix_setitem_out_of_bounds_axis_0(self):
df = DataFrame(
np.random.randn(2, 5), index=["row%s" % i for i in range(2)],
columns=["col%s" % i for i in range(5)])
with catch_warnings(record=True):
pytest.raises(ValueError, df.ix.__setitem__, (2, 0), 100)
def test_ix_setitem_out_of_bounds_axis_1(self):
df = DataFrame(
np.random.randn(5, 2), index=["row%s" % i for i in range(5)],
columns=["col%s" % i for i in range(2)])
with catch_warnings(record=True):
pytest.raises(ValueError, df.ix.__setitem__, (0, 2), 100)
def test_ix_empty_list_indexer_is_ok(self):
with catch_warnings(record=True):
from pandas.util.testing import makeCustomDataframe as mkdf
df = mkdf(5, 2)
# vertical empty
tm.assert_frame_equal(df.ix[:, []], df.iloc[:, :0],
check_index_type=True,
check_column_type=True)
# horizontal empty
tm.assert_frame_equal(df.ix[[], :], df.iloc[:0, :],
check_index_type=True,
check_column_type=True)
# horizontal empty
tm.assert_frame_equal(df.ix[[]], df.iloc[:0, :],
check_index_type=True,
check_column_type=True)
def test_ix_duplicate_returns_series(self):
df = DataFrame(np.random.randn(3, 3), index=[0.1, 0.2, 0.2],
columns=list('abc'))
with catch_warnings(record=True):
r = df.ix[0.2, 'a']
e = df.loc[0.2, 'a']
tm.assert_series_equal(r, e)
@@ -0,0 +1,767 @@
""" test label based indexing with loc """
from warnings import catch_warnings, filterwarnings
import numpy as np
import pytest
from pandas.compat import PY2, StringIO, lrange
import pandas as pd
from pandas import DataFrame, Series, Timestamp, date_range
from pandas.api.types import is_scalar
from pandas.tests.indexing.common import Base
from pandas.util import testing as tm
class TestLoc(Base):
def test_loc_getitem_dups(self):
# GH 5678
# repeated gettitems on a dup index returning a ndarray
df = DataFrame(
np.random.random_sample((20, 5)),
index=['ABCDE' [x % 5] for x in range(20)])
expected = df.loc['A', 0]
result = df.loc[:, 0].loc['A']
tm.assert_series_equal(result, expected)
def test_loc_getitem_dups2(self):
# GH4726
# dup indexing with iloc/loc
df = DataFrame([[1, 2, 'foo', 'bar', Timestamp('20130101')]],
columns=['a', 'a', 'a', 'a', 'a'], index=[1])
expected = Series([1, 2, 'foo', 'bar', Timestamp('20130101')],
index=['a', 'a', 'a', 'a', 'a'], name=1)
result = df.iloc[0]
tm.assert_series_equal(result, expected)
result = df.loc[1]
tm.assert_series_equal(result, expected)
def test_loc_setitem_dups(self):
# GH 6541
df_orig = DataFrame(
{'me': list('rttti'),
'foo': list('aaade'),
'bar': np.arange(5, dtype='float64') * 1.34 + 2,
'bar2': np.arange(5, dtype='float64') * -.34 + 2}).set_index('me')
indexer = tuple(['r', ['bar', 'bar2']])
df = df_orig.copy()
df.loc[indexer] *= 2.0
tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer])
indexer = tuple(['r', 'bar'])
df = df_orig.copy()
df.loc[indexer] *= 2.0
assert df.loc[indexer] == 2.0 * df_orig.loc[indexer]
indexer = tuple(['t', ['bar', 'bar2']])
df = df_orig.copy()
df.loc[indexer] *= 2.0
tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer])
def test_loc_setitem_slice(self):
# GH10503
# assigning the same type should not change the type
df1 = DataFrame({'a': [0, 1, 1],
'b': Series([100, 200, 300], dtype='uint32')})
ix = df1['a'] == 1
newb1 = df1.loc[ix, 'b'] + 1
df1.loc[ix, 'b'] = newb1
expected = DataFrame({'a': [0, 1, 1],
'b': Series([100, 201, 301], dtype='uint32')})
tm.assert_frame_equal(df1, expected)
# assigning a new type should get the inferred type
df2 = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
dtype='uint64')
ix = df1['a'] == 1
newb2 = df2.loc[ix, 'b']
df1.loc[ix, 'b'] = newb2
expected = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
dtype='uint64')
tm.assert_frame_equal(df2, expected)
def test_loc_getitem_int(self):
# int label
self.check_result('int label', 'loc', 2, 'ix', 2,
typs=['ints', 'uints'], axes=0)
self.check_result('int label', 'loc', 3, 'ix', 3,
typs=['ints', 'uints'], axes=1)
self.check_result('int label', 'loc', 4, 'ix', 4,
typs=['ints', 'uints'], axes=2)
self.check_result('int label', 'loc', 2, 'ix', 2,
typs=['label'], fails=KeyError)
def test_loc_getitem_label(self):
# label
self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['labels'],
axes=0)
self.check_result('label', 'loc', 'null', 'ix', 'null', typs=['mixed'],
axes=0)
self.check_result('label', 'loc', 8, 'ix', 8, typs=['mixed'], axes=0)
self.check_result('label', 'loc', Timestamp('20130102'), 'ix', 1,
typs=['ts'], axes=0)
self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['empty'],
fails=KeyError)
def test_loc_getitem_label_out_of_range(self):
# out of range label
self.check_result('label range', 'loc', 'f', 'ix', 'f',
typs=['ints', 'uints', 'labels', 'mixed', 'ts'],
fails=KeyError)
self.check_result('label range', 'loc', 'f', 'ix', 'f',
typs=['floats'], fails=KeyError)
self.check_result('label range', 'loc', 20, 'ix', 20,
typs=['ints', 'uints', 'mixed'], fails=KeyError)
self.check_result('label range', 'loc', 20, 'ix', 20,
typs=['labels'], fails=TypeError)
self.check_result('label range', 'loc', 20, 'ix', 20, typs=['ts'],
axes=0, fails=TypeError)
self.check_result('label range', 'loc', 20, 'ix', 20, typs=['floats'],
axes=0, fails=KeyError)
def test_loc_getitem_label_list(self):
# list of labels
self.check_result('list lbl', 'loc', [0, 2, 4], 'ix', [0, 2, 4],
typs=['ints', 'uints'], axes=0)
self.check_result('list lbl', 'loc', [3, 6, 9], 'ix', [3, 6, 9],
typs=['ints', 'uints'], axes=1)
self.check_result('list lbl', 'loc', [4, 8, 12], 'ix', [4, 8, 12],
typs=['ints', 'uints'], axes=2)
self.check_result('list lbl', 'loc', ['a', 'b', 'd'], 'ix',
['a', 'b', 'd'], typs=['labels'], axes=0)
self.check_result('list lbl', 'loc', ['A', 'B', 'C'], 'ix',
['A', 'B', 'C'], typs=['labels'], axes=1)
self.check_result('list lbl', 'loc', ['Z', 'Y', 'W'], 'ix',
['Z', 'Y', 'W'], typs=['labels'], axes=2)
self.check_result('list lbl', 'loc', [2, 8, 'null'], 'ix',
[2, 8, 'null'], typs=['mixed'], axes=0)
self.check_result('list lbl', 'loc',
[Timestamp('20130102'), Timestamp('20130103')], 'ix',
[Timestamp('20130102'), Timestamp('20130103')],
typs=['ts'], axes=0)
@pytest.mark.skipif(PY2, reason=("Catching warnings unreliable with "
"Python 2 (GH #20770)"))
def test_loc_getitem_label_list_with_missing(self):
self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2],
typs=['empty'], fails=KeyError)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.check_result('list lbl', 'loc', [0, 2, 10], 'ix', [0, 2, 10],
typs=['ints', 'uints', 'floats'],
axes=0, fails=KeyError)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7],
typs=['ints', 'uints', 'floats'],
axes=1, fails=KeyError)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.check_result('list lbl', 'loc', [4, 8, 10], 'ix', [4, 8, 10],
typs=['ints', 'uints', 'floats'],
axes=2, fails=KeyError)
# GH 17758 - MultiIndex and missing keys
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.check_result('list lbl', 'loc', [(1, 3), (1, 4), (2, 5)],
'ix', [(1, 3), (1, 4), (2, 5)],
typs=['multi'],
axes=0)
def test_getitem_label_list_with_missing(self):
s = Series(range(3), index=['a', 'b', 'c'])
# consistency
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
s[['a', 'd']]
s = Series(range(3))
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
s[[0, 3]]
def test_loc_getitem_label_list_fails(self):
# fails
self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40],
typs=['ints', 'uints'], axes=1, fails=KeyError)
self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40],
typs=['ints', 'uints'], axes=2, fails=KeyError)
def test_loc_getitem_label_array_like(self):
# array like
self.check_result('array like', 'loc', Series(index=[0, 2, 4]).index,
'ix', [0, 2, 4], typs=['ints', 'uints'], axes=0)
self.check_result('array like', 'loc', Series(index=[3, 6, 9]).index,
'ix', [3, 6, 9], typs=['ints', 'uints'], axes=1)
self.check_result('array like', 'loc', Series(index=[4, 8, 12]).index,
'ix', [4, 8, 12], typs=['ints', 'uints'], axes=2)
def test_loc_getitem_bool(self):
# boolean indexers
b = [True, False, True, False]
self.check_result('bool', 'loc', b, 'ix', b,
typs=['ints', 'uints', 'labels',
'mixed', 'ts', 'floats'])
self.check_result('bool', 'loc', b, 'ix', b, typs=['empty'],
fails=KeyError)
def test_loc_getitem_int_slice(self):
# ok
self.check_result('int slice2', 'loc', slice(2, 4), 'ix', [2, 4],
typs=['ints', 'uints'], axes=0)
self.check_result('int slice2', 'loc', slice(3, 6), 'ix', [3, 6],
typs=['ints', 'uints'], axes=1)
self.check_result('int slice2', 'loc', slice(4, 8), 'ix', [4, 8],
typs=['ints', 'uints'], axes=2)
def test_loc_to_fail(self):
# GH3449
df = DataFrame(np.random.random((3, 3)),
index=['a', 'b', 'c'],
columns=['e', 'f', 'g'])
# raise a KeyError?
pytest.raises(KeyError, df.loc.__getitem__,
tuple([[1, 2], [1, 2]]))
# GH 7496
# loc should not fallback
s = Series()
s.loc[1] = 1
s.loc['a'] = 2
pytest.raises(KeyError, lambda: s.loc[-1])
pytest.raises(KeyError, lambda: s.loc[[-1, -2]])
pytest.raises(KeyError, lambda: s.loc[['4']])
s.loc[-1] = 3
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = s.loc[[-1, -2]]
expected = Series([3, np.nan], index=[-1, -2])
tm.assert_series_equal(result, expected)
s['a'] = 2
pytest.raises(KeyError, lambda: s.loc[[-2]])
del s['a']
def f():
s.loc[[-2]] = 0
pytest.raises(KeyError, f)
# inconsistency between .loc[values] and .loc[values,:]
# GH 7999
df = DataFrame([['a'], ['b']], index=[1, 2], columns=['value'])
def f():
df.loc[[3], :]
pytest.raises(KeyError, f)
def f():
df.loc[[3]]
pytest.raises(KeyError, f)
def test_loc_getitem_list_with_fail(self):
# 15747
# should KeyError if *any* missing labels
s = Series([1, 2, 3])
s.loc[[2]]
with pytest.raises(KeyError):
s.loc[[3]]
# a non-match and a match
with tm.assert_produces_warning(FutureWarning):
expected = s.loc[[2, 3]]
result = s.reindex([2, 3])
tm.assert_series_equal(result, expected)
def test_loc_getitem_label_slice(self):
# label slices (with ints)
self.check_result('lab slice', 'loc', slice(1, 3),
'ix', slice(1, 3),
typs=['labels', 'mixed', 'empty', 'ts', 'floats'],
fails=TypeError)
# real label slices
self.check_result('lab slice', 'loc', slice('a', 'c'),
'ix', slice('a', 'c'), typs=['labels'], axes=0)
self.check_result('lab slice', 'loc', slice('A', 'C'),
'ix', slice('A', 'C'), typs=['labels'], axes=1)
self.check_result('lab slice', 'loc', slice('W', 'Z'),
'ix', slice('W', 'Z'), typs=['labels'], axes=2)
self.check_result('ts slice', 'loc', slice('20130102', '20130104'),
'ix', slice('20130102', '20130104'),
typs=['ts'], axes=0)
self.check_result('ts slice', 'loc', slice('20130102', '20130104'),
'ix', slice('20130102', '20130104'),
typs=['ts'], axes=1, fails=TypeError)
self.check_result('ts slice', 'loc', slice('20130102', '20130104'),
'ix', slice('20130102', '20130104'),
typs=['ts'], axes=2, fails=TypeError)
# GH 14316
self.check_result('ts slice rev', 'loc', slice('20130104', '20130102'),
'indexer', [0, 1, 2], typs=['ts_rev'], axes=0)
self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8),
typs=['mixed'], axes=0, fails=TypeError)
self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8),
typs=['mixed'], axes=1, fails=KeyError)
self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8),
typs=['mixed'], axes=2, fails=KeyError)
self.check_result('mixed slice', 'loc', slice(2, 4, 2), 'ix', slice(
2, 4, 2), typs=['mixed'], axes=0, fails=TypeError)
def test_loc_index(self):
# gh-17131
# a boolean index should index like a boolean numpy array
df = DataFrame(
np.random.random(size=(5, 10)),
index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"])
mask = df.index.map(lambda x: "alpha" in x)
expected = df.loc[np.array(mask)]
result = df.loc[mask]
tm.assert_frame_equal(result, expected)
result = df.loc[mask.values]
tm.assert_frame_equal(result, expected)
def test_loc_general(self):
df = DataFrame(
np.random.rand(4, 4), columns=['A', 'B', 'C', 'D'],
index=['A', 'B', 'C', 'D'])
# want this to work
result = df.loc[:, "A":"B"].iloc[0:2, :]
assert (result.columns == ['A', 'B']).all()
assert (result.index == ['A', 'B']).all()
# mixed type
result = DataFrame({'a': [Timestamp('20130101')], 'b': [1]}).iloc[0]
expected = Series([Timestamp('20130101'), 1], index=['a', 'b'], name=0)
tm.assert_series_equal(result, expected)
assert result.dtype == object
def test_loc_setitem_consistency(self):
# GH 6149
# coerce similarly for setitem and loc when rows have a null-slice
expected = DataFrame({'date': Series(0, index=range(5),
dtype=np.int64),
'val': Series(range(5), dtype=np.int64)})
df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
'val': Series(
range(5), dtype=np.int64)})
df.loc[:, 'date'] = 0
tm.assert_frame_equal(df, expected)
df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
'val': Series(range(5), dtype=np.int64)})
df.loc[:, 'date'] = np.array(0, dtype=np.int64)
tm.assert_frame_equal(df, expected)
df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
'val': Series(range(5), dtype=np.int64)})
df.loc[:, 'date'] = np.array([0, 0, 0, 0, 0], dtype=np.int64)
tm.assert_frame_equal(df, expected)
expected = DataFrame({'date': Series('foo', index=range(5)),
'val': Series(range(5), dtype=np.int64)})
df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
'val': Series(range(5), dtype=np.int64)})
df.loc[:, 'date'] = 'foo'
tm.assert_frame_equal(df, expected)
expected = DataFrame({'date': Series(1.0, index=range(5)),
'val': Series(range(5), dtype=np.int64)})
df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
'val': Series(range(5), dtype=np.int64)})
df.loc[:, 'date'] = 1.0
tm.assert_frame_equal(df, expected)
# GH 15494
# setting on frame with single row
df = DataFrame({'date': Series([Timestamp('20180101')])})
df.loc[:, 'date'] = 'string'
expected = DataFrame({'date': Series(['string'])})
tm.assert_frame_equal(df, expected)
def test_loc_setitem_consistency_empty(self):
# empty (essentially noops)
expected = DataFrame(columns=['x', 'y'])
expected['x'] = expected['x'].astype(np.int64)
df = DataFrame(columns=['x', 'y'])
df.loc[:, 'x'] = 1
tm.assert_frame_equal(df, expected)
df = DataFrame(columns=['x', 'y'])
df['x'] = 1
tm.assert_frame_equal(df, expected)
def test_loc_setitem_consistency_slice_column_len(self):
# .loc[:,column] setting with slice == len of the column
# GH10408
data = """Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat
Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse
Region,Site,RespondentID,,,,,
Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes,
Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes
Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes,
Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No"""
df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2])
df.loc[:, ('Respondent', 'StartDate')] = pd.to_datetime(df.loc[:, (
'Respondent', 'StartDate')])
df.loc[:, ('Respondent', 'EndDate')] = pd.to_datetime(df.loc[:, (
'Respondent', 'EndDate')])
df.loc[:, ('Respondent', 'Duration')] = df.loc[:, (
'Respondent', 'EndDate')] - df.loc[:, ('Respondent', 'StartDate')]
df.loc[:, ('Respondent', 'Duration')] = df.loc[:, (
'Respondent', 'Duration')].astype('timedelta64[s]')
expected = Series([1380, 720, 840, 2160.], index=df.index,
name=('Respondent', 'Duration'))
tm.assert_series_equal(df[('Respondent', 'Duration')], expected)
def test_loc_setitem_frame(self):
df = self.frame_labels
result = df.iloc[0, 0]
df.loc['a', 'A'] = 1
result = df.loc['a', 'A']
assert result == 1
result = df.iloc[0, 0]
assert result == 1
df.loc[:, 'B':'D'] = 0
expected = df.loc[:, 'B':'D']
result = df.iloc[:, 1:]
tm.assert_frame_equal(result, expected)
# GH 6254
# setting issue
df = DataFrame(index=[3, 5, 4], columns=['A'])
df.loc[[4, 3, 5], 'A'] = np.array([1, 2, 3], dtype='int64')
expected = DataFrame(dict(A=Series(
[1, 2, 3], index=[4, 3, 5]))).reindex(index=[3, 5, 4])
tm.assert_frame_equal(df, expected)
# GH 6252
# setting with an empty frame
keys1 = ['@' + str(i) for i in range(5)]
val1 = np.arange(5, dtype='int64')
keys2 = ['@' + str(i) for i in range(4)]
val2 = np.arange(4, dtype='int64')
index = list(set(keys1).union(keys2))
df = DataFrame(index=index)
df['A'] = np.nan
df.loc[keys1, 'A'] = val1
df['B'] = np.nan
df.loc[keys2, 'B'] = val2
expected = DataFrame(dict(A=Series(val1, index=keys1), B=Series(
val2, index=keys2))).reindex(index=index)
tm.assert_frame_equal(df, expected)
# GH 8669
# invalid coercion of nan -> int
df = DataFrame({'A': [1, 2, 3], 'B': np.nan})
df.loc[df.B > df.A, 'B'] = df.A
expected = DataFrame({'A': [1, 2, 3], 'B': np.nan})
tm.assert_frame_equal(df, expected)
# GH 6546
# setting with mixed labels
df = DataFrame({1: [1, 2], 2: [3, 4], 'a': ['a', 'b']})
result = df.loc[0, [1, 2]]
expected = Series([1, 3], index=[1, 2], dtype=object, name=0)
tm.assert_series_equal(result, expected)
expected = DataFrame({1: [5, 2], 2: [6, 4], 'a': ['a', 'b']})
df.loc[0, [1, 2]] = [5, 6]
tm.assert_frame_equal(df, expected)
def test_loc_setitem_frame_multiples(self):
# multiple setting
df = DataFrame({'A': ['foo', 'bar', 'baz'],
'B': Series(
range(3), dtype=np.int64)})
rhs = df.loc[1:2]
rhs.index = df.index[0:2]
df.loc[0:1] = rhs
expected = DataFrame({'A': ['bar', 'baz', 'baz'],
'B': Series(
[1, 2, 2], dtype=np.int64)})
tm.assert_frame_equal(df, expected)
# multiple setting with frame on rhs (with M8)
df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
'val': Series(
range(5), dtype=np.int64)})
expected = DataFrame({'date': [Timestamp('20000101'), Timestamp(
'20000102'), Timestamp('20000101'), Timestamp('20000102'),
Timestamp('20000103')],
'val': Series(
[0, 1, 0, 1, 2], dtype=np.int64)})
rhs = df.loc[0:2]
rhs.index = df.index[2:5]
df.loc[2:4] = rhs
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize(
'indexer', [['A'], slice(None, 'A', None), np.array(['A'])])
@pytest.mark.parametrize(
'value', [['Z'], np.array(['Z'])])
def test_loc_setitem_with_scalar_index(self, indexer, value):
# GH #19474
# assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated
# elementwisely, not using "setter('A', ['Z'])".
df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
df.loc[0, indexer] = value
result = df.loc[0, 'A']
assert is_scalar(result) and result == 'Z'
def test_loc_coerceion(self):
# 12411
df = DataFrame({'date': [Timestamp('20130101').tz_localize('UTC'),
pd.NaT]})
expected = df.dtypes
result = df.iloc[[0]]
tm.assert_series_equal(result.dtypes, expected)
result = df.iloc[[1]]
tm.assert_series_equal(result.dtypes, expected)
# 12045
import datetime
df = DataFrame({'date': [datetime.datetime(2012, 1, 1),
datetime.datetime(1012, 1, 2)]})
expected = df.dtypes
result = df.iloc[[0]]
tm.assert_series_equal(result.dtypes, expected)
result = df.iloc[[1]]
tm.assert_series_equal(result.dtypes, expected)
# 11594
df = DataFrame({'text': ['some words'] + [None] * 9})
expected = df.dtypes
result = df.iloc[0:2]
tm.assert_series_equal(result.dtypes, expected)
result = df.iloc[3:]
tm.assert_series_equal(result.dtypes, expected)
def test_loc_non_unique(self):
# GH3659
# non-unique indexer with loc slice
# https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs
# these are going to raise because the we are non monotonic
df = DataFrame({'A': [1, 2, 3, 4, 5, 6],
'B': [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3])
pytest.raises(KeyError, df.loc.__getitem__,
tuple([slice(1, None)]))
pytest.raises(KeyError, df.loc.__getitem__,
tuple([slice(0, None)]))
pytest.raises(KeyError, df.loc.__getitem__, tuple([slice(1, 2)]))
# monotonic are ok
df = DataFrame({'A': [1, 2, 3, 4, 5, 6],
'B': [3, 4, 5, 6, 7, 8]},
index=[0, 1, 0, 1, 2, 3]).sort_index(axis=0)
result = df.loc[1:]
expected = DataFrame({'A': [2, 4, 5, 6], 'B': [4, 6, 7, 8]},
index=[1, 1, 2, 3])
tm.assert_frame_equal(result, expected)
result = df.loc[0:]
tm.assert_frame_equal(result, df)
result = df.loc[1:2]
expected = DataFrame({'A': [2, 4, 5], 'B': [4, 6, 7]},
index=[1, 1, 2])
tm.assert_frame_equal(result, expected)
def test_loc_non_unique_memory_error(self):
# GH 4280
# non_unique index with a large selection triggers a memory error
columns = list('ABCDEFG')
def gen_test(l, l2):
return pd.concat([
DataFrame(np.random.randn(l, len(columns)),
index=lrange(l), columns=columns),
DataFrame(np.ones((l2, len(columns))),
index=[0] * l2, columns=columns)])
def gen_expected(df, mask):
len_mask = len(mask)
return pd.concat([df.take([0]),
DataFrame(np.ones((len_mask, len(columns))),
index=[0] * len_mask,
columns=columns),
df.take(mask[1:])])
df = gen_test(900, 100)
assert df.index.is_unique is False
mask = np.arange(100)
result = df.loc[mask]
expected = gen_expected(df, mask)
tm.assert_frame_equal(result, expected)
df = gen_test(900000, 100000)
assert df.index.is_unique is False
mask = np.arange(100000)
result = df.loc[mask]
expected = gen_expected(df, mask)
tm.assert_frame_equal(result, expected)
def test_loc_name(self):
# GH 3880
df = DataFrame([[1, 1], [1, 1]])
df.index.name = 'index_name'
result = df.iloc[[0, 1]].index.name
assert result == 'index_name'
with catch_warnings(record=True):
filterwarnings("ignore", "\\n.ix", DeprecationWarning)
result = df.ix[[0, 1]].index.name
assert result == 'index_name'
result = df.loc[[0, 1]].index.name
assert result == 'index_name'
def test_loc_empty_list_indexer_is_ok(self):
from pandas.util.testing import makeCustomDataframe as mkdf
df = mkdf(5, 2)
# vertical empty
tm.assert_frame_equal(df.loc[:, []], df.iloc[:, :0],
check_index_type=True, check_column_type=True)
# horizontal empty
tm.assert_frame_equal(df.loc[[], :], df.iloc[:0, :],
check_index_type=True, check_column_type=True)
# horizontal empty
tm.assert_frame_equal(df.loc[[]], df.iloc[:0, :],
check_index_type=True,
check_column_type=True)
def test_identity_slice_returns_new_object(self):
# GH13873
original_df = DataFrame({'a': [1, 2, 3]})
sliced_df = original_df.loc[:]
assert sliced_df is not original_df
assert original_df[:] is not original_df
# should be a shallow copy
original_df['a'] = [4, 4, 4]
assert (sliced_df['a'] == 4).all()
# These should not return copies
assert original_df is original_df.loc[:, :]
df = DataFrame(np.random.randn(10, 4))
assert df[0] is df.loc[:, 0]
# Same tests for Series
original_series = Series([1, 2, 3, 4, 5, 6])
sliced_series = original_series.loc[:]
assert sliced_series is not original_series
assert original_series[:] is not original_series
original_series[:3] = [7, 8, 9]
assert all(sliced_series[:3] == [7, 8, 9])
def test_loc_uint64(self):
# GH20722
# Test whether loc accept uint64 max value as index.
s = pd.Series([1, 2],
index=[np.iinfo('uint64').max - 1,
np.iinfo('uint64').max])
result = s.loc[np.iinfo('uint64').max - 1]
expected = s.iloc[0]
assert result == expected
result = s.loc[[np.iinfo('uint64').max - 1]]
expected = s.iloc[[0]]
tm.assert_series_equal(result, expected)
result = s.loc[[np.iinfo('uint64').max - 1,
np.iinfo('uint64').max]]
tm.assert_series_equal(result, s)
def test_loc_setitem_empty_append(self):
# GH6173, various appends to an empty dataframe
data = [1, 2, 3]
expected = DataFrame({'x': data, 'y': [None] * len(data)})
# appends to fit length of data
df = DataFrame(columns=['x', 'y'])
df.loc[:, 'x'] = data
tm.assert_frame_equal(df, expected)
# only appends one value
expected = DataFrame({'x': [1.0], 'y': [np.nan]})
df = DataFrame(columns=['x', 'y'],
dtype=np.float)
df.loc[0, 'x'] = expected.loc[0, 'x']
tm.assert_frame_equal(df, expected)
def test_loc_setitem_empty_append_raises(self):
# GH6173, various appends to an empty dataframe
data = [1, 2]
df = DataFrame(columns=['x', 'y'])
msg = (r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] "
r"are in the \[index\]")
with pytest.raises(KeyError, match=msg):
df.loc[[0, 1], 'x'] = data
msg = "cannot copy sequence with size 2 to array axis with dimension 0"
with pytest.raises(ValueError, match=msg):
df.loc[0:2, 'x'] = data
@@ -0,0 +1,214 @@
from warnings import catch_warnings
import numpy as np
import pytest
from pandas import DataFrame, Panel, date_range
from pandas.util import testing as tm
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class TestPanel(object):
def test_iloc_getitem_panel(self):
with catch_warnings(record=True):
# GH 7189
p = Panel(np.arange(4 * 3 * 2).reshape(4, 3, 2),
items=['A', 'B', 'C', 'D'],
major_axis=['a', 'b', 'c'],
minor_axis=['one', 'two'])
result = p.iloc[1]
expected = p.loc['B']
tm.assert_frame_equal(result, expected)
result = p.iloc[1, 1]
expected = p.loc['B', 'b']
tm.assert_series_equal(result, expected)
result = p.iloc[1, 1, 1]
expected = p.loc['B', 'b', 'two']
assert result == expected
# slice
result = p.iloc[1:3]
expected = p.loc[['B', 'C']]
tm.assert_panel_equal(result, expected)
result = p.iloc[:, 0:2]
expected = p.loc[:, ['a', 'b']]
tm.assert_panel_equal(result, expected)
# list of integers
result = p.iloc[[0, 2]]
expected = p.loc[['A', 'C']]
tm.assert_panel_equal(result, expected)
# neg indices
result = p.iloc[[-1, 1], [-1, 1]]
expected = p.loc[['D', 'B'], ['c', 'b']]
tm.assert_panel_equal(result, expected)
# dups indices
result = p.iloc[[-1, -1, 1], [-1, 1]]
expected = p.loc[['D', 'D', 'B'], ['c', 'b']]
tm.assert_panel_equal(result, expected)
# combined
result = p.iloc[0, [True, True], [0, 1]]
expected = p.loc['A', ['a', 'b'], ['one', 'two']]
tm.assert_frame_equal(result, expected)
# out-of-bounds exception
with pytest.raises(IndexError):
p.iloc[tuple([10, 5])]
with pytest.raises(IndexError):
p.iloc[0, [True, True], [0, 1, 2]]
# trying to use a label
with pytest.raises(ValueError):
p.iloc[tuple(['j', 'D'])]
# GH
p = Panel(
np.random.rand(4, 3, 2), items=['A', 'B', 'C', 'D'],
major_axis=['U', 'V', 'W'], minor_axis=['X', 'Y'])
expected = p['A']
result = p.iloc[0, :, :]
tm.assert_frame_equal(result, expected)
result = p.iloc[0, [True, True, True], :]
tm.assert_frame_equal(result, expected)
result = p.iloc[0, [True, True, True], [0, 1]]
tm.assert_frame_equal(result, expected)
with pytest.raises(IndexError):
p.iloc[0, [True, True, True], [0, 1, 2]]
with pytest.raises(IndexError):
p.iloc[0, [True, True, True], [2]]
def test_iloc_panel_issue(self):
with catch_warnings(record=True):
# see gh-3617
p = Panel(np.random.randn(4, 4, 4))
assert p.iloc[:3, :3, :3].shape == (3, 3, 3)
assert p.iloc[1, :3, :3].shape == (3, 3)
assert p.iloc[:3, 1, :3].shape == (3, 3)
assert p.iloc[:3, :3, 1].shape == (3, 3)
assert p.iloc[1, 1, :3].shape == (3, )
assert p.iloc[1, :3, 1].shape == (3, )
assert p.iloc[:3, 1, 1].shape == (3, )
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
def test_panel_getitem(self):
with catch_warnings(record=True):
# GH4016, date selection returns a frame when a partial string
# selection
ind = date_range(start="2000", freq="D", periods=1000)
df = DataFrame(
np.random.randn(
len(ind), 5), index=ind, columns=list('ABCDE'))
panel = Panel({'frame_' + c: df for c in list('ABC')})
test2 = panel.loc[:, "2002":"2002-12-31"]
test1 = panel.loc[:, "2002"]
tm.assert_panel_equal(test1, test2)
# GH8710
# multi-element getting with a list
panel = tm.makePanel()
expected = panel.iloc[[0, 1]]
result = panel.loc[['ItemA', 'ItemB']]
tm.assert_panel_equal(result, expected)
result = panel.loc[['ItemA', 'ItemB'], :, :]
tm.assert_panel_equal(result, expected)
result = panel[['ItemA', 'ItemB']]
tm.assert_panel_equal(result, expected)
result = panel.loc['ItemA':'ItemB']
tm.assert_panel_equal(result, expected)
with catch_warnings(record=True):
result = panel.ix[['ItemA', 'ItemB']]
tm.assert_panel_equal(result, expected)
# with an object-like
# GH 9140
class TestObject(object):
def __str__(self):
return "TestObject"
obj = TestObject()
p = Panel(np.random.randn(1, 5, 4), items=[obj],
major_axis=date_range('1/1/2000', periods=5),
minor_axis=['A', 'B', 'C', 'D'])
expected = p.iloc[0]
result = p[obj]
tm.assert_frame_equal(result, expected)
def test_panel_setitem(self):
with catch_warnings(record=True):
# GH 7763
# loc and setitem have setting differences
np.random.seed(0)
index = range(3)
columns = list('abc')
panel = Panel({'A': DataFrame(np.random.randn(3, 3),
index=index, columns=columns),
'B': DataFrame(np.random.randn(3, 3),
index=index, columns=columns),
'C': DataFrame(np.random.randn(3, 3),
index=index, columns=columns)})
replace = DataFrame(np.eye(3, 3), index=range(3), columns=columns)
expected = Panel({'A': replace, 'B': replace, 'C': replace})
p = panel.copy()
for idx in list('ABC'):
p[idx] = replace
tm.assert_panel_equal(p, expected)
p = panel.copy()
for idx in list('ABC'):
p.loc[idx, :, :] = replace
tm.assert_panel_equal(p, expected)
def test_panel_assignment(self):
with catch_warnings(record=True):
# GH3777
wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
major_axis=date_range('1/1/2000', periods=5),
minor_axis=['A', 'B', 'C', 'D'])
wp2 = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
major_axis=date_range('1/1/2000', periods=5),
minor_axis=['A', 'B', 'C', 'D'])
# TODO: unused?
# expected = wp.loc[['Item1', 'Item2'], :, ['A', 'B']]
with pytest.raises(NotImplementedError):
wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = wp2.loc[
['Item1', 'Item2'], :, ['A', 'B']]
# to_assign = wp2.loc[['Item1', 'Item2'], :, ['A', 'B']]
# wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = to_assign
# result = wp.loc[['Item1', 'Item2'], :, ['A', 'B']]
# tm.assert_panel_equal(result,expected)
@@ -0,0 +1,620 @@
"""
test setting *parts* of objects both positionally and label based
TOD: these should be split among the indexer tests
"""
from warnings import catch_warnings
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Index, Panel, Series, date_range
from pandas.util import testing as tm
class TestPartialSetting(object):
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
def test_partial_setting(self):
# GH2578, allow ix and friends to partially set
# series
s_orig = Series([1, 2, 3])
s = s_orig.copy()
s[5] = 5
expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
tm.assert_series_equal(s, expected)
s = s_orig.copy()
s.loc[5] = 5
expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
tm.assert_series_equal(s, expected)
s = s_orig.copy()
s[5] = 5.
expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5])
tm.assert_series_equal(s, expected)
s = s_orig.copy()
s.loc[5] = 5.
expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5])
tm.assert_series_equal(s, expected)
# iloc/iat raise
s = s_orig.copy()
with pytest.raises(IndexError):
s.iloc[3] = 5.
with pytest.raises(IndexError):
s.iat[3] = 5.
# ## frame ##
df_orig = DataFrame(
np.arange(6).reshape(3, 2), columns=['A', 'B'], dtype='int64')
# iloc/iat raise
df = df_orig.copy()
with pytest.raises(IndexError):
df.iloc[4, 2] = 5.
with pytest.raises(IndexError):
df.iat[4, 2] = 5.
# row setting where it exists
expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]}))
df = df_orig.copy()
df.iloc[1] = df.iloc[2]
tm.assert_frame_equal(df, expected)
expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]}))
df = df_orig.copy()
df.loc[1] = df.loc[2]
tm.assert_frame_equal(df, expected)
# like 2578, partial setting with dtype preservation
expected = DataFrame(dict({'A': [0, 2, 4, 4], 'B': [1, 3, 5, 5]}))
df = df_orig.copy()
df.loc[3] = df.loc[2]
tm.assert_frame_equal(df, expected)
# single dtype frame, overwrite
expected = DataFrame(dict({'A': [0, 2, 4], 'B': [0, 2, 4]}))
df = df_orig.copy()
with catch_warnings(record=True):
df.ix[:, 'B'] = df.ix[:, 'A']
tm.assert_frame_equal(df, expected)
# mixed dtype frame, overwrite
expected = DataFrame(dict({'A': [0, 2, 4], 'B': Series([0, 2, 4])}))
df = df_orig.copy()
df['B'] = df['B'].astype(np.float64)
with catch_warnings(record=True):
df.ix[:, 'B'] = df.ix[:, 'A']
tm.assert_frame_equal(df, expected)
# single dtype frame, partial setting
expected = df_orig.copy()
expected['C'] = df['A']
df = df_orig.copy()
with catch_warnings(record=True):
df.ix[:, 'C'] = df.ix[:, 'A']
tm.assert_frame_equal(df, expected)
# mixed frame, partial setting
expected = df_orig.copy()
expected['C'] = df['A']
df = df_orig.copy()
with catch_warnings(record=True):
df.ix[:, 'C'] = df.ix[:, 'A']
tm.assert_frame_equal(df, expected)
with catch_warnings(record=True):
# ## panel ##
p_orig = Panel(np.arange(16).reshape(2, 4, 2),
items=['Item1', 'Item2'],
major_axis=pd.date_range('2001/1/12', periods=4),
minor_axis=['A', 'B'], dtype='float64')
# panel setting via item
p_orig = Panel(np.arange(16).reshape(2, 4, 2),
items=['Item1', 'Item2'],
major_axis=pd.date_range('2001/1/12', periods=4),
minor_axis=['A', 'B'], dtype='float64')
expected = p_orig.copy()
expected['Item3'] = expected['Item1']
p = p_orig.copy()
p.loc['Item3'] = p['Item1']
tm.assert_panel_equal(p, expected)
# panel with aligned series
expected = p_orig.copy()
expected = expected.transpose(2, 1, 0)
expected['C'] = DataFrame({'Item1': [30, 30, 30, 30],
'Item2': [32, 32, 32, 32]},
index=p_orig.major_axis)
expected = expected.transpose(2, 1, 0)
p = p_orig.copy()
p.loc[:, :, 'C'] = Series([30, 32], index=p_orig.items)
tm.assert_panel_equal(p, expected)
# GH 8473
dates = date_range('1/1/2000', periods=8)
df_orig = DataFrame(np.random.randn(8, 4), index=dates,
columns=['A', 'B', 'C', 'D'])
expected = pd.concat([df_orig,
DataFrame({'A': 7},
index=[dates[-1] + dates.freq])],
sort=True)
df = df_orig.copy()
df.loc[dates[-1] + dates.freq, 'A'] = 7
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.at[dates[-1] + dates.freq, 'A'] = 7
tm.assert_frame_equal(df, expected)
exp_other = DataFrame({0: 7}, index=[dates[-1] + dates.freq])
expected = pd.concat([df_orig, exp_other], axis=1)
df = df_orig.copy()
df.loc[dates[-1] + dates.freq, 0] = 7
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.at[dates[-1] + dates.freq, 0] = 7
tm.assert_frame_equal(df, expected)
def test_partial_setting_mixed_dtype(self):
# in a mixed dtype environment, try to preserve dtypes
# by appending
df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"])
s = df.loc[1].copy()
s.name = 2
expected = df.append(s)
df.loc[2] = df.loc[1]
tm.assert_frame_equal(df, expected)
# columns will align
df = DataFrame(columns=['A', 'B'])
df.loc[0] = Series(1, index=range(4))
tm.assert_frame_equal(df, DataFrame(columns=['A', 'B'], index=[0]))
# columns will align
df = DataFrame(columns=['A', 'B'])
df.loc[0] = Series(1, index=['B'])
exp = DataFrame([[np.nan, 1]], columns=['A', 'B'],
index=[0], dtype='float64')
tm.assert_frame_equal(df, exp)
# list-like must conform
df = DataFrame(columns=['A', 'B'])
with pytest.raises(ValueError):
df.loc[0] = [1, 2, 3]
# TODO: #15657, these are left as object and not coerced
df = DataFrame(columns=['A', 'B'])
df.loc[3] = [6, 7]
exp = DataFrame([[6, 7]], index=[3], columns=['A', 'B'],
dtype='object')
tm.assert_frame_equal(df, exp)
def test_series_partial_set(self):
# partial set with new index
# Regression from GH4825
ser = Series([0.1, 0.2], index=[1, 2])
# loc equiv to .reindex
expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[[3, 2, 3]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = ser.reindex([3, 2, 3])
tm.assert_series_equal(result, expected, check_index_type=True)
expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, 'x'])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[[3, 2, 3, 'x']]
tm.assert_series_equal(result, expected, check_index_type=True)
result = ser.reindex([3, 2, 3, 'x'])
tm.assert_series_equal(result, expected, check_index_type=True)
expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1])
result = ser.loc[[2, 2, 1]]
tm.assert_series_equal(result, expected, check_index_type=True)
expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, 'x', 1])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[[2, 2, 'x', 1]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = ser.reindex([2, 2, 'x', 1])
tm.assert_series_equal(result, expected, check_index_type=True)
# raises as nothing in in the index
pytest.raises(KeyError, lambda: ser.loc[[3, 3, 3]])
expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[[2, 2, 3]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = ser.reindex([2, 2, 3])
tm.assert_series_equal(result, expected, check_index_type=True)
s = Series([0.1, 0.2, 0.3], index=[1, 2, 3])
expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = s.loc[[3, 4, 4]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = s.reindex([3, 4, 4])
tm.assert_series_equal(result, expected, check_index_type=True)
s = Series([0.1, 0.2, 0.3, 0.4],
index=[1, 2, 3, 4])
expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = s.loc[[5, 3, 3]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = s.reindex([5, 3, 3])
tm.assert_series_equal(result, expected, check_index_type=True)
s = Series([0.1, 0.2, 0.3, 0.4],
index=[1, 2, 3, 4])
expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = s.loc[[5, 4, 4]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = s.reindex([5, 4, 4])
tm.assert_series_equal(result, expected, check_index_type=True)
s = Series([0.1, 0.2, 0.3, 0.4],
index=[4, 5, 6, 7])
expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = s.loc[[7, 2, 2]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = s.reindex([7, 2, 2])
tm.assert_series_equal(result, expected, check_index_type=True)
s = Series([0.1, 0.2, 0.3, 0.4],
index=[1, 2, 3, 4])
expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5])
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = s.loc[[4, 5, 5]]
tm.assert_series_equal(result, expected, check_index_type=True)
result = s.reindex([4, 5, 5])
tm.assert_series_equal(result, expected, check_index_type=True)
# iloc
expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1])
result = ser.iloc[[1, 1, 0, 0]]
tm.assert_series_equal(result, expected, check_index_type=True)
def test_series_partial_set_with_name(self):
# GH 11497
idx = Index([1, 2], dtype='int64', name='idx')
ser = Series([0.1, 0.2], index=idx, name='s')
# loc
exp_idx = Index([3, 2, 3], dtype='int64', name='idx')
expected = Series([np.nan, 0.2, np.nan], index=exp_idx, name='s')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[[3, 2, 3]]
tm.assert_series_equal(result, expected, check_index_type=True)
exp_idx = Index([3, 2, 3, 'x'], dtype='object', name='idx')
expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx,
name='s')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[[3, 2, 3, 'x']]
tm.assert_series_equal(result, expected, check_index_type=True)
exp_idx = Index([2, 2, 1], dtype='int64', name='idx')
expected = Series([0.2, 0.2, 0.1], index=exp_idx, name='s')
result = ser.loc[[2, 2, 1]]
tm.assert_series_equal(result, expected, check_index_type=True)
exp_idx = Index([2, 2, 'x', 1], dtype='object', name='idx')
expected = Series([0.2, 0.2, np.nan, 0.1], index=exp_idx, name='s')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[[2, 2, 'x', 1]]
tm.assert_series_equal(result, expected, check_index_type=True)
# raises as nothing in in the index
pytest.raises(KeyError, lambda: ser.loc[[3, 3, 3]])
exp_idx = Index([2, 2, 3], dtype='int64', name='idx')
expected = Series([0.2, 0.2, np.nan], index=exp_idx, name='s')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = ser.loc[[2, 2, 3]]
tm.assert_series_equal(result, expected, check_index_type=True)
exp_idx = Index([3, 4, 4], dtype='int64', name='idx')
expected = Series([0.3, np.nan, np.nan], index=exp_idx, name='s')
idx = Index([1, 2, 3], dtype='int64', name='idx')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = Series([0.1, 0.2, 0.3],
index=idx,
name='s').loc[[3, 4, 4]]
tm.assert_series_equal(result, expected, check_index_type=True)
exp_idx = Index([5, 3, 3], dtype='int64', name='idx')
expected = Series([np.nan, 0.3, 0.3], index=exp_idx, name='s')
idx = Index([1, 2, 3, 4], dtype='int64', name='idx')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = Series([0.1, 0.2, 0.3, 0.4], index=idx,
name='s').loc[[5, 3, 3]]
tm.assert_series_equal(result, expected, check_index_type=True)
exp_idx = Index([5, 4, 4], dtype='int64', name='idx')
expected = Series([np.nan, 0.4, 0.4], index=exp_idx, name='s')
idx = Index([1, 2, 3, 4], dtype='int64', name='idx')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = Series([0.1, 0.2, 0.3, 0.4], index=idx,
name='s').loc[[5, 4, 4]]
tm.assert_series_equal(result, expected, check_index_type=True)
exp_idx = Index([7, 2, 2], dtype='int64', name='idx')
expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s')
idx = Index([4, 5, 6, 7], dtype='int64', name='idx')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = Series([0.1, 0.2, 0.3, 0.4], index=idx,
name='s').loc[[7, 2, 2]]
tm.assert_series_equal(result, expected, check_index_type=True)
exp_idx = Index([4, 5, 5], dtype='int64', name='idx')
expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s')
idx = Index([1, 2, 3, 4], dtype='int64', name='idx')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = Series([0.1, 0.2, 0.3, 0.4], index=idx,
name='s').loc[[4, 5, 5]]
tm.assert_series_equal(result, expected, check_index_type=True)
# iloc
exp_idx = Index([2, 2, 1, 1], dtype='int64', name='idx')
expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name='s')
result = ser.iloc[[1, 1, 0, 0]]
tm.assert_series_equal(result, expected, check_index_type=True)
@pytest.mark.filterwarnings("ignore:\\n.ix")
def test_partial_set_invalid(self):
# GH 4940
# allow only setting of 'valid' values
orig = tm.makeTimeDataFrame()
df = orig.copy()
# don't allow not string inserts
with pytest.raises(TypeError):
with catch_warnings(record=True):
df.loc[100.0, :] = df.ix[0]
with pytest.raises(TypeError):
with catch_warnings(record=True):
df.loc[100, :] = df.ix[0]
with pytest.raises(TypeError):
with catch_warnings(record=True):
df.ix[100.0, :] = df.ix[0]
with pytest.raises(ValueError):
with catch_warnings(record=True):
df.ix[100, :] = df.ix[0]
# allow object conversion here
df = orig.copy()
with catch_warnings(record=True):
df.loc['a', :] = df.ix[0]
exp = orig.append(Series(df.ix[0], name='a'))
tm.assert_frame_equal(df, exp)
tm.assert_index_equal(df.index, Index(orig.index.tolist() + ['a']))
assert df.index.dtype == 'object'
def test_partial_set_empty_series(self):
# GH5226
# partially set with an empty object series
s = Series()
s.loc[1] = 1
tm.assert_series_equal(s, Series([1], index=[1]))
s.loc[3] = 3
tm.assert_series_equal(s, Series([1, 3], index=[1, 3]))
s = Series()
s.loc[1] = 1.
tm.assert_series_equal(s, Series([1.], index=[1]))
s.loc[3] = 3.
tm.assert_series_equal(s, Series([1., 3.], index=[1, 3]))
s = Series()
s.loc['foo'] = 1
tm.assert_series_equal(s, Series([1], index=['foo']))
s.loc['bar'] = 3
tm.assert_series_equal(s, Series([1, 3], index=['foo', 'bar']))
s.loc[3] = 4
tm.assert_series_equal(s, Series([1, 3, 4], index=['foo', 'bar', 3]))
def test_partial_set_empty_frame(self):
# partially set with an empty object
# frame
df = DataFrame()
with pytest.raises(ValueError):
df.loc[1] = 1
with pytest.raises(ValueError):
df.loc[1] = Series([1], index=['foo'])
with pytest.raises(ValueError):
df.loc[:, 1] = 1
# these work as they don't really change
# anything but the index
# GH5632
expected = DataFrame(columns=['foo'], index=Index([], dtype='int64'))
def f():
df = DataFrame()
df['foo'] = Series([], dtype='object')
return df
tm.assert_frame_equal(f(), expected)
def f():
df = DataFrame()
df['foo'] = Series(df.index)
return df
tm.assert_frame_equal(f(), expected)
def f():
df = DataFrame()
df['foo'] = df.index
return df
tm.assert_frame_equal(f(), expected)
expected = DataFrame(columns=['foo'], index=Index([], dtype='int64'))
expected['foo'] = expected['foo'].astype('float64')
def f():
df = DataFrame()
df['foo'] = []
return df
tm.assert_frame_equal(f(), expected)
def f():
df = DataFrame()
df['foo'] = Series(np.arange(len(df)), dtype='float64')
return df
tm.assert_frame_equal(f(), expected)
def f():
df = DataFrame()
tm.assert_index_equal(df.index, Index([], dtype='object'))
df['foo'] = range(len(df))
return df
expected = DataFrame(columns=['foo'], index=Index([], dtype='int64'))
expected['foo'] = expected['foo'].astype('float64')
tm.assert_frame_equal(f(), expected)
df = DataFrame()
tm.assert_index_equal(df.columns, Index([], dtype=object))
df2 = DataFrame()
df2[1] = Series([1], index=['foo'])
df.loc[:, 1] = Series([1], index=['foo'])
tm.assert_frame_equal(df, DataFrame([[1]], index=['foo'], columns=[1]))
tm.assert_frame_equal(df, df2)
# no index to start
expected = DataFrame({0: Series(1, index=range(4))},
columns=['A', 'B', 0])
df = DataFrame(columns=['A', 'B'])
df[0] = Series(1, index=range(4))
df.dtypes
str(df)
tm.assert_frame_equal(df, expected)
df = DataFrame(columns=['A', 'B'])
df.loc[:, 0] = Series(1, index=range(4))
df.dtypes
str(df)
tm.assert_frame_equal(df, expected)
def test_partial_set_empty_frame_row(self):
# GH5720, GH5744
# don't create rows when empty
expected = DataFrame(columns=['A', 'B', 'New'],
index=Index([], dtype='int64'))
expected['A'] = expected['A'].astype('int64')
expected['B'] = expected['B'].astype('float64')
expected['New'] = expected['New'].astype('float64')
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
y = df[df.A > 5]
y['New'] = np.nan
tm.assert_frame_equal(y, expected)
# tm.assert_frame_equal(y,expected)
expected = DataFrame(columns=['a', 'b', 'c c', 'd'])
expected['d'] = expected['d'].astype('int64')
df = DataFrame(columns=['a', 'b', 'c c'])
df['d'] = 3
tm.assert_frame_equal(df, expected)
tm.assert_series_equal(df['c c'], Series(name='c c', dtype=object))
# reindex columns is ok
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
y = df[df.A > 5]
result = y.reindex(columns=['A', 'B', 'C'])
expected = DataFrame(columns=['A', 'B', 'C'],
index=Index([], dtype='int64'))
expected['A'] = expected['A'].astype('int64')
expected['B'] = expected['B'].astype('float64')
expected['C'] = expected['C'].astype('float64')
tm.assert_frame_equal(result, expected)
def test_partial_set_empty_frame_set_series(self):
# GH 5756
# setting with empty Series
df = DataFrame(Series())
tm.assert_frame_equal(df, DataFrame({0: Series()}))
df = DataFrame(Series(name='foo'))
tm.assert_frame_equal(df, DataFrame({'foo': Series()}))
def test_partial_set_empty_frame_empty_copy_assignment(self):
# GH 5932
# copy on empty with assignment fails
df = DataFrame(index=[0])
df = df.copy()
df['a'] = 0
expected = DataFrame(0, index=[0], columns=['a'])
tm.assert_frame_equal(df, expected)
def test_partial_set_empty_frame_empty_consistencies(self):
# GH 6171
# consistency on empty frames
df = DataFrame(columns=['x', 'y'])
df['x'] = [1, 2]
expected = DataFrame(dict(x=[1, 2], y=[np.nan, np.nan]))
tm.assert_frame_equal(df, expected, check_dtype=False)
df = DataFrame(columns=['x', 'y'])
df['x'] = ['1', '2']
expected = DataFrame(
dict(x=['1', '2'], y=[np.nan, np.nan]), dtype=object)
tm.assert_frame_equal(df, expected)
df = DataFrame(columns=['x', 'y'])
df.loc[0, 'x'] = 1
expected = DataFrame(dict(x=[1], y=[np.nan]))
tm.assert_frame_equal(df, expected, check_dtype=False)
@@ -0,0 +1,207 @@
""" test scalar indexing, including at and iat """
import numpy as np
import pytest
from pandas import DataFrame, Series, Timedelta, Timestamp, date_range
from pandas.tests.indexing.common import Base
from pandas.util import testing as tm
class TestScalar(Base):
def test_at_and_iat_get(self):
def _check(f, func, values=False):
if f is not None:
indicies = self.generate_indices(f, values)
for i in indicies:
result = getattr(f, func)[i]
expected = self.get_value(f, i, values)
tm.assert_almost_equal(result, expected)
for o in self._objs:
d = getattr(self, o)
# iat
for f in [d['ints'], d['uints']]:
_check(f, 'iat', values=True)
for f in [d['labels'], d['ts'], d['floats']]:
if f is not None:
pytest.raises(ValueError, self.check_values, f, 'iat')
# at
for f in [d['ints'], d['uints'], d['labels'],
d['ts'], d['floats']]:
_check(f, 'at')
def test_at_and_iat_set(self):
def _check(f, func, values=False):
if f is not None:
indicies = self.generate_indices(f, values)
for i in indicies:
getattr(f, func)[i] = 1
expected = self.get_value(f, i, values)
tm.assert_almost_equal(expected, 1)
for t in self._objs:
d = getattr(self, t)
# iat
for f in [d['ints'], d['uints']]:
_check(f, 'iat', values=True)
for f in [d['labels'], d['ts'], d['floats']]:
if f is not None:
pytest.raises(ValueError, _check, f, 'iat')
# at
for f in [d['ints'], d['uints'], d['labels'],
d['ts'], d['floats']]:
_check(f, 'at')
def test_at_iat_coercion(self):
# as timestamp is not a tuple!
dates = date_range('1/1/2000', periods=8)
df = DataFrame(np.random.randn(8, 4),
index=dates,
columns=['A', 'B', 'C', 'D'])
s = df['A']
result = s.at[dates[5]]
xp = s.values[5]
assert result == xp
# GH 7729
# make sure we are boxing the returns
s = Series(['2014-01-01', '2014-02-02'], dtype='datetime64[ns]')
expected = Timestamp('2014-02-02')
for r in [lambda: s.iat[1], lambda: s.iloc[1]]:
result = r()
assert result == expected
s = Series(['1 days', '2 days'], dtype='timedelta64[ns]')
expected = Timedelta('2 days')
for r in [lambda: s.iat[1], lambda: s.iloc[1]]:
result = r()
assert result == expected
def test_iat_invalid_args(self):
pass
def test_imethods_with_dups(self):
# GH6493
# iat/iloc with dups
s = Series(range(5), index=[1, 1, 2, 2, 3], dtype='int64')
result = s.iloc[2]
assert result == 2
result = s.iat[2]
assert result == 2
pytest.raises(IndexError, lambda: s.iat[10])
pytest.raises(IndexError, lambda: s.iat[-10])
result = s.iloc[[2, 3]]
expected = Series([2, 3], [2, 2], dtype='int64')
tm.assert_series_equal(result, expected)
df = s.to_frame()
result = df.iloc[2]
expected = Series(2, index=[0], name=2)
tm.assert_series_equal(result, expected)
result = df.iat[2, 0]
assert result == 2
def test_at_to_fail(self):
# at should not fallback
# GH 7814
s = Series([1, 2, 3], index=list('abc'))
result = s.at['a']
assert result == 1
pytest.raises(ValueError, lambda: s.at[0])
df = DataFrame({'A': [1, 2, 3]}, index=list('abc'))
result = df.at['a', 'A']
assert result == 1
pytest.raises(ValueError, lambda: df.at['a', 0])
s = Series([1, 2, 3], index=[3, 2, 1])
result = s.at[1]
assert result == 3
pytest.raises(ValueError, lambda: s.at['a'])
df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1])
result = df.at[1, 0]
assert result == 3
pytest.raises(ValueError, lambda: df.at['a', 0])
# GH 13822, incorrect error string with non-unique columns when missing
# column is accessed
df = DataFrame({'x': [1.], 'y': [2.], 'z': [3.]})
df.columns = ['x', 'x', 'z']
# Check that we get the correct value in the KeyError
with pytest.raises(KeyError, match=r"\['y'\] not in index"):
df[['x', 'y', 'z']]
def test_at_with_tz(self):
# gh-15822
df = DataFrame({'name': ['John', 'Anderson'],
'date': [Timestamp(2017, 3, 13, 13, 32, 56),
Timestamp(2017, 2, 16, 12, 10, 3)]})
df['date'] = df['date'].dt.tz_localize('Asia/Shanghai')
expected = Timestamp('2017-03-13 13:32:56+0800', tz='Asia/Shanghai')
result = df.loc[0, 'date']
assert result == expected
result = df.at[0, 'date']
assert result == expected
def test_mixed_index_at_iat_loc_iloc_series(self):
# GH 19860
s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 1, 2])
for el, item in s.iteritems():
assert s.at[el] == s.loc[el] == item
for i in range(len(s)):
assert s.iat[i] == s.iloc[i] == i + 1
with pytest.raises(KeyError):
s.at[4]
with pytest.raises(KeyError):
s.loc[4]
def test_mixed_index_at_iat_loc_iloc_dataframe(self):
# GH 19860
df = DataFrame([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]],
columns=['a', 'b', 'c', 1, 2])
for rowIdx, row in df.iterrows():
for el, item in row.iteritems():
assert df.at[rowIdx, el] == df.loc[rowIdx, el] == item
for row in range(2):
for i in range(5):
assert df.iat[row, i] == df.iloc[row, i] == row * 5 + i
with pytest.raises(KeyError):
df.at[0, 3]
with pytest.raises(KeyError):
df.loc[0, 3]
def test_iat_setter_incompatible_assignment(self):
# GH 23236
result = DataFrame({'a': [0, 1], 'b': [4, 5]})
result.iat[0, 0] = None
expected = DataFrame({"a": [None, 1], "b": [4, 5]})
tm.assert_frame_equal(result, expected)
@@ -0,0 +1,97 @@
import numpy as np
import pytest
import pandas as pd
from pandas.util import testing as tm
class TestTimedeltaIndexing(object):
def test_boolean_indexing(self):
# GH 14946
df = pd.DataFrame({'x': range(10)})
df.index = pd.to_timedelta(range(10), unit='s')
conditions = [df['x'] > 3, df['x'] == 3, df['x'] < 3]
expected_data = [[0, 1, 2, 3, 10, 10, 10, 10, 10, 10],
[0, 1, 2, 10, 4, 5, 6, 7, 8, 9],
[10, 10, 10, 3, 4, 5, 6, 7, 8, 9]]
for cond, data in zip(conditions, expected_data):
result = df.assign(x=df.mask(cond, 10).astype('int64'))
expected = pd.DataFrame(data,
index=pd.to_timedelta(range(10), unit='s'),
columns=['x'],
dtype='int64')
tm.assert_frame_equal(expected, result)
@pytest.mark.parametrize(
"indexer, expected",
[(0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
(slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]),
([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9])])
def test_list_like_indexing(self, indexer, expected):
# GH 16637
df = pd.DataFrame({'x': range(10)}, dtype="int64")
df.index = pd.to_timedelta(range(10), unit='s')
df.loc[df.index[indexer], 'x'] = 20
expected = pd.DataFrame(expected,
index=pd.to_timedelta(range(10), unit='s'),
columns=['x'],
dtype="int64")
tm.assert_frame_equal(expected, df)
def test_string_indexing(self):
# GH 16896
df = pd.DataFrame({'x': range(3)},
index=pd.to_timedelta(range(3), unit='days'))
expected = df.iloc[0]
sliced = df.loc['0 days']
tm.assert_series_equal(sliced, expected)
@pytest.mark.parametrize(
"value",
[None, pd.NaT, np.nan])
def test_masked_setitem(self, value):
# issue (#18586)
series = pd.Series([0, 1, 2], dtype='timedelta64[ns]')
series[series == series[0]] = value
expected = pd.Series([pd.NaT, 1, 2], dtype='timedelta64[ns]')
tm.assert_series_equal(series, expected)
@pytest.mark.parametrize(
"value",
[None, pd.NaT, np.nan])
def test_listlike_setitem(self, value):
# issue (#18586)
series = pd.Series([0, 1, 2], dtype='timedelta64[ns]')
series.iloc[0] = value
expected = pd.Series([pd.NaT, 1, 2], dtype='timedelta64[ns]')
tm.assert_series_equal(series, expected)
@pytest.mark.parametrize('start,stop, expected_slice', [
[np.timedelta64(0, 'ns'), None, slice(0, 11)],
[np.timedelta64(1, 'D'), np.timedelta64(6, 'D'), slice(1, 7)],
[None, np.timedelta64(4, 'D'), slice(0, 5)]])
def test_numpy_timedelta_scalar_indexing(self, start, stop,
expected_slice):
# GH 20393
s = pd.Series(range(11), pd.timedelta_range('0 days', '10 days'))
result = s.loc[slice(start, stop)]
expected = s.iloc[expected_slice]
tm.assert_series_equal(result, expected)
def test_roundtrip_thru_setitem(self):
# PR 23462
dt1 = pd.Timedelta(0)
dt2 = pd.Timedelta(28767471428571405)
df = pd.DataFrame({'dt': pd.Series([dt1, dt2])})
df_copy = df.copy()
s = pd.Series([dt1])
expected = df['dt'].iloc[1].value
df.loc[[True, False]] = s
result = df['dt'].iloc[1].value
assert expected == result
tm.assert_frame_equal(df, df_copy)