pruned venvs
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -1,307 +0,0 @@
|
||||
""" common utilities """
|
||||
|
||||
import itertools
|
||||
from warnings import catch_warnings, filterwarnings
|
||||
import numpy as np
|
||||
|
||||
from pandas.compat import lrange
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
from pandas import (Series, DataFrame, Panel, date_range, UInt64Index,
|
||||
Float64Index, MultiIndex)
|
||||
from pandas.util import testing as tm
|
||||
from pandas.io.formats.printing import pprint_thing
|
||||
|
||||
# When True, Base.check_result pretty-prints one summary line per comparison.
_verbose = False
|
||||
|
||||
|
||||
def _mklbl(prefix, n):
|
||||
return ["%s%s" % (prefix, i) for i in range(n)]
|
||||
|
||||
|
||||
def _axify(obj, key, axis):
|
||||
# create a tuple accessor
|
||||
axes = [slice(None)] * obj.ndim
|
||||
axes[axis] = key
|
||||
return tuple(axes)
|
||||
|
||||
|
||||
class Base(object):
    """Shared fixtures and comparison helpers for the indexing tests.

    ``setup_method`` builds one Series, one DataFrame and one Panel for each
    index flavour and stores them as ``self.<obj>_<typ>``.  They are then
    grouped into the dicts ``self.series``, ``self.frame`` and ``self.panel``,
    keyed by flavour name (a missing combination maps to ``None``).
    """

    _objs = set(['series', 'frame', 'panel'])
    _typs = set(['ints', 'uints', 'labels', 'mixed',
                 'ts', 'floats', 'empty', 'ts_rev', 'multi'])

    def setup_method(self, method):
        """Create the per-test fixture objects and the per-object dicts."""

        self.series_ints = Series(np.random.rand(4), index=lrange(0, 8, 2))
        self.frame_ints = DataFrame(np.random.randn(4, 4),
                                    index=lrange(0, 8, 2),
                                    columns=lrange(0, 12, 3))
        # Panel construction is wrapped in catch_warnings(record=True) so
        # that any warning it emits is captured rather than shown.
        with catch_warnings(record=True):
            self.panel_ints = Panel(np.random.rand(4, 4, 4),
                                    items=lrange(0, 8, 2),
                                    major_axis=lrange(0, 12, 3),
                                    minor_axis=lrange(0, 16, 4))

        self.series_uints = Series(np.random.rand(4),
                                   index=UInt64Index(lrange(0, 8, 2)))
        self.frame_uints = DataFrame(np.random.randn(4, 4),
                                     index=UInt64Index(lrange(0, 8, 2)),
                                     columns=UInt64Index(lrange(0, 12, 3)))
        with catch_warnings(record=True):
            self.panel_uints = Panel(np.random.rand(4, 4, 4),
                                     items=UInt64Index(lrange(0, 8, 2)),
                                     major_axis=UInt64Index(lrange(0, 12, 3)),
                                     minor_axis=UInt64Index(lrange(0, 16, 4)))

        self.series_floats = Series(np.random.rand(4),
                                    index=Float64Index(range(0, 8, 2)))
        self.frame_floats = DataFrame(np.random.randn(4, 4),
                                      index=Float64Index(range(0, 8, 2)),
                                      columns=Float64Index(range(0, 12, 3)))
        with catch_warnings(record=True):
            self.panel_floats = Panel(np.random.rand(4, 4, 4),
                                      items=Float64Index(range(0, 8, 2)),
                                      major_axis=Float64Index(range(0, 12, 3)),
                                      minor_axis=Float64Index(range(0, 16, 4)))

        m_idces = [MultiIndex.from_product([[1, 2], [3, 4]]),
                   MultiIndex.from_product([[5, 6], [7, 8]]),
                   MultiIndex.from_product([[9, 10], [11, 12]])]

        self.series_multi = Series(np.random.rand(4),
                                   index=m_idces[0])
        self.frame_multi = DataFrame(np.random.randn(4, 4),
                                     index=m_idces[0],
                                     columns=m_idces[1])
        with catch_warnings(record=True):
            self.panel_multi = Panel(np.random.rand(4, 4, 4),
                                     items=m_idces[0],
                                     major_axis=m_idces[1],
                                     minor_axis=m_idces[2])

        self.series_labels = Series(np.random.randn(4), index=list('abcd'))
        self.frame_labels = DataFrame(np.random.randn(4, 4),
                                      index=list('abcd'), columns=list('ABCD'))
        with catch_warnings(record=True):
            self.panel_labels = Panel(np.random.randn(4, 4, 4),
                                      items=list('abcd'),
                                      major_axis=list('ABCD'),
                                      minor_axis=list('ZYXW'))

        self.series_mixed = Series(np.random.randn(4), index=[2, 4, 'null', 8])
        self.frame_mixed = DataFrame(np.random.randn(4, 4),
                                     index=[2, 4, 'null', 8])
        with catch_warnings(record=True):
            self.panel_mixed = Panel(np.random.randn(4, 4, 4),
                                     items=[2, 4, 'null', 8])

        self.series_ts = Series(np.random.randn(4),
                                index=date_range('20130101', periods=4))
        self.frame_ts = DataFrame(np.random.randn(4, 4),
                                  index=date_range('20130101', periods=4))
        with catch_warnings(record=True):
            self.panel_ts = Panel(np.random.randn(4, 4, 4),
                                  items=date_range('20130101', periods=4))

        dates_rev = (date_range('20130101', periods=4)
                     .sort_values(ascending=False))
        self.series_ts_rev = Series(np.random.randn(4),
                                    index=dates_rev)
        self.frame_ts_rev = DataFrame(np.random.randn(4, 4),
                                      index=dates_rev)
        with catch_warnings(record=True):
            self.panel_ts_rev = Panel(np.random.randn(4, 4, 4),
                                      items=dates_rev)

        self.frame_empty = DataFrame({})
        self.series_empty = Series({})
        with catch_warnings(record=True):
            self.panel_empty = Panel({})

        # form agglomerates: self.series / self.frame / self.panel map each
        # flavour name to the matching fixture (None when there is none)
        for o in self._objs:

            d = dict()
            for t in self._typs:
                d[t] = getattr(self, '%s_%s' % (o, t), None)

            setattr(self, o, d)

    def generate_indices(self, f, values=False):
        """Return an iterator over every index combination of ``f``.

        If ``values`` is False the axis labels of ``f`` are combined; if it
        is True the positions ``0 .. len(axis) - 1`` of each axis are
        combined instead (suitable for indexing ``f.values``).
        """

        axes = f.axes
        if values:
            axes = [lrange(len(a)) for a in axes]

        return itertools.product(*axes)

    def get_result(self, obj, method, key, axis):
        """ return the result for this obj with this key and this axis """

        # a dict key carries one key per axis
        if isinstance(key, dict):
            key = key[axis]

        # use an artificial conversion to map the key as integers to the labels
        # so ix can work for comparisons
        if method == 'indexer':
            method = 'ix'
            key = obj._get_axis(axis)[key]

        # in case we actually want 0 index slicing
        with catch_warnings(record=True):
            try:
                xp = getattr(obj, method).__getitem__(_axify(obj, key, axis))
            except Exception:
                # fall back to the bare key; KeyboardInterrupt / SystemExit
                # are deliberately no longer swallowed here
                xp = getattr(obj, method).__getitem__(key)

        return xp

    def get_value(self, f, i, values=False):
        """ return the value for the location i """

        # check against values
        if values:
            return f.values[i]

        with catch_warnings(record=True):
            return f.ix[i]

    def check_values(self, f, func, values=False):
        """Check ``getattr(f, func)[i]`` for every label combination ``i``.

        The expected value is ``f.values[i]`` when ``values`` is true,
        otherwise it is obtained by chained ``__getitem__`` calls applied to
        the elements of ``i`` in reverse order.  Does nothing if ``f`` is
        None.
        """

        if f is None:
            return
        axes = f.axes
        indicies = itertools.product(*axes)

        for i in indicies:
            result = getattr(f, func)[i]

            # check against values
            if values:
                expected = f.values[i]
            else:
                expected = f
                for a in reversed(i):
                    expected = expected.__getitem__(a)

            tm.assert_almost_equal(result, expected)

    def check_result(self, name, method1, key1, method2, key2, typs=None,
                     objs=None, axes=None, fails=None):
        """Compare two ways of indexing across the fixture objects.

        For every selected object kind, axis and index flavour the result of
        ``getattr(obj, method1)[key1 on axis]`` is compared with
        ``self.get_result(obj, method2, key2, axis)``.

        Parameters
        ----------
        name : label used only in the summary line
        method1, key1 : accessor name and key of the indexing under test
        method2, key2 : accessor name and key producing the expected result
        typs, objs : iterables restricting the flavours / object kinds;
            default to all of ``_typs`` / ``_objs``
        axes : a single axis or a list/tuple of axes; defaults to [0, 1, 2]
        fails : None, True, or an exception type (or tuple of types).
            True accepts a failed comparison; an exception type accepts that
            exception being raised by the indexing under test.

        Raises
        ------
        AssertionError
            If a comparison fails or an unexpected exception is raised.
        """
        def _eq(t, o, a, obj, k1, k2):
            """ compare equal for these 2 keys """

            # the axis does not exist on this object
            if a is not None and a > obj.ndim - 1:
                return

            def _print(result, error=None):
                """Format (and, if verbose, print) the summary line."""
                if error is not None:
                    error = str(error)
                v = ("%-16.16s [%-16.16s]: [typ->%-8.8s,obj->%-8.8s,"
                     "key1->(%-4.4s),key2->(%-4.4s),axis->%s] %s" %
                     (name, result, t, o, method1, method2, a, error or ''))
                if _verbose:
                    pprint_thing(v)
                # returned so that it can serve as an assertion message
                return v

            try:
                rs = getattr(obj, method1).__getitem__(_axify(obj, k1, a))

                try:
                    xp = self.get_result(obj, method2, k2, a)
                except Exception:
                    # nothing to compare against
                    result = 'no comp'
                    _print(result)
                    return

                detail = None

                try:
                    if is_scalar(rs) and is_scalar(xp):
                        assert rs == xp
                    elif xp.ndim == 1:
                        tm.assert_series_equal(rs, xp)
                    elif xp.ndim == 2:
                        tm.assert_frame_equal(rs, xp)
                    elif xp.ndim == 3:
                        tm.assert_panel_equal(rs, xp)
                    result = 'ok'
                except AssertionError as e:
                    detail = str(e)
                    result = 'fail'

                # reverse the checks
                if fails is True:
                    if result == 'fail':
                        result = 'ok (fail)'

                _print(result)
                if not result.startswith('ok'):
                    raise AssertionError(detail)

            except AssertionError:
                raise
            except Exception as exc:

                # An exception of the expected type counts as a pass.
                # ``fails`` may also be the flag True, which is not a type
                # and must not be handed to isinstance().
                if (fails is not None and fails is not True and
                        isinstance(exc, fails)):
                    result = 'ok (%s)' % type(exc).__name__
                    _print(result)
                    return

                result = type(exc).__name__
                raise AssertionError(_print(result, error=exc))

        if typs is None:
            typs = self._typs

        if objs is None:
            objs = self._objs

        if axes is not None:
            if not isinstance(axes, (tuple, list)):
                axes = [axes]
            else:
                axes = list(axes)
        else:
            axes = [0, 1, 2]

        # check
        for o in objs:
            if o not in self._objs:
                continue

            d = getattr(self, o)
            for a in axes:
                for t in typs:
                    if t not in self._typs:
                        continue

                    obj = d[t]
                    if obj is None:
                        continue

                    # obj is bound as a default so that each closure keeps
                    # its own object; the comparison runs on a copy
                    def _call(obj=obj):
                        obj = obj.copy()

                        k2 = key2
                        _eq(t, o, a, obj, key1, k2)

                    # Panel deprecations
                    if isinstance(obj, Panel):
                        with catch_warnings():
                            filterwarnings("ignore", "\nPanel*", FutureWarning)
                            _call()
                    else:
                        _call()
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
-270
@@ -1,270 +0,0 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from pandas import Series, DataFrame, IntervalIndex, Interval
|
||||
from pandas.compat import product
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestIntervalIndex(object):
    """Legacy indexing checks for objects indexed by an IntervalIndex."""

    def setup_method(self, method):
        # Series holding 0..4, indexed by the intervals built from breaks 0..5
        breaks = np.arange(6)
        self.s = Series(np.arange(5), IntervalIndex.from_breaks(breaks))

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_loc_with_scalar(self):
        ser = self.s

        # scalar slices
        head = ser.iloc[:3]
        for key in (slice(None, 3), slice(None, 2.5),
                    slice(0.1, 2.5), slice(-1, 3)):
            tm.assert_series_equal(head, ser.loc[key])

        # lists of scalars
        middle = ser.iloc[1:4]
        for labels in ([1.5, 2.5, 3.5], [2, 3, 4], [1.5, 3, 4]):
            tm.assert_series_equal(middle, ser.loc[labels])

        # boolean mask
        tail = ser.iloc[2:5]
        tm.assert_series_equal(tail, ser.loc[ser >= 2])

    # TODO: check this behavior is consistent with test_interval_new.py
    def test_getitem_with_scalar(self):
        ser = self.s

        # scalar slices
        head = ser.iloc[:3]
        for key in (slice(None, 3), slice(None, 2.5),
                    slice(0.1, 2.5), slice(-1, 3)):
            tm.assert_series_equal(head, ser[key])

        # lists of scalars
        middle = ser.iloc[1:4]
        for labels in ([1.5, 2.5, 3.5], [2, 3, 4], [1.5, 3, 4]):
            tm.assert_series_equal(middle, ser[labels])

        # boolean mask
        tail = ser.iloc[2:5]
        tm.assert_series_equal(tail, ser[ser >= 2])

    # TODO: check this behavior is consistent with test_interval_new.py
    @pytest.mark.parametrize('direction, closed',
                             product(('increasing', 'decreasing'),
                                     ('left', 'right', 'neither', 'both')))
    def test_nonoverlapping_monotonic(self, direction, closed):
        tpls = [(0, 1), (2, 3), (4, 5)]
        if direction == 'decreasing':
            tpls = tpls[::-1]

        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        ser = Series(list('abc'), idx)

        # an endpoint is a valid key only on a closed side
        endpoint_checks = ((idx.left, idx.closed_left),
                           (idx.right, idx.closed_right))
        for endpoints, side_is_closed in endpoint_checks:
            for key, expected in zip(endpoints, ser):
                if side_is_closed:
                    assert ser[key] == expected
                    assert ser.loc[key] == expected
                else:
                    with pytest.raises(KeyError):
                        ser[key]
                    with pytest.raises(KeyError):
                        ser.loc[key]

        # the midpoint is always a valid key
        for key, expected in zip(idx.mid, ser):
            assert ser[key] == expected
            assert ser.loc[key] == expected

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_with_interval(self):
        ser = self.s

        assert ser.loc[Interval(0, 1)] == 0
        assert ser[Interval(0, 1)] == 0

        for key in (Interval(3, 6), [Interval(3, 6)], [Interval(3, 5)]):
            expected = ser.iloc[3:5]
            tm.assert_series_equal(expected, ser.loc[key])

        # missing
        for absent in (Interval(-2, 0), Interval(5, 6)):
            with pytest.raises(KeyError):
                ser.loc[absent]

            with pytest.raises(KeyError):
                ser[absent]

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_with_slices(self):
        ser = self.s

        # slice of interval
        open_ended = slice(Interval(3, 6), None)
        with pytest.raises(NotImplementedError):
            ser.loc[open_ended]

        with pytest.raises(NotImplementedError):
            ser[open_ended]

        tm.assert_series_equal(ser.iloc[3:5], ser[[Interval(3, 6)]])

        # slice of scalar with step != 1
        with pytest.raises(ValueError):
            ser[slice(0, 4, 2)]

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_with_overlaps(self):
        ser = self.s
        repeated = [Interval(3, 6), Interval(3, 6)]
        tm.assert_series_equal(ser.iloc[[3, 4, 3, 4]], ser.loc[repeated])

        idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        # each of these lookups is expected to give back the whole Series
        tm.assert_series_equal(ser, ser[4])
        tm.assert_series_equal(ser, ser[[4]])
        tm.assert_series_equal(ser, ser.loc[[4]])
        tm.assert_series_equal(ser, ser[Interval(3, 5)])
        tm.assert_series_equal(ser, ser.loc[Interval(3, 5)])

        # doesn't intersect unique set of intervals
        with pytest.raises(KeyError):
            ser[[Interval(3, 5)]]

        with pytest.raises(KeyError):
            ser.loc[[Interval(3, 5)]]

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_non_unique(self):
        idx = IntervalIndex.from_tuples([(1, 3), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        assert ser.loc[Interval(1, 3)] == 0

        result = ser.loc[[Interval(1, 3)]]
        tm.assert_series_equal(ser.iloc[0:1], result)

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_non_unique_moar(self):
        idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        result = ser.loc[Interval(1, 3)]
        tm.assert_series_equal(ser.iloc[[0, 1]], result)

        # non-unique index and slices not allowed
        open_ended = slice(Interval(1, 3), None)
        with pytest.raises(ValueError):
            ser.loc[open_ended]

        with pytest.raises(ValueError):
            ser[open_ended]

        # non-unique
        with pytest.raises(ValueError):
            ser[[Interval(1, 3)]]

    # TODO: check this behavior is consistent with test_interval_new.py
    def test_non_matching(self):
        ser = self.s

        # this is a departure from our current
        # indexing scheme, but simpler
        for labels in ([-1, 3, 4, 5], [-1, 3]):
            with pytest.raises(KeyError):
                ser.loc[labels]

    def test_large_series(self):
        size = 1000000
        ser = Series(np.arange(size),
                     index=IntervalIndex.from_breaks(np.arange(size + 1)))

        # the three spellings of the same slice must agree
        baseline = ser.loc[:80000]
        for other in (ser.loc[0:80000], ser.loc[0:80000:1]):
            tm.assert_series_equal(baseline, other)

    def test_loc_getitem_frame(self):
        df = DataFrame({'A': range(10)})
        df['B'] = pd.cut(df.A, 5)
        df = df.set_index('B')

        result = df.loc[4]
        tm.assert_frame_equal(result, df.iloc[4:6])

        with pytest.raises(KeyError):
            df.loc[10]

        # single list-like
        result = df.loc[[4]]
        tm.assert_frame_equal(result, df.iloc[4:6])

        # non-unique
        result = df.loc[[4, 5]]
        tm.assert_frame_equal(result, df.take([4, 5, 4, 5]))

        with pytest.raises(KeyError):
            df.loc[[10]]

        # partial missing
        with pytest.raises(KeyError):
            df.loc[[10, 4]]
||||
-247
@@ -1,247 +0,0 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from pandas import Series, IntervalIndex, Interval
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
# Module-level pytest mark: applies the skip below to the tests in this file.
pytestmark = pytest.mark.skip(reason="new indexing tests for issue 16316")
|
||||
|
||||
|
||||
class TestIntervalIndex(object):
|
||||
|
||||
def setup_method(self, method):
|
||||
self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))
|
||||
|
||||
def test_loc_with_interval(self):
|
||||
|
||||
# loc with single label / list of labels:
|
||||
# - Intervals: only exact matches
|
||||
# - scalars: those that contain it
|
||||
|
||||
s = self.s
|
||||
|
||||
expected = 0
|
||||
result = s.loc[Interval(0, 1)]
|
||||
assert result == expected
|
||||
result = s[Interval(0, 1)]
|
||||
assert result == expected
|
||||
|
||||
expected = s.iloc[3:5]
|
||||
result = s.loc[[Interval(3, 4), Interval(4, 5)]]
|
||||
tm.assert_series_equal(expected, result)
|
||||
result = s[[Interval(3, 4), Interval(4, 5)]]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
# missing or not exact
|
||||
with pytest.raises(KeyError):
|
||||
s.loc[Interval(3, 5, closed='left')]
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s[Interval(3, 5, closed='left')]
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s[Interval(3, 5)]
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s.loc[Interval(3, 5)]
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s[Interval(3, 5)]
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s.loc[Interval(-2, 0)]
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s[Interval(-2, 0)]
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s.loc[Interval(5, 6)]
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s[Interval(5, 6)]
|
||||
|
||||
def test_loc_with_scalar(self):
|
||||
|
||||
# loc with single label / list of labels:
|
||||
# - Intervals: only exact matches
|
||||
# - scalars: those that contain it
|
||||
|
||||
s = self.s
|
||||
|
||||
assert s.loc[1] == 0
|
||||
assert s.loc[1.5] == 1
|
||||
assert s.loc[2] == 1
|
||||
|
||||
# TODO with __getitem__ same rules as loc, or positional ?
|
||||
# assert s[1] == 0
|
||||
# assert s[1.5] == 1
|
||||
# assert s[2] == 1
|
||||
|
||||
expected = s.iloc[1:4]
|
||||
tm.assert_series_equal(expected, s.loc[[1.5, 2.5, 3.5]])
|
||||
tm.assert_series_equal(expected, s.loc[[2, 3, 4]])
|
||||
tm.assert_series_equal(expected, s.loc[[1.5, 3, 4]])
|
||||
|
||||
expected = s.iloc[[1, 1, 2, 1]]
|
||||
tm.assert_series_equal(expected, s.loc[[1.5, 2, 2.5, 1.5]])
|
||||
|
||||
expected = s.iloc[2:5]
|
||||
tm.assert_series_equal(expected, s.loc[s >= 2])
|
||||
|
||||
def test_loc_with_slices(self):
|
||||
|
||||
# loc with slices:
|
||||
# - Interval objects: only works with exact matches
|
||||
# - scalars: only works for non-overlapping, monotonic intervals,
|
||||
# and start/stop select location based on the interval that
|
||||
# contains them:
|
||||
# (slice_loc(start, stop) == (idx.get_loc(start), idx.get_loc(stop))
|
||||
|
||||
s = self.s
|
||||
|
||||
# slice of interval
|
||||
|
||||
expected = s.iloc[:3]
|
||||
result = s.loc[Interval(0, 1):Interval(2, 3)]
|
||||
tm.assert_series_equal(expected, result)
|
||||
result = s[Interval(0, 1):Interval(2, 3)]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
expected = s.iloc[4:]
|
||||
result = s.loc[Interval(3, 4):]
|
||||
tm.assert_series_equal(expected, result)
|
||||
result = s[Interval(3, 4):]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s.loc[Interval(3, 6):]
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s[Interval(3, 6):]
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s.loc[Interval(3, 4, closed='left'):]
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s[Interval(3, 4, closed='left'):]
|
||||
|
||||
# TODO with non-existing intervals ?
|
||||
# s.loc[Interval(-1, 0):Interval(2, 3)]
|
||||
|
||||
# slice of scalar
|
||||
|
||||
expected = s.iloc[:3]
|
||||
tm.assert_series_equal(expected, s.loc[:3])
|
||||
tm.assert_series_equal(expected, s.loc[:2.5])
|
||||
tm.assert_series_equal(expected, s.loc[0.1:2.5])
|
||||
|
||||
# TODO should this work? (-1 is not contained in any of the Intervals)
|
||||
# tm.assert_series_equal(expected, s.loc[-1:3])
|
||||
|
||||
# TODO with __getitem__ same rules as loc, or positional ?
|
||||
# tm.assert_series_equal(expected, s[:3])
|
||||
# tm.assert_series_equal(expected, s[:2.5])
|
||||
# tm.assert_series_equal(expected, s[0.1:2.5])
|
||||
|
||||
# slice of scalar with step != 1
|
||||
with pytest.raises(NotImplementedError):
|
||||
s[0:4:2]
|
||||
|
||||
def test_loc_with_overlap(self):
|
||||
|
||||
idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
|
||||
s = Series(range(len(idx)), index=idx)
|
||||
|
||||
# scalar
|
||||
expected = s
|
||||
result = s.loc[4]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
result = s[4]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
result = s.loc[[4]]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
result = s[[4]]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
# interval
|
||||
expected = 0
|
||||
result = s.loc[Interval(1, 5)]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
result = s[Interval(1, 5)]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
expected = s
|
||||
result = s.loc[[Interval(1, 5), Interval(3, 7)]]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
result = s[[Interval(1, 5), Interval(3, 7)]]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s.loc[Interval(3, 5)]
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s.loc[[Interval(3, 5)]]
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s[Interval(3, 5)]
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s[[Interval(3, 5)]]
|
||||
|
||||
# slices with interval (only exact matches)
|
||||
expected = s
|
||||
result = s.loc[Interval(1, 5):Interval(3, 7)]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
result = s[Interval(1, 5):Interval(3, 7)]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s.loc[Interval(1, 6):Interval(3, 8)]
|
||||
|
||||
with pytest.raises(KeyError):
|
||||
s[Interval(1, 6):Interval(3, 8)]
|
||||
|
||||
# slices with scalar raise for overlapping intervals
|
||||
# TODO KeyError is the appropriate error?
|
||||
with pytest.raises(KeyError):
|
||||
s.loc[1:4]
|
||||
|
||||
def test_non_unique(self):
|
||||
|
||||
idx = IntervalIndex.from_tuples([(1, 3), (3, 7)])
|
||||
s = Series(range(len(idx)), index=idx)
|
||||
|
||||
result = s.loc[Interval(1, 3)]
|
||||
assert result == 0
|
||||
|
||||
result = s.loc[[Interval(1, 3)]]
|
||||
expected = s.iloc[0:1]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
def test_non_unique_moar(self):
|
||||
|
||||
idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)])
|
||||
s = Series(range(len(idx)), index=idx)
|
||||
|
||||
expected = s.iloc[[0, 1]]
|
||||
result = s.loc[Interval(1, 3)]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
expected = s
|
||||
result = s.loc[Interval(1, 3):]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
expected = s
|
||||
result = s[Interval(1, 3):]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
expected = s.iloc[[0, 1]]
|
||||
result = s[[Interval(1, 3)]]
|
||||
tm.assert_series_equal(expected, result)
|
||||
@@ -1,267 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable-msg=W0612,E1101
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestIndexingCallable(object):
|
||||
|
||||
def test_frame_loc_ix_callable(self):
|
||||
# GH 11485
|
||||
df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': list('aabb'),
|
||||
'C': [1, 2, 3, 4]})
|
||||
# iloc cannot use boolean Series (see GH3635)
|
||||
|
||||
# return bool indexer
|
||||
res = df.loc[lambda x: x.A > 2]
|
||||
tm.assert_frame_equal(res, df.loc[df.A > 2])
|
||||
|
||||
res = df.loc[lambda x: x.A > 2]
|
||||
tm.assert_frame_equal(res, df.loc[df.A > 2])
|
||||
|
||||
res = df.loc[lambda x: x.A > 2, ]
|
||||
tm.assert_frame_equal(res, df.loc[df.A > 2, ])
|
||||
|
||||
res = df.loc[lambda x: x.A > 2, ]
|
||||
tm.assert_frame_equal(res, df.loc[df.A > 2, ])
|
||||
|
||||
res = df.loc[lambda x: x.B == 'b', :]
|
||||
tm.assert_frame_equal(res, df.loc[df.B == 'b', :])
|
||||
|
||||
res = df.loc[lambda x: x.B == 'b', :]
|
||||
tm.assert_frame_equal(res, df.loc[df.B == 'b', :])
|
||||
|
||||
res = df.loc[lambda x: x.A > 2, lambda x: x.columns == 'B']
|
||||
tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]])
|
||||
|
||||
res = df.loc[lambda x: x.A > 2, lambda x: x.columns == 'B']
|
||||
tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]])
|
||||
|
||||
res = df.loc[lambda x: x.A > 2, lambda x: 'B']
|
||||
tm.assert_series_equal(res, df.loc[df.A > 2, 'B'])
|
||||
|
||||
res = df.loc[lambda x: x.A > 2, lambda x: 'B']
|
||||
tm.assert_series_equal(res, df.loc[df.A > 2, 'B'])
|
||||
|
||||
res = df.loc[lambda x: x.A > 2, lambda x: ['A', 'B']]
|
||||
tm.assert_frame_equal(res, df.loc[df.A > 2, ['A', 'B']])
|
||||
|
||||
res = df.loc[lambda x: x.A > 2, lambda x: ['A', 'B']]
|
||||
tm.assert_frame_equal(res, df.loc[df.A > 2, ['A', 'B']])
|
||||
|
||||
res = df.loc[lambda x: x.A == 2, lambda x: ['A', 'B']]
|
||||
tm.assert_frame_equal(res, df.loc[df.A == 2, ['A', 'B']])
|
||||
|
||||
res = df.loc[lambda x: x.A == 2, lambda x: ['A', 'B']]
|
||||
tm.assert_frame_equal(res, df.loc[df.A == 2, ['A', 'B']])
|
||||
|
||||
# scalar
|
||||
res = df.loc[lambda x: 1, lambda x: 'A']
|
||||
assert res == df.loc[1, 'A']
|
||||
|
||||
res = df.loc[lambda x: 1, lambda x: 'A']
|
||||
assert res == df.loc[1, 'A']
|
||||
|
||||
def test_frame_loc_ix_callable_mixture(self):
|
||||
# GH 11485
|
||||
df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': list('aabb'),
|
||||
'C': [1, 2, 3, 4]})
|
||||
|
||||
res = df.loc[lambda x: x.A > 2, ['A', 'B']]
|
||||
tm.assert_frame_equal(res, df.loc[df.A > 2, ['A', 'B']])
|
||||
|
||||
res = df.loc[lambda x: x.A > 2, ['A', 'B']]
|
||||
tm.assert_frame_equal(res, df.loc[df.A > 2, ['A', 'B']])
|
||||
|
||||
res = df.loc[[2, 3], lambda x: ['A', 'B']]
|
||||
tm.assert_frame_equal(res, df.loc[[2, 3], ['A', 'B']])
|
||||
|
||||
res = df.loc[[2, 3], lambda x: ['A', 'B']]
|
||||
tm.assert_frame_equal(res, df.loc[[2, 3], ['A', 'B']])
|
||||
|
||||
res = df.loc[3, lambda x: ['A', 'B']]
|
||||
tm.assert_series_equal(res, df.loc[3, ['A', 'B']])
|
||||
|
||||
res = df.loc[3, lambda x: ['A', 'B']]
|
||||
tm.assert_series_equal(res, df.loc[3, ['A', 'B']])
|
||||
|
||||
def test_frame_loc_callable(self):
|
||||
# GH 11485
|
||||
df = pd.DataFrame({'X': [1, 2, 3, 4],
|
||||
'Y': list('aabb')},
|
||||
index=list('ABCD'))
|
||||
|
||||
# return label
|
||||
res = df.loc[lambda x: ['A', 'C']]
|
||||
tm.assert_frame_equal(res, df.loc[['A', 'C']])
|
||||
|
||||
res = df.loc[lambda x: ['A', 'C'], ]
|
||||
tm.assert_frame_equal(res, df.loc[['A', 'C'], ])
|
||||
|
||||
res = df.loc[lambda x: ['A', 'C'], :]
|
||||
tm.assert_frame_equal(res, df.loc[['A', 'C'], :])
|
||||
|
||||
res = df.loc[lambda x: ['A', 'C'], lambda x: 'X']
|
||||
tm.assert_series_equal(res, df.loc[['A', 'C'], 'X'])
|
||||
|
||||
res = df.loc[lambda x: ['A', 'C'], lambda x: ['X']]
|
||||
tm.assert_frame_equal(res, df.loc[['A', 'C'], ['X']])
|
||||
|
||||
# mixture
|
||||
res = df.loc[['A', 'C'], lambda x: 'X']
|
||||
tm.assert_series_equal(res, df.loc[['A', 'C'], 'X'])
|
||||
|
||||
res = df.loc[['A', 'C'], lambda x: ['X']]
|
||||
tm.assert_frame_equal(res, df.loc[['A', 'C'], ['X']])
|
||||
|
||||
res = df.loc[lambda x: ['A', 'C'], 'X']
|
||||
tm.assert_series_equal(res, df.loc[['A', 'C'], 'X'])
|
||||
|
||||
res = df.loc[lambda x: ['A', 'C'], ['X']]
|
||||
tm.assert_frame_equal(res, df.loc[['A', 'C'], ['X']])
|
||||
|
||||
def test_frame_loc_callable_setitem(self):
|
||||
# GH 11485
|
||||
df = pd.DataFrame({'X': [1, 2, 3, 4],
|
||||
'Y': list('aabb')},
|
||||
index=list('ABCD'))
|
||||
|
||||
# return label
|
||||
res = df.copy()
|
||||
res.loc[lambda x: ['A', 'C']] = -20
|
||||
exp = df.copy()
|
||||
exp.loc[['A', 'C']] = -20
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
res = df.copy()
|
||||
res.loc[lambda x: ['A', 'C'], :] = 20
|
||||
exp = df.copy()
|
||||
exp.loc[['A', 'C'], :] = 20
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
res = df.copy()
|
||||
res.loc[lambda x: ['A', 'C'], lambda x: 'X'] = -1
|
||||
exp = df.copy()
|
||||
exp.loc[['A', 'C'], 'X'] = -1
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
res = df.copy()
|
||||
res.loc[lambda x: ['A', 'C'], lambda x: ['X']] = [5, 10]
|
||||
exp = df.copy()
|
||||
exp.loc[['A', 'C'], ['X']] = [5, 10]
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
# mixture
|
||||
res = df.copy()
|
||||
res.loc[['A', 'C'], lambda x: 'X'] = np.array([-1, -2])
|
||||
exp = df.copy()
|
||||
exp.loc[['A', 'C'], 'X'] = np.array([-1, -2])
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
res = df.copy()
|
||||
res.loc[['A', 'C'], lambda x: ['X']] = 10
|
||||
exp = df.copy()
|
||||
exp.loc[['A', 'C'], ['X']] = 10
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
res = df.copy()
|
||||
res.loc[lambda x: ['A', 'C'], 'X'] = -2
|
||||
exp = df.copy()
|
||||
exp.loc[['A', 'C'], 'X'] = -2
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
res = df.copy()
|
||||
res.loc[lambda x: ['A', 'C'], ['X']] = -4
|
||||
exp = df.copy()
|
||||
exp.loc[['A', 'C'], ['X']] = -4
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
def test_frame_iloc_callable(self):
|
||||
# GH 11485
|
||||
df = pd.DataFrame({'X': [1, 2, 3, 4],
|
||||
'Y': list('aabb')},
|
||||
index=list('ABCD'))
|
||||
|
||||
# return location
|
||||
res = df.iloc[lambda x: [1, 3]]
|
||||
tm.assert_frame_equal(res, df.iloc[[1, 3]])
|
||||
|
||||
res = df.iloc[lambda x: [1, 3], :]
|
||||
tm.assert_frame_equal(res, df.iloc[[1, 3], :])
|
||||
|
||||
res = df.iloc[lambda x: [1, 3], lambda x: 0]
|
||||
tm.assert_series_equal(res, df.iloc[[1, 3], 0])
|
||||
|
||||
res = df.iloc[lambda x: [1, 3], lambda x: [0]]
|
||||
tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])
|
||||
|
||||
# mixture
|
||||
res = df.iloc[[1, 3], lambda x: 0]
|
||||
tm.assert_series_equal(res, df.iloc[[1, 3], 0])
|
||||
|
||||
res = df.iloc[[1, 3], lambda x: [0]]
|
||||
tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])
|
||||
|
||||
res = df.iloc[lambda x: [1, 3], 0]
|
||||
tm.assert_series_equal(res, df.iloc[[1, 3], 0])
|
||||
|
||||
res = df.iloc[lambda x: [1, 3], [0]]
|
||||
tm.assert_frame_equal(res, df.iloc[[1, 3], [0]])
|
||||
|
||||
def test_frame_iloc_callable_setitem(self):
|
||||
# GH 11485
|
||||
df = pd.DataFrame({'X': [1, 2, 3, 4],
|
||||
'Y': list('aabb')},
|
||||
index=list('ABCD'))
|
||||
|
||||
# return location
|
||||
res = df.copy()
|
||||
res.iloc[lambda x: [1, 3]] = 0
|
||||
exp = df.copy()
|
||||
exp.iloc[[1, 3]] = 0
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
res = df.copy()
|
||||
res.iloc[lambda x: [1, 3], :] = -1
|
||||
exp = df.copy()
|
||||
exp.iloc[[1, 3], :] = -1
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
res = df.copy()
|
||||
res.iloc[lambda x: [1, 3], lambda x: 0] = 5
|
||||
exp = df.copy()
|
||||
exp.iloc[[1, 3], 0] = 5
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
res = df.copy()
|
||||
res.iloc[lambda x: [1, 3], lambda x: [0]] = 25
|
||||
exp = df.copy()
|
||||
exp.iloc[[1, 3], [0]] = 25
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
# mixture
|
||||
res = df.copy()
|
||||
res.iloc[[1, 3], lambda x: 0] = -3
|
||||
exp = df.copy()
|
||||
exp.iloc[[1, 3], 0] = -3
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
res = df.copy()
|
||||
res.iloc[[1, 3], lambda x: [0]] = -5
|
||||
exp = df.copy()
|
||||
exp.iloc[[1, 3], [0]] = -5
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
res = df.copy()
|
||||
res.iloc[lambda x: [1, 3], 0] = 10
|
||||
exp = df.copy()
|
||||
exp.iloc[[1, 3], 0] = 10
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
res = df.copy()
|
||||
res.iloc[lambda x: [1, 3], [0]] = [-5, -5]
|
||||
exp = df.copy()
|
||||
exp.iloc[[1, 3], [0]] = [-5, -5]
|
||||
tm.assert_frame_equal(res, exp)
|
||||
@@ -1,717 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.compat as compat
|
||||
import numpy as np
|
||||
from pandas import (Series, DataFrame, Timestamp, Categorical,
|
||||
CategoricalIndex, Interval, Index)
|
||||
from pandas.util.testing import assert_series_equal, assert_frame_equal
|
||||
from pandas.util import testing as tm
|
||||
from pandas.core.dtypes.common import is_categorical_dtype
|
||||
from pandas.api.types import CategoricalDtype as CDT
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
|
||||
class TestCategoricalIndex(object):
|
||||
|
||||
def setup_method(self, method):
|
||||
|
||||
self.df = DataFrame({'A': np.arange(6, dtype='int64'),
|
||||
'B': Series(list('aabbca')).astype(
|
||||
CDT(list('cab')))}).set_index('B')
|
||||
self.df2 = DataFrame({'A': np.arange(6, dtype='int64'),
|
||||
'B': Series(list('aabbca')).astype(
|
||||
CDT(list('cabe')))}).set_index('B')
|
||||
self.df3 = DataFrame({'A': np.arange(6, dtype='int64'),
|
||||
'B': (Series([1, 1, 2, 1, 3, 2])
|
||||
.astype(CDT([3, 2, 1], ordered=True)))
|
||||
}).set_index('B')
|
||||
self.df4 = DataFrame({'A': np.arange(6, dtype='int64'),
|
||||
'B': (Series([1, 1, 2, 1, 3, 2])
|
||||
.astype(CDT([3, 2, 1], ordered=False)))
|
||||
}).set_index('B')
|
||||
|
||||
def test_loc_scalar(self):
    """Scalar ``.loc`` lookup and assignment on a categorical index."""
    cat_dtype = CDT(list('cab'))

    # selecting one category returns every row carrying that label
    selected = self.df.loc['a']
    labels = Series(list('aaa')).astype(cat_dtype)
    expected = DataFrame({'A': [0, 1, 5], 'B': labels}).set_index('B')
    assert_frame_equal(selected, expected)

    # assigning to one category overwrites every matching row
    df = self.df.copy()
    df.loc['a'] = 20
    labels = Series(list('aabbca')).astype(cat_dtype)
    expected = DataFrame({'A': [20, 20, 2, 3, 4, 20],
                          'B': labels}).set_index('B')
    assert_frame_equal(df, expected)

    # value not in the categories
    with pytest.raises(KeyError):
        df.loc['d']

    # assigning under a label outside the categories is rejected
    with pytest.raises(TypeError):
        df.loc['d'] = 10

    with pytest.raises(TypeError):
        df.loc['d', 'A'] = 10

    with pytest.raises(TypeError):
        df.loc['d', 'C'] = 10
|
||||
|
||||
def test_getitem_scalar(self):
|
||||
|
||||
cats = Categorical([Timestamp('12-31-1999'),
|
||||
Timestamp('12-31-2000')])
|
||||
|
||||
s = Series([1, 2], index=cats)
|
||||
|
||||
expected = s.iloc[0]
|
||||
result = s[cats[0]]
|
||||
assert result == expected
|
||||
|
||||
def test_slicing_directly(self):
|
||||
cat = Categorical(["a", "b", "c", "d", "a", "b", "c"])
|
||||
sliced = cat[3]
|
||||
assert sliced == "d"
|
||||
sliced = cat[3:5]
|
||||
expected = Categorical(["d", "a"], categories=['a', 'b', 'c', 'd'])
|
||||
tm.assert_numpy_array_equal(sliced._codes, expected._codes)
|
||||
tm.assert_index_equal(sliced.categories, expected.categories)
|
||||
|
||||
def test_slicing(self):
|
||||
cat = Series(Categorical([1, 2, 3, 4]))
|
||||
reversed = cat[::-1]
|
||||
exp = np.array([4, 3, 2, 1], dtype=np.int64)
|
||||
tm.assert_numpy_array_equal(reversed.__array__(), exp)
|
||||
|
||||
df = DataFrame({'value': (np.arange(100) + 1).astype('int64')})
|
||||
df['D'] = pd.cut(df.value, bins=[0, 25, 50, 75, 100])
|
||||
|
||||
expected = Series([11, Interval(0, 25)], index=['value', 'D'], name=10)
|
||||
result = df.iloc[10]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = DataFrame({'value': np.arange(11, 21).astype('int64')},
|
||||
index=np.arange(10, 20).astype('int64'))
|
||||
expected['D'] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100])
|
||||
result = df.iloc[10:20]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = Series([9, Interval(0, 25)], index=['value', 'D'], name=8)
|
||||
result = df.loc[8]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_slicing_and_getting_ops(self):
|
||||
|
||||
# systematically test the slicing operations:
|
||||
# for all slicing ops:
|
||||
# - returning a dataframe
|
||||
# - returning a column
|
||||
# - returning a row
|
||||
# - returning a single value
|
||||
|
||||
cats = Categorical(
|
||||
["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"])
|
||||
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
|
||||
values = [1, 2, 3, 4, 5, 6, 7]
|
||||
df = DataFrame({"cats": cats, "values": values}, index=idx)
|
||||
|
||||
# the expected values
|
||||
cats2 = Categorical(["b", "c"], categories=["a", "b", "c"])
|
||||
idx2 = Index(["j", "k"])
|
||||
values2 = [3, 4]
|
||||
|
||||
# 2:4,: | "j":"k",:
|
||||
exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2)
|
||||
|
||||
# :,"cats" | :,0
|
||||
exp_col = Series(cats, index=idx, name='cats')
|
||||
|
||||
# "j",: | 2,:
|
||||
exp_row = Series(["b", 3], index=["cats", "values"], dtype="object",
|
||||
name="j")
|
||||
|
||||
# "j","cats | 2,0
|
||||
exp_val = "b"
|
||||
|
||||
# iloc
|
||||
# frame
|
||||
res_df = df.iloc[2:4, :]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert is_categorical_dtype(res_df["cats"])
|
||||
|
||||
# row
|
||||
res_row = df.iloc[2, :]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], compat.string_types)
|
||||
|
||||
# col
|
||||
res_col = df.iloc[:, 0]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert is_categorical_dtype(res_col)
|
||||
|
||||
# single value
|
||||
res_val = df.iloc[2, 0]
|
||||
assert res_val == exp_val
|
||||
|
||||
# loc
|
||||
# frame
|
||||
res_df = df.loc["j":"k", :]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert is_categorical_dtype(res_df["cats"])
|
||||
|
||||
# row
|
||||
res_row = df.loc["j", :]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], compat.string_types)
|
||||
|
||||
# col
|
||||
res_col = df.loc[:, "cats"]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert is_categorical_dtype(res_col)
|
||||
|
||||
# single value
|
||||
res_val = df.loc["j", "cats"]
|
||||
assert res_val == exp_val
|
||||
|
||||
# ix
|
||||
# frame
|
||||
# res_df = df.loc["j":"k",[0,1]] # doesn't work?
|
||||
res_df = df.loc["j":"k", :]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert is_categorical_dtype(res_df["cats"])
|
||||
|
||||
# row
|
||||
res_row = df.loc["j", :]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], compat.string_types)
|
||||
|
||||
# col
|
||||
res_col = df.loc[:, "cats"]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert is_categorical_dtype(res_col)
|
||||
|
||||
# single value
|
||||
res_val = df.loc["j", df.columns[0]]
|
||||
assert res_val == exp_val
|
||||
|
||||
# iat
|
||||
res_val = df.iat[2, 0]
|
||||
assert res_val == exp_val
|
||||
|
||||
# at
|
||||
res_val = df.at["j", "cats"]
|
||||
assert res_val == exp_val
|
||||
|
||||
# fancy indexing
|
||||
exp_fancy = df.iloc[[2]]
|
||||
|
||||
res_fancy = df[df["cats"] == "b"]
|
||||
tm.assert_frame_equal(res_fancy, exp_fancy)
|
||||
res_fancy = df[df["values"] == 3]
|
||||
tm.assert_frame_equal(res_fancy, exp_fancy)
|
||||
|
||||
# get_value
|
||||
res_val = df.at["j", "cats"]
|
||||
assert res_val == exp_val
|
||||
|
||||
# i : int, slice, or sequence of integers
|
||||
res_row = df.iloc[2]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], compat.string_types)
|
||||
|
||||
res_df = df.iloc[slice(2, 4)]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert is_categorical_dtype(res_df["cats"])
|
||||
|
||||
res_df = df.iloc[[2, 3]]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert is_categorical_dtype(res_df["cats"])
|
||||
|
||||
res_col = df.iloc[:, 0]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert is_categorical_dtype(res_col)
|
||||
|
||||
res_df = df.iloc[:, slice(0, 2)]
|
||||
tm.assert_frame_equal(res_df, df)
|
||||
assert is_categorical_dtype(res_df["cats"])
|
||||
|
||||
res_df = df.iloc[:, [0, 1]]
|
||||
tm.assert_frame_equal(res_df, df)
|
||||
assert is_categorical_dtype(res_df["cats"])
|
||||
|
||||
def test_slicing_doc_examples(self):
|
||||
|
||||
# GH 7918
|
||||
cats = Categorical(["a", "b", "b", "b", "c", "c", "c"],
|
||||
categories=["a", "b", "c"])
|
||||
idx = Index(["h", "i", "j", "k", "l", "m", "n", ])
|
||||
values = [1, 2, 2, 2, 3, 4, 5]
|
||||
df = DataFrame({"cats": cats, "values": values}, index=idx)
|
||||
|
||||
result = df.iloc[2:4, :]
|
||||
expected = DataFrame(
|
||||
{"cats": Categorical(['b', 'b'], categories=['a', 'b', 'c']),
|
||||
"values": [2, 2]}, index=['j', 'k'])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.iloc[2:4, :].dtypes
|
||||
expected = Series(['category', 'int64'], ['cats', 'values'])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.loc["h":"j", "cats"]
|
||||
expected = Series(Categorical(['a', 'b', 'b'],
|
||||
categories=['a', 'b', 'c']),
|
||||
index=['h', 'i', 'j'], name='cats')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.loc["h":"j", df.columns[0:1]]
|
||||
expected = DataFrame({'cats': Categorical(['a', 'b', 'b'],
|
||||
categories=['a', 'b', 'c'])},
|
||||
index=['h', 'i', 'j'])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_category_type(self):
|
||||
# GH 14580
|
||||
# test iloc() on Series with Categorical data
|
||||
|
||||
s = Series([1, 2, 3]).astype('category')
|
||||
|
||||
# get slice
|
||||
result = s.iloc[0:2]
|
||||
expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# get list of indexes
|
||||
result = s.iloc[[0, 1]]
|
||||
expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# get boolean array
|
||||
result = s.iloc[[True, False, False]]
|
||||
expected = Series([1]).astype(CategoricalDtype([1, 2, 3]))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_loc_listlike(self):
    """List-of-labels ``.loc`` selection on a categorical index."""
    wanted = ['a', 'b', 'e']

    def expected_for_wanted(last_value):
        # frame expected from ``.loc[wanted]``; 'e' is the final row
        exp_index = CategoricalIndex(
            list('aaabbe'), categories=list('cabe'), name='B')
        return DataFrame({'A': [0, 1, 5, 2, 3, last_value]},
                         index=exp_index)

    # list of labels
    result = self.df.loc[['c', 'a']]
    expected = self.df.iloc[[4, 0, 1, 5]]
    assert_frame_equal(result, expected, check_index_type=True)

    result = self.df2.loc[wanted]
    assert_frame_equal(result, expected_for_wanted(np.nan),
                       check_index_type=True)

    # element in the categories but not in the values
    with pytest.raises(KeyError):
        self.df2.loc['e']

    # assign is ok
    df = self.df2.copy()
    df.loc['e'] = 20
    result = df.loc[wanted]
    assert_frame_equal(result, expected_for_wanted(20))

    df = self.df2.copy()
    result = df.loc[wanted]
    assert_frame_equal(result, expected_for_wanted(np.nan),
                       check_index_type=True)

    # not all labels in the categories
    with pytest.raises(KeyError):
        self.df2.loc[['a', 'd']]
|
||||
|
||||
def test_loc_listlike_dtypes(self):
|
||||
# GH 11586
|
||||
|
||||
# unique categories and codes
|
||||
index = CategoricalIndex(['a', 'b', 'c'])
|
||||
df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index)
|
||||
|
||||
# unique slice
|
||||
res = df.loc[['a', 'b']]
|
||||
exp_index = CategoricalIndex(['a', 'b'],
|
||||
categories=index.categories)
|
||||
exp = DataFrame({'A': [1, 2], 'B': [4, 5]}, index=exp_index)
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||||
|
||||
# duplicated slice
|
||||
res = df.loc[['a', 'a', 'b']]
|
||||
|
||||
exp_index = CategoricalIndex(['a', 'a', 'b'],
|
||||
categories=index.categories)
|
||||
exp = DataFrame({'A': [1, 1, 2], 'B': [4, 4, 5]}, index=exp_index)
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||||
|
||||
with tm.assert_raises_regex(
|
||||
KeyError,
|
||||
'a list-indexer must only include values that are '
|
||||
'in the categories'):
|
||||
df.loc[['a', 'x']]
|
||||
|
||||
# duplicated categories and codes
|
||||
index = CategoricalIndex(['a', 'b', 'a'])
|
||||
df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index)
|
||||
|
||||
# unique slice
|
||||
res = df.loc[['a', 'b']]
|
||||
exp = DataFrame({'A': [1, 3, 2],
|
||||
'B': [4, 6, 5]},
|
||||
index=CategoricalIndex(['a', 'a', 'b']))
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||||
|
||||
# duplicated slice
|
||||
res = df.loc[['a', 'a', 'b']]
|
||||
exp = DataFrame(
|
||||
{'A': [1, 3, 1, 3, 2],
|
||||
'B': [4, 6, 4, 6, 5
|
||||
]}, index=CategoricalIndex(['a', 'a', 'a', 'a', 'b']))
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||||
|
||||
with tm.assert_raises_regex(
|
||||
KeyError,
|
||||
'a list-indexer must only include values '
|
||||
'that are in the categories'):
|
||||
df.loc[['a', 'x']]
|
||||
|
||||
# contains unused category
|
||||
index = CategoricalIndex(
|
||||
['a', 'b', 'a', 'c'], categories=list('abcde'))
|
||||
df = DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=index)
|
||||
|
||||
res = df.loc[['a', 'b']]
|
||||
exp = DataFrame({'A': [1, 3, 2], 'B': [5, 7, 6]},
|
||||
index=CategoricalIndex(['a', 'a', 'b'],
|
||||
categories=list('abcde')))
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||||
|
||||
res = df.loc[['a', 'e']]
|
||||
exp = DataFrame({'A': [1, 3, np.nan], 'B': [5, 7, np.nan]},
|
||||
index=CategoricalIndex(['a', 'a', 'e'],
|
||||
categories=list('abcde')))
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||||
|
||||
# duplicated slice
|
||||
res = df.loc[['a', 'a', 'b']]
|
||||
exp = DataFrame({'A': [1, 3, 1, 3, 2], 'B': [5, 7, 5, 7, 6]},
|
||||
index=CategoricalIndex(['a', 'a', 'a', 'a', 'b'],
|
||||
categories=list('abcde')))
|
||||
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||||
|
||||
with tm.assert_raises_regex(
|
||||
KeyError,
|
||||
'a list-indexer must only include values '
|
||||
'that are in the categories'):
|
||||
df.loc[['a', 'x']]
|
||||
|
||||
def test_get_indexer_array(self):
|
||||
arr = np.array([Timestamp('1999-12-31 00:00:00'),
|
||||
Timestamp('2000-12-31 00:00:00')], dtype=object)
|
||||
cats = [Timestamp('1999-12-31 00:00:00'),
|
||||
Timestamp('2000-12-31 00:00:00')]
|
||||
ci = CategoricalIndex(cats,
|
||||
categories=cats,
|
||||
ordered=False, dtype='category')
|
||||
result = ci.get_indexer(arr)
|
||||
expected = np.array([0, 1], dtype='intp')
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_get_indexer_same_categories_same_order(self):
|
||||
ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'])
|
||||
|
||||
result = ci.get_indexer(CategoricalIndex(['b', 'b'],
|
||||
categories=['a', 'b']))
|
||||
expected = np.array([1, 1], dtype='intp')
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_get_indexer_same_categories_different_order(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/19551
|
||||
ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'])
|
||||
|
||||
result = ci.get_indexer(CategoricalIndex(['b', 'b'],
|
||||
categories=['b', 'a']))
|
||||
expected = np.array([1, 1], dtype='intp')
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_getitem_with_listlike(self):
|
||||
# GH 16115
|
||||
cats = Categorical([Timestamp('12-31-1999'),
|
||||
Timestamp('12-31-2000')])
|
||||
|
||||
expected = DataFrame([[1, 0], [0, 1]], dtype='uint8',
|
||||
index=[0, 1], columns=cats)
|
||||
dummies = pd.get_dummies(cats)
|
||||
result = dummies[[c for c in dummies.columns]]
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
def test_setitem_listlike(self):
|
||||
|
||||
# GH 9469
|
||||
# properly coerce the input indexers
|
||||
np.random.seed(1)
|
||||
c = Categorical(np.random.randint(0, 5, size=150000).astype(
|
||||
np.int8)).add_categories([-1000])
|
||||
indexer = np.array([100000]).astype(np.int64)
|
||||
c[indexer] = -1000
|
||||
|
||||
# we are asserting the code result here
|
||||
# which maps to the -1000 category
|
||||
result = c.codes[np.array([100000]).astype(np.int64)]
|
||||
tm.assert_numpy_array_equal(result, np.array([5], dtype='int8'))
|
||||
|
||||
def test_ix_categorical_index(self):
|
||||
# GH 12531
|
||||
df = DataFrame(np.random.randn(3, 3),
|
||||
index=list('ABC'), columns=list('XYZ'))
|
||||
cdf = df.copy()
|
||||
cdf.index = CategoricalIndex(df.index)
|
||||
cdf.columns = CategoricalIndex(df.columns)
|
||||
|
||||
expect = Series(df.loc['A', :], index=cdf.columns, name='A')
|
||||
assert_series_equal(cdf.loc['A', :], expect)
|
||||
|
||||
expect = Series(df.loc[:, 'X'], index=cdf.index, name='X')
|
||||
assert_series_equal(cdf.loc[:, 'X'], expect)
|
||||
|
||||
exp_index = CategoricalIndex(list('AB'), categories=['A', 'B', 'C'])
|
||||
expect = DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns,
|
||||
index=exp_index)
|
||||
assert_frame_equal(cdf.loc[['A', 'B'], :], expect)
|
||||
|
||||
exp_columns = CategoricalIndex(list('XY'),
|
||||
categories=['X', 'Y', 'Z'])
|
||||
expect = DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index,
|
||||
columns=exp_columns)
|
||||
assert_frame_equal(cdf.loc[:, ['X', 'Y']], expect)
|
||||
|
||||
# non-unique
|
||||
df = DataFrame(np.random.randn(3, 3),
|
||||
index=list('ABA'), columns=list('XYX'))
|
||||
cdf = df.copy()
|
||||
cdf.index = CategoricalIndex(df.index)
|
||||
cdf.columns = CategoricalIndex(df.columns)
|
||||
|
||||
exp_index = CategoricalIndex(list('AA'), categories=['A', 'B'])
|
||||
expect = DataFrame(df.loc['A', :], columns=cdf.columns,
|
||||
index=exp_index)
|
||||
assert_frame_equal(cdf.loc['A', :], expect)
|
||||
|
||||
exp_columns = CategoricalIndex(list('XX'), categories=['X', 'Y'])
|
||||
expect = DataFrame(df.loc[:, 'X'], index=cdf.index,
|
||||
columns=exp_columns)
|
||||
assert_frame_equal(cdf.loc[:, 'X'], expect)
|
||||
|
||||
expect = DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns,
|
||||
index=CategoricalIndex(list('AAB')))
|
||||
assert_frame_equal(cdf.loc[['A', 'B'], :], expect)
|
||||
|
||||
expect = DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index,
|
||||
columns=CategoricalIndex(list('XXY')))
|
||||
assert_frame_equal(cdf.loc[:, ['X', 'Y']], expect)
|
||||
|
||||
def test_read_only_source(self):
|
||||
# GH 10043
|
||||
rw_array = np.eye(10)
|
||||
rw_df = DataFrame(rw_array)
|
||||
|
||||
ro_array = np.eye(10)
|
||||
ro_array.setflags(write=False)
|
||||
ro_df = DataFrame(ro_array)
|
||||
|
||||
assert_frame_equal(rw_df.iloc[[1, 2, 3]], ro_df.iloc[[1, 2, 3]])
|
||||
assert_frame_equal(rw_df.iloc[[1]], ro_df.iloc[[1]])
|
||||
assert_series_equal(rw_df.iloc[1], ro_df.iloc[1])
|
||||
assert_frame_equal(rw_df.iloc[1:3], ro_df.iloc[1:3])
|
||||
|
||||
assert_frame_equal(rw_df.loc[[1, 2, 3]], ro_df.loc[[1, 2, 3]])
|
||||
assert_frame_equal(rw_df.loc[[1]], ro_df.loc[[1]])
|
||||
assert_series_equal(rw_df.loc[1], ro_df.loc[1])
|
||||
assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3])
|
||||
|
||||
def test_reindexing(self):
    """Reindex a categorical-indexed frame with lists and Categoricals.

    A plain list target is expected to produce a regular index; a
    Categorical target is expected to produce a categorical index with
    the dtype of that target.
    """
    nan = np.nan

    def build_expected(a_values, labels, dtype=None):
        # frame indexed by ``labels``, optionally cast to ``dtype``
        index_col = Series(list(labels))
        if dtype is not None:
            index_col = index_col.astype(dtype)
        return DataFrame({'A': a_values, 'B': index_col}).set_index('B')

    def check(target, expected):
        result = self.df2.reindex(target)
        assert_frame_equal(result, expected, check_index_type=True)

    # reindexing
    # convert to a regular index
    check(['a', 'b', 'e'], build_expected([0, 1, 5, 2, 3, nan], 'aaabbe'))
    check(['a', 'b'], build_expected([0, 1, 5, 2, 3], 'aaabb'))
    check(['e'], build_expected([nan], 'e'))
    check(['d'], build_expected([nan], 'd'))

    # since we are actually reindexing with a Categorical
    # then return a Categorical
    cats = list('cabe')

    check(Categorical(['a', 'd'], categories=cats),
          build_expected([0, 1, 5, nan], 'aaad', CDT(cats)))
    check(Categorical(['a'], categories=cats),
          build_expected([0, 1, 5], 'aaa', CDT(cats)))

    # the plain-list cases once more
    check(['a', 'b', 'e'], build_expected([0, 1, 5, 2, 3, nan], 'aaabbe'))
    check(['a', 'b'], build_expected([0, 1, 5, 2, 3], 'aaabb'))
    check(['e'], build_expected([nan], 'e'))

    # give back the type of categorical that we received
    check(Categorical(['a', 'd'], categories=cats, ordered=True),
          build_expected([0, 1, 5, nan], 'aaad',
                         CDT(cats, ordered=True)))
    check(Categorical(['a', 'd'], categories=['a', 'd']),
          build_expected([0, 1, 5, nan], 'aaad', CDT(['a', 'd'])))

    # passed duplicate indexers are not allowed
    with pytest.raises(ValueError):
        self.df2.reindex(['a', 'a'])

    # args NotImplemented ATM
    for unsupported in ({'method': 'ffill'}, {'level': 1}, {'limit': 2}):
        with pytest.raises(NotImplementedError):
            self.df2.reindex(['a'], **unsupported)
|
||||
|
||||
def test_loc_slice(self):
    """Slicing a categorical index through ``.loc`` raises TypeError.

    GH9748: slicing is not implemented at the moment. The original
    carried a disabled check that ``df.loc[1:5]`` equals
    ``df.iloc[[1, 2, 3, 4]]``.
    """
    with pytest.raises(TypeError):
        self.df.loc[1:5]
|
||||
|
||||
def test_boolean_selection(self):
    """Boolean masks built from comparisons on a categorical index.

    ``df3`` and ``df4`` share the index values [1, 1, 2, 1, 3, 2] with
    categories [3, 2, 1]; ``df3`` is ordered and ``df4`` is not.
    """
    df3 = self.df3
    df4 = self.df4

    # equality: (label compared against, row positions expected)
    for label, rows in (('a', []), (1, [0, 1, 3])):
        for frame in (df3, df4):
            result = frame[frame.index == label]
            expected = frame.iloc[rows]
            assert_frame_equal(result, expected)

    # since we have an ordered categorical
    #
    # CategoricalIndex([1, 1, 2, 1, 3, 2],
    #                  categories=[3, 2, 1],
    #                  ordered=True,
    #                  name=u'B')
    result = df3[df3.index < 2]
    assert_frame_equal(result, df3.iloc[[4]])

    result = df3[df3.index > 1]
    assert_frame_equal(result, df3.iloc[[]])

    # unordered
    # cannot be compared
    #
    # CategoricalIndex([1, 1, 2, 1, 3, 2],
    #                  categories=[3, 2, 1],
    #                  ordered=False,
    #                  name=u'B')
    with pytest.raises(TypeError):
        df4[df4.index < 2]
    with pytest.raises(TypeError):
        df4[df4.index > 1]
|
||||
|
||||
def test_indexing_with_category(self):
|
||||
|
||||
# https://github.com/pandas-dev/pandas/issues/12564
|
||||
# consistent result if comparing as Dataframe
|
||||
|
||||
cat = DataFrame({'A': ['foo', 'bar', 'baz']})
|
||||
exp = DataFrame({'A': [True, False, False]})
|
||||
|
||||
res = (cat[['A']] == 'foo')
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
cat['A'] = cat['A'].astype('category')
|
||||
|
||||
res = (cat[['A']] == 'foo')
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
def test_map_with_dict_or_series(self):
|
||||
orig_values = ['a', 'B', 1, 'a']
|
||||
new_values = ['one', 2, 3.0, 'one']
|
||||
cur_index = pd.CategoricalIndex(orig_values, name='XXX')
|
||||
expected = pd.CategoricalIndex(new_values,
|
||||
name='XXX', categories=[3.0, 2, 'one'])
|
||||
|
||||
mapper = pd.Series(new_values[:-1], index=orig_values[:-1])
|
||||
output = cur_index.map(mapper)
|
||||
# Order of categories in output can be different
|
||||
tm.assert_index_equal(expected, output)
|
||||
|
||||
mapper = {o: n for o, n in
|
||||
zip(orig_values[:-1], new_values[:-1])}
|
||||
output = cur_index.map(mapper)
|
||||
# Order of categories in output can be different
|
||||
tm.assert_index_equal(expected, output)
|
||||
-431
@@ -1,431 +0,0 @@
|
||||
from warnings import catch_warnings
|
||||
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas.core import common as com
|
||||
from pandas import (compat, DataFrame, option_context,
|
||||
Series, MultiIndex, date_range, Timestamp)
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestCaching(object):
|
||||
|
||||
def test_slice_consolidate_invalidate_item_cache(self):
|
||||
|
||||
# this is chained assignment, but will 'work'
|
||||
with option_context('chained_assignment', None):
|
||||
|
||||
# #3970
|
||||
df = DataFrame({"aa": compat.lrange(5), "bb": [2.2] * 5})
|
||||
|
||||
# Creates a second float block
|
||||
df["cc"] = 0.0
|
||||
|
||||
# caches a reference to the 'bb' series
|
||||
df["bb"]
|
||||
|
||||
# repr machinery triggers consolidation
|
||||
repr(df)
|
||||
|
||||
# Assignment to wrong series
|
||||
df['bb'].iloc[0] = 0.17
|
||||
df._clear_item_cache()
|
||||
tm.assert_almost_equal(df['bb'][0], 0.17)
|
||||
|
||||
def test_setitem_cache_updating(self):
|
||||
# GH 5424
|
||||
cont = ['one', 'two', 'three', 'four', 'five', 'six', 'seven']
|
||||
|
||||
for do_ref in [False, False]:
|
||||
df = DataFrame({'a': cont,
|
||||
"b": cont[3:] + cont[:3],
|
||||
'c': np.arange(7)})
|
||||
|
||||
# ref the cache
|
||||
if do_ref:
|
||||
df.loc[0, "c"]
|
||||
|
||||
# set it
|
||||
df.loc[7, 'c'] = 1
|
||||
|
||||
assert df.loc[0, 'c'] == 0.0
|
||||
assert df.loc[7, 'c'] == 1.0
|
||||
|
||||
# GH 7084
|
||||
# not updating cache on series setting with slices
|
||||
expected = DataFrame({'A': [600, 600, 600]},
|
||||
index=date_range('5/7/2014', '5/9/2014'))
|
||||
out = DataFrame({'A': [0, 0, 0]},
|
||||
index=date_range('5/7/2014', '5/9/2014'))
|
||||
df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]})
|
||||
|
||||
# loop through df to update out
|
||||
six = Timestamp('5/7/2014')
|
||||
eix = Timestamp('5/9/2014')
|
||||
for ix, row in df.iterrows():
|
||||
out.loc[six:eix, row['C']] = out.loc[six:eix, row['C']] + row['D']
|
||||
|
||||
tm.assert_frame_equal(out, expected)
|
||||
tm.assert_series_equal(out['A'], expected['A'])
|
||||
|
||||
# try via a chain indexing
|
||||
# this actually works
|
||||
out = DataFrame({'A': [0, 0, 0]},
|
||||
index=date_range('5/7/2014', '5/9/2014'))
|
||||
for ix, row in df.iterrows():
|
||||
v = out[row['C']][six:eix] + row['D']
|
||||
out[row['C']][six:eix] = v
|
||||
|
||||
tm.assert_frame_equal(out, expected)
|
||||
tm.assert_series_equal(out['A'], expected['A'])
|
||||
|
||||
out = DataFrame({'A': [0, 0, 0]},
|
||||
index=date_range('5/7/2014', '5/9/2014'))
|
||||
for ix, row in df.iterrows():
|
||||
out.loc[six:eix, row['C']] += row['D']
|
||||
|
||||
tm.assert_frame_equal(out, expected)
|
||||
tm.assert_series_equal(out['A'], expected['A'])
|
||||
|
||||
|
||||
class TestChaining(object):
|
||||
|
||||
def test_setitem_chained_setfault(self):
|
||||
|
||||
# GH6026
|
||||
# setfaults under numpy 1.7.1 (ok on 1.8)
|
||||
data = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout']
|
||||
mdata = ['right', 'left', 'left', 'left', 'right', 'left', 'none']
|
||||
|
||||
df = DataFrame({'response': np.array(data)})
|
||||
mask = df.response == 'timeout'
|
||||
df.response[mask] = 'none'
|
||||
tm.assert_frame_equal(df, DataFrame({'response': mdata}))
|
||||
|
||||
recarray = np.rec.fromarrays([data], names=['response'])
|
||||
df = DataFrame(recarray)
|
||||
mask = df.response == 'timeout'
|
||||
df.response[mask] = 'none'
|
||||
tm.assert_frame_equal(df, DataFrame({'response': mdata}))
|
||||
|
||||
df = DataFrame({'response': data, 'response1': data})
|
||||
mask = df.response == 'timeout'
|
||||
df.response[mask] = 'none'
|
||||
tm.assert_frame_equal(df, DataFrame({'response': mdata,
|
||||
'response1': data}))
|
||||
|
||||
# GH 6056
|
||||
expected = DataFrame(dict(A=[np.nan, 'bar', 'bah', 'foo', 'bar']))
|
||||
df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar'])))
|
||||
df['A'].iloc[0] = np.nan
|
||||
result = df.head()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar'])))
|
||||
df.A.iloc[0] = np.nan
|
||||
result = df.head()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_detect_chained_assignment(self):
|
||||
|
||||
pd.set_option('chained_assignment', 'raise')
|
||||
|
||||
# work with the chain
|
||||
expected = DataFrame([[-5, 1], [-6, 3]], columns=list('AB'))
|
||||
df = DataFrame(np.arange(4).reshape(2, 2),
|
||||
columns=list('AB'), dtype='int64')
|
||||
assert df._is_copy is None
|
||||
|
||||
df['A'][0] = -5
|
||||
df['A'][1] = -6
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# test with the chaining
|
||||
df = DataFrame({'A': Series(range(2), dtype='int64'),
|
||||
'B': np.array(np.arange(2, 4), dtype=np.float64)})
|
||||
assert df._is_copy is None
|
||||
|
||||
with pytest.raises(com.SettingWithCopyError):
|
||||
df['A'][0] = -5
|
||||
|
||||
with pytest.raises(com.SettingWithCopyError):
|
||||
df['A'][1] = np.nan
|
||||
|
||||
assert df['A']._is_copy is None
|
||||
|
||||
# Using a copy (the chain), fails
|
||||
df = DataFrame({'A': Series(range(2), dtype='int64'),
|
||||
'B': np.array(np.arange(2, 4), dtype=np.float64)})
|
||||
|
||||
with pytest.raises(com.SettingWithCopyError):
|
||||
df.loc[0]['A'] = -5
|
||||
|
||||
# Doc example
|
||||
df = DataFrame({'a': ['one', 'one', 'two', 'three',
|
||||
'two', 'one', 'six'],
|
||||
'c': Series(range(7), dtype='int64')})
|
||||
assert df._is_copy is None
|
||||
|
||||
with pytest.raises(com.SettingWithCopyError):
|
||||
indexer = df.a.str.startswith('o')
|
||||
df[indexer]['c'] = 42
|
||||
|
||||
expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]})
|
||||
df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]})
|
||||
|
||||
with pytest.raises(com.SettingWithCopyError):
|
||||
df['A'][0] = 111
|
||||
|
||||
with pytest.raises(com.SettingWithCopyError):
|
||||
df.loc[0]['A'] = 111
|
||||
|
||||
df.loc[0, 'A'] = 111
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# gh-5475: Make sure that is_copy is picked up reconstruction
|
||||
df = DataFrame({"A": [1, 2]})
|
||||
assert df._is_copy is None
|
||||
|
||||
with tm.ensure_clean('__tmp__pickle') as path:
|
||||
df.to_pickle(path)
|
||||
df2 = pd.read_pickle(path)
|
||||
df2["B"] = df2["A"]
|
||||
df2["B"] = df2["A"]
|
||||
|
||||
# gh-5597: a spurious raise as we are setting the entire column here
|
||||
from string import ascii_letters as letters
|
||||
|
||||
def random_text(nobs=100):
|
||||
df = []
|
||||
for i in range(nobs):
|
||||
idx = np.random.randint(len(letters), size=2)
|
||||
idx.sort()
|
||||
|
||||
df.append([letters[idx[0]:idx[1]]])
|
||||
|
||||
return DataFrame(df, columns=['letters'])
|
||||
|
||||
df = random_text(100000)
|
||||
|
||||
# Always a copy
|
||||
x = df.iloc[[0, 1, 2]]
|
||||
assert x._is_copy is not None
|
||||
|
||||
x = df.iloc[[0, 1, 2, 4]]
|
||||
assert x._is_copy is not None
|
||||
|
||||
# Explicitly copy
|
||||
indexer = df.letters.apply(lambda x: len(x) > 10)
|
||||
df = df.loc[indexer].copy()
|
||||
|
||||
assert df._is_copy is None
|
||||
df['letters'] = df['letters'].apply(str.lower)
|
||||
|
||||
# Implicitly take
|
||||
df = random_text(100000)
|
||||
indexer = df.letters.apply(lambda x: len(x) > 10)
|
||||
df = df.loc[indexer]
|
||||
|
||||
assert df._is_copy is not None
|
||||
df['letters'] = df['letters'].apply(str.lower)
|
||||
|
||||
# Implicitly take 2
|
||||
df = random_text(100000)
|
||||
indexer = df.letters.apply(lambda x: len(x) > 10)
|
||||
|
||||
df = df.loc[indexer]
|
||||
assert df._is_copy is not None
|
||||
df.loc[:, 'letters'] = df['letters'].apply(str.lower)
|
||||
|
||||
# Should be ok even though it's a copy!
|
||||
assert df._is_copy is None
|
||||
|
||||
df['letters'] = df['letters'].apply(str.lower)
|
||||
assert df._is_copy is None
|
||||
|
||||
df = random_text(100000)
|
||||
indexer = df.letters.apply(lambda x: len(x) > 10)
|
||||
df.loc[indexer, 'letters'] = (
|
||||
df.loc[indexer, 'letters'].apply(str.lower))
|
||||
|
||||
# an identical take, so no copy
|
||||
df = DataFrame({'a': [1]}).dropna()
|
||||
assert df._is_copy is None
|
||||
df['a'] += 1
|
||||
|
||||
# Inplace ops, originally from:
|
||||
# http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
|
||||
a = [12, 23]
|
||||
b = [123, None]
|
||||
c = [1234, 2345]
|
||||
d = [12345, 23456]
|
||||
tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'),
|
||||
('ears', 'right')]
|
||||
events = {('eyes', 'left'): a,
|
||||
('eyes', 'right'): b,
|
||||
('ears', 'left'): c,
|
||||
('ears', 'right'): d}
|
||||
multiind = MultiIndex.from_tuples(tuples, names=['part', 'side'])
|
||||
zed = DataFrame(events, index=['a', 'b'], columns=multiind)
|
||||
|
||||
with pytest.raises(com.SettingWithCopyError):
|
||||
zed['eyes']['right'].fillna(value=555, inplace=True)
|
||||
|
||||
df = DataFrame(np.random.randn(10, 4))
|
||||
s = df.iloc[:, 0].sort_values()
|
||||
|
||||
tm.assert_series_equal(s, df.iloc[:, 0].sort_values())
|
||||
tm.assert_series_equal(s, df[0].sort_values())
|
||||
|
||||
# see gh-6025: false positives
|
||||
df = DataFrame({'column1': ['a', 'a', 'a'], 'column2': [4, 8, 9]})
|
||||
str(df)
|
||||
|
||||
df['column1'] = df['column1'] + 'b'
|
||||
str(df)
|
||||
|
||||
df = df[df['column2'] != 8]
|
||||
str(df)
|
||||
|
||||
df['column1'] = df['column1'] + 'c'
|
||||
str(df)
|
||||
|
||||
# from SO:
|
||||
# http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
|
||||
df = DataFrame(np.arange(0, 9), columns=['count'])
|
||||
df['group'] = 'b'
|
||||
|
||||
with pytest.raises(com.SettingWithCopyError):
|
||||
df.iloc[0:5]['group'] = 'a'
|
||||
|
||||
# Mixed type setting but same dtype & changing dtype
|
||||
df = DataFrame(dict(A=date_range('20130101', periods=5),
|
||||
B=np.random.randn(5),
|
||||
C=np.arange(5, dtype='int64'),
|
||||
D=list('abcde')))
|
||||
|
||||
with pytest.raises(com.SettingWithCopyError):
|
||||
df.loc[2]['D'] = 'foo'
|
||||
|
||||
with pytest.raises(com.SettingWithCopyError):
|
||||
df.loc[2]['C'] = 'foo'
|
||||
|
||||
with pytest.raises(com.SettingWithCopyError):
|
||||
df['C'][2] = 'foo'
|
||||
|
||||
def test_setting_with_copy_bug(self):
|
||||
|
||||
# operating on a copy
|
||||
df = DataFrame({'a': list(range(4)),
|
||||
'b': list('ab..'),
|
||||
'c': ['a', 'b', np.nan, 'd']})
|
||||
mask = pd.isna(df.c)
|
||||
|
||||
def f():
|
||||
df[['c']][mask] = df[['b']][mask]
|
||||
|
||||
pytest.raises(com.SettingWithCopyError, f)
|
||||
|
||||
# invalid warning as we are returning a new object
|
||||
# GH 8730
|
||||
df1 = DataFrame({'x': Series(['a', 'b', 'c']),
|
||||
'y': Series(['d', 'e', 'f'])})
|
||||
df2 = df1[['x']]
|
||||
|
||||
# this should not raise
|
||||
df2['y'] = ['g', 'h', 'i']
|
||||
|
||||
def test_detect_chained_assignment_warnings(self):
|
||||
|
||||
# warnings
|
||||
with option_context('chained_assignment', 'warn'):
|
||||
df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]})
|
||||
with tm.assert_produces_warning(
|
||||
expected_warning=com.SettingWithCopyWarning):
|
||||
df.loc[0]['A'] = 111
|
||||
|
||||
def test_chained_getitem_with_lists(self):
|
||||
|
||||
# GH6394
|
||||
# Regression in chained getitem indexing with embedded list-like from
|
||||
# 0.12
|
||||
def check(result, expected):
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
assert isinstance(result, np.ndarray)
|
||||
|
||||
df = DataFrame({'A': 5 * [np.zeros(3)], 'B': 5 * [np.ones(3)]})
|
||||
expected = df['A'].iloc[2]
|
||||
result = df.loc[2, 'A']
|
||||
check(result, expected)
|
||||
result2 = df.iloc[2]['A']
|
||||
check(result2, expected)
|
||||
result3 = df['A'].loc[2]
|
||||
check(result3, expected)
|
||||
result4 = df['A'].iloc[2]
|
||||
check(result4, expected)
|
||||
|
||||
def test_cache_updating(self):
|
||||
# GH 4939, make sure to update the cache on setitem
|
||||
|
||||
df = tm.makeDataFrame()
|
||||
df['A'] # cache series
|
||||
with catch_warnings(record=True):
|
||||
df.ix["Hello Friend"] = df.ix[0]
|
||||
assert "Hello Friend" in df['A'].index
|
||||
assert "Hello Friend" in df['B'].index
|
||||
|
||||
with catch_warnings(record=True):
|
||||
panel = tm.makePanel()
|
||||
panel.ix[0] # get first item into cache
|
||||
panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1
|
||||
assert "A+1" in panel.ix[0].columns
|
||||
assert "A+1" in panel.ix[1].columns
|
||||
|
||||
# 5216
|
||||
# make sure that we don't try to set a dead cache
|
||||
a = np.random.rand(10, 3)
|
||||
df = DataFrame(a, columns=['x', 'y', 'z'])
|
||||
tuples = [(i, j) for i in range(5) for j in range(2)]
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
df.index = index
|
||||
|
||||
# setting via chained assignment
|
||||
# but actually works, since everything is a view
|
||||
df.loc[0]['z'].iloc[0] = 1.
|
||||
result = df.loc[(0, 0), 'z']
|
||||
assert result == 1
|
||||
|
||||
# correct setting
|
||||
df.loc[(0, 0), 'z'] = 2
|
||||
result = df.loc[(0, 0), 'z']
|
||||
assert result == 2
|
||||
|
||||
# 10264
|
||||
df = DataFrame(np.zeros((5, 5), dtype='int64'), columns=[
|
||||
'a', 'b', 'c', 'd', 'e'], index=range(5))
|
||||
df['f'] = 0
|
||||
df.f.values[3] = 1
|
||||
|
||||
# TODO(wesm): unused?
|
||||
# y = df.iloc[np.arange(2, len(df))]
|
||||
|
||||
df.f.values[3] = 2
|
||||
expected = DataFrame(np.zeros((5, 6), dtype='int64'), columns=[
|
||||
'a', 'b', 'c', 'd', 'e', 'f'], index=range(5))
|
||||
expected.at[3, 'f'] = 2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
expected = Series([0, 0, 0, 2, 0], name='f')
|
||||
tm.assert_series_equal(df.f, expected)
|
||||
|
||||
def test_deprecate_is_copy(self):
|
||||
# GH18801
|
||||
df = DataFrame({"A": [1, 2, 3]})
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
# getter
|
||||
df.is_copy
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
# setter
|
||||
df.is_copy = "test deprecated is_copy"
|
||||
@@ -1,920 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import itertools
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
import pandas.compat as compat
|
||||
|
||||
|
||||
###############################################################
|
||||
# Index / Series common tests which may trigger dtype coercions
|
||||
###############################################################
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True, scope='class')
|
||||
def check_comprehensiveness(request):
|
||||
# Iterate over combination of dtype, method and klass
|
||||
# and ensure that each are contained within a collected test
|
||||
cls = request.cls
|
||||
combos = itertools.product(cls.klasses, cls.dtypes, [cls.method])
|
||||
|
||||
def has_test(combo):
|
||||
klass, dtype, method = combo
|
||||
cls_funcs = request.node.session.items
|
||||
return any(klass in x.name and dtype in x.name and
|
||||
method in x.name for x in cls_funcs)
|
||||
|
||||
for combo in combos:
|
||||
if not has_test(combo):
|
||||
msg = 'test method is not defined: {0}, {1}'
|
||||
raise AssertionError(msg.format(type(cls), combo))
|
||||
|
||||
yield
|
||||
|
||||
|
||||
class CoercionBase(object):
|
||||
|
||||
klasses = ['index', 'series']
|
||||
dtypes = ['object', 'int64', 'float64', 'complex128', 'bool',
|
||||
'datetime64', 'datetime64tz', 'timedelta64', 'period']
|
||||
|
||||
@property
|
||||
def method(self):
|
||||
raise NotImplementedError(self)
|
||||
|
||||
def _assert(self, left, right, dtype):
|
||||
# explicitly check dtype to avoid any unexpected result
|
||||
if isinstance(left, pd.Series):
|
||||
tm.assert_series_equal(left, right)
|
||||
elif isinstance(left, pd.Index):
|
||||
tm.assert_index_equal(left, right)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
assert left.dtype == dtype
|
||||
assert right.dtype == dtype
|
||||
|
||||
|
||||
class TestSetitemCoercion(CoercionBase):
|
||||
|
||||
method = 'setitem'
|
||||
|
||||
def _assert_setitem_series_conversion(self, original_series, loc_value,
|
||||
expected_series, expected_dtype):
|
||||
""" test series value's coercion triggered by assignment """
|
||||
temp = original_series.copy()
|
||||
temp[1] = loc_value
|
||||
tm.assert_series_equal(temp, expected_series)
|
||||
# check dtype explicitly for sure
|
||||
assert temp.dtype == expected_dtype
|
||||
|
||||
# .loc works different rule, temporary disable
|
||||
# temp = original_series.copy()
|
||||
# temp.loc[1] = loc_value
|
||||
# tm.assert_series_equal(temp, expected_series)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
|
||||
(1, np.object),
|
||||
(1.1, np.object),
|
||||
(1 + 1j, np.object),
|
||||
(True, np.object)])
|
||||
def test_setitem_series_object(self, val, exp_dtype):
|
||||
obj = pd.Series(list('abcd'))
|
||||
assert obj.dtype == np.object
|
||||
|
||||
exp = pd.Series(['a', val, 'c', 'd'])
|
||||
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
|
||||
(1, np.int64),
|
||||
(1.1, np.float64),
|
||||
(1 + 1j, np.complex128),
|
||||
(True, np.object)])
|
||||
def test_setitem_series_int64(self, val, exp_dtype):
|
||||
obj = pd.Series([1, 2, 3, 4])
|
||||
assert obj.dtype == np.int64
|
||||
|
||||
if exp_dtype is np.float64:
|
||||
exp = pd.Series([1, 1, 3, 4])
|
||||
self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64)
|
||||
pytest.xfail("GH12747 The result must be float")
|
||||
|
||||
exp = pd.Series([1, val, 3, 4])
|
||||
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
|
||||
(np.int32(1), np.int8),
|
||||
(np.int16(2**9), np.int16)])
|
||||
def test_setitem_series_int8(self, val, exp_dtype):
|
||||
obj = pd.Series([1, 2, 3, 4], dtype=np.int8)
|
||||
assert obj.dtype == np.int8
|
||||
|
||||
if exp_dtype is np.int16:
|
||||
exp = pd.Series([1, 0, 3, 4], dtype=np.int8)
|
||||
self._assert_setitem_series_conversion(obj, val, exp, np.int8)
|
||||
pytest.xfail("BUG: it must be Series([1, 1, 3, 4], dtype=np.int16")
|
||||
|
||||
exp = pd.Series([1, val, 3, 4], dtype=np.int8)
|
||||
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
|
||||
(1, np.float64),
|
||||
(1.1, np.float64),
|
||||
(1 + 1j, np.complex128),
|
||||
(True, np.object)])
|
||||
def test_setitem_series_float64(self, val, exp_dtype):
|
||||
obj = pd.Series([1.1, 2.2, 3.3, 4.4])
|
||||
assert obj.dtype == np.float64
|
||||
|
||||
exp = pd.Series([1.1, val, 3.3, 4.4])
|
||||
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
|
||||
(1, np.complex128),
|
||||
(1.1, np.complex128),
|
||||
(1 + 1j, np.complex128),
|
||||
(True, np.object)])
|
||||
def test_setitem_series_complex128(self, val, exp_dtype):
|
||||
obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j])
|
||||
assert obj.dtype == np.complex128
|
||||
|
||||
exp = pd.Series([1 + 1j, val, 3 + 3j, 4 + 4j])
|
||||
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
|
||||
(1, np.int64),
|
||||
(3, np.int64),
|
||||
(1.1, np.float64),
|
||||
(1 + 1j, np.complex128),
|
||||
(True, np.bool)])
|
||||
def test_setitem_series_bool(self, val, exp_dtype):
|
||||
obj = pd.Series([True, False, True, False])
|
||||
assert obj.dtype == np.bool
|
||||
|
||||
if exp_dtype is np.int64:
|
||||
exp = pd.Series([True, True, True, False])
|
||||
self._assert_setitem_series_conversion(obj, val, exp, np.bool)
|
||||
pytest.xfail("TODO_GH12747 The result must be int")
|
||||
elif exp_dtype is np.float64:
|
||||
exp = pd.Series([True, True, True, False])
|
||||
self._assert_setitem_series_conversion(obj, val, exp, np.bool)
|
||||
pytest.xfail("TODO_GH12747 The result must be float")
|
||||
elif exp_dtype is np.complex128:
|
||||
exp = pd.Series([True, True, True, False])
|
||||
self._assert_setitem_series_conversion(obj, val, exp, np.bool)
|
||||
pytest.xfail("TODO_GH12747 The result must be complex")
|
||||
|
||||
exp = pd.Series([True, val, True, False])
|
||||
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
|
||||
(pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
|
||||
(1, np.object),
|
||||
('x', np.object)])
|
||||
def test_setitem_series_datetime64(self, val, exp_dtype):
|
||||
obj = pd.Series([pd.Timestamp('2011-01-01'),
|
||||
pd.Timestamp('2011-01-02'),
|
||||
pd.Timestamp('2011-01-03'),
|
||||
pd.Timestamp('2011-01-04')])
|
||||
assert obj.dtype == 'datetime64[ns]'
|
||||
|
||||
exp = pd.Series([pd.Timestamp('2011-01-01'),
|
||||
val,
|
||||
pd.Timestamp('2011-01-03'),
|
||||
pd.Timestamp('2011-01-04')])
|
||||
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
|
||||
(pd.Timestamp('2012-01-01', tz='US/Eastern'),
|
||||
'datetime64[ns, US/Eastern]'),
|
||||
(pd.Timestamp('2012-01-01', tz='US/Pacific'), np.object),
|
||||
(pd.Timestamp('2012-01-01'), np.object),
|
||||
(1, np.object)])
|
||||
def test_setitem_series_datetime64tz(self, val, exp_dtype):
|
||||
tz = 'US/Eastern'
|
||||
obj = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
|
||||
pd.Timestamp('2011-01-02', tz=tz),
|
||||
pd.Timestamp('2011-01-03', tz=tz),
|
||||
pd.Timestamp('2011-01-04', tz=tz)])
|
||||
assert obj.dtype == 'datetime64[ns, US/Eastern]'
|
||||
|
||||
exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
|
||||
val,
|
||||
pd.Timestamp('2011-01-03', tz=tz),
|
||||
pd.Timestamp('2011-01-04', tz=tz)])
|
||||
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
|
||||
(pd.Timedelta('12 day'), 'timedelta64[ns]'),
|
||||
(1, np.object),
|
||||
('x', np.object)])
|
||||
def test_setitem_series_timedelta64(self, val, exp_dtype):
|
||||
obj = pd.Series([pd.Timedelta('1 day'),
|
||||
pd.Timedelta('2 day'),
|
||||
pd.Timedelta('3 day'),
|
||||
pd.Timedelta('4 day')])
|
||||
assert obj.dtype == 'timedelta64[ns]'
|
||||
|
||||
exp = pd.Series([pd.Timedelta('1 day'),
|
||||
val,
|
||||
pd.Timedelta('3 day'),
|
||||
pd.Timedelta('4 day')])
|
||||
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
|
||||
|
||||
def _assert_setitem_index_conversion(self, original_series, loc_key,
|
||||
expected_index, expected_dtype):
|
||||
""" test index's coercion triggered by assign key """
|
||||
temp = original_series.copy()
|
||||
temp[loc_key] = 5
|
||||
exp = pd.Series([1, 2, 3, 4, 5], index=expected_index)
|
||||
tm.assert_series_equal(temp, exp)
|
||||
# check dtype explicitly for sure
|
||||
assert temp.index.dtype == expected_dtype
|
||||
|
||||
temp = original_series.copy()
|
||||
temp.loc[loc_key] = 5
|
||||
exp = pd.Series([1, 2, 3, 4, 5], index=expected_index)
|
||||
tm.assert_series_equal(temp, exp)
|
||||
# check dtype explicitly for sure
|
||||
assert temp.index.dtype == expected_dtype
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
|
||||
('x', np.object),
|
||||
(5, IndexError),
|
||||
(1.1, np.object)])
|
||||
def test_setitem_index_object(self, val, exp_dtype):
|
||||
obj = pd.Series([1, 2, 3, 4], index=list('abcd'))
|
||||
assert obj.index.dtype == np.object
|
||||
|
||||
if exp_dtype is IndexError:
|
||||
temp = obj.copy()
|
||||
with pytest.raises(exp_dtype):
|
||||
temp[5] = 5
|
||||
else:
|
||||
exp_index = pd.Index(list('abcd') + [val])
|
||||
self._assert_setitem_index_conversion(obj, val, exp_index,
|
||||
exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
|
||||
(5, np.int64),
|
||||
(1.1, np.float64),
|
||||
('x', np.object)])
|
||||
def test_setitem_index_int64(self, val, exp_dtype):
|
||||
obj = pd.Series([1, 2, 3, 4])
|
||||
assert obj.index.dtype == np.int64
|
||||
|
||||
exp_index = pd.Index([0, 1, 2, 3, val])
|
||||
self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("val,exp_dtype", [
|
||||
(5, IndexError),
|
||||
(5.1, np.float64),
|
||||
('x', np.object)])
|
||||
def test_setitem_index_float64(self, val, exp_dtype):
|
||||
obj = pd.Series([1, 2, 3, 4], index=[1.1, 2.1, 3.1, 4.1])
|
||||
assert obj.index.dtype == np.float64
|
||||
|
||||
if exp_dtype is IndexError:
|
||||
# float + int -> int
|
||||
temp = obj.copy()
|
||||
with pytest.raises(exp_dtype):
|
||||
temp[5] = 5
|
||||
pytest.xfail("TODO_GH12747 The result must be float")
|
||||
|
||||
exp_index = pd.Index([1.1, 2.1, 3.1, 4.1, val])
|
||||
self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
|
||||
|
||||
def test_setitem_series_period(self):
|
||||
pass
|
||||
|
||||
def test_setitem_index_complex128(self):
|
||||
pass
|
||||
|
||||
def test_setitem_index_bool(self):
|
||||
pass
|
||||
|
||||
def test_setitem_index_datetime64(self):
|
||||
pass
|
||||
|
||||
def test_setitem_index_datetime64tz(self):
|
||||
pass
|
||||
|
||||
def test_setitem_index_timedelta64(self):
|
||||
pass
|
||||
|
||||
def test_setitem_index_period(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestInsertIndexCoercion(CoercionBase):
|
||||
|
||||
klasses = ['index']
|
||||
method = 'insert'
|
||||
|
||||
def _assert_insert_conversion(self, original, value,
|
||||
expected, expected_dtype):
|
||||
""" test coercion triggered by insert """
|
||||
target = original.copy()
|
||||
res = target.insert(1, value)
|
||||
tm.assert_index_equal(res, expected)
|
||||
assert res.dtype == expected_dtype
|
||||
|
||||
@pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
|
||||
(1, 1, np.object),
|
||||
(1.1, 1.1, np.object),
|
||||
(False, False, np.object),
|
||||
('x', 'x', np.object)])
|
||||
def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
|
||||
obj = pd.Index(list('abcd'))
|
||||
assert obj.dtype == np.object
|
||||
|
||||
exp = pd.Index(['a', coerced_val, 'b', 'c', 'd'])
|
||||
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
|
||||
|
||||
@pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
|
||||
(1, 1, np.int64),
|
||||
(1.1, 1.1, np.float64),
|
||||
(False, 0, np.int64),
|
||||
('x', 'x', np.object)])
|
||||
def test_insert_index_int64(self, insert, coerced_val, coerced_dtype):
|
||||
obj = pd.Int64Index([1, 2, 3, 4])
|
||||
assert obj.dtype == np.int64
|
||||
|
||||
exp = pd.Index([1, coerced_val, 2, 3, 4])
|
||||
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
|
||||
|
||||
@pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
|
||||
(1, 1., np.float64),
|
||||
(1.1, 1.1, np.float64),
|
||||
(False, 0., np.float64),
|
||||
('x', 'x', np.object)])
|
||||
def test_insert_index_float64(self, insert, coerced_val, coerced_dtype):
|
||||
obj = pd.Float64Index([1., 2., 3., 4.])
|
||||
assert obj.dtype == np.float64
|
||||
|
||||
exp = pd.Index([1., coerced_val, 2., 3., 4.])
|
||||
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
|
||||
|
||||
@pytest.mark.parametrize('fill_val,exp_dtype', [
|
||||
(pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
|
||||
(pd.Timestamp('2012-01-01', tz='US/Eastern'),
|
||||
'datetime64[ns, US/Eastern]')],
|
||||
ids=['datetime64', 'datetime64tz'])
|
||||
def test_insert_index_datetimes(self, fill_val, exp_dtype):
|
||||
obj = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03',
|
||||
'2011-01-04'], tz=fill_val.tz)
|
||||
assert obj.dtype == exp_dtype
|
||||
|
||||
exp = pd.DatetimeIndex(['2011-01-01', fill_val.date(), '2011-01-02',
|
||||
'2011-01-03', '2011-01-04'], tz=fill_val.tz)
|
||||
self._assert_insert_conversion(obj, fill_val, exp, exp_dtype)
|
||||
|
||||
msg = "Passed item and index have different timezone"
|
||||
if fill_val.tz:
|
||||
with tm.assert_raises_regex(ValueError, msg):
|
||||
obj.insert(1, pd.Timestamp('2012-01-01'))
|
||||
|
||||
with tm.assert_raises_regex(ValueError, msg):
|
||||
obj.insert(1, pd.Timestamp('2012-01-01', tz='Asia/Tokyo'))
|
||||
|
||||
msg = "cannot insert DatetimeIndex with incompatible label"
|
||||
with tm.assert_raises_regex(TypeError, msg):
|
||||
obj.insert(1, 1)
|
||||
|
||||
pytest.xfail("ToDo: must coerce to object")
|
||||
|
||||
def test_insert_index_timedelta64(self):
|
||||
obj = pd.TimedeltaIndex(['1 day', '2 day', '3 day', '4 day'])
|
||||
assert obj.dtype == 'timedelta64[ns]'
|
||||
|
||||
# timedelta64 + timedelta64 => timedelta64
|
||||
exp = pd.TimedeltaIndex(['1 day', '10 day', '2 day', '3 day', '4 day'])
|
||||
self._assert_insert_conversion(obj, pd.Timedelta('10 day'),
|
||||
exp, 'timedelta64[ns]')
|
||||
|
||||
# ToDo: must coerce to object
|
||||
msg = "cannot insert TimedeltaIndex with incompatible label"
|
||||
with tm.assert_raises_regex(TypeError, msg):
|
||||
obj.insert(1, pd.Timestamp('2012-01-01'))
|
||||
|
||||
# ToDo: must coerce to object
|
||||
msg = "cannot insert TimedeltaIndex with incompatible label"
|
||||
with tm.assert_raises_regex(TypeError, msg):
|
||||
obj.insert(1, 1)
|
||||
|
||||
@pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
|
||||
(pd.Period('2012-01', freq='M'), '2012-01', 'period[M]'),
|
||||
(pd.Timestamp('2012-01-01'), pd.Timestamp('2012-01-01'), np.object),
|
||||
(1, 1, np.object),
|
||||
('x', 'x', np.object)])
|
||||
def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
|
||||
obj = pd.PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
|
||||
freq='M')
|
||||
assert obj.dtype == 'period[M]'
|
||||
|
||||
if isinstance(insert, pd.Period):
|
||||
index_type = pd.PeriodIndex
|
||||
else:
|
||||
index_type = pd.Index
|
||||
|
||||
exp = index_type([pd.Period('2011-01', freq='M'),
|
||||
coerced_val,
|
||||
pd.Period('2011-02', freq='M'),
|
||||
pd.Period('2011-03', freq='M'),
|
||||
pd.Period('2011-04', freq='M')], freq='M')
|
||||
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
|
||||
|
||||
def test_insert_index_complex128(self):
|
||||
pass
|
||||
|
||||
def test_insert_index_bool(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestWhereCoercion(CoercionBase):
|
||||
|
||||
method = 'where'
|
||||
|
||||
def _assert_where_conversion(self, original, cond, values,
|
||||
expected, expected_dtype):
|
||||
""" test coercion triggered by where """
|
||||
target = original.copy()
|
||||
res = target.where(cond, values)
|
||||
self._assert(res, expected, expected_dtype)
|
||||
|
||||
@pytest.mark.parametrize("klass", [pd.Series, pd.Index],
|
||||
ids=['series', 'index'])
|
||||
@pytest.mark.parametrize("fill_val,exp_dtype", [
|
||||
(1, np.object),
|
||||
(1.1, np.object),
|
||||
(1 + 1j, np.object),
|
||||
(True, np.object)])
|
||||
def test_where_object(self, klass, fill_val, exp_dtype):
|
||||
obj = klass(list('abcd'))
|
||||
assert obj.dtype == np.object
|
||||
cond = klass([True, False, True, False])
|
||||
|
||||
if fill_val is True and klass is pd.Series:
|
||||
ret_val = 1
|
||||
else:
|
||||
ret_val = fill_val
|
||||
|
||||
exp = klass(['a', ret_val, 'c', ret_val])
|
||||
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
|
||||
|
||||
if fill_val is True:
|
||||
values = klass([True, False, True, True])
|
||||
else:
|
||||
values = klass(fill_val * x for x in [5, 6, 7, 8])
|
||||
|
||||
exp = klass(['a', values[1], 'c', values[3]])
|
||||
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("klass", [pd.Series, pd.Index],
|
||||
ids=['series', 'index'])
|
||||
@pytest.mark.parametrize("fill_val,exp_dtype", [
|
||||
(1, np.int64),
|
||||
(1.1, np.float64),
|
||||
(1 + 1j, np.complex128),
|
||||
(True, np.object)])
|
||||
def test_where_int64(self, klass, fill_val, exp_dtype):
|
||||
if klass is pd.Index and exp_dtype is np.complex128:
|
||||
pytest.skip("Complex Index not supported")
|
||||
obj = klass([1, 2, 3, 4])
|
||||
assert obj.dtype == np.int64
|
||||
cond = klass([True, False, True, False])
|
||||
|
||||
exp = klass([1, fill_val, 3, fill_val])
|
||||
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
|
||||
|
||||
if fill_val is True:
|
||||
values = klass([True, False, True, True])
|
||||
else:
|
||||
values = klass(x * fill_val for x in [5, 6, 7, 8])
|
||||
exp = klass([1, values[1], 3, values[3]])
|
||||
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("klass", [pd.Series, pd.Index],
|
||||
ids=['series', 'index'])
|
||||
@pytest.mark.parametrize("fill_val, exp_dtype", [
|
||||
(1, np.float64),
|
||||
(1.1, np.float64),
|
||||
(1 + 1j, np.complex128),
|
||||
(True, np.object)])
|
||||
def test_where_float64(self, klass, fill_val, exp_dtype):
|
||||
if klass is pd.Index and exp_dtype is np.complex128:
|
||||
pytest.skip("Complex Index not supported")
|
||||
obj = klass([1.1, 2.2, 3.3, 4.4])
|
||||
assert obj.dtype == np.float64
|
||||
cond = klass([True, False, True, False])
|
||||
|
||||
exp = klass([1.1, fill_val, 3.3, fill_val])
|
||||
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
|
||||
|
||||
if fill_val is True:
|
||||
values = klass([True, False, True, True])
|
||||
else:
|
||||
values = klass(x * fill_val for x in [5, 6, 7, 8])
|
||||
exp = klass([1.1, values[1], 3.3, values[3]])
|
||||
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("fill_val,exp_dtype", [
|
||||
(1, np.complex128),
|
||||
(1.1, np.complex128),
|
||||
(1 + 1j, np.complex128),
|
||||
(True, np.object)])
|
||||
def test_where_series_complex128(self, fill_val, exp_dtype):
|
||||
obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j])
|
||||
assert obj.dtype == np.complex128
|
||||
cond = pd.Series([True, False, True, False])
|
||||
|
||||
exp = pd.Series([1 + 1j, fill_val, 3 + 3j, fill_val])
|
||||
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
|
||||
|
||||
if fill_val is True:
|
||||
values = pd.Series([True, False, True, True])
|
||||
else:
|
||||
values = pd.Series(x * fill_val for x in [5, 6, 7, 8])
|
||||
exp = pd.Series([1 + 1j, values[1], 3 + 3j, values[3]])
|
||||
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("fill_val,exp_dtype", [
|
||||
(1, np.object),
|
||||
(1.1, np.object),
|
||||
(1 + 1j, np.object),
|
||||
(True, np.bool)])
|
||||
def test_where_series_bool(self, fill_val, exp_dtype):
|
||||
|
||||
obj = pd.Series([True, False, True, False])
|
||||
assert obj.dtype == np.bool
|
||||
cond = pd.Series([True, False, True, False])
|
||||
|
||||
exp = pd.Series([True, fill_val, True, fill_val])
|
||||
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
|
||||
|
||||
if fill_val is True:
|
||||
values = pd.Series([True, False, True, True])
|
||||
else:
|
||||
values = pd.Series(x * fill_val for x in [5, 6, 7, 8])
|
||||
exp = pd.Series([True, values[1], True, values[3]])
|
||||
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("fill_val,exp_dtype", [
|
||||
(pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
|
||||
(pd.Timestamp('2012-01-01', tz='US/Eastern'), np.object)],
|
||||
ids=['datetime64', 'datetime64tz'])
|
||||
def test_where_series_datetime64(self, fill_val, exp_dtype):
|
||||
obj = pd.Series([pd.Timestamp('2011-01-01'),
|
||||
pd.Timestamp('2011-01-02'),
|
||||
pd.Timestamp('2011-01-03'),
|
||||
pd.Timestamp('2011-01-04')])
|
||||
assert obj.dtype == 'datetime64[ns]'
|
||||
cond = pd.Series([True, False, True, False])
|
||||
|
||||
exp = pd.Series([pd.Timestamp('2011-01-01'), fill_val,
|
||||
pd.Timestamp('2011-01-03'), fill_val])
|
||||
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
|
||||
|
||||
values = pd.Series(pd.date_range(fill_val, periods=4))
|
||||
if fill_val.tz:
|
||||
exp = pd.Series([pd.Timestamp('2011-01-01'),
|
||||
pd.Timestamp('2012-01-02 05:00'),
|
||||
pd.Timestamp('2011-01-03'),
|
||||
pd.Timestamp('2012-01-04 05:00')])
|
||||
self._assert_where_conversion(obj, cond, values, exp,
|
||||
'datetime64[ns]')
|
||||
pytest.xfail("ToDo: do not coerce to UTC, must be object")
|
||||
|
||||
exp = pd.Series([pd.Timestamp('2011-01-01'), values[1],
|
||||
pd.Timestamp('2011-01-03'), values[3]])
|
||||
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
|
||||
|
||||
@pytest.mark.parametrize("fill_val,exp_dtype", [
    (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
    (pd.Timestamp('2012-01-01', tz='US/Eastern'), np.object)],
    ids=['datetime64', 'datetime64tz'])
def test_where_index_datetime(self, fill_val, exp_dtype):
    """Check ``where`` coercion on a datetime64[ns] Index.

    A scalar replacement is expected to raise TypeError; an Index
    replacement is compared against ``exp``.  Every path through this
    test finishes with ``pytest.xfail``.
    """
    kept_first = pd.Timestamp('2011-01-01')
    kept_third = pd.Timestamp('2011-01-03')

    obj = pd.Index([kept_first,
                    pd.Timestamp('2011-01-02'),
                    kept_third,
                    pd.Timestamp('2011-01-04')])
    assert obj.dtype == 'datetime64[ns]'
    cond = pd.Index([True, False, True, False])

    # scalar replacement is rejected
    msg = ("Index\\(\\.\\.\\.\\) must be called with a collection "
           "of some kind")
    with tm.assert_raises_regex(TypeError, msg):
        obj.where(cond, fill_val)

    # Index replacement
    values = pd.Index(pd.date_range(fill_val, periods=4))
    exp = pd.Index([kept_first,
                    pd.Timestamp('2012-01-02'),
                    kept_third,
                    pd.Timestamp('2012-01-04')])

    if fill_val.tz:
        self._assert_where_conversion(obj, cond, values, exp,
                                      'datetime64[ns]')
        pytest.xfail("ToDo: do not ignore timezone, must be object")

    self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
    pytest.xfail("datetime64 + datetime64 -> datetime64 must support"
                 " scalar")
|
||||
|
||||
def test_where_index_complex128(self):
    # Empty placeholder: no ``where`` check is implemented here for a
    # complex128 Index.
    pass
|
||||
|
||||
def test_where_index_bool(self):
    # Empty placeholder: no ``where`` check is implemented here for a
    # bool Index.
    pass
|
||||
|
||||
def test_where_series_datetime64tz(self):
    # Empty placeholder: no ``where`` check is implemented here for a
    # tz-aware datetime64 Series.
    pass
|
||||
|
||||
def test_where_series_timedelta64(self):
    # Empty placeholder: no ``where`` check is implemented here for a
    # timedelta64 Series.
    pass
|
||||
|
||||
def test_where_series_period(self):
    # Empty placeholder: no ``where`` check is implemented here for a
    # period Series.
    pass
|
||||
|
||||
def test_where_index_datetime64tz(self):
    # Empty placeholder: no ``where`` check is implemented here for a
    # tz-aware datetime64 Index.
    pass
|
||||
|
||||
def test_where_index_timedelta64(self):
    # Empty placeholder: no ``where`` check is implemented here for a
    # timedelta64 Index.
    pass
|
||||
|
||||
def test_where_index_period(self):
    # Empty placeholder: no ``where`` check is implemented here for a
    # period Index.
    pass
|
||||
|
||||
|
||||
class TestFillnaSeriesCoercion(CoercionBase):
    """Dtype-coercion checks for ``fillna`` on Series and Index objects."""

    # not indexing, but placed here for consistency

    method = 'fillna'

    def test_has_comprehensive_tests(self):
        # No-op.  NOTE(review): presumably overrides a check defined on
        # CoercionBase -- confirm against the base class.
        pass

    def _assert_fillna_conversion(self, original, value,
                                  expected, expected_dtype):
        """ test coercion triggered by fillna """
        # fill a copy so the caller's object is left untouched
        filled = original.copy().fillna(value)
        self._assert(filled, expected, expected_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
                             ids=['series', 'index'])
    @pytest.mark.parametrize("fill_val, fill_dtype", [
        (1, np.object),
        (1.1, np.object),
        (1 + 1j, np.object),
        (True, np.object)])
    def test_fillna_object(self, klass, fill_val, fill_dtype):
        """Fill the NaN slot of an object-dtype container."""
        source = klass(['a', np.nan, 'c', 'd'])
        assert source.dtype == np.object

        wanted = klass(['a', fill_val, 'c', 'd'])
        self._assert_fillna_conversion(source, fill_val, wanted, fill_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
                             ids=['series', 'index'])
    @pytest.mark.parametrize("fill_val,fill_dtype", [
        (1, np.float64),
        (1.1, np.float64),
        (1 + 1j, np.complex128),
        (True, np.object)])
    def test_fillna_float64(self, klass, fill_val, fill_dtype):
        """Fill the NaN slot of a float64 container."""
        source = klass([1.1, np.nan, 3.3, 4.4])
        assert source.dtype == np.float64

        wanted = klass([1.1, fill_val, 3.3, 4.4])
        # float + complex -> we don't support a complex Index
        # complex for Series,
        # object for Index
        complex_into_index = (klass == pd.Index and
                              fill_dtype == np.complex128)
        wanted_dtype = np.object if complex_into_index else fill_dtype
        self._assert_fillna_conversion(source, fill_val, wanted,
                                       wanted_dtype)

    @pytest.mark.parametrize("fill_val,fill_dtype", [
        (1, np.complex128),
        (1.1, np.complex128),
        (1 + 1j, np.complex128),
        (True, np.object)])
    def test_fillna_series_complex128(self, fill_val, fill_dtype):
        """Fill the NaN slot of a complex128 Series."""
        source = pd.Series([1 + 1j, np.nan, 3 + 3j, 4 + 4j])
        assert source.dtype == np.complex128

        wanted = pd.Series([1 + 1j, fill_val, 3 + 3j, 4 + 4j])
        self._assert_fillna_conversion(source, fill_val, wanted, fill_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
                             ids=['series', 'index'])
    @pytest.mark.parametrize("fill_val,fill_dtype", [
        (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
        (pd.Timestamp('2012-01-01', tz='US/Eastern'), np.object),
        (1, np.object), ('x', np.object)],
        ids=['datetime64', 'datetime64tz', 'object', 'object'])
    def test_fillna_datetime(self, klass, fill_val, fill_dtype):
        """Fill the NaT slot of a naive datetime64[ns] container."""
        head = pd.Timestamp('2011-01-01')
        tail = [pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-04')]

        source = klass([head, pd.NaT] + tail)
        assert source.dtype == 'datetime64[ns]'

        wanted = klass([head, fill_val] + tail)
        self._assert_fillna_conversion(source, fill_val, wanted, fill_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index])
    @pytest.mark.parametrize("fill_val,fill_dtype", [
        (pd.Timestamp('2012-01-01', tz='US/Eastern'),
         'datetime64[ns, US/Eastern]'),
        (pd.Timestamp('2012-01-01'), np.object),
        (pd.Timestamp('2012-01-01', tz='Asia/Tokyo'), np.object),
        (1, np.object),
        ('x', np.object)])
    def test_fillna_datetime64tz(self, klass, fill_val, fill_dtype):
        """Fill the NaT slot of a US/Eastern tz-aware container."""
        tz = 'US/Eastern'
        head = pd.Timestamp('2011-01-01', tz=tz)
        tail = [pd.Timestamp('2011-01-03', tz=tz),
                pd.Timestamp('2011-01-04', tz=tz)]

        source = klass([head, pd.NaT] + tail)
        assert source.dtype == 'datetime64[ns, US/Eastern]'

        wanted = klass([head, fill_val] + tail)
        self._assert_fillna_conversion(source, fill_val, wanted, fill_dtype)

    # The remaining methods are empty placeholders: no fillna check is
    # implemented in this class for these container/dtype combinations.

    def test_fillna_series_int64(self):
        pass

    def test_fillna_index_int64(self):
        pass

    def test_fillna_series_bool(self):
        pass

    def test_fillna_index_bool(self):
        pass

    def test_fillna_series_timedelta64(self):
        pass

    def test_fillna_series_period(self):
        pass

    def test_fillna_index_timedelta64(self):
        pass

    def test_fillna_index_period(self):
        pass
|
||||
|
||||
|
||||
class TestReplaceSeriesCoercion(CoercionBase):
    """Dtype-coercion checks for ``Series.replace``.

    ``rep`` maps a dtype name to a pair of sample values of that dtype;
    each test replaces the ``from_key`` pair with the ``to_key`` pair.
    """

    klasses = ['series']
    method = 'replace'

    rep = {}
    rep['object'] = ['a', 'b']
    rep['int64'] = [4, 5]
    rep['float64'] = [1.1, 2.2]
    rep['complex128'] = [1 + 1j, 2 + 2j]
    rep['bool'] = [True, False]
    rep['datetime64[ns]'] = [pd.Timestamp('2011-01-01'),
                             pd.Timestamp('2011-01-03')]

    for tz in ['UTC', 'US/Eastern']:
        # to test tz => different tz replacement
        key = 'datetime64[ns, {0}]'.format(tz)
        rep[key] = [pd.Timestamp('2011-01-01', tz=tz),
                    pd.Timestamp('2011-01-03', tz=tz)]

    rep['timedelta64[ns]'] = [pd.Timedelta('1 day'),
                              pd.Timedelta('2 day')]

    def _build_replacer(self, how, from_key, to_key):
        """Return the ``replace`` argument mapping the ``from_key`` sample
        values onto the ``to_key`` sample values.

        Parameters
        ----------
        how : {'dict', 'series'}
            Container type used for the mapping.
        from_key, to_key : str
            Keys into ``self.rep``.

        Raises
        ------
        ValueError
            If ``how`` is neither 'dict' nor 'series'.
        """
        if how == 'dict':
            return dict(zip(self.rep[from_key], self.rep[to_key]))
        if how == 'series':
            return pd.Series(self.rep[to_key], index=self.rep[from_key])
        raise ValueError("how must be 'dict' or 'series', "
                         "got {0!r}".format(how))

    @pytest.mark.parametrize('how', ['dict', 'series'])
    @pytest.mark.parametrize('to_key', [
        'object', 'int64', 'float64', 'complex128', 'bool', 'datetime64[ns]',
        'datetime64[ns, UTC]', 'datetime64[ns, US/Eastern]', 'timedelta64[ns]'
    ], ids=['object', 'int64', 'float64', 'complex128', 'bool',
            'datetime64', 'datetime64tz', 'datetime64tz', 'timedelta64'])
    @pytest.mark.parametrize('from_key', [
        'object', 'int64', 'float64', 'complex128', 'bool', 'datetime64[ns]',
        'datetime64[ns, UTC]', 'datetime64[ns, US/Eastern]', 'timedelta64[ns]']
    )
    def test_replace_series(self, how, to_key, from_key):
        """Replace every ``from_key`` sample with the ``to_key`` sample and
        check the resulting values and dtype."""
        if from_key == 'bool' and how == 'series' and compat.PY3:
            # doesn't work in PY3, though ...dict_from_bool works fine
            pytest.skip("doesn't work as in PY3")

        index = pd.Index([3, 4], name='xxx')
        obj = pd.Series(self.rep[from_key], index=index, name='yyy')
        assert obj.dtype == from_key

        if (from_key.startswith('datetime') and to_key.startswith('datetime')):
            # tested below
            return
        elif from_key in ['datetime64[ns, US/Eastern]', 'datetime64[ns, UTC]']:
            # tested below
            return

        replacer = self._build_replacer(how, from_key, to_key)
        result = obj.replace(replacer)

        # ``to_key == 'int64'`` replaces the former ``to_key in ('int64')``,
        # which was a substring test against a plain string rather than
        # tuple membership.
        if ((from_key == 'float64' and to_key == 'int64') or
            (from_key == 'complex128' and
             to_key in ('int64', 'float64'))):

            # skipped on 32-bit platforms and on Windows
            if compat.is_platform_32bit() or compat.is_platform_windows():
                pytest.skip("32-bit platform buggy: {0} -> {1}".format
                            (from_key, to_key))

            # Expected: do not downcast by replacement
            exp = pd.Series(self.rep[to_key], index=index,
                            name='yyy', dtype=from_key)

        else:
            exp = pd.Series(self.rep[to_key], index=index, name='yyy')
            assert exp.dtype == to_key

        tm.assert_series_equal(result, exp)

    # TODO(jbrockmendel) commented out to only have a single xfail printed
    @pytest.mark.xfail(reason='GH #18376, tzawareness-compat bug '
                       'in BlockManager.replace_list')
    # @pytest.mark.parametrize('how', ['dict', 'series'])
    # @pytest.mark.parametrize('to_key', ['timedelta64[ns]', 'bool', 'object',
    #                                     'complex128', 'float64', 'int64'])
    # @pytest.mark.parametrize('from_key', ['datetime64[ns, UTC]',
    #                                       'datetime64[ns, US/Eastern]'])
    # def test_replace_series_datetime_tz(self, how, to_key, from_key):
    def test_replace_series_datetime_tz(self):
        """Single hard-coded tz-aware -> timedelta case (expected failure)."""
        how = 'series'
        from_key = 'datetime64[ns, US/Eastern]'
        to_key = 'timedelta64[ns]'

        index = pd.Index([3, 4], name='xxx')
        obj = pd.Series(self.rep[from_key], index=index, name='yyy')
        assert obj.dtype == from_key

        replacer = self._build_replacer(how, from_key, to_key)

        result = obj.replace(replacer)
        exp = pd.Series(self.rep[to_key], index=index, name='yyy')
        assert exp.dtype == to_key

        tm.assert_series_equal(result, exp)

    # TODO(jreback) commented out to only have a single xfail printed
    @pytest.mark.xfail(reason="different tz, "
                       "currently mask_missing raises SystemError")
    # @pytest.mark.parametrize('how', ['dict', 'series'])
    # @pytest.mark.parametrize('to_key', [
    #     'datetime64[ns]', 'datetime64[ns, UTC]',
    #     'datetime64[ns, US/Eastern]'])
    # @pytest.mark.parametrize('from_key', [
    #     'datetime64[ns]', 'datetime64[ns, UTC]',
    #     'datetime64[ns, US/Eastern]'])
    # def test_replace_series_datetime_datetime(self, how, to_key, from_key):
    def test_replace_series_datetime_datetime(self):
        """Single hard-coded datetime -> datetime case (expected failure)."""
        how = 'dict'
        to_key = 'datetime64[ns]'
        from_key = 'datetime64[ns]'

        index = pd.Index([3, 4], name='xxx')
        obj = pd.Series(self.rep[from_key], index=index, name='yyy')
        assert obj.dtype == from_key

        replacer = self._build_replacer(how, from_key, to_key)

        result = obj.replace(replacer)
        exp = pd.Series(self.rep[to_key], index=index, name='yyy')
        assert exp.dtype == to_key

        tm.assert_series_equal(result, exp)

    def test_replace_series_period(self):
        # Empty placeholder: no replace check is implemented for period.
        pass
|
||||
@@ -1,254 +0,0 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas import date_range, Index, DataFrame, Series, Timestamp
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestDatetimeIndex(object):
    """Indexing tests involving datetime-like indexes and values."""

    def test_setitem_with_datetime_tz(self):
        # 16889
        # support .loc with alignment and tz-aware DatetimeIndex
        mask = np.array([True, False, True, False])

        # same checks for a UTC index and for a naive index
        for tz in ['UTC', None]:
            idx = date_range('20010101', periods=4, tz=tz)
            df = DataFrame({'a': np.arange(4)}, index=idx).astype('float64')

            out = df.copy()
            out.loc[mask, :] = df.loc[mask, :]
            tm.assert_frame_equal(out, df)

            out = df.copy()
            out.loc[mask] = df.loc[mask]
            tm.assert_frame_equal(out, df)

    def test_indexing_with_datetime_tz(self):

        # 8260
        # support datetime64 with tz

        idx = Index(date_range('20130101', periods=3, tz='US/Eastern'),
                    name='foo')
        dr = date_range('20130110', periods=3)
        df = DataFrame({'A': idx, 'B': dr})
        df['C'] = idx
        df.iloc[1, 1] = pd.NaT
        df.iloc[1, 2] = pd.NaT

        # indexing: positional and label access give the same row
        row_expected = Series([Timestamp('2013-01-02 00:00:00-0500',
                                         tz='US/Eastern'), np.nan, np.nan],
                              index=list('ABC'), dtype='object', name=1)
        tm.assert_series_equal(df.iloc[1], row_expected)
        tm.assert_series_equal(df.loc[1], row_expected)

        # indexing - fast_xs
        df = DataFrame({'a': date_range('2014-01-01', periods=10, tz='UTC')})
        stamp = Timestamp('2014-01-06 00:00:00+0000', tz='UTC', freq='D')
        result = df.iloc[5]
        assert result == stamp

        result = df.loc[5]
        assert result == stamp

        # indexing - boolean
        tm.assert_frame_equal(df[df.a > df.a[3]], df.iloc[4:])

        # indexing - setting an element
        df = DataFrame(data=pd.to_datetime(
            ['2015-03-30 20:12:32', '2015-03-12 00:11:11']), columns=['time'])
        df['new_col'] = ['new', 'old']
        df.time = df.set_index('time').index.tz_localize('UTC')
        pacific = df[df.new_col == 'new'].set_index('time').index
        pacific = pacific.tz_convert('US/Pacific')

        # trying to set a single element on a part of a different timezone
        # this converts to object
        df2 = df.copy()
        df2.loc[df2.new_col == 'new', 'time'] = pacific

        expected = Series([pacific[0], df.loc[1, 'time']], name='time')
        tm.assert_series_equal(df2.time, expected)

        shifted = df.loc[df.new_col == 'new', 'time'] + pd.Timedelta('1s')
        df.loc[df.new_col == 'new', 'time'] = shifted
        tm.assert_series_equal(df.loc[df.new_col == 'new', 'time'], shifted)

    def test_consistency_with_tz_aware_scalar(self):
        # xref gh-12938
        # various ways of indexing the same tz-aware scalar
        df = Series([Timestamp('2016-03-30 14:35:25',
                               tz='Europe/Brussels')]).to_frame()

        df = pd.concat([df, df]).reset_index(drop=True)
        expected = Timestamp('2016-03-30 14:35:25+0200',
                             tz='Europe/Brussels')

        # each accessor is evaluated and checked in turn
        accessors = [lambda d: d[0][0],
                     lambda d: d.iloc[0, 0],
                     lambda d: d.loc[0, 0],
                     lambda d: d.iat[0, 0],
                     lambda d: d.at[0, 0],
                     lambda d: d[0].loc[0],
                     lambda d: d[0].at[0]]
        for fetch in accessors:
            result = fetch(df)
            assert result == expected

    def test_indexing_with_datetimeindex_tz(self):

        # GH 12050
        # indexing on a series with a datetimeindex with tz
        index = date_range('2015-01-01', periods=2, tz='utc')

        ser = Series(range(2), index=index, dtype='int64')

        # list-like indexing

        for sel in (index, list(index)):
            # getitem
            tm.assert_series_equal(ser[sel], ser)

            # setitem
            updated = ser.copy()
            updated[sel] = 1
            tm.assert_series_equal(updated, Series(1, index=index))

            # .loc getitem
            tm.assert_series_equal(ser.loc[sel], ser)

            # .loc setitem
            updated = ser.copy()
            updated.loc[sel] = 1
            tm.assert_series_equal(updated, Series(1, index=index))

        # single element indexing
        last = index[1]

        # getitem
        assert ser[last] == 1

        # setitem
        updated = ser.copy()
        updated[last] = 5
        tm.assert_series_equal(updated, Series([0, 5], index=index))

        # .loc getitem
        assert ser.loc[last] == 1

        # .loc setitem
        updated = ser.copy()
        updated.loc[last] = 5
        tm.assert_series_equal(updated, Series([0, 5], index=index))

    def test_partial_setting_with_datetimelike_dtype(self):

        # GH9478
        # a datetimeindex alignment issue with partial setting
        df = DataFrame(np.arange(6.).reshape(3, 2), columns=list('AB'),
                       index=date_range('1/1/2000', periods=3, freq='1H'))
        expected = df.copy()
        expected['C'] = [expected.index[0]] + [pd.NaT, pd.NaT]

        selected = df.A < 1
        df.loc[selected, 'C'] = df.loc[selected].index
        tm.assert_frame_equal(df, expected)

    def test_loc_setitem_datetime(self):

        # GH 9516
        dt1 = Timestamp('20130101 09:00:00')
        dt2 = Timestamp('20130101 10:00:00')

        # the same two labels expressed as four different datetime types
        converters = [lambda x: x,
                      lambda x: x.to_datetime64(),
                      lambda x: x.to_pydatetime(),
                      lambda x: np.datetime64(x)]

        for conv in converters:

            df = DataFrame()
            df.loc[conv(dt1), 'one'] = 100
            df.loc[conv(dt2), 'one'] = 200

            expected = DataFrame({'one': [100.0, 200.0]}, index=[dt1, dt2])
            tm.assert_frame_equal(df, expected)

    def test_series_partial_set_datetime(self):
        # GH 11497

        idx = date_range('2011-01-01', '2011-01-02', freq='D', name='idx')
        ser = Series([0.1, 0.2], index=idx, name='s')

        # all keys present, original order
        result = ser.loc[[Timestamp('2011-01-01'), Timestamp('2011-01-02')]]
        exp = Series([0.1, 0.2], index=idx, name='s')
        tm.assert_series_equal(result, exp, check_index_type=True)

        # all keys present, with a duplicate
        keys = [Timestamp('2011-01-02'), Timestamp('2011-01-02'),
                Timestamp('2011-01-01')]
        exp = Series([0.2, 0.2, 0.1],
                     index=pd.DatetimeIndex(keys, name='idx'),
                     name='s')
        tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)

        # a key absent from the index: NaN in the result plus a warning
        keys = [Timestamp('2011-01-03'), Timestamp('2011-01-02'),
                Timestamp('2011-01-03')]
        exp = Series([np.nan, 0.2, np.nan],
                     index=pd.DatetimeIndex(keys, name='idx'), name='s')
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)

    def test_series_partial_set_period(self):
        # GH 11497

        idx = pd.period_range('2011-01-01', '2011-01-02', freq='D', name='idx')
        ser = Series([0.1, 0.2], index=idx, name='s')

        # all keys present, original order
        result = ser.loc[[pd.Period('2011-01-01', freq='D'),
                          pd.Period('2011-01-02', freq='D')]]
        exp = Series([0.1, 0.2], index=idx, name='s')
        tm.assert_series_equal(result, exp, check_index_type=True)

        # all keys present, with a duplicate
        keys = [pd.Period('2011-01-02', freq='D'),
                pd.Period('2011-01-02', freq='D'),
                pd.Period('2011-01-01', freq='D')]
        exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name='idx'),
                     name='s')
        tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)

        # a key absent from the index: NaN in the result plus a warning
        keys = [pd.Period('2011-01-03', freq='D'),
                pd.Period('2011-01-02', freq='D'),
                pd.Period('2011-01-03', freq='D')]
        exp = Series([np.nan, 0.2, np.nan],
                     index=pd.PeriodIndex(keys, name='idx'), name='s')
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = ser.loc[keys]
        tm.assert_series_equal(result, exp)
|
||||
@@ -1,890 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pytest
|
||||
|
||||
from warnings import catch_warnings
|
||||
import numpy as np
|
||||
from pandas import (Series, DataFrame, Index, Float64Index, Int64Index,
|
||||
RangeIndex)
|
||||
from pandas.util.testing import assert_series_equal, assert_almost_equal
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestFloatIndexers(object):
|
||||
|
||||
def check(self, result, original, indexer, getitem):
    """
    Compare ``result`` with the positional selection it should equal.

    For a Series, or for a frame when ``getitem`` is false, the
    reference value is ``original.iloc[indexer]``; for a frame with
    ``getitem`` true it is ``original.iloc[:, indexer]``.
    """
    select_columns = getitem and not isinstance(original, Series)
    if select_columns:
        expected = original.iloc[:, indexer]
    else:
        expected = original.iloc[indexer]

    assert_almost_equal(result, expected)
|
||||
|
||||
def test_scalar_error(self):
    """A float scalar passed to ``.iloc`` raises TypeError for every
    index type built below, on both get and set."""

    # GH 4892
    # float_indexers should raise exceptions
    # on appropriate Index types & accessors
    # this duplicates the code below
    # but is specifically testing for the error
    # message

    index_makers = [tm.makeStringIndex, tm.makeUnicodeIndex,
                    tm.makeCategoricalIndex,
                    tm.makeDateIndex, tm.makeTimedeltaIndex,
                    tm.makePeriodIndex, tm.makeIntIndex,
                    tm.makeRangeIndex]

    for make_index in index_makers:

        i = make_index(5)
        s = Series(np.arange(len(i)), index=i)

        # getting: the error message is checked as well
        with tm.assert_raises_regex(TypeError,
                                    'cannot do positional indexing'):
            s.iloc[3.0]

        # setting
        with pytest.raises(TypeError):
            s.iloc[3.0] = 0
|
||||
|
||||
def test_scalar_non_numeric(self):
    """Float scalar indexers against non-numeric indexes.

    For each index type, a Series and a square DataFrame are indexed
    with ``3.0`` through several accessors and the raised exception
    type (or the resulting index type after setting) is checked.
    """

    # GH 4892
    # float_indexers should raise exceptions
    # on appropriate Index types & accessors

    for index in [tm.makeStringIndex, tm.makeUnicodeIndex,
                  tm.makeCategoricalIndex,
                  tm.makeDateIndex, tm.makeTimedeltaIndex,
                  tm.makePeriodIndex]:

        i = index(5)

        for s in [Series(
                np.arange(len(i)), index=i), DataFrame(
                    np.random.randn(
                        len(i), len(i)), index=i, columns=i)]:

            # getting
            for idxr, getitem in [(lambda x: x.ix, False),
                                  (lambda x: x.iloc, False),
                                  (lambda x: x, True)]:

                def f():
                    with catch_warnings(record=True):
                        idxr(s)[3.0]

                # getitem on a DataFrame is a KeyError as it is indexing
                # via labels on the columns
                if getitem and isinstance(s, DataFrame):
                    error = KeyError
                else:
                    error = TypeError
                pytest.raises(error, f)

            # label based can be a TypeError or KeyError
            def f():
                s.loc[3.0]

            if s.index.inferred_type in ['string', 'unicode', 'mixed']:
                error = KeyError
            else:
                error = TypeError
            pytest.raises(error, f)

            # contains
            assert 3.0 not in s

            # setting with a float fails with iloc
            def f():
                s.iloc[3.0] = 0
            pytest.raises(TypeError, f)

            # setting with an indexer
            if s.index.inferred_type in ['categorical']:
                # Value or Type Error
                pass
            elif s.index.inferred_type in ['datetime64', 'timedelta64',
                                           'period']:

                # these should probably work
                # and are inconsistent between series/dataframe ATM
                # for idxr in [lambda x: x.ix,
                #              lambda x: x]:
                #    s2 = s.copy()
                #    def f():
                #        idxr(s2)[3.0] = 0
                #    pytest.raises(TypeError, f)
                pass

            else:

                # setting a new float label is expected to leave an
                # object-typed index
                s2 = s.copy()
                s2.loc[3.0] = 10
                assert s2.index.is_object()

                for idxr in [lambda x: x.ix,
                             lambda x: x]:
                    s2 = s.copy()
                    with catch_warnings(record=True):
                        idxr(s2)[3.0] = 0
                    assert s2.index.is_object()

        # falls back to position selection, series only
        s = Series(np.arange(len(i)), index=i)
        s[3]
        pytest.raises(TypeError, lambda: s[3.0])
|
||||
|
||||
def test_scalar_with_mixed(self):
    """Float scalar lookups on a string index and on a mixed
    string/float index."""

    str_ser = Series([1, 2, 3], index=['a', 'b', 'c'])
    mixed_ser = Series([1, 2, 3], index=['a', 'b', 1.5])

    # lookup in a pure string index
    # with an invalid indexer
    for idxr in [lambda x: x.ix,
                 lambda x: x,
                 lambda x: x.iloc]:

        with pytest.raises(TypeError):
            with catch_warnings(record=True):
                idxr(str_ser)[1.0]

    with pytest.raises(KeyError):
        str_ser.loc[1.0]

    assert str_ser.loc['b'] == 2

    # mixed index so we have label
    # indexing (plain [] access)
    with pytest.raises(TypeError):
        mixed_ser[1.0]

    assert mixed_ser[1] == 2

    # mixed index so we have label
    # indexing (.ix access, warnings captured)
    with catch_warnings(record=True):

        with pytest.raises(TypeError):
            mixed_ser.ix[1.0]

        assert mixed_ser.ix[1] == 2

    with pytest.raises(TypeError):
        mixed_ser.iloc[1.0]
    with pytest.raises(KeyError):
        mixed_ser.loc[1.0]

    assert mixed_ser.loc[1.5] == 3
|
||||
|
||||
def test_scalar_integer(self):
    """Float scalars equal to an integer label are accepted by the
    label-based accessors on integer indexes, for get and set."""

    # test how scalar float indexers work on int indexes

    # (accessor, getitem flag) pairs shared by the get and set passes
    accessors = [(lambda x: x.ix, False),
                 (lambda x: x.loc, False),
                 (lambda x: x, True)]

    # integer index
    for i in [Int64Index(range(5)), RangeIndex(5)]:

        for s in [Series(np.arange(len(i))),
                  DataFrame(np.random.randn(len(i), len(i)),
                            index=i, columns=i)]:

            # coerce to equal int (getting)
            for idxr, getitem in accessors:

                with catch_warnings(record=True):
                    result = idxr(s)[3.0]
                self.check(result, s, 3, getitem)

            # coerce to equal int (setting)
            for idxr, getitem in accessors:

                if isinstance(s, Series):
                    def compare(x, y):
                        assert x == y
                    expected = 100
                else:
                    compare = tm.assert_series_equal
                    fill = 100 if getitem else 100.
                    expected = Series(fill,
                                      index=range(len(s)), name=3)

                s2 = s.copy()
                with catch_warnings(record=True):
                    idxr(s2)[3.0] = 100

                    result = idxr(s2)[3.0]
                    compare(result, expected)

                    result = idxr(s2)[3]
                    compare(result, expected)

            # contains
            # coerce to equal int
            assert 3.0 in s
|
||||
|
||||
def test_scalar_float(self):
    """Float scalar indexers on a float index.

    Label-based accessors and plain ``[]`` are checked with a float
    label; ``.iloc`` is checked to accept an integer and to raise
    TypeError for a float.
    """

    # scalar float indexers work on a float index
    index = Index(np.arange(5.))
    for s in [Series(np.arange(len(index)), index=index),
              DataFrame(np.random.randn(len(index), len(index)),
                        index=index, columns=index)]:

        # assert all operations except for iloc are ok
        indexer = index[3]
        for idxr, getitem in [(lambda x: x.ix, False),
                              (lambda x: x.loc, False),
                              (lambda x: x, True)]:

            # getting
            with catch_warnings(record=True):
                result = idxr(s)[indexer]
            self.check(result, s, 3, getitem)

            # setting
            s2 = s.copy()

            # NOTE(review): ``f`` is defined but never called in this
            # method, and ``expected`` is only bound further down, so the
            # assignment below is never executed -- confirm whether the
            # setting step was meant to run.
            def f():
                with catch_warnings(record=True):
                    idxr(s2)[indexer] = expected
            with catch_warnings(record=True):
                result = idxr(s2)[indexer]
            self.check(result, s, 3, getitem)

            # a float label that is not in the index is a KeyError
            with catch_warnings(record=True):
                pytest.raises(KeyError, lambda: idxr(s)[3.5])

        # contains
        assert 3.0 in s

        # iloc succeeds with an integer
        expected = s.iloc[3]
        s2 = s.copy()

        s2.iloc[3] = expected
        result = s2.iloc[3]
        self.check(result, s, 3, False)

        # iloc raises with a float
        pytest.raises(TypeError, lambda: s.iloc[3.0])

        def g():
            s2.iloc[3.0] = 0
        pytest.raises(TypeError, g)
|
||||
|
||||
def test_slice_non_numeric(self):
    """Slices with a float bound raise TypeError on non-numeric indexes,
    for every accessor, on both get and set."""

    # GH 4892
    # float_indexers should raise exceptions
    # on appropriate Index types & accessors

    float_slices = [slice(3.0, 4),
                    slice(3, 4.0),
                    slice(3.0, 4.0)]
    accessors = [lambda x: x.ix,
                 lambda x: x.loc,
                 lambda x: x.iloc,
                 lambda x: x]

    for make_index in [tm.makeStringIndex, tm.makeUnicodeIndex,
                       tm.makeDateIndex, tm.makeTimedeltaIndex,
                       tm.makePeriodIndex]:

        index = make_index(5)
        for s in [Series(range(5), index=index),
                  DataFrame(np.random.randn(5, 2), index=index)]:

            # getitem
            for l in float_slices:

                with pytest.raises(TypeError):
                    s.iloc[l]

                for idxr in accessors:
                    with pytest.raises(TypeError):
                        with catch_warnings(record=True):
                            idxr(s)[l]

            # setitem
            for l in float_slices:

                with pytest.raises(TypeError):
                    s.iloc[l] = 0

                for idxr in accessors:
                    with pytest.raises(TypeError):
                        with catch_warnings(record=True):
                            idxr(s)[l] = 0
|
||||
|
||||
def test_slice_integer(self):
    """Float-bounded slices on integer indexes.

    ``.loc`` / ``.ix`` results are compared (via ``self.check``) with a
    positional slice of the same Series; plain ``[]`` with a float
    bound is expected to raise TypeError.
    """

    # same as above, but for Integer based indexes
    # these coerce to a like integer
    # oob indicates if we are out of bounds
    # of positional indexing
    for index, oob in [(Int64Index(range(5)), False),
                       (RangeIndex(5), False),
                       (Int64Index(range(5)) + 10, True)]:

        # s is an in-range index
        s = Series(range(5), index=index)

        # getitem
        for l in [slice(3.0, 4),
                  slice(3, 4.0),
                  slice(3.0, 4.0)]:

            for idxr in [lambda x: x.loc,
                         lambda x: x.ix]:

                with catch_warnings(record=True):
                    result = idxr(s)[l]

                # these are all label indexing
                # except getitem which is positional
                # empty
                if oob:
                    indexer = slice(0, 0)
                else:
                    indexer = slice(3, 5)
                self.check(result, s, indexer, False)

            # positional indexing
            def f():
                s[l]

            pytest.raises(TypeError, f)

        # getitem out-of-bounds
        for l in [slice(-6, 6),
                  slice(-6.0, 6.0)]:

            for idxr in [lambda x: x.loc,
                         lambda x: x.ix]:
                with catch_warnings(record=True):
                    result = idxr(s)[l]

                # these are all label indexing
                # except getitem which is positional
                # empty
                if oob:
                    indexer = slice(0, 0)
                else:
                    indexer = slice(-6, 6)
                self.check(result, s, indexer, False)

        # positional indexing
        def f():
            s[slice(-6.0, 6.0)]

        pytest.raises(TypeError, f)

        # getitem odd floats: each pair is (label slice with a fractional
        # bound, positional slice used as the reference when not oob)
        for l, res1 in [(slice(2.5, 4), slice(3, 5)),
                        (slice(2, 3.5), slice(2, 4)),
                        (slice(2.5, 3.5), slice(3, 4))]:

            for idxr in [lambda x: x.loc,
                         lambda x: x.ix]:

                with catch_warnings(record=True):
                    result = idxr(s)[l]
                if oob:
                    res = slice(0, 0)
                else:
                    res = res1

                self.check(result, s, res, False)

            # positional indexing
            def f():
                s[l]

            pytest.raises(TypeError, f)

        # setitem
        for l in [slice(3.0, 4),
                  slice(3, 4.0),
                  slice(3.0, 4.0)]:

            for idxr in [lambda x: x.loc,
                         lambda x: x.ix]:
                # work on a copy so ``s`` stays unchanged for later checks
                sc = s.copy()
                with catch_warnings(record=True):
                    idxr(sc)[l] = 0
                    result = idxr(sc)[l].values.ravel()
                assert (result == 0).all()

            # positional indexing
            def f():
                s[l] = 0

            pytest.raises(TypeError, f)
|
||||
|
||||
def test_integer_positional_indexing(self):
    """Positional slicing of an integer index must reject float bounds.

    An all-integer slice through ``[]`` has to agree with ``iloc``; any
    slice with a float bound has to raise ``TypeError`` through both
    ``[]`` and ``iloc``.
    """
    s = Series(range(2, 6), index=range(2, 6))

    assert_series_equal(s[2:4], s.iloc[2:4])

    float_bounded = [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)]
    for idxr in (lambda obj: obj, lambda obj: obj.iloc):
        for l in float_bounded:
            with pytest.raises(TypeError):
                idxr(s)[l]
|
||||
|
||||
def test_slice_integer_frame_getitem(self):
    """Float-bounded slices on the getitem dimension of a DataFrame.

    Similar to the integer Series test, but slicing the rows of a
    DataFrame whose index is an Int64Index or RangeIndex.  The whole
    scenario is run once through ``.loc`` and once through ``.ix``.
    """
    setitem_slices = [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]

    for index in [Int64Index(range(5)), RangeIndex(5)]:

        s = DataFrame(np.random.randn(5, 2), index=index)

        def run_scenario(idxr):
            # getitem
            for l in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]:
                result = idxr(s)[l]
                self.check(result, s, slice(0, 2), False)

                # positional indexing
                with pytest.raises(TypeError):
                    s[l]

            # getitem out-of-bounds
            for l in [slice(-10, 10), slice(-10.0, 10.0)]:
                result = idxr(s)[l]
                self.check(result, s, slice(-10, 10), True)

            # positional indexing
            with pytest.raises(TypeError):
                s[slice(-10.0, 10.0)]

            # getitem odd floats
            for l, res in [(slice(0.5, 1), slice(1, 2)),
                           (slice(0, 0.5), slice(0, 1)),
                           (slice(0.5, 1.5), slice(1, 2))]:
                result = idxr(s)[l]
                self.check(result, s, res, False)

                # positional indexing
                with pytest.raises(TypeError):
                    s[l]

            # setitem
            for l in setitem_slices:
                sc = s.copy()
                idxr(sc)[l] = 0
                result = idxr(sc)[l].values.ravel()
                assert (result == 0).all()

                # positional indexing
                with pytest.raises(TypeError):
                    s[l] = 0

        run_scenario(lambda obj: obj.loc)
        # record any warning raised while going through .ix
        with catch_warnings(record=True):
            run_scenario(lambda obj: obj.ix)
|
||||
|
||||
def test_slice_float(self):
    """Slice a float index with mixed int/float slice bounds.

    Same as the integer tests, but for floats: on both a Series and a
    DataFrame, ``.ix``, ``.loc`` and plain ``[]`` must all select the
    same rows as ``iloc[3:4]``, and assigning through the same slice
    must set exactly the selected values.
    """
    index = Index(np.arange(5.)) + 0.1
    accessors = [lambda obj: obj.ix,
                 lambda obj: obj.loc,
                 lambda obj: obj]

    for s in [Series(range(5), index=index),
              DataFrame(np.random.randn(5, 2), index=index)]:

        # choose the comparison helper matching the container type
        if isinstance(s, Series):
            assert_equal = tm.assert_series_equal
        else:
            assert_equal = tm.assert_frame_equal

        for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:

            expected = s.iloc[3:4]
            for idxr in accessors:

                # getitem
                with catch_warnings(record=True):
                    result = idxr(s)[l]
                assert_equal(result, expected)

                # setitem
                s2 = s.copy()
                with catch_warnings(record=True):
                    idxr(s2)[l] = 0
                    result = idxr(s2)[l].values.ravel()
                assert (result == 0).all()
|
||||
|
||||
def test_floating_index_doc_example(self):
|
||||
|
||||
index = Index([1.5, 2, 3, 4.5, 5])
|
||||
s = Series(range(5), index=index)
|
||||
assert s[3] == 2
|
||||
assert s.loc[3] == 2
|
||||
assert s.loc[3] == 2
|
||||
assert s.iloc[3] == 3
|
||||
|
||||
def test_floating_misc(self):
    """Scalar, slice and list indexing of a Series with a float index.

    Plain ``[]`` and ``.loc`` are expected to behave identically
    (label-based) on a float index, and ``iloc`` is used as the
    positional reference.  The original body computed many results
    twice from the very same ``.loc`` expression and repeated one whole
    ``.loc`` slice group; those exact duplicates are removed here, the
    distinct checks are all kept.
    """
    # related 236
    # scalar/slicing of a float index
    s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64)

    # label based slicing
    assert_series_equal(s[1.0:3.0], s.loc[1.0:3.0])

    # exact indexing when found
    assert s[5.0] == s.loc[5.0]
    assert s[5] == s.loc[5]
    assert s[5.0] == s[5]

    # value not found (and no fallbacking at all)

    # scalar integers
    pytest.raises(KeyError, lambda: s.loc[4])
    pytest.raises(KeyError, lambda: s[4])

    # fancy floats/integers create the correct entry (as nan)
    # fancy tests
    expected = Series([2, 0], index=Float64Index([5.0, 0.0]))
    for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]:  # float
        assert_series_equal(s[fancy_idx], expected)
        assert_series_equal(s.loc[fancy_idx], expected)

    expected = Series([2, 0], index=Index([5, 0], dtype='int64'))
    for fancy_idx in [[5, 0], np.array([5, 0])]:  # int
        assert_series_equal(s[fancy_idx], expected)
        assert_series_equal(s.loc[fancy_idx], expected)

    # all should return the same as we are slicing 'the same'
    # ([] previously did fallback indexing here)
    for idxr in (lambda obj: obj.loc, lambda obj: obj):
        reference = idxr(s)[2:5]
        assert_series_equal(reference, idxr(s)[2.0:5.0])
        assert_series_equal(reference, idxr(s)[2.0:5])
        assert_series_equal(reference, idxr(s)[2.1:5])

    # combined test
    assert_series_equal(s.loc[2:5], s[2:5])

    # list selection
    result1 = s[[0.0, 5, 10]]
    assert_series_equal(result1, s.loc[[0.0, 5, 10]])
    assert_series_equal(result1, s.iloc[[0, 2, 4]])

    # lists containing missing labels: expected to emit a FutureWarning
    # and produce NaN for the missing entries
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result1 = s[[1.6, 5, 10]]
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result2 = s.loc[[1.6, 5, 10]]
    assert_series_equal(result1, result2)
    assert_series_equal(result1, Series(
        [np.nan, 2, 4], index=[1.6, 5, 10]))

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result1 = s[[0, 1, 2]]
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result2 = s.loc[[0, 1, 2]]
    assert_series_equal(result1, result2)
    assert_series_equal(result1, Series(
        [0.0, np.nan, np.nan], index=[0, 1, 2]))

    result1 = s.loc[[2.5, 5]]
    assert_series_equal(result1, Series([1, 2], index=[2.5, 5.0]))

    result1 = s[[2.5]]
    assert_series_equal(result1, s.loc[[2.5]])
    assert_series_equal(result1, Series([1], index=[2.5]))
|
||||
|
||||
def test_floating_tuples(self):
|
||||
# see gh-13509
|
||||
s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.1, 0.2], name='foo')
|
||||
|
||||
result = s[0.0]
|
||||
assert result == (1, 1)
|
||||
|
||||
expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name='foo')
|
||||
s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.0, 0.2], name='foo')
|
||||
|
||||
result = s[0.0]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_float64index_slicing_bug(self):
|
||||
# GH 5557, related to slicing a float index
|
||||
ser = {256: 2321.0,
|
||||
1: 78.0,
|
||||
2: 2716.0,
|
||||
3: 0.0,
|
||||
4: 369.0,
|
||||
5: 0.0,
|
||||
6: 269.0,
|
||||
7: 0.0,
|
||||
8: 0.0,
|
||||
9: 0.0,
|
||||
10: 3536.0,
|
||||
11: 0.0,
|
||||
12: 24.0,
|
||||
13: 0.0,
|
||||
14: 931.0,
|
||||
15: 0.0,
|
||||
16: 101.0,
|
||||
17: 78.0,
|
||||
18: 9643.0,
|
||||
19: 0.0,
|
||||
20: 0.0,
|
||||
21: 0.0,
|
||||
22: 63761.0,
|
||||
23: 0.0,
|
||||
24: 446.0,
|
||||
25: 0.0,
|
||||
26: 34773.0,
|
||||
27: 0.0,
|
||||
28: 729.0,
|
||||
29: 78.0,
|
||||
30: 0.0,
|
||||
31: 0.0,
|
||||
32: 3374.0,
|
||||
33: 0.0,
|
||||
34: 1391.0,
|
||||
35: 0.0,
|
||||
36: 361.0,
|
||||
37: 0.0,
|
||||
38: 61808.0,
|
||||
39: 0.0,
|
||||
40: 0.0,
|
||||
41: 0.0,
|
||||
42: 6677.0,
|
||||
43: 0.0,
|
||||
44: 802.0,
|
||||
45: 0.0,
|
||||
46: 2691.0,
|
||||
47: 0.0,
|
||||
48: 3582.0,
|
||||
49: 0.0,
|
||||
50: 734.0,
|
||||
51: 0.0,
|
||||
52: 627.0,
|
||||
53: 70.0,
|
||||
54: 2584.0,
|
||||
55: 0.0,
|
||||
56: 324.0,
|
||||
57: 0.0,
|
||||
58: 605.0,
|
||||
59: 0.0,
|
||||
60: 0.0,
|
||||
61: 0.0,
|
||||
62: 3989.0,
|
||||
63: 10.0,
|
||||
64: 42.0,
|
||||
65: 0.0,
|
||||
66: 904.0,
|
||||
67: 0.0,
|
||||
68: 88.0,
|
||||
69: 70.0,
|
||||
70: 8172.0,
|
||||
71: 0.0,
|
||||
72: 0.0,
|
||||
73: 0.0,
|
||||
74: 64902.0,
|
||||
75: 0.0,
|
||||
76: 347.0,
|
||||
77: 0.0,
|
||||
78: 36605.0,
|
||||
79: 0.0,
|
||||
80: 379.0,
|
||||
81: 70.0,
|
||||
82: 0.0,
|
||||
83: 0.0,
|
||||
84: 3001.0,
|
||||
85: 0.0,
|
||||
86: 1630.0,
|
||||
87: 7.0,
|
||||
88: 364.0,
|
||||
89: 0.0,
|
||||
90: 67404.0,
|
||||
91: 9.0,
|
||||
92: 0.0,
|
||||
93: 0.0,
|
||||
94: 7685.0,
|
||||
95: 0.0,
|
||||
96: 1017.0,
|
||||
97: 0.0,
|
||||
98: 2831.0,
|
||||
99: 0.0,
|
||||
100: 2963.0,
|
||||
101: 0.0,
|
||||
102: 854.0,
|
||||
103: 0.0,
|
||||
104: 0.0,
|
||||
105: 0.0,
|
||||
106: 0.0,
|
||||
107: 0.0,
|
||||
108: 0.0,
|
||||
109: 0.0,
|
||||
110: 0.0,
|
||||
111: 0.0,
|
||||
112: 0.0,
|
||||
113: 0.0,
|
||||
114: 0.0,
|
||||
115: 0.0,
|
||||
116: 0.0,
|
||||
117: 0.0,
|
||||
118: 0.0,
|
||||
119: 0.0,
|
||||
120: 0.0,
|
||||
121: 0.0,
|
||||
122: 0.0,
|
||||
123: 0.0,
|
||||
124: 0.0,
|
||||
125: 0.0,
|
||||
126: 67744.0,
|
||||
127: 22.0,
|
||||
128: 264.0,
|
||||
129: 0.0,
|
||||
260: 197.0,
|
||||
268: 0.0,
|
||||
265: 0.0,
|
||||
269: 0.0,
|
||||
261: 0.0,
|
||||
266: 1198.0,
|
||||
267: 0.0,
|
||||
262: 2629.0,
|
||||
258: 775.0,
|
||||
257: 0.0,
|
||||
263: 0.0,
|
||||
259: 0.0,
|
||||
264: 163.0,
|
||||
250: 10326.0,
|
||||
251: 0.0,
|
||||
252: 1228.0,
|
||||
253: 0.0,
|
||||
254: 2769.0,
|
||||
255: 0.0}
|
||||
|
||||
# smoke test for the repr
|
||||
s = Series(ser)
|
||||
result = s.value_counts()
|
||||
str(result)
|
||||
@@ -1,668 +0,0 @@
|
||||
""" test positional based indexing with iloc """
|
||||
|
||||
import pytest
|
||||
|
||||
from warnings import catch_warnings
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas.compat import lrange, lmap
|
||||
from pandas import Series, DataFrame, date_range, concat, isna
|
||||
from pandas.util import testing as tm
|
||||
from pandas.tests.indexing.common import Base
|
||||
from pandas.api.types import is_scalar
|
||||
|
||||
|
||||
class TestiLoc(Base):
|
||||
|
||||
def test_iloc_exceeds_bounds(self):
    """``iloc`` bounds handling: lists/scalars raise, slices are clipped.

    GH6296 / GH10779.  Out-of-bounds positions given as a list or as a
    single integer must raise ``IndexError``; out-of-bounds slice
    endpoints are fine and select the same data as the equivalent
    in-bounds slice.
    """
    df = DataFrame(np.random.random_sample((20, 5)), columns=list('ABCDE'))

    # lists of positions should raise IndexError!
    with tm.assert_raises_regex(IndexError,
                                'positional indexers '
                                'are out-of-bounds'):
        df.iloc[:, [0, 1, 2, 3, 4, 5]]
    for rows in ([1, 30], [1, -30], [100]):
        with pytest.raises(IndexError):
            df.iloc[rows]

    s = df['A']
    for positions in ([100], [-100]):
        with pytest.raises(IndexError):
            s.iloc[positions]

    # still raise on a single indexer
    msg = 'single positional indexer is out-of-bounds'
    with tm.assert_raises_regex(IndexError, msg):
        df.iloc[30]
    with pytest.raises(IndexError):
        df.iloc[-30]

    # GH10779
    # single positive/negative indexer exceeding Series bounds should raise
    # an IndexError
    with tm.assert_raises_regex(IndexError, msg):
        s.iloc[30]
    with pytest.raises(IndexError):
        s.iloc[-30]

    # slices are ok: (out-of-bounds column slice, equivalent slice)
    column_cases = [
        (slice(4, 10), slice(4, None)),            # 0 < start < len < stop
        (slice(-4, -10), slice(None, 0)),          # stop < 0 < start < len
        (slice(10, 4, -1), slice(None, 4, -1)),    # 0 < stop < len < start (down)
        (slice(4, -10, -1), slice(4, None, -1)),   # stop < 0 < start < len (down)
        (slice(-10, 4), slice(None, 4)),           # start < 0 < stop < len
        (slice(10, 4), slice(None, 0)),            # 0 < stop < len < start
        (slice(-10, -11, -1), slice(None, 0)),     # stop < start < 0 < len (down)
        (slice(10, 11), slice(None, 0)),           # 0 < len < start < stop
    ]
    for exceeding, equivalent in column_cases:
        result = df.iloc[:, exceeding]
        expected = df.iloc[:, equivalent]
        tm.assert_frame_equal(result, expected)

    # slice bounds exceeding is ok
    series_cases = [
        (slice(18, 30), slice(18, None)),
        (slice(30, None), slice(None, 0)),
        (slice(30, None, -1), slice(None, None, -1)),
    ]
    for exceeding, equivalent in series_cases:
        tm.assert_series_equal(s.iloc[exceeding], s.iloc[equivalent])

    # doc example
    def check(result, expected):
        # also make sure the repr and dtypes can be computed
        str(result)
        result.dtypes
        tm.assert_frame_equal(result, expected)

    dfl = DataFrame(np.random.randn(5, 2), columns=list('AB'))
    check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index))
    check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]])
    check(dfl.iloc[4:6], dfl.iloc[[4]])

    with pytest.raises(IndexError):
        dfl.iloc[[4, 5, 6]]
    with pytest.raises(IndexError):
        dfl.iloc[:, 4]
|
||||
|
||||
def test_iloc_getitem_int(self):
    """Run the shared ``check_result`` helper for ``iloc[2]``.

    Integer-like fixture types are compared against ``ix`` with an
    explicit per-axis key; the remaining types are run with
    ``fails=IndexError``.  (``check_result`` is not visible in this
    chunk; only the arguments passed to it are documented here.)
    """
    # integer
    integer_typs = ['ints', 'uints']
    other_typs = ['labels', 'mixed', 'ts', 'floats', 'empty']

    self.check_result('integer', 'iloc', 2, 'ix',
                      {0: 4, 1: 6, 2: 8}, typs=integer_typs)
    self.check_result('integer', 'iloc', 2, 'indexer', 2,
                      typs=other_typs, fails=IndexError)
|
||||
|
||||
def test_iloc_getitem_neg_int(self):
    """Run the shared ``check_result`` helper for ``iloc[-1]``.

    Integer-like fixture types are compared against ``ix`` with an
    explicit per-axis key; the remaining types are run with
    ``fails=IndexError``.
    """
    # neg integer
    integer_typs = ['ints', 'uints']
    other_typs = ['labels', 'mixed', 'ts', 'floats', 'empty']

    self.check_result('neg int', 'iloc', -1, 'ix',
                      {0: 6, 1: 9, 2: 12}, typs=integer_typs)
    self.check_result('neg int', 'iloc', -1, 'indexer', -1,
                      typs=other_typs, fails=IndexError)
|
||||
|
||||
def test_iloc_getitem_list_int(self):
    """Run ``check_result`` for ``iloc`` with lists and arrays of ints.

    The same three scenarios are run twice: once with plain lists and
    once with numpy arrays (GH5006, making sure that a single indexer
    is returning the correct type).
    """
    integer_typs = ['ints', 'uints']
    other_typs = ['labels', 'mixed', 'ts', 'floats', 'empty']
    three_keys = {0: [0, 2, 4], 1: [0, 3, 6], 2: [0, 4, 8]}
    single_keys = {0: [4], 1: [6], 2: [8]}

    # list of ints
    self.check_result('list int', 'iloc', [0, 1, 2], 'ix',
                      three_keys, typs=integer_typs)
    self.check_result('list int', 'iloc', [2], 'ix',
                      single_keys, typs=integer_typs)
    self.check_result('list int', 'iloc', [0, 1, 2], 'indexer', [0, 1, 2],
                      typs=other_typs, fails=IndexError)

    # array of ints (GH5006)
    self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'ix',
                      three_keys, typs=integer_typs)
    self.check_result('array int', 'iloc', np.array([2]), 'ix',
                      single_keys, typs=integer_typs)
    self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'indexer',
                      [0, 1, 2], typs=other_typs, fails=IndexError)
|
||||
|
||||
def test_iloc_getitem_neg_int_can_reach_first_index(self):
|
||||
# GH10547 and GH10779
|
||||
# negative integers should be able to reach index 0
|
||||
df = DataFrame({'A': [2, 3, 5], 'B': [7, 11, 13]})
|
||||
s = df['A']
|
||||
|
||||
expected = df.iloc[0]
|
||||
result = df.iloc[-3]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = df.iloc[[0]]
|
||||
result = df.iloc[[-3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = s.iloc[0]
|
||||
result = s.iloc[-3]
|
||||
assert result == expected
|
||||
|
||||
expected = s.iloc[[0]]
|
||||
result = s.iloc[[-3]]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# check the length 1 Series case highlighted in GH10547
|
||||
expected = Series(['a'], index=['A'])
|
||||
result = expected.iloc[[-1]]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_iloc_getitem_dups(self):
    """``iloc`` with repeated positions and with duplicate column labels."""
    # no dups in panel (bug?)
    self.check_result('list int (dups)', 'iloc', [0, 1, 1, 3], 'ix',
                      {0: [0, 2, 2, 6], 1: [0, 3, 3, 9]},
                      objs=['series', 'frame'], typs=['ints', 'uints'])

    # GH 6766
    left = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
    right = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
    df = concat([left, right], axis=1)

    # cross-sectional indexing
    assert isna(df.iloc[0, 0])

    first_row = df.iloc[0, :]
    expected = Series([np.nan, 1, 3, 3], index=['A', 'B', 'A', 'B'],
                      name=0)
    tm.assert_series_equal(first_row, expected)
|
||||
|
||||
def test_iloc_getitem_array(self):
    """Run ``check_result`` for ``iloc`` with an Index object as the key."""
    # array like: the index of an empty Series labelled 1..3
    positions = Series(index=lrange(1, 4)).index
    self.check_result('array like', 'iloc', positions, 'ix',
                      {0: [2, 4, 6], 1: [3, 6, 9], 2: [4, 8, 12]},
                      typs=['ints', 'uints'])
|
||||
|
||||
def test_iloc_getitem_bool(self):
    """Run ``check_result`` for ``iloc`` with a boolean list as the key.

    Integer-like fixture types are compared against ``ix`` using the
    same mask; the remaining types are run with ``fails=IndexError``.
    """
    # boolean indexers
    b = [True, False, True, False]
    other_typs = ['labels', 'mixed', 'ts', 'floats', 'empty']

    self.check_result('bool', 'iloc', b, 'ix', b, typs=['ints', 'uints'])
    self.check_result('bool', 'iloc', b, 'ix', b,
                      typs=other_typs, fails=IndexError)
|
||||
|
||||
def test_iloc_getitem_slice(self):
    """Run ``check_result`` for ``iloc`` with the slice ``1:3``.

    Integer-like fixture types are compared against ``ix`` with explicit
    per-axis label lists; the remaining types are run with
    ``fails=IndexError``.
    """
    # slices
    key = slice(1, 3)
    other_typs = ['labels', 'mixed', 'ts', 'floats', 'empty']

    self.check_result('slice', 'iloc', key, 'ix',
                      {0: [2, 4], 1: [3, 6], 2: [4, 8]},
                      typs=['ints', 'uints'])
    self.check_result('slice', 'iloc', key, 'indexer', slice(1, 3),
                      typs=other_typs, fails=IndexError)
|
||||
|
||||
def test_iloc_getitem_slice_dups(self):
|
||||
|
||||
df1 = DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B'])
|
||||
df2 = DataFrame(np.random.randint(0, 10, size=20).reshape(10, 2),
|
||||
columns=['A', 'C'])
|
||||
|
||||
# axis=1
|
||||
df = concat([df1, df2], axis=1)
|
||||
tm.assert_frame_equal(df.iloc[:, :4], df1)
|
||||
tm.assert_frame_equal(df.iloc[:, 4:], df2)
|
||||
|
||||
df = concat([df2, df1], axis=1)
|
||||
tm.assert_frame_equal(df.iloc[:, :2], df2)
|
||||
tm.assert_frame_equal(df.iloc[:, 2:], df1)
|
||||
|
||||
exp = concat([df2, df1.iloc[:, [0]]], axis=1)
|
||||
tm.assert_frame_equal(df.iloc[:, 0:3], exp)
|
||||
|
||||
# axis=0
|
||||
df = concat([df, df], axis=0)
|
||||
tm.assert_frame_equal(df.iloc[0:10, :2], df2)
|
||||
tm.assert_frame_equal(df.iloc[0:10, 2:], df1)
|
||||
tm.assert_frame_equal(df.iloc[10:, :2], df2)
|
||||
tm.assert_frame_equal(df.iloc[10:, 2:], df1)
|
||||
|
||||
def test_iloc_setitem(self):
|
||||
df = self.frame_ints
|
||||
|
||||
df.iloc[1, 1] = 1
|
||||
result = df.iloc[1, 1]
|
||||
assert result == 1
|
||||
|
||||
df.iloc[:, 2:3] = 0
|
||||
expected = df.iloc[:, 2:3]
|
||||
result = df.iloc[:, 2:3]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# GH5771
|
||||
s = Series(0, index=[4, 5, 6])
|
||||
s.iloc[1:2] += 1
|
||||
expected = Series([0, 1, 0], index=[4, 5, 6])
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    'data, indexes, values, expected_k', [
        # test without indexer value in first level of MultiIndex
        ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
        # test like code sample 1 in the issue
        ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100],
         [755, 1066]),
        # test like code sample 2 in the issue
        ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
        # test like code sample 3 in the issue
        ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10],
         [8, 15, 13])
    ])
def test_iloc_setitem_int_multiindex_series(
        self, data, indexes, values, expected_k):
    """In-place ``+=`` through ``iloc`` on a MultiIndex-ed Series (GH17148).

    ``data`` rows become columns i/j/k with (i, j) as the index; each
    position in ``indexes`` is incremented by the matching entry of
    ``values`` and the resulting 'k' column must equal ``expected_k``.
    """
    frame = DataFrame(data=data, columns=['i', 'j', 'k'])
    frame = frame.set_index(['i', 'j'])

    series = frame.k.copy()
    for position, increment in zip(indexes, values):
        series.iloc[position] += increment

    # build the expectation by overwriting the column in the frame
    frame['k'] = expected_k
    tm.assert_series_equal(series, frame.k)
|
||||
|
||||
def test_iloc_setitem_list(self):
|
||||
|
||||
# setitem with an iloc list
|
||||
df = DataFrame(np.arange(9).reshape((3, 3)), index=["A", "B", "C"],
|
||||
columns=["A", "B", "C"])
|
||||
df.iloc[[0, 1], [1, 2]]
|
||||
df.iloc[[0, 1], [1, 2]] += 100
|
||||
|
||||
expected = DataFrame(
|
||||
np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)),
|
||||
index=["A", "B", "C"], columns=["A", "B", "C"])
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_iloc_setitem_pandas_object(self):
|
||||
# GH 17193, affecting old numpy (1.7 and 1.8)
|
||||
s_orig = Series([0, 1, 2, 3])
|
||||
expected = Series([0, -1, -2, 3])
|
||||
|
||||
s = s_orig.copy()
|
||||
s.iloc[Series([1, 2])] = [-1, -2]
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
s = s_orig.copy()
|
||||
s.iloc[pd.Index([1, 2])] = [-1, -2]
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
def test_iloc_setitem_dups(self):
|
||||
|
||||
# GH 6766
|
||||
# iloc with a mask aligning from another iloc
|
||||
df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
|
||||
df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
|
||||
df = concat([df1, df2], axis=1)
|
||||
|
||||
expected = df.fillna(3)
|
||||
expected['A'] = expected['A'].astype('float64')
|
||||
inds = np.isnan(df.iloc[:, 0])
|
||||
mask = inds[inds].index
|
||||
df.iloc[mask, 0] = df.iloc[mask, 2]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# del a dup column across blocks
|
||||
expected = DataFrame({0: [1, 2], 1: [3, 4]})
|
||||
expected.columns = ['B', 'B']
|
||||
del df['A']
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# assign back to self
|
||||
df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# reversed x 2
|
||||
df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(
|
||||
drop=True)
|
||||
df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(
|
||||
drop=True)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_iloc_getitem_frame(self):
    """Compare ``iloc`` selections with the equivalent ``ix`` selections.

    The frame has even integer labels on both axes, so each positional
    key has a distinct label-based counterpart.
    """
    df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2),
                   columns=lrange(0, 8, 2))

    def via_ix(key):
        # record any warning raised while going through .ix
        with catch_warnings(record=True):
            return df.ix[key]

    # single row
    result = df.iloc[2]
    tm.assert_series_equal(result, via_ix(4))

    # single cell
    result = df.iloc[2, 2]
    assert result == via_ix((4, 4))

    # slice
    result = df.iloc[4:8]
    tm.assert_frame_equal(result, via_ix(slice(8, 14)))

    result = df.iloc[:, 2:3]
    tm.assert_frame_equal(result, via_ix((slice(None), slice(4, 5))))

    # list of integers
    result = df.iloc[[0, 1, 3]]
    tm.assert_frame_equal(result, via_ix([0, 2, 6]))

    result = df.iloc[[0, 1, 3], [0, 1]]
    tm.assert_frame_equal(result, via_ix(([0, 2, 6], [0, 2])))

    # neg indices
    result = df.iloc[[-1, 1, 3], [-1, 1]]
    tm.assert_frame_equal(result, via_ix(([18, 2, 6], [6, 2])))

    # dups indices
    result = df.iloc[[-1, -1, 1, 3], [-1, 1]]
    tm.assert_frame_equal(result, via_ix(([18, 18, 2, 6], [6, 2])))

    # with index-like
    positions = Series(index=lrange(1, 5)).index
    result = df.iloc[positions]
    tm.assert_frame_equal(result, via_ix([2, 4, 6, 8]))
|
||||
|
||||
def test_iloc_getitem_labelled_frame(self):
    """``iloc`` on a string-labelled frame: positions only, never labels."""
    # try with labelled frame
    df = DataFrame(np.random.randn(10, 4),
                   index=list('abcdefghij'), columns=list('ABCD'))

    assert df.iloc[1, 1] == df.loc['b', 'B']

    tm.assert_frame_equal(df.iloc[:, 2:3], df.loc[:, ['C']])

    # negative indexing
    assert df.iloc[-1, -1] == df.loc['j', 'D']

    # out-of-bounds exception
    with pytest.raises(IndexError):
        df.iloc[10, 5]

    # trying to use a label
    with pytest.raises(ValueError):
        df.iloc['j', 'D']
|
||||
|
||||
def test_iloc_getitem_doc_issue(self):
|
||||
|
||||
# multi axis slicing issue with single block
|
||||
# surfaced in GH 6059
|
||||
|
||||
arr = np.random.randn(6, 4)
|
||||
index = date_range('20130101', periods=6)
|
||||
columns = list('ABCD')
|
||||
df = DataFrame(arr, index=index, columns=columns)
|
||||
|
||||
# defines ref_locs
|
||||
df.describe()
|
||||
|
||||
result = df.iloc[3:5, 0:2]
|
||||
str(result)
|
||||
result.dtypes
|
||||
|
||||
expected = DataFrame(arr[3:5, 0:2], index=index[3:5],
|
||||
columns=columns[0:2])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# for dups
|
||||
df.columns = list('aaaa')
|
||||
result = df.iloc[3:5, 0:2]
|
||||
str(result)
|
||||
result.dtypes
|
||||
|
||||
expected = DataFrame(arr[3:5, 0:2], index=index[3:5],
|
||||
columns=list('aa'))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# related
|
||||
arr = np.random.randn(6, 4)
|
||||
index = list(range(0, 12, 2))
|
||||
columns = list(range(0, 8, 2))
|
||||
df = DataFrame(arr, index=index, columns=columns)
|
||||
|
||||
df._data.blocks[0].mgr_locs
|
||||
result = df.iloc[1:5, 2:4]
|
||||
str(result)
|
||||
result.dtypes
|
||||
expected = DataFrame(arr[1:5, 2:4], index=index[1:5],
|
||||
columns=columns[2:4])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_iloc_setitem_series(self):
|
||||
df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'),
|
||||
columns=list('ABCD'))
|
||||
|
||||
df.iloc[1, 1] = 1
|
||||
result = df.iloc[1, 1]
|
||||
assert result == 1
|
||||
|
||||
df.iloc[:, 2:3] = 0
|
||||
expected = df.iloc[:, 2:3]
|
||||
result = df.iloc[:, 2:3]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
s = Series(np.random.randn(10), index=lrange(0, 20, 2))
|
||||
|
||||
s.iloc[1] = 1
|
||||
result = s.iloc[1]
|
||||
assert result == 1
|
||||
|
||||
s.iloc[:4] = 0
|
||||
expected = s.iloc[:4]
|
||||
result = s.iloc[:4]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s = Series([-1] * 6)
|
||||
s.iloc[0::2] = [0, 2, 4]
|
||||
s.iloc[1::2] = [1, 3, 5]
|
||||
result = s
|
||||
expected = Series([0, 1, 2, 3, 4, 5])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_iloc_setitem_list_of_lists(self):
|
||||
|
||||
# GH 7551
|
||||
# list-of-list is set incorrectly in mixed vs. single dtyped frames
|
||||
df = DataFrame(dict(A=np.arange(5, dtype='int64'),
|
||||
B=np.arange(5, 10, dtype='int64')))
|
||||
df.iloc[2:4] = [[10, 11], [12, 13]]
|
||||
expected = DataFrame(dict(A=[0, 1, 10, 12, 4], B=[5, 6, 11, 13, 9]))
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(
|
||||
dict(A=list('abcde'), B=np.arange(5, 10, dtype='int64')))
|
||||
df.iloc[2:4] = [['x', 11], ['y', 13]]
|
||||
expected = DataFrame(dict(A=['a', 'b', 'x', 'y', 'e'],
|
||||
B=[5, 6, 11, 13, 9]))
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    'indexer', [[0], slice(None, 1, None), np.array([0])])
@pytest.mark.parametrize(
    'value', [['Z'], np.array(['Z'])])
def test_iloc_setitem_with_scalar_index(self, indexer, value):
    """Scalar row + list-like column assignment is elementwise (GH #19474).

    assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated
    elementwisely, not using "setter('A', ['Z'])", so the cell must end
    up holding the scalar 'Z' rather than a one-element container.
    """
    df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
    df.iloc[0, indexer] = value

    cell = df.iloc[0, 0]
    assert is_scalar(cell)
    assert cell == 'Z'
|
||||
|
||||
def test_iloc_mask(self):
    """Check boolean-mask indexing through ``[]``, ``.loc`` and ``.iloc``.

    GH 3631: ``iloc`` with a mask that is a Series should raise, while a
    plain boolean ndarray is accepted.  The second half compares, for
    every (mask index, accessor) combination, either the binary
    representation of the selected ``nums`` sum or the text of the
    exception that was raised against a table of expected strings.
    """
    df = DataFrame(lrange(5), list('ABCDE'), columns=['a'])
    mask = (df.a % 2 == 0)
    pytest.raises(ValueError, df.iloc.__getitem__, tuple([mask]))
    mask.index = lrange(len(mask))
    pytest.raises(NotImplementedError, df.iloc.__getitem__,
                  tuple([mask]))

    # ndarray ok
    result = df.iloc[np.array([True] * len(mask), dtype=bool)]
    tm.assert_frame_equal(result, df)

    # the possibilities
    locs = np.arange(4)
    nums = 2 ** locs
    reps = lmap(bin, nums)
    df = DataFrame({'locs': locs, 'nums': nums}, reps)

    unalignable = ('Unalignable boolean Series provided as indexer '
                   '(index of the boolean Series and of the indexed '
                   'object do not match')
    expected = {
        (None, ''): '0b1100',
        (None, '.loc'): '0b1100',
        (None, '.iloc'): '0b1100',
        ('index', ''): '0b11',
        ('index', '.loc'): '0b11',
        ('index', '.iloc'): ('iLocation based boolean indexing '
                             'cannot use an indexable as a mask'),
        ('locs', ''): unalignable,
        ('locs', '.loc'): unalignable,
        ('locs', '.iloc'): ('iLocation based boolean indexing on an '
                            'integer type is not available'),
    }

    # UserWarnings from reindex of a boolean mask
    with catch_warnings(record=True):
        for idx in [None, 'index', 'locs']:
            mask = (df.nums > 2).values
            if idx:
                # re-label the mask with the reversed index / 'locs' column
                mask = Series(mask, list(reversed(getattr(df, idx))))
            for method in ['', '.loc', '.iloc']:
                # Broad catch is deliberate: the exception text itself is
                # the value compared against the expectation table.
                try:
                    accessor = getattr(df, method[1:]) if method else df
                    ans = str(bin(accessor[mask]['nums'].sum()))
                except Exception as e:
                    ans = str(e)

                key = tuple([idx, method])
                r = expected.get(key)
                if r != ans:
                    # `r` is the expected string and `ans` what was
                    # actually produced; report them in that order.
                    raise AssertionError(
                        "[%s] does not match [%s], received [%s]"
                        % (key, r, ans))
|
||||
|
||||
def test_iloc_non_unique_indexing(self):
    """Positional and label lookups on a frame with a repeated index.

    GH 4017, non-unique indexing (on the axis).
    """
    base = DataFrame({'A': [0.1] * 3000, 'B': [1] * 3000})
    idx = np.array(lrange(30)) * 99
    expected = base.iloc[idx]

    # stacking scaled copies repeats every index label three times
    tripled = concat([base, 2 * base, 3 * base])
    tm.assert_frame_equal(tripled.iloc[idx], expected)

    df2 = DataFrame({'A': [0.1] * 1000, 'B': [1] * 1000})
    df2 = concat([df2, 2 * df2, 3 * df2])

    max_label = df2.index.to_series().max()
    expected = df2.iloc[idx[idx <= max_label]]

    # each requested label matches three rows: the row, 2x and 3x
    rows = []
    for _, row in expected.iterrows():
        rows.extend([row, row * 2, row * 3])

    expected = DataFrame(rows)
    missing = DataFrame(index=idx[idx > max_label])
    expected = concat([expected, missing], sort=True)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = df2.loc[idx]
    tm.assert_frame_equal(result, expected, check_index_type=False)
|
||||
|
||||
def test_iloc_empty_list_indexer_is_ok(self):
    """An empty list indexer must give the same frame as an empty slice."""
    from pandas.util.testing import makeCustomDataframe as mkdf

    frame = mkdf(5, 2)
    strict = dict(check_index_type=True, check_column_type=True)

    # vertical empty
    tm.assert_frame_equal(frame.iloc[:, []], frame.iloc[:, :0], **strict)
    # horizontal empty
    tm.assert_frame_equal(frame.iloc[[], :], frame.iloc[:0, :], **strict)
    # horizontal empty
    tm.assert_frame_equal(frame.iloc[[]], frame.iloc[:0, :], **strict)
|
||||
|
||||
def test_identity_slice_returns_new_object(self):
    """``obj.iloc[:]`` returns a distinct object that still sees writes.

    GH13873
    """
    frame = DataFrame({'a': [1, 2, 3]})
    frame_view = frame.iloc[:]
    assert frame_view is not frame

    # should be a shallow copy
    frame['a'] = [4, 4, 4]
    assert (frame_view['a'] == 4).all()

    series = Series([1, 2, 3, 4, 5, 6])
    series_view = series.iloc[:]
    assert series_view is not series

    # should also be a shallow copy
    series[:3] = [7, 8, 9]
    assert all(series_view[:3] == [7, 8, 9])
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,96 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas.core.api import Series, DataFrame, MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
import pytest
|
||||
|
||||
|
||||
class TestIndexingSlow(object):
    """Indexing tests marked ``slow``."""

    @pytest.mark.slow
    def test_multiindex_get_loc(self):  # GH7724, GH2646
        """Look up full and partial keys in MultiIndexed frames.

        test indexing into a multi-index before & past the lexsort depth
        """
        with warnings.catch_warnings(record=True):

            from numpy.random import randint, choice, randn
            cols = ['jim', 'joe', 'jolie', 'joline', 'jolia']

            def validate(mi, df, key):
                """Compare ``mi.loc`` with a boolean-mask filter of ``df``."""
                mask = np.ones(len(df)).astype('bool')

                # test for all partials of this key
                for i, k in enumerate(key):
                    mask &= df.iloc[:, i] == k
                    partial = key[:i + 1]

                    if not mask.any():
                        assert partial not in mi.index
                        continue

                    assert partial in mi.index
                    right = df[mask].copy()

                    if i + 1 != len(key):  # partial key
                        right.drop(cols[:i + 1], axis=1, inplace=True)
                        right.set_index(cols[i + 1:-1], inplace=True)
                        tm.assert_frame_equal(mi.loc[partial], right)
                        continue

                    # full key
                    right.set_index(cols[:-1], inplace=True)
                    if len(right) == 1:  # single hit
                        right = Series(right['jolia'].values,
                                       name=right.index[0],
                                       index=['jolia'])
                        tm.assert_series_equal(mi.loc[partial], right)
                    else:  # multi hit
                        tm.assert_frame_equal(mi.loc[partial], right)

            n, m = 1000, 50

            # random draws are made in the same order as the columns
            ints = randint(0, 10, n)
            lower = choice(list('abcdefghij'), n)
            dates = choice(
                pd.date_range('20141009', periods=10).tolist(), n)
            upper = choice(list('ZYXWVUTSRQ'), n)
            floats = randn(n)
            vals = [tuple(row)
                    for row in zip(ints, lower, dates, upper, floats)]

            # bunch of keys for testing
            key_ints = randint(0, 11, m)
            key_lower = choice(list('abcdefghijk'), m)
            key_dates = choice(
                pd.date_range('20141009', periods=11).tolist(), m)
            key_upper = choice(list('ZYXWVUTSRQP'), m)
            keys = [tuple(row)
                    for row in zip(key_ints, key_lower, key_dates, key_upper)]
            keys += [t[:-1] for t in vals[::n // m]]

            # covers both unique index and non-unique index
            df = DataFrame(vals, columns=cols)
            a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1])

            for frame in a, b:
                for i in range(5):  # lexsort depth
                    if i == 0:
                        df = frame.copy()
                    else:
                        df = frame.sort_values(by=cols[:i])
                    mi = df.set_index(cols[:-1])
                    assert mi.index.lexsort_depth >= i
                    for key in keys:
                        validate(mi, df, key)

    @pytest.mark.slow
    def test_large_dataframe_indexing(self):
        """Append one row via ``.loc`` to a 10 ** 6 row frame (GH10692)."""
        size = 10 ** 6
        result = DataFrame({'x': range(size)}, dtype='int64')
        result.loc[len(result)] = len(result) + 1
        expected = DataFrame({'x': range(size + 1)}, dtype='int64')
        tm.assert_frame_equal(result, expected)

    @pytest.mark.slow
    def test_large_mi_dataframe_indexing(self):
        """Membership test on a 10 ** 6 entry MultiIndex (GH10645)."""
        size = 10 ** 6
        result = MultiIndex.from_arrays([range(size), range(size)])
        assert (size, 0) not in result
|
||||
@@ -1,337 +0,0 @@
|
||||
""" test indexing with ix """
|
||||
|
||||
import pytest
|
||||
|
||||
from warnings import catch_warnings
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
from pandas.compat import lrange
|
||||
from pandas import Series, DataFrame, option_context, MultiIndex
|
||||
from pandas.util import testing as tm
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
|
||||
class TestIX(object):
    """Tests for the ``.ix`` indexer and its agreement with ``.loc``."""

    def test_ix_deprecation(self):
        """Using ``.ix`` must emit a DeprecationWarning (GH 15114)."""
        frame = DataFrame({'A': [1, 2, 3]})
        with tm.assert_produces_warning(DeprecationWarning,
                                        check_stacklevel=False):
            frame.ix[1, 'A']

    def test_ix_loc_setitem_consistency(self):
        """Assignments through ``.ix`` and ``.loc`` give the same result."""
        # GH 5771
        # loc with slice and series
        s = Series(0, index=[4, 5, 6])
        s.loc[4:5] += 1
        tm.assert_series_equal(s, Series([1, 1, 0], index=[4, 5, 6]))

        # GH 5928
        # chained indexing assignment
        rows = [0, 1, 2]
        df = DataFrame({'a': [0, 1, 2]})
        expected = df.copy()
        with catch_warnings(record=True):
            expected.ix[rows, 'a'] = -expected.ix[rows, 'a']

        with catch_warnings(record=True):
            df['a'].ix[rows] = -df['a'].ix[rows]
        tm.assert_frame_equal(df, expected)

        df = DataFrame({'a': [0, 1, 2], 'b': [0, 1, 2]})
        with catch_warnings(record=True):
            df['a'].ix[rows] = -df['a'].ix[rows].astype('float64') + 0.5
        expected = DataFrame({'a': [0.5, -0.5, -1.5], 'b': [0, 1, 2]})
        tm.assert_frame_equal(df, expected)

        # GH 8607
        # ix setitem consistency
        stamps = [1413840976, 1413842580, 1413760580]
        df = DataFrame({'delta': [1174, 904, 161],
                        'elapsed': [7673, 9277, 1470],
                        'timestamp': stamps})
        expected = DataFrame({'delta': [1174, 904, 161],
                              'elapsed': [7673, 9277, 1470],
                              'timestamp': pd.to_datetime(
                                  [1413840976, 1413842580, 1413760580],
                                  unit='s')
                              })

        # plain column assignment
        df2 = df.copy()
        df2['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
        tm.assert_frame_equal(df2, expected)

        # .loc assignment
        df2 = df.copy()
        df2.loc[:, 'timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
        tm.assert_frame_equal(df2, expected)

        # .ix assignment by column position
        df2 = df.copy()
        with catch_warnings(record=True):
            df2.ix[:, 2] = pd.to_datetime(df['timestamp'], unit='s')
        tm.assert_frame_equal(df2, expected)

    def test_ix_loc_consistency(self):
        """Edge cases where ``.ix`` and ``.loc`` should agree (GH 8613).

        this is not an exhaustive case
        """

        def compare(result, expected):
            if is_scalar(expected):
                assert result == expected
            else:
                assert expected.equals(result)

        # failure cases for .loc, but these work for .ix
        df = DataFrame(np.random.randn(5, 4), columns=list('ABCD'))
        positional_keys = [slice(1, 3),
                           tuple([slice(0, 2), slice(0, 2)]),
                           tuple([slice(0, 2), df.columns[0:2]])]
        index_makers = [tm.makeStringIndex, tm.makeUnicodeIndex,
                        tm.makeDateIndex, tm.makePeriodIndex,
                        tm.makeTimedeltaIndex]
        for key in positional_keys:
            for make_index in index_makers:
                df.index = make_index(len(df.index))
                with catch_warnings(record=True):
                    df.ix[key]

                pytest.raises(TypeError, lambda: df.loc[key])

        df = DataFrame(np.random.randn(5, 4), columns=list('ABCD'),
                       index=pd.date_range('2012-01-01', periods=5))

        date_keys = ['2012-01-03',
                     '2012-01-31',
                     slice('2012-01-03', '2012-01-03'),
                     slice('2012-01-03', '2012-01-04'),
                     slice('2012-01-03', '2012-01-06', 2),
                     slice('2012-01-03', '2012-01-31'),
                     tuple([[True, True, True, False, True]])]
        for key in date_keys:

            # getitem
            # if the expected raises, then compare the exceptions
            try:
                with catch_warnings(record=True):
                    expected = df.ix[key]
            except KeyError:
                pytest.raises(KeyError, lambda: df.loc[key])
                continue

            compare(df.loc[key], expected)

            # setitem
            via_ix = df.copy()
            via_loc = df.copy()

            with catch_warnings(record=True):
                via_ix.ix[key] = 10
            via_loc.loc[key] = 10
            compare(via_loc, via_ix)

        # edge cases
        s = Series([1, 2, 3, 4], index=list('abde'))

        result1 = s['a':'c']
        with catch_warnings(record=True):
            result2 = s.ix['a':'c']
        result3 = s.loc['a':'c']
        tm.assert_series_equal(result1, result2)
        tm.assert_series_equal(result1, result3)

        # now work rather than raising KeyError
        s = Series(range(5), [-2, -1, 1, 2, 3])

        with catch_warnings(record=True):
            result1 = s.ix[-10:3]
        result2 = s.loc[-10:3]
        tm.assert_series_equal(result1, result2)

        with catch_warnings(record=True):
            result1 = s.ix[0:3]
        result2 = s.loc[0:3]
        tm.assert_series_equal(result1, result2)

    def test_ix_weird_slicing(self):
        """Masked ``.loc`` assignment from a negated column."""
        # http://stackoverflow.com/q/17056560/1240268
        df = DataFrame({'one': [1, 2, 3, np.nan, np.nan],
                        'two': [1, 2, 3, 4, 5]})
        df.loc[df['one'] > 1, 'two'] = -df['two']

        expected = DataFrame({
            'one': dict(enumerate([1.0, 2.0, 3.0, np.nan, np.nan])),
            'two': dict(enumerate([1, -2, -3, 4, 5])),
        })
        tm.assert_frame_equal(df, expected)

    def test_ix_general(self):
        """Tuple-key ``.loc`` lookup on a (col, year) MultiIndex."""
        # ix general issues

        # GH 2817
        data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
                'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
                'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}}
        df = DataFrame(data).set_index(keys=['col', 'year'])
        key = 4.0, 2012

        # emits a PerformanceWarning, ok
        with tm.assert_produces_warning(PerformanceWarning):
            tm.assert_frame_equal(df.loc[key], df.iloc[2:])

        # this is ok
        df.sort_index(inplace=True)
        res = df.loc[key]

        # col has float dtype, result should be Float64Index
        index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3],
                                       names=['col', 'year'])
        expected = DataFrame({'amount': [222, 333, 444]}, index=index)
        tm.assert_frame_equal(res, expected)

    def test_ix_assign_column_mixed(self):
        """Column assignment through ``.loc`` on mixed-dtype frames."""
        # GH #1142
        df = DataFrame(tm.getSeriesData())
        df['foo'] = 'bar'

        orig = df.loc[:, 'B'].copy()
        df.loc[:, 'B'] = df.loc[:, 'B'] + 1
        tm.assert_series_equal(df.B, orig + 1)

        # GH 3668, mixed frame with series value
        df = DataFrame({'x': lrange(10), 'y': lrange(10, 20), 'z': 'bar'})
        expected = df.copy()

        for i in range(5):
            label = i * 2
            value = 1000 + i * 200
            expected.loc[label, 'y'] = value
            assert expected.loc[label, 'y'] == value

        df.loc[df.x % 2 == 0, 'y'] = df.loc[df.x % 2 == 0, 'y'] * 100
        tm.assert_frame_equal(df, expected)

        # GH 4508, making sure consistency of assignments
        df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]})
        df.loc[[0, 2, ], 'b'] = [100, -100]
        expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]})
        tm.assert_frame_equal(df, expected)

        df = DataFrame({'a': lrange(4)})
        df['b'] = np.nan
        df.loc[[1, 3], 'b'] = [100, -100]
        expected = DataFrame({'a': [0, 1, 2, 3],
                              'b': [np.nan, 100, np.nan, -100]})
        tm.assert_frame_equal(df, expected)

        # ok, but chained assignments are dangerous
        # if we turn off chained assignment it will work
        with option_context('chained_assignment', None):
            df = DataFrame({'a': lrange(4)})
            df['b'] = np.nan
            df['b'].loc[[1, 3]] = [100, -100]
            tm.assert_frame_equal(df, expected)

    def test_ix_get_set_consistency(self):
        """Scalar get/set via ``.ix`` and ``.loc`` must agree (GH 4544).

        ix/loc get/set not consistent when a mixed int/string index
        """
        df = DataFrame(np.arange(16).reshape((4, 4)),
                       columns=['a', 'b', 8, 'c'],
                       index=['e', 7, 'f', 'g'])

        with catch_warnings(record=True):
            assert df.ix['e', 8] == 2
            assert df.loc['e', 8] == 2

        with catch_warnings(record=True):
            df.ix['e', 8] = 42
            assert df.ix['e', 8] == 42
            assert df.loc['e', 8] == 42

        df.loc['e', 8] = 45
        with catch_warnings(record=True):
            assert df.ix['e', 8] == 45
            assert df.loc['e', 8] == 45

    def test_ix_slicing_strings(self):
        """Assign a string column into selected rows via ``.ix``."""
        # see gh-3836
        data = {'Classification':
                ['SA EQUITY CFD', 'bbb', 'SA EQUITY', 'SA SSF', 'aaa'],
                'Random': [1, 2, 3, 4, 5],
                'X': ['correct', 'wrong', 'correct', 'correct', 'wrong']}
        df = DataFrame(data)

        known = ['SA EQUITY CFD', 'SA EQUITY', 'SA SSF']
        x = df[~df.Classification.isin(known)]
        with catch_warnings(record=True):
            df.ix[x.index, 'X'] = df['Classification']

        expected = DataFrame({
            'Classification': dict(enumerate(
                ['SA EQUITY CFD', 'bbb', 'SA EQUITY', 'SA SSF', 'aaa'])),
            'Random': dict(enumerate([1, 2, 3, 4, 5])),
            # bug was 4: 'bbb'
            'X': dict(enumerate(
                ['correct', 'bbb', 'correct', 'correct', 'aaa'])),
        })

        tm.assert_frame_equal(df, expected)

    def test_ix_setitem_out_of_bounds_axis_0(self):
        """Setting at row position 2 of a 2-row frame raises ValueError."""
        row_labels = ["row%s" % i for i in range(2)]
        col_labels = ["col%s" % i for i in range(5)]
        df = DataFrame(np.random.randn(2, 5),
                       index=row_labels, columns=col_labels)
        with catch_warnings(record=True):
            pytest.raises(ValueError, df.ix.__setitem__, (2, 0), 100)

    def test_ix_setitem_out_of_bounds_axis_1(self):
        """Setting at column position 2 of a 2-column frame raises ValueError."""
        row_labels = ["row%s" % i for i in range(5)]
        col_labels = ["col%s" % i for i in range(2)]
        df = DataFrame(np.random.randn(5, 2),
                       index=row_labels, columns=col_labels)
        with catch_warnings(record=True):
            pytest.raises(ValueError, df.ix.__setitem__, (0, 2), 100)

    def test_ix_empty_list_indexer_is_ok(self):
        """An empty list given to ``.ix`` matches an empty ``iloc`` slice."""
        with catch_warnings(record=True):
            from pandas.util.testing import makeCustomDataframe as mkdf
            df = mkdf(5, 2)
            strict = dict(check_index_type=True, check_column_type=True)

            # vertical empty
            tm.assert_frame_equal(df.ix[:, []], df.iloc[:, :0], **strict)
            # horizontal empty
            tm.assert_frame_equal(df.ix[[], :], df.iloc[:0, :], **strict)
            # horizontal empty
            tm.assert_frame_equal(df.ix[[]], df.iloc[:0, :], **strict)

    def test_ix_duplicate_returns_series(self):
        """A duplicated row label gives the same Series via ix and loc."""
        df = DataFrame(np.random.randn(3, 3), index=[0.1, 0.2, 0.2],
                       columns=list('abc'))
        with catch_warnings(record=True):
            via_ix = df.ix[0.2, 'a']
        via_loc = df.loc[0.2, 'a']
        tm.assert_series_equal(via_ix, via_loc)
|
||||
@@ -1,805 +0,0 @@
|
||||
""" test label based indexing with loc """
|
||||
|
||||
import itertools
|
||||
import pytest
|
||||
|
||||
from warnings import catch_warnings
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas.compat import lrange, StringIO
|
||||
from pandas import Series, DataFrame, Timestamp, date_range, MultiIndex, Index
|
||||
from pandas.util import testing as tm
|
||||
from pandas.tests.indexing.common import Base
|
||||
from pandas.api.types import is_scalar
|
||||
from pandas.compat import PY2
|
||||
|
||||
|
||||
class TestLoc(Base):
|
||||
|
||||
def test_loc_getitem_dups(self):
    """Chained ``loc`` on a duplicated index matches the one-step form.

    GH 5678: repeated getitems on a dup index returning a ndarray
    """
    labels = ['ABCDE'[x % 5] for x in range(20)]
    df = DataFrame(np.random.random_sample((20, 5)), index=labels)

    one_step = df.loc['A', 0]
    chained = df.loc[:, 0].loc['A']
    tm.assert_series_equal(chained, one_step)
|
||||
|
||||
def test_loc_getitem_dups2(self):
    """Select the single row of a frame whose column labels are all 'a'.

    GH4726: dup indexing with iloc/loc
    """
    values = [1, 2, 'foo', 'bar', Timestamp('20130101')]
    names = ['a', 'a', 'a', 'a', 'a']

    df = DataFrame([values], columns=names, index=[1])
    expected = Series([1, 2, 'foo', 'bar', Timestamp('20130101')],
                      index=['a', 'a', 'a', 'a', 'a'], name=1)

    # positional, then label-based, access to the same row
    tm.assert_series_equal(df.iloc[0], expected)
    tm.assert_series_equal(df.loc[1], expected)
|
||||
|
||||
def test_loc_setitem_dups(self):
    """In-place ``*=`` through ``loc`` on a frame with a duplicated index.

    GH 6541
    """
    df_orig = DataFrame(
        {'me': list('rttti'),
         'foo': list('aaade'),
         'bar': np.arange(5, dtype='float64') * 1.34 + 2,
         'bar2': np.arange(5, dtype='float64') * -.34 + 2}).set_index('me')

    # unique label, two columns -> Series
    key = tuple(['r', ['bar', 'bar2']])
    doubled = df_orig.copy()
    doubled.loc[key] *= 2.0
    tm.assert_series_equal(doubled.loc[key], 2.0 * df_orig.loc[key])

    # unique label, one column -> scalar
    key = tuple(['r', 'bar'])
    doubled = df_orig.copy()
    doubled.loc[key] *= 2.0
    assert doubled.loc[key] == 2.0 * df_orig.loc[key]

    # repeated label, two columns -> DataFrame
    key = tuple(['t', ['bar', 'bar2']])
    doubled = df_orig.copy()
    doubled.loc[key] *= 2.0
    tm.assert_frame_equal(doubled.loc[key], 2.0 * df_orig.loc[key])
|
||||
|
||||
def test_loc_setitem_slice(self):
    """Masked ``loc`` assignment and the resulting column dtype (GH10503)."""
    # assigning the same type should not change the type
    df1 = DataFrame({'a': [0, 1, 1],
                     'b': Series([100, 200, 300], dtype='uint32')})
    selected = df1['a'] == 1
    bumped = df1.loc[selected, 'b'] + 1
    df1.loc[selected, 'b'] = bumped
    tm.assert_frame_equal(
        df1,
        DataFrame({'a': [0, 1, 1],
                   'b': Series([100, 201, 301], dtype='uint32')}))

    # assigning a new type should get the inferred type
    df2 = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
                    dtype='uint64')
    selected = df1['a'] == 1
    wide = df2.loc[selected, 'b']
    df1.loc[selected, 'b'] = wide
    # NOTE(review): the assignment above targets df1, yet the frame
    # compared below is df2, which this test never modifies -- confirm
    # whether df1 was meant to be checked instead.
    tm.assert_frame_equal(
        df2,
        DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
                  dtype='uint64'))
|
||||
|
||||
def test_loc_getitem_int(self):
    """Run ``check_result`` for integer labels, one axis at a time."""
    # int label
    for label, axis in ((2, 0), (3, 1), (4, 2)):
        self.check_result('int label', 'loc', label, 'ix', label,
                          typs=['ints', 'uints'], axes=axis)

    # NOTE(review): 'label' is not one of the names listed in Base._typs
    # ('labels' is) -- possibly a typo; left unchanged, confirm intent.
    self.check_result('int label', 'loc', 2, 'ix', 2,
                      typs=['label'], fails=KeyError)
|
||||
|
||||
def test_loc_getitem_label(self):
    """Run ``check_result`` for single-label lookups on axis 0."""
    # label
    cases = [
        ('c', 'c', 'labels'),
        ('null', 'null', 'mixed'),
        (8, 8, 'mixed'),
        (Timestamp('20130102'), 1, 'ts'),
    ]
    for loc_key, ix_key, typ in cases:
        self.check_result('label', 'loc', loc_key, 'ix', ix_key,
                          typs=[typ], axes=0)

    self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['empty'],
                      fails=KeyError)
|
||||
|
||||
def test_loc_getitem_label_out_of_range(self):
    """Run ``check_result`` for labels 'f' and 20 with an expected error."""
    # out of range label
    cases = [
        ('f', ['ints', 'uints', 'labels', 'mixed', 'ts'],
         dict(fails=KeyError)),
        ('f', ['floats'], dict(fails=KeyError)),
        (20, ['ints', 'uints', 'mixed'], dict(fails=KeyError)),
        (20, ['labels'], dict(fails=TypeError)),
        (20, ['ts'], dict(axes=0, fails=TypeError)),
        (20, ['floats'], dict(axes=0, fails=KeyError)),
    ]
    for label, typs, extra in cases:
        self.check_result('label range', 'loc', label, 'ix', label,
                          typs=typs, **extra)
|
||||
|
||||
def test_loc_getitem_label_list(self):
    """Run ``check_result`` for list-of-label lookups on each axis."""
    # list of labels
    cases = [
        ((0, 2, 4), ['ints', 'uints'], 0),
        ((3, 6, 9), ['ints', 'uints'], 1),
        ((4, 8, 12), ['ints', 'uints'], 2),
        (('a', 'b', 'd'), ['labels'], 0),
        (('A', 'B', 'C'), ['labels'], 1),
        (('Z', 'Y', 'W'), ['labels'], 2),
        ((2, 8, 'null'), ['mixed'], 0),
        ((Timestamp('20130102'), Timestamp('20130103')), ['ts'], 0),
    ]
    for labels, typs, axis in cases:
        # two independent list objects, one for each accessor
        self.check_result('list lbl', 'loc', list(labels), 'ix',
                          list(labels), typs=typs, axes=axis)
|
||||
|
||||
@pytest.mark.skipif(PY2, reason=("Catching warnings unreliable with "
                                 "Python 2 (GH #20770)"))
def test_loc_getitem_label_list_with_missing(self):
    """Run ``check_result`` for label lists given as 'missing' cases.

    Apart from the first call, every call is wrapped in an assertion
    that a FutureWarning is produced.
    """
    self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2],
                      typs=['empty'], fails=KeyError)

    for labels, axis in (((0, 2, 10), 0), ((3, 6, 7), 1), ((4, 8, 10), 2)):
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            self.check_result('list lbl', 'loc', list(labels), 'ix',
                              list(labels),
                              typs=['ints', 'uints', 'floats'],
                              axes=axis, fails=KeyError)

    # GH 17758 - MultiIndex and missing keys
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        self.check_result('list lbl', 'loc', [(1, 3), (1, 4), (2, 5)],
                          'ix', [(1, 3), (1, 4), (2, 5)],
                          typs=['multi'],
                          axes=0)
|
||||
|
||||
def test_getitem_label_list_with_missing(self):
    """``Series[list]`` with an absent label must emit a FutureWarning."""
    # consistency
    by_label = Series(range(3), index=['a', 'b', 'c'])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        by_label[['a', 'd']]

    by_position = Series(range(3))
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        by_position[[0, 3]]
|
||||
|
||||
def test_loc_getitem_label_list_fails(self):
    """Run ``check_result`` for [20, 30, 40] on axes 1 and 2 (KeyError)."""
    # fails
    for axis in (1, 2):
        self.check_result('list lbl', 'loc', [20, 30, 40], 'ix',
                          [20, 30, 40],
                          typs=['ints', 'uints'], axes=axis, fails=KeyError)
|
||||
|
||||
def test_loc_getitem_label_array_like(self):
    """Run ``check_result`` with an Index object as the ``loc`` key."""
    # array like
    for labels, axis in (((0, 2, 4), 0), ((3, 6, 9), 1), ((4, 8, 12), 2)):
        index_key = Series(index=list(labels)).index
        self.check_result('array like', 'loc', index_key,
                          'ix', list(labels),
                          typs=['ints', 'uints'], axes=axis)
|
||||
|
||||
def test_loc_getitem_bool(self):
    """Run ``check_result`` with a four-element boolean list indexer."""
    # boolean indexers
    mask = [True, False, True, False]
    populated = ['ints', 'uints', 'labels', 'mixed', 'ts', 'floats']

    self.check_result('bool', 'loc', mask, 'ix', mask, typs=populated)
    self.check_result('bool', 'loc', mask, 'ix', mask, typs=['empty'],
                      fails=KeyError)
|
||||
|
||||
def test_loc_getitem_int_slice(self):
    """Integer slices given to ``loc`` are treated as label slices."""
    # ok
    for start, stop, axis in ((2, 4, 0), (3, 6, 1), (4, 8, 2)):
        self.check_result('int slice2', 'loc', slice(start, stop),
                          'ix', [start, stop],
                          typs=['ints', 'uints'], axes=axis)

    # GH 3053
    # loc should treat integer slices like label slices

    index = MultiIndex.from_tuples(
        list(itertools.product([6, 7, 8], ['a', 'b'])))
    df = DataFrame(np.random.randn(6, 6), index, index)
    # 6:8 spans every first-level label, so the whole frame comes back
    tm.assert_frame_equal(df.loc[6:8, :], df)

    index = MultiIndex.from_tuples(
        list(itertools.product([10, 20, 30], ['a', 'b'])))
    df = DataFrame(np.random.randn(6, 6), index, index)
    tm.assert_frame_equal(df.loc[20:30, :], df.iloc[2:])

    # doc examples
    result = df.loc[10, :]
    expected = df.iloc[0:2]
    expected.index = ['a', 'b']
    tm.assert_frame_equal(result, expected)

    result = df.loc[:, 10]
    # expected = df.ix[:,10] (this fails)
    expected = df[10]
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_to_fail(self):
    """``loc`` lookups with absent labels must raise KeyError."""
    # GH3449
    df = DataFrame(np.random.random((3, 3)),
                   index=['a', 'b', 'c'],
                   columns=['e', 'f', 'g'])

    # raise a KeyError?
    pytest.raises(KeyError, df.loc.__getitem__,
                  tuple([[1, 2], [1, 2]]))

    # GH 7496
    # loc should not fallback

    s = Series()
    s.loc[1] = 1
    s.loc['a'] = 2

    for missing in (-1, [-1, -2], ['4']):
        pytest.raises(KeyError, lambda: s.loc[missing])

    s.loc[-1] = 3
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        result = s.loc[[-1, -2]]
    expected = Series([3, np.nan], index=[-1, -2])
    tm.assert_series_equal(result, expected)

    s['a'] = 2
    pytest.raises(KeyError, lambda: s.loc[[-2]])

    del s['a']

    # setting through a list of absent labels also raises
    with pytest.raises(KeyError):
        s.loc[[-2]] = 0

    # inconsistency between .loc[values] and .loc[values,:]
    # GH 7999
    df = DataFrame([['a'], ['b']], index=[1, 2], columns=['value'])

    pytest.raises(KeyError, lambda: df.loc[[3], :])
    pytest.raises(KeyError, lambda: df.loc[[3]])
|
||||
|
||||
def test_loc_getitem_list_with_fail(self):
    """List lookups through ``loc`` when some labels are missing.

    15747: should KeyError if *any* missing labels
    """
    ser = Series([1, 2, 3])

    # every requested label is present
    ser.loc[[2]]

    with pytest.raises(KeyError):
        ser.loc[[3]]

    # a non-match and a match
    with tm.assert_produces_warning(FutureWarning):
        via_loc = ser.loc[[2, 3]]
    via_reindex = ser.reindex([2, 3])
    tm.assert_series_equal(via_reindex, via_loc)
|
||||
|
||||
def test_loc_getitem_label_slice(self):
    """Run ``check_result`` for label, timestamp and mixed slices."""
    # label slices (with ints)
    self.check_result('lab slice', 'loc', slice(1, 3),
                      'ix', slice(1, 3),
                      typs=['labels', 'mixed', 'empty', 'ts', 'floats'],
                      fails=TypeError)

    # real label slices
    for start, stop, axis in (('a', 'c', 0), ('A', 'C', 1), ('W', 'Z', 2)):
        self.check_result('lab slice', 'loc', slice(start, stop),
                          'ix', slice(start, stop),
                          typs=['labels'], axes=axis)

    self.check_result('ts slice', 'loc', slice('20130102', '20130104'),
                      'ix', slice('20130102', '20130104'),
                      typs=['ts'], axes=0)
    for axis in (1, 2):
        self.check_result('ts slice', 'loc', slice('20130102', '20130104'),
                          'ix', slice('20130102', '20130104'),
                          typs=['ts'], axes=axis, fails=TypeError)

    # GH 14316
    self.check_result('ts slice rev', 'loc', slice('20130104', '20130102'),
                      'indexer', [0, 1, 2], typs=['ts_rev'], axes=0)

    for axis, error in ((0, TypeError), (1, KeyError), (2, KeyError)):
        self.check_result('mixed slice', 'loc', slice(2, 8),
                          'ix', slice(2, 8),
                          typs=['mixed'], axes=axis, fails=error)

    self.check_result('mixed slice', 'loc', slice(2, 4, 2),
                      'ix', slice(2, 4, 2),
                      typs=['mixed'], axes=0, fails=TypeError)
|
||||
|
||||
def test_loc_index(self):
|
||||
# gh-17131
|
||||
# a boolean index should index like a boolean numpy array
|
||||
|
||||
df = DataFrame(
|
||||
np.random.random(size=(5, 10)),
|
||||
index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"])
|
||||
|
||||
mask = df.index.map(lambda x: "alpha" in x)
|
||||
expected = df.loc[np.array(mask)]
|
||||
|
||||
result = df.loc[mask]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[mask.values]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_general(self):
|
||||
|
||||
df = DataFrame(
|
||||
np.random.rand(4, 4), columns=['A', 'B', 'C', 'D'],
|
||||
index=['A', 'B', 'C', 'D'])
|
||||
|
||||
# want this to work
|
||||
result = df.loc[:, "A":"B"].iloc[0:2, :]
|
||||
assert (result.columns == ['A', 'B']).all()
|
||||
assert (result.index == ['A', 'B']).all()
|
||||
|
||||
# mixed type
|
||||
result = DataFrame({'a': [Timestamp('20130101')], 'b': [1]}).iloc[0]
|
||||
expected = Series([Timestamp('20130101'), 1], index=['a', 'b'], name=0)
|
||||
tm.assert_series_equal(result, expected)
|
||||
assert result.dtype == object
|
||||
|
||||
def test_loc_setitem_consistency(self):
    """Assign a whole column through ``.loc[:, col]`` and check the result."""
    # GH 6149
    # coerce similarly for setitem and loc when rows have a null-slice

    def fresh_frame():
        # datetime 'date' column next to an int64 'val' column
        return DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
                          'val': Series(range(5), dtype=np.int64)})

    def expected_frame(date_values):
        return DataFrame({'date': date_values,
                          'val': Series(range(5), dtype=np.int64)})

    # a scalar, a 0-d array and a 1-d array of zeros share one expectation
    expected = expected_frame(Series(0, index=range(5), dtype=np.int64))
    zero_values = (0,
                   np.array(0, dtype=np.int64),
                   np.array([0, 0, 0, 0, 0], dtype=np.int64))
    for value in zero_values:
        df = fresh_frame()
        df.loc[:, 'date'] = value
        tm.assert_frame_equal(df, expected)

    # string and float scalars
    for scalar in ('foo', 1.0):
        expected = expected_frame(Series(scalar, index=range(5)))
        df = fresh_frame()
        df.loc[:, 'date'] = scalar
        tm.assert_frame_equal(df, expected)

    # GH 15494
    # setting on frame with single row
    df = DataFrame({'date': Series([Timestamp('20180101')])})
    df.loc[:, 'date'] = 'string'
    expected = DataFrame({'date': Series(['string'])})
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_loc_setitem_consistency_empty(self):
    """``.loc[:, col] = 1`` and ``df[col] = 1`` agree on an empty frame."""
    # empty (essentially noops)
    column_names = ['x', 'y']

    expected = DataFrame(columns=column_names)
    expected['x'] = expected['x'].astype(np.int64)

    def assign_with_loc(frame):
        frame.loc[:, 'x'] = 1

    def assign_with_setitem(frame):
        frame['x'] = 1

    # both spellings are compared against the same expected frame
    for assign in (assign_with_loc, assign_with_setitem):
        df = DataFrame(columns=['x', 'y'])
        assign(df)
        tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_loc_setitem_consistency_slice_column_len(self):
    """Set MultiIndex columns via ``.loc[:, column]`` on a parsed CSV frame."""
    # .loc[:,column] setting with slice == len of the column
    # GH10408
    data = """Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat
Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse
Region,Site,RespondentID,,,,,
Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes,
Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes
Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes,
Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No"""

    df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2])

    # two-level column keys used throughout
    start_key = ('Respondent', 'StartDate')
    end_key = ('Respondent', 'EndDate')
    duration_key = ('Respondent', 'Duration')

    # parse both timestamp columns in place
    df.loc[:, start_key] = pd.to_datetime(df.loc[:, start_key])
    df.loc[:, end_key] = pd.to_datetime(df.loc[:, end_key])

    # new column: end minus start, then cast to 'timedelta64[s]'
    df.loc[:, duration_key] = df.loc[:, end_key] - df.loc[:, start_key]
    df.loc[:, duration_key] = df.loc[:, duration_key].astype(
        'timedelta64[s]')

    expected = Series([1380, 720, 840, 2160.], index=df.index,
                      name=('Respondent', 'Duration'))
    tm.assert_series_equal(df[duration_key], expected)
|
||||
|
||||
def test_loc_setitem_frame(self):
    """Set values through ``.loc`` on frames and read them back.

    Covers scalar and column-slice assignment on ``self.frame_labels``,
    plus the regression cases tagged GH 6254, GH 6252, GH 8669 and GH 6546.
    """
    df = self.frame_labels

    # scalar set by label, read back by label and by position
    df.loc['a', 'A'] = 1
    result = df.loc['a', 'A']
    assert result == 1

    result = df.iloc[0, 0]
    assert result == 1

    # column-slice set; label slice and positional slice must agree
    df.loc[:, 'B':'D'] = 0
    expected = df.loc[:, 'B':'D']
    result = df.iloc[:, 1:]
    tm.assert_frame_equal(result, expected)

    # GH 6254
    # setting issue
    df = DataFrame(index=[3, 5, 4], columns=['A'])
    df.loc[[4, 3, 5], 'A'] = np.array([1, 2, 3], dtype='int64')
    expected = DataFrame(dict(A=Series(
        [1, 2, 3], index=[4, 3, 5]))).reindex(index=[3, 5, 4])
    tm.assert_frame_equal(df, expected)

    # GH 6252
    # setting with an empty frame
    keys1 = ['@' + str(i) for i in range(5)]
    val1 = np.arange(5, dtype='int64')

    keys2 = ['@' + str(i) for i in range(4)]
    val2 = np.arange(4, dtype='int64')

    index = list(set(keys1).union(keys2))
    df = DataFrame(index=index)
    df['A'] = np.nan
    df.loc[keys1, 'A'] = val1

    df['B'] = np.nan
    df.loc[keys2, 'B'] = val2

    expected = DataFrame(dict(A=Series(val1, index=keys1), B=Series(
        val2, index=keys2))).reindex(index=index)
    tm.assert_frame_equal(df, expected)

    # GH 8669
    # invalid coercion of nan -> int
    df = DataFrame({'A': [1, 2, 3], 'B': np.nan})
    df.loc[df.B > df.A, 'B'] = df.A
    expected = DataFrame({'A': [1, 2, 3], 'B': np.nan})
    tm.assert_frame_equal(df, expected)

    # GH 6546
    # setting with mixed labels
    df = DataFrame({1: [1, 2], 2: [3, 4], 'a': ['a', 'b']})

    result = df.loc[0, [1, 2]]
    expected = Series([1, 3], index=[1, 2], dtype=object, name=0)
    tm.assert_series_equal(result, expected)

    expected = DataFrame({1: [5, 2], 2: [6, 4], 'a': ['a', 'b']})
    df.loc[0, [1, 2]] = [5, 6]
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_loc_setitem_frame_multiples(self):
    """Assign a relabelled slice of a frame back into the frame via ``.loc``."""
    # multiple setting
    df = DataFrame({'A': ['foo', 'bar', 'baz'],
                    'B': Series(range(3), dtype=np.int64)})
    shifted = df.loc[1:2]
    shifted.index = df.index[0:2]
    df.loc[0:1] = shifted
    expected = DataFrame({'A': ['bar', 'baz', 'baz'],
                          'B': Series([1, 2, 2], dtype=np.int64)})
    tm.assert_frame_equal(df, expected)

    # multiple setting with frame on rhs (with M8)
    df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
                    'val': Series(range(5), dtype=np.int64)})
    expected_dates = [Timestamp(day) for day in (
        '20000101', '20000102', '20000101', '20000102', '20000103')]
    expected = DataFrame({'date': expected_dates,
                          'val': Series([0, 1, 0, 1, 2], dtype=np.int64)})
    shifted = df.loc[0:2]
    shifted.index = df.index[2:5]
    df.loc[2:4] = shifted
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    'indexer', [['A'], slice(None, 'A', None), np.array(['A'])])
@pytest.mark.parametrize(
    'value', [['Z'], np.array(['Z'])])
def test_loc_setitem_with_scalar_index(self, indexer, value):
    """Set one cell with a list-like column indexer and a list-like value."""
    # GH #19474
    # assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated
    # elementwisely, not using "setter('A', ['Z'])".
    frame = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
    frame.loc[0, indexer] = value

    cell = frame.loc[0, 'A']
    # scalar-ness is asserted first, so the comparison only sees scalars
    assert is_scalar(cell)
    assert cell == 'Z'
|
||||
|
||||
def test_loc_coerceion(self):
|
||||
|
||||
# 12411
|
||||
df = DataFrame({'date': [Timestamp('20130101').tz_localize('UTC'),
|
||||
pd.NaT]})
|
||||
expected = df.dtypes
|
||||
|
||||
result = df.iloc[[0]]
|
||||
tm.assert_series_equal(result.dtypes, expected)
|
||||
|
||||
result = df.iloc[[1]]
|
||||
tm.assert_series_equal(result.dtypes, expected)
|
||||
|
||||
# 12045
|
||||
import datetime
|
||||
df = DataFrame({'date': [datetime.datetime(2012, 1, 1),
|
||||
datetime.datetime(1012, 1, 2)]})
|
||||
expected = df.dtypes
|
||||
|
||||
result = df.iloc[[0]]
|
||||
tm.assert_series_equal(result.dtypes, expected)
|
||||
|
||||
result = df.iloc[[1]]
|
||||
tm.assert_series_equal(result.dtypes, expected)
|
||||
|
||||
# 11594
|
||||
df = DataFrame({'text': ['some words'] + [None] * 9})
|
||||
expected = df.dtypes
|
||||
|
||||
result = df.iloc[0:2]
|
||||
tm.assert_series_equal(result.dtypes, expected)
|
||||
|
||||
result = df.iloc[3:]
|
||||
tm.assert_series_equal(result.dtypes, expected)
|
||||
|
||||
def test_loc_non_unique(self):
    """Label slices on a duplicated index: KeyError unsorted, rows when sorted."""
    # GH3659
    # non-unique indexer with loc slice
    # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs

    # these are going to raise because we are non monotonic
    df = DataFrame({'A': [1, 2, 3, 4, 5, 6],
                    'B': [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3])
    for bad_slice in (slice(1, None), slice(0, None), slice(1, 2)):
        with pytest.raises(KeyError):
            df.loc[(bad_slice,)]

    # monotonic are ok
    df = DataFrame({'A': [1, 2, 3, 4, 5, 6],
                    'B': [3, 4, 5, 6, 7, 8]},
                   index=[0, 1, 0, 1, 2, 3])
    df = df.sort_index(axis=0)

    result = df.loc[1:]
    expected = DataFrame({'A': [2, 4, 5, 6], 'B': [4, 6, 7, 8]},
                         index=[1, 1, 2, 3])
    tm.assert_frame_equal(result, expected)

    result = df.loc[0:]
    tm.assert_frame_equal(result, df)

    result = df.loc[1:2]
    expected = DataFrame({'A': [2, 4, 5], 'B': [4, 6, 7]},
                         index=[1, 1, 2])
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_non_unique_memory_error(self):
    """List selection with ``.loc`` on a non-unique index, small then large."""
    # GH 4280
    # non_unique index with a large selection triggers a memory error
    columns = list('ABCDEFG')

    def gen_test(n_unique, n_dup):
        # random rows labelled 0..n_unique-1, then n_dup rows of ones
        # that are all labelled 0
        unique_part = DataFrame(np.random.randn(n_unique, len(columns)),
                                index=lrange(n_unique), columns=columns)
        dup_part = DataFrame(np.ones((n_dup, len(columns))),
                             index=[0] * n_dup, columns=columns)
        return pd.concat([unique_part, dup_part])

    def gen_expected(df, mask):
        count = len(mask)
        ones_block = DataFrame(np.ones((count, len(columns))),
                               index=[0] * count,
                               columns=columns)
        return pd.concat([df.take([0]), ones_block, df.take(mask[1:])])

    # same check at two sizes; the mask spans as many labels as there
    # are duplicated rows
    for n_unique, n_dup in ((900, 100), (900000, 100000)):
        df = gen_test(n_unique, n_dup)
        assert not df.index.is_unique

        mask = np.arange(n_dup)
        result = df.loc[mask]
        expected = gen_expected(df, mask)
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_name(self):
    """The index name is kept by list selection via iloc, ix and loc."""
    # GH 3880
    df = DataFrame([[1, 1], [1, 1]])
    df.index.name = 'index_name'

    name_via_iloc = df.iloc[[0, 1]].index.name
    assert name_via_iloc == 'index_name'

    # only the .ix access runs inside the warning-recording context
    with catch_warnings(record=True):
        name_via_ix = df.ix[[0, 1]].index.name
    assert name_via_ix == 'index_name'

    name_via_loc = df.loc[[0, 1]].index.name
    assert name_via_loc == 'index_name'
|
||||
|
||||
def test_loc_empty_list_indexer_is_ok(self):
    """An empty list given to ``.loc`` matches the equivalent empty iloc slice."""
    from pandas.util.testing import makeCustomDataframe as mkdf
    df = mkdf(5, 2)
    strict = dict(check_index_type=True, check_column_type=True)

    # vertical empty
    result = df.loc[:, []]
    expected = df.iloc[:, :0]
    tm.assert_frame_equal(result, expected, **strict)

    # horizontal empty
    result = df.loc[[], :]
    expected = df.iloc[:0, :]
    tm.assert_frame_equal(result, expected, **strict)

    # horizontal empty, row indexer only
    result = df.loc[[]]
    expected = df.iloc[:0, :]
    tm.assert_frame_equal(result, expected, **strict)
|
||||
|
||||
def test_identity_slice_returns_new_object(self):
    """``obj.loc[:]`` is a new object whose values follow the original."""
    # GH13873
    original_df = DataFrame({'a': [1, 2, 3]})
    sliced_df = original_df.loc[:]
    assert sliced_df is not original_df
    plain_slice = original_df[:]
    assert plain_slice is not original_df

    # should be a shallow copy
    original_df['a'] = [4, 4, 4]
    assert (sliced_df['a'] == 4).all()

    # These should not return copies
    full_selection = original_df.loc[:, :]
    assert original_df is full_selection
    df = DataFrame(np.random.randn(10, 4))
    column_via_getitem = df[0]
    column_via_loc = df.loc[:, 0]
    assert column_via_getitem is column_via_loc

    # Same tests for Series
    original_series = Series([1, 2, 3, 4, 5, 6])
    sliced_series = original_series.loc[:]
    assert sliced_series is not original_series
    plain_slice = original_series[:]
    assert plain_slice is not original_series

    # a write to the original must show up in the slice
    original_series[:3] = [7, 8, 9]
    assert all(sliced_series[:3] == [7, 8, 9])
|
||||
|
||||
@pytest.mark.parametrize(
    'indexer_type_1',
    (list, tuple, set, slice, np.ndarray, Series, Index))
@pytest.mark.parametrize(
    'indexer_type_2',
    (list, tuple, set, slice, np.ndarray, Series, Index))
def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
    """Select from a two-level MultiIndex with one indexer type per level."""
    # GH #19686
    # .loc should work with nested indexers which can be
    # any list-like objects (see `pandas.api.types.is_list_like`) or slices

    def convert_nested_indexer(indexer_type, keys):
        # ndarray and slice are built differently from the other types
        if indexer_type is np.ndarray:
            converted = np.array(keys)
        elif indexer_type is slice:
            converted = slice(*keys)
        else:
            converted = indexer_type(keys)
        return converted

    level_0 = [10, 20, 30]
    level_1 = [1, 2, 3]
    index = pd.MultiIndex.from_product([level_0, level_1])
    df = pd.DataFrame(
        np.arange(len(index), dtype='int64'),
        index=index, columns=['Data'])

    keys = ([10, 20], [2, 3])

    # check indexers with all the combinations of nested objects
    # of all the valid types
    indexer = (convert_nested_indexer(indexer_type_1, keys[0]),
               convert_nested_indexer(indexer_type_2, keys[1]))

    result = df.loc[indexer, 'Data']
    expected = pd.Series(
        [1, 2, 4, 5], name='Data',
        index=pd.MultiIndex.from_product(keys))

    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_loc_uint64(self):
|
||||
# GH20722
|
||||
# Test whether loc accept uint64 max value as index.
|
||||
s = pd.Series([1, 2],
|
||||
index=[np.iinfo('uint64').max - 1,
|
||||
np.iinfo('uint64').max])
|
||||
|
||||
result = s.loc[np.iinfo('uint64').max - 1]
|
||||
expected = s.iloc[0]
|
||||
assert result == expected
|
||||
|
||||
result = s.loc[[np.iinfo('uint64').max - 1]]
|
||||
expected = s.iloc[[0]]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s.loc[[np.iinfo('uint64').max - 1,
|
||||
np.iinfo('uint64').max]]
|
||||
tm.assert_series_equal(result, s)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,219 +0,0 @@
|
||||
import pytest
|
||||
from warnings import catch_warnings
|
||||
|
||||
import numpy as np
|
||||
from pandas.util import testing as tm
|
||||
from pandas import Panel, date_range, DataFrame
|
||||
|
||||
|
||||
class TestPanel(object):
    """Indexing tests for ``Panel``: iloc/loc getitem, setitem, assignment."""

    def test_iloc_getitem_panel(self):
        """Positional ``.iloc`` selections equal their ``.loc`` counterparts."""
        with catch_warnings(record=True):
            # GH 7189
            p = Panel(np.arange(4 * 3 * 2).reshape(4, 3, 2),
                      items=['A', 'B', 'C', 'D'],
                      major_axis=['a', 'b', 'c'],
                      minor_axis=['one', 'two'])

            # one, two, then three positional keys
            result = p.iloc[1]
            expected = p.loc['B']
            tm.assert_frame_equal(result, expected)

            result = p.iloc[1, 1]
            expected = p.loc['B', 'b']
            tm.assert_series_equal(result, expected)

            result = p.iloc[1, 1, 1]
            expected = p.loc['B', 'b', 'two']
            assert result == expected

            # slice
            result = p.iloc[1:3]
            expected = p.loc[['B', 'C']]
            tm.assert_panel_equal(result, expected)

            result = p.iloc[:, 0:2]
            expected = p.loc[:, ['a', 'b']]
            tm.assert_panel_equal(result, expected)

            # list of integers
            result = p.iloc[[0, 2]]
            expected = p.loc[['A', 'C']]
            tm.assert_panel_equal(result, expected)

            # neg indices
            result = p.iloc[[-1, 1], [-1, 1]]
            expected = p.loc[['D', 'B'], ['c', 'b']]
            tm.assert_panel_equal(result, expected)

            # dups indices
            result = p.iloc[[-1, -1, 1], [-1, 1]]
            expected = p.loc[['D', 'D', 'B'], ['c', 'b']]
            tm.assert_panel_equal(result, expected)

            # combined
            result = p.iloc[0, [True, True], [0, 1]]
            expected = p.loc['A', ['a', 'b'], ['one', 'two']]
            tm.assert_frame_equal(result, expected)

            # out-of-bounds exception
            with pytest.raises(IndexError):
                p.iloc[tuple([10, 5])]

            with pytest.raises(IndexError):
                p.iloc[0, [True, True], [0, 1, 2]]

            # trying to use a label
            with pytest.raises(ValueError):
                p.iloc[tuple(['j', 'D'])]

            # GH
            p = Panel(
                np.random.rand(4, 3, 2), items=['A', 'B', 'C', 'D'],
                major_axis=['U', 'V', 'W'], minor_axis=['X', 'Y'])
            expected = p['A']

            # three spellings of "all of item 0" must give the same frame
            all_rows = [True, True, True]
            whole_item_keys = (
                (0, slice(None), slice(None)),
                (0, all_rows, slice(None)),
                (0, all_rows, [0, 1]),
            )
            for key in whole_item_keys:
                result = p.iloc[key]
                tm.assert_frame_equal(result, expected)

            # minor-axis position 2 does not exist
            with pytest.raises(IndexError):
                p.iloc[0, [True, True, True], [0, 1, 2]]

            with pytest.raises(IndexError):
                p.iloc[0, [True, True, True], [2]]

    def test_iloc_panel_issue(self):
        """Result shapes for mixed slice/scalar ``.iloc`` keys on a 4x4x4 panel."""
        with catch_warnings(record=True):
            # see gh-3617
            p = Panel(np.random.randn(4, 4, 4))

            first3 = slice(None, 3)
            shape_cases = (
                ((first3, first3, first3), (3, 3, 3)),
                ((1, first3, first3), (3, 3)),
                ((first3, 1, first3), (3, 3)),
                ((first3, first3, 1), (3, 3)),
                ((1, 1, first3), (3, )),
                ((1, first3, 1), (3, )),
                ((first3, 1, 1), (3, )),
            )
            for key, shape in shape_cases:
                assert p.iloc[key].shape == shape

    def test_panel_getitem(self):
        """Label-based getitem on panels: date strings, item lists, object keys."""
        with catch_warnings(record=True):
            # GH4016, date selection returns a frame when a partial string
            # selection
            ind = date_range(start="2000", freq="D", periods=1000)
            values = np.random.randn(len(ind), 5)
            df = DataFrame(values, index=ind, columns=list('ABCDE'))
            panel = Panel({'frame_' + c: df for c in list('ABC')})

            test2 = panel.loc[:, "2002":"2002-12-31"]
            test1 = panel.loc[:, "2002"]
            tm.assert_panel_equal(test1, test2)

            # GH8710
            # multi-element getting with a list
            panel = tm.makePanel()

            expected = panel.iloc[[0, 1]]
            wanted_items = ['ItemA', 'ItemB']

            result = panel.loc[wanted_items]
            tm.assert_panel_equal(result, expected)

            result = panel.loc[wanted_items, :, :]
            tm.assert_panel_equal(result, expected)

            result = panel[wanted_items]
            tm.assert_panel_equal(result, expected)

            result = panel.loc['ItemA':'ItemB']
            tm.assert_panel_equal(result, expected)

            with catch_warnings(record=True):
                result = panel.ix[wanted_items]
                tm.assert_panel_equal(result, expected)

            # with an object-like
            # GH 9140
            class TestObject(object):

                def __str__(self):
                    return "TestObject"

            obj = TestObject()

            p = Panel(np.random.randn(1, 5, 4), items=[obj],
                      major_axis=date_range('1/1/2000', periods=5),
                      minor_axis=['A', 'B', 'C', 'D'])

            expected = p.iloc[0]
            result = p[obj]
            tm.assert_frame_equal(result, expected)

    def test_panel_setitem(self):
        """``p[item] = frame`` and ``p.loc[item, :, :] = frame`` give equal panels."""
        with catch_warnings(record=True):
            # GH 7763
            # loc and setitem have setting differences
            np.random.seed(0)
            index = range(3)
            columns = list('abc')

            # items are drawn in the order A, B, C after seeding
            panel = Panel({item: DataFrame(np.random.randn(3, 3),
                                           index=index, columns=columns)
                           for item in ('A', 'B', 'C')})

            replace = DataFrame(np.eye(3, 3), index=range(3), columns=columns)
            expected = Panel({'A': replace, 'B': replace, 'C': replace})

            p = panel.copy()
            for item in 'ABC':
                p[item] = replace
            tm.assert_panel_equal(p, expected)

            p = panel.copy()
            for item in 'ABC':
                p.loc[item, :, :] = replace
            tm.assert_panel_equal(p, expected)

    def test_panel_assignment(self):
        """Assigning a panel slice into a panel slice raises NotImplementedError."""
        with catch_warnings(record=True):
            # GH3777
            wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
                       major_axis=date_range('1/1/2000', periods=5),
                       minor_axis=['A', 'B', 'C', 'D'])
            wp2 = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
                        major_axis=date_range('1/1/2000', periods=5),
                        minor_axis=['A', 'B', 'C', 'D'])

            # TODO: unused?
            # expected = wp.loc[['Item1', 'Item2'], :, ['A', 'B']]

            with pytest.raises(NotImplementedError):
                wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = wp2.loc[
                    ['Item1', 'Item2'], :, ['A', 'B']]

            # to_assign = wp2.loc[['Item1', 'Item2'], :, ['A', 'B']]
            # wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = to_assign
            # result = wp.loc[['Item1', 'Item2'], :, ['A', 'B']]
            # tm.assert_panel_equal(result,expected)
|
||||
@@ -1,640 +0,0 @@
|
||||
"""
|
||||
test setting *parts* of objects both positionally and label based
|
||||
|
||||
TODO: these should be split among the indexer tests
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from warnings import catch_warnings
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Series, DataFrame, Panel, Index, date_range
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestPartialSetting(object):
|
||||
|
||||
def test_partial_setting(self):
    """Setting with a label that does not exist yet enlarges the object.

    Runs the check on a Series, a DataFrame and a Panel. Label-based
    setters (``[]``, ``.loc``, ``.ix``, ``.at``) are expected to add the
    new label; positional setters (``.iloc``, ``.iat``) are expected to
    raise ``IndexError`` when the position is out of bounds.
    """
    # GH2578, allow ix and friends to partially set

    # series
    s_orig = Series([1, 2, 3])

    s = s_orig.copy()
    s[5] = 5
    expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
    tm.assert_series_equal(s, expected)

    s = s_orig.copy()
    s.loc[5] = 5
    expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
    tm.assert_series_equal(s, expected)

    s = s_orig.copy()
    s[5] = 5.
    expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5])
    tm.assert_series_equal(s, expected)

    s = s_orig.copy()
    s.loc[5] = 5.
    expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5])
    tm.assert_series_equal(s, expected)

    # iloc/iat raise
    s = s_orig.copy()

    with pytest.raises(IndexError):
        s.iloc[3] = 5.

    with pytest.raises(IndexError):
        s.iat[3] = 5.

    # ## frame ##

    df_orig = DataFrame(
        np.arange(6).reshape(3, 2), columns=['A', 'B'], dtype='int64')

    # iloc/iat raise
    df = df_orig.copy()

    with pytest.raises(IndexError):
        df.iloc[4, 2] = 5.

    with pytest.raises(IndexError):
        df.iat[4, 2] = 5.

    # row setting where it exists
    expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]}))
    df = df_orig.copy()
    df.iloc[1] = df.iloc[2]
    tm.assert_frame_equal(df, expected)

    expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]}))
    df = df_orig.copy()
    df.loc[1] = df.loc[2]
    tm.assert_frame_equal(df, expected)

    # like 2578, partial setting with dtype preservation
    expected = DataFrame(dict({'A': [0, 2, 4, 4], 'B': [1, 3, 5, 5]}))
    df = df_orig.copy()
    df.loc[3] = df.loc[2]
    tm.assert_frame_equal(df, expected)

    # single dtype frame, overwrite
    expected = DataFrame(dict({'A': [0, 2, 4], 'B': [0, 2, 4]}))
    df = df_orig.copy()
    with catch_warnings(record=True):
        df.ix[:, 'B'] = df.ix[:, 'A']
    tm.assert_frame_equal(df, expected)

    # mixed dtype frame, overwrite
    expected = DataFrame(dict({'A': [0, 2, 4], 'B': Series([0, 2, 4])}))
    df = df_orig.copy()
    df['B'] = df['B'].astype(np.float64)
    with catch_warnings(record=True):
        df.ix[:, 'B'] = df.ix[:, 'A']
    tm.assert_frame_equal(df, expected)

    # single dtype frame, partial setting
    expected = df_orig.copy()
    expected['C'] = df['A']
    df = df_orig.copy()
    with catch_warnings(record=True):
        df.ix[:, 'C'] = df.ix[:, 'A']
    tm.assert_frame_equal(df, expected)

    # mixed frame, partial setting
    # NOTE(review): this section is statement-for-statement the same as
    # the single-dtype one above; the frame is never made mixed here.
    # Confirm whether a dtype change was intended.
    expected = df_orig.copy()
    expected['C'] = df['A']
    df = df_orig.copy()
    with catch_warnings(record=True):
        df.ix[:, 'C'] = df.ix[:, 'A']
    tm.assert_frame_equal(df, expected)

    with catch_warnings(record=True):
        # ## panel ##
        # panel setting via item
        p_orig = Panel(np.arange(16).reshape(2, 4, 2),
                       items=['Item1', 'Item2'],
                       major_axis=pd.date_range('2001/1/12', periods=4),
                       minor_axis=['A', 'B'], dtype='float64')
        expected = p_orig.copy()
        expected['Item3'] = expected['Item1']
        p = p_orig.copy()
        p.loc['Item3'] = p['Item1']
        tm.assert_panel_equal(p, expected)

        # panel with aligned series
        expected = p_orig.copy()
        expected = expected.transpose(2, 1, 0)
        expected['C'] = DataFrame({'Item1': [30, 30, 30, 30],
                                   'Item2': [32, 32, 32, 32]},
                                  index=p_orig.major_axis)
        expected = expected.transpose(2, 1, 0)
        p = p_orig.copy()
        p.loc[:, :, 'C'] = Series([30, 32], index=p_orig.items)
        tm.assert_panel_equal(p, expected)

    # GH 8473
    dates = date_range('1/1/2000', periods=8)
    df_orig = DataFrame(np.random.randn(8, 4), index=dates,
                        columns=['A', 'B', 'C', 'D'])

    # new row label, existing column
    expected = pd.concat([df_orig,
                          DataFrame({'A': 7}, index=[dates[-1] + 1])],
                         sort=True)
    df = df_orig.copy()
    df.loc[dates[-1] + 1, 'A'] = 7
    tm.assert_frame_equal(df, expected)
    df = df_orig.copy()
    df.at[dates[-1] + 1, 'A'] = 7
    tm.assert_frame_equal(df, expected)

    # new row label and new column label
    exp_other = DataFrame({0: 7}, index=[dates[-1] + 1])
    expected = pd.concat([df_orig, exp_other], axis=1)

    df = df_orig.copy()
    df.loc[dates[-1] + 1, 0] = 7
    tm.assert_frame_equal(df, expected)
    df = df_orig.copy()
    df.at[dates[-1] + 1, 0] = 7
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_setting_mixed_dtype(self):
    """Add a row via ``.loc`` to a mixed-dtype frame and to empty frames."""
    # in a mixed dtype environment, try to preserve dtypes
    # by appending
    df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"])

    new_row = df.loc[1].copy()
    new_row.name = 2
    expected = df.append(new_row)

    df.loc[2] = df.loc[1]
    tm.assert_frame_equal(df, expected)

    # columns will align
    df = DataFrame(columns=['A', 'B'])
    df.loc[0] = Series(1, index=range(4))
    expected = DataFrame(columns=['A', 'B'], index=[0])
    tm.assert_frame_equal(df, expected)

    # columns will align
    df = DataFrame(columns=['A', 'B'])
    df.loc[0] = Series(1, index=['B'])

    exp = DataFrame([[np.nan, 1]], columns=['A', 'B'],
                    index=[0], dtype='float64')
    tm.assert_frame_equal(df, exp)

    # list-like must conform
    df = DataFrame(columns=['A', 'B'])

    with pytest.raises(ValueError):
        df.loc[0] = [1, 2, 3]

    # TODO: #15657, these are left as object and not coerced
    df = DataFrame(columns=['A', 'B'])
    df.loc[3] = [6, 7]

    exp = DataFrame([[6, 7]], index=[3], columns=['A', 'B'],
                    dtype='object')
    tm.assert_frame_equal(df, exp)
|
||||
|
||||
def test_series_partial_set(self):
    """List lookups with missing labels: ``.loc`` warns and matches reindex."""
    # partial set with new index
    # Regression from GH4825

    def check_with_missing(obj, keys, values):
        # .loc on a list with some absent labels must emit FutureWarning
        # and return the same Series as .reindex (loc equiv to .reindex)
        expected = Series(values, index=keys)
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = obj.loc[keys]
        tm.assert_series_equal(result, expected, check_index_type=True)

        result = obj.reindex(keys)
        tm.assert_series_equal(result, expected, check_index_type=True)

    ser = Series([0.1, 0.2], index=[1, 2])

    check_with_missing(ser, [3, 2, 3], [np.nan, 0.2, np.nan])
    check_with_missing(ser, [3, 2, 3, 'x'], [np.nan, 0.2, np.nan, np.nan])

    # every label present: plain lookup outside any warning check
    expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1])
    result = ser.loc[[2, 2, 1]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    check_with_missing(ser, [2, 2, 'x', 1], [0.2, 0.2, np.nan, 0.1])

    # raises as nothing is in the index
    with pytest.raises(KeyError):
        ser.loc[[3, 3, 3]]

    check_with_missing(ser, [2, 2, 3], [0.2, 0.2, np.nan])

    s = Series([0.1, 0.2, 0.3], index=[1, 2, 3])
    check_with_missing(s, [3, 4, 4], [0.3, np.nan, np.nan])

    s = Series([0.1, 0.2, 0.3, 0.4],
               index=[1, 2, 3, 4])
    check_with_missing(s, [5, 3, 3], [np.nan, 0.3, 0.3])

    s = Series([0.1, 0.2, 0.3, 0.4],
               index=[1, 2, 3, 4])
    check_with_missing(s, [5, 4, 4], [np.nan, 0.4, 0.4])

    s = Series([0.1, 0.2, 0.3, 0.4],
               index=[4, 5, 6, 7])
    check_with_missing(s, [7, 2, 2], [0.4, np.nan, np.nan])

    s = Series([0.1, 0.2, 0.3, 0.4],
               index=[1, 2, 3, 4])
    check_with_missing(s, [4, 5, 5], [0.4, np.nan, np.nan])

    # iloc
    expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1])
    result = ser.iloc[[1, 1, 0, 0]]
    tm.assert_series_equal(result, expected, check_index_type=True)
|
||||
|
||||
def test_series_partial_set_with_name(self):
    """Check list-like ``.loc``/``.iloc`` lookups on a named Series.

    Every expected result is built over an Index named ``'idx'`` and
    carries the Series name ``'s'``, so each comparison also checks that
    both names come back from the lookup.
    """
    # GH 11497

    def _named(values, labels, dtype='int64'):
        # Series named 's' over an Index named 'idx'
        return Series(values,
                      index=Index(labels, dtype=dtype, name='idx'),
                      name='s')

    ser = _named([0.1, 0.2], [1, 2])

    # loc
    wanted = _named([np.nan, 0.2, np.nan], [3, 2, 3])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        got = ser.loc[[3, 2, 3]]
    tm.assert_series_equal(got, wanted, check_index_type=True)

    wanted = _named([np.nan, 0.2, np.nan, np.nan], [3, 2, 3, 'x'],
                    dtype='object')
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        got = ser.loc[[3, 2, 3, 'x']]
    tm.assert_series_equal(got, wanted, check_index_type=True)

    # every requested label exists, so no warning context is used here
    wanted = _named([0.2, 0.2, 0.1], [2, 2, 1])
    got = ser.loc[[2, 2, 1]]
    tm.assert_series_equal(got, wanted, check_index_type=True)

    wanted = _named([0.2, 0.2, np.nan, 0.1], [2, 2, 'x', 1],
                    dtype='object')
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        got = ser.loc[[2, 2, 'x', 1]]
    tm.assert_series_equal(got, wanted, check_index_type=True)

    # raises because none of the requested labels is in the index
    with pytest.raises(KeyError):
        ser.loc[[3, 3, 3]]

    wanted = _named([0.2, 0.2, np.nan], [2, 2, 3])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        got = ser.loc[[2, 2, 3]]
    tm.assert_series_equal(got, wanted, check_index_type=True)

    # Same check against freshly built Series:
    # (source values, source labels, requested labels, expected values)
    fresh_cases = [
        ([0.1, 0.2, 0.3], [1, 2, 3],
         [3, 4, 4], [0.3, np.nan, np.nan]),
        ([0.1, 0.2, 0.3, 0.4], [1, 2, 3, 4],
         [5, 3, 3], [np.nan, 0.3, 0.3]),
        ([0.1, 0.2, 0.3, 0.4], [1, 2, 3, 4],
         [5, 4, 4], [np.nan, 0.4, 0.4]),
        ([0.1, 0.2, 0.3, 0.4], [4, 5, 6, 7],
         [7, 2, 2], [0.4, np.nan, np.nan]),
        ([0.1, 0.2, 0.3, 0.4], [1, 2, 3, 4],
         [4, 5, 5], [0.4, np.nan, np.nan]),
    ]
    for src_values, src_labels, key, exp_values in fresh_cases:
        wanted = _named(exp_values, key)
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            got = _named(src_values, src_labels).loc[key]
        tm.assert_series_equal(got, wanted, check_index_type=True)

    # iloc
    wanted = _named([0.2, 0.2, 0.1, 0.1], [2, 2, 1, 1])
    got = ser.iloc[[1, 1, 0, 0]]
    tm.assert_series_equal(got, wanted, check_index_type=True)
|
||||
|
||||
def test_partial_set_invalid(self):
    """Setting a new row on a time-indexed frame only works for a
    string label; numeric labels are expected to raise.
    """
    # GH 4940
    # allow only setting of 'valid' values
    orig = tm.makeTimeDataFrame()
    frame = orig.copy()

    # don't allow not string inserts
    # (catch_warnings records whatever warnings the .ix accesses emit)
    with pytest.raises(TypeError), catch_warnings(record=True):
        frame.loc[100.0, :] = frame.ix[0]

    with pytest.raises(TypeError), catch_warnings(record=True):
        frame.loc[100, :] = frame.ix[0]

    with pytest.raises(TypeError), catch_warnings(record=True):
        frame.ix[100.0, :] = frame.ix[0]

    with pytest.raises(ValueError), catch_warnings(record=True):
        frame.ix[100, :] = frame.ix[0]

    # allow object conversion here
    frame = orig.copy()
    with catch_warnings(record=True):
        frame.loc['a', :] = frame.ix[0]
        appended = orig.append(Series(frame.ix[0], name='a'))
    tm.assert_frame_equal(frame, appended)

    widened_index = Index(orig.index.tolist() + ['a'])
    tm.assert_index_equal(frame.index, widened_index)
    assert frame.index.dtype == 'object'
|
||||
|
||||
def test_partial_set_empty_series(self):
    """Grow an empty Series one label at a time through ``.loc`` and
    compare it with the equivalent directly-constructed Series after
    every assignment.
    """
    # GH5226

    # partially set with an empty object series
    scenarios = [
        [(1, 1), (3, 3)],
        [(1, 1.), (3, 3.)],
        [('foo', 1), ('bar', 3), (3, 4)],
    ]
    for assignments in scenarios:
        ser = Series()
        labels = []
        values = []
        for label, value in assignments:
            ser.loc[label] = value
            labels.append(label)
            values.append(value)
            tm.assert_series_equal(ser, Series(values, index=labels))
|
||||
|
||||
def test_partial_set_empty_frame(self):
    """Exercise column and row assignment on empty DataFrames.

    Covers assignments that are expected to raise ``ValueError``,
    assignments of empty data that only add a column, and assignments of
    an indexed Series to a frame that has no rows yet.
    """
    # partially set with an empty object
    # frame
    empty = DataFrame()

    with pytest.raises(ValueError):
        empty.loc[1] = 1

    with pytest.raises(ValueError):
        empty.loc[1] = Series([1], index=['foo'])

    with pytest.raises(ValueError):
        empty.loc[:, 1] = 1

    # these work as they don't really change
    # anything but the index
    # GH5632
    expected = DataFrame(columns=['foo'], index=Index([], dtype='int64'))
    object_fillers = [
        lambda frame: Series([], dtype='object'),
        lambda frame: Series(frame.index),
        lambda frame: frame.index,
    ]
    for filler in object_fillers:
        result = DataFrame()
        result['foo'] = filler(result)
        tm.assert_frame_equal(result, expected)

    expected = DataFrame(columns=['foo'], index=Index([], dtype='int64'))
    expected['foo'] = expected['foo'].astype('float64')
    float_fillers = [
        lambda frame: [],
        lambda frame: Series(np.arange(len(frame)), dtype='float64'),
    ]
    for filler in float_fillers:
        result = DataFrame()
        result['foo'] = filler(result)
        tm.assert_frame_equal(result, expected)

    expected = DataFrame(columns=['foo'], index=Index([], dtype='int64'))
    expected['foo'] = expected['foo'].astype('float64')
    result = DataFrame()
    # the fresh frame starts out with an object-dtype index
    tm.assert_index_equal(result.index, Index([], dtype='object'))
    result['foo'] = range(len(result))
    tm.assert_frame_equal(result, expected)

    # the same indexed Series assigned through .loc and through
    # __setitem__ must give the same frame
    via_loc = DataFrame()
    tm.assert_index_equal(via_loc.columns, Index([], dtype=object))
    via_setitem = DataFrame()
    via_setitem[1] = Series([1], index=['foo'])
    via_loc.loc[:, 1] = Series([1], index=['foo'])
    tm.assert_frame_equal(via_loc,
                          DataFrame([[1]], index=['foo'], columns=[1]))
    tm.assert_frame_equal(via_loc, via_setitem)

    # no index to start
    expected = DataFrame({0: Series(1, index=range(4))},
                         columns=['A', 'B', 0])

    result = DataFrame(columns=['A', 'B'])
    result[0] = Series(1, index=range(4))
    # smoke checks: neither the dtypes lookup nor the repr should raise
    result.dtypes
    str(result)
    tm.assert_frame_equal(result, expected)

    result = DataFrame(columns=['A', 'B'])
    result.loc[:, 0] = Series(1, index=range(4))
    result.dtypes
    str(result)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_partial_set_empty_frame_row(self):
    """Adding or reindexing columns on a frame with no rows must leave
    it without rows.
    """
    # GH5720, GH5744
    # don't create rows when empty
    source = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
    # the filter matches nothing, so the selection has no rows
    selection = source[source.A > 5]
    selection['New'] = np.nan

    expected = DataFrame(columns=['A', 'B', 'New'],
                         index=Index([], dtype='int64'))
    for column, dtype in [('A', 'int64'),
                          ('B', 'float64'),
                          ('New', 'float64')]:
        expected[column] = expected[column].astype(dtype)
    tm.assert_frame_equal(selection, expected)

    expected = DataFrame(columns=['a', 'b', 'c c', 'd'])
    expected['d'] = expected['d'].astype('int64')
    result = DataFrame(columns=['a', 'b', 'c c'])
    result['d'] = 3
    tm.assert_frame_equal(result, expected)
    tm.assert_series_equal(result['c c'],
                           Series(name='c c', dtype=object))

    # reindex columns is ok
    source = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
    selection = source[source.A > 5]
    result = selection.reindex(columns=['A', 'B', 'C'])

    expected = DataFrame(columns=['A', 'B', 'C'],
                         index=Index([], dtype='int64'))
    for column, dtype in [('A', 'int64'),
                          ('B', 'float64'),
                          ('C', 'float64')]:
        expected[column] = expected[column].astype(dtype)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_partial_set_empty_frame_set_series(self):
    """A frame built from an empty Series equals a frame built from a
    dict holding an empty Series under the matching column label.
    """
    # GH 5756
    # setting with empty Series
    from_unnamed = DataFrame(Series())
    expected = DataFrame({0: Series()})
    tm.assert_frame_equal(from_unnamed, expected)

    from_named = DataFrame(Series(name='foo'))
    expected = DataFrame({'foo': Series()})
    tm.assert_frame_equal(from_named, expected)
|
||||
|
||||
def test_partial_set_empty_frame_empty_copy_assignment(self):
    """Assigning a scalar column to a copy of a column-less frame."""
    # GH 5932
    # copy on empty with assignment fails
    result = DataFrame(index=[0]).copy()
    result['a'] = 0

    tm.assert_frame_equal(result,
                          DataFrame(0, index=[0], columns=['a']))
|
||||
|
||||
def test_partial_set_empty_frame_empty_consistencies(self):
    """Filling column ``'x'`` of an empty two-column frame leaves ``'y'``
    as NaN, whether the fill is a list or a single ``.loc`` cell.
    """
    # GH 6171
    # consistency on empty frames
    columns = ['x', 'y']

    # list of ints; dtypes are deliberately not compared
    result = DataFrame(columns=columns)
    result['x'] = [1, 2]
    wanted = DataFrame(dict(x=[1, 2], y=[np.nan, np.nan]))
    tm.assert_frame_equal(result, wanted, check_dtype=False)

    # list of strings; compared against an all-object frame
    result = DataFrame(columns=columns)
    result['x'] = ['1', '2']
    wanted = DataFrame(dict(x=['1', '2'], y=[np.nan, np.nan]),
                       dtype=object)
    tm.assert_frame_equal(result, wanted)

    # single cell through .loc; dtypes are deliberately not compared
    result = DataFrame(columns=columns)
    result.loc[0, 'x'] = 1
    wanted = DataFrame(dict(x=[1], y=[np.nan]))
    tm.assert_frame_equal(result, wanted, check_dtype=False)
|
||||
@@ -1,172 +0,0 @@
|
||||
""" test scalar indexing, including at and iat """
|
||||
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import (Series, DataFrame, Timestamp,
|
||||
Timedelta, date_range)
|
||||
from pandas.util import testing as tm
|
||||
from pandas.tests.indexing.common import Base
|
||||
|
||||
|
||||
class TestScalar(Base):
    """Scalar get/set tests for the ``.at`` and ``.iat`` indexers."""

    def test_at_and_iat_get(self):
        """Values read through ``iat``/``at`` match ``self.get_value``."""

        def _check(obj, accessor, values=False):
            # nothing to verify when the fixture is None
            if obj is None:
                return
            for key in self.generate_indices(obj, values):
                found = getattr(obj, accessor)[key]
                wanted = self.get_value(obj, key, values)
                tm.assert_almost_equal(found, wanted)

        for kind in self._objs:
            fixtures = getattr(self, kind)

            # iat
            for obj in [fixtures['ints'], fixtures['uints']]:
                _check(obj, 'iat', values=True)

            non_integer = [fixtures['labels'],
                           fixtures['ts'],
                           fixtures['floats']]
            for obj in non_integer:
                if obj is None:
                    continue
                with pytest.raises(ValueError):
                    self.check_values(obj, 'iat')

            # at
            everything = [fixtures['ints'], fixtures['uints'],
                          fixtures['labels'], fixtures['ts'],
                          fixtures['floats']]
            for obj in everything:
                _check(obj, 'at')

    def test_at_and_iat_set(self):
        """Writing 1 through ``iat``/``at`` is visible via
        ``self.get_value``.
        """

        def _check(obj, accessor, values=False):
            # nothing to verify when the fixture is None
            if obj is None:
                return
            for key in self.generate_indices(obj, values):
                getattr(obj, accessor)[key] = 1
                stored = self.get_value(obj, key, values)
                tm.assert_almost_equal(stored, 1)

        for kind in self._objs:
            fixtures = getattr(self, kind)

            # iat
            for obj in [fixtures['ints'], fixtures['uints']]:
                _check(obj, 'iat', values=True)

            non_integer = [fixtures['labels'],
                           fixtures['ts'],
                           fixtures['floats']]
            for obj in non_integer:
                if obj is None:
                    continue
                with pytest.raises(ValueError):
                    _check(obj, 'iat')

            # at
            everything = [fixtures['ints'], fixtures['uints'],
                          fixtures['labels'], fixtures['ts'],
                          fixtures['floats']]
            for obj in everything:
                _check(obj, 'at')

    def test_at_iat_coercion(self):
        """``at`` accepts a Timestamp key, and scalar reads of
        datetime/timedelta data compare equal to Timestamp/Timedelta.
        """
        # as timestamp is not a tuple!
        dates = date_range('1/1/2000', periods=8)
        frame = DataFrame(np.random.randn(8, 4),
                          index=dates,
                          columns=['A', 'B', 'C', 'D'])
        col = frame['A']

        assert col.at[dates[5]] == col.values[5]

        # GH 7729
        # make sure we are boxing the returns
        stamps = Series(['2014-01-01', '2014-02-02'],
                        dtype='datetime64[ns]')
        wanted = Timestamp('2014-02-02')
        for accessor in ('iat', 'iloc'):
            assert getattr(stamps, accessor)[1] == wanted

        deltas = Series(['1 days', '2 days'], dtype='timedelta64[ns]')
        wanted = Timedelta('2 days')
        for accessor in ('iat', 'iloc'):
            assert getattr(deltas, accessor)[1] == wanted

    def test_iat_invalid_args(self):
        """Placeholder; no assertions are made."""
        pass

    def test_imethods_with_dups(self):
        """Positional access on objects whose index has duplicates."""
        # GH6493
        # iat/iloc with dups
        ser = Series(range(5), index=[1, 1, 2, 2, 3], dtype='int64')
        assert ser.iloc[2] == 2
        assert ser.iat[2] == 2

        # out-of-bounds positions, positive and negative
        with pytest.raises(IndexError):
            ser.iat[10]
        with pytest.raises(IndexError):
            ser.iat[-10]

        picked = ser.iloc[[2, 3]]
        wanted = Series([2, 3], [2, 2], dtype='int64')
        tm.assert_series_equal(picked, wanted)

        frame = ser.to_frame()
        row = frame.iloc[2]
        wanted = Series(2, index=[0], name=2)
        tm.assert_series_equal(row, wanted)

        assert frame.iat[2, 0] == 2

    def test_at_to_fail(self):
        """``at`` raises ValueError for a key whose type does not match
        the index instead of falling back to another lookup.
        """
        # at should not fallback
        # GH 7814
        by_label = Series([1, 2, 3], index=list('abc'))
        assert by_label.at['a'] == 1
        with pytest.raises(ValueError):
            by_label.at[0]

        frame = DataFrame({'A': [1, 2, 3]}, index=list('abc'))
        assert frame.at['a', 'A'] == 1
        with pytest.raises(ValueError):
            frame.at['a', 0]

        by_int = Series([1, 2, 3], index=[3, 2, 1])
        assert by_int.at[1] == 3
        with pytest.raises(ValueError):
            by_int.at['a']

        frame = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1])
        assert frame.at[1, 0] == 3
        with pytest.raises(ValueError):
            frame.at['a', 0]

        # GH 13822, incorrect error string with non-unique columns when
        # missing column is accessed
        dup_cols = DataFrame({'x': [1.], 'y': [2.], 'z': [3.]})
        dup_cols.columns = ['x', 'x', 'z']

        def _select_missing():
            return dup_cols[['x', 'y', 'z']]

        # Check that we get the correct value in the KeyError
        tm.assert_raises_regex(KeyError, r"\['y'\] not in index",
                               _select_missing)

    def test_at_with_tz(self):
        """``loc`` and ``at`` both return the tz-aware Timestamp."""
        # gh-15822
        naive = [Timestamp(2017, 3, 13, 13, 32, 56),
                 Timestamp(2017, 2, 16, 12, 10, 3)]
        frame = DataFrame({'name': ['John', 'Anderson'],
                           'date': naive})
        frame['date'] = frame['date'].dt.tz_localize('Asia/Shanghai')

        wanted = Timestamp('2017-03-13 13:32:56+0800',
                           tz='Asia/Shanghai')

        assert frame.loc[0, 'date'] == wanted
        assert frame.at[0, 'date'] == wanted
|
||||
@@ -1,82 +0,0 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.util import testing as tm
|
||||
import numpy as np
|
||||
|
||||
|
||||
class TestTimedeltaIndexing(object):
    """Indexing tests for timedelta-indexed and timedelta-valued data."""

    def test_boolean_indexing(self):
        """``mask`` with a boolean condition on a timedelta-indexed
        frame replaces exactly the selected rows.
        """
        # GH 14946
        frame = pd.DataFrame({'x': range(10)})
        frame.index = pd.to_timedelta(range(10), unit='s')

        # (condition, column values expected after masking with 10)
        cases = [
            (frame['x'] > 3, [0, 1, 2, 3, 10, 10, 10, 10, 10, 10]),
            (frame['x'] == 3, [0, 1, 2, 10, 4, 5, 6, 7, 8, 9]),
            (frame['x'] < 3, [10, 10, 10, 3, 4, 5, 6, 7, 8, 9]),
        ]
        for cond, data in cases:
            masked = frame.mask(cond, 10).astype('int64')
            result = frame.assign(x=masked)
            wanted = pd.DataFrame(
                data,
                index=pd.to_timedelta(range(10), unit='s'),
                columns=['x'],
                dtype='int64')
            tm.assert_frame_equal(wanted, result)

    @pytest.mark.parametrize(
        "indexer, expected",
        [(0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
         (slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]),
         ([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9])])
    def test_list_like_indexing(self, indexer, expected):
        """``.loc`` assignment keyed by labels taken from the timedelta
        index (scalar, slice, or list positions).
        """
        # GH 16637
        frame = pd.DataFrame({'x': range(10)}, dtype="int64")
        frame.index = pd.to_timedelta(range(10), unit='s')

        labels = frame.index[indexer]
        frame.loc[labels, 'x'] = 20

        wanted = pd.DataFrame(
            expected,
            index=pd.to_timedelta(range(10), unit='s'),
            columns=['x'],
            dtype="int64")

        tm.assert_frame_equal(wanted, frame)

    def test_string_indexing(self):
        """A string label selects the matching timedelta row."""
        # GH 16896
        day_index = pd.to_timedelta(range(3), unit='days')
        frame = pd.DataFrame({'x': range(3)}, index=day_index)

        first_row = frame.iloc[0]
        by_string = frame.loc['0 days']
        tm.assert_series_equal(by_string, first_row)

    @pytest.mark.parametrize(
        "value",
        [None, pd.NaT, np.nan])
    def test_masked_setitem(self, value):
        """Boolean-mask assignment of a null value is stored as NaT."""
        # issue (#18586)
        series = pd.Series([0, 1, 2], dtype='timedelta64[ns]')
        mask = series == series[0]
        series[mask] = value

        wanted = pd.Series([pd.NaT, 1, 2], dtype='timedelta64[ns]')
        tm.assert_series_equal(series, wanted)

    @pytest.mark.parametrize(
        "value",
        [None, pd.NaT, np.nan])
    def test_listlike_setitem(self, value):
        """``iloc`` assignment of a null value is stored as NaT."""
        # issue (#18586)
        series = pd.Series([0, 1, 2], dtype='timedelta64[ns]')
        series.iloc[0] = value

        wanted = pd.Series([pd.NaT, 1, 2], dtype='timedelta64[ns]')
        tm.assert_series_equal(series, wanted)

    @pytest.mark.parametrize('start,stop, expected_slice', [
        [np.timedelta64(0, 'ns'), None, slice(0, 11)],
        [np.timedelta64(1, 'D'), np.timedelta64(6, 'D'), slice(1, 7)],
        [None, np.timedelta64(4, 'D'), slice(0, 5)]])
    def test_numpy_timedelta_scalar_indexing(self, start, stop,
                                             expected_slice):
        """Label slices bounded by ``np.timedelta64`` scalars select the
        same rows as the given positional slice.
        """
        # GH 20393
        daily = pd.timedelta_range('0 days', '10 days')
        series = pd.Series(range(11), daily)

        by_label = series.loc[slice(start, stop)]
        by_position = series.iloc[expected_slice]
        tm.assert_series_equal(by_label, by_position)
|
||||
Reference in New Issue
Block a user