pruned venvs
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -1,8 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from pandas.tests.series.common import TestData
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
def test_data():
|
||||
return TestData()
|
||||
-564
@@ -1,564 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
from numpy import nan
|
||||
import pytest
|
||||
|
||||
import pandas.compat as compat
|
||||
from pandas.compat import lrange, range
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Categorical, Series, date_range, isna
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'first_slice,second_slice', [
|
||||
[[2, None], [None, -5]],
|
||||
[[None, 0], [None, -5]],
|
||||
[[None, -5], [None, 0]],
|
||||
[[None, 0], [None, 0]]
|
||||
])
|
||||
@pytest.mark.parametrize('fill', [None, -1])
|
||||
def test_align(test_data, first_slice, second_slice, join_type, fill):
|
||||
a = test_data.ts[slice(*first_slice)]
|
||||
b = test_data.ts[slice(*second_slice)]
|
||||
|
||||
aa, ab = a.align(b, join=join_type, fill_value=fill)
|
||||
|
||||
join_index = a.index.join(b.index, how=join_type)
|
||||
if fill is not None:
|
||||
diff_a = aa.index.difference(join_index)
|
||||
diff_b = ab.index.difference(join_index)
|
||||
if len(diff_a) > 0:
|
||||
assert (aa.reindex(diff_a) == fill).all()
|
||||
if len(diff_b) > 0:
|
||||
assert (ab.reindex(diff_b) == fill).all()
|
||||
|
||||
ea = a.reindex(join_index)
|
||||
eb = b.reindex(join_index)
|
||||
|
||||
if fill is not None:
|
||||
ea = ea.fillna(fill)
|
||||
eb = eb.fillna(fill)
|
||||
|
||||
assert_series_equal(aa, ea)
|
||||
assert_series_equal(ab, eb)
|
||||
assert aa.name == 'ts'
|
||||
assert ea.name == 'ts'
|
||||
assert ab.name == 'ts'
|
||||
assert eb.name == 'ts'
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'first_slice,second_slice', [
|
||||
[[2, None], [None, -5]],
|
||||
[[None, 0], [None, -5]],
|
||||
[[None, -5], [None, 0]],
|
||||
[[None, 0], [None, 0]]
|
||||
])
|
||||
@pytest.mark.parametrize('method', ['pad', 'bfill'])
|
||||
@pytest.mark.parametrize('limit', [None, 1])
|
||||
def test_align_fill_method(test_data,
|
||||
first_slice, second_slice,
|
||||
join_type, method, limit):
|
||||
a = test_data.ts[slice(*first_slice)]
|
||||
b = test_data.ts[slice(*second_slice)]
|
||||
|
||||
aa, ab = a.align(b, join=join_type, method=method, limit=limit)
|
||||
|
||||
join_index = a.index.join(b.index, how=join_type)
|
||||
ea = a.reindex(join_index)
|
||||
eb = b.reindex(join_index)
|
||||
|
||||
ea = ea.fillna(method=method, limit=limit)
|
||||
eb = eb.fillna(method=method, limit=limit)
|
||||
|
||||
assert_series_equal(aa, ea)
|
||||
assert_series_equal(ab, eb)
|
||||
|
||||
|
||||
def test_align_nocopy(test_data):
|
||||
b = test_data.ts[:5].copy()
|
||||
|
||||
# do copy
|
||||
a = test_data.ts.copy()
|
||||
ra, _ = a.align(b, join='left')
|
||||
ra[:5] = 5
|
||||
assert not (a[:5] == 5).any()
|
||||
|
||||
# do not copy
|
||||
a = test_data.ts.copy()
|
||||
ra, _ = a.align(b, join='left', copy=False)
|
||||
ra[:5] = 5
|
||||
assert (a[:5] == 5).all()
|
||||
|
||||
# do copy
|
||||
a = test_data.ts.copy()
|
||||
b = test_data.ts[:5].copy()
|
||||
_, rb = a.align(b, join='right')
|
||||
rb[:3] = 5
|
||||
assert not (b[:3] == 5).any()
|
||||
|
||||
# do not copy
|
||||
a = test_data.ts.copy()
|
||||
b = test_data.ts[:5].copy()
|
||||
_, rb = a.align(b, join='right', copy=False)
|
||||
rb[:2] = 5
|
||||
assert (b[:2] == 5).all()
|
||||
|
||||
|
||||
def test_align_same_index(test_data):
|
||||
a, b = test_data.ts.align(test_data.ts, copy=False)
|
||||
assert a.index is test_data.ts.index
|
||||
assert b.index is test_data.ts.index
|
||||
|
||||
a, b = test_data.ts.align(test_data.ts, copy=True)
|
||||
assert a.index is not test_data.ts.index
|
||||
assert b.index is not test_data.ts.index
|
||||
|
||||
|
||||
def test_align_multiindex():
|
||||
# GH 10665
|
||||
|
||||
midx = pd.MultiIndex.from_product([range(2), range(3), range(2)],
|
||||
names=('a', 'b', 'c'))
|
||||
idx = pd.Index(range(2), name='b')
|
||||
s1 = pd.Series(np.arange(12, dtype='int64'), index=midx)
|
||||
s2 = pd.Series(np.arange(2, dtype='int64'), index=idx)
|
||||
|
||||
# these must be the same results (but flipped)
|
||||
res1l, res1r = s1.align(s2, join='left')
|
||||
res2l, res2r = s2.align(s1, join='right')
|
||||
|
||||
expl = s1
|
||||
tm.assert_series_equal(expl, res1l)
|
||||
tm.assert_series_equal(expl, res2r)
|
||||
expr = pd.Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
|
||||
tm.assert_series_equal(expr, res1r)
|
||||
tm.assert_series_equal(expr, res2l)
|
||||
|
||||
res1l, res1r = s1.align(s2, join='right')
|
||||
res2l, res2r = s2.align(s1, join='left')
|
||||
|
||||
exp_idx = pd.MultiIndex.from_product([range(2), range(2), range(2)],
|
||||
names=('a', 'b', 'c'))
|
||||
expl = pd.Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
|
||||
tm.assert_series_equal(expl, res1l)
|
||||
tm.assert_series_equal(expl, res2r)
|
||||
expr = pd.Series([0, 0, 1, 1] * 2, index=exp_idx)
|
||||
tm.assert_series_equal(expr, res1r)
|
||||
tm.assert_series_equal(expr, res2l)
|
||||
|
||||
|
||||
def test_reindex(test_data):
|
||||
identity = test_data.series.reindex(test_data.series.index)
|
||||
|
||||
# __array_interface__ is not defined for older numpies
|
||||
# and on some pythons
|
||||
try:
|
||||
assert np.may_share_memory(test_data.series.index, identity.index)
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
assert identity.index.is_(test_data.series.index)
|
||||
assert identity.index.identical(test_data.series.index)
|
||||
|
||||
subIndex = test_data.series.index[10:20]
|
||||
subSeries = test_data.series.reindex(subIndex)
|
||||
|
||||
for idx, val in compat.iteritems(subSeries):
|
||||
assert val == test_data.series[idx]
|
||||
|
||||
subIndex2 = test_data.ts.index[10:20]
|
||||
subTS = test_data.ts.reindex(subIndex2)
|
||||
|
||||
for idx, val in compat.iteritems(subTS):
|
||||
assert val == test_data.ts[idx]
|
||||
stuffSeries = test_data.ts.reindex(subIndex)
|
||||
|
||||
assert np.isnan(stuffSeries).all()
|
||||
|
||||
# This is extremely important for the Cython code to not screw up
|
||||
nonContigIndex = test_data.ts.index[::2]
|
||||
subNonContig = test_data.ts.reindex(nonContigIndex)
|
||||
for idx, val in compat.iteritems(subNonContig):
|
||||
assert val == test_data.ts[idx]
|
||||
|
||||
# return a copy the same index here
|
||||
result = test_data.ts.reindex()
|
||||
assert not (result is test_data.ts)
|
||||
|
||||
|
||||
def test_reindex_nan():
|
||||
ts = Series([2, 3, 5, 7], index=[1, 4, nan, 8])
|
||||
|
||||
i, j = [nan, 1, nan, 8, 4, nan], [2, 0, 2, 3, 1, 2]
|
||||
assert_series_equal(ts.reindex(i), ts.iloc[j])
|
||||
|
||||
ts.index = ts.index.astype('object')
|
||||
|
||||
# reindex coerces index.dtype to float, loc/iloc doesn't
|
||||
assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False)
|
||||
|
||||
|
||||
def test_reindex_series_add_nat():
|
||||
rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s')
|
||||
series = Series(rng)
|
||||
|
||||
result = series.reindex(lrange(15))
|
||||
assert np.issubdtype(result.dtype, np.dtype('M8[ns]'))
|
||||
|
||||
mask = result.isna()
|
||||
assert mask[-5:].all()
|
||||
assert not mask[:-5].any()
|
||||
|
||||
|
||||
def test_reindex_with_datetimes():
|
||||
rng = date_range('1/1/2000', periods=20)
|
||||
ts = Series(np.random.randn(20), index=rng)
|
||||
|
||||
result = ts.reindex(list(ts.index[5:10]))
|
||||
expected = ts[5:10]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = ts[list(ts.index[5:10])]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_corner(test_data):
|
||||
# (don't forget to fix this) I think it's fixed
|
||||
test_data.empty.reindex(test_data.ts.index, method='pad') # it works
|
||||
|
||||
# corner case: pad empty series
|
||||
reindexed = test_data.empty.reindex(test_data.ts.index, method='pad')
|
||||
|
||||
# pass non-Index
|
||||
reindexed = test_data.ts.reindex(list(test_data.ts.index))
|
||||
assert_series_equal(test_data.ts, reindexed)
|
||||
|
||||
# bad fill method
|
||||
ts = test_data.ts[::2]
|
||||
msg = (r"Invalid fill method\. Expecting pad \(ffill\), backfill"
|
||||
r" \(bfill\) or nearest\. Got foo")
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ts.reindex(test_data.ts.index, method='foo')
|
||||
|
||||
|
||||
def test_reindex_pad():
|
||||
s = Series(np.arange(10), dtype='int64')
|
||||
s2 = s[::2]
|
||||
|
||||
reindexed = s2.reindex(s.index, method='pad')
|
||||
reindexed2 = s2.reindex(s.index, method='ffill')
|
||||
assert_series_equal(reindexed, reindexed2)
|
||||
|
||||
expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10))
|
||||
assert_series_equal(reindexed, expected)
|
||||
|
||||
# GH4604
|
||||
s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
|
||||
new_index = ['a', 'g', 'c', 'f']
|
||||
expected = Series([1, 1, 3, 3], index=new_index)
|
||||
|
||||
# this changes dtype because the ffill happens after
|
||||
result = s.reindex(new_index).ffill()
|
||||
assert_series_equal(result, expected.astype('float64'))
|
||||
|
||||
result = s.reindex(new_index).ffill(downcast='infer')
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
expected = Series([1, 5, 3, 5], index=new_index)
|
||||
result = s.reindex(new_index, method='ffill')
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# inference of new dtype
|
||||
s = Series([True, False, False, True], index=list('abcd'))
|
||||
new_index = 'agc'
|
||||
result = s.reindex(list(new_index)).ffill()
|
||||
expected = Series([True, True, False], index=list(new_index))
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# GH4618 shifted series downcasting
|
||||
s = Series(False, index=lrange(0, 5))
|
||||
result = s.shift(1).fillna(method='bfill')
|
||||
expected = Series(False, index=lrange(0, 5))
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_nearest():
|
||||
s = Series(np.arange(10, dtype='int64'))
|
||||
target = [0.1, 0.9, 1.5, 2.0]
|
||||
actual = s.reindex(target, method='nearest')
|
||||
expected = Series(np.around(target).astype('int64'), target)
|
||||
assert_series_equal(expected, actual)
|
||||
|
||||
actual = s.reindex_like(actual, method='nearest')
|
||||
assert_series_equal(expected, actual)
|
||||
|
||||
actual = s.reindex_like(actual, method='nearest', tolerance=1)
|
||||
assert_series_equal(expected, actual)
|
||||
actual = s.reindex_like(actual, method='nearest',
|
||||
tolerance=[1, 2, 3, 4])
|
||||
assert_series_equal(expected, actual)
|
||||
|
||||
actual = s.reindex(target, method='nearest', tolerance=0.2)
|
||||
expected = Series([0, 1, np.nan, 2], target)
|
||||
assert_series_equal(expected, actual)
|
||||
|
||||
actual = s.reindex(target, method='nearest',
|
||||
tolerance=[0.3, 0.01, 0.4, 3])
|
||||
expected = Series([0, np.nan, np.nan, 2], target)
|
||||
assert_series_equal(expected, actual)
|
||||
|
||||
|
||||
def test_reindex_backfill():
|
||||
pass
|
||||
|
||||
|
||||
def test_reindex_int(test_data):
|
||||
ts = test_data.ts[::2]
|
||||
int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)
|
||||
|
||||
# this should work fine
|
||||
reindexed_int = int_ts.reindex(test_data.ts.index)
|
||||
|
||||
# if NaNs introduced
|
||||
assert reindexed_int.dtype == np.float_
|
||||
|
||||
# NO NaNs introduced
|
||||
reindexed_int = int_ts.reindex(int_ts.index[::2])
|
||||
assert reindexed_int.dtype == np.int_
|
||||
|
||||
|
||||
def test_reindex_bool(test_data):
|
||||
# A series other than float, int, string, or object
|
||||
ts = test_data.ts[::2]
|
||||
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
|
||||
|
||||
# this should work fine
|
||||
reindexed_bool = bool_ts.reindex(test_data.ts.index)
|
||||
|
||||
# if NaNs introduced
|
||||
assert reindexed_bool.dtype == np.object_
|
||||
|
||||
# NO NaNs introduced
|
||||
reindexed_bool = bool_ts.reindex(bool_ts.index[::2])
|
||||
assert reindexed_bool.dtype == np.bool_
|
||||
|
||||
|
||||
def test_reindex_bool_pad(test_data):
|
||||
# fail
|
||||
ts = test_data.ts[5:]
|
||||
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
|
||||
filled_bool = bool_ts.reindex(test_data.ts.index, method='pad')
|
||||
assert isna(filled_bool[:5]).all()
|
||||
|
||||
|
||||
def test_reindex_categorical():
|
||||
index = date_range('20000101', periods=3)
|
||||
|
||||
# reindexing to an invalid Categorical
|
||||
s = Series(['a', 'b', 'c'], dtype='category')
|
||||
result = s.reindex(index)
|
||||
expected = Series(Categorical(values=[np.nan, np.nan, np.nan],
|
||||
categories=['a', 'b', 'c']))
|
||||
expected.index = index
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# partial reindexing
|
||||
expected = Series(Categorical(values=['b', 'c'], categories=['a', 'b',
|
||||
'c']))
|
||||
expected.index = [1, 2]
|
||||
result = s.reindex([1, 2])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = Series(Categorical(
|
||||
values=['c', np.nan], categories=['a', 'b', 'c']))
|
||||
expected.index = [2, 3]
|
||||
result = s.reindex([2, 3])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_like(test_data):
|
||||
other = test_data.ts[::2]
|
||||
assert_series_equal(test_data.ts.reindex(other.index),
|
||||
test_data.ts.reindex_like(other))
|
||||
|
||||
# GH 7179
|
||||
day1 = datetime(2013, 3, 5)
|
||||
day2 = datetime(2013, 5, 5)
|
||||
day3 = datetime(2014, 3, 5)
|
||||
|
||||
series1 = Series([5, None, None], [day1, day2, day3])
|
||||
series2 = Series([None, None], [day1, day3])
|
||||
|
||||
result = series1.reindex_like(series2, method='pad')
|
||||
expected = Series([5, np.nan], index=[day1, day3])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_fill_value():
|
||||
# -----------------------------------------------------------
|
||||
# floats
|
||||
floats = Series([1., 2., 3.])
|
||||
result = floats.reindex([1, 2, 3])
|
||||
expected = Series([2., 3., np.nan], index=[1, 2, 3])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = floats.reindex([1, 2, 3], fill_value=0)
|
||||
expected = Series([2., 3., 0], index=[1, 2, 3])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# -----------------------------------------------------------
|
||||
# ints
|
||||
ints = Series([1, 2, 3])
|
||||
|
||||
result = ints.reindex([1, 2, 3])
|
||||
expected = Series([2., 3., np.nan], index=[1, 2, 3])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# don't upcast
|
||||
result = ints.reindex([1, 2, 3], fill_value=0)
|
||||
expected = Series([2, 3, 0], index=[1, 2, 3])
|
||||
assert issubclass(result.dtype.type, np.integer)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# -----------------------------------------------------------
|
||||
# objects
|
||||
objects = Series([1, 2, 3], dtype=object)
|
||||
|
||||
result = objects.reindex([1, 2, 3])
|
||||
expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = objects.reindex([1, 2, 3], fill_value='foo')
|
||||
expected = Series([2, 3, 'foo'], index=[1, 2, 3], dtype=object)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# bools
|
||||
bools = Series([True, False, True])
|
||||
|
||||
result = bools.reindex([1, 2, 3])
|
||||
expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = bools.reindex([1, 2, 3], fill_value=False)
|
||||
expected = Series([False, True, False], index=[1, 2, 3])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_datetimeindexes_tz_naive_and_aware():
|
||||
# GH 8306
|
||||
idx = date_range('20131101', tz='America/Chicago', periods=7)
|
||||
newidx = date_range('20131103', periods=10, freq='H')
|
||||
s = Series(range(7), index=idx)
|
||||
with pytest.raises(TypeError):
|
||||
s.reindex(newidx, method='ffill')
|
||||
|
||||
|
||||
def test_reindex_empty_series_tz_dtype():
|
||||
# GH 20869
|
||||
result = Series(dtype='datetime64[ns, UTC]').reindex([0, 1])
|
||||
expected = Series([pd.NaT] * 2, dtype='datetime64[ns, UTC]')
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_rename():
|
||||
# GH 17407
|
||||
s = Series(range(1, 6), index=pd.Index(range(2, 7), name='IntIndex'))
|
||||
result = s.rename(str)
|
||||
expected = s.rename(lambda i: str(i))
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
assert result.name == expected.name
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'data, index, drop_labels,'
|
||||
' axis, expected_data, expected_index',
|
||||
[
|
||||
# Unique Index
|
||||
([1, 2], ['one', 'two'], ['two'],
|
||||
0, [1], ['one']),
|
||||
([1, 2], ['one', 'two'], ['two'],
|
||||
'rows', [1], ['one']),
|
||||
([1, 1, 2], ['one', 'two', 'one'], ['two'],
|
||||
0, [1, 2], ['one', 'one']),
|
||||
|
||||
# GH 5248 Non-Unique Index
|
||||
([1, 1, 2], ['one', 'two', 'one'], 'two',
|
||||
0, [1, 2], ['one', 'one']),
|
||||
([1, 1, 2], ['one', 'two', 'one'], ['one'],
|
||||
0, [1], ['two']),
|
||||
([1, 1, 2], ['one', 'two', 'one'], 'one',
|
||||
0, [1], ['two'])])
|
||||
def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels,
|
||||
expected_data, expected_index):
|
||||
|
||||
s = Series(data=data, index=index)
|
||||
result = s.drop(drop_labels, axis=axis)
|
||||
expected = Series(data=expected_data, index=expected_index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'data, index, drop_labels,'
|
||||
' axis, error_type, error_desc',
|
||||
[
|
||||
# single string/tuple-like
|
||||
(range(3), list('abc'), 'bc',
|
||||
0, KeyError, 'not found in axis'),
|
||||
|
||||
# bad axis
|
||||
(range(3), list('abc'), ('a',),
|
||||
0, KeyError, 'not found in axis'),
|
||||
(range(3), list('abc'), 'one',
|
||||
'columns', ValueError, 'No axis named columns')])
|
||||
def test_drop_exception_raised(data, index, drop_labels,
|
||||
axis, error_type, error_desc):
|
||||
|
||||
with pytest.raises(error_type, match=error_desc):
|
||||
Series(data, index=index).drop(drop_labels, axis=axis)
|
||||
|
||||
|
||||
def test_drop_with_ignore_errors():
|
||||
# errors='ignore'
|
||||
s = Series(range(3), index=list('abc'))
|
||||
result = s.drop('bc', errors='ignore')
|
||||
tm.assert_series_equal(result, s)
|
||||
result = s.drop(['a', 'd'], errors='ignore')
|
||||
expected = s.iloc[1:]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# GH 8522
|
||||
s = Series([2, 3], index=[True, False])
|
||||
assert s.index.is_object()
|
||||
result = s.drop(True)
|
||||
expected = Series([3], index=[False])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 3]])
|
||||
@pytest.mark.parametrize('drop_labels', [[], [1], [3]])
|
||||
def test_drop_empty_list(index, drop_labels):
|
||||
# GH 21494
|
||||
expected_index = [i for i in index if i not in drop_labels]
|
||||
series = pd.Series(index=index).drop(drop_labels)
|
||||
tm.assert_series_equal(series, pd.Series(index=expected_index))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('data, index, drop_labels', [
|
||||
(None, [1, 2, 3], [1, 4]),
|
||||
(None, [1, 2, 2], [1, 4]),
|
||||
([2, 3], [0, 1], [False, True])
|
||||
])
|
||||
def test_drop_non_empty_list(data, index, drop_labels):
|
||||
# GH 21494 and GH 16877
|
||||
with pytest.raises(KeyError, match='not found in axis'):
|
||||
pd.Series(data=data, index=index).drop(drop_labels)
|
||||
@@ -1,634 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange, range
|
||||
|
||||
from pandas.core.dtypes.common import is_integer
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, Series, Timestamp, date_range, isna
|
||||
from pandas.core.indexing import IndexingError
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
from pandas.tseries.offsets import BDay
|
||||
|
||||
|
||||
def test_getitem_boolean(test_data):
|
||||
s = test_data.series
|
||||
mask = s > s.median()
|
||||
|
||||
# passing list is OK
|
||||
result = s[list(mask)]
|
||||
expected = s[mask]
|
||||
assert_series_equal(result, expected)
|
||||
tm.assert_index_equal(result.index, s.index[mask])
|
||||
|
||||
|
||||
def test_getitem_boolean_empty():
|
||||
s = Series([], dtype=np.int64)
|
||||
s.index.name = 'index_name'
|
||||
s = s[s.isna()]
|
||||
assert s.index.name == 'index_name'
|
||||
assert s.dtype == np.int64
|
||||
|
||||
# GH5877
|
||||
# indexing with empty series
|
||||
s = Series(['A', 'B'])
|
||||
expected = Series(np.nan, index=['C'], dtype=object)
|
||||
result = s[Series(['C'], dtype=object)]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
s = Series(['A', 'B'])
|
||||
expected = Series(dtype=object, index=Index([], dtype='int64'))
|
||||
result = s[Series([], dtype=object)]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# invalid because of the boolean indexer
|
||||
# that's empty or not-aligned
|
||||
msg = (r"Unalignable boolean Series provided as indexer \(index of"
|
||||
r" the boolean Series and of the indexed object do not match")
|
||||
with pytest.raises(IndexingError, match=msg):
|
||||
s[Series([], dtype=bool)]
|
||||
|
||||
with pytest.raises(IndexingError, match=msg):
|
||||
s[Series([True], dtype=bool)]
|
||||
|
||||
|
||||
def test_getitem_boolean_object(test_data):
|
||||
# using column from DataFrame
|
||||
|
||||
s = test_data.series
|
||||
mask = s > s.median()
|
||||
omask = mask.astype(object)
|
||||
|
||||
# getitem
|
||||
result = s[omask]
|
||||
expected = s[mask]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# setitem
|
||||
s2 = s.copy()
|
||||
cop = s.copy()
|
||||
cop[omask] = 5
|
||||
s2[mask] = 5
|
||||
assert_series_equal(cop, s2)
|
||||
|
||||
# nans raise exception
|
||||
omask[5:10] = np.nan
|
||||
msg = "cannot index with vector containing NA / NaN values"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s[omask]
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s[omask] = 5
|
||||
|
||||
|
||||
def test_getitem_setitem_boolean_corner(test_data):
|
||||
ts = test_data.ts
|
||||
mask_shifted = ts.shift(1, freq=BDay()) > ts.median()
|
||||
|
||||
# these used to raise...??
|
||||
|
||||
msg = (r"Unalignable boolean Series provided as indexer \(index of"
|
||||
r" the boolean Series and of the indexed object do not match")
|
||||
with pytest.raises(IndexingError, match=msg):
|
||||
ts[mask_shifted]
|
||||
with pytest.raises(IndexingError, match=msg):
|
||||
ts[mask_shifted] = 1
|
||||
|
||||
with pytest.raises(IndexingError, match=msg):
|
||||
ts.loc[mask_shifted]
|
||||
with pytest.raises(IndexingError, match=msg):
|
||||
ts.loc[mask_shifted] = 1
|
||||
|
||||
|
||||
def test_setitem_boolean(test_data):
|
||||
mask = test_data.series > test_data.series.median()
|
||||
|
||||
# similar indexed series
|
||||
result = test_data.series.copy()
|
||||
result[mask] = test_data.series * 2
|
||||
expected = test_data.series * 2
|
||||
assert_series_equal(result[mask], expected[mask])
|
||||
|
||||
# needs alignment
|
||||
result = test_data.series.copy()
|
||||
result[mask] = (test_data.series * 2)[0:5]
|
||||
expected = (test_data.series * 2)[0:5].reindex_like(test_data.series)
|
||||
expected[-mask] = test_data.series[mask]
|
||||
assert_series_equal(result[mask], expected[mask])
|
||||
|
||||
|
||||
def test_get_set_boolean_different_order(test_data):
|
||||
ordered = test_data.series.sort_values()
|
||||
|
||||
# setting
|
||||
copy = test_data.series.copy()
|
||||
copy[ordered > 0] = 0
|
||||
|
||||
expected = test_data.series.copy()
|
||||
expected[expected > 0] = 0
|
||||
|
||||
assert_series_equal(copy, expected)
|
||||
|
||||
# getting
|
||||
sel = test_data.series[ordered > 0]
|
||||
exp = test_data.series[test_data.series > 0]
|
||||
assert_series_equal(sel, exp)
|
||||
|
||||
|
||||
def test_where_unsafe_int(sint_dtype):
|
||||
s = Series(np.arange(10), dtype=sint_dtype)
|
||||
mask = s < 5
|
||||
|
||||
s[mask] = lrange(2, 7)
|
||||
expected = Series(lrange(2, 7) + lrange(5, 10), dtype=sint_dtype)
|
||||
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
|
||||
def test_where_unsafe_float(float_dtype):
|
||||
s = Series(np.arange(10), dtype=float_dtype)
|
||||
mask = s < 5
|
||||
|
||||
s[mask] = lrange(2, 7)
|
||||
expected = Series(lrange(2, 7) + lrange(5, 10), dtype=float_dtype)
|
||||
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype,expected_dtype", [
|
||||
(np.int8, np.float64),
|
||||
(np.int16, np.float64),
|
||||
(np.int32, np.float64),
|
||||
(np.int64, np.float64),
|
||||
(np.float32, np.float32),
|
||||
(np.float64, np.float64)
|
||||
])
|
||||
def test_where_unsafe_upcast(dtype, expected_dtype):
|
||||
# see gh-9743
|
||||
s = Series(np.arange(10), dtype=dtype)
|
||||
values = [2.5, 3.5, 4.5, 5.5, 6.5]
|
||||
mask = s < 5
|
||||
expected = Series(values + lrange(5, 10), dtype=expected_dtype)
|
||||
s[mask] = values
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
|
||||
def test_where_unsafe():
|
||||
# see gh-9731
|
||||
s = Series(np.arange(10), dtype="int64")
|
||||
values = [2.5, 3.5, 4.5, 5.5]
|
||||
|
||||
mask = s > 5
|
||||
expected = Series(lrange(6) + values, dtype="float64")
|
||||
|
||||
s[mask] = values
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
# see gh-3235
|
||||
s = Series(np.arange(10), dtype='int64')
|
||||
mask = s < 5
|
||||
s[mask] = lrange(2, 7)
|
||||
expected = Series(lrange(2, 7) + lrange(5, 10), dtype='int64')
|
||||
assert_series_equal(s, expected)
|
||||
assert s.dtype == expected.dtype
|
||||
|
||||
s = Series(np.arange(10), dtype='int64')
|
||||
mask = s > 5
|
||||
s[mask] = [0] * 4
|
||||
expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype='int64')
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
s = Series(np.arange(10))
|
||||
mask = s > 5
|
||||
|
||||
msg = "cannot assign mismatch length to masked array"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s[mask] = [5, 4, 3, 2, 1]
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s[mask] = [0] * 5
|
||||
|
||||
# dtype changes
|
||||
s = Series([1, 2, 3, 4])
|
||||
result = s.where(s > 2, np.nan)
|
||||
expected = Series([np.nan, np.nan, 3, 4])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# GH 4667
|
||||
# setting with None changes dtype
|
||||
s = Series(range(10)).astype(float)
|
||||
s[8] = None
|
||||
result = s[8]
|
||||
assert isna(result)
|
||||
|
||||
s = Series(range(10)).astype(float)
|
||||
s[s > 8] = None
|
||||
result = s[isna(s)]
|
||||
expected = Series(np.nan, index=[9])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_where_raise_on_error_deprecation():
|
||||
# gh-14968
|
||||
# deprecation of raise_on_error
|
||||
s = Series(np.random.randn(5))
|
||||
cond = s > 0
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
s.where(cond, raise_on_error=True)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
s.mask(cond, raise_on_error=True)
|
||||
|
||||
|
||||
def test_where():
|
||||
s = Series(np.random.randn(5))
|
||||
cond = s > 0
|
||||
|
||||
rs = s.where(cond).dropna()
|
||||
rs2 = s[cond]
|
||||
assert_series_equal(rs, rs2)
|
||||
|
||||
rs = s.where(cond, -s)
|
||||
assert_series_equal(rs, s.abs())
|
||||
|
||||
rs = s.where(cond)
|
||||
assert (s.shape == rs.shape)
|
||||
assert (rs is not s)
|
||||
|
||||
# test alignment
|
||||
cond = Series([True, False, False, True, False], index=s.index)
|
||||
s2 = -(s.abs())
|
||||
|
||||
expected = s2[cond].reindex(s2.index[:3]).reindex(s2.index)
|
||||
rs = s2.where(cond[:3])
|
||||
assert_series_equal(rs, expected)
|
||||
|
||||
expected = s2.abs()
|
||||
expected.iloc[0] = s2[0]
|
||||
rs = s2.where(cond[:3], -s2)
|
||||
assert_series_equal(rs, expected)
|
||||
|
||||
|
||||
def test_where_error():
|
||||
s = Series(np.random.randn(5))
|
||||
cond = s > 0
|
||||
|
||||
msg = "Array conditional must be same shape as self"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.where(1)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.where(cond[:3].values, -s)
|
||||
|
||||
# GH 2745
|
||||
s = Series([1, 2])
|
||||
s[[True, False]] = [0, 1]
|
||||
expected = Series([0, 2])
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
# failures
|
||||
msg = "cannot assign mismatch length to masked array"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s[[True, False]] = [0, 2, 3]
|
||||
msg = ("NumPy boolean array indexing assignment cannot assign 0 input"
|
||||
" values to the 1 output values where the mask is true")
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s[[True, False]] = []
|
||||
|
||||
|
||||
@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
|
||||
def test_where_array_like(klass):
|
||||
# see gh-15414
|
||||
s = Series([1, 2, 3])
|
||||
cond = [False, True, True]
|
||||
expected = Series([np.nan, 2, 3])
|
||||
|
||||
result = s.where(klass(cond))
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('cond', [
|
||||
[1, 0, 1],
|
||||
Series([2, 5, 7]),
|
||||
["True", "False", "True"],
|
||||
[Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")]
|
||||
])
|
||||
def test_where_invalid_input(cond):
|
||||
# see gh-15414: only boolean arrays accepted
|
||||
s = Series([1, 2, 3])
|
||||
msg = "Boolean array expected for the condition"
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.where(cond)
|
||||
|
||||
msg = "Array conditional must be same shape as self"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.where([True])
|
||||
|
||||
|
||||
def test_where_ndframe_align():
|
||||
msg = "Array conditional must be same shape as self"
|
||||
s = Series([1, 2, 3])
|
||||
|
||||
cond = [True]
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.where(cond)
|
||||
|
||||
expected = Series([1, np.nan, np.nan])
|
||||
|
||||
out = s.where(Series(cond))
|
||||
tm.assert_series_equal(out, expected)
|
||||
|
||||
cond = np.array([False, True, False, True])
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.where(cond)
|
||||
|
||||
expected = Series([np.nan, 2, np.nan])
|
||||
|
||||
out = s.where(Series(cond))
|
||||
tm.assert_series_equal(out, expected)
|
||||
|
||||
|
||||
def test_where_setitem_invalid():
    # GH 2702
    # make sure correct exceptions are raised on invalid list assignment
    template = ("cannot set using a {} indexer with a different length than"
                " the value")
    slice_msg = template.format('slice')
    list_msg = template.format('list-like')
    too_long = list(range(27))

    # slice
    ser = Series(list('abc'))
    with pytest.raises(ValueError, match=slice_msg):
        ser[0:3] = too_long

    ser[0:3] = list(range(3))
    assert_series_equal(ser.astype(np.int64), Series([0, 1, 2]))

    # slice with step
    ser = Series(list('abcdef'))
    with pytest.raises(ValueError, match=slice_msg):
        ser[0:4:2] = too_long

    ser = Series(list('abcdef'))
    ser[0:4:2] = list(range(2))
    assert_series_equal(ser, Series([0, 'b', 1, 'd', 'e', 'f']))

    # neg slices
    ser = Series(list('abcdef'))
    with pytest.raises(ValueError, match=slice_msg):
        ser[:-1] = too_long

    ser[-3:-1] = list(range(2))
    assert_series_equal(ser, Series(['a', 'b', 'c', 0, 1, 'f']))

    # list: both too many and too few values are rejected
    for values in (too_long, list(range(2))):
        ser = Series(list('abc'))
        with pytest.raises(ValueError, match=list_msg):
            ser[[0, 1, 2]] = values

    # scalar: the whole list is stored as a single element
    ser = Series(list('abc'))
    ser[0] = list(range(10))
    assert_series_equal(ser, Series([list(range(10)), 'b', 'c']))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('size', range(2, 6))
@pytest.mark.parametrize('mask', [
    [True, False, False, False, False],
    [True, False],
    [False]
])
# ``np.float`` was only an alias of the builtin ``float`` and has been
# removed from NumPy; ``np.float64`` names the same precision explicitly.
@pytest.mark.parametrize('item', [
    2.0, np.nan, np.finfo(np.float64).max, np.finfo(np.float64).min
])
# Test numpy arrays, lists and tuples as the input to be
# broadcast
@pytest.mark.parametrize('box', [
    lambda x: np.array([x]),
    lambda x: [x],
    lambda x: (x,)
])
def test_broadcast(size, mask, item, box):
    # A length-1 container (``box(item)``) must broadcast over every
    # selected position for setitem, ``where`` and ``mask`` alike.
    #
    # size: length of the Series under test
    # mask: boolean pattern, tiled/truncated to ``size`` by ``np.resize``
    # item: scalar written into the selected positions
    # box: wraps ``item`` in an ndarray, list or tuple
    selection = np.resize(mask, size)

    data = np.arange(size, dtype=float)

    # Construct the expected series by taking the source
    # data or item based on the selection
    expected = Series([item if use_item else data[i]
                       for i, use_item in enumerate(selection)])

    s = Series(data)
    s[selection] = box(item)
    assert_series_equal(s, expected)

    s = Series(data)
    result = s.where(~selection, box(item))
    assert_series_equal(result, expected)

    s = Series(data)
    result = s.mask(selection, box(item))
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_where_inplace():
    # ``where(..., inplace=True)`` must give the same values as the
    # non-inplace call, with and without a replacement value.
    original = Series(np.random.randn(5))
    positive = original > 0

    # no replacement: failing entries become NaN
    work = original.copy()
    work.where(positive, inplace=True)
    assert_series_equal(work.dropna(), original[positive])
    assert_series_equal(work, original.where(positive))

    # explicit replacement values
    work = original.copy()
    work.where(positive, -original, inplace=True)
    assert_series_equal(work, original.where(positive, -original))
|
||||
|
||||
|
||||
def test_where_dups():
    # GH 4550
    # where crashes with dups in index
    dup_index = [0, 1, 2, 0, 1, 2]
    comb = pd.concat([Series(list(range(3))), Series(list(range(3)))])

    result = comb.where(comb < 2)
    assert_series_equal(
        result, Series([0, 1, np.nan, 0, 1, np.nan], index=dup_index))

    # GH 4548
    # inplace updating not working with dups
    comb[comb < 1] = 5
    assert_series_equal(comb, Series([5, 1, 2, 5, 1, 2], index=dup_index))

    comb[comb < 2] += 10
    assert_series_equal(comb, Series([5, 11, 2, 5, 11, 2], index=dup_index))
|
||||
|
||||
|
||||
def test_where_numeric_with_string():
    # GH 9280
    # Replacing entries of an integer Series with strings gives an
    # object Series: the replaced entry is a str, the others stay integers.
    s = pd.Series([1, 2, 3])

    # scalar, list and ndarray replacements must all behave the same way
    replacements = ('X', ['X', 'Y', 'Z'], np.array(['X', 'Y', 'Z']))
    for other in replacements:
        w = s.where(s > 1, other)

        assert not is_integer(w[0])
        assert is_integer(w[1])
        assert is_integer(w[2])
        assert isinstance(w[0], str)
        assert w.dtype == 'object'
|
||||
|
||||
|
||||
def test_where_timedelta_coerce():
    # An all-False condition replaces every timedelta entry with the
    # numeric ``other``; the result is compared against plain numbers.
    tds = Series([1, 2], dtype='timedelta64[ns]')
    none_kept = np.array([False, False])

    all_tens = Series([10, 10])
    for other in ([10, 10], 10, 10.0, [10.0, 10.0]):
        assert_series_equal(tds.where(none_kept, other), all_tens)

    # a NaN replacement is expected to come back as None in object dtype
    with_missing = tds.where(none_kept, [10.0, np.nan])
    assert_series_equal(with_missing, Series([10, None], dtype='object'))
|
||||
|
||||
|
||||
def test_where_datetime_conversion():
    # An all-False condition replaces every datetime entry with the
    # numeric ``other``; the result is compared against plain numbers.
    stamps = Series(date_range('20130102', periods=2))
    none_kept = np.array([False, False])

    all_tens = Series([10, 10])
    for other in ([10, 10], 10, 10.0, [10.0, 10.0]):
        assert_series_equal(stamps.where(none_kept, other), all_tens)

    # a NaN replacement is expected to come back as None in object dtype
    with_missing = stamps.where(none_kept, [10.0, np.nan])
    assert_series_equal(with_missing, Series([10, None], dtype='object'))

    # GH 15701
    timestamps = ['2016-12-31 12:00:04+00:00',
                  '2016-12-31 12:00:04.010000+00:00']
    aware = Series([pd.Timestamp(t) for t in timestamps])
    masked = aware.where(Series([False, True]))
    assert_series_equal(masked, Series([pd.NaT, aware[1]]))
|
||||
|
||||
|
||||
def test_where_dt_tz_values(tz_naive_fixture):
|
||||
ser1 = pd.Series(pd.DatetimeIndex(['20150101', '20150102', '20150103'],
|
||||
tz=tz_naive_fixture))
|
||||
ser2 = pd.Series(pd.DatetimeIndex(['20160514', '20160515', '20160516'],
|
||||
tz=tz_naive_fixture))
|
||||
mask = pd.Series([True, True, False])
|
||||
result = ser1.where(mask, ser2)
|
||||
exp = pd.Series(pd.DatetimeIndex(['20150101', '20150102', '20160516'],
|
||||
tz=tz_naive_fixture))
|
||||
assert_series_equal(exp, result)
|
||||
|
||||
|
||||
def test_mask():
    # compare with tested results in test_where:
    # ``mask(cond)`` must equal ``where(~cond)``
    values = Series(np.random.randn(5))
    positive = values > 0

    assert_series_equal(values.where(~positive, np.nan),
                        values.mask(positive))
    assert_series_equal(values.where(~positive), values.mask(positive))
    assert_series_equal(values.where(~positive, -values),
                        values.mask(positive, -values))

    # a condition covering only the first three labels
    cond = Series([True, False, False, True, False], index=values.index)
    negated = -(values.abs())
    partial = cond[:3]
    assert_series_equal(negated.where(~partial), negated.mask(partial))
    assert_series_equal(negated.where(~partial, -negated),
                        negated.mask(partial, -negated))

    # scalar and short ndarray conditions are rejected
    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        values.mask(1)
    with pytest.raises(ValueError, match=msg):
        values.mask(cond[:3].values, -values)

    # dtype changes
    ints = Series([1, 2, 3, 4])
    assert_series_equal(ints.mask(ints > 2, np.nan),
                        Series([1, 2, np.nan, np.nan]))

    # see gh-21891
    pair = Series([1, 2])
    res = pair.mask([True, False])
    tm.assert_series_equal(res, Series([np.nan, 2]))
|
||||
|
||||
|
||||
def test_mask_inplace():
    # ``mask(..., inplace=True)`` must give the same values as the
    # non-inplace call, with and without a replacement value.
    original = Series(np.random.randn(5))
    positive = original > 0

    # no replacement: masked entries become NaN
    work = original.copy()
    work.mask(positive, inplace=True)
    assert_series_equal(work.dropna(), original[~positive])
    assert_series_equal(work, original.mask(positive))

    # explicit replacement values
    work = original.copy()
    work.mask(positive, -original, inplace=True)
    assert_series_equal(work, original.mask(positive, -original))
|
||||
@@ -1,33 +0,0 @@
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_getitem_callable():
    # GH 12533
    # a callable key is indexed by whatever the callable returns
    ser = pd.Series(4, index=list('ABCD'))

    # scalar label
    assert ser[lambda x: 'A'] == ser.loc['A']

    # list of labels
    by_labels = ser[lambda x: ['A', 'B']]
    tm.assert_series_equal(by_labels, ser.loc[['A', 'B']])

    # boolean mask
    by_mask = ser[lambda x: [True, False, True, True]]
    tm.assert_series_equal(by_mask, ser.iloc[[0, 2, 3]])
|
||||
|
||||
|
||||
def test_setitem_callable():
    # GH 12533
    # assigning through a callable key that returns the label 'A'
    labels = list('ABCD')
    ser = pd.Series([1, 2, 3, 4], index=labels)

    ser[lambda x: 'A'] = -1

    expected = pd.Series([-1, 2, 3, 4], index=labels)
    tm.assert_series_equal(ser, expected)
|
||||
|
||||
|
||||
def test_setitem_other_callable():
    # GH 13299
    # a callable used as the assigned VALUE is stored as-is, not called
    inc = lambda x: x + 1

    ser = pd.Series([1, 2, -1, 4])
    negative = ser < 0
    ser[negative] = inc

    tm.assert_series_equal(ser, pd.Series([1, 2, inc, 4]))
|
||||
@@ -1,714 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import iNaT
|
||||
import pandas._libs.index as _index
|
||||
from pandas.compat import lrange, range
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import (
|
||||
assert_almost_equal, assert_frame_equal, assert_series_equal)
|
||||
|
||||
|
||||
"""
|
||||
Also test support for datetime64[ns] in Series / DataFrame
|
||||
"""
|
||||
|
||||
|
||||
def test_fancy_getitem():
    # integer, string, datetime and Timestamp keys all reach the same
    # element of a Series indexed by dates
    dti = date_range(freq='WOM-1FRI', start=datetime(2005, 1, 1),
                     end=datetime(2010, 1, 1))
    s = Series(np.arange(len(dti)), index=dti)

    equivalent_keys = [
        48,
        '1/2/2009',
        '2009-1-2',
        datetime(2009, 1, 2),
        Timestamp(datetime(2009, 1, 2)),
    ]
    for key in equivalent_keys:
        assert s[key] == 48

    # a date that is not in the index
    with pytest.raises(KeyError, match=r"^'2009-1-3'$"):
        s['2009-1-3']

    # string and datetime slice bounds select the same rows
    by_strings = s['3/6/2009':'2009-06-05']
    by_datetimes = s[datetime(2009, 3, 6):datetime(2009, 6, 5)]
    assert_series_equal(by_strings, by_datetimes)
|
||||
|
||||
|
||||
def test_fancy_setitem():
    # assignment through integer, date-string and date-slice keys
    dti = date_range(freq='WOM-1FRI', start=datetime(2005, 1, 1),
                     end=datetime(2010, 1, 1))
    ser = Series(np.arange(len(dti)), index=dti)

    # integer key
    ser[48] = -1
    assert ser[48] == -1

    # date string naming the same element
    ser['1/2/2009'] = -2
    assert ser[48] == -2

    # date-string slice
    ser['1/2/2009':'2009-06-05'] = -3
    overwritten = ser[48:54]
    assert (overwritten == -3).all()
|
||||
|
||||
|
||||
def test_dti_snap():
    # ``snap`` output is compared with a repeated ``date_range`` of the
    # target frequency
    dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002',
                         '1/5/2002', '1/6/2002', '1/7/2002'], freq='D')

    cases = [
        # (snap freq, expected range start, expected range freq, repeats)
        ('W-MON', '12/31/2001', 'w-mon', [3, 4]),
        ('B', '1/1/2002', 'b', [1, 1, 1, 2, 2]),
    ]
    for snap_freq, start, range_freq, repeats in cases:
        res = dti.snap(freq=snap_freq)

        exp = date_range(start, '1/7/2002', freq=range_freq).repeat(repeats)
        assert (res == exp).all()
|
||||
|
||||
|
||||
def test_dti_reset_index_round_trip():
    # reset_index followed by set_index must give back the same frame
    dti = date_range(start='1/1/2001', end='6/1/2001', freq='D')
    original = DataFrame({'v': np.random.rand(len(dti))}, index=dti)

    flattened = original.reset_index()
    # the former index is now the first column and keeps its dtype
    assert flattened.dtypes[0] == np.dtype('M8[ns]')

    restored = flattened.set_index('index')
    assert_frame_equal(original, restored, check_names=False)

    # #2329
    stamp = datetime(2012, 11, 22)
    df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value'])
    df = df.set_index('Date')

    assert df.index[0] == stamp
    assert df.reset_index()['Date'][0] == stamp
|
||||
|
||||
|
||||
def test_series_set_value():
    # #1561
    # ``set_value`` is expected to emit a FutureWarning on each call
    first, second = datetime(2001, 1, 1), datetime(2001, 1, 2)

    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        s = Series().set_value(first, 1.)
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        s2 = s.set_value(second, np.nan)

    exp = Series([1., np.nan], index=DatetimeIndex([first, second]))
    assert_series_equal(s2, exp)
|
||||
|
||||
# s = Series(index[:1], index[:1])
|
||||
# s2 = s.set_value(dates[1], index[1])
|
||||
# assert s2.values.dtype == 'M8[ns]'
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_slice_locs_indexerror():
    # Smoke test: a .loc slice whose bounds lie far outside a 100000-row
    # datetime index must simply not raise.
    start = datetime(2000, 1, 1)
    times = [start + timedelta(minutes=i * 10) for i in range(100000)]
    s = Series(lrange(100000), times)

    s.loc[datetime(1900, 1, 1):datetime(2100, 1, 1)]
|
||||
|
||||
|
||||
def test_slicing_datetimes():
    # GH 7523
    # The same slicing checks run against a unique index and against an
    # index that contains a duplicated date.
    for days in ([1, 2, 3, 4], [1, 2, 2, 3, 4]):
        index = [datetime(2001, 1, day, 10, 00) for day in days]
        df = DataFrame(np.arange(float(len(days)), dtype='float64'),
                       index=index)

        # bounds that coincide with the first/last row keep everything
        whole_frame_slices = [
            slice(datetime(2001, 1, 1, 10), None),
            slice(None, datetime(2001, 1, 4, 10)),
            slice(datetime(2001, 1, 1, 10), datetime(2001, 1, 4, 10)),
        ]
        for bounds in whole_frame_slices:
            assert_frame_equal(df.loc[bounds], df)

        # a lower bound one hour after the first row drops that row,
        # whether given as a datetime or as a string
        expected = df.iloc[1:]
        assert_frame_equal(df.loc[datetime(2001, 1, 1, 11):], expected)
        assert_frame_equal(df.loc['20010101 11':], expected)
|
||||
|
||||
|
||||
def test_frame_datetime64_duplicated():
    # ``duplicated`` must report no duplicates for frames built from
    # distinct consecutive dates, with or without a constant extra column.
    dates = date_range('2010-07-01', end='2010-08-05')

    tst = DataFrame({'symbol': 'AAA', 'date': dates})
    result = tst.duplicated(['date', 'symbol'])
    # Use ``~`` for the logical NOT of the boolean result; unary minus is
    # an arithmetic negation and NumPy rejects it for boolean arrays.
    assert (~result).all()

    tst = DataFrame({'date': dates})
    result = tst.duplicated()
    assert (~result).all()
|
||||
|
||||
|
||||
def test_getitem_setitem_datetime_tz_pytz():
    from pytz import timezone as tz

    N = 50
    # testing with timezone, GH #2785
    rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern')
    ts = Series(np.random.randn(N), index=rng)

    keys = [
        # also test Timestamp tz handling, GH #2789
        "1990-01-01 09:00:00+00:00",
        "1990-01-01 03:00:00-06:00",
        # repeat with datetimes
        datetime(1990, 1, 1, 9, tzinfo=tz('UTC')),
        # comparison dates with datetime MUST be localized!
        tz('US/Central').localize(datetime(1990, 1, 1, 3)),
    ]
    for key in keys:
        # overwrite the addressed element, then restore it from ts[4];
        # the series must end up equal to the original
        result = ts.copy()
        result[key] = 0
        result[key] = ts[4]
        assert_series_equal(result, ts)
|
||||
|
||||
|
||||
def test_getitem_setitem_datetime_tz_dateutil():
    from dateutil.tz import tzutc
    from pandas._libs.tslibs.timezones import dateutil_gettz as gettz

    def tz(name):
        # handle special case for utc in dateutil
        return tzutc() if name == 'UTC' else gettz(name)

    N = 50

    # testing with timezone, GH #2785
    rng = date_range('1/1/1990', periods=N, freq='H',
                     tz='America/New_York')
    ts = Series(np.random.randn(N), index=rng)

    keys = [
        # also test Timestamp tz handling, GH #2789
        "1990-01-01 09:00:00+00:00",
        "1990-01-01 03:00:00-06:00",
        # repeat with datetimes
        datetime(1990, 1, 1, 9, tzinfo=tz('UTC')),
        datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago')),
    ]
    for key in keys:
        # overwrite the addressed element, then restore it from ts[4];
        # the series must end up equal to the original
        result = ts.copy()
        result[key] = 0
        result[key] = ts[4]
        assert_series_equal(result, ts)
|
||||
|
||||
|
||||
def test_getitem_setitem_datetimeindex():
    N = 50
    # testing with timezone, GH #2785
    rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern')
    ts = Series(np.random.randn(N), index=rng)

    def check_roundtrip(key, original):
        # zero out ``key`` on a copy, write ``original`` back, and expect
        # the copy to equal ``ts`` again
        scratch = ts.copy()
        scratch[key] = 0
        scratch[key] = original
        assert_series_equal(scratch, ts)

    def between(lower, upper):
        # rows whose index lies in the closed interval [lower, upper]
        return ts[(ts.index >= lower) & (ts.index <= upper)]

    # string keys
    assert ts["1990-01-01 04:00:00"] == ts[4]
    check_roundtrip("1990-01-01 04:00:00", ts[4])

    assert_series_equal(ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"],
                        ts[4:8])
    check_roundtrip(slice("1990-01-01 04:00:00", "1990-01-01 07:00:00"),
                    ts[4:8])

    # GH#18435 strings get a pass from tzawareness compat
    assert_series_equal(
        between("1990-01-01 04:00:00", "1990-01-01 07:00:00"), ts[4:8])
    assert_series_equal(
        between("1990-01-01 04:00:00-0500", "1990-01-01 07:00:00-0500"),
        ts[4:8])

    # repeat all the above with naive datetimes
    four = datetime(1990, 1, 1, 4)
    seven = datetime(1990, 1, 1, 7)

    assert ts[four] == ts[4]
    check_roundtrip(four, ts[4])

    assert_series_equal(ts[four:seven], ts[4:8])
    check_roundtrip(slice(four, seven), ts[4:8])

    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    with pytest.raises(TypeError, match=msg):
        # tznaive vs tzaware comparison is invalid
        # see GH#18376, GH#18162
        between(four, seven)

    lb = pd.Timestamp(four).tz_localize(rng.tzinfo)
    rb = pd.Timestamp(seven).tz_localize(rng.tzinfo)
    assert_series_equal(between(lb, rb), ts[4:8])

    # keys taken from the index itself
    assert ts[ts.index[4]] == ts[4]
    assert_series_equal(ts[ts.index[4:8]], ts[4:8])

    # zero out via index labels, restore via positions
    scratch = ts.copy()
    scratch[ts.index[4:8]] = 0
    scratch[4:8] = ts[4:8]
    assert_series_equal(scratch, ts)

    # also test partial date slicing
    assert_series_equal(ts["1990-01-02"], ts[24:48])
    check_roundtrip("1990-01-02", ts[24:48])
|
||||
|
||||
|
||||
def test_getitem_setitem_periodindex():
    from pandas import period_range

    N = 50
    rng = period_range('1/1/1990', periods=N, freq='H')
    ts = Series(np.random.randn(N), index=rng)

    def check_roundtrip(key, original):
        # zero out ``key`` on a copy, write ``original`` back, and expect
        # the copy to equal ``ts`` again
        scratch = ts.copy()
        scratch[key] = 0
        scratch[key] = original
        assert_series_equal(scratch, ts)

    # single period given as a string
    assert ts["1990-01-01 04"] == ts[4]
    check_roundtrip("1990-01-01 04", ts[4])

    # string slice
    assert_series_equal(ts["1990-01-01 04":"1990-01-01 07"], ts[4:8])
    check_roundtrip(slice("1990-01-01 04", "1990-01-01 07"), ts[4:8])

    # boolean mask built by comparing the index with strings
    lb = "1990-01-01 04"
    rb = "1990-01-01 07"
    in_range = (ts.index >= lb) & (ts.index <= rb)
    assert_series_equal(ts[in_range], ts[4:8])

    # GH 2782
    assert ts[ts.index[4]] == ts[4]
    assert_series_equal(ts[ts.index[4:8]], ts[4:8])

    # zero out via index labels, restore via positions
    scratch = ts.copy()
    scratch[ts.index[4:8]] = 0
    scratch[4:8] = ts[4:8]
    assert_series_equal(scratch, ts)
|
||||
|
||||
|
||||
# FutureWarning from NumPy.
|
||||
@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
def test_getitem_median_slice_bug():
    # a one-element list holding a slice must index like the bare slice
    index = date_range('20090415', '20090519', freq='2B')
    ser = Series(np.random.randn(13), index=index)

    only_slice = slice(6, 7, None)
    via_list = ser[[only_slice]]
    via_slice = ser[only_slice]
    assert_series_equal(via_list, via_slice)
|
||||
|
||||
|
||||
def test_datetime_indexing():
    # Looking up a timestamp that is absent from a duplicated datetime
    # index raises KeyError; assigning to it adds the entry.
    index = date_range('1/1/2000', '1/7/2000').repeat(3)
    stamp = Timestamp('1/8/2000')
    missing_key = r"^947289600000000000L?$"

    def check(ser):
        with pytest.raises(KeyError, match=missing_key):
            ser[stamp]
        ser[stamp] = 0
        assert ser[stamp] == 0

    # index in ascending order
    check(Series(len(index), index=index))

    # not monotonic
    check(Series(len(index), index=index)[::-1])
|
||||
|
||||
|
||||
"""
|
||||
test duplicates in time series
|
||||
"""
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
def dups():
    # Random Series over a date index with repeats: 2-4 Jan 2000 three
    # times each and 5 Jan 2000 once (ten entries).
    repeated_days = [2, 2, 2, 3, 3, 3, 4, 4, 4, 5]
    dates = [datetime(2000, 1, day) for day in repeated_days]

    return Series(np.random.randn(len(dates)), index=dates)
|
||||
|
||||
|
||||
def test_constructor(dups):
|
||||
assert isinstance(dups, Series)
|
||||
assert isinstance(dups.index, DatetimeIndex)
|
||||
|
||||
|
||||
def test_is_unique_monotonic(dups):
|
||||
assert not dups.index.is_unique
|
||||
|
||||
|
||||
def test_index_unique(dups):
|
||||
uniques = dups.index.unique()
|
||||
expected = DatetimeIndex([datetime(2000, 1, 2), datetime(2000, 1, 3),
|
||||
datetime(2000, 1, 4), datetime(2000, 1, 5)])
|
||||
assert uniques.dtype == 'M8[ns]' # sanity
|
||||
tm.assert_index_equal(uniques, expected)
|
||||
assert dups.index.nunique() == 4
|
||||
|
||||
# #2563
|
||||
assert isinstance(uniques, DatetimeIndex)
|
||||
|
||||
dups_local = dups.index.tz_localize('US/Eastern')
|
||||
dups_local.name = 'foo'
|
||||
result = dups_local.unique()
|
||||
expected = DatetimeIndex(expected, name='foo')
|
||||
expected = expected.tz_localize('US/Eastern')
|
||||
assert result.tz is not None
|
||||
assert result.name == 'foo'
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# NaT, note this is excluded
|
||||
arr = [1370745748 + t for t in range(20)] + [iNaT]
|
||||
idx = DatetimeIndex(arr * 3)
|
||||
tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
|
||||
assert idx.nunique() == 20
|
||||
assert idx.nunique(dropna=False) == 21
|
||||
|
||||
arr = [Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t)
|
||||
for t in range(20)] + [NaT]
|
||||
idx = DatetimeIndex(arr * 3)
|
||||
tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
|
||||
assert idx.nunique() == 20
|
||||
assert idx.nunique(dropna=False) == 21
|
||||
|
||||
|
||||
def test_index_dupes_contains():
    # membership works on an index that holds the same datetime twice
    stamp = datetime(2011, 12, 5, 20, 30)
    doubled = DatetimeIndex([stamp] * 2)
    assert stamp in doubled
|
||||
|
||||
|
||||
def test_duplicate_dates_indexing(dups):
    # Getting and setting by date on a Series whose index has repeats.
    #
    # ``dups`` is a module-scoped fixture shared with other tests, and the
    # final assignment below enlarges the Series, so work on a private
    # copy instead of mutating the shared object.
    ts = dups.copy()

    uniques = ts.index.unique()
    for date in uniques:
        result = ts[date]

        mask = ts.index == date
        total = mask.sum()
        expected = ts[mask]
        # a repeated date selects a Series, a single date a scalar
        if total > 1:
            assert_series_equal(result, expected)
        else:
            assert_almost_equal(result, expected[0])

        # assignment must hit every row carrying that date
        cp = ts.copy()
        cp[date] = 0
        expected = Series(np.where(mask, 0, ts), index=ts.index)
        assert_series_equal(cp, expected)

    with pytest.raises(KeyError, match=r"^947116800000000000L?$"):
        ts[datetime(2000, 1, 6)]

    # new index
    ts[datetime(2000, 1, 6)] = 0
    assert ts[datetime(2000, 1, 6)] == 0
|
||||
|
||||
|
||||
def test_range_slice():
    # string date slices on an index containing a repeated date
    idx = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000',
                         '1/4/2000'])
    ts = Series(np.random.randn(len(idx)), index=idx)

    # open-ended slice starting at the repeated date
    open_ended = ts['1/2/2000':]
    assert_series_equal(open_ended, ts[1:])

    # closed slice: both bounds are included
    closed = ts['1/2/2000':'1/3/2000']
    assert_series_equal(closed, ts[1:4])
|
||||
|
||||
|
||||
def test_groupby_average_dup_values(dups):
|
||||
result = dups.groupby(level=0).mean()
|
||||
expected = dups.groupby(dups.index).mean()
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_indexing_over_size_cutoff():
    # #1821
    # Temporarily sets ``_index._SIZE_CUTOFF`` to 1000 and indexes a
    # 4400-row frame; the previous value is always restored.
    saved_cutoff = _index._SIZE_CUTOFF
    try:
        _index._SIZE_CUTOFF = 1000

        # create large list of non periodic datetime
        sec = timedelta(seconds=1)
        half_sec = timedelta(microseconds=500000)
        d = datetime(2011, 12, 5, 20, 30)
        n = 1100
        dates = []
        for _ in range(n):
            dates.extend([d,
                          d + sec,
                          d + sec + half_sec,
                          d + sec + sec + half_sec])
            d += 3 * sec

        # duplicate some values in the list
        duplicate_positions = np.random.randint(0, len(dates) - 1, 20)
        for p in duplicate_positions:
            dates[p + 1] = dates[p]

        df = DataFrame(np.random.randn(len(dates), 4),
                       index=dates,
                       columns=list('ABCD'))

        timestamp = df.index[n * 3]
        assert timestamp in df.index

        # it works!
        df.loc[timestamp]
        assert len(df.loc[[timestamp]]) > 0
    finally:
        _index._SIZE_CUTOFF = saved_cutoff
|
||||
|
||||
|
||||
def test_indexing_unordered():
    # GH 2437
    # Label lookups on a shuffled copy of a daily Series must agree with
    # the same lookups on the sorted original.
    rng = date_range(start='2011-01-01', end='2011-01-15')
    ts = Series(np.random.rand(len(rng)), index=rng)
    ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])

    # scalar lookups (the unused ``s = str(t)`` local was dropped)
    for t in ts.index:
        expected = ts[t]
        result = ts2[t]
        assert expected == result

    # GH 3448 (ranges)
    def compare(slobj):
        # slice the shuffled series, sort it, and compare with the same
        # slice of the sorted series
        result = ts2[slobj].copy()
        result = result.sort_index()
        expected = ts[slobj]
        assert_series_equal(result, expected)

    compare(slice('2011-01-01', '2011-01-15'))
    compare(slice('2010-12-30', '2011-01-15'))
    compare(slice('2011-01-01', '2011-01-16'))

    # partial ranges
    compare(slice('2011-01-01', '2011-01-6'))
    compare(slice('2011-01-06', '2011-01-8'))
    compare(slice('2011-01-06', '2011-01-12'))

    # single values
    result = ts2['2011'].sort_index()
    expected = ts['2011']
    assert_series_equal(result, expected)

    # diff freq
    rng = date_range(datetime(2005, 1, 1), periods=20, freq='M')
    ts = Series(np.arange(len(rng)), index=rng)
    ts = ts.take(np.random.permutation(20))

    # a year string selects only entries from that year
    result = ts['2005']
    for t in result.index:
        assert t.year == 2005
|
||||
|
||||
|
||||
def test_indexing():
    month_ends = date_range("2001-1-1", periods=20, freq='M')
    ts = Series(np.random.rand(len(month_ends)), index=month_ends)

    # getting

    # GH 3070, make sure semantics work on Series/Frame
    expected = ts['2001']
    expected.name = 'A'

    df = DataFrame(dict(A=ts))
    assert_series_equal(expected, df['2001']['A'])

    # setting
    ts['2001'] = 1
    expected = ts['2001']
    expected.name = 'A'

    df.loc['2001', 'A'] = 1
    assert_series_equal(expected, df['2001']['A'])

    # GH3546 (not including times on the last day)
    hourly = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00',
                        freq='H')
    by_second = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59',
                           freq='S')
    day_edges = [Timestamp('2013-05-31 00:00'),
                 Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))]
    # each partial-date key must select the whole series
    for idx, partial_key in ((hourly, '2013-05'),
                             (by_second, '2013-05'),
                             (day_edges, '2013')):
        ts = Series(lrange(len(idx)), index=idx)
        expected = ts[partial_key]
        assert_series_equal(expected, ts)

    # GH14826, indexing with a seconds resolution string / datetime object
    df = DataFrame(np.random.rand(5, 5),
                   columns=['open', 'high', 'low', 'close', 'volume'],
                   index=date_range('2012-01-02 18:01:00',
                                    periods=5, tz='US/Central', freq='s'))
    third_stamp = df.index[2]
    expected = df.loc[[third_stamp]]

    # this is a single date, so will raise
    with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
        df['2012-01-02 18:01:02']
    msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)"
    with pytest.raises(KeyError, match=msg):
        df[df.index[2]]
|
||||
|
||||
|
||||
"""
|
||||
test NaT support
|
||||
"""
|
||||
|
||||
|
||||
def test_set_none_nan():
    # assigning None or NaN into a datetime Series must store NaT
    series = Series(date_range('1/1/2000', periods=10))

    cases = [
        # (scalar position, slice, position checked after the slice, value)
        (3, slice(3, 5), 4, None),
        (5, slice(5, 7), 6, np.nan),
    ]
    for position, span, span_check, missing in cases:
        series[position] = missing
        assert series[position] is NaT

        series[span] = missing
        assert series[span_check] is NaT
|
||||
|
||||
|
||||
def test_nat_operations():
    """median/min/max of a timedelta Series with one ``NaT`` give the other value."""
    # GH 8617
    deltas = Series([0, pd.NaT], dtype='m8[ns]')
    only_value = deltas[0]
    for reducer in (deltas.median, deltas.min, deltas.max):
        assert reducer() == only_value
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ["round", "floor", "ceil"])
@pytest.mark.parametrize('freq', ["s", "5s", "min", "5min", "h", "5h"])
def test_round_nat(method, freq):
    """``.dt.round/floor/ceil`` on a Series holding only ``NaT`` returns ``NaT``."""
    # GH14940
    nat_only = Series([pd.NaT])
    expected = Series(pd.NaT)
    rounded = getattr(nat_only.dt, method)(freq)
    assert_series_equal(rounded, expected)
|
||||
@@ -1,37 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.compat import lrange, range
|
||||
|
||||
from pandas import Series
|
||||
from pandas.util.testing import assert_almost_equal, assert_series_equal
|
||||
|
||||
|
||||
def test_iloc():
    """``iloc`` with scalars, a slice and an integer list on an even-int index."""
    ser = Series(np.random.randn(10), index=lrange(0, 20, 2))

    # positional scalar access matches label access on the same element
    for pos in range(len(ser)):
        by_position = ser.iloc[pos]
        by_label = ser[ser.index[pos]]
        assert_almost_equal(by_position, by_label)

    # pass a slice
    sliced = ser.iloc[slice(1, 3)]
    assert_series_equal(sliced, ser.loc[2:4])

    # test slice is a view
    sliced[:] = 0
    assert (ser[1:3] == 0).all()

    # list of integers
    positions = [0, 2, 3, 4, 5]
    picked = ser.iloc[positions]
    assert_series_equal(picked, ser.reindex(ser.index[positions]))
|
||||
|
||||
|
||||
def test_iloc_nonunique():
    """``iloc`` is purely positional even when index labels repeat."""
    dup_indexed = Series([0, 1, 2], index=[0, 1, 0])
    last_value = dup_indexed.iloc[2]
    assert last_value == 2
|
||||
@@ -1,840 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
""" test get/set & misc """
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange, range
|
||||
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical, DataFrame, MultiIndex, Series, Timedelta, Timestamp)
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
from pandas.tseries.offsets import BDay
|
||||
|
||||
|
||||
def test_basic_indexing():
    """Out-of-range ints and missing labels raise on a duplicate-label Series."""
    ser = Series(np.random.randn(5), index=['a', 'b', 'a', 'a', 'b'])

    # unsorted index
    with pytest.raises(IndexError, match="index out of bounds"):
        ser[5]
    with pytest.raises(
            IndexError,
            match="index 5 is out of bounds for axis 0 with size 5"):
        ser[5] = 0

    with pytest.raises(KeyError, match=r"^'c'$"):
        ser['c']

    # same checks after sorting; the messages accepted here are looser
    ser = ser.sort_index()

    with pytest.raises(IndexError, match=r"index out of bounds|^5$"):
        ser[5]
    with pytest.raises(
            IndexError,
            match=r"index 5 is out of bounds for axis (0|1) with size 5|^5$"):
        ser[5] = 0
|
||||
|
||||
|
||||
def test_basic_getitem_with_labels(test_data):
    """``[]`` with label arrays and label slices agrees with ``reindex``/``loc``."""
    labels = test_data.ts.index[[5, 10, 15]]

    picked = test_data.ts[labels]
    assert_series_equal(picked, test_data.ts.reindex(labels))

    first, last = labels[0], labels[2]
    picked = test_data.ts[first:last]
    assert_series_equal(picked, test_data.ts.loc[first:last])

    # integer indexes, be careful
    s = Series(np.random.randn(10), index=lrange(0, 20, 2))
    inds = [0, 2, 5, 7, 8]
    arr_inds = np.array([0, 2, 5, 7, 8])
    # both the list and the array contain labels missing from the index
    for keys in (inds, arr_inds):
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            picked = s[keys]
        assert_series_equal(picked, s.reindex(keys))

    # GH12089
    # with tz for values
    s = Series(pd.date_range("2011-01-01", periods=3, tz="US/Eastern"),
               index=['a', 'b', 'c'])
    expected = Timestamp('2011-01-01', tz='US/Eastern')
    assert s.loc['a'] == expected
    assert s.iloc[0] == expected
    assert s['a'] == expected
|
||||
|
||||
|
||||
def test_getitem_setitem_ellipsis():
    """``ser[...]`` returns the whole Series and ``ser[...] = x`` sets all values."""
    ser = Series(np.random.randn(10))

    # result is discarded; presumably a smoke check that np.fix accepts a Series
    np.fix(ser)

    whole = ser[...]
    assert_series_equal(whole, ser)

    ser[...] = 5
    assert (whole == 5).all()
|
||||
|
||||
|
||||
def test_getitem_get(test_data):
    """``[]`` and ``.get`` agree for labels and positions; ``get(None)`` is None."""
    float_series = test_data.series
    obj_series = test_data.objSeries

    float_label = float_series.index[5]
    obj_label = obj_series.index[5]

    # label access: [] vs get
    assert float_series[float_label] == float_series.get(float_label)
    assert obj_series[obj_label] == obj_series.get(obj_label)

    # label access vs positional access
    assert float_series[float_label] == float_series[5]
    assert obj_series[obj_label] == obj_series[5]

    assert float_series.get(-1) == float_series.get(float_series.index[-1])
    assert float_series[5] == float_series.get(float_series.index[5])

    # missing
    missing = test_data.ts.index[0] - BDay()
    with pytest.raises(KeyError, match=r"Timestamp\('1999-12-31 00:00:00'\)"):
        test_data.ts[missing]

    # None
    # GH 5652
    for candidate in [Series(), Series(index=list('abc'))]:
        assert candidate.get(None) is None
|
||||
|
||||
|
||||
def test_getitem_fancy(test_data):
    """Selecting with an integer list keeps labels and values lined up."""
    positions = [1, 2, 3]
    float_pick = test_data.series[positions]
    obj_pick = test_data.objSeries[positions]

    # element 2 of the source is element 1 of the selection
    assert test_data.series.index[2] == float_pick.index[1]
    assert test_data.objSeries.index[2] == obj_pick.index[1]
    assert test_data.series[2] == float_pick[1]
    assert test_data.objSeries[2] == obj_pick[1]
|
||||
|
||||
|
||||
def test_getitem_generator(test_data):
    """A generator or iterator of booleans works as a mask in ``[]``."""
    positive_gen = (value > 0 for value in test_data.series)
    from_generator = test_data.series[positive_gen]
    from_iterator = test_data.series[iter(test_data.series > 0)]

    expected = test_data.series[test_data.series > 0]
    assert_series_equal(from_generator, expected)
    assert_series_equal(from_iterator, expected)
|
||||
|
||||
|
||||
def test_type_promotion():
    """Growing an empty Series with mixed-type values matches direct construction."""
    # GH12599
    grown = pd.Series()
    grown["a"] = pd.Timestamp("2016-01-01")
    grown["b"] = 3.0
    grown["c"] = "foo"

    direct = Series([pd.Timestamp("2016-01-01"), 3.0, "foo"],
                    index=["a", "b", "c"])
    assert_series_equal(grown, direct)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'result_1, duplicate_item, expected_1',
    [
        [
            pd.Series({1: 12, 2: [1, 2, 2, 3]}),
            pd.Series({1: 313}),
            pd.Series({1: 12, }, dtype=object),
        ],
        [
            pd.Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
            pd.Series({1: [1, 2, 3]}),
            pd.Series({1: [1, 2, 3], }),
        ],
    ])
def test_getitem_with_duplicates_indices(
        result_1, duplicate_item, expected_1):
    """``[]`` on a duplicated label returns all rows; a unique label is a scalar."""
    # GH 17610
    with_dup = result_1.append(duplicate_item)
    expected_dups = expected_1.append(duplicate_item)

    # label 1 is duplicated after the append, label 2 stays unique
    assert_series_equal(with_dup[1], expected_dups)
    assert with_dup[2] == result_1[2]
|
||||
|
||||
|
||||
def test_getitem_out_of_bounds(test_data):
    """Out-of-range integer access raises ``IndexError``."""
    expected_msg = "index out of bounds"

    # don't segfault, GH #495
    one_past_end = len(test_data.ts)
    with pytest.raises(IndexError, match=expected_msg):
        test_data.ts[one_past_end]

    # GH #917
    empty = Series([])
    with pytest.raises(IndexError, match=expected_msg):
        empty[-1]
|
||||
|
||||
|
||||
def test_getitem_setitem_integers():
    """``iloc`` get/set on a string-labelled Series hits the matching label."""
    # caused bug without test
    ser = Series([1, 2, 3], ['a', 'b', 'c'])

    assert ser.iloc[0] == ser['a']

    ser.iloc[0] = 5
    tm.assert_almost_equal(ser['a'], 5)
|
||||
|
||||
|
||||
def test_getitem_box_float64(test_data):
    """Scalar access on the ``ts`` fixture yields ``np.float64``."""
    element = test_data.ts[5]
    assert isinstance(element, np.float64)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'arr',
    [
        np.random.randn(10),
        tm.makeDateIndex(10, name='a').tz_localize(
            tz='US/Eastern'),
    ])
def test_get(arr):
    """``Series.get`` with scalars, lists and slices; missing keys give ``None``."""
    # GH 21260
    even_labels = [2 * i for i in range(len(arr))]
    s = Series(arr, index=even_labels)
    assert s.get(4) == s.iloc[2]

    tm.assert_series_equal(s.get([4, 6]), s.iloc[[2, 3]])
    tm.assert_series_equal(s.get(slice(2)), s.iloc[[0, 1]])

    # keys that are not labels of the integer index
    assert s.get(-1) is None
    assert s.get(s.index.max() + 1) is None

    # string-labelled index
    s = Series(arr[:6], index=list('abcdef'))
    assert s.get('c') == s.iloc[2]

    tm.assert_series_equal(s.get(slice('b', 'd')), s.iloc[[1, 2, 3]])

    assert s.get('Z') is None

    # integers are treated as positions here
    assert s.get(4) == s.iloc[4]
    assert s.get(-1) == s.iloc[-1]
    assert s.get(len(s)) is None

    # GH 21257
    every_other = pd.Series(arr)[::2]
    assert every_other.get(1) is None
|
||||
|
||||
|
||||
def test_series_box_timestamp():
    """Scalar access on datetime values returns ``pd.Timestamp``."""
    bdays = pd.date_range('20090415', '20090519', freq='B')

    # default integer index
    plain = Series(bdays)
    assert isinstance(plain[5], pd.Timestamp)

    # datetime index over the same values
    bdays = pd.date_range('20090415', '20090519', freq='B')
    self_indexed = Series(bdays, index=bdays)
    assert isinstance(self_indexed[5], pd.Timestamp)

    assert isinstance(self_indexed.iat[5], pd.Timestamp)
|
||||
|
||||
|
||||
def test_getitem_ambiguous_keyerror():
    """An integer absent from an integer index raises ``KeyError`` for ``[]`` and ``loc``."""
    even_indexed = Series(lrange(10), index=lrange(0, 20, 2))

    with pytest.raises(KeyError, match=r"^1L?$"):
        even_indexed[1]

    with pytest.raises(KeyError, match=r"^1L?$"):
        even_indexed.loc[1]
|
||||
|
||||
|
||||
def test_getitem_unordered_dup():
    """A unique label in an unsorted index with duplicates returns a scalar."""
    ser = Series(lrange(5), index=['c', 'a', 'a', 'b', 'b'])
    assert is_scalar(ser['c'])
    assert ser['c'] == 0
|
||||
|
||||
|
||||
def test_getitem_dups_with_missing():
    """``[]`` and ``loc`` agree for a label list with a missing label."""
    # breaks reindex, so need to use .loc internally
    # GH 4246
    ser = Series([1, 2, 3, 4], ['foo', 'bar', 'foo', 'bah'])
    wanted = ['foo', 'bar', 'bah', 'bam']

    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        via_loc = ser.loc[wanted]

    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        via_getitem = ser[wanted]

    assert_series_equal(via_getitem, via_loc)
|
||||
|
||||
|
||||
def test_getitem_dups():
    """Selecting a duplicated label returns a Series of every match."""
    ser = Series(range(5), index=['A', 'A', 'B', 'C', 'C'], dtype=np.int64)
    both_c = Series([3, 4], index=['C', 'C'], dtype=np.int64)
    assert_series_equal(ser['C'], both_c)
|
||||
|
||||
|
||||
def test_setitem_ambiguous_keyerror():
    """Setting an integer label absent from the index appends a new entry."""
    base = Series(lrange(10), index=lrange(0, 20, 2))

    # equivalent of an append
    via_getitem = base.copy()
    via_getitem[1] = 5
    appended = base.append(Series([5], index=[1]))
    assert_series_equal(via_getitem, appended)

    via_loc = base.copy()
    via_loc.loc[1] = 5
    appended = base.append(Series([5], index=[1]))
    assert_series_equal(via_loc, appended)
|
||||
|
||||
|
||||
def test_getitem_dataframe():
    """Indexing a Series with a boolean DataFrame raises ``TypeError``."""
    labels = list(range(10))
    ser = pd.Series(10, index=labels)
    frame = pd.DataFrame(labels, index=labels)

    expected_msg = ("Indexing a Series with DataFrame is not supported,"
                    " use the appropriate DataFrame column")
    with pytest.raises(TypeError, match=expected_msg):
        ser[frame > 5]
|
||||
|
||||
|
||||
def test_setitem(test_data):
    """``[]`` assignment by label, position, mask and slice, plus enlargement."""
    ts = test_data.ts

    # write NaN by label, by integer list and by single integer
    ts[ts.index[5]] = np.NaN
    ts[[1, 2, 17]] = np.NaN
    ts[6] = np.NaN
    assert np.isnan(ts[6])
    assert np.isnan(ts[2])

    # overwrite every NaN through a boolean mask
    ts[np.isnan(ts)] = 5
    assert not np.isnan(ts[2])

    # caught this bug when writing tests
    stepped = Series(tm.makeIntIndex(20).astype(float),
                     index=tm.makeIntIndex(20))

    stepped[::2] = 0
    assert (stepped[::2] == 0).all()

    # set item that's not contained
    enlarged = test_data.series.copy()
    enlarged['foobar'] = 1

    extra = Series([1], index=['foobar'], name='series')
    assert_series_equal(enlarged, test_data.series.append(extra))

    # Test for issue #10193
    key = pd.Timestamp('2012-01-01')
    from_empty = pd.Series()
    from_empty[key] = 47
    assert_series_equal(from_empty, pd.Series(47, [key]))

    from_empty = pd.Series([], pd.DatetimeIndex([], freq='D'))
    from_empty[key] = 47
    assert_series_equal(from_empty,
                        pd.Series(47, pd.DatetimeIndex([key], freq='D')))
|
||||
|
||||
|
||||
def test_setitem_dtypes():
    """Writing NaN into int/bool Series changes the dtype to match the result."""
    # change dtypes
    # GH 4463
    expected = Series([np.nan, 2, 3])

    # the same assignment through iloc, loc and plain []
    via_iloc = Series([1, 2, 3])
    via_iloc.iloc[0] = np.nan
    assert_series_equal(via_iloc, expected)

    via_loc = Series([1, 2, 3])
    via_loc.loc[0] = np.nan
    assert_series_equal(via_loc, expected)

    via_getitem = Series([1, 2, 3])
    via_getitem[0] = np.nan
    assert_series_equal(via_getitem, expected)

    # boolean inputs
    single_bool = Series([False])
    single_bool.loc[0] = np.nan
    assert_series_equal(single_bool, Series([np.nan]))

    two_bools = Series([False, True])
    two_bools.loc[0] = np.nan
    assert_series_equal(two_bools, Series([np.nan, 1.0]))
|
||||
|
||||
|
||||
def test_set_value(test_data):
    """``set_value`` warns, mutates in place and returns the same object."""
    existing_label = test_data.ts.index[10]
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        returned = test_data.ts.set_value(existing_label, 0)
    assert returned is test_data.ts
    assert test_data.ts[existing_label] == 0

    # equiv
    enlarged = test_data.series.copy()
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        returned = enlarged.set_value('foobar', 0)
    assert returned is enlarged
    assert returned.index[-1] == 'foobar'
    assert returned['foobar'] == 0

    # the same enlargement through .loc
    enlarged = test_data.series.copy()
    enlarged.loc['foobar'] = 0
    assert enlarged.index[-1] == 'foobar'
    assert enlarged['foobar'] == 0
|
||||
|
||||
|
||||
def test_setslice(test_data):
    """A positional slice has matching length and a unique index."""
    window = test_data.ts[5:20]
    assert len(window) == len(window.index)
    assert window.index.is_unique is True
|
||||
|
||||
|
||||
# Ignore the NumPy FutureWarning ("Using a non-tuple ...") raised by
# indexing with [slice(None, 5)] below.
@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
def test_basic_getitem_setitem_corner(test_data):
    """Tuple keys and lists containing slices on a non-MultiIndex Series."""
    ts = test_data.ts

    # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
    tuple_msg = "Can only tuple-index with a MultiIndex"
    with pytest.raises(ValueError, match=tuple_msg):
        ts[:, 2]
    with pytest.raises(ValueError, match=tuple_msg):
        ts[:, 2] = 2

    # weird lists. [slice(0, 5)] will work but not two slices
    from_list_of_slice = ts[[slice(None, 5)]]
    assert_series_equal(from_list_of_slice, ts[:5])

    # OK
    unhashable_msg = r"unhashable type(: 'slice')?"
    with pytest.raises(TypeError, match=unhashable_msg):
        ts[[5, slice(None, None)]]
    with pytest.raises(TypeError, match=unhashable_msg):
        ts[[5, slice(None, None)]] = 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize('tz', ['US/Eastern', 'UTC', 'Asia/Tokyo'])
def test_setitem_with_tz(tz):
    """Scalar and vector assignment into a tz-aware Series keeps the tz dtype.

    Each of ``[]``, ``.loc`` and ``.iloc`` is exercised on a fresh copy of
    ``orig`` so no case depends on state left behind by another.
    """
    orig = pd.Series(pd.date_range('2016-01-01', freq='H', periods=3,
                                   tz=tz))
    assert orig.dtype == 'datetime64[ns, {0}]'.format(tz)

    # scalar
    s = orig.copy()
    s[1] = pd.Timestamp('2011-01-01', tz=tz)
    exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
                     pd.Timestamp('2011-01-01 00:00', tz=tz),
                     pd.Timestamp('2016-01-01 02:00', tz=tz)])
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.loc[1] = pd.Timestamp('2011-01-01', tz=tz)
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz)
    tm.assert_series_equal(s, exp)

    # vector
    vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
                      pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
    assert vals.dtype == 'datetime64[ns, {0}]'.format(tz)

    # start from a fresh copy, like the .loc/.iloc cases below
    s = orig.copy()
    s[[1, 2]] = vals
    exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
                     pd.Timestamp('2011-01-01 00:00', tz=tz),
                     pd.Timestamp('2012-01-01 00:00', tz=tz)])
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.loc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.iloc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)
|
||||
|
||||
|
||||
def test_setitem_with_tz_dst():
    """Scalar and vector assignment into a US/Eastern Series on 2016-11-06.

    The expected values below carry both -04:00 and -05:00 offsets, so the
    hourly range spans an offset change. Each of ``[]``, ``.loc`` and
    ``.iloc`` is exercised on a fresh copy of ``orig``.
    """
    # GH XXX
    tz = 'US/Eastern'
    orig = pd.Series(pd.date_range('2016-11-06', freq='H', periods=3,
                                   tz=tz))
    assert orig.dtype == 'datetime64[ns, {0}]'.format(tz)

    # scalar
    s = orig.copy()
    s[1] = pd.Timestamp('2011-01-01', tz=tz)
    exp = pd.Series([pd.Timestamp('2016-11-06 00:00-04:00', tz=tz),
                     pd.Timestamp('2011-01-01 00:00-05:00', tz=tz),
                     pd.Timestamp('2016-11-06 01:00-05:00', tz=tz)])
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.loc[1] = pd.Timestamp('2011-01-01', tz=tz)
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz)
    tm.assert_series_equal(s, exp)

    # vector
    vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
                      pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
    assert vals.dtype == 'datetime64[ns, {0}]'.format(tz)

    # start from a fresh copy, like the .loc/.iloc cases below
    s = orig.copy()
    s[[1, 2]] = vals
    exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz),
                     pd.Timestamp('2011-01-01 00:00', tz=tz),
                     pd.Timestamp('2012-01-01 00:00', tz=tz)])
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.loc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.iloc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)
|
||||
|
||||
|
||||
def test_categorial_assigning_ops():
    """Assigning existing categories (or NaN) into a categorical Series."""
    orig = Series(Categorical(["b", "b"], categories=["a", "b"]))

    # overwrite everything
    target = orig.copy()
    target[:] = "a"
    tm.assert_series_equal(
        target, Series(Categorical(["a", "a"], categories=["a", "b"])))

    # overwrite only the second element: by position
    target = orig.copy()
    target[1] = "a"
    second_a = Series(Categorical(["b", "a"], categories=["a", "b"]))
    tm.assert_series_equal(target, second_a)

    # ... by boolean array built from the index
    target = orig.copy()
    target[target.index > 0] = "a"
    second_a = Series(Categorical(["b", "a"], categories=["a", "b"]))
    tm.assert_series_equal(target, second_a)

    # ... by boolean list
    target = orig.copy()
    target[[False, True]] = "a"
    second_a = Series(Categorical(["b", "a"], categories=["a", "b"]))
    tm.assert_series_equal(target, second_a)

    # ... by label on a relabelled index
    target = orig.copy()
    target.index = ["x", "y"]
    target["y"] = "a"
    second_a = Series(Categorical(["b", "a"], categories=["a", "b"]),
                      index=["x", "y"])
    tm.assert_series_equal(target, second_a)

    # ensure that one can set something to np.nan
    numeric = Series(Categorical([1, 2, 3]))
    with_nan = Series(Categorical([1, np.nan, 3], categories=[1, 2, 3]))
    numeric[1] = np.nan
    tm.assert_series_equal(numeric, with_nan)
|
||||
|
||||
|
||||
def test_slice(test_data):
    """Positional slices exclude outside elements, keep labels, and are views."""
    series = test_data.series
    num_slice = series[10:20]
    num_slice_end = series[-10:]
    obj_slice = test_data.objSeries[10:20]

    # element 9 sits just before the slice start
    assert series.index[9] not in num_slice.index
    assert test_data.objSeries.index[9] not in obj_slice.index

    assert len(num_slice) == len(num_slice.index)
    first_label = num_slice.index[0]
    assert series[first_label] == num_slice[first_label]

    assert num_slice.index[1] == series.index[11]
    assert tm.equalContents(num_slice_end, np.array(series)[-10:])

    # Test return view.
    view = series[10:20]
    view[:] = 0

    assert (series[10:20] == 0).all()
|
||||
|
||||
|
||||
def test_slice_can_reorder_not_uniquely_indexed():
    """Reversing a Series with duplicate labels by slicing does not raise."""
    dup_labelled = Series(1, index=['a', 'a', 'b', 'b', 'c'])
    dup_labelled[::-1]  # it works!
|
||||
|
||||
|
||||
def test_ix_setitem(test_data):
    """``loc``/``iloc`` assignment matches the equivalent ``[]`` assignment."""
    labels = test_data.series.index[[3, 4, 7]]

    via_indexer = test_data.series.copy()
    via_indexer.loc[labels] = 5

    via_getitem = test_data.series.copy()
    via_getitem[[3, 4, 7]] = 5
    assert_series_equal(via_indexer, via_getitem)

    via_indexer.iloc[5:10] = 10
    via_getitem[5:10] = 10
    assert_series_equal(via_indexer, via_getitem)

    # set slice with indices
    d1, d2 = test_data.series.index[[5, 15]]
    via_indexer.loc[d1:d2] = 6
    via_getitem[5:16] = 6  # because it's inclusive
    assert_series_equal(via_indexer, via_getitem)

    # set index value
    test_data.series.loc[d1] = 4
    test_data.series.loc[d2] = 6
    assert test_data.series[d1] == 4
    assert test_data.series[d2] == 6
|
||||
|
||||
|
||||
def test_setitem_na():
    """Slice-assigning NaN into int and bool Series."""
    # these induce dtype changes
    ints = Series([2, 3, 4, 5, 6, 7, 8, 9, 10])
    ints[::2] = np.nan
    assert_series_equal(
        ints,
        Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]))

    # gets coerced to float, right?
    bools = Series([True, True, False, False])
    bools[::2] = np.nan
    assert_series_equal(bools, Series([np.nan, 1, np.nan, 0]))

    counting = Series(np.arange(10))
    counting[:5] = np.nan
    assert_series_equal(
        counting,
        Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]))
|
||||
|
||||
|
||||
def test_timedelta_assignment():
    """Assigning ``timedelta``/``np.timedelta64`` values through ``loc``."""
    one_day = Timedelta('1 days')

    # GH 8209
    ser = Series([])
    ser.loc['B'] = timedelta(1)
    tm.assert_series_equal(ser, Series(one_day, index=['B']))

    ser = ser.reindex(ser.index.insert(0, 'A'))
    tm.assert_series_equal(
        ser, Series([np.nan, one_day], index=['A', 'B']))

    filled = ser.fillna(timedelta(1))
    both_one_day = Series(one_day, index=['A', 'B'])
    tm.assert_series_equal(filled, both_one_day)

    ser.loc['A'] = timedelta(1)
    tm.assert_series_equal(ser, both_one_day)

    # GH 14155
    ten_minutes = np.timedelta64(10, 'm')
    twenty_minutes = np.timedelta64(20, 'm')
    ser = Series(10 * [ten_minutes])
    ser.loc[[1, 2, 3]] = twenty_minutes
    expected = pd.Series(10 * [ten_minutes])
    expected.loc[[1, 2, 3]] = pd.Timedelta(twenty_minutes)
    tm.assert_series_equal(ser, expected)
|
||||
|
||||
|
||||
def test_underlying_data_conversion():
    """Updating a column through a chained access is visible in the frame.

    Covers GH 4080 (``update`` on a column of a MultiIndexed frame),
    GH 3970 (chained ``iloc`` assignment with a slice taken in between)
    and GH 3217 (``update`` that changes a column from NaN to strings).
    """
    # GH 4080
    df = DataFrame({c: [1, 2, 3] for c in ['a', 'b', 'c']})
    df.set_index(['a', 'b', 'c'], inplace=True)
    s = Series([1], index=[(2, 2, 2)])
    df['val'] = 0
    df['val'].update(s)

    expected = DataFrame(
        dict(a=[1, 2, 3], b=[1, 2, 3], c=[1, 2, 3], val=[0, 1, 0]))
    expected.set_index(['a', 'b', 'c'], inplace=True)
    tm.assert_frame_equal(df, expected)

    # GH 3970
    # these are chained assignments as well; the option is scoped with a
    # context manager so it is restored even if an assertion fails
    with pd.option_context('chained_assignment', None):
        df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
        df["cc"] = 0.0

        ck = [True] * len(df)

        df["bb"].iloc[0] = .13

        # TODO: unused
        # kept on purpose: a slice is taken between the two assignments
        df_tmp = df.iloc[ck]  # noqa

        df["bb"].iloc[0] = .15
        assert df['bb'].iloc[0] == 0.15

    # GH 3217
    df = DataFrame(dict(a=[1, 3], b=[np.nan, 2]))
    df['c'] = np.nan
    df['c'].update(pd.Series(['foo'], index=[0]))

    expected = DataFrame(dict(a=[1, 3], b=[np.nan, 2], c=['foo', np.nan]))
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_preserve_refs(test_data):
    """Writing into an integer-list selection leaves the source untouched."""
    picked = test_data.ts[[5, 10, 15]]
    picked[1] = np.NaN
    # position 1 of the selection came from position 10 of the source
    assert not np.isnan(test_data.ts[10])
|
||||
|
||||
|
||||
def test_cast_on_putmask():
    """Boolean-list assignment from a shorter int64 Series keeps int64 dtype."""
    # GH 2746

    # need to upcast
    target = Series([1, 2], index=[1, 2], dtype='int64')
    replacement = Series([0], index=[1], dtype='int64')
    target[[True, False]] = replacement

    assert_series_equal(target, Series([0, 2], index=[1, 2], dtype='int64'))
|
||||
|
||||
|
||||
def test_type_promote_putmask():
    """Masked assignment of a different type keeps values aligned by label."""
    # GH8387: test that changing types does not break alignment
    ts = Series(np.random.randn(100), index=np.arange(100, 0, -1)).round(5)
    left = ts.copy()
    positive = ts > 0
    as_text = ts[positive].copy().map(str)
    left[positive] = as_text
    assert_series_equal(left, ts.map(lambda t: str(t) if t > 0 else t))

    # integers replaced by their string form
    ints = Series([0, 1, 2, 0])
    nonzero = ints > 0
    ints[nonzero] = ints[nonzero].map(str)
    assert_series_equal(ints, Series([0, '1', '2', 0]))

    # assigning a selection back onto itself is a no-op
    mixed = Series([0, 'foo', 'bar', 0])
    middle = Series([False, True, True, False])
    mixed[middle] = mixed[middle]
    assert_series_equal(mixed, Series([0, 'foo', 'bar', 0]))
|
||||
|
||||
|
||||
def test_multilevel_preserve_name():
    """Selecting a first-level key of a MultiIndexed Series keeps its name."""
    first_level = ['foo', 'bar', 'baz', 'qux']
    second_level = ['one', 'two', 'three']
    index = MultiIndex(levels=[first_level, second_level],
                       codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                              [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                       names=['first', 'second'])
    named = Series(np.random.randn(len(index)), index=index, name='sth')

    via_getitem = named['foo']
    via_loc = named.loc['foo']
    assert via_getitem.name == named.name
    assert via_loc.name == named.name
|
||||
|
||||
|
||||
def test_setitem_scalar_into_readonly_backing_data():
    """Scalar assignment into a Series over a read-only array raises."""
    # GH14359: test that you cannot mutate a read only buffer

    backing = np.zeros(5)
    backing.flags.writeable = False  # make the array immutable
    series = Series(backing)

    expected_msg = "assignment destination is read-only"
    for pos in range(len(series)):
        with pytest.raises(ValueError, match=expected_msg):
            series[pos] = 1

        # the backing array must still hold its original value
        assert backing[pos] == 0
|
||||
|
||||
|
||||
def test_setitem_slice_into_readonly_backing_data():
    """Slice assignment into a Series over a read-only array raises."""
    # GH14359: test that you cannot mutate a read only buffer

    backing = np.zeros(5)
    backing.flags.writeable = False  # make the array immutable
    series = Series(backing)

    with pytest.raises(ValueError,
                       match="assignment destination is read-only"):
        series[1:3] = 1

    # nothing in the backing array may have changed
    assert not backing.any()
|
||||
|
||||
|
||||
"""
miscellaneous methods
"""
|
||||
|
||||
|
||||
def test_select(test_data):
    """``Series.select`` (deprecated) filters by a predicate on index labels."""
    # deprecated: gh-12410
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        ts = test_data.ts
        midpoint = len(ts) // 2

        # labels at or after the midpoint label
        selected = ts.select(lambda x: x >= ts.index[midpoint])
        assert_series_equal(selected, ts.reindex(ts.index[midpoint:]))

        # labels whose weekday() is 2
        selected = ts.select(lambda x: x.weekday() == 2)
        assert_series_equal(selected, ts[ts.index.weekday == 2])
|
||||
|
||||
|
||||
def test_pop():
    """``Series.pop`` returns the value and removes the label in place."""
    # GH 6600
    frame = DataFrame({'A': 0, 'B': np.arange(5, dtype='int64'), 'C': 0, })
    last_row = frame.iloc[4]

    popped = last_row.pop('B')
    assert popped == 4

    remaining = Series([0, 0], index=['A', 'C'], name=4)
    assert_series_equal(last_row, remaining)
|
||||
|
||||
|
||||
def test_take():
    """``Series.take`` with positive, negative and out-of-range positions."""
    ser = Series([-1, 5, 6, 2, 4])

    taken = ser.take([1, 3, 4])
    tm.assert_series_equal(taken, Series([5, 2, 4], index=[1, 3, 4]))

    # negative positions count from the end
    taken = ser.take([-1, 3, 4])
    tm.assert_series_equal(taken, Series([4, 2, 4], index=[4, 3, 4]))

    msg = "index {} is out of bounds for size 5"
    for bad_position, positions in ((10, [1, 10]), (5, [2, 5])):
        with pytest.raises(IndexError, match=msg.format(bad_position)):
            ser.take(positions)

    # passing ``convert`` triggers a FutureWarning
    with tm.assert_produces_warning(FutureWarning):
        ser.take([-1, 3, 4], convert=False)
|
||||
|
||||
|
||||
def test_take_categorical():
    """``take`` with negative positions on a categorical Series."""
    # https://github.com/pandas-dev/pandas/issues/20664
    letters = Series(pd.Categorical(['a', 'b', 'c']))
    taken = letters.take([-2, -2, 0])

    expected_values = pd.Categorical(['b', 'b', 'a'],
                                     categories=['a', 'b', 'c'])
    assert_series_equal(taken, Series(expected_values, index=[1, 1, 0]))
|
||||
|
||||
|
||||
def test_head_tail(test_data):
    """``head``/``tail`` match the equivalent positional slices."""
    series = test_data.series

    # default size is five rows
    assert_series_equal(series.head(), series[:5])
    assert_series_equal(series.head(0), series[0:0])

    assert_series_equal(series.tail(), series[-5:])
    assert_series_equal(series.tail(0), series[0:0])
|
||||
@@ -1,168 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Series, Timestamp
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val,expected", [
    (2**63 - 1, 3),
    (2**63, 4),
])
def test_loc_uint64(val, expected):
    """``loc`` finds labels on either side of the int64 maximum."""
    # see gh-19399
    big_labelled = Series({2**63 - 1: 3, 2**63: 4})
    found = big_labelled.loc[val]
    assert found == expected
|
||||
|
||||
|
||||
def test_loc_getitem(test_data):
    """``loc``/``iloc`` reads agree with ``reindex``, ``truncate`` and ``[]``."""
    series, ts = test_data.series, test_data.ts

    labels = series.index[[3, 4, 7]]
    assert_series_equal(series.loc[labels], series.reindex(labels))
    assert_series_equal(series.iloc[5::2], series[5::2])

    # slice with indices
    d1, d2 = ts.index[[5, 15]]
    assert_series_equal(ts.loc[d1:d2], ts.truncate(d1, d2))

    # boolean
    above_median = series > series.median()
    assert_series_equal(series.loc[above_median], series[above_median])

    # ask for index value
    assert ts.loc[d1] == ts[d1]
    assert ts.loc[d2] == ts[d2]
|
||||
|
||||
|
||||
def test_loc_getitem_not_monotonic(test_data):
    """Label slicing a reordered datetime-indexed Series raises ``KeyError``."""
    d1, d2 = test_data.ts.index[[5, 15]]

    # every other row, then reordered
    shuffled = test_data.ts[::2][[1, 2, 0]]

    expected_msg = r"Timestamp\('2000-01-10 00:00:00'\)"
    with pytest.raises(KeyError, match=expected_msg):
        shuffled.loc[d1:d2]
    with pytest.raises(KeyError, match=expected_msg):
        shuffled.loc[d1:d2] = 0
|
||||
|
||||
|
||||
def test_loc_getitem_setitem_integer_slice_keyerrors():
    """Integer slices via ``iloc``/``loc`` on monotonic and reordered indexes."""
    s = Series(np.random.randn(10), index=lrange(0, 20, 2))

    # this is OK
    zeroed = s.copy()
    zeroed.iloc[4:10] = 0
    assert (zeroed.iloc[4:10] == 0).all()

    # so is this
    zeroed = s.copy()
    zeroed.iloc[3:11] = 0
    assert (zeroed.iloc[3:11] == 0).values.all()

    by_position = s.iloc[2:6]
    by_label = s.loc[3:11]
    expected = s.reindex([4, 6, 8, 10])

    assert_series_equal(by_position, expected)
    assert_series_equal(by_label, expected)

    # non-monotonic, raise KeyError
    reordered = s.iloc[lrange(5) + lrange(5, 10)[::-1]]
    with pytest.raises(KeyError, match=r"^3L?$"):
        reordered.loc[3:11]
    with pytest.raises(KeyError, match=r"^3L?$"):
        reordered.loc[3:11] = 0
|
||||
|
||||
|
||||
def test_loc_getitem_iterator(test_data):
    """``loc`` accepts an iterator of labels."""
    first_ten_labels = iter(test_data.series.index[:10])
    selected = test_data.series.loc[first_ten_labels]
    assert_series_equal(selected, test_data.series[:10])
|
||||
|
||||
|
||||
def test_loc_setitem_boolean(test_data):
    """``loc`` assignment with a boolean mask matches ``[]`` assignment.

    Both sides work on copies, so the fixture Series is not modified by
    this test.
    """
    mask = test_data.series > test_data.series.median()

    result = test_data.series.copy()
    result.loc[mask] = 0

    # copy here too: assigning through an alias would mutate the fixture
    expected = test_data.series.copy()
    expected[mask] = 0
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_setitem_corner(test_data):
    """``loc`` assignment with a label list; an unknown label raises ``KeyError``."""
    known_labels = list(test_data.series.index[[5, 8, 12]])
    test_data.series.loc[known_labels] = 5

    with pytest.raises(KeyError, match=r"\['foo'\] not in index"):
        test_data.series.loc[known_labels + ['foo']] = 5
|
||||
|
||||
|
||||
def test_basic_setitem_with_labels(test_data):
    """``series[key] = v`` and ``series.loc[key] = v`` agree for label keys.

    Covers an index-valued key, a label slice, integer labels given as a
    list and as an ndarray, labels missing from the index, and scalar
    assignment into a tz-aware series (GH12089).
    """
    indices = test_data.ts.index[[5, 10, 15]]

    # key is a selection taken from the index itself
    cp = test_data.ts.copy()
    exp = test_data.ts.copy()
    cp[indices] = 0
    exp.loc[indices] = 0
    assert_series_equal(cp, exp)

    # key is a label slice
    cp = test_data.ts.copy()
    exp = test_data.ts.copy()
    cp[indices[0]:indices[2]] = 0
    exp.loc[indices[0]:indices[2]] = 0
    assert_series_equal(cp, exp)

    # integer indexes, be careful
    s = Series(np.random.randn(10), index=lrange(0, 20, 2))
    inds = [0, 4, 6]
    arr_inds = np.array([0, 4, 6])

    # Assign into the two copies being compared; assigning into ``s``
    # itself would leave ``cp`` and ``exp`` as untouched, trivially equal
    # copies and the comparison would check nothing.
    cp = s.copy()
    exp = s.copy()
    cp[inds] = 0
    exp.loc[inds] = 0
    assert_series_equal(cp, exp)

    cp = s.copy()
    exp = s.copy()
    cp[arr_inds] = 0
    exp.loc[arr_inds] = 0
    assert_series_equal(cp, exp)

    # label 5 is not part of the even-integer index
    inds_notfound = [0, 4, 5, 6]
    arr_inds_notfound = np.array([0, 4, 5, 6])
    msg = r"\[5\] not contained in the index"
    with pytest.raises(ValueError, match=msg):
        s[inds_notfound] = 0
    with pytest.raises(Exception, match=msg):
        s[arr_inds_notfound] = 0

    # GH12089
    # with tz for values
    s = Series(pd.date_range("2011-01-01", periods=3, tz="US/Eastern"),
               index=['a', 'b', 'c'])
    expected = Timestamp('2011-01-03', tz='US/Eastern')

    s2 = s.copy()
    s2.loc['a'] = expected
    result = s2.loc['a']
    assert result == expected

    s2 = s.copy()
    s2.iloc[0] = expected
    result = s2.iloc[0]
    assert result == expected

    s2 = s.copy()
    s2['a'] = expected
    result = s2['a']
    assert result == expected
|
||||
@@ -1,259 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange, range
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, Series
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
|
||||
def test_get():
    """``Series.get`` returns the stored value or the supplied default."""
    values = np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56, 45,
                       51, 39, 55, 43, 54, 52, 51, 54])

    # GH 6383
    # default index: 25 is not a label, so the default comes back
    ser = Series(values)
    assert ser.get(25, 0) == 0

    # float labels held with object dtype: 25 matches the first label
    labels = pd.Float64Index(
        [25.0, 36.0, 49.0, 64.0, 81.0, 100.0,
         121.0, 144.0, 169.0, 196.0, 1225.0,
         1296.0, 1369.0, 1444.0, 1521.0, 1600.0,
         1681.0, 1764.0, 1849.0, 1936.0],
        dtype='object')
    ser = Series(values, index=labels)
    assert ser.get(25, 0) == 43

    # GH 7407
    # with a boolean accessor
    df = pd.DataFrame({'i': [0] * 3, 'b': [False] * 3})

    int_counts = df.i.value_counts()
    assert int_counts.get(99, default='Missing') == 'Missing'

    bool_counts = df.b.value_counts()
    assert bool_counts.get(False, default='Missing') == 3
    assert bool_counts.get(True, default='Missing') == 'Missing'
|
||||
|
||||
|
||||
def test_get_nan():
    """``Series.get(nan)`` falls back to the default on a float index."""
    # GH 8569
    ser = pd.Float64Index(range(10)).to_series()

    # no explicit default -> None
    without_default = ser.get(np.nan)
    assert without_default is None

    # explicit default is handed back unchanged
    with_default = ser.get(np.nan, default='Missing')
    assert with_default == 'Missing'
|
||||
|
||||
|
||||
def test_get_nan_multiple():
    """``Series.get`` with list keys that are partly or wholly missing."""
    # GH 8569
    # ensure that fixing "test_get_nan" above hasn't broken get
    # with multiple elements
    ser = pd.Float64Index(range(10)).to_series()

    # one present and one missing key: a FutureWarning is emitted and the
    # missing position is filled with NaN
    for keys in ([2, 30], [2, np.nan]):
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            fetched = ser.get(keys)
            assert_series_equal(fetched, Series([2, np.nan], index=keys))

    # GH 17295 - all missing keys
    for keys in ([20, 30], [np.nan, np.nan]):
        assert ser.get(keys) is None
|
||||
|
||||
|
||||
def test_delitem():
    """``del series[label]`` removes the entry in place."""

    def empty_int64(index_dtype):
        # expected result once the only element has been deleted
        return Series(dtype='int64', index=Index([], dtype=index_dtype))

    # GH 5542
    # should delete the item inplace
    ser = Series(lrange(5))

    del ser[0]
    assert_series_equal(ser, Series(lrange(1, 5), index=lrange(1, 5)))

    del ser[1]
    assert_series_equal(ser, Series(lrange(2, 5), index=lrange(2, 5)))

    # empty
    ser = Series()
    with pytest.raises(KeyError, match=r"^0$"):
        del ser[0]

    # only 1 left, del, add, del
    ser = Series(1)
    del ser[0]
    assert_series_equal(ser, empty_int64('int64'))
    ser[0] = 1
    assert_series_equal(ser, Series(1))
    del ser[0]
    assert_series_equal(ser, empty_int64('int64'))

    # Index(dtype=object)
    ser = Series(1, index=['a'])
    del ser['a']
    assert_series_equal(ser, empty_int64('object'))
    ser['a'] = 1
    assert_series_equal(ser, Series(1, index=['a']))
    del ser['a']
    assert_series_equal(ser, empty_int64('object'))
|
||||
|
||||
|
||||
def test_slice_float64():
    """Float label slices include both endpoints for Series and DataFrame."""
    values = np.arange(10., 50., 2)
    index = Index(values)

    # labels sitting at positions 5 and 15
    start, end = values[[5, 15]]

    # Series: ``[]`` and ``.loc`` both match positions 5..15 inclusive
    ser = Series(np.random.randn(20), index=index)
    expected = ser.iloc[5:16]
    assert_series_equal(ser[start:end], expected)
    assert_series_equal(ser.loc[start:end], expected)

    # DataFrame: same selection over the rows
    frame = DataFrame(np.random.randn(20, 3), index=index)
    expected = frame.iloc[5:16]
    tm.assert_frame_equal(frame[start:end], expected)
    tm.assert_frame_equal(frame.loc[start:end], expected)
|
||||
|
||||
|
||||
def test_getitem_negative_out_of_bounds():
    """A negative position beyond the length raises IndexError."""
    values = tm.rands_array(5, 10)
    labels = tm.rands_array(10, 10)
    ser = Series(values, index=labels)

    # reading
    with pytest.raises(IndexError, match="index out of bounds"):
        ser[-11]

    # writing reports a different message
    setitem_msg = "index -11 is out of bounds for axis 0 with size 10"
    with pytest.raises(IndexError, match=setitem_msg):
        ser[-11] = 'foo'
|
||||
|
||||
|
||||
def test_getitem_regression():
    """Indexing with the full list of labels returns an equal series."""
    labels = lrange(5)
    ser = Series(lrange(5), index=labels)

    selected = ser[lrange(5)]

    assert_series_equal(selected, ser)
|
||||
|
||||
|
||||
def test_getitem_setitem_slice_bug():
    """Negative slice bounds larger than the length are clipped."""
    ser = Series(lrange(10), lrange(10))

    # start before the beginning -> everything
    assert_series_equal(ser[-12:], ser)

    # ordinary negative start
    assert_series_equal(ser[-7:], ser[3:])

    # stop before the beginning -> nothing
    assert_series_equal(ser[:-12], ser[:0])

    # the same clipping applies when assigning
    ser = Series(lrange(10), lrange(10))

    ser[-12:] = 0
    assert (ser == 0).all()

    # empty target slice: the assignment must not change any value
    ser[:-12] = 5
    assert (ser == 0).all()
|
||||
|
||||
|
||||
def test_getitem_setitem_slice_integers():
    """Integer slices through ``[]`` act on positions, not on labels."""
    labels = [2, 4, 6, 8, 10, 12, 14, 16]
    ser = Series(np.random.randn(8), index=labels)

    # the first four positions carry labels 2, 4, 6 and 8
    head = ser[:4]
    assert_series_equal(head, ser.reindex([2, 4, 6, 8]))

    # assignment through the same slice touches only those four rows
    ser[:4] = 0
    head_is_zero = (ser[:4] == 0).all()
    tail_has_zero = (ser[4:] == 0).any()
    assert head_is_zero
    assert not tail_has_zero
|
||||
|
||||
|
||||
def test_setitem_float_labels():
    """``.loc[1]`` addresses the label 1, not the position 1."""
    # note labels are floats
    by_label = Series(['a', 'b', 'c'], index=[0, 0.5, 1])
    by_position = by_label.copy()

    # label 1 sits at position 2, so both writes hit the same element
    by_label.loc[1] = 'zoo'
    by_position.iloc[2] = 'zoo'

    assert_series_equal(by_label, by_position)
|
||||
|
||||
|
||||
def test_slice_float_get_set(test_data):
    """Float slice bounds on ``test_data.ts`` raise TypeError.

    Reading and assigning are both checked, for a whole-number float and
    for a fractional one; the message must name the offending start bound.
    """
    msg = (r"cannot do slice indexing on <class 'pandas\.core\.indexes"
           r"\.datetimes\.DatetimeIndex'> with these indexers \[{key}\]"
           r" of <(class|type) 'float'>")

    # (start bound, escaped text expected inside the error message)
    cases = ((4.0, r"4\.0"), (4.5, r"4\.5"))

    for start, key in cases:
        with pytest.raises(TypeError, match=msg.format(key=key)):
            test_data.ts[start:10.0]

        with pytest.raises(TypeError, match=msg.format(key=key)):
            test_data.ts[start:10.0] = 0
|
||||
|
||||
|
||||
def test_slice_floats2():
    """Open-ended ``.loc`` slices on a float index, bound present or not."""
    labels = np.arange(10, 20, dtype=float)
    ser = Series(np.random.rand(10), index=labels)

    # 12.0 is a label (12..19 -> 8 rows); 12.5 is not (13..19 -> 7 rows)
    from_label = ser.loc[12.0:]
    from_gap = ser.loc[12.5:]
    assert len(from_label) == 8
    assert len(from_gap) == 7

    # replace label 12.0 by 12.2 so that neither bound is in the index
    shifted = np.arange(10, 20, dtype=float)
    shifted[2] = 12.2
    ser.index = shifted

    from_label = ser.loc[12.0:]
    from_gap = ser.loc[12.5:]
    assert len(from_label) == 8
    assert len(from_gap) == 7
|
||||
|
||||
|
||||
def test_int_indexing():
    """Missing keys raise KeyError on duplicated integer indexes.

    Checked for an index in ascending order and for one that is not
    monotonic.
    """
    index_variants = (
        [0, 0, 1, 1, 2, 2],
        # not monotonic
        [2, 2, 0, 0, 1, 1],
    )

    for labels in index_variants:
        ser = Series(np.random.randn(6), index=labels)

        # integer key that is not a label
        with pytest.raises(KeyError, match=r"^5$"):
            ser[5]

        # string key that is not a label
        with pytest.raises(KeyError, match=r"^'c'$"):
            ser['c']
|
||||
|
||||
|
||||
def test_getitem_int64(test_data):
    """A ``numpy.int64`` key selects the same element as a plain ``int``."""
    via_numpy = test_data.ts[np.int64(5)]
    via_builtin = test_data.ts[5]

    assert via_numpy == via_builtin
|
||||
Reference in New Issue
Block a user