pruned venvs

This commit is contained in:
d3m1g0d
2019-03-12 21:57:16 +01:00
parent 33f0511081
commit e441f4f7f7
5988 changed files with 0 additions and 1353666 deletions
@@ -1,8 +0,0 @@
import pytest
from pandas.tests.series.common import TestData
@pytest.fixture(scope='module')
def test_data():
return TestData()
@@ -1,564 +0,0 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612
from datetime import datetime
import numpy as np
from numpy import nan
import pytest
import pandas.compat as compat
from pandas.compat import lrange, range
import pandas as pd
from pandas import Categorical, Series, date_range, isna
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal
@pytest.mark.parametrize(
'first_slice,second_slice', [
[[2, None], [None, -5]],
[[None, 0], [None, -5]],
[[None, -5], [None, 0]],
[[None, 0], [None, 0]]
])
@pytest.mark.parametrize('fill', [None, -1])
def test_align(test_data, first_slice, second_slice, join_type, fill):
a = test_data.ts[slice(*first_slice)]
b = test_data.ts[slice(*second_slice)]
aa, ab = a.align(b, join=join_type, fill_value=fill)
join_index = a.index.join(b.index, how=join_type)
if fill is not None:
diff_a = aa.index.difference(join_index)
diff_b = ab.index.difference(join_index)
if len(diff_a) > 0:
assert (aa.reindex(diff_a) == fill).all()
if len(diff_b) > 0:
assert (ab.reindex(diff_b) == fill).all()
ea = a.reindex(join_index)
eb = b.reindex(join_index)
if fill is not None:
ea = ea.fillna(fill)
eb = eb.fillna(fill)
assert_series_equal(aa, ea)
assert_series_equal(ab, eb)
assert aa.name == 'ts'
assert ea.name == 'ts'
assert ab.name == 'ts'
assert eb.name == 'ts'
@pytest.mark.parametrize(
'first_slice,second_slice', [
[[2, None], [None, -5]],
[[None, 0], [None, -5]],
[[None, -5], [None, 0]],
[[None, 0], [None, 0]]
])
@pytest.mark.parametrize('method', ['pad', 'bfill'])
@pytest.mark.parametrize('limit', [None, 1])
def test_align_fill_method(test_data,
first_slice, second_slice,
join_type, method, limit):
a = test_data.ts[slice(*first_slice)]
b = test_data.ts[slice(*second_slice)]
aa, ab = a.align(b, join=join_type, method=method, limit=limit)
join_index = a.index.join(b.index, how=join_type)
ea = a.reindex(join_index)
eb = b.reindex(join_index)
ea = ea.fillna(method=method, limit=limit)
eb = eb.fillna(method=method, limit=limit)
assert_series_equal(aa, ea)
assert_series_equal(ab, eb)
def test_align_nocopy(test_data):
b = test_data.ts[:5].copy()
# do copy
a = test_data.ts.copy()
ra, _ = a.align(b, join='left')
ra[:5] = 5
assert not (a[:5] == 5).any()
# do not copy
a = test_data.ts.copy()
ra, _ = a.align(b, join='left', copy=False)
ra[:5] = 5
assert (a[:5] == 5).all()
# do copy
a = test_data.ts.copy()
b = test_data.ts[:5].copy()
_, rb = a.align(b, join='right')
rb[:3] = 5
assert not (b[:3] == 5).any()
# do not copy
a = test_data.ts.copy()
b = test_data.ts[:5].copy()
_, rb = a.align(b, join='right', copy=False)
rb[:2] = 5
assert (b[:2] == 5).all()
def test_align_same_index(test_data):
a, b = test_data.ts.align(test_data.ts, copy=False)
assert a.index is test_data.ts.index
assert b.index is test_data.ts.index
a, b = test_data.ts.align(test_data.ts, copy=True)
assert a.index is not test_data.ts.index
assert b.index is not test_data.ts.index
def test_align_multiindex():
# GH 10665
midx = pd.MultiIndex.from_product([range(2), range(3), range(2)],
names=('a', 'b', 'c'))
idx = pd.Index(range(2), name='b')
s1 = pd.Series(np.arange(12, dtype='int64'), index=midx)
s2 = pd.Series(np.arange(2, dtype='int64'), index=idx)
# these must be the same results (but flipped)
res1l, res1r = s1.align(s2, join='left')
res2l, res2r = s2.align(s1, join='right')
expl = s1
tm.assert_series_equal(expl, res1l)
tm.assert_series_equal(expl, res2r)
expr = pd.Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
tm.assert_series_equal(expr, res1r)
tm.assert_series_equal(expr, res2l)
res1l, res1r = s1.align(s2, join='right')
res2l, res2r = s2.align(s1, join='left')
exp_idx = pd.MultiIndex.from_product([range(2), range(2), range(2)],
names=('a', 'b', 'c'))
expl = pd.Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
tm.assert_series_equal(expl, res1l)
tm.assert_series_equal(expl, res2r)
expr = pd.Series([0, 0, 1, 1] * 2, index=exp_idx)
tm.assert_series_equal(expr, res1r)
tm.assert_series_equal(expr, res2l)
def test_reindex(test_data):
identity = test_data.series.reindex(test_data.series.index)
# __array_interface__ is not defined for older numpies
# and on some pythons
try:
assert np.may_share_memory(test_data.series.index, identity.index)
except AttributeError:
pass
assert identity.index.is_(test_data.series.index)
assert identity.index.identical(test_data.series.index)
subIndex = test_data.series.index[10:20]
subSeries = test_data.series.reindex(subIndex)
for idx, val in compat.iteritems(subSeries):
assert val == test_data.series[idx]
subIndex2 = test_data.ts.index[10:20]
subTS = test_data.ts.reindex(subIndex2)
for idx, val in compat.iteritems(subTS):
assert val == test_data.ts[idx]
stuffSeries = test_data.ts.reindex(subIndex)
assert np.isnan(stuffSeries).all()
# This is extremely important for the Cython code to not screw up
nonContigIndex = test_data.ts.index[::2]
subNonContig = test_data.ts.reindex(nonContigIndex)
for idx, val in compat.iteritems(subNonContig):
assert val == test_data.ts[idx]
# return a copy the same index here
result = test_data.ts.reindex()
assert not (result is test_data.ts)
def test_reindex_nan():
ts = Series([2, 3, 5, 7], index=[1, 4, nan, 8])
i, j = [nan, 1, nan, 8, 4, nan], [2, 0, 2, 3, 1, 2]
assert_series_equal(ts.reindex(i), ts.iloc[j])
ts.index = ts.index.astype('object')
# reindex coerces index.dtype to float, loc/iloc doesn't
assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False)
def test_reindex_series_add_nat():
rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s')
series = Series(rng)
result = series.reindex(lrange(15))
assert np.issubdtype(result.dtype, np.dtype('M8[ns]'))
mask = result.isna()
assert mask[-5:].all()
assert not mask[:-5].any()
def test_reindex_with_datetimes():
rng = date_range('1/1/2000', periods=20)
ts = Series(np.random.randn(20), index=rng)
result = ts.reindex(list(ts.index[5:10]))
expected = ts[5:10]
tm.assert_series_equal(result, expected)
result = ts[list(ts.index[5:10])]
tm.assert_series_equal(result, expected)
def test_reindex_corner(test_data):
# (don't forget to fix this) I think it's fixed
test_data.empty.reindex(test_data.ts.index, method='pad') # it works
# corner case: pad empty series
reindexed = test_data.empty.reindex(test_data.ts.index, method='pad')
# pass non-Index
reindexed = test_data.ts.reindex(list(test_data.ts.index))
assert_series_equal(test_data.ts, reindexed)
# bad fill method
ts = test_data.ts[::2]
msg = (r"Invalid fill method\. Expecting pad \(ffill\), backfill"
r" \(bfill\) or nearest\. Got foo")
with pytest.raises(ValueError, match=msg):
ts.reindex(test_data.ts.index, method='foo')
def test_reindex_pad():
s = Series(np.arange(10), dtype='int64')
s2 = s[::2]
reindexed = s2.reindex(s.index, method='pad')
reindexed2 = s2.reindex(s.index, method='ffill')
assert_series_equal(reindexed, reindexed2)
expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10))
assert_series_equal(reindexed, expected)
# GH4604
s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
new_index = ['a', 'g', 'c', 'f']
expected = Series([1, 1, 3, 3], index=new_index)
# this changes dtype because the ffill happens after
result = s.reindex(new_index).ffill()
assert_series_equal(result, expected.astype('float64'))
result = s.reindex(new_index).ffill(downcast='infer')
assert_series_equal(result, expected)
expected = Series([1, 5, 3, 5], index=new_index)
result = s.reindex(new_index, method='ffill')
assert_series_equal(result, expected)
# inference of new dtype
s = Series([True, False, False, True], index=list('abcd'))
new_index = 'agc'
result = s.reindex(list(new_index)).ffill()
expected = Series([True, True, False], index=list(new_index))
assert_series_equal(result, expected)
# GH4618 shifted series downcasting
s = Series(False, index=lrange(0, 5))
result = s.shift(1).fillna(method='bfill')
expected = Series(False, index=lrange(0, 5))
assert_series_equal(result, expected)
def test_reindex_nearest():
s = Series(np.arange(10, dtype='int64'))
target = [0.1, 0.9, 1.5, 2.0]
actual = s.reindex(target, method='nearest')
expected = Series(np.around(target).astype('int64'), target)
assert_series_equal(expected, actual)
actual = s.reindex_like(actual, method='nearest')
assert_series_equal(expected, actual)
actual = s.reindex_like(actual, method='nearest', tolerance=1)
assert_series_equal(expected, actual)
actual = s.reindex_like(actual, method='nearest',
tolerance=[1, 2, 3, 4])
assert_series_equal(expected, actual)
actual = s.reindex(target, method='nearest', tolerance=0.2)
expected = Series([0, 1, np.nan, 2], target)
assert_series_equal(expected, actual)
actual = s.reindex(target, method='nearest',
tolerance=[0.3, 0.01, 0.4, 3])
expected = Series([0, np.nan, np.nan, 2], target)
assert_series_equal(expected, actual)
def test_reindex_backfill():
pass
def test_reindex_int(test_data):
ts = test_data.ts[::2]
int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)
# this should work fine
reindexed_int = int_ts.reindex(test_data.ts.index)
# if NaNs introduced
assert reindexed_int.dtype == np.float_
# NO NaNs introduced
reindexed_int = int_ts.reindex(int_ts.index[::2])
assert reindexed_int.dtype == np.int_
def test_reindex_bool(test_data):
# A series other than float, int, string, or object
ts = test_data.ts[::2]
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
# this should work fine
reindexed_bool = bool_ts.reindex(test_data.ts.index)
# if NaNs introduced
assert reindexed_bool.dtype == np.object_
# NO NaNs introduced
reindexed_bool = bool_ts.reindex(bool_ts.index[::2])
assert reindexed_bool.dtype == np.bool_
def test_reindex_bool_pad(test_data):
# fail
ts = test_data.ts[5:]
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
filled_bool = bool_ts.reindex(test_data.ts.index, method='pad')
assert isna(filled_bool[:5]).all()
def test_reindex_categorical():
index = date_range('20000101', periods=3)
# reindexing to an invalid Categorical
s = Series(['a', 'b', 'c'], dtype='category')
result = s.reindex(index)
expected = Series(Categorical(values=[np.nan, np.nan, np.nan],
categories=['a', 'b', 'c']))
expected.index = index
tm.assert_series_equal(result, expected)
# partial reindexing
expected = Series(Categorical(values=['b', 'c'], categories=['a', 'b',
'c']))
expected.index = [1, 2]
result = s.reindex([1, 2])
tm.assert_series_equal(result, expected)
expected = Series(Categorical(
values=['c', np.nan], categories=['a', 'b', 'c']))
expected.index = [2, 3]
result = s.reindex([2, 3])
tm.assert_series_equal(result, expected)
def test_reindex_like(test_data):
other = test_data.ts[::2]
assert_series_equal(test_data.ts.reindex(other.index),
test_data.ts.reindex_like(other))
# GH 7179
day1 = datetime(2013, 3, 5)
day2 = datetime(2013, 5, 5)
day3 = datetime(2014, 3, 5)
series1 = Series([5, None, None], [day1, day2, day3])
series2 = Series([None, None], [day1, day3])
result = series1.reindex_like(series2, method='pad')
expected = Series([5, np.nan], index=[day1, day3])
assert_series_equal(result, expected)
def test_reindex_fill_value():
# -----------------------------------------------------------
# floats
floats = Series([1., 2., 3.])
result = floats.reindex([1, 2, 3])
expected = Series([2., 3., np.nan], index=[1, 2, 3])
assert_series_equal(result, expected)
result = floats.reindex([1, 2, 3], fill_value=0)
expected = Series([2., 3., 0], index=[1, 2, 3])
assert_series_equal(result, expected)
# -----------------------------------------------------------
# ints
ints = Series([1, 2, 3])
result = ints.reindex([1, 2, 3])
expected = Series([2., 3., np.nan], index=[1, 2, 3])
assert_series_equal(result, expected)
# don't upcast
result = ints.reindex([1, 2, 3], fill_value=0)
expected = Series([2, 3, 0], index=[1, 2, 3])
assert issubclass(result.dtype.type, np.integer)
assert_series_equal(result, expected)
# -----------------------------------------------------------
# objects
objects = Series([1, 2, 3], dtype=object)
result = objects.reindex([1, 2, 3])
expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object)
assert_series_equal(result, expected)
result = objects.reindex([1, 2, 3], fill_value='foo')
expected = Series([2, 3, 'foo'], index=[1, 2, 3], dtype=object)
assert_series_equal(result, expected)
# ------------------------------------------------------------
# bools
bools = Series([True, False, True])
result = bools.reindex([1, 2, 3])
expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object)
assert_series_equal(result, expected)
result = bools.reindex([1, 2, 3], fill_value=False)
expected = Series([False, True, False], index=[1, 2, 3])
assert_series_equal(result, expected)
def test_reindex_datetimeindexes_tz_naive_and_aware():
# GH 8306
idx = date_range('20131101', tz='America/Chicago', periods=7)
newidx = date_range('20131103', periods=10, freq='H')
s = Series(range(7), index=idx)
with pytest.raises(TypeError):
s.reindex(newidx, method='ffill')
def test_reindex_empty_series_tz_dtype():
# GH 20869
result = Series(dtype='datetime64[ns, UTC]').reindex([0, 1])
expected = Series([pd.NaT] * 2, dtype='datetime64[ns, UTC]')
tm.assert_equal(result, expected)
def test_rename():
# GH 17407
s = Series(range(1, 6), index=pd.Index(range(2, 7), name='IntIndex'))
result = s.rename(str)
expected = s.rename(lambda i: str(i))
assert_series_equal(result, expected)
assert result.name == expected.name
@pytest.mark.parametrize(
'data, index, drop_labels,'
' axis, expected_data, expected_index',
[
# Unique Index
([1, 2], ['one', 'two'], ['two'],
0, [1], ['one']),
([1, 2], ['one', 'two'], ['two'],
'rows', [1], ['one']),
([1, 1, 2], ['one', 'two', 'one'], ['two'],
0, [1, 2], ['one', 'one']),
# GH 5248 Non-Unique Index
([1, 1, 2], ['one', 'two', 'one'], 'two',
0, [1, 2], ['one', 'one']),
([1, 1, 2], ['one', 'two', 'one'], ['one'],
0, [1], ['two']),
([1, 1, 2], ['one', 'two', 'one'], 'one',
0, [1], ['two'])])
def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels,
expected_data, expected_index):
s = Series(data=data, index=index)
result = s.drop(drop_labels, axis=axis)
expected = Series(data=expected_data, index=expected_index)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
'data, index, drop_labels,'
' axis, error_type, error_desc',
[
# single string/tuple-like
(range(3), list('abc'), 'bc',
0, KeyError, 'not found in axis'),
# bad axis
(range(3), list('abc'), ('a',),
0, KeyError, 'not found in axis'),
(range(3), list('abc'), 'one',
'columns', ValueError, 'No axis named columns')])
def test_drop_exception_raised(data, index, drop_labels,
axis, error_type, error_desc):
with pytest.raises(error_type, match=error_desc):
Series(data, index=index).drop(drop_labels, axis=axis)
def test_drop_with_ignore_errors():
# errors='ignore'
s = Series(range(3), index=list('abc'))
result = s.drop('bc', errors='ignore')
tm.assert_series_equal(result, s)
result = s.drop(['a', 'd'], errors='ignore')
expected = s.iloc[1:]
tm.assert_series_equal(result, expected)
# GH 8522
s = Series([2, 3], index=[True, False])
assert s.index.is_object()
result = s.drop(True)
expected = Series([3], index=[False])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 3]])
@pytest.mark.parametrize('drop_labels', [[], [1], [3]])
def test_drop_empty_list(index, drop_labels):
# GH 21494
expected_index = [i for i in index if i not in drop_labels]
series = pd.Series(index=index).drop(drop_labels)
tm.assert_series_equal(series, pd.Series(index=expected_index))
@pytest.mark.parametrize('data, index, drop_labels', [
(None, [1, 2, 3], [1, 4]),
(None, [1, 2, 2], [1, 4]),
([2, 3], [0, 1], [False, True])
])
def test_drop_non_empty_list(data, index, drop_labels):
# GH 21494 and GH 16877
with pytest.raises(KeyError, match='not found in axis'):
pd.Series(data=data, index=index).drop(drop_labels)
@@ -1,634 +0,0 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612
import numpy as np
import pytest
from pandas.compat import lrange, range
from pandas.core.dtypes.common import is_integer
import pandas as pd
from pandas import Index, Series, Timestamp, date_range, isna
from pandas.core.indexing import IndexingError
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal
from pandas.tseries.offsets import BDay
def test_getitem_boolean(test_data):
s = test_data.series
mask = s > s.median()
# passing list is OK
result = s[list(mask)]
expected = s[mask]
assert_series_equal(result, expected)
tm.assert_index_equal(result.index, s.index[mask])
def test_getitem_boolean_empty():
s = Series([], dtype=np.int64)
s.index.name = 'index_name'
s = s[s.isna()]
assert s.index.name == 'index_name'
assert s.dtype == np.int64
# GH5877
# indexing with empty series
s = Series(['A', 'B'])
expected = Series(np.nan, index=['C'], dtype=object)
result = s[Series(['C'], dtype=object)]
assert_series_equal(result, expected)
s = Series(['A', 'B'])
expected = Series(dtype=object, index=Index([], dtype='int64'))
result = s[Series([], dtype=object)]
assert_series_equal(result, expected)
# invalid because of the boolean indexer
# that's empty or not-aligned
msg = (r"Unalignable boolean Series provided as indexer \(index of"
r" the boolean Series and of the indexed object do not match")
with pytest.raises(IndexingError, match=msg):
s[Series([], dtype=bool)]
with pytest.raises(IndexingError, match=msg):
s[Series([True], dtype=bool)]
def test_getitem_boolean_object(test_data):
# using column from DataFrame
s = test_data.series
mask = s > s.median()
omask = mask.astype(object)
# getitem
result = s[omask]
expected = s[mask]
assert_series_equal(result, expected)
# setitem
s2 = s.copy()
cop = s.copy()
cop[omask] = 5
s2[mask] = 5
assert_series_equal(cop, s2)
# nans raise exception
omask[5:10] = np.nan
msg = "cannot index with vector containing NA / NaN values"
with pytest.raises(ValueError, match=msg):
s[omask]
with pytest.raises(ValueError, match=msg):
s[omask] = 5
def test_getitem_setitem_boolean_corner(test_data):
ts = test_data.ts
mask_shifted = ts.shift(1, freq=BDay()) > ts.median()
# these used to raise...??
msg = (r"Unalignable boolean Series provided as indexer \(index of"
r" the boolean Series and of the indexed object do not match")
with pytest.raises(IndexingError, match=msg):
ts[mask_shifted]
with pytest.raises(IndexingError, match=msg):
ts[mask_shifted] = 1
with pytest.raises(IndexingError, match=msg):
ts.loc[mask_shifted]
with pytest.raises(IndexingError, match=msg):
ts.loc[mask_shifted] = 1
def test_setitem_boolean(test_data):
mask = test_data.series > test_data.series.median()
# similar indexed series
result = test_data.series.copy()
result[mask] = test_data.series * 2
expected = test_data.series * 2
assert_series_equal(result[mask], expected[mask])
# needs alignment
result = test_data.series.copy()
result[mask] = (test_data.series * 2)[0:5]
expected = (test_data.series * 2)[0:5].reindex_like(test_data.series)
expected[-mask] = test_data.series[mask]
assert_series_equal(result[mask], expected[mask])
def test_get_set_boolean_different_order(test_data):
ordered = test_data.series.sort_values()
# setting
copy = test_data.series.copy()
copy[ordered > 0] = 0
expected = test_data.series.copy()
expected[expected > 0] = 0
assert_series_equal(copy, expected)
# getting
sel = test_data.series[ordered > 0]
exp = test_data.series[test_data.series > 0]
assert_series_equal(sel, exp)
def test_where_unsafe_int(sint_dtype):
s = Series(np.arange(10), dtype=sint_dtype)
mask = s < 5
s[mask] = lrange(2, 7)
expected = Series(lrange(2, 7) + lrange(5, 10), dtype=sint_dtype)
assert_series_equal(s, expected)
def test_where_unsafe_float(float_dtype):
s = Series(np.arange(10), dtype=float_dtype)
mask = s < 5
s[mask] = lrange(2, 7)
expected = Series(lrange(2, 7) + lrange(5, 10), dtype=float_dtype)
assert_series_equal(s, expected)
@pytest.mark.parametrize("dtype,expected_dtype", [
(np.int8, np.float64),
(np.int16, np.float64),
(np.int32, np.float64),
(np.int64, np.float64),
(np.float32, np.float32),
(np.float64, np.float64)
])
def test_where_unsafe_upcast(dtype, expected_dtype):
# see gh-9743
s = Series(np.arange(10), dtype=dtype)
values = [2.5, 3.5, 4.5, 5.5, 6.5]
mask = s < 5
expected = Series(values + lrange(5, 10), dtype=expected_dtype)
s[mask] = values
assert_series_equal(s, expected)
def test_where_unsafe():
# see gh-9731
s = Series(np.arange(10), dtype="int64")
values = [2.5, 3.5, 4.5, 5.5]
mask = s > 5
expected = Series(lrange(6) + values, dtype="float64")
s[mask] = values
assert_series_equal(s, expected)
# see gh-3235
s = Series(np.arange(10), dtype='int64')
mask = s < 5
s[mask] = lrange(2, 7)
expected = Series(lrange(2, 7) + lrange(5, 10), dtype='int64')
assert_series_equal(s, expected)
assert s.dtype == expected.dtype
s = Series(np.arange(10), dtype='int64')
mask = s > 5
s[mask] = [0] * 4
expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype='int64')
assert_series_equal(s, expected)
s = Series(np.arange(10))
mask = s > 5
msg = "cannot assign mismatch length to masked array"
with pytest.raises(ValueError, match=msg):
s[mask] = [5, 4, 3, 2, 1]
with pytest.raises(ValueError, match=msg):
s[mask] = [0] * 5
# dtype changes
s = Series([1, 2, 3, 4])
result = s.where(s > 2, np.nan)
expected = Series([np.nan, np.nan, 3, 4])
assert_series_equal(result, expected)
# GH 4667
# setting with None changes dtype
s = Series(range(10)).astype(float)
s[8] = None
result = s[8]
assert isna(result)
s = Series(range(10)).astype(float)
s[s > 8] = None
result = s[isna(s)]
expected = Series(np.nan, index=[9])
assert_series_equal(result, expected)
def test_where_raise_on_error_deprecation():
# gh-14968
# deprecation of raise_on_error
s = Series(np.random.randn(5))
cond = s > 0
with tm.assert_produces_warning(FutureWarning):
s.where(cond, raise_on_error=True)
with tm.assert_produces_warning(FutureWarning):
s.mask(cond, raise_on_error=True)
def test_where():
s = Series(np.random.randn(5))
cond = s > 0
rs = s.where(cond).dropna()
rs2 = s[cond]
assert_series_equal(rs, rs2)
rs = s.where(cond, -s)
assert_series_equal(rs, s.abs())
rs = s.where(cond)
assert (s.shape == rs.shape)
assert (rs is not s)
# test alignment
cond = Series([True, False, False, True, False], index=s.index)
s2 = -(s.abs())
expected = s2[cond].reindex(s2.index[:3]).reindex(s2.index)
rs = s2.where(cond[:3])
assert_series_equal(rs, expected)
expected = s2.abs()
expected.iloc[0] = s2[0]
rs = s2.where(cond[:3], -s2)
assert_series_equal(rs, expected)
def test_where_error():
s = Series(np.random.randn(5))
cond = s > 0
msg = "Array conditional must be same shape as self"
with pytest.raises(ValueError, match=msg):
s.where(1)
with pytest.raises(ValueError, match=msg):
s.where(cond[:3].values, -s)
# GH 2745
s = Series([1, 2])
s[[True, False]] = [0, 1]
expected = Series([0, 2])
assert_series_equal(s, expected)
# failures
msg = "cannot assign mismatch length to masked array"
with pytest.raises(ValueError, match=msg):
s[[True, False]] = [0, 2, 3]
msg = ("NumPy boolean array indexing assignment cannot assign 0 input"
" values to the 1 output values where the mask is true")
with pytest.raises(ValueError, match=msg):
s[[True, False]] = []
@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
def test_where_array_like(klass):
# see gh-15414
s = Series([1, 2, 3])
cond = [False, True, True]
expected = Series([np.nan, 2, 3])
result = s.where(klass(cond))
assert_series_equal(result, expected)
@pytest.mark.parametrize('cond', [
[1, 0, 1],
Series([2, 5, 7]),
["True", "False", "True"],
[Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")]
])
def test_where_invalid_input(cond):
# see gh-15414: only boolean arrays accepted
s = Series([1, 2, 3])
msg = "Boolean array expected for the condition"
with pytest.raises(ValueError, match=msg):
s.where(cond)
msg = "Array conditional must be same shape as self"
with pytest.raises(ValueError, match=msg):
s.where([True])
def test_where_ndframe_align():
msg = "Array conditional must be same shape as self"
s = Series([1, 2, 3])
cond = [True]
with pytest.raises(ValueError, match=msg):
s.where(cond)
expected = Series([1, np.nan, np.nan])
out = s.where(Series(cond))
tm.assert_series_equal(out, expected)
cond = np.array([False, True, False, True])
with pytest.raises(ValueError, match=msg):
s.where(cond)
expected = Series([np.nan, 2, np.nan])
out = s.where(Series(cond))
tm.assert_series_equal(out, expected)
def test_where_setitem_invalid():
# GH 2702
# make sure correct exceptions are raised on invalid list assignment
msg = ("cannot set using a {} indexer with a different length than"
" the value")
# slice
s = Series(list('abc'))
with pytest.raises(ValueError, match=msg.format('slice')):
s[0:3] = list(range(27))
s[0:3] = list(range(3))
expected = Series([0, 1, 2])
assert_series_equal(s.astype(np.int64), expected, )
# slice with step
s = Series(list('abcdef'))
with pytest.raises(ValueError, match=msg.format('slice')):
s[0:4:2] = list(range(27))
s = Series(list('abcdef'))
s[0:4:2] = list(range(2))
expected = Series([0, 'b', 1, 'd', 'e', 'f'])
assert_series_equal(s, expected)
# neg slices
s = Series(list('abcdef'))
with pytest.raises(ValueError, match=msg.format('slice')):
s[:-1] = list(range(27))
s[-3:-1] = list(range(2))
expected = Series(['a', 'b', 'c', 0, 1, 'f'])
assert_series_equal(s, expected)
# list
s = Series(list('abc'))
with pytest.raises(ValueError, match=msg.format('list-like')):
s[[0, 1, 2]] = list(range(27))
s = Series(list('abc'))
with pytest.raises(ValueError, match=msg.format('list-like')):
s[[0, 1, 2]] = list(range(2))
# scalar
s = Series(list('abc'))
s[0] = list(range(10))
expected = Series([list(range(10)), 'b', 'c'])
assert_series_equal(s, expected)
@pytest.mark.parametrize('size', range(2, 6))
@pytest.mark.parametrize('mask', [
[True, False, False, False, False],
[True, False],
[False]
])
@pytest.mark.parametrize('item', [
2.0, np.nan, np.finfo(np.float).max, np.finfo(np.float).min
])
# Test numpy arrays, lists and tuples as the input to be
# broadcast
@pytest.mark.parametrize('box', [
lambda x: np.array([x]),
lambda x: [x],
lambda x: (x,)
])
def test_broadcast(size, mask, item, box):
selection = np.resize(mask, size)
data = np.arange(size, dtype=float)
# Construct the expected series by taking the source
# data or item based on the selection
expected = Series([item if use_item else data[
i] for i, use_item in enumerate(selection)])
s = Series(data)
s[selection] = box(item)
assert_series_equal(s, expected)
s = Series(data)
result = s.where(~selection, box(item))
assert_series_equal(result, expected)
s = Series(data)
result = s.mask(selection, box(item))
assert_series_equal(result, expected)
def test_where_inplace():
s = Series(np.random.randn(5))
cond = s > 0
rs = s.copy()
rs.where(cond, inplace=True)
assert_series_equal(rs.dropna(), s[cond])
assert_series_equal(rs, s.where(cond))
rs = s.copy()
rs.where(cond, -s, inplace=True)
assert_series_equal(rs, s.where(cond, -s))
def test_where_dups():
# GH 4550
# where crashes with dups in index
s1 = Series(list(range(3)))
s2 = Series(list(range(3)))
comb = pd.concat([s1, s2])
result = comb.where(comb < 2)
expected = Series([0, 1, np.nan, 0, 1, np.nan],
index=[0, 1, 2, 0, 1, 2])
assert_series_equal(result, expected)
# GH 4548
# inplace updating not working with dups
comb[comb < 1] = 5
expected = Series([5, 1, 2, 5, 1, 2], index=[0, 1, 2, 0, 1, 2])
assert_series_equal(comb, expected)
comb[comb < 2] += 10
expected = Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2])
assert_series_equal(comb, expected)
def test_where_numeric_with_string():
# GH 9280
s = pd.Series([1, 2, 3])
w = s.where(s > 1, 'X')
assert not is_integer(w[0])
assert is_integer(w[1])
assert is_integer(w[2])
assert isinstance(w[0], str)
assert w.dtype == 'object'
w = s.where(s > 1, ['X', 'Y', 'Z'])
assert not is_integer(w[0])
assert is_integer(w[1])
assert is_integer(w[2])
assert isinstance(w[0], str)
assert w.dtype == 'object'
w = s.where(s > 1, np.array(['X', 'Y', 'Z']))
assert not is_integer(w[0])
assert is_integer(w[1])
assert is_integer(w[2])
assert isinstance(w[0], str)
assert w.dtype == 'object'
def test_where_timedelta_coerce():
s = Series([1, 2], dtype='timedelta64[ns]')
expected = Series([10, 10])
mask = np.array([False, False])
rs = s.where(mask, [10, 10])
assert_series_equal(rs, expected)
rs = s.where(mask, 10)
assert_series_equal(rs, expected)
rs = s.where(mask, 10.0)
assert_series_equal(rs, expected)
rs = s.where(mask, [10.0, 10.0])
assert_series_equal(rs, expected)
rs = s.where(mask, [10.0, np.nan])
expected = Series([10, None], dtype='object')
assert_series_equal(rs, expected)
def test_where_datetime_conversion():
s = Series(date_range('20130102', periods=2))
expected = Series([10, 10])
mask = np.array([False, False])
rs = s.where(mask, [10, 10])
assert_series_equal(rs, expected)
rs = s.where(mask, 10)
assert_series_equal(rs, expected)
rs = s.where(mask, 10.0)
assert_series_equal(rs, expected)
rs = s.where(mask, [10.0, 10.0])
assert_series_equal(rs, expected)
rs = s.where(mask, [10.0, np.nan])
expected = Series([10, None], dtype='object')
assert_series_equal(rs, expected)
# GH 15701
timestamps = ['2016-12-31 12:00:04+00:00',
'2016-12-31 12:00:04.010000+00:00']
s = Series([pd.Timestamp(t) for t in timestamps])
rs = s.where(Series([False, True]))
expected = Series([pd.NaT, s[1]])
assert_series_equal(rs, expected)
def test_where_dt_tz_values(tz_naive_fixture):
ser1 = pd.Series(pd.DatetimeIndex(['20150101', '20150102', '20150103'],
tz=tz_naive_fixture))
ser2 = pd.Series(pd.DatetimeIndex(['20160514', '20160515', '20160516'],
tz=tz_naive_fixture))
mask = pd.Series([True, True, False])
result = ser1.where(mask, ser2)
exp = pd.Series(pd.DatetimeIndex(['20150101', '20150102', '20160516'],
tz=tz_naive_fixture))
assert_series_equal(exp, result)
def test_mask():
# compare with tested results in test_where
s = Series(np.random.randn(5))
cond = s > 0
rs = s.where(~cond, np.nan)
assert_series_equal(rs, s.mask(cond))
rs = s.where(~cond)
rs2 = s.mask(cond)
assert_series_equal(rs, rs2)
rs = s.where(~cond, -s)
rs2 = s.mask(cond, -s)
assert_series_equal(rs, rs2)
cond = Series([True, False, False, True, False], index=s.index)
s2 = -(s.abs())
rs = s2.where(~cond[:3])
rs2 = s2.mask(cond[:3])
assert_series_equal(rs, rs2)
rs = s2.where(~cond[:3], -s2)
rs2 = s2.mask(cond[:3], -s2)
assert_series_equal(rs, rs2)
msg = "Array conditional must be same shape as self"
with pytest.raises(ValueError, match=msg):
s.mask(1)
with pytest.raises(ValueError, match=msg):
s.mask(cond[:3].values, -s)
# dtype changes
s = Series([1, 2, 3, 4])
result = s.mask(s > 2, np.nan)
expected = Series([1, 2, np.nan, np.nan])
assert_series_equal(result, expected)
# see gh-21891
s = Series([1, 2])
res = s.mask([True, False])
exp = Series([np.nan, 2])
tm.assert_series_equal(res, exp)
def test_mask_inplace():
s = Series(np.random.randn(5))
cond = s > 0
rs = s.copy()
rs.mask(cond, inplace=True)
assert_series_equal(rs.dropna(), s[~cond])
assert_series_equal(rs, s.mask(cond))
rs = s.copy()
rs.mask(cond, -s, inplace=True)
assert_series_equal(rs, s.mask(cond, -s))
@@ -1,33 +0,0 @@
import pandas as pd
import pandas.util.testing as tm
def test_getitem_callable():
# GH 12533
s = pd.Series(4, index=list('ABCD'))
result = s[lambda x: 'A']
assert result == s.loc['A']
result = s[lambda x: ['A', 'B']]
tm.assert_series_equal(result, s.loc[['A', 'B']])
result = s[lambda x: [True, False, True, True]]
tm.assert_series_equal(result, s.iloc[[0, 2, 3]])
def test_setitem_callable():
# GH 12533
s = pd.Series([1, 2, 3, 4], index=list('ABCD'))
s[lambda x: 'A'] = -1
tm.assert_series_equal(s, pd.Series([-1, 2, 3, 4], index=list('ABCD')))
def test_setitem_other_callable():
# GH 13299
inc = lambda x: x + 1
s = pd.Series([1, 2, -1, 4])
s[s < 0] = inc
expected = pd.Series([1, 2, inc, 4])
tm.assert_series_equal(s, expected)
@@ -1,714 +0,0 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612
from datetime import datetime, timedelta
import numpy as np
import pytest
from pandas._libs import iNaT
import pandas._libs.index as _index
from pandas.compat import lrange, range
import pandas as pd
from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range
import pandas.util.testing as tm
from pandas.util.testing import (
assert_almost_equal, assert_frame_equal, assert_series_equal)
"""
Also test support for datetime64[ns] in Series / DataFrame
"""
def test_fancy_getitem():
dti = date_range(freq='WOM-1FRI', start=datetime(2005, 1, 1),
end=datetime(2010, 1, 1))
s = Series(np.arange(len(dti)), index=dti)
assert s[48] == 48
assert s['1/2/2009'] == 48
assert s['2009-1-2'] == 48
assert s[datetime(2009, 1, 2)] == 48
assert s[Timestamp(datetime(2009, 1, 2))] == 48
with pytest.raises(KeyError, match=r"^'2009-1-3'$"):
s['2009-1-3']
assert_series_equal(s['3/6/2009':'2009-06-05'],
s[datetime(2009, 3, 6):datetime(2009, 6, 5)])
def test_fancy_setitem():
dti = date_range(freq='WOM-1FRI', start=datetime(2005, 1, 1),
end=datetime(2010, 1, 1))
s = Series(np.arange(len(dti)), index=dti)
s[48] = -1
assert s[48] == -1
s['1/2/2009'] = -2
assert s[48] == -2
s['1/2/2009':'2009-06-05'] = -3
assert (s[48:54] == -3).all()
def test_dti_snap():
dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002',
'1/5/2002', '1/6/2002', '1/7/2002'], freq='D')
res = dti.snap(freq='W-MON')
exp = date_range('12/31/2001', '1/7/2002', freq='w-mon')
exp = exp.repeat([3, 4])
assert (res == exp).all()
res = dti.snap(freq='B')
exp = date_range('1/1/2002', '1/7/2002', freq='b')
exp = exp.repeat([1, 1, 1, 2, 2])
assert (res == exp).all()
def test_dti_reset_index_round_trip():
dti = date_range(start='1/1/2001', end='6/1/2001', freq='D')
d1 = DataFrame({'v': np.random.rand(len(dti))}, index=dti)
d2 = d1.reset_index()
assert d2.dtypes[0] == np.dtype('M8[ns]')
d3 = d2.set_index('index')
assert_frame_equal(d1, d3, check_names=False)
# #2329
stamp = datetime(2012, 11, 22)
df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value'])
df = df.set_index('Date')
assert df.index[0] == stamp
assert df.reset_index()['Date'][0] == stamp
def test_series_set_value():
# #1561
dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)]
index = DatetimeIndex(dates)
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
s = Series().set_value(dates[0], 1.)
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
s2 = s.set_value(dates[1], np.nan)
exp = Series([1., np.nan], index=index)
assert_series_equal(s2, exp)
# s = Series(index[:1], index[:1])
# s2 = s.set_value(dates[1], index[1])
# assert s2.values.dtype == 'M8[ns]'
@pytest.mark.slow
def test_slice_locs_indexerror():
times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10)
for i in range(100000)]
s = Series(lrange(100000), times)
s.loc[datetime(1900, 1, 1):datetime(2100, 1, 1)]
def test_slicing_datetimes():
# GH 7523
# unique
df = DataFrame(np.arange(4., dtype='float64'),
index=[datetime(2001, 1, i, 10, 00)
for i in [1, 2, 3, 4]])
result = df.loc[datetime(2001, 1, 1, 10):]
assert_frame_equal(result, df)
result = df.loc[:datetime(2001, 1, 4, 10)]
assert_frame_equal(result, df)
result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)]
assert_frame_equal(result, df)
result = df.loc[datetime(2001, 1, 1, 11):]
expected = df.iloc[1:]
assert_frame_equal(result, expected)
result = df.loc['20010101 11':]
assert_frame_equal(result, expected)
# duplicates
df = pd.DataFrame(np.arange(5., dtype='float64'),
index=[datetime(2001, 1, i, 10, 00)
for i in [1, 2, 2, 3, 4]])
result = df.loc[datetime(2001, 1, 1, 10):]
assert_frame_equal(result, df)
result = df.loc[:datetime(2001, 1, 4, 10)]
assert_frame_equal(result, df)
result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)]
assert_frame_equal(result, df)
result = df.loc[datetime(2001, 1, 1, 11):]
expected = df.iloc[1:]
assert_frame_equal(result, expected)
result = df.loc['20010101 11':]
assert_frame_equal(result, expected)
def test_frame_datetime64_duplicated():
dates = date_range('2010-07-01', end='2010-08-05')
tst = DataFrame({'symbol': 'AAA', 'date': dates})
result = tst.duplicated(['date', 'symbol'])
assert (-result).all()
tst = DataFrame({'date': dates})
result = tst.duplicated()
assert (-result).all()
def test_getitem_setitem_datetime_tz_pytz():
from pytz import timezone as tz
from pandas import date_range
N = 50
# testing with timezone, GH #2785
rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern')
ts = Series(np.random.randn(N), index=rng)
# also test Timestamp tz handling, GH #2789
result = ts.copy()
result["1990-01-01 09:00:00+00:00"] = 0
result["1990-01-01 09:00:00+00:00"] = ts[4]
assert_series_equal(result, ts)
result = ts.copy()
result["1990-01-01 03:00:00-06:00"] = 0
result["1990-01-01 03:00:00-06:00"] = ts[4]
assert_series_equal(result, ts)
# repeat with datetimes
result = ts.copy()
result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0
result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4]
assert_series_equal(result, ts)
result = ts.copy()
# comparison dates with datetime MUST be localized!
date = tz('US/Central').localize(datetime(1990, 1, 1, 3))
result[date] = 0
result[date] = ts[4]
assert_series_equal(result, ts)
def test_getitem_setitem_datetime_tz_dateutil():
from dateutil.tz import tzutc
from pandas._libs.tslibs.timezones import dateutil_gettz as gettz
tz = lambda x: tzutc() if x == 'UTC' else gettz(
x) # handle special case for utc in dateutil
from pandas import date_range
N = 50
# testing with timezone, GH #2785
rng = date_range('1/1/1990', periods=N, freq='H',
tz='America/New_York')
ts = Series(np.random.randn(N), index=rng)
# also test Timestamp tz handling, GH #2789
result = ts.copy()
result["1990-01-01 09:00:00+00:00"] = 0
result["1990-01-01 09:00:00+00:00"] = ts[4]
assert_series_equal(result, ts)
result = ts.copy()
result["1990-01-01 03:00:00-06:00"] = 0
result["1990-01-01 03:00:00-06:00"] = ts[4]
assert_series_equal(result, ts)
# repeat with datetimes
result = ts.copy()
result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0
result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4]
assert_series_equal(result, ts)
result = ts.copy()
result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = 0
result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = ts[4]
assert_series_equal(result, ts)
def test_getitem_setitem_datetimeindex():
N = 50
# testing with timezone, GH #2785
rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern')
ts = Series(np.random.randn(N), index=rng)
result = ts["1990-01-01 04:00:00"]
expected = ts[4]
assert result == expected
result = ts.copy()
result["1990-01-01 04:00:00"] = 0
result["1990-01-01 04:00:00"] = ts[4]
assert_series_equal(result, ts)
result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"]
expected = ts[4:8]
assert_series_equal(result, expected)
result = ts.copy()
result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0
result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8]
assert_series_equal(result, ts)
lb = "1990-01-01 04:00:00"
rb = "1990-01-01 07:00:00"
# GH#18435 strings get a pass from tzawareness compat
result = ts[(ts.index >= lb) & (ts.index <= rb)]
expected = ts[4:8]
assert_series_equal(result, expected)
lb = "1990-01-01 04:00:00-0500"
rb = "1990-01-01 07:00:00-0500"
result = ts[(ts.index >= lb) & (ts.index <= rb)]
expected = ts[4:8]
assert_series_equal(result, expected)
# repeat all the above with naive datetimes
result = ts[datetime(1990, 1, 1, 4)]
expected = ts[4]
assert result == expected
result = ts.copy()
result[datetime(1990, 1, 1, 4)] = 0
result[datetime(1990, 1, 1, 4)] = ts[4]
assert_series_equal(result, ts)
result = ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)]
expected = ts[4:8]
assert_series_equal(result, expected)
result = ts.copy()
result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = 0
result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8]
assert_series_equal(result, ts)
lb = datetime(1990, 1, 1, 4)
rb = datetime(1990, 1, 1, 7)
msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
with pytest.raises(TypeError, match=msg):
# tznaive vs tzaware comparison is invalid
# see GH#18376, GH#18162
ts[(ts.index >= lb) & (ts.index <= rb)]
lb = pd.Timestamp(datetime(1990, 1, 1, 4)).tz_localize(rng.tzinfo)
rb = pd.Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo)
result = ts[(ts.index >= lb) & (ts.index <= rb)]
expected = ts[4:8]
assert_series_equal(result, expected)
result = ts[ts.index[4]]
expected = ts[4]
assert result == expected
result = ts[ts.index[4:8]]
expected = ts[4:8]
assert_series_equal(result, expected)
result = ts.copy()
result[ts.index[4:8]] = 0
result[4:8] = ts[4:8]
assert_series_equal(result, ts)
# also test partial date slicing
result = ts["1990-01-02"]
expected = ts[24:48]
assert_series_equal(result, expected)
result = ts.copy()
result["1990-01-02"] = 0
result["1990-01-02"] = ts[24:48]
assert_series_equal(result, ts)
def test_getitem_setitem_periodindex():
from pandas import period_range
N = 50
rng = period_range('1/1/1990', periods=N, freq='H')
ts = Series(np.random.randn(N), index=rng)
result = ts["1990-01-01 04"]
expected = ts[4]
assert result == expected
result = ts.copy()
result["1990-01-01 04"] = 0
result["1990-01-01 04"] = ts[4]
assert_series_equal(result, ts)
result = ts["1990-01-01 04":"1990-01-01 07"]
expected = ts[4:8]
assert_series_equal(result, expected)
result = ts.copy()
result["1990-01-01 04":"1990-01-01 07"] = 0
result["1990-01-01 04":"1990-01-01 07"] = ts[4:8]
assert_series_equal(result, ts)
lb = "1990-01-01 04"
rb = "1990-01-01 07"
result = ts[(ts.index >= lb) & (ts.index <= rb)]
expected = ts[4:8]
assert_series_equal(result, expected)
# GH 2782
result = ts[ts.index[4]]
expected = ts[4]
assert result == expected
result = ts[ts.index[4:8]]
expected = ts[4:8]
assert_series_equal(result, expected)
result = ts.copy()
result[ts.index[4:8]] = 0
result[4:8] = ts[4:8]
assert_series_equal(result, ts)
# FutureWarning from NumPy.
@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
def test_getitem_median_slice_bug():
index = date_range('20090415', '20090519', freq='2B')
s = Series(np.random.randn(13), index=index)
indexer = [slice(6, 7, None)]
result = s[indexer]
expected = s[indexer[0]]
assert_series_equal(result, expected)
def test_datetime_indexing():
from pandas import date_range
index = date_range('1/1/2000', '1/7/2000')
index = index.repeat(3)
s = Series(len(index), index=index)
stamp = Timestamp('1/8/2000')
with pytest.raises(KeyError, match=r"^947289600000000000L?$"):
s[stamp]
s[stamp] = 0
assert s[stamp] == 0
# not monotonic
s = Series(len(index), index=index)
s = s[::-1]
with pytest.raises(KeyError, match=r"^947289600000000000L?$"):
s[stamp]
s[stamp] = 0
assert s[stamp] == 0
"""
test duplicates in time series
"""
@pytest.fixture(scope='module')
def dups():
dates = [datetime(2000, 1, 2), datetime(2000, 1, 2),
datetime(2000, 1, 2), datetime(2000, 1, 3),
datetime(2000, 1, 3), datetime(2000, 1, 3),
datetime(2000, 1, 4), datetime(2000, 1, 4),
datetime(2000, 1, 4), datetime(2000, 1, 5)]
return Series(np.random.randn(len(dates)), index=dates)
def test_constructor(dups):
assert isinstance(dups, Series)
assert isinstance(dups.index, DatetimeIndex)
def test_is_unique_monotonic(dups):
assert not dups.index.is_unique
def test_index_unique(dups):
uniques = dups.index.unique()
expected = DatetimeIndex([datetime(2000, 1, 2), datetime(2000, 1, 3),
datetime(2000, 1, 4), datetime(2000, 1, 5)])
assert uniques.dtype == 'M8[ns]' # sanity
tm.assert_index_equal(uniques, expected)
assert dups.index.nunique() == 4
# #2563
assert isinstance(uniques, DatetimeIndex)
dups_local = dups.index.tz_localize('US/Eastern')
dups_local.name = 'foo'
result = dups_local.unique()
expected = DatetimeIndex(expected, name='foo')
expected = expected.tz_localize('US/Eastern')
assert result.tz is not None
assert result.name == 'foo'
tm.assert_index_equal(result, expected)
# NaT, note this is excluded
arr = [1370745748 + t for t in range(20)] + [iNaT]
idx = DatetimeIndex(arr * 3)
tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
assert idx.nunique() == 20
assert idx.nunique(dropna=False) == 21
arr = [Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t)
for t in range(20)] + [NaT]
idx = DatetimeIndex(arr * 3)
tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
assert idx.nunique() == 20
assert idx.nunique(dropna=False) == 21
def test_index_dupes_contains():
d = datetime(2011, 12, 5, 20, 30)
ix = DatetimeIndex([d, d])
assert d in ix
def test_duplicate_dates_indexing(dups):
ts = dups
uniques = ts.index.unique()
for date in uniques:
result = ts[date]
mask = ts.index == date
total = (ts.index == date).sum()
expected = ts[mask]
if total > 1:
assert_series_equal(result, expected)
else:
assert_almost_equal(result, expected[0])
cp = ts.copy()
cp[date] = 0
expected = Series(np.where(mask, 0, ts), index=ts.index)
assert_series_equal(cp, expected)
with pytest.raises(KeyError, match=r"^947116800000000000L?$"):
ts[datetime(2000, 1, 6)]
# new index
ts[datetime(2000, 1, 6)] = 0
assert ts[datetime(2000, 1, 6)] == 0
def test_range_slice():
idx = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000',
'1/4/2000'])
ts = Series(np.random.randn(len(idx)), index=idx)
result = ts['1/2/2000':]
expected = ts[1:]
assert_series_equal(result, expected)
result = ts['1/2/2000':'1/3/2000']
expected = ts[1:4]
assert_series_equal(result, expected)
def test_groupby_average_dup_values(dups):
result = dups.groupby(level=0).mean()
expected = dups.groupby(dups.index).mean()
assert_series_equal(result, expected)
def test_indexing_over_size_cutoff():
import datetime
# #1821
old_cutoff = _index._SIZE_CUTOFF
try:
_index._SIZE_CUTOFF = 1000
# create large list of non periodic datetime
dates = []
sec = datetime.timedelta(seconds=1)
half_sec = datetime.timedelta(microseconds=500000)
d = datetime.datetime(2011, 12, 5, 20, 30)
n = 1100
for i in range(n):
dates.append(d)
dates.append(d + sec)
dates.append(d + sec + half_sec)
dates.append(d + sec + sec + half_sec)
d += 3 * sec
# duplicate some values in the list
duplicate_positions = np.random.randint(0, len(dates) - 1, 20)
for p in duplicate_positions:
dates[p + 1] = dates[p]
df = DataFrame(np.random.randn(len(dates), 4),
index=dates,
columns=list('ABCD'))
pos = n * 3
timestamp = df.index[pos]
assert timestamp in df.index
# it works!
df.loc[timestamp]
assert len(df.loc[[timestamp]]) > 0
finally:
_index._SIZE_CUTOFF = old_cutoff
def test_indexing_unordered():
# GH 2437
rng = date_range(start='2011-01-01', end='2011-01-15')
ts = Series(np.random.rand(len(rng)), index=rng)
ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])
for t in ts.index:
# TODO: unused?
s = str(t) # noqa
expected = ts[t]
result = ts2[t]
assert expected == result
# GH 3448 (ranges)
def compare(slobj):
result = ts2[slobj].copy()
result = result.sort_index()
expected = ts[slobj]
assert_series_equal(result, expected)
compare(slice('2011-01-01', '2011-01-15'))
compare(slice('2010-12-30', '2011-01-15'))
compare(slice('2011-01-01', '2011-01-16'))
# partial ranges
compare(slice('2011-01-01', '2011-01-6'))
compare(slice('2011-01-06', '2011-01-8'))
compare(slice('2011-01-06', '2011-01-12'))
# single values
result = ts2['2011'].sort_index()
expected = ts['2011']
assert_series_equal(result, expected)
# diff freq
rng = date_range(datetime(2005, 1, 1), periods=20, freq='M')
ts = Series(np.arange(len(rng)), index=rng)
ts = ts.take(np.random.permutation(20))
result = ts['2005']
for t in result.index:
assert t.year == 2005
def test_indexing():
idx = date_range("2001-1-1", periods=20, freq='M')
ts = Series(np.random.rand(len(idx)), index=idx)
# getting
# GH 3070, make sure semantics work on Series/Frame
expected = ts['2001']
expected.name = 'A'
df = DataFrame(dict(A=ts))
result = df['2001']['A']
assert_series_equal(expected, result)
# setting
ts['2001'] = 1
expected = ts['2001']
expected.name = 'A'
df.loc['2001', 'A'] = 1
result = df['2001']['A']
assert_series_equal(expected, result)
# GH3546 (not including times on the last day)
idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00',
freq='H')
ts = Series(lrange(len(idx)), index=idx)
expected = ts['2013-05']
assert_series_equal(expected, ts)
idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59',
freq='S')
ts = Series(lrange(len(idx)), index=idx)
expected = ts['2013-05']
assert_series_equal(expected, ts)
idx = [Timestamp('2013-05-31 00:00'),
Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))]
ts = Series(lrange(len(idx)), index=idx)
expected = ts['2013']
assert_series_equal(expected, ts)
# GH14826, indexing with a seconds resolution string / datetime object
df = DataFrame(np.random.rand(5, 5),
columns=['open', 'high', 'low', 'close', 'volume'],
index=date_range('2012-01-02 18:01:00',
periods=5, tz='US/Central', freq='s'))
expected = df.loc[[df.index[2]]]
# this is a single date, so will raise
with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
df['2012-01-02 18:01:02']
msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)"
with pytest.raises(KeyError, match=msg):
df[df.index[2]]
"""
test NaT support
"""
def test_set_none_nan():
series = Series(date_range('1/1/2000', periods=10))
series[3] = None
assert series[3] is NaT
series[3:5] = None
assert series[4] is NaT
series[5] = np.nan
assert series[5] is NaT
series[5:7] = np.nan
assert series[6] is NaT
def test_nat_operations():
# GH 8617
s = Series([0, pd.NaT], dtype='m8[ns]')
exp = s[0]
assert s.median() == exp
assert s.min() == exp
assert s.max() == exp
@pytest.mark.parametrize('method', ["round", "floor", "ceil"])
@pytest.mark.parametrize('freq', ["s", "5s", "min", "5min", "h", "5h"])
def test_round_nat(method, freq):
# GH14940
s = Series([pd.NaT])
expected = Series(pd.NaT)
round_method = getattr(s.dt, method)
assert_series_equal(round_method(freq), expected)
@@ -1,37 +0,0 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612
import numpy as np
from pandas.compat import lrange, range
from pandas import Series
from pandas.util.testing import assert_almost_equal, assert_series_equal
def test_iloc():
s = Series(np.random.randn(10), index=lrange(0, 20, 2))
for i in range(len(s)):
result = s.iloc[i]
exp = s[s.index[i]]
assert_almost_equal(result, exp)
# pass a slice
result = s.iloc[slice(1, 3)]
expected = s.loc[2:4]
assert_series_equal(result, expected)
# test slice is a view
result[:] = 0
assert (s[1:3] == 0).all()
# list of integers
result = s.iloc[[0, 2, 3, 4, 5]]
expected = s.reindex(s.index[[0, 2, 3, 4, 5]])
assert_series_equal(result, expected)
def test_iloc_nonunique():
s = Series([0, 1, 2], index=[0, 1, 0])
assert s.iloc[2] == 2
@@ -1,840 +0,0 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612
""" test get/set & misc """
from datetime import timedelta
import numpy as np
import pytest
from pandas.compat import lrange, range
from pandas.core.dtypes.common import is_scalar
import pandas as pd
from pandas import (
Categorical, DataFrame, MultiIndex, Series, Timedelta, Timestamp)
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal
from pandas.tseries.offsets import BDay
def test_basic_indexing():
s = Series(np.random.randn(5), index=['a', 'b', 'a', 'a', 'b'])
msg = "index out of bounds"
with pytest.raises(IndexError, match=msg):
s[5]
msg = "index 5 is out of bounds for axis 0 with size 5"
with pytest.raises(IndexError, match=msg):
s[5] = 0
with pytest.raises(KeyError, match=r"^'c'$"):
s['c']
s = s.sort_index()
msg = r"index out of bounds|^5$"
with pytest.raises(IndexError, match=msg):
s[5]
msg = r"index 5 is out of bounds for axis (0|1) with size 5|^5$"
with pytest.raises(IndexError, match=msg):
s[5] = 0
def test_basic_getitem_with_labels(test_data):
indices = test_data.ts.index[[5, 10, 15]]
result = test_data.ts[indices]
expected = test_data.ts.reindex(indices)
assert_series_equal(result, expected)
result = test_data.ts[indices[0]:indices[2]]
expected = test_data.ts.loc[indices[0]:indices[2]]
assert_series_equal(result, expected)
# integer indexes, be careful
s = Series(np.random.randn(10), index=lrange(0, 20, 2))
inds = [0, 2, 5, 7, 8]
arr_inds = np.array([0, 2, 5, 7, 8])
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = s[inds]
expected = s.reindex(inds)
assert_series_equal(result, expected)
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = s[arr_inds]
expected = s.reindex(arr_inds)
assert_series_equal(result, expected)
# GH12089
# with tz for values
s = Series(pd.date_range("2011-01-01", periods=3, tz="US/Eastern"),
index=['a', 'b', 'c'])
expected = Timestamp('2011-01-01', tz='US/Eastern')
result = s.loc['a']
assert result == expected
result = s.iloc[0]
assert result == expected
result = s['a']
assert result == expected
def test_getitem_setitem_ellipsis():
s = Series(np.random.randn(10))
np.fix(s)
result = s[...]
assert_series_equal(result, s)
s[...] = 5
assert (result == 5).all()
def test_getitem_get(test_data):
test_series = test_data.series
test_obj_series = test_data.objSeries
idx1 = test_series.index[5]
idx2 = test_obj_series.index[5]
assert test_series[idx1] == test_series.get(idx1)
assert test_obj_series[idx2] == test_obj_series.get(idx2)
assert test_series[idx1] == test_series[5]
assert test_obj_series[idx2] == test_obj_series[5]
assert test_series.get(-1) == test_series.get(test_series.index[-1])
assert test_series[5] == test_series.get(test_series.index[5])
# missing
d = test_data.ts.index[0] - BDay()
with pytest.raises(KeyError, match=r"Timestamp\('1999-12-31 00:00:00'\)"):
test_data.ts[d]
# None
# GH 5652
for s in [Series(), Series(index=list('abc'))]:
result = s.get(None)
assert result is None
def test_getitem_fancy(test_data):
slice1 = test_data.series[[1, 2, 3]]
slice2 = test_data.objSeries[[1, 2, 3]]
assert test_data.series.index[2] == slice1.index[1]
assert test_data.objSeries.index[2] == slice2.index[1]
assert test_data.series[2] == slice1[1]
assert test_data.objSeries[2] == slice2[1]
def test_getitem_generator(test_data):
gen = (x > 0 for x in test_data.series)
result = test_data.series[gen]
result2 = test_data.series[iter(test_data.series > 0)]
expected = test_data.series[test_data.series > 0]
assert_series_equal(result, expected)
assert_series_equal(result2, expected)
def test_type_promotion():
# GH12599
s = pd.Series()
s["a"] = pd.Timestamp("2016-01-01")
s["b"] = 3.0
s["c"] = "foo"
expected = Series([pd.Timestamp("2016-01-01"), 3.0, "foo"],
index=["a", "b", "c"])
assert_series_equal(s, expected)
@pytest.mark.parametrize(
'result_1, duplicate_item, expected_1',
[
[
pd.Series({1: 12, 2: [1, 2, 2, 3]}), pd.Series({1: 313}),
pd.Series({1: 12, }, dtype=object),
],
[
pd.Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
pd.Series({1: [1, 2, 3]}), pd.Series({1: [1, 2, 3], }),
],
])
def test_getitem_with_duplicates_indices(
result_1, duplicate_item, expected_1):
# GH 17610
result = result_1.append(duplicate_item)
expected = expected_1.append(duplicate_item)
assert_series_equal(result[1], expected)
assert result[2] == result_1[2]
def test_getitem_out_of_bounds(test_data):
# don't segfault, GH #495
msg = "index out of bounds"
with pytest.raises(IndexError, match=msg):
test_data.ts[len(test_data.ts)]
# GH #917
s = Series([])
with pytest.raises(IndexError, match=msg):
s[-1]
def test_getitem_setitem_integers():
# caused bug without test
s = Series([1, 2, 3], ['a', 'b', 'c'])
assert s.iloc[0] == s['a']
s.iloc[0] = 5
tm.assert_almost_equal(s['a'], 5)
def test_getitem_box_float64(test_data):
value = test_data.ts[5]
assert isinstance(value, np.float64)
@pytest.mark.parametrize(
'arr',
[
np.random.randn(10),
tm.makeDateIndex(10, name='a').tz_localize(
tz='US/Eastern'),
])
def test_get(arr):
# GH 21260
s = Series(arr, index=[2 * i for i in range(len(arr))])
assert s.get(4) == s.iloc[2]
result = s.get([4, 6])
expected = s.iloc[[2, 3]]
tm.assert_series_equal(result, expected)
result = s.get(slice(2))
expected = s.iloc[[0, 1]]
tm.assert_series_equal(result, expected)
assert s.get(-1) is None
assert s.get(s.index.max() + 1) is None
s = Series(arr[:6], index=list('abcdef'))
assert s.get('c') == s.iloc[2]
result = s.get(slice('b', 'd'))
expected = s.iloc[[1, 2, 3]]
tm.assert_series_equal(result, expected)
result = s.get('Z')
assert result is None
assert s.get(4) == s.iloc[4]
assert s.get(-1) == s.iloc[-1]
assert s.get(len(s)) is None
# GH 21257
s = pd.Series(arr)
s2 = s[::2]
assert s2.get(1) is None
def test_series_box_timestamp():
rng = pd.date_range('20090415', '20090519', freq='B')
ser = Series(rng)
assert isinstance(ser[5], pd.Timestamp)
rng = pd.date_range('20090415', '20090519', freq='B')
ser = Series(rng, index=rng)
assert isinstance(ser[5], pd.Timestamp)
assert isinstance(ser.iat[5], pd.Timestamp)
def test_getitem_ambiguous_keyerror():
s = Series(lrange(10), index=lrange(0, 20, 2))
with pytest.raises(KeyError, match=r"^1L?$"):
s[1]
with pytest.raises(KeyError, match=r"^1L?$"):
s.loc[1]
def test_getitem_unordered_dup():
obj = Series(lrange(5), index=['c', 'a', 'a', 'b', 'b'])
assert is_scalar(obj['c'])
assert obj['c'] == 0
def test_getitem_dups_with_missing():
# breaks reindex, so need to use .loc internally
# GH 4246
s = Series([1, 2, 3, 4], ['foo', 'bar', 'foo', 'bah'])
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
expected = s.loc[['foo', 'bar', 'bah', 'bam']]
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = s[['foo', 'bar', 'bah', 'bam']]
assert_series_equal(result, expected)
def test_getitem_dups():
s = Series(range(5), index=['A', 'A', 'B', 'C', 'C'], dtype=np.int64)
expected = Series([3, 4], index=['C', 'C'], dtype=np.int64)
result = s['C']
assert_series_equal(result, expected)
def test_setitem_ambiguous_keyerror():
s = Series(lrange(10), index=lrange(0, 20, 2))
# equivalent of an append
s2 = s.copy()
s2[1] = 5
expected = s.append(Series([5], index=[1]))
assert_series_equal(s2, expected)
s2 = s.copy()
s2.loc[1] = 5
expected = s.append(Series([5], index=[1]))
assert_series_equal(s2, expected)
def test_getitem_dataframe():
rng = list(range(10))
s = pd.Series(10, index=rng)
df = pd.DataFrame(rng, index=rng)
msg = ("Indexing a Series with DataFrame is not supported,"
" use the appropriate DataFrame column")
with pytest.raises(TypeError, match=msg):
s[df > 5]
def test_setitem(test_data):
test_data.ts[test_data.ts.index[5]] = np.NaN
test_data.ts[[1, 2, 17]] = np.NaN
test_data.ts[6] = np.NaN
assert np.isnan(test_data.ts[6])
assert np.isnan(test_data.ts[2])
test_data.ts[np.isnan(test_data.ts)] = 5
assert not np.isnan(test_data.ts[2])
# caught this bug when writing tests
series = Series(tm.makeIntIndex(20).astype(float),
index=tm.makeIntIndex(20))
series[::2] = 0
assert (series[::2] == 0).all()
# set item that's not contained
s = test_data.series.copy()
s['foobar'] = 1
app = Series([1], index=['foobar'], name='series')
expected = test_data.series.append(app)
assert_series_equal(s, expected)
# Test for issue #10193
key = pd.Timestamp('2012-01-01')
series = pd.Series()
series[key] = 47
expected = pd.Series(47, [key])
assert_series_equal(series, expected)
series = pd.Series([], pd.DatetimeIndex([], freq='D'))
series[key] = 47
expected = pd.Series(47, pd.DatetimeIndex([key], freq='D'))
assert_series_equal(series, expected)
def test_setitem_dtypes():
# change dtypes
# GH 4463
expected = Series([np.nan, 2, 3])
s = Series([1, 2, 3])
s.iloc[0] = np.nan
assert_series_equal(s, expected)
s = Series([1, 2, 3])
s.loc[0] = np.nan
assert_series_equal(s, expected)
s = Series([1, 2, 3])
s[0] = np.nan
assert_series_equal(s, expected)
s = Series([False])
s.loc[0] = np.nan
assert_series_equal(s, Series([np.nan]))
s = Series([False, True])
s.loc[0] = np.nan
assert_series_equal(s, Series([np.nan, 1.0]))
def test_set_value(test_data):
idx = test_data.ts.index[10]
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
res = test_data.ts.set_value(idx, 0)
assert res is test_data.ts
assert test_data.ts[idx] == 0
# equiv
s = test_data.series.copy()
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
res = s.set_value('foobar', 0)
assert res is s
assert res.index[-1] == 'foobar'
assert res['foobar'] == 0
s = test_data.series.copy()
s.loc['foobar'] = 0
assert s.index[-1] == 'foobar'
assert s['foobar'] == 0
def test_setslice(test_data):
sl = test_data.ts[5:20]
assert len(sl) == len(sl.index)
assert sl.index.is_unique is True
# FutureWarning from NumPy about [slice(None, 5).
@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
def test_basic_getitem_setitem_corner(test_data):
# invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
msg = "Can only tuple-index with a MultiIndex"
with pytest.raises(ValueError, match=msg):
test_data.ts[:, 2]
with pytest.raises(ValueError, match=msg):
test_data.ts[:, 2] = 2
# weird lists. [slice(0, 5)] will work but not two slices
result = test_data.ts[[slice(None, 5)]]
expected = test_data.ts[:5]
assert_series_equal(result, expected)
# OK
msg = r"unhashable type(: 'slice')?"
with pytest.raises(TypeError, match=msg):
test_data.ts[[5, slice(None, None)]]
with pytest.raises(TypeError, match=msg):
test_data.ts[[5, slice(None, None)]] = 2
@pytest.mark.parametrize('tz', ['US/Eastern', 'UTC', 'Asia/Tokyo'])
def test_setitem_with_tz(tz):
orig = pd.Series(pd.date_range('2016-01-01', freq='H', periods=3,
tz=tz))
assert orig.dtype == 'datetime64[ns, {0}]'.format(tz)
# scalar
s = orig.copy()
s[1] = pd.Timestamp('2011-01-01', tz=tz)
exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
pd.Timestamp('2011-01-01 00:00', tz=tz),
pd.Timestamp('2016-01-01 02:00', tz=tz)])
tm.assert_series_equal(s, exp)
s = orig.copy()
s.loc[1] = pd.Timestamp('2011-01-01', tz=tz)
tm.assert_series_equal(s, exp)
s = orig.copy()
s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz)
tm.assert_series_equal(s, exp)
# vector
vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
assert vals.dtype == 'datetime64[ns, {0}]'.format(tz)
s[[1, 2]] = vals
exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
pd.Timestamp('2011-01-01 00:00', tz=tz),
pd.Timestamp('2012-01-01 00:00', tz=tz)])
tm.assert_series_equal(s, exp)
s = orig.copy()
s.loc[[1, 2]] = vals
tm.assert_series_equal(s, exp)
s = orig.copy()
s.iloc[[1, 2]] = vals
tm.assert_series_equal(s, exp)
def test_setitem_with_tz_dst():
# GH XXX
tz = 'US/Eastern'
orig = pd.Series(pd.date_range('2016-11-06', freq='H', periods=3,
tz=tz))
assert orig.dtype == 'datetime64[ns, {0}]'.format(tz)
# scalar
s = orig.copy()
s[1] = pd.Timestamp('2011-01-01', tz=tz)
exp = pd.Series([pd.Timestamp('2016-11-06 00:00-04:00', tz=tz),
pd.Timestamp('2011-01-01 00:00-05:00', tz=tz),
pd.Timestamp('2016-11-06 01:00-05:00', tz=tz)])
tm.assert_series_equal(s, exp)
s = orig.copy()
s.loc[1] = pd.Timestamp('2011-01-01', tz=tz)
tm.assert_series_equal(s, exp)
s = orig.copy()
s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz)
tm.assert_series_equal(s, exp)
# vector
vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
assert vals.dtype == 'datetime64[ns, {0}]'.format(tz)
s[[1, 2]] = vals
exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz),
pd.Timestamp('2011-01-01 00:00', tz=tz),
pd.Timestamp('2012-01-01 00:00', tz=tz)])
tm.assert_series_equal(s, exp)
s = orig.copy()
s.loc[[1, 2]] = vals
tm.assert_series_equal(s, exp)
s = orig.copy()
s.iloc[[1, 2]] = vals
tm.assert_series_equal(s, exp)
def test_categorial_assigning_ops():
orig = Series(Categorical(["b", "b"], categories=["a", "b"]))
s = orig.copy()
s[:] = "a"
exp = Series(Categorical(["a", "a"], categories=["a", "b"]))
tm.assert_series_equal(s, exp)
s = orig.copy()
s[1] = "a"
exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
tm.assert_series_equal(s, exp)
s = orig.copy()
s[s.index > 0] = "a"
exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
tm.assert_series_equal(s, exp)
s = orig.copy()
s[[False, True]] = "a"
exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
tm.assert_series_equal(s, exp)
s = orig.copy()
s.index = ["x", "y"]
s["y"] = "a"
exp = Series(Categorical(["b", "a"], categories=["a", "b"]),
index=["x", "y"])
tm.assert_series_equal(s, exp)
# ensure that one can set something to np.nan
s = Series(Categorical([1, 2, 3]))
exp = Series(Categorical([1, np.nan, 3], categories=[1, 2, 3]))
s[1] = np.nan
tm.assert_series_equal(s, exp)
def test_slice(test_data):
numSlice = test_data.series[10:20]
numSliceEnd = test_data.series[-10:]
objSlice = test_data.objSeries[10:20]
assert test_data.series.index[9] not in numSlice.index
assert test_data.objSeries.index[9] not in objSlice.index
assert len(numSlice) == len(numSlice.index)
assert test_data.series[numSlice.index[0]] == numSlice[numSlice.index[0]]
assert numSlice.index[1] == test_data.series.index[11]
assert tm.equalContents(numSliceEnd, np.array(test_data.series)[-10:])
# Test return view.
sl = test_data.series[10:20]
sl[:] = 0
assert (test_data.series[10:20] == 0).all()
def test_slice_can_reorder_not_uniquely_indexed():
s = Series(1, index=['a', 'a', 'b', 'b', 'c'])
s[::-1] # it works!
def test_ix_setitem(test_data):
inds = test_data.series.index[[3, 4, 7]]
result = test_data.series.copy()
result.loc[inds] = 5
expected = test_data.series.copy()
expected[[3, 4, 7]] = 5
assert_series_equal(result, expected)
result.iloc[5:10] = 10
expected[5:10] = 10
assert_series_equal(result, expected)
# set slice with indices
d1, d2 = test_data.series.index[[5, 15]]
result.loc[d1:d2] = 6
expected[5:16] = 6 # because it's inclusive
assert_series_equal(result, expected)
# set index value
test_data.series.loc[d1] = 4
test_data.series.loc[d2] = 6
assert test_data.series[d1] == 4
assert test_data.series[d2] == 6
def test_setitem_na():
# these induce dtype changes
expected = Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan])
s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10])
s[::2] = np.nan
assert_series_equal(s, expected)
# gets coerced to float, right?
expected = Series([np.nan, 1, np.nan, 0])
s = Series([True, True, False, False])
s[::2] = np.nan
assert_series_equal(s, expected)
expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8,
9])
s = Series(np.arange(10))
s[:5] = np.nan
assert_series_equal(s, expected)
def test_timedelta_assignment():
# GH 8209
s = Series([])
s.loc['B'] = timedelta(1)
tm.assert_series_equal(s, Series(Timedelta('1 days'), index=['B']))
s = s.reindex(s.index.insert(0, 'A'))
tm.assert_series_equal(s, Series(
[np.nan, Timedelta('1 days')], index=['A', 'B']))
result = s.fillna(timedelta(1))
expected = Series(Timedelta('1 days'), index=['A', 'B'])
tm.assert_series_equal(result, expected)
s.loc['A'] = timedelta(1)
tm.assert_series_equal(s, expected)
# GH 14155
s = Series(10 * [np.timedelta64(10, 'm')])
s.loc[[1, 2, 3]] = np.timedelta64(20, 'm')
expected = pd.Series(10 * [np.timedelta64(10, 'm')])
expected.loc[[1, 2, 3]] = pd.Timedelta(np.timedelta64(20, 'm'))
tm.assert_series_equal(s, expected)
def test_underlying_data_conversion():
# GH 4080
df = DataFrame({c: [1, 2, 3] for c in ['a', 'b', 'c']})
df.set_index(['a', 'b', 'c'], inplace=True)
s = Series([1], index=[(2, 2, 2)])
df['val'] = 0
df
df['val'].update(s)
expected = DataFrame(
dict(a=[1, 2, 3], b=[1, 2, 3], c=[1, 2, 3], val=[0, 1, 0]))
expected.set_index(['a', 'b', 'c'], inplace=True)
tm.assert_frame_equal(df, expected)
# GH 3970
# these are chained assignments as well
pd.set_option('chained_assignment', None)
df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
df["cc"] = 0.0
ck = [True] * len(df)
df["bb"].iloc[0] = .13
# TODO: unused
df_tmp = df.iloc[ck] # noqa
df["bb"].iloc[0] = .15
assert df['bb'].iloc[0] == 0.15
pd.set_option('chained_assignment', 'raise')
# GH 3217
df = DataFrame(dict(a=[1, 3], b=[np.nan, 2]))
df['c'] = np.nan
df['c'].update(pd.Series(['foo'], index=[0]))
expected = DataFrame(dict(a=[1, 3], b=[np.nan, 2], c=['foo', np.nan]))
tm.assert_frame_equal(df, expected)
def test_preserve_refs(test_data):
seq = test_data.ts[[5, 10, 15]]
seq[1] = np.NaN
assert not np.isnan(test_data.ts[10])
def test_cast_on_putmask():
# GH 2746
# need to upcast
s = Series([1, 2], index=[1, 2], dtype='int64')
s[[True, False]] = Series([0], index=[1], dtype='int64')
expected = Series([0, 2], index=[1, 2], dtype='int64')
assert_series_equal(s, expected)
def test_type_promote_putmask():
# GH8387: test that changing types does not break alignment
ts = Series(np.random.randn(100), index=np.arange(100, 0, -1)).round(5)
left, mask = ts.copy(), ts > 0
right = ts[mask].copy().map(str)
left[mask] = right
assert_series_equal(left, ts.map(lambda t: str(t) if t > 0 else t))
s = Series([0, 1, 2, 0])
mask = s > 0
s2 = s[mask].map(str)
s[mask] = s2
assert_series_equal(s, Series([0, '1', '2', 0]))
s = Series([0, 'foo', 'bar', 0])
mask = Series([False, True, True, False])
s2 = s[mask]
s[mask] = s2
assert_series_equal(s, Series([0, 'foo', 'bar', 0]))
def test_multilevel_preserve_name():
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
'three']],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['first', 'second'])
s = Series(np.random.randn(len(index)), index=index, name='sth')
result = s['foo']
result2 = s.loc['foo']
assert result.name == s.name
assert result2.name == s.name
def test_setitem_scalar_into_readonly_backing_data():
# GH14359: test that you cannot mutate a read only buffer
array = np.zeros(5)
array.flags.writeable = False # make the array immutable
series = Series(array)
for n in range(len(series)):
msg = "assignment destination is read-only"
with pytest.raises(ValueError, match=msg):
series[n] = 1
assert array[n] == 0
def test_setitem_slice_into_readonly_backing_data():
# GH14359: test that you cannot mutate a read only buffer
array = np.zeros(5)
array.flags.writeable = False # make the array immutable
series = Series(array)
msg = "assignment destination is read-only"
with pytest.raises(ValueError, match=msg):
series[1:3] = 1
assert not array.any()
"""
miscellaneous methods
"""
def test_select(test_data):
# deprecated: gh-12410
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
n = len(test_data.ts)
result = test_data.ts.select(lambda x: x >= test_data.ts.index[n // 2])
expected = test_data.ts.reindex(test_data.ts.index[n // 2:])
assert_series_equal(result, expected)
result = test_data.ts.select(lambda x: x.weekday() == 2)
expected = test_data.ts[test_data.ts.index.weekday == 2]
assert_series_equal(result, expected)
def test_pop():
# GH 6600
df = DataFrame({'A': 0, 'B': np.arange(5, dtype='int64'), 'C': 0, })
k = df.iloc[4]
result = k.pop('B')
assert result == 4
expected = Series([0, 0], index=['A', 'C'], name=4)
assert_series_equal(k, expected)
def test_take():
s = Series([-1, 5, 6, 2, 4])
actual = s.take([1, 3, 4])
expected = Series([5, 2, 4], index=[1, 3, 4])
tm.assert_series_equal(actual, expected)
actual = s.take([-1, 3, 4])
expected = Series([4, 2, 4], index=[4, 3, 4])
tm.assert_series_equal(actual, expected)
msg = "index {} is out of bounds for size 5"
with pytest.raises(IndexError, match=msg.format(10)):
s.take([1, 10])
with pytest.raises(IndexError, match=msg.format(5)):
s.take([2, 5])
with tm.assert_produces_warning(FutureWarning):
s.take([-1, 3, 4], convert=False)
def test_take_categorical():
# https://github.com/pandas-dev/pandas/issues/20664
s = Series(pd.Categorical(['a', 'b', 'c']))
result = s.take([-2, -2, 0])
expected = Series(pd.Categorical(['b', 'b', 'a'],
categories=['a', 'b', 'c']),
index=[1, 1, 0])
assert_series_equal(result, expected)
def test_head_tail(test_data):
assert_series_equal(test_data.series.head(), test_data.series[:5])
assert_series_equal(test_data.series.head(0), test_data.series[0:0])
assert_series_equal(test_data.series.tail(), test_data.series[-5:])
assert_series_equal(test_data.series.tail(0), test_data.series[0:0])
@@ -1,168 +0,0 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612
import numpy as np
import pytest
from pandas.compat import lrange
import pandas as pd
from pandas import Series, Timestamp
from pandas.util.testing import assert_series_equal
@pytest.mark.parametrize("val,expected", [
(2**63 - 1, 3),
(2**63, 4),
])
def test_loc_uint64(val, expected):
# see gh-19399
s = Series({2**63 - 1: 3, 2**63: 4})
assert s.loc[val] == expected
def test_loc_getitem(test_data):
inds = test_data.series.index[[3, 4, 7]]
assert_series_equal(
test_data.series.loc[inds],
test_data.series.reindex(inds))
assert_series_equal(test_data.series.iloc[5::2], test_data.series[5::2])
# slice with indices
d1, d2 = test_data.ts.index[[5, 15]]
result = test_data.ts.loc[d1:d2]
expected = test_data.ts.truncate(d1, d2)
assert_series_equal(result, expected)
# boolean
mask = test_data.series > test_data.series.median()
assert_series_equal(test_data.series.loc[mask], test_data.series[mask])
# ask for index value
assert test_data.ts.loc[d1] == test_data.ts[d1]
assert test_data.ts.loc[d2] == test_data.ts[d2]
def test_loc_getitem_not_monotonic(test_data):
d1, d2 = test_data.ts.index[[5, 15]]
ts2 = test_data.ts[::2][[1, 2, 0]]
msg = r"Timestamp\('2000-01-10 00:00:00'\)"
with pytest.raises(KeyError, match=msg):
ts2.loc[d1:d2]
with pytest.raises(KeyError, match=msg):
ts2.loc[d1:d2] = 0
def test_loc_getitem_setitem_integer_slice_keyerrors():
s = Series(np.random.randn(10), index=lrange(0, 20, 2))
# this is OK
cp = s.copy()
cp.iloc[4:10] = 0
assert (cp.iloc[4:10] == 0).all()
# so is this
cp = s.copy()
cp.iloc[3:11] = 0
assert (cp.iloc[3:11] == 0).values.all()
result = s.iloc[2:6]
result2 = s.loc[3:11]
expected = s.reindex([4, 6, 8, 10])
assert_series_equal(result, expected)
assert_series_equal(result2, expected)
# non-monotonic, raise KeyError
s2 = s.iloc[lrange(5) + lrange(5, 10)[::-1]]
with pytest.raises(KeyError, match=r"^3L?$"):
s2.loc[3:11]
with pytest.raises(KeyError, match=r"^3L?$"):
s2.loc[3:11] = 0
def test_loc_getitem_iterator(test_data):
idx = iter(test_data.series.index[:10])
result = test_data.series.loc[idx]
assert_series_equal(result, test_data.series[:10])
def test_loc_setitem_boolean(test_data):
mask = test_data.series > test_data.series.median()
result = test_data.series.copy()
result.loc[mask] = 0
expected = test_data.series
expected[mask] = 0
assert_series_equal(result, expected)
def test_loc_setitem_corner(test_data):
inds = list(test_data.series.index[[5, 8, 12]])
test_data.series.loc[inds] = 5
msg = r"\['foo'\] not in index"
with pytest.raises(KeyError, match=msg):
test_data.series.loc[inds + ['foo']] = 5
def test_basic_setitem_with_labels(test_data):
indices = test_data.ts.index[[5, 10, 15]]
cp = test_data.ts.copy()
exp = test_data.ts.copy()
cp[indices] = 0
exp.loc[indices] = 0
assert_series_equal(cp, exp)
cp = test_data.ts.copy()
exp = test_data.ts.copy()
cp[indices[0]:indices[2]] = 0
exp.loc[indices[0]:indices[2]] = 0
assert_series_equal(cp, exp)
# integer indexes, be careful
s = Series(np.random.randn(10), index=lrange(0, 20, 2))
inds = [0, 4, 6]
arr_inds = np.array([0, 4, 6])
cp = s.copy()
exp = s.copy()
s[inds] = 0
s.loc[inds] = 0
assert_series_equal(cp, exp)
cp = s.copy()
exp = s.copy()
s[arr_inds] = 0
s.loc[arr_inds] = 0
assert_series_equal(cp, exp)
inds_notfound = [0, 4, 5, 6]
arr_inds_notfound = np.array([0, 4, 5, 6])
msg = r"\[5\] not contained in the index"
with pytest.raises(ValueError, match=msg):
s[inds_notfound] = 0
with pytest.raises(Exception, match=msg):
s[arr_inds_notfound] = 0
# GH12089
# with tz for values
s = Series(pd.date_range("2011-01-01", periods=3, tz="US/Eastern"),
index=['a', 'b', 'c'])
s2 = s.copy()
expected = Timestamp('2011-01-03', tz='US/Eastern')
s2.loc['a'] = expected
result = s2.loc['a']
assert result == expected
s2 = s.copy()
s2.iloc[0] = expected
result = s2.iloc[0]
assert result == expected
s2 = s.copy()
s2['a'] = expected
result = s2['a']
assert result == expected
@@ -1,259 +0,0 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612
import numpy as np
import pytest
from pandas.compat import lrange, range
import pandas as pd
from pandas import DataFrame, Index, Series
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal
def test_get():
# GH 6383
s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56, 45,
51, 39, 55, 43, 54, 52, 51, 54]))
result = s.get(25, 0)
expected = 0
assert result == expected
s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56,
45, 51, 39, 55, 43, 54, 52, 51, 54]),
index=pd.Float64Index(
[25.0, 36.0, 49.0, 64.0, 81.0, 100.0,
121.0, 144.0, 169.0, 196.0, 1225.0,
1296.0, 1369.0, 1444.0, 1521.0, 1600.0,
1681.0, 1764.0, 1849.0, 1936.0],
dtype='object'))
result = s.get(25, 0)
expected = 43
assert result == expected
# GH 7407
# with a boolean accessor
df = pd.DataFrame({'i': [0] * 3, 'b': [False] * 3})
vc = df.i.value_counts()
result = vc.get(99, default='Missing')
assert result == 'Missing'
vc = df.b.value_counts()
result = vc.get(False, default='Missing')
assert result == 3
result = vc.get(True, default='Missing')
assert result == 'Missing'
def test_get_nan():
# GH 8569
s = pd.Float64Index(range(10)).to_series()
assert s.get(np.nan) is None
assert s.get(np.nan, default='Missing') == 'Missing'
def test_get_nan_multiple():
# GH 8569
# ensure that fixing "test_get_nan" above hasn't broken get
# with multiple elements
s = pd.Float64Index(range(10)).to_series()
idx = [2, 30]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert_series_equal(s.get(idx),
Series([2, np.nan], index=idx))
idx = [2, np.nan]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert_series_equal(s.get(idx),
Series([2, np.nan], index=idx))
# GH 17295 - all missing keys
idx = [20, 30]
assert(s.get(idx) is None)
idx = [np.nan, np.nan]
assert(s.get(idx) is None)
def test_delitem():
# GH 5542
# should delete the item inplace
s = Series(lrange(5))
del s[0]
expected = Series(lrange(1, 5), index=lrange(1, 5))
assert_series_equal(s, expected)
del s[1]
expected = Series(lrange(2, 5), index=lrange(2, 5))
assert_series_equal(s, expected)
# empty
s = Series()
with pytest.raises(KeyError, match=r"^0$"):
del s[0]
# only 1 left, del, add, del
s = Series(1)
del s[0]
assert_series_equal(s, Series(dtype='int64', index=Index(
[], dtype='int64')))
s[0] = 1
assert_series_equal(s, Series(1))
del s[0]
assert_series_equal(s, Series(dtype='int64', index=Index(
[], dtype='int64')))
# Index(dtype=object)
s = Series(1, index=['a'])
del s['a']
assert_series_equal(s, Series(dtype='int64', index=Index(
[], dtype='object')))
s['a'] = 1
assert_series_equal(s, Series(1, index=['a']))
del s['a']
assert_series_equal(s, Series(dtype='int64', index=Index(
[], dtype='object')))
def test_slice_float64():
values = np.arange(10., 50., 2)
index = Index(values)
start, end = values[[5, 15]]
s = Series(np.random.randn(20), index=index)
result = s[start:end]
expected = s.iloc[5:16]
assert_series_equal(result, expected)
result = s.loc[start:end]
assert_series_equal(result, expected)
df = DataFrame(np.random.randn(20, 3), index=index)
result = df[start:end]
expected = df.iloc[5:16]
tm.assert_frame_equal(result, expected)
result = df.loc[start:end]
tm.assert_frame_equal(result, expected)
def test_getitem_negative_out_of_bounds():
s = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10))
msg = "index out of bounds"
with pytest.raises(IndexError, match=msg):
s[-11]
msg = "index -11 is out of bounds for axis 0 with size 10"
with pytest.raises(IndexError, match=msg):
s[-11] = 'foo'
def test_getitem_regression():
s = Series(lrange(5), index=lrange(5))
result = s[lrange(5)]
assert_series_equal(result, s)
def test_getitem_setitem_slice_bug():
s = Series(lrange(10), lrange(10))
result = s[-12:]
assert_series_equal(result, s)
result = s[-7:]
assert_series_equal(result, s[3:])
result = s[:-12]
assert_series_equal(result, s[:0])
s = Series(lrange(10), lrange(10))
s[-12:] = 0
assert (s == 0).all()
s[:-12] = 5
assert (s == 0).all()
def test_getitem_setitem_slice_integers():
s = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16])
result = s[:4]
expected = s.reindex([2, 4, 6, 8])
assert_series_equal(result, expected)
s[:4] = 0
assert (s[:4] == 0).all()
assert not (s[4:] == 0).any()
def test_setitem_float_labels():
# note labels are floats
s = Series(['a', 'b', 'c'], index=[0, 0.5, 1])
tmp = s.copy()
s.loc[1] = 'zoo'
tmp.iloc[2] = 'zoo'
assert_series_equal(s, tmp)
def test_slice_float_get_set(test_data):
msg = (r"cannot do slice indexing on <class 'pandas\.core\.indexes"
r"\.datetimes\.DatetimeIndex'> with these indexers \[{key}\]"
r" of <(class|type) 'float'>")
with pytest.raises(TypeError, match=msg.format(key=r"4\.0")):
test_data.ts[4.0:10.0]
with pytest.raises(TypeError, match=msg.format(key=r"4\.0")):
test_data.ts[4.0:10.0] = 0
with pytest.raises(TypeError, match=msg.format(key=r"4\.5")):
test_data.ts[4.5:10.0]
with pytest.raises(TypeError, match=msg.format(key=r"4\.5")):
test_data.ts[4.5:10.0] = 0
def test_slice_floats2():
s = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float))
assert len(s.loc[12.0:]) == 8
assert len(s.loc[12.5:]) == 7
i = np.arange(10, 20, dtype=float)
i[2] = 12.2
s.index = i
assert len(s.loc[12.0:]) == 8
assert len(s.loc[12.5:]) == 7
def test_int_indexing():
s = Series(np.random.randn(6), index=[0, 0, 1, 1, 2, 2])
with pytest.raises(KeyError, match=r"^5$"):
s[5]
with pytest.raises(KeyError, match=r"^'c'$"):
s['c']
# not monotonic
s = Series(np.random.randn(6), index=[2, 2, 0, 0, 1, 1])
with pytest.raises(KeyError, match=r"^5$"):
s[5]
with pytest.raises(KeyError, match=r"^'c'$"):
s['c']
def test_getitem_int64(test_data):
idx = np.int64(5)
assert test_data.ts[idx] == test_data.ts[5]