started work on backend

This commit is contained in:
d3m1g0d
2019-01-21 17:36:00 +01:00
parent a1a8bca34b
commit 9f9a7e4974
4032 changed files with 745079 additions and 0 deletions
@@ -0,0 +1,8 @@
import pytest
from pandas.tests.series.common import TestData
@pytest.fixture(scope='module')
def test_data():
return TestData()
@@ -0,0 +1,548 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612
import pytest
from datetime import datetime
import pandas as pd
import numpy as np
from numpy import nan
from pandas import compat
from pandas import (Series, date_range, isna, Categorical)
from pandas.compat import lrange, range
from pandas.util.testing import (assert_series_equal)
import pandas.util.testing as tm
@pytest.mark.parametrize(
'first_slice,second_slice', [
[[2, None], [None, -5]],
[[None, 0], [None, -5]],
[[None, -5], [None, 0]],
[[None, 0], [None, 0]]
])
@pytest.mark.parametrize('fill', [None, -1])
def test_align(test_data, first_slice, second_slice, join_type, fill):
a = test_data.ts[slice(*first_slice)]
b = test_data.ts[slice(*second_slice)]
aa, ab = a.align(b, join=join_type, fill_value=fill)
join_index = a.index.join(b.index, how=join_type)
if fill is not None:
diff_a = aa.index.difference(join_index)
diff_b = ab.index.difference(join_index)
if len(diff_a) > 0:
assert (aa.reindex(diff_a) == fill).all()
if len(diff_b) > 0:
assert (ab.reindex(diff_b) == fill).all()
ea = a.reindex(join_index)
eb = b.reindex(join_index)
if fill is not None:
ea = ea.fillna(fill)
eb = eb.fillna(fill)
assert_series_equal(aa, ea)
assert_series_equal(ab, eb)
assert aa.name == 'ts'
assert ea.name == 'ts'
assert ab.name == 'ts'
assert eb.name == 'ts'
@pytest.mark.parametrize(
'first_slice,second_slice', [
[[2, None], [None, -5]],
[[None, 0], [None, -5]],
[[None, -5], [None, 0]],
[[None, 0], [None, 0]]
])
@pytest.mark.parametrize('method', ['pad', 'bfill'])
@pytest.mark.parametrize('limit', [None, 1])
def test_align_fill_method(test_data,
first_slice, second_slice,
join_type, method, limit):
a = test_data.ts[slice(*first_slice)]
b = test_data.ts[slice(*second_slice)]
aa, ab = a.align(b, join=join_type, method=method, limit=limit)
join_index = a.index.join(b.index, how=join_type)
ea = a.reindex(join_index)
eb = b.reindex(join_index)
ea = ea.fillna(method=method, limit=limit)
eb = eb.fillna(method=method, limit=limit)
assert_series_equal(aa, ea)
assert_series_equal(ab, eb)
def test_align_nocopy(test_data):
b = test_data.ts[:5].copy()
# do copy
a = test_data.ts.copy()
ra, _ = a.align(b, join='left')
ra[:5] = 5
assert not (a[:5] == 5).any()
# do not copy
a = test_data.ts.copy()
ra, _ = a.align(b, join='left', copy=False)
ra[:5] = 5
assert (a[:5] == 5).all()
# do copy
a = test_data.ts.copy()
b = test_data.ts[:5].copy()
_, rb = a.align(b, join='right')
rb[:3] = 5
assert not (b[:3] == 5).any()
# do not copy
a = test_data.ts.copy()
b = test_data.ts[:5].copy()
_, rb = a.align(b, join='right', copy=False)
rb[:2] = 5
assert (b[:2] == 5).all()
def test_align_same_index(test_data):
a, b = test_data.ts.align(test_data.ts, copy=False)
assert a.index is test_data.ts.index
assert b.index is test_data.ts.index
a, b = test_data.ts.align(test_data.ts, copy=True)
assert a.index is not test_data.ts.index
assert b.index is not test_data.ts.index
def test_align_multiindex():
# GH 10665
midx = pd.MultiIndex.from_product([range(2), range(3), range(2)],
names=('a', 'b', 'c'))
idx = pd.Index(range(2), name='b')
s1 = pd.Series(np.arange(12, dtype='int64'), index=midx)
s2 = pd.Series(np.arange(2, dtype='int64'), index=idx)
# these must be the same results (but flipped)
res1l, res1r = s1.align(s2, join='left')
res2l, res2r = s2.align(s1, join='right')
expl = s1
tm.assert_series_equal(expl, res1l)
tm.assert_series_equal(expl, res2r)
expr = pd.Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
tm.assert_series_equal(expr, res1r)
tm.assert_series_equal(expr, res2l)
res1l, res1r = s1.align(s2, join='right')
res2l, res2r = s2.align(s1, join='left')
exp_idx = pd.MultiIndex.from_product([range(2), range(2), range(2)],
names=('a', 'b', 'c'))
expl = pd.Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
tm.assert_series_equal(expl, res1l)
tm.assert_series_equal(expl, res2r)
expr = pd.Series([0, 0, 1, 1] * 2, index=exp_idx)
tm.assert_series_equal(expr, res1r)
tm.assert_series_equal(expr, res2l)
def test_reindex(test_data):
identity = test_data.series.reindex(test_data.series.index)
# __array_interface__ is not defined for older numpies
# and on some pythons
try:
assert np.may_share_memory(test_data.series.index, identity.index)
except AttributeError:
pass
assert identity.index.is_(test_data.series.index)
assert identity.index.identical(test_data.series.index)
subIndex = test_data.series.index[10:20]
subSeries = test_data.series.reindex(subIndex)
for idx, val in compat.iteritems(subSeries):
assert val == test_data.series[idx]
subIndex2 = test_data.ts.index[10:20]
subTS = test_data.ts.reindex(subIndex2)
for idx, val in compat.iteritems(subTS):
assert val == test_data.ts[idx]
stuffSeries = test_data.ts.reindex(subIndex)
assert np.isnan(stuffSeries).all()
# This is extremely important for the Cython code to not screw up
nonContigIndex = test_data.ts.index[::2]
subNonContig = test_data.ts.reindex(nonContigIndex)
for idx, val in compat.iteritems(subNonContig):
assert val == test_data.ts[idx]
# return a copy the same index here
result = test_data.ts.reindex()
assert not (result is test_data.ts)
def test_reindex_nan():
ts = Series([2, 3, 5, 7], index=[1, 4, nan, 8])
i, j = [nan, 1, nan, 8, 4, nan], [2, 0, 2, 3, 1, 2]
assert_series_equal(ts.reindex(i), ts.iloc[j])
ts.index = ts.index.astype('object')
# reindex coerces index.dtype to float, loc/iloc doesn't
assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False)
def test_reindex_series_add_nat():
rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s')
series = Series(rng)
result = series.reindex(lrange(15))
assert np.issubdtype(result.dtype, np.dtype('M8[ns]'))
mask = result.isna()
assert mask[-5:].all()
assert not mask[:-5].any()
def test_reindex_with_datetimes():
rng = date_range('1/1/2000', periods=20)
ts = Series(np.random.randn(20), index=rng)
result = ts.reindex(list(ts.index[5:10]))
expected = ts[5:10]
tm.assert_series_equal(result, expected)
result = ts[list(ts.index[5:10])]
tm.assert_series_equal(result, expected)
def test_reindex_corner(test_data):
# (don't forget to fix this) I think it's fixed
test_data.empty.reindex(test_data.ts.index, method='pad') # it works
# corner case: pad empty series
reindexed = test_data.empty.reindex(test_data.ts.index, method='pad')
# pass non-Index
reindexed = test_data.ts.reindex(list(test_data.ts.index))
assert_series_equal(test_data.ts, reindexed)
# bad fill method
ts = test_data.ts[::2]
pytest.raises(Exception, ts.reindex, test_data.ts.index, method='foo')
def test_reindex_pad():
s = Series(np.arange(10), dtype='int64')
s2 = s[::2]
reindexed = s2.reindex(s.index, method='pad')
reindexed2 = s2.reindex(s.index, method='ffill')
assert_series_equal(reindexed, reindexed2)
expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10))
assert_series_equal(reindexed, expected)
# GH4604
s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
new_index = ['a', 'g', 'c', 'f']
expected = Series([1, 1, 3, 3], index=new_index)
# this changes dtype because the ffill happens after
result = s.reindex(new_index).ffill()
assert_series_equal(result, expected.astype('float64'))
result = s.reindex(new_index).ffill(downcast='infer')
assert_series_equal(result, expected)
expected = Series([1, 5, 3, 5], index=new_index)
result = s.reindex(new_index, method='ffill')
assert_series_equal(result, expected)
# inference of new dtype
s = Series([True, False, False, True], index=list('abcd'))
new_index = 'agc'
result = s.reindex(list(new_index)).ffill()
expected = Series([True, True, False], index=list(new_index))
assert_series_equal(result, expected)
# GH4618 shifted series downcasting
s = Series(False, index=lrange(0, 5))
result = s.shift(1).fillna(method='bfill')
expected = Series(False, index=lrange(0, 5))
assert_series_equal(result, expected)
def test_reindex_nearest():
s = Series(np.arange(10, dtype='int64'))
target = [0.1, 0.9, 1.5, 2.0]
actual = s.reindex(target, method='nearest')
expected = Series(np.around(target).astype('int64'), target)
assert_series_equal(expected, actual)
actual = s.reindex_like(actual, method='nearest')
assert_series_equal(expected, actual)
actual = s.reindex_like(actual, method='nearest', tolerance=1)
assert_series_equal(expected, actual)
actual = s.reindex_like(actual, method='nearest',
tolerance=[1, 2, 3, 4])
assert_series_equal(expected, actual)
actual = s.reindex(target, method='nearest', tolerance=0.2)
expected = Series([0, 1, np.nan, 2], target)
assert_series_equal(expected, actual)
actual = s.reindex(target, method='nearest',
tolerance=[0.3, 0.01, 0.4, 3])
expected = Series([0, np.nan, np.nan, 2], target)
assert_series_equal(expected, actual)
def test_reindex_backfill():
pass
def test_reindex_int(test_data):
ts = test_data.ts[::2]
int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)
# this should work fine
reindexed_int = int_ts.reindex(test_data.ts.index)
# if NaNs introduced
assert reindexed_int.dtype == np.float_
# NO NaNs introduced
reindexed_int = int_ts.reindex(int_ts.index[::2])
assert reindexed_int.dtype == np.int_
def test_reindex_bool(test_data):
# A series other than float, int, string, or object
ts = test_data.ts[::2]
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
# this should work fine
reindexed_bool = bool_ts.reindex(test_data.ts.index)
# if NaNs introduced
assert reindexed_bool.dtype == np.object_
# NO NaNs introduced
reindexed_bool = bool_ts.reindex(bool_ts.index[::2])
assert reindexed_bool.dtype == np.bool_
def test_reindex_bool_pad(test_data):
# fail
ts = test_data.ts[5:]
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
filled_bool = bool_ts.reindex(test_data.ts.index, method='pad')
assert isna(filled_bool[:5]).all()
def test_reindex_categorical():
index = date_range('20000101', periods=3)
# reindexing to an invalid Categorical
s = Series(['a', 'b', 'c'], dtype='category')
result = s.reindex(index)
expected = Series(Categorical(values=[np.nan, np.nan, np.nan],
categories=['a', 'b', 'c']))
expected.index = index
tm.assert_series_equal(result, expected)
# partial reindexing
expected = Series(Categorical(values=['b', 'c'], categories=['a', 'b',
'c']))
expected.index = [1, 2]
result = s.reindex([1, 2])
tm.assert_series_equal(result, expected)
expected = Series(Categorical(
values=['c', np.nan], categories=['a', 'b', 'c']))
expected.index = [2, 3]
result = s.reindex([2, 3])
tm.assert_series_equal(result, expected)
def test_reindex_like(test_data):
other = test_data.ts[::2]
assert_series_equal(test_data.ts.reindex(other.index),
test_data.ts.reindex_like(other))
# GH 7179
day1 = datetime(2013, 3, 5)
day2 = datetime(2013, 5, 5)
day3 = datetime(2014, 3, 5)
series1 = Series([5, None, None], [day1, day2, day3])
series2 = Series([None, None], [day1, day3])
result = series1.reindex_like(series2, method='pad')
expected = Series([5, np.nan], index=[day1, day3])
assert_series_equal(result, expected)
def test_reindex_fill_value():
# -----------------------------------------------------------
# floats
floats = Series([1., 2., 3.])
result = floats.reindex([1, 2, 3])
expected = Series([2., 3., np.nan], index=[1, 2, 3])
assert_series_equal(result, expected)
result = floats.reindex([1, 2, 3], fill_value=0)
expected = Series([2., 3., 0], index=[1, 2, 3])
assert_series_equal(result, expected)
# -----------------------------------------------------------
# ints
ints = Series([1, 2, 3])
result = ints.reindex([1, 2, 3])
expected = Series([2., 3., np.nan], index=[1, 2, 3])
assert_series_equal(result, expected)
# don't upcast
result = ints.reindex([1, 2, 3], fill_value=0)
expected = Series([2, 3, 0], index=[1, 2, 3])
assert issubclass(result.dtype.type, np.integer)
assert_series_equal(result, expected)
# -----------------------------------------------------------
# objects
objects = Series([1, 2, 3], dtype=object)
result = objects.reindex([1, 2, 3])
expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object)
assert_series_equal(result, expected)
result = objects.reindex([1, 2, 3], fill_value='foo')
expected = Series([2, 3, 'foo'], index=[1, 2, 3], dtype=object)
assert_series_equal(result, expected)
# ------------------------------------------------------------
# bools
bools = Series([True, False, True])
result = bools.reindex([1, 2, 3])
expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object)
assert_series_equal(result, expected)
result = bools.reindex([1, 2, 3], fill_value=False)
expected = Series([False, True, False], index=[1, 2, 3])
assert_series_equal(result, expected)
def test_rename():
# GH 17407
s = Series(range(1, 6), index=pd.Index(range(2, 7), name='IntIndex'))
result = s.rename(str)
expected = s.rename(lambda i: str(i))
assert_series_equal(result, expected)
assert result.name == expected.name
@pytest.mark.parametrize(
'data, index, drop_labels,'
' axis, expected_data, expected_index',
[
# Unique Index
([1, 2], ['one', 'two'], ['two'],
0, [1], ['one']),
([1, 2], ['one', 'two'], ['two'],
'rows', [1], ['one']),
([1, 1, 2], ['one', 'two', 'one'], ['two'],
0, [1, 2], ['one', 'one']),
# GH 5248 Non-Unique Index
([1, 1, 2], ['one', 'two', 'one'], 'two',
0, [1, 2], ['one', 'one']),
([1, 1, 2], ['one', 'two', 'one'], ['one'],
0, [1], ['two']),
([1, 1, 2], ['one', 'two', 'one'], 'one',
0, [1], ['two'])])
def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels,
expected_data, expected_index):
s = Series(data=data, index=index)
result = s.drop(drop_labels, axis=axis)
expected = Series(data=expected_data, index=expected_index)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
'data, index, drop_labels,'
' axis, error_type, error_desc',
[
# single string/tuple-like
(range(3), list('abc'), 'bc',
0, KeyError, 'not found in axis'),
# bad axis
(range(3), list('abc'), ('a',),
0, KeyError, 'not found in axis'),
(range(3), list('abc'), 'one',
'columns', ValueError, 'No axis named columns')])
def test_drop_exception_raised(data, index, drop_labels,
axis, error_type, error_desc):
with tm.assert_raises_regex(error_type, error_desc):
Series(data, index=index).drop(drop_labels, axis=axis)
def test_drop_with_ignore_errors():
# errors='ignore'
s = Series(range(3), index=list('abc'))
result = s.drop('bc', errors='ignore')
tm.assert_series_equal(result, s)
result = s.drop(['a', 'd'], errors='ignore')
expected = s.iloc[1:]
tm.assert_series_equal(result, expected)
# GH 8522
s = Series([2, 3], index=[True, False])
assert s.index.is_object()
result = s.drop(True)
expected = Series([3], index=[False])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 3]])
@pytest.mark.parametrize('drop_labels', [[], [1], [3]])
def test_drop_empty_list(index, drop_labels):
# GH 21494
expected_index = [i for i in index if i not in drop_labels]
series = pd.Series(index=index).drop(drop_labels)
tm.assert_series_equal(series, pd.Series(index=expected_index))
@pytest.mark.parametrize('data, index, drop_labels', [
(None, [1, 2, 3], [1, 4]),
(None, [1, 2, 2], [1, 4]),
([2, 3], [0, 1], [False, True])
])
def test_drop_non_empty_list(data, index, drop_labels):
# GH 21494 and GH 16877
with tm.assert_raises_regex(KeyError, 'not found in axis'):
pd.Series(data=data, index=index).drop(drop_labels)
@@ -0,0 +1,601 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612
import pytest
import pandas as pd
import numpy as np
from pandas import (Series, date_range, isna, Index, Timestamp)
from pandas.compat import lrange, range
from pandas.core.dtypes.common import is_integer
from pandas.core.indexing import IndexingError
from pandas.tseries.offsets import BDay
from pandas.util.testing import (assert_series_equal)
import pandas.util.testing as tm
def test_getitem_boolean(test_data):
s = test_data.series
mask = s > s.median()
# passing list is OK
result = s[list(mask)]
expected = s[mask]
assert_series_equal(result, expected)
tm.assert_index_equal(result.index, s.index[mask])
def test_getitem_boolean_empty():
s = Series([], dtype=np.int64)
s.index.name = 'index_name'
s = s[s.isna()]
assert s.index.name == 'index_name'
assert s.dtype == np.int64
# GH5877
# indexing with empty series
s = Series(['A', 'B'])
expected = Series(np.nan, index=['C'], dtype=object)
result = s[Series(['C'], dtype=object)]
assert_series_equal(result, expected)
s = Series(['A', 'B'])
expected = Series(dtype=object, index=Index([], dtype='int64'))
result = s[Series([], dtype=object)]
assert_series_equal(result, expected)
# invalid because of the boolean indexer
# that's empty or not-aligned
def f():
s[Series([], dtype=bool)]
pytest.raises(IndexingError, f)
def f():
s[Series([True], dtype=bool)]
pytest.raises(IndexingError, f)
def test_getitem_boolean_object(test_data):
# using column from DataFrame
s = test_data.series
mask = s > s.median()
omask = mask.astype(object)
# getitem
result = s[omask]
expected = s[mask]
assert_series_equal(result, expected)
# setitem
s2 = s.copy()
cop = s.copy()
cop[omask] = 5
s2[mask] = 5
assert_series_equal(cop, s2)
# nans raise exception
omask[5:10] = np.nan
pytest.raises(Exception, s.__getitem__, omask)
pytest.raises(Exception, s.__setitem__, omask, 5)
def test_getitem_setitem_boolean_corner(test_data):
ts = test_data.ts
mask_shifted = ts.shift(1, freq=BDay()) > ts.median()
# these used to raise...??
pytest.raises(Exception, ts.__getitem__, mask_shifted)
pytest.raises(Exception, ts.__setitem__, mask_shifted, 1)
# ts[mask_shifted]
# ts[mask_shifted] = 1
pytest.raises(Exception, ts.loc.__getitem__, mask_shifted)
pytest.raises(Exception, ts.loc.__setitem__, mask_shifted, 1)
# ts.loc[mask_shifted]
# ts.loc[mask_shifted] = 2
def test_setitem_boolean(test_data):
mask = test_data.series > test_data.series.median()
# similar indexed series
result = test_data.series.copy()
result[mask] = test_data.series * 2
expected = test_data.series * 2
assert_series_equal(result[mask], expected[mask])
# needs alignment
result = test_data.series.copy()
result[mask] = (test_data.series * 2)[0:5]
expected = (test_data.series * 2)[0:5].reindex_like(test_data.series)
expected[-mask] = test_data.series[mask]
assert_series_equal(result[mask], expected[mask])
def test_get_set_boolean_different_order(test_data):
ordered = test_data.series.sort_values()
# setting
copy = test_data.series.copy()
copy[ordered > 0] = 0
expected = test_data.series.copy()
expected[expected > 0] = 0
assert_series_equal(copy, expected)
# getting
sel = test_data.series[ordered > 0]
exp = test_data.series[test_data.series > 0]
assert_series_equal(sel, exp)
def test_where_unsafe():
# unsafe dtype changes
for dtype in [np.int8, np.int16, np.int32, np.int64, np.float16,
np.float32, np.float64]:
s = Series(np.arange(10), dtype=dtype)
mask = s < 5
s[mask] = lrange(2, 7)
expected = Series(lrange(2, 7) + lrange(5, 10), dtype=dtype)
assert_series_equal(s, expected)
assert s.dtype == expected.dtype
# these are allowed operations, but are upcasted
for dtype in [np.int64, np.float64]:
s = Series(np.arange(10), dtype=dtype)
mask = s < 5
values = [2.5, 3.5, 4.5, 5.5, 6.5]
s[mask] = values
expected = Series(values + lrange(5, 10), dtype='float64')
assert_series_equal(s, expected)
assert s.dtype == expected.dtype
# GH 9731
s = Series(np.arange(10), dtype='int64')
mask = s > 5
values = [2.5, 3.5, 4.5, 5.5]
s[mask] = values
expected = Series(lrange(6) + values, dtype='float64')
assert_series_equal(s, expected)
# can't do these as we are forced to change the itemsize of the input
# to something we cannot
for dtype in [np.int8, np.int16, np.int32, np.float16, np.float32]:
s = Series(np.arange(10), dtype=dtype)
mask = s < 5
values = [2.5, 3.5, 4.5, 5.5, 6.5]
pytest.raises(Exception, s.__setitem__, tuple(mask), values)
# GH3235
s = Series(np.arange(10), dtype='int64')
mask = s < 5
s[mask] = lrange(2, 7)
expected = Series(lrange(2, 7) + lrange(5, 10), dtype='int64')
assert_series_equal(s, expected)
assert s.dtype == expected.dtype
s = Series(np.arange(10), dtype='int64')
mask = s > 5
s[mask] = [0] * 4
expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype='int64')
assert_series_equal(s, expected)
s = Series(np.arange(10))
mask = s > 5
def f():
s[mask] = [5, 4, 3, 2, 1]
pytest.raises(ValueError, f)
def f():
s[mask] = [0] * 5
pytest.raises(ValueError, f)
# dtype changes
s = Series([1, 2, 3, 4])
result = s.where(s > 2, np.nan)
expected = Series([np.nan, np.nan, 3, 4])
assert_series_equal(result, expected)
# GH 4667
# setting with None changes dtype
s = Series(range(10)).astype(float)
s[8] = None
result = s[8]
assert isna(result)
s = Series(range(10)).astype(float)
s[s > 8] = None
result = s[isna(s)]
expected = Series(np.nan, index=[9])
assert_series_equal(result, expected)
def test_where_raise_on_error_deprecation():
# gh-14968
# deprecation of raise_on_error
s = Series(np.random.randn(5))
cond = s > 0
with tm.assert_produces_warning(FutureWarning):
s.where(cond, raise_on_error=True)
with tm.assert_produces_warning(FutureWarning):
s.mask(cond, raise_on_error=True)
def test_where():
s = Series(np.random.randn(5))
cond = s > 0
rs = s.where(cond).dropna()
rs2 = s[cond]
assert_series_equal(rs, rs2)
rs = s.where(cond, -s)
assert_series_equal(rs, s.abs())
rs = s.where(cond)
assert (s.shape == rs.shape)
assert (rs is not s)
# test alignment
cond = Series([True, False, False, True, False], index=s.index)
s2 = -(s.abs())
expected = s2[cond].reindex(s2.index[:3]).reindex(s2.index)
rs = s2.where(cond[:3])
assert_series_equal(rs, expected)
expected = s2.abs()
expected.iloc[0] = s2[0]
rs = s2.where(cond[:3], -s2)
assert_series_equal(rs, expected)
def test_where_error():
s = Series(np.random.randn(5))
cond = s > 0
pytest.raises(ValueError, s.where, 1)
pytest.raises(ValueError, s.where, cond[:3].values, -s)
# GH 2745
s = Series([1, 2])
s[[True, False]] = [0, 1]
expected = Series([0, 2])
assert_series_equal(s, expected)
# failures
pytest.raises(ValueError, s.__setitem__, tuple([[[True, False]]]),
[0, 2, 3])
pytest.raises(ValueError, s.__setitem__, tuple([[[True, False]]]),
[])
@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
def test_where_array_like(klass):
# see gh-15414
s = Series([1, 2, 3])
cond = [False, True, True]
expected = Series([np.nan, 2, 3])
result = s.where(klass(cond))
assert_series_equal(result, expected)
@pytest.mark.parametrize('cond', [
[1, 0, 1],
Series([2, 5, 7]),
["True", "False", "True"],
[Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")]
])
def test_where_invalid_input(cond):
# see gh-15414: only boolean arrays accepted
s = Series([1, 2, 3])
msg = "Boolean array expected for the condition"
with tm.assert_raises_regex(ValueError, msg):
s.where(cond)
msg = "Array conditional must be same shape as self"
with tm.assert_raises_regex(ValueError, msg):
s.where([True])
def test_where_ndframe_align():
msg = "Array conditional must be same shape as self"
s = Series([1, 2, 3])
cond = [True]
with tm.assert_raises_regex(ValueError, msg):
s.where(cond)
expected = Series([1, np.nan, np.nan])
out = s.where(Series(cond))
tm.assert_series_equal(out, expected)
cond = np.array([False, True, False, True])
with tm.assert_raises_regex(ValueError, msg):
s.where(cond)
expected = Series([np.nan, 2, np.nan])
out = s.where(Series(cond))
tm.assert_series_equal(out, expected)
def test_where_setitem_invalid():
# GH 2702
# make sure correct exceptions are raised on invalid list assignment
# slice
s = Series(list('abc'))
def f():
s[0:3] = list(range(27))
pytest.raises(ValueError, f)
s[0:3] = list(range(3))
expected = Series([0, 1, 2])
assert_series_equal(s.astype(np.int64), expected, )
# slice with step
s = Series(list('abcdef'))
def f():
s[0:4:2] = list(range(27))
pytest.raises(ValueError, f)
s = Series(list('abcdef'))
s[0:4:2] = list(range(2))
expected = Series([0, 'b', 1, 'd', 'e', 'f'])
assert_series_equal(s, expected)
# neg slices
s = Series(list('abcdef'))
def f():
s[:-1] = list(range(27))
pytest.raises(ValueError, f)
s[-3:-1] = list(range(2))
expected = Series(['a', 'b', 'c', 0, 1, 'f'])
assert_series_equal(s, expected)
# list
s = Series(list('abc'))
def f():
s[[0, 1, 2]] = list(range(27))
pytest.raises(ValueError, f)
s = Series(list('abc'))
def f():
s[[0, 1, 2]] = list(range(2))
pytest.raises(ValueError, f)
# scalar
s = Series(list('abc'))
s[0] = list(range(10))
expected = Series([list(range(10)), 'b', 'c'])
assert_series_equal(s, expected)
@pytest.mark.parametrize('size', range(2, 6))
@pytest.mark.parametrize('mask', [
[True, False, False, False, False],
[True, False],
[False]
])
@pytest.mark.parametrize('item', [
2.0, np.nan, np.finfo(np.float).max, np.finfo(np.float).min
])
# Test numpy arrays, lists and tuples as the input to be
# broadcast
@pytest.mark.parametrize('box', [
lambda x: np.array([x]),
lambda x: [x],
lambda x: (x,)
])
def test_broadcast(size, mask, item, box):
selection = np.resize(mask, size)
data = np.arange(size, dtype=float)
# Construct the expected series by taking the source
# data or item based on the selection
expected = Series([item if use_item else data[
i] for i, use_item in enumerate(selection)])
s = Series(data)
s[selection] = box(item)
assert_series_equal(s, expected)
s = Series(data)
result = s.where(~selection, box(item))
assert_series_equal(result, expected)
s = Series(data)
result = s.mask(selection, box(item))
assert_series_equal(result, expected)
def test_where_inplace():
s = Series(np.random.randn(5))
cond = s > 0
rs = s.copy()
rs.where(cond, inplace=True)
assert_series_equal(rs.dropna(), s[cond])
assert_series_equal(rs, s.where(cond))
rs = s.copy()
rs.where(cond, -s, inplace=True)
assert_series_equal(rs, s.where(cond, -s))
def test_where_dups():
# GH 4550
# where crashes with dups in index
s1 = Series(list(range(3)))
s2 = Series(list(range(3)))
comb = pd.concat([s1, s2])
result = comb.where(comb < 2)
expected = Series([0, 1, np.nan, 0, 1, np.nan],
index=[0, 1, 2, 0, 1, 2])
assert_series_equal(result, expected)
# GH 4548
# inplace updating not working with dups
comb[comb < 1] = 5
expected = Series([5, 1, 2, 5, 1, 2], index=[0, 1, 2, 0, 1, 2])
assert_series_equal(comb, expected)
comb[comb < 2] += 10
expected = Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2])
assert_series_equal(comb, expected)
def test_where_numeric_with_string():
# GH 9280
s = pd.Series([1, 2, 3])
w = s.where(s > 1, 'X')
assert not is_integer(w[0])
assert is_integer(w[1])
assert is_integer(w[2])
assert isinstance(w[0], str)
assert w.dtype == 'object'
w = s.where(s > 1, ['X', 'Y', 'Z'])
assert not is_integer(w[0])
assert is_integer(w[1])
assert is_integer(w[2])
assert isinstance(w[0], str)
assert w.dtype == 'object'
w = s.where(s > 1, np.array(['X', 'Y', 'Z']))
assert not is_integer(w[0])
assert is_integer(w[1])
assert is_integer(w[2])
assert isinstance(w[0], str)
assert w.dtype == 'object'
def test_where_timedelta_coerce():
s = Series([1, 2], dtype='timedelta64[ns]')
expected = Series([10, 10])
mask = np.array([False, False])
rs = s.where(mask, [10, 10])
assert_series_equal(rs, expected)
rs = s.where(mask, 10)
assert_series_equal(rs, expected)
rs = s.where(mask, 10.0)
assert_series_equal(rs, expected)
rs = s.where(mask, [10.0, 10.0])
assert_series_equal(rs, expected)
rs = s.where(mask, [10.0, np.nan])
expected = Series([10, None], dtype='object')
assert_series_equal(rs, expected)
def test_where_datetime_conversion():
s = Series(date_range('20130102', periods=2))
expected = Series([10, 10])
mask = np.array([False, False])
rs = s.where(mask, [10, 10])
assert_series_equal(rs, expected)
rs = s.where(mask, 10)
assert_series_equal(rs, expected)
rs = s.where(mask, 10.0)
assert_series_equal(rs, expected)
rs = s.where(mask, [10.0, 10.0])
assert_series_equal(rs, expected)
rs = s.where(mask, [10.0, np.nan])
expected = Series([10, None], dtype='object')
assert_series_equal(rs, expected)
# GH 15701
timestamps = ['2016-12-31 12:00:04+00:00',
'2016-12-31 12:00:04.010000+00:00']
s = Series([pd.Timestamp(t) for t in timestamps])
rs = s.where(Series([False, True]))
expected = Series([pd.NaT, s[1]])
assert_series_equal(rs, expected)
def test_mask():
# compare with tested results in test_where
s = Series(np.random.randn(5))
cond = s > 0
rs = s.where(~cond, np.nan)
assert_series_equal(rs, s.mask(cond))
rs = s.where(~cond)
rs2 = s.mask(cond)
assert_series_equal(rs, rs2)
rs = s.where(~cond, -s)
rs2 = s.mask(cond, -s)
assert_series_equal(rs, rs2)
cond = Series([True, False, False, True, False], index=s.index)
s2 = -(s.abs())
rs = s2.where(~cond[:3])
rs2 = s2.mask(cond[:3])
assert_series_equal(rs, rs2)
rs = s2.where(~cond[:3], -s2)
rs2 = s2.mask(cond[:3], -s2)
assert_series_equal(rs, rs2)
pytest.raises(ValueError, s.mask, 1)
pytest.raises(ValueError, s.mask, cond[:3].values, -s)
# dtype changes
s = Series([1, 2, 3, 4])
result = s.mask(s > 2, np.nan)
expected = Series([1, 2, np.nan, np.nan])
assert_series_equal(result, expected)
def test_mask_inplace():
s = Series(np.random.randn(5))
cond = s > 0
rs = s.copy()
rs.mask(cond, inplace=True)
assert_series_equal(rs.dropna(), s[~cond])
assert_series_equal(rs, s.mask(cond))
rs = s.copy()
rs.mask(cond, -s, inplace=True)
assert_series_equal(rs, s.mask(cond, -s))
@@ -0,0 +1,33 @@
import pandas as pd
import pandas.util.testing as tm
def test_getitem_callable():
# GH 12533
s = pd.Series(4, index=list('ABCD'))
result = s[lambda x: 'A']
assert result == s.loc['A']
result = s[lambda x: ['A', 'B']]
tm.assert_series_equal(result, s.loc[['A', 'B']])
result = s[lambda x: [True, False, True, True]]
tm.assert_series_equal(result, s.iloc[[0, 2, 3]])
def test_setitem_callable():
# GH 12533
s = pd.Series([1, 2, 3, 4], index=list('ABCD'))
s[lambda x: 'A'] = -1
tm.assert_series_equal(s, pd.Series([-1, 2, 3, 4], index=list('ABCD')))
def test_setitem_other_callable():
# GH 13299
inc = lambda x: x + 1
s = pd.Series([1, 2, -1, 4])
s[s < 0] = inc
expected = pd.Series([1, 2, inc, 4])
tm.assert_series_equal(s, expected)
@@ -0,0 +1,709 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612
import pytest
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
from pandas import (Series, DataFrame,
date_range, Timestamp, DatetimeIndex, NaT)
from pandas.compat import lrange, range
from pandas.util.testing import (assert_series_equal,
assert_frame_equal, assert_almost_equal)
import pandas.util.testing as tm
import pandas._libs.index as _index
from pandas._libs import tslib
"""
Also test support for datetime64[ns] in Series / DataFrame
"""
def test_fancy_getitem():
dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1),
end=datetime(2010, 1, 1))
s = Series(np.arange(len(dti)), index=dti)
assert s[48] == 48
assert s['1/2/2009'] == 48
assert s['2009-1-2'] == 48
assert s[datetime(2009, 1, 2)] == 48
assert s[Timestamp(datetime(2009, 1, 2))] == 48
pytest.raises(KeyError, s.__getitem__, '2009-1-3')
assert_series_equal(s['3/6/2009':'2009-06-05'],
s[datetime(2009, 3, 6):datetime(2009, 6, 5)])
def test_fancy_setitem():
dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1),
end=datetime(2010, 1, 1))
s = Series(np.arange(len(dti)), index=dti)
s[48] = -1
assert s[48] == -1
s['1/2/2009'] = -2
assert s[48] == -2
s['1/2/2009':'2009-06-05'] = -3
assert (s[48:54] == -3).all()
def test_dti_snap():
dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002',
'1/5/2002', '1/6/2002', '1/7/2002'], freq='D')
res = dti.snap(freq='W-MON')
exp = date_range('12/31/2001', '1/7/2002', freq='w-mon')
exp = exp.repeat([3, 4])
assert (res == exp).all()
res = dti.snap(freq='B')
exp = date_range('1/1/2002', '1/7/2002', freq='b')
exp = exp.repeat([1, 1, 1, 2, 2])
assert (res == exp).all()
def test_dti_reset_index_round_trip():
dti = DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D')
d1 = DataFrame({'v': np.random.rand(len(dti))}, index=dti)
d2 = d1.reset_index()
assert d2.dtypes[0] == np.dtype('M8[ns]')
d3 = d2.set_index('index')
assert_frame_equal(d1, d3, check_names=False)
# #2329
stamp = datetime(2012, 11, 22)
df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value'])
df = df.set_index('Date')
assert df.index[0] == stamp
assert df.reset_index()['Date'][0] == stamp
def test_series_set_value():
# #1561
dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)]
index = DatetimeIndex(dates)
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
s = Series().set_value(dates[0], 1.)
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
s2 = s.set_value(dates[1], np.nan)
exp = Series([1., np.nan], index=index)
assert_series_equal(s2, exp)
# s = Series(index[:1], index[:1])
# s2 = s.set_value(dates[1], index[1])
# assert s2.values.dtype == 'M8[ns]'
@pytest.mark.slow
def test_slice_locs_indexerror():
times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10)
for i in range(100000)]
s = Series(lrange(100000), times)
s.loc[datetime(1900, 1, 1):datetime(2100, 1, 1)]
def test_slicing_datetimes():
# GH 7523
# unique
df = DataFrame(np.arange(4., dtype='float64'),
index=[datetime(2001, 1, i, 10, 00)
for i in [1, 2, 3, 4]])
result = df.loc[datetime(2001, 1, 1, 10):]
assert_frame_equal(result, df)
result = df.loc[:datetime(2001, 1, 4, 10)]
assert_frame_equal(result, df)
result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)]
assert_frame_equal(result, df)
result = df.loc[datetime(2001, 1, 1, 11):]
expected = df.iloc[1:]
assert_frame_equal(result, expected)
result = df.loc['20010101 11':]
assert_frame_equal(result, expected)
# duplicates
df = pd.DataFrame(np.arange(5., dtype='float64'),
index=[datetime(2001, 1, i, 10, 00)
for i in [1, 2, 2, 3, 4]])
result = df.loc[datetime(2001, 1, 1, 10):]
assert_frame_equal(result, df)
result = df.loc[:datetime(2001, 1, 4, 10)]
assert_frame_equal(result, df)
result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)]
assert_frame_equal(result, df)
result = df.loc[datetime(2001, 1, 1, 11):]
expected = df.iloc[1:]
assert_frame_equal(result, expected)
result = df.loc['20010101 11':]
assert_frame_equal(result, expected)
def test_frame_datetime64_duplicated():
dates = date_range('2010-07-01', end='2010-08-05')
tst = DataFrame({'symbol': 'AAA', 'date': dates})
result = tst.duplicated(['date', 'symbol'])
assert (-result).all()
tst = DataFrame({'date': dates})
result = tst.duplicated()
assert (-result).all()
def test_getitem_setitem_datetime_tz_pytz():
from pytz import timezone as tz
from pandas import date_range
N = 50
# testing with timezone, GH #2785
rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern')
ts = Series(np.random.randn(N), index=rng)
# also test Timestamp tz handling, GH #2789
result = ts.copy()
result["1990-01-01 09:00:00+00:00"] = 0
result["1990-01-01 09:00:00+00:00"] = ts[4]
assert_series_equal(result, ts)
result = ts.copy()
result["1990-01-01 03:00:00-06:00"] = 0
result["1990-01-01 03:00:00-06:00"] = ts[4]
assert_series_equal(result, ts)
# repeat with datetimes
result = ts.copy()
result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0
result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4]
assert_series_equal(result, ts)
result = ts.copy()
# comparison dates with datetime MUST be localized!
date = tz('US/Central').localize(datetime(1990, 1, 1, 3))
result[date] = 0
result[date] = ts[4]
assert_series_equal(result, ts)
def test_getitem_setitem_datetime_tz_dateutil():
from dateutil.tz import tzutc
from pandas._libs.tslibs.timezones import dateutil_gettz as gettz
tz = lambda x: tzutc() if x == 'UTC' else gettz(
x) # handle special case for utc in dateutil
from pandas import date_range
N = 50
# testing with timezone, GH #2785
rng = date_range('1/1/1990', periods=N, freq='H',
tz='America/New_York')
ts = Series(np.random.randn(N), index=rng)
# also test Timestamp tz handling, GH #2789
result = ts.copy()
result["1990-01-01 09:00:00+00:00"] = 0
result["1990-01-01 09:00:00+00:00"] = ts[4]
assert_series_equal(result, ts)
result = ts.copy()
result["1990-01-01 03:00:00-06:00"] = 0
result["1990-01-01 03:00:00-06:00"] = ts[4]
assert_series_equal(result, ts)
# repeat with datetimes
result = ts.copy()
result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0
result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4]
assert_series_equal(result, ts)
result = ts.copy()
result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = 0
result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = ts[4]
assert_series_equal(result, ts)
def test_getitem_setitem_datetimeindex():
N = 50
# testing with timezone, GH #2785
rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern')
ts = Series(np.random.randn(N), index=rng)
result = ts["1990-01-01 04:00:00"]
expected = ts[4]
assert result == expected
result = ts.copy()
result["1990-01-01 04:00:00"] = 0
result["1990-01-01 04:00:00"] = ts[4]
assert_series_equal(result, ts)
result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"]
expected = ts[4:8]
assert_series_equal(result, expected)
result = ts.copy()
result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0
result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8]
assert_series_equal(result, ts)
lb = "1990-01-01 04:00:00"
rb = "1990-01-01 07:00:00"
# GH#18435 strings get a pass from tzawareness compat
result = ts[(ts.index >= lb) & (ts.index <= rb)]
expected = ts[4:8]
assert_series_equal(result, expected)
lb = "1990-01-01 04:00:00-0500"
rb = "1990-01-01 07:00:00-0500"
result = ts[(ts.index >= lb) & (ts.index <= rb)]
expected = ts[4:8]
assert_series_equal(result, expected)
# repeat all the above with naive datetimes
result = ts[datetime(1990, 1, 1, 4)]
expected = ts[4]
assert result == expected
result = ts.copy()
result[datetime(1990, 1, 1, 4)] = 0
result[datetime(1990, 1, 1, 4)] = ts[4]
assert_series_equal(result, ts)
result = ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)]
expected = ts[4:8]
assert_series_equal(result, expected)
result = ts.copy()
result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = 0
result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8]
assert_series_equal(result, ts)
lb = datetime(1990, 1, 1, 4)
rb = datetime(1990, 1, 1, 7)
with pytest.raises(TypeError):
# tznaive vs tzaware comparison is invalid
# see GH#18376, GH#18162
ts[(ts.index >= lb) & (ts.index <= rb)]
lb = pd.Timestamp(datetime(1990, 1, 1, 4)).tz_localize(rng.tzinfo)
rb = pd.Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo)
result = ts[(ts.index >= lb) & (ts.index <= rb)]
expected = ts[4:8]
assert_series_equal(result, expected)
result = ts[ts.index[4]]
expected = ts[4]
assert result == expected
result = ts[ts.index[4:8]]
expected = ts[4:8]
assert_series_equal(result, expected)
result = ts.copy()
result[ts.index[4:8]] = 0
result[4:8] = ts[4:8]
assert_series_equal(result, ts)
# also test partial date slicing
result = ts["1990-01-02"]
expected = ts[24:48]
assert_series_equal(result, expected)
result = ts.copy()
result["1990-01-02"] = 0
result["1990-01-02"] = ts[24:48]
assert_series_equal(result, ts)
def test_getitem_setitem_periodindex():
from pandas import period_range
N = 50
rng = period_range('1/1/1990', periods=N, freq='H')
ts = Series(np.random.randn(N), index=rng)
result = ts["1990-01-01 04"]
expected = ts[4]
assert result == expected
result = ts.copy()
result["1990-01-01 04"] = 0
result["1990-01-01 04"] = ts[4]
assert_series_equal(result, ts)
result = ts["1990-01-01 04":"1990-01-01 07"]
expected = ts[4:8]
assert_series_equal(result, expected)
result = ts.copy()
result["1990-01-01 04":"1990-01-01 07"] = 0
result["1990-01-01 04":"1990-01-01 07"] = ts[4:8]
assert_series_equal(result, ts)
lb = "1990-01-01 04"
rb = "1990-01-01 07"
result = ts[(ts.index >= lb) & (ts.index <= rb)]
expected = ts[4:8]
assert_series_equal(result, expected)
# GH 2782
result = ts[ts.index[4]]
expected = ts[4]
assert result == expected
result = ts[ts.index[4:8]]
expected = ts[4:8]
assert_series_equal(result, expected)
result = ts.copy()
result[ts.index[4:8]] = 0
result[4:8] = ts[4:8]
assert_series_equal(result, ts)
def test_getitem_median_slice_bug():
index = date_range('20090415', '20090519', freq='2B')
s = Series(np.random.randn(13), index=index)
indexer = [slice(6, 7, None)]
result = s[indexer]
expected = s[indexer[0]]
assert_series_equal(result, expected)
def test_datetime_indexing():
from pandas import date_range
index = date_range('1/1/2000', '1/7/2000')
index = index.repeat(3)
s = Series(len(index), index=index)
stamp = Timestamp('1/8/2000')
pytest.raises(KeyError, s.__getitem__, stamp)
s[stamp] = 0
assert s[stamp] == 0
# not monotonic
s = Series(len(index), index=index)
s = s[::-1]
pytest.raises(KeyError, s.__getitem__, stamp)
s[stamp] = 0
assert s[stamp] == 0
"""
test duplicates in time series
"""
@pytest.fixture(scope='module')
def dups():
dates = [datetime(2000, 1, 2), datetime(2000, 1, 2),
datetime(2000, 1, 2), datetime(2000, 1, 3),
datetime(2000, 1, 3), datetime(2000, 1, 3),
datetime(2000, 1, 4), datetime(2000, 1, 4),
datetime(2000, 1, 4), datetime(2000, 1, 5)]
return Series(np.random.randn(len(dates)), index=dates)
def test_constructor(dups):
assert isinstance(dups, Series)
assert isinstance(dups.index, DatetimeIndex)
def test_is_unique_monotonic(dups):
assert not dups.index.is_unique
def test_index_unique(dups):
uniques = dups.index.unique()
expected = DatetimeIndex([datetime(2000, 1, 2), datetime(2000, 1, 3),
datetime(2000, 1, 4), datetime(2000, 1, 5)])
assert uniques.dtype == 'M8[ns]' # sanity
tm.assert_index_equal(uniques, expected)
assert dups.index.nunique() == 4
# #2563
assert isinstance(uniques, DatetimeIndex)
dups_local = dups.index.tz_localize('US/Eastern')
dups_local.name = 'foo'
result = dups_local.unique()
expected = DatetimeIndex(expected, name='foo')
expected = expected.tz_localize('US/Eastern')
assert result.tz is not None
assert result.name == 'foo'
tm.assert_index_equal(result, expected)
# NaT, note this is excluded
arr = [1370745748 + t for t in range(20)] + [tslib.iNaT]
idx = DatetimeIndex(arr * 3)
tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
assert idx.nunique() == 20
assert idx.nunique(dropna=False) == 21
arr = [Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t)
for t in range(20)] + [NaT]
idx = DatetimeIndex(arr * 3)
tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
assert idx.nunique() == 20
assert idx.nunique(dropna=False) == 21
def test_index_dupes_contains():
d = datetime(2011, 12, 5, 20, 30)
ix = DatetimeIndex([d, d])
assert d in ix
def test_duplicate_dates_indexing(dups):
ts = dups
uniques = ts.index.unique()
for date in uniques:
result = ts[date]
mask = ts.index == date
total = (ts.index == date).sum()
expected = ts[mask]
if total > 1:
assert_series_equal(result, expected)
else:
assert_almost_equal(result, expected[0])
cp = ts.copy()
cp[date] = 0
expected = Series(np.where(mask, 0, ts), index=ts.index)
assert_series_equal(cp, expected)
pytest.raises(KeyError, ts.__getitem__, datetime(2000, 1, 6))
# new index
ts[datetime(2000, 1, 6)] = 0
assert ts[datetime(2000, 1, 6)] == 0
def test_range_slice():
idx = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000',
'1/4/2000'])
ts = Series(np.random.randn(len(idx)), index=idx)
result = ts['1/2/2000':]
expected = ts[1:]
assert_series_equal(result, expected)
result = ts['1/2/2000':'1/3/2000']
expected = ts[1:4]
assert_series_equal(result, expected)
def test_groupby_average_dup_values(dups):
result = dups.groupby(level=0).mean()
expected = dups.groupby(dups.index).mean()
assert_series_equal(result, expected)
def test_indexing_over_size_cutoff():
import datetime
# #1821
old_cutoff = _index._SIZE_CUTOFF
try:
_index._SIZE_CUTOFF = 1000
# create large list of non periodic datetime
dates = []
sec = datetime.timedelta(seconds=1)
half_sec = datetime.timedelta(microseconds=500000)
d = datetime.datetime(2011, 12, 5, 20, 30)
n = 1100
for i in range(n):
dates.append(d)
dates.append(d + sec)
dates.append(d + sec + half_sec)
dates.append(d + sec + sec + half_sec)
d += 3 * sec
# duplicate some values in the list
duplicate_positions = np.random.randint(0, len(dates) - 1, 20)
for p in duplicate_positions:
dates[p + 1] = dates[p]
df = DataFrame(np.random.randn(len(dates), 4),
index=dates,
columns=list('ABCD'))
pos = n * 3
timestamp = df.index[pos]
assert timestamp in df.index
# it works!
df.loc[timestamp]
assert len(df.loc[[timestamp]]) > 0
finally:
_index._SIZE_CUTOFF = old_cutoff
def test_indexing_unordered():
# GH 2437
rng = date_range(start='2011-01-01', end='2011-01-15')
ts = Series(np.random.rand(len(rng)), index=rng)
ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])
for t in ts.index:
# TODO: unused?
s = str(t) # noqa
expected = ts[t]
result = ts2[t]
assert expected == result
# GH 3448 (ranges)
def compare(slobj):
result = ts2[slobj].copy()
result = result.sort_index()
expected = ts[slobj]
assert_series_equal(result, expected)
compare(slice('2011-01-01', '2011-01-15'))
compare(slice('2010-12-30', '2011-01-15'))
compare(slice('2011-01-01', '2011-01-16'))
# partial ranges
compare(slice('2011-01-01', '2011-01-6'))
compare(slice('2011-01-06', '2011-01-8'))
compare(slice('2011-01-06', '2011-01-12'))
# single values
result = ts2['2011'].sort_index()
expected = ts['2011']
assert_series_equal(result, expected)
# diff freq
rng = date_range(datetime(2005, 1, 1), periods=20, freq='M')
ts = Series(np.arange(len(rng)), index=rng)
ts = ts.take(np.random.permutation(20))
result = ts['2005']
for t in result.index:
assert t.year == 2005
def test_indexing():
idx = date_range("2001-1-1", periods=20, freq='M')
ts = Series(np.random.rand(len(idx)), index=idx)
# getting
# GH 3070, make sure semantics work on Series/Frame
expected = ts['2001']
expected.name = 'A'
df = DataFrame(dict(A=ts))
result = df['2001']['A']
assert_series_equal(expected, result)
# setting
ts['2001'] = 1
expected = ts['2001']
expected.name = 'A'
df.loc['2001', 'A'] = 1
result = df['2001']['A']
assert_series_equal(expected, result)
# GH3546 (not including times on the last day)
idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00',
freq='H')
ts = Series(lrange(len(idx)), index=idx)
expected = ts['2013-05']
assert_series_equal(expected, ts)
idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59',
freq='S')
ts = Series(lrange(len(idx)), index=idx)
expected = ts['2013-05']
assert_series_equal(expected, ts)
idx = [Timestamp('2013-05-31 00:00'),
Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))]
ts = Series(lrange(len(idx)), index=idx)
expected = ts['2013']
assert_series_equal(expected, ts)
# GH14826, indexing with a seconds resolution string / datetime object
df = DataFrame(np.random.rand(5, 5),
columns=['open', 'high', 'low', 'close', 'volume'],
index=date_range('2012-01-02 18:01:00',
periods=5, tz='US/Central', freq='s'))
expected = df.loc[[df.index[2]]]
# this is a single date, so will raise
pytest.raises(KeyError, df.__getitem__, '2012-01-02 18:01:02', )
pytest.raises(KeyError, df.__getitem__, df.index[2], )
"""
test NaT support
"""
def test_set_none_nan():
series = Series(date_range('1/1/2000', periods=10))
series[3] = None
assert series[3] is NaT
series[3:5] = None
assert series[4] is NaT
series[5] = np.nan
assert series[5] is NaT
series[5:7] = np.nan
assert series[6] is NaT
def test_nat_operations():
# GH 8617
s = Series([0, pd.NaT], dtype='m8[ns]')
exp = s[0]
assert s.median() == exp
assert s.min() == exp
assert s.max() == exp
@pytest.mark.parametrize('method', ["round", "floor", "ceil"])
@pytest.mark.parametrize('freq', ["s", "5s", "min", "5min", "h", "5h"])
def test_round_nat(method, freq):
# GH14940
s = Series([pd.NaT])
expected = Series(pd.NaT)
round_method = getattr(s.dt, method)
assert_series_equal(round_method(freq), expected)
@@ -0,0 +1,38 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612
import numpy as np
from pandas import Series
from pandas.compat import lrange, range
from pandas.util.testing import (assert_series_equal,
assert_almost_equal)
def test_iloc():
s = Series(np.random.randn(10), index=lrange(0, 20, 2))
for i in range(len(s)):
result = s.iloc[i]
exp = s[s.index[i]]
assert_almost_equal(result, exp)
# pass a slice
result = s.iloc[slice(1, 3)]
expected = s.loc[2:4]
assert_series_equal(result, expected)
# test slice is a view
result[:] = 0
assert (s[1:3] == 0).all()
# list of integers
result = s.iloc[[0, 2, 3, 4, 5]]
expected = s.reindex(s.index[[0, 2, 3, 4, 5]])
assert_series_equal(result, expected)
def test_iloc_nonunique():
s = Series([0, 1, 2], index=[0, 1, 0])
assert s.iloc[2] == 2
@@ -0,0 +1,770 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612
""" test get/set & misc """
import pytest
from datetime import timedelta
import numpy as np
import pandas as pd
from pandas.core.dtypes.common import is_scalar
from pandas import (Series, DataFrame, MultiIndex,
Timestamp, Timedelta, Categorical)
from pandas.tseries.offsets import BDay
from pandas.compat import lrange, range
from pandas.util.testing import (assert_series_equal)
import pandas.util.testing as tm
def test_basic_indexing():
s = Series(np.random.randn(5), index=['a', 'b', 'a', 'a', 'b'])
pytest.raises(IndexError, s.__getitem__, 5)
pytest.raises(IndexError, s.__setitem__, 5, 0)
pytest.raises(KeyError, s.__getitem__, 'c')
s = s.sort_index()
pytest.raises(IndexError, s.__getitem__, 5)
pytest.raises(IndexError, s.__setitem__, 5, 0)
def test_basic_getitem_with_labels(test_data):
indices = test_data.ts.index[[5, 10, 15]]
result = test_data.ts[indices]
expected = test_data.ts.reindex(indices)
assert_series_equal(result, expected)
result = test_data.ts[indices[0]:indices[2]]
expected = test_data.ts.loc[indices[0]:indices[2]]
assert_series_equal(result, expected)
# integer indexes, be careful
s = Series(np.random.randn(10), index=lrange(0, 20, 2))
inds = [0, 2, 5, 7, 8]
arr_inds = np.array([0, 2, 5, 7, 8])
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = s[inds]
expected = s.reindex(inds)
assert_series_equal(result, expected)
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = s[arr_inds]
expected = s.reindex(arr_inds)
assert_series_equal(result, expected)
# GH12089
# with tz for values
s = Series(pd.date_range("2011-01-01", periods=3, tz="US/Eastern"),
index=['a', 'b', 'c'])
expected = Timestamp('2011-01-01', tz='US/Eastern')
result = s.loc['a']
assert result == expected
result = s.iloc[0]
assert result == expected
result = s['a']
assert result == expected
def test_getitem_setitem_ellipsis():
s = Series(np.random.randn(10))
np.fix(s)
result = s[...]
assert_series_equal(result, s)
s[...] = 5
assert (result == 5).all()
def test_getitem_get(test_data):
test_series = test_data.series
test_obj_series = test_data.objSeries
idx1 = test_series.index[5]
idx2 = test_obj_series.index[5]
assert test_series[idx1] == test_series.get(idx1)
assert test_obj_series[idx2] == test_obj_series.get(idx2)
assert test_series[idx1] == test_series[5]
assert test_obj_series[idx2] == test_obj_series[5]
assert test_series.get(-1) == test_series.get(test_series.index[-1])
assert test_series[5] == test_series.get(test_series.index[5])
# missing
d = test_data.ts.index[0] - BDay()
pytest.raises(KeyError, test_data.ts.__getitem__, d)
# None
# GH 5652
for s in [Series(), Series(index=list('abc'))]:
result = s.get(None)
assert result is None
def test_getitem_fancy(test_data):
slice1 = test_data.series[[1, 2, 3]]
slice2 = test_data.objSeries[[1, 2, 3]]
assert test_data.series.index[2] == slice1.index[1]
assert test_data.objSeries.index[2] == slice2.index[1]
assert test_data.series[2] == slice1[1]
assert test_data.objSeries[2] == slice2[1]
def test_getitem_generator(test_data):
gen = (x > 0 for x in test_data.series)
result = test_data.series[gen]
result2 = test_data.series[iter(test_data.series > 0)]
expected = test_data.series[test_data.series > 0]
assert_series_equal(result, expected)
assert_series_equal(result2, expected)
def test_type_promotion():
# GH12599
s = pd.Series()
s["a"] = pd.Timestamp("2016-01-01")
s["b"] = 3.0
s["c"] = "foo"
expected = Series([pd.Timestamp("2016-01-01"), 3.0, "foo"],
index=["a", "b", "c"])
assert_series_equal(s, expected)
@pytest.mark.parametrize(
'result_1, duplicate_item, expected_1',
[
[
pd.Series({1: 12, 2: [1, 2, 2, 3]}), pd.Series({1: 313}),
pd.Series({1: 12, }, dtype=object),
],
[
pd.Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
pd.Series({1: [1, 2, 3]}), pd.Series({1: [1, 2, 3], }),
],
])
def test_getitem_with_duplicates_indices(
result_1, duplicate_item, expected_1):
# GH 17610
result = result_1.append(duplicate_item)
expected = expected_1.append(duplicate_item)
assert_series_equal(result[1], expected)
assert result[2] == result_1[2]
def test_getitem_out_of_bounds(test_data):
# don't segfault, GH #495
pytest.raises(IndexError, test_data.ts.__getitem__, len(test_data.ts))
# GH #917
s = Series([])
pytest.raises(IndexError, s.__getitem__, -1)
def test_getitem_setitem_integers():
# caused bug without test
s = Series([1, 2, 3], ['a', 'b', 'c'])
assert s.iloc[0] == s['a']
s.iloc[0] = 5
tm.assert_almost_equal(s['a'], 5)
def test_getitem_box_float64(test_data):
value = test_data.ts[5]
assert isinstance(value, np.float64)
def test_series_box_timestamp():
rng = pd.date_range('20090415', '20090519', freq='B')
ser = Series(rng)
assert isinstance(ser[5], pd.Timestamp)
rng = pd.date_range('20090415', '20090519', freq='B')
ser = Series(rng, index=rng)
assert isinstance(ser[5], pd.Timestamp)
assert isinstance(ser.iat[5], pd.Timestamp)
def test_getitem_ambiguous_keyerror():
s = Series(lrange(10), index=lrange(0, 20, 2))
pytest.raises(KeyError, s.__getitem__, 1)
pytest.raises(KeyError, s.loc.__getitem__, 1)
def test_getitem_unordered_dup():
obj = Series(lrange(5), index=['c', 'a', 'a', 'b', 'b'])
assert is_scalar(obj['c'])
assert obj['c'] == 0
def test_getitem_dups_with_missing():
# breaks reindex, so need to use .loc internally
# GH 4246
s = Series([1, 2, 3, 4], ['foo', 'bar', 'foo', 'bah'])
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
expected = s.loc[['foo', 'bar', 'bah', 'bam']]
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = s[['foo', 'bar', 'bah', 'bam']]
assert_series_equal(result, expected)
def test_getitem_dups():
s = Series(range(5), index=['A', 'A', 'B', 'C', 'C'], dtype=np.int64)
expected = Series([3, 4], index=['C', 'C'], dtype=np.int64)
result = s['C']
assert_series_equal(result, expected)
def test_setitem_ambiguous_keyerror():
s = Series(lrange(10), index=lrange(0, 20, 2))
# equivalent of an append
s2 = s.copy()
s2[1] = 5
expected = s.append(Series([5], index=[1]))
assert_series_equal(s2, expected)
s2 = s.copy()
s2.loc[1] = 5
expected = s.append(Series([5], index=[1]))
assert_series_equal(s2, expected)
def test_getitem_dataframe():
rng = list(range(10))
s = pd.Series(10, index=rng)
df = pd.DataFrame(rng, index=rng)
pytest.raises(TypeError, s.__getitem__, df > 5)
def test_setitem(test_data):
test_data.ts[test_data.ts.index[5]] = np.NaN
test_data.ts[[1, 2, 17]] = np.NaN
test_data.ts[6] = np.NaN
assert np.isnan(test_data.ts[6])
assert np.isnan(test_data.ts[2])
test_data.ts[np.isnan(test_data.ts)] = 5
assert not np.isnan(test_data.ts[2])
# caught this bug when writing tests
series = Series(tm.makeIntIndex(20).astype(float),
index=tm.makeIntIndex(20))
series[::2] = 0
assert (series[::2] == 0).all()
# set item that's not contained
s = test_data.series.copy()
s['foobar'] = 1
app = Series([1], index=['foobar'], name='series')
expected = test_data.series.append(app)
assert_series_equal(s, expected)
# Test for issue #10193
key = pd.Timestamp('2012-01-01')
series = pd.Series()
series[key] = 47
expected = pd.Series(47, [key])
assert_series_equal(series, expected)
series = pd.Series([], pd.DatetimeIndex([], freq='D'))
series[key] = 47
expected = pd.Series(47, pd.DatetimeIndex([key], freq='D'))
assert_series_equal(series, expected)
def test_setitem_dtypes():
# change dtypes
# GH 4463
expected = Series([np.nan, 2, 3])
s = Series([1, 2, 3])
s.iloc[0] = np.nan
assert_series_equal(s, expected)
s = Series([1, 2, 3])
s.loc[0] = np.nan
assert_series_equal(s, expected)
s = Series([1, 2, 3])
s[0] = np.nan
assert_series_equal(s, expected)
s = Series([False])
s.loc[0] = np.nan
assert_series_equal(s, Series([np.nan]))
s = Series([False, True])
s.loc[0] = np.nan
assert_series_equal(s, Series([np.nan, 1.0]))
def test_set_value(test_data):
idx = test_data.ts.index[10]
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
res = test_data.ts.set_value(idx, 0)
assert res is test_data.ts
assert test_data.ts[idx] == 0
# equiv
s = test_data.series.copy()
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
res = s.set_value('foobar', 0)
assert res is s
assert res.index[-1] == 'foobar'
assert res['foobar'] == 0
s = test_data.series.copy()
s.loc['foobar'] = 0
assert s.index[-1] == 'foobar'
assert s['foobar'] == 0
def test_setslice(test_data):
sl = test_data.ts[5:20]
assert len(sl) == len(sl.index)
assert sl.index.is_unique
def test_basic_getitem_setitem_corner(test_data):
# invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
with tm.assert_raises_regex(ValueError, 'tuple-index'):
test_data.ts[:, 2]
with tm.assert_raises_regex(ValueError, 'tuple-index'):
test_data.ts[:, 2] = 2
# weird lists. [slice(0, 5)] will work but not two slices
result = test_data.ts[[slice(None, 5)]]
expected = test_data.ts[:5]
assert_series_equal(result, expected)
# OK
pytest.raises(Exception, test_data.ts.__getitem__,
[5, slice(None, None)])
pytest.raises(Exception, test_data.ts.__setitem__,
[5, slice(None, None)], 2)
@pytest.mark.parametrize('tz', ['US/Eastern', 'UTC', 'Asia/Tokyo'])
def test_setitem_with_tz(tz):
orig = pd.Series(pd.date_range('2016-01-01', freq='H', periods=3,
tz=tz))
assert orig.dtype == 'datetime64[ns, {0}]'.format(tz)
# scalar
s = orig.copy()
s[1] = pd.Timestamp('2011-01-01', tz=tz)
exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
pd.Timestamp('2011-01-01 00:00', tz=tz),
pd.Timestamp('2016-01-01 02:00', tz=tz)])
tm.assert_series_equal(s, exp)
s = orig.copy()
s.loc[1] = pd.Timestamp('2011-01-01', tz=tz)
tm.assert_series_equal(s, exp)
s = orig.copy()
s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz)
tm.assert_series_equal(s, exp)
# vector
vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
assert vals.dtype == 'datetime64[ns, {0}]'.format(tz)
s[[1, 2]] = vals
exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
pd.Timestamp('2011-01-01 00:00', tz=tz),
pd.Timestamp('2012-01-01 00:00', tz=tz)])
tm.assert_series_equal(s, exp)
s = orig.copy()
s.loc[[1, 2]] = vals
tm.assert_series_equal(s, exp)
s = orig.copy()
s.iloc[[1, 2]] = vals
tm.assert_series_equal(s, exp)
def test_setitem_with_tz_dst():
# GH XXX
tz = 'US/Eastern'
orig = pd.Series(pd.date_range('2016-11-06', freq='H', periods=3,
tz=tz))
assert orig.dtype == 'datetime64[ns, {0}]'.format(tz)
# scalar
s = orig.copy()
s[1] = pd.Timestamp('2011-01-01', tz=tz)
exp = pd.Series([pd.Timestamp('2016-11-06 00:00-04:00', tz=tz),
pd.Timestamp('2011-01-01 00:00-05:00', tz=tz),
pd.Timestamp('2016-11-06 01:00-05:00', tz=tz)])
tm.assert_series_equal(s, exp)
s = orig.copy()
s.loc[1] = pd.Timestamp('2011-01-01', tz=tz)
tm.assert_series_equal(s, exp)
s = orig.copy()
s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz)
tm.assert_series_equal(s, exp)
# vector
vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
assert vals.dtype == 'datetime64[ns, {0}]'.format(tz)
s[[1, 2]] = vals
exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz),
pd.Timestamp('2011-01-01 00:00', tz=tz),
pd.Timestamp('2012-01-01 00:00', tz=tz)])
tm.assert_series_equal(s, exp)
s = orig.copy()
s.loc[[1, 2]] = vals
tm.assert_series_equal(s, exp)
s = orig.copy()
s.iloc[[1, 2]] = vals
tm.assert_series_equal(s, exp)
def test_categorial_assigning_ops():
orig = Series(Categorical(["b", "b"], categories=["a", "b"]))
s = orig.copy()
s[:] = "a"
exp = Series(Categorical(["a", "a"], categories=["a", "b"]))
tm.assert_series_equal(s, exp)
s = orig.copy()
s[1] = "a"
exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
tm.assert_series_equal(s, exp)
s = orig.copy()
s[s.index > 0] = "a"
exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
tm.assert_series_equal(s, exp)
s = orig.copy()
s[[False, True]] = "a"
exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
tm.assert_series_equal(s, exp)
s = orig.copy()
s.index = ["x", "y"]
s["y"] = "a"
exp = Series(Categorical(["b", "a"], categories=["a", "b"]),
index=["x", "y"])
tm.assert_series_equal(s, exp)
# ensure that one can set something to np.nan
s = Series(Categorical([1, 2, 3]))
exp = Series(Categorical([1, np.nan, 3], categories=[1, 2, 3]))
s[1] = np.nan
tm.assert_series_equal(s, exp)
def test_slice(test_data):
numSlice = test_data.series[10:20]
numSliceEnd = test_data.series[-10:]
objSlice = test_data.objSeries[10:20]
assert test_data.series.index[9] not in numSlice.index
assert test_data.objSeries.index[9] not in objSlice.index
assert len(numSlice) == len(numSlice.index)
assert test_data.series[numSlice.index[0]] == numSlice[numSlice.index[0]]
assert numSlice.index[1] == test_data.series.index[11]
assert tm.equalContents(numSliceEnd, np.array(test_data.series)[-10:])
# Test return view.
sl = test_data.series[10:20]
sl[:] = 0
assert (test_data.series[10:20] == 0).all()
def test_slice_can_reorder_not_uniquely_indexed():
s = Series(1, index=['a', 'a', 'b', 'b', 'c'])
s[::-1] # it works!
def test_ix_setitem(test_data):
inds = test_data.series.index[[3, 4, 7]]
result = test_data.series.copy()
result.loc[inds] = 5
expected = test_data.series.copy()
expected[[3, 4, 7]] = 5
assert_series_equal(result, expected)
result.iloc[5:10] = 10
expected[5:10] = 10
assert_series_equal(result, expected)
# set slice with indices
d1, d2 = test_data.series.index[[5, 15]]
result.loc[d1:d2] = 6
expected[5:16] = 6 # because it's inclusive
assert_series_equal(result, expected)
# set index value
test_data.series.loc[d1] = 4
test_data.series.loc[d2] = 6
assert test_data.series[d1] == 4
assert test_data.series[d2] == 6
def test_setitem_na():
# these induce dtype changes
expected = Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan])
s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10])
s[::2] = np.nan
assert_series_equal(s, expected)
# gets coerced to float, right?
expected = Series([np.nan, 1, np.nan, 0])
s = Series([True, True, False, False])
s[::2] = np.nan
assert_series_equal(s, expected)
expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8,
9])
s = Series(np.arange(10))
s[:5] = np.nan
assert_series_equal(s, expected)
def test_timedelta_assignment():
# GH 8209
s = Series([])
s.loc['B'] = timedelta(1)
tm.assert_series_equal(s, Series(Timedelta('1 days'), index=['B']))
s = s.reindex(s.index.insert(0, 'A'))
tm.assert_series_equal(s, Series(
[np.nan, Timedelta('1 days')], index=['A', 'B']))
result = s.fillna(timedelta(1))
expected = Series(Timedelta('1 days'), index=['A', 'B'])
tm.assert_series_equal(result, expected)
s.loc['A'] = timedelta(1)
tm.assert_series_equal(s, expected)
# GH 14155
s = Series(10 * [np.timedelta64(10, 'm')])
s.loc[[1, 2, 3]] = np.timedelta64(20, 'm')
expected = pd.Series(10 * [np.timedelta64(10, 'm')])
expected.loc[[1, 2, 3]] = pd.Timedelta(np.timedelta64(20, 'm'))
tm.assert_series_equal(s, expected)
def test_underlying_data_conversion():
# GH 4080
df = DataFrame({c: [1, 2, 3] for c in ['a', 'b', 'c']})
df.set_index(['a', 'b', 'c'], inplace=True)
s = Series([1], index=[(2, 2, 2)])
df['val'] = 0
df
df['val'].update(s)
expected = DataFrame(
dict(a=[1, 2, 3], b=[1, 2, 3], c=[1, 2, 3], val=[0, 1, 0]))
expected.set_index(['a', 'b', 'c'], inplace=True)
tm.assert_frame_equal(df, expected)
# GH 3970
# these are chained assignments as well
pd.set_option('chained_assignment', None)
df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
df["cc"] = 0.0
ck = [True] * len(df)
df["bb"].iloc[0] = .13
# TODO: unused
df_tmp = df.iloc[ck] # noqa
df["bb"].iloc[0] = .15
assert df['bb'].iloc[0] == 0.15
pd.set_option('chained_assignment', 'raise')
# GH 3217
df = DataFrame(dict(a=[1, 3], b=[np.nan, 2]))
df['c'] = np.nan
df['c'].update(pd.Series(['foo'], index=[0]))
expected = DataFrame(dict(a=[1, 3], b=[np.nan, 2], c=['foo', np.nan]))
tm.assert_frame_equal(df, expected)
def test_preserve_refs(test_data):
seq = test_data.ts[[5, 10, 15]]
seq[1] = np.NaN
assert not np.isnan(test_data.ts[10])
def test_cast_on_putmask():
# GH 2746
# need to upcast
s = Series([1, 2], index=[1, 2], dtype='int64')
s[[True, False]] = Series([0], index=[1], dtype='int64')
expected = Series([0, 2], index=[1, 2], dtype='int64')
assert_series_equal(s, expected)
def test_type_promote_putmask():
# GH8387: test that changing types does not break alignment
ts = Series(np.random.randn(100), index=np.arange(100, 0, -1)).round(5)
left, mask = ts.copy(), ts > 0
right = ts[mask].copy().map(str)
left[mask] = right
assert_series_equal(left, ts.map(lambda t: str(t) if t > 0 else t))
s = Series([0, 1, 2, 0])
mask = s > 0
s2 = s[mask].map(str)
s[mask] = s2
assert_series_equal(s, Series([0, '1', '2', 0]))
s = Series([0, 'foo', 'bar', 0])
mask = Series([False, True, True, False])
s2 = s[mask]
s[mask] = s2
assert_series_equal(s, Series([0, 'foo', 'bar', 0]))
def test_multilevel_preserve_name():
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
'three']],
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['first', 'second'])
s = Series(np.random.randn(len(index)), index=index, name='sth')
result = s['foo']
result2 = s.loc['foo']
assert result.name == s.name
assert result2.name == s.name
def test_setitem_scalar_into_readonly_backing_data():
# GH14359: test that you cannot mutate a read only buffer
array = np.zeros(5)
array.flags.writeable = False # make the array immutable
series = Series(array)
for n in range(len(series)):
with pytest.raises(ValueError):
series[n] = 1
assert array[n] == 0
def test_setitem_slice_into_readonly_backing_data():
# GH14359: test that you cannot mutate a read only buffer
array = np.zeros(5)
array.flags.writeable = False # make the array immutable
series = Series(array)
with pytest.raises(ValueError):
series[1:3] = 1
assert not array.any()
"""
miscellaneous methods
"""
def test_select(test_data):
# deprecated: gh-12410
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
n = len(test_data.ts)
result = test_data.ts.select(lambda x: x >= test_data.ts.index[n // 2])
expected = test_data.ts.reindex(test_data.ts.index[n // 2:])
assert_series_equal(result, expected)
result = test_data.ts.select(lambda x: x.weekday() == 2)
expected = test_data.ts[test_data.ts.index.weekday == 2]
assert_series_equal(result, expected)
def test_pop():
# GH 6600
df = DataFrame({'A': 0, 'B': np.arange(5, dtype='int64'), 'C': 0, })
k = df.iloc[4]
result = k.pop('B')
assert result == 4
expected = Series([0, 0], index=['A', 'C'], name=4)
assert_series_equal(k, expected)
def test_take():
s = Series([-1, 5, 6, 2, 4])
actual = s.take([1, 3, 4])
expected = Series([5, 2, 4], index=[1, 3, 4])
tm.assert_series_equal(actual, expected)
actual = s.take([-1, 3, 4])
expected = Series([4, 2, 4], index=[4, 3, 4])
tm.assert_series_equal(actual, expected)
pytest.raises(IndexError, s.take, [1, 10])
pytest.raises(IndexError, s.take, [2, 5])
with tm.assert_produces_warning(FutureWarning):
s.take([-1, 3, 4], convert=False)
def test_take_categorical():
# https://github.com/pandas-dev/pandas/issues/20664
s = Series(pd.Categorical(['a', 'b', 'c']))
result = s.take([-2, -2, 0])
expected = Series(pd.Categorical(['b', 'b', 'a'],
categories=['a', 'b', 'c']),
index=[1, 1, 0])
assert_series_equal(result, expected)
def test_head_tail(test_data):
assert_series_equal(test_data.series.head(), test_data.series[:5])
assert_series_equal(test_data.series.head(0), test_data.series[0:0])
assert_series_equal(test_data.series.tail(), test_data.series[-5:])
assert_series_equal(test_data.series.tail(0), test_data.series[0:0])
@@ -0,0 +1,150 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612
import pytest
import numpy as np
import pandas as pd
from pandas import (Series, Timestamp)
from pandas.compat import lrange
from pandas.util.testing import (assert_series_equal)
def test_loc_getitem(test_data):
inds = test_data.series.index[[3, 4, 7]]
assert_series_equal(
test_data.series.loc[inds],
test_data.series.reindex(inds))
assert_series_equal(test_data.series.iloc[5::2], test_data.series[5::2])
# slice with indices
d1, d2 = test_data.ts.index[[5, 15]]
result = test_data.ts.loc[d1:d2]
expected = test_data.ts.truncate(d1, d2)
assert_series_equal(result, expected)
# boolean
mask = test_data.series > test_data.series.median()
assert_series_equal(test_data.series.loc[mask], test_data.series[mask])
# ask for index value
assert test_data.ts.loc[d1] == test_data.ts[d1]
assert test_data.ts.loc[d2] == test_data.ts[d2]
def test_loc_getitem_not_monotonic(test_data):
d1, d2 = test_data.ts.index[[5, 15]]
ts2 = test_data.ts[::2][[1, 2, 0]]
pytest.raises(KeyError, ts2.loc.__getitem__, slice(d1, d2))
pytest.raises(KeyError, ts2.loc.__setitem__, slice(d1, d2), 0)
def test_loc_getitem_setitem_integer_slice_keyerrors():
s = Series(np.random.randn(10), index=lrange(0, 20, 2))
# this is OK
cp = s.copy()
cp.iloc[4:10] = 0
assert (cp.iloc[4:10] == 0).all()
# so is this
cp = s.copy()
cp.iloc[3:11] = 0
assert (cp.iloc[3:11] == 0).values.all()
result = s.iloc[2:6]
result2 = s.loc[3:11]
expected = s.reindex([4, 6, 8, 10])
assert_series_equal(result, expected)
assert_series_equal(result2, expected)
# non-monotonic, raise KeyError
s2 = s.iloc[lrange(5) + lrange(5, 10)[::-1]]
pytest.raises(KeyError, s2.loc.__getitem__, slice(3, 11))
pytest.raises(KeyError, s2.loc.__setitem__, slice(3, 11), 0)
def test_loc_getitem_iterator(test_data):
idx = iter(test_data.series.index[:10])
result = test_data.series.loc[idx]
assert_series_equal(result, test_data.series[:10])
def test_loc_setitem_boolean(test_data):
mask = test_data.series > test_data.series.median()
result = test_data.series.copy()
result.loc[mask] = 0
expected = test_data.series
expected[mask] = 0
assert_series_equal(result, expected)
def test_loc_setitem_corner(test_data):
inds = list(test_data.series.index[[5, 8, 12]])
test_data.series.loc[inds] = 5
pytest.raises(Exception, test_data.series.loc.__setitem__,
inds + ['foo'], 5)
def test_basic_setitem_with_labels(test_data):
indices = test_data.ts.index[[5, 10, 15]]
cp = test_data.ts.copy()
exp = test_data.ts.copy()
cp[indices] = 0
exp.loc[indices] = 0
assert_series_equal(cp, exp)
cp = test_data.ts.copy()
exp = test_data.ts.copy()
cp[indices[0]:indices[2]] = 0
exp.loc[indices[0]:indices[2]] = 0
assert_series_equal(cp, exp)
# integer indexes, be careful
s = Series(np.random.randn(10), index=lrange(0, 20, 2))
inds = [0, 4, 6]
arr_inds = np.array([0, 4, 6])
cp = s.copy()
exp = s.copy()
s[inds] = 0
s.loc[inds] = 0
assert_series_equal(cp, exp)
cp = s.copy()
exp = s.copy()
s[arr_inds] = 0
s.loc[arr_inds] = 0
assert_series_equal(cp, exp)
inds_notfound = [0, 4, 5, 6]
arr_inds_notfound = np.array([0, 4, 5, 6])
pytest.raises(Exception, s.__setitem__, inds_notfound, 0)
pytest.raises(Exception, s.__setitem__, arr_inds_notfound, 0)
# GH12089
# with tz for values
s = Series(pd.date_range("2011-01-01", periods=3, tz="US/Eastern"),
index=['a', 'b', 'c'])
s2 = s.copy()
expected = Timestamp('2011-01-03', tz='US/Eastern')
s2.loc['a'] = expected
result = s2.loc['a']
assert result == expected
s2 = s.copy()
s2.iloc[0] = expected
result = s2.iloc[0]
assert result == expected
s2 = s.copy()
s2['a'] = expected
result = s2['a']
assert result == expected
@@ -0,0 +1,251 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612
import pytest
import numpy as np
import pandas as pd
from pandas import (Index, Series, DataFrame)
from pandas.compat import lrange, range
from pandas.util.testing import (assert_series_equal)
import pandas.util.testing as tm
def test_get():
# GH 6383
s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56, 45,
51, 39, 55, 43, 54, 52, 51, 54]))
result = s.get(25, 0)
expected = 0
assert result == expected
s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56,
45, 51, 39, 55, 43, 54, 52, 51, 54]),
index=pd.Float64Index(
[25.0, 36.0, 49.0, 64.0, 81.0, 100.0,
121.0, 144.0, 169.0, 196.0, 1225.0,
1296.0, 1369.0, 1444.0, 1521.0, 1600.0,
1681.0, 1764.0, 1849.0, 1936.0],
dtype='object'))
result = s.get(25, 0)
expected = 43
assert result == expected
# GH 7407
# with a boolean accessor
df = pd.DataFrame({'i': [0] * 3, 'b': [False] * 3})
vc = df.i.value_counts()
result = vc.get(99, default='Missing')
assert result == 'Missing'
vc = df.b.value_counts()
result = vc.get(False, default='Missing')
assert result == 3
result = vc.get(True, default='Missing')
assert result == 'Missing'
def test_get_nan():
# GH 8569
s = pd.Float64Index(range(10)).to_series()
assert s.get(np.nan) is None
assert s.get(np.nan, default='Missing') == 'Missing'
def test_get_nan_multiple():
# GH 8569
# ensure that fixing "test_get_nan" above hasn't broken get
# with multiple elements
s = pd.Float64Index(range(10)).to_series()
idx = [2, 30]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert_series_equal(s.get(idx),
Series([2, np.nan], index=idx))
idx = [2, np.nan]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert_series_equal(s.get(idx),
Series([2, np.nan], index=idx))
# GH 17295 - all missing keys
idx = [20, 30]
assert(s.get(idx) is None)
idx = [np.nan, np.nan]
assert(s.get(idx) is None)
def test_delitem():
# GH 5542
# should delete the item inplace
s = Series(lrange(5))
del s[0]
expected = Series(lrange(1, 5), index=lrange(1, 5))
assert_series_equal(s, expected)
del s[1]
expected = Series(lrange(2, 5), index=lrange(2, 5))
assert_series_equal(s, expected)
# empty
s = Series()
def f():
del s[0]
pytest.raises(KeyError, f)
# only 1 left, del, add, del
s = Series(1)
del s[0]
assert_series_equal(s, Series(dtype='int64', index=Index(
[], dtype='int64')))
s[0] = 1
assert_series_equal(s, Series(1))
del s[0]
assert_series_equal(s, Series(dtype='int64', index=Index(
[], dtype='int64')))
# Index(dtype=object)
s = Series(1, index=['a'])
del s['a']
assert_series_equal(s, Series(dtype='int64', index=Index(
[], dtype='object')))
s['a'] = 1
assert_series_equal(s, Series(1, index=['a']))
del s['a']
assert_series_equal(s, Series(dtype='int64', index=Index(
[], dtype='object')))
def test_slice_float64():
values = np.arange(10., 50., 2)
index = Index(values)
start, end = values[[5, 15]]
s = Series(np.random.randn(20), index=index)
result = s[start:end]
expected = s.iloc[5:16]
assert_series_equal(result, expected)
result = s.loc[start:end]
assert_series_equal(result, expected)
df = DataFrame(np.random.randn(20, 3), index=index)
result = df[start:end]
expected = df.iloc[5:16]
tm.assert_frame_equal(result, expected)
result = df.loc[start:end]
tm.assert_frame_equal(result, expected)
def test_getitem_negative_out_of_bounds():
s = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10))
pytest.raises(IndexError, s.__getitem__, -11)
pytest.raises(IndexError, s.__setitem__, -11, 'foo')
def test_getitem_regression():
s = Series(lrange(5), index=lrange(5))
result = s[lrange(5)]
assert_series_equal(result, s)
def test_getitem_setitem_slice_bug():
s = Series(lrange(10), lrange(10))
result = s[-12:]
assert_series_equal(result, s)
result = s[-7:]
assert_series_equal(result, s[3:])
result = s[:-12]
assert_series_equal(result, s[:0])
s = Series(lrange(10), lrange(10))
s[-12:] = 0
assert (s == 0).all()
s[:-12] = 5
assert (s == 0).all()
def test_getitem_setitem_slice_integers():
s = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16])
result = s[:4]
expected = s.reindex([2, 4, 6, 8])
assert_series_equal(result, expected)
s[:4] = 0
assert (s[:4] == 0).all()
assert not (s[4:] == 0).any()
def test_setitem_float_labels():
# note labels are floats
s = Series(['a', 'b', 'c'], index=[0, 0.5, 1])
tmp = s.copy()
s.loc[1] = 'zoo'
tmp.iloc[2] = 'zoo'
assert_series_equal(s, tmp)
def test_slice_float_get_set(test_data):
pytest.raises(TypeError, lambda: test_data.ts[4.0:10.0])
def f():
test_data.ts[4.0:10.0] = 0
pytest.raises(TypeError, f)
pytest.raises(TypeError, test_data.ts.__getitem__, slice(4.5, 10.0))
pytest.raises(TypeError, test_data.ts.__setitem__, slice(4.5, 10.0), 0)
def test_slice_floats2():
s = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float))
assert len(s.loc[12.0:]) == 8
assert len(s.loc[12.5:]) == 7
i = np.arange(10, 20, dtype=float)
i[2] = 12.2
s.index = i
assert len(s.loc[12.0:]) == 8
assert len(s.loc[12.5:]) == 7
def test_int_indexing():
s = Series(np.random.randn(6), index=[0, 0, 1, 1, 2, 2])
pytest.raises(KeyError, s.__getitem__, 5)
pytest.raises(KeyError, s.__getitem__, 'c')
# not monotonic
s = Series(np.random.randn(6), index=[2, 2, 0, 0, 1, 1])
pytest.raises(KeyError, s.__getitem__, 5)
pytest.raises(KeyError, s.__getitem__, 'c')
def test_getitem_int64(test_data):
idx = np.int64(5)
assert test_data.ts[idx] == test_data.ts[5]