demo + utils venv
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,31 @@
|
||||
from pandas.util._decorators import cache_readonly
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
_ts = tm.makeTimeSeries()
|
||||
|
||||
|
||||
class TestData(object):
|
||||
|
||||
@cache_readonly
|
||||
def ts(self):
|
||||
ts = _ts.copy()
|
||||
ts.name = 'ts'
|
||||
return ts
|
||||
|
||||
@cache_readonly
|
||||
def series(self):
|
||||
series = tm.makeStringSeries()
|
||||
series.name = 'series'
|
||||
return series
|
||||
|
||||
@cache_readonly
|
||||
def objSeries(self):
|
||||
objSeries = tm.makeObjectSeries()
|
||||
objSeries.name = 'objects'
|
||||
return objSeries
|
||||
|
||||
@cache_readonly
|
||||
def empty(self):
|
||||
return pd.Series([], index=[])
|
||||
@@ -0,0 +1,42 @@
|
||||
import pytest
|
||||
|
||||
from pandas import Series
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def datetime_series():
|
||||
"""
|
||||
Fixture for Series of floats with DatetimeIndex
|
||||
"""
|
||||
s = tm.makeTimeSeries()
|
||||
s.name = 'ts'
|
||||
return s
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def string_series():
|
||||
"""
|
||||
Fixture for Series of floats with Index of unique strings
|
||||
"""
|
||||
s = tm.makeStringSeries()
|
||||
s.name = 'series'
|
||||
return s
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def object_series():
|
||||
"""
|
||||
Fixture for Series of dtype datetime64[ns] with Index of unique strings
|
||||
"""
|
||||
s = tm.makeObjectSeries()
|
||||
s.name = 'objects'
|
||||
return s
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def empty_series():
|
||||
"""
|
||||
Fixture for empty Series
|
||||
"""
|
||||
return Series([], index=[])
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,8 @@
|
||||
import pytest
|
||||
|
||||
from pandas.tests.series.common import TestData
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
def test_data():
|
||||
return TestData()
|
||||
+564
@@ -0,0 +1,564 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
from numpy import nan
|
||||
import pytest
|
||||
|
||||
import pandas.compat as compat
|
||||
from pandas.compat import lrange, range
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Categorical, Series, date_range, isna
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'first_slice,second_slice', [
|
||||
[[2, None], [None, -5]],
|
||||
[[None, 0], [None, -5]],
|
||||
[[None, -5], [None, 0]],
|
||||
[[None, 0], [None, 0]]
|
||||
])
|
||||
@pytest.mark.parametrize('fill', [None, -1])
|
||||
def test_align(test_data, first_slice, second_slice, join_type, fill):
|
||||
a = test_data.ts[slice(*first_slice)]
|
||||
b = test_data.ts[slice(*second_slice)]
|
||||
|
||||
aa, ab = a.align(b, join=join_type, fill_value=fill)
|
||||
|
||||
join_index = a.index.join(b.index, how=join_type)
|
||||
if fill is not None:
|
||||
diff_a = aa.index.difference(join_index)
|
||||
diff_b = ab.index.difference(join_index)
|
||||
if len(diff_a) > 0:
|
||||
assert (aa.reindex(diff_a) == fill).all()
|
||||
if len(diff_b) > 0:
|
||||
assert (ab.reindex(diff_b) == fill).all()
|
||||
|
||||
ea = a.reindex(join_index)
|
||||
eb = b.reindex(join_index)
|
||||
|
||||
if fill is not None:
|
||||
ea = ea.fillna(fill)
|
||||
eb = eb.fillna(fill)
|
||||
|
||||
assert_series_equal(aa, ea)
|
||||
assert_series_equal(ab, eb)
|
||||
assert aa.name == 'ts'
|
||||
assert ea.name == 'ts'
|
||||
assert ab.name == 'ts'
|
||||
assert eb.name == 'ts'
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'first_slice,second_slice', [
|
||||
[[2, None], [None, -5]],
|
||||
[[None, 0], [None, -5]],
|
||||
[[None, -5], [None, 0]],
|
||||
[[None, 0], [None, 0]]
|
||||
])
|
||||
@pytest.mark.parametrize('method', ['pad', 'bfill'])
|
||||
@pytest.mark.parametrize('limit', [None, 1])
|
||||
def test_align_fill_method(test_data,
|
||||
first_slice, second_slice,
|
||||
join_type, method, limit):
|
||||
a = test_data.ts[slice(*first_slice)]
|
||||
b = test_data.ts[slice(*second_slice)]
|
||||
|
||||
aa, ab = a.align(b, join=join_type, method=method, limit=limit)
|
||||
|
||||
join_index = a.index.join(b.index, how=join_type)
|
||||
ea = a.reindex(join_index)
|
||||
eb = b.reindex(join_index)
|
||||
|
||||
ea = ea.fillna(method=method, limit=limit)
|
||||
eb = eb.fillna(method=method, limit=limit)
|
||||
|
||||
assert_series_equal(aa, ea)
|
||||
assert_series_equal(ab, eb)
|
||||
|
||||
|
||||
def test_align_nocopy(test_data):
|
||||
b = test_data.ts[:5].copy()
|
||||
|
||||
# do copy
|
||||
a = test_data.ts.copy()
|
||||
ra, _ = a.align(b, join='left')
|
||||
ra[:5] = 5
|
||||
assert not (a[:5] == 5).any()
|
||||
|
||||
# do not copy
|
||||
a = test_data.ts.copy()
|
||||
ra, _ = a.align(b, join='left', copy=False)
|
||||
ra[:5] = 5
|
||||
assert (a[:5] == 5).all()
|
||||
|
||||
# do copy
|
||||
a = test_data.ts.copy()
|
||||
b = test_data.ts[:5].copy()
|
||||
_, rb = a.align(b, join='right')
|
||||
rb[:3] = 5
|
||||
assert not (b[:3] == 5).any()
|
||||
|
||||
# do not copy
|
||||
a = test_data.ts.copy()
|
||||
b = test_data.ts[:5].copy()
|
||||
_, rb = a.align(b, join='right', copy=False)
|
||||
rb[:2] = 5
|
||||
assert (b[:2] == 5).all()
|
||||
|
||||
|
||||
def test_align_same_index(test_data):
|
||||
a, b = test_data.ts.align(test_data.ts, copy=False)
|
||||
assert a.index is test_data.ts.index
|
||||
assert b.index is test_data.ts.index
|
||||
|
||||
a, b = test_data.ts.align(test_data.ts, copy=True)
|
||||
assert a.index is not test_data.ts.index
|
||||
assert b.index is not test_data.ts.index
|
||||
|
||||
|
||||
def test_align_multiindex():
|
||||
# GH 10665
|
||||
|
||||
midx = pd.MultiIndex.from_product([range(2), range(3), range(2)],
|
||||
names=('a', 'b', 'c'))
|
||||
idx = pd.Index(range(2), name='b')
|
||||
s1 = pd.Series(np.arange(12, dtype='int64'), index=midx)
|
||||
s2 = pd.Series(np.arange(2, dtype='int64'), index=idx)
|
||||
|
||||
# these must be the same results (but flipped)
|
||||
res1l, res1r = s1.align(s2, join='left')
|
||||
res2l, res2r = s2.align(s1, join='right')
|
||||
|
||||
expl = s1
|
||||
tm.assert_series_equal(expl, res1l)
|
||||
tm.assert_series_equal(expl, res2r)
|
||||
expr = pd.Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
|
||||
tm.assert_series_equal(expr, res1r)
|
||||
tm.assert_series_equal(expr, res2l)
|
||||
|
||||
res1l, res1r = s1.align(s2, join='right')
|
||||
res2l, res2r = s2.align(s1, join='left')
|
||||
|
||||
exp_idx = pd.MultiIndex.from_product([range(2), range(2), range(2)],
|
||||
names=('a', 'b', 'c'))
|
||||
expl = pd.Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
|
||||
tm.assert_series_equal(expl, res1l)
|
||||
tm.assert_series_equal(expl, res2r)
|
||||
expr = pd.Series([0, 0, 1, 1] * 2, index=exp_idx)
|
||||
tm.assert_series_equal(expr, res1r)
|
||||
tm.assert_series_equal(expr, res2l)
|
||||
|
||||
|
||||
def test_reindex(test_data):
|
||||
identity = test_data.series.reindex(test_data.series.index)
|
||||
|
||||
# __array_interface__ is not defined for older numpies
|
||||
# and on some pythons
|
||||
try:
|
||||
assert np.may_share_memory(test_data.series.index, identity.index)
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
assert identity.index.is_(test_data.series.index)
|
||||
assert identity.index.identical(test_data.series.index)
|
||||
|
||||
subIndex = test_data.series.index[10:20]
|
||||
subSeries = test_data.series.reindex(subIndex)
|
||||
|
||||
for idx, val in compat.iteritems(subSeries):
|
||||
assert val == test_data.series[idx]
|
||||
|
||||
subIndex2 = test_data.ts.index[10:20]
|
||||
subTS = test_data.ts.reindex(subIndex2)
|
||||
|
||||
for idx, val in compat.iteritems(subTS):
|
||||
assert val == test_data.ts[idx]
|
||||
stuffSeries = test_data.ts.reindex(subIndex)
|
||||
|
||||
assert np.isnan(stuffSeries).all()
|
||||
|
||||
# This is extremely important for the Cython code to not screw up
|
||||
nonContigIndex = test_data.ts.index[::2]
|
||||
subNonContig = test_data.ts.reindex(nonContigIndex)
|
||||
for idx, val in compat.iteritems(subNonContig):
|
||||
assert val == test_data.ts[idx]
|
||||
|
||||
# return a copy the same index here
|
||||
result = test_data.ts.reindex()
|
||||
assert not (result is test_data.ts)
|
||||
|
||||
|
||||
def test_reindex_nan():
|
||||
ts = Series([2, 3, 5, 7], index=[1, 4, nan, 8])
|
||||
|
||||
i, j = [nan, 1, nan, 8, 4, nan], [2, 0, 2, 3, 1, 2]
|
||||
assert_series_equal(ts.reindex(i), ts.iloc[j])
|
||||
|
||||
ts.index = ts.index.astype('object')
|
||||
|
||||
# reindex coerces index.dtype to float, loc/iloc doesn't
|
||||
assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False)
|
||||
|
||||
|
||||
def test_reindex_series_add_nat():
|
||||
rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s')
|
||||
series = Series(rng)
|
||||
|
||||
result = series.reindex(lrange(15))
|
||||
assert np.issubdtype(result.dtype, np.dtype('M8[ns]'))
|
||||
|
||||
mask = result.isna()
|
||||
assert mask[-5:].all()
|
||||
assert not mask[:-5].any()
|
||||
|
||||
|
||||
def test_reindex_with_datetimes():
|
||||
rng = date_range('1/1/2000', periods=20)
|
||||
ts = Series(np.random.randn(20), index=rng)
|
||||
|
||||
result = ts.reindex(list(ts.index[5:10]))
|
||||
expected = ts[5:10]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = ts[list(ts.index[5:10])]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_corner(test_data):
|
||||
# (don't forget to fix this) I think it's fixed
|
||||
test_data.empty.reindex(test_data.ts.index, method='pad') # it works
|
||||
|
||||
# corner case: pad empty series
|
||||
reindexed = test_data.empty.reindex(test_data.ts.index, method='pad')
|
||||
|
||||
# pass non-Index
|
||||
reindexed = test_data.ts.reindex(list(test_data.ts.index))
|
||||
assert_series_equal(test_data.ts, reindexed)
|
||||
|
||||
# bad fill method
|
||||
ts = test_data.ts[::2]
|
||||
msg = (r"Invalid fill method\. Expecting pad \(ffill\), backfill"
|
||||
r" \(bfill\) or nearest\. Got foo")
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ts.reindex(test_data.ts.index, method='foo')
|
||||
|
||||
|
||||
def test_reindex_pad():
|
||||
s = Series(np.arange(10), dtype='int64')
|
||||
s2 = s[::2]
|
||||
|
||||
reindexed = s2.reindex(s.index, method='pad')
|
||||
reindexed2 = s2.reindex(s.index, method='ffill')
|
||||
assert_series_equal(reindexed, reindexed2)
|
||||
|
||||
expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10))
|
||||
assert_series_equal(reindexed, expected)
|
||||
|
||||
# GH4604
|
||||
s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
|
||||
new_index = ['a', 'g', 'c', 'f']
|
||||
expected = Series([1, 1, 3, 3], index=new_index)
|
||||
|
||||
# this changes dtype because the ffill happens after
|
||||
result = s.reindex(new_index).ffill()
|
||||
assert_series_equal(result, expected.astype('float64'))
|
||||
|
||||
result = s.reindex(new_index).ffill(downcast='infer')
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
expected = Series([1, 5, 3, 5], index=new_index)
|
||||
result = s.reindex(new_index, method='ffill')
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# inference of new dtype
|
||||
s = Series([True, False, False, True], index=list('abcd'))
|
||||
new_index = 'agc'
|
||||
result = s.reindex(list(new_index)).ffill()
|
||||
expected = Series([True, True, False], index=list(new_index))
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# GH4618 shifted series downcasting
|
||||
s = Series(False, index=lrange(0, 5))
|
||||
result = s.shift(1).fillna(method='bfill')
|
||||
expected = Series(False, index=lrange(0, 5))
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_nearest():
|
||||
s = Series(np.arange(10, dtype='int64'))
|
||||
target = [0.1, 0.9, 1.5, 2.0]
|
||||
actual = s.reindex(target, method='nearest')
|
||||
expected = Series(np.around(target).astype('int64'), target)
|
||||
assert_series_equal(expected, actual)
|
||||
|
||||
actual = s.reindex_like(actual, method='nearest')
|
||||
assert_series_equal(expected, actual)
|
||||
|
||||
actual = s.reindex_like(actual, method='nearest', tolerance=1)
|
||||
assert_series_equal(expected, actual)
|
||||
actual = s.reindex_like(actual, method='nearest',
|
||||
tolerance=[1, 2, 3, 4])
|
||||
assert_series_equal(expected, actual)
|
||||
|
||||
actual = s.reindex(target, method='nearest', tolerance=0.2)
|
||||
expected = Series([0, 1, np.nan, 2], target)
|
||||
assert_series_equal(expected, actual)
|
||||
|
||||
actual = s.reindex(target, method='nearest',
|
||||
tolerance=[0.3, 0.01, 0.4, 3])
|
||||
expected = Series([0, np.nan, np.nan, 2], target)
|
||||
assert_series_equal(expected, actual)
|
||||
|
||||
|
||||
def test_reindex_backfill():
|
||||
pass
|
||||
|
||||
|
||||
def test_reindex_int(test_data):
|
||||
ts = test_data.ts[::2]
|
||||
int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)
|
||||
|
||||
# this should work fine
|
||||
reindexed_int = int_ts.reindex(test_data.ts.index)
|
||||
|
||||
# if NaNs introduced
|
||||
assert reindexed_int.dtype == np.float_
|
||||
|
||||
# NO NaNs introduced
|
||||
reindexed_int = int_ts.reindex(int_ts.index[::2])
|
||||
assert reindexed_int.dtype == np.int_
|
||||
|
||||
|
||||
def test_reindex_bool(test_data):
|
||||
# A series other than float, int, string, or object
|
||||
ts = test_data.ts[::2]
|
||||
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
|
||||
|
||||
# this should work fine
|
||||
reindexed_bool = bool_ts.reindex(test_data.ts.index)
|
||||
|
||||
# if NaNs introduced
|
||||
assert reindexed_bool.dtype == np.object_
|
||||
|
||||
# NO NaNs introduced
|
||||
reindexed_bool = bool_ts.reindex(bool_ts.index[::2])
|
||||
assert reindexed_bool.dtype == np.bool_
|
||||
|
||||
|
||||
def test_reindex_bool_pad(test_data):
|
||||
# fail
|
||||
ts = test_data.ts[5:]
|
||||
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
|
||||
filled_bool = bool_ts.reindex(test_data.ts.index, method='pad')
|
||||
assert isna(filled_bool[:5]).all()
|
||||
|
||||
|
||||
def test_reindex_categorical():
|
||||
index = date_range('20000101', periods=3)
|
||||
|
||||
# reindexing to an invalid Categorical
|
||||
s = Series(['a', 'b', 'c'], dtype='category')
|
||||
result = s.reindex(index)
|
||||
expected = Series(Categorical(values=[np.nan, np.nan, np.nan],
|
||||
categories=['a', 'b', 'c']))
|
||||
expected.index = index
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# partial reindexing
|
||||
expected = Series(Categorical(values=['b', 'c'], categories=['a', 'b',
|
||||
'c']))
|
||||
expected.index = [1, 2]
|
||||
result = s.reindex([1, 2])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = Series(Categorical(
|
||||
values=['c', np.nan], categories=['a', 'b', 'c']))
|
||||
expected.index = [2, 3]
|
||||
result = s.reindex([2, 3])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_like(test_data):
|
||||
other = test_data.ts[::2]
|
||||
assert_series_equal(test_data.ts.reindex(other.index),
|
||||
test_data.ts.reindex_like(other))
|
||||
|
||||
# GH 7179
|
||||
day1 = datetime(2013, 3, 5)
|
||||
day2 = datetime(2013, 5, 5)
|
||||
day3 = datetime(2014, 3, 5)
|
||||
|
||||
series1 = Series([5, None, None], [day1, day2, day3])
|
||||
series2 = Series([None, None], [day1, day3])
|
||||
|
||||
result = series1.reindex_like(series2, method='pad')
|
||||
expected = Series([5, np.nan], index=[day1, day3])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_fill_value():
|
||||
# -----------------------------------------------------------
|
||||
# floats
|
||||
floats = Series([1., 2., 3.])
|
||||
result = floats.reindex([1, 2, 3])
|
||||
expected = Series([2., 3., np.nan], index=[1, 2, 3])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = floats.reindex([1, 2, 3], fill_value=0)
|
||||
expected = Series([2., 3., 0], index=[1, 2, 3])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# -----------------------------------------------------------
|
||||
# ints
|
||||
ints = Series([1, 2, 3])
|
||||
|
||||
result = ints.reindex([1, 2, 3])
|
||||
expected = Series([2., 3., np.nan], index=[1, 2, 3])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# don't upcast
|
||||
result = ints.reindex([1, 2, 3], fill_value=0)
|
||||
expected = Series([2, 3, 0], index=[1, 2, 3])
|
||||
assert issubclass(result.dtype.type, np.integer)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# -----------------------------------------------------------
|
||||
# objects
|
||||
objects = Series([1, 2, 3], dtype=object)
|
||||
|
||||
result = objects.reindex([1, 2, 3])
|
||||
expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = objects.reindex([1, 2, 3], fill_value='foo')
|
||||
expected = Series([2, 3, 'foo'], index=[1, 2, 3], dtype=object)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# bools
|
||||
bools = Series([True, False, True])
|
||||
|
||||
result = bools.reindex([1, 2, 3])
|
||||
expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = bools.reindex([1, 2, 3], fill_value=False)
|
||||
expected = Series([False, True, False], index=[1, 2, 3])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_datetimeindexes_tz_naive_and_aware():
|
||||
# GH 8306
|
||||
idx = date_range('20131101', tz='America/Chicago', periods=7)
|
||||
newidx = date_range('20131103', periods=10, freq='H')
|
||||
s = Series(range(7), index=idx)
|
||||
with pytest.raises(TypeError):
|
||||
s.reindex(newidx, method='ffill')
|
||||
|
||||
|
||||
def test_reindex_empty_series_tz_dtype():
|
||||
# GH 20869
|
||||
result = Series(dtype='datetime64[ns, UTC]').reindex([0, 1])
|
||||
expected = Series([pd.NaT] * 2, dtype='datetime64[ns, UTC]')
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_rename():
|
||||
# GH 17407
|
||||
s = Series(range(1, 6), index=pd.Index(range(2, 7), name='IntIndex'))
|
||||
result = s.rename(str)
|
||||
expected = s.rename(lambda i: str(i))
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
assert result.name == expected.name
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'data, index, drop_labels,'
|
||||
' axis, expected_data, expected_index',
|
||||
[
|
||||
# Unique Index
|
||||
([1, 2], ['one', 'two'], ['two'],
|
||||
0, [1], ['one']),
|
||||
([1, 2], ['one', 'two'], ['two'],
|
||||
'rows', [1], ['one']),
|
||||
([1, 1, 2], ['one', 'two', 'one'], ['two'],
|
||||
0, [1, 2], ['one', 'one']),
|
||||
|
||||
# GH 5248 Non-Unique Index
|
||||
([1, 1, 2], ['one', 'two', 'one'], 'two',
|
||||
0, [1, 2], ['one', 'one']),
|
||||
([1, 1, 2], ['one', 'two', 'one'], ['one'],
|
||||
0, [1], ['two']),
|
||||
([1, 1, 2], ['one', 'two', 'one'], 'one',
|
||||
0, [1], ['two'])])
|
||||
def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels,
|
||||
expected_data, expected_index):
|
||||
|
||||
s = Series(data=data, index=index)
|
||||
result = s.drop(drop_labels, axis=axis)
|
||||
expected = Series(data=expected_data, index=expected_index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'data, index, drop_labels,'
|
||||
' axis, error_type, error_desc',
|
||||
[
|
||||
# single string/tuple-like
|
||||
(range(3), list('abc'), 'bc',
|
||||
0, KeyError, 'not found in axis'),
|
||||
|
||||
# bad axis
|
||||
(range(3), list('abc'), ('a',),
|
||||
0, KeyError, 'not found in axis'),
|
||||
(range(3), list('abc'), 'one',
|
||||
'columns', ValueError, 'No axis named columns')])
|
||||
def test_drop_exception_raised(data, index, drop_labels,
|
||||
axis, error_type, error_desc):
|
||||
|
||||
with pytest.raises(error_type, match=error_desc):
|
||||
Series(data, index=index).drop(drop_labels, axis=axis)
|
||||
|
||||
|
||||
def test_drop_with_ignore_errors():
|
||||
# errors='ignore'
|
||||
s = Series(range(3), index=list('abc'))
|
||||
result = s.drop('bc', errors='ignore')
|
||||
tm.assert_series_equal(result, s)
|
||||
result = s.drop(['a', 'd'], errors='ignore')
|
||||
expected = s.iloc[1:]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# GH 8522
|
||||
s = Series([2, 3], index=[True, False])
|
||||
assert s.index.is_object()
|
||||
result = s.drop(True)
|
||||
expected = Series([3], index=[False])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 3]])
|
||||
@pytest.mark.parametrize('drop_labels', [[], [1], [3]])
|
||||
def test_drop_empty_list(index, drop_labels):
|
||||
# GH 21494
|
||||
expected_index = [i for i in index if i not in drop_labels]
|
||||
series = pd.Series(index=index).drop(drop_labels)
|
||||
tm.assert_series_equal(series, pd.Series(index=expected_index))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('data, index, drop_labels', [
|
||||
(None, [1, 2, 3], [1, 4]),
|
||||
(None, [1, 2, 2], [1, 4]),
|
||||
([2, 3], [0, 1], [False, True])
|
||||
])
|
||||
def test_drop_non_empty_list(data, index, drop_labels):
|
||||
# GH 21494 and GH 16877
|
||||
with pytest.raises(KeyError, match='not found in axis'):
|
||||
pd.Series(data=data, index=index).drop(drop_labels)
|
||||
@@ -0,0 +1,634 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange, range
|
||||
|
||||
from pandas.core.dtypes.common import is_integer
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, Series, Timestamp, date_range, isna
|
||||
from pandas.core.indexing import IndexingError
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
from pandas.tseries.offsets import BDay
|
||||
|
||||
|
||||
def test_getitem_boolean(test_data):
|
||||
s = test_data.series
|
||||
mask = s > s.median()
|
||||
|
||||
# passing list is OK
|
||||
result = s[list(mask)]
|
||||
expected = s[mask]
|
||||
assert_series_equal(result, expected)
|
||||
tm.assert_index_equal(result.index, s.index[mask])
|
||||
|
||||
|
||||
def test_getitem_boolean_empty():
|
||||
s = Series([], dtype=np.int64)
|
||||
s.index.name = 'index_name'
|
||||
s = s[s.isna()]
|
||||
assert s.index.name == 'index_name'
|
||||
assert s.dtype == np.int64
|
||||
|
||||
# GH5877
|
||||
# indexing with empty series
|
||||
s = Series(['A', 'B'])
|
||||
expected = Series(np.nan, index=['C'], dtype=object)
|
||||
result = s[Series(['C'], dtype=object)]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
s = Series(['A', 'B'])
|
||||
expected = Series(dtype=object, index=Index([], dtype='int64'))
|
||||
result = s[Series([], dtype=object)]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# invalid because of the boolean indexer
|
||||
# that's empty or not-aligned
|
||||
msg = (r"Unalignable boolean Series provided as indexer \(index of"
|
||||
r" the boolean Series and of the indexed object do not match")
|
||||
with pytest.raises(IndexingError, match=msg):
|
||||
s[Series([], dtype=bool)]
|
||||
|
||||
with pytest.raises(IndexingError, match=msg):
|
||||
s[Series([True], dtype=bool)]
|
||||
|
||||
|
||||
def test_getitem_boolean_object(test_data):
|
||||
# using column from DataFrame
|
||||
|
||||
s = test_data.series
|
||||
mask = s > s.median()
|
||||
omask = mask.astype(object)
|
||||
|
||||
# getitem
|
||||
result = s[omask]
|
||||
expected = s[mask]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# setitem
|
||||
s2 = s.copy()
|
||||
cop = s.copy()
|
||||
cop[omask] = 5
|
||||
s2[mask] = 5
|
||||
assert_series_equal(cop, s2)
|
||||
|
||||
# nans raise exception
|
||||
omask[5:10] = np.nan
|
||||
msg = "cannot index with vector containing NA / NaN values"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s[omask]
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s[omask] = 5
|
||||
|
||||
|
||||
def test_getitem_setitem_boolean_corner(test_data):
|
||||
ts = test_data.ts
|
||||
mask_shifted = ts.shift(1, freq=BDay()) > ts.median()
|
||||
|
||||
# these used to raise...??
|
||||
|
||||
msg = (r"Unalignable boolean Series provided as indexer \(index of"
|
||||
r" the boolean Series and of the indexed object do not match")
|
||||
with pytest.raises(IndexingError, match=msg):
|
||||
ts[mask_shifted]
|
||||
with pytest.raises(IndexingError, match=msg):
|
||||
ts[mask_shifted] = 1
|
||||
|
||||
with pytest.raises(IndexingError, match=msg):
|
||||
ts.loc[mask_shifted]
|
||||
with pytest.raises(IndexingError, match=msg):
|
||||
ts.loc[mask_shifted] = 1
|
||||
|
||||
|
||||
def test_setitem_boolean(test_data):
|
||||
mask = test_data.series > test_data.series.median()
|
||||
|
||||
# similar indexed series
|
||||
result = test_data.series.copy()
|
||||
result[mask] = test_data.series * 2
|
||||
expected = test_data.series * 2
|
||||
assert_series_equal(result[mask], expected[mask])
|
||||
|
||||
# needs alignment
|
||||
result = test_data.series.copy()
|
||||
result[mask] = (test_data.series * 2)[0:5]
|
||||
expected = (test_data.series * 2)[0:5].reindex_like(test_data.series)
|
||||
expected[-mask] = test_data.series[mask]
|
||||
assert_series_equal(result[mask], expected[mask])
|
||||
|
||||
|
||||
def test_get_set_boolean_different_order(test_data):
|
||||
ordered = test_data.series.sort_values()
|
||||
|
||||
# setting
|
||||
copy = test_data.series.copy()
|
||||
copy[ordered > 0] = 0
|
||||
|
||||
expected = test_data.series.copy()
|
||||
expected[expected > 0] = 0
|
||||
|
||||
assert_series_equal(copy, expected)
|
||||
|
||||
# getting
|
||||
sel = test_data.series[ordered > 0]
|
||||
exp = test_data.series[test_data.series > 0]
|
||||
assert_series_equal(sel, exp)
|
||||
|
||||
|
||||
def test_where_unsafe_int(sint_dtype):
|
||||
s = Series(np.arange(10), dtype=sint_dtype)
|
||||
mask = s < 5
|
||||
|
||||
s[mask] = lrange(2, 7)
|
||||
expected = Series(lrange(2, 7) + lrange(5, 10), dtype=sint_dtype)
|
||||
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
|
||||
def test_where_unsafe_float(float_dtype):
|
||||
s = Series(np.arange(10), dtype=float_dtype)
|
||||
mask = s < 5
|
||||
|
||||
s[mask] = lrange(2, 7)
|
||||
expected = Series(lrange(2, 7) + lrange(5, 10), dtype=float_dtype)
|
||||
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype,expected_dtype", [
|
||||
(np.int8, np.float64),
|
||||
(np.int16, np.float64),
|
||||
(np.int32, np.float64),
|
||||
(np.int64, np.float64),
|
||||
(np.float32, np.float32),
|
||||
(np.float64, np.float64)
|
||||
])
|
||||
def test_where_unsafe_upcast(dtype, expected_dtype):
|
||||
# see gh-9743
|
||||
s = Series(np.arange(10), dtype=dtype)
|
||||
values = [2.5, 3.5, 4.5, 5.5, 6.5]
|
||||
mask = s < 5
|
||||
expected = Series(values + lrange(5, 10), dtype=expected_dtype)
|
||||
s[mask] = values
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
|
||||
def test_where_unsafe():
|
||||
# see gh-9731
|
||||
s = Series(np.arange(10), dtype="int64")
|
||||
values = [2.5, 3.5, 4.5, 5.5]
|
||||
|
||||
mask = s > 5
|
||||
expected = Series(lrange(6) + values, dtype="float64")
|
||||
|
||||
s[mask] = values
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
# see gh-3235
|
||||
s = Series(np.arange(10), dtype='int64')
|
||||
mask = s < 5
|
||||
s[mask] = lrange(2, 7)
|
||||
expected = Series(lrange(2, 7) + lrange(5, 10), dtype='int64')
|
||||
assert_series_equal(s, expected)
|
||||
assert s.dtype == expected.dtype
|
||||
|
||||
s = Series(np.arange(10), dtype='int64')
|
||||
mask = s > 5
|
||||
s[mask] = [0] * 4
|
||||
expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype='int64')
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
s = Series(np.arange(10))
|
||||
mask = s > 5
|
||||
|
||||
msg = "cannot assign mismatch length to masked array"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s[mask] = [5, 4, 3, 2, 1]
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s[mask] = [0] * 5
|
||||
|
||||
# dtype changes
|
||||
s = Series([1, 2, 3, 4])
|
||||
result = s.where(s > 2, np.nan)
|
||||
expected = Series([np.nan, np.nan, 3, 4])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# GH 4667
|
||||
# setting with None changes dtype
|
||||
s = Series(range(10)).astype(float)
|
||||
s[8] = None
|
||||
result = s[8]
|
||||
assert isna(result)
|
||||
|
||||
s = Series(range(10)).astype(float)
|
||||
s[s > 8] = None
|
||||
result = s[isna(s)]
|
||||
expected = Series(np.nan, index=[9])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_where_raise_on_error_deprecation():
|
||||
# gh-14968
|
||||
# deprecation of raise_on_error
|
||||
s = Series(np.random.randn(5))
|
||||
cond = s > 0
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
s.where(cond, raise_on_error=True)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
s.mask(cond, raise_on_error=True)
|
||||
|
||||
|
||||
def test_where():
|
||||
s = Series(np.random.randn(5))
|
||||
cond = s > 0
|
||||
|
||||
rs = s.where(cond).dropna()
|
||||
rs2 = s[cond]
|
||||
assert_series_equal(rs, rs2)
|
||||
|
||||
rs = s.where(cond, -s)
|
||||
assert_series_equal(rs, s.abs())
|
||||
|
||||
rs = s.where(cond)
|
||||
assert (s.shape == rs.shape)
|
||||
assert (rs is not s)
|
||||
|
||||
# test alignment
|
||||
cond = Series([True, False, False, True, False], index=s.index)
|
||||
s2 = -(s.abs())
|
||||
|
||||
expected = s2[cond].reindex(s2.index[:3]).reindex(s2.index)
|
||||
rs = s2.where(cond[:3])
|
||||
assert_series_equal(rs, expected)
|
||||
|
||||
expected = s2.abs()
|
||||
expected.iloc[0] = s2[0]
|
||||
rs = s2.where(cond[:3], -s2)
|
||||
assert_series_equal(rs, expected)
|
||||
|
||||
|
||||
def test_where_error():
|
||||
s = Series(np.random.randn(5))
|
||||
cond = s > 0
|
||||
|
||||
msg = "Array conditional must be same shape as self"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.where(1)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.where(cond[:3].values, -s)
|
||||
|
||||
# GH 2745
|
||||
s = Series([1, 2])
|
||||
s[[True, False]] = [0, 1]
|
||||
expected = Series([0, 2])
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
# failures
|
||||
msg = "cannot assign mismatch length to masked array"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s[[True, False]] = [0, 2, 3]
|
||||
msg = ("NumPy boolean array indexing assignment cannot assign 0 input"
|
||||
" values to the 1 output values where the mask is true")
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s[[True, False]] = []
|
||||
|
||||
|
||||
@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
def test_where_array_like(klass):
    """see gh-15414: any boolean array-like works as the where condition."""
    ser = Series([1, 2, 3])
    bools = [False, True, True]
    exp = Series([np.nan, 2, 3])

    assert_series_equal(ser.where(klass(bools)), exp)
|
||||
@pytest.mark.parametrize('cond', [
    [1, 0, 1],
    Series([2, 5, 7]),
    ["True", "False", "True"],
    [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")]
])
def test_where_invalid_input(cond):
    """see gh-15414: only boolean arrays are accepted as the condition."""
    ser = Series([1, 2, 3])

    with pytest.raises(ValueError,
                       match="Boolean array expected for the condition"):
        ser.where(cond)

    # a too-short boolean list fails on shape instead of dtype
    with pytest.raises(ValueError,
                       match="Array conditional must be same shape as self"):
        ser.where([True])
|
||||
def test_where_ndframe_align():
    """Raw array/list conditions must match in length; Series conds align."""
    msg = "Array conditional must be same shape as self"
    ser = Series([1, 2, 3])

    # too-short: error as a raw list, aligned (missing -> False) as a Series
    short_cond = [True]
    with pytest.raises(ValueError, match=msg):
        ser.where(short_cond)
    tm.assert_series_equal(ser.where(Series(short_cond)),
                           Series([1, np.nan, np.nan]))

    # too-long ndarray: same split in behavior
    long_cond = np.array([False, True, False, True])
    with pytest.raises(ValueError, match=msg):
        ser.where(long_cond)
    tm.assert_series_equal(ser.where(Series(long_cond)),
                           Series([np.nan, 2, np.nan]))
|
||||
def test_where_setitem_invalid():
    """GH 2702: list assignment through an indexer must match in length."""
    msg = ("cannot set using a {} indexer with a different length than"
           " the value")

    # slice indexer
    ser = Series(list('abc'))
    with pytest.raises(ValueError, match=msg.format('slice')):
        ser[0:3] = list(range(27))

    ser[0:3] = list(range(3))
    assert_series_equal(ser.astype(np.int64), Series([0, 1, 2]))

    # slice with step
    ser = Series(list('abcdef'))
    with pytest.raises(ValueError, match=msg.format('slice')):
        ser[0:4:2] = list(range(27))

    ser = Series(list('abcdef'))
    ser[0:4:2] = list(range(2))
    assert_series_equal(ser, Series([0, 'b', 1, 'd', 'e', 'f']))

    # negative slices
    ser = Series(list('abcdef'))
    with pytest.raises(ValueError, match=msg.format('slice')):
        ser[:-1] = list(range(27))

    ser[-3:-1] = list(range(2))
    assert_series_equal(ser, Series(['a', 'b', 'c', 0, 1, 'f']))

    # list indexer: both too-long and too-short values fail
    ser = Series(list('abc'))
    with pytest.raises(ValueError, match=msg.format('list-like')):
        ser[[0, 1, 2]] = list(range(27))

    ser = Series(list('abc'))
    with pytest.raises(ValueError, match=msg.format('list-like')):
        ser[[0, 1, 2]] = list(range(2))

    # scalar indexer: a list value is stored as a single object element
    ser = Series(list('abc'))
    ser[0] = list(range(10))
    assert_series_equal(ser, Series([list(range(10)), 'b', 'c']))
|
||||
@pytest.mark.parametrize('size', range(2, 6))
@pytest.mark.parametrize('mask', [
    [True, False, False, False, False],
    [True, False],
    [False]
])
@pytest.mark.parametrize('item', [
    # np.float64 replaces the np.float alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24; the finfo values are identical.
    2.0, np.nan, np.finfo(np.float64).max, np.finfo(np.float64).min
])
# Test numpy arrays, lists and tuples as the input to be
# broadcast
@pytest.mark.parametrize('box', [
    lambda x: np.array([x]),
    lambda x: [x],
    lambda x: (x,)
])
def test_broadcast(size, mask, item, box):
    """A single-element value broadcasts identically in boolean setitem,
    where and mask."""
    selection = np.resize(mask, size)

    data = np.arange(size, dtype=float)

    # Construct the expected series by taking the source
    # data or item based on the selection
    expected = Series([item if use_item else data[i]
                       for i, use_item in enumerate(selection)])

    s = Series(data)
    s[selection] = box(item)
    assert_series_equal(s, expected)

    s = Series(data)
    result = s.where(~selection, box(item))
    assert_series_equal(result, expected)

    s = Series(data)
    result = s.mask(selection, box(item))
    assert_series_equal(result, expected)
|
||||
def test_where_inplace():
    """inplace=True mutates the caller and matches the non-inplace result."""
    ser = Series(np.random.randn(5))
    positive = ser > 0

    mutated = ser.copy()
    mutated.where(positive, inplace=True)
    assert_series_equal(mutated.dropna(), ser[positive])
    assert_series_equal(mutated, ser.where(positive))

    mutated = ser.copy()
    mutated.where(positive, -ser, inplace=True)
    assert_series_equal(mutated, ser.where(positive, -ser))
|
||||
def test_where_dups():
    """GH 4550/4548: where and masked assignment work with duplicate labels."""
    first = Series(list(range(3)))
    second = Series(list(range(3)))
    comb = pd.concat([first, second])

    # GH 4550: where used to crash with dups in the index
    result = comb.where(comb < 2)
    expected = Series([0, 1, np.nan, 0, 1, np.nan],
                      index=[0, 1, 2, 0, 1, 2])
    assert_series_equal(result, expected)

    # GH 4548: inplace updating was broken with dups
    comb[comb < 1] = 5
    expected = Series([5, 1, 2, 5, 1, 2], index=[0, 1, 2, 0, 1, 2])
    assert_series_equal(comb, expected)

    comb[comb < 2] += 10
    expected = Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2])
    assert_series_equal(comb, expected)
|
||||
def test_where_numeric_with_string():
    """GH 9280: a string ``other`` upcasts the result to object dtype.

    The identical checks were repeated verbatim for a scalar, a list and
    an ndarray of strings; run them in one loop instead of three copies.
    """
    ser = pd.Series([1, 2, 3])

    for other in ('X', ['X', 'Y', 'Z'], np.array(['X', 'Y', 'Z'])):
        result = ser.where(ser > 1, other)

        # masked-out position takes the string; kept positions stay ints
        assert not is_integer(result[0])
        assert is_integer(result[1])
        assert is_integer(result[2])
        assert isinstance(result[0], str)
        assert result.dtype == 'object'
|
||||
def test_where_timedelta_coerce():
    """Incompatible ``other`` values coerce a timedelta64 Series."""
    ser = Series([1, 2], dtype='timedelta64[ns]')
    exp = Series([10, 10])
    all_false = np.array([False, False])

    # int and float replacements, scalar or list-like, coerce to numeric
    for other in ([10, 10], 10, 10.0, [10.0, 10.0]):
        assert_series_equal(ser.where(all_false, other), exp)

    # NaN alongside a float gives an object result holding None
    result = ser.where(all_false, [10.0, np.nan])
    expected = Series([10, None], dtype='object')
    assert_series_equal(result, expected)
|
||||
def test_where_datetime_conversion():
    """Incompatible ``other`` values coerce a datetime64 Series."""
    ser = Series(date_range('20130102', periods=2))
    exp = Series([10, 10])
    all_false = np.array([False, False])

    # int and float replacements, scalar or list-like, coerce to numeric
    for other in ([10, 10], 10, 10.0, [10.0, 10.0]):
        assert_series_equal(ser.where(all_false, other), exp)

    # NaN alongside a float gives an object result holding None
    result = ser.where(all_false, [10.0, np.nan])
    expected = Series([10, None], dtype='object')
    assert_series_equal(result, expected)

    # GH 15701: sub-second timestamps survive a plain boolean where
    stamps = ['2016-12-31 12:00:04+00:00',
              '2016-12-31 12:00:04.010000+00:00']
    ser = Series([pd.Timestamp(t) for t in stamps])
    result = ser.where(Series([False, True]))
    assert_series_equal(result, Series([pd.NaT, ser[1]]))
|
||||
def test_where_dt_tz_values(tz_naive_fixture):
|
||||
ser1 = pd.Series(pd.DatetimeIndex(['20150101', '20150102', '20150103'],
|
||||
tz=tz_naive_fixture))
|
||||
ser2 = pd.Series(pd.DatetimeIndex(['20160514', '20160515', '20160516'],
|
||||
tz=tz_naive_fixture))
|
||||
mask = pd.Series([True, True, False])
|
||||
result = ser1.where(mask, ser2)
|
||||
exp = pd.Series(pd.DatetimeIndex(['20150101', '20150102', '20160516'],
|
||||
tz=tz_naive_fixture))
|
||||
assert_series_equal(exp, result)
|
||||
|
||||
|
||||
def test_mask():
    """Series.mask is equivalent to where with an inverted condition."""
    # compare with tested results in test_where
    ser = Series(np.random.randn(5))
    positive = ser > 0

    assert_series_equal(ser.where(~positive, np.nan), ser.mask(positive))
    assert_series_equal(ser.where(~positive), ser.mask(positive))
    assert_series_equal(ser.where(~positive, -ser),
                        ser.mask(positive, -ser))

    # partial (aligned) conditions behave the same way
    positive = Series([True, False, False, True, False], index=ser.index)
    neg = -(ser.abs())
    assert_series_equal(neg.where(~positive[:3]), neg.mask(positive[:3]))
    assert_series_equal(neg.where(~positive[:3], -neg),
                        neg.mask(positive[:3], -neg))

    # invalid conditions raise just as they do for where
    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        ser.mask(1)
    with pytest.raises(ValueError, match=msg):
        ser.mask(positive[:3].values, -ser)

    # dtype changes: masking ints with NaN upcasts to float
    ser = Series([1, 2, 3, 4])
    result = ser.mask(ser > 2, np.nan)
    assert_series_equal(result, Series([1, 2, np.nan, np.nan]))

    # see gh-21891: a plain list condition is accepted
    ser = Series([1, 2])
    res = ser.mask([True, False])
    tm.assert_series_equal(res, Series([np.nan, 2]))
|
||||
def test_mask_inplace():
    """inplace mask mutates the caller and matches the non-inplace result."""
    ser = Series(np.random.randn(5))
    positive = ser > 0

    mutated = ser.copy()
    mutated.mask(positive, inplace=True)
    assert_series_equal(mutated.dropna(), ser[~positive])
    assert_series_equal(mutated, ser.mask(positive))

    mutated = ser.copy()
    mutated.mask(positive, -ser, inplace=True)
    assert_series_equal(mutated, ser.mask(positive, -ser))
@@ -0,0 +1,33 @@
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_getitem_callable():
    """GH 12533: __getitem__ accepts a callable applied to the Series."""
    ser = pd.Series(4, index=list('ABCD'))

    # callable returning a scalar label
    assert ser[lambda x: 'A'] == ser.loc['A']

    # callable returning a list of labels
    tm.assert_series_equal(ser[lambda x: ['A', 'B']], ser.loc[['A', 'B']])

    # callable returning a boolean mask
    tm.assert_series_equal(ser[lambda x: [True, False, True, True]],
                           ser.iloc[[0, 2, 3]])
|
||||
def test_setitem_callable():
    """GH 12533: __setitem__ accepts a callable as the indexer."""
    ser = pd.Series([1, 2, 3, 4], index=list('ABCD'))
    ser[lambda x: 'A'] = -1

    expected = pd.Series([-1, 2, 3, 4], index=list('ABCD'))
    tm.assert_series_equal(ser, expected)
|
||||
def test_setitem_other_callable():
    """GH 13299: a callable on the right-hand side is stored, not applied."""
    inc = lambda x: x + 1

    ser = pd.Series([1, 2, -1, 4])
    ser[ser < 0] = inc

    # the callable object itself lands in the masked slot (object dtype)
    tm.assert_series_equal(ser, pd.Series([1, 2, inc, 4]))
||||
@@ -0,0 +1,714 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import iNaT
|
||||
import pandas._libs.index as _index
|
||||
from pandas.compat import lrange, range
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import (
|
||||
assert_almost_equal, assert_frame_equal, assert_series_equal)
|
||||
|
||||
|
||||
"""
|
||||
Also test support for datetime64[ns] in Series / DataFrame
|
||||
"""
|
||||
|
||||
|
||||
def test_fancy_getitem():
    """Positional, string and datetime lookups on a DatetimeIndex Series."""
    dti = date_range(freq='WOM-1FRI', start=datetime(2005, 1, 1),
                     end=datetime(2010, 1, 1))
    ser = Series(np.arange(len(dti)), index=dti)

    # the same element is reachable positionally and via several key forms
    assert ser[48] == 48
    assert ser['1/2/2009'] == 48
    assert ser['2009-1-2'] == 48
    assert ser[datetime(2009, 1, 2)] == 48
    assert ser[Timestamp(datetime(2009, 1, 2))] == 48

    # a date absent from the index raises KeyError
    with pytest.raises(KeyError, match=r"^'2009-1-3'$"):
        ser['2009-1-3']

    # string slicing matches datetime slicing
    assert_series_equal(ser['3/6/2009':'2009-06-05'],
                        ser[datetime(2009, 3, 6):datetime(2009, 6, 5)])
|
||||
def test_fancy_setitem():
    """Setting via position, date string and string slice on a dt index."""
    dti = date_range(freq='WOM-1FRI', start=datetime(2005, 1, 1),
                     end=datetime(2010, 1, 1))
    ser = Series(np.arange(len(dti)), index=dti)

    ser[48] = -1
    assert ser[48] == -1

    ser['1/2/2009'] = -2
    assert ser[48] == -2

    ser['1/2/2009':'2009-06-05'] = -3
    assert (ser[48:54] == -3).all()
|
||||
def test_dti_snap():
    """DatetimeIndex.snap rounds each date to the nearest freq boundary."""
    dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002',
                         '1/5/2002', '1/6/2002', '1/7/2002'], freq='D')

    # weekly (Monday) snapping: 3 dates fall back, 4 snap forward
    snapped = dti.snap(freq='W-MON')
    expected = date_range('12/31/2001', '1/7/2002', freq='w-mon')
    expected = expected.repeat([3, 4])
    assert (snapped == expected).all()

    # business-day snapping collapses the weekend onto the adjacent days
    snapped = dti.snap(freq='B')
    expected = date_range('1/1/2002', '1/7/2002', freq='b')
    expected = expected.repeat([1, 1, 1, 2, 2])
    assert (snapped == expected).all()
|
||||
def test_dti_reset_index_round_trip():
    """reset_index/set_index round-trips a DatetimeIndex unchanged."""
    dti = date_range(start='1/1/2001', end='6/1/2001', freq='D')
    original = DataFrame({'v': np.random.rand(len(dti))}, index=dti)
    flattened = original.reset_index()
    assert flattened.dtypes[0] == np.dtype('M8[ns]')
    restored = flattened.set_index('index')
    assert_frame_equal(original, restored, check_names=False)

    # #2329: a datetime column survives set_index/reset_index
    stamp = datetime(2012, 11, 22)
    df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value'])
    df = df.set_index('Date')

    assert df.index[0] == stamp
    assert df.reset_index()['Date'][0] == stamp
|
||||
def test_series_set_value():
    """#1561: set_value is deprecated but still inserts new labels."""
    dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)]
    index = DatetimeIndex(dates)

    # every set_value call emits a deprecation warning
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        ser = Series().set_value(dates[0], 1.)
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        ser2 = ser.set_value(dates[1], np.nan)

    assert_series_equal(ser2, Series([1., np.nan], index=index))

    # s = Series(index[:1], index[:1])
    # s2 = s.set_value(dates[1], index[1])
    # assert s2.values.dtype == 'M8[ns]'
|
||||
@pytest.mark.slow
def test_slice_locs_indexerror():
    """A slice reaching far outside the index must clip, not IndexError."""
    times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10)
             for i in range(100000)]
    ser = Series(lrange(100000), times)
    # bounds well outside the data: should silently clip
    ser.loc[datetime(1900, 1, 1):datetime(2100, 1, 1)]
|
||||
def test_slicing_datetimes():
    """GH 7523: .loc datetime/string slicing on unique and dup indexes."""
    def check(frame):
        # full-range slices return the frame unchanged
        result = frame.loc[datetime(2001, 1, 1, 10):]
        assert_frame_equal(result, frame)
        result = frame.loc[:datetime(2001, 1, 4, 10)]
        assert_frame_equal(result, frame)
        result = frame.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)]
        assert_frame_equal(result, frame)

        # a later start drops the first row; string form is equivalent
        expected = frame.iloc[1:]
        assert_frame_equal(frame.loc[datetime(2001, 1, 1, 11):], expected)
        assert_frame_equal(frame.loc['20010101 11':], expected)

    # unique index
    check(DataFrame(np.arange(4., dtype='float64'),
                    index=[datetime(2001, 1, i, 10, 00)
                           for i in [1, 2, 3, 4]]))

    # duplicated index
    check(pd.DataFrame(np.arange(5., dtype='float64'),
                       index=[datetime(2001, 1, i, 10, 00)
                              for i in [1, 2, 2, 3, 4]]))
|
||||
def test_frame_datetime64_duplicated():
    """datetime64 columns participate correctly in DataFrame.duplicated().

    Uses ``~result`` rather than ``-result``: unary minus on boolean
    arrays was deprecated in NumPy and raises TypeError on modern
    versions; ``~`` is the supported inversion operator.
    """
    dates = date_range('2010-07-01', end='2010-08-05')

    # every (date, symbol) pair is distinct
    tst = DataFrame({'symbol': 'AAA', 'date': dates})
    result = tst.duplicated(['date', 'symbol'])
    assert (~result).all()

    # a pure datetime64 column is likewise free of duplicates
    tst = DataFrame({'date': dates})
    result = tst.duplicated()
    assert (~result).all()
|
||||
def test_getitem_setitem_datetime_tz_pytz():
    """GH 2785/2789: tz-aware string and datetime keys round-trip setitem."""
    from pytz import timezone as tz
    from pandas import date_range

    n = 50
    rng = date_range('1/1/1990', periods=n, freq='H', tz='US/Eastern')
    ts = Series(np.random.randn(n), index=rng)

    # a UTC string key maps onto the 5th element of the US/Eastern index
    result = ts.copy()
    result["1990-01-01 09:00:00+00:00"] = 0
    result["1990-01-01 09:00:00+00:00"] = ts[4]
    assert_series_equal(result, ts)

    # the same instant expressed with another fixed offset
    result = ts.copy()
    result["1990-01-01 03:00:00-06:00"] = 0
    result["1990-01-01 03:00:00-06:00"] = ts[4]
    assert_series_equal(result, ts)

    # repeat with datetime keys
    result = ts.copy()
    result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0
    result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4]
    assert_series_equal(result, ts)

    result = ts.copy()

    # comparison dates with datetime MUST be localized!
    key = tz('US/Central').localize(datetime(1990, 1, 1, 3))
    result[key] = 0
    result[key] = ts[4]
    assert_series_equal(result, ts)
|
||||
def test_getitem_setitem_datetime_tz_dateutil():
    """Same tz-aware get/set round-trips, using dateutil timezones."""
    from dateutil.tz import tzutc
    from pandas._libs.tslibs.timezones import dateutil_gettz as gettz

    # handle special case for utc in dateutil
    tz = lambda x: tzutc() if x == 'UTC' else gettz(x)

    from pandas import date_range

    n = 50

    # testing with timezone, GH #2785
    rng = date_range('1/1/1990', periods=n, freq='H',
                     tz='America/New_York')
    ts = Series(np.random.randn(n), index=rng)

    # also test Timestamp tz handling, GH #2789
    result = ts.copy()
    result["1990-01-01 09:00:00+00:00"] = 0
    result["1990-01-01 09:00:00+00:00"] = ts[4]
    assert_series_equal(result, ts)

    result = ts.copy()
    result["1990-01-01 03:00:00-06:00"] = 0
    result["1990-01-01 03:00:00-06:00"] = ts[4]
    assert_series_equal(result, ts)

    # repeat with datetime keys
    result = ts.copy()
    result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0
    result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4]
    assert_series_equal(result, ts)

    result = ts.copy()
    result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = 0
    result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = ts[4]
    assert_series_equal(result, ts)
|
||||
def test_getitem_setitem_datetimeindex():
    """Get/set via strings, datetimes and partial dates on a tz-aware index."""
    n = 50
    # testing with timezone, GH #2785
    rng = date_range('1/1/1990', periods=n, freq='H', tz='US/Eastern')
    ts = Series(np.random.randn(n), index=rng)

    # scalar string key
    assert ts["1990-01-01 04:00:00"] == ts[4]

    res = ts.copy()
    res["1990-01-01 04:00:00"] = 0
    res["1990-01-01 04:00:00"] = ts[4]
    assert_series_equal(res, ts)

    # string slice, get and set
    assert_series_equal(ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"],
                        ts[4:8])

    res = ts.copy()
    res["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0
    res["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8]
    assert_series_equal(res, ts)

    # GH#18435 strings get a pass from tzawareness compat
    lb = "1990-01-01 04:00:00"
    rb = "1990-01-01 07:00:00"
    assert_series_equal(ts[(ts.index >= lb) & (ts.index <= rb)], ts[4:8])

    lb = "1990-01-01 04:00:00-0500"
    rb = "1990-01-01 07:00:00-0500"
    assert_series_equal(ts[(ts.index >= lb) & (ts.index <= rb)], ts[4:8])

    # repeat all the above with naive datetimes
    assert ts[datetime(1990, 1, 1, 4)] == ts[4]

    res = ts.copy()
    res[datetime(1990, 1, 1, 4)] = 0
    res[datetime(1990, 1, 1, 4)] = ts[4]
    assert_series_equal(res, ts)

    assert_series_equal(ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)],
                        ts[4:8])

    res = ts.copy()
    res[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = 0
    res[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8]
    assert_series_equal(res, ts)

    lb = datetime(1990, 1, 1, 4)
    rb = datetime(1990, 1, 1, 7)
    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    with pytest.raises(TypeError, match=msg):
        # tznaive vs tzaware comparison is invalid
        # see GH#18376, GH#18162
        ts[(ts.index >= lb) & (ts.index <= rb)]

    # localized Timestamps compare fine
    lb = pd.Timestamp(datetime(1990, 1, 1, 4)).tz_localize(rng.tzinfo)
    rb = pd.Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo)
    assert_series_equal(ts[(ts.index >= lb) & (ts.index <= rb)], ts[4:8])

    # index elements round-trip as keys
    assert ts[ts.index[4]] == ts[4]
    assert_series_equal(ts[ts.index[4:8]], ts[4:8])

    res = ts.copy()
    res[ts.index[4:8]] = 0
    res[4:8] = ts[4:8]
    assert_series_equal(res, ts)

    # also test partial date slicing
    assert_series_equal(ts["1990-01-02"], ts[24:48])

    res = ts.copy()
    res["1990-01-02"] = 0
    res["1990-01-02"] = ts[24:48]
    assert_series_equal(res, ts)
|
||||
def test_getitem_setitem_periodindex():
    """Same get/set round-trips for a PeriodIndex-backed Series."""
    from pandas import period_range

    n = 50
    rng = period_range('1/1/1990', periods=n, freq='H')
    ts = Series(np.random.randn(n), index=rng)

    # scalar string key
    assert ts["1990-01-01 04"] == ts[4]

    res = ts.copy()
    res["1990-01-01 04"] = 0
    res["1990-01-01 04"] = ts[4]
    assert_series_equal(res, ts)

    # string slice, get and set
    assert_series_equal(ts["1990-01-01 04":"1990-01-01 07"], ts[4:8])

    res = ts.copy()
    res["1990-01-01 04":"1990-01-01 07"] = 0
    res["1990-01-01 04":"1990-01-01 07"] = ts[4:8]
    assert_series_equal(res, ts)

    # comparison against string bounds
    lb = "1990-01-01 04"
    rb = "1990-01-01 07"
    assert_series_equal(ts[(ts.index >= lb) & (ts.index <= rb)], ts[4:8])

    # GH 2782: index elements round-trip as keys
    assert ts[ts.index[4]] == ts[4]
    assert_series_equal(ts[ts.index[4:8]], ts[4:8])

    res = ts.copy()
    res[ts.index[4:8]] = 0
    res[4:8] = ts[4:8]
    assert_series_equal(res, ts)
|
||||
# FutureWarning from NumPy.
@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
def test_getitem_median_slice_bug():
    """Indexing with a one-element list of slices equals the bare slice."""
    index = date_range('20090415', '20090519', freq='2B')
    ser = Series(np.random.randn(13), index=index)

    indexer = [slice(6, 7, None)]
    assert_series_equal(ser[indexer], ser[indexer[0]])
|
||||
def test_datetime_indexing():
    """Missing timestamps raise KeyError on read; assignment appends them."""
    from pandas import date_range

    index = date_range('1/1/2000', '1/7/2000').repeat(3)
    stamp = Timestamp('1/8/2000')

    def check(ser):
        # the stamp is absent: lookup fails, then assignment appends
        with pytest.raises(KeyError, match=r"^947289600000000000L?$"):
            ser[stamp]
        ser[stamp] = 0
        assert ser[stamp] == 0

    check(Series(len(index), index=index))

    # not monotonic
    check(Series(len(index), index=index)[::-1])
|
||||
"""
|
||||
test duplicates in time series
|
||||
"""
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
def dups():
    """Random-valued Series whose DatetimeIndex contains duplicates."""
    # three copies each of Jan 2-4, one of Jan 5
    dates = ([datetime(2000, 1, 2)] * 3 + [datetime(2000, 1, 3)] * 3 +
             [datetime(2000, 1, 4)] * 3 + [datetime(2000, 1, 5)])

    return Series(np.random.randn(len(dates)), index=dates)
||||
|
||||
def test_constructor(dups):
|
||||
assert isinstance(dups, Series)
|
||||
assert isinstance(dups.index, DatetimeIndex)
|
||||
|
||||
|
||||
def test_is_unique_monotonic(dups):
|
||||
assert not dups.index.is_unique
|
||||
|
||||
|
||||
def test_index_unique(dups):
    """unique()/nunique() on duplicated datetime indexes, with tz and NaT."""
    uniques = dups.index.unique()
    expected = DatetimeIndex([datetime(2000, 1, 2), datetime(2000, 1, 3),
                              datetime(2000, 1, 4), datetime(2000, 1, 5)])
    assert uniques.dtype == 'M8[ns]'  # sanity
    tm.assert_index_equal(uniques, expected)
    assert dups.index.nunique() == 4

    # #2563: unique() preserves the index type
    assert isinstance(uniques, DatetimeIndex)

    # tz-localized duplicates keep tz and name through unique()
    localized = dups.index.tz_localize('US/Eastern')
    localized.name = 'foo'
    result = localized.unique()
    expected = DatetimeIndex(expected, name='foo').tz_localize('US/Eastern')
    assert result.tz is not None
    assert result.name == 'foo'
    tm.assert_index_equal(result, expected)

    # NaT, note this is excluded from the dropna=True count
    arr = [1370745748 + t for t in range(20)] + [iNaT]
    idx = DatetimeIndex(arr * 3)
    tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
    assert idx.nunique() == 20
    assert idx.nunique(dropna=False) == 21

    arr = [Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t)
           for t in range(20)] + [NaT]
    idx = DatetimeIndex(arr * 3)
    tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
    assert idx.nunique() == 20
    assert idx.nunique(dropna=False) == 21
|
||||
def test_index_dupes_contains():
    """Membership tests work on an index with duplicate timestamps."""
    when = datetime(2011, 12, 5, 20, 30)
    assert when in DatetimeIndex([when, when])
||||
|
||||
def test_duplicate_dates_indexing(dups):
    """Scalar get/set on duplicated dates returns/updates every match."""
    ts = dups

    for date in ts.index.unique():
        mask = ts.index == date
        matches = ts[mask]

        # getitem: a Series for repeated dates, a scalar otherwise
        result = ts[date]
        if mask.sum() > 1:
            assert_series_equal(result, matches)
        else:
            assert_almost_equal(result, matches[0])

        # setitem hits every row carrying that date
        cp = ts.copy()
        cp[date] = 0
        assert_series_equal(cp, Series(np.where(mask, 0, ts),
                                       index=ts.index))

    # an absent date raises on read ...
    with pytest.raises(KeyError, match=r"^947116800000000000L?$"):
        ts[datetime(2000, 1, 6)]

    # ... but assignment appends a new label
    ts[datetime(2000, 1, 6)] = 0
    assert ts[datetime(2000, 1, 6)] == 0
|
||||
def test_range_slice():
    """Partial date-string slicing on an index containing duplicates."""
    idx = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000',
                         '1/4/2000'])
    ser = Series(np.random.randn(len(idx)), index=idx)

    # open-ended slice starting at a duplicated label
    assert_series_equal(ser['1/2/2000':], ser[1:])

    # bounded slice; the right edge is inclusive
    assert_series_equal(ser['1/2/2000':'1/3/2000'], ser[1:4])
|
||||
def test_groupby_average_dup_values(dups):
|
||||
result = dups.groupby(level=0).mean()
|
||||
expected = dups.groupby(dups.index).mean()
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_indexing_over_size_cutoff():
    """#1821: lookups still work past the hash-engine size cutoff."""
    import datetime

    old_cutoff = _index._SIZE_CUTOFF
    try:
        # force the engine into its "large index" code path
        _index._SIZE_CUTOFF = 1000

        # create a large list of non-periodic datetimes
        sec = datetime.timedelta(seconds=1)
        half_sec = datetime.timedelta(microseconds=500000)
        cursor = datetime.datetime(2011, 12, 5, 20, 30)
        n = 1100
        dates = []
        for _ in range(n):
            dates.extend([cursor,
                          cursor + sec,
                          cursor + sec + half_sec,
                          cursor + sec + sec + half_sec])
            cursor += 3 * sec

        # duplicate some values in the list
        for p in np.random.randint(0, len(dates) - 1, 20):
            dates[p + 1] = dates[p]

        df = DataFrame(np.random.randn(len(dates), 4),
                       index=dates,
                       columns=list('ABCD'))

        stamp = df.index[n * 3]
        assert stamp in df.index

        # it works!
        df.loc[stamp]
        assert len(df.loc[[stamp]]) > 0
    finally:
        _index._SIZE_CUTOFF = old_cutoff
|
||||
def test_indexing_unordered():
    """GH 2437/3448: lookups and slices on an unordered datetime index."""
    rng = date_range(start='2011-01-01', end='2011-01-15')
    ts = Series(np.random.rand(len(rng)), index=rng)
    shuffled = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])

    # scalar lookups agree with the ordered original
    for t in ts.index:
        # TODO: unused?
        s = str(t)  # noqa

        assert ts[t] == shuffled[t]

    # GH 3448 (ranges): slicing then sorting matches the ordered slice
    def compare(slobj):
        result = shuffled[slobj].copy().sort_index()
        assert_series_equal(result, ts[slobj])

    compare(slice('2011-01-01', '2011-01-15'))
    compare(slice('2010-12-30', '2011-01-15'))
    compare(slice('2011-01-01', '2011-01-16'))

    # partial ranges
    compare(slice('2011-01-01', '2011-01-6'))
    compare(slice('2011-01-06', '2011-01-8'))
    compare(slice('2011-01-06', '2011-01-12'))

    # single values
    assert_series_equal(shuffled['2011'].sort_index(), ts['2011'])

    # diff freq
    rng = date_range(datetime(2005, 1, 1), periods=20, freq='M')
    ts = Series(np.arange(len(rng)), index=rng)
    ts = ts.take(np.random.permutation(20))

    for t in ts['2005'].index:
        assert t.year == 2005
|
||||
def test_indexing():
    """Partial-string datetime indexing on Series and DataFrame columns."""
    idx = date_range("2001-1-1", periods=20, freq='M')
    ts = Series(np.random.rand(len(idx)), index=idx)

    # getting

    # GH 3070, make sure semantics work on Series/Frame
    expected = ts['2001']
    expected.name = 'A'

    df = DataFrame(dict(A=ts))
    assert_series_equal(expected, df['2001']['A'])

    # setting
    ts['2001'] = 1
    expected = ts['2001']
    expected.name = 'A'

    df.loc['2001', 'A'] = 1

    assert_series_equal(expected, df['2001']['A'])

    # GH3546 (not including times on the last day)
    for freq, end in (('H', '2013-05-31 23:00'), ('S', '2013-05-31 23:59')):
        idx = date_range(start='2013-05-31 00:00', end=end, freq=freq)
        ts = Series(list(range(len(idx))), index=idx)
        assert_series_equal(ts['2013-05'], ts)

    idx = [Timestamp('2013-05-31 00:00'),
           Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))]
    ts = Series(list(range(len(idx))), index=idx)
    assert_series_equal(ts['2013'], ts)

    # GH14826, indexing with a seconds resolution string / datetime object
    df = DataFrame(np.random.rand(5, 5),
                   columns=['open', 'high', 'low', 'close', 'volume'],
                   index=date_range('2012-01-02 18:01:00',
                                    periods=5, tz='US/Central', freq='s'))
    expected = df.loc[[df.index[2]]]

    # this is a single date, so will raise
    with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
        df['2012-01-02 18:01:02']
    msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)"
    with pytest.raises(KeyError, match=msg):
        df[df.index[2]]
|
||||
|
||||
|
||||
"""
|
||||
test NaT support
|
||||
"""
|
||||
|
||||
|
||||
def test_set_none_nan():
    """Setting None or np.nan into a datetime64 Series stores NaT."""
    series = Series(date_range('1/1/2000', periods=10))

    # scalar and slice assignment of None both coerce to NaT
    series[3] = None
    assert series[3] is NaT

    series[3:5] = None
    assert series[4] is NaT

    # np.nan behaves the same way
    series[5] = np.nan
    assert series[5] is NaT

    series[5:7] = np.nan
    assert series[6] is NaT
|
||||
|
||||
|
||||
def test_nat_operations():
    """median/min/max on a timedelta Series skip NaT (GH 8617)."""
    s = Series([0, pd.NaT], dtype='m8[ns]')
    exp = s[0]
    # all three reductions ignore the NaT and return the remaining value
    for reducer in (s.median, s.min, s.max):
        assert reducer() == exp
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ["round", "floor", "ceil"])
@pytest.mark.parametrize('freq', ["s", "5s", "min", "5min", "h", "5h"])
def test_round_nat(method, freq):
    """Rounding a NaT-only Series leaves NaT for every method/frequency (GH14940)."""
    s = Series([pd.NaT])
    expected = Series(pd.NaT)
    rounder = getattr(s.dt, method)
    assert_series_equal(rounder(freq), expected)
|
||||
@@ -0,0 +1,37 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.compat import lrange, range
|
||||
|
||||
from pandas import Series
|
||||
from pandas.util.testing import assert_almost_equal, assert_series_equal
|
||||
|
||||
|
||||
def test_iloc():
    """Positional indexing: scalars, slices (view semantics), integer lists."""
    s = Series(np.random.randn(10), index=list(range(0, 20, 2)))

    # every position agrees with label access through the same slot
    for pos, label in enumerate(s.index):
        assert_almost_equal(s.iloc[pos], s[label])

    # pass a slice
    result = s.iloc[slice(1, 3)]
    assert_series_equal(result, s.loc[2:4])

    # test slice is a view
    result[:] = 0
    assert (s[1:3] == 0).all()

    # list of integers
    positions = [0, 2, 3, 4, 5]
    result = s.iloc[positions]
    assert_series_equal(result, s.reindex(s.index[positions]))
|
||||
|
||||
|
||||
def test_iloc_nonunique():
    """iloc stays purely positional even when the index has duplicate labels."""
    ser = Series([0, 1, 2], index=[0, 1, 0])
    assert ser.iloc[2] == 2
|
||||
@@ -0,0 +1,840 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
""" test get/set & misc """
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange, range
|
||||
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical, DataFrame, MultiIndex, Series, Timedelta, Timestamp)
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
from pandas.tseries.offsets import BDay
|
||||
|
||||
|
||||
def test_basic_indexing():
    """Out-of-bounds positional access and missing labels raise, sorted or not."""
    s = Series(np.random.randn(5), index=['a', 'b', 'a', 'a', 'b'])

    def check_out_of_bounds(ser, get_msg, set_msg):
        # both reading and writing past the end must raise IndexError
        with pytest.raises(IndexError, match=get_msg):
            ser[5]
        with pytest.raises(IndexError, match=set_msg):
            ser[5] = 0

    check_out_of_bounds(
        s,
        "index out of bounds",
        "index 5 is out of bounds for axis 0 with size 5")

    with pytest.raises(KeyError, match=r"^'c'$"):
        s['c']

    # sorting the index must not change the out-of-bounds behaviour
    check_out_of_bounds(
        s.sort_index(),
        r"index out of bounds|^5$",
        r"index 5 is out of bounds for axis (0|1) with size 5|^5$")
|
||||
|
||||
|
||||
def test_basic_getitem_with_labels(test_data):
    """Label-based __getitem__: label arrays, label slices, tz-aware values."""
    indices = test_data.ts.index[[5, 10, 15]]

    assert_series_equal(test_data.ts[indices], test_data.ts.reindex(indices))

    assert_series_equal(test_data.ts[indices[0]:indices[2]],
                        test_data.ts.loc[indices[0]:indices[2]])

    # integer indexes, be careful
    s = Series(np.random.randn(10), index=list(range(0, 20, 2)))
    for key in ([0, 2, 5, 7, 8], np.array([0, 2, 5, 7, 8])):
        # selecting with a list containing missing-style keys warns
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = s[key]
        assert_series_equal(result, s.reindex(key))

    # GH12089
    # with tz for values
    s = Series(pd.date_range("2011-01-01", periods=3, tz="US/Eastern"),
               index=['a', 'b', 'c'])
    expected = Timestamp('2011-01-01', tz='US/Eastern')
    # .loc, .iloc and plain [] all box to the same tz-aware Timestamp
    assert s.loc['a'] == expected
    assert s.iloc[0] == expected
    assert s['a'] == expected
|
||||
|
||||
|
||||
def test_getitem_setitem_ellipsis():
    """Ellipsis selects the whole Series for both reads and writes."""
    s = Series(np.random.randn(10))

    np.fix(s)

    whole = s[...]
    assert_series_equal(whole, s)

    # writing through Ellipsis reaches the same data `whole` sees
    s[...] = 5
    assert (whole == 5).all()
|
||||
|
||||
|
||||
def test_getitem_get(test_data):
    """__getitem__ and .get agree on present labels; .get(None) returns None."""
    for ser in (test_data.series, test_data.objSeries):
        label = ser.index[5]
        # label access, .get, and positional access all see the same value
        assert ser[label] == ser.get(label)
        assert ser[label] == ser[5]

    test_series = test_data.series
    assert test_series.get(-1) == test_series.get(test_series.index[-1])
    assert test_series[5] == test_series.get(test_series.index[5])

    # missing
    d = test_data.ts.index[0] - BDay()
    with pytest.raises(KeyError, match=r"Timestamp\('1999-12-31 00:00:00'\)"):
        test_data.ts[d]

    # None
    # GH 5652
    for s in [Series(), Series(index=list('abc'))]:
        assert s.get(None) is None
|
||||
|
||||
|
||||
def test_getitem_fancy(test_data):
    """Integer-list indexing keeps positional alignment with the source."""
    for ser in (test_data.series, test_data.objSeries):
        fancy = ser[[1, 2, 3]]
        # position 1 of the selection is position 2 of the source
        assert ser.index[2] == fancy.index[1]
        assert ser[2] == fancy[1]
|
||||
|
||||
|
||||
def test_getitem_generator(test_data):
    """Boolean generators and iterators index like an equivalent boolean mask."""
    expected = test_data.series[test_data.series > 0]

    gen = (x > 0 for x in test_data.series)
    assert_series_equal(test_data.series[gen], expected)
    assert_series_equal(test_data.series[iter(test_data.series > 0)], expected)
|
||||
|
||||
|
||||
def test_type_promotion():
    """Enlarging an empty Series promotes dtype to hold the mixed values (GH12599)."""
    s = pd.Series()
    # each enlargement adds a differently-typed value
    for key, value in (("a", pd.Timestamp("2016-01-01")),
                       ("b", 3.0),
                       ("c", "foo")):
        s[key] = value
    expected = Series([pd.Timestamp("2016-01-01"), 3.0, "foo"],
                      index=["a", "b", "c"])
    assert_series_equal(s, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'result_1, duplicate_item, expected_1',
    [
        [
            pd.Series({1: 12, 2: [1, 2, 2, 3]}), pd.Series({1: 313}),
            pd.Series({1: 12, }, dtype=object),
        ],
        [
            pd.Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
            pd.Series({1: [1, 2, 3]}), pd.Series({1: [1, 2, 3], }),
        ],
    ])
def test_getitem_with_duplicates_indices(
        result_1, duplicate_item, expected_1):
    """Selecting a duplicated key returns every matching row (GH 17610)."""
    combined = result_1.append(duplicate_item)
    assert_series_equal(combined[1], expected_1.append(duplicate_item))
    # the non-duplicated key still yields its single value
    assert combined[2] == result_1[2]
|
||||
|
||||
|
||||
def test_getitem_out_of_bounds(test_data):
    """Positional access past the end raises IndexError, never segfaults."""
    # don't segfault, GH #495
    msg = "index out of bounds"
    with pytest.raises(IndexError, match=msg):
        test_data.ts[len(test_data.ts)]

    # GH #917
    empty = Series([])
    with pytest.raises(IndexError, match=msg):
        empty[-1]
|
||||
|
||||
|
||||
def test_getitem_setitem_integers():
    """On a string index, iloc reads and writes the same slot as the label."""
    # caused bug without test
    ser = Series([1, 2, 3], ['a', 'b', 'c'])

    assert ser.iloc[0] == ser['a']
    ser.iloc[0] = 5
    tm.assert_almost_equal(ser['a'], 5)
|
||||
|
||||
|
||||
def test_getitem_box_float64(test_data):
    """Scalar access on a float Series returns a boxed np.float64."""
    assert isinstance(test_data.ts[5], np.float64)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'arr',
    [
        np.random.randn(10),
        tm.makeDateIndex(10, name='a').tz_localize(
            tz='US/Eastern'),
    ])
def test_get(arr):
    """Series.get: label lookup with a None fallback on several index types."""
    # GH 21260
    ser = Series(arr, index=[2 * i for i in range(len(arr))])
    assert ser.get(4) == ser.iloc[2]

    tm.assert_series_equal(ser.get([4, 6]), ser.iloc[[2, 3]])
    tm.assert_series_equal(ser.get(slice(2)), ser.iloc[[0, 1]])

    # labels absent from an integer index return None, not positional hits
    assert ser.get(-1) is None
    assert ser.get(ser.index.max() + 1) is None

    ser = Series(arr[:6], index=list('abcdef'))
    assert ser.get('c') == ser.iloc[2]

    tm.assert_series_equal(ser.get(slice('b', 'd')), ser.iloc[[1, 2, 3]])

    assert ser.get('Z') is None

    # with a non-integer index, integer keys fall back to positions
    assert ser.get(4) == ser.iloc[4]
    assert ser.get(-1) == ser.iloc[-1]
    assert ser.get(len(ser)) is None

    # GH 21257
    ser = pd.Series(arr)
    sliced = ser[::2]
    assert sliced.get(1) is None
|
||||
|
||||
|
||||
def test_series_box_timestamp():
    """Scalar access on datetime64 data returns a boxed pd.Timestamp."""
    rng = pd.date_range('20090415', '20090519', freq='B')
    assert isinstance(Series(rng)[5], pd.Timestamp)

    # same with a datetime index, through both [] and .iat
    rng = pd.date_range('20090415', '20090519', freq='B')
    ser = Series(rng, index=rng)
    assert isinstance(ser[5], pd.Timestamp)

    assert isinstance(ser.iat[5], pd.Timestamp)
|
||||
|
||||
|
||||
def test_getitem_ambiguous_keyerror():
    """An integer missing from an integer index raises KeyError (no positional fallback)."""
    ser = Series(list(range(10)), index=list(range(0, 20, 2)))
    with pytest.raises(KeyError, match=r"^1L?$"):
        ser[1]
    with pytest.raises(KeyError, match=r"^1L?$"):
        ser.loc[1]
|
||||
|
||||
|
||||
def test_getitem_unordered_dup():
    """A unique label in an unordered, partly-duplicated index gives a scalar."""
    obj = Series(list(range(5)), index=['c', 'a', 'a', 'b', 'b'])
    assert is_scalar(obj['c'])
    assert obj['c'] == 0
|
||||
|
||||
|
||||
def test_getitem_dups_with_missing():
    """Label lists mixing duplicates and missing keys go through .loc (GH 4246)."""
    # breaks reindex, so need to use .loc internally
    ser = Series([1, 2, 3, 4], ['foo', 'bar', 'foo', 'bah'])
    keys = ['foo', 'bar', 'bah', 'bam']

    # selecting labels that are partly missing is deprecated, hence the warning
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        expected = ser.loc[keys]

    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        result = ser[keys]
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem_dups():
    """Selecting a duplicated label returns all matching rows in order."""
    ser = Series(range(5), index=['A', 'A', 'B', 'C', 'C'], dtype=np.int64)
    expected = Series([3, 4], index=['C', 'C'], dtype=np.int64)
    assert_series_equal(ser['C'], expected)
|
||||
|
||||
|
||||
def test_setitem_ambiguous_keyerror():
    """Assigning to a missing integer label appends rather than setting by position."""
    s = Series(list(range(10)), index=list(range(0, 20, 2)))
    expected = s.append(Series([5], index=[1]))

    # equivalent of an append, through both plain [] and .loc
    s2 = s.copy()
    s2[1] = 5
    assert_series_equal(s2, expected)

    s2 = s.copy()
    s2.loc[1] = 5
    assert_series_equal(s2, expected)
|
||||
|
||||
|
||||
def test_getitem_dataframe():
    """Indexing a Series with a DataFrame is rejected with a TypeError."""
    rng = list(range(10))
    ser = pd.Series(10, index=rng)
    frame = pd.DataFrame(rng, index=rng)
    msg = ("Indexing a Series with DataFrame is not supported,"
           " use the appropriate DataFrame column")
    with pytest.raises(TypeError, match=msg):
        ser[frame > 5]
|
||||
|
||||
|
||||
def test_setitem(test_data):
    """__setitem__: label, positional, boolean-mask writes and enlargement."""
    test_data.ts[test_data.ts.index[5]] = np.NaN
    test_data.ts[[1, 2, 17]] = np.NaN
    test_data.ts[6] = np.NaN
    assert np.isnan(test_data.ts[6])
    assert np.isnan(test_data.ts[2])
    # boolean-mask assignment overwrites the NaNs just planted
    test_data.ts[np.isnan(test_data.ts)] = 5
    assert not np.isnan(test_data.ts[2])

    # caught this bug when writing tests
    series = Series(tm.makeIntIndex(20).astype(float),
                    index=tm.makeIntIndex(20))

    series[::2] = 0
    assert (series[::2] == 0).all()

    # set item that's not contained
    s = test_data.series.copy()
    s['foobar'] = 1

    app = Series([1], index=['foobar'], name='series')
    assert_series_equal(s, test_data.series.append(app))

    # Test for issue #10193
    key = pd.Timestamp('2012-01-01')
    series = pd.Series()
    series[key] = 47
    assert_series_equal(series, pd.Series(47, [key]))

    # enlarging an empty DatetimeIndex-backed Series keeps the freq
    series = pd.Series([], pd.DatetimeIndex([], freq='D'))
    series[key] = 47
    assert_series_equal(series, pd.Series(47, pd.DatetimeIndex([key], freq='D')))
|
||||
|
||||
|
||||
def test_setitem_dtypes():
    """Assigning np.nan upcasts int and bool Series to float (GH 4463)."""
    # change dtypes
    expected = Series([np.nan, 2, 3])

    def check_first_slot_nan(setter):
        # setting NaN in slot 0 must upcast the whole int Series to float
        s = Series([1, 2, 3])
        setter(s)
        assert_series_equal(s, expected)

    check_first_slot_nan(lambda s: s.iloc.__setitem__(0, np.nan))
    check_first_slot_nan(lambda s: s.loc.__setitem__(0, np.nan))
    check_first_slot_nan(lambda s: s.__setitem__(0, np.nan))

    s = Series([False])
    s.loc[0] = np.nan
    assert_series_equal(s, Series([np.nan]))

    s = Series([False, True])
    s.loc[0] = np.nan
    assert_series_equal(s, Series([np.nan, 1.0]))
|
||||
|
||||
|
||||
def test_set_value(test_data):
    """Deprecated Series.set_value mutates in place and returns the Series."""
    def deprecated_set_value(ser, key, val):
        # set_value is deprecated, so every call must warn
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            return ser.set_value(key, val)

    idx = test_data.ts.index[10]
    res = deprecated_set_value(test_data.ts, idx, 0)
    assert res is test_data.ts
    assert test_data.ts[idx] == 0

    # equiv
    s = test_data.series.copy()
    res = deprecated_set_value(s, 'foobar', 0)
    assert res is s
    assert res.index[-1] == 'foobar'
    assert res['foobar'] == 0

    # .loc enlargement is the supported spelling
    s = test_data.series.copy()
    s.loc['foobar'] = 0
    assert s.index[-1] == 'foobar'
    assert s['foobar'] == 0
|
||||
|
||||
|
||||
def test_setslice(test_data):
    """A slice of a Series keeps a matching, unique index."""
    sliced = test_data.ts[5:20]
    assert len(sliced) == len(sliced.index)
    assert sliced.index.is_unique is True
|
||||
|
||||
|
||||
# FutureWarning from NumPy about [slice(None, 5).
@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
def test_basic_getitem_setitem_corner(test_data):
    """Tuple keys need a MultiIndex; odd slice-lists either work or TypeError."""
    # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
    msg = "Can only tuple-index with a MultiIndex"
    with pytest.raises(ValueError, match=msg):
        test_data.ts[:, 2]
    with pytest.raises(ValueError, match=msg):
        test_data.ts[:, 2] = 2

    # weird lists. [slice(0, 5)] will work but not two slices
    result = test_data.ts[[slice(None, 5)]]
    assert_series_equal(result, test_data.ts[:5])

    # OK
    msg = r"unhashable type(: 'slice')?"
    with pytest.raises(TypeError, match=msg):
        test_data.ts[[5, slice(None, None)]]
    with pytest.raises(TypeError, match=msg):
        test_data.ts[[5, slice(None, None)]] = 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize('tz', ['US/Eastern', 'UTC', 'Asia/Tokyo'])
def test_setitem_with_tz(tz):
    """Scalar and vector setitem keep the datetime64tz dtype intact."""
    orig = pd.Series(pd.date_range('2016-01-01', freq='H', periods=3,
                                   tz=tz))
    assert orig.dtype == 'datetime64[ns, {0}]'.format(tz)

    exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
                     pd.Timestamp('2011-01-01 00:00', tz=tz),
                     pd.Timestamp('2016-01-01 02:00', tz=tz)])

    def set_scalar(setter):
        # scalar assignment through one indexer flavour, checked against exp
        s = orig.copy()
        setter(s)
        tm.assert_series_equal(s, exp)
        return s

    stamp = pd.Timestamp('2011-01-01', tz=tz)
    s = set_scalar(lambda ser: ser.__setitem__(1, stamp))
    set_scalar(lambda ser: ser.loc.__setitem__(1, stamp))
    set_scalar(lambda ser: ser.iloc.__setitem__(1, stamp))

    # vector
    vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
                      pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
    assert vals.dtype == 'datetime64[ns, {0}]'.format(tz)

    exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
                     pd.Timestamp('2011-01-01 00:00', tz=tz),
                     pd.Timestamp('2012-01-01 00:00', tz=tz)])

    s[[1, 2]] = vals
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.loc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.iloc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)
|
||||
|
||||
|
||||
def test_setitem_with_tz_dst():
    """Setitem keeps the tz-aware dtype across a DST transition."""
    # GH XXX
    tz = 'US/Eastern'
    orig = pd.Series(pd.date_range('2016-11-06', freq='H', periods=3,
                                   tz=tz))
    assert orig.dtype == 'datetime64[ns, {0}]'.format(tz)

    # scalar — the surrounding stamps straddle the fall-back hour
    exp = pd.Series([pd.Timestamp('2016-11-06 00:00-04:00', tz=tz),
                     pd.Timestamp('2011-01-01 00:00-05:00', tz=tz),
                     pd.Timestamp('2016-11-06 01:00-05:00', tz=tz)])

    def set_scalar(setter):
        s = orig.copy()
        setter(s)
        tm.assert_series_equal(s, exp)
        return s

    stamp = pd.Timestamp('2011-01-01', tz=tz)
    s = set_scalar(lambda ser: ser.__setitem__(1, stamp))
    set_scalar(lambda ser: ser.loc.__setitem__(1, stamp))
    set_scalar(lambda ser: ser.iloc.__setitem__(1, stamp))

    # vector
    vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
                      pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
    assert vals.dtype == 'datetime64[ns, {0}]'.format(tz)

    exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz),
                     pd.Timestamp('2011-01-01 00:00', tz=tz),
                     pd.Timestamp('2012-01-01 00:00', tz=tz)])

    s[[1, 2]] = vals
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.loc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.iloc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)
|
||||
|
||||
|
||||
def test_categorial_assigning_ops():
    """Assignments into a categorical Series keep the categories fixed."""
    orig = Series(Categorical(["b", "b"], categories=["a", "b"]))

    s = orig.copy()
    s[:] = "a"
    tm.assert_series_equal(
        s, Series(Categorical(["a", "a"], categories=["a", "b"])))

    # single-position, boolean-array and boolean-mask writes all yield ["b", "a"]
    exp_one = Series(Categorical(["b", "a"], categories=["a", "b"]))
    for assign in (lambda ser: ser.__setitem__(1, "a"),
                   lambda ser: ser.__setitem__(ser.index > 0, "a"),
                   lambda ser: ser.__setitem__([False, True], "a")):
        s = orig.copy()
        assign(s)
        tm.assert_series_equal(s, exp_one)

    s = orig.copy()
    s.index = ["x", "y"]
    s["y"] = "a"
    tm.assert_series_equal(
        s, Series(Categorical(["b", "a"], categories=["a", "b"]),
                  index=["x", "y"]))

    # ensure that one can set something to np.nan
    s = Series(Categorical([1, 2, 3]))
    s[1] = np.nan
    tm.assert_series_equal(
        s, Series(Categorical([1, np.nan, 3], categories=[1, 2, 3])))
|
||||
|
||||
|
||||
def test_slice(test_data):
    """Slicing semantics: label alignment, contents, and view behaviour."""
    numSlice = test_data.series[10:20]
    numSliceEnd = test_data.series[-10:]
    objSlice = test_data.objSeries[10:20]

    # the element just before the slice start is excluded
    assert test_data.series.index[9] not in numSlice.index
    assert test_data.objSeries.index[9] not in objSlice.index

    assert len(numSlice) == len(numSlice.index)
    assert test_data.series[numSlice.index[0]] == numSlice[numSlice.index[0]]

    assert numSlice.index[1] == test_data.series.index[11]
    assert tm.equalContents(numSliceEnd, np.array(test_data.series)[-10:])

    # Test return view.
    view = test_data.series[10:20]
    view[:] = 0

    assert (test_data.series[10:20] == 0).all()
|
||||
|
||||
|
||||
def test_slice_can_reorder_not_uniquely_indexed():
    """Reversing a Series with duplicate labels must not raise."""
    ser = Series(1, index=['a', 'a', 'b', 'b', 'c'])
    ser[::-1]  # it works!
|
||||
|
||||
|
||||
def test_ix_setitem(test_data):
    """.loc/.iloc writes mirror the equivalent positional writes."""
    inds = test_data.series.index[[3, 4, 7]]

    written = test_data.series.copy()
    written.loc[inds] = 5

    mirror = test_data.series.copy()
    mirror[[3, 4, 7]] = 5
    assert_series_equal(written, mirror)

    written.iloc[5:10] = 10
    mirror[5:10] = 10
    assert_series_equal(written, mirror)

    # set slice with indices
    d1, d2 = test_data.series.index[[5, 15]]
    written.loc[d1:d2] = 6
    mirror[5:16] = 6  # because it's inclusive
    assert_series_equal(written, mirror)

    # set index value
    test_data.series.loc[d1] = 4
    test_data.series.loc[d2] = 6
    assert test_data.series[d1] == 4
    assert test_data.series[d2] == 6
|
||||
|
||||
|
||||
def test_setitem_na():
    """NaN assignment upcasts int and bool Series to float."""
    # these induce dtype changes
    s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10])
    s[::2] = np.nan
    assert_series_equal(
        s, Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]))

    # gets coerced to float, right?
    s = Series([True, True, False, False])
    s[::2] = np.nan
    assert_series_equal(s, Series([np.nan, 1, np.nan, 0]))

    s = Series(np.arange(10))
    s[:5] = np.nan
    assert_series_equal(s, Series([np.nan, np.nan, np.nan, np.nan, np.nan,
                                   5, 6, 7, 8, 9]))
|
||||
|
||||
|
||||
def test_timedelta_assignment():
    """Timedelta scalars survive enlargement, reindex, fillna and loc-set (GH 8209)."""
    s = Series([])
    s.loc['B'] = timedelta(1)
    tm.assert_series_equal(s, Series(Timedelta('1 days'), index=['B']))

    s = s.reindex(s.index.insert(0, 'A'))
    tm.assert_series_equal(s, Series(
        [np.nan, Timedelta('1 days')], index=['A', 'B']))

    expected = Series(Timedelta('1 days'), index=['A', 'B'])
    tm.assert_series_equal(s.fillna(timedelta(1)), expected)

    s.loc['A'] = timedelta(1)
    tm.assert_series_equal(s, expected)

    # GH 14155
    s = Series(10 * [np.timedelta64(10, 'm')])
    s.loc[[1, 2, 3]] = np.timedelta64(20, 'm')
    expected = pd.Series(10 * [np.timedelta64(10, 'm')])
    expected.loc[[1, 2, 3]] = pd.Timedelta(np.timedelta64(20, 'm'))
    tm.assert_series_equal(s, expected)
|
||||
|
||||
|
||||
def test_underlying_data_conversion():
    """Chained setitem/.update() writes must reach the frame's underlying data."""
    # GH 4080: update through a column selection on a MultiIndex-ed frame
    df = DataFrame({c: [1, 2, 3] for c in ['a', 'b', 'c']})
    df.set_index(['a', 'b', 'c'], inplace=True)
    s = Series([1], index=[(2, 2, 2)])
    df['val'] = 0
    # NOTE(review): bare `df` expression presumably forces block
    # consolidation before the chained update — confirm
    df
    df['val'].update(s)

    expected = DataFrame(
        dict(a=[1, 2, 3], b=[1, 2, 3], c=[1, 2, 3], val=[0, 1, 0]))
    expected.set_index(['a', 'b', 'c'], inplace=True)
    tm.assert_frame_equal(df, expected)

    # GH 3970
    # these are chained assignments as well
    # disable the chained-assignment check so the writes below are allowed
    pd.set_option('chained_assignment', None)
    df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
    df["cc"] = 0.0

    ck = [True] * len(df)

    df["bb"].iloc[0] = .13

    # TODO: unused
    df_tmp = df.iloc[ck]  # noqa

    # the second chained write must still hit the original frame data
    df["bb"].iloc[0] = .15
    assert df['bb'].iloc[0] == 0.15
    # restore the default so later tests see chained assignment errors again
    pd.set_option('chained_assignment', 'raise')

    # GH 3217: update an all-NaN column in place
    df = DataFrame(dict(a=[1, 3], b=[np.nan, 2]))
    df['c'] = np.nan
    df['c'].update(pd.Series(['foo'], index=[0]))

    expected = DataFrame(dict(a=[1, 3], b=[np.nan, 2], c=['foo', np.nan]))
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_preserve_refs(test_data):
    """Fancy-indexed selections are copies: mutating them leaves the parent alone."""
    selection = test_data.ts[[5, 10, 15]]
    selection[1] = np.NaN
    assert not np.isnan(test_data.ts[10])
|
||||
|
||||
|
||||
def test_cast_on_putmask():
    """Boolean-mask assignment from an aligned int64 Series keeps int64 (GH 2746)."""
    # need to upcast
    ser = Series([1, 2], index=[1, 2], dtype='int64')
    ser[[True, False]] = Series([0], index=[1], dtype='int64')
    assert_series_equal(ser, Series([0, 2], index=[1, 2], dtype='int64'))
|
||||
|
||||
|
||||
def test_type_promote_putmask():
    """Masked assignment that changes dtype must keep alignment (GH8387)."""
    ts = Series(np.random.randn(100), index=np.arange(100, 0, -1)).round(5)
    left, mask = ts.copy(), ts > 0
    right = ts[mask].copy().map(str)
    left[mask] = right
    # positives become strings; everything else is untouched
    assert_series_equal(left, ts.map(lambda t: str(t) if t > 0 else t))

    s = Series([0, 1, 2, 0])
    mask = s > 0
    s[mask] = s[mask].map(str)
    assert_series_equal(s, Series([0, '1', '2', 0]))

    # writing a selection back through its own mask is a no-op
    s = Series([0, 'foo', 'bar', 0])
    mask = Series([False, True, True, False])
    s[mask] = s[mask]
    assert_series_equal(s, Series([0, 'foo', 'bar', 0]))
|
||||
|
||||
|
||||
def test_multilevel_preserve_name():
    """Partial indexing on a MultiIndex keeps the Series name."""
    index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
                                                             'three']],
                       codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                              [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                       names=['first', 'second'])
    ser = Series(np.random.randn(len(index)), index=index, name='sth')

    # both plain [] and .loc partial selection keep the name
    assert ser['foo'].name == ser.name
    assert ser.loc['foo'].name == ser.name
|
||||
|
||||
|
||||
def test_setitem_scalar_into_readonly_backing_data():
    """Writing a scalar through a Series over a read-only buffer raises (GH14359)."""
    array = np.zeros(5)
    array.flags.writeable = False  # make the array immutable
    series = Series(array)

    msg = "assignment destination is read-only"
    for n in range(len(series)):
        with pytest.raises(ValueError, match=msg):
            series[n] = 1

        # the failed write must not have touched the buffer
        assert array[n] == 0
|
||||
|
||||
|
||||
def test_setitem_slice_into_readonly_backing_data():
    """Writing a slice through a Series over a read-only buffer raises (GH14359)."""
    array = np.zeros(5)
    array.flags.writeable = False  # make the array immutable
    series = Series(array)

    msg = "assignment destination is read-only"
    with pytest.raises(ValueError, match=msg):
        series[1:3] = 1

    # the failed write must not have touched the buffer
    assert not array.any()
|
||||
|
||||
|
||||
"""
|
||||
miscellaneous methods
|
||||
"""
|
||||
|
||||
|
||||
def test_select(test_data):
    """Deprecated Series.select filters by an index predicate (gh-12410)."""
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        n = len(test_data.ts)
        # keep the second half of the index
        result = test_data.ts.select(lambda x: x >= test_data.ts.index[n // 2])
        expected = test_data.ts.reindex(test_data.ts.index[n // 2:])
        assert_series_equal(result, expected)

        # keep only Wednesdays
        result = test_data.ts.select(lambda x: x.weekday() == 2)
        expected = test_data.ts[test_data.ts.index.weekday == 2]
        assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_pop():
    """Series.pop removes the label and returns its value (GH 6600)."""
    df = DataFrame({'A': 0, 'B': np.arange(5, dtype='int64'), 'C': 0, })
    row = df.iloc[4]

    assert row.pop('B') == 4

    # the popped label is gone, everything else survives
    assert_series_equal(row, Series([0, 0], index=['A', 'C'], name=4))
|
||||
|
||||
|
||||
def test_take():
    """Series.take: positive/negative positions, OOB errors, convert kw warns."""
    s = Series([-1, 5, 6, 2, 4])

    tm.assert_series_equal(s.take([1, 3, 4]),
                           Series([5, 2, 4], index=[1, 3, 4]))
    # negative positions count from the end
    tm.assert_series_equal(s.take([-1, 3, 4]),
                           Series([4, 2, 4], index=[4, 3, 4]))

    msg = "index {} is out of bounds for size 5"
    for bad, positions in ((10, [1, 10]), (5, [2, 5])):
        with pytest.raises(IndexError, match=msg.format(bad)):
            s.take(positions)

    # the deprecated `convert` keyword still warns
    with tm.assert_produces_warning(FutureWarning):
        s.take([-1, 3, 4], convert=False)
|
||||
|
||||
|
||||
def test_take_categorical():
    """take on a categorical Series preserves categories and labels."""
    # https://github.com/pandas-dev/pandas/issues/20664
    ser = Series(pd.Categorical(['a', 'b', 'c']))
    taken = ser.take([-2, -2, 0])
    expected = Series(pd.Categorical(['b', 'b', 'a'],
                                     categories=['a', 'b', 'c']),
                      index=[1, 1, 0])
    assert_series_equal(taken, expected)
|
||||
|
||||
|
||||
def test_head_tail(test_data):
    """head/tail are sugar for the corresponding slices."""
    ser = test_data.series
    assert_series_equal(ser.head(), ser[:5])
    assert_series_equal(ser.head(0), ser[0:0])
    assert_series_equal(ser.tail(), ser[-5:])
    assert_series_equal(ser.tail(0), ser[0:0])
|
||||
@@ -0,0 +1,168 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Series, Timestamp
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val,expected", [
    (2**63 - 1, 3),
    (2**63, 4),
])
def test_loc_uint64(val, expected):
    """.loc handles keys beyond the int64 range (see gh-19399)."""
    ser = Series({2**63 - 1: 3, 2**63: 4})
    assert ser.loc[val] == expected
|
||||
|
||||
|
||||
def test_loc_getitem(test_data):
|
||||
inds = test_data.series.index[[3, 4, 7]]
|
||||
assert_series_equal(
|
||||
test_data.series.loc[inds],
|
||||
test_data.series.reindex(inds))
|
||||
assert_series_equal(test_data.series.iloc[5::2], test_data.series[5::2])
|
||||
|
||||
# slice with indices
|
||||
d1, d2 = test_data.ts.index[[5, 15]]
|
||||
result = test_data.ts.loc[d1:d2]
|
||||
expected = test_data.ts.truncate(d1, d2)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# boolean
|
||||
mask = test_data.series > test_data.series.median()
|
||||
assert_series_equal(test_data.series.loc[mask], test_data.series[mask])
|
||||
|
||||
# ask for index value
|
||||
assert test_data.ts.loc[d1] == test_data.ts[d1]
|
||||
assert test_data.ts.loc[d2] == test_data.ts[d2]
|
||||
|
||||
|
||||
def test_loc_getitem_not_monotonic(test_data):
|
||||
d1, d2 = test_data.ts.index[[5, 15]]
|
||||
|
||||
ts2 = test_data.ts[::2][[1, 2, 0]]
|
||||
|
||||
msg = r"Timestamp\('2000-01-10 00:00:00'\)"
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
ts2.loc[d1:d2]
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
ts2.loc[d1:d2] = 0
|
||||
|
||||
|
||||
def test_loc_getitem_setitem_integer_slice_keyerrors():
|
||||
s = Series(np.random.randn(10), index=lrange(0, 20, 2))
|
||||
|
||||
# this is OK
|
||||
cp = s.copy()
|
||||
cp.iloc[4:10] = 0
|
||||
assert (cp.iloc[4:10] == 0).all()
|
||||
|
||||
# so is this
|
||||
cp = s.copy()
|
||||
cp.iloc[3:11] = 0
|
||||
assert (cp.iloc[3:11] == 0).values.all()
|
||||
|
||||
result = s.iloc[2:6]
|
||||
result2 = s.loc[3:11]
|
||||
expected = s.reindex([4, 6, 8, 10])
|
||||
|
||||
assert_series_equal(result, expected)
|
||||
assert_series_equal(result2, expected)
|
||||
|
||||
# non-monotonic, raise KeyError
|
||||
s2 = s.iloc[lrange(5) + lrange(5, 10)[::-1]]
|
||||
with pytest.raises(KeyError, match=r"^3L?$"):
|
||||
s2.loc[3:11]
|
||||
with pytest.raises(KeyError, match=r"^3L?$"):
|
||||
s2.loc[3:11] = 0
|
||||
|
||||
|
||||
def test_loc_getitem_iterator(test_data):
|
||||
idx = iter(test_data.series.index[:10])
|
||||
result = test_data.series.loc[idx]
|
||||
assert_series_equal(result, test_data.series[:10])
|
||||
|
||||
|
||||
def test_loc_setitem_boolean(test_data):
|
||||
mask = test_data.series > test_data.series.median()
|
||||
|
||||
result = test_data.series.copy()
|
||||
result.loc[mask] = 0
|
||||
expected = test_data.series
|
||||
expected[mask] = 0
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_setitem_corner(test_data):
|
||||
inds = list(test_data.series.index[[5, 8, 12]])
|
||||
test_data.series.loc[inds] = 5
|
||||
msg = r"\['foo'\] not in index"
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
test_data.series.loc[inds + ['foo']] = 5
|
||||
|
||||
|
||||
def test_basic_setitem_with_labels(test_data):
|
||||
indices = test_data.ts.index[[5, 10, 15]]
|
||||
|
||||
cp = test_data.ts.copy()
|
||||
exp = test_data.ts.copy()
|
||||
cp[indices] = 0
|
||||
exp.loc[indices] = 0
|
||||
assert_series_equal(cp, exp)
|
||||
|
||||
cp = test_data.ts.copy()
|
||||
exp = test_data.ts.copy()
|
||||
cp[indices[0]:indices[2]] = 0
|
||||
exp.loc[indices[0]:indices[2]] = 0
|
||||
assert_series_equal(cp, exp)
|
||||
|
||||
# integer indexes, be careful
|
||||
s = Series(np.random.randn(10), index=lrange(0, 20, 2))
|
||||
inds = [0, 4, 6]
|
||||
arr_inds = np.array([0, 4, 6])
|
||||
|
||||
cp = s.copy()
|
||||
exp = s.copy()
|
||||
s[inds] = 0
|
||||
s.loc[inds] = 0
|
||||
assert_series_equal(cp, exp)
|
||||
|
||||
cp = s.copy()
|
||||
exp = s.copy()
|
||||
s[arr_inds] = 0
|
||||
s.loc[arr_inds] = 0
|
||||
assert_series_equal(cp, exp)
|
||||
|
||||
inds_notfound = [0, 4, 5, 6]
|
||||
arr_inds_notfound = np.array([0, 4, 5, 6])
|
||||
msg = r"\[5\] not contained in the index"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s[inds_notfound] = 0
|
||||
with pytest.raises(Exception, match=msg):
|
||||
s[arr_inds_notfound] = 0
|
||||
|
||||
# GH12089
|
||||
# with tz for values
|
||||
s = Series(pd.date_range("2011-01-01", periods=3, tz="US/Eastern"),
|
||||
index=['a', 'b', 'c'])
|
||||
s2 = s.copy()
|
||||
expected = Timestamp('2011-01-03', tz='US/Eastern')
|
||||
s2.loc['a'] = expected
|
||||
result = s2.loc['a']
|
||||
assert result == expected
|
||||
|
||||
s2 = s.copy()
|
||||
s2.iloc[0] = expected
|
||||
result = s2.iloc[0]
|
||||
assert result == expected
|
||||
|
||||
s2 = s.copy()
|
||||
s2['a'] = expected
|
||||
result = s2['a']
|
||||
assert result == expected
|
||||
@@ -0,0 +1,259 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange, range
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, Series
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
|
||||
def test_get():
|
||||
# GH 6383
|
||||
s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56, 45,
|
||||
51, 39, 55, 43, 54, 52, 51, 54]))
|
||||
|
||||
result = s.get(25, 0)
|
||||
expected = 0
|
||||
assert result == expected
|
||||
|
||||
s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56,
|
||||
45, 51, 39, 55, 43, 54, 52, 51, 54]),
|
||||
index=pd.Float64Index(
|
||||
[25.0, 36.0, 49.0, 64.0, 81.0, 100.0,
|
||||
121.0, 144.0, 169.0, 196.0, 1225.0,
|
||||
1296.0, 1369.0, 1444.0, 1521.0, 1600.0,
|
||||
1681.0, 1764.0, 1849.0, 1936.0],
|
||||
dtype='object'))
|
||||
|
||||
result = s.get(25, 0)
|
||||
expected = 43
|
||||
assert result == expected
|
||||
|
||||
# GH 7407
|
||||
# with a boolean accessor
|
||||
df = pd.DataFrame({'i': [0] * 3, 'b': [False] * 3})
|
||||
vc = df.i.value_counts()
|
||||
result = vc.get(99, default='Missing')
|
||||
assert result == 'Missing'
|
||||
|
||||
vc = df.b.value_counts()
|
||||
result = vc.get(False, default='Missing')
|
||||
assert result == 3
|
||||
|
||||
result = vc.get(True, default='Missing')
|
||||
assert result == 'Missing'
|
||||
|
||||
|
||||
def test_get_nan():
|
||||
# GH 8569
|
||||
s = pd.Float64Index(range(10)).to_series()
|
||||
assert s.get(np.nan) is None
|
||||
assert s.get(np.nan, default='Missing') == 'Missing'
|
||||
|
||||
|
||||
def test_get_nan_multiple():
|
||||
# GH 8569
|
||||
# ensure that fixing "test_get_nan" above hasn't broken get
|
||||
# with multiple elements
|
||||
s = pd.Float64Index(range(10)).to_series()
|
||||
|
||||
idx = [2, 30]
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
assert_series_equal(s.get(idx),
|
||||
Series([2, np.nan], index=idx))
|
||||
|
||||
idx = [2, np.nan]
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
assert_series_equal(s.get(idx),
|
||||
Series([2, np.nan], index=idx))
|
||||
|
||||
# GH 17295 - all missing keys
|
||||
idx = [20, 30]
|
||||
assert(s.get(idx) is None)
|
||||
|
||||
idx = [np.nan, np.nan]
|
||||
assert(s.get(idx) is None)
|
||||
|
||||
|
||||
def test_delitem():
|
||||
# GH 5542
|
||||
# should delete the item inplace
|
||||
s = Series(lrange(5))
|
||||
del s[0]
|
||||
|
||||
expected = Series(lrange(1, 5), index=lrange(1, 5))
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
del s[1]
|
||||
expected = Series(lrange(2, 5), index=lrange(2, 5))
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
# empty
|
||||
s = Series()
|
||||
|
||||
with pytest.raises(KeyError, match=r"^0$"):
|
||||
del s[0]
|
||||
|
||||
# only 1 left, del, add, del
|
||||
s = Series(1)
|
||||
del s[0]
|
||||
assert_series_equal(s, Series(dtype='int64', index=Index(
|
||||
[], dtype='int64')))
|
||||
s[0] = 1
|
||||
assert_series_equal(s, Series(1))
|
||||
del s[0]
|
||||
assert_series_equal(s, Series(dtype='int64', index=Index(
|
||||
[], dtype='int64')))
|
||||
|
||||
# Index(dtype=object)
|
||||
s = Series(1, index=['a'])
|
||||
del s['a']
|
||||
assert_series_equal(s, Series(dtype='int64', index=Index(
|
||||
[], dtype='object')))
|
||||
s['a'] = 1
|
||||
assert_series_equal(s, Series(1, index=['a']))
|
||||
del s['a']
|
||||
assert_series_equal(s, Series(dtype='int64', index=Index(
|
||||
[], dtype='object')))
|
||||
|
||||
|
||||
def test_slice_float64():
|
||||
values = np.arange(10., 50., 2)
|
||||
index = Index(values)
|
||||
|
||||
start, end = values[[5, 15]]
|
||||
|
||||
s = Series(np.random.randn(20), index=index)
|
||||
|
||||
result = s[start:end]
|
||||
expected = s.iloc[5:16]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = s.loc[start:end]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
df = DataFrame(np.random.randn(20, 3), index=index)
|
||||
|
||||
result = df[start:end]
|
||||
expected = df.iloc[5:16]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[start:end]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem_negative_out_of_bounds():
|
||||
s = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10))
|
||||
|
||||
msg = "index out of bounds"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
s[-11]
|
||||
msg = "index -11 is out of bounds for axis 0 with size 10"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
s[-11] = 'foo'
|
||||
|
||||
|
||||
def test_getitem_regression():
|
||||
s = Series(lrange(5), index=lrange(5))
|
||||
result = s[lrange(5)]
|
||||
assert_series_equal(result, s)
|
||||
|
||||
|
||||
def test_getitem_setitem_slice_bug():
|
||||
s = Series(lrange(10), lrange(10))
|
||||
result = s[-12:]
|
||||
assert_series_equal(result, s)
|
||||
|
||||
result = s[-7:]
|
||||
assert_series_equal(result, s[3:])
|
||||
|
||||
result = s[:-12]
|
||||
assert_series_equal(result, s[:0])
|
||||
|
||||
s = Series(lrange(10), lrange(10))
|
||||
s[-12:] = 0
|
||||
assert (s == 0).all()
|
||||
|
||||
s[:-12] = 5
|
||||
assert (s == 0).all()
|
||||
|
||||
|
||||
def test_getitem_setitem_slice_integers():
|
||||
s = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16])
|
||||
|
||||
result = s[:4]
|
||||
expected = s.reindex([2, 4, 6, 8])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
s[:4] = 0
|
||||
assert (s[:4] == 0).all()
|
||||
assert not (s[4:] == 0).any()
|
||||
|
||||
|
||||
def test_setitem_float_labels():
|
||||
# note labels are floats
|
||||
s = Series(['a', 'b', 'c'], index=[0, 0.5, 1])
|
||||
tmp = s.copy()
|
||||
|
||||
s.loc[1] = 'zoo'
|
||||
tmp.iloc[2] = 'zoo'
|
||||
|
||||
assert_series_equal(s, tmp)
|
||||
|
||||
|
||||
def test_slice_float_get_set(test_data):
|
||||
msg = (r"cannot do slice indexing on <class 'pandas\.core\.indexes"
|
||||
r"\.datetimes\.DatetimeIndex'> with these indexers \[{key}\]"
|
||||
r" of <(class|type) 'float'>")
|
||||
with pytest.raises(TypeError, match=msg.format(key=r"4\.0")):
|
||||
test_data.ts[4.0:10.0]
|
||||
|
||||
with pytest.raises(TypeError, match=msg.format(key=r"4\.0")):
|
||||
test_data.ts[4.0:10.0] = 0
|
||||
|
||||
with pytest.raises(TypeError, match=msg.format(key=r"4\.5")):
|
||||
test_data.ts[4.5:10.0]
|
||||
with pytest.raises(TypeError, match=msg.format(key=r"4\.5")):
|
||||
test_data.ts[4.5:10.0] = 0
|
||||
|
||||
|
||||
def test_slice_floats2():
|
||||
s = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float))
|
||||
|
||||
assert len(s.loc[12.0:]) == 8
|
||||
assert len(s.loc[12.5:]) == 7
|
||||
|
||||
i = np.arange(10, 20, dtype=float)
|
||||
i[2] = 12.2
|
||||
s.index = i
|
||||
assert len(s.loc[12.0:]) == 8
|
||||
assert len(s.loc[12.5:]) == 7
|
||||
|
||||
|
||||
def test_int_indexing():
|
||||
s = Series(np.random.randn(6), index=[0, 0, 1, 1, 2, 2])
|
||||
|
||||
with pytest.raises(KeyError, match=r"^5$"):
|
||||
s[5]
|
||||
|
||||
with pytest.raises(KeyError, match=r"^'c'$"):
|
||||
s['c']
|
||||
|
||||
# not monotonic
|
||||
s = Series(np.random.randn(6), index=[2, 2, 0, 0, 1, 1])
|
||||
|
||||
with pytest.raises(KeyError, match=r"^5$"):
|
||||
s[5]
|
||||
|
||||
with pytest.raises(KeyError, match=r"^'c'$"):
|
||||
s['c']
|
||||
|
||||
|
||||
def test_getitem_int64(test_data):
|
||||
idx = np.int64(5)
|
||||
assert test_data.ts[idx] == test_data.ts[5]
|
||||
@@ -0,0 +1,336 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import lrange, range, zip
|
||||
|
||||
from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSeriesAlterAxes(object):
|
||||
|
||||
def test_setindex(self, string_series):
|
||||
# wrong type
|
||||
msg = (r"Index\(\.\.\.\) must be called with a collection of some"
|
||||
r" kind, None was passed")
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
string_series.index = None
|
||||
|
||||
# wrong length
|
||||
msg = ("Length mismatch: Expected axis has 30 elements, new"
|
||||
" values have 29 elements")
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
string_series.index = np.arange(len(string_series) - 1)
|
||||
|
||||
# works
|
||||
string_series.index = np.arange(len(string_series))
|
||||
assert isinstance(string_series.index, Index)
|
||||
|
||||
# Renaming
|
||||
|
||||
def test_rename(self, datetime_series):
|
||||
ts = datetime_series
|
||||
renamer = lambda x: x.strftime('%Y%m%d')
|
||||
renamed = ts.rename(renamer)
|
||||
assert renamed.index[0] == renamer(ts.index[0])
|
||||
|
||||
# dict
|
||||
rename_dict = dict(zip(ts.index, renamed.index))
|
||||
renamed2 = ts.rename(rename_dict)
|
||||
tm.assert_series_equal(renamed, renamed2)
|
||||
|
||||
# partial dict
|
||||
s = Series(np.arange(4), index=['a', 'b', 'c', 'd'], dtype='int64')
|
||||
renamed = s.rename({'b': 'foo', 'd': 'bar'})
|
||||
tm.assert_index_equal(renamed.index, Index(['a', 'foo', 'c', 'bar']))
|
||||
|
||||
# index with name
|
||||
renamer = Series(np.arange(4),
|
||||
index=Index(['a', 'b', 'c', 'd'], name='name'),
|
||||
dtype='int64')
|
||||
renamed = renamer.rename({})
|
||||
assert renamed.index.name == renamer.index.name
|
||||
|
||||
def test_rename_by_series(self):
|
||||
s = Series(range(5), name='foo')
|
||||
renamer = Series({1: 10, 2: 20})
|
||||
result = s.rename(renamer)
|
||||
expected = Series(range(5), index=[0, 10, 20, 3, 4], name='foo')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_rename_set_name(self):
|
||||
s = Series(range(4), index=list('abcd'))
|
||||
for name in ['foo', 123, 123., datetime(2001, 11, 11), ('foo',)]:
|
||||
result = s.rename(name)
|
||||
assert result.name == name
|
||||
tm.assert_numpy_array_equal(result.index.values, s.index.values)
|
||||
assert s.name is None
|
||||
|
||||
def test_rename_set_name_inplace(self):
|
||||
s = Series(range(3), index=list('abc'))
|
||||
for name in ['foo', 123, 123., datetime(2001, 11, 11), ('foo',)]:
|
||||
s.rename(name, inplace=True)
|
||||
assert s.name == name
|
||||
|
||||
exp = np.array(['a', 'b', 'c'], dtype=np.object_)
|
||||
tm.assert_numpy_array_equal(s.index.values, exp)
|
||||
|
||||
def test_rename_axis_supported(self):
|
||||
# Supporting axis for compatibility, detailed in GH-18589
|
||||
s = Series(range(5))
|
||||
s.rename({}, axis=0)
|
||||
s.rename({}, axis='index')
|
||||
with pytest.raises(ValueError, match='No axis named 5'):
|
||||
s.rename({}, axis=5)
|
||||
|
||||
def test_set_name_attribute(self):
|
||||
s = Series([1, 2, 3])
|
||||
s2 = Series([1, 2, 3], name='bar')
|
||||
for name in [7, 7., 'name', datetime(2001, 1, 1), (1,), u"\u05D0"]:
|
||||
s.name = name
|
||||
assert s.name == name
|
||||
s2.name = name
|
||||
assert s2.name == name
|
||||
|
||||
def test_set_name(self):
|
||||
s = Series([1, 2, 3])
|
||||
s2 = s._set_name('foo')
|
||||
assert s2.name == 'foo'
|
||||
assert s.name is None
|
||||
assert s is not s2
|
||||
|
||||
def test_rename_inplace(self, datetime_series):
|
||||
renamer = lambda x: x.strftime('%Y%m%d')
|
||||
expected = renamer(datetime_series.index[0])
|
||||
|
||||
datetime_series.rename(renamer, inplace=True)
|
||||
assert datetime_series.index[0] == expected
|
||||
|
||||
def test_set_index_makes_timeseries(self):
|
||||
idx = tm.makeDateIndex(10)
|
||||
|
||||
s = Series(lrange(10))
|
||||
s.index = idx
|
||||
assert s.index.is_all_dates
|
||||
|
||||
def test_reset_index(self):
|
||||
df = tm.makeDataFrame()[:5]
|
||||
ser = df.stack()
|
||||
ser.index.names = ['hash', 'category']
|
||||
|
||||
ser.name = 'value'
|
||||
df = ser.reset_index()
|
||||
assert 'value' in df
|
||||
|
||||
df = ser.reset_index(name='value2')
|
||||
assert 'value2' in df
|
||||
|
||||
# check inplace
|
||||
s = ser.reset_index(drop=True)
|
||||
s2 = ser
|
||||
s2.reset_index(drop=True, inplace=True)
|
||||
tm.assert_series_equal(s, s2)
|
||||
|
||||
# level
|
||||
index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
|
||||
codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2],
|
||||
[0, 1, 0, 1, 0, 1]])
|
||||
s = Series(np.random.randn(6), index=index)
|
||||
rs = s.reset_index(level=1)
|
||||
assert len(rs.columns) == 2
|
||||
|
||||
rs = s.reset_index(level=[0, 2], drop=True)
|
||||
tm.assert_index_equal(rs.index, Index(index.get_level_values(1)))
|
||||
assert isinstance(rs, Series)
|
||||
|
||||
def test_reset_index_name(self):
|
||||
s = Series([1, 2, 3], index=Index(range(3), name='x'))
|
||||
assert s.reset_index().index.name is None
|
||||
assert s.reset_index(drop=True).index.name is None
|
||||
|
||||
def test_reset_index_level(self):
|
||||
df = DataFrame([[1, 2, 3], [4, 5, 6]],
|
||||
columns=['A', 'B', 'C'])
|
||||
|
||||
for levels in ['A', 'B'], [0, 1]:
|
||||
# With MultiIndex
|
||||
s = df.set_index(['A', 'B'])['C']
|
||||
|
||||
result = s.reset_index(level=levels[0])
|
||||
tm.assert_frame_equal(result, df.set_index('B'))
|
||||
|
||||
result = s.reset_index(level=levels[:1])
|
||||
tm.assert_frame_equal(result, df.set_index('B'))
|
||||
|
||||
result = s.reset_index(level=levels)
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
result = df.set_index(['A', 'B']).reset_index(level=levels,
|
||||
drop=True)
|
||||
tm.assert_frame_equal(result, df[['C']])
|
||||
|
||||
with pytest.raises(KeyError, match='Level E '):
|
||||
s.reset_index(level=['A', 'E'])
|
||||
|
||||
# With single-level Index
|
||||
s = df.set_index('A')['B']
|
||||
|
||||
result = s.reset_index(level=levels[0])
|
||||
tm.assert_frame_equal(result, df[['A', 'B']])
|
||||
|
||||
result = s.reset_index(level=levels[:1])
|
||||
tm.assert_frame_equal(result, df[['A', 'B']])
|
||||
|
||||
result = s.reset_index(level=levels[0], drop=True)
|
||||
tm.assert_series_equal(result, df['B'])
|
||||
|
||||
with pytest.raises(IndexError, match='Too many levels'):
|
||||
s.reset_index(level=[0, 1, 2])
|
||||
|
||||
# Check that .reset_index([],drop=True) doesn't fail
|
||||
result = Series(range(4)).reset_index([], drop=True)
|
||||
expected = Series(range(4))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_reset_index_range(self):
|
||||
# GH 12071
|
||||
s = Series(range(2), name='A', dtype='int64')
|
||||
series_result = s.reset_index()
|
||||
assert isinstance(series_result.index, RangeIndex)
|
||||
series_expected = DataFrame([[0, 0], [1, 1]],
|
||||
columns=['index', 'A'],
|
||||
index=RangeIndex(stop=2))
|
||||
tm.assert_frame_equal(series_result, series_expected)
|
||||
|
||||
def test_reorder_levels(self):
|
||||
index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
|
||||
codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2],
|
||||
[0, 1, 0, 1, 0, 1]],
|
||||
names=['L0', 'L1', 'L2'])
|
||||
s = Series(np.arange(6), index=index)
|
||||
|
||||
# no change, position
|
||||
result = s.reorder_levels([0, 1, 2])
|
||||
tm.assert_series_equal(s, result)
|
||||
|
||||
# no change, labels
|
||||
result = s.reorder_levels(['L0', 'L1', 'L2'])
|
||||
tm.assert_series_equal(s, result)
|
||||
|
||||
# rotate, position
|
||||
result = s.reorder_levels([1, 2, 0])
|
||||
e_idx = MultiIndex(levels=[['one', 'two', 'three'], [0, 1], ['bar']],
|
||||
codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1],
|
||||
[0, 0, 0, 0, 0, 0]],
|
||||
names=['L1', 'L2', 'L0'])
|
||||
expected = Series(np.arange(6), index=e_idx)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_rename_axis_mapper(self):
|
||||
# GH 19978
|
||||
mi = MultiIndex.from_product([['a', 'b', 'c'], [1, 2]],
|
||||
names=['ll', 'nn'])
|
||||
s = Series([i for i in range(len(mi))], index=mi)
|
||||
|
||||
result = s.rename_axis(index={'ll': 'foo'})
|
||||
assert result.index.names == ['foo', 'nn']
|
||||
|
||||
result = s.rename_axis(index=str.upper, axis=0)
|
||||
assert result.index.names == ['LL', 'NN']
|
||||
|
||||
result = s.rename_axis(index=['foo', 'goo'])
|
||||
assert result.index.names == ['foo', 'goo']
|
||||
|
||||
with pytest.raises(TypeError, match='unexpected'):
|
||||
s.rename_axis(columns='wrong')
|
||||
|
||||
def test_rename_axis_inplace(self, datetime_series):
|
||||
# GH 15704
|
||||
expected = datetime_series.rename_axis('foo')
|
||||
result = datetime_series
|
||||
no_return = result.rename_axis('foo', inplace=True)
|
||||
|
||||
assert no_return is None
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_set_axis_inplace_axes(self, axis_series):
|
||||
# GH14636
|
||||
ser = Series(np.arange(4), index=[1, 3, 5, 7], dtype='int64')
|
||||
|
||||
expected = ser.copy()
|
||||
expected.index = list('abcd')
|
||||
|
||||
# inplace=True
|
||||
# The FutureWarning comes from the fact that we would like to have
|
||||
# inplace default to False some day
|
||||
for inplace, warn in [(None, FutureWarning), (True, None)]:
|
||||
result = ser.copy()
|
||||
kwargs = {'inplace': inplace}
|
||||
with tm.assert_produces_warning(warn):
|
||||
result.set_axis(list('abcd'), axis=axis_series, **kwargs)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_set_axis_inplace(self):
|
||||
# GH14636
|
||||
|
||||
s = Series(np.arange(4), index=[1, 3, 5, 7], dtype='int64')
|
||||
|
||||
expected = s.copy()
|
||||
expected.index = list('abcd')
|
||||
|
||||
# inplace=False
|
||||
result = s.set_axis(list('abcd'), axis=0, inplace=False)
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
# omitting the "axis" parameter
|
||||
with tm.assert_produces_warning(None):
|
||||
result = s.set_axis(list('abcd'), inplace=False)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# wrong values for the "axis" parameter
|
||||
for axis in [2, 'foo']:
|
||||
with pytest.raises(ValueError, match='No axis named'):
|
||||
s.set_axis(list('abcd'), axis=axis, inplace=False)
|
||||
|
||||
def test_set_axis_prior_to_deprecation_signature(self):
|
||||
s = Series(np.arange(4), index=[1, 3, 5, 7], dtype='int64')
|
||||
|
||||
expected = s.copy()
|
||||
expected.index = list('abcd')
|
||||
|
||||
for axis in [0, 'index']:
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = s.set_axis(0, list('abcd'), inplace=False)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_reset_index_drop_errors(self):
|
||||
# GH 20925
|
||||
|
||||
# KeyError raised for series index when passed level name is missing
|
||||
s = Series(range(4))
|
||||
with pytest.raises(KeyError, match='must be same as name'):
|
||||
s.reset_index('wrong', drop=True)
|
||||
with pytest.raises(KeyError, match='must be same as name'):
|
||||
s.reset_index('wrong')
|
||||
|
||||
# KeyError raised for series when level to be dropped is missing
|
||||
s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2))
|
||||
with pytest.raises(KeyError, match='not found'):
|
||||
s.reset_index('wrong', drop=True)
|
||||
|
||||
def test_droplevel(self):
|
||||
# GH20342
|
||||
ser = Series([1, 2, 3, 4])
|
||||
ser.index = MultiIndex.from_arrays([(1, 2, 3, 4), (5, 6, 7, 8)],
|
||||
names=['a', 'b'])
|
||||
expected = ser.reset_index('b', drop=True)
|
||||
result = ser.droplevel('b', axis='index')
|
||||
tm.assert_series_equal(result, expected)
|
||||
# test that droplevel raises ValueError on axis != 0
|
||||
with pytest.raises(ValueError):
|
||||
ser.droplevel(1, axis='columns')
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,712 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
from collections import OrderedDict
|
||||
import pydoc
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.compat as compat
|
||||
from pandas.compat import isidentifier, lzip, range, string_types
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical, DataFrame, DatetimeIndex, Index, Series, TimedeltaIndex,
|
||||
date_range, period_range, timedelta_range)
|
||||
from pandas.core.arrays import PeriodArray
|
||||
from pandas.core.indexes.datetimes import Timestamp
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal, ensure_clean
|
||||
|
||||
import pandas.io.formats.printing as printing
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class SharedWithSparse(object):
|
||||
"""
|
||||
A collection of tests Series and SparseSeries can share.
|
||||
|
||||
In generic tests on this class, use ``self._assert_series_equal()``
|
||||
which is implemented in sub-classes.
|
||||
"""
|
||||
def _assert_series_equal(self, left, right):
|
||||
"""Dispatch to series class dependent assertion"""
|
||||
raise NotImplementedError
|
||||
|
||||
def test_scalarop_preserve_name(self):
|
||||
result = self.ts * 2
|
||||
assert result.name == self.ts.name
|
||||
|
||||
def test_copy_name(self):
|
||||
result = self.ts.copy()
|
||||
assert result.name == self.ts.name
|
||||
|
||||
def test_copy_index_name_checking(self):
|
||||
# don't want to be able to modify the index stored elsewhere after
|
||||
# making a copy
|
||||
|
||||
self.ts.index.name = None
|
||||
assert self.ts.index.name is None
|
||||
assert self.ts is self.ts
|
||||
|
||||
cp = self.ts.copy()
|
||||
cp.index.name = 'foo'
|
||||
printing.pprint_thing(self.ts.index.name)
|
||||
assert self.ts.index.name is None
|
||||
|
||||
def test_append_preserve_name(self):
|
||||
result = self.ts[:5].append(self.ts[5:])
|
||||
assert result.name == self.ts.name
|
||||
|
||||
def test_binop_maybe_preserve_name(self):
|
||||
# names match, preserve
|
||||
result = self.ts * self.ts
|
||||
assert result.name == self.ts.name
|
||||
result = self.ts.mul(self.ts)
|
||||
assert result.name == self.ts.name
|
||||
|
||||
result = self.ts * self.ts[:-2]
|
||||
assert result.name == self.ts.name
|
||||
|
||||
# names don't match, don't preserve
|
||||
cp = self.ts.copy()
|
||||
cp.name = 'something else'
|
||||
result = self.ts + cp
|
||||
assert result.name is None
|
||||
result = self.ts.add(cp)
|
||||
assert result.name is None
|
||||
|
||||
ops = ['add', 'sub', 'mul', 'div', 'truediv', 'floordiv', 'mod', 'pow']
|
||||
ops = ops + ['r' + op for op in ops]
|
||||
for op in ops:
|
||||
# names match, preserve
|
||||
s = self.ts.copy()
|
||||
result = getattr(s, op)(s)
|
||||
assert result.name == self.ts.name
|
||||
|
||||
# names don't match, don't preserve
|
||||
cp = self.ts.copy()
|
||||
cp.name = 'changed'
|
||||
result = getattr(s, op)(cp)
|
||||
assert result.name is None
|
||||
|
||||
def test_combine_first_name(self):
|
||||
result = self.ts.combine_first(self.ts[:5])
|
||||
assert result.name == self.ts.name
|
||||
|
||||
def test_getitem_preserve_name(self):
|
||||
result = self.ts[self.ts > 0]
|
||||
assert result.name == self.ts.name
|
||||
|
||||
result = self.ts[[0, 2, 4]]
|
||||
assert result.name == self.ts.name
|
||||
|
||||
result = self.ts[5:10]
|
||||
assert result.name == self.ts.name
|
||||
|
||||
def test_pickle(self):
|
||||
unp_series = self._pickle_roundtrip(self.series)
|
||||
unp_ts = self._pickle_roundtrip(self.ts)
|
||||
assert_series_equal(unp_series, self.series)
|
||||
assert_series_equal(unp_ts, self.ts)
|
||||
|
||||
def _pickle_roundtrip(self, obj):
|
||||
|
||||
with ensure_clean() as path:
|
||||
obj.to_pickle(path)
|
||||
unpickled = pd.read_pickle(path)
|
||||
return unpickled
|
||||
|
||||
def test_argsort_preserve_name(self):
|
||||
result = self.ts.argsort()
|
||||
assert result.name == self.ts.name
|
||||
|
||||
def test_sort_index_name(self):
|
||||
result = self.ts.sort_index(ascending=False)
|
||||
assert result.name == self.ts.name
|
||||
|
||||
def test_to_sparse_pass_name(self):
|
||||
result = self.ts.to_sparse()
|
||||
assert result.name == self.ts.name
|
||||
|
||||
def test_constructor_dict(self):
|
||||
d = {'a': 0., 'b': 1., 'c': 2.}
|
||||
result = self.series_klass(d)
|
||||
expected = self.series_klass(d, index=sorted(d.keys()))
|
||||
self._assert_series_equal(result, expected)
|
||||
|
||||
result = self.series_klass(d, index=['b', 'c', 'd', 'a'])
|
||||
expected = self.series_klass([1, 2, np.nan, 0],
|
||||
index=['b', 'c', 'd', 'a'])
|
||||
self._assert_series_equal(result, expected)
|
||||
|
||||
def test_constructor_subclass_dict(self):
|
||||
data = tm.TestSubDict((x, 10.0 * x) for x in range(10))
|
||||
series = self.series_klass(data)
|
||||
expected = self.series_klass(dict(compat.iteritems(data)))
|
||||
self._assert_series_equal(series, expected)
|
||||
|
||||
def test_constructor_ordereddict(self):
|
||||
# GH3283
|
||||
data = OrderedDict(
|
||||
('col%s' % i, np.random.random()) for i in range(12))
|
||||
|
||||
series = self.series_klass(data)
|
||||
expected = self.series_klass(list(data.values()), list(data.keys()))
|
||||
self._assert_series_equal(series, expected)
|
||||
|
||||
# Test with subclass
|
||||
class A(OrderedDict):
|
||||
pass
|
||||
|
||||
series = self.series_klass(A(data))
|
||||
self._assert_series_equal(series, expected)
|
||||
|
||||
def test_constructor_dict_multiindex(self):
|
||||
d = {('a', 'a'): 0., ('b', 'a'): 1., ('b', 'c'): 2.}
|
||||
_d = sorted(d.items())
|
||||
result = self.series_klass(d)
|
||||
expected = self.series_klass(
|
||||
[x[1] for x in _d],
|
||||
index=pd.MultiIndex.from_tuples([x[0] for x in _d]))
|
||||
self._assert_series_equal(result, expected)
|
||||
|
||||
d['z'] = 111.
|
||||
_d.insert(0, ('z', d['z']))
|
||||
result = self.series_klass(d)
|
||||
expected = self.series_klass([x[1] for x in _d],
|
||||
index=pd.Index([x[0] for x in _d],
|
||||
tupleize_cols=False))
|
||||
result = result.reindex(index=expected.index)
|
||||
self._assert_series_equal(result, expected)
|
||||
|
||||
def test_constructor_dict_timedelta_index(self):
|
||||
# GH #12169 : Resample category data with timedelta index
|
||||
# construct Series from dict as data and TimedeltaIndex as index
|
||||
# will result NaN in result Series data
|
||||
expected = self.series_klass(
|
||||
data=['A', 'B', 'C'],
|
||||
index=pd.to_timedelta([0, 10, 20], unit='s')
|
||||
)
|
||||
|
||||
result = self.series_klass(
|
||||
data={pd.to_timedelta(0, unit='s'): 'A',
|
||||
pd.to_timedelta(10, unit='s'): 'B',
|
||||
pd.to_timedelta(20, unit='s'): 'C'},
|
||||
index=pd.to_timedelta([0, 10, 20], unit='s')
|
||||
)
|
||||
self._assert_series_equal(result, expected)
|
||||
|
||||
def test_from_array_deprecated(self):
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
self.series_klass.from_array([1, 2, 3])
|
||||
|
||||
def test_sparse_accessor_updates_on_inplace(self):
|
||||
s = pd.Series([1, 1, 2, 3], dtype="Sparse[int]")
|
||||
s.drop([0, 1], inplace=True)
|
||||
assert s.sparse.density == 1.0
|
||||
|
||||
|
||||
class TestSeriesMisc(TestData, SharedWithSparse):
|
||||
|
||||
series_klass = Series
|
||||
# SharedWithSparse tests use generic, series_klass-agnostic assertion
|
||||
_assert_series_equal = staticmethod(tm.assert_series_equal)
|
||||
|
||||
def test_tab_completion(self):
    """Only dtype-appropriate accessors (.str/.dt/.cat) appear in dir()."""
    # GH 9910
    s = Series(list('abcd'))
    # Series of str values should have .str but not .dt/.cat in __dir__
    assert 'str' in dir(s)
    assert 'dt' not in dir(s)
    assert 'cat' not in dir(s)

    # similarly for .dt
    s = Series(date_range('1/1/2015', periods=5))
    assert 'dt' in dir(s)
    assert 'str' not in dir(s)
    assert 'cat' not in dir(s)

    # Similarly for .cat, but with the twist that str and dt should be
    # there if the categories are of that type first cat and str.
    s = Series(list('abbcd'), dtype="category")
    assert 'cat' in dir(s)
    assert 'str' in dir(s)  # as it is a string categorical
    assert 'dt' not in dir(s)

    # similar to cat and str
    s = Series(date_range('1/1/2015', periods=5)).astype("category")
    assert 'cat' in dir(s)
    assert 'str' not in dir(s)
    assert 'dt' in dir(s)  # as it is a datetime categorical
def test_tab_completion_with_categorical(self):
|
||||
# test the tab completion display
|
||||
ok_for_cat = ['name', 'index', 'categorical', 'categories', 'codes',
|
||||
'ordered', 'set_categories', 'add_categories',
|
||||
'remove_categories', 'rename_categories',
|
||||
'reorder_categories', 'remove_unused_categories',
|
||||
'as_ordered', 'as_unordered']
|
||||
|
||||
def get_dir(s):
|
||||
results = [r for r in s.cat.__dir__() if not r.startswith('_')]
|
||||
return list(sorted(set(results)))
|
||||
|
||||
s = Series(list('aabbcde')).astype('category')
|
||||
results = get_dir(s)
|
||||
tm.assert_almost_equal(results, list(sorted(set(ok_for_cat))))
|
||||
|
||||
@pytest.mark.parametrize("index", [
|
||||
tm.makeUnicodeIndex(10),
|
||||
tm.makeStringIndex(10),
|
||||
tm.makeCategoricalIndex(10),
|
||||
Index(['foo', 'bar', 'baz'] * 2),
|
||||
tm.makeDateIndex(10),
|
||||
tm.makePeriodIndex(10),
|
||||
tm.makeTimedeltaIndex(10),
|
||||
tm.makeIntIndex(10),
|
||||
tm.makeUIntIndex(10),
|
||||
tm.makeIntIndex(10),
|
||||
tm.makeFloatIndex(10),
|
||||
Index([True, False]),
|
||||
Index(['a{}'.format(i) for i in range(101)]),
|
||||
pd.MultiIndex.from_tuples(lzip('ABCD', 'EFGH')),
|
||||
pd.MultiIndex.from_tuples(lzip([0, 1, 2, 3], 'EFGH')), ])
|
||||
def test_index_tab_completion(self, index):
|
||||
# dir contains string-like values of the Index.
|
||||
s = pd.Series(index=index)
|
||||
dir_s = dir(s)
|
||||
for i, x in enumerate(s.index.unique(level=0)):
|
||||
if i < 100:
|
||||
assert (not isinstance(x, string_types) or
|
||||
not isidentifier(x) or x in dir_s)
|
||||
else:
|
||||
assert x not in dir_s
|
||||
|
||||
def test_not_hashable(self):
|
||||
s_empty = Series()
|
||||
s = Series([1])
|
||||
msg = "'Series' objects are mutable, thus they cannot be hashed"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
hash(s_empty)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
hash(s)
|
||||
|
||||
def test_contains(self):
|
||||
tm.assert_contains_all(self.ts.index, self.ts)
|
||||
|
||||
def test_iter(self):
|
||||
for i, val in enumerate(self.series):
|
||||
assert val == self.series[i]
|
||||
|
||||
for i, val in enumerate(self.ts):
|
||||
assert val == self.ts[i]
|
||||
|
||||
def test_keys(self):
|
||||
# HACK: By doing this in two stages, we avoid 2to3 wrapping the call
|
||||
# to .keys() in a list()
|
||||
getkeys = self.ts.keys
|
||||
assert getkeys() is self.ts.index
|
||||
|
||||
def test_values(self):
|
||||
tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False)
|
||||
|
||||
def test_iteritems(self):
    """iteritems() yields (label, value) pairs that match __getitem__."""
    for idx, val in compat.iteritems(self.series):
        assert val == self.series[idx]

    for idx, val in compat.iteritems(self.ts):
        assert val == self.ts[idx]

    # assert is lazy (generators don't define reverse, lists do)
    assert not hasattr(self.series.iteritems(), 'reverse')
def test_items(self):
|
||||
for idx, val in self.series.items():
|
||||
assert val == self.series[idx]
|
||||
|
||||
for idx, val in self.ts.items():
|
||||
assert val == self.ts[idx]
|
||||
|
||||
# assert is lazy (genrators don't define reverse, lists do)
|
||||
assert not hasattr(self.series.items(), 'reverse')
|
||||
|
||||
def test_raise_on_info(self):
|
||||
s = Series(np.random.randn(10))
|
||||
msg = "'Series' object has no attribute 'info'"
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
s.info()
|
||||
|
||||
def test_copy(self):
    """copy() defaults to a deep copy; copy(deep=False) shares data.

    Mutating the copy must leave the original untouched for deep
    copies, and must be visible through the original for shallow ones.
    deep=None exercises the default path (equivalent to deep=True).
    """
    for deep in [None, False, True]:
        s = Series(np.arange(10), dtype='float64')

        # default deep is True
        if deep is None:
            s2 = s.copy()
        else:
            s2 = s.copy(deep=deep)

        # mutate the copy only
        s2[::2] = np.NaN

        if deep is None or deep is True:
            # Did not modify original Series
            assert np.isnan(s2[0])
            assert not np.isnan(s[0])
        else:
            # we DID modify the original Series
            assert np.isnan(s2[0])
            assert np.isnan(s[0])

    # GH 11794
    # copy of tz-aware
    expected = Series([Timestamp('2012/01/01', tz='UTC')])
    expected2 = Series([Timestamp('1999/01/01', tz='UTC')])

    for deep in [None, False, True]:

        s = Series([Timestamp('2012/01/01', tz='UTC')])

        if deep is None:
            s2 = s.copy()
        else:
            s2 = s.copy(deep=deep)

        # overwrite the copy's single element
        s2[0] = pd.Timestamp('1999/01/01', tz='UTC')

        # default deep is True
        if deep is None or deep is True:
            # Did not modify original Series
            assert_series_equal(s2, expected2)
            assert_series_equal(s, expected)
        else:
            # we DID modify the original Series
            assert_series_equal(s2, expected2)
            assert_series_equal(s, expected2)
def test_axis_alias(self):
|
||||
s = Series([1, 2, np.nan])
|
||||
assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index'))
|
||||
assert s.dropna().sum('rows') == 3
|
||||
assert s._get_axis_number('rows') == 0
|
||||
assert s._get_axis_name('rows') == 'index'
|
||||
|
||||
def test_class_axis(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/18147
|
||||
# no exception and no empty docstring
|
||||
assert pydoc.getdoc(Series.index)
|
||||
|
||||
def test_numpy_unique(self):
|
||||
# it works!
|
||||
np.unique(self.ts)
|
||||
|
||||
def test_ndarray_compat(self):
|
||||
|
||||
# test numpy compat with Series as sub-class of NDFrame
|
||||
tsdf = DataFrame(np.random.randn(1000, 3), columns=['A', 'B', 'C'],
|
||||
index=date_range('1/1/2000', periods=1000))
|
||||
|
||||
def f(x):
|
||||
return x[x.idxmax()]
|
||||
|
||||
result = tsdf.apply(f)
|
||||
expected = tsdf.max()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# .item()
|
||||
s = Series([1])
|
||||
result = s.item()
|
||||
assert result == 1
|
||||
assert s.item() == s.iloc[0]
|
||||
|
||||
# using an ndarray like function
|
||||
s = Series(np.random.randn(10))
|
||||
result = Series(np.ones_like(s))
|
||||
expected = Series(1, index=range(10), dtype='float64')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# ravel
|
||||
s = Series(np.random.randn(10))
|
||||
tm.assert_almost_equal(s.ravel(order='F'), s.values.ravel(order='F'))
|
||||
|
||||
# compress
|
||||
# GH 6658
|
||||
s = Series([0, 1., -1], index=list('abc'))
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
result = np.compress(s > 0, s)
|
||||
tm.assert_series_equal(result, Series([1.], index=['b']))
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
result = np.compress(s < -1, s)
|
||||
# result empty Index(dtype=object) as the same as original
|
||||
exp = Series([], dtype='float64', index=Index([], dtype='object'))
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
s = Series([0, 1., -1], index=[.1, .2, .3])
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
result = np.compress(s > 0, s)
|
||||
tm.assert_series_equal(result, Series([1.], index=[.2]))
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
result = np.compress(s < -1, s)
|
||||
# result empty Float64Index as the same as original
|
||||
exp = Series([], dtype='float64', index=Index([], dtype='float64'))
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
def test_str_accessor_updates_on_inplace(self):
|
||||
s = pd.Series(list('abc'))
|
||||
s.drop([0], inplace=True)
|
||||
assert len(s.str.lower()) == 2
|
||||
|
||||
def test_str_attribute(self):
|
||||
# GH9068
|
||||
methods = ['strip', 'rstrip', 'lstrip']
|
||||
s = Series([' jack', 'jill ', ' jesse ', 'frank'])
|
||||
for method in methods:
|
||||
expected = Series([getattr(str, method)(x) for x in s.values])
|
||||
assert_series_equal(getattr(Series.str, method)(s.str), expected)
|
||||
|
||||
# str accessor only valid with string values
|
||||
s = Series(range(5))
|
||||
with pytest.raises(AttributeError, match='only use .str accessor'):
|
||||
s.str.repeat(2)
|
||||
|
||||
def test_empty_method(self):
|
||||
s_empty = pd.Series()
|
||||
assert s_empty.empty
|
||||
|
||||
for full_series in [pd.Series([1]), pd.Series(index=[1])]:
|
||||
assert not full_series.empty
|
||||
|
||||
def test_tab_complete_warning(self, ip):
|
||||
# https://github.com/pandas-dev/pandas/issues/16409
|
||||
pytest.importorskip('IPython', minversion="6.0.0")
|
||||
from IPython.core.completer import provisionalcompleter
|
||||
|
||||
code = "import pandas as pd; s = pd.Series()"
|
||||
ip.run_code(code)
|
||||
with tm.assert_produces_warning(None):
|
||||
with provisionalcompleter('ignore'):
|
||||
list(ip.Completer.completions('s.', 1))
|
||||
|
||||
|
||||
class TestCategoricalSeries(object):
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[
|
||||
lambda x: x.cat.set_categories([1, 2, 3]),
|
||||
lambda x: x.cat.reorder_categories([2, 3, 1], ordered=True),
|
||||
lambda x: x.cat.rename_categories([1, 2, 3]),
|
||||
lambda x: x.cat.remove_unused_categories(),
|
||||
lambda x: x.cat.remove_categories([2]),
|
||||
lambda x: x.cat.add_categories([4]),
|
||||
lambda x: x.cat.as_ordered(),
|
||||
lambda x: x.cat.as_unordered(),
|
||||
])
|
||||
def test_getname_categorical_accessor(self, method):
|
||||
# GH 17509
|
||||
s = Series([1, 2, 3], name='A').astype('category')
|
||||
expected = 'A'
|
||||
result = method(s).name
|
||||
assert result == expected
|
||||
|
||||
def test_cat_accessor(self):
    """Basic .cat accessor behavior: categories, ordered flag, mutation."""
    s = Series(Categorical(["a", "b", np.nan, "a"]))
    tm.assert_index_equal(s.cat.categories, Index(["a", "b"]))
    # Fixed: was ``assert not s.cat.ordered, False`` -- the trailing
    # ``, False`` was a stray assertion *message* left over from an
    # ``assertEqual(..., False)`` conversion, not part of the condition.
    assert not s.cat.ordered

    # in-place category reordering via inplace=True
    exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"])
    s.cat.set_categories(["b", "a"], inplace=True)
    tm.assert_categorical_equal(s.values, exp)

    # same operation, non-mutating form
    res = s.cat.set_categories(["b", "a"])
    tm.assert_categorical_equal(res.values, exp)

    # after overwriting all values, unused categories can be dropped
    s[:] = "a"
    s = s.cat.remove_unused_categories()
    tm.assert_index_equal(s.cat.categories, Index(["a"]))
def test_cat_accessor_api(self):
|
||||
# GH 9322
|
||||
from pandas.core.arrays.categorical import CategoricalAccessor
|
||||
assert Series.cat is CategoricalAccessor
|
||||
s = Series(list('aabbcde')).astype('category')
|
||||
assert isinstance(s.cat, CategoricalAccessor)
|
||||
|
||||
invalid = Series([1])
|
||||
with pytest.raises(AttributeError, match="only use .cat accessor"):
|
||||
invalid.cat
|
||||
assert not hasattr(invalid, 'cat')
|
||||
|
||||
def test_cat_accessor_no_new_attributes(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/10673
|
||||
c = Series(list('aabbcde')).astype('category')
|
||||
with pytest.raises(AttributeError,
|
||||
match="You cannot add any new attribute"):
|
||||
c.cat.xlabel = "a"
|
||||
|
||||
def test_cat_accessor_updates_on_inplace(self):
|
||||
s = Series(list('abc')).astype('category')
|
||||
s.drop(0, inplace=True)
|
||||
s.cat.remove_unused_categories(inplace=True)
|
||||
assert len(s.cat.categories) == 2
|
||||
|
||||
def test_categorical_delegations(self):
|
||||
|
||||
# invalid accessor
|
||||
msg = r"Can only use \.cat accessor with a 'category' dtype"
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
Series([1, 2, 3]).cat
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
Series([1, 2, 3]).cat()
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
Series(['a', 'b', 'c']).cat
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
Series(np.arange(5.)).cat
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
Series([Timestamp('20130101')]).cat
|
||||
|
||||
# Series should delegate calls to '.categories', '.codes', '.ordered'
|
||||
# and the methods '.set_categories()' 'drop_unused_categories()' to the
|
||||
# categorical# -*- coding: utf-8 -*-
|
||||
s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
|
||||
exp_categories = Index(["a", "b", "c"])
|
||||
tm.assert_index_equal(s.cat.categories, exp_categories)
|
||||
s.cat.categories = [1, 2, 3]
|
||||
exp_categories = Index([1, 2, 3])
|
||||
tm.assert_index_equal(s.cat.categories, exp_categories)
|
||||
|
||||
exp_codes = Series([0, 1, 2, 0], dtype='int8')
|
||||
tm.assert_series_equal(s.cat.codes, exp_codes)
|
||||
|
||||
assert s.cat.ordered
|
||||
s = s.cat.as_unordered()
|
||||
assert not s.cat.ordered
|
||||
s.cat.as_ordered(inplace=True)
|
||||
assert s.cat.ordered
|
||||
|
||||
# reorder
|
||||
s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
|
||||
exp_categories = Index(["c", "b", "a"])
|
||||
exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
|
||||
s = s.cat.set_categories(["c", "b", "a"])
|
||||
tm.assert_index_equal(s.cat.categories, exp_categories)
|
||||
tm.assert_numpy_array_equal(s.values.__array__(), exp_values)
|
||||
tm.assert_numpy_array_equal(s.__array__(), exp_values)
|
||||
|
||||
# remove unused categories
|
||||
s = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"
|
||||
]))
|
||||
exp_categories = Index(["a", "b"])
|
||||
exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_)
|
||||
s = s.cat.remove_unused_categories()
|
||||
tm.assert_index_equal(s.cat.categories, exp_categories)
|
||||
tm.assert_numpy_array_equal(s.values.__array__(), exp_values)
|
||||
tm.assert_numpy_array_equal(s.__array__(), exp_values)
|
||||
|
||||
# This method is likely to be confused, so test that it raises an error
|
||||
# on wrong inputs:
|
||||
msg = "'Series' object has no attribute 'set_categories'"
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
s.set_categories([4, 3, 2, 1])
|
||||
|
||||
# right: s.cat.set_categories([4,3,2,1])
|
||||
|
||||
# GH18862 (let Series.cat.rename_categories take callables)
|
||||
s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
|
||||
result = s.cat.rename_categories(lambda x: x.upper())
|
||||
expected = Series(Categorical(["A", "B", "C", "A"],
|
||||
categories=["A", "B", "C"],
|
||||
ordered=True))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_accessor_api_for_categorical(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/10661
|
||||
from pandas.core.indexes.accessors import Properties
|
||||
|
||||
s_dr = Series(date_range('1/1/2015', periods=5, tz="MET"))
|
||||
c_dr = s_dr.astype("category")
|
||||
|
||||
s_pr = Series(period_range('1/1/2015', freq='D', periods=5))
|
||||
c_pr = s_pr.astype("category")
|
||||
|
||||
s_tdr = Series(timedelta_range('1 days', '10 days'))
|
||||
c_tdr = s_tdr.astype("category")
|
||||
|
||||
# only testing field (like .day)
|
||||
# and bool (is_month_start)
|
||||
get_ops = lambda x: x._datetimelike_ops
|
||||
|
||||
test_data = [
|
||||
("Datetime", get_ops(DatetimeIndex), s_dr, c_dr),
|
||||
("Period", get_ops(PeriodArray), s_pr, c_pr),
|
||||
("Timedelta", get_ops(TimedeltaIndex), s_tdr, c_tdr)]
|
||||
|
||||
assert isinstance(c_dr.dt, Properties)
|
||||
|
||||
special_func_defs = [
|
||||
('strftime', ("%Y-%m-%d",), {}),
|
||||
('tz_convert', ("EST",), {}),
|
||||
('round', ("D",), {}),
|
||||
('floor', ("D",), {}),
|
||||
('ceil', ("D",), {}),
|
||||
('asfreq', ("D",), {}),
|
||||
# ('tz_localize', ("UTC",), {}),
|
||||
]
|
||||
_special_func_names = [f[0] for f in special_func_defs]
|
||||
|
||||
# the series is already localized
|
||||
_ignore_names = ['tz_localize', 'components']
|
||||
|
||||
for name, attr_names, s, c in test_data:
|
||||
func_names = [f
|
||||
for f in dir(s.dt)
|
||||
if not (f.startswith("_") or f in attr_names or f in
|
||||
_special_func_names or f in _ignore_names)]
|
||||
|
||||
func_defs = [(f, (), {}) for f in func_names]
|
||||
for f_def in special_func_defs:
|
||||
if f_def[0] in dir(s.dt):
|
||||
func_defs.append(f_def)
|
||||
|
||||
for func, args, kwargs in func_defs:
|
||||
with warnings.catch_warnings():
|
||||
if func == 'to_period':
|
||||
# dropping TZ
|
||||
warnings.simplefilter("ignore", UserWarning)
|
||||
res = getattr(c.dt, func)(*args, **kwargs)
|
||||
exp = getattr(s.dt, func)(*args, **kwargs)
|
||||
|
||||
if isinstance(res, DataFrame):
|
||||
tm.assert_frame_equal(res, exp)
|
||||
elif isinstance(res, Series):
|
||||
tm.assert_series_equal(res, exp)
|
||||
else:
|
||||
tm.assert_almost_equal(res, exp)
|
||||
|
||||
for attr in attr_names:
|
||||
try:
|
||||
res = getattr(c.dt, attr)
|
||||
exp = getattr(s.dt, attr)
|
||||
except Exception as e:
|
||||
print(name, attr)
|
||||
raise e
|
||||
|
||||
if isinstance(res, DataFrame):
|
||||
tm.assert_frame_equal(res, exp)
|
||||
elif isinstance(res, Series):
|
||||
tm.assert_series_equal(res, exp)
|
||||
else:
|
||||
tm.assert_almost_equal(res, exp)
|
||||
|
||||
invalid = Series([1, 2, 3]).astype('category')
|
||||
msg = "Can only use .dt accessor with datetimelike"
|
||||
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
invalid.dt
|
||||
assert not hasattr(invalid, 'str')
|
||||
@@ -0,0 +1,667 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
from collections import Counter, OrderedDict, defaultdict
|
||||
from itertools import chain
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.compat as compat
|
||||
from pandas.compat import lrange
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, Series, isna
|
||||
from pandas.conftest import _get_cython_table_params
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_frame_equal, assert_series_equal
|
||||
|
||||
|
||||
class TestSeriesApply():
|
||||
|
||||
def test_apply(self, datetime_series):
    """apply() matches the equivalent numpy ufunc and preserves metadata."""
    with np.errstate(all='ignore'):
        tm.assert_series_equal(datetime_series.apply(np.sqrt),
                               np.sqrt(datetime_series))

    # element-wise apply
    import math
    tm.assert_series_equal(datetime_series.apply(math.exp),
                           np.exp(datetime_series))

    # empty series
    s = Series(dtype=object, name='foo', index=pd.Index([], name='bar'))
    rs = s.apply(lambda x: x)
    tm.assert_series_equal(s, rs)

    # check all metadata (GH 9322)
    assert s is not rs          # apply returns a new object ...
    assert s.index is rs.index  # ... but reuses the very same index object
    assert s.dtype == rs.dtype
    assert s.name == rs.name

    # index but no data
    s = Series(index=[1, 2, 3])
    rs = s.apply(lambda x: x)
    tm.assert_series_equal(s, rs)
def test_apply_same_length_inference_bug(self):
|
||||
s = Series([1, 2])
|
||||
f = lambda x: (x, x + 1)
|
||||
|
||||
result = s.apply(f)
|
||||
expected = s.map(f)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
s = Series([1, 2, 3])
|
||||
result = s.apply(f)
|
||||
expected = s.map(f)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
def test_apply_dont_convert_dtype(self):
|
||||
s = Series(np.random.randn(10))
|
||||
|
||||
f = lambda x: x if x > 0 else np.nan
|
||||
result = s.apply(f, convert_dtype=False)
|
||||
assert result.dtype == object
|
||||
|
||||
def test_with_string_args(self, datetime_series):
|
||||
|
||||
for arg in ['sum', 'mean', 'min', 'max', 'std']:
|
||||
result = datetime_series.apply(arg)
|
||||
expected = getattr(datetime_series, arg)()
|
||||
assert result == expected
|
||||
|
||||
def test_apply_args(self):
|
||||
s = Series(['foo,bar'])
|
||||
|
||||
result = s.apply(str.split, args=(',', ))
|
||||
assert result[0] == ['foo', 'bar']
|
||||
assert isinstance(result[0], list)
|
||||
|
||||
def test_series_map_box_timestamps(self):
|
||||
# GH#2689, GH#2627
|
||||
ser = Series(pd.date_range('1/1/2000', periods=10))
|
||||
|
||||
def func(x):
|
||||
return (x.hour, x.day, x.month)
|
||||
|
||||
# it works!
|
||||
ser.map(func)
|
||||
ser.apply(func)
|
||||
|
||||
def test_apply_box(self):
|
||||
# ufunc will not be boxed. Same test cases as the test_map_box
|
||||
vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]
|
||||
s = pd.Series(vals)
|
||||
assert s.dtype == 'datetime64[ns]'
|
||||
# boxed value must be Timestamp instance
|
||||
res = s.apply(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__,
|
||||
x.day, x.tz))
|
||||
exp = pd.Series(['Timestamp_1_None', 'Timestamp_2_None'])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
|
||||
pd.Timestamp('2011-01-02', tz='US/Eastern')]
|
||||
s = pd.Series(vals)
|
||||
assert s.dtype == 'datetime64[ns, US/Eastern]'
|
||||
res = s.apply(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__,
|
||||
x.day, x.tz))
|
||||
exp = pd.Series(['Timestamp_1_US/Eastern', 'Timestamp_2_US/Eastern'])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
# timedelta
|
||||
vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')]
|
||||
s = pd.Series(vals)
|
||||
assert s.dtype == 'timedelta64[ns]'
|
||||
res = s.apply(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.days))
|
||||
exp = pd.Series(['Timedelta_1', 'Timedelta_2'])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
# period
|
||||
vals = [pd.Period('2011-01-01', freq='M'),
|
||||
pd.Period('2011-01-02', freq='M')]
|
||||
s = pd.Series(vals)
|
||||
assert s.dtype == 'Period[M]'
|
||||
res = s.apply(lambda x: '{0}_{1}'.format(x.__class__.__name__,
|
||||
x.freqstr))
|
||||
exp = pd.Series(['Period_M', 'Period_M'])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
def test_apply_datetimetz(self):
|
||||
values = pd.date_range('2011-01-01', '2011-01-02',
|
||||
freq='H').tz_localize('Asia/Tokyo')
|
||||
s = pd.Series(values, name='XX')
|
||||
|
||||
result = s.apply(lambda x: x + pd.offsets.Day())
|
||||
exp_values = pd.date_range('2011-01-02', '2011-01-03',
|
||||
freq='H').tz_localize('Asia/Tokyo')
|
||||
exp = pd.Series(exp_values, name='XX')
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
# change dtype
|
||||
# GH 14506 : Returned dtype changed from int32 to int64
|
||||
result = s.apply(lambda x: x.hour)
|
||||
exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64)
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
# not vectorized
|
||||
def f(x):
|
||||
if not isinstance(x, pd.Timestamp):
|
||||
raise ValueError
|
||||
return str(x.tz)
|
||||
|
||||
result = s.map(f)
|
||||
exp = pd.Series(['Asia/Tokyo'] * 25, name='XX')
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
def test_apply_dict_depr(self):
|
||||
|
||||
tsdf = pd.DataFrame(np.random.randn(10, 3),
|
||||
columns=['A', 'B', 'C'],
|
||||
index=pd.date_range('1/1/2000', periods=10))
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
tsdf.A.agg({'foo': ['sum', 'mean']})
|
||||
|
||||
|
||||
class TestSeriesAggregate():
|
||||
|
||||
def test_transform(self, string_series):
|
||||
# transforming functions
|
||||
|
||||
with np.errstate(all='ignore'):
|
||||
|
||||
f_sqrt = np.sqrt(string_series)
|
||||
f_abs = np.abs(string_series)
|
||||
|
||||
# ufunc
|
||||
result = string_series.transform(np.sqrt)
|
||||
expected = f_sqrt.copy()
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = string_series.apply(np.sqrt)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# list-like
|
||||
result = string_series.transform([np.sqrt])
|
||||
expected = f_sqrt.to_frame().copy()
|
||||
expected.columns = ['sqrt']
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
result = string_series.transform([np.sqrt])
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
result = string_series.transform(['sqrt'])
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
# multiple items in list
|
||||
# these are in the order as if we are applying both functions per
|
||||
# series and then concatting
|
||||
expected = pd.concat([f_sqrt, f_abs], axis=1)
|
||||
expected.columns = ['sqrt', 'absolute']
|
||||
result = string_series.apply([np.sqrt, np.abs])
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
result = string_series.transform(['sqrt', 'abs'])
|
||||
expected.columns = ['sqrt', 'abs']
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
# dict, provide renaming
|
||||
expected = pd.concat([f_sqrt, f_abs], axis=1)
|
||||
expected.columns = ['foo', 'bar']
|
||||
expected = expected.unstack().rename('series')
|
||||
|
||||
result = string_series.apply({'foo': np.sqrt, 'bar': np.abs})
|
||||
assert_series_equal(result.reindex_like(expected), expected)
|
||||
|
||||
def test_transform_and_agg_error(self, string_series):
|
||||
# we are trying to transform with an aggregator
|
||||
with pytest.raises(ValueError):
|
||||
string_series.transform(['min', 'max'])
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
with np.errstate(all='ignore'):
|
||||
string_series.agg(['sqrt', 'max'])
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
with np.errstate(all='ignore'):
|
||||
string_series.transform(['sqrt', 'max'])
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
with np.errstate(all='ignore'):
|
||||
string_series.agg({'foo': np.sqrt, 'bar': 'sum'})
|
||||
|
||||
def test_demo(self):
    """agg() with a list reduces per-function; a dict renames the result."""
    # demonstration tests
    s = Series(range(6), dtype='int64', name='series')

    # list of reducers -> one row per function
    result = s.agg(['min', 'max'])
    expected = Series([0, 5], index=['min', 'max'], name='series')
    tm.assert_series_equal(result, expected)

    # dict spec renames the output label
    result = s.agg({'foo': 'min'})
    expected = Series([0], index=['foo'], name='series')
    tm.assert_series_equal(result, expected)

    # nested renaming
    # (deprecated dict-of-lists spec, hence the expected FutureWarning)
    with tm.assert_produces_warning(FutureWarning):
        result = s.agg({'foo': ['min', 'max']})

    expected = DataFrame(
        {'foo': [0, 5]},
        index=['min', 'max']).unstack().rename('series')
    tm.assert_series_equal(result, expected)
def test_multiple_aggregators_with_dict_api(self):
|
||||
|
||||
s = Series(range(6), dtype='int64', name='series')
|
||||
# nested renaming
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = s.agg({'foo': ['min', 'max'], 'bar': ['sum', 'mean']})
|
||||
|
||||
expected = DataFrame(
|
||||
{'foo': [5.0, np.nan, 0.0, np.nan],
|
||||
'bar': [np.nan, 2.5, np.nan, 15.0]},
|
||||
columns=['foo', 'bar'],
|
||||
index=['max', 'mean',
|
||||
'min', 'sum']).unstack().rename('series')
|
||||
tm.assert_series_equal(result.reindex_like(expected), expected)
|
||||
|
||||
def test_agg_apply_evaluate_lambdas_the_same(self, string_series):
|
||||
# test that we are evaluating row-by-row first
|
||||
# before vectorized evaluation
|
||||
result = string_series.apply(lambda x: str(x))
|
||||
expected = string_series.agg(lambda x: str(x))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = string_series.apply(str)
|
||||
expected = string_series.agg(str)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_with_nested_series(self, datetime_series):
|
||||
# GH 2316
|
||||
# .agg with a reducer and a transform, what to do
|
||||
result = datetime_series.apply(lambda x: Series(
|
||||
[x, x ** 2], index=['x', 'x^2']))
|
||||
expected = DataFrame({'x': datetime_series,
|
||||
'x^2': datetime_series ** 2})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = datetime_series.agg(lambda x: Series(
|
||||
[x, x ** 2], index=['x', 'x^2']))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_replicate_describe(self, string_series):
|
||||
# this also tests a result set that is all scalars
|
||||
expected = string_series.describe()
|
||||
result = string_series.apply(OrderedDict(
|
||||
[('count', 'count'),
|
||||
('mean', 'mean'),
|
||||
('std', 'std'),
|
||||
('min', 'min'),
|
||||
('25%', lambda x: x.quantile(0.25)),
|
||||
('50%', 'median'),
|
||||
('75%', lambda x: x.quantile(0.75)),
|
||||
('max', 'max')]))
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
def test_reduce(self, string_series):
|
||||
# reductions with named functions
|
||||
result = string_series.agg(['sum', 'mean'])
|
||||
expected = Series([string_series.sum(),
|
||||
string_series.mean()],
|
||||
['sum', 'mean'],
|
||||
name=string_series.name)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
def test_non_callable_aggregates(self):
    """agg() accepts non-callable attribute names such as 'size'."""
    # test agg using non-callable series attributes
    s = Series([1, 2, None])

    # Calling agg w/ just a string arg same as calling s.arg
    # ('size' counts NaN too, so it is 3 here)
    result = s.agg('size')
    expected = s.size
    assert result == expected

    # test when mixed w/ callable reducers
    result = s.agg(['size', 'count', 'mean'])
    expected = Series(OrderedDict([('size', 3.0),
                                   ('count', 2.0),
                                   ('mean', 1.5)]))
    assert_series_equal(result[expected.index], expected)
@pytest.mark.parametrize("series, func, expected", chain(
|
||||
_get_cython_table_params(Series(), [
|
||||
('sum', 0),
|
||||
('max', np.nan),
|
||||
('min', np.nan),
|
||||
('all', True),
|
||||
('any', False),
|
||||
('mean', np.nan),
|
||||
('prod', 1),
|
||||
('std', np.nan),
|
||||
('var', np.nan),
|
||||
('median', np.nan),
|
||||
]),
|
||||
_get_cython_table_params(Series([np.nan, 1, 2, 3]), [
|
||||
('sum', 6),
|
||||
('max', 3),
|
||||
('min', 1),
|
||||
('all', True),
|
||||
('any', True),
|
||||
('mean', 2),
|
||||
('prod', 6),
|
||||
('std', 1),
|
||||
('var', 1),
|
||||
('median', 2),
|
||||
]),
|
||||
_get_cython_table_params(Series('a b c'.split()), [
|
||||
('sum', 'abc'),
|
||||
('max', 'c'),
|
||||
('min', 'a'),
|
||||
('all', 'c'), # see GH12863
|
||||
('any', 'a'),
|
||||
]),
|
||||
))
|
||||
def test_agg_cython_table(self, series, func, expected):
|
||||
# GH21224
|
||||
# test reducing functions in
|
||||
# pandas.core.base.SelectionMixin._cython_table
|
||||
result = series.agg(func)
|
||||
if tm.is_number(expected):
|
||||
assert np.isclose(result, expected, equal_nan=True)
|
||||
else:
|
||||
assert result == expected
|
||||
|
||||
@pytest.mark.parametrize("series, func, expected", chain(
|
||||
_get_cython_table_params(Series(), [
|
||||
('cumprod', Series([], Index([]))),
|
||||
('cumsum', Series([], Index([]))),
|
||||
]),
|
||||
_get_cython_table_params(Series([np.nan, 1, 2, 3]), [
|
||||
('cumprod', Series([np.nan, 1, 2, 6])),
|
||||
('cumsum', Series([np.nan, 1, 3, 6])),
|
||||
]),
|
||||
_get_cython_table_params(Series('a b c'.split()), [
|
||||
('cumsum', Series(['a', 'ab', 'abc'])),
|
||||
]),
|
||||
))
|
||||
def test_agg_cython_table_transform(self, series, func, expected):
|
||||
# GH21224
|
||||
# test transforming functions in
|
||||
# pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
|
||||
result = series.agg(func)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
    @pytest.mark.parametrize("series, func, expected", chain(
        _get_cython_table_params(Series('a b c'.split()), [
            ('mean', TypeError),  # mean raises TypeError
            ('prod', TypeError),
            ('std', TypeError),
            ('var', TypeError),
            ('median', TypeError),
            ('cumprod', TypeError),
        ])
    ))
    def test_agg_cython_table_raises(self, series, func, expected):
        """Numeric-only cython-table functions raise TypeError when
        aggregated over string data."""
        # GH21224
        with pytest.raises(expected):
            # e.g. Series('a b'.split()).cumprod() will raise
            series.agg(func)
|
||||
|
||||
|
||||
class TestSeriesMap():
    """Tests for ``Series.map`` with functions, Series, dicts and dict
    subclasses, Counters, categorical data and datetime-like values."""

    def test_map(self, datetime_series):
        """Mapping through a Series/dict performs an index-aligned lookup;
        mapping through a function applies it elementwise."""
        index, data = tm.getMixedTypeDict()

        source = Series(data['B'], index=data['C'])
        target = Series(data['C'][:4], index=data['D'][:4])

        # each mapped value equals the source value at the target's value
        merged = target.map(source)

        for k, v in compat.iteritems(merged):
            assert v == source[target[k]]

        # input could be a dict
        merged = target.map(source.to_dict())

        for k, v in compat.iteritems(merged):
            assert v == source[target[k]]

        # function
        result = datetime_series.map(lambda x: x * 2)
        tm.assert_series_equal(result, datetime_series * 2)

        # GH 10324
        # mapping through a categorical Series keeps categorical dtype;
        # through a plain object Series it does not
        a = Series([1, 2, 3, 4])
        b = Series(["even", "odd", "even", "odd"], dtype="category")
        c = Series(["even", "odd", "even", "odd"])

        exp = Series(["odd", "even", "odd", np.nan], dtype="category")
        tm.assert_series_equal(a.map(b), exp)
        exp = Series(["odd", "even", "odd", np.nan])
        tm.assert_series_equal(a.map(c), exp)

        # lookup via a CategoricalIndex behaves like a plain Index
        a = Series(['a', 'b', 'c', 'd'])
        b = Series([1, 2, 3, 4],
                   index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
        c = Series([1, 2, 3, 4], index=Index(['b', 'c', 'd', 'e']))

        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(c), exp)

        # categorical values + CategoricalIndex lookup preserves categories
        a = Series(['a', 'b', 'c', 'd'])
        b = Series(['B', 'C', 'D', 'E'], dtype='category',
                   index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
        c = Series(['B', 'C', 'D', 'E'], index=Index(['b', 'c', 'd', 'e']))

        exp = Series(pd.Categorical([np.nan, 'B', 'C', 'D'],
                                    categories=['B', 'C', 'D', 'E']))
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, 'B', 'C', 'D'])
        tm.assert_series_equal(a.map(c), exp)

    @pytest.mark.parametrize("index", tm.all_index_generator(10))
    def test_map_empty(self, index):
        """Mapping through an empty dict yields all-NaN with same index."""
        s = Series(index)
        result = s.map({})

        expected = pd.Series(np.nan, index=s.index)
        tm.assert_series_equal(result, expected)

    def test_map_compat(self):
        """Boolean keys in the mapping dict are looked up correctly."""
        # related GH 8024
        s = Series([True, True, False], index=[1, 2, 3])
        result = s.map({True: 'foo', False: 'bar'})
        expected = Series(['foo', 'foo', 'bar'], index=[1, 2, 3])
        assert_series_equal(result, expected)

    def test_map_int(self):
        """Mapping float values through an int-valued Series keeps float
        dtype in the result; missing keys become NaN."""
        left = Series({'a': 1., 'b': 2., 'c': 3., 'd': 4})
        right = Series({1: 11, 2: 22, 3: 33})

        assert left.dtype == np.float_
        assert issubclass(right.dtype.type, np.integer)

        merged = left.map(right)
        assert merged.dtype == np.float_
        assert isna(merged['d'])  # 4 is not a key of `right`
        assert not isna(merged['c'])

    def test_map_type_inference(self):
        """Result dtype is inferred from the mapped values (here: int)."""
        s = Series(lrange(3))
        s2 = s.map(lambda x: np.where(x == 0, 0, 1))
        assert issubclass(s2.dtype.type, np.integer)

    def test_map_decimal(self, string_series):
        """Non-numpy scalar results (Decimal) produce object dtype."""
        from decimal import Decimal

        result = string_series.map(lambda x: Decimal(str(x)))
        assert result.dtype == np.object_
        assert isinstance(result[0], Decimal)

    def test_map_na_exclusion(self):
        """``na_action='ignore'`` skips NaN entries instead of passing
        them to the mapper."""
        s = Series([1.5, np.nan, 3, np.nan, 5])

        result = s.map(lambda x: x * 2, na_action='ignore')
        exp = s * 2
        assert_series_equal(result, exp)

    def test_map_dict_with_tuple_keys(self):
        """
        Due to new MultiIndex-ing behaviour in v0.14.0,
        dicts with tuple keys passed to map were being
        converted to a multi-index, preventing tuple values
        from being mapped properly.
        """
        # GH 18496
        df = pd.DataFrame({'a': [(1, ), (2, ), (3, 4), (5, 6)]})
        label_mappings = {(1, ): 'A', (2, ): 'B', (3, 4): 'A', (5, 6): 'B'}

        df['labels'] = df['a'].map(label_mappings)
        df['expected_labels'] = pd.Series(['A', 'B', 'A', 'B'], index=df.index)
        # All labels should be filled now
        tm.assert_series_equal(df['labels'], df['expected_labels'],
                               check_names=False)

    def test_map_counter(self):
        """Counter is a dict subclass: missing keys map to its default 0
        rather than NaN."""
        s = Series(['a', 'b', 'c'], index=[1, 2, 3])
        counter = Counter()
        counter['b'] = 5
        counter['c'] += 1
        result = s.map(counter)
        expected = Series([0, 5, 1], index=[1, 2, 3])
        assert_series_equal(result, expected)

    def test_map_defaultdict(self):
        """defaultdict supplies its factory value for missing keys."""
        s = Series([1, 2, 3], index=['a', 'b', 'c'])
        default_dict = defaultdict(lambda: 'blank')
        default_dict[1] = 'stuff'
        result = s.map(default_dict)
        expected = Series(['stuff', 'blank', 'blank'], index=['a', 'b', 'c'])
        assert_series_equal(result, expected)

    def test_map_dict_subclass_with_missing(self):
        """
        Test Series.map with a dictionary subclass that defines __missing__,
        i.e. sets a default value (GH #15999).
        """
        class DictWithMissing(dict):
            def __missing__(self, key):
                return 'missing'
        s = Series([1, 2, 3])
        dictionary = DictWithMissing({3: 'three'})
        result = s.map(dictionary)
        expected = Series(['missing', 'missing', 'three'])
        assert_series_equal(result, expected)

    def test_map_dict_subclass_without_missing(self):
        """A dict subclass without __missing__ behaves like a plain dict:
        missing keys become NaN."""
        class DictWithoutMissing(dict):
            pass
        s = Series([1, 2, 3])
        dictionary = DictWithoutMissing({3: 'three'})
        result = s.map(dictionary)
        expected = Series([np.nan, np.nan, 'three'])
        assert_series_equal(result, expected)

    def test_map_box(self):
        """The mapper receives boxed scalars (Timestamp/Timedelta/Period),
        not raw numpy values."""
        vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]
        s = pd.Series(vals)
        assert s.dtype == 'datetime64[ns]'
        # boxed value must be Timestamp instance
        res = s.map(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__,
                                                   x.day, x.tz))
        exp = pd.Series(['Timestamp_1_None', 'Timestamp_2_None'])
        tm.assert_series_equal(res, exp)

        vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
                pd.Timestamp('2011-01-02', tz='US/Eastern')]
        s = pd.Series(vals)
        assert s.dtype == 'datetime64[ns, US/Eastern]'
        res = s.map(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__,
                                                   x.day, x.tz))
        exp = pd.Series(['Timestamp_1_US/Eastern', 'Timestamp_2_US/Eastern'])
        tm.assert_series_equal(res, exp)

        # timedelta
        vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')]
        s = pd.Series(vals)
        assert s.dtype == 'timedelta64[ns]'
        res = s.map(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.days))
        exp = pd.Series(['Timedelta_1', 'Timedelta_2'])
        tm.assert_series_equal(res, exp)

        # period
        vals = [pd.Period('2011-01-01', freq='M'),
                pd.Period('2011-01-02', freq='M')]
        s = pd.Series(vals)
        assert s.dtype == 'Period[M]'
        res = s.map(lambda x: '{0}_{1}'.format(x.__class__.__name__,
                                               x.freqstr))
        exp = pd.Series(['Period_M', 'Period_M'])
        tm.assert_series_equal(res, exp)

    def test_map_categorical(self):
        """Mapping a categorical Series maps the categories (keeping
        dtype/order) when the result is still categorical-compatible;
        otherwise the result is object dtype."""
        values = pd.Categorical(list('ABBABCD'), categories=list('DCBA'),
                                ordered=True)
        s = pd.Series(values, name='XX', index=list('abcdefg'))

        result = s.map(lambda x: x.lower())
        exp_values = pd.Categorical(list('abbabcd'), categories=list('dcba'),
                                    ordered=True)
        exp = pd.Series(exp_values, name='XX', index=list('abcdefg'))
        tm.assert_series_equal(result, exp)
        tm.assert_categorical_equal(result.values, exp_values)

        result = s.map(lambda x: 'A')
        exp = pd.Series(['A'] * 7, name='XX', index=list('abcdefg'))
        tm.assert_series_equal(result, exp)
        assert result.dtype == np.object

        # na_action is not implemented for categorical dtype
        with pytest.raises(NotImplementedError):
            s.map(lambda x: x, na_action='ignore')

    def test_map_datetimetz(self):
        """Mapping a tz-aware datetime Series boxes each element as a
        tz-aware Timestamp; na_action is not supported."""
        values = pd.date_range('2011-01-01', '2011-01-02',
                               freq='H').tz_localize('Asia/Tokyo')
        s = pd.Series(values, name='XX')

        # keep tz
        result = s.map(lambda x: x + pd.offsets.Day())
        exp_values = pd.date_range('2011-01-02', '2011-01-03',
                                   freq='H').tz_localize('Asia/Tokyo')
        exp = pd.Series(exp_values, name='XX')
        tm.assert_series_equal(result, exp)

        # change dtype
        # GH 14506 : Returned dtype changed from int32 to int64
        result = s.map(lambda x: x.hour)
        exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64)
        tm.assert_series_equal(result, exp)

        with pytest.raises(NotImplementedError):
            s.map(lambda x: x, na_action='ignore')

        # not vectorized
        def f(x):
            # each element must arrive as a boxed Timestamp
            if not isinstance(x, pd.Timestamp):
                raise ValueError
            return str(x.tz)

        result = s.map(f)
        exp = pd.Series(['Asia/Tokyo'] * 25, name='XX')
        tm.assert_series_equal(result, exp)

    @pytest.mark.parametrize("vals,mapping,exp", [
        (list('abc'), {np.nan: 'not NaN'}, [np.nan] * 3 + ['not NaN']),
        (list('abc'), {'a': 'a letter'}, ['a letter'] + [np.nan] * 3),
        (list(range(3)), {0: 42}, [42] + [np.nan] * 3)])
    def test_map_missing_mixed(self, vals, mapping, exp):
        """NaN is a legal dict key: it maps the NaN entries themselves."""
        # GH20495
        s = pd.Series(vals + [np.nan])
        result = s.map(mapping)

        tm.assert_series_equal(result, pd.Series(exp))
|
||||
@@ -0,0 +1,172 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Series, compat
|
||||
from pandas.core.indexes.period import IncompatibleFrequency
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def _permute(obj):
|
||||
return obj.take(np.random.permutation(len(obj)))
|
||||
|
||||
|
||||
class TestSeriesFlexArithmetic(object):
    """Check that the flex arithmetic methods (``Series.add``, ``.sub``,
    ...) agree with the corresponding ``operator`` functions."""

    @pytest.mark.parametrize(
        'ts',
        [
            (lambda x: x, lambda x: x * 2, False),
            (lambda x: x, lambda x: x[::2], False),
            (lambda x: x, lambda x: 5, True),
            (lambda x: tm.makeFloatSeries(),
             lambda x: tm.makeFloatSeries(),
             True)
        ])
    @pytest.mark.parametrize('opname', ['add', 'sub', 'mul', 'floordiv',
                                        'truediv', 'div', 'pow'])
    def test_flex_method_equivalence(self, opname, ts):
        # check that Series.{opname} behaves like Series.__{opname}__,
        # `ts` is a triple: (series factory, other factory, check_reverse)
        tser = tm.makeTimeSeries().rename('ts')

        series = ts[0](tser)
        other = ts[1](tser)
        check_reverse = ts[2]

        if opname == 'div' and compat.PY3:
            # operator.div does not exist on Python 3, so the alternate
            # implementation below is only available on Python 2.
            # BUG FIX: skip reason said 'Py3' but the condition skips ON
            # Python 3, i.e. the div case runs only under Python 2.
            pytest.skip('div test only for Py2')

        op = getattr(Series, opname)

        # BUG FIX: the original compared the bound method `op` against the
        # string 'div', which is always False (dead branch). Compare the
        # *name* so Series.div is checked against operator.truediv, which
        # is what Series.div implements.
        if opname == 'div':
            alt = operator.truediv
        else:
            alt = getattr(operator, opname)

        result = op(series, other)
        expected = alt(series, other)
        tm.assert_almost_equal(result, expected)
        if check_reverse:
            # the reflected method rX must equal alt with swapped operands
            rop = getattr(Series, "r" + opname)
            result = rop(series, other)
            expected = alt(other, series)
            tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
class TestSeriesArithmetic(object):
    # Some of these may end up in tests/arithmetic, but are not yet sorted

    def test_add_series_with_period_index(self):
        """Adding period-indexed Series aligns on labels; mismatched
        frequencies raise IncompatibleFrequency."""
        annual = pd.period_range('1/1/2000', '1/1/2010', freq='A')
        ser = Series(np.random.randn(len(annual)), index=annual)

        # alignment with an every-other-element slice leaves NaN gaps
        expected = ser + ser
        expected[1::2] = np.nan
        tm.assert_series_equal(ser + ser[::2], expected)

        # alignment is by label, so the order of the right-hand operand
        # does not matter
        tm.assert_series_equal(ser + _permute(ser[::2]), expected)

        msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)"
        with pytest.raises(IncompatibleFrequency, match=msg):
            ser + ser.asfreq('D', how="end")
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Comparisons
|
||||
|
||||
class TestSeriesFlexComparison(object):
    """The flex comparison methods (eq/ne/le/lt/gt/ge) must match the
    corresponding comparison operators."""

    def test_comparison_flex_basic(self):
        left = pd.Series(np.random.randn(10))
        right = pd.Series(np.random.randn(10))

        # BUG FIX: `le` was checked against `<` and `lt` against `<=`
        # (swapped). With random floats the swap only diverges on exact
        # ties, so the bug went unnoticed; pair each method with its own
        # operator.
        tm.assert_series_equal(left.eq(right), left == right)
        tm.assert_series_equal(left.ne(right), left != right)
        tm.assert_series_equal(left.le(right), left <= right)
        tm.assert_series_equal(left.lt(right), left < right)
        tm.assert_series_equal(left.gt(right), left > right)
        tm.assert_series_equal(left.ge(right), left >= right)

        # axis: 0, None and 'index' are all accepted (and equivalent)
        # for Series
        for axis in [0, None, 'index']:
            tm.assert_series_equal(left.eq(right, axis=axis), left == right)
            tm.assert_series_equal(left.ne(right, axis=axis), left != right)
            tm.assert_series_equal(left.le(right, axis=axis), left <= right)
            tm.assert_series_equal(left.lt(right, axis=axis), left < right)
            tm.assert_series_equal(left.gt(right, axis=axis), left > right)
            tm.assert_series_equal(left.ge(right, axis=axis), left >= right)

        # any other axis raises.
        # BUG FIX: the list contained 'le' twice and omitted 'lt';
        # cover all six flex comparison methods.
        msg = 'No axis named 1 for object type'
        for op in ['eq', 'ne', 'le', 'lt', 'gt', 'ge']:
            with pytest.raises(ValueError, match=msg):
                getattr(left, op)(right, axis=1)
|
||||
|
||||
|
||||
class TestSeriesComparison(object):
    """Comparison-operator behavior: length checking, result dtype, and
    propagation of the result name."""

    def test_comparison_different_length(self):
        """Comparing Series of different lengths raises ValueError."""
        a = Series(['a', 'b', 'c'])
        b = Series(['b', 'a'])
        with pytest.raises(ValueError):
            a < b

        a = Series([1, 2])
        b = Series([2, 3, 4])
        with pytest.raises(ValueError):
            a == b

    @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le'])
    def test_ser_flex_cmp_return_dtypes(self, opname):
        """Flex comparison against a scalar always yields bool dtype."""
        # GH#15115
        ser = Series([1, 3, 2], index=range(3))
        const = 2

        result = getattr(ser, opname)(const).get_dtype_counts()
        tm.assert_series_equal(result, Series([1], ['bool']))

    @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le'])
    def test_ser_flex_cmp_return_dtypes_empty(self, opname):
        """An empty Series also compares to bool dtype."""
        # GH#15115 empty Series case
        ser = Series([1, 3, 2], index=range(3))
        empty = ser.iloc[:0]
        const = 2

        result = getattr(empty, opname)(const).get_dtype_counts()
        tm.assert_series_equal(result, Series([1], ['bool']))

    @pytest.mark.parametrize('op', [operator.eq, operator.ne,
                                    operator.le, operator.lt,
                                    operator.ge, operator.gt])
    @pytest.mark.parametrize('names', [(None, None, None),
                                       ('foo', 'bar', None),
                                       ('baz', 'baz', 'baz')])
    def test_ser_cmp_result_names(self, names, op):
        """The comparison result keeps the name only when both operands
        agree; ``names`` is (index name, series name, expected name)."""
        # datetime64 dtype
        dti = pd.date_range('1949-06-07 03:00:00',
                            freq='H', periods=5, name=names[0])
        ser = Series(dti).rename(names[1])
        result = op(ser, dti)
        assert result.name == names[2]

        # datetime64tz dtype
        dti = dti.tz_localize('US/Central')
        ser = Series(dti).rename(names[1])
        result = op(ser, dti)
        assert result.name == names[2]

        # timedelta64 dtype
        tdi = dti - dti.shift(1)
        ser = Series(tdi).rename(names[1])
        result = op(ser, tdi)
        assert result.name == names[2]

        # categorical
        if op in [operator.eq, operator.ne]:
            # categorical dtype comparisons raise for inequalities
            cidx = tdi.astype('category')
            ser = Series(cidx).rename(names[1])
            result = op(ser, cidx)
            assert result.name == names[2]
|
||||
@@ -0,0 +1,174 @@
|
||||
# coding=utf-8
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Series, Timestamp, date_range, isna, notna, offsets
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSeriesAsof():
    """Tests for ``Series.asof``: last valid value at or before each
    requested timestamp."""

    def test_basic(self):
        """asof on a DatetimeIndex fills an internal NaN run with the
        last valid value before the run."""
        # array or list or dates
        N = 50
        rng = date_range('1/1/1990', periods=N, freq='53s')
        ts = Series(np.random.randn(N), index=rng)
        ts[15:30] = np.nan
        dates = date_range('1/1/1990', periods=N * 3, freq='25s')

        result = ts.asof(dates)
        assert notna(result).all()
        # NOTE(review): lb/ub computed here are immediately recomputed
        # below and never used -- looks like dead code carried over.
        lb = ts.index[14]
        ub = ts.index[30]

        result = ts.asof(list(dates))
        assert notna(result).all()
        lb = ts.index[14]
        ub = ts.index[30]

        # inside the NaN run, asof holds the value from just before it
        mask = (result.index >= lb) & (result.index < ub)
        rs = result[mask]
        assert (rs == ts[lb]).all()

        # first point at/after the run end picks up the new value
        val = result[result.index[result.index >= ub][0]]
        assert ts[ub] == val

    def test_scalar(self):
        """Scalar lookups: NaN runs resolve to the previous valid value;
        string timestamps are accepted; before-start returns NaN."""
        N = 30
        rng = date_range('1/1/1990', periods=N, freq='53s')
        ts = Series(np.arange(N), index=rng)
        ts[5:10] = np.NaN
        ts[15:20] = np.NaN

        val1 = ts.asof(ts.index[7])
        val2 = ts.asof(ts.index[19])

        assert val1 == ts[4]
        assert val2 == ts[14]

        # accepts strings
        val1 = ts.asof(str(ts.index[7]))
        assert val1 == ts[4]

        # in there
        result = ts.asof(ts.index[3])
        assert result == ts[3]

        # no as of value
        d = ts.index[0] - offsets.BDay()
        assert np.isnan(ts.asof(d))

    def test_with_nan(self):
        """asof over the Series' own (resampled) index forward-fills
        through NaN holes, including a trailing NaN run."""
        # basic asof test
        rng = date_range('1/1/2000', '1/2/2000', freq='4h')
        s = Series(np.arange(len(rng)), index=rng)
        r = s.resample('2h').mean()

        result = r.asof(r.index)
        expected = Series([0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6.],
                          index=date_range('1/1/2000', '1/2/2000', freq='2h'))
        tm.assert_series_equal(result, expected)

        r.iloc[3:5] = np.nan
        result = r.asof(r.index)
        expected = Series([0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 5, 5, 6.],
                          index=date_range('1/1/2000', '1/2/2000', freq='2h'))
        tm.assert_series_equal(result, expected)

        r.iloc[-3:] = np.nan
        result = r.asof(r.index)
        expected = Series([0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 4.],
                          index=date_range('1/1/2000', '1/2/2000', freq='2h'))
        tm.assert_series_equal(result, expected)

    def test_periodindex(self):
        """asof works on a PeriodIndex-ed Series queried with datetimes,
        strings and scalars."""
        from pandas import period_range, PeriodIndex
        # array or list or dates
        N = 50
        rng = period_range('1/1/1990', periods=N, freq='H')
        ts = Series(np.random.randn(N), index=rng)
        ts[15:30] = np.nan
        dates = date_range('1/1/1990', periods=N * 3, freq='37min')

        result = ts.asof(dates)
        assert notna(result).all()
        lb = ts.index[14]
        ub = ts.index[30]

        result = ts.asof(list(dates))
        assert notna(result).all()
        lb = ts.index[14]
        ub = ts.index[30]

        # within the NaN run the held value is the one just before it
        pix = PeriodIndex(result.index.values, freq='H')
        mask = (pix >= lb) & (pix < ub)
        rs = result[mask]
        assert (rs == ts[lb]).all()

        ts[5:10] = np.nan
        ts[15:20] = np.nan

        val1 = ts.asof(ts.index[7])
        val2 = ts.asof(ts.index[19])

        assert val1 == ts[4]
        assert val2 == ts[14]

        # accepts strings
        val1 = ts.asof(str(ts.index[7]))
        assert val1 == ts[4]

        # in there
        assert ts.asof(ts.index[3]) == ts[3]

        # no as of value
        d = ts.index[0].to_timestamp() - offsets.BDay()
        assert isna(ts.asof(d))

    def test_errors(self):
        """asof raises ValueError on a non-monotonic index and on the
        unsupported ``subset`` argument for Series."""
        s = Series([1, 2, 3],
                   index=[Timestamp('20130101'),
                          Timestamp('20130103'),
                          Timestamp('20130102')])

        # non-monotonic
        assert not s.index.is_monotonic
        with pytest.raises(ValueError):
            s.asof(s.index[0])

        # subset with Series
        N = 10
        rng = date_range('1/1/1990', periods=N, freq='53s')
        s = Series(np.random.randn(N), index=rng)
        with pytest.raises(ValueError):
            s.asof(s.index[0], subset='foo')

    def test_all_nans(self):
        """All-NaN input returns all-NaN output of the requested shape,
        and the Series name is preserved."""
        # GH 15713
        # series is all nans
        result = Series([np.nan]).asof([0])
        expected = Series([np.nan])
        tm.assert_series_equal(result, expected)

        # testing non-default indexes
        N = 50
        rng = date_range('1/1/1990', periods=N, freq='53s')

        dates = date_range('1/1/1990', periods=N * 3, freq='25s')
        result = Series(np.nan, index=rng).asof(dates)
        expected = Series(np.nan, index=dates)
        tm.assert_series_equal(result, expected)

        # testing scalar input
        date = date_range('1/1/1990', periods=N * 3, freq='25s')[0]
        result = Series(np.nan, index=rng).asof(date)
        assert isna(result)

        # test name is propagated
        result = Series(np.nan, index=[1, 2, 3, 4], name='test').asof([4, 5])
        expected = Series(np.nan, index=[4, 5], name='test')
        tm.assert_series_equal(result, expected)
|
||||
@@ -0,0 +1,43 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pandas as pd
|
||||
|
||||
# Segregated collection of methods that require the BlockManager internal data
|
||||
# structure
|
||||
|
||||
|
||||
class TestSeriesBlockInternals(object):
    """Checks that constructing a Series from a datetime64tz index copies
    the data, so in-place mutation of the Series never touches the
    original DatetimeIndex."""

    def test_setitem_invalidates_datetime_index_freq(self):
        # GH#24096 altering a datetime64tz Series inplace invalidates the
        # `freq` attribute on the underlying DatetimeIndex

        dti = pd.date_range('20130101', periods=3, tz='US/Eastern')
        ts = dti[1]
        ser = pd.Series(dti)
        # the Series must hold its own copy of the index's data
        assert ser._values is not dti
        assert ser._values._data.base is not dti._data._data.base
        assert dti.freq == 'D'
        ser.iloc[1] = pd.NaT
        # mutation invalidates freq on the Series' own values only
        assert ser._values.freq is None

        # check that the DatetimeIndex was not altered in place
        assert ser._values is not dti
        assert ser._values._data.base is not dti._data._data.base
        assert dti[1] == ts
        assert dti.freq == 'D'

    def test_dt64tz_setitem_does_not_mutate_dti(self):
        # GH#21907, GH#24096
        dti = pd.date_range('2016-01-01', periods=10, tz='US/Pacific')
        ts = dti[0]
        ser = pd.Series(dti)
        # neither the Series values nor its block may share memory
        # with the source index
        assert ser._values is not dti
        assert ser._values._data.base is not dti._data._data.base
        assert ser._data.blocks[0].values is not dti
        assert (ser._data.blocks[0].values._data.base
                is not dti._data._data.base)

        ser[::3] = pd.NaT
        assert ser[0] is pd.NaT
        # the original index is untouched
        assert dti[0] == ts
|
||||
@@ -0,0 +1,373 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
from numpy import nan
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, DatetimeIndex, Series, compat, date_range
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_frame_equal, assert_series_equal
|
||||
|
||||
|
||||
class TestSeriesCombine(object):
    """Tests for Series.append / combine / combine_first / update and the
    dtype rules of concatenating empty Series."""

    def test_append(self, datetime_series, string_series, object_series):
        """append concatenates values; verify_integrity rejects
        overlapping indexes."""
        appendedSeries = string_series.append(object_series)
        # every entry of the result comes from exactly one of the inputs
        for idx, value in compat.iteritems(appendedSeries):
            if idx in string_series.index:
                assert value == string_series[idx]
            elif idx in object_series.index:
                assert value == object_series[idx]
            else:
                raise AssertionError("orphaned index!")

        msg = "Indexes have overlapping values:"
        with pytest.raises(ValueError, match=msg):
            datetime_series.append(datetime_series, verify_integrity=True)

    def test_append_many(self, datetime_series):
        """append accepts a list of Series and round-trips a split."""
        pieces = [datetime_series[:5], datetime_series[5:10],
                  datetime_series[10:]]

        result = pieces[0].append(pieces[1:])
        assert_series_equal(result, datetime_series)

    def test_append_duplicates(self):
        """Duplicate index labels are kept unless ignore_index=True;
        verify_integrity raises on overlap."""
        # GH 13677
        s1 = pd.Series([1, 2, 3])
        s2 = pd.Series([4, 5, 6])
        exp = pd.Series([1, 2, 3, 4, 5, 6], index=[0, 1, 2, 0, 1, 2])
        tm.assert_series_equal(s1.append(s2), exp)
        tm.assert_series_equal(pd.concat([s1, s2]), exp)

        # the result must have RangeIndex
        exp = pd.Series([1, 2, 3, 4, 5, 6])
        tm.assert_series_equal(s1.append(s2, ignore_index=True),
                               exp, check_index_type=True)
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True),
                               exp, check_index_type=True)

        msg = 'Indexes have overlapping values:'
        with pytest.raises(ValueError, match=msg):
            s1.append(s2, verify_integrity=True)
        with pytest.raises(ValueError, match=msg):
            pd.concat([s1, s2], verify_integrity=True)

    def test_combine_scalar(self):
        """combine with a scalar applies func(value, scalar) elementwise."""
        # GH 21248
        # Note - combine() with another Series is tested elsewhere because
        # it is used when testing operators
        s = pd.Series([i * 10 for i in range(5)])
        result = s.combine(3, lambda x, y: x + y)
        expected = pd.Series([i * 10 + 3 for i in range(5)])
        tm.assert_series_equal(result, expected)

        result = s.combine(22, lambda x, y: min(x, y))
        expected = pd.Series([min(i * 10, 22) for i in range(5)])
        tm.assert_series_equal(result, expected)

    def test_combine_first(self):
        """combine_first keeps the caller's values and fills its NaNs
        from the other Series."""
        values = tm.makeIntIndex(20).values.astype(float)
        series = Series(values, index=tm.makeIntIndex(20))

        series_copy = series * 2
        series_copy[::2] = np.NaN

        # nothing used from the input
        combined = series.combine_first(series_copy)

        tm.assert_series_equal(combined, series)

        # Holes filled from input
        combined = series_copy.combine_first(series)
        assert np.isfinite(combined).all()

        tm.assert_series_equal(combined[::2], series[::2])
        tm.assert_series_equal(combined[1::2], series_copy[1::2])

        # mixed types
        index = tm.makeStringIndex(20)
        floats = Series(tm.randn(20), index=index)
        strings = Series(tm.makeStringIndex(10), index=index[::2])

        combined = strings.combine_first(floats)

        tm.assert_series_equal(strings, combined.loc[index[::2]])
        tm.assert_series_equal(floats[1::2].astype(object),
                               combined.loc[index[1::2]])

        # corner case
        s = Series([1., 2, 3], index=[0, 1, 2])
        result = s.combine_first(Series([], index=[]))
        assert_series_equal(s, result)

    def test_update(self):
        """update overwrites in place only where the other Series has
        non-NaN values at matching labels."""
        s = Series([1.5, nan, 3., 4., nan])
        s2 = Series([nan, 3.5, nan, 5.])
        s.update(s2)

        expected = Series([1.5, 3.5, 3., 5., np.nan])
        assert_series_equal(s, expected)

        # GH 3217
        # updating a column view updates the parent frame
        df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
        df['c'] = np.nan

        df['c'].update(Series(['foo'], index=[0]))
        expected = DataFrame([[1, np.nan, 'foo'], [3, 2., np.nan]],
                             columns=['a', 'b', 'c'])
        assert_frame_equal(df, expected)

    @pytest.mark.parametrize('other, dtype, expected', [
        # other is int
        ([61, 63], 'int32', pd.Series([10, 61, 12], dtype='int32')),
        ([61, 63], 'int64', pd.Series([10, 61, 12])),
        ([61, 63], float, pd.Series([10., 61., 12.])),
        ([61, 63], object, pd.Series([10, 61, 12], dtype=object)),
        # other is float, but can be cast to int
        ([61., 63.], 'int32', pd.Series([10, 61, 12], dtype='int32')),
        ([61., 63.], 'int64', pd.Series([10, 61, 12])),
        ([61., 63.], float, pd.Series([10., 61., 12.])),
        ([61., 63.], object, pd.Series([10, 61., 12], dtype=object)),
        # others is float, cannot be cast to int
        ([61.1, 63.1], 'int32', pd.Series([10., 61.1, 12.])),
        ([61.1, 63.1], 'int64', pd.Series([10., 61.1, 12.])),
        ([61.1, 63.1], float, pd.Series([10., 61.1, 12.])),
        ([61.1, 63.1], object, pd.Series([10, 61.1, 12], dtype=object)),
        # other is object, cannot be cast
        ([(61,), (63,)], 'int32', pd.Series([10, (61,), 12])),
        ([(61,), (63,)], 'int64', pd.Series([10, (61,), 12])),
        ([(61,), (63,)], float, pd.Series([10., (61,), 12.])),
        ([(61,), (63,)], object, pd.Series([10, (61,), 12]))
    ])
    def test_update_dtypes(self, other, dtype, expected):
        """update keeps the caller's dtype where possible, upcasting only
        when the new values cannot be represented."""
        s = Series([10, 11, 12], dtype=dtype)
        other = Series(other, index=[1, 3])
        s.update(other)

        assert_series_equal(s, expected)

    def test_concat_empty_series_dtypes_roundtrips(self):
        """Concatenating empty Series: a dtype round-trips with itself,
        and pairs follow numpy-like promotion (float > int > uint > bool,
        anything else -> object)."""
        # round-tripping with self & like self
        # BUG FIX: `map` returns a one-shot iterator on Python 3, so the
        # first `for dtype in dtypes` loop exhausted it and the nested
        # pairwise loops below silently iterated over nothing.
        # Materialize the dtypes once so every loop sees them.
        dtypes = list(map(np.dtype, ['float64', 'int8', 'uint8', 'bool',
                                     'm8[ns]', 'M8[ns]']))

        for dtype in dtypes:
            assert pd.concat([Series(dtype=dtype)]).dtype == dtype
            assert pd.concat([Series(dtype=dtype),
                              Series(dtype=dtype)]).dtype == dtype

        def int_result_type(dtype, dtype2):
            # signed wins over unsigned/bool; unsigned wins over bool
            typs = {dtype.kind, dtype2.kind}
            if not len(typs - {'i', 'u', 'b'}) and (dtype.kind == 'i' or
                                                    dtype2.kind == 'i'):
                return 'i'
            elif not len(typs - {'u', 'b'}) and (dtype.kind == 'u' or
                                                 dtype2.kind == 'u'):
                return 'u'
            return None

        def float_result_type(dtype, dtype2):
            # float combined with any numeric kind stays float
            typs = {dtype.kind, dtype2.kind}
            if not len(typs - {'f', 'i', 'u'}) and (dtype.kind == 'f' or
                                                    dtype2.kind == 'f'):
                return 'f'
            return None

        def get_result_type(dtype, dtype2):
            # expected kind of concatenating two empty Series
            result = float_result_type(dtype, dtype2)
            if result is not None:
                return result
            result = int_result_type(dtype, dtype2)
            if result is not None:
                return result
            return 'O'

        for dtype in dtypes:
            for dtype2 in dtypes:
                if dtype == dtype2:
                    continue

                expected = get_result_type(dtype, dtype2)
                result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2)
                                    ]).dtype
                assert result.kind == expected

    def test_combine_first_dt_tz_values(self, tz_naive_fixture):
        """combine_first on tz-aware datetimes preserves the tz and the
        caller's name."""
        ser1 = pd.Series(pd.DatetimeIndex(['20150101', '20150102', '20150103'],
                                          tz=tz_naive_fixture),
                         name='ser1')
        ser2 = pd.Series(pd.DatetimeIndex(['20160514', '20160515', '20160516'],
                                          tz=tz_naive_fixture),
                         index=[2, 3, 4], name='ser2')
        result = ser1.combine_first(ser2)
        exp_vals = pd.DatetimeIndex(['20150101', '20150102', '20150103',
                                     '20160515', '20160516'],
                                    tz=tz_naive_fixture)
        exp = pd.Series(exp_vals, name='ser1')
        assert_series_equal(exp, result)

    def test_concat_empty_series_dtypes(self):
        """Spot-check promotion rules for specific empty-Series pairs,
        including categorical and sparse results."""
        # booleans
        assert pd.concat([Series(dtype=np.bool_),
                          Series(dtype=np.int32)]).dtype == np.int32
        assert pd.concat([Series(dtype=np.bool_),
                          Series(dtype=np.float32)]).dtype == np.object_

        # datetime-like
        assert pd.concat([Series(dtype='m8[ns]'),
                          Series(dtype=np.bool)]).dtype == np.object_
        assert pd.concat([Series(dtype='m8[ns]'),
                          Series(dtype=np.int64)]).dtype == np.object_
        assert pd.concat([Series(dtype='M8[ns]'),
                          Series(dtype=np.bool)]).dtype == np.object_
        assert pd.concat([Series(dtype='M8[ns]'),
                          Series(dtype=np.int64)]).dtype == np.object_
        assert pd.concat([Series(dtype='M8[ns]'),
                          Series(dtype=np.bool_),
                          Series(dtype=np.int64)]).dtype == np.object_

        # categorical
        assert pd.concat([Series(dtype='category'),
                          Series(dtype='category')]).dtype == 'category'
        # GH 18515
        assert pd.concat([Series(np.array([]), dtype='category'),
                          Series(dtype='float64')]).dtype == 'float64'
        assert pd.concat([Series(dtype='category'),
                          Series(dtype='object')]).dtype == 'object'

        # sparse
        # TODO: move?
        result = pd.concat([Series(dtype='float64').to_sparse(), Series(
            dtype='float64').to_sparse()])
        assert result.dtype == 'Sparse[float64]'
        assert result.ftype == 'float64:sparse'

        result = pd.concat([Series(dtype='float64').to_sparse(), Series(
            dtype='float64')])
        # TODO: release-note: concat sparse dtype
        expected = pd.core.sparse.api.SparseDtype(np.float64)
        assert result.dtype == expected
        assert result.ftype == 'float64:sparse'

        result = pd.concat([Series(dtype='float64').to_sparse(), Series(
            dtype='object')])
        # TODO: release-note: concat sparse dtype
        expected = pd.core.sparse.api.SparseDtype('object')
        assert result.dtype == expected
        assert result.ftype == 'object:sparse'

    def test_combine_first_dt64(self):
        """combine_first of datetime64 with datetime64 stays datetime64;
        with object strings it falls back to object dtype."""
        from pandas.core.tools.datetimes import to_datetime
        s0 = to_datetime(Series(["2010", np.NaN]))
        s1 = to_datetime(Series([np.NaN, "2011"]))
        rs = s0.combine_first(s1)
        xp = to_datetime(Series(['2010', '2011']))
        assert_series_equal(rs, xp)

        s0 = to_datetime(Series(["2010", np.NaN]))
        s1 = Series([np.NaN, "2011"])
        rs = s0.combine_first(s1)
        xp = Series([datetime(2010, 1, 1), '2011'])
        assert_series_equal(rs, xp)
|
||||
|
||||
|
||||
class TestTimeseries(object):
|
||||
|
||||
def test_append_concat(self):
|
||||
rng = date_range('5/8/2012 1:45', periods=10, freq='5T')
|
||||
ts = Series(np.random.randn(len(rng)), rng)
|
||||
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
|
||||
|
||||
result = ts.append(ts)
|
||||
result_df = df.append(df)
|
||||
ex_index = DatetimeIndex(np.tile(rng.values, 2))
|
||||
tm.assert_index_equal(result.index, ex_index)
|
||||
tm.assert_index_equal(result_df.index, ex_index)
|
||||
|
||||
appended = rng.append(rng)
|
||||
tm.assert_index_equal(appended, ex_index)
|
||||
|
||||
appended = rng.append([rng, rng])
|
||||
ex_index = DatetimeIndex(np.tile(rng.values, 3))
|
||||
tm.assert_index_equal(appended, ex_index)
|
||||
|
||||
# different index names
|
||||
rng1 = rng.copy()
|
||||
rng2 = rng.copy()
|
||||
rng1.name = 'foo'
|
||||
rng2.name = 'bar'
|
||||
assert rng1.append(rng1).name == 'foo'
|
||||
assert rng1.append(rng2).name is None
|
||||
|
||||
def test_append_concat_tz(self):
|
||||
# see gh-2938
|
||||
rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
|
||||
tz='US/Eastern')
|
||||
rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
|
||||
tz='US/Eastern')
|
||||
rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
|
||||
tz='US/Eastern')
|
||||
ts = Series(np.random.randn(len(rng)), rng)
|
||||
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
|
||||
ts2 = Series(np.random.randn(len(rng2)), rng2)
|
||||
df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
|
||||
|
||||
result = ts.append(ts2)
|
||||
result_df = df.append(df2)
|
||||
tm.assert_index_equal(result.index, rng3)
|
||||
tm.assert_index_equal(result_df.index, rng3)
|
||||
|
||||
appended = rng.append(rng2)
|
||||
tm.assert_index_equal(appended, rng3)
|
||||
|
||||
def test_append_concat_tz_explicit_pytz(self):
|
||||
# see gh-2938
|
||||
from pytz import timezone as timezone
|
||||
|
||||
rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
|
||||
tz=timezone('US/Eastern'))
|
||||
rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
|
||||
tz=timezone('US/Eastern'))
|
||||
rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
|
||||
tz=timezone('US/Eastern'))
|
||||
ts = Series(np.random.randn(len(rng)), rng)
|
||||
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
|
||||
ts2 = Series(np.random.randn(len(rng2)), rng2)
|
||||
df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
|
||||
|
||||
result = ts.append(ts2)
|
||||
result_df = df.append(df2)
|
||||
tm.assert_index_equal(result.index, rng3)
|
||||
tm.assert_index_equal(result_df.index, rng3)
|
||||
|
||||
appended = rng.append(rng2)
|
||||
tm.assert_index_equal(appended, rng3)
|
||||
|
||||
def test_append_concat_tz_dateutil(self):
|
||||
# see gh-2938
|
||||
rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
|
||||
tz='dateutil/US/Eastern')
|
||||
rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
|
||||
tz='dateutil/US/Eastern')
|
||||
rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
|
||||
tz='dateutil/US/Eastern')
|
||||
ts = Series(np.random.randn(len(rng)), rng)
|
||||
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
|
||||
ts2 = Series(np.random.randn(len(rng2)), rng2)
|
||||
df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
|
||||
|
||||
result = ts.append(ts2)
|
||||
result_df = df.append(df2)
|
||||
tm.assert_index_equal(result.index, rng3)
|
||||
tm.assert_index_equal(result_df.index, rng3)
|
||||
|
||||
appended = rng.append(rng2)
|
||||
tm.assert_index_equal(appended, rng3)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,556 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import calendar
|
||||
from datetime import date, datetime, time
|
||||
import locale
|
||||
import unicodedata
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas._libs.tslibs.timezones import maybe_get_tz
|
||||
|
||||
from pandas.core.dtypes.common import is_integer_dtype, is_list_like
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame, DatetimeIndex, Index, PeriodIndex, Series, TimedeltaIndex,
|
||||
bdate_range, compat, date_range, period_range, timedelta_range)
|
||||
from pandas.core.arrays import PeriodArray
|
||||
import pandas.core.common as com
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
|
||||
class TestSeriesDatetimeValues():
|
||||
|
||||
def test_dt_namespace_accessor(self):
|
||||
|
||||
# GH 7207, 11128
|
||||
# test .dt namespace accessor
|
||||
|
||||
ok_for_period = PeriodArray._datetimelike_ops
|
||||
ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq']
|
||||
ok_for_dt = DatetimeIndex._datetimelike_ops
|
||||
ok_for_dt_methods = ['to_period', 'to_pydatetime', 'tz_localize',
|
||||
'tz_convert', 'normalize', 'strftime', 'round',
|
||||
'floor', 'ceil', 'day_name', 'month_name']
|
||||
ok_for_td = TimedeltaIndex._datetimelike_ops
|
||||
ok_for_td_methods = ['components', 'to_pytimedelta', 'total_seconds',
|
||||
'round', 'floor', 'ceil']
|
||||
|
||||
def get_expected(s, name):
|
||||
result = getattr(Index(s._values), prop)
|
||||
if isinstance(result, np.ndarray):
|
||||
if is_integer_dtype(result):
|
||||
result = result.astype('int64')
|
||||
elif not is_list_like(result):
|
||||
return result
|
||||
return Series(result, index=s.index, name=s.name)
|
||||
|
||||
def compare(s, name):
|
||||
a = getattr(s.dt, prop)
|
||||
b = get_expected(s, prop)
|
||||
if not (is_list_like(a) and is_list_like(b)):
|
||||
assert a == b
|
||||
else:
|
||||
tm.assert_series_equal(a, b)
|
||||
|
||||
# datetimeindex
|
||||
cases = [Series(date_range('20130101', periods=5), name='xxx'),
|
||||
Series(date_range('20130101', periods=5, freq='s'),
|
||||
name='xxx'),
|
||||
Series(date_range('20130101 00:00:00', periods=5, freq='ms'),
|
||||
name='xxx')]
|
||||
for s in cases:
|
||||
for prop in ok_for_dt:
|
||||
# we test freq below
|
||||
if prop != 'freq':
|
||||
compare(s, prop)
|
||||
|
||||
for prop in ok_for_dt_methods:
|
||||
getattr(s.dt, prop)
|
||||
|
||||
result = s.dt.to_pydatetime()
|
||||
assert isinstance(result, np.ndarray)
|
||||
assert result.dtype == object
|
||||
|
||||
result = s.dt.tz_localize('US/Eastern')
|
||||
exp_values = DatetimeIndex(s.values).tz_localize('US/Eastern')
|
||||
expected = Series(exp_values, index=s.index, name='xxx')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
tz_result = result.dt.tz
|
||||
assert str(tz_result) == 'US/Eastern'
|
||||
freq_result = s.dt.freq
|
||||
assert freq_result == DatetimeIndex(s.values, freq='infer').freq
|
||||
|
||||
# let's localize, then convert
|
||||
result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern')
|
||||
exp_values = (DatetimeIndex(s.values).tz_localize('UTC')
|
||||
.tz_convert('US/Eastern'))
|
||||
expected = Series(exp_values, index=s.index, name='xxx')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# datetimeindex with tz
|
||||
s = Series(date_range('20130101', periods=5, tz='US/Eastern'),
|
||||
name='xxx')
|
||||
for prop in ok_for_dt:
|
||||
|
||||
# we test freq below
|
||||
if prop != 'freq':
|
||||
compare(s, prop)
|
||||
|
||||
for prop in ok_for_dt_methods:
|
||||
getattr(s.dt, prop)
|
||||
|
||||
result = s.dt.to_pydatetime()
|
||||
assert isinstance(result, np.ndarray)
|
||||
assert result.dtype == object
|
||||
|
||||
result = s.dt.tz_convert('CET')
|
||||
expected = Series(s._values.tz_convert('CET'),
|
||||
index=s.index, name='xxx')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
tz_result = result.dt.tz
|
||||
assert str(tz_result) == 'CET'
|
||||
freq_result = s.dt.freq
|
||||
assert freq_result == DatetimeIndex(s.values, freq='infer').freq
|
||||
|
||||
# timedelta index
|
||||
cases = [Series(timedelta_range('1 day', periods=5),
|
||||
index=list('abcde'), name='xxx'),
|
||||
Series(timedelta_range('1 day 01:23:45', periods=5,
|
||||
freq='s'), name='xxx'),
|
||||
Series(timedelta_range('2 days 01:23:45.012345', periods=5,
|
||||
freq='ms'), name='xxx')]
|
||||
for s in cases:
|
||||
for prop in ok_for_td:
|
||||
# we test freq below
|
||||
if prop != 'freq':
|
||||
compare(s, prop)
|
||||
|
||||
for prop in ok_for_td_methods:
|
||||
getattr(s.dt, prop)
|
||||
|
||||
result = s.dt.components
|
||||
assert isinstance(result, DataFrame)
|
||||
tm.assert_index_equal(result.index, s.index)
|
||||
|
||||
result = s.dt.to_pytimedelta()
|
||||
assert isinstance(result, np.ndarray)
|
||||
assert result.dtype == object
|
||||
|
||||
result = s.dt.total_seconds()
|
||||
assert isinstance(result, pd.Series)
|
||||
assert result.dtype == 'float64'
|
||||
|
||||
freq_result = s.dt.freq
|
||||
assert freq_result == TimedeltaIndex(s.values, freq='infer').freq
|
||||
|
||||
# both
|
||||
index = date_range('20130101', periods=3, freq='D')
|
||||
s = Series(date_range('20140204', periods=3, freq='s'),
|
||||
index=index, name='xxx')
|
||||
exp = Series(np.array([2014, 2014, 2014], dtype='int64'),
|
||||
index=index, name='xxx')
|
||||
tm.assert_series_equal(s.dt.year, exp)
|
||||
|
||||
exp = Series(np.array([2, 2, 2], dtype='int64'),
|
||||
index=index, name='xxx')
|
||||
tm.assert_series_equal(s.dt.month, exp)
|
||||
|
||||
exp = Series(np.array([0, 1, 2], dtype='int64'),
|
||||
index=index, name='xxx')
|
||||
tm.assert_series_equal(s.dt.second, exp)
|
||||
|
||||
exp = pd.Series([s[0]] * 3, index=index, name='xxx')
|
||||
tm.assert_series_equal(s.dt.normalize(), exp)
|
||||
|
||||
# periodindex
|
||||
cases = [Series(period_range('20130101', periods=5, freq='D'),
|
||||
name='xxx')]
|
||||
for s in cases:
|
||||
for prop in ok_for_period:
|
||||
# we test freq below
|
||||
if prop != 'freq':
|
||||
compare(s, prop)
|
||||
|
||||
for prop in ok_for_period_methods:
|
||||
getattr(s.dt, prop)
|
||||
|
||||
freq_result = s.dt.freq
|
||||
assert freq_result == PeriodIndex(s.values).freq
|
||||
|
||||
# test limited display api
|
||||
def get_dir(s):
|
||||
results = [r for r in s.dt.__dir__() if not r.startswith('_')]
|
||||
return list(sorted(set(results)))
|
||||
|
||||
s = Series(date_range('20130101', periods=5, freq='D'), name='xxx')
|
||||
results = get_dir(s)
|
||||
tm.assert_almost_equal(
|
||||
results, list(sorted(set(ok_for_dt + ok_for_dt_methods))))
|
||||
|
||||
s = Series(period_range('20130101', periods=5,
|
||||
freq='D', name='xxx').astype(object))
|
||||
results = get_dir(s)
|
||||
tm.assert_almost_equal(
|
||||
results, list(sorted(set(ok_for_period + ok_for_period_methods))))
|
||||
|
||||
# 11295
|
||||
# ambiguous time error on the conversions
|
||||
s = Series(pd.date_range('2015-01-01', '2016-01-01',
|
||||
freq='T'), name='xxx')
|
||||
s = s.dt.tz_localize('UTC').dt.tz_convert('America/Chicago')
|
||||
results = get_dir(s)
|
||||
tm.assert_almost_equal(
|
||||
results, list(sorted(set(ok_for_dt + ok_for_dt_methods))))
|
||||
exp_values = pd.date_range('2015-01-01', '2016-01-01', freq='T',
|
||||
tz='UTC').tz_convert('America/Chicago')
|
||||
expected = Series(exp_values, name='xxx')
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
# no setting allowed
|
||||
s = Series(date_range('20130101', periods=5, freq='D'), name='xxx')
|
||||
with pytest.raises(ValueError, match="modifications"):
|
||||
s.dt.hour = 5
|
||||
|
||||
# trying to set a copy
|
||||
with pd.option_context('chained_assignment', 'raise'):
|
||||
with pytest.raises(com.SettingWithCopyError):
|
||||
s.dt.hour[0] = 5
|
||||
|
||||
@pytest.mark.parametrize('method, dates', [
|
||||
['round', ['2012-01-02', '2012-01-02', '2012-01-01']],
|
||||
['floor', ['2012-01-01', '2012-01-01', '2012-01-01']],
|
||||
['ceil', ['2012-01-02', '2012-01-02', '2012-01-02']]
|
||||
])
|
||||
def test_dt_round(self, method, dates):
|
||||
# round
|
||||
s = Series(pd.to_datetime(['2012-01-01 13:00:00',
|
||||
'2012-01-01 12:01:00',
|
||||
'2012-01-01 08:00:00']), name='xxx')
|
||||
result = getattr(s.dt, method)('D')
|
||||
expected = Series(pd.to_datetime(dates), name='xxx')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_round_tz(self):
|
||||
s = Series(pd.to_datetime(['2012-01-01 13:00:00',
|
||||
'2012-01-01 12:01:00',
|
||||
'2012-01-01 08:00:00']), name='xxx')
|
||||
result = (s.dt.tz_localize('UTC')
|
||||
.dt.tz_convert('US/Eastern')
|
||||
.dt.round('D'))
|
||||
|
||||
exp_values = pd.to_datetime(['2012-01-01', '2012-01-01',
|
||||
'2012-01-01']).tz_localize('US/Eastern')
|
||||
expected = Series(exp_values, name='xxx')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('method', ['ceil', 'round', 'floor'])
|
||||
def test_dt_round_tz_ambiguous(self, method):
|
||||
# GH 18946 round near "fall back" DST
|
||||
df1 = pd.DataFrame([
|
||||
pd.to_datetime('2017-10-29 02:00:00+02:00', utc=True),
|
||||
pd.to_datetime('2017-10-29 02:00:00+01:00', utc=True),
|
||||
pd.to_datetime('2017-10-29 03:00:00+01:00', utc=True)
|
||||
],
|
||||
columns=['date'])
|
||||
df1['date'] = df1['date'].dt.tz_convert('Europe/Madrid')
|
||||
# infer
|
||||
result = getattr(df1.date.dt, method)('H', ambiguous='infer')
|
||||
expected = df1['date']
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# bool-array
|
||||
result = getattr(df1.date.dt, method)(
|
||||
'H', ambiguous=[True, False, False]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# NaT
|
||||
result = getattr(df1.date.dt, method)('H', ambiguous='NaT')
|
||||
expected = df1['date'].copy()
|
||||
expected.iloc[0:2] = pd.NaT
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# raise
|
||||
with pytest.raises(pytz.AmbiguousTimeError):
|
||||
getattr(df1.date.dt, method)('H', ambiguous='raise')
|
||||
|
||||
@pytest.mark.parametrize('method, ts_str, freq', [
|
||||
['ceil', '2018-03-11 01:59:00-0600', '5min'],
|
||||
['round', '2018-03-11 01:59:00-0600', '5min'],
|
||||
['floor', '2018-03-11 03:01:00-0500', '2H']])
|
||||
def test_dt_round_tz_nonexistent(self, method, ts_str, freq):
|
||||
# GH 23324 round near "spring forward" DST
|
||||
s = Series([pd.Timestamp(ts_str, tz='America/Chicago')])
|
||||
result = getattr(s.dt, method)(freq, nonexistent='shift_forward')
|
||||
expected = Series(
|
||||
[pd.Timestamp('2018-03-11 03:00:00', tz='America/Chicago')]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = getattr(s.dt, method)(freq, nonexistent='NaT')
|
||||
expected = Series([pd.NaT]).dt.tz_localize(result.dt.tz)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
with pytest.raises(pytz.NonExistentTimeError,
|
||||
match='2018-03-11 02:00:00'):
|
||||
getattr(s.dt, method)(freq, nonexistent='raise')
|
||||
|
||||
def test_dt_namespace_accessor_categorical(self):
|
||||
# GH 19468
|
||||
dti = DatetimeIndex(['20171111', '20181212']).repeat(2)
|
||||
s = Series(pd.Categorical(dti), name='foo')
|
||||
result = s.dt.year
|
||||
expected = Series([2017, 2017, 2018, 2018], name='foo')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_accessor_no_new_attributes(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/10673
|
||||
s = Series(date_range('20130101', periods=5, freq='D'))
|
||||
with pytest.raises(AttributeError,
|
||||
match="You cannot add any new attribute"):
|
||||
s.dt.xlabel = "a"
|
||||
|
||||
@pytest.mark.parametrize('time_locale', [
|
||||
None] if tm.get_locales() is None else [None] + tm.get_locales())
|
||||
def test_dt_accessor_datetime_name_accessors(self, time_locale):
|
||||
# Test Monday -> Sunday and January -> December, in that sequence
|
||||
if time_locale is None:
|
||||
# If the time_locale is None, day-name and month_name should
|
||||
# return the english attributes
|
||||
expected_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
|
||||
'Friday', 'Saturday', 'Sunday']
|
||||
expected_months = ['January', 'February', 'March', 'April', 'May',
|
||||
'June', 'July', 'August', 'September',
|
||||
'October', 'November', 'December']
|
||||
else:
|
||||
with tm.set_locale(time_locale, locale.LC_TIME):
|
||||
expected_days = calendar.day_name[:]
|
||||
expected_months = calendar.month_name[1:]
|
||||
|
||||
s = Series(date_range(freq='D', start=datetime(1998, 1, 1),
|
||||
periods=365))
|
||||
english_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
|
||||
'Friday', 'Saturday', 'Sunday']
|
||||
for day, name, eng_name in zip(range(4, 11),
|
||||
expected_days,
|
||||
english_days):
|
||||
name = name.capitalize()
|
||||
assert s.dt.weekday_name[day] == eng_name
|
||||
assert s.dt.day_name(locale=time_locale)[day] == name
|
||||
s = s.append(Series([pd.NaT]))
|
||||
assert np.isnan(s.dt.day_name(locale=time_locale).iloc[-1])
|
||||
|
||||
s = Series(date_range(freq='M', start='2012', end='2013'))
|
||||
result = s.dt.month_name(locale=time_locale)
|
||||
expected = Series([month.capitalize() for month in expected_months])
|
||||
|
||||
# work around https://github.com/pandas-dev/pandas/issues/22342
|
||||
if not compat.PY2:
|
||||
result = result.str.normalize("NFD")
|
||||
expected = expected.str.normalize("NFD")
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
for s_date, expected in zip(s, expected_months):
|
||||
result = s_date.month_name(locale=time_locale)
|
||||
expected = expected.capitalize()
|
||||
|
||||
if not compat.PY2:
|
||||
result = unicodedata.normalize("NFD", result)
|
||||
expected = unicodedata.normalize("NFD", expected)
|
||||
|
||||
assert result == expected
|
||||
|
||||
s = s.append(Series([pd.NaT]))
|
||||
assert np.isnan(s.dt.month_name(locale=time_locale).iloc[-1])
|
||||
|
||||
def test_strftime(self):
|
||||
# GH 10086
|
||||
s = Series(date_range('20130101', periods=5))
|
||||
result = s.dt.strftime('%Y/%m/%d')
|
||||
expected = Series(['2013/01/01', '2013/01/02', '2013/01/03',
|
||||
'2013/01/04', '2013/01/05'])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s = Series(date_range('2015-02-03 11:22:33.4567', periods=5))
|
||||
result = s.dt.strftime('%Y/%m/%d %H-%M-%S')
|
||||
expected = Series(['2015/02/03 11-22-33', '2015/02/04 11-22-33',
|
||||
'2015/02/05 11-22-33', '2015/02/06 11-22-33',
|
||||
'2015/02/07 11-22-33'])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s = Series(period_range('20130101', periods=5))
|
||||
result = s.dt.strftime('%Y/%m/%d')
|
||||
expected = Series(['2013/01/01', '2013/01/02', '2013/01/03',
|
||||
'2013/01/04', '2013/01/05'])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s = Series(period_range(
|
||||
'2015-02-03 11:22:33.4567', periods=5, freq='s'))
|
||||
result = s.dt.strftime('%Y/%m/%d %H-%M-%S')
|
||||
expected = Series(['2015/02/03 11-22-33', '2015/02/03 11-22-34',
|
||||
'2015/02/03 11-22-35', '2015/02/03 11-22-36',
|
||||
'2015/02/03 11-22-37'])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s = Series(date_range('20130101', periods=5))
|
||||
s.iloc[0] = pd.NaT
|
||||
result = s.dt.strftime('%Y/%m/%d')
|
||||
expected = Series(['NaT', '2013/01/02', '2013/01/03', '2013/01/04',
|
||||
'2013/01/05'])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
datetime_index = date_range('20150301', periods=5)
|
||||
result = datetime_index.strftime("%Y/%m/%d")
|
||||
|
||||
expected = Index(['2015/03/01', '2015/03/02', '2015/03/03',
|
||||
'2015/03/04', '2015/03/05'], dtype=np.object_)
|
||||
# dtype may be S10 or U10 depending on python version
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
period_index = period_range('20150301', periods=5)
|
||||
result = period_index.strftime("%Y/%m/%d")
|
||||
expected = Index(['2015/03/01', '2015/03/02', '2015/03/03',
|
||||
'2015/03/04', '2015/03/05'], dtype='=U10')
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
s = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14,
|
||||
32, 1)])
|
||||
result = s.dt.strftime('%Y-%m-%d %H:%M:%S')
|
||||
expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s = Series(period_range('20130101', periods=4, freq='H'))
|
||||
result = s.dt.strftime('%Y/%m/%d %H:%M:%S')
|
||||
expected = Series(["2013/01/01 00:00:00", "2013/01/01 01:00:00",
|
||||
"2013/01/01 02:00:00", "2013/01/01 03:00:00"])
|
||||
|
||||
s = Series(period_range('20130101', periods=4, freq='L'))
|
||||
result = s.dt.strftime('%Y/%m/%d %H:%M:%S.%l')
|
||||
expected = Series(["2013/01/01 00:00:00.000",
|
||||
"2013/01/01 00:00:00.001",
|
||||
"2013/01/01 00:00:00.002",
|
||||
"2013/01/01 00:00:00.003"])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_valid_dt_with_missing_values(self):
|
||||
|
||||
from datetime import date, time
|
||||
|
||||
# GH 8689
|
||||
s = Series(date_range('20130101', periods=5, freq='D'))
|
||||
s.iloc[2] = pd.NaT
|
||||
|
||||
for attr in ['microsecond', 'nanosecond', 'second', 'minute', 'hour',
|
||||
'day']:
|
||||
expected = getattr(s.dt, attr).copy()
|
||||
expected.iloc[2] = np.nan
|
||||
result = getattr(s.dt, attr)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s.dt.date
|
||||
expected = Series(
|
||||
[date(2013, 1, 1), date(2013, 1, 2), np.nan, date(2013, 1, 4),
|
||||
date(2013, 1, 5)], dtype='object')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s.dt.time
|
||||
expected = Series(
|
||||
[time(0), time(0), np.nan, time(0), time(0)], dtype='object')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_accessor_api(self):
|
||||
# GH 9322
|
||||
from pandas.core.indexes.accessors import (
|
||||
CombinedDatetimelikeProperties, DatetimeProperties)
|
||||
assert Series.dt is CombinedDatetimelikeProperties
|
||||
|
||||
s = Series(date_range('2000-01-01', periods=3))
|
||||
assert isinstance(s.dt, DatetimeProperties)
|
||||
|
||||
@pytest.mark.parametrize('ser', [Series(np.arange(5)),
|
||||
Series(list('abcde')),
|
||||
Series(np.random.randn(5))])
|
||||
def test_dt_accessor_invalid(self, ser):
|
||||
# GH#9322 check that series with incorrect dtypes don't have attr
|
||||
with pytest.raises(AttributeError, match="only use .dt accessor"):
|
||||
ser.dt
|
||||
assert not hasattr(ser, 'dt')
|
||||
|
||||
def test_dt_accessor_updates_on_inplace(self):
|
||||
s = Series(pd.date_range('2018-01-01', periods=10))
|
||||
s[2] = None
|
||||
s.fillna(pd.Timestamp('2018-01-01'), inplace=True)
|
||||
result = s.dt.date
|
||||
assert result[0] == result[2]
|
||||
|
||||
def test_between(self):
|
||||
s = Series(bdate_range('1/1/2000', periods=20).astype(object))
|
||||
s[::2] = np.nan
|
||||
|
||||
result = s[s.between(s[3], s[17])]
|
||||
expected = s[3:18].dropna()
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = s[s.between(s[3], s[17], inclusive=False)]
|
||||
expected = s[5:16].dropna()
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
def test_date_tz(self):
|
||||
# GH11757
|
||||
rng = pd.DatetimeIndex(['2014-04-04 23:56',
|
||||
'2014-07-18 21:24',
|
||||
'2015-11-22 22:14'], tz="US/Eastern")
|
||||
s = Series(rng)
|
||||
expected = Series([date(2014, 4, 4),
|
||||
date(2014, 7, 18),
|
||||
date(2015, 11, 22)])
|
||||
assert_series_equal(s.dt.date, expected)
|
||||
assert_series_equal(s.apply(lambda x: x.date()), expected)
|
||||
|
||||
def test_datetime_understood(self):
|
||||
# Ensures it doesn't fail to create the right series
|
||||
# reported in issue#16726
|
||||
series = pd.Series(pd.date_range("2012-01-01", periods=3))
|
||||
offset = pd.offsets.DateOffset(days=6)
|
||||
result = series - offset
|
||||
expected = pd.Series(pd.to_datetime([
|
||||
'2011-12-26', '2011-12-27', '2011-12-28']))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_timetz_accessor(self, tz_naive_fixture):
|
||||
# GH21358
|
||||
tz = maybe_get_tz(tz_naive_fixture)
|
||||
|
||||
dtindex = pd.DatetimeIndex(['2014-04-04 23:56', '2014-07-18 21:24',
|
||||
'2015-11-22 22:14'], tz=tz)
|
||||
s = Series(dtindex)
|
||||
expected = Series([time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz),
|
||||
time(22, 14, tzinfo=tz)])
|
||||
result = s.dt.timetz
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_setitem_with_string_index(self):
|
||||
# GH 23451
|
||||
x = pd.Series([1, 2, 3], index=['Date', 'b', 'other'])
|
||||
x['Date'] = date.today()
|
||||
assert x.Date == date.today()
|
||||
assert x['Date'] == date.today()
|
||||
|
||||
def test_setitem_with_different_tz(self):
|
||||
# GH#24024
|
||||
ser = pd.Series(pd.date_range('2000', periods=2, tz="US/Central"))
|
||||
ser[0] = pd.Timestamp("2000", tz='US/Eastern')
|
||||
expected = pd.Series([
|
||||
pd.Timestamp("2000-01-01 00:00:00-05:00", tz="US/Eastern"),
|
||||
pd.Timestamp("2000-01-02 00:00:00-06:00", tz="US/Central"),
|
||||
], dtype=object)
|
||||
tm.assert_series_equal(ser, expected)
|
||||
@@ -0,0 +1,518 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
import string
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import iNaT
|
||||
import pandas.compat as compat
|
||||
from pandas.compat import lrange, range, u
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical, DataFrame, Index, Series, Timedelta, Timestamp, date_range)
|
||||
from pandas.api.types import CategoricalDtype
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSeriesDtypes(object):
|
||||
|
||||
def test_dt64_series_astype_object(self):
|
||||
dt64ser = Series(date_range('20130101', periods=3))
|
||||
result = dt64ser.astype(object)
|
||||
assert isinstance(result.iloc[0], datetime)
|
||||
assert result.dtype == np.object_
|
||||
|
||||
def test_td64_series_astype_object(self):
|
||||
tdser = Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]')
|
||||
result = tdser.astype(object)
|
||||
assert isinstance(result.iloc[0], timedelta)
|
||||
assert result.dtype == np.object_
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["float32", "float64",
|
||||
"int64", "int32"])
|
||||
def test_astype(self, dtype):
|
||||
s = Series(np.random.randn(5), name='foo')
|
||||
as_typed = s.astype(dtype)
|
||||
|
||||
assert as_typed.dtype == dtype
|
||||
assert as_typed.name == s.name
|
||||
|
||||
def test_asobject_deprecated(self):
|
||||
s = Series(np.random.randn(5), name='foo')
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
o = s.asobject
|
||||
assert isinstance(o, np.ndarray)
|
||||
|
||||
def test_dtype(self, datetime_series):
|
||||
|
||||
assert datetime_series.dtype == np.dtype('float64')
|
||||
assert datetime_series.dtypes == np.dtype('float64')
|
||||
assert datetime_series.ftype == 'float64:dense'
|
||||
assert datetime_series.ftypes == 'float64:dense'
|
||||
tm.assert_series_equal(datetime_series.get_dtype_counts(),
|
||||
Series(1, ['float64']))
|
||||
# GH18243 - Assert .get_ftype_counts is deprecated
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
tm.assert_series_equal(datetime_series.get_ftype_counts(),
|
||||
Series(1, ['float64:dense']))
|
||||
|
||||
@pytest.mark.parametrize("value", [np.nan, np.inf])
|
||||
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
|
||||
def test_astype_cast_nan_inf_int(self, dtype, value):
|
||||
# gh-14265: check NaN and inf raise error when converting to int
|
||||
msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer'
|
||||
s = Series([value])
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.astype(dtype)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
|
||||
def test_astype_cast_object_int_fail(self, dtype):
|
||||
arr = Series(["car", "house", "tree", "1"])
|
||||
msg = r"invalid literal for (int|long)\(\) with base 10: 'car'"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
arr.astype(dtype)
|
||||
|
||||
def test_astype_cast_object_int(self):
|
||||
arr = Series(['1', '2', '3', '4'], dtype=object)
|
||||
result = arr.astype(int)
|
||||
|
||||
tm.assert_series_equal(result, Series(np.arange(1, 5)))
|
||||
|
||||
def test_astype_datetime(self):
|
||||
s = Series(iNaT, dtype='M8[ns]', index=lrange(5))
|
||||
|
||||
s = s.astype('O')
|
||||
assert s.dtype == np.object_
|
||||
|
||||
s = Series([datetime(2001, 1, 2, 0, 0)])
|
||||
|
||||
s = s.astype('O')
|
||||
assert s.dtype == np.object_
|
||||
|
||||
s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)])
|
||||
|
||||
s[1] = np.nan
|
||||
assert s.dtype == 'M8[ns]'
|
||||
|
||||
s = s.astype('O')
|
||||
assert s.dtype == np.object_
|
||||
|
||||
def test_astype_datetime64tz(self):
|
||||
s = Series(date_range('20130101', periods=3, tz='US/Eastern'))
|
||||
|
||||
# astype
|
||||
result = s.astype(object)
|
||||
expected = Series(s.astype(object), dtype=object)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = Series(s.values).dt.tz_localize('UTC').dt.tz_convert(s.dt.tz)
|
||||
tm.assert_series_equal(result, s)
|
||||
|
||||
# astype - object, preserves on construction
|
||||
result = Series(s.astype(object))
|
||||
expected = s.astype(object)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# astype - datetime64[ns, tz]
|
||||
result = Series(s.values).astype('datetime64[ns, US/Eastern]')
|
||||
tm.assert_series_equal(result, s)
|
||||
|
||||
result = Series(s.values).astype(s.dtype)
|
||||
tm.assert_series_equal(result, s)
|
||||
|
||||
result = s.astype('datetime64[ns, CET]')
|
||||
expected = Series(date_range('20130101 06:00:00', periods=3, tz='CET'))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [compat.text_type, np.str_])
|
||||
@pytest.mark.parametrize("series", [Series([string.digits * 10,
|
||||
tm.rands(63),
|
||||
tm.rands(64),
|
||||
tm.rands(1000)]),
|
||||
Series([string.digits * 10,
|
||||
tm.rands(63),
|
||||
tm.rands(64), np.nan, 1.0])])
|
||||
def test_astype_str_map(self, dtype, series):
|
||||
# see gh-4405
|
||||
result = series.astype(dtype)
|
||||
expected = series.map(compat.text_type)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [str, compat.text_type])
|
||||
def test_astype_str_cast(self, dtype):
|
||||
# see gh-9757: test str and unicode on python 2.x
|
||||
# and just str on python 3.x
|
||||
ts = Series([Timestamp('2010-01-04 00:00:00')])
|
||||
s = ts.astype(dtype)
|
||||
|
||||
expected = Series([dtype('2010-01-04')])
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')])
|
||||
s = ts.astype(dtype)
|
||||
|
||||
expected = Series([dtype('2010-01-04 00:00:00-05:00')])
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
td = Series([Timedelta(1, unit='d')])
|
||||
s = td.astype(dtype)
|
||||
|
||||
expected = Series([dtype('1 days 00:00:00.000000000')])
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
def test_astype_unicode(self):
|
||||
# see gh-7758: A bit of magic is required to set
|
||||
# default encoding to utf-8
|
||||
digits = string.digits
|
||||
test_series = [
|
||||
Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
|
||||
Series([u('データーサイエンス、お前はもう死んでいる')]),
|
||||
]
|
||||
|
||||
former_encoding = None
|
||||
|
||||
if not compat.PY3:
|
||||
# In Python, we can force the default encoding for this test
|
||||
former_encoding = sys.getdefaultencoding()
|
||||
reload(sys) # noqa
|
||||
|
||||
sys.setdefaultencoding("utf-8")
|
||||
if sys.getdefaultencoding() == "utf-8":
|
||||
test_series.append(Series([u('野菜食べないとやばい')
|
||||
.encode("utf-8")]))
|
||||
|
||||
for s in test_series:
|
||||
res = s.astype("unicode")
|
||||
expec = s.map(compat.text_type)
|
||||
tm.assert_series_equal(res, expec)
|
||||
|
||||
# Restore the former encoding
|
||||
if former_encoding is not None and former_encoding != "utf-8":
|
||||
reload(sys) # noqa
|
||||
sys.setdefaultencoding(former_encoding)
|
||||
|
||||
@pytest.mark.parametrize("dtype_class", [dict, Series])
|
||||
def test_astype_dict_like(self, dtype_class):
|
||||
# see gh-7271
|
||||
s = Series(range(0, 10, 2), name='abc')
|
||||
|
||||
dt1 = dtype_class({'abc': str})
|
||||
result = s.astype(dt1)
|
||||
expected = Series(['0', '2', '4', '6', '8'], name='abc')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
dt2 = dtype_class({'abc': 'float64'})
|
||||
result = s.astype(dt2)
|
||||
expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype='float64',
|
||||
name='abc')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
dt3 = dtype_class({'abc': str, 'def': str})
|
||||
msg = ("Only the Series name can be used for the key in Series dtype"
|
||||
r" mappings\.")
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
s.astype(dt3)
|
||||
|
||||
dt4 = dtype_class({0: str})
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
s.astype(dt4)
|
||||
|
||||
# GH16717
|
||||
# if dtypes provided is empty, it should error
|
||||
dt5 = dtype_class({})
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
s.astype(dt5)
|
||||
|
||||
def test_astype_categories_deprecation(self):
|
||||
|
||||
# deprecated 17636
|
||||
s = Series(['a', 'b', 'a'])
|
||||
expected = s.astype(CategoricalDtype(['a', 'b'], ordered=True))
|
||||
with tm.assert_produces_warning(FutureWarning,
|
||||
check_stacklevel=False):
|
||||
result = s.astype('category', categories=['a', 'b'], ordered=True)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_astype_from_categorical(self):
|
||||
items = ["a", "b", "c", "a"]
|
||||
s = Series(items)
|
||||
exp = Series(Categorical(items))
|
||||
res = s.astype('category')
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
items = [1, 2, 3, 1]
|
||||
s = Series(items)
|
||||
exp = Series(Categorical(items))
|
||||
res = s.astype('category')
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
df = DataFrame({"cats": [1, 2, 3, 4, 5, 6],
|
||||
"vals": [1, 2, 3, 4, 5, 6]})
|
||||
cats = Categorical([1, 2, 3, 4, 5, 6])
|
||||
exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]})
|
||||
df["cats"] = df["cats"].astype("category")
|
||||
tm.assert_frame_equal(exp_df, df)
|
||||
|
||||
df = DataFrame({"cats": ['a', 'b', 'b', 'a', 'a', 'd'],
|
||||
"vals": [1, 2, 3, 4, 5, 6]})
|
||||
cats = Categorical(['a', 'b', 'b', 'a', 'a', 'd'])
|
||||
exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]})
|
||||
df["cats"] = df["cats"].astype("category")
|
||||
tm.assert_frame_equal(exp_df, df)
|
||||
|
||||
# with keywords
|
||||
lst = ["a", "b", "c", "a"]
|
||||
s = Series(lst)
|
||||
exp = Series(Categorical(lst, ordered=True))
|
||||
res = s.astype(CategoricalDtype(None, ordered=True))
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
exp = Series(Categorical(lst, categories=list('abcdef'), ordered=True))
|
||||
res = s.astype(CategoricalDtype(list('abcdef'), ordered=True))
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
def test_astype_categorical_to_other(self):
|
||||
|
||||
df = DataFrame({'value': np.random.randint(0, 10000, 100)})
|
||||
labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
|
||||
cat_labels = Categorical(labels, labels)
|
||||
|
||||
df = df.sort_values(by=['value'], ascending=True)
|
||||
df['value_group'] = pd.cut(df.value, range(0, 10500, 500),
|
||||
right=False, labels=cat_labels)
|
||||
|
||||
s = df['value_group']
|
||||
expected = s
|
||||
tm.assert_series_equal(s.astype('category'), expected)
|
||||
tm.assert_series_equal(s.astype(CategoricalDtype()), expected)
|
||||
msg = (r"could not convert string to float: '(0 - 499|9500 - 9999)'|"
|
||||
r"invalid literal for float\(\): (0 - 499|9500 - 9999)")
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.astype('float64')
|
||||
|
||||
cat = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']))
|
||||
exp = Series(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])
|
||||
tm.assert_series_equal(cat.astype('str'), exp)
|
||||
s2 = Series(Categorical(['1', '2', '3', '4']))
|
||||
exp2 = Series([1, 2, 3, 4]).astype(int)
|
||||
tm.assert_series_equal(s2.astype('int'), exp2)
|
||||
|
||||
# object don't sort correctly, so just compare that we have the same
|
||||
# values
|
||||
def cmp(a, b):
|
||||
tm.assert_almost_equal(
|
||||
np.sort(np.unique(a)), np.sort(np.unique(b)))
|
||||
|
||||
expected = Series(np.array(s.values), name='value_group')
|
||||
cmp(s.astype('object'), expected)
|
||||
cmp(s.astype(np.object_), expected)
|
||||
|
||||
# array conversion
|
||||
tm.assert_almost_equal(np.array(s), np.array(s.values))
|
||||
|
||||
# valid conversion
|
||||
for valid in [lambda x: x.astype('category'),
|
||||
lambda x: x.astype(CategoricalDtype()),
|
||||
lambda x: x.astype('object').astype('category'),
|
||||
lambda x: x.astype('object').astype(
|
||||
CategoricalDtype())
|
||||
]:
|
||||
|
||||
result = valid(s)
|
||||
# compare series values
|
||||
# internal .categories can't be compared because it is sorted
|
||||
tm.assert_series_equal(result, s, check_categorical=False)
|
||||
|
||||
# invalid conversion (these are NOT a dtype)
|
||||
msg = (r"invalid type <class 'pandas\.core\.arrays\.categorical\."
|
||||
"Categorical'> for astype")
|
||||
for invalid in [lambda x: x.astype(Categorical),
|
||||
lambda x: x.astype('object').astype(Categorical)]:
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
invalid(s)
|
||||
|
||||
@pytest.mark.parametrize('name', [None, 'foo'])
|
||||
@pytest.mark.parametrize('dtype_ordered', [True, False])
|
||||
@pytest.mark.parametrize('series_ordered', [True, False])
|
||||
def test_astype_categorical_to_categorical(self, name, dtype_ordered,
|
||||
series_ordered):
|
||||
# GH 10696/18593
|
||||
s_data = list('abcaacbab')
|
||||
s_dtype = CategoricalDtype(list('bac'), ordered=series_ordered)
|
||||
s = Series(s_data, dtype=s_dtype, name=name)
|
||||
|
||||
# unspecified categories
|
||||
dtype = CategoricalDtype(ordered=dtype_ordered)
|
||||
result = s.astype(dtype)
|
||||
exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered)
|
||||
expected = Series(s_data, name=name, dtype=exp_dtype)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
result = s.astype('category', ordered=dtype_ordered)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# different categories
|
||||
dtype = CategoricalDtype(list('adc'), dtype_ordered)
|
||||
result = s.astype(dtype)
|
||||
expected = Series(s_data, name=name, dtype=dtype)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
result = s.astype(
|
||||
'category', categories=list('adc'), ordered=dtype_ordered)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
if dtype_ordered is False:
|
||||
# not specifying ordered, so only test once
|
||||
expected = s
|
||||
result = s.astype('category')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_astype_categoricaldtype(self):
|
||||
s = Series(['a', 'b', 'a'])
|
||||
result = s.astype(CategoricalDtype(['a', 'b'], ordered=True))
|
||||
expected = Series(Categorical(['a', 'b', 'a'], ordered=True))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s.astype(CategoricalDtype(['a', 'b'], ordered=False))
|
||||
expected = Series(Categorical(['a', 'b', 'a'], ordered=False))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s.astype(CategoricalDtype(['a', 'b', 'c'], ordered=False))
|
||||
expected = Series(Categorical(['a', 'b', 'a'],
|
||||
categories=['a', 'b', 'c'],
|
||||
ordered=False))
|
||||
tm.assert_series_equal(result, expected)
|
||||
tm.assert_index_equal(result.cat.categories, Index(['a', 'b', 'c']))
|
||||
|
||||
def test_astype_categoricaldtype_with_args(self):
|
||||
s = Series(['a', 'b'])
|
||||
type_ = CategoricalDtype(['a', 'b'])
|
||||
|
||||
msg = (r"Cannot specify a CategoricalDtype and also `categories` or"
|
||||
r" `ordered`\. Use `dtype=CategoricalDtype\(categories,"
|
||||
r" ordered\)` instead\.")
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s.astype(type_, ordered=True)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s.astype(type_, categories=['a', 'b'])
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s.astype(type_, categories=['a', 'b'], ordered=False)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [
|
||||
np.datetime64,
|
||||
np.timedelta64,
|
||||
])
|
||||
def test_astype_generic_timestamp_no_frequency(self, dtype):
|
||||
# see gh-15524, gh-15987
|
||||
data = [1]
|
||||
s = Series(data)
|
||||
|
||||
msg = "dtype has no unit. Please pass in"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.astype(dtype)
|
||||
|
||||
@pytest.mark.parametrize("dtype", np.typecodes['All'])
|
||||
def test_astype_empty_constructor_equality(self, dtype):
|
||||
# see gh-15524
|
||||
|
||||
if dtype not in (
|
||||
"S", "V", # poor support (if any) currently
|
||||
"M", "m" # Generic timestamps raise a ValueError. Already tested.
|
||||
):
|
||||
init_empty = Series([], dtype=dtype)
|
||||
as_type_empty = Series([]).astype(dtype)
|
||||
tm.assert_series_equal(init_empty, as_type_empty)
|
||||
|
||||
def test_complex(self):
|
||||
# see gh-4819: complex access for ndarray compat
|
||||
a = np.arange(5, dtype=np.float64)
|
||||
b = Series(a + 4j * a)
|
||||
|
||||
tm.assert_numpy_array_equal(a, b.real)
|
||||
tm.assert_numpy_array_equal(4 * a, b.imag)
|
||||
|
||||
b.real = np.arange(5) + 5
|
||||
tm.assert_numpy_array_equal(a + 5, b.real)
|
||||
tm.assert_numpy_array_equal(4 * a, b.imag)
|
||||
|
||||
def test_arg_for_errors_in_astype(self):
|
||||
# see gh-14878
|
||||
s = Series([1, 2, 3])
|
||||
|
||||
msg = (r"Expected value of kwarg 'errors' to be one of \['raise',"
|
||||
r" 'ignore'\]\. Supplied value is 'False'")
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.astype(np.float64, errors=False)
|
||||
|
||||
s.astype(np.int8, errors='raise')
|
||||
|
||||
def test_intercept_astype_object(self):
|
||||
series = Series(date_range('1/1/2000', periods=10))
|
||||
|
||||
# This test no longer makes sense, as
|
||||
# Series is by default already M8[ns].
|
||||
expected = series.astype('object')
|
||||
|
||||
df = DataFrame({'a': series,
|
||||
'b': np.random.randn(len(series))})
|
||||
exp_dtypes = Series([np.dtype('datetime64[ns]'),
|
||||
np.dtype('float64')], index=['a', 'b'])
|
||||
tm.assert_series_equal(df.dtypes, exp_dtypes)
|
||||
|
||||
result = df.values.squeeze()
|
||||
assert (result[:, 0] == expected.values).all()
|
||||
|
||||
df = DataFrame({'a': series, 'b': ['foo'] * len(series)})
|
||||
|
||||
result = df.values.squeeze()
|
||||
assert (result[:, 0] == expected.values).all()
|
||||
|
||||
def test_series_to_categorical(self):
|
||||
# see gh-16524: test conversion of Series to Categorical
|
||||
series = Series(['a', 'b', 'c'])
|
||||
|
||||
result = Series(series, dtype='category')
|
||||
expected = Series(['a', 'b', 'c'], dtype='category')
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_infer_objects_series(self):
|
||||
# GH 11221
|
||||
actual = Series(np.array([1, 2, 3], dtype='O')).infer_objects()
|
||||
expected = Series([1, 2, 3])
|
||||
tm.assert_series_equal(actual, expected)
|
||||
|
||||
actual = Series(np.array([1, 2, 3, None], dtype='O')).infer_objects()
|
||||
expected = Series([1., 2., 3., np.nan])
|
||||
tm.assert_series_equal(actual, expected)
|
||||
|
||||
# only soft conversions, unconvertable pass thru unchanged
|
||||
actual = (Series(np.array([1, 2, 3, None, 'a'], dtype='O'))
|
||||
.infer_objects())
|
||||
expected = Series([1, 2, 3, None, 'a'])
|
||||
|
||||
assert actual.dtype == 'object'
|
||||
tm.assert_series_equal(actual, expected)
|
||||
|
||||
def test_is_homogeneous_type(self):
|
||||
assert Series()._is_homogeneous_type
|
||||
assert Series([1, 2])._is_homogeneous_type
|
||||
assert Series(pd.Categorical([1, 2]))._is_homogeneous_type
|
||||
|
||||
@pytest.mark.parametrize("data", [
|
||||
pd.period_range("2000", periods=4),
|
||||
pd.IntervalIndex.from_breaks([1, 2, 3, 4])
|
||||
])
|
||||
def test_values_compatibility(self, data):
|
||||
# https://github.com/pandas-dev/pandas/issues/23995
|
||||
result = pd.Series(data).values
|
||||
expected = np.array(data.astype(object))
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
@@ -0,0 +1,142 @@
|
||||
# coding=utf-8
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Categorical, Series
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_value_counts_nunique():
|
||||
# basics.rst doc example
|
||||
series = Series(np.random.randn(500))
|
||||
series[20:500] = np.nan
|
||||
series[10:20] = 5000
|
||||
result = series.nunique()
|
||||
assert result == 11
|
||||
|
||||
# GH 18051
|
||||
s = Series(Categorical([]))
|
||||
assert s.nunique() == 0
|
||||
s = Series(Categorical([np.nan]))
|
||||
assert s.nunique() == 0
|
||||
|
||||
|
||||
def test_unique():
|
||||
# GH714 also, dtype=float
|
||||
s = Series([1.2345] * 100)
|
||||
s[::2] = np.nan
|
||||
result = s.unique()
|
||||
assert len(result) == 2
|
||||
|
||||
s = Series([1.2345] * 100, dtype='f4')
|
||||
s[::2] = np.nan
|
||||
result = s.unique()
|
||||
assert len(result) == 2
|
||||
|
||||
# NAs in object arrays #714
|
||||
s = Series(['foo'] * 100, dtype='O')
|
||||
s[::2] = np.nan
|
||||
result = s.unique()
|
||||
assert len(result) == 2
|
||||
|
||||
# decision about None
|
||||
s = Series([1, 2, 3, None, None, None], dtype=object)
|
||||
result = s.unique()
|
||||
expected = np.array([1, 2, 3, None], dtype=object)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# GH 18051
|
||||
s = Series(Categorical([]))
|
||||
tm.assert_categorical_equal(s.unique(), Categorical([]), check_dtype=False)
|
||||
s = Series(Categorical([np.nan]))
|
||||
tm.assert_categorical_equal(s.unique(), Categorical([np.nan]),
|
||||
check_dtype=False)
|
||||
|
||||
|
||||
def test_unique_data_ownership():
|
||||
# it works! #1807
|
||||
Series(Series(["a", "c", "b"]).unique()).sort_values()
|
||||
|
||||
|
||||
def test_is_unique():
|
||||
# GH11946
|
||||
s = Series(np.random.randint(0, 10, size=1000))
|
||||
assert s.is_unique is False
|
||||
s = Series(np.arange(1000))
|
||||
assert s.is_unique is True
|
||||
|
||||
|
||||
def test_is_unique_class_ne(capsys):
|
||||
# GH 20661
|
||||
class Foo(object):
|
||||
def __init__(self, val):
|
||||
self._value = val
|
||||
|
||||
def __ne__(self, other):
|
||||
raise Exception("NEQ not supported")
|
||||
|
||||
with capsys.disabled():
|
||||
li = [Foo(i) for i in range(5)]
|
||||
s = Series(li, index=[i for i in range(5)])
|
||||
s.is_unique
|
||||
captured = capsys.readouterr()
|
||||
assert len(captured.err) == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'keep, expected',
|
||||
[
|
||||
('first', Series([False, False, False, False, True, True, False])),
|
||||
('last', Series([False, True, True, False, False, False, False])),
|
||||
(False, Series([False, True, True, False, True, True, False]))
|
||||
])
|
||||
def test_drop_duplicates(any_numpy_dtype, keep, expected):
|
||||
tc = Series([1, 0, 3, 5, 3, 0, 4], dtype=np.dtype(any_numpy_dtype))
|
||||
|
||||
if tc.dtype == 'bool':
|
||||
pytest.skip('tested separately in test_drop_duplicates_bool')
|
||||
|
||||
tm.assert_series_equal(tc.duplicated(keep=keep), expected)
|
||||
tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
|
||||
sc = tc.copy()
|
||||
sc.drop_duplicates(keep=keep, inplace=True)
|
||||
tm.assert_series_equal(sc, tc[~expected])
|
||||
|
||||
|
||||
@pytest.mark.parametrize('keep, expected',
|
||||
[('first', Series([False, False, True, True])),
|
||||
('last', Series([True, True, False, False])),
|
||||
(False, Series([True, True, True, True]))])
|
||||
def test_drop_duplicates_bool(keep, expected):
|
||||
tc = Series([True, False, True, False])
|
||||
|
||||
tm.assert_series_equal(tc.duplicated(keep=keep), expected)
|
||||
tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
|
||||
sc = tc.copy()
|
||||
sc.drop_duplicates(keep=keep, inplace=True)
|
||||
tm.assert_series_equal(sc, tc[~expected])
|
||||
|
||||
|
||||
@pytest.mark.parametrize('keep, expected', [
|
||||
('first', Series([False, False, True, False, True], name='name')),
|
||||
('last', Series([True, True, False, False, False], name='name')),
|
||||
(False, Series([True, True, True, False, True], name='name'))
|
||||
])
|
||||
def test_duplicated_keep(keep, expected):
|
||||
s = Series(['a', 'b', 'b', 'c', 'a'], name='name')
|
||||
|
||||
result = s.duplicated(keep=keep)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('keep, expected', [
|
||||
('first', Series([False, False, True, False, True])),
|
||||
('last', Series([True, True, False, False, False])),
|
||||
(False, Series([True, True, True, False, True]))
|
||||
])
|
||||
def test_duplicated_nan_none(keep, expected):
|
||||
s = Series([np.nan, 3, 3, None, np.nan], dtype=object)
|
||||
|
||||
result = s.duplicated(keep=keep)
|
||||
tm.assert_series_equal(result, expected)
|
||||
@@ -0,0 +1,343 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import NaT, Series, Timestamp
|
||||
from pandas.core.internals.blocks import IntBlock
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
|
||||
class TestSeriesInternals(object):
|
||||
|
||||
def test_convert_objects(self):
|
||||
|
||||
s = Series([1., 2, 3], index=['a', 'b', 'c'])
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = s.convert_objects(convert_dates=False,
|
||||
convert_numeric=True)
|
||||
assert_series_equal(result, s)
|
||||
|
||||
# force numeric conversion
|
||||
r = s.copy().astype('O')
|
||||
r['a'] = '1'
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = r.convert_objects(convert_dates=False,
|
||||
convert_numeric=True)
|
||||
assert_series_equal(result, s)
|
||||
|
||||
r = s.copy().astype('O')
|
||||
r['a'] = '1.'
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = r.convert_objects(convert_dates=False,
|
||||
convert_numeric=True)
|
||||
assert_series_equal(result, s)
|
||||
|
||||
r = s.copy().astype('O')
|
||||
r['a'] = 'garbled'
|
||||
expected = s.copy()
|
||||
expected['a'] = np.nan
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = r.convert_objects(convert_dates=False,
|
||||
convert_numeric=True)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# GH 4119, not converting a mixed type (e.g.floats and object)
|
||||
s = Series([1, 'na', 3, 4])
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = s.convert_objects(convert_numeric=True)
|
||||
expected = Series([1, np.nan, 3, 4])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
s = Series([1, '', 3, 4])
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = s.convert_objects(convert_numeric=True)
|
||||
expected = Series([1, np.nan, 3, 4])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# dates
|
||||
s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
|
||||
datetime(2001, 1, 3, 0, 0)])
|
||||
s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
|
||||
datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1,
|
||||
Timestamp('20010104'), '20010105'],
|
||||
dtype='O')
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = s.convert_objects(convert_dates=True,
|
||||
convert_numeric=False)
|
||||
expected = Series([Timestamp('20010101'), Timestamp('20010102'),
|
||||
Timestamp('20010103')], dtype='M8[ns]')
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = s.convert_objects(convert_dates='coerce',
|
||||
convert_numeric=False)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = s.convert_objects(convert_dates='coerce',
|
||||
convert_numeric=True)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
expected = Series([Timestamp('20010101'), Timestamp('20010102'),
|
||||
Timestamp('20010103'),
|
||||
NaT, NaT, NaT, Timestamp('20010104'),
|
||||
Timestamp('20010105')], dtype='M8[ns]')
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = s2.convert_objects(convert_dates='coerce',
|
||||
convert_numeric=False)
|
||||
assert_series_equal(result, expected)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = s2.convert_objects(convert_dates='coerce',
|
||||
convert_numeric=True)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# preserver all-nans (if convert_dates='coerce')
|
||||
s = Series(['foo', 'bar', 1, 1.0], dtype='O')
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = s.convert_objects(convert_dates='coerce',
|
||||
convert_numeric=False)
|
||||
expected = Series([NaT] * 2 + [Timestamp(1)] * 2)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# preserver if non-object
|
||||
s = Series([1], dtype='float32')
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = s.convert_objects(convert_dates='coerce',
|
||||
convert_numeric=False)
|
||||
assert_series_equal(result, s)
|
||||
|
||||
# r = s.copy()
|
||||
# r[0] = np.nan
|
||||
# result = r.convert_objects(convert_dates=True,convert_numeric=False)
|
||||
# assert result.dtype == 'M8[ns]'
|
||||
|
||||
# dateutil parses some single letters into today's value as a date
|
||||
for x in 'abcdefghijklmnopqrstuvwxyz':
|
||||
s = Series([x])
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = s.convert_objects(convert_dates='coerce')
|
||||
assert_series_equal(result, s)
|
||||
s = Series([x.upper()])
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = s.convert_objects(convert_dates='coerce')
|
||||
assert_series_equal(result, s)
|
||||
|
||||
def test_convert_objects_preserve_bool(self):
|
||||
s = Series([1, True, 3, 5], dtype=object)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
r = s.convert_objects(convert_numeric=True)
|
||||
e = Series([1, 1, 3, 5], dtype='i8')
|
||||
tm.assert_series_equal(r, e)
|
||||
|
||||
def test_convert_objects_preserve_all_bool(self):
|
||||
s = Series([False, True, False, False], dtype=object)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
r = s.convert_objects(convert_numeric=True)
|
||||
e = Series([False, True, False, False], dtype=bool)
|
||||
tm.assert_series_equal(r, e)
|
||||
|
||||
# GH 10265
|
||||
def test_convert(self):
|
||||
# Tests: All to nans, coerce, true
|
||||
# Test coercion returns correct type
|
||||
s = Series(['a', 'b', 'c'])
|
||||
results = s._convert(datetime=True, coerce=True)
|
||||
expected = Series([NaT] * 3)
|
||||
assert_series_equal(results, expected)
|
||||
|
||||
results = s._convert(numeric=True, coerce=True)
|
||||
expected = Series([np.nan] * 3)
|
||||
assert_series_equal(results, expected)
|
||||
|
||||
expected = Series([NaT] * 3, dtype=np.dtype('m8[ns]'))
|
||||
results = s._convert(timedelta=True, coerce=True)
|
||||
assert_series_equal(results, expected)
|
||||
|
||||
dt = datetime(2001, 1, 1, 0, 0)
|
||||
td = dt - datetime(2000, 1, 1, 0, 0)
|
||||
|
||||
# Test coercion with mixed types
|
||||
s = Series(['a', '3.1415', dt, td])
|
||||
results = s._convert(datetime=True, coerce=True)
|
||||
expected = Series([NaT, NaT, dt, NaT])
|
||||
assert_series_equal(results, expected)
|
||||
|
||||
results = s._convert(numeric=True, coerce=True)
|
||||
expected = Series([np.nan, 3.1415, np.nan, np.nan])
|
||||
assert_series_equal(results, expected)
|
||||
|
||||
results = s._convert(timedelta=True, coerce=True)
|
||||
expected = Series([NaT, NaT, NaT, td],
|
||||
dtype=np.dtype('m8[ns]'))
|
||||
assert_series_equal(results, expected)
|
||||
|
||||
# Test standard conversion returns original
|
||||
results = s._convert(datetime=True)
|
||||
assert_series_equal(results, s)
|
||||
results = s._convert(numeric=True)
|
||||
expected = Series([np.nan, 3.1415, np.nan, np.nan])
|
||||
assert_series_equal(results, expected)
|
||||
results = s._convert(timedelta=True)
|
||||
assert_series_equal(results, s)
|
||||
|
||||
# test pass-through and non-conversion when other types selected
|
||||
s = Series(['1.0', '2.0', '3.0'])
|
||||
results = s._convert(datetime=True, numeric=True, timedelta=True)
|
||||
expected = Series([1.0, 2.0, 3.0])
|
||||
assert_series_equal(results, expected)
|
||||
results = s._convert(True, False, True)
|
||||
assert_series_equal(results, s)
|
||||
|
||||
s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)],
|
||||
dtype='O')
|
||||
results = s._convert(datetime=True, numeric=True, timedelta=True)
|
||||
expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0,
|
||||
0)])
|
||||
assert_series_equal(results, expected)
|
||||
results = s._convert(datetime=False, numeric=True, timedelta=True)
|
||||
assert_series_equal(results, s)
|
||||
|
||||
td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0)
|
||||
s = Series([td, td], dtype='O')
|
||||
results = s._convert(datetime=True, numeric=True, timedelta=True)
|
||||
expected = Series([td, td])
|
||||
assert_series_equal(results, expected)
|
||||
results = s._convert(True, True, False)
|
||||
assert_series_equal(results, s)
|
||||
|
||||
s = Series([1., 2, 3], index=['a', 'b', 'c'])
|
||||
result = s._convert(numeric=True)
|
||||
assert_series_equal(result, s)
|
||||
|
||||
# force numeric conversion
|
||||
r = s.copy().astype('O')
|
||||
r['a'] = '1'
|
||||
result = r._convert(numeric=True)
|
||||
assert_series_equal(result, s)
|
||||
|
||||
r = s.copy().astype('O')
|
||||
r['a'] = '1.'
|
||||
result = r._convert(numeric=True)
|
||||
assert_series_equal(result, s)
|
||||
|
||||
r = s.copy().astype('O')
|
||||
r['a'] = 'garbled'
|
||||
result = r._convert(numeric=True)
|
||||
expected = s.copy()
|
||||
expected['a'] = np.nan
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# GH 4119, not converting a mixed type (e.g.floats and object)
|
||||
s = Series([1, 'na', 3, 4])
|
||||
result = s._convert(datetime=True, numeric=True)
|
||||
expected = Series([1, np.nan, 3, 4])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
s = Series([1, '', 3, 4])
|
||||
result = s._convert(datetime=True, numeric=True)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# dates
|
||||
s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
|
||||
datetime(2001, 1, 3, 0, 0)])
|
||||
s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
|
||||
datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1,
|
||||
Timestamp('20010104'), '20010105'], dtype='O')
|
||||
|
||||
result = s._convert(datetime=True)
|
||||
expected = Series([Timestamp('20010101'), Timestamp('20010102'),
|
||||
Timestamp('20010103')], dtype='M8[ns]')
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = s._convert(datetime=True, coerce=True)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
expected = Series([Timestamp('20010101'), Timestamp('20010102'),
|
||||
Timestamp('20010103'), NaT, NaT, NaT,
|
||||
Timestamp('20010104'), Timestamp('20010105')],
|
||||
dtype='M8[ns]')
|
||||
result = s2._convert(datetime=True, numeric=False, timedelta=False,
|
||||
coerce=True)
|
||||
assert_series_equal(result, expected)
|
||||
result = s2._convert(datetime=True, coerce=True)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
s = Series(['foo', 'bar', 1, 1.0], dtype='O')
|
||||
result = s._convert(datetime=True, coerce=True)
|
||||
expected = Series([NaT] * 2 + [Timestamp(1)] * 2)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# preserver if non-object
|
||||
s = Series([1], dtype='float32')
|
||||
result = s._convert(datetime=True, coerce=True)
|
||||
assert_series_equal(result, s)
|
||||
|
||||
# r = s.copy()
|
||||
# r[0] = np.nan
|
||||
# result = r._convert(convert_dates=True,convert_numeric=False)
|
||||
# assert result.dtype == 'M8[ns]'
|
||||
|
||||
# dateutil parses some single letters into today's value as a date
|
||||
expected = Series([NaT])
|
||||
for x in 'abcdefghijklmnopqrstuvwxyz':
|
||||
s = Series([x])
|
||||
result = s._convert(datetime=True, coerce=True)
|
||||
assert_series_equal(result, expected)
|
||||
s = Series([x.upper()])
|
||||
result = s._convert(datetime=True, coerce=True)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
def test_convert_no_arg_error(self):
|
||||
s = Series(['1.0', '2'])
|
||||
msg = r"At least one of datetime, numeric or timedelta must be True\."
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s._convert()
|
||||
|
||||
def test_convert_preserve_bool(self):
|
||||
s = Series([1, True, 3, 5], dtype=object)
|
||||
r = s._convert(datetime=True, numeric=True)
|
||||
e = Series([1, 1, 3, 5], dtype='i8')
|
||||
tm.assert_series_equal(r, e)
|
||||
|
||||
def test_convert_preserve_all_bool(self):
|
||||
s = Series([False, True, False, False], dtype=object)
|
||||
r = s._convert(datetime=True, numeric=True)
|
||||
e = Series([False, True, False, False], dtype=bool)
|
||||
tm.assert_series_equal(r, e)
|
||||
|
||||
def test_constructor_no_pandas_array(self):
|
||||
ser = pd.Series([1, 2, 3])
|
||||
result = pd.Series(ser.array)
|
||||
tm.assert_series_equal(ser, result)
|
||||
assert isinstance(result._data.blocks[0], IntBlock)
|
||||
|
||||
def test_from_array(self):
|
||||
result = pd.Series(pd.array(['1H', '2H'], dtype='timedelta64[ns]'))
|
||||
assert result._data.blocks[0].is_extension is False
|
||||
|
||||
result = pd.Series(pd.array(['2015'], dtype='datetime64[ns]'))
|
||||
assert result._data.blocks[0].is_extension is False
|
||||
|
||||
def test_from_list_dtype(self):
|
||||
result = pd.Series(['1H', '2H'], dtype='timedelta64[ns]')
|
||||
assert result._data.blocks[0].is_extension is False
|
||||
|
||||
result = pd.Series(['2015'], dtype='datetime64[ns]')
|
||||
assert result._data.blocks[0].is_extension is False
|
||||
|
||||
|
||||
def test_hasnans_unchached_for_series():
|
||||
# GH#19700
|
||||
idx = pd.Index([0, 1])
|
||||
assert idx.hasnans is False
|
||||
assert 'hasnans' in idx._cache
|
||||
ser = idx.to_series()
|
||||
assert ser.hasnans is False
|
||||
assert not hasattr(ser, '_cache')
|
||||
ser.iloc[-1] = np.nan
|
||||
assert ser.hasnans is True
|
||||
assert Series.hasnans.__doc__ == pd.Index.hasnans.__doc__
|
||||
@@ -0,0 +1,267 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import collections
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import StringIO, u
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import (
|
||||
assert_almost_equal, assert_frame_equal, assert_series_equal, ensure_clean)
|
||||
|
||||
from pandas.io.common import _get_handle
|
||||
|
||||
|
||||
class TestSeriesToCSV():
    """Round-trip and deprecation tests for Series.to_csv / Series.from_csv."""

    def read_csv(self, path, **kwargs):
        """Read *path* back as a Series, mirroring the to_csv defaults.

        With no header, the name metadata is wiped so the result compares
        equal to what headerless to_csv actually preserved.
        """
        options = dict(squeeze=True, index_col=0,
                       header=None, parse_dates=True)
        options.update(**kwargs)

        header = options.get("header")
        out = pd.read_csv(path, **options)

        if header is None:
            # headerless files carry no name information
            out.name = out.index.name = None

        return out

    def test_from_csv_deprecation(self, datetime_series):
        # see gh-17812
        with ensure_clean() as path:
            datetime_series.to_csv(path, header=False)

            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                ts = self.read_csv(path)
                depr_ts = Series.from_csv(path)
                assert_series_equal(depr_ts, ts)

    @pytest.mark.parametrize("arg", ["path", "header", "both"])
    def test_to_csv_deprecation(self, arg, datetime_series):
        # see gh-19715
        with ensure_clean() as path:
            if arg == "path":
                kwargs = dict(path=path, header=False)
            elif arg == "header":
                kwargs = dict(path_or_buf=path)
            else:  # Both discrepancies match.
                kwargs = dict(path=path)

            with tm.assert_produces_warning(FutureWarning):
                datetime_series.to_csv(**kwargs)

            # Make sure roundtrip still works.
            ts = self.read_csv(path)
            assert_series_equal(datetime_series, ts, check_names=False)

    def test_from_csv(self, datetime_series, string_series):
        """Full to_csv/read back round trips, with and without headers."""
        with ensure_clean() as path:
            datetime_series.to_csv(path, header=False)
            ts = self.read_csv(path)
            assert_series_equal(datetime_series, ts, check_names=False)

            assert ts.name is None
            assert ts.index.name is None

            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                depr_ts = Series.from_csv(path)
                assert_series_equal(depr_ts, ts)

            # see gh-10483
            datetime_series.to_csv(path, header=True)
            ts_h = self.read_csv(path, header=0)
            assert ts_h.name == "ts"

            string_series.to_csv(path, header=False)
            series = self.read_csv(path)
            assert_series_equal(string_series, series, check_names=False)

            assert series.name is None
            assert series.index.name is None

            string_series.to_csv(path, header=True)
            series_h = self.read_csv(path, header=0)
            assert series_h.name == "series"

            # a hand-written, pipe-delimited file
            with open(path, "w") as outfile:
                outfile.write("1998-01-01|1.0\n1999-01-01|2.0")

            series = self.read_csv(path, sep="|")
            check_series = Series({datetime(1998, 1, 1): 1.0,
                                   datetime(1999, 1, 1): 2.0})
            assert_series_equal(check_series, series)

            series = self.read_csv(path, sep="|", parse_dates=False)
            check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0})
            assert_series_equal(check_series, series)

    def test_to_csv(self, datetime_series):
        import io

        with ensure_clean() as path:
            datetime_series.to_csv(path, header=False)

            with io.open(path, newline=None) as f:
                lines = f.readlines()
            # no blank line should follow the first row
            assert (lines[1] != '\n')

            datetime_series.to_csv(path, index=False, header=False)
            arr = np.loadtxt(path)
            assert_almost_equal(arr, datetime_series.values)

    def test_to_csv_unicode_index(self):
        """Unicode values and index labels survive a UTF-8 round trip."""
        buf = StringIO()
        s = Series([u("\u05d0"), "d2"], index=[u("\u05d0"), u("\u05d1")])

        s.to_csv(buf, encoding="UTF-8", header=False)
        buf.seek(0)

        s2 = self.read_csv(buf, index_col=0, encoding="UTF-8")
        assert_series_equal(s, s2)

    def test_to_csv_float_format(self):
        with ensure_clean() as filename:
            ser = Series([0.123456, 0.234567, 0.567567])
            ser.to_csv(filename, float_format="%.2f", header=False)

            roundtripped = self.read_csv(filename)
            expected = Series([0.12, 0.23, 0.57])
            assert_series_equal(roundtripped, expected)

    def test_to_csv_list_entries(self):
        # smoke test: list-valued entries must not blow up on write
        s = Series(['jack and jill', 'jesse and frank'])
        split = s.str.split(r'\s+and\s+')

        buf = StringIO()
        split.to_csv(buf, header=False)

    def test_to_csv_path_is_none(self):
        # GH 8215
        # Series.to_csv() was returning None, inconsistent with
        # DataFrame.to_csv() which returned string
        s = Series([1, 2, 3])
        csv_str = s.to_csv(path_or_buf=None, header=False)
        assert isinstance(csv_str, str)

    @pytest.mark.parametrize('s,encoding', [
        (Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
                name='X'), None),
        # GH 21241, 21118
        (Series(['abc', 'def', 'ghi'], name='X'), 'ascii'),
        (Series(["123", u"你好", u"世界"], name=u"中文"), 'gb2312'),
        (Series(["123", u"Γειά σου", u"Κόσμε"], name=u"Ελληνικά"), 'cp737')
    ])
    def test_to_csv_compression(self, s, encoding, compression):
        """Compressed round trips via path, file handle, and decompression."""
        with ensure_clean() as filename:

            s.to_csv(filename, compression=compression, encoding=encoding,
                     header=True)
            # test the round trip - to_csv -> read_csv
            result = pd.read_csv(filename, compression=compression,
                                 encoding=encoding, index_col=0, squeeze=True)
            assert_series_equal(s, result)

            # test the round trip using file handle - to_csv -> read_csv
            f, _handles = _get_handle(filename, 'w', compression=compression,
                                      encoding=encoding)
            with f:
                s.to_csv(f, encoding=encoding, header=True)
            result = pd.read_csv(filename, compression=compression,
                                 encoding=encoding, index_col=0, squeeze=True)
            assert_series_equal(s, result)

            # explicitly ensure file was compressed
            with tm.decompress_file(filename, compression) as fh:
                text = fh.read().decode(encoding or 'utf8')
                assert s.name in text

            with tm.decompress_file(filename, compression) as fh:
                assert_series_equal(s, pd.read_csv(fh,
                                                   index_col=0,
                                                   squeeze=True,
                                                   encoding=encoding))
|
||||
class TestSeriesIO():
    """Conversion and serialization helpers: to_frame, pickle, to_dict."""

    def test_to_frame(self, datetime_series):
        # unnamed Series -> single unnamed column
        datetime_series.name = None
        result = datetime_series.to_frame()
        expected = pd.DataFrame(datetime_series.values,
                                index=datetime_series.index)
        assert_frame_equal(result, expected)

        # named Series -> column named after it
        datetime_series.name = 'testname'
        result = datetime_series.to_frame()
        expected = pd.DataFrame(dict(testname=datetime_series.values),
                                index=datetime_series.index)
        assert_frame_equal(result, expected)

        # explicit name overrides the Series name
        result = datetime_series.to_frame(name='testdifferent')
        expected = pd.DataFrame(dict(testdifferent=datetime_series.values),
                                index=datetime_series.index)
        assert_frame_equal(result, expected)

    def test_timeseries_periodindex(self):
        # GH2891
        from pandas import period_range
        prng = period_range('1/1/2011', '1/1/2012', freq='M')
        ts = Series(np.random.randn(len(prng)), prng)
        new_ts = tm.round_trip_pickle(ts)
        # the PeriodIndex frequency must survive pickling
        assert new_ts.index.freq == 'M'

    def test_pickle_preserve_name(self):
        # names of assorted hashable types survive a pickle round trip
        for name in [777, 777., 'name', datetime(2001, 11, 11), (1, 2)]:
            unpickled = self._pickle_roundtrip_name(
                tm.makeTimeSeries(name=name))
            assert unpickled.name == name

    def _pickle_roundtrip_name(self, obj):
        # helper: write obj to a temp pickle file and read it back
        with ensure_clean() as path:
            obj.to_pickle(path)
            return pd.read_pickle(path)

    def test_to_frame_expanddim(self):
        # GH 9762: to_frame must honour _constructor_expanddim

        class SubclassedSeries(Series):

            @property
            def _constructor_expanddim(self):
                return SubclassedFrame

        class SubclassedFrame(DataFrame):
            pass

        s = SubclassedSeries([1, 2, 3], name='X')
        result = s.to_frame()
        assert isinstance(result, SubclassedFrame)
        expected = SubclassedFrame({'X': [1, 2, 3]})
        assert_frame_equal(result, expected)

    @pytest.mark.parametrize('mapping', (
        dict,
        collections.defaultdict(list),
        collections.OrderedDict))
    def test_to_dict(self, mapping, datetime_series):
        # GH16122
        tm.assert_series_equal(
            Series(datetime_series.to_dict(mapping), name='ts'),
            datetime_series)
        from_method = Series(datetime_series.to_dict(collections.Counter))
        from_constructor = Series(collections
                                  .Counter(datetime_series.iteritems()))
        tm.assert_series_equal(from_method, from_constructor)
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,749 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.compat as compat
|
||||
from pandas.compat import range
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical, DataFrame, Index, Series, bdate_range, date_range, isna)
|
||||
from pandas.core import ops
|
||||
import pandas.core.nanops as nanops
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import (
|
||||
assert_almost_equal, assert_frame_equal, assert_series_equal)
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class TestSeriesLogicalOps(object):
    """Behaviour of &, |, ^ on Series: NA propagation, alignment, scalars."""

    @pytest.mark.parametrize('bool_op', [operator.and_,
                                         operator.or_, operator.xor])
    def test_bool_operators_with_nas(self, bool_op):
        # boolean &, |, ^ should work with object arrays and propagate NAs
        ser = Series(bdate_range('1/1/2000', periods=10), dtype=object)
        ser[::2] = np.nan

        mask = ser.isna()
        filled = ser.fillna(ser[0])

        result = bool_op(ser < ser[9], ser > ser[3])

        # NA positions come out False regardless of the operator
        expected = bool_op(filled < filled[9], filled > filled[3])
        expected[mask] = False
        assert_series_equal(result, expected)

    def test_operators_bitwise(self):
        # GH#9016: support bitwise op for integer types
        index = list('bca')

        s_tft = Series([True, False, True], index=index)
        s_fff = Series([False, False, False], index=index)
        s_tff = Series([True, False, False], index=index)
        s_empty = Series([])

        # TODO: unused
        # s_0101 = Series([0, 1, 0, 1])

        s_0123 = Series(range(4), dtype='int64')
        s_3333 = Series([3] * 4)
        s_4444 = Series([4] * 4)

        # bool vs empty
        assert_series_equal(s_tft & s_empty, s_fff)
        assert_series_equal(s_tft | s_empty, s_tft)

        # int vs int
        assert_series_equal(s_0123 & s_3333, Series(range(4), dtype='int64'))
        assert_series_equal(s_0123 | s_4444,
                            Series(range(4, 8), dtype='int64'))

        s_a0b1c0 = Series([1], list('b'))

        # bool vs partially-overlapping int index
        assert_series_equal(s_tft & s_a0b1c0, s_tff.reindex(list('abc')))
        assert_series_equal(s_tft | s_a0b1c0, s_tft.reindex(list('abc')))

        # int scalars
        n0 = 0
        assert_series_equal(s_tft & n0, s_fff)
        assert_series_equal(s_0123 & n0, Series([0] * 4))

        n1 = 1
        assert_series_equal(s_tft & n1, s_tft)
        assert_series_equal(s_0123 & n1, Series([0, 1, 0, 1]))

        # mixed int widths upcast
        s_1111 = Series([1] * 4, dtype='int8')
        assert_series_equal(s_0123 & s_1111,
                            Series([0, 1, 0, 1], dtype='int64'))
        assert_series_equal(s_0123.astype(np.int16) | s_1111.astype(np.int32),
                            Series([1, 1, 3, 3], dtype='int32'))

        # incompatible operands raise
        with pytest.raises(TypeError):
            s_1111 & 'a'
        with pytest.raises(TypeError):
            s_1111 & ['a', 'b', 'c', 'd']
        with pytest.raises(TypeError):
            s_0123 & np.NaN
        with pytest.raises(TypeError):
            s_0123 & 3.14
        with pytest.raises(TypeError):
            s_0123 & [0.1, 4, 3.14, 2]

        # s_0123 will be all false now because of reindexing like s_tft
        exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
        assert_series_equal(s_tft & s_0123, exp)

        # s_tft will be all false now because of reindexing like s_0123
        exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
        assert_series_equal(s_0123 & s_tft, exp)

        assert_series_equal(s_0123 & False, Series([False] * 4))
        assert_series_equal(s_0123 ^ False, Series([False, True, True, True]))
        assert_series_equal(s_0123 & [False], Series([False] * 4))
        assert_series_equal(s_0123 & (False), Series([False] * 4))
        assert_series_equal(s_0123 & Series([False, np.NaN, False, False]),
                            Series([False] * 4))

        s_ftft = Series([False, True, False, True])
        assert_series_equal(s_0123 & Series([0.1, 4, -3.14, 2]), s_ftft)

        s_abNd = Series(['a', 'b', np.NaN, 'd'])
        assert_series_equal(s_0123 & s_abNd, s_ftft)

    def test_scalar_na_logical_ops_corners(self):
        s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10])

        with pytest.raises(TypeError):
            s & datetime(2005, 1, 1)

        s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)])
        s[::2] = np.nan

        expected = Series(True, index=s.index)
        expected[::2] = False
        assert_series_equal(s & list(s), expected)

        d = DataFrame({'A': s})
        # TODO: Fix this exception - needs to be fixed! (see GH5035)
        # (previously this was a TypeError because series returned
        # NotImplemented

        # this is an alignment issue; these are equivalent
        # https://github.com/pandas-dev/pandas/issues/5284

        with pytest.raises(TypeError):
            d.__and__(s, axis='columns')

        with pytest.raises(TypeError):
            s & d

        # this is wrong as its not a boolean result
        # result = d.__and__(s,axis='index')

    @pytest.mark.parametrize('op', [
        operator.and_,
        operator.or_,
        operator.xor,

    ])
    def test_logical_ops_with_index(self, op):
        # GH#22092, GH#19792
        ser = Series([True, True, False, False])
        idx1 = Index([True, False, True, False])
        idx2 = Index([1, 0, 1, 0])

        # bool Index behaves elementwise
        expected = Series([op(ser[n], idx1[n]) for n in range(len(ser))])
        assert_series_equal(op(ser, idx1), expected)

        # int Index coerces to bool result
        expected = Series([op(ser[n], idx2[n]) for n in range(len(ser))],
                          dtype=bool)
        assert_series_equal(op(ser, idx2), expected)

    @pytest.mark.parametrize("op, expected", [
        (ops.rand_, pd.Index([False, True])),
        (ops.ror_, pd.Index([False, True])),
        (ops.rxor, pd.Index([])),
    ])
    def test_reverse_ops_with_index(self, op, expected):
        # https://github.com/pandas-dev/pandas/pull/23628
        # multi-set Index ops are buggy, so let's avoid duplicates...
        ser = Series([True, False])
        idx = Index([False, True])
        result = op(ser, idx)
        tm.assert_index_equal(result, expected)

    def test_logical_ops_label_based(self):
        # GH#4947
        # logical ops should be label based

        a = Series([True, False, True], list('bca'))
        b = Series([False, True, False], list('abc'))

        assert_series_equal(a & b, Series([False, True, False], list('abc')))
        assert_series_equal(a | b, Series([True, True, False], list('abc')))
        assert_series_equal(a ^ b, Series([True, False, False], list('abc')))

        # rhs is bigger
        a = Series([True, False, True], list('bca'))
        b = Series([False, True, False, True], list('abcd'))

        assert_series_equal(a & b,
                            Series([False, True, False, False], list('abcd')))
        assert_series_equal(a | b,
                            Series([True, True, False, False], list('abcd')))

        # filling

        # vs empty
        assert_series_equal(a & Series([]),
                            Series([False, False, False], list('bca')))
        assert_series_equal(a | Series([]),
                            Series([True, False, True], list('bca')))

        # vs non-matching
        assert_series_equal(a & Series([1], ['z']),
                            Series([False, False, False, False],
                                   list('abcz')))
        assert_series_equal(a | Series([1], ['z']),
                            Series([True, True, False, False], list('abcz')))

        # identity
        # we would like s[s|e] == s to hold for any e, whether empty or not
        for e in [Series([]), Series([1], ['z']),
                  Series(np.nan, b.index), Series(np.nan, a.index)]:
            assert_series_equal(a[a | e], a[a])

        for e in [Series(['z'])]:
            assert_series_equal(a[a | e], a[a])

        # vs scalars
        index = list('bca')
        t = Series([True, False, True])

        for v in [True, 1, 2]:
            assert_series_equal(Series([True, False, True], index=index) | v,
                                Series([True, True, True], index=index))

        for v in [np.nan, 'foo']:
            with pytest.raises(TypeError):
                t | v

        for v in [False, 0]:
            assert_series_equal(Series([True, False, True], index=index) | v,
                                Series([True, False, True], index=index))

        for v in [True, 1]:
            assert_series_equal(Series([True, False, True], index=index) & v,
                                Series([True, False, True], index=index))

        for v in [False, 0]:
            assert_series_equal(Series([True, False, True], index=index) & v,
                                Series([False, False, False], index=index))
        for v in [np.nan]:
            with pytest.raises(TypeError):
                t & v

    def test_logical_ops_df_compat(self):
        # GH#1134
        s1 = pd.Series([True, False, True], index=list('ABC'), name='x')
        s2 = pd.Series([True, True, False], index=list('ABD'), name='x')

        exp = pd.Series([True, False, False, False],
                        index=list('ABCD'), name='x')
        assert_series_equal(s1 & s2, exp)
        assert_series_equal(s2 & s1, exp)

        # True | np.nan => True
        exp = pd.Series([True, True, True, False],
                        index=list('ABCD'), name='x')
        assert_series_equal(s1 | s2, exp)
        # np.nan | True => np.nan, filled with False
        exp = pd.Series([True, True, False, False],
                        index=list('ABCD'), name='x')
        assert_series_equal(s2 | s1, exp)

        # DataFrame doesn't fill nan with False
        exp = pd.DataFrame({'x': [True, False, np.nan, np.nan]},
                           index=list('ABCD'))
        assert_frame_equal(s1.to_frame() & s2.to_frame(), exp)
        assert_frame_equal(s2.to_frame() & s1.to_frame(), exp)

        exp = pd.DataFrame({'x': [True, True, np.nan, np.nan]},
                           index=list('ABCD'))
        assert_frame_equal(s1.to_frame() | s2.to_frame(), exp)
        assert_frame_equal(s2.to_frame() | s1.to_frame(), exp)

        # different length
        s3 = pd.Series([True, False, True], index=list('ABC'), name='x')
        s4 = pd.Series([True, True, True, True], index=list('ABCD'), name='x')

        exp = pd.Series([True, False, True, False],
                        index=list('ABCD'), name='x')
        assert_series_equal(s3 & s4, exp)
        assert_series_equal(s4 & s3, exp)

        # np.nan | True => np.nan, filled with False
        exp = pd.Series([True, True, True, False],
                        index=list('ABCD'), name='x')
        assert_series_equal(s3 | s4, exp)
        # True | np.nan => True
        exp = pd.Series([True, True, True, True],
                        index=list('ABCD'), name='x')
        assert_series_equal(s4 | s3, exp)

        exp = pd.DataFrame({'x': [True, False, True, np.nan]},
                           index=list('ABCD'))
        assert_frame_equal(s3.to_frame() & s4.to_frame(), exp)
        assert_frame_equal(s4.to_frame() & s3.to_frame(), exp)

        exp = pd.DataFrame({'x': [True, True, True, np.nan]},
                           index=list('ABCD'))
        assert_frame_equal(s3.to_frame() | s4.to_frame(), exp)
        assert_frame_equal(s4.to_frame() | s3.to_frame(), exp)
||||
class TestSeriesComparisons(object):
    """Comparison operator behaviour: NA handling, categoricals, alignment."""

    def test_comparisons(self):
        left = np.random.randn(10)
        right = np.random.randn(10)
        left[:3] = np.nan

        result = nanops.nangt(left, right)
        with np.errstate(invalid='ignore'):
            expected = (left > right).astype('O')
        # NA positions compare as NaN, not False
        expected[:3] = np.nan

        assert_almost_equal(result, expected)

        s = Series(['a', 'b', 'c'])
        s2 = Series([False, True, False])

        # it works!
        exp = Series([False, False, False])
        assert_series_equal(s == s2, exp)
        assert_series_equal(s2 == s, exp)

    def test_categorical_comparisons(self):
        # GH 8938
        # allow equality comparisons
        a = Series(list('abc'), dtype="category")
        b = Series(list('abc'), dtype="object")
        c = Series(['a', 'b', 'cc'], dtype="object")
        d = Series(list('acb'), dtype="object")
        e = Categorical(list('abc'))
        f = Categorical(list('acb'))

        # vs scalar
        assert not (a == 'a').all()
        assert ((a != 'a') == ~(a == 'a')).all()

        assert not ('a' == a).all()
        assert (a == 'a')[0]
        assert ('a' == a)[0]
        assert not ('a' != a)[0]

        # vs list-like
        assert (a == a).all()
        assert not (a != a).all()

        assert (a == list(a)).all()
        assert (a == b).all()
        assert (b == a).all()
        assert ((~(a == b)) == (a != b)).all()
        assert ((~(b == a)) == (b != a)).all()

        assert not (a == c).all()
        assert not (c == a).all()
        assert not (a == d).all()
        assert not (d == a).all()

        # vs a cat-like
        assert (a == e).all()
        assert (e == a).all()
        assert not (a == f).all()
        assert not (f == a).all()

        assert ((~(a == e) == (a != e)).all())
        assert ((~(e == a) == (e != a)).all())
        assert ((~(a == f) == (a != f)).all())
        assert ((~(f == a) == (f != a)).all())

        # non-equality is not comparable
        with pytest.raises(TypeError):
            a < b
        with pytest.raises(TypeError):
            b < a
        with pytest.raises(TypeError):
            a > b
        with pytest.raises(TypeError):
            b > a

    def test_comparison_tuples(self):
        # GH11339
        # comparisons vs tuple
        s = Series([(1, 1), (1, 2)])

        assert_series_equal(s == (1, 2), Series([False, True]))
        assert_series_equal(s != (1, 2), Series([True, False]))

        assert_series_equal(s == (0, 0), Series([False, False]))
        assert_series_equal(s != (0, 0), Series([True, True]))

        s = Series([(1, 1), (1, 1)])

        assert_series_equal(s == (1, 1), Series([True, True]))
        assert_series_equal(s != (1, 1), Series([False, False]))

        s = Series([frozenset([1]), frozenset([1, 2])])

        assert_series_equal(s == frozenset([1]), Series([True, False]))

    def test_comparison_operators_with_nas(self):
        ser = Series(bdate_range('1/1/2000', periods=10), dtype=object)
        ser[::2] = np.nan

        # test that comparisons work
        # (local name avoids shadowing the module-level `ops` import)
        op_names = ['lt', 'le', 'gt', 'ge', 'eq', 'ne']
        for op_name in op_names:
            val = ser[5]

            func = getattr(operator, op_name)
            result = func(ser, val)

            expected = func(ser.dropna(), val).reindex(ser.index)

            if op_name == 'ne':
                expected = expected.fillna(True).astype(bool)
            else:
                expected = expected.fillna(False).astype(bool)

            assert_series_equal(result, expected)

            # reversed comparisons against the scalar do not currently
            # round-trip the same way:
            # result = f(val, s)
            # expected = f(val, s.dropna()).reindex(s.index)
            # assert_series_equal(result, expected)

    def test_unequal_categorical_comparison_raises_type_error(self):
        # unequal comparison should raise for unordered cats
        cat = Series(Categorical(list("abc")))
        with pytest.raises(TypeError):
            cat > "b"

        cat = Series(Categorical(list("abc"), ordered=False))
        with pytest.raises(TypeError):
            cat > "b"

        # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057
        # and following comparisons with scalars not in categories should raise
        # for unequal comps, but not for equal/not equal
        cat = Series(Categorical(list("abc"), ordered=True))

        with pytest.raises(TypeError):
            cat < "d"
        with pytest.raises(TypeError):
            cat > "d"
        with pytest.raises(TypeError):
            "d" < cat
        with pytest.raises(TypeError):
            "d" > cat

        tm.assert_series_equal(cat == "d", Series([False, False, False]))
        tm.assert_series_equal(cat != "d", Series([True, True, True]))

    def test_ne(self):
        ts = Series([3, 4, 5, 6, 7], [3, 4, 5, 6, 7], dtype=float)
        expected = [True, True, False, True, True]
        assert tm.equalContents(ts.index != 5, expected)
        assert tm.equalContents(~(ts.index == 5), expected)

    def test_comp_ops_df_compat(self):
        # GH 1134
        s1 = pd.Series([1, 2, 3], index=list('ABC'), name='x')
        s2 = pd.Series([2, 2, 2], index=list('ABD'), name='x')

        s3 = pd.Series([1, 2, 3], index=list('ABC'), name='x')
        s4 = pd.Series([2, 2, 2, 2], index=list('ABCD'), name='x')

        for left, right in [(s1, s2), (s2, s1), (s3, s4), (s4, s3)]:

            msg = "Can only compare identically-labeled Series objects"
            with pytest.raises(ValueError, match=msg):
                left == right

            with pytest.raises(ValueError, match=msg):
                left != right

            with pytest.raises(ValueError, match=msg):
                left < right

            msg = "Can only compare identically-labeled DataFrame objects"
            with pytest.raises(ValueError, match=msg):
                left.to_frame() == right.to_frame()

            with pytest.raises(ValueError, match=msg):
                left.to_frame() != right.to_frame()

            with pytest.raises(ValueError, match=msg):
                left.to_frame() < right.to_frame()
|
||||
class TestSeriesFlexComparisonOps(object):
    """Flex comparison methods (eq/ne/le/lt/ge/gt): alignment and fill_value."""

    def test_comparison_flex_alignment(self):
        # unaligned labels produce a union index; missing pairs compare False
        # (except ne, where they compare True)
        left = Series([1, 3, 2], index=list('abc'))
        right = Series([2, 2, 2], index=list('bcd'))

        assert_series_equal(left.eq(right),
                            pd.Series([False, False, True, False],
                                      index=list('abcd')))

        assert_series_equal(left.ne(right),
                            pd.Series([True, True, False, True],
                                      index=list('abcd')))

        assert_series_equal(left.le(right),
                            pd.Series([False, False, True, False],
                                      index=list('abcd')))

        assert_series_equal(left.lt(right),
                            pd.Series([False, False, False, False],
                                      index=list('abcd')))

        assert_series_equal(left.ge(right),
                            pd.Series([False, True, True, False],
                                      index=list('abcd')))

        assert_series_equal(left.gt(right),
                            pd.Series([False, True, False, False],
                                      index=list('abcd')))

    def test_comparison_flex_alignment_fill(self):
        # fill_value substitutes for the missing side before comparing
        left = Series([1, 3, 2], index=list('abc'))
        right = Series([2, 2, 2], index=list('bcd'))

        assert_series_equal(left.eq(right, fill_value=2),
                            pd.Series([False, False, True, True],
                                      index=list('abcd')))

        assert_series_equal(left.ne(right, fill_value=2),
                            pd.Series([True, True, False, False],
                                      index=list('abcd')))

        assert_series_equal(left.le(right, fill_value=0),
                            pd.Series([False, False, True, True],
                                      index=list('abcd')))

        assert_series_equal(left.lt(right, fill_value=0),
                            pd.Series([False, False, False, True],
                                      index=list('abcd')))

        assert_series_equal(left.ge(right, fill_value=0),
                            pd.Series([True, True, True, False],
                                      index=list('abcd')))

        assert_series_equal(left.gt(right, fill_value=0),
                            pd.Series([True, True, False, False],
                                      index=list('abcd')))
||||
class TestSeriesOperators(TestData):
|
||||
|
||||
def test_operators_empty_int_corner(self):
|
||||
s1 = Series([], [], dtype=np.int32)
|
||||
s2 = Series({'x': 0.})
|
||||
assert_series_equal(s1 * s2, Series([np.nan], index=['x']))
|
||||
|
||||
def test_ops_datetimelike_align(self):
|
||||
# GH 7500
|
||||
# datetimelike ops need to align
|
||||
dt = Series(date_range('2012-1-1', periods=3, freq='D'))
|
||||
dt.iloc[2] = np.nan
|
||||
dt2 = dt[::-1]
|
||||
|
||||
expected = Series([timedelta(0), timedelta(0), pd.NaT])
|
||||
# name is reset
|
||||
result = dt2 - dt
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
expected = Series(expected, name=0)
|
||||
result = (dt2.to_frame() - dt.to_frame())[0]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
def test_operators_corner(self):
|
||||
series = self.ts
|
||||
|
||||
empty = Series([], index=Index([]))
|
||||
|
||||
result = series + empty
|
||||
assert np.isnan(result).all()
|
||||
|
||||
result = empty + Series([], index=Index([]))
|
||||
assert len(result) == 0
|
||||
|
||||
# TODO: this returned NotImplemented earlier, what to do?
|
||||
# deltas = Series([timedelta(1)] * 5, index=np.arange(5))
|
||||
# sub_deltas = deltas[::2]
|
||||
# deltas5 = deltas * 5
|
||||
# deltas = deltas + sub_deltas
|
||||
|
||||
# float + int
|
||||
int_ts = self.ts.astype(int)[:-5]
|
||||
added = self.ts + int_ts
|
||||
expected = Series(self.ts.values[:-5] + int_ts.values,
|
||||
index=self.ts.index[:-5], name='ts')
|
||||
tm.assert_series_equal(added[:-5], expected)
|
||||
|
||||
pairings = []
|
||||
for op in ['add', 'sub', 'mul', 'pow', 'truediv', 'floordiv']:
|
||||
fv = 0
|
||||
lop = getattr(Series, op)
|
||||
lequiv = getattr(operator, op)
|
||||
rop = getattr(Series, 'r' + op)
|
||||
# bind op at definition time...
|
||||
requiv = lambda x, y, op=op: getattr(operator, op)(y, x)
|
||||
pairings.append((lop, lequiv, fv))
|
||||
pairings.append((rop, requiv, fv))
|
||||
if compat.PY3:
|
||||
pairings.append((Series.div, operator.truediv, 1))
|
||||
pairings.append((Series.rdiv, lambda x, y: operator.truediv(y, x), 1))
|
||||
else:
|
||||
pairings.append((Series.div, operator.div, 1))
|
||||
pairings.append((Series.rdiv, lambda x, y: operator.div(y, x), 1))
|
||||
|
||||
@pytest.mark.parametrize('op, equiv_op, fv', pairings)
def test_operators_combine(self, op, equiv_op, fv):
    """Flex arithmetic with fill_value matches a per-element reference."""

    def _check_fill(meth, op, a, b, fill_value=0):
        # Reference implementation: align both operands to the union
        # index, then combine element-wise, substituting fill_value for
        # one-sided missing entries (both-missing stays NaN).
        joint_index = a.index.union(b.index)
        a = a.reindex(joint_index)
        b = b.reindex(joint_index)

        a_missing = isna(a)
        b_missing = isna(b)

        ref_values = []
        with np.errstate(all='ignore'):
            for i in range(len(joint_index)):
                if a_missing[i] and b_missing[i]:
                    ref_values.append(np.nan)
                elif a_missing[i]:
                    ref_values.append(op(fill_value, b[i]))
                elif b_missing[i]:
                    ref_values.append(op(a[i], fill_value))
                else:
                    ref_values.append(op(a[i], b[i]))

        result = meth(a, b, fill_value=fill_value)
        assert_series_equal(result, Series(ref_values, joint_index))

    a = Series([np.nan, 1., 2., 3., np.nan], index=np.arange(5))
    b = Series([np.nan, 1, np.nan, 3, np.nan, 4.], index=np.arange(6))

    # Without fill_value the flex method equals the plain operator.
    assert_series_equal(op(a, b), equiv_op(a, b))
    _check_fill(op, equiv_op, a, b, fill_value=fv)
    # should accept axis=0 or axis='rows'
    op(a, b, axis=0)
|
||||
|
||||
def test_operators_na_handling(self):
|
||||
from decimal import Decimal
|
||||
from datetime import date
|
||||
s = Series([Decimal('1.3'), Decimal('2.3')],
|
||||
index=[date(2012, 1, 1), date(2012, 1, 2)])
|
||||
|
||||
result = s + s.shift(1)
|
||||
result2 = s.shift(1) + s
|
||||
assert isna(result[0])
|
||||
assert isna(result2[0])
|
||||
|
||||
def test_op_duplicate_index(self):
|
||||
# GH14227
|
||||
s1 = Series([1, 2], index=[1, 1])
|
||||
s2 = Series([10, 10], index=[1, 2])
|
||||
result = s1 + s2
|
||||
expected = pd.Series([11, 12, np.nan], index=[1, 1, 2])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestSeriesUnaryOps(object):
    # unary operators: __neg__, __pos__, __invert__

    def test_neg(self):
        """Unary minus equals multiplication by -1."""
        series = tm.makeStringSeries()
        series.name = 'series'
        assert_series_equal(-series, -1 * series)

    def test_invert(self):
        """~mask equals the arithmetically negated boolean mask."""
        series = tm.makeStringSeries()
        series.name = 'series'
        mask = series < 0
        assert_series_equal(-mask, ~mask)
|
||||
@@ -0,0 +1,166 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Period, Series, period_range
|
||||
from pandas.core.arrays import PeriodArray
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSeriesPeriod(object):
    """Behaviour of Series holding period-dtype data."""

    def setup_method(self, method):
        # Fresh ten-day daily-frequency period Series for every test.
        self.series = Series(period_range('2000-01-01', periods=10, freq='D'))

    def test_auto_conversion(self):
        """Sequences of Period objects are inferred as period dtype."""
        series = Series(list(period_range('2000-01-01', periods=10,
                                          freq='D')))
        assert series.dtype == 'Period[D]'

        series = pd.Series([pd.Period('2011-01-01', freq='D'),
                            pd.Period('2011-02-01', freq='D')])
        assert series.dtype == 'Period[D]'

    def test_getitem(self):
        """Scalar and list indexing both preserve the period dtype."""
        assert self.series[1] == pd.Period('2000-01-02', freq='D')

        subset = self.series[[2, 4]]
        expected = pd.Series([pd.Period('2000-01-03', freq='D'),
                              pd.Period('2000-01-05', freq='D')],
                             index=[2, 4], dtype='Period[D]')
        tm.assert_series_equal(subset, expected)
        assert subset.dtype == 'Period[D]'

    def test_isna(self):
        # GH 13737
        s = Series([pd.Period('2011-01', freq='M'),
                    pd.Period('NaT', freq='M')])
        tm.assert_series_equal(s.isna(), Series([False, True]))
        tm.assert_series_equal(s.notna(), Series([True, False]))

    def test_fillna(self):
        """fillna with a Period keeps period dtype (GH 13737)."""
        s = Series([pd.Period('2011-01', freq='M'),
                    pd.Period('NaT', freq='M')])

        filled = s.fillna(pd.Period('2012-01', freq='M'))
        expected = Series([pd.Period('2011-01', freq='M'),
                           pd.Period('2012-01', freq='M')])
        tm.assert_series_equal(filled, expected)
        assert filled.dtype == 'Period[M]'

    def test_dropna(self):
        # GH 13737
        s = Series([pd.Period('2011-01', freq='M'),
                    pd.Period('NaT', freq='M')])
        tm.assert_series_equal(s.dropna(),
                               Series([pd.Period('2011-01', freq='M')]))

    def test_between(self):
        """between() agrees with the equivalent pair of comparisons."""
        left, right = self.series[[2, 7]]
        result = self.series.between(left, right)
        expected = (self.series >= left) & (self.series <= right)
        tm.assert_series_equal(result, expected)

    # ---------------------------------------------------------------------
    # NaT support

    @pytest.mark.xfail(reason="PeriodDtype Series not supported yet")
    def test_NaT_scalar(self):
        series = Series([0, 1000, 2000, pd._libs.iNaT], dtype='period[D]')

        val = series[3]
        assert pd.isna(val)

        series[2] = val
        assert pd.isna(series[2])

    @pytest.mark.xfail(reason="PeriodDtype Series not supported yet")
    def test_NaT_cast(self):
        result = Series([np.nan]).astype('period[D]')
        expected = Series([pd.NaT])
        tm.assert_series_equal(result, expected)

    def test_set_none(self):
        """Assigning None (scalar or slice) inserts NaT."""
        self.series[3] = None
        assert self.series[3] is pd.NaT

        self.series[3:5] = None
        assert self.series[4] is pd.NaT

    def test_set_nan(self):
        # Do we want to allow this?
        self.series[5] = np.nan
        assert self.series[5] is pd.NaT

        self.series[5:7] = np.nan
        assert self.series[6] is pd.NaT

    def test_intercept_astype_object(self):
        """Extracting .values from a mixed frame round-trips the periods."""
        expected = self.series.astype('object')

        df = DataFrame({'a': self.series,
                        'b': np.random.randn(len(self.series))})
        assert (df.values.squeeze()[:, 0] == expected.values).all()

        df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)})
        assert (df.values.squeeze()[:, 0] == expected.values).all()

    def test_align_series(self, join_type):
        """align() smoke test for each join type (fixture-provided)."""
        rng = period_range('1/1/2000', '1/1/2010', freq='A')
        ts = Series(np.random.randn(len(rng)), index=rng)

        ts.align(ts[::2], join=join_type)

    def test_truncate(self):
        """truncate on a PeriodIndex honours duplicates and sort order (GH 17717)."""
        idx1 = pd.PeriodIndex([
            pd.Period('2017-09-02'),
            pd.Period('2017-09-02'),
            pd.Period('2017-09-03')
        ])
        series1 = pd.Series([1, 2, 3], index=idx1)
        result1 = series1.truncate(after='2017-09-02')

        expected_idx1 = pd.PeriodIndex([
            pd.Period('2017-09-02'),
            pd.Period('2017-09-02')
        ])
        tm.assert_series_equal(result1, pd.Series([1, 2], index=expected_idx1))

        idx2 = pd.PeriodIndex([
            pd.Period('2017-09-03'),
            pd.Period('2017-09-02'),
            pd.Period('2017-09-03')
        ])
        series2 = pd.Series([1, 2, 3], index=idx2)
        result2 = series2.sort_index().truncate(after='2017-09-02')

        expected_idx2 = pd.PeriodIndex([
            pd.Period('2017-09-02')
        ])
        tm.assert_series_equal(result2, pd.Series([2], index=expected_idx2))

    @pytest.mark.parametrize('input_vals', [
        [Period('2016-01', freq='M'), Period('2016-02', freq='M')],
        [Period('2016-01-01', freq='D'), Period('2016-01-02', freq='D')],
        [Period('2016-01-01 00:00:00', freq='H'),
         Period('2016-01-01 01:00:00', freq='H')],
        [Period('2016-01-01 00:00:00', freq='M'),
         Period('2016-01-01 00:01:00', freq='M')],
        [Period('2016-01-01 00:00:00', freq='S'),
         Period('2016-01-01 00:00:01', freq='S')]
    ])
    def test_end_time_timevalues(self, input_vals):
        # GH 17157
        # Check that the time part of the Period is adjusted by end_time
        # when using the dt accessor on a Series
        input_vals = PeriodArray._from_sequence(np.asarray(input_vals))

        s = Series(input_vals)
        result = s.dt.end_time
        expected = s.apply(lambda x: x.end_time)
        tm.assert_series_equal(result, expected)
|
||||
@@ -0,0 +1,195 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_integer
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index, Series
|
||||
from pandas.core.indexes.datetimes import Timestamp
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class TestSeriesQuantile(TestData):
    """Series.quantile across dtypes, interpolation modes and edge cases."""

    def test_quantile(self):
        """quantile matches np.percentile on the non-NA values."""
        q = self.ts.quantile(0.1)
        assert q == np.percentile(self.ts.dropna(), 10)

        q = self.ts.quantile(0.9)
        assert q == np.percentile(self.ts.dropna(), 90)

        # object dtype
        q = Series(self.ts, dtype=object).quantile(0.9)
        assert q == np.percentile(self.ts.dropna(), 90)

        # datetime64[ns] dtype
        dts = self.ts.index.to_series()
        q = dts.quantile(.2)
        assert q == Timestamp('2000-01-10 19:12:00')

        # timedelta64[ns] dtype
        tds = dts.diff()
        q = tds.quantile(.25)
        assert q == pd.to_timedelta('24:00:00')

        # GH7661
        result = Series([np.timedelta64('NaT')]).sum()
        assert result == pd.Timedelta(0)

        # percentiles outside [0, 1] must raise.
        msg = 'percentiles should all be in the interval \\[0, 1\\]'
        for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
            with pytest.raises(ValueError, match=msg):
                self.ts.quantile(invalid)

    def test_quantile_multi(self):
        """List-valued q returns a Series indexed by the percentiles."""
        qs = [.1, .9]
        result = self.ts.quantile(qs)
        expected = pd.Series([np.percentile(self.ts.dropna(), 10),
                              np.percentile(self.ts.dropna(), 90)],
                             index=qs, name=self.ts.name)
        tm.assert_series_equal(result, expected)

        # duplicate percentiles are allowed and preserved in the index.
        dts = self.ts.index.to_series()
        dts.name = 'xxx'
        result = dts.quantile((.2, .2))
        expected = Series([Timestamp('2000-01-10 19:12:00'),
                           Timestamp('2000-01-10 19:12:00')],
                          index=[.2, .2], name='xxx')
        tm.assert_series_equal(result, expected)

        # empty percentile list -> empty float-indexed result.
        result = self.ts.quantile([])
        expected = pd.Series([], name=self.ts.name,
                             index=Index([], dtype=float))
        tm.assert_series_equal(result, expected)

    def test_quantile_interpolation(self):
        # see gh-10174
        # 'linear' is the default, so both spellings must agree with
        # np.percentile and with each other.
        q = self.ts.quantile(0.1, interpolation='linear')
        assert q == np.percentile(self.ts.dropna(), 10)
        q1 = self.ts.quantile(0.1)
        assert q1 == np.percentile(self.ts.dropna(), 10)

        # test with and without interpolation keyword
        assert q == q1

    def test_quantile_interpolation_dtype(self):
        # GH #10174
        # 'lower'/'higher' select existing values, so int dtype survives.
        q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='lower')
        assert q == np.percentile(np.array([1, 3, 4]), 50)
        assert is_integer(q)

        q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='higher')
        assert q == np.percentile(np.array([1, 3, 4]), 50)
        assert is_integer(q)

    def test_quantile_nan(self):
        # GH 13098
        s = pd.Series([1, 2, 3, 4, np.nan])
        assert s.quantile(0.5) == 2.5

        # all nan/empty
        for s in [Series([]), Series([np.nan, np.nan])]:
            assert np.isnan(s.quantile(0.5))

            res = s.quantile([0.5])
            tm.assert_series_equal(res, pd.Series([np.nan], index=[0.5]))

            res = s.quantile([0.2, 0.3])
            tm.assert_series_equal(res, pd.Series([np.nan, np.nan],
                                                  index=[0.2, 0.3]))

    @pytest.mark.parametrize('case', [
        [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'),
         pd.Timestamp('2011-01-03')],
        [pd.Timestamp('2011-01-01', tz='US/Eastern'),
         pd.Timestamp('2011-01-02', tz='US/Eastern'),
         pd.Timestamp('2011-01-03', tz='US/Eastern')],
        [pd.Timedelta('1 days'), pd.Timedelta('2 days'),
         pd.Timedelta('3 days')],
        # NaT
        [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'),
         pd.Timestamp('2011-01-03'), pd.NaT],
        [pd.Timestamp('2011-01-01', tz='US/Eastern'),
         pd.Timestamp('2011-01-02', tz='US/Eastern'),
         pd.Timestamp('2011-01-03', tz='US/Eastern'), pd.NaT],
        [pd.Timedelta('1 days'), pd.Timedelta('2 days'),
         pd.Timedelta('3 days'), pd.NaT]])
    def test_quantile_box(self, case):
        """Median of box types (Timestamp/Timedelta, tz, NaT) is the middle value."""
        s = pd.Series(case, name='XXX')
        assert s.quantile(0.5) == case[1]

        res = s.quantile([0.5])
        tm.assert_series_equal(res, pd.Series([case[1]], index=[0.5],
                                              name='XXX'))

    def test_datetime_timedelta_quantiles(self):
        # covers #9694
        assert pd.isna(Series([], dtype='M8[ns]').quantile(.5))
        assert pd.isna(Series([], dtype='m8[ns]').quantile(.5))

    def test_quantile_nat(self):
        """All-NaT input yields NaT for scalar and list-valued q."""
        res = Series([pd.NaT, pd.NaT]).quantile(0.5)
        assert res is pd.NaT

        res = Series([pd.NaT, pd.NaT]).quantile([0.5])
        tm.assert_series_equal(res, pd.Series([pd.NaT], index=[0.5]))

    @pytest.mark.parametrize('values, dtype', [
        ([0, 0, 0, 1, 2, 3], 'Sparse[int]'),
        ([0., None, 1., 2.], 'Sparse[float]'),
    ])
    def test_quantile_sparse(self, values, dtype):
        """Sparse input quantiles agree with the dense equivalent."""
        ser = pd.Series(values, dtype=dtype)
        result = ser.quantile([0.5])
        expected = pd.Series(np.asarray(ser)).quantile([0.5])
        tm.assert_series_equal(result, expected)

    def test_quantile_empty(self):
        """Empty Series: NaN for numeric dtypes, NaT for datetime."""
        # floats
        s = Series([], dtype='float64')

        assert np.isnan(s.quantile(0.5))
        tm.assert_series_equal(s.quantile([0.5]),
                               Series([np.nan], index=[0.5]))

        # int
        s = Series([], dtype='int64')

        assert np.isnan(s.quantile(0.5))
        tm.assert_series_equal(s.quantile([0.5]),
                               Series([np.nan], index=[0.5]))

        # datetime
        s = Series([], dtype='datetime64[ns]')

        assert s.quantile(0.5) is pd.NaT
        tm.assert_series_equal(s.quantile([0.5]),
                               Series([pd.NaT], index=[0.5]))
|
||||
@@ -0,0 +1,506 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from distutils.version import LooseVersion
|
||||
from itertools import chain
|
||||
|
||||
import numpy as np
|
||||
from numpy import nan
|
||||
import pytest
|
||||
|
||||
from pandas._libs.algos import Infinity, NegInfinity
|
||||
from pandas._libs.tslib import iNaT
|
||||
import pandas.compat as compat
|
||||
from pandas.compat import product
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import NaT, Series, Timestamp, date_range
|
||||
from pandas.api.types import CategoricalDtype
|
||||
from pandas.tests.series.common import TestData
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_series_equal
|
||||
|
||||
|
||||
class TestSeriesRank(TestData):
    """Series.rank: tie methods, NA options, dtypes and pct scaling.

    BUGFIX: the scipy-dependent tests previously called
    ``pytest.importorskip('scipy.stats.rankdata')`` (and
    ``'scipy.stats.special'``).  ``importorskip`` takes a *module* name;
    both strings name a function / non-existent module, so the import
    always failed and those tests were silently skipped even with scipy
    installed.  They now skip only when ``scipy.stats`` itself is absent.
    """

    # Shared input with ties and NaNs.
    s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3])

    # Expected ranks of `s` for each tie-breaking method.
    results = {
        'average': np.array([1.5, 5.5, 7.0, 3.5, nan,
                             3.5, 1.5, 8.0, nan, 5.5]),
        'min': np.array([1, 5, 7, 3, nan, 3, 1, 8, nan, 5]),
        'max': np.array([2, 6, 7, 4, nan, 4, 2, 8, nan, 6]),
        'first': np.array([1, 5, 7, 3, nan, 4, 2, 8, nan, 6]),
        'dense': np.array([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]),
    }

    def test_rank(self):
        # importorskip must name an importable module (see class docstring).
        sp_stats = pytest.importorskip('scipy.stats')
        rankdata = sp_stats.rankdata

        self.ts[::2] = np.nan
        self.ts[:10][::3] = 4.

        ranks = self.ts.rank()
        oranks = self.ts.astype('O').rank()

        # object dtype must rank the same as float.
        assert_series_equal(ranks, oranks)

        mask = np.isnan(self.ts)
        filled = self.ts.fillna(np.inf)

        # rankdata returns a ndarray
        exp = Series(rankdata(filled), index=filled.index, name='ts')
        exp[mask] = np.nan

        tm.assert_series_equal(ranks, exp)

        # integer input ranks like its float equivalent.
        iseries = Series(np.arange(5).repeat(2))

        iranks = iseries.rank()
        exp = iseries.astype(float).rank()
        assert_series_equal(iranks, exp)

        # pct=True divides by the number of non-NA values.
        iseries = Series(np.arange(5)) + 1.0
        exp = iseries / 5.0
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        iseries = Series(np.repeat(1, 100))
        exp = Series(np.repeat(0.505, 100))
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        iseries[1] = np.nan
        exp = Series(np.repeat(50.0 / 99.0, 100))
        exp[1] = np.nan
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        iseries = Series(np.arange(5)) + 1.0
        iseries[4] = np.nan
        exp = iseries / 4.0
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        # all-NaN input stays all-NaN.
        iseries = Series(np.repeat(np.nan, 100))
        exp = iseries.copy()
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        iseries = Series(np.arange(5)) + 1
        iseries[4] = np.nan
        exp = iseries / 4.0
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        rng = date_range('1/1/1990', periods=5)
        iseries = Series(np.arange(5), rng) + 1
        iseries.iloc[4] = np.nan
        exp = iseries / 4.0
        iranks = iseries.rank(pct=True)
        assert_series_equal(iranks, exp)

        # tiny magnitudes must still rank by value, not collapse to ties.
        iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1])
        exp = Series([2, 1, 3, 5, 4, 6.0])
        iranks = iseries.rank()
        assert_series_equal(iranks, exp)

        # GH 5968: timedelta64 ranks; NaT ranks as missing.
        iseries = Series(['3 day', '1 day 10m', '-2 day', NaT],
                         dtype='m8[ns]')
        exp = Series([3, 2, 1, np.nan])
        iranks = iseries.rank()
        assert_series_equal(iranks, exp)

        # a shuffle of distinct values ranks back to their sorted order.
        values = np.array(
            [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40
             ], dtype='float64')
        random_order = np.random.permutation(len(values))
        iseries = Series(values[random_order])
        exp = Series(random_order + 1.0, dtype='float64')
        iranks = iseries.rank()
        assert_series_equal(iranks, exp)

    def test_rank_categorical(self):
        # GH issue #15420 rank incorrectly orders ordered categories

        # Test ascending/descending ranking for ordered categoricals
        exp = Series([1., 2., 3., 4., 5., 6.])
        exp_desc = Series([6., 5., 4., 3., 2., 1.])
        ordered = Series(
            ['first', 'second', 'third', 'fourth', 'fifth', 'sixth']
        ).astype(CategoricalDtype(categories=['first', 'second', 'third',
                                              'fourth', 'fifth', 'sixth'],
                                  ordered=True))
        assert_series_equal(ordered.rank(), exp)
        assert_series_equal(ordered.rank(ascending=False), exp_desc)

        # Unordered categoricals should be ranked as objects
        unordered = Series(['first', 'second', 'third', 'fourth',
                            'fifth', 'sixth']).astype(
            CategoricalDtype(categories=['first', 'second', 'third',
                                         'fourth', 'fifth', 'sixth'],
                             ordered=False))
        exp_unordered = Series([2., 4., 6., 3., 1., 5.])
        res = unordered.rank()
        assert_series_equal(res, exp_unordered)

        unordered1 = Series(
            [1, 2, 3, 4, 5, 6],
        ).astype(CategoricalDtype([1, 2, 3, 4, 5, 6], False))
        exp_unordered1 = Series([1., 2., 3., 4., 5., 6.])
        res1 = unordered1.rank()
        assert_series_equal(res1, exp_unordered1)

        # Test na_option for rank data
        na_ser = Series(
            ['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN]
        ).astype(CategoricalDtype(['first', 'second', 'third', 'fourth',
                                   'fifth', 'sixth', 'seventh'], True))

        exp_top = Series([2., 3., 4., 5., 6., 7., 1.])
        exp_bot = Series([1., 2., 3., 4., 5., 6., 7.])
        exp_keep = Series([1., 2., 3., 4., 5., 6., np.NaN])

        assert_series_equal(na_ser.rank(na_option='top'), exp_top)
        assert_series_equal(na_ser.rank(na_option='bottom'), exp_bot)
        assert_series_equal(na_ser.rank(na_option='keep'), exp_keep)

        # Test na_option for rank data with ascending False
        exp_top = Series([7., 6., 5., 4., 3., 2., 1.])
        exp_bot = Series([6., 5., 4., 3., 2., 1., 7.])
        exp_keep = Series([6., 5., 4., 3., 2., 1., np.NaN])

        assert_series_equal(
            na_ser.rank(na_option='top', ascending=False),
            exp_top
        )
        assert_series_equal(
            na_ser.rank(na_option='bottom', ascending=False),
            exp_bot
        )
        assert_series_equal(
            na_ser.rank(na_option='keep', ascending=False),
            exp_keep
        )

        # Test invalid values for na_option
        msg = "na_option must be one of 'keep', 'top', or 'bottom'"

        with pytest.raises(ValueError, match=msg):
            na_ser.rank(na_option='bad', ascending=False)

        # invalid type
        with pytest.raises(ValueError, match=msg):
            na_ser.rank(na_option=True, ascending=False)

        # Test with pct=True
        na_ser = Series(['first', 'second', 'third', 'fourth', np.NaN]).astype(
            CategoricalDtype(['first', 'second', 'third', 'fourth'], True))
        exp_top = Series([0.4, 0.6, 0.8, 1., 0.2])
        exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.])
        exp_keep = Series([0.25, 0.5, 0.75, 1., np.NaN])

        assert_series_equal(na_ser.rank(na_option='top', pct=True), exp_top)
        assert_series_equal(na_ser.rank(na_option='bottom', pct=True), exp_bot)
        assert_series_equal(na_ser.rank(na_option='keep', pct=True), exp_keep)

    def test_rank_signature(self):
        """Positional method string must not be swallowed as the axis."""
        s = Series([0, 1])
        s.rank(method='average')
        msg = r"No axis named average for object type <(class|type) 'type'>"
        with pytest.raises(ValueError, match=msg):
            s.rank('average')

    @pytest.mark.parametrize('contents,dtype', [
        ([-np.inf, -50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10,
          2, 40, np.inf],
         'float64'),
        ([-np.inf, -50, -1, -1e-20, -1e-25, -1e-45, 0, 1e-40, 1e-20, 1e-10,
          2, 40, np.inf],
         'float32'),
        ([np.iinfo(np.uint8).min, 1, 2, 100, np.iinfo(np.uint8).max],
         'uint8'),
        pytest.param([np.iinfo(np.int64).min, -100, 0, 1, 9999, 100000,
                      1e10, np.iinfo(np.int64).max],
                     'int64',
                     marks=pytest.mark.xfail(
                         reason="iNaT is equivalent to minimum value of dtype"
                         "int64 pending issue GH#16674")),
        ([NegInfinity(), '1', 'A', 'BA', 'Ba', 'C', Infinity()],
         'object')
    ])
    def test_rank_inf(self, contents, dtype):
        dtype_na_map = {
            'float64': np.nan,
            'float32': np.nan,
            'int64': iNaT,
            'object': None
        }
        # Insert nans at random positions if underlying dtype has missing
        # value. Then adjust the expected order by adding nans accordingly
        # This is for testing whether rank calculation is affected
        # when values are interwined with nan values.
        values = np.array(contents, dtype=dtype)
        exp_order = np.array(range(len(values)), dtype='float64') + 1.0
        if dtype in dtype_na_map:
            na_value = dtype_na_map[dtype]
            nan_indices = np.random.choice(range(len(values)), 5)
            values = np.insert(values, nan_indices, na_value)
            exp_order = np.insert(exp_order, nan_indices, np.nan)
        # shuffle the testing array and expected results in the same way
        random_order = np.random.permutation(len(values))
        iseries = Series(values[random_order])
        exp = Series(exp_order[random_order], dtype='float64')
        iranks = iseries.rank()
        assert_series_equal(iranks, exp)

    def test_rank_tie_methods(self):
        """Each tie method matches the precomputed `results` table."""
        s = self.s

        def _check(s, expected, method='average'):
            result = s.rank(method=method)
            tm.assert_series_equal(result, Series(expected))

        dtypes = [None, object]
        # 'first' is undefined for object dtype.
        disabled = {(object, 'first')}
        results = self.results

        for method, dtype in product(results, dtypes):
            if (dtype, method) in disabled:
                continue
            series = s if dtype is None else s.astype(dtype)
            _check(series, results[method], method=method)

    @td.skip_if_no_scipy
    @pytest.mark.parametrize('ascending', [True, False])
    @pytest.mark.parametrize('method', ['average', 'min', 'max', 'first',
                                        'dense'])
    @pytest.mark.parametrize('na_option', ['top', 'bottom', 'keep'])
    def test_rank_tie_methods_on_infs_nans(self, method, na_option, ascending):
        # Three chunks: -inf, NA, +inf; na_option moves the NA chunk.
        dtypes = [('object', None, Infinity(), NegInfinity()),
                  ('float64', np.nan, np.inf, -np.inf)]
        chunk = 3
        disabled = {('object', 'first')}

        def _check(s, method, na_option, ascending):
            exp_ranks = {
                'average': ([2, 2, 2], [5, 5, 5], [8, 8, 8]),
                'min': ([1, 1, 1], [4, 4, 4], [7, 7, 7]),
                'max': ([3, 3, 3], [6, 6, 6], [9, 9, 9]),
                'first': ([1, 2, 3], [4, 5, 6], [7, 8, 9]),
                'dense': ([1, 1, 1], [2, 2, 2], [3, 3, 3])
            }
            ranks = exp_ranks[method]
            if na_option == 'top':
                order = [ranks[1], ranks[0], ranks[2]]
            elif na_option == 'bottom':
                order = [ranks[0], ranks[2], ranks[1]]
            else:
                order = [ranks[0], [np.nan] * chunk, ranks[1]]
            expected = order if ascending else order[::-1]
            expected = list(chain.from_iterable(expected))
            result = s.rank(method=method, na_option=na_option,
                            ascending=ascending)
            tm.assert_series_equal(result, Series(expected, dtype='float64'))

        for dtype, na_value, pos_inf, neg_inf in dtypes:
            in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk
            iseries = Series(in_arr, dtype=dtype)
            if (dtype, method) in disabled:
                continue
            _check(iseries, method, na_option, ascending)

    def test_rank_desc_mix_nans_infs(self):
        # GH 19538
        # check descending ranking when mix nans and infs
        iseries = Series([1, np.nan, np.inf, -np.inf, 25])
        result = iseries.rank(ascending=False)
        exp = Series([3, np.nan, 1, 4, 2], dtype='float64')
        tm.assert_series_equal(result, exp)

    def test_rank_methods_series(self):
        # importorskip must name an importable module (see class docstring).
        sp_stats = pytest.importorskip('scipy.stats')
        rankdata = sp_stats.rankdata
        import scipy

        xs = np.random.randn(9)
        xs = np.concatenate([xs[i:] for i in range(0, 9, 2)])  # add duplicates
        np.random.shuffle(xs)

        index = [chr(ord('a') + i) for i in range(len(xs))]

        for vals in [xs, xs + 1e6, xs * 1e-6]:
            ts = Series(vals, index=index)

            for m in ['average', 'min', 'max', 'first', 'dense']:
                result = ts.rank(method=m)
                sprank = rankdata(vals, m if m != 'first' else 'ordinal')
                expected = Series(sprank, index=index)

                if LooseVersion(scipy.__version__) >= LooseVersion('0.17.0'):
                    expected = expected.astype('float64')
                tm.assert_series_equal(result, expected)

    def test_rank_dense_method(self):
        """Dense ranks are consecutive integers over distinct values."""
        dtypes = ['O', 'f8', 'i8']
        in_out = [([1], [1]),
                  ([2], [1]),
                  ([0], [1]),
                  ([2, 2], [1, 1]),
                  ([1, 2, 3], [1, 2, 3]),
                  ([4, 2, 1], [3, 2, 1],),
                  ([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]),
                  ([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5])]

        for ser, exp in in_out:
            for dtype in dtypes:
                s = Series(ser).astype(dtype)
                result = s.rank(method='dense')
                expected = Series(exp).astype(result.dtype)
                assert_series_equal(result, expected)

    def test_rank_descending(self):
        """ascending=False equals ranking (max - x) ascending."""
        dtypes = ['O', 'f8', 'i8']

        for dtype, method in product(dtypes, self.results):
            if 'i' in dtype:
                s = self.s.dropna()
            else:
                s = self.s.astype(dtype)

            res = s.rank(ascending=False)
            expected = (s.max() - s).rank()
            assert_series_equal(res, expected)

            if method == 'first' and dtype == 'O':
                continue

            expected = (s.max() - s).rank(method=method)
            res2 = s.rank(method=method, ascending=False)
            assert_series_equal(res2, expected)

    def test_rank_int(self):
        """Integer input matches the float expectations minus the NaN rows."""
        s = self.s.dropna().astype('i8')

        for method, res in compat.iteritems(self.results):
            result = s.rank(method=method)
            expected = Series(res).dropna()
            expected.index = result.index
            assert_series_equal(result, expected)

    def test_rank_object_bug(self):
        # GH 13445

        # smoke tests
        Series([np.nan] * 32).astype(object).rank(ascending=True)
        Series([np.nan] * 32).astype(object).rank(ascending=False)

    def test_rank_modify_inplace(self):
        # GH 18521
        # Check rank does not mutate series
        s = Series([Timestamp('2017-01-05 10:20:27.569000'), NaT])
        expected = s.copy()

        s.rank()
        result = s
        assert_series_equal(result, expected)
|
||||
|
||||
|
||||
# GH15630, pct should be on 100% basis when method='dense'

@pytest.mark.parametrize('dtype', ['O', 'f8', 'i8'])
@pytest.mark.parametrize('ser, exp', [
    ([1], [1.]),
    ([1, 2], [1. / 2, 2. / 2]),
    ([2, 2], [1., 1.]),
    ([1, 2, 3], [1. / 3, 2. / 3, 3. / 3]),
    ([1, 2, 2], [1. / 2, 2. / 2, 2. / 2]),
    ([4, 2, 1], [3. / 3, 2. / 3, 1. / 3],),
    ([1, 1, 5, 5, 3], [1. / 3, 1. / 3, 3. / 3, 3. / 3, 2. / 3]),
    ([1, 1, 3, 3, 5, 5], [1. / 3, 1. / 3, 2. / 3, 2. / 3, 3. / 3, 3. / 3]),
    ([-5, -4, -3, -2, -1], [1. / 5, 2. / 5, 3. / 5, 4. / 5, 5. / 5])])
def test_rank_dense_pct(dtype, ser, exp):
    """method='dense' + pct=True scales by the count of *distinct* values."""
    values = Series(ser).astype(dtype)
    result = values.rank(method='dense', pct=True)
    assert_series_equal(result, Series(exp).astype(result.dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtype', ['O', 'f8', 'i8'])
@pytest.mark.parametrize('ser, exp', [
    ([1], [1.]),
    ([1, 2], [1. / 2, 2. / 2]),
    ([2, 2], [1. / 2, 1. / 2]),
    ([1, 2, 3], [1. / 3, 2. / 3, 3. / 3]),
    ([1, 2, 2], [1. / 3, 2. / 3, 2. / 3]),
    ([4, 2, 1], [3. / 3, 2. / 3, 1. / 3],),
    ([1, 1, 5, 5, 3], [1. / 5, 1. / 5, 4. / 5, 4. / 5, 3. / 5]),
    ([1, 1, 3, 3, 5, 5], [1. / 6, 1. / 6, 3. / 6, 3. / 6, 5. / 6, 5. / 6]),
    ([-5, -4, -3, -2, -1], [1. / 5, 2. / 5, 3. / 5, 4. / 5, 5. / 5])])
def test_rank_min_pct(dtype, ser, exp):
    """method='min' + pct=True: ties share the lowest rank, scaled by n."""
    values = Series(ser).astype(dtype)
    result = values.rank(method='min', pct=True)
    assert_series_equal(result, Series(exp).astype(result.dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtype', ['O', 'f8', 'i8'])
@pytest.mark.parametrize('ser, exp', [
    ([1], [1.]),
    ([1, 2], [1. / 2, 2. / 2]),
    ([2, 2], [1., 1.]),
    ([1, 2, 3], [1. / 3, 2. / 3, 3. / 3]),
    ([1, 2, 2], [1. / 3, 3. / 3, 3. / 3]),
    ([4, 2, 1], [3. / 3, 2. / 3, 1. / 3],),
    ([1, 1, 5, 5, 3], [2. / 5, 2. / 5, 5. / 5, 5. / 5, 3. / 5]),
    ([1, 1, 3, 3, 5, 5], [2. / 6, 2. / 6, 4. / 6, 4. / 6, 6. / 6, 6. / 6]),
    ([-5, -4, -3, -2, -1], [1. / 5, 2. / 5, 3. / 5, 4. / 5, 5. / 5])])
def test_rank_max_pct(dtype, ser, exp):
    """method='max' + pct=True: ties share the highest rank, scaled by n."""
    values = Series(ser).astype(dtype)
    result = values.rank(method='max', pct=True)
    assert_series_equal(result, Series(exp).astype(result.dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtype', ['O', 'f8', 'i8'])
@pytest.mark.parametrize('ser, exp', [
    ([1], [1.]),
    ([1, 2], [1. / 2, 2. / 2]),
    ([2, 2], [1.5 / 2, 1.5 / 2]),
    ([1, 2, 3], [1. / 3, 2. / 3, 3. / 3]),
    ([1, 2, 2], [1. / 3, 2.5 / 3, 2.5 / 3]),
    ([4, 2, 1], [3. / 3, 2. / 3, 1. / 3],),
    ([1, 1, 5, 5, 3], [1.5 / 5, 1.5 / 5, 4.5 / 5, 4.5 / 5, 3. / 5]),
    ([1, 1, 3, 3, 5, 5],
     [1.5 / 6, 1.5 / 6, 3.5 / 6, 3.5 / 6, 5.5 / 6, 5.5 / 6]),
    ([-5, -4, -3, -2, -1], [1. / 5, 2. / 5, 3. / 5, 4. / 5, 5. / 5])])
def test_rank_average_pct(dtype, ser, exp):
    """rank(method='average', pct=True): tie groups share the mean of the
    ranks they span, divided by the number of observations."""
    obj = Series(ser).astype(dtype)
    ranked = obj.rank(method='average', pct=True)
    # expected values are cast to whatever dtype rank produced
    assert_series_equal(ranked, Series(exp).astype(ranked.dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtype', ['f8', 'i8'])
@pytest.mark.parametrize('ser, exp', [
    ([1], [1.]),
    ([1, 2], [1. / 2, 2. / 2]),
    ([2, 2], [1. / 2, 2. / 2.]),
    ([1, 2, 3], [1. / 3, 2. / 3, 3. / 3]),
    ([1, 2, 2], [1. / 3, 2. / 3, 3. / 3]),
    ([4, 2, 1], [3. / 3, 2. / 3, 1. / 3],),
    ([1, 1, 5, 5, 3], [1. / 5, 2. / 5, 4. / 5, 5. / 5, 3. / 5]),
    ([1, 1, 3, 3, 5, 5], [1. / 6, 2. / 6, 3. / 6, 4. / 6, 5. / 6, 6. / 6]),
    ([-5, -4, -3, -2, -1], [1. / 5, 2. / 5, 3. / 5, 4. / 5, 5. / 5])])
def test_rank_first_pct(dtype, ser, exp):
    """rank(method='first', pct=True): ties are broken by order of
    appearance, so every rank is distinct.  Object dtype is excluded
    ('first' is not supported for object arrays)."""
    obj = Series(ser).astype(dtype)
    ranked = obj.rank(method='first', pct=True)
    # expected values are cast to whatever dtype rank produced
    assert_series_equal(ranked, Series(exp).astype(ranked.dtype))
|
||||
|
||||
|
||||
@pytest.mark.single
def test_pct_max_many_rows():
    """GH 18271: percentage ranks must reach exactly 1.0 even when the row
    count exceeds 2**24, past which float32 cannot represent every int."""
    n_rows = 2 ** 24 + 1
    pct_ranks = Series(np.arange(n_rows)).rank(pct=True)
    assert pct_ranks.max() == 1
|
||||
@@ -0,0 +1,282 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class TestSeriesReplace(TestData):
    """Tests for Series.replace: scalar/list/dict replacers, in-place
    operation, dtype upcasting, and validation errors."""

    def test_replace(self):
        """Core replace behaviors on numeric and object Series."""
        N = 100
        ser = pd.Series(np.random.randn(N))
        ser[0:4] = np.nan
        ser[6:10] = 0

        # replace list with a single value
        ser.replace([np.nan], -1, inplace=True)

        exp = ser.fillna(-1)
        tm.assert_series_equal(ser, exp)

        rs = ser.replace(0., np.nan)
        ser[ser == 0.] = np.nan
        tm.assert_series_equal(rs, ser)

        ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N),
                        dtype=object)
        ser[:5] = np.nan
        ser[6:10] = 'foo'
        ser[20:30] = 'bar'

        # replace list with a single value
        rs = ser.replace([np.nan, 'foo', 'bar'], -1)

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -1).all()
        assert (rs[20:30] == -1).all()
        # original must be untouched by the non-inplace call
        assert (pd.isna(ser[:5])).all()

        # replace with different values
        rs = ser.replace({np.nan: -1, 'foo': -2, 'bar': -3})

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -2).all()
        assert (rs[20:30] == -3).all()
        assert (pd.isna(ser[:5])).all()

        # replace with different values with 2 lists
        rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3])
        tm.assert_series_equal(rs, rs2)

        # replace inplace
        ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True)

        assert (ser[:5] == -1).all()
        assert (ser[6:10] == -1).all()
        assert (ser[20:30] == -1).all()

        # replacing NaN should be equivalent to fillna
        ser = pd.Series([np.nan, 0, np.inf])
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))

        ser = pd.Series([np.nan, 0, 'foo', 'bar', np.inf, None, pd.NaT])
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
        filled = ser.copy()
        filled[4] = 0
        tm.assert_series_equal(ser.replace(np.inf, 0), filled)

        ser = pd.Series(self.ts.index)
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))

        # malformed
        msg = r"Replacement lists must match in length\. Expecting 3 got 2"
        with pytest.raises(ValueError, match=msg):
            ser.replace([1, 2, 3], [np.nan, 0])

        # make sure that we aren't just masking a TypeError because bools don't
        # implement indexing
        with pytest.raises(TypeError, match='Cannot compare types .+'):
            ser.replace([1, 2], [np.nan, 0])

        ser = pd.Series([0, 1, 2, 3, 4])
        result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])
        tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0]))

    def test_replace_gh5319(self):
        """replace with no replacement value pads forward (GH 5319); also
        Timestamp replacers (GH 5797) and NaT with tz data (GH 11792)."""
        # API change from 0.12?
        # GH 5319
        ser = pd.Series([0, np.nan, 2, 3, 4])
        expected = ser.ffill()
        result = ser.replace([np.nan])
        tm.assert_series_equal(result, expected)

        ser = pd.Series([0, np.nan, 2, 3, 4])
        expected = ser.ffill()
        result = ser.replace(np.nan)
        tm.assert_series_equal(result, expected)
        # GH 5797
        ser = pd.Series(pd.date_range('20130101', periods=5))
        expected = ser.copy()
        expected.loc[2] = pd.Timestamp('20120101')
        result = ser.replace({pd.Timestamp('20130103'):
                              pd.Timestamp('20120101')})
        tm.assert_series_equal(result, expected)
        result = ser.replace(pd.Timestamp('20130103'),
                             pd.Timestamp('20120101'))
        tm.assert_series_equal(result, expected)

        # GH 11792: Test with replacing NaT in a list with tz data
        ts = pd.Timestamp('2015/01/01', tz='UTC')
        s = pd.Series([pd.NaT, pd.Timestamp('2015/01/01', tz='UTC')])
        result = s.replace([np.nan, pd.NaT], pd.Timestamp.min)
        expected = pd.Series([pd.Timestamp.min, ts], dtype=object)
        tm.assert_series_equal(expected, result)

    def test_replace_with_single_list(self):
        """A bare list replacer implies value=None, i.e. pad-fill."""
        ser = pd.Series([0, 1, 2, 3, 4])
        result = ser.replace([1, 2, 3])
        tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4]))

        s = ser.copy()
        s.replace([1, 2, 3], inplace=True)
        tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4]))

        # make sure things don't get corrupted when fillna call fails
        s = ser.copy()
        msg = (r"Invalid fill method\. Expecting pad \(ffill\) or backfill"
               r" \(bfill\)\. Got crash_cymbal")
        with pytest.raises(ValueError, match=msg):
            s.replace([1, 2, 3], inplace=True, method='crash_cymbal')
        # failed inplace replace must leave the Series unchanged
        tm.assert_series_equal(s, ser)

    def test_replace_with_empty_list(self):
        """Empty-list replacer is a no-op; list replacement values raise."""
        # GH 21977
        s = pd.Series([[1], [2, 3], [], np.nan, [4]])
        expected = s
        result = s.replace([], np.nan)
        tm.assert_series_equal(result, expected)

        # GH 19266
        with pytest.raises(ValueError, match="cannot assign mismatch"):
            s.replace({np.nan: []})
        with pytest.raises(ValueError, match="cannot assign mismatch"):
            s.replace({np.nan: ['dummy', 'alt']})

    def test_replace_mixed_types(self):
        """replace must upcast the dtype when the replacement requires it."""
        s = pd.Series(np.arange(5), dtype='int64')

        def check_replace(to_rep, val, expected):
            # verify non-inplace and inplace produce the same result
            sc = s.copy()
            r = s.replace(to_rep, val)
            sc.replace(to_rep, val, inplace=True)
            tm.assert_series_equal(expected, r)
            tm.assert_series_equal(expected, sc)

        # MUST upcast to float
        e = pd.Series([0., 1., 2., 3., 4.])
        tr, v = [3], [3.0]
        check_replace(tr, v, e)

        # MUST upcast to float
        e = pd.Series([0, 1, 2, 3.5, 4])
        tr, v = [3], [3.5]
        check_replace(tr, v, e)

        # casts to object
        e = pd.Series([0, 1, 2, 3.5, 'a'])
        tr, v = [3, 4], [3.5, 'a']
        check_replace(tr, v, e)

        # again casts to object
        e = pd.Series([0, 1, 2, 3.5, pd.Timestamp('20130101')])
        tr, v = [3, 4], [3.5, pd.Timestamp('20130101')]
        check_replace(tr, v, e)

        # casts to object
        e = pd.Series([0, 1, 2, 3.5, True], dtype='object')
        tr, v = [3, 4], [3.5, True]
        check_replace(tr, v, e)

        # test an object with dates + floats + integers + strings
        dr = pd.date_range('1/1/2001', '1/10/2001',
                           freq='D').to_series().reset_index(drop=True)
        result = dr.astype(object).replace(
            [dr[0], dr[1], dr[2]], [1.0, 2, 'a'])
        expected = pd.Series([1.0, 2, 'a'] + dr[3:].tolist(), dtype=object)
        tm.assert_series_equal(result, expected)

    def test_replace_bool_with_string_no_op(self):
        """Replacing a string absent from a bool Series changes nothing."""
        s = pd.Series([True, False, True])
        result = s.replace('fun', 'in-the-sun')
        tm.assert_series_equal(s, result)

    def test_replace_bool_with_string(self):
        """Replacing a bool value with a string upcasts to object."""
        # nonexistent elements
        s = pd.Series([True, False, True])
        result = s.replace(True, '2u')
        expected = pd.Series(['2u', False, '2u'])
        tm.assert_series_equal(expected, result)

    def test_replace_bool_with_bool(self):
        """bool -> bool replacement keeps the bool dtype."""
        s = pd.Series([True, False, True])
        result = s.replace(True, False)
        expected = pd.Series([False] * len(s))
        tm.assert_series_equal(expected, result)

    def test_replace_with_dict_with_bool_keys(self):
        """Mixed-type dict keys against a bool Series raise TypeError."""
        s = pd.Series([True, False, True])
        with pytest.raises(TypeError, match='Cannot compare types .+'):
            s.replace({'asdf': 'asdb', True: 'yes'})

    def test_replace2(self):
        """Object-dtype variant of the list/dict/inplace replace checks."""
        N = 100
        ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N),
                        dtype=object)
        ser[:5] = np.nan
        ser[6:10] = 'foo'
        ser[20:30] = 'bar'

        # replace list with a single value
        rs = ser.replace([np.nan, 'foo', 'bar'], -1)

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -1).all()
        assert (rs[20:30] == -1).all()
        assert (pd.isna(ser[:5])).all()

        # replace with different values
        rs = ser.replace({np.nan: -1, 'foo': -2, 'bar': -3})

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -2).all()
        assert (rs[20:30] == -3).all()
        assert (pd.isna(ser[:5])).all()

        # replace with different values with 2 lists
        rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3])
        tm.assert_series_equal(rs, rs2)

        # replace inplace
        ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True)
        assert (ser[:5] == -1).all()
        assert (ser[6:10] == -1).all()
        assert (ser[20:30] == -1).all()

    def test_replace_with_empty_dictlike(self):
        """Empty dict / empty Series replacers are no-ops (GH 15289)."""
        # GH 15289
        s = pd.Series(list('abcd'))
        tm.assert_series_equal(s, s.replace(dict()))
        tm.assert_series_equal(s, s.replace(pd.Series([])))

    def test_replace_string_with_number(self):
        """'2' must not match the int 2 (GH 15743)."""
        # GH 15743
        s = pd.Series([1, 2, 3])
        result = s.replace('2', np.nan)
        expected = pd.Series([1, 2, 3])
        tm.assert_series_equal(expected, result)

    def test_replace_replacer_equals_replacement(self):
        """Dict replace matches against the ORIGINAL values, so swapping
        'a'<->'b' does not cascade (GH 20656)."""
        # GH 20656
        # make sure all replacers are matching against original values
        s = pd.Series(['a', 'b'])
        expected = pd.Series(['b', 'a'])
        result = s.replace({'a': 'b', 'b': 'a'})
        tm.assert_series_equal(expected, result)

    def test_replace_unicode_with_number(self):
        """Unicode u'2' must not match the int 2 (GH 15743)."""
        # GH 15743
        s = pd.Series([1, 2, 3])
        result = s.replace(u'2', np.nan)
        expected = pd.Series([1, 2, 3])
        tm.assert_series_equal(expected, result)

    def test_replace_mixed_types_with_string(self):
        """List replacer over a mixed int/str Series matches by type."""
        # Testing mixed
        s = pd.Series([1, 2, 3, '4', 4, 5])
        result = s.replace([2, '4'], np.nan)
        expected = pd.Series([1, np.nan, 3, np.nan, 4, 5])
        tm.assert_series_equal(expected, result)
|
||||
@@ -0,0 +1,476 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas.compat as compat
|
||||
from pandas.compat import lrange, range, u
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical, DataFrame, Index, Series, date_range, option_context,
|
||||
period_range, timedelta_range)
|
||||
from pandas.core.base import StringMixin
|
||||
from pandas.core.index import MultiIndex
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class TestSeriesRepr(TestData):
    """Tests of Series string rendering: repr, name display, unicode/bytes
    round-trips and LaTeX output.

    NOTE(review): expected strings below encode exact console formatting —
    whitespace in them comes from the renderer and should be verified
    against actual output before editing.
    """

    def test_multilevel_name_print(self):
        # repr of a MultiIndex-ed Series shows level names and Series name
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
                                                                  'three']],
                           codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                  [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['first', 'second'])
        s = Series(lrange(0, len(index)), index=index, name='sth')
        expected = ["first second", "foo one 0",
                    " two 1", " three 2",
                    "bar one 3", " two 4",
                    "baz two 5", " three 6",
                    "qux one 7", " two 8",
                    " three 9", "Name: sth, dtype: int64"]
        expected = "\n".join(expected)
        assert repr(s) == expected

    def test_name_printing(self):
        """The name appears in repr iff it is not None, for both the small
        and the truncated (large) rendering code paths."""
        # Test small Series.
        s = Series([0, 1, 2])

        s.name = "test"
        assert "Name: test" in repr(s)

        s.name = None
        assert "Name:" not in repr(s)

        # Test big Series (diff code path).
        s = Series(lrange(0, 1000))

        s.name = "test"
        assert "Name: test" in repr(s)

        s.name = None
        assert "Name:" not in repr(s)

        s = Series(index=date_range('20010101', '20020101'), name='test')
        assert "Name: test" in repr(s)

    def test_repr(self):
        """Smoke-test repr/str across dtypes, names and special characters."""
        str(self.ts)
        str(self.series)
        str(self.series.astype(int))
        str(self.objSeries)

        str(Series(tm.randn(1000), index=np.arange(1000)))
        str(Series(tm.randn(1000), index=np.arange(1000, 0, step=-1)))

        # empty
        str(self.empty)

        # with NaNs
        self.series[5:7] = np.NaN
        str(self.series)

        # with Nones
        ots = self.ts.astype('O')
        ots[::2] = None
        repr(ots)

        # various names
        for name in ['', 1, 1.2, 'foo', u('\u03B1\u03B2\u03B3'),
                     'loooooooooooooooooooooooooooooooooooooooooooooooooooong',
                     ('foo', 'bar', 'baz'), (1, 2), ('foo', 1, 2.3),
                     (u('\u03B1'), u('\u03B2'), u('\u03B3')),
                     (u('\u03B1'), 'bar')]:
            self.series.name = name
            repr(self.series)

        biggie = Series(tm.randn(1000), index=np.arange(1000),
                        name=('foo', 'bar', 'baz'))
        repr(biggie)

        # 0 as name
        ser = Series(np.random.randn(100), name=0)
        rep_str = repr(ser)
        assert "Name: 0" in rep_str

        # tidy repr
        ser = Series(np.random.randn(1001), name=0)
        rep_str = repr(ser)
        assert "Name: 0" in rep_str

        # control characters in values/name/index must be escaped
        ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"])
        assert "\t" not in repr(ser)
        assert "\r" not in repr(ser)
        assert "a\n" not in repr(ser)

        # with empty series (#4651)
        s = Series([], dtype=np.int64, name='foo')
        assert repr(s) == 'Series([], Name: foo, dtype: int64)'

        s = Series([], dtype=np.int64, name=None)
        assert repr(s) == 'Series([], dtype: int64)'

    def test_tidy_repr(self):
        a = Series([u("\u05d0")] * 1000)
        a.name = 'title1'
        repr(a)  # should not raise exception

    def test_repr_bool_fails(self, capsys):
        """repr of a Series of DataFrames must not write to stderr."""
        s = Series([DataFrame(np.random.randn(2, 2)) for i in range(5)])

        # It works (with no Cython exception barf)!
        repr(s)

        captured = capsys.readouterr()
        assert captured.err == ''

    def test_repr_name_iterable_indexable(self):
        s = Series([1, 2, 3], name=np.int64(3))

        # it works!
        repr(s)

        s.name = (u("\u05d0"), ) * 2
        repr(s)

    def test_repr_should_return_str(self):
        # https://docs.python.org/3/reference/datamodel.html#object.__repr__
        # ...The return value must be a string object.

        # (str on py2.x, str (unicode) on py3)

        data = [8, 5, 3, 5]
        index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"), u("\u03c6")]
        df = Series(data, index=index1)
        # BUG FIX: was `assert type(df.__repr__() == str)`, which asserts
        # a type object (always truthy) instead of checking the repr's type.
        assert type(df.__repr__()) == str  # both py2 / 3

    def test_repr_max_rows(self):
        # GH 6863
        with pd.option_context('max_rows', None):
            str(Series(range(1001)))  # should not raise exception

    def test_unicode_string_with_unicode(self):
        df = Series([u("\u05d0")], name=u("\u05d1"))
        if compat.PY3:
            str(df)
        else:
            compat.text_type(df)

    def test_bytestring_with_unicode(self):
        df = Series([u("\u05d0")], name=u("\u05d1"))
        if compat.PY3:
            bytes(df)
        else:
            str(df)

    def test_timeseries_repr_object_dtype(self):
        """repr of datetime data: object-dtype index and Freq footer."""
        index = Index([datetime(2000, 1, 1) + timedelta(i)
                       for i in range(1000)], dtype=object)
        ts = Series(np.random.randn(len(index)), index)
        repr(ts)

        ts = tm.makeTimeSeries(1000)
        assert repr(ts).splitlines()[-1].startswith('Freq:')

        ts2 = ts.iloc[np.random.randint(0, len(ts) - 1, 400)]
        repr(ts2).splitlines()[-1]

    def test_latex_repr(self):
        result = r"""\begin{tabular}{ll}
\toprule
{} & 0 \\
\midrule
0 & $\alpha$ \\
1 & b \\
2 & c \\
\bottomrule
\end{tabular}
"""
        with option_context('display.latex.escape', False,
                            'display.latex.repr', True):
            s = Series([r'$\alpha$', 'b', 'c'])
            assert result == s._repr_latex_()

        # outside the option context _repr_latex_ is disabled again
        assert s._repr_latex_() is None
|
||||
|
||||
|
||||
class TestCategoricalRepr(object):
    """Tests of repr for category-dtype Series: truncation, ordered vs
    unordered separators (',' vs '<'), and each category dtype (int,
    datetime, tz-aware datetime, period, timedelta).

    NOTE(review): the expected strings encode exact console alignment —
    internal whitespace should be verified against real output before
    editing, as it depends on the display renderer.
    """

    def test_categorical_repr_unicode(self):
        # GH#21002 if len(index) > 60, sys.getdefaultencoding()=='ascii',
        # and we are working in PY2, then rendering a Categorical could raise
        # UnicodeDecodeError by trying to decode when it shouldn't

        class County(StringMixin):
            # minimal object whose text form is non-ASCII
            name = u'San Sebastián'
            state = u'PR'

            def __unicode__(self):
                return self.name + u', ' + self.state

        # > 60 elements forces the truncated rendering path (see GH#21002)
        cat = pd.Categorical([County() for n in range(61)])
        idx = pd.Index(cat)
        ser = idx.to_series()

        if compat.PY3:
            # no reloading of sys, just check that the default (utf8) works
            # as expected
            repr(ser)
            str(ser)

        else:
            # set sys.defaultencoding to ascii, then change it back after
            # the test
            with tm.set_defaultencoding('ascii'):
                repr(ser)
                str(ser)

    def test_categorical_repr(self):
        # unordered: categories listed with commas
        a = Series(Categorical([1, 2, 3, 4]))
        exp = u("0 1\n1 2\n2 3\n3 4\n" +
                "dtype: category\nCategories (4, int64): [1, 2, 3, 4]")

        assert exp == a.__unicode__()

        # truncated repr under a small max_rows
        a = Series(Categorical(["a", "b"] * 25))
        exp = u("0 a\n1 b\n" + " ..\n" + "48 a\n49 b\n" +
                "Length: 50, dtype: category\nCategories (2, object): [a, b]")
        with option_context("display.max_rows", 5):
            assert exp == repr(a)

        # ordered: categories joined with '<', long lists elided with '...'
        levs = list("abcdefghijklmnopqrstuvwxyz")
        a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
        exp = u("0 a\n1 b\n" + "dtype: category\n"
                "Categories (26, object): [a < b < c < d ... w < x < y < z]")
        assert exp == a.__unicode__()

    def test_categorical_series_repr(self):
        # short int categories: all listed
        s = Series(Categorical([1, 2, 3]))
        exp = """0 1
1 2
2 3
dtype: category
Categories (3, int64): [1, 2, 3]"""

        assert repr(s) == exp

        # more than 8 categories: middle elided with '...'
        s = Series(Categorical(np.arange(10)))
        exp = """0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
dtype: category
Categories (10, int64): [0, 1, 2, 3, ..., 6, 7, 8, 9]"""

        assert repr(s) == exp

    def test_categorical_series_repr_ordered(self):
        # ordered categories use '<' as the separator
        s = Series(Categorical([1, 2, 3], ordered=True))
        exp = """0 1
1 2
2 3
dtype: category
Categories (3, int64): [1 < 2 < 3]"""

        assert repr(s) == exp

        s = Series(Categorical(np.arange(10), ordered=True))
        exp = """0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
dtype: category
Categories (10, int64): [0 < 1 < 2 < 3 ... 6 < 7 < 8 < 9]"""

        assert repr(s) == exp

    def test_categorical_series_repr_datetime(self):
        # naive datetime categories
        idx = date_range('2011-01-01 09:00', freq='H', periods=5)
        s = Series(Categorical(idx))
        exp = """0 2011-01-01 09:00:00
1 2011-01-01 10:00:00
2 2011-01-01 11:00:00
3 2011-01-01 12:00:00
4 2011-01-01 13:00:00
dtype: category
Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00,
2011-01-01 12:00:00, 2011-01-01 13:00:00]"""  # noqa

        assert repr(s) == exp

        # tz-aware: dtype shows the timezone, values show the offset
        idx = date_range('2011-01-01 09:00', freq='H', periods=5,
                         tz='US/Eastern')
        s = Series(Categorical(idx))
        exp = """0 2011-01-01 09:00:00-05:00
1 2011-01-01 10:00:00-05:00
2 2011-01-01 11:00:00-05:00
3 2011-01-01 12:00:00-05:00
4 2011-01-01 13:00:00-05:00
dtype: category
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,
2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,
2011-01-01 13:00:00-05:00]"""  # noqa

        assert repr(s) == exp

    def test_categorical_series_repr_datetime_ordered(self):
        # ordered datetime categories: '<' separators in the footer
        idx = date_range('2011-01-01 09:00', freq='H', periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 2011-01-01 09:00:00
1 2011-01-01 10:00:00
2 2011-01-01 11:00:00
3 2011-01-01 12:00:00
4 2011-01-01 13:00:00
dtype: category
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
2011-01-01 12:00:00 < 2011-01-01 13:00:00]"""  # noqa

        assert repr(s) == exp

        idx = date_range('2011-01-01 09:00', freq='H', periods=5,
                         tz='US/Eastern')
        s = Series(Categorical(idx, ordered=True))
        exp = """0 2011-01-01 09:00:00-05:00
1 2011-01-01 10:00:00-05:00
2 2011-01-01 11:00:00-05:00
3 2011-01-01 12:00:00-05:00
4 2011-01-01 13:00:00-05:00
dtype: category
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
2011-01-01 13:00:00-05:00]"""  # noqa

        assert repr(s) == exp

    def test_categorical_series_repr_period(self):
        # hourly periods
        idx = period_range('2011-01-01 09:00', freq='H', periods=5)
        s = Series(Categorical(idx))
        exp = """0 2011-01-01 09:00
1 2011-01-01 10:00
2 2011-01-01 11:00
3 2011-01-01 12:00
4 2011-01-01 13:00
dtype: category
Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
2011-01-01 13:00]"""  # noqa

        assert repr(s) == exp

        # monthly periods
        idx = period_range('2011-01', freq='M', periods=5)
        s = Series(Categorical(idx))
        exp = """0 2011-01
1 2011-02
2 2011-03
3 2011-04
4 2011-05
dtype: category
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""

        assert repr(s) == exp

    def test_categorical_series_repr_period_ordered(self):
        # ordered period categories
        idx = period_range('2011-01-01 09:00', freq='H', periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 2011-01-01 09:00
1 2011-01-01 10:00
2 2011-01-01 11:00
3 2011-01-01 12:00
4 2011-01-01 13:00
dtype: category
Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
2011-01-01 13:00]"""  # noqa

        assert repr(s) == exp

        idx = period_range('2011-01', freq='M', periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 2011-01
1 2011-02
2 2011-03
3 2011-04
4 2011-05
dtype: category
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""

        assert repr(s) == exp

    def test_categorical_series_repr_timedelta(self):
        # whole-day timedeltas render compactly
        idx = timedelta_range('1 days', periods=5)
        s = Series(Categorical(idx))
        exp = """0 1 days
1 2 days
2 3 days
3 4 days
4 5 days
dtype: category
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""

        assert repr(s) == exp

        # sub-day resolution renders HH:MM:SS; 10 categories get elided
        idx = timedelta_range('1 hours', periods=10)
        s = Series(Categorical(idx))
        exp = """0 0 days 01:00:00
1 1 days 01:00:00
2 2 days 01:00:00
3 3 days 01:00:00
4 4 days 01:00:00
5 5 days 01:00:00
6 6 days 01:00:00
7 7 days 01:00:00
8 8 days 01:00:00
9 9 days 01:00:00
dtype: category
Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00,
8 days 01:00:00, 9 days 01:00:00]"""  # noqa

        assert repr(s) == exp

    def test_categorical_series_repr_timedelta_ordered(self):
        # ordered timedelta categories: '<' separators in the footer
        idx = timedelta_range('1 days', periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 1 days
1 2 days
2 3 days
3 4 days
4 5 days
dtype: category
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]"""  # noqa

        assert repr(s) == exp

        idx = timedelta_range('1 hours', periods=10)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 0 days 01:00:00
1 1 days 01:00:00
2 2 days 01:00:00
3 3 days 01:00:00
4 4 days 01:00:00
5 5 days 01:00:00
6 6 days 01:00:00
7 7 days 01:00:00
8 8 days 01:00:00
9 9 days 01:00:00
dtype: category
Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
3 days 01:00:00 ... 6 days 01:00:00 < 7 days 01:00:00 <
8 days 01:00:00 < 9 days 01:00:00]"""  # noqa

        assert repr(s) == exp
|
||||
@@ -0,0 +1,266 @@
|
||||
# coding=utf-8
|
||||
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Categorical, DataFrame, IntervalIndex, MultiIndex, Series
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_almost_equal, assert_series_equal
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class TestSeriesSorting(TestData):
|
||||
|
||||
    def test_sort_values(self):
        """Series.sort_values: value ordering, NaN placement via
        na_position, validation of the `ascending` argument, and the
        in-place / view restrictions."""

        # check indexes are reordered corresponding with the values
        ser = Series([3, 2, 4, 1], ['A', 'B', 'C', 'D'])
        expected = Series([1, 2, 3, 4], ['D', 'B', 'A', 'C'])
        result = ser.sort_values()
        tm.assert_series_equal(expected, result)

        ts = self.ts.copy()
        ts[:5] = np.NaN
        vals = ts.values

        # NaNs sort to the end by default
        result = ts.sort_values()
        assert np.isnan(result[-5:]).all()
        tm.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:]))

        # na_position
        result = ts.sort_values(na_position='first')
        assert np.isnan(result[:5]).all()
        tm.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:]))

        # something object-type
        ser = Series(['A', 'B'], [1, 2])
        # no failure
        ser.sort_values()

        # ascending=False
        ordered = ts.sort_values(ascending=False)
        expected = np.sort(ts.dropna().values)[::-1]
        assert_almost_equal(expected, ordered.dropna().values)
        ordered = ts.sort_values(ascending=False, na_position='first')
        assert_almost_equal(expected, ordered.dropna().values)

        # ascending=[False] should behave the same as ascending=False
        ordered = ts.sort_values(ascending=[False])
        expected = ts.sort_values(ascending=False)
        assert_series_equal(expected, ordered)
        ordered = ts.sort_values(ascending=[False], na_position='first')
        expected = ts.sort_values(ascending=False, na_position='first')
        assert_series_equal(expected, ordered)

        # invalid `ascending` arguments must raise with a clear message
        msg = "ascending must be boolean"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending=None)
        msg = r"Length of ascending \(0\) must be 1 for Series"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending=[])
        msg = r"Length of ascending \(3\) must be 1 for Series"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending=[1, 2, 3])
        msg = r"Length of ascending \(2\) must be 1 for Series"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending=[False, False])
        msg = "ascending must be boolean"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending='foobar')

        # inplace=True
        ts = self.ts.copy()
        ts.sort_values(ascending=False, inplace=True)
        tm.assert_series_equal(ts, self.ts.sort_values(ascending=False))
        tm.assert_index_equal(ts.index,
                              self.ts.sort_values(ascending=False).index)

        # GH 5856/5853
        # Series.sort_values operating on a view
        df = DataFrame(np.random.randn(10, 4))
        s = df.iloc[:, 0]

        # in-place sort of a view would corrupt the parent -> must raise
        msg = ("This Series is a view of some other array, to sort in-place"
               " you must create a copy")
        with pytest.raises(ValueError, match=msg):
            s.sort_values(inplace=True)
|
||||
|
||||
    def test_sort_index(self):
        """Series.sort_index: ascending/descending ordering plus the
        level= / axis= compatibility arguments on a flat index."""
        rindex = list(self.ts.index)
        random.shuffle(rindex)

        # a shuffled reindex sorted back must equal the original
        random_order = self.ts.reindex(rindex)
        sorted_series = random_order.sort_index()
        assert_series_equal(sorted_series, self.ts)

        # descending
        sorted_series = random_order.sort_index(ascending=False)
        assert_series_equal(sorted_series,
                            self.ts.reindex(self.ts.index[::-1]))

        # compat on level
        sorted_series = random_order.sort_index(level=0)
        assert_series_equal(sorted_series, self.ts)

        # compat on axis
        sorted_series = random_order.sort_index(axis=0)
        assert_series_equal(sorted_series, self.ts)

        # Series has only axis 0; axis=1 must raise
        msg = r"No axis named 1 for object type <(class|type) 'type'>"
        with pytest.raises(ValueError, match=msg):
            random_order.sort_values(axis=1)

        sorted_series = random_order.sort_index(level=0, axis=0)
        assert_series_equal(sorted_series, self.ts)

        # same axis=1 failure when combined with level=
        with pytest.raises(ValueError, match=msg):
            random_order.sort_index(level=0, axis=1)
|
||||
|
||||
    def test_sort_index_inplace(self):
        """sort_index(inplace=True) mutates the Series and returns None
        (GH 11402)."""

        # For #11402
        rindex = list(self.ts.index)
        random.shuffle(rindex)

        # descending
        random_order = self.ts.reindex(rindex)
        result = random_order.sort_index(ascending=False, inplace=True)

        # in-place mutators return None by convention
        assert result is None
        tm.assert_series_equal(random_order, self.ts.reindex(
            self.ts.index[::-1]))

        # ascending
        random_order = self.ts.reindex(rindex)
        result = random_order.sort_index(ascending=True, inplace=True)

        assert result is None
        tm.assert_series_equal(random_order, self.ts)
|
||||
|
||||
@pytest.mark.parametrize("level", ['A', 0]) # GH 21052
|
||||
def test_sort_index_multiindex(self, level):
|
||||
|
||||
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
|
||||
s = Series([1, 2], mi)
|
||||
backwards = s.iloc[[1, 0]]
|
||||
|
||||
# implicit sort_remaining=True
|
||||
res = s.sort_index(level=level)
|
||||
assert_series_equal(backwards, res)
|
||||
|
||||
# GH13496
|
||||
# sort has no effect without remaining lvls
|
||||
res = s.sort_index(level=level, sort_remaining=False)
|
||||
assert_series_equal(s, res)
|
||||
|
||||
def test_sort_index_kind(self):
|
||||
# GH #14444 & #13589: Add support for sort algo choosing
|
||||
series = Series(index=[3, 2, 1, 4, 3])
|
||||
expected_series = Series(index=[1, 2, 3, 3, 4])
|
||||
|
||||
index_sorted_series = series.sort_index(kind='mergesort')
|
||||
assert_series_equal(expected_series, index_sorted_series)
|
||||
|
||||
index_sorted_series = series.sort_index(kind='quicksort')
|
||||
assert_series_equal(expected_series, index_sorted_series)
|
||||
|
||||
index_sorted_series = series.sort_index(kind='heapsort')
|
||||
assert_series_equal(expected_series, index_sorted_series)
|
||||
|
||||
def test_sort_index_na_position(self):
|
||||
series = Series(index=[3, 2, 1, 4, 3, np.nan])
|
||||
|
||||
expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4])
|
||||
index_sorted_series = series.sort_index(na_position='first')
|
||||
assert_series_equal(expected_series_first, index_sorted_series)
|
||||
|
||||
expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan])
|
||||
index_sorted_series = series.sort_index(na_position='last')
|
||||
assert_series_equal(expected_series_last, index_sorted_series)
|
||||
|
||||
def test_sort_index_intervals(self):
|
||||
s = Series([np.nan, 1, 2, 3], IntervalIndex.from_arrays(
|
||||
[0, 1, 2, 3],
|
||||
[1, 2, 3, 4]))
|
||||
|
||||
result = s.sort_index()
|
||||
expected = s
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = s.sort_index(ascending=False)
|
||||
expected = Series([3, 2, 1, np.nan], IntervalIndex.from_arrays(
|
||||
[3, 2, 1, 0],
|
||||
[4, 3, 2, 1]))
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
def test_sort_values_categorical(self):
|
||||
|
||||
c = Categorical(["a", "b", "b", "a"], ordered=False)
|
||||
cat = Series(c.copy())
|
||||
|
||||
# sort in the categories order
|
||||
expected = Series(
|
||||
Categorical(["a", "a", "b", "b"],
|
||||
ordered=False), index=[0, 3, 1, 2])
|
||||
result = cat.sort_values()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
cat = Series(Categorical(["a", "c", "b", "d"], ordered=True))
|
||||
res = cat.sort_values()
|
||||
exp = np.array(["a", "b", "c", "d"], dtype=np.object_)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
|
||||
cat = Series(Categorical(["a", "c", "b", "d"], categories=[
|
||||
"a", "b", "c", "d"], ordered=True))
|
||||
res = cat.sort_values()
|
||||
exp = np.array(["a", "b", "c", "d"], dtype=np.object_)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
|
||||
res = cat.sort_values(ascending=False)
|
||||
exp = np.array(["d", "c", "b", "a"], dtype=np.object_)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
|
||||
raw_cat1 = Categorical(["a", "b", "c", "d"],
|
||||
categories=["a", "b", "c", "d"], ordered=False)
|
||||
raw_cat2 = Categorical(["a", "b", "c", "d"],
|
||||
categories=["d", "c", "b", "a"], ordered=True)
|
||||
s = ["a", "b", "c", "d"]
|
||||
df = DataFrame({"unsort": raw_cat1,
|
||||
"sort": raw_cat2,
|
||||
"string": s,
|
||||
"values": [1, 2, 3, 4]})
|
||||
|
||||
# Cats must be sorted in a dataframe
|
||||
res = df.sort_values(by=["string"], ascending=False)
|
||||
exp = np.array(["d", "c", "b", "a"], dtype=np.object_)
|
||||
tm.assert_numpy_array_equal(res["sort"].values.__array__(), exp)
|
||||
assert res["sort"].dtype == "category"
|
||||
|
||||
res = df.sort_values(by=["sort"], ascending=False)
|
||||
exp = df.sort_values(by=["string"], ascending=True)
|
||||
tm.assert_series_equal(res["values"], exp["values"])
|
||||
assert res["sort"].dtype == "category"
|
||||
assert res["unsort"].dtype == "category"
|
||||
|
||||
# unordered cat, but we allow this
|
||||
df.sort_values(by=["unsort"], ascending=False)
|
||||
|
||||
# multi-columns sort
|
||||
# GH 7848
|
||||
df = DataFrame({"id": [6, 5, 4, 3, 2, 1],
|
||||
"raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']})
|
||||
df["grade"] = Categorical(df["raw_grade"], ordered=True)
|
||||
df['grade'] = df['grade'].cat.set_categories(['b', 'e', 'a'])
|
||||
|
||||
# sorts 'grade' according to the order of the categories
|
||||
result = df.sort_values(by=['grade'])
|
||||
expected = df.iloc[[1, 2, 5, 0, 3, 4]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# multi
|
||||
result = df.sort_values(by=['grade', 'id'])
|
||||
expected = df.iloc[[2, 1, 5, 4, 3, 0]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,108 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas import SparseDtype
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSeriesSubclassing(object):
|
||||
|
||||
def test_indexing_sliced(self):
|
||||
s = tm.SubclassedSeries([1, 2, 3, 4], index=list('abcd'))
|
||||
res = s.loc[['a', 'b']]
|
||||
exp = tm.SubclassedSeries([1, 2], index=list('ab'))
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
res = s.iloc[[2, 3]]
|
||||
exp = tm.SubclassedSeries([3, 4], index=list('cd'))
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
res = s.loc[['a', 'b']]
|
||||
exp = tm.SubclassedSeries([1, 2], index=list('ab'))
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
def test_to_frame(self):
|
||||
s = tm.SubclassedSeries([1, 2, 3, 4], index=list('abcd'), name='xxx')
|
||||
res = s.to_frame()
|
||||
exp = tm.SubclassedDataFrame({'xxx': [1, 2, 3, 4]}, index=list('abcd'))
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
def test_subclass_unstack(self):
|
||||
# GH 15564
|
||||
s = tm.SubclassedSeries(
|
||||
[1, 2, 3, 4], index=[list('aabb'), list('xyxy')])
|
||||
|
||||
res = s.unstack()
|
||||
exp = tm.SubclassedDataFrame(
|
||||
{'x': [1, 3], 'y': [2, 4]}, index=['a', 'b'])
|
||||
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
|
||||
class TestSparseSeriesSubclassing(object):
|
||||
|
||||
def test_subclass_sparse_slice(self):
|
||||
# int64
|
||||
s = tm.SubclassedSparseSeries([1, 2, 3, 4, 5])
|
||||
exp = tm.SubclassedSparseSeries([2, 3, 4], index=[1, 2, 3])
|
||||
tm.assert_sp_series_equal(s.loc[1:3], exp)
|
||||
assert s.loc[1:3].dtype == SparseDtype(np.int64)
|
||||
|
||||
exp = tm.SubclassedSparseSeries([2, 3], index=[1, 2])
|
||||
tm.assert_sp_series_equal(s.iloc[1:3], exp)
|
||||
assert s.iloc[1:3].dtype == SparseDtype(np.int64)
|
||||
|
||||
exp = tm.SubclassedSparseSeries([2, 3], index=[1, 2])
|
||||
tm.assert_sp_series_equal(s[1:3], exp)
|
||||
assert s[1:3].dtype == SparseDtype(np.int64)
|
||||
|
||||
# float64
|
||||
s = tm.SubclassedSparseSeries([1., 2., 3., 4., 5.])
|
||||
exp = tm.SubclassedSparseSeries([2., 3., 4.], index=[1, 2, 3])
|
||||
tm.assert_sp_series_equal(s.loc[1:3], exp)
|
||||
assert s.loc[1:3].dtype == SparseDtype(np.float64)
|
||||
|
||||
exp = tm.SubclassedSparseSeries([2., 3.], index=[1, 2])
|
||||
tm.assert_sp_series_equal(s.iloc[1:3], exp)
|
||||
assert s.iloc[1:3].dtype == SparseDtype(np.float64)
|
||||
|
||||
exp = tm.SubclassedSparseSeries([2., 3.], index=[1, 2])
|
||||
tm.assert_sp_series_equal(s[1:3], exp)
|
||||
assert s[1:3].dtype == SparseDtype(np.float64)
|
||||
|
||||
def test_subclass_sparse_addition(self):
|
||||
s1 = tm.SubclassedSparseSeries([1, 3, 5])
|
||||
s2 = tm.SubclassedSparseSeries([-2, 5, 12])
|
||||
exp = tm.SubclassedSparseSeries([-1, 8, 17])
|
||||
tm.assert_sp_series_equal(s1 + s2, exp)
|
||||
|
||||
s1 = tm.SubclassedSparseSeries([4.0, 5.0, 6.0])
|
||||
s2 = tm.SubclassedSparseSeries([1.0, 2.0, 3.0])
|
||||
exp = tm.SubclassedSparseSeries([5., 7., 9.])
|
||||
tm.assert_sp_series_equal(s1 + s2, exp)
|
||||
|
||||
def test_subclass_sparse_to_frame(self):
|
||||
s = tm.SubclassedSparseSeries([1, 2], index=list('ab'), name='xxx')
|
||||
res = s.to_frame()
|
||||
|
||||
exp_arr = pd.SparseArray([1, 2], dtype=np.int64, kind='block',
|
||||
fill_value=0)
|
||||
exp = tm.SubclassedSparseDataFrame({'xxx': exp_arr},
|
||||
index=list('ab'),
|
||||
default_fill_value=0)
|
||||
tm.assert_sp_frame_equal(res, exp)
|
||||
|
||||
# create from int dict
|
||||
res = tm.SubclassedSparseDataFrame({'xxx': [1, 2]},
|
||||
index=list('ab'),
|
||||
default_fill_value=0)
|
||||
tm.assert_sp_frame_equal(res, exp)
|
||||
|
||||
s = tm.SubclassedSparseSeries([1.1, 2.1], index=list('ab'),
|
||||
name='xxx')
|
||||
res = s.to_frame()
|
||||
exp = tm.SubclassedSparseDataFrame({'xxx': [1.1, 2.1]},
|
||||
index=list('ab'))
|
||||
tm.assert_sp_frame_equal(res, exp)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,366 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Tests for Series timezone-related methods
|
||||
"""
|
||||
from datetime import datetime
|
||||
|
||||
from dateutil.tz import tzoffset
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas._libs.tslibs import conversion, timezones
|
||||
from pandas.compat import lrange
|
||||
|
||||
from pandas import DatetimeIndex, Index, NaT, Series, Timestamp
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSeriesTimezones(object):
|
||||
# -----------------------------------------------------------------
|
||||
# Series.tz_localize
|
||||
def test_series_tz_localize(self):
|
||||
|
||||
rng = date_range('1/1/2011', periods=100, freq='H')
|
||||
ts = Series(1, index=rng)
|
||||
|
||||
result = ts.tz_localize('utc')
|
||||
assert result.index.tz.zone == 'UTC'
|
||||
|
||||
# Can't localize if already tz-aware
|
||||
rng = date_range('1/1/2011', periods=100, freq='H', tz='utc')
|
||||
ts = Series(1, index=rng)
|
||||
|
||||
with pytest.raises(TypeError, match='Already tz-aware'):
|
||||
ts.tz_localize('US/Eastern')
|
||||
|
||||
@pytest.mark.filterwarnings('ignore::FutureWarning')
|
||||
def test_tz_localize_errors_deprecation(self):
|
||||
# GH 22644
|
||||
tz = 'Europe/Warsaw'
|
||||
n = 60
|
||||
rng = date_range(start='2015-03-29 02:00:00', periods=n, freq='min')
|
||||
ts = Series(rng)
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
with pytest.raises(ValueError):
|
||||
ts.dt.tz_localize(tz, errors='foo')
|
||||
# make sure errors='coerce' gets mapped correctly to nonexistent
|
||||
result = ts.dt.tz_localize(tz, errors='coerce')
|
||||
expected = ts.dt.tz_localize(tz, nonexistent='NaT')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_series_tz_localize_ambiguous_bool(self):
|
||||
# make sure that we are correctly accepting bool values as ambiguous
|
||||
|
||||
# GH#14402
|
||||
ts = Timestamp('2015-11-01 01:00:03')
|
||||
expected0 = Timestamp('2015-11-01 01:00:03-0500', tz='US/Central')
|
||||
expected1 = Timestamp('2015-11-01 01:00:03-0600', tz='US/Central')
|
||||
|
||||
ser = Series([ts])
|
||||
expected0 = Series([expected0])
|
||||
expected1 = Series([expected1])
|
||||
|
||||
with pytest.raises(pytz.AmbiguousTimeError):
|
||||
ser.dt.tz_localize('US/Central')
|
||||
|
||||
result = ser.dt.tz_localize('US/Central', ambiguous=True)
|
||||
tm.assert_series_equal(result, expected0)
|
||||
|
||||
result = ser.dt.tz_localize('US/Central', ambiguous=[True])
|
||||
tm.assert_series_equal(result, expected0)
|
||||
|
||||
result = ser.dt.tz_localize('US/Central', ambiguous=False)
|
||||
tm.assert_series_equal(result, expected1)
|
||||
|
||||
result = ser.dt.tz_localize('US/Central', ambiguous=[False])
|
||||
tm.assert_series_equal(result, expected1)
|
||||
|
||||
@pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw'])
|
||||
@pytest.mark.parametrize('method, exp', [
|
||||
['shift_forward', '2015-03-29 03:00:00'],
|
||||
['NaT', NaT],
|
||||
['raise', None],
|
||||
['foo', 'invalid']
|
||||
])
|
||||
def test_series_tz_localize_nonexistent(self, tz, method, exp):
|
||||
# GH 8917
|
||||
n = 60
|
||||
dti = date_range(start='2015-03-29 02:00:00', periods=n, freq='min')
|
||||
s = Series(1, dti)
|
||||
if method == 'raise':
|
||||
with pytest.raises(pytz.NonExistentTimeError):
|
||||
s.tz_localize(tz, nonexistent=method)
|
||||
elif exp == 'invalid':
|
||||
with pytest.raises(ValueError):
|
||||
dti.tz_localize(tz, nonexistent=method)
|
||||
else:
|
||||
result = s.tz_localize(tz, nonexistent=method)
|
||||
expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern'])
|
||||
def test_series_tz_localize_empty(self, tzstr):
|
||||
# GH#2248
|
||||
ser = Series()
|
||||
|
||||
ser2 = ser.tz_localize('utc')
|
||||
assert ser2.index.tz == pytz.utc
|
||||
|
||||
ser2 = ser.tz_localize(tzstr)
|
||||
timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr))
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Series.tz_convert
|
||||
|
||||
def test_series_tz_convert(self):
|
||||
rng = date_range('1/1/2011', periods=200, freq='D', tz='US/Eastern')
|
||||
ts = Series(1, index=rng)
|
||||
|
||||
result = ts.tz_convert('Europe/Berlin')
|
||||
assert result.index.tz.zone == 'Europe/Berlin'
|
||||
|
||||
# can't convert tz-naive
|
||||
rng = date_range('1/1/2011', periods=200, freq='D')
|
||||
ts = Series(1, index=rng)
|
||||
|
||||
with pytest.raises(TypeError, match="Cannot convert tz-naive"):
|
||||
ts.tz_convert('US/Eastern')
|
||||
|
||||
def test_series_tz_convert_to_utc(self):
|
||||
base = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
|
||||
tz='UTC')
|
||||
idx1 = base.tz_convert('Asia/Tokyo')[:2]
|
||||
idx2 = base.tz_convert('US/Eastern')[1:]
|
||||
|
||||
res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2)
|
||||
tm.assert_series_equal(res, Series([np.nan, 3, np.nan], index=base))
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Series.append
|
||||
|
||||
def test_series_append_aware(self):
|
||||
rng1 = date_range('1/1/2011 01:00', periods=1, freq='H',
|
||||
tz='US/Eastern')
|
||||
rng2 = date_range('1/1/2011 02:00', periods=1, freq='H',
|
||||
tz='US/Eastern')
|
||||
ser1 = Series([1], index=rng1)
|
||||
ser2 = Series([2], index=rng2)
|
||||
ts_result = ser1.append(ser2)
|
||||
|
||||
exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'],
|
||||
tz='US/Eastern')
|
||||
exp = Series([1, 2], index=exp_index)
|
||||
tm.assert_series_equal(ts_result, exp)
|
||||
assert ts_result.index.tz == rng1.tz
|
||||
|
||||
rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', tz='UTC')
|
||||
rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', tz='UTC')
|
||||
ser1 = Series([1], index=rng1)
|
||||
ser2 = Series([2], index=rng2)
|
||||
ts_result = ser1.append(ser2)
|
||||
|
||||
exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'],
|
||||
tz='UTC')
|
||||
exp = Series([1, 2], index=exp_index)
|
||||
tm.assert_series_equal(ts_result, exp)
|
||||
utc = rng1.tz
|
||||
assert utc == ts_result.index.tz
|
||||
|
||||
# GH#7795
|
||||
# different tz coerces to object dtype, not UTC
|
||||
rng1 = date_range('1/1/2011 01:00', periods=1, freq='H',
|
||||
tz='US/Eastern')
|
||||
rng2 = date_range('1/1/2011 02:00', periods=1, freq='H',
|
||||
tz='US/Central')
|
||||
ser1 = Series([1], index=rng1)
|
||||
ser2 = Series([2], index=rng2)
|
||||
ts_result = ser1.append(ser2)
|
||||
exp_index = Index([Timestamp('1/1/2011 01:00', tz='US/Eastern'),
|
||||
Timestamp('1/1/2011 02:00', tz='US/Central')])
|
||||
exp = Series([1, 2], index=exp_index)
|
||||
tm.assert_series_equal(ts_result, exp)
|
||||
|
||||
def test_series_append_aware_naive(self):
|
||||
rng1 = date_range('1/1/2011 01:00', periods=1, freq='H')
|
||||
rng2 = date_range('1/1/2011 02:00', periods=1, freq='H',
|
||||
tz='US/Eastern')
|
||||
ser1 = Series(np.random.randn(len(rng1)), index=rng1)
|
||||
ser2 = Series(np.random.randn(len(rng2)), index=rng2)
|
||||
ts_result = ser1.append(ser2)
|
||||
|
||||
expected = ser1.index.astype(object).append(ser2.index.astype(object))
|
||||
assert ts_result.index.equals(expected)
|
||||
|
||||
# mixed
|
||||
rng1 = date_range('1/1/2011 01:00', periods=1, freq='H')
|
||||
rng2 = lrange(100)
|
||||
ser1 = Series(np.random.randn(len(rng1)), index=rng1)
|
||||
ser2 = Series(np.random.randn(len(rng2)), index=rng2)
|
||||
ts_result = ser1.append(ser2)
|
||||
|
||||
expected = ser1.index.astype(object).append(ser2.index)
|
||||
assert ts_result.index.equals(expected)
|
||||
|
||||
def test_series_append_dst(self):
|
||||
rng1 = date_range('1/1/2016 01:00', periods=3, freq='H',
|
||||
tz='US/Eastern')
|
||||
rng2 = date_range('8/1/2016 01:00', periods=3, freq='H',
|
||||
tz='US/Eastern')
|
||||
ser1 = Series([1, 2, 3], index=rng1)
|
||||
ser2 = Series([10, 11, 12], index=rng2)
|
||||
ts_result = ser1.append(ser2)
|
||||
|
||||
exp_index = DatetimeIndex(['2016-01-01 01:00', '2016-01-01 02:00',
|
||||
'2016-01-01 03:00', '2016-08-01 01:00',
|
||||
'2016-08-01 02:00', '2016-08-01 03:00'],
|
||||
tz='US/Eastern')
|
||||
exp = Series([1, 2, 3, 10, 11, 12], index=exp_index)
|
||||
tm.assert_series_equal(ts_result, exp)
|
||||
assert ts_result.index.tz == rng1.tz
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
|
||||
def test_dateutil_tzoffset_support(self):
|
||||
values = [188.5, 328.25]
|
||||
tzinfo = tzoffset(None, 7200)
|
||||
index = [datetime(2012, 5, 11, 11, tzinfo=tzinfo),
|
||||
datetime(2012, 5, 11, 12, tzinfo=tzinfo)]
|
||||
series = Series(data=values, index=index)
|
||||
|
||||
assert series.index.tz == tzinfo
|
||||
|
||||
# it works! #2443
|
||||
repr(series.index[0])
|
||||
|
||||
@pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern'])
|
||||
def test_tz_aware_asfreq(self, tz):
|
||||
dr = date_range('2011-12-01', '2012-07-20', freq='D', tz=tz)
|
||||
|
||||
ser = Series(np.random.randn(len(dr)), index=dr)
|
||||
|
||||
# it works!
|
||||
ser.asfreq('T')
|
||||
|
||||
@pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern'])
|
||||
def test_string_index_alias_tz_aware(self, tz):
|
||||
rng = date_range('1/1/2000', periods=10, tz=tz)
|
||||
ser = Series(np.random.randn(len(rng)), index=rng)
|
||||
|
||||
result = ser['1/3/2000']
|
||||
tm.assert_almost_equal(result, ser[2])
|
||||
|
||||
# TODO: De-duplicate with test below
|
||||
def test_series_add_tz_mismatch_converts_to_utc_duplicate(self):
|
||||
rng = date_range('1/1/2011', periods=10, freq='H', tz='US/Eastern')
|
||||
ser = Series(np.random.randn(len(rng)), index=rng)
|
||||
|
||||
ts_moscow = ser.tz_convert('Europe/Moscow')
|
||||
|
||||
result = ser + ts_moscow
|
||||
assert result.index.tz is pytz.utc
|
||||
|
||||
result = ts_moscow + ser
|
||||
assert result.index.tz is pytz.utc
|
||||
|
||||
def test_series_add_tz_mismatch_converts_to_utc(self):
|
||||
rng = date_range('1/1/2011', periods=100, freq='H', tz='utc')
|
||||
|
||||
perm = np.random.permutation(100)[:90]
|
||||
ser1 = Series(np.random.randn(90),
|
||||
index=rng.take(perm).tz_convert('US/Eastern'))
|
||||
|
||||
perm = np.random.permutation(100)[:90]
|
||||
ser2 = Series(np.random.randn(90),
|
||||
index=rng.take(perm).tz_convert('Europe/Berlin'))
|
||||
|
||||
result = ser1 + ser2
|
||||
|
||||
uts1 = ser1.tz_convert('utc')
|
||||
uts2 = ser2.tz_convert('utc')
|
||||
expected = uts1 + uts2
|
||||
|
||||
assert result.index.tz == pytz.UTC
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_series_add_aware_naive_raises(self):
|
||||
rng = date_range('1/1/2011', periods=10, freq='H')
|
||||
ser = Series(np.random.randn(len(rng)), index=rng)
|
||||
|
||||
ser_utc = ser.tz_localize('utc')
|
||||
|
||||
with pytest.raises(Exception):
|
||||
ser + ser_utc
|
||||
|
||||
with pytest.raises(Exception):
|
||||
ser_utc + ser
|
||||
|
||||
def test_series_align_aware(self):
|
||||
idx1 = date_range('2001', periods=5, freq='H', tz='US/Eastern')
|
||||
ser = Series(np.random.randn(len(idx1)), index=idx1)
|
||||
ser_central = ser.tz_convert('US/Central')
|
||||
# # different timezones convert to UTC
|
||||
|
||||
new1, new2 = ser.align(ser_central)
|
||||
assert new1.index.tz == pytz.UTC
|
||||
assert new2.index.tz == pytz.UTC
|
||||
|
||||
@pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern'])
|
||||
def test_localized_at_time_between_time(self, tzstr):
|
||||
from datetime import time
|
||||
tz = timezones.maybe_get_tz(tzstr)
|
||||
|
||||
rng = date_range('4/16/2012', '5/1/2012', freq='H')
|
||||
ts = Series(np.random.randn(len(rng)), index=rng)
|
||||
|
||||
ts_local = ts.tz_localize(tzstr)
|
||||
|
||||
result = ts_local.at_time(time(10, 0))
|
||||
expected = ts.at_time(time(10, 0)).tz_localize(tzstr)
|
||||
tm.assert_series_equal(result, expected)
|
||||
assert timezones.tz_compare(result.index.tz, tz)
|
||||
|
||||
t1, t2 = time(10, 0), time(11, 0)
|
||||
result = ts_local.between_time(t1, t2)
|
||||
expected = ts.between_time(t1, t2).tz_localize(tzstr)
|
||||
tm.assert_series_equal(result, expected)
|
||||
assert timezones.tz_compare(result.index.tz, tz)
|
||||
|
||||
@pytest.mark.parametrize('tzstr', ['Europe/Berlin',
|
||||
'dateutil/Europe/Berlin'])
|
||||
def test_getitem_pydatetime_tz(self, tzstr):
|
||||
tz = timezones.maybe_get_tz(tzstr)
|
||||
|
||||
index = date_range(start='2012-12-24 16:00', end='2012-12-24 18:00',
|
||||
freq='H', tz=tzstr)
|
||||
ts = Series(index=index, data=index.hour)
|
||||
time_pandas = Timestamp('2012-12-24 17:00', tz=tzstr)
|
||||
|
||||
dt = datetime(2012, 12, 24, 17, 0)
|
||||
time_datetime = conversion.localize_pydatetime(dt, tz)
|
||||
assert ts[time_pandas] == ts[time_datetime]
|
||||
|
||||
def test_series_truncate_datetimeindex_tz(self):
|
||||
# GH 9243
|
||||
idx = date_range('4/1/2005', '4/30/2005', freq='D', tz='US/Pacific')
|
||||
s = Series(range(len(idx)), index=idx)
|
||||
result = s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4))
|
||||
expected = Series([1, 2, 3], index=idx[1:4])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('copy', [True, False])
|
||||
@pytest.mark.parametrize('method, tz', [
|
||||
['tz_localize', None],
|
||||
['tz_convert', 'Europe/Berlin']
|
||||
])
|
||||
def test_tz_localize_convert_copy_inplace_mutate(self, copy, method, tz):
|
||||
# GH 6326
|
||||
result = Series(np.arange(0, 5),
|
||||
index=date_range('20131027', periods=5, freq='1H',
|
||||
tz=tz))
|
||||
getattr(result, method)('UTC', copy=copy)
|
||||
expected = Series(np.arange(0, 5),
|
||||
index=date_range('20131027', periods=5, freq='1H',
|
||||
tz=tz))
|
||||
tm.assert_series_equal(result, expected)
|
||||
@@ -0,0 +1,19 @@
|
||||
import pytest
|
||||
|
||||
|
||||
class TestSeriesValidate(object):
|
||||
"""Tests for error handling related to data types of method arguments."""
|
||||
|
||||
@pytest.mark.parametrize("func", ["reset_index", "_set_name",
|
||||
"sort_values", "sort_index",
|
||||
"rename", "dropna"])
|
||||
@pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0])
|
||||
def test_validate_bool_args(self, string_series, func, inplace):
|
||||
msg = "For argument \"inplace\" expected type bool"
|
||||
kwargs = dict(inplace=inplace)
|
||||
|
||||
if func == "_set_name":
|
||||
kwargs["name"] = "hello"
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
getattr(string_series, func)(**kwargs)
|
||||
Reference in New Issue
Block a user