pruned venvs
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -1,30 +0,0 @@
|
||||
from pandas.util._decorators import cache_readonly
|
||||
import pandas.util.testing as tm
|
||||
import pandas as pd
|
||||
|
||||
_ts = tm.makeTimeSeries()
|
||||
|
||||
|
||||
class TestData(object):
|
||||
|
||||
@cache_readonly
|
||||
def ts(self):
|
||||
ts = _ts.copy()
|
||||
ts.name = 'ts'
|
||||
return ts
|
||||
|
||||
@cache_readonly
|
||||
def series(self):
|
||||
series = tm.makeStringSeries()
|
||||
series.name = 'series'
|
||||
return series
|
||||
|
||||
@cache_readonly
|
||||
def objSeries(self):
|
||||
objSeries = tm.makeObjectSeries()
|
||||
objSeries.name = 'objects'
|
||||
return objSeries
|
||||
|
||||
@cache_readonly
|
||||
def empty(self):
|
||||
return pd.Series([], index=[])
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -1,8 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from pandas.tests.series.common import TestData
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
def test_data():
|
||||
return TestData()
|
||||
-548
@@ -1,548 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import pytest
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
from numpy import nan
|
||||
|
||||
from pandas import compat
|
||||
|
||||
from pandas import (Series, date_range, isna, Categorical)
|
||||
from pandas.compat import lrange, range
|
||||
|
||||
from pandas.util.testing import (assert_series_equal)
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'first_slice,second_slice', [
|
||||
[[2, None], [None, -5]],
|
||||
[[None, 0], [None, -5]],
|
||||
[[None, -5], [None, 0]],
|
||||
[[None, 0], [None, 0]]
|
||||
])
|
||||
@pytest.mark.parametrize('fill', [None, -1])
|
||||
def test_align(test_data, first_slice, second_slice, join_type, fill):
|
||||
a = test_data.ts[slice(*first_slice)]
|
||||
b = test_data.ts[slice(*second_slice)]
|
||||
|
||||
aa, ab = a.align(b, join=join_type, fill_value=fill)
|
||||
|
||||
join_index = a.index.join(b.index, how=join_type)
|
||||
if fill is not None:
|
||||
diff_a = aa.index.difference(join_index)
|
||||
diff_b = ab.index.difference(join_index)
|
||||
if len(diff_a) > 0:
|
||||
assert (aa.reindex(diff_a) == fill).all()
|
||||
if len(diff_b) > 0:
|
||||
assert (ab.reindex(diff_b) == fill).all()
|
||||
|
||||
ea = a.reindex(join_index)
|
||||
eb = b.reindex(join_index)
|
||||
|
||||
if fill is not None:
|
||||
ea = ea.fillna(fill)
|
||||
eb = eb.fillna(fill)
|
||||
|
||||
assert_series_equal(aa, ea)
|
||||
assert_series_equal(ab, eb)
|
||||
assert aa.name == 'ts'
|
||||
assert ea.name == 'ts'
|
||||
assert ab.name == 'ts'
|
||||
assert eb.name == 'ts'
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'first_slice,second_slice', [
|
||||
[[2, None], [None, -5]],
|
||||
[[None, 0], [None, -5]],
|
||||
[[None, -5], [None, 0]],
|
||||
[[None, 0], [None, 0]]
|
||||
])
|
||||
@pytest.mark.parametrize('method', ['pad', 'bfill'])
|
||||
@pytest.mark.parametrize('limit', [None, 1])
|
||||
def test_align_fill_method(test_data,
|
||||
first_slice, second_slice,
|
||||
join_type, method, limit):
|
||||
a = test_data.ts[slice(*first_slice)]
|
||||
b = test_data.ts[slice(*second_slice)]
|
||||
|
||||
aa, ab = a.align(b, join=join_type, method=method, limit=limit)
|
||||
|
||||
join_index = a.index.join(b.index, how=join_type)
|
||||
ea = a.reindex(join_index)
|
||||
eb = b.reindex(join_index)
|
||||
|
||||
ea = ea.fillna(method=method, limit=limit)
|
||||
eb = eb.fillna(method=method, limit=limit)
|
||||
|
||||
assert_series_equal(aa, ea)
|
||||
assert_series_equal(ab, eb)
|
||||
|
||||
|
||||
def test_align_nocopy(test_data):
|
||||
b = test_data.ts[:5].copy()
|
||||
|
||||
# do copy
|
||||
a = test_data.ts.copy()
|
||||
ra, _ = a.align(b, join='left')
|
||||
ra[:5] = 5
|
||||
assert not (a[:5] == 5).any()
|
||||
|
||||
# do not copy
|
||||
a = test_data.ts.copy()
|
||||
ra, _ = a.align(b, join='left', copy=False)
|
||||
ra[:5] = 5
|
||||
assert (a[:5] == 5).all()
|
||||
|
||||
# do copy
|
||||
a = test_data.ts.copy()
|
||||
b = test_data.ts[:5].copy()
|
||||
_, rb = a.align(b, join='right')
|
||||
rb[:3] = 5
|
||||
assert not (b[:3] == 5).any()
|
||||
|
||||
# do not copy
|
||||
a = test_data.ts.copy()
|
||||
b = test_data.ts[:5].copy()
|
||||
_, rb = a.align(b, join='right', copy=False)
|
||||
rb[:2] = 5
|
||||
assert (b[:2] == 5).all()
|
||||
|
||||
|
||||
def test_align_same_index(test_data):
|
||||
a, b = test_data.ts.align(test_data.ts, copy=False)
|
||||
assert a.index is test_data.ts.index
|
||||
assert b.index is test_data.ts.index
|
||||
|
||||
a, b = test_data.ts.align(test_data.ts, copy=True)
|
||||
assert a.index is not test_data.ts.index
|
||||
assert b.index is not test_data.ts.index
|
||||
|
||||
|
||||
def test_align_multiindex():
|
||||
# GH 10665
|
||||
|
||||
midx = pd.MultiIndex.from_product([range(2), range(3), range(2)],
|
||||
names=('a', 'b', 'c'))
|
||||
idx = pd.Index(range(2), name='b')
|
||||
s1 = pd.Series(np.arange(12, dtype='int64'), index=midx)
|
||||
s2 = pd.Series(np.arange(2, dtype='int64'), index=idx)
|
||||
|
||||
# these must be the same results (but flipped)
|
||||
res1l, res1r = s1.align(s2, join='left')
|
||||
res2l, res2r = s2.align(s1, join='right')
|
||||
|
||||
expl = s1
|
||||
tm.assert_series_equal(expl, res1l)
|
||||
tm.assert_series_equal(expl, res2r)
|
||||
expr = pd.Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
|
||||
tm.assert_series_equal(expr, res1r)
|
||||
tm.assert_series_equal(expr, res2l)
|
||||
|
||||
res1l, res1r = s1.align(s2, join='right')
|
||||
res2l, res2r = s2.align(s1, join='left')
|
||||
|
||||
exp_idx = pd.MultiIndex.from_product([range(2), range(2), range(2)],
|
||||
names=('a', 'b', 'c'))
|
||||
expl = pd.Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
|
||||
tm.assert_series_equal(expl, res1l)
|
||||
tm.assert_series_equal(expl, res2r)
|
||||
expr = pd.Series([0, 0, 1, 1] * 2, index=exp_idx)
|
||||
tm.assert_series_equal(expr, res1r)
|
||||
tm.assert_series_equal(expr, res2l)
|
||||
|
||||
|
||||
def test_reindex(test_data):
|
||||
identity = test_data.series.reindex(test_data.series.index)
|
||||
|
||||
# __array_interface__ is not defined for older numpies
|
||||
# and on some pythons
|
||||
try:
|
||||
assert np.may_share_memory(test_data.series.index, identity.index)
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
assert identity.index.is_(test_data.series.index)
|
||||
assert identity.index.identical(test_data.series.index)
|
||||
|
||||
subIndex = test_data.series.index[10:20]
|
||||
subSeries = test_data.series.reindex(subIndex)
|
||||
|
||||
for idx, val in compat.iteritems(subSeries):
|
||||
assert val == test_data.series[idx]
|
||||
|
||||
subIndex2 = test_data.ts.index[10:20]
|
||||
subTS = test_data.ts.reindex(subIndex2)
|
||||
|
||||
for idx, val in compat.iteritems(subTS):
|
||||
assert val == test_data.ts[idx]
|
||||
stuffSeries = test_data.ts.reindex(subIndex)
|
||||
|
||||
assert np.isnan(stuffSeries).all()
|
||||
|
||||
# This is extremely important for the Cython code to not screw up
|
||||
nonContigIndex = test_data.ts.index[::2]
|
||||
subNonContig = test_data.ts.reindex(nonContigIndex)
|
||||
for idx, val in compat.iteritems(subNonContig):
|
||||
assert val == test_data.ts[idx]
|
||||
|
||||
# return a copy the same index here
|
||||
result = test_data.ts.reindex()
|
||||
assert not (result is test_data.ts)
|
||||
|
||||
|
||||
def test_reindex_nan():
|
||||
ts = Series([2, 3, 5, 7], index=[1, 4, nan, 8])
|
||||
|
||||
i, j = [nan, 1, nan, 8, 4, nan], [2, 0, 2, 3, 1, 2]
|
||||
assert_series_equal(ts.reindex(i), ts.iloc[j])
|
||||
|
||||
ts.index = ts.index.astype('object')
|
||||
|
||||
# reindex coerces index.dtype to float, loc/iloc doesn't
|
||||
assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False)
|
||||
|
||||
|
||||
def test_reindex_series_add_nat():
|
||||
rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s')
|
||||
series = Series(rng)
|
||||
|
||||
result = series.reindex(lrange(15))
|
||||
assert np.issubdtype(result.dtype, np.dtype('M8[ns]'))
|
||||
|
||||
mask = result.isna()
|
||||
assert mask[-5:].all()
|
||||
assert not mask[:-5].any()
|
||||
|
||||
|
||||
def test_reindex_with_datetimes():
|
||||
rng = date_range('1/1/2000', periods=20)
|
||||
ts = Series(np.random.randn(20), index=rng)
|
||||
|
||||
result = ts.reindex(list(ts.index[5:10]))
|
||||
expected = ts[5:10]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = ts[list(ts.index[5:10])]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_corner(test_data):
|
||||
# (don't forget to fix this) I think it's fixed
|
||||
test_data.empty.reindex(test_data.ts.index, method='pad') # it works
|
||||
|
||||
# corner case: pad empty series
|
||||
reindexed = test_data.empty.reindex(test_data.ts.index, method='pad')
|
||||
|
||||
# pass non-Index
|
||||
reindexed = test_data.ts.reindex(list(test_data.ts.index))
|
||||
assert_series_equal(test_data.ts, reindexed)
|
||||
|
||||
# bad fill method
|
||||
ts = test_data.ts[::2]
|
||||
pytest.raises(Exception, ts.reindex, test_data.ts.index, method='foo')
|
||||
|
||||
|
||||
def test_reindex_pad():
|
||||
s = Series(np.arange(10), dtype='int64')
|
||||
s2 = s[::2]
|
||||
|
||||
reindexed = s2.reindex(s.index, method='pad')
|
||||
reindexed2 = s2.reindex(s.index, method='ffill')
|
||||
assert_series_equal(reindexed, reindexed2)
|
||||
|
||||
expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10))
|
||||
assert_series_equal(reindexed, expected)
|
||||
|
||||
# GH4604
|
||||
s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
|
||||
new_index = ['a', 'g', 'c', 'f']
|
||||
expected = Series([1, 1, 3, 3], index=new_index)
|
||||
|
||||
# this changes dtype because the ffill happens after
|
||||
result = s.reindex(new_index).ffill()
|
||||
assert_series_equal(result, expected.astype('float64'))
|
||||
|
||||
result = s.reindex(new_index).ffill(downcast='infer')
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
expected = Series([1, 5, 3, 5], index=new_index)
|
||||
result = s.reindex(new_index, method='ffill')
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# inference of new dtype
|
||||
s = Series([True, False, False, True], index=list('abcd'))
|
||||
new_index = 'agc'
|
||||
result = s.reindex(list(new_index)).ffill()
|
||||
expected = Series([True, True, False], index=list(new_index))
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# GH4618 shifted series downcasting
|
||||
s = Series(False, index=lrange(0, 5))
|
||||
result = s.shift(1).fillna(method='bfill')
|
||||
expected = Series(False, index=lrange(0, 5))
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_nearest():
|
||||
s = Series(np.arange(10, dtype='int64'))
|
||||
target = [0.1, 0.9, 1.5, 2.0]
|
||||
actual = s.reindex(target, method='nearest')
|
||||
expected = Series(np.around(target).astype('int64'), target)
|
||||
assert_series_equal(expected, actual)
|
||||
|
||||
actual = s.reindex_like(actual, method='nearest')
|
||||
assert_series_equal(expected, actual)
|
||||
|
||||
actual = s.reindex_like(actual, method='nearest', tolerance=1)
|
||||
assert_series_equal(expected, actual)
|
||||
actual = s.reindex_like(actual, method='nearest',
|
||||
tolerance=[1, 2, 3, 4])
|
||||
assert_series_equal(expected, actual)
|
||||
|
||||
actual = s.reindex(target, method='nearest', tolerance=0.2)
|
||||
expected = Series([0, 1, np.nan, 2], target)
|
||||
assert_series_equal(expected, actual)
|
||||
|
||||
actual = s.reindex(target, method='nearest',
|
||||
tolerance=[0.3, 0.01, 0.4, 3])
|
||||
expected = Series([0, np.nan, np.nan, 2], target)
|
||||
assert_series_equal(expected, actual)
|
||||
|
||||
|
||||
def test_reindex_backfill():
|
||||
pass
|
||||
|
||||
|
||||
def test_reindex_int(test_data):
|
||||
ts = test_data.ts[::2]
|
||||
int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)
|
||||
|
||||
# this should work fine
|
||||
reindexed_int = int_ts.reindex(test_data.ts.index)
|
||||
|
||||
# if NaNs introduced
|
||||
assert reindexed_int.dtype == np.float_
|
||||
|
||||
# NO NaNs introduced
|
||||
reindexed_int = int_ts.reindex(int_ts.index[::2])
|
||||
assert reindexed_int.dtype == np.int_
|
||||
|
||||
|
||||
def test_reindex_bool(test_data):
|
||||
# A series other than float, int, string, or object
|
||||
ts = test_data.ts[::2]
|
||||
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
|
||||
|
||||
# this should work fine
|
||||
reindexed_bool = bool_ts.reindex(test_data.ts.index)
|
||||
|
||||
# if NaNs introduced
|
||||
assert reindexed_bool.dtype == np.object_
|
||||
|
||||
# NO NaNs introduced
|
||||
reindexed_bool = bool_ts.reindex(bool_ts.index[::2])
|
||||
assert reindexed_bool.dtype == np.bool_
|
||||
|
||||
|
||||
def test_reindex_bool_pad(test_data):
|
||||
# fail
|
||||
ts = test_data.ts[5:]
|
||||
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
|
||||
filled_bool = bool_ts.reindex(test_data.ts.index, method='pad')
|
||||
assert isna(filled_bool[:5]).all()
|
||||
|
||||
|
||||
def test_reindex_categorical():
|
||||
index = date_range('20000101', periods=3)
|
||||
|
||||
# reindexing to an invalid Categorical
|
||||
s = Series(['a', 'b', 'c'], dtype='category')
|
||||
result = s.reindex(index)
|
||||
expected = Series(Categorical(values=[np.nan, np.nan, np.nan],
|
||||
categories=['a', 'b', 'c']))
|
||||
expected.index = index
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# partial reindexing
|
||||
expected = Series(Categorical(values=['b', 'c'], categories=['a', 'b',
|
||||
'c']))
|
||||
expected.index = [1, 2]
|
||||
result = s.reindex([1, 2])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = Series(Categorical(
|
||||
values=['c', np.nan], categories=['a', 'b', 'c']))
|
||||
expected.index = [2, 3]
|
||||
result = s.reindex([2, 3])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_like(test_data):
|
||||
other = test_data.ts[::2]
|
||||
assert_series_equal(test_data.ts.reindex(other.index),
|
||||
test_data.ts.reindex_like(other))
|
||||
|
||||
# GH 7179
|
||||
day1 = datetime(2013, 3, 5)
|
||||
day2 = datetime(2013, 5, 5)
|
||||
day3 = datetime(2014, 3, 5)
|
||||
|
||||
series1 = Series([5, None, None], [day1, day2, day3])
|
||||
series2 = Series([None, None], [day1, day3])
|
||||
|
||||
result = series1.reindex_like(series2, method='pad')
|
||||
expected = Series([5, np.nan], index=[day1, day3])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_fill_value():
|
||||
# -----------------------------------------------------------
|
||||
# floats
|
||||
floats = Series([1., 2., 3.])
|
||||
result = floats.reindex([1, 2, 3])
|
||||
expected = Series([2., 3., np.nan], index=[1, 2, 3])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = floats.reindex([1, 2, 3], fill_value=0)
|
||||
expected = Series([2., 3., 0], index=[1, 2, 3])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# -----------------------------------------------------------
|
||||
# ints
|
||||
ints = Series([1, 2, 3])
|
||||
|
||||
result = ints.reindex([1, 2, 3])
|
||||
expected = Series([2., 3., np.nan], index=[1, 2, 3])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# don't upcast
|
||||
result = ints.reindex([1, 2, 3], fill_value=0)
|
||||
expected = Series([2, 3, 0], index=[1, 2, 3])
|
||||
assert issubclass(result.dtype.type, np.integer)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# -----------------------------------------------------------
|
||||
# objects
|
||||
objects = Series([1, 2, 3], dtype=object)
|
||||
|
||||
result = objects.reindex([1, 2, 3])
|
||||
expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = objects.reindex([1, 2, 3], fill_value='foo')
|
||||
expected = Series([2, 3, 'foo'], index=[1, 2, 3], dtype=object)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# bools
|
||||
bools = Series([True, False, True])
|
||||
|
||||
result = bools.reindex([1, 2, 3])
|
||||
expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = bools.reindex([1, 2, 3], fill_value=False)
|
||||
expected = Series([False, True, False], index=[1, 2, 3])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_rename():
|
||||
# GH 17407
|
||||
s = Series(range(1, 6), index=pd.Index(range(2, 7), name='IntIndex'))
|
||||
result = s.rename(str)
|
||||
expected = s.rename(lambda i: str(i))
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
assert result.name == expected.name
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'data, index, drop_labels,'
|
||||
' axis, expected_data, expected_index',
|
||||
[
|
||||
# Unique Index
|
||||
([1, 2], ['one', 'two'], ['two'],
|
||||
0, [1], ['one']),
|
||||
([1, 2], ['one', 'two'], ['two'],
|
||||
'rows', [1], ['one']),
|
||||
([1, 1, 2], ['one', 'two', 'one'], ['two'],
|
||||
0, [1, 2], ['one', 'one']),
|
||||
|
||||
# GH 5248 Non-Unique Index
|
||||
([1, 1, 2], ['one', 'two', 'one'], 'two',
|
||||
0, [1, 2], ['one', 'one']),
|
||||
([1, 1, 2], ['one', 'two', 'one'], ['one'],
|
||||
0, [1], ['two']),
|
||||
([1, 1, 2], ['one', 'two', 'one'], 'one',
|
||||
0, [1], ['two'])])
|
||||
def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels,
|
||||
expected_data, expected_index):
|
||||
|
||||
s = Series(data=data, index=index)
|
||||
result = s.drop(drop_labels, axis=axis)
|
||||
expected = Series(data=expected_data, index=expected_index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'data, index, drop_labels,'
|
||||
' axis, error_type, error_desc',
|
||||
[
|
||||
# single string/tuple-like
|
||||
(range(3), list('abc'), 'bc',
|
||||
0, KeyError, 'not found in axis'),
|
||||
|
||||
# bad axis
|
||||
(range(3), list('abc'), ('a',),
|
||||
0, KeyError, 'not found in axis'),
|
||||
(range(3), list('abc'), 'one',
|
||||
'columns', ValueError, 'No axis named columns')])
|
||||
def test_drop_exception_raised(data, index, drop_labels,
|
||||
axis, error_type, error_desc):
|
||||
|
||||
with tm.assert_raises_regex(error_type, error_desc):
|
||||
Series(data, index=index).drop(drop_labels, axis=axis)
|
||||
|
||||
|
||||
def test_drop_with_ignore_errors():
|
||||
# errors='ignore'
|
||||
s = Series(range(3), index=list('abc'))
|
||||
result = s.drop('bc', errors='ignore')
|
||||
tm.assert_series_equal(result, s)
|
||||
result = s.drop(['a', 'd'], errors='ignore')
|
||||
expected = s.iloc[1:]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# GH 8522
|
||||
s = Series([2, 3], index=[True, False])
|
||||
assert s.index.is_object()
|
||||
result = s.drop(True)
|
||||
expected = Series([3], index=[False])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 3]])
|
||||
@pytest.mark.parametrize('drop_labels', [[], [1], [3]])
|
||||
def test_drop_empty_list(index, drop_labels):
|
||||
# GH 21494
|
||||
expected_index = [i for i in index if i not in drop_labels]
|
||||
series = pd.Series(index=index).drop(drop_labels)
|
||||
tm.assert_series_equal(series, pd.Series(index=expected_index))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('data, index, drop_labels', [
|
||||
(None, [1, 2, 3], [1, 4]),
|
||||
(None, [1, 2, 2], [1, 4]),
|
||||
([2, 3], [0, 1], [False, True])
|
||||
])
|
||||
def test_drop_non_empty_list(data, index, drop_labels):
|
||||
# GH 21494 and GH 16877
|
||||
with tm.assert_raises_regex(KeyError, 'not found in axis'):
|
||||
pd.Series(data=data, index=index).drop(drop_labels)
|
||||
-601
@@ -1,601 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
from pandas import (Series, date_range, isna, Index, Timestamp)
|
||||
from pandas.compat import lrange, range
|
||||
from pandas.core.dtypes.common import is_integer
|
||||
|
||||
from pandas.core.indexing import IndexingError
|
||||
from pandas.tseries.offsets import BDay
|
||||
|
||||
from pandas.util.testing import (assert_series_equal)
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_getitem_boolean(test_data):
|
||||
s = test_data.series
|
||||
mask = s > s.median()
|
||||
|
||||
# passing list is OK
|
||||
result = s[list(mask)]
|
||||
expected = s[mask]
|
||||
assert_series_equal(result, expected)
|
||||
tm.assert_index_equal(result.index, s.index[mask])
|
||||
|
||||
|
||||
def test_getitem_boolean_empty():
|
||||
s = Series([], dtype=np.int64)
|
||||
s.index.name = 'index_name'
|
||||
s = s[s.isna()]
|
||||
assert s.index.name == 'index_name'
|
||||
assert s.dtype == np.int64
|
||||
|
||||
# GH5877
|
||||
# indexing with empty series
|
||||
s = Series(['A', 'B'])
|
||||
expected = Series(np.nan, index=['C'], dtype=object)
|
||||
result = s[Series(['C'], dtype=object)]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
s = Series(['A', 'B'])
|
||||
expected = Series(dtype=object, index=Index([], dtype='int64'))
|
||||
result = s[Series([], dtype=object)]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# invalid because of the boolean indexer
|
||||
# that's empty or not-aligned
|
||||
def f():
|
||||
s[Series([], dtype=bool)]
|
||||
|
||||
pytest.raises(IndexingError, f)
|
||||
|
||||
def f():
|
||||
s[Series([True], dtype=bool)]
|
||||
|
||||
pytest.raises(IndexingError, f)
|
||||
|
||||
|
||||
def test_getitem_boolean_object(test_data):
|
||||
# using column from DataFrame
|
||||
|
||||
s = test_data.series
|
||||
mask = s > s.median()
|
||||
omask = mask.astype(object)
|
||||
|
||||
# getitem
|
||||
result = s[omask]
|
||||
expected = s[mask]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# setitem
|
||||
s2 = s.copy()
|
||||
cop = s.copy()
|
||||
cop[omask] = 5
|
||||
s2[mask] = 5
|
||||
assert_series_equal(cop, s2)
|
||||
|
||||
# nans raise exception
|
||||
omask[5:10] = np.nan
|
||||
pytest.raises(Exception, s.__getitem__, omask)
|
||||
pytest.raises(Exception, s.__setitem__, omask, 5)
|
||||
|
||||
|
||||
def test_getitem_setitem_boolean_corner(test_data):
|
||||
ts = test_data.ts
|
||||
mask_shifted = ts.shift(1, freq=BDay()) > ts.median()
|
||||
|
||||
# these used to raise...??
|
||||
|
||||
pytest.raises(Exception, ts.__getitem__, mask_shifted)
|
||||
pytest.raises(Exception, ts.__setitem__, mask_shifted, 1)
|
||||
# ts[mask_shifted]
|
||||
# ts[mask_shifted] = 1
|
||||
|
||||
pytest.raises(Exception, ts.loc.__getitem__, mask_shifted)
|
||||
pytest.raises(Exception, ts.loc.__setitem__, mask_shifted, 1)
|
||||
# ts.loc[mask_shifted]
|
||||
# ts.loc[mask_shifted] = 2
|
||||
|
||||
|
||||
def test_setitem_boolean(test_data):
|
||||
mask = test_data.series > test_data.series.median()
|
||||
|
||||
# similar indexed series
|
||||
result = test_data.series.copy()
|
||||
result[mask] = test_data.series * 2
|
||||
expected = test_data.series * 2
|
||||
assert_series_equal(result[mask], expected[mask])
|
||||
|
||||
# needs alignment
|
||||
result = test_data.series.copy()
|
||||
result[mask] = (test_data.series * 2)[0:5]
|
||||
expected = (test_data.series * 2)[0:5].reindex_like(test_data.series)
|
||||
expected[-mask] = test_data.series[mask]
|
||||
assert_series_equal(result[mask], expected[mask])
|
||||
|
||||
|
||||
def test_get_set_boolean_different_order(test_data):
|
||||
ordered = test_data.series.sort_values()
|
||||
|
||||
# setting
|
||||
copy = test_data.series.copy()
|
||||
copy[ordered > 0] = 0
|
||||
|
||||
expected = test_data.series.copy()
|
||||
expected[expected > 0] = 0
|
||||
|
||||
assert_series_equal(copy, expected)
|
||||
|
||||
# getting
|
||||
sel = test_data.series[ordered > 0]
|
||||
exp = test_data.series[test_data.series > 0]
|
||||
assert_series_equal(sel, exp)
|
||||
|
||||
|
||||
def test_where_unsafe():
|
||||
# unsafe dtype changes
|
||||
for dtype in [np.int8, np.int16, np.int32, np.int64, np.float16,
|
||||
np.float32, np.float64]:
|
||||
s = Series(np.arange(10), dtype=dtype)
|
||||
mask = s < 5
|
||||
s[mask] = lrange(2, 7)
|
||||
expected = Series(lrange(2, 7) + lrange(5, 10), dtype=dtype)
|
||||
assert_series_equal(s, expected)
|
||||
assert s.dtype == expected.dtype
|
||||
|
||||
# these are allowed operations, but are upcasted
|
||||
for dtype in [np.int64, np.float64]:
|
||||
s = Series(np.arange(10), dtype=dtype)
|
||||
mask = s < 5
|
||||
values = [2.5, 3.5, 4.5, 5.5, 6.5]
|
||||
s[mask] = values
|
||||
expected = Series(values + lrange(5, 10), dtype='float64')
|
||||
assert_series_equal(s, expected)
|
||||
assert s.dtype == expected.dtype
|
||||
|
||||
# GH 9731
|
||||
s = Series(np.arange(10), dtype='int64')
|
||||
mask = s > 5
|
||||
values = [2.5, 3.5, 4.5, 5.5]
|
||||
s[mask] = values
|
||||
expected = Series(lrange(6) + values, dtype='float64')
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
# can't do these as we are forced to change the itemsize of the input
|
||||
# to something we cannot
|
||||
for dtype in [np.int8, np.int16, np.int32, np.float16, np.float32]:
|
||||
s = Series(np.arange(10), dtype=dtype)
|
||||
mask = s < 5
|
||||
values = [2.5, 3.5, 4.5, 5.5, 6.5]
|
||||
pytest.raises(Exception, s.__setitem__, tuple(mask), values)
|
||||
|
||||
# GH3235
|
||||
s = Series(np.arange(10), dtype='int64')
|
||||
mask = s < 5
|
||||
s[mask] = lrange(2, 7)
|
||||
expected = Series(lrange(2, 7) + lrange(5, 10), dtype='int64')
|
||||
assert_series_equal(s, expected)
|
||||
assert s.dtype == expected.dtype
|
||||
|
||||
s = Series(np.arange(10), dtype='int64')
|
||||
mask = s > 5
|
||||
s[mask] = [0] * 4
|
||||
expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype='int64')
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
s = Series(np.arange(10))
|
||||
mask = s > 5
|
||||
|
||||
def f():
|
||||
s[mask] = [5, 4, 3, 2, 1]
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
def f():
|
||||
s[mask] = [0] * 5
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
# dtype changes
|
||||
s = Series([1, 2, 3, 4])
|
||||
result = s.where(s > 2, np.nan)
|
||||
expected = Series([np.nan, np.nan, 3, 4])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# GH 4667
|
||||
# setting with None changes dtype
|
||||
s = Series(range(10)).astype(float)
|
||||
s[8] = None
|
||||
result = s[8]
|
||||
assert isna(result)
|
||||
|
||||
s = Series(range(10)).astype(float)
|
||||
s[s > 8] = None
|
||||
result = s[isna(s)]
|
||||
expected = Series(np.nan, index=[9])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_where_raise_on_error_deprecation():
|
||||
# gh-14968
|
||||
# deprecation of raise_on_error
|
||||
s = Series(np.random.randn(5))
|
||||
cond = s > 0
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
s.where(cond, raise_on_error=True)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
s.mask(cond, raise_on_error=True)
|
||||
|
||||
|
||||
def test_where():
|
||||
s = Series(np.random.randn(5))
|
||||
cond = s > 0
|
||||
|
||||
rs = s.where(cond).dropna()
|
||||
rs2 = s[cond]
|
||||
assert_series_equal(rs, rs2)
|
||||
|
||||
rs = s.where(cond, -s)
|
||||
assert_series_equal(rs, s.abs())
|
||||
|
||||
rs = s.where(cond)
|
||||
assert (s.shape == rs.shape)
|
||||
assert (rs is not s)
|
||||
|
||||
# test alignment
|
||||
cond = Series([True, False, False, True, False], index=s.index)
|
||||
s2 = -(s.abs())
|
||||
|
||||
expected = s2[cond].reindex(s2.index[:3]).reindex(s2.index)
|
||||
rs = s2.where(cond[:3])
|
||||
assert_series_equal(rs, expected)
|
||||
|
||||
expected = s2.abs()
|
||||
expected.iloc[0] = s2[0]
|
||||
rs = s2.where(cond[:3], -s2)
|
||||
assert_series_equal(rs, expected)
|
||||
|
||||
|
||||
def test_where_error():
|
||||
s = Series(np.random.randn(5))
|
||||
cond = s > 0
|
||||
|
||||
pytest.raises(ValueError, s.where, 1)
|
||||
pytest.raises(ValueError, s.where, cond[:3].values, -s)
|
||||
|
||||
# GH 2745
|
||||
s = Series([1, 2])
|
||||
s[[True, False]] = [0, 1]
|
||||
expected = Series([0, 2])
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
# failures
|
||||
pytest.raises(ValueError, s.__setitem__, tuple([[[True, False]]]),
|
||||
[0, 2, 3])
|
||||
pytest.raises(ValueError, s.__setitem__, tuple([[[True, False]]]),
|
||||
[])
|
||||
|
||||
|
||||
@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
|
||||
def test_where_array_like(klass):
|
||||
# see gh-15414
|
||||
s = Series([1, 2, 3])
|
||||
cond = [False, True, True]
|
||||
expected = Series([np.nan, 2, 3])
|
||||
|
||||
result = s.where(klass(cond))
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('cond', [
|
||||
[1, 0, 1],
|
||||
Series([2, 5, 7]),
|
||||
["True", "False", "True"],
|
||||
[Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")]
|
||||
])
|
||||
def test_where_invalid_input(cond):
|
||||
# see gh-15414: only boolean arrays accepted
|
||||
s = Series([1, 2, 3])
|
||||
msg = "Boolean array expected for the condition"
|
||||
|
||||
with tm.assert_raises_regex(ValueError, msg):
|
||||
s.where(cond)
|
||||
|
||||
msg = "Array conditional must be same shape as self"
|
||||
with tm.assert_raises_regex(ValueError, msg):
|
||||
s.where([True])
|
||||
|
||||
|
||||
def test_where_ndframe_align():
|
||||
msg = "Array conditional must be same shape as self"
|
||||
s = Series([1, 2, 3])
|
||||
|
||||
cond = [True]
|
||||
with tm.assert_raises_regex(ValueError, msg):
|
||||
s.where(cond)
|
||||
|
||||
expected = Series([1, np.nan, np.nan])
|
||||
|
||||
out = s.where(Series(cond))
|
||||
tm.assert_series_equal(out, expected)
|
||||
|
||||
cond = np.array([False, True, False, True])
|
||||
with tm.assert_raises_regex(ValueError, msg):
|
||||
s.where(cond)
|
||||
|
||||
expected = Series([np.nan, 2, np.nan])
|
||||
|
||||
out = s.where(Series(cond))
|
||||
tm.assert_series_equal(out, expected)
|
||||
|
||||
|
||||
def test_where_setitem_invalid():
    """Check which list assignments are rejected and which succeed.

    GH 2702: make sure correct exceptions are raised on invalid list
    assignment.
    """
    # slice
    ser = Series(list('abc'))
    with pytest.raises(ValueError):
        ser[0:3] = list(range(27))

    ser[0:3] = list(range(3))
    assert_series_equal(ser.astype(np.int64), Series([0, 1, 2]))

    # slice with step
    ser = Series(list('abcdef'))
    with pytest.raises(ValueError):
        ser[0:4:2] = list(range(27))

    ser = Series(list('abcdef'))
    ser[0:4:2] = list(range(2))
    assert_series_equal(ser, Series([0, 'b', 1, 'd', 'e', 'f']))

    # neg slices
    ser = Series(list('abcdef'))
    with pytest.raises(ValueError):
        ser[:-1] = list(range(27))

    ser[-3:-1] = list(range(2))
    assert_series_equal(ser, Series(['a', 'b', 'c', 0, 1, 'f']))

    # list: both a too-long and a too-short value list are rejected
    for n_values in (27, 2):
        ser = Series(list('abc'))
        with pytest.raises(ValueError):
            ser[[0, 1, 2]] = list(range(n_values))

    # scalar: the whole list is stored as a single element
    ser = Series(list('abc'))
    ser[0] = list(range(10))
    assert_series_equal(ser, Series([list(range(10)), 'b', 'c']))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('size', range(2, 6))
@pytest.mark.parametrize('mask', [
    [True, False, False, False, False],
    [True, False],
    [False]
])
# ``float`` is used instead of the ``np.float`` alias, which newer NumPy
# releases deprecate and then remove; both name the same type.
@pytest.mark.parametrize('item', [
    2.0, np.nan, np.finfo(float).max, np.finfo(float).min
])
# Test numpy arrays, lists and tuples as the input to be
# broadcast
@pytest.mark.parametrize('box', [
    lambda x: np.array([x]),
    lambda x: [x],
    lambda x: (x,)
])
def test_broadcast(size, mask, item, box):
    """A length-1 container is broadcast by setitem, ``where`` and ``mask``.

    Parameters
    ----------
    size : int
        Length of the series under test.
    mask : list of bool
        Pattern resized with ``np.resize`` to ``size`` to form the selection.
    item : float
        Value written into the selected positions.
    box : callable
        Wraps ``item`` in a one-element ndarray, list or tuple.
    """
    selection = np.resize(mask, size)

    data = np.arange(size, dtype=float)

    # Construct the expected series by taking the source
    # data or item based on the selection
    expected = Series([item if use_item else data[i]
                       for i, use_item in enumerate(selection)])

    # boolean setitem
    s = Series(data)
    s[selection] = box(item)
    assert_series_equal(s, expected)

    # where() keeps values where the condition holds, hence the inversion
    s = Series(data)
    result = s.where(~selection, box(item))
    assert_series_equal(result, expected)

    # mask() replaces values where the condition holds
    s = Series(data)
    result = s.mask(selection, box(item))
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_where_inplace():
    """``where(..., inplace=True)`` matches the non-inplace result."""
    original = Series(np.random.randn(5))
    positive = original > 0

    # default fill: dropped positions become NaN
    work = original.copy()
    work.where(positive, inplace=True)
    assert_series_equal(work.dropna(), original[positive])
    assert_series_equal(work, original.where(positive))

    # explicit replacement values
    work = original.copy()
    work.where(positive, -original, inplace=True)
    assert_series_equal(work, original.where(positive, -original))
|
||||
|
||||
|
||||
def test_where_dups():
    """``where`` and masked assignment on an index with duplicate labels."""
    dup_labels = [0, 1, 2, 0, 1, 2]
    halves = [Series(list(range(3))), Series(list(range(3)))]
    comb = pd.concat(halves)

    # GH 4550
    # where crashes with dups in index
    filtered = comb.where(comb < 2)
    assert_series_equal(
        filtered,
        Series([0, 1, np.nan, 0, 1, np.nan], index=dup_labels))

    # GH 4548
    # inplace updating not working with dups
    comb[comb < 1] = 5
    assert_series_equal(comb, Series([5, 1, 2, 5, 1, 2], index=dup_labels))

    comb[comb < 2] += 10
    assert_series_equal(comb, Series([5, 11, 2, 5, 11, 2], index=dup_labels))
|
||||
|
||||
|
||||
def test_where_numeric_with_string():
    """Filling an int series with strings via ``where`` (GH 9280)."""
    ser = pd.Series([1, 2, 3])

    # scalar, list and ndarray replacements are all checked the same way
    replacements = ['X', ['X', 'Y', 'Z'], np.array(['X', 'Y', 'Z'])]
    for other in replacements:
        w = ser.where(ser > 1, other)

        # only the first slot fails ``> 1`` and so receives a string
        assert not is_integer(w[0])
        assert is_integer(w[1])
        assert is_integer(w[2])
        assert isinstance(w[0], str)
        assert w.dtype == 'object'
|
||||
|
||||
|
||||
def test_where_timedelta_coerce():
    """Replacing every timedelta value with numbers via ``where``."""
    tds = Series([1, 2], dtype='timedelta64[ns]')
    nothing_kept = np.array([False, False])

    # int / float, scalar / list replacements all yield the same series
    wanted = Series([10, 10])
    for other in ([10, 10], 10, 10.0, [10.0, 10.0]):
        replaced = tds.where(nothing_kept, other)
        assert_series_equal(replaced, wanted)

    # a NaN among the replacements gives an object series holding None
    replaced = tds.where(nothing_kept, [10.0, np.nan])
    assert_series_equal(replaced, Series([10, None], dtype='object'))
|
||||
|
||||
|
||||
def test_where_datetime_conversion():
    """Replacing every datetime value with numbers, plus a tz-aware case."""
    stamps = Series(date_range('20130102', periods=2))
    nothing_kept = np.array([False, False])

    # int / float, scalar / list replacements all yield the same series
    wanted = Series([10, 10])
    for other in ([10, 10], 10, 10.0, [10.0, 10.0]):
        replaced = stamps.where(nothing_kept, other)
        assert_series_equal(replaced, wanted)

    # a NaN among the replacements gives an object series holding None
    replaced = stamps.where(nothing_kept, [10.0, np.nan])
    assert_series_equal(replaced, Series([10, None], dtype='object'))

    # GH 15701
    timestamps = ['2016-12-31 12:00:04+00:00',
                  '2016-12-31 12:00:04.010000+00:00']
    aware = Series([pd.Timestamp(t) for t in timestamps])
    kept_second = aware.where(Series([False, True]))
    assert_series_equal(kept_second, Series([pd.NaT, aware[1]]))
|
||||
|
||||
|
||||
def test_mask():
    """``mask(cond)`` is compared against ``where(~cond)``; see test_where."""
    ser = Series(np.random.randn(5))
    positive = ser > 0

    # explicit NaN fill, default fill and Series fill
    assert_series_equal(ser.where(~positive, np.nan), ser.mask(positive))
    assert_series_equal(ser.where(~positive), ser.mask(positive))
    assert_series_equal(ser.where(~positive, -ser),
                        ser.mask(positive, -ser))

    # a Series condition covering only the first three labels
    flags = Series([True, False, False, True, False], index=ser.index)
    partial = flags[:3]
    negated = -(ser.abs())
    assert_series_equal(negated.where(~partial), negated.mask(partial))
    assert_series_equal(negated.where(~partial, -negated),
                        negated.mask(partial, -negated))

    # a scalar condition, or a bare 3-element array, is rejected
    with pytest.raises(ValueError):
        ser.mask(1)
    with pytest.raises(ValueError):
        ser.mask(partial.values, -ser)

    # dtype changes
    ints = Series([1, 2, 3, 4])
    masked = ints.mask(ints > 2, np.nan)
    assert_series_equal(masked, Series([1, 2, np.nan, np.nan]))
|
||||
|
||||
|
||||
def test_mask_inplace():
    """``mask(..., inplace=True)`` matches the non-inplace result."""
    original = Series(np.random.randn(5))
    positive = original > 0

    # default fill: masked positions become NaN
    work = original.copy()
    work.mask(positive, inplace=True)
    assert_series_equal(work.dropna(), original[~positive])
    assert_series_equal(work, original.mask(positive))

    # explicit replacement values
    work = original.copy()
    work.mask(positive, -original, inplace=True)
    assert_series_equal(work, original.mask(positive, -original))
|
||||
-33
@@ -1,33 +0,0 @@
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_getitem_callable():
    """Indexing with a callable uses whatever the callable returns."""
    # GH 12533
    ser = pd.Series(4, index=list('ABCD'))

    # callable returning a single label
    picked = ser[lambda x: 'A']
    assert picked == ser.loc['A']

    # callable returning a list of labels
    picked = ser[lambda x: ['A', 'B']]
    tm.assert_series_equal(picked, ser.loc[['A', 'B']])

    # callable returning a boolean list
    picked = ser[lambda x: [True, False, True, True]]
    tm.assert_series_equal(picked, ser.iloc[[0, 2, 3]])
|
||||
|
||||
|
||||
def test_setitem_callable():
    """Assigning through a callable key that returns the label 'A'."""
    # GH 12533
    labels = list('ABCD')
    ser = pd.Series([1, 2, 3, 4], index=labels)
    ser[lambda x: 'A'] = -1

    wanted = pd.Series([-1, 2, 3, 4], index=labels)
    tm.assert_series_equal(ser, wanted)
|
||||
|
||||
|
||||
def test_setitem_other_callable():
    """A callable used as the assigned value is stored, not called."""
    # GH 13299
    def inc(x):
        return x + 1

    ser = pd.Series([1, 2, -1, 4])
    ser[ser < 0] = inc

    # the function object itself sits where the negative value was
    wanted = pd.Series([1, 2, inc, 4])
    tm.assert_series_equal(ser, wanted)
|
||||
-709
@@ -1,709 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import pytest
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from pandas import (Series, DataFrame,
|
||||
date_range, Timestamp, DatetimeIndex, NaT)
|
||||
|
||||
from pandas.compat import lrange, range
|
||||
from pandas.util.testing import (assert_series_equal,
|
||||
assert_frame_equal, assert_almost_equal)
|
||||
|
||||
import pandas.util.testing as tm
|
||||
|
||||
import pandas._libs.index as _index
|
||||
from pandas._libs import tslib
|
||||
|
||||
|
||||
"""
|
||||
Also test support for datetime64[ns] in Series / DataFrame
|
||||
"""
|
||||
|
||||
|
||||
def test_fancy_getitem():
    """Fetch one entry by position, date string, datetime and Timestamp."""
    # Built with date_range: constructing a regular index through
    # DatetimeIndex(start=..., end=..., freq=...) is deprecated upstream.
    dti = date_range(start=datetime(2005, 1, 1), end=datetime(2010, 1, 1),
                     freq='WOM-1FRI')

    # values equal their own position, so each lookup should yield 48
    s = Series(np.arange(len(dti)), index=dti)

    assert s[48] == 48
    assert s['1/2/2009'] == 48
    assert s['2009-1-2'] == 48
    assert s[datetime(2009, 1, 2)] == 48
    assert s[Timestamp(datetime(2009, 1, 2))] == 48

    # a date that is not in the index
    with pytest.raises(KeyError):
        s['2009-1-3']

    # string slice bounds and datetime slice bounds select the same rows
    assert_series_equal(s['3/6/2009':'2009-06-05'],
                        s[datetime(2009, 3, 6):datetime(2009, 6, 5)])
|
||||
|
||||
|
||||
def test_fancy_setitem():
    """Assign by position, by date string and by a date-string slice."""
    # Built with date_range: constructing a regular index through
    # DatetimeIndex(start=..., end=..., freq=...) is deprecated upstream.
    dti = date_range(start=datetime(2005, 1, 1), end=datetime(2010, 1, 1),
                     freq='WOM-1FRI')

    s = Series(np.arange(len(dti)), index=dti)

    # integer position
    s[48] = -1
    assert s[48] == -1

    # date string naming the same entry
    s['1/2/2009'] = -2
    assert s[48] == -2

    # slice between two date strings
    s['1/2/2009':'2009-06-05'] = -3
    assert (s[48:54] == -3).all()
|
||||
|
||||
|
||||
def test_dti_snap():
    """``DatetimeIndex.snap`` to weekly-Monday and business-day anchors."""
    dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002',
                         '1/5/2002', '1/6/2002', '1/7/2002'], freq='D')

    # (snap frequency, start of expected range, range frequency, repeats)
    scenarios = [
        ('W-MON', '12/31/2001', 'w-mon', [3, 4]),
        ('B', '1/1/2002', 'b', [1, 1, 1, 2, 2]),
    ]
    for snap_freq, range_start, range_freq, repeats in scenarios:
        snapped = dti.snap(freq=snap_freq)

        anchors = date_range(range_start, '1/7/2002', freq=range_freq)
        wanted = anchors.repeat(repeats)
        assert (snapped == wanted).all()
|
||||
|
||||
|
||||
def test_dti_reset_index_round_trip():
    """``reset_index`` followed by ``set_index`` restores a datetime index."""
    # Built with date_range: constructing a regular index through
    # DatetimeIndex(start=..., end=..., freq=...) is deprecated upstream.
    dti = date_range(start='1/1/2001', end='6/1/2001', freq='D')
    d1 = DataFrame({'v': np.random.rand(len(dti))}, index=dti)

    # the index becomes the first column and keeps its datetime64 dtype
    d2 = d1.reset_index()
    assert d2.dtypes[0] == np.dtype('M8[ns]')

    d3 = d2.set_index('index')
    assert_frame_equal(d1, d3, check_names=False)

    # #2329
    stamp = datetime(2012, 11, 22)
    df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value'])
    df = df.set_index('Date')

    assert df.index[0] == stamp
    assert df.reset_index()['Date'][0] == stamp
|
||||
|
||||
|
||||
def test_series_set_value():
    """``set_value`` with datetime keys, expecting a FutureWarning (#1561)."""
    first, second = datetime(2001, 1, 1), datetime(2001, 1, 2)
    index = DatetimeIndex([first, second])

    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        one_row = Series().set_value(first, 1.)
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        two_rows = one_row.set_value(second, np.nan)

    assert_series_equal(two_rows, Series([1., np.nan], index=index))

    # Disabled follow-up check, kept as found:
    # s = Series(index[:1], index[:1])
    # s2 = s.set_value(dates[1], index[1])
    # assert s2.values.dtype == 'M8[ns]'
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_slice_locs_indexerror():
    """Slice 100000 ten-minute stamps with bounds far outside the data.

    Only checks that the ``.loc`` slice does not raise.
    """
    n_rows = 100000
    origin = datetime(2000, 1, 1)
    stamps = [origin + timedelta(minutes=10 * step)
              for step in range(n_rows)]
    ser = Series(lrange(n_rows), stamps)

    ser.loc[datetime(1900, 1, 1):datetime(2100, 1, 1)]
|
||||
|
||||
|
||||
def test_slicing_datetimes():
    """``.loc`` slicing by datetime / string, with and without duplicates."""
    # GH 7523

    # first a unique index, then one where 2 January appears twice
    for days in ([1, 2, 3, 4], [1, 2, 2, 3, 4]):
        frame = DataFrame(np.arange(len(days), dtype='float64'),
                          index=[datetime(2001, 1, day, 10, 00)
                                 for day in days])

        # bounds that coincide with the first / last stamp keep everything
        lower, upper = datetime(2001, 1, 1, 10), datetime(2001, 1, 4, 10)
        assert_frame_equal(frame.loc[lower:], frame)
        assert_frame_equal(frame.loc[:upper], frame)
        assert_frame_equal(frame.loc[lower:upper], frame)

        # a bound one hour after the first stamp drops only the first row
        tail = frame.iloc[1:]
        assert_frame_equal(frame.loc[datetime(2001, 1, 1, 11):], tail)
        assert_frame_equal(frame.loc['20010101 11':], tail)
|
||||
|
||||
|
||||
def test_frame_datetime64_duplicated():
    """``duplicated`` flags no row when every date in the frame is distinct."""
    dates = date_range('2010-07-01', end='2010-08-05')

    # ``~`` is the logical-not operator for a boolean mask; the previous
    # unary minus only worked because pandas special-cases it for booleans.
    tst = DataFrame({'symbol': 'AAA', 'date': dates})
    result = tst.duplicated(['date', 'symbol'])
    assert (~result).all()

    tst = DataFrame({'date': dates})
    result = tst.duplicated()
    assert (~result).all()
|
||||
|
||||
|
||||
def test_getitem_setitem_datetime_tz_pytz():
    """Set values on a tz-aware index using keys from other zones (pytz)."""
    from pytz import timezone as tz
    from pandas import date_range

    N = 50
    # testing with timezone, GH #2785
    rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern')
    ts = Series(np.random.randn(N), index=rng)

    def check_roundtrip(key):
        # overwrite the entry with 0, restore it from ts[4], and expect
        # the original series back
        scratch = ts.copy()
        scratch[key] = 0
        scratch[key] = ts[4]
        assert_series_equal(scratch, ts)

    # also test Timestamp tz handling, GH #2789
    check_roundtrip("1990-01-01 09:00:00+00:00")
    check_roundtrip("1990-01-01 03:00:00-06:00")

    # repeat with datetimes
    check_roundtrip(datetime(1990, 1, 1, 9, tzinfo=tz('UTC')))

    # comparison dates with datetime MUST be localized!
    check_roundtrip(tz('US/Central').localize(datetime(1990, 1, 1, 3)))
|
||||
|
||||
|
||||
def test_getitem_setitem_datetime_tz_dateutil():
    """Set values on a tz-aware index using keys from other zones (dateutil)."""
    from dateutil.tz import tzutc
    from pandas._libs.tslibs.timezones import dateutil_gettz as gettz
    from pandas import date_range

    def tz(name):
        # handle special case for utc in dateutil
        if name == 'UTC':
            return tzutc()
        return gettz(name)

    N = 50

    # testing with timezone, GH #2785
    rng = date_range('1/1/1990', periods=N, freq='H',
                     tz='America/New_York')
    ts = Series(np.random.randn(N), index=rng)

    def check_roundtrip(key):
        # overwrite the entry with 0, restore it from ts[4], and expect
        # the original series back
        scratch = ts.copy()
        scratch[key] = 0
        scratch[key] = ts[4]
        assert_series_equal(scratch, ts)

    # also test Timestamp tz handling, GH #2789
    check_roundtrip("1990-01-01 09:00:00+00:00")
    check_roundtrip("1990-01-01 03:00:00-06:00")

    # repeat with datetimes
    check_roundtrip(datetime(1990, 1, 1, 9, tzinfo=tz('UTC')))
    check_roundtrip(datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago')))
|
||||
|
||||
|
||||
def test_getitem_setitem_datetimeindex():
    """Get/set on a tz-aware hourly index via strings, datetimes, labels."""
    N = 50
    # testing with timezone, GH #2785
    rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern')
    ts = Series(np.random.randn(N), index=rng)

    def check_set_roundtrip(key, original):
        # zero the selection, write the original values back, and expect
        # the untouched series again
        scratch = ts.copy()
        scratch[key] = 0
        scratch[key] = original
        assert_series_equal(scratch, ts)

    def between(lower, upper):
        # boolean selection built from index comparisons
        return ts[(ts.index >= lower) & (ts.index <= upper)]

    # strings without a UTC offset
    hour4, hour7 = "1990-01-01 04:00:00", "1990-01-01 07:00:00"
    assert ts[hour4] == ts[4]
    check_set_roundtrip(hour4, ts[4])
    assert_series_equal(ts[hour4:hour7], ts[4:8])
    check_set_roundtrip(slice(hour4, hour7), ts[4:8])

    # GH#18435 strings get a pass from tzawareness compat
    assert_series_equal(between(hour4, hour7), ts[4:8])
    assert_series_equal(between("1990-01-01 04:00:00-0500",
                                "1990-01-01 07:00:00-0500"),
                        ts[4:8])

    # repeat all the above with naive datetimes
    naive4, naive7 = datetime(1990, 1, 1, 4), datetime(1990, 1, 1, 7)
    assert ts[naive4] == ts[4]
    check_set_roundtrip(naive4, ts[4])
    assert_series_equal(ts[naive4:naive7], ts[4:8])
    check_set_roundtrip(slice(naive4, naive7), ts[4:8])

    with pytest.raises(TypeError):
        # tznaive vs tzaware comparison is invalid
        # see GH#18376, GH#18162
        between(naive4, naive7)

    # localized to the index's own zone, the comparison is allowed
    aware4 = pd.Timestamp(naive4).tz_localize(rng.tzinfo)
    aware7 = pd.Timestamp(naive7).tz_localize(rng.tzinfo)
    assert_series_equal(between(aware4, aware7), ts[4:8])

    # keys taken from the index itself
    assert ts[ts.index[4]] == ts[4]
    assert_series_equal(ts[ts.index[4:8]], ts[4:8])

    scratch = ts.copy()
    scratch[ts.index[4:8]] = 0
    scratch[4:8] = ts[4:8]
    assert_series_equal(scratch, ts)

    # also test partial date slicing
    assert_series_equal(ts["1990-01-02"], ts[24:48])
    check_set_roundtrip("1990-01-02", ts[24:48])
|
||||
|
||||
|
||||
def test_getitem_setitem_periodindex():
    """Get/set on an hourly PeriodIndex via strings and index labels."""
    from pandas import period_range

    N = 50
    rng = period_range('1/1/1990', periods=N, freq='H')
    ts = Series(np.random.randn(N), index=rng)

    def check_set_roundtrip(key, original):
        # zero the selection, write the original values back, and expect
        # the untouched series again
        scratch = ts.copy()
        scratch[key] = 0
        scratch[key] = original
        assert_series_equal(scratch, ts)

    hour4, hour7 = "1990-01-01 04", "1990-01-01 07"

    # single period
    assert ts[hour4] == ts[4]
    check_set_roundtrip(hour4, ts[4])

    # string slice
    assert_series_equal(ts[hour4:hour7], ts[4:8])
    check_set_roundtrip(slice(hour4, hour7), ts[4:8])

    # boolean selection built from index comparisons
    in_window = (ts.index >= hour4) & (ts.index <= hour7)
    assert_series_equal(ts[in_window], ts[4:8])

    # GH 2782
    assert ts[ts.index[4]] == ts[4]
    assert_series_equal(ts[ts.index[4:8]], ts[4:8])

    scratch = ts.copy()
    scratch[ts.index[4:8]] = 0
    scratch[4:8] = ts[4:8]
    assert_series_equal(scratch, ts)
|
||||
|
||||
|
||||
def test_getitem_median_slice_bug():
    """A one-element list holding a slice indexes like the bare slice."""
    stamps = date_range('20090415', '20090519', freq='2B')
    ser = Series(np.random.randn(13), index=stamps)

    only_slice = slice(6, 7, None)
    via_list = ser[[only_slice]]
    assert_series_equal(via_list, ser[only_slice])
|
||||
|
||||
|
||||
def test_datetime_indexing():
    """A missing Timestamp raises on lookup but can be assigned."""
    from pandas import date_range

    index = date_range('1/1/2000', '1/7/2000').repeat(3)
    stamp = Timestamp('1/8/2000')

    in_order = Series(len(index), index=index)
    # not monotonic
    reversed_order = Series(len(index), index=index)[::-1]

    for ser in (in_order, reversed_order):
        with pytest.raises(KeyError):
            ser[stamp]

        # assigning the unseen label adds it
        ser[stamp] = 0
        assert ser[stamp] == 0
|
||||
|
||||
|
||||
"""
|
||||
test duplicates in time series
|
||||
"""
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
def dups():
    """Random series over ten dates: 2-4 Jan 2000 thrice each, 5 Jan once."""
    repeated_days = [2, 2, 2, 3, 3, 3, 4, 4, 4, 5]
    dates = [datetime(2000, 1, day) for day in repeated_days]

    return Series(np.random.randn(len(dates)), index=dates)
|
||||
|
||||
|
||||
def test_constructor(dups):
    """The fixture is a Series carrying a DatetimeIndex."""
    for obj, kind in ((dups, Series), (dups.index, DatetimeIndex)):
        assert isinstance(obj, kind)
|
||||
|
||||
|
||||
def test_is_unique_monotonic(dups):
    """An index with repeated dates must not report itself as unique."""
    index = dups.index
    assert not index.is_unique
|
||||
|
||||
|
||||
def test_index_unique(dups):
    """``unique`` / ``nunique`` on naive, tz-aware and NaT-bearing indexes."""
    uniques = dups.index.unique()
    distinct_days = DatetimeIndex([datetime(2000, 1, day)
                                   for day in (2, 3, 4, 5)])
    assert uniques.dtype == 'M8[ns]'  # sanity
    tm.assert_index_equal(uniques, distinct_days)
    assert dups.index.nunique() == 4

    # #2563
    assert isinstance(uniques, DatetimeIndex)

    # the timezone and the name are both present on the unique index
    localized = dups.index.tz_localize('US/Eastern')
    localized.name = 'foo'
    result = localized.unique()
    wanted = DatetimeIndex(distinct_days, name='foo')
    wanted = wanted.tz_localize('US/Eastern')
    assert result.tz is not None
    assert result.name == 'foo'
    tm.assert_index_equal(result, wanted)

    # NaT, note this is excluded
    as_ints = [1370745748 + t for t in range(20)] + [tslib.iNaT]
    as_stamps = [Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t)
                 for t in range(20)] + [NaT]
    for arr in (as_ints, as_stamps):
        idx = DatetimeIndex(arr * 3)
        tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
        assert idx.nunique() == 20
        assert idx.nunique(dropna=False) == 21
|
||||
|
||||
|
||||
def test_index_dupes_contains():
    """``in`` finds a datetime that occurs twice in the index."""
    moment = datetime(2011, 12, 5, 20, 30)
    doubled = DatetimeIndex([moment] * 2)
    assert moment in doubled
|
||||
|
||||
|
||||
def test_duplicate_dates_indexing(dups):
    """Get and set by date on a series whose index repeats dates.

    Parameters
    ----------
    dups : Series
        Module-scoped fixture. A copy is used because the last step adds a
        new label, which would otherwise change the shared object that
        other tests in the module receive.
    """
    ts = dups.copy()

    uniques = ts.index.unique()
    for date in uniques:
        result = ts[date]

        mask = ts.index == date
        total = mask.sum()
        expected = ts[mask]
        if total > 1:
            # a repeated date returns every matching row
            assert_series_equal(result, expected)
        else:
            # a date that occurs once returns a scalar
            assert_almost_equal(result, expected[0])

        # assigning through the date overwrites every matching row
        cp = ts.copy()
        cp[date] = 0
        expected = Series(np.where(mask, 0, ts), index=ts.index)
        assert_series_equal(cp, expected)

    with pytest.raises(KeyError):
        ts[datetime(2000, 1, 6)]

    # new index
    ts[datetime(2000, 1, 6)] = 0
    assert ts[datetime(2000, 1, 6)] == 0
|
||||
|
||||
|
||||
def test_range_slice():
    """String slices on an index where 2 January appears twice."""
    stamps = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000',
                            '1/4/2000'])
    ser = Series(np.random.randn(len(stamps)), index=stamps)

    # open-ended: from the first 2 January row to the end
    assert_series_equal(ser['1/2/2000':], ser[1:])

    # closed: both 2 January rows and the 3 January row
    assert_series_equal(ser['1/2/2000':'1/3/2000'], ser[1:4])
|
||||
|
||||
|
||||
def test_groupby_average_dup_values(dups):
    """Grouping by ``level=0`` and by the index give the same means."""
    by_level = dups.groupby(level=0).mean()
    by_index = dups.groupby(dups.index).mean()
    assert_series_equal(by_level, by_index)
|
||||
|
||||
|
||||
def test_indexing_over_size_cutoff():
    """Label lookup on 4400 rows with ``_SIZE_CUTOFF`` lowered to 1000."""
    import datetime as dt
    # #1821

    saved_cutoff = _index._SIZE_CUTOFF
    try:
        _index._SIZE_CUTOFF = 1000

        # create large list of non periodic datetime
        one_sec = dt.timedelta(seconds=1)
        half_sec = dt.timedelta(microseconds=500000)
        n = 1100
        dates = []
        anchor = dt.datetime(2011, 12, 5, 20, 30)
        for _ in range(n):
            dates.extend([anchor,
                          anchor + one_sec,
                          anchor + one_sec + half_sec,
                          anchor + one_sec + one_sec + half_sec])
            anchor += 3 * one_sec

        # duplicate some values in the list
        for p in np.random.randint(0, len(dates) - 1, 20):
            dates[p + 1] = dates[p]

        df = DataFrame(np.random.randn(len(dates), 4),
                       index=dates,
                       columns=list('ABCD'))

        timestamp = df.index[n * 3]
        assert timestamp in df.index

        # it works!
        df.loc[timestamp]
        assert len(df.loc[[timestamp]]) > 0
    finally:
        # always restore the module-level cutoff
        _index._SIZE_CUTOFF = saved_cutoff
|
||||
|
||||
|
||||
def test_indexing_unordered():
    """Label and partial-string lookups on a shuffled datetime index."""
    # GH 2437
    rng = date_range(start='2011-01-01', end='2011-01-15')
    ts = Series(np.random.rand(len(rng)), index=rng)

    # same rows as ``ts`` with the middle block moved to the end
    ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])

    # every label finds the same value in both orderings
    for t in ts.index:
        expected = ts[t]
        result = ts2[t]
        assert expected == result

    # GH 3448 (ranges)
    def compare(slobj):
        # slicing the shuffled series, once re-sorted, must match slicing
        # the ordered one
        result = ts2[slobj].copy()
        result = result.sort_index()
        expected = ts[slobj]
        assert_series_equal(result, expected)

    compare(slice('2011-01-01', '2011-01-15'))
    compare(slice('2010-12-30', '2011-01-15'))
    compare(slice('2011-01-01', '2011-01-16'))

    # partial ranges
    compare(slice('2011-01-01', '2011-01-6'))
    compare(slice('2011-01-06', '2011-01-8'))
    compare(slice('2011-01-06', '2011-01-12'))

    # single values
    result = ts2['2011'].sort_index()
    expected = ts['2011']
    assert_series_equal(result, expected)

    # diff freq
    rng = date_range(datetime(2005, 1, 1), periods=20, freq='M')
    ts = Series(np.arange(len(rng)), index=rng)
    ts = ts.take(np.random.permutation(20))

    result = ts['2005']
    for t in result.index:
        assert t.year == 2005
|
||||
|
||||
|
||||
def test_indexing():
    """Partial-string get/set on Series and DataFrame with datetime index."""
    idx = date_range("2001-1-1", periods=20, freq='M')
    ts = Series(np.random.rand(len(idx)), index=idx)

    # getting

    # GH 3070, make sure semantics work on Series/Frame
    year_slice = ts['2001']
    year_slice.name = 'A'

    df = DataFrame({'A': ts})
    assert_series_equal(year_slice, df['2001']['A'])

    # setting
    ts['2001'] = 1
    year_slice = ts['2001']
    year_slice.name = 'A'

    df.loc['2001', 'A'] = 1
    assert_series_equal(year_slice, df['2001']['A'])

    # GH3546 (not including times on the last day)
    for last_stamp, freq in (('2013-05-31 23:00', 'H'),
                             ('2013-05-31 23:59', 'S')):
        idx = date_range(start='2013-05-31 00:00', end=last_stamp,
                         freq=freq)
        ts = Series(lrange(len(idx)), index=idx)
        assert_series_equal(ts['2013-05'], ts)

    idx = [Timestamp('2013-05-31 00:00'),
           Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))]
    ts = Series(lrange(len(idx)), index=idx)
    assert_series_equal(ts['2013'], ts)

    # GH14826, indexing with a seconds resolution string / datetime object
    second_stamps = date_range('2012-01-02 18:01:00',
                               periods=5, tz='US/Central', freq='s')
    df = DataFrame(np.random.rand(5, 5),
                   columns=['open', 'high', 'low', 'close', 'volume'],
                   index=second_stamps)
    # result is not compared against anything below
    expected = df.loc[[df.index[2]]]  # noqa

    # this is a single date, so will raise
    with pytest.raises(KeyError):
        df['2012-01-02 18:01:02']
    with pytest.raises(KeyError):
        df[df.index[2]]
|
||||
|
||||
|
||||
"""
|
||||
test NaT support
|
||||
"""
|
||||
|
||||
|
||||
def test_set_none_nan():
    """Writing ``None`` or ``np.nan`` into a datetime series stores NaT."""
    stamps = Series(date_range('1/1/2000', periods=10))

    # (missing value, scalar position, slice, position checked after slice)
    cases = [
        (None, 3, slice(3, 5), 4),
        (np.nan, 5, slice(5, 7), 6),
    ]
    for missing, pos, window, probe in cases:
        stamps[pos] = missing
        assert stamps[pos] is NaT

        stamps[window] = missing
        assert stamps[probe] is NaT
|
||||
|
||||
|
||||
def test_nat_operations():
    """median / min / max of ``[0, NaT]`` timedeltas return the one value."""
    # GH 8617
    deltas = Series([0, pd.NaT], dtype='m8[ns]')
    only_value = deltas[0]

    for reducer in (deltas.median, deltas.min, deltas.max):
        assert reducer() == only_value
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ["round", "floor", "ceil"])
@pytest.mark.parametrize('freq', ["s", "5s", "min", "5min", "h", "5h"])
def test_round_nat(method, freq):
    """Rounding a series holding only NaT yields NaT for every frequency."""
    # GH14940
    nat_only = Series([pd.NaT])
    rounded = getattr(nat_only.dt, method)(freq)
    assert_series_equal(rounded, Series(pd.NaT))
|
||||
@@ -1,38 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import Series
|
||||
|
||||
from pandas.compat import lrange, range
|
||||
from pandas.util.testing import (assert_series_equal,
|
||||
assert_almost_equal)
|
||||
|
||||
|
||||
def test_iloc():
    """``iloc`` with a scalar, a slice and a list on an even-integer index."""
    s = Series(np.random.randn(10), index=lrange(0, 20, 2))

    # scalar position vs. lookup by the label at that position
    for pos, label in enumerate(s.index):
        assert_almost_equal(s.iloc[pos], s[label])

    # pass a slice
    window = s.iloc[1:3]
    assert_series_equal(window, s.loc[2:4])

    # test slice is a view
    window[:] = 0
    assert (s[1:3] == 0).all()

    # list of integers
    positions = [0, 2, 3, 4, 5]
    assert_series_equal(s.iloc[positions],
                        s.reindex(s.index[positions]))
|
||||
|
||||
|
||||
def test_iloc_nonunique():
    """``iloc`` returns the third value even though label 0 is repeated."""
    ser = Series([0, 1, 2], index=[0, 1, 0])
    last = ser.iloc[2]
    assert last == 2
|
||||
-770
@@ -1,770 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
""" test get/set & misc """
|
||||
|
||||
import pytest
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
from pandas import (Series, DataFrame, MultiIndex,
|
||||
Timestamp, Timedelta, Categorical)
|
||||
from pandas.tseries.offsets import BDay
|
||||
|
||||
from pandas.compat import lrange, range
|
||||
|
||||
from pandas.util.testing import (assert_series_equal)
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_basic_indexing():
    """Out-of-range positions and unknown labels must raise.

    The checks run on a Series with duplicated string labels, and the
    positional ones are repeated after sorting the index.
    """
    ser = Series(np.random.randn(5), index=['a', 'b', 'a', 'a', 'b'])

    with pytest.raises(IndexError):
        ser[5]
    with pytest.raises(IndexError):
        ser[5] = 0

    with pytest.raises(KeyError):
        ser['c']

    ser = ser.sort_index()

    with pytest.raises(IndexError):
        ser[5]
    with pytest.raises(IndexError):
        ser[5] = 0
|
||||
|
||||
|
||||
def test_basic_getitem_with_labels(test_data):
|
||||
indices = test_data.ts.index[[5, 10, 15]]
|
||||
|
||||
result = test_data.ts[indices]
|
||||
expected = test_data.ts.reindex(indices)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = test_data.ts[indices[0]:indices[2]]
|
||||
expected = test_data.ts.loc[indices[0]:indices[2]]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# integer indexes, be careful
|
||||
s = Series(np.random.randn(10), index=lrange(0, 20, 2))
|
||||
inds = [0, 2, 5, 7, 8]
|
||||
arr_inds = np.array([0, 2, 5, 7, 8])
|
||||
with tm.assert_produces_warning(FutureWarning,
|
||||
check_stacklevel=False):
|
||||
result = s[inds]
|
||||
expected = s.reindex(inds)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning,
|
||||
check_stacklevel=False):
|
||||
result = s[arr_inds]
|
||||
expected = s.reindex(arr_inds)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# GH12089
|
||||
# with tz for values
|
||||
s = Series(pd.date_range("2011-01-01", periods=3, tz="US/Eastern"),
|
||||
index=['a', 'b', 'c'])
|
||||
expected = Timestamp('2011-01-01', tz='US/Eastern')
|
||||
result = s.loc['a']
|
||||
assert result == expected
|
||||
result = s.iloc[0]
|
||||
assert result == expected
|
||||
result = s['a']
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_getitem_setitem_ellipsis():
|
||||
s = Series(np.random.randn(10))
|
||||
|
||||
np.fix(s)
|
||||
|
||||
result = s[...]
|
||||
assert_series_equal(result, s)
|
||||
|
||||
s[...] = 5
|
||||
assert (result == 5).all()
|
||||
|
||||
|
||||
def test_getitem_get(test_data):
|
||||
test_series = test_data.series
|
||||
test_obj_series = test_data.objSeries
|
||||
|
||||
idx1 = test_series.index[5]
|
||||
idx2 = test_obj_series.index[5]
|
||||
|
||||
assert test_series[idx1] == test_series.get(idx1)
|
||||
assert test_obj_series[idx2] == test_obj_series.get(idx2)
|
||||
|
||||
assert test_series[idx1] == test_series[5]
|
||||
assert test_obj_series[idx2] == test_obj_series[5]
|
||||
|
||||
assert test_series.get(-1) == test_series.get(test_series.index[-1])
|
||||
assert test_series[5] == test_series.get(test_series.index[5])
|
||||
|
||||
# missing
|
||||
d = test_data.ts.index[0] - BDay()
|
||||
pytest.raises(KeyError, test_data.ts.__getitem__, d)
|
||||
|
||||
# None
|
||||
# GH 5652
|
||||
for s in [Series(), Series(index=list('abc'))]:
|
||||
result = s.get(None)
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_getitem_fancy(test_data):
|
||||
slice1 = test_data.series[[1, 2, 3]]
|
||||
slice2 = test_data.objSeries[[1, 2, 3]]
|
||||
assert test_data.series.index[2] == slice1.index[1]
|
||||
assert test_data.objSeries.index[2] == slice2.index[1]
|
||||
assert test_data.series[2] == slice1[1]
|
||||
assert test_data.objSeries[2] == slice2[1]
|
||||
|
||||
|
||||
def test_getitem_generator(test_data):
|
||||
gen = (x > 0 for x in test_data.series)
|
||||
result = test_data.series[gen]
|
||||
result2 = test_data.series[iter(test_data.series > 0)]
|
||||
expected = test_data.series[test_data.series > 0]
|
||||
assert_series_equal(result, expected)
|
||||
assert_series_equal(result2, expected)
|
||||
|
||||
|
||||
def test_type_promotion():
|
||||
# GH12599
|
||||
s = pd.Series()
|
||||
s["a"] = pd.Timestamp("2016-01-01")
|
||||
s["b"] = 3.0
|
||||
s["c"] = "foo"
|
||||
expected = Series([pd.Timestamp("2016-01-01"), 3.0, "foo"],
|
||||
index=["a", "b", "c"])
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'result_1, duplicate_item, expected_1',
|
||||
[
|
||||
[
|
||||
pd.Series({1: 12, 2: [1, 2, 2, 3]}), pd.Series({1: 313}),
|
||||
pd.Series({1: 12, }, dtype=object),
|
||||
],
|
||||
[
|
||||
pd.Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
|
||||
pd.Series({1: [1, 2, 3]}), pd.Series({1: [1, 2, 3], }),
|
||||
],
|
||||
])
|
||||
def test_getitem_with_duplicates_indices(
|
||||
result_1, duplicate_item, expected_1):
|
||||
# GH 17610
|
||||
result = result_1.append(duplicate_item)
|
||||
expected = expected_1.append(duplicate_item)
|
||||
assert_series_equal(result[1], expected)
|
||||
assert result[2] == result_1[2]
|
||||
|
||||
|
||||
def test_getitem_out_of_bounds(test_data):
|
||||
# don't segfault, GH #495
|
||||
pytest.raises(IndexError, test_data.ts.__getitem__, len(test_data.ts))
|
||||
|
||||
# GH #917
|
||||
s = Series([])
|
||||
pytest.raises(IndexError, s.__getitem__, -1)
|
||||
|
||||
|
||||
def test_getitem_setitem_integers():
|
||||
# caused bug without test
|
||||
s = Series([1, 2, 3], ['a', 'b', 'c'])
|
||||
|
||||
assert s.iloc[0] == s['a']
|
||||
s.iloc[0] = 5
|
||||
tm.assert_almost_equal(s['a'], 5)
|
||||
|
||||
|
||||
def test_getitem_box_float64(test_data):
|
||||
value = test_data.ts[5]
|
||||
assert isinstance(value, np.float64)
|
||||
|
||||
|
||||
def test_series_box_timestamp():
|
||||
rng = pd.date_range('20090415', '20090519', freq='B')
|
||||
ser = Series(rng)
|
||||
|
||||
assert isinstance(ser[5], pd.Timestamp)
|
||||
|
||||
rng = pd.date_range('20090415', '20090519', freq='B')
|
||||
ser = Series(rng, index=rng)
|
||||
assert isinstance(ser[5], pd.Timestamp)
|
||||
|
||||
assert isinstance(ser.iat[5], pd.Timestamp)
|
||||
|
||||
|
||||
def test_getitem_ambiguous_keyerror():
    """A label missing from an integer index raises ``KeyError``.

    The index holds only even labels, so ``1`` is absent as a label; both
    ``[]`` and ``.loc`` are expected to raise.
    """
    ser = Series(lrange(10), index=lrange(0, 20, 2))
    with pytest.raises(KeyError):
        ser[1]
    with pytest.raises(KeyError):
        ser.loc[1]
|
||||
|
||||
|
||||
def test_getitem_unordered_dup():
|
||||
obj = Series(lrange(5), index=['c', 'a', 'a', 'b', 'b'])
|
||||
assert is_scalar(obj['c'])
|
||||
assert obj['c'] == 0
|
||||
|
||||
|
||||
def test_getitem_dups_with_missing():
|
||||
# breaks reindex, so need to use .loc internally
|
||||
# GH 4246
|
||||
s = Series([1, 2, 3, 4], ['foo', 'bar', 'foo', 'bah'])
|
||||
with tm.assert_produces_warning(FutureWarning,
|
||||
check_stacklevel=False):
|
||||
expected = s.loc[['foo', 'bar', 'bah', 'bam']]
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning,
|
||||
check_stacklevel=False):
|
||||
result = s[['foo', 'bar', 'bah', 'bam']]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem_dups():
    """Selecting a duplicated label returns all matching rows as a Series."""
    labels = ['A', 'A', 'B', 'C', 'C']
    ser = Series(range(5), index=labels, dtype=np.int64)
    assert_series_equal(ser['C'],
                        Series([3, 4], index=['C', 'C'], dtype=np.int64))
|
||||
|
||||
|
||||
def test_setitem_ambiguous_keyerror():
|
||||
s = Series(lrange(10), index=lrange(0, 20, 2))
|
||||
|
||||
# equivalent of an append
|
||||
s2 = s.copy()
|
||||
s2[1] = 5
|
||||
expected = s.append(Series([5], index=[1]))
|
||||
assert_series_equal(s2, expected)
|
||||
|
||||
s2 = s.copy()
|
||||
s2.loc[1] = 5
|
||||
expected = s.append(Series([5], index=[1]))
|
||||
assert_series_equal(s2, expected)
|
||||
|
||||
|
||||
def test_getitem_dataframe():
|
||||
rng = list(range(10))
|
||||
s = pd.Series(10, index=rng)
|
||||
df = pd.DataFrame(rng, index=rng)
|
||||
pytest.raises(TypeError, s.__getitem__, df > 5)
|
||||
|
||||
|
||||
def test_setitem(test_data):
    """Exercise ``Series.__setitem__`` with labels, positions, masks and new keys.

    The NaN/mask section works on a private copy of ``test_data.ts``. The
    ``test_data`` fixture is shared between tests, so writing into its
    series directly would change the data other tests see.
    """
    ts = test_data.ts.copy()

    ts[ts.index[5]] = np.NaN
    ts[[1, 2, 17]] = np.NaN
    ts[6] = np.NaN
    assert np.isnan(ts[6])
    assert np.isnan(ts[2])
    # a boolean mask assignment replaces every NaN written above
    ts[np.isnan(ts)] = 5
    assert not np.isnan(ts[2])

    # caught this bug when writing tests
    series = Series(tm.makeIntIndex(20).astype(float),
                    index=tm.makeIntIndex(20))

    series[::2] = 0
    assert (series[::2] == 0).all()

    # set item that's not contained
    s = test_data.series.copy()
    s['foobar'] = 1

    app = Series([1], index=['foobar'], name='series')
    expected = test_data.series.append(app)
    assert_series_equal(s, expected)

    # Test for issue #10193
    key = pd.Timestamp('2012-01-01')
    series = pd.Series()
    series[key] = 47
    expected = pd.Series(47, [key])
    assert_series_equal(series, expected)

    series = pd.Series([], pd.DatetimeIndex([], freq='D'))
    series[key] = 47
    expected = pd.Series(47, pd.DatetimeIndex([key], freq='D'))
    assert_series_equal(series, expected)
|
||||
|
||||
|
||||
def test_setitem_dtypes():
|
||||
# change dtypes
|
||||
# GH 4463
|
||||
expected = Series([np.nan, 2, 3])
|
||||
|
||||
s = Series([1, 2, 3])
|
||||
s.iloc[0] = np.nan
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
s = Series([1, 2, 3])
|
||||
s.loc[0] = np.nan
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
s = Series([1, 2, 3])
|
||||
s[0] = np.nan
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
s = Series([False])
|
||||
s.loc[0] = np.nan
|
||||
assert_series_equal(s, Series([np.nan]))
|
||||
|
||||
s = Series([False, True])
|
||||
s.loc[0] = np.nan
|
||||
assert_series_equal(s, Series([np.nan, 1.0]))
|
||||
|
||||
|
||||
def test_set_value(test_data):
    """Check the deprecated ``Series.set_value`` against ``.loc`` assignment.

    ``set_value`` writes in place and returns the same object, so the first
    section runs on a copy of ``test_data.ts`` rather than on the shared
    fixture series itself.
    """
    ts = test_data.ts.copy()
    idx = ts.index[10]
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        res = ts.set_value(idx, 0)
    assert res is ts
    assert ts[idx] == 0

    # equiv
    s = test_data.series.copy()
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        res = s.set_value('foobar', 0)
    assert res is s
    assert res.index[-1] == 'foobar'
    assert res['foobar'] == 0

    s = test_data.series.copy()
    s.loc['foobar'] = 0
    assert s.index[-1] == 'foobar'
    assert s['foobar'] == 0
|
||||
|
||||
|
||||
def test_setslice(test_data):
|
||||
sl = test_data.ts[5:20]
|
||||
assert len(sl) == len(sl.index)
|
||||
assert sl.index.is_unique
|
||||
|
||||
|
||||
def test_basic_getitem_setitem_corner(test_data):
|
||||
# invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
|
||||
with tm.assert_raises_regex(ValueError, 'tuple-index'):
|
||||
test_data.ts[:, 2]
|
||||
with tm.assert_raises_regex(ValueError, 'tuple-index'):
|
||||
test_data.ts[:, 2] = 2
|
||||
|
||||
# weird lists. [slice(0, 5)] will work but not two slices
|
||||
result = test_data.ts[[slice(None, 5)]]
|
||||
expected = test_data.ts[:5]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# OK
|
||||
pytest.raises(Exception, test_data.ts.__getitem__,
|
||||
[5, slice(None, None)])
|
||||
pytest.raises(Exception, test_data.ts.__setitem__,
|
||||
[5, slice(None, None)], 2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('tz', ['US/Eastern', 'UTC', 'Asia/Tokyo'])
def test_setitem_with_tz(tz):
    """Assign tz-aware timestamps through ``[]``, ``.loc`` and ``.iloc``.

    Every scalar and vector case starts from a fresh copy of ``orig`` so no
    case depends on what the previous one left behind.
    """
    orig = pd.Series(pd.date_range('2016-01-01', freq='H', periods=3,
                                   tz=tz))
    assert orig.dtype == 'datetime64[ns, {0}]'.format(tz)

    # scalar
    s = orig.copy()
    s[1] = pd.Timestamp('2011-01-01', tz=tz)
    exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
                     pd.Timestamp('2011-01-01 00:00', tz=tz),
                     pd.Timestamp('2016-01-01 02:00', tz=tz)])
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.loc[1] = pd.Timestamp('2011-01-01', tz=tz)
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz)
    tm.assert_series_equal(s, exp)

    # vector
    vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
                      pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
    assert vals.dtype == 'datetime64[ns, {0}]'.format(tz)

    # reset first: previously this case reused the series already modified
    # by the scalar ``iloc`` case above
    s = orig.copy()
    s[[1, 2]] = vals
    exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
                     pd.Timestamp('2011-01-01 00:00', tz=tz),
                     pd.Timestamp('2012-01-01 00:00', tz=tz)])
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.loc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)

    s = orig.copy()
    s.iloc[[1, 2]] = vals
    tm.assert_series_equal(s, exp)
|
||||
|
||||
|
||||
def test_setitem_with_tz_dst():
|
||||
# GH XXX
|
||||
tz = 'US/Eastern'
|
||||
orig = pd.Series(pd.date_range('2016-11-06', freq='H', periods=3,
|
||||
tz=tz))
|
||||
assert orig.dtype == 'datetime64[ns, {0}]'.format(tz)
|
||||
|
||||
# scalar
|
||||
s = orig.copy()
|
||||
s[1] = pd.Timestamp('2011-01-01', tz=tz)
|
||||
exp = pd.Series([pd.Timestamp('2016-11-06 00:00-04:00', tz=tz),
|
||||
pd.Timestamp('2011-01-01 00:00-05:00', tz=tz),
|
||||
pd.Timestamp('2016-11-06 01:00-05:00', tz=tz)])
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
s = orig.copy()
|
||||
s.loc[1] = pd.Timestamp('2011-01-01', tz=tz)
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
s = orig.copy()
|
||||
s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz)
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
# vector
|
||||
vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
|
||||
pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
|
||||
assert vals.dtype == 'datetime64[ns, {0}]'.format(tz)
|
||||
|
||||
s[[1, 2]] = vals
|
||||
exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz),
|
||||
pd.Timestamp('2011-01-01 00:00', tz=tz),
|
||||
pd.Timestamp('2012-01-01 00:00', tz=tz)])
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
s = orig.copy()
|
||||
s.loc[[1, 2]] = vals
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
s = orig.copy()
|
||||
s.iloc[[1, 2]] = vals
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
|
||||
def test_categorial_assigning_ops():
|
||||
orig = Series(Categorical(["b", "b"], categories=["a", "b"]))
|
||||
s = orig.copy()
|
||||
s[:] = "a"
|
||||
exp = Series(Categorical(["a", "a"], categories=["a", "b"]))
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
s = orig.copy()
|
||||
s[1] = "a"
|
||||
exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
s = orig.copy()
|
||||
s[s.index > 0] = "a"
|
||||
exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
s = orig.copy()
|
||||
s[[False, True]] = "a"
|
||||
exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
s = orig.copy()
|
||||
s.index = ["x", "y"]
|
||||
s["y"] = "a"
|
||||
exp = Series(Categorical(["b", "a"], categories=["a", "b"]),
|
||||
index=["x", "y"])
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
# ensure that one can set something to np.nan
|
||||
s = Series(Categorical([1, 2, 3]))
|
||||
exp = Series(Categorical([1, np.nan, 3], categories=[1, 2, 3]))
|
||||
s[1] = np.nan
|
||||
tm.assert_series_equal(s, exp)
|
||||
|
||||
|
||||
def test_slice(test_data):
    """Check positional slicing of numeric and object Series.

    The final section writes through a slice to confirm it is a view. That
    write is done on a copy of ``test_data.series`` so the shared fixture
    series is not left with ten zeroed rows for other tests.
    """
    series = test_data.series.copy()

    numSlice = series[10:20]
    numSliceEnd = series[-10:]
    objSlice = test_data.objSeries[10:20]

    assert series.index[9] not in numSlice.index
    assert test_data.objSeries.index[9] not in objSlice.index

    assert len(numSlice) == len(numSlice.index)
    assert series[numSlice.index[0]] == numSlice[numSlice.index[0]]

    assert numSlice.index[1] == series.index[11]
    assert tm.equalContents(numSliceEnd, np.array(series)[-10:])

    # Test return view.
    sl = series[10:20]
    sl[:] = 0

    assert (series[10:20] == 0).all()
|
||||
|
||||
|
||||
def test_slice_can_reorder_not_uniquely_indexed():
|
||||
s = Series(1, index=['a', 'a', 'b', 'b', 'c'])
|
||||
s[::-1] # it works!
|
||||
|
||||
|
||||
def test_ix_setitem(test_data):
    """Compare ``.loc``/``.iloc`` assignment with plain ``[]`` assignment.

    All writes go to copies of ``test_data.series``. The last section used
    to assign straight into the shared fixture series.
    """
    series = test_data.series.copy()
    inds = series.index[[3, 4, 7]]

    result = series.copy()
    result.loc[inds] = 5

    expected = series.copy()
    expected[[3, 4, 7]] = 5
    assert_series_equal(result, expected)

    result.iloc[5:10] = 10
    expected[5:10] = 10
    assert_series_equal(result, expected)

    # set slice with indices
    d1, d2 = series.index[[5, 15]]
    result.loc[d1:d2] = 6
    expected[5:16] = 6  # because it's inclusive
    assert_series_equal(result, expected)

    # set index value
    series.loc[d1] = 4
    series.loc[d2] = 6
    assert series[d1] == 4
    assert series[d2] == 6
|
||||
|
||||
|
||||
def test_setitem_na():
|
||||
# these induce dtype changes
|
||||
expected = Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan])
|
||||
s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10])
|
||||
s[::2] = np.nan
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
# gets coerced to float, right?
|
||||
expected = Series([np.nan, 1, np.nan, 0])
|
||||
s = Series([True, True, False, False])
|
||||
s[::2] = np.nan
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8,
|
||||
9])
|
||||
s = Series(np.arange(10))
|
||||
s[:5] = np.nan
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
|
||||
def test_timedelta_assignment():
|
||||
# GH 8209
|
||||
s = Series([])
|
||||
s.loc['B'] = timedelta(1)
|
||||
tm.assert_series_equal(s, Series(Timedelta('1 days'), index=['B']))
|
||||
|
||||
s = s.reindex(s.index.insert(0, 'A'))
|
||||
tm.assert_series_equal(s, Series(
|
||||
[np.nan, Timedelta('1 days')], index=['A', 'B']))
|
||||
|
||||
result = s.fillna(timedelta(1))
|
||||
expected = Series(Timedelta('1 days'), index=['A', 'B'])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s.loc['A'] = timedelta(1)
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
# GH 14155
|
||||
s = Series(10 * [np.timedelta64(10, 'm')])
|
||||
s.loc[[1, 2, 3]] = np.timedelta64(20, 'm')
|
||||
expected = pd.Series(10 * [np.timedelta64(10, 'm')])
|
||||
expected.loc[[1, 2, 3]] = pd.Timedelta(np.timedelta64(20, 'm'))
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
|
||||
def test_underlying_data_conversion():
|
||||
# GH 4080
|
||||
df = DataFrame({c: [1, 2, 3] for c in ['a', 'b', 'c']})
|
||||
df.set_index(['a', 'b', 'c'], inplace=True)
|
||||
s = Series([1], index=[(2, 2, 2)])
|
||||
df['val'] = 0
|
||||
df
|
||||
df['val'].update(s)
|
||||
|
||||
expected = DataFrame(
|
||||
dict(a=[1, 2, 3], b=[1, 2, 3], c=[1, 2, 3], val=[0, 1, 0]))
|
||||
expected.set_index(['a', 'b', 'c'], inplace=True)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# GH 3970
|
||||
# these are chained assignments as well
|
||||
pd.set_option('chained_assignment', None)
|
||||
df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
|
||||
df["cc"] = 0.0
|
||||
|
||||
ck = [True] * len(df)
|
||||
|
||||
df["bb"].iloc[0] = .13
|
||||
|
||||
# TODO: unused
|
||||
df_tmp = df.iloc[ck] # noqa
|
||||
|
||||
df["bb"].iloc[0] = .15
|
||||
assert df['bb'].iloc[0] == 0.15
|
||||
pd.set_option('chained_assignment', 'raise')
|
||||
|
||||
# GH 3217
|
||||
df = DataFrame(dict(a=[1, 3], b=[np.nan, 2]))
|
||||
df['c'] = np.nan
|
||||
df['c'].update(pd.Series(['foo'], index=[0]))
|
||||
|
||||
expected = DataFrame(dict(a=[1, 3], b=[np.nan, 2], c=['foo', np.nan]))
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_preserve_refs(test_data):
|
||||
seq = test_data.ts[[5, 10, 15]]
|
||||
seq[1] = np.NaN
|
||||
assert not np.isnan(test_data.ts[10])
|
||||
|
||||
|
||||
def test_cast_on_putmask():
    """Boolean-mask assignment from an int64 Series stays int64 (GH 2746)."""
    # need to upcast
    target = Series([1, 2], index=[1, 2], dtype='int64')
    replacement = Series([0], index=[1], dtype='int64')
    target[[True, False]] = replacement

    assert_series_equal(target,
                        Series([0, 2], index=[1, 2], dtype='int64'))
|
||||
|
||||
|
||||
def test_type_promote_putmask():
|
||||
# GH8387: test that changing types does not break alignment
|
||||
ts = Series(np.random.randn(100), index=np.arange(100, 0, -1)).round(5)
|
||||
left, mask = ts.copy(), ts > 0
|
||||
right = ts[mask].copy().map(str)
|
||||
left[mask] = right
|
||||
assert_series_equal(left, ts.map(lambda t: str(t) if t > 0 else t))
|
||||
|
||||
s = Series([0, 1, 2, 0])
|
||||
mask = s > 0
|
||||
s2 = s[mask].map(str)
|
||||
s[mask] = s2
|
||||
assert_series_equal(s, Series([0, '1', '2', 0]))
|
||||
|
||||
s = Series([0, 'foo', 'bar', 0])
|
||||
mask = Series([False, True, True, False])
|
||||
s2 = s[mask]
|
||||
s[mask] = s2
|
||||
assert_series_equal(s, Series([0, 'foo', 'bar', 0]))
|
||||
|
||||
|
||||
def test_multilevel_preserve_name():
|
||||
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
|
||||
'three']],
|
||||
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
|
||||
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
|
||||
names=['first', 'second'])
|
||||
s = Series(np.random.randn(len(index)), index=index, name='sth')
|
||||
|
||||
result = s['foo']
|
||||
result2 = s.loc['foo']
|
||||
assert result.name == s.name
|
||||
assert result2.name == s.name
|
||||
|
||||
|
||||
def test_setitem_scalar_into_readonly_backing_data():
    """Scalar assignment must not write through a read-only array (GH14359)."""
    backing = np.zeros(5)
    backing.flags.writeable = False  # make the array immutable
    series = Series(backing)

    for pos in range(len(series)):
        with pytest.raises(ValueError):
            series[pos] = 1

        # the rejected write must leave the buffer untouched
        assert backing[pos] == 0
|
||||
|
||||
|
||||
def test_setitem_slice_into_readonly_backing_data():
    """Slice assignment must not write through a read-only array (GH14359)."""
    backing = np.zeros(5)
    backing.flags.writeable = False  # make the array immutable
    series = Series(backing)

    with pytest.raises(ValueError):
        series[1:3] = 1

    # every element must still be zero
    assert not backing.any()
|
||||
|
||||
|
||||
"""
|
||||
miscellaneous methods
|
||||
"""
|
||||
|
||||
|
||||
def test_select(test_data):
|
||||
# deprecated: gh-12410
|
||||
with tm.assert_produces_warning(FutureWarning,
|
||||
check_stacklevel=False):
|
||||
n = len(test_data.ts)
|
||||
result = test_data.ts.select(lambda x: x >= test_data.ts.index[n // 2])
|
||||
expected = test_data.ts.reindex(test_data.ts.index[n // 2:])
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = test_data.ts.select(lambda x: x.weekday() == 2)
|
||||
expected = test_data.ts[test_data.ts.index.weekday == 2]
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_pop():
    """``Series.pop`` returns the value and removes its label (GH 6600)."""
    frame = DataFrame({'A': 0, 'B': np.arange(5, dtype='int64'), 'C': 0, })
    row = frame.iloc[4]

    popped = row.pop('B')
    assert popped == 4

    remaining = Series([0, 0], index=['A', 'C'], name=4)
    assert_series_equal(row, remaining)
|
||||
|
||||
|
||||
def test_take():
|
||||
s = Series([-1, 5, 6, 2, 4])
|
||||
|
||||
actual = s.take([1, 3, 4])
|
||||
expected = Series([5, 2, 4], index=[1, 3, 4])
|
||||
tm.assert_series_equal(actual, expected)
|
||||
|
||||
actual = s.take([-1, 3, 4])
|
||||
expected = Series([4, 2, 4], index=[4, 3, 4])
|
||||
tm.assert_series_equal(actual, expected)
|
||||
|
||||
pytest.raises(IndexError, s.take, [1, 10])
|
||||
pytest.raises(IndexError, s.take, [2, 5])
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
s.take([-1, 3, 4], convert=False)
|
||||
|
||||
|
||||
def test_take_categorical():
    """``take`` on a categorical Series accepts negative positions."""
    # https://github.com/pandas-dev/pandas/issues/20664
    cat_series = Series(pd.Categorical(['a', 'b', 'c']))
    taken = cat_series.take([-2, -2, 0])

    wanted_values = pd.Categorical(['b', 'b', 'a'],
                                   categories=['a', 'b', 'c'])
    assert_series_equal(taken, Series(wanted_values, index=[1, 1, 0]))
|
||||
|
||||
|
||||
def test_head_tail(test_data):
|
||||
assert_series_equal(test_data.series.head(), test_data.series[:5])
|
||||
assert_series_equal(test_data.series.head(0), test_data.series[0:0])
|
||||
assert_series_equal(test_data.series.tail(), test_data.series[-5:])
|
||||
assert_series_equal(test_data.series.tail(0), test_data.series[0:0])
|
||||
@@ -1,150 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from pandas import (Series, Timestamp)
|
||||
|
||||
from pandas.compat import lrange
|
||||
from pandas.util.testing import (assert_series_equal)
|
||||
|
||||
|
||||
def test_loc_getitem(test_data):
|
||||
inds = test_data.series.index[[3, 4, 7]]
|
||||
assert_series_equal(
|
||||
test_data.series.loc[inds],
|
||||
test_data.series.reindex(inds))
|
||||
assert_series_equal(test_data.series.iloc[5::2], test_data.series[5::2])
|
||||
|
||||
# slice with indices
|
||||
d1, d2 = test_data.ts.index[[5, 15]]
|
||||
result = test_data.ts.loc[d1:d2]
|
||||
expected = test_data.ts.truncate(d1, d2)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
# boolean
|
||||
mask = test_data.series > test_data.series.median()
|
||||
assert_series_equal(test_data.series.loc[mask], test_data.series[mask])
|
||||
|
||||
# ask for index value
|
||||
assert test_data.ts.loc[d1] == test_data.ts[d1]
|
||||
assert test_data.ts.loc[d2] == test_data.ts[d2]
|
||||
|
||||
|
||||
def test_loc_getitem_not_monotonic(test_data):
|
||||
d1, d2 = test_data.ts.index[[5, 15]]
|
||||
|
||||
ts2 = test_data.ts[::2][[1, 2, 0]]
|
||||
|
||||
pytest.raises(KeyError, ts2.loc.__getitem__, slice(d1, d2))
|
||||
pytest.raises(KeyError, ts2.loc.__setitem__, slice(d1, d2), 0)
|
||||
|
||||
|
||||
def test_loc_getitem_setitem_integer_slice_keyerrors():
|
||||
s = Series(np.random.randn(10), index=lrange(0, 20, 2))
|
||||
|
||||
# this is OK
|
||||
cp = s.copy()
|
||||
cp.iloc[4:10] = 0
|
||||
assert (cp.iloc[4:10] == 0).all()
|
||||
|
||||
# so is this
|
||||
cp = s.copy()
|
||||
cp.iloc[3:11] = 0
|
||||
assert (cp.iloc[3:11] == 0).values.all()
|
||||
|
||||
result = s.iloc[2:6]
|
||||
result2 = s.loc[3:11]
|
||||
expected = s.reindex([4, 6, 8, 10])
|
||||
|
||||
assert_series_equal(result, expected)
|
||||
assert_series_equal(result2, expected)
|
||||
|
||||
# non-monotonic, raise KeyError
|
||||
s2 = s.iloc[lrange(5) + lrange(5, 10)[::-1]]
|
||||
pytest.raises(KeyError, s2.loc.__getitem__, slice(3, 11))
|
||||
pytest.raises(KeyError, s2.loc.__setitem__, slice(3, 11), 0)
|
||||
|
||||
|
||||
def test_loc_getitem_iterator(test_data):
|
||||
idx = iter(test_data.series.index[:10])
|
||||
result = test_data.series.loc[idx]
|
||||
assert_series_equal(result, test_data.series[:10])
|
||||
|
||||
|
||||
def test_loc_setitem_boolean(test_data):
    """``.loc[mask] = v`` must match ``[mask] = v`` for a boolean mask.

    Both sides are built on copies of ``test_data.series``. ``expected``
    used to alias the shared fixture series, so the mask assignment zeroed
    the fixture itself.
    """
    mask = test_data.series > test_data.series.median()

    result = test_data.series.copy()
    result.loc[mask] = 0

    expected = test_data.series.copy()
    expected[mask] = 0

    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_setitem_corner(test_data):
    """List assignment via ``.loc`` works; adding an unknown label raises.

    The assignment is made on a copy of ``test_data.series`` so the shared
    fixture series is not modified, and its result is now asserted.
    """
    series = test_data.series.copy()
    inds = list(series.index[[5, 8, 12]])

    series.loc[inds] = 5
    assert (series.loc[inds] == 5).all()

    # 'foo' is not an existing label
    pytest.raises(Exception, series.loc.__setitem__,
                  inds + ['foo'], 5)
|
||||
|
||||
|
||||
def test_basic_setitem_with_labels(test_data):
    """``s[labels] = v`` must match ``s.loc[labels] = v``.

    Covers label lists, label slices, integer-labelled indexes, and
    tz-aware values (GH12089). In the integer-index section the
    assignments go to ``cp`` and ``exp``. They used to go to ``s``, so the
    comparison was between two unmodified copies and checked nothing.
    """
    indices = test_data.ts.index[[5, 10, 15]]

    cp = test_data.ts.copy()
    exp = test_data.ts.copy()
    cp[indices] = 0
    exp.loc[indices] = 0
    assert_series_equal(cp, exp)

    cp = test_data.ts.copy()
    exp = test_data.ts.copy()
    cp[indices[0]:indices[2]] = 0
    exp.loc[indices[0]:indices[2]] = 0
    assert_series_equal(cp, exp)

    # integer indexes, be careful
    s = Series(np.random.randn(10), index=lrange(0, 20, 2))
    inds = [0, 4, 6]
    arr_inds = np.array([0, 4, 6])

    cp = s.copy()
    exp = s.copy()
    cp[inds] = 0
    exp.loc[inds] = 0
    assert_series_equal(cp, exp)

    cp = s.copy()
    exp = s.copy()
    cp[arr_inds] = 0
    exp.loc[arr_inds] = 0
    assert_series_equal(cp, exp)

    # 5 is not a label of the even-numbered index
    inds_notfound = [0, 4, 5, 6]
    arr_inds_notfound = np.array([0, 4, 5, 6])
    pytest.raises(Exception, s.__setitem__, inds_notfound, 0)
    pytest.raises(Exception, s.__setitem__, arr_inds_notfound, 0)

    # GH12089
    # with tz for values
    s = Series(pd.date_range("2011-01-01", periods=3, tz="US/Eastern"),
               index=['a', 'b', 'c'])
    s2 = s.copy()
    expected = Timestamp('2011-01-03', tz='US/Eastern')
    s2.loc['a'] = expected
    result = s2.loc['a']
    assert result == expected

    s2 = s.copy()
    s2.iloc[0] = expected
    result = s2.iloc[0]
    assert result == expected

    s2 = s.copy()
    s2['a'] = expected
    result = s2['a']
    assert result == expected
|
||||
-251
@@ -1,251 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from pandas import (Index, Series, DataFrame)
|
||||
|
||||
from pandas.compat import lrange, range
|
||||
from pandas.util.testing import (assert_series_equal)
|
||||
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_get():
|
||||
# GH 6383
|
||||
s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56, 45,
|
||||
51, 39, 55, 43, 54, 52, 51, 54]))
|
||||
|
||||
result = s.get(25, 0)
|
||||
expected = 0
|
||||
assert result == expected
|
||||
|
||||
s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56,
|
||||
45, 51, 39, 55, 43, 54, 52, 51, 54]),
|
||||
index=pd.Float64Index(
|
||||
[25.0, 36.0, 49.0, 64.0, 81.0, 100.0,
|
||||
121.0, 144.0, 169.0, 196.0, 1225.0,
|
||||
1296.0, 1369.0, 1444.0, 1521.0, 1600.0,
|
||||
1681.0, 1764.0, 1849.0, 1936.0],
|
||||
dtype='object'))
|
||||
|
||||
result = s.get(25, 0)
|
||||
expected = 43
|
||||
assert result == expected
|
||||
|
||||
# GH 7407
|
||||
# with a boolean accessor
|
||||
df = pd.DataFrame({'i': [0] * 3, 'b': [False] * 3})
|
||||
vc = df.i.value_counts()
|
||||
result = vc.get(99, default='Missing')
|
||||
assert result == 'Missing'
|
||||
|
||||
vc = df.b.value_counts()
|
||||
result = vc.get(False, default='Missing')
|
||||
assert result == 3
|
||||
|
||||
result = vc.get(True, default='Missing')
|
||||
assert result == 'Missing'
|
||||
|
||||
|
||||
def test_get_nan():
|
||||
# GH 8569
|
||||
s = pd.Float64Index(range(10)).to_series()
|
||||
assert s.get(np.nan) is None
|
||||
assert s.get(np.nan, default='Missing') == 'Missing'
|
||||
|
||||
|
||||
def test_get_nan_multiple():
|
||||
# GH 8569
|
||||
# ensure that fixing "test_get_nan" above hasn't broken get
|
||||
# with multiple elements
|
||||
s = pd.Float64Index(range(10)).to_series()
|
||||
|
||||
idx = [2, 30]
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
assert_series_equal(s.get(idx),
|
||||
Series([2, np.nan], index=idx))
|
||||
|
||||
idx = [2, np.nan]
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
assert_series_equal(s.get(idx),
|
||||
Series([2, np.nan], index=idx))
|
||||
|
||||
# GH 17295 - all missing keys
|
||||
idx = [20, 30]
|
||||
assert(s.get(idx) is None)
|
||||
|
||||
idx = [np.nan, np.nan]
|
||||
assert(s.get(idx) is None)
|
||||
|
||||
|
||||
def test_delitem():
    """``del s[key]`` removes entries in place (GH 5542)."""
    # should delete the item inplace
    ser = Series(lrange(5))
    del ser[0]
    assert_series_equal(ser, Series(lrange(1, 5), index=lrange(1, 5)))

    del ser[1]
    assert_series_equal(ser, Series(lrange(2, 5), index=lrange(2, 5)))

    # empty
    ser = Series()
    with pytest.raises(KeyError):
        del ser[0]

    # only 1 left, del, add, del
    ser = Series(1)
    del ser[0]
    assert_series_equal(
        ser, Series(dtype='int64', index=Index([], dtype='int64')))
    ser[0] = 1
    assert_series_equal(ser, Series(1))
    del ser[0]
    assert_series_equal(
        ser, Series(dtype='int64', index=Index([], dtype='int64')))

    # Index(dtype=object)
    ser = Series(1, index=['a'])
    del ser['a']
    assert_series_equal(
        ser, Series(dtype='int64', index=Index([], dtype='object')))
    ser['a'] = 1
    assert_series_equal(ser, Series(1, index=['a']))
    del ser['a']
    assert_series_equal(
        ser, Series(dtype='int64', index=Index([], dtype='object')))
|
||||
|
||||
|
||||
def test_slice_float64():
    """Float label slices on a float index match the positional slice."""
    labels = np.arange(10., 50., 2)
    float_index = Index(labels)

    # labels at positions 5 and 15; a label slice includes both ends,
    # hence the positional equivalent is 5:16
    lo, hi = labels[5], labels[15]

    ser = Series(np.random.randn(20), index=float_index)
    by_position = ser.iloc[5:16]
    assert_series_equal(ser[lo:hi], by_position)
    assert_series_equal(ser.loc[lo:hi], by_position)

    frame = DataFrame(np.random.randn(20, 3), index=float_index)
    by_position = frame.iloc[5:16]
    tm.assert_frame_equal(frame[lo:hi], by_position)
    tm.assert_frame_equal(frame.loc[lo:hi], by_position)
|
||||
|
||||
|
||||
def test_getitem_negative_out_of_bounds():
    """Position -11 on a ten-element Series raises for read and write."""
    values = tm.rands_array(5, 10)
    labels = tm.rands_array(10, 10)
    ser = Series(values, index=labels)

    with pytest.raises(IndexError):
        ser[-11]
    with pytest.raises(IndexError):
        ser[-11] = 'foo'
|
||||
|
||||
|
||||
def test_getitem_regression():
    """Selecting every label through a list returns an equal Series."""
    ser = Series(lrange(5), index=lrange(5))
    every_label = lrange(5)
    picked = ser[every_label]
    assert_series_equal(picked, ser)
|
||||
|
||||
|
||||
def test_getitem_setitem_slice_bug():
    """Slices with a negative bound beyond the length are clipped."""
    ser = Series(lrange(10), lrange(10))

    # reads: compare each slice against its expected equivalent
    assert_series_equal(ser[-12:], ser)
    assert_series_equal(ser[-7:], ser[3:])
    assert_series_equal(ser[:-12], ser[:0])

    # writes: an over-long negative start covers the whole Series ...
    target = Series(lrange(10), lrange(10))
    target[-12:] = 0
    assert (target == 0).all()

    # ... while an over-long negative stop selects nothing, so the zeros
    # written above must survive
    target[:-12] = 5
    assert (target == 0).all()
|
||||
|
||||
|
||||
def test_getitem_setitem_slice_integers():
    """``ser[:4]`` selects the first four rows of an integer-labelled Series."""
    labels = [2, 4, 6, 8, 10, 12, 14, 16]
    ser = Series(np.random.randn(8), index=labels)

    # reading: the slice equals a reindex on the first four labels
    head = ser[:4]
    first_four = ser.reindex([2, 4, 6, 8])
    assert_series_equal(head, first_four)

    # writing: only the sliced rows are overwritten
    ser[:4] = 0
    head_is_zero = ser[:4] == 0
    tail_is_zero = ser[4:] == 0
    assert head_is_zero.all()
    assert not tail_is_zero.any()
|
||||
|
||||
|
||||
def test_setitem_float_labels():
    """Setting through ``.loc[1]`` matches setting through ``.iloc[2]``."""
    # note labels are floats
    by_label = Series(['a', 'b', 'c'], index=[0, 0.5, 1])
    by_position = by_label.copy()

    # label 1 sits at position 2, so both writes target the same slot
    by_position.iloc[2] = 'zoo'
    by_label.loc[1] = 'zoo'

    assert_series_equal(by_label, by_position)
|
||||
|
||||
|
||||
def test_slice_float_get_set(test_data):
    """Float slice bounds on ``test_data.ts`` raise ``TypeError``."""
    # same order as before: read then write for 4.0:10.0, then for 4.5:10.0
    for float_slice in (slice(4.0, 10.0), slice(4.5, 10.0)):
        with pytest.raises(TypeError):
            test_data.ts[float_slice]
        with pytest.raises(TypeError):
            test_data.ts[float_slice] = 0
|
||||
|
||||
|
||||
def test_slice_floats2():
    """Open-ended float ``.loc`` slices give the expected row counts."""

    def check_tail_lengths(ser):
        # 12.0 onwards leaves eight rows, 12.5 onwards leaves seven
        assert len(ser.loc[12.0:]) == 8
        assert len(ser.loc[12.5:]) == 7

    ser = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float))
    check_tail_lengths(ser)

    # replace label 12.0 with 12.2 and repeat the same checks
    shifted = np.arange(10, 20, dtype=float)
    shifted[2] = 12.2
    ser.index = shifted
    check_tail_lengths(ser)
|
||||
|
||||
|
||||
def test_int_indexing():
    """Absent keys raise ``KeyError`` on a duplicated integer index."""
    monotonic = [0, 0, 1, 1, 2, 2]
    not_monotonic = [2, 2, 0, 0, 1, 1]

    for labels in (monotonic, not_monotonic):
        ser = Series(np.random.randn(6), index=labels)
        # neither an unused integer nor a string is a valid label
        for absent in (5, 'c'):
            with pytest.raises(KeyError):
                ser[absent]
|
||||
|
||||
|
||||
def test_getitem_int64(test_data):
    """A ``numpy.int64`` key returns the same value as the plain ``int``."""
    via_numpy = test_data.ts[np.int64(5)]
    via_builtin = test_data.ts[5]
    assert via_numpy == via_builtin
|
||||
@@ -1,297 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import pytest
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from pandas import Index, Series
|
||||
from pandas.core.index import MultiIndex, RangeIndex
|
||||
|
||||
from pandas.compat import lrange, range, zip
|
||||
from pandas.util.testing import assert_series_equal, assert_frame_equal
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class TestSeriesAlterAxes(TestData):
    """Tests for changing the labels and the name of a Series.

    Covers direct ``index`` assignment, ``rename``, ``name`` handling,
    ``reset_index``, ``reorder_levels``, ``rename_axis`` and ``set_axis``.
    ``self.series`` and ``self.ts`` come from the ``TestData`` base class.
    """

    def test_setindex(self):
        """Assigning ``index`` rejects a bad type or length, else works."""
        # wrong type
        series = self.series.copy()
        pytest.raises(TypeError, setattr, series, 'index', None)

        # wrong length
        series = self.series.copy()
        pytest.raises(Exception, setattr, series, 'index',
                      np.arange(len(series) - 1))

        # works
        series = self.series.copy()
        series.index = np.arange(len(series))
        assert isinstance(series.index, Index)

    def test_rename(self):
        """``rename`` accepts a function, a full dict and a partial dict."""
        renamer = lambda x: x.strftime('%Y%m%d')
        renamed = self.ts.rename(renamer)
        assert renamed.index[0] == renamer(self.ts.index[0])

        # dict
        rename_dict = dict(zip(self.ts.index, renamed.index))
        renamed2 = self.ts.rename(rename_dict)
        assert_series_equal(renamed, renamed2)

        # partial dict
        s = Series(np.arange(4), index=['a', 'b', 'c', 'd'], dtype='int64')
        renamed = s.rename({'b': 'foo', 'd': 'bar'})
        tm.assert_index_equal(renamed.index, Index(['a', 'foo', 'c', 'bar']))

        # index with name
        renamer = Series(np.arange(4),
                         index=Index(['a', 'b', 'c', 'd'], name='name'),
                         dtype='int64')
        renamed = renamer.rename({})
        assert renamed.index.name == renamer.index.name

    def test_rename_by_series(self):
        """A Series used as the mapper relabels only the keys it holds."""
        s = Series(range(5), name='foo')
        renamer = Series({1: 10, 2: 20})
        result = s.rename(renamer)
        expected = Series(range(5), index=[0, 10, 20, 3, 4], name='foo')
        tm.assert_series_equal(result, expected)

    def test_rename_set_name(self):
        """A scalar or tuple passed to ``rename`` sets the name on a copy."""
        s = Series(range(4), index=list('abcd'))
        for name in ['foo', 123, 123., datetime(2001, 11, 11), ('foo',)]:
            result = s.rename(name)
            assert result.name == name
            tm.assert_numpy_array_equal(result.index.values, s.index.values)
            # the source Series must stay unnamed
            assert s.name is None

    def test_rename_set_name_inplace(self):
        """``rename(name, inplace=True)`` sets the name, keeps the index."""
        s = Series(range(3), index=list('abc'))
        for name in ['foo', 123, 123., datetime(2001, 11, 11), ('foo',)]:
            s.rename(name, inplace=True)
            assert s.name == name

            exp = np.array(['a', 'b', 'c'], dtype=np.object_)
            tm.assert_numpy_array_equal(s.index.values, exp)

    def test_rename_axis_supported(self):
        """``rename`` accepts ``axis`` 0 / 'index' and rejects axis 5."""
        # Supporting axis for compatibility, detailed in GH-18589
        s = Series(range(5))
        s.rename({}, axis=0)
        s.rename({}, axis='index')
        with tm.assert_raises_regex(ValueError, 'No axis named 5'):
            s.rename({}, axis=5)

    def test_set_name_attribute(self):
        """``name`` can be assigned values of several hashable types."""
        s = Series([1, 2, 3])
        s2 = Series([1, 2, 3], name='bar')
        for name in [7, 7., 'name', datetime(2001, 1, 1), (1,), u"\u05D0"]:
            s.name = name
            assert s.name == name
            s2.name = name
            assert s2.name == name

    def test_set_name(self):
        """``_set_name`` returns a new named object, source untouched."""
        s = Series([1, 2, 3])
        s2 = s._set_name('foo')
        assert s2.name == 'foo'
        assert s.name is None
        assert s is not s2

    def test_rename_inplace(self):
        """``rename(func, inplace=True)`` relabels ``self.ts`` itself."""
        renamer = lambda x: x.strftime('%Y%m%d')
        expected = renamer(self.ts.index[0])

        self.ts.rename(renamer, inplace=True)
        assert self.ts.index[0] == expected

    def test_set_index_makes_timeseries(self):
        """Assigning a date index makes ``index.is_all_dates`` true."""
        idx = tm.makeDateIndex(10)

        s = Series(lrange(10))
        s.index = idx
        assert s.index.is_all_dates

    def test_reset_index(self):
        """``reset_index``: value column name, inplace and ``level``."""
        df = tm.makeDataFrame()[:5]
        ser = df.stack()
        ser.index.names = ['hash', 'category']

        ser.name = 'value'
        df = ser.reset_index()
        assert 'value' in df

        df = ser.reset_index(name='value2')
        assert 'value2' in df

        # check inplace
        s = ser.reset_index(drop=True)
        s2 = ser
        s2.reset_index(drop=True, inplace=True)
        assert_series_equal(s, s2)

        # level
        index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
                           labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2],
                                   [0, 1, 0, 1, 0, 1]])
        s = Series(np.random.randn(6), index=index)
        rs = s.reset_index(level=1)
        assert len(rs.columns) == 2

        rs = s.reset_index(level=[0, 2], drop=True)
        tm.assert_index_equal(rs.index, Index(index.get_level_values(1)))
        assert isinstance(rs, Series)

    def test_reset_index_level(self):
        """``reset_index(level=...)`` given level names or positions."""
        df = pd.DataFrame([[1, 2, 3], [4, 5, 6]],
                          columns=['A', 'B', 'C'])

        for levels in ['A', 'B'], [0, 1]:
            # With MultiIndex
            s = df.set_index(['A', 'B'])['C']

            result = s.reset_index(level=levels[0])
            tm.assert_frame_equal(result, df.set_index('B'))

            result = s.reset_index(level=levels[:1])
            tm.assert_frame_equal(result, df.set_index('B'))

            result = s.reset_index(level=levels)
            tm.assert_frame_equal(result, df)

            result = df.set_index(['A', 'B']).reset_index(level=levels,
                                                          drop=True)
            tm.assert_frame_equal(result, df[['C']])

            with tm.assert_raises_regex(KeyError, 'Level E '):
                s.reset_index(level=['A', 'E'])

            # With single-level Index
            s = df.set_index('A')['B']

            result = s.reset_index(level=levels[0])
            tm.assert_frame_equal(result, df[['A', 'B']])

            result = s.reset_index(level=levels[:1])
            tm.assert_frame_equal(result, df[['A', 'B']])

            result = s.reset_index(level=levels[0], drop=True)
            tm.assert_series_equal(result, df['B'])

            with tm.assert_raises_regex(IndexError, 'Too many levels'):
                s.reset_index(level=[0, 1, 2])

        # Check that .reset_index([],drop=True) doesn't fail
        result = pd.Series(range(4)).reset_index([], drop=True)
        expected = pd.Series(range(4))
        assert_series_equal(result, expected)

    def test_reset_index_range(self):
        """``reset_index`` on a default index yields a ``RangeIndex``."""
        # GH 12071
        s = pd.Series(range(2), name='A', dtype='int64')
        series_result = s.reset_index()
        assert isinstance(series_result.index, RangeIndex)
        series_expected = pd.DataFrame([[0, 0], [1, 1]],
                                       columns=['index', 'A'],
                                       index=RangeIndex(stop=2))
        assert_frame_equal(series_result, series_expected)

    def test_reorder_levels(self):
        """``reorder_levels`` by position and by name, including a rotation."""
        index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
                           labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2],
                                   [0, 1, 0, 1, 0, 1]],
                           names=['L0', 'L1', 'L2'])
        s = Series(np.arange(6), index=index)

        # no change, position
        result = s.reorder_levels([0, 1, 2])
        assert_series_equal(s, result)

        # no change, labels
        result = s.reorder_levels(['L0', 'L1', 'L2'])
        assert_series_equal(s, result)

        # rotate, position
        result = s.reorder_levels([1, 2, 0])
        e_idx = MultiIndex(levels=[['one', 'two', 'three'], [0, 1], ['bar']],
                           labels=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1],
                                   [0, 0, 0, 0, 0, 0]],
                           names=['L1', 'L2', 'L0'])
        expected = Series(np.arange(6), index=e_idx)
        assert_series_equal(result, expected)

    def test_rename_axis_inplace(self):
        """``rename_axis(..., inplace=True)`` mutates and returns ``None``."""
        # GH 15704
        series = self.ts.copy()
        expected = series.rename_axis('foo')
        result = series.copy()
        no_return = result.rename_axis('foo', inplace=True)

        assert no_return is None
        assert_series_equal(result, expected)

    def test_set_axis_inplace(self):
        """``set_axis`` with ``inplace`` unset, True and False."""
        # GH14636

        s = Series(np.arange(4), index=[1, 3, 5, 7], dtype='int64')

        expected = s.copy()
        expected.index = list('abcd')

        for axis in 0, 'index':
            # inplace=True
            # The FutureWarning comes from the fact that we would like to have
            # inplace default to False some day
            for inplace, warn in (None, FutureWarning), (True, None):
                result = s.copy()
                kwargs = {'inplace': inplace}
                with tm.assert_produces_warning(warn):
                    result.set_axis(list('abcd'), axis=axis, **kwargs)
                tm.assert_series_equal(result, expected)

        # inplace=False
        result = s.set_axis(list('abcd'), axis=0, inplace=False)
        tm.assert_series_equal(expected, result)

        # omitting the "axis" parameter
        with tm.assert_produces_warning(None):
            result = s.set_axis(list('abcd'), inplace=False)
        tm.assert_series_equal(result, expected)

        # wrong values for the "axis" parameter
        for axis in 2, 'foo':
            with tm.assert_raises_regex(ValueError, 'No axis named'):
                s.set_axis(list('abcd'), axis=axis, inplace=False)

    def test_set_axis_prior_to_deprecation_signature(self):
        """The old ``set_axis(axis, labels)`` argument order warns."""
        s = Series(np.arange(4), index=[1, 3, 5, 7], dtype='int64')

        expected = s.copy()
        expected.index = list('abcd')

        for axis in 0, 'index':
            with tm.assert_produces_warning(FutureWarning):
                # pass the loop value (previously a hard-coded 0) so that
                # both spellings of the axis are actually exercised
                result = s.set_axis(axis, list('abcd'), inplace=False)
            tm.assert_series_equal(result, expected)

    def test_reset_index_drop_errors(self):
        """``reset_index`` raises ``KeyError`` for an unknown level name."""
        # GH 20925

        # KeyError raised for series index when passed level name is missing
        s = pd.Series(range(4))
        with tm.assert_raises_regex(KeyError, 'must be same as name'):
            s.reset_index('wrong', drop=True)
        with tm.assert_raises_regex(KeyError, 'must be same as name'):
            s.reset_index('wrong')

        # KeyError raised for series when level to be dropped is missing
        s = pd.Series(range(4), index=pd.MultiIndex.from_product([[1, 2]] * 2))
        with tm.assert_raises_regex(KeyError, 'not found'):
            s.reset_index('wrong', drop=True)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,756 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
from collections import OrderedDict
|
||||
import pydoc
|
||||
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from pandas import Index, Series, DataFrame, date_range
|
||||
from pandas.core.indexes.datetimes import Timestamp
|
||||
|
||||
from pandas.compat import range, lzip, isidentifier, string_types
|
||||
from pandas import (compat, Categorical, period_range, timedelta_range,
|
||||
DatetimeIndex, PeriodIndex, TimedeltaIndex)
|
||||
import pandas.io.formats.printing as printing
|
||||
from pandas.util.testing import (assert_series_equal,
|
||||
ensure_clean)
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class SharedWithSparse(object):
    """
    Tests that apply to both Series and SparseSeries.

    Generic tests in this class compare results through
    ``self._assert_series_equal()``, which sub-classes implement.
    """

    def _assert_series_equal(self, left, right):
        """Compare two series with the sub-class specific assertion."""
        raise NotImplementedError

    def test_scalarop_preserve_name(self):
        """Multiplying by a scalar keeps the name."""
        doubled = self.ts * 2
        assert doubled.name == self.ts.name

    def test_copy_name(self):
        """``copy`` keeps the name."""
        duplicate = self.ts.copy()
        assert duplicate.name == self.ts.name

    def test_copy_index_name_checking(self):
        """Renaming the index of a copy leaves the source index name alone."""
        # after making a copy, changes to its index must not reach the
        # index stored on the original object
        self.ts.index.name = None
        assert self.ts.index.name is None
        assert self.ts is self.ts

        duplicate = self.ts.copy()
        duplicate.index.name = 'foo'
        printing.pprint_thing(self.ts.index.name)
        assert self.ts.index.name is None

    def test_append_preserve_name(self):
        """Appending two slices of ``ts`` keeps the name."""
        head = self.ts[:5]
        tail = self.ts[5:]
        assert head.append(tail).name == self.ts.name

    def test_binop_maybe_preserve_name(self):
        """A binary op keeps the name only if both operands share it."""
        # names match, preserve
        product = self.ts * self.ts
        assert product.name == self.ts.name
        product = self.ts.mul(self.ts)
        assert product.name == self.ts.name

        product = self.ts * self.ts[:-2]
        assert product.name == self.ts.name

        # names don't match, don't preserve
        other = self.ts.copy()
        other.name = 'something else'
        total = self.ts + other
        assert total.name is None
        total = self.ts.add(other)
        assert total.name is None

        forward = ['add', 'sub', 'mul', 'div', 'truediv', 'floordiv',
                   'mod', 'pow']
        reflected = ['r' + name for name in forward]
        for op_name in forward + reflected:
            # names match, preserve
            left = self.ts.copy()
            outcome = getattr(left, op_name)(left)
            assert outcome.name == self.ts.name

            # names don't match, don't preserve
            renamed = self.ts.copy()
            renamed.name = 'changed'
            outcome = getattr(left, op_name)(renamed)
            assert outcome.name is None

    def test_combine_first_name(self):
        """``combine_first`` keeps the name."""
        combined = self.ts.combine_first(self.ts[:5])
        assert combined.name == self.ts.name

    def test_getitem_preserve_name(self):
        """Boolean, list and slice selection all keep the name."""
        masked = self.ts[self.ts > 0]
        assert masked.name == self.ts.name

        picked = self.ts[[0, 2, 4]]
        assert picked.name == self.ts.name

        window = self.ts[5:10]
        assert window.name == self.ts.name

    def test_pickle(self):
        """``series`` and ``ts`` survive a pickle round trip unchanged."""
        restored_series = self._pickle_roundtrip(self.series)
        restored_ts = self._pickle_roundtrip(self.ts)
        assert_series_equal(restored_series, self.series)
        assert_series_equal(restored_ts, self.ts)

    def _pickle_roundtrip(self, obj):
        """Pickle ``obj`` to a temporary path and return what is read back."""
        with ensure_clean() as path:
            obj.to_pickle(path)
            return pd.read_pickle(path)

    def test_argsort_preserve_name(self):
        """``argsort`` keeps the name."""
        order = self.ts.argsort()
        assert order.name == self.ts.name

    def test_sort_index_name(self):
        """``sort_index`` keeps the name."""
        descending = self.ts.sort_index(ascending=False)
        assert descending.name == self.ts.name

    def test_to_sparse_pass_name(self):
        """``to_sparse`` keeps the name."""
        sparse = self.ts.to_sparse()
        assert sparse.name == self.ts.name

    def test_constructor_dict(self):
        """Building from a dict, with and without an explicit index."""
        mapping = {'a': 0., 'b': 1., 'c': 2.}

        built = self.series_klass(mapping)
        wanted = self.series_klass(mapping, index=sorted(mapping.keys()))
        self._assert_series_equal(built, wanted)

        # a label missing from the dict ('d') must come out as NaN
        labels = ['b', 'c', 'd', 'a']
        built = self.series_klass(mapping, index=labels)
        wanted = self.series_klass([1, 2, np.nan, 0],
                                   index=['b', 'c', 'd', 'a'])
        self._assert_series_equal(built, wanted)

    def test_constructor_subclass_dict(self):
        """A dict sub-class builds the same series as a plain dict."""
        sub_dict = tm.TestSubDict((x, 10.0 * x) for x in range(10))
        from_subclass = self.series_klass(sub_dict)
        from_plain = self.series_klass(dict(compat.iteritems(sub_dict)))
        self._assert_series_equal(from_subclass, from_plain)

    def test_constructor_ordereddict(self):
        """An ``OrderedDict`` (or a sub-class) keeps its key order."""
        # GH3283
        ordered = OrderedDict(
            ('col%s' % i, np.random.random()) for i in range(12))

        wanted = self.series_klass(list(ordered.values()),
                                   list(ordered.keys()))
        self._assert_series_equal(self.series_klass(ordered), wanted)

        # Test with subclass
        class A(OrderedDict):
            pass

        self._assert_series_equal(self.series_klass(A(ordered)), wanted)

    def test_constructor_dict_multiindex(self):
        """Tuple keys build a MultiIndex unless a non-tuple key is mixed in."""
        mapping = {('a', 'a'): 0., ('b', 'a'): 1., ('b', 'c'): 2.}
        pairs = sorted(mapping.items())

        built = self.series_klass(mapping)
        wanted = self.series_klass(
            [value for _, value in pairs],
            index=pd.MultiIndex.from_tuples([key for key, _ in pairs]))
        self._assert_series_equal(built, wanted)

        # add a plain string key; the expected index is then a flat Index
        # of mixed objects rather than a MultiIndex
        mapping['z'] = 111.
        pairs.insert(0, ('z', mapping['z']))
        built = self.series_klass(mapping)
        flat_index = pd.Index([key for key, _ in pairs],
                              tupleize_cols=False)
        wanted = self.series_klass([value for _, value in pairs],
                                   index=flat_index)
        built = built.reindex(index=wanted.index)
        self._assert_series_equal(built, wanted)

    def test_constructor_dict_timedelta_index(self):
        """Timedelta dict keys line up with a matching TimedeltaIndex."""
        # GH #12169 : Resample category data with timedelta index
        # construct Series from dict as data and TimedeltaIndex as index
        # will result NaN in result Series data
        wanted = self.series_klass(
            data=['A', 'B', 'C'],
            index=pd.to_timedelta([0, 10, 20], unit='s')
        )

        keyed = {pd.to_timedelta(0, unit='s'): 'A',
                 pd.to_timedelta(10, unit='s'): 'B',
                 pd.to_timedelta(20, unit='s'): 'C'}
        built = self.series_klass(
            data=keyed,
            index=pd.to_timedelta([0, 10, 20], unit='s')
        )
        self._assert_series_equal(built, wanted)

    def test_from_array_deprecated(self):
        """``from_array`` emits a ``FutureWarning``."""
        with tm.assert_produces_warning(FutureWarning):
            self.series_klass.from_array([1, 2, 3])
|
||||
|
||||
|
||||
class TestSeriesMisc(TestData, SharedWithSparse):
    """Miscellaneous Series API tests, plus the shared sparse/dense tests.

    Binds the generic ``SharedWithSparse`` tests to the plain ``Series``
    class and its assertion helper.
    """

    series_klass = Series
    # SharedWithSparse tests use generic, series_klass-agnostic assertion
    _assert_series_equal = staticmethod(tm.assert_series_equal)

    def test_tab_completion(self):
        """``dir(s)`` lists only the accessors valid for the dtype."""
        # GH 9910
        s = Series(list('abcd'))
        # Series of str values should have .str but not .dt/.cat in __dir__
        assert 'str' in dir(s)
        assert 'dt' not in dir(s)
        assert 'cat' not in dir(s)

        # similarly for .dt
        s = Series(date_range('1/1/2015', periods=5))
        assert 'dt' in dir(s)
        assert 'str' not in dir(s)
        assert 'cat' not in dir(s)

        # Similarly for .cat, but with the twist that str and dt should be
        # there if the categories are of that type first cat and str.
        s = Series(list('abbcd'), dtype="category")
        assert 'cat' in dir(s)
        assert 'str' in dir(s)  # as it is a string categorical
        assert 'dt' not in dir(s)

        # similar to cat and str
        s = Series(date_range('1/1/2015', periods=5)).astype("category")
        assert 'cat' in dir(s)
        assert 'str' not in dir(s)
        assert 'dt' in dir(s)  # as it is a datetime categorical

    def test_tab_completion_with_categorical(self):
        """The public names on ``.cat`` match the expected list."""
        # test the tab completion display
        ok_for_cat = ['categories', 'codes', 'ordered', 'set_categories',
                      'add_categories', 'remove_categories',
                      'rename_categories', 'reorder_categories',
                      'remove_unused_categories', 'as_ordered', 'as_unordered']

        def get_dir(s):
            # public (non-underscore) names, de-duplicated and sorted
            results = [r for r in s.cat.__dir__() if not r.startswith('_')]
            return sorted(set(results))

        s = Series(list('aabbcde')).astype('category')
        results = get_dir(s)
        tm.assert_almost_equal(results, sorted(set(ok_for_cat)))

    @pytest.mark.parametrize("index", [
        tm.makeUnicodeIndex(10),
        tm.makeStringIndex(10),
        tm.makeCategoricalIndex(10),
        Index(['foo', 'bar', 'baz'] * 2),
        tm.makeDateIndex(10),
        tm.makePeriodIndex(10),
        tm.makeTimedeltaIndex(10),
        tm.makeIntIndex(10),
        tm.makeUIntIndex(10),
        tm.makeFloatIndex(10),
        Index([True, False]),
        Index(['a{}'.format(i) for i in range(101)]),
        pd.MultiIndex.from_tuples(lzip('ABCD', 'EFGH')),
        pd.MultiIndex.from_tuples(lzip([0, 1, 2, 3], 'EFGH')), ])
    def test_index_tab_completion(self, index):
        """String labels that are identifiers show up in ``dir(s)``."""
        # dir contains string-like values of the Index.
        s = pd.Series(index=index)
        dir_s = dir(s)
        for i, x in enumerate(s.index.unique(level=0)):
            if i < 100:
                assert (not isinstance(x, string_types) or
                        not isidentifier(x) or x in dir_s)
            else:
                # labels past the first 100 must not be listed
                assert x not in dir_s

    def test_not_hashable(self):
        """``hash`` raises ``TypeError`` for empty and non-empty Series."""
        s_empty = Series()
        s = Series([1])
        pytest.raises(TypeError, hash, s_empty)
        pytest.raises(TypeError, hash, s)

    def test_contains(self):
        """Every index label of ``ts`` is reported as contained in ``ts``."""
        tm.assert_contains_all(self.ts.index, self.ts)

    def test_iter(self):
        """Iterating yields the values in positional order."""
        for i, val in enumerate(self.series):
            assert val == self.series[i]

        for i, val in enumerate(self.ts):
            assert val == self.ts[i]

    def test_keys(self):
        """``keys()`` returns the index object itself."""
        # HACK: By doing this in two stages, we avoid 2to3 wrapping the call
        # to .keys() in a list()
        getkeys = self.ts.keys
        assert getkeys() is self.ts.index

    def test_values(self):
        """``values`` compares equal to the Series (dtype not checked)."""
        tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False)

    def test_iteritems(self):
        """``iteritems`` yields (label, value) pairs lazily."""
        for idx, val in compat.iteritems(self.series):
            assert val == self.series[idx]

        for idx, val in compat.iteritems(self.ts):
            assert val == self.ts[idx]

        # assert is lazy (generators don't define reverse, lists do)
        assert not hasattr(self.series.iteritems(), 'reverse')

    def test_items(self):
        """``items`` yields (label, value) pairs lazily."""
        for idx, val in self.series.items():
            assert val == self.series[idx]

        for idx, val in self.ts.items():
            assert val == self.ts[idx]

        # assert is lazy (generators don't define reverse, lists do)
        assert not hasattr(self.series.items(), 'reverse')

    def test_raise_on_info(self):
        """Series has no ``info`` method."""
        s = Series(np.random.randn(10))
        with pytest.raises(AttributeError):
            s.info()

    def test_copy(self):
        """``copy`` is deep by default; ``deep=False`` shares the data."""
        for deep in [None, False, True]:
            s = Series(np.arange(10), dtype='float64')

            # default deep is True
            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            # np.nan is the canonical spelling; the np.NaN alias was
            # removed in NumPy 2.0
            s2[::2] = np.nan

            if deep is None or deep is True:
                # Did not modify original Series
                assert np.isnan(s2[0])
                assert not np.isnan(s[0])
            else:
                # we DID modify the original Series
                assert np.isnan(s2[0])
                assert np.isnan(s[0])

        # GH 11794
        # copy of tz-aware
        expected = Series([Timestamp('2012/01/01', tz='UTC')])
        expected2 = Series([Timestamp('1999/01/01', tz='UTC')])

        for deep in [None, False, True]:

            s = Series([Timestamp('2012/01/01', tz='UTC')])

            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[0] = pd.Timestamp('1999/01/01', tz='UTC')

            # default deep is True
            if deep is None or deep is True:
                # Did not modify original Series
                assert_series_equal(s2, expected2)
                assert_series_equal(s, expected)
            else:
                # we DID modify the original Series
                assert_series_equal(s2, expected2)
                assert_series_equal(s, expected2)

    def test_axis_alias(self):
        """``'rows'`` is accepted as an alias for axis 0 / ``'index'``."""
        s = Series([1, 2, np.nan])
        assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index'))
        assert s.dropna().sum('rows') == 3
        assert s._get_axis_number('rows') == 0
        assert s._get_axis_name('rows') == 'index'

    def test_class_axis(self):
        """``Series.index`` accessed on the class has a docstring."""
        # https://github.com/pandas-dev/pandas/issues/18147
        # no exception and no empty docstring
        assert pydoc.getdoc(Series.index)

    def test_numpy_unique(self):
        """``np.unique`` accepts a Series without raising."""
        # it works!
        np.unique(self.ts)

    def test_ndarray_compat(self):
        """Series works with ``item``, ``ones_like``, ``ravel``, ``compress``."""

        # test numpy compat with Series as sub-class of NDFrame
        tsdf = DataFrame(np.random.randn(1000, 3), columns=['A', 'B', 'C'],
                         index=date_range('1/1/2000', periods=1000))

        def f(x):
            return x[x.idxmax()]

        result = tsdf.apply(f)
        expected = tsdf.max()
        tm.assert_series_equal(result, expected)

        # .item()
        s = Series([1])
        result = s.item()
        assert result == 1
        assert s.item() == s.iloc[0]

        # using an ndarray like function
        s = Series(np.random.randn(10))
        result = Series(np.ones_like(s))
        expected = Series(1, index=range(10), dtype='float64')
        tm.assert_series_equal(result, expected)

        # ravel
        s = Series(np.random.randn(10))
        tm.assert_almost_equal(s.ravel(order='F'), s.values.ravel(order='F'))

        # compress
        # GH 6658
        s = Series([0, 1., -1], index=list('abc'))
        result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.], index=['b']))

        result = np.compress(s < -1, s)
        # result empty Index(dtype=object) as the same as original
        exp = Series([], dtype='float64', index=Index([], dtype='object'))
        tm.assert_series_equal(result, exp)

        s = Series([0, 1., -1], index=[.1, .2, .3])
        result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.], index=[.2]))

        result = np.compress(s < -1, s)
        # result empty Float64Index as the same as original
        exp = Series([], dtype='float64', index=Index([], dtype='float64'))
        tm.assert_series_equal(result, exp)

    def test_str_attribute(self):
        """``Series.str`` methods work unbound; non-strings are rejected."""
        # GH9068
        methods = ['strip', 'rstrip', 'lstrip']
        s = Series([' jack', 'jill ', ' jesse ', 'frank'])
        for method in methods:
            expected = Series([getattr(str, method)(x) for x in s.values])
            assert_series_equal(getattr(Series.str, method)(s.str), expected)

        # str accessor only valid with string values
        s = Series(range(5))
        with tm.assert_raises_regex(AttributeError,
                                    'only use .str accessor'):
            s.str.repeat(2)

    def test_empty_method(self):
        """``empty`` is true only when the Series has no entries."""
        s_empty = pd.Series()
        assert s_empty.empty

        for full_series in [pd.Series([1]), pd.Series(index=[1])]:
            assert not full_series.empty

    def test_tab_complete_warning(self, ip):
        """IPython tab completion on a Series emits no warning."""
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip('IPython', minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        code = "import pandas as pd; s = pd.Series()"
        ip.run_code(code)
        with tm.assert_produces_warning(None):
            with provisionalcompleter('ignore'):
                list(ip.Completer.completions('s.', 1))
|
||||
|
||||
|
||||
class TestCategoricalSeries(object):
    """Tests for the ``.cat``, ``.str`` and ``.dt`` accessors on Series
    that hold (or fail to hold) categorical data."""

    @pytest.mark.parametrize(
        "method",
        [
            lambda x: x.cat.set_categories([1, 2, 3]),
            lambda x: x.cat.reorder_categories([2, 3, 1], ordered=True),
            lambda x: x.cat.rename_categories([1, 2, 3]),
            lambda x: x.cat.remove_unused_categories(),
            lambda x: x.cat.remove_categories([2]),
            lambda x: x.cat.add_categories([4]),
            lambda x: x.cat.as_ordered(),
            lambda x: x.cat.as_unordered(),
        ])
    def test_getname_categorical_accessor(self, method):
        """The Series name survives every parametrized ``.cat`` call."""
        # GH 17509
        s = Series([1, 2, 3], name='A').astype('category')
        expected = 'A'
        result = method(s).name
        assert result == expected

    def test_cat_accessor(self):
        """Read ``categories``/``ordered`` and reset categories via ``.cat``."""
        s = Series(Categorical(["a", "b", np.nan, "a"]))
        tm.assert_index_equal(s.cat.categories, Index(["a", "b"]))
        assert not s.cat.ordered

        exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"])
        s.cat.set_categories(["b", "a"], inplace=True)
        tm.assert_categorical_equal(s.values, exp)

        res = s.cat.set_categories(["b", "a"])
        tm.assert_categorical_equal(res.values, exp)

        s[:] = "a"
        s = s.cat.remove_unused_categories()
        tm.assert_index_equal(s.cat.categories, Index(["a"]))

    def test_cat_accessor_api(self):
        """``Series.cat`` is the accessor class; non-categoricals reject it."""
        # GH 9322
        from pandas.core.arrays.categorical import CategoricalAccessor
        assert Series.cat is CategoricalAccessor
        s = Series(list('aabbcde')).astype('category')
        assert isinstance(s.cat, CategoricalAccessor)

        invalid = Series([1])
        with tm.assert_raises_regex(AttributeError,
                                    "only use .cat accessor"):
            invalid.cat
        assert not hasattr(invalid, 'cat')

    def test_cat_accessor_no_new_attributes(self):
        """Setting an unknown attribute on the accessor must raise."""
        # https://github.com/pandas-dev/pandas/issues/10673
        c = Series(list('aabbcde')).astype('category')
        with tm.assert_raises_regex(AttributeError,
                                    "You cannot add any new attribute"):
            c.cat.xlabel = "a"

    def test_categorical_delegations(self):
        """``.cat`` delegates properties and methods to the Categorical."""

        # invalid accessor
        pytest.raises(AttributeError, lambda: Series([1, 2, 3]).cat)
        tm.assert_raises_regex(
            AttributeError,
            r"Can only use .cat accessor with a 'category' dtype",
            lambda: Series([1, 2, 3]).cat)
        pytest.raises(AttributeError, lambda: Series(['a', 'b', 'c']).cat)
        pytest.raises(AttributeError, lambda: Series(np.arange(5.)).cat)
        pytest.raises(AttributeError,
                      lambda: Series([Timestamp('20130101')]).cat)

        # Series should delegate calls to '.categories', '.codes', '.ordered'
        # and the methods '.set_categories()' 'remove_unused_categories()' to
        # the categorical
        s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        exp_categories = Index(["a", "b", "c"])
        tm.assert_index_equal(s.cat.categories, exp_categories)
        s.cat.categories = [1, 2, 3]
        exp_categories = Index([1, 2, 3])
        tm.assert_index_equal(s.cat.categories, exp_categories)

        exp_codes = Series([0, 1, 2, 0], dtype='int8')
        tm.assert_series_equal(s.cat.codes, exp_codes)

        assert s.cat.ordered
        s = s.cat.as_unordered()
        assert not s.cat.ordered
        s.cat.as_ordered(inplace=True)
        assert s.cat.ordered

        # reorder
        s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        exp_categories = Index(["c", "b", "a"])
        exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
        s = s.cat.set_categories(["c", "b", "a"])
        tm.assert_index_equal(s.cat.categories, exp_categories)
        tm.assert_numpy_array_equal(s.values.__array__(), exp_values)
        tm.assert_numpy_array_equal(s.__array__(), exp_values)

        # remove unused categories
        s = Series(Categorical(["a", "b", "b", "a"],
                               categories=["a", "b", "c"]))
        exp_categories = Index(["a", "b"])
        exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_)
        s = s.cat.remove_unused_categories()
        tm.assert_index_equal(s.cat.categories, exp_categories)
        tm.assert_numpy_array_equal(s.values.__array__(), exp_values)
        tm.assert_numpy_array_equal(s.__array__(), exp_values)

        # This method is likely to be confused, so test that it raises an
        # error on wrong inputs: ``set_categories`` lives on the accessor,
        # not on the Series itself, so the attribute lookup must fail.
        with pytest.raises(AttributeError):
            s.set_categories([4, 3, 2, 1])
        # right: s.cat.set_categories([4,3,2,1])

        # GH18862 (let Series.cat.rename_categories take callables)
        s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        result = s.cat.rename_categories(lambda x: x.upper())
        expected = Series(Categorical(["A", "B", "C", "A"],
                                      categories=["A", "B", "C"],
                                      ordered=True))
        tm.assert_series_equal(result, expected)

    def test_str_accessor_api_for_categorical(self):
        """``.str`` on a categorical of strings matches the plain Series."""
        # https://github.com/pandas-dev/pandas/issues/10661
        from pandas.core.strings import StringMethods
        s = Series(list('aabb'))
        s = s + " " + s
        c = s.astype('category')
        assert isinstance(c.str, StringMethods)

        # str functions, which need special arguments
        # Each entry is (method name, positional args tuple, keyword args).
        special_func_defs = [
            ('cat', (list("zyxw"),), {"sep": ","}),
            ('center', (10,), {}),
            ('contains', ("a",), {}),
            ('count', ("a",), {}),
            ('decode', ("UTF-8",), {}),
            ('encode', ("UTF-8",), {}),
            ('endswith', ("a",), {}),
            ('extract', ("([a-z]*) ",), {"expand": False}),
            ('extract', ("([a-z]*) ",), {"expand": True}),
            ('extractall', ("([a-z]*) ",), {}),
            ('find', ("a",), {}),
            ('findall', ("a",), {}),
            ('index', (" ",), {}),
            ('ljust', (10,), {}),
            # args must be a real 1-tuple; a bare ("a") is just the string
            ('match', ("a",), {}),  # deprecated...
            ('normalize', ("NFC",), {}),
            ('pad', (10,), {}),
            ('partition', (" ",), {"expand": False}),  # not default
            ('partition', (" ",), {"expand": True}),  # default
            ('repeat', (3,), {}),
            ('replace', ("a", "z"), {}),
            ('rfind', ("a",), {}),
            ('rindex', (" ",), {}),
            ('rjust', (10,), {}),
            ('rpartition', (" ",), {"expand": False}),  # not default
            ('rpartition', (" ",), {"expand": True}),  # default
            ('slice', (0, 1), {}),
            ('slice_replace', (0, 1, "z"), {}),
            ('split', (" ",), {"expand": False}),  # default
            ('split', (" ",), {"expand": True}),  # not default
            ('startswith', ("a",), {}),
            ('wrap', (2,), {}),
            ('zfill', (10,), {})
        ]
        _special_func_names = [f[0] for f in special_func_defs]

        # * get, join: they need a individual elements of type lists, but
        #   we can't make a categorical with lists as individual categories.
        #   -> `s.str.split(" ").astype("category")` will error!
        # * `translate` has different interfaces for py2 vs. py3
        _ignore_names = ["get", "join", "translate"]

        str_func_names = [f for f in dir(s.str) if not (
            f.startswith("_") or
            f in _special_func_names or
            f in _ignore_names)]

        func_defs = [(f, (), {}) for f in str_func_names]
        func_defs.extend(special_func_defs)

        for func, args, kwargs in func_defs:
            res = getattr(c.str, func)(*args, **kwargs)
            exp = getattr(s.str, func)(*args, **kwargs)

            if isinstance(res, DataFrame):
                tm.assert_frame_equal(res, exp)
            else:
                tm.assert_series_equal(res, exp)

        invalid = Series([1, 2, 3]).astype('category')
        with tm.assert_raises_regex(AttributeError,
                                    "Can only use .str "
                                    "accessor with string"):
            invalid.str
        assert not hasattr(invalid, 'str')

    def test_dt_accessor_api_for_categorical(self):
        """``.dt`` on a categorical of datetimelikes matches the plain Series."""
        # https://github.com/pandas-dev/pandas/issues/10661
        from pandas.core.indexes.accessors import Properties

        s_dr = Series(date_range('1/1/2015', periods=5, tz="MET"))
        c_dr = s_dr.astype("category")

        s_pr = Series(period_range('1/1/2015', freq='D', periods=5))
        c_pr = s_pr.astype("category")

        s_tdr = Series(timedelta_range('1 days', '10 days'))
        c_tdr = s_tdr.astype("category")

        # only testing field (like .day)
        # and bool (is_month_start)
        get_ops = lambda x: x._datetimelike_ops

        test_data = [
            ("Datetime", get_ops(DatetimeIndex), s_dr, c_dr),
            ("Period", get_ops(PeriodIndex), s_pr, c_pr),
            ("Timedelta", get_ops(TimedeltaIndex), s_tdr, c_tdr)]

        assert isinstance(c_dr.dt, Properties)

        special_func_defs = [
            ('strftime', ("%Y-%m-%d",), {}),
            ('tz_convert', ("EST",), {}),
            ('round', ("D",), {}),
            ('floor', ("D",), {}),
            ('ceil', ("D",), {}),
            ('asfreq', ("D",), {}),
            # ('tz_localize', ("UTC",), {}),
        ]
        _special_func_names = [f[0] for f in special_func_defs]

        # the series is already localized
        _ignore_names = ['tz_localize', 'components']

        for name, attr_names, s, c in test_data:
            func_names = [f
                          for f in dir(s.dt)
                          if not (f.startswith("_") or f in attr_names or f in
                                  _special_func_names or f in _ignore_names)]

            func_defs = [(f, (), {}) for f in func_names]
            # Only exercise the special methods this accessor flavour has.
            for f_def in special_func_defs:
                if f_def[0] in dir(s.dt):
                    func_defs.append(f_def)

            for func, args, kwargs in func_defs:
                res = getattr(c.dt, func)(*args, **kwargs)
                exp = getattr(s.dt, func)(*args, **kwargs)

                if isinstance(res, DataFrame):
                    tm.assert_frame_equal(res, exp)
                elif isinstance(res, Series):
                    tm.assert_series_equal(res, exp)
                else:
                    tm.assert_almost_equal(res, exp)

            for attr in attr_names:
                try:
                    res = getattr(c.dt, attr)
                    exp = getattr(s.dt, attr)
                except Exception:
                    # Identify the failing accessor/attribute, then re-raise
                    # with the original traceback intact.
                    print(name, attr)
                    raise

            if isinstance(res, DataFrame):
                tm.assert_frame_equal(res, exp)
            elif isinstance(res, Series):
                tm.assert_series_equal(res, exp)
            else:
                tm.assert_almost_equal(res, exp)

        invalid = Series([1, 2, 3]).astype('category')
        with tm.assert_raises_regex(
                AttributeError, "Can only use .dt accessor with datetimelike"):
            invalid.dt
        # Check the accessor under test here (``dt``), not ``str``.
        assert not hasattr(invalid, 'dt')
|
||||
@@ -1,589 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import pytest
|
||||
|
||||
from collections import Counter, defaultdict, OrderedDict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from pandas import (Index, Series, DataFrame, isna)
|
||||
from pandas.compat import lrange
|
||||
from pandas import compat
|
||||
from pandas.util.testing import assert_series_equal, assert_frame_equal
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class TestSeriesApply(TestData):
    """Tests for ``Series.apply`` (with a few ``Series.map`` cross-checks)."""

    def test_apply(self):
        """ufunc / element-wise apply, plus metadata on empty input."""
        with np.errstate(all='ignore'):
            tm.assert_series_equal(self.ts.apply(np.sqrt), np.sqrt(self.ts))

            # element-wise apply
            import math
            tm.assert_series_equal(self.ts.apply(math.exp), np.exp(self.ts))

        # empty series
        empty = Series(dtype=object, name='foo',
                       index=pd.Index([], name='bar'))
        applied = empty.apply(lambda x: x)
        tm.assert_series_equal(empty, applied)

        # check all metadata (GH 9322)
        assert empty is not applied
        assert empty.index is applied.index
        assert empty.dtype == applied.dtype
        assert empty.name == applied.name

        # index but no data
        index_only = Series(index=[1, 2, 3])
        applied = index_only.apply(lambda x: x)
        tm.assert_series_equal(index_only, applied)

    def test_apply_same_length_inference_bug(self):
        """apply and map agree when the function returns tuples."""
        f = lambda x: (x, x + 1)

        for values in ([1, 2], [1, 2, 3]):
            s = Series(values)
            result = s.apply(f)
            expected = s.map(f)
            assert_series_equal(result, expected)

    def test_apply_dont_convert_dtype(self):
        """``convert_dtype=False`` leaves the result as object dtype."""
        values = Series(np.random.randn(10))

        f = lambda x: x if x > 0 else np.nan
        unconverted = values.apply(f, convert_dtype=False)
        assert unconverted.dtype == object

    def test_with_string_args(self):
        """A reduction name passed to apply equals calling that method."""

        for reducer in ['sum', 'mean', 'min', 'max', 'std']:
            via_apply = self.ts.apply(reducer)
            via_method = getattr(self.ts, reducer)()
            assert via_apply == via_method

    def test_apply_args(self):
        """Positional ``args`` are forwarded to the applied function."""
        s = Series(['foo,bar'])

        pieces = s.apply(str.split, args=(',', ))
        assert pieces[0] == ['foo', 'bar']
        assert isinstance(pieces[0], list)

    def test_series_map_box_timestamps(self):
        """map/apply can read Timestamp fields on datetime values."""
        # GH#2689, GH#2627
        ser = Series(pd.date_range('1/1/2000', periods=10))

        def func(x):
            return (x.hour, x.day, x.month)

        # it works!
        ser.map(func)
        ser.apply(func)

    def test_apply_box(self):
        """Check which scalar class apply hands to the function per dtype."""
        # ufunc will not be boxed. Same test cases as the test_map_box
        stamps = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]
        s = pd.Series(stamps)
        assert s.dtype == 'datetime64[ns]'
        # boxed value must be Timestamp instance
        res = s.apply(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__,
                                                     x.day, x.tz))
        exp = pd.Series(['Timestamp_1_None', 'Timestamp_2_None'])
        tm.assert_series_equal(res, exp)

        aware_stamps = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
                        pd.Timestamp('2011-01-02', tz='US/Eastern')]
        s = pd.Series(aware_stamps)
        assert s.dtype == 'datetime64[ns, US/Eastern]'
        res = s.apply(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__,
                                                     x.day, x.tz))
        exp = pd.Series(['Timestamp_1_US/Eastern', 'Timestamp_2_US/Eastern'])
        tm.assert_series_equal(res, exp)

        # timedelta
        deltas = [pd.Timedelta('1 days'), pd.Timedelta('2 days')]
        s = pd.Series(deltas)
        assert s.dtype == 'timedelta64[ns]'
        res = s.apply(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.days))
        exp = pd.Series(['Timedelta_1', 'Timedelta_2'])
        tm.assert_series_equal(res, exp)

        # period (object dtype, not boxed)
        periods = [pd.Period('2011-01-01', freq='M'),
                   pd.Period('2011-01-02', freq='M')]
        s = pd.Series(periods)
        assert s.dtype == 'object'
        res = s.apply(lambda x: '{0}_{1}'.format(x.__class__.__name__,
                                                 x.freqstr))
        exp = pd.Series(['Period_M', 'Period_M'])
        tm.assert_series_equal(res, exp)

    def test_apply_datetimetz(self):
        """apply on tz-aware data: shifted values, int result, scalar type."""
        hourly = pd.date_range('2011-01-01', '2011-01-02',
                               freq='H').tz_localize('Asia/Tokyo')
        s = pd.Series(hourly, name='XX')

        result = s.apply(lambda x: x + pd.offsets.Day())
        shifted = pd.date_range('2011-01-02', '2011-01-03',
                                freq='H').tz_localize('Asia/Tokyo')
        exp = pd.Series(shifted, name='XX')
        tm.assert_series_equal(result, exp)

        # change dtype
        # GH 14506 : Returned dtype changed from int32 to int64
        result = s.apply(lambda x: x.hour)
        exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64)
        tm.assert_series_equal(result, exp)

        # not vectorized
        def f(x):
            if not isinstance(x, pd.Timestamp):
                raise ValueError
            return str(x.tz)

        result = s.map(f)
        exp = pd.Series(['Asia/Tokyo'] * 25, name='XX')
        tm.assert_series_equal(result, exp)

    def test_apply_dict_depr(self):
        """A dict-of-lists passed to ``Series.agg`` warns (FutureWarning)."""

        tsdf = pd.DataFrame(np.random.randn(10, 3),
                            columns=['A', 'B', 'C'],
                            index=pd.date_range('1/1/2000', periods=10))
        with tm.assert_produces_warning(FutureWarning):
            tsdf.A.agg({'foo': ['sum', 'mean']})
|
||||
|
||||
|
||||
class TestSeriesAggregate(TestData):
    """Tests for ``Series.agg`` / ``Series.transform`` and the matching
    list- and dict-style forms of ``Series.apply``."""

    def test_transform(self):
        """transform/apply with a ufunc, a list, names, and a dict."""
        # transforming functions

        with np.errstate(all='ignore'):

            f_sqrt = np.sqrt(self.series)
            f_abs = np.abs(self.series)

            # ufunc
            result = self.series.transform(np.sqrt)
            expected = f_sqrt.copy()
            assert_series_equal(result, expected)

            result = self.series.apply(np.sqrt)
            assert_series_equal(result, expected)

            # list-like
            result = self.series.transform([np.sqrt])
            expected = f_sqrt.to_frame().copy()
            expected.columns = ['sqrt']
            assert_frame_equal(result, expected)

            # Mirror the ufunc section: check ``apply`` as well, instead of
            # repeating the identical ``transform`` call a second time.
            result = self.series.apply([np.sqrt])
            assert_frame_equal(result, expected)

            result = self.series.transform(['sqrt'])
            assert_frame_equal(result, expected)

            # multiple items in list
            # these are in the order as if we are applying both functions per
            # series and then concatting
            expected = pd.concat([f_sqrt, f_abs], axis=1)
            expected.columns = ['sqrt', 'absolute']
            result = self.series.apply([np.sqrt, np.abs])
            assert_frame_equal(result, expected)

            result = self.series.transform(['sqrt', 'abs'])
            expected.columns = ['sqrt', 'abs']
            assert_frame_equal(result, expected)

            # dict, provide renaming
            expected = pd.concat([f_sqrt, f_abs], axis=1)
            expected.columns = ['foo', 'bar']
            expected = expected.unstack().rename('series')

            result = self.series.apply({'foo': np.sqrt, 'bar': np.abs})
            assert_series_equal(result.reindex_like(expected), expected)

    def test_transform_and_agg_error(self):
        """Mixing reducers and transformers raises ``ValueError``."""
        # we are trying to transform with an aggregator
        with pytest.raises(ValueError):
            self.series.transform(['min', 'max'])

        with pytest.raises(ValueError):
            with np.errstate(all='ignore'):
                self.series.agg(['sqrt', 'max'])

        with pytest.raises(ValueError):
            with np.errstate(all='ignore'):
                self.series.transform(['sqrt', 'max'])

        with pytest.raises(ValueError):
            with np.errstate(all='ignore'):
                self.series.agg({'foo': np.sqrt, 'bar': 'sum'})

    def test_demo(self):
        """Small worked examples of list, dict and nested-dict ``agg``."""
        # demonstration tests
        s = Series(range(6), dtype='int64', name='series')

        result = s.agg(['min', 'max'])
        expected = Series([0, 5], index=['min', 'max'], name='series')
        tm.assert_series_equal(result, expected)

        result = s.agg({'foo': 'min'})
        expected = Series([0], index=['foo'], name='series')
        tm.assert_series_equal(result, expected)

        # nested renaming
        with tm.assert_produces_warning(FutureWarning):
            result = s.agg({'foo': ['min', 'max']})

        expected = DataFrame(
            {'foo': [0, 5]},
            index=['min', 'max']).unstack().rename('series')
        tm.assert_series_equal(result, expected)

    def test_multiple_aggregators_with_dict_api(self):
        """Nested-dict ``agg`` with several keys warns and stacks results."""

        s = Series(range(6), dtype='int64', name='series')
        # nested renaming
        with tm.assert_produces_warning(FutureWarning):
            result = s.agg({'foo': ['min', 'max'], 'bar': ['sum', 'mean']})

        expected = DataFrame(
            {'foo': [5.0, np.nan, 0.0, np.nan],
             'bar': [np.nan, 2.5, np.nan, 15.0]},
            columns=['foo', 'bar'],
            index=['max', 'mean',
                   'min', 'sum']).unstack().rename('series')
        tm.assert_series_equal(result.reindex_like(expected), expected)

    def test_agg_apply_evaluate_lambdas_the_same(self):
        """``agg`` and ``apply`` give equal results for element-wise callables."""
        # test that we are evaluating row-by-row first
        # before vectorized evaluation
        result = self.series.apply(lambda x: str(x))
        expected = self.series.agg(lambda x: str(x))
        tm.assert_series_equal(result, expected)

        result = self.series.apply(str)
        expected = self.series.agg(str)
        tm.assert_series_equal(result, expected)

    def test_with_nested_series(self):
        """A function returning a Series per element expands to a DataFrame."""
        # GH 2316
        # .agg with a reducer and a transform, what to do
        result = self.ts.apply(lambda x: Series(
            [x, x ** 2], index=['x', 'x^2']))
        expected = DataFrame({'x': self.ts, 'x^2': self.ts ** 2})
        tm.assert_frame_equal(result, expected)

        result = self.ts.agg(lambda x: Series(
            [x, x ** 2], index=['x', 'x^2']))
        tm.assert_frame_equal(result, expected)

    def test_replicate_describe(self):
        """An ordered dict of reducers reproduces ``Series.describe()``."""
        # this also tests a result set that is all scalars
        expected = self.series.describe()
        result = self.series.apply(OrderedDict(
            [('count', 'count'),
             ('mean', 'mean'),
             ('std', 'std'),
             ('min', 'min'),
             ('25%', lambda x: x.quantile(0.25)),
             ('50%', 'median'),
             ('75%', lambda x: x.quantile(0.75)),
             ('max', 'max')]))
        assert_series_equal(result, expected)

    def test_reduce(self):
        """A list of reducer names yields a Series indexed by those names."""
        # reductions with named functions
        result = self.series.agg(['sum', 'mean'])
        expected = Series([self.series.sum(),
                           self.series.mean()],
                          ['sum', 'mean'],
                          name=self.series.name)
        assert_series_equal(result, expected)

    def test_non_callable_aggregates(self):
        """``agg`` accepts names of non-callable attributes such as ``size``."""
        # test agg using non-callable series attributes
        s = Series([1, 2, None])

        # Calling agg w/ just a string arg same as calling s.arg
        result = s.agg('size')
        expected = s.size
        assert result == expected

        # test when mixed w/ callable reducers
        result = s.agg(['size', 'count', 'mean'])
        expected = Series(OrderedDict([('size', 3.0),
                                       ('count', 2.0),
                                       ('mean', 1.5)]))
        assert_series_equal(result[expected.index], expected)
|
||||
|
||||
|
||||
class TestSeriesMap(TestData):
    """Tests for ``Series.map`` with Series, dict-like and callable mappers."""

    def test_map(self):
        """Map through a Series, a dict, a function and categorical data."""
        index, data = tm.getMixedTypeDict()

        source = Series(data['B'], index=data['C'])
        target = Series(data['C'][:4], index=data['D'][:4])

        merged = target.map(source)

        for k, v in compat.iteritems(merged):
            assert v == source[target[k]]

        # input could be a dict
        merged = target.map(source.to_dict())

        for k, v in compat.iteritems(merged):
            assert v == source[target[k]]

        # function
        result = self.ts.map(lambda x: x * 2)
        tm.assert_series_equal(result, self.ts * 2)

        # GH 10324
        a = Series([1, 2, 3, 4])
        b = Series(["even", "odd", "even", "odd"], dtype="category")
        c = Series(["even", "odd", "even", "odd"])

        exp = Series(["odd", "even", "odd", np.nan], dtype="category")
        tm.assert_series_equal(a.map(b), exp)
        exp = Series(["odd", "even", "odd", np.nan])
        tm.assert_series_equal(a.map(c), exp)

        a = Series(['a', 'b', 'c', 'd'])
        b = Series([1, 2, 3, 4],
                   index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
        c = Series([1, 2, 3, 4], index=Index(['b', 'c', 'd', 'e']))

        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(c), exp)

        a = Series(['a', 'b', 'c', 'd'])
        b = Series(['B', 'C', 'D', 'E'], dtype='category',
                   index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
        c = Series(['B', 'C', 'D', 'E'], index=Index(['b', 'c', 'd', 'e']))

        exp = Series(pd.Categorical([np.nan, 'B', 'C', 'D'],
                                    categories=['B', 'C', 'D', 'E']))
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, 'B', 'C', 'D'])
        tm.assert_series_equal(a.map(c), exp)

    @pytest.mark.parametrize("index", tm.all_index_generator(10))
    def test_map_empty(self, index):
        """Mapping through an empty dict yields all-NaN on the same index."""
        s = Series(index)
        result = s.map({})

        expected = pd.Series(np.nan, index=s.index)
        tm.assert_series_equal(result, expected)

    def test_map_compat(self):
        """Boolean values can be used as dict keys when mapping."""
        # related GH 8024
        s = Series([True, True, False], index=[1, 2, 3])
        result = s.map({True: 'foo', False: 'bar'})
        expected = Series(['foo', 'foo', 'bar'], index=[1, 2, 3])
        assert_series_equal(result, expected)

    def test_map_int(self):
        """Float values map through an integer-keyed Series; misses are NaN."""
        left = Series({'a': 1., 'b': 2., 'c': 3., 'd': 4})
        right = Series({1: 11, 2: 22, 3: 33})

        # ``np.float64`` rather than the ``np.float_`` alias, which newer
        # NumPy releases no longer provide.
        assert left.dtype == np.float64
        assert issubclass(right.dtype.type, np.integer)

        merged = left.map(right)
        assert merged.dtype == np.float64
        assert isna(merged['d'])
        assert not isna(merged['c'])

    def test_map_type_inference(self):
        """Integer results from the mapped function keep an integer dtype."""
        s = Series(lrange(3))
        s2 = s.map(lambda x: np.where(x == 0, 0, 1))
        assert issubclass(s2.dtype.type, np.integer)

    def test_map_decimal(self):
        """Mapping to ``Decimal`` gives object dtype holding Decimals."""
        from decimal import Decimal

        result = self.series.map(lambda x: Decimal(str(x)))
        assert result.dtype == np.object_
        assert isinstance(result[0], Decimal)

    def test_map_na_exclusion(self):
        """``na_action='ignore'`` leaves NaN entries as NaN."""
        s = Series([1.5, np.nan, 3, np.nan, 5])

        result = s.map(lambda x: x * 2, na_action='ignore')
        exp = s * 2
        assert_series_equal(result, exp)

    def test_map_dict_with_tuple_keys(self):
        """
        Due to new MultiIndex-ing behaviour in v0.14.0,
        dicts with tuple keys passed to map were being
        converted to a multi-index, preventing tuple values
        from being mapped properly.
        """
        # GH 18496
        df = pd.DataFrame({'a': [(1, ), (2, ), (3, 4), (5, 6)]})
        label_mappings = {(1, ): 'A', (2, ): 'B', (3, 4): 'A', (5, 6): 'B'}

        df['labels'] = df['a'].map(label_mappings)
        df['expected_labels'] = pd.Series(['A', 'B', 'A', 'B'], index=df.index)
        # All labels should be filled now
        tm.assert_series_equal(df['labels'], df['expected_labels'],
                               check_names=False)

    def test_map_counter(self):
        """A ``Counter`` supplies 0 for keys it has not seen."""
        s = Series(['a', 'b', 'c'], index=[1, 2, 3])
        counter = Counter()
        counter['b'] = 5
        counter['c'] += 1
        result = s.map(counter)
        expected = Series([0, 5, 1], index=[1, 2, 3])
        assert_series_equal(result, expected)

    def test_map_defaultdict(self):
        """A ``defaultdict`` supplies its default for missing keys."""
        s = Series([1, 2, 3], index=['a', 'b', 'c'])
        default_dict = defaultdict(lambda: 'blank')
        default_dict[1] = 'stuff'
        result = s.map(default_dict)
        expected = Series(['stuff', 'blank', 'blank'], index=['a', 'b', 'c'])
        assert_series_equal(result, expected)

    def test_map_dict_subclass_with_missing(self):
        """
        Test Series.map with a dictionary subclass that defines __missing__,
        i.e. sets a default value (GH #15999).
        """
        class DictWithMissing(dict):
            def __missing__(self, key):
                return 'missing'
        s = Series([1, 2, 3])
        dictionary = DictWithMissing({3: 'three'})
        result = s.map(dictionary)
        expected = Series(['missing', 'missing', 'three'])
        assert_series_equal(result, expected)

    def test_map_dict_subclass_without_missing(self):
        """A dict subclass without ``__missing__`` maps misses to NaN."""
        class DictWithoutMissing(dict):
            pass
        s = Series([1, 2, 3])
        dictionary = DictWithoutMissing({3: 'three'})
        result = s.map(dictionary)
        expected = Series([np.nan, np.nan, 'three'])
        assert_series_equal(result, expected)

    def test_map_box(self):
        """Check which scalar class map hands to the function per dtype."""
        vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]
        s = pd.Series(vals)
        assert s.dtype == 'datetime64[ns]'
        # boxed value must be Timestamp instance
        res = s.map(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__,
                                                   x.day, x.tz))
        exp = pd.Series(['Timestamp_1_None', 'Timestamp_2_None'])
        tm.assert_series_equal(res, exp)

        vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
                pd.Timestamp('2011-01-02', tz='US/Eastern')]
        s = pd.Series(vals)
        assert s.dtype == 'datetime64[ns, US/Eastern]'
        res = s.map(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__,
                                                   x.day, x.tz))
        exp = pd.Series(['Timestamp_1_US/Eastern', 'Timestamp_2_US/Eastern'])
        tm.assert_series_equal(res, exp)

        # timedelta
        vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')]
        s = pd.Series(vals)
        assert s.dtype == 'timedelta64[ns]'
        res = s.map(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.days))
        exp = pd.Series(['Timedelta_1', 'Timedelta_2'])
        tm.assert_series_equal(res, exp)

        # period (object dtype, not boxed)
        vals = [pd.Period('2011-01-01', freq='M'),
                pd.Period('2011-01-02', freq='M')]
        s = pd.Series(vals)
        assert s.dtype == 'object'
        res = s.map(lambda x: '{0}_{1}'.format(x.__class__.__name__,
                                               x.freqstr))
        exp = pd.Series(['Period_M', 'Period_M'])
        tm.assert_series_equal(res, exp)

    def test_map_categorical(self):
        """map on categorical data: dtype of results and ``na_action``."""
        values = pd.Categorical(list('ABBABCD'), categories=list('DCBA'),
                                ordered=True)
        s = pd.Series(values, name='XX', index=list('abcdefg'))

        result = s.map(lambda x: x.lower())
        exp_values = pd.Categorical(list('abbabcd'), categories=list('dcba'),
                                    ordered=True)
        exp = pd.Series(exp_values, name='XX', index=list('abcdefg'))
        tm.assert_series_equal(result, exp)
        tm.assert_categorical_equal(result.values, exp_values)

        result = s.map(lambda x: 'A')
        exp = pd.Series(['A'] * 7, name='XX', index=list('abcdefg'))
        tm.assert_series_equal(result, exp)
        # ``np.object_`` (as in test_map_decimal) rather than the deprecated
        # ``np.object`` alias.
        assert result.dtype == np.object_

        with pytest.raises(NotImplementedError):
            s.map(lambda x: x, na_action='ignore')

    def test_map_datetimetz(self):
        """map on tz-aware data: tz kept, int result, ``na_action`` rejected."""
        values = pd.date_range('2011-01-01', '2011-01-02',
                               freq='H').tz_localize('Asia/Tokyo')
        s = pd.Series(values, name='XX')

        # keep tz
        result = s.map(lambda x: x + pd.offsets.Day())
        exp_values = pd.date_range('2011-01-02', '2011-01-03',
                                   freq='H').tz_localize('Asia/Tokyo')
        exp = pd.Series(exp_values, name='XX')
        tm.assert_series_equal(result, exp)

        # change dtype
        # GH 14506 : Returned dtype changed from int32 to int64
        result = s.map(lambda x: x.hour)
        exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64)
        tm.assert_series_equal(result, exp)

        with pytest.raises(NotImplementedError):
            s.map(lambda x: x, na_action='ignore')

        # not vectorized
        def f(x):
            if not isinstance(x, pd.Timestamp):
                raise ValueError
            return str(x.tz)

        result = s.map(f)
        exp = pd.Series(['Asia/Tokyo'] * 25, name='XX')
        tm.assert_series_equal(result, exp)

    @pytest.mark.parametrize("vals,mapping,exp", [
        (list('abc'), {np.nan: 'not NaN'}, [np.nan] * 3 + ['not NaN']),
        (list('abc'), {'a': 'a letter'}, ['a letter'] + [np.nan] * 3),
        (list(range(3)), {0: 42}, [42] + [np.nan] * 3)])
    def test_map_missing_mixed(self, vals, mapping, exp):
        """Dict mapping over values followed by a trailing NaN entry."""
        # GH20495
        s = pd.Series(vals + [np.nan])
        result = s.map(mapping)

        tm.assert_series_equal(result, pd.Series(exp))
|
||||
@@ -1,908 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from datetime import datetime, timedelta
|
||||
import operator
|
||||
from decimal import Decimal
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Series, Timestamp, Timedelta, Period, NaT
|
||||
from pandas._libs.tslibs.period import IncompatibleFrequency
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def tdser():
    """
    Fixture: a three-element Series of dtype 'timedelta64[ns]' built from
    two '59 Days' entries followed by a NaT.
    """
    raw_values = ['59 Days', '59 Days', 'NaT']
    return Series(raw_values, dtype='timedelta64[ns]')
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Comparisons
|
||||
|
||||
class TestSeriesComparison(object):
    """Tests for Series comparison operators and their flex-method forms."""

    def test_compare_invalid(self):
        """Division works between Series whose names are of unlike types."""
        # GH#8058
        # ops testing
        left = pd.Series(np.random.randn(5), name=0)
        right = pd.Series(np.random.randn(5))
        right.name = pd.Timestamp('2000-01-01')
        tm.assert_series_equal(left / right, 1 / (right / left))

    @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le'])
    def test_ser_flex_cmp_return_dtypes(self, opname):
        """Flex comparison against a scalar returns a bool Series."""
        # GH#15115
        ser = Series([1, 3, 2], index=range(3))
        const = 2

        flex_cmp = getattr(ser, opname)
        result = flex_cmp(const).get_dtype_counts()
        tm.assert_series_equal(result, Series([1], ['bool']))

    @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le'])
    def test_ser_flex_cmp_return_dtypes_empty(self, opname):
        """Flex comparison on an empty slice still returns bool dtype."""
        # GH#15115 empty Series case
        ser = Series([1, 3, 2], index=range(3))
        empty = ser.iloc[:0]
        const = 2

        flex_cmp = getattr(empty, opname)
        result = flex_cmp(const).get_dtype_counts()
        tm.assert_series_equal(result, Series([1], ['bool']))

    @pytest.mark.parametrize('op', [operator.eq, operator.ne,
                                    operator.le, operator.lt,
                                    operator.ge, operator.gt])
    @pytest.mark.parametrize('names', [(None, None, None),
                                       ('foo', 'bar', None),
                                       ('baz', 'baz', 'baz')])
    def test_ser_cmp_result_names(self, names, op):
        """Result name of Series-vs-Index comparison, per dtype.

        ``names`` is (index name, series name, expected result name).
        """
        index_name, series_name, expected_name = names

        # datetime64 dtype
        dti = pd.date_range('1949-06-07 03:00:00',
                            freq='H', periods=5, name=index_name)
        ser = Series(dti).rename(series_name)
        result = op(ser, dti)
        assert result.name == expected_name

        # datetime64tz dtype
        dti = dti.tz_localize('US/Central')
        ser = Series(dti).rename(series_name)
        result = op(ser, dti)
        assert result.name == expected_name

        # timedelta64 dtype
        tdi = dti - dti.shift(1)
        ser = Series(tdi).rename(series_name)
        result = op(ser, tdi)
        assert result.name == expected_name

        # categorical
        if op in [operator.eq, operator.ne]:
            # categorical dtype comparisons raise for inequalities
            cidx = tdi.astype('category')
            ser = Series(cidx).rename(series_name)
            result = op(ser, cidx)
            assert result.name == expected_name
|
||||
|
||||
|
||||
class TestTimestampSeriesComparison(object):
    """Comparisons between datetime-like Series and scalar values."""

    def test_dt64_ser_cmp_date_warning(self):
        """Comparing a datetime64 Series to ``datetime.date`` warns."""
        # https://github.com/pandas-dev/pandas/issues/21359
        # Remove this test and enable invalid test below
        ser = pd.Series(pd.date_range('20010101', periods=10), name='dates')
        date = ser.iloc[0].to_pydatetime().date()

        first_only = pd.Series([True] + [False] * 9, name='dates')
        equality_msg = "will not compare equal"
        ordering_msg = "a TypeError will be raised"

        # (operator, expected result, fragments expected in the warning)
        cases = [
            (operator.eq, first_only,
             ["Comparing Series of datetimes ", equality_msg]),
            (operator.ne, ~first_only, [equality_msg]),
            (operator.le, first_only, [ordering_msg]),
            (operator.lt, pd.Series([False] * 10, name='dates'),
             [ordering_msg]),
            (operator.ge, pd.Series([True] * 10, name='dates'),
             [ordering_msg]),
            (operator.gt, pd.Series([False] + [True] * 9, name='dates'),
             [ordering_msg]),
        ]

        for op, expected, fragments in cases:
            with tm.assert_produces_warning(FutureWarning) as m:
                result = op(ser, date)
                tm.assert_series_equal(result, expected)
                for fragment in fragments:
                    assert fragment in str(m[0].message)

    @pytest.mark.skip(reason="GH-21359")
    def test_dt64ser_cmp_date_invalid(self):
        # GH#19800 datetime.date comparison raises to
        # match DatetimeIndex/Timestamp.  This also matches the behavior
        # of stdlib datetime.datetime
        ser = pd.Series(pd.date_range('20010101', periods=10), name='dates')
        date = ser.iloc[0].to_pydatetime().date()

        assert not (ser == date).any()
        assert (ser != date).all()

        for op in (operator.gt, operator.lt, operator.ge, operator.le):
            with pytest.raises(TypeError):
                op(ser, date)

    def test_dt64ser_cmp_period_scalar(self):
        """``Series > Period`` matches the element-wise comparison."""
        ser = Series(pd.period_range('2000-01-01', periods=10, freq='D'))

        # first a free-standing Period, then an element of the Series
        for val in (Period('2000-01-04', freq='D'), ser[5]):
            result = ser > val
            expected = Series([x > val for x in ser])
            tm.assert_series_equal(result, expected)

    def test_timestamp_compare_series(self):
        """Timestamps compare consistently on either side of a Series."""
        # make sure we can compare Timestamps on the right AND left hand side
        # GH#4982
        ser = pd.Series(pd.date_range('20010101', periods=10), name='dates')

        # ``s_nat`` is the copy that actually contains the NaT entries
        s_nat = ser.copy(deep=True)
        s_nat[0] = pd.Timestamp('nat')
        s_nat[3] = pd.Timestamp('nat')

        # left-hand operator name -> operator to use with swapped operands
        ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'}

        for left, right in ops.items():
            left_f = getattr(operator, left)
            right_f = getattr(operator, right)

            # Series without / with NaTs against a real Timestamp and NaT
            for series in (ser, s_nat):
                for scalar in (pd.Timestamp('20010109'),
                               pd.Timestamp('nat')):
                    expected = left_f(series, scalar)
                    result = right_f(scalar, series)
                    tm.assert_series_equal(result, expected)

    def test_timestamp_equality(self):
        """NaT never compares equal, not even to itself."""
        # GH#11034
        ser = pd.Series([pd.Timestamp('2000-01-29 01:59:00'), 'NaT'])

        result = ser != ser
        tm.assert_series_equal(result, pd.Series([False, True]))
        result = ser != ser[0]
        tm.assert_series_equal(result, pd.Series([False, True]))
        result = ser != ser[1]
        tm.assert_series_equal(result, pd.Series([True, True]))

        result = ser == ser
        tm.assert_series_equal(result, pd.Series([True, False]))
        result = ser == ser[0]
        tm.assert_series_equal(result, pd.Series([True, False]))
        result = ser == ser[1]
        tm.assert_series_equal(result, pd.Series([False, False]))
|
||||
|
||||
|
||||
class TestTimedeltaSeriesComparisons(object):
    """Comparisons of timedelta Series against timedelta scalars."""

    def test_compare_timedelta_series(self):
        # regression test for GH5963
        one_day = timedelta(days=1)
        ser = pd.Series([one_day, timedelta(days=2)])

        is_longer = ser > timedelta(days=1)
        tm.assert_series_equal(is_longer, pd.Series([False, True]))
|
||||
|
||||
|
||||
class TestPeriodSeriesComparisons(object):
    """Comparisons of object-dtype Series holding Period values."""

    # (operator, expected values) shared by the Series-vs-Series tests
    _series_cases = [
        (operator.eq, [False, False, True, False]),
        (operator.ne, [True, True, False, True]),
        (operator.gt, [False, True, False, False]),
        (operator.lt, [True, False, False, True]),
        (operator.ge, [False, True, True, False]),
        (operator.le, [True, False, True, True]),
    ]

    @pytest.mark.parametrize('freq', ['M', '2M', '3M'])
    def test_cmp_series_period_scalar(self, freq):
        # GH 13200
        months = ['2011-01', '2011-02', '2011-03', '2011-04']
        base = Series([Period(x, freq=freq) for x in months])
        p = Period('2011-02', freq=freq)

        # (Series-first operator, scalar-first operator, expected values)
        scalar_cases = [
            (operator.eq, operator.eq, [False, True, False, False]),
            (operator.ne, operator.ne, [True, False, True, True]),
            (operator.gt, operator.lt, [False, False, True, True]),
            (operator.lt, operator.gt, [True, False, False, False]),
            (operator.ge, operator.le, [False, True, True, True]),
            (operator.le, operator.ge, [True, True, False, False]),
        ]
        for op, reflected, values in scalar_cases:
            exp = Series(values)
            tm.assert_series_equal(op(base, p), exp)
            tm.assert_series_equal(reflected(p, base), exp)

        # different base freq
        msg = "Input has different freq=A-DEC from Period"
        with tm.assert_raises_regex(IncompatibleFrequency, msg):
            base <= Period('2011', freq='A')

        with tm.assert_raises_regex(IncompatibleFrequency, msg):
            Period('2011', freq='A') >= base

    @pytest.mark.parametrize('freq', ['M', '2M', '3M'])
    def test_cmp_series_period_series(self, freq):
        # GH#13200
        base = Series([Period(x, freq=freq) for x in
                       ['2011-01', '2011-02', '2011-03', '2011-04']])
        ser = Series([Period(x, freq=freq) for x in
                      ['2011-02', '2011-01', '2011-03', '2011-05']])

        for op, values in self._series_cases:
            tm.assert_series_equal(op(base, ser), Series(values))

        ser2 = Series([Period(x, freq='A') for x in
                       ['2011', '2011', '2011', '2011']])

        # different base freq
        msg = "Input has different freq=A-DEC from Period"
        with tm.assert_raises_regex(IncompatibleFrequency, msg):
            base <= ser2

    def test_cmp_series_period_series_mixed_freq(self):
        # GH#13200
        base = Series([Period('2011', freq='A'),
                       Period('2011-02', freq='M'),
                       Period('2013', freq='A'),
                       Period('2011-04', freq='M')])
        ser = Series([Period('2012', freq='A'),
                      Period('2011-01', freq='M'),
                      Period('2013', freq='A'),
                      Period('2011-05', freq='M')])

        for op, values in self._series_cases:
            tm.assert_series_equal(op(base, ser), Series(values))
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Arithmetic
|
||||
|
||||
class TestSeriesDivision(object):
    """Division checks for Series that are not datetime-like."""
    # __div__, __rdiv__, __floordiv__, __rfloordiv__
    # for non-timestamp/timedelta/period dtypes

    def test_divide_decimal(self):
        # resolves issue GH#9787
        expected = Series([Decimal(5)])
        divisor = Decimal(2)

        true_quotient = Series([Decimal(10)]) / divisor
        tm.assert_series_equal(true_quotient, expected)

        floor_quotient = Series([Decimal(10)]) // divisor
        tm.assert_series_equal(floor_quotient, expected)

    def test_div_equiv_binop(self):
        # Test Series.div as well as Series.__div__
        # float/integer issue
        # GH#7785
        denominator = Series([1, 0], name='first')
        numerator = Series([-0.01, -0.02], name='second')
        expected = Series([-0.01, -np.inf])

        via_method = numerator.div(denominator)
        tm.assert_series_equal(via_method, expected, check_names=False)

        via_operator = numerator / denominator
        tm.assert_series_equal(via_operator, expected)

    @pytest.mark.parametrize('dtype2', [
        np.int64, np.int32, np.int16, np.int8,
        np.float64, np.float32, np.float16,
        np.uint64, np.uint32, np.uint16, np.uint8])
    @pytest.mark.parametrize('dtype1', [np.int64, np.float64, np.uint64])
    def test_ser_div_ser(self, dtype1, dtype2):
        # no longer do integer div for any ops, but deal with the 0's
        first = Series([3, 4, 5, 8], name='first').astype(dtype1)
        second = Series([0, 0, 0, 3], name='second').astype(dtype2)

        # the raw numpy division by zero would otherwise emit warnings
        with np.errstate(all='ignore'):
            raw = first.values.astype(np.float64) / second.values
            expected = Series(raw, dtype='float64', name=None)
        expected.iloc[0:3] = np.inf

        result = first / second
        tm.assert_series_equal(result, expected)
        assert not result.equals(second / first)

    def test_rdiv_zero_compat(self):
        # GH#8674
        zero_array = np.array([0] * 5)
        data = np.random.randn(5)
        expected = Series([0.] * 5)

        operand_pairs = [
            (zero_array, Series(data)),
            (Series(zero_array), data),
            (Series(zero_array), Series(data)),
        ]
        for numerator, denominator in operand_pairs:
            result = numerator / denominator
            tm.assert_series_equal(result, expected)

    def test_div_zero_inf_signs(self):
        # GH#9144, inf signing
        ser = Series([-1, 0, 1], name='first')
        expected = Series([-np.inf, np.nan, np.inf], name='first')

        tm.assert_series_equal(ser / 0, expected)

    def test_rdiv_zero(self):
        # GH#9144
        ser = Series([-1, 0, 1], name='first')
        expected = Series([0.0, np.nan, 0.0], name='first')

        tm.assert_series_equal(0 / ser, expected)

    def test_floordiv_div(self):
        # GH#9144
        ser = Series([-1, 0, 1], name='first')

        floored = ser // 0
        expected = Series([-np.inf, np.nan, np.inf], name='first')
        tm.assert_series_equal(floored, expected)
|
||||
|
||||
|
||||
class TestSeriesArithmetic(object):
    """Addition checks for numeric, string and object-dtype Series."""
    # Standard, numeric, or otherwise not-Timestamp/Timedelta/Period dtypes

    @pytest.mark.parametrize('data', [
        [1, 2, 3],
        [1.1, 2.2, 3.3],
        [Timestamp('2011-01-01'), Timestamp('2011-01-02'), pd.NaT],
        ['x', 'y', 1]])
    @pytest.mark.parametrize('dtype', [None, object])
    def test_series_radd_str_invalid(self, dtype, data):
        ser = Series(data, dtype=dtype)
        with pytest.raises(TypeError):
            'foo_' + ser

    # TODO: parametrize, better name
    def test_object_ser_add_invalid(self):
        # invalid ops
        obj_ser = tm.makeObjectSeries()
        obj_ser.name = 'objects'

        # add first, then subtract; a Python int before a 0-dim array
        for op in (operator.add, operator.sub):
            with pytest.raises(Exception):
                op(obj_ser, 1)
            with pytest.raises(Exception):
                op(obj_ser, np.array(1, dtype=np.int64))

    @pytest.mark.parametrize('dtype', [None, object])
    def test_series_with_dtype_radd_nan(self, dtype):
        ser = pd.Series([1, 2, 3], dtype=dtype)
        all_nan = pd.Series([np.nan, np.nan, np.nan], dtype=dtype)

        tm.assert_series_equal(np.nan + ser, all_nan)
        tm.assert_series_equal(ser + np.nan, all_nan)

    @pytest.mark.parametrize('dtype', [None, object])
    def test_series_with_dtype_radd_int(self, dtype):
        ser = pd.Series([1, 2, 3], dtype=dtype)
        incremented = pd.Series([2, 3, 4], dtype=dtype)

        tm.assert_series_equal(1 + ser, incremented)
        tm.assert_series_equal(ser + 1, incremented)

    def test_series_radd_str(self):
        ser = pd.Series(['x', np.nan, 'x'])

        prefixed = pd.Series(['ax', np.nan, 'ax'])
        tm.assert_series_equal('a' + ser, prefixed)

        suffixed = pd.Series(['xa', np.nan, 'xa'])
        tm.assert_series_equal(ser + 'a', suffixed)

    @pytest.mark.parametrize('dtype', [None, object])
    def test_series_with_dtype_radd_timedelta(self, dtype):
        # note this test is _not_ aimed at timedelta64-dtyped Series
        ser = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days'),
                         pd.Timedelta('3 days')], dtype=dtype)
        expected = pd.Series([pd.Timedelta('4 days'), pd.Timedelta('5 days'),
                              pd.Timedelta('6 days')])

        from_left = pd.Timedelta('3 days') + ser
        tm.assert_series_equal(from_left, expected)

        from_right = ser + pd.Timedelta('3 days')
        tm.assert_series_equal(from_right, expected)
|
||||
|
||||
|
||||
class TestPeriodSeriesArithmetic(object):
    """Arithmetic on object-dtype Series holding Period values."""

    def test_ops_series_timedelta(self):
        # GH 13043
        ser = pd.Series([pd.Period('2015-01-01', freq='D'),
                         pd.Period('2015-01-02', freq='D')], name='xxx')
        assert ser.dtype == object

        shifted = pd.Series([pd.Period('2015-01-02', freq='D'),
                             pd.Period('2015-01-03', freq='D')], name='xxx')

        # a Timedelta first, then the equivalent DateOffset
        for one_day in (pd.Timedelta('1 days'), pd.tseries.offsets.Day()):
            result = ser + one_day
            tm.assert_series_equal(result, shifted)

            result = one_day + ser
            tm.assert_series_equal(result, shifted)

    def test_ops_series_period(self):
        # GH 13043
        ser = pd.Series([pd.Period('2015-01-01', freq='D'),
                         pd.Period('2015-01-02', freq='D')], name='xxx')
        assert ser.dtype == object

        per = pd.Period('2015-01-10', freq='D')
        # dtype will be object because of original dtype
        day_gaps = pd.Series([9, 8], name='xxx', dtype=object)
        tm.assert_series_equal(per - ser, day_gaps)
        tm.assert_series_equal(ser - per, -1 * day_gaps)

        s2 = pd.Series([pd.Period('2015-01-05', freq='D'),
                        pd.Period('2015-01-04', freq='D')], name='xxx')
        assert s2.dtype == object

        day_gaps = pd.Series([4, 2], name='xxx', dtype=object)
        tm.assert_series_equal(s2 - ser, day_gaps)
        tm.assert_series_equal(ser - s2, -1 * day_gaps)
|
||||
|
||||
|
||||
class TestTimestampSeriesArithmetic(object):
    """Subtraction involving datetime64 Series and datetime scalars."""

    def test_timestamp_sub_series(self):
        eastern_days = pd.date_range('2014-03-17', periods=2, freq='D',
                                     tz='US/Eastern')
        ser = pd.Series(eastern_days)
        ts = ser[0]

        delta_series = pd.Series([np.timedelta64(0, 'D'),
                                  np.timedelta64(1, 'D')])
        tm.assert_series_equal(ser - ts, delta_series)
        tm.assert_series_equal(ts - ser, -delta_series)

    def test_dt64ser_sub_datetime_dtype(self):
        ts = Timestamp(datetime(1993, 1, 7, 13, 30, 00))
        dt = datetime(1993, 6, 22, 13, 30)
        ser = Series([ts])

        gap = np.abs(ser - dt)
        result = pd.to_timedelta(gap)
        assert result.dtype == 'timedelta64[ns]'
|
||||
|
||||
|
||||
class TestTimedeltaSeriesAdditionSubtraction(object):
    """Addition and subtraction checks for timedelta64[ns] Series."""
    # Tests for Series[timedelta64[ns]] __add__, __sub__, __radd__, __rsub__

    # ------------------------------------------------------------------
    # Operations with int-like others

    def test_td64series_add_int_series_invalid(self, tdser):
        int_ser = Series([2, 3, 4])
        with pytest.raises(TypeError):
            tdser + int_ser

    @pytest.mark.xfail(reason='GH#19123 integer interpreted as nanoseconds')
    def test_td64series_radd_int_series_invalid(self, tdser):
        int_ser = Series([2, 3, 4])
        with pytest.raises(TypeError):
            int_ser + tdser

    def test_td64series_sub_int_series_invalid(self, tdser):
        int_ser = Series([2, 3, 4])
        with pytest.raises(TypeError):
            tdser - int_ser

    @pytest.mark.xfail(reason='GH#19123 integer interpreted as nanoseconds')
    def test_td64series_rsub_int_series_invalid(self, tdser):
        int_ser = Series([2, 3, 4])
        with pytest.raises(TypeError):
            int_ser - tdser

    def test_td64_series_add_intlike(self):
        # GH#19123
        tdi = pd.TimedeltaIndex(['59 days', '59 days', 'NaT'])
        ser = Series(tdi)

        other = Series([20, 30, 40], dtype='uint8')

        # scalar, Series, ndarray and Index operands must all be rejected
        for invalid in (1, other, other.values, pd.Index(other)):
            with pytest.raises(TypeError):
                ser.__add__(invalid)
            with pytest.raises(TypeError):
                ser.__sub__(invalid)

    @pytest.mark.parametrize('scalar', [1, 1.5, np.array(2)])
    def test_td64series_add_sub_numeric_scalar_invalid(self, scalar, tdser):
        for op in (operator.add, operator.sub):
            with pytest.raises(TypeError):
                op(tdser, scalar)
            with pytest.raises(TypeError):
                op(scalar, tdser)

    @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16',
                                       'uint64', 'uint32', 'uint16', 'uint8',
                                       'float64', 'float32', 'float16'])
    @pytest.mark.parametrize('vector', [
        np.array([1, 2, 3]),
        pd.Index([1, 2, 3]),
        pytest.param(Series([1, 2, 3]),
                     marks=pytest.mark.xfail(reason='GH#19123 integer '
                                                    'interpreted as nanos'))
    ])
    def test_td64series_add_sub_numeric_array_invalid(self, vector,
                                                      dtype, tdser):
        vector = vector.astype(dtype)
        for op in (operator.add, operator.sub):
            with pytest.raises(TypeError):
                op(tdser, vector)
            with pytest.raises(TypeError):
                op(vector, tdser)

    # ------------------------------------------------------------------
    # Operations with datetime-like others

    def test_td64series_add_sub_timestamp(self):
        # GH#11925
        tdser = Series(pd.timedelta_range('1 day', periods=3))
        ts = Timestamp('2012-01-01')

        later = Series(pd.date_range('2012-01-02', periods=3))
        tm.assert_series_equal(ts + tdser, later)
        tm.assert_series_equal(tdser + ts, later)

        earlier = Series(pd.date_range('2011-12-31', periods=3, freq='-1D'))
        tm.assert_series_equal(ts - tdser, earlier)
        tm.assert_series_equal(ts + (-tdser), earlier)

        with pytest.raises(TypeError):
            tdser - ts

    # ------------------------------------------------------------------
    # Operations with timedelta-like others (including DateOffsets)

    @pytest.mark.parametrize('names', [(None, None, None),
                                       ('Egon', 'Venkman', None),
                                       ('NCC1701D', 'NCC1701D', 'NCC1701D')])
    def test_td64_series_with_tdi(self, names):
        # GH#17250 make sure result dtype is correct
        # GH#19043 make sure names are propagated correctly
        idx_name, ser_name, expected_name = names
        tdi = pd.TimedeltaIndex(['0 days', '1 day'], name=idx_name)
        ser = Series([Timedelta(hours=3), Timedelta(hours=4)], name=ser_name)

        total = Series([Timedelta(hours=3), Timedelta(days=1, hours=4)],
                       name=expected_name)

        result = tdi + ser
        tm.assert_series_equal(result, total)
        assert result.dtype == 'timedelta64[ns]'

        result = ser + tdi
        tm.assert_series_equal(result, total)
        assert result.dtype == 'timedelta64[ns]'

        difference = Series([Timedelta(hours=-3),
                             Timedelta(days=1, hours=-4)],
                            name=expected_name)

        result = tdi - ser
        tm.assert_series_equal(result, difference)
        assert result.dtype == 'timedelta64[ns]'

        result = ser - tdi
        tm.assert_series_equal(result, -difference)
        assert result.dtype == 'timedelta64[ns]'

    def test_td64_sub_NaT(self):
        # GH#18808
        ser = Series([NaT, Timedelta('1s')])

        all_nat = Series([NaT, NaT], dtype='timedelta64[ns]')
        tm.assert_series_equal(ser - NaT, all_nat)
|
||||
|
||||
|
||||
class TestTimedeltaSeriesMultiplicationDivision(object):
    """Multiplication and division checks for timedelta64[ns] Series."""
    # Tests for Series[timedelta64[ns]]
    # __mul__, __rmul__, __div__, __rdiv__, __floordiv__, __rfloordiv__

    # parametrization values shared by several tests below
    _numeric_dtypes = ['int64', 'int32', 'int16',
                       'uint64', 'uint32', 'uint16', 'uint8',
                       'float64', 'float32', 'float16']
    _scalar_tds = [timedelta(minutes=5, seconds=4),
                   Timedelta('5m4s'),
                   Timedelta('5m4s').to_timedelta64()]
    _name_triples = [(None, None, None),
                     ('Egon', 'Venkman', None),
                     ('NCC1701D', 'NCC1701D', 'NCC1701D')]

    @staticmethod
    def _td1():
        """Build three 5m3s timedeltas with the last one set to missing."""
        td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
        td1.iloc[2] = np.nan
        return td1

    # ------------------------------------------------------------------
    # __floordiv__, __rfloordiv__

    @pytest.mark.parametrize('scalar_td', _scalar_tds)
    def test_timedelta_floordiv(self, scalar_td):
        # GH#18831
        td1 = self._td1()

        result = td1 // scalar_td
        tm.assert_series_equal(result, Series([0, 0, np.nan]))

    @pytest.mark.parametrize('scalar_td', _scalar_tds)
    def test_timedelta_rfloordiv(self, scalar_td):
        # GH#18831
        td1 = self._td1()

        result = scalar_td // td1
        tm.assert_series_equal(result, Series([1, 1, np.nan]))

    @pytest.mark.parametrize('scalar_td', _scalar_tds)
    def test_timedelta_rfloordiv_explicit(self, scalar_td):
        # GH#18831
        td1 = self._td1()

        # We can test __rfloordiv__ using this syntax,
        # see `test_timedelta_rfloordiv`
        result = td1.__rfloordiv__(scalar_td)
        tm.assert_series_equal(result, Series([1, 1, np.nan]))

    # ------------------------------------------------------------------
    # Operations with int-like others

    @pytest.mark.parametrize('dtype', _numeric_dtypes)
    @pytest.mark.parametrize('vector', [np.array([20, 30, 40]),
                                        pd.Index([20, 30, 40]),
                                        Series([20, 30, 40])])
    def test_td64series_div_numeric_array(self, vector, dtype, tdser):
        # GH#4521
        # divide/multiply by integers
        vector = vector.astype(dtype)
        quotient = Series(['2.95D', '1D 23H 12m', 'NaT'],
                          dtype='timedelta64[ns]')

        tm.assert_series_equal(tdser / vector, quotient)

        with pytest.raises(TypeError):
            vector / tdser

    @pytest.mark.parametrize('dtype', _numeric_dtypes)
    @pytest.mark.parametrize('vector', [np.array([20, 30, 40]),
                                        pd.Index([20, 30, 40]),
                                        Series([20, 30, 40])])
    def test_td64series_mul_numeric_array(self, vector, dtype, tdser):
        # GH#4521
        # divide/multiply by integers
        vector = vector.astype(dtype)
        product = Series(['1180 Days', '1770 Days', 'NaT'],
                         dtype='timedelta64[ns]')

        tm.assert_series_equal(tdser * vector, product)

    @pytest.mark.parametrize('dtype', _numeric_dtypes)
    @pytest.mark.parametrize('vector', [
        np.array([20, 30, 40]),
        pytest.param(pd.Index([20, 30, 40]),
                     marks=pytest.mark.xfail(reason='__mul__ raises '
                                                    'instead of returning '
                                                    'NotImplemented')),
        Series([20, 30, 40])
    ])
    def test_td64series_rmul_numeric_array(self, vector, dtype, tdser):
        # GH#4521
        # divide/multiply by integers
        vector = vector.astype(dtype)
        product = Series(['1180 Days', '1770 Days', 'NaT'],
                         dtype='timedelta64[ns]')

        tm.assert_series_equal(vector * tdser, product)

    @pytest.mark.parametrize('one', [1, np.array(1), 1.0, np.array(1.0)])
    def test_td64series_mul_numeric_scalar(self, one, tdser):
        # GH#4521
        # divide/multiply by integers
        negated = Series(['-59 Days', '-59 Days', 'NaT'],
                         dtype='timedelta64[ns]')

        tm.assert_series_equal(tdser * (-one), negated)
        tm.assert_series_equal((-one) * tdser, negated)

        doubled = Series(['118 Days', '118 Days', 'NaT'],
                         dtype='timedelta64[ns]')

        tm.assert_series_equal(tdser * (2 * one), doubled)
        tm.assert_series_equal((2 * one) * tdser, doubled)

    @pytest.mark.parametrize('two', [
        2, 2.0,
        pytest.param(np.array(2),
                     marks=pytest.mark.xfail(reason='GH#19011 is_list_like '
                                                    'incorrectly True.')),
        pytest.param(np.array(2.0),
                     marks=pytest.mark.xfail(reason='GH#19011 is_list_like '
                                                    'incorrectly True.')),
    ])
    def test_td64series_div_numeric_scalar(self, two, tdser):
        # GH#4521
        # divide/multiply by integers
        halved = Series(['29.5D', '29.5D', 'NaT'], dtype='timedelta64[ns]')

        tm.assert_series_equal(tdser / two, halved)

    # ------------------------------------------------------------------
    # Operations with timedelta-like others

    @pytest.mark.parametrize('names', _name_triples)
    def test_tdi_mul_int_series(self, names):
        # GH#19042
        idx_name, ser_name, expected_name = names
        tdi = pd.TimedeltaIndex(['0days', '1day', '2days', '3days', '4days'],
                                name=idx_name)
        ser = Series([0, 1, 2, 3, 4], dtype=np.int64, name=ser_name)

        expected = Series(['0days', '1day', '4days', '9days', '16days'],
                          dtype='timedelta64[ns]',
                          name=expected_name)

        tm.assert_series_equal(ser * tdi, expected)

        # The direct operation tdi * ser still needs to be fixed.
        tm.assert_series_equal(ser.__rmul__(tdi), expected)

    @pytest.mark.parametrize('names', _name_triples)
    def test_float_series_rdiv_tdi(self, names):
        # GH#19042
        # TODO: the direct operation TimedeltaIndex / Series still
        # needs to be fixed.
        idx_name, ser_name, expected_name = names
        tdi = pd.TimedeltaIndex(['0days', '1day', '2days', '3days', '4days'],
                                name=idx_name)
        ser = Series([1.5, 3, 4.5, 6, 7.5], dtype=np.float64, name=ser_name)

        elementwise = [tdi[n] / ser[n] for n in range(len(ser))]
        expected = Series(elementwise,
                          dtype='timedelta64[ns]',
                          name=expected_name)

        tm.assert_series_equal(ser.__rdiv__(tdi), expected)

    @pytest.mark.parametrize('scalar_td', _scalar_tds)
    def test_td64series_mul_timedeltalike_invalid(self, scalar_td):
        td1 = self._td1()

        # check that we are getting a TypeError
        # with 'operate' (from core/ops.py) for the ops that are not
        # defined
        pattern = 'operate|unsupported|cannot|not supported'
        with tm.assert_raises_regex(TypeError, pattern):
            td1 * scalar_td
        with tm.assert_raises_regex(TypeError, pattern):
            scalar_td * td1
|
||||
|
||||
|
||||
class TestTimedeltaSeriesInvalidArithmeticOps(object):
|
||||
@pytest.mark.parametrize('scalar_td', [
|
||||
timedelta(minutes=5, seconds=4),
|
||||
Timedelta('5m4s'),
|
||||
Timedelta('5m4s').to_timedelta64()])
|
||||
def test_td64series_pow_invalid(self, scalar_td):
|
||||
td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
|
||||
td1.iloc[2] = np.nan
|
||||
|
||||
# check that we are getting a TypeError
|
||||
# with 'operate' (from core/ops.py) for the ops that are not
|
||||
# defined
|
||||
pattern = 'operate|unsupported|cannot|not supported'
|
||||
with tm.assert_raises_regex(TypeError, pattern):
|
||||
scalar_td ** td1
|
||||
with tm.assert_raises_regex(TypeError, pattern):
|
||||
td1 ** scalar_td
|
||||
@@ -1,178 +0,0 @@
|
||||
# coding=utf-8
|
||||
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
from pandas import (offsets, Series, notna,
|
||||
isna, date_range, Timestamp)
|
||||
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class TestSeriesAsof(TestData):
|
||||
|
||||
def test_basic(self):
|
||||
|
||||
# array or list or dates
|
||||
N = 50
|
||||
rng = date_range('1/1/1990', periods=N, freq='53s')
|
||||
ts = Series(np.random.randn(N), index=rng)
|
||||
ts[15:30] = np.nan
|
||||
dates = date_range('1/1/1990', periods=N * 3, freq='25s')
|
||||
|
||||
result = ts.asof(dates)
|
||||
assert notna(result).all()
|
||||
lb = ts.index[14]
|
||||
ub = ts.index[30]
|
||||
|
||||
result = ts.asof(list(dates))
|
||||
assert notna(result).all()
|
||||
lb = ts.index[14]
|
||||
ub = ts.index[30]
|
||||
|
||||
mask = (result.index >= lb) & (result.index < ub)
|
||||
rs = result[mask]
|
||||
assert (rs == ts[lb]).all()
|
||||
|
||||
val = result[result.index[result.index >= ub][0]]
|
||||
assert ts[ub] == val
|
||||
|
||||
def test_scalar(self):
|
||||
|
||||
N = 30
|
||||
rng = date_range('1/1/1990', periods=N, freq='53s')
|
||||
ts = Series(np.arange(N), index=rng)
|
||||
ts[5:10] = np.NaN
|
||||
ts[15:20] = np.NaN
|
||||
|
||||
val1 = ts.asof(ts.index[7])
|
||||
val2 = ts.asof(ts.index[19])
|
||||
|
||||
assert val1 == ts[4]
|
||||
assert val2 == ts[14]
|
||||
|
||||
# accepts strings
|
||||
val1 = ts.asof(str(ts.index[7]))
|
||||
assert val1 == ts[4]
|
||||
|
||||
# in there
|
||||
result = ts.asof(ts.index[3])
|
||||
assert result == ts[3]
|
||||
|
||||
# no as of value
|
||||
d = ts.index[0] - offsets.BDay()
|
||||
assert np.isnan(ts.asof(d))
|
||||
|
||||
def test_with_nan(self):
|
||||
# basic asof test
|
||||
rng = date_range('1/1/2000', '1/2/2000', freq='4h')
|
||||
s = Series(np.arange(len(rng)), index=rng)
|
||||
r = s.resample('2h').mean()
|
||||
|
||||
result = r.asof(r.index)
|
||||
expected = Series([0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6.],
|
||||
index=date_range('1/1/2000', '1/2/2000', freq='2h'))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
r.iloc[3:5] = np.nan
|
||||
result = r.asof(r.index)
|
||||
expected = Series([0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 5, 5, 6.],
|
||||
index=date_range('1/1/2000', '1/2/2000', freq='2h'))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
r.iloc[-3:] = np.nan
|
||||
result = r.asof(r.index)
|
||||
expected = Series([0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 4.],
|
||||
index=date_range('1/1/2000', '1/2/2000', freq='2h'))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_periodindex(self):
|
||||
from pandas import period_range, PeriodIndex
|
||||
# array or list or dates
|
||||
N = 50
|
||||
rng = period_range('1/1/1990', periods=N, freq='H')
|
||||
ts = Series(np.random.randn(N), index=rng)
|
||||
ts[15:30] = np.nan
|
||||
dates = date_range('1/1/1990', periods=N * 3, freq='37min')
|
||||
|
||||
result = ts.asof(dates)
|
||||
assert notna(result).all()
|
||||
lb = ts.index[14]
|
||||
ub = ts.index[30]
|
||||
|
||||
result = ts.asof(list(dates))
|
||||
assert notna(result).all()
|
||||
lb = ts.index[14]
|
||||
ub = ts.index[30]
|
||||
|
||||
pix = PeriodIndex(result.index.values, freq='H')
|
||||
mask = (pix >= lb) & (pix < ub)
|
||||
rs = result[mask]
|
||||
assert (rs == ts[lb]).all()
|
||||
|
||||
ts[5:10] = np.nan
|
||||
ts[15:20] = np.nan
|
||||
|
||||
val1 = ts.asof(ts.index[7])
|
||||
val2 = ts.asof(ts.index[19])
|
||||
|
||||
assert val1 == ts[4]
|
||||
assert val2 == ts[14]
|
||||
|
||||
# accepts strings
|
||||
val1 = ts.asof(str(ts.index[7]))
|
||||
assert val1 == ts[4]
|
||||
|
||||
# in there
|
||||
assert ts.asof(ts.index[3]) == ts[3]
|
||||
|
||||
# no as of value
|
||||
d = ts.index[0].to_timestamp() - offsets.BDay()
|
||||
assert isna(ts.asof(d))
|
||||
|
||||
def test_errors(self):
|
||||
|
||||
s = Series([1, 2, 3],
|
||||
index=[Timestamp('20130101'),
|
||||
Timestamp('20130103'),
|
||||
Timestamp('20130102')])
|
||||
|
||||
# non-monotonic
|
||||
assert not s.index.is_monotonic
|
||||
with pytest.raises(ValueError):
|
||||
s.asof(s.index[0])
|
||||
|
||||
# subset with Series
|
||||
N = 10
|
||||
rng = date_range('1/1/1990', periods=N, freq='53s')
|
||||
s = Series(np.random.randn(N), index=rng)
|
||||
with pytest.raises(ValueError):
|
||||
s.asof(s.index[0], subset='foo')
|
||||
|
||||
def test_all_nans(self):
|
||||
# GH 15713
|
||||
# series is all nans
|
||||
result = Series([np.nan]).asof([0])
|
||||
expected = Series([np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# testing non-default indexes
|
||||
N = 50
|
||||
rng = date_range('1/1/1990', periods=N, freq='53s')
|
||||
|
||||
dates = date_range('1/1/1990', periods=N * 3, freq='25s')
|
||||
result = Series(np.nan, index=rng).asof(dates)
|
||||
expected = Series(np.nan, index=dates)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# testing scalar input
|
||||
date = date_range('1/1/1990', periods=N * 3, freq='25s')[0]
|
||||
result = Series(np.nan, index=rng).asof(date)
|
||||
assert isna(result)
|
||||
|
||||
# test name is propagated
|
||||
result = Series(np.nan, index=[1, 2, 3, 4], name='test').asof([4, 5])
|
||||
expected = Series(np.nan, index=[4, 5], name='test')
|
||||
tm.assert_series_equal(result, expected)
|
||||
@@ -1,312 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import pytest
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from numpy import nan
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from pandas import Series, DataFrame, date_range, DatetimeIndex
|
||||
|
||||
from pandas import compat
|
||||
from pandas.util.testing import assert_series_equal
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class TestSeriesCombine(TestData):
|
||||
|
||||
def test_append(self):
|
||||
appendedSeries = self.series.append(self.objSeries)
|
||||
for idx, value in compat.iteritems(appendedSeries):
|
||||
if idx in self.series.index:
|
||||
assert value == self.series[idx]
|
||||
elif idx in self.objSeries.index:
|
||||
assert value == self.objSeries[idx]
|
||||
else:
|
||||
self.fail("orphaned index!")
|
||||
|
||||
pytest.raises(ValueError, self.ts.append, self.ts,
|
||||
verify_integrity=True)
|
||||
|
||||
def test_append_many(self):
|
||||
pieces = [self.ts[:5], self.ts[5:10], self.ts[10:]]
|
||||
|
||||
result = pieces[0].append(pieces[1:])
|
||||
assert_series_equal(result, self.ts)
|
||||
|
||||
def test_append_duplicates(self):
|
||||
# GH 13677
|
||||
s1 = pd.Series([1, 2, 3])
|
||||
s2 = pd.Series([4, 5, 6])
|
||||
exp = pd.Series([1, 2, 3, 4, 5, 6], index=[0, 1, 2, 0, 1, 2])
|
||||
tm.assert_series_equal(s1.append(s2), exp)
|
||||
tm.assert_series_equal(pd.concat([s1, s2]), exp)
|
||||
|
||||
# the result must have RangeIndex
|
||||
exp = pd.Series([1, 2, 3, 4, 5, 6])
|
||||
tm.assert_series_equal(s1.append(s2, ignore_index=True),
|
||||
exp, check_index_type=True)
|
||||
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True),
|
||||
exp, check_index_type=True)
|
||||
|
||||
msg = 'Indexes have overlapping values:'
|
||||
with tm.assert_raises_regex(ValueError, msg):
|
||||
s1.append(s2, verify_integrity=True)
|
||||
with tm.assert_raises_regex(ValueError, msg):
|
||||
pd.concat([s1, s2], verify_integrity=True)
|
||||
|
||||
def test_combine_first(self):
|
||||
values = tm.makeIntIndex(20).values.astype(float)
|
||||
series = Series(values, index=tm.makeIntIndex(20))
|
||||
|
||||
series_copy = series * 2
|
||||
series_copy[::2] = np.NaN
|
||||
|
||||
# nothing used from the input
|
||||
combined = series.combine_first(series_copy)
|
||||
|
||||
tm.assert_series_equal(combined, series)
|
||||
|
||||
# Holes filled from input
|
||||
combined = series_copy.combine_first(series)
|
||||
assert np.isfinite(combined).all()
|
||||
|
||||
tm.assert_series_equal(combined[::2], series[::2])
|
||||
tm.assert_series_equal(combined[1::2], series_copy[1::2])
|
||||
|
||||
# mixed types
|
||||
index = tm.makeStringIndex(20)
|
||||
floats = Series(tm.randn(20), index=index)
|
||||
strings = Series(tm.makeStringIndex(10), index=index[::2])
|
||||
|
||||
combined = strings.combine_first(floats)
|
||||
|
||||
tm.assert_series_equal(strings, combined.loc[index[::2]])
|
||||
tm.assert_series_equal(floats[1::2].astype(object),
|
||||
combined.loc[index[1::2]])
|
||||
|
||||
# corner case
|
||||
s = Series([1., 2, 3], index=[0, 1, 2])
|
||||
result = s.combine_first(Series([], index=[]))
|
||||
assert_series_equal(s, result)
|
||||
|
||||
def test_update(self):
|
||||
s = Series([1.5, nan, 3., 4., nan])
|
||||
s2 = Series([nan, 3.5, nan, 5.])
|
||||
s.update(s2)
|
||||
|
||||
expected = Series([1.5, 3.5, 3., 5., np.nan])
|
||||
assert_series_equal(s, expected)
|
||||
|
||||
# GH 3217
|
||||
df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
|
||||
df['c'] = np.nan
|
||||
|
||||
# this will fail as long as series is a sub-class of ndarray
|
||||
# df['c'].update(Series(['foo'],index=[0])) #####
|
||||
|
||||
def test_concat_empty_series_dtypes_roundtrips(self):
|
||||
|
||||
# round-tripping with self & like self
|
||||
dtypes = map(np.dtype, ['float64', 'int8', 'uint8', 'bool', 'm8[ns]',
|
||||
'M8[ns]'])
|
||||
|
||||
for dtype in dtypes:
|
||||
assert pd.concat([Series(dtype=dtype)]).dtype == dtype
|
||||
assert pd.concat([Series(dtype=dtype),
|
||||
Series(dtype=dtype)]).dtype == dtype
|
||||
|
||||
def int_result_type(dtype, dtype2):
|
||||
typs = set([dtype.kind, dtype2.kind])
|
||||
if not len(typs - set(['i', 'u', 'b'])) and (dtype.kind == 'i' or
|
||||
dtype2.kind == 'i'):
|
||||
return 'i'
|
||||
elif not len(typs - set(['u', 'b'])) and (dtype.kind == 'u' or
|
||||
dtype2.kind == 'u'):
|
||||
return 'u'
|
||||
return None
|
||||
|
||||
def float_result_type(dtype, dtype2):
|
||||
typs = set([dtype.kind, dtype2.kind])
|
||||
if not len(typs - set(['f', 'i', 'u'])) and (dtype.kind == 'f' or
|
||||
dtype2.kind == 'f'):
|
||||
return 'f'
|
||||
return None
|
||||
|
||||
def get_result_type(dtype, dtype2):
|
||||
result = float_result_type(dtype, dtype2)
|
||||
if result is not None:
|
||||
return result
|
||||
result = int_result_type(dtype, dtype2)
|
||||
if result is not None:
|
||||
return result
|
||||
return 'O'
|
||||
|
||||
for dtype in dtypes:
|
||||
for dtype2 in dtypes:
|
||||
if dtype == dtype2:
|
||||
continue
|
||||
|
||||
expected = get_result_type(dtype, dtype2)
|
||||
result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2)
|
||||
]).dtype
|
||||
assert result.kind == expected
|
||||
|
||||
def test_concat_empty_series_dtypes(self):
|
||||
|
||||
# booleans
|
||||
assert pd.concat([Series(dtype=np.bool_),
|
||||
Series(dtype=np.int32)]).dtype == np.int32
|
||||
assert pd.concat([Series(dtype=np.bool_),
|
||||
Series(dtype=np.float32)]).dtype == np.object_
|
||||
|
||||
# datetime-like
|
||||
assert pd.concat([Series(dtype='m8[ns]'),
|
||||
Series(dtype=np.bool)]).dtype == np.object_
|
||||
assert pd.concat([Series(dtype='m8[ns]'),
|
||||
Series(dtype=np.int64)]).dtype == np.object_
|
||||
assert pd.concat([Series(dtype='M8[ns]'),
|
||||
Series(dtype=np.bool)]).dtype == np.object_
|
||||
assert pd.concat([Series(dtype='M8[ns]'),
|
||||
Series(dtype=np.int64)]).dtype == np.object_
|
||||
assert pd.concat([Series(dtype='M8[ns]'),
|
||||
Series(dtype=np.bool_),
|
||||
Series(dtype=np.int64)]).dtype == np.object_
|
||||
|
||||
# categorical
|
||||
assert pd.concat([Series(dtype='category'),
|
||||
Series(dtype='category')]).dtype == 'category'
|
||||
# GH 18515
|
||||
assert pd.concat([Series(np.array([]), dtype='category'),
|
||||
Series(dtype='float64')]).dtype == 'float64'
|
||||
assert pd.concat([Series(dtype='category'),
|
||||
Series(dtype='object')]).dtype == 'object'
|
||||
|
||||
# sparse
|
||||
result = pd.concat([Series(dtype='float64').to_sparse(), Series(
|
||||
dtype='float64').to_sparse()])
|
||||
assert result.dtype == np.float64
|
||||
assert result.ftype == 'float64:sparse'
|
||||
|
||||
result = pd.concat([Series(dtype='float64').to_sparse(), Series(
|
||||
dtype='float64')])
|
||||
assert result.dtype == np.float64
|
||||
assert result.ftype == 'float64:sparse'
|
||||
|
||||
result = pd.concat([Series(dtype='float64').to_sparse(), Series(
|
||||
dtype='object')])
|
||||
assert result.dtype == np.object_
|
||||
assert result.ftype == 'object:dense'
|
||||
|
||||
def test_combine_first_dt64(self):
|
||||
from pandas.core.tools.datetimes import to_datetime
|
||||
s0 = to_datetime(Series(["2010", np.NaN]))
|
||||
s1 = to_datetime(Series([np.NaN, "2011"]))
|
||||
rs = s0.combine_first(s1)
|
||||
xp = to_datetime(Series(['2010', '2011']))
|
||||
assert_series_equal(rs, xp)
|
||||
|
||||
s0 = to_datetime(Series(["2010", np.NaN]))
|
||||
s1 = Series([np.NaN, "2011"])
|
||||
rs = s0.combine_first(s1)
|
||||
xp = Series([datetime(2010, 1, 1), '2011'])
|
||||
assert_series_equal(rs, xp)
|
||||
|
||||
|
||||
class TestTimeseries(object):
|
||||
|
||||
def test_append_concat(self):
|
||||
rng = date_range('5/8/2012 1:45', periods=10, freq='5T')
|
||||
ts = Series(np.random.randn(len(rng)), rng)
|
||||
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
|
||||
|
||||
result = ts.append(ts)
|
||||
result_df = df.append(df)
|
||||
ex_index = DatetimeIndex(np.tile(rng.values, 2))
|
||||
tm.assert_index_equal(result.index, ex_index)
|
||||
tm.assert_index_equal(result_df.index, ex_index)
|
||||
|
||||
appended = rng.append(rng)
|
||||
tm.assert_index_equal(appended, ex_index)
|
||||
|
||||
appended = rng.append([rng, rng])
|
||||
ex_index = DatetimeIndex(np.tile(rng.values, 3))
|
||||
tm.assert_index_equal(appended, ex_index)
|
||||
|
||||
# different index names
|
||||
rng1 = rng.copy()
|
||||
rng2 = rng.copy()
|
||||
rng1.name = 'foo'
|
||||
rng2.name = 'bar'
|
||||
assert rng1.append(rng1).name == 'foo'
|
||||
assert rng1.append(rng2).name is None
|
||||
|
||||
def test_append_concat_tz(self):
|
||||
# see gh-2938
|
||||
rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
|
||||
tz='US/Eastern')
|
||||
rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
|
||||
tz='US/Eastern')
|
||||
rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
|
||||
tz='US/Eastern')
|
||||
ts = Series(np.random.randn(len(rng)), rng)
|
||||
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
|
||||
ts2 = Series(np.random.randn(len(rng2)), rng2)
|
||||
df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
|
||||
|
||||
result = ts.append(ts2)
|
||||
result_df = df.append(df2)
|
||||
tm.assert_index_equal(result.index, rng3)
|
||||
tm.assert_index_equal(result_df.index, rng3)
|
||||
|
||||
appended = rng.append(rng2)
|
||||
tm.assert_index_equal(appended, rng3)
|
||||
|
||||
def test_append_concat_tz_explicit_pytz(self):
|
||||
# see gh-2938
|
||||
from pytz import timezone as timezone
|
||||
|
||||
rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
|
||||
tz=timezone('US/Eastern'))
|
||||
rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
|
||||
tz=timezone('US/Eastern'))
|
||||
rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
|
||||
tz=timezone('US/Eastern'))
|
||||
ts = Series(np.random.randn(len(rng)), rng)
|
||||
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
|
||||
ts2 = Series(np.random.randn(len(rng2)), rng2)
|
||||
df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
|
||||
|
||||
result = ts.append(ts2)
|
||||
result_df = df.append(df2)
|
||||
tm.assert_index_equal(result.index, rng3)
|
||||
tm.assert_index_equal(result_df.index, rng3)
|
||||
|
||||
appended = rng.append(rng2)
|
||||
tm.assert_index_equal(appended, rng3)
|
||||
|
||||
def test_append_concat_tz_dateutil(self):
|
||||
# see gh-2938
|
||||
rng = date_range('5/8/2012 1:45', periods=10, freq='5T',
|
||||
tz='dateutil/US/Eastern')
|
||||
rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T',
|
||||
tz='dateutil/US/Eastern')
|
||||
rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T',
|
||||
tz='dateutil/US/Eastern')
|
||||
ts = Series(np.random.randn(len(rng)), rng)
|
||||
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
|
||||
ts2 = Series(np.random.randn(len(rng2)), rng2)
|
||||
df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
|
||||
|
||||
result = ts.append(ts2)
|
||||
result_df = df.append(df2)
|
||||
tm.assert_index_equal(result.index, rng3)
|
||||
tm.assert_index_equal(result_df.index, rng3)
|
||||
|
||||
appended = rng.append(rng2)
|
||||
tm.assert_index_equal(appended, rng3)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,462 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import locale
|
||||
import calendar
|
||||
import pytest
|
||||
|
||||
from datetime import datetime, date
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from pandas.core.dtypes.common import is_integer_dtype, is_list_like
|
||||
from pandas import (Index, Series, DataFrame, bdate_range,
|
||||
date_range, period_range, timedelta_range,
|
||||
PeriodIndex, DatetimeIndex, TimedeltaIndex)
|
||||
import pandas.core.common as com
|
||||
|
||||
from pandas.util.testing import assert_series_equal
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class TestSeriesDatetimeValues(TestData):
|
||||
|
||||
def test_dt_namespace_accessor(self):
|
||||
|
||||
# GH 7207, 11128
|
||||
# test .dt namespace accessor
|
||||
|
||||
ok_for_period = PeriodIndex._datetimelike_ops
|
||||
ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq']
|
||||
ok_for_dt = DatetimeIndex._datetimelike_ops
|
||||
ok_for_dt_methods = ['to_period', 'to_pydatetime', 'tz_localize',
|
||||
'tz_convert', 'normalize', 'strftime', 'round',
|
||||
'floor', 'ceil', 'day_name', 'month_name']
|
||||
ok_for_td = TimedeltaIndex._datetimelike_ops
|
||||
ok_for_td_methods = ['components', 'to_pytimedelta', 'total_seconds',
|
||||
'round', 'floor', 'ceil']
|
||||
|
||||
def get_expected(s, name):
|
||||
result = getattr(Index(s._values), prop)
|
||||
if isinstance(result, np.ndarray):
|
||||
if is_integer_dtype(result):
|
||||
result = result.astype('int64')
|
||||
elif not is_list_like(result):
|
||||
return result
|
||||
return Series(result, index=s.index, name=s.name)
|
||||
|
||||
def compare(s, name):
|
||||
a = getattr(s.dt, prop)
|
||||
b = get_expected(s, prop)
|
||||
if not (is_list_like(a) and is_list_like(b)):
|
||||
assert a == b
|
||||
else:
|
||||
tm.assert_series_equal(a, b)
|
||||
|
||||
# datetimeindex
|
||||
cases = [Series(date_range('20130101', periods=5), name='xxx'),
|
||||
Series(date_range('20130101', periods=5, freq='s'),
|
||||
name='xxx'),
|
||||
Series(date_range('20130101 00:00:00', periods=5, freq='ms'),
|
||||
name='xxx')]
|
||||
for s in cases:
|
||||
for prop in ok_for_dt:
|
||||
# we test freq below
|
||||
if prop != 'freq':
|
||||
compare(s, prop)
|
||||
|
||||
for prop in ok_for_dt_methods:
|
||||
getattr(s.dt, prop)
|
||||
|
||||
result = s.dt.to_pydatetime()
|
||||
assert isinstance(result, np.ndarray)
|
||||
assert result.dtype == object
|
||||
|
||||
result = s.dt.tz_localize('US/Eastern')
|
||||
exp_values = DatetimeIndex(s.values).tz_localize('US/Eastern')
|
||||
expected = Series(exp_values, index=s.index, name='xxx')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
tz_result = result.dt.tz
|
||||
assert str(tz_result) == 'US/Eastern'
|
||||
freq_result = s.dt.freq
|
||||
assert freq_result == DatetimeIndex(s.values, freq='infer').freq
|
||||
|
||||
# let's localize, then convert
|
||||
result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern')
|
||||
exp_values = (DatetimeIndex(s.values).tz_localize('UTC')
|
||||
.tz_convert('US/Eastern'))
|
||||
expected = Series(exp_values, index=s.index, name='xxx')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# round
|
||||
s = Series(pd.to_datetime(['2012-01-01 13:00:00',
|
||||
'2012-01-01 12:01:00',
|
||||
'2012-01-01 08:00:00']), name='xxx')
|
||||
result = s.dt.round('D')
|
||||
expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02',
|
||||
'2012-01-01']), name='xxx')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# round with tz
|
||||
result = (s.dt.tz_localize('UTC')
|
||||
.dt.tz_convert('US/Eastern')
|
||||
.dt.round('D'))
|
||||
exp_values = pd.to_datetime(['2012-01-01', '2012-01-01',
|
||||
'2012-01-01']).tz_localize('US/Eastern')
|
||||
expected = Series(exp_values, name='xxx')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# floor
|
||||
s = Series(pd.to_datetime(['2012-01-01 13:00:00',
|
||||
'2012-01-01 12:01:00',
|
||||
'2012-01-01 08:00:00']), name='xxx')
|
||||
result = s.dt.floor('D')
|
||||
expected = Series(pd.to_datetime(['2012-01-01', '2012-01-01',
|
||||
'2012-01-01']), name='xxx')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# ceil
|
||||
s = Series(pd.to_datetime(['2012-01-01 13:00:00',
|
||||
'2012-01-01 12:01:00',
|
||||
'2012-01-01 08:00:00']), name='xxx')
|
||||
result = s.dt.ceil('D')
|
||||
expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02',
|
||||
'2012-01-02']), name='xxx')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# datetimeindex with tz
|
||||
s = Series(date_range('20130101', periods=5, tz='US/Eastern'),
|
||||
name='xxx')
|
||||
for prop in ok_for_dt:
|
||||
|
||||
# we test freq below
|
||||
if prop != 'freq':
|
||||
compare(s, prop)
|
||||
|
||||
for prop in ok_for_dt_methods:
|
||||
getattr(s.dt, prop)
|
||||
|
||||
result = s.dt.to_pydatetime()
|
||||
assert isinstance(result, np.ndarray)
|
||||
assert result.dtype == object
|
||||
|
||||
result = s.dt.tz_convert('CET')
|
||||
expected = Series(s._values.tz_convert('CET'),
|
||||
index=s.index, name='xxx')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
tz_result = result.dt.tz
|
||||
assert str(tz_result) == 'CET'
|
||||
freq_result = s.dt.freq
|
||||
assert freq_result == DatetimeIndex(s.values, freq='infer').freq
|
||||
|
||||
# timedelta index
|
||||
cases = [Series(timedelta_range('1 day', periods=5),
|
||||
index=list('abcde'), name='xxx'),
|
||||
Series(timedelta_range('1 day 01:23:45', periods=5,
|
||||
freq='s'), name='xxx'),
|
||||
Series(timedelta_range('2 days 01:23:45.012345', periods=5,
|
||||
freq='ms'), name='xxx')]
|
||||
for s in cases:
|
||||
for prop in ok_for_td:
|
||||
# we test freq below
|
||||
if prop != 'freq':
|
||||
compare(s, prop)
|
||||
|
||||
for prop in ok_for_td_methods:
|
||||
getattr(s.dt, prop)
|
||||
|
||||
result = s.dt.components
|
||||
assert isinstance(result, DataFrame)
|
||||
tm.assert_index_equal(result.index, s.index)
|
||||
|
||||
result = s.dt.to_pytimedelta()
|
||||
assert isinstance(result, np.ndarray)
|
||||
assert result.dtype == object
|
||||
|
||||
result = s.dt.total_seconds()
|
||||
assert isinstance(result, pd.Series)
|
||||
assert result.dtype == 'float64'
|
||||
|
||||
freq_result = s.dt.freq
|
||||
assert freq_result == TimedeltaIndex(s.values, freq='infer').freq
|
||||
|
||||
# both
|
||||
index = date_range('20130101', periods=3, freq='D')
|
||||
s = Series(date_range('20140204', periods=3, freq='s'),
|
||||
index=index, name='xxx')
|
||||
exp = Series(np.array([2014, 2014, 2014], dtype='int64'),
|
||||
index=index, name='xxx')
|
||||
tm.assert_series_equal(s.dt.year, exp)
|
||||
|
||||
exp = Series(np.array([2, 2, 2], dtype='int64'),
|
||||
index=index, name='xxx')
|
||||
tm.assert_series_equal(s.dt.month, exp)
|
||||
|
||||
exp = Series(np.array([0, 1, 2], dtype='int64'),
|
||||
index=index, name='xxx')
|
||||
tm.assert_series_equal(s.dt.second, exp)
|
||||
|
||||
exp = pd.Series([s[0]] * 3, index=index, name='xxx')
|
||||
tm.assert_series_equal(s.dt.normalize(), exp)
|
||||
|
||||
# periodindex
|
||||
cases = [Series(period_range('20130101', periods=5, freq='D'),
|
||||
name='xxx')]
|
||||
for s in cases:
|
||||
for prop in ok_for_period:
|
||||
# we test freq below
|
||||
if prop != 'freq':
|
||||
compare(s, prop)
|
||||
|
||||
for prop in ok_for_period_methods:
|
||||
getattr(s.dt, prop)
|
||||
|
||||
freq_result = s.dt.freq
|
||||
assert freq_result == PeriodIndex(s.values).freq
|
||||
|
||||
# test limited display api
|
||||
def get_dir(s):
|
||||
results = [r for r in s.dt.__dir__() if not r.startswith('_')]
|
||||
return list(sorted(set(results)))
|
||||
|
||||
s = Series(date_range('20130101', periods=5, freq='D'), name='xxx')
|
||||
results = get_dir(s)
|
||||
tm.assert_almost_equal(
|
||||
results, list(sorted(set(ok_for_dt + ok_for_dt_methods))))
|
||||
|
||||
s = Series(period_range('20130101', periods=5,
|
||||
freq='D', name='xxx').astype(object))
|
||||
results = get_dir(s)
|
||||
tm.assert_almost_equal(
|
||||
results, list(sorted(set(ok_for_period + ok_for_period_methods))))
|
||||
|
||||
# 11295
|
||||
# ambiguous time error on the conversions
|
||||
s = Series(pd.date_range('2015-01-01', '2016-01-01',
|
||||
freq='T'), name='xxx')
|
||||
s = s.dt.tz_localize('UTC').dt.tz_convert('America/Chicago')
|
||||
results = get_dir(s)
|
||||
tm.assert_almost_equal(
|
||||
results, list(sorted(set(ok_for_dt + ok_for_dt_methods))))
|
||||
exp_values = pd.date_range('2015-01-01', '2016-01-01', freq='T',
|
||||
tz='UTC').tz_convert('America/Chicago')
|
||||
expected = Series(exp_values, name='xxx')
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
# no setting allowed
|
||||
s = Series(date_range('20130101', periods=5, freq='D'), name='xxx')
|
||||
with tm.assert_raises_regex(ValueError, "modifications"):
|
||||
s.dt.hour = 5
|
||||
|
||||
# trying to set a copy
|
||||
with pd.option_context('chained_assignment', 'raise'):
|
||||
|
||||
def f():
|
||||
s.dt.hour[0] = 5
|
||||
|
||||
pytest.raises(com.SettingWithCopyError, f)
|
||||
|
||||
def test_dt_namespace_accessor_categorical(self):
|
||||
# GH 19468
|
||||
dti = DatetimeIndex(['20171111', '20181212']).repeat(2)
|
||||
s = Series(pd.Categorical(dti), name='foo')
|
||||
result = s.dt.year
|
||||
expected = Series([2017, 2017, 2018, 2018], name='foo')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_accessor_no_new_attributes(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/10673
|
||||
s = Series(date_range('20130101', periods=5, freq='D'))
|
||||
with tm.assert_raises_regex(AttributeError,
|
||||
"You cannot add any new attribute"):
|
||||
s.dt.xlabel = "a"
|
||||
|
||||
@pytest.mark.parametrize('time_locale', [
|
||||
None] if tm.get_locales() is None else [None] + tm.get_locales())
|
||||
def test_dt_accessor_datetime_name_accessors(self, time_locale):
|
||||
# Test Monday -> Sunday and January -> December, in that sequence
|
||||
if time_locale is None:
|
||||
# If the time_locale is None, day-name and month_name should
|
||||
# return the english attributes
|
||||
expected_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
|
||||
'Friday', 'Saturday', 'Sunday']
|
||||
expected_months = ['January', 'February', 'March', 'April', 'May',
|
||||
'June', 'July', 'August', 'September',
|
||||
'October', 'November', 'December']
|
||||
else:
|
||||
with tm.set_locale(time_locale, locale.LC_TIME):
|
||||
expected_days = calendar.day_name[:]
|
||||
expected_months = calendar.month_name[1:]
|
||||
|
||||
s = Series(DatetimeIndex(freq='D', start=datetime(1998, 1, 1),
|
||||
periods=365))
|
||||
english_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
|
||||
'Friday', 'Saturday', 'Sunday']
|
||||
for day, name, eng_name in zip(range(4, 11),
|
||||
expected_days,
|
||||
english_days):
|
||||
name = name.capitalize()
|
||||
assert s.dt.weekday_name[day] == eng_name
|
||||
assert s.dt.day_name(locale=time_locale)[day] == name
|
||||
s = s.append(Series([pd.NaT]))
|
||||
assert np.isnan(s.dt.day_name(locale=time_locale).iloc[-1])
|
||||
|
||||
s = Series(DatetimeIndex(freq='M', start='2012', end='2013'))
|
||||
result = s.dt.month_name(locale=time_locale)
|
||||
expected = Series([month.capitalize() for month in expected_months])
|
||||
tm.assert_series_equal(result, expected)
|
||||
for s_date, expected in zip(s, expected_months):
|
||||
result = s_date.month_name(locale=time_locale)
|
||||
assert result == expected.capitalize()
|
||||
s = s.append(Series([pd.NaT]))
|
||||
assert np.isnan(s.dt.month_name(locale=time_locale).iloc[-1])
|
||||
|
||||
def test_strftime(self):
|
||||
# GH 10086
|
||||
s = Series(date_range('20130101', periods=5))
|
||||
result = s.dt.strftime('%Y/%m/%d')
|
||||
expected = Series(['2013/01/01', '2013/01/02', '2013/01/03',
|
||||
'2013/01/04', '2013/01/05'])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s = Series(date_range('2015-02-03 11:22:33.4567', periods=5))
|
||||
result = s.dt.strftime('%Y/%m/%d %H-%M-%S')
|
||||
expected = Series(['2015/02/03 11-22-33', '2015/02/04 11-22-33',
|
||||
'2015/02/05 11-22-33', '2015/02/06 11-22-33',
|
||||
'2015/02/07 11-22-33'])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s = Series(period_range('20130101', periods=5))
|
||||
result = s.dt.strftime('%Y/%m/%d')
|
||||
expected = Series(['2013/01/01', '2013/01/02', '2013/01/03',
|
||||
'2013/01/04', '2013/01/05'])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s = Series(period_range(
|
||||
'2015-02-03 11:22:33.4567', periods=5, freq='s'))
|
||||
result = s.dt.strftime('%Y/%m/%d %H-%M-%S')
|
||||
expected = Series(['2015/02/03 11-22-33', '2015/02/03 11-22-34',
|
||||
'2015/02/03 11-22-35', '2015/02/03 11-22-36',
|
||||
'2015/02/03 11-22-37'])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s = Series(date_range('20130101', periods=5))
|
||||
s.iloc[0] = pd.NaT
|
||||
result = s.dt.strftime('%Y/%m/%d')
|
||||
expected = Series(['NaT', '2013/01/02', '2013/01/03', '2013/01/04',
|
||||
'2013/01/05'])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
datetime_index = date_range('20150301', periods=5)
|
||||
result = datetime_index.strftime("%Y/%m/%d")
|
||||
|
||||
expected = Index(['2015/03/01', '2015/03/02', '2015/03/03',
|
||||
'2015/03/04', '2015/03/05'], dtype=np.object_)
|
||||
# dtype may be S10 or U10 depending on python version
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
period_index = period_range('20150301', periods=5)
|
||||
result = period_index.strftime("%Y/%m/%d")
|
||||
expected = Index(['2015/03/01', '2015/03/02', '2015/03/03',
|
||||
'2015/03/04', '2015/03/05'], dtype='=U10')
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
s = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14,
|
||||
32, 1)])
|
||||
result = s.dt.strftime('%Y-%m-%d %H:%M:%S')
|
||||
expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s = Series(period_range('20130101', periods=4, freq='H'))
|
||||
result = s.dt.strftime('%Y/%m/%d %H:%M:%S')
|
||||
expected = Series(["2013/01/01 00:00:00", "2013/01/01 01:00:00",
|
||||
"2013/01/01 02:00:00", "2013/01/01 03:00:00"])
|
||||
|
||||
s = Series(period_range('20130101', periods=4, freq='L'))
|
||||
result = s.dt.strftime('%Y/%m/%d %H:%M:%S.%l')
|
||||
expected = Series(["2013/01/01 00:00:00.000",
|
||||
"2013/01/01 00:00:00.001",
|
||||
"2013/01/01 00:00:00.002",
|
||||
"2013/01/01 00:00:00.003"])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_valid_dt_with_missing_values(self):
    """Check ``.dt`` field access on a datetime Series containing one NaT."""
    from datetime import date, time

    # GH 8689
    s = Series(date_range('20130101', periods=5, freq='D'))
    s.iloc[2] = pd.NaT

    numeric_fields = ('microsecond', 'nanosecond', 'second', 'minute',
                      'hour', 'day')
    for field in numeric_fields:
        expected = getattr(s.dt, field).copy()
        expected.iloc[2] = np.nan
        tm.assert_series_equal(getattr(s.dt, field), expected)

    # Position 2 holds the missing value in both object-dtype results.
    expected_dates = [date(2013, 1, 1), date(2013, 1, 2), np.nan,
                      date(2013, 1, 4), date(2013, 1, 5)]
    tm.assert_series_equal(s.dt.date,
                           Series(expected_dates, dtype='object'))

    midnight = time(0)
    expected_times = [midnight, midnight, np.nan, midnight, midnight]
    tm.assert_series_equal(s.dt.time,
                           Series(expected_times, dtype='object'))
|
||||
|
||||
def test_dt_accessor_api(self):
    """Check the class behind ``Series.dt`` and its rejection of other dtypes."""
    # GH 9322
    from pandas.core.indexes.accessors import (
        CombinedDatetimelikeProperties, DatetimeProperties)
    assert Series.dt is CombinedDatetimelikeProperties

    datetime_series = Series(date_range('2000-01-01', periods=3))
    assert isinstance(datetime_series.dt, DatetimeProperties)

    # Integer, string and float data must all refuse the accessor.
    non_datetime = [Series(np.arange(5)), Series(list('abcde')),
                    Series(np.random.randn(5))]
    for s in non_datetime:
        with tm.assert_raises_regex(AttributeError,
                                    "only use .dt accessor"):
            s.dt
        assert not hasattr(s, 'dt')
|
||||
|
||||
def test_between(self):
    """Filter with ``Series.between`` using inclusive and exclusive bounds."""
    s = Series(bdate_range('1/1/2000', periods=20).astype(object))
    # Every other entry is blanked, hence the ``dropna`` on the slices below.
    s[::2] = np.nan
    lower, upper = s[3], s[17]

    inclusive_mask = s.between(lower, upper)
    assert_series_equal(s[inclusive_mask], s[3:18].dropna())

    exclusive_mask = s.between(lower, upper, inclusive=False)
    assert_series_equal(s[exclusive_mask], s[5:16].dropna())
|
||||
|
||||
def test_date_tz(self):
|
||||
# GH11757
|
||||
rng = pd.DatetimeIndex(['2014-04-04 23:56',
|
||||
'2014-07-18 21:24',
|
||||
'2015-11-22 22:14'], tz="US/Eastern")
|
||||
s = Series(rng)
|
||||
expected = Series([date(2014, 4, 4),
|
||||
date(2014, 7, 18),
|
||||
date(2015, 11, 22)])
|
||||
assert_series_equal(s.dt.date, expected)
|
||||
assert_series_equal(s.apply(lambda x: x.date()), expected)
|
||||
|
||||
def test_datetime_understood(self):
|
||||
# Ensures it doesn't fail to create the right series
|
||||
# reported in issue#16726
|
||||
series = pd.Series(pd.date_range("2012-01-01", periods=3))
|
||||
offset = pd.offsets.DateOffset(days=6)
|
||||
result = series - offset
|
||||
expected = pd.Series(pd.to_datetime([
|
||||
'2011-12-26', '2011-12-27', '2011-12-28']))
|
||||
tm.assert_series_equal(result, expected)
|
||||
@@ -1,508 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import pytest
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import sys
|
||||
import string
|
||||
import warnings
|
||||
|
||||
from numpy import nan
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
Series, Timestamp, Timedelta, DataFrame, date_range,
|
||||
Categorical, Index
|
||||
)
|
||||
from pandas.api.types import CategoricalDtype
|
||||
import pandas._libs.tslib as tslib
|
||||
|
||||
from pandas.compat import lrange, range, u
|
||||
from pandas import compat
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class TestSeriesDtypes(TestData):
|
||||
|
||||
def test_dt64_series_astype_object(self):
|
||||
dt64ser = Series(date_range('20130101', periods=3))
|
||||
result = dt64ser.astype(object)
|
||||
assert isinstance(result.iloc[0], datetime)
|
||||
assert result.dtype == np.object_
|
||||
|
||||
def test_td64_series_astype_object(self):
|
||||
tdser = Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]')
|
||||
result = tdser.astype(object)
|
||||
assert isinstance(result.iloc[0], timedelta)
|
||||
assert result.dtype == np.object_
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["float32", "float64",
                                   "int64", "int32"])
def test_astype(self, dtype):
    """``astype`` should produce the requested dtype and keep the name."""
    original = Series(np.random.randn(5), name='foo')
    converted = original.astype(dtype)

    assert converted.dtype == dtype
    assert converted.name == original.name
|
||||
|
||||
def test_asobject_deprecated(self):
    """Reading ``Series.asobject`` warns and returns an ndarray."""
    floats = Series(np.random.randn(5), name='foo')
    with tm.assert_produces_warning(FutureWarning):
        as_object = floats.asobject
    assert isinstance(as_object, np.ndarray)
|
||||
|
||||
def test_dtype(self):
    """Check the dtype/ftype attributes and count helpers of ``self.ts``."""
    float64 = np.dtype('float64')

    assert self.ts.dtype == float64
    assert self.ts.dtypes == float64
    assert self.ts.ftype == 'float64:dense'
    assert self.ts.ftypes == 'float64:dense'

    dtype_counts = self.ts.get_dtype_counts()
    tm.assert_series_equal(dtype_counts, Series(1, ['float64']))

    # GH18243 - Assert .get_ftype_counts is deprecated
    with tm.assert_produces_warning(FutureWarning):
        ftype_counts = self.ts.get_ftype_counts()
        tm.assert_series_equal(ftype_counts,
                               Series(1, ['float64:dense']))
|
||||
|
||||
@pytest.mark.parametrize("value", [np.nan, np.inf])
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
def test_astype_cast_nan_inf_int(self, dtype, value):
    """Casting a non-finite value to an integer dtype must raise."""
    # gh-14265: check NaN and inf raise error when converting to int
    non_finite = Series([value])
    msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer'

    with tm.assert_raises_regex(ValueError, msg):
        non_finite.astype(dtype)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
def test_astype_cast_object_int_fail(self, dtype):
    """Non-numeric strings cannot be cast to an integer dtype."""
    words = Series(["car", "house", "tree", "1"])
    with pytest.raises(ValueError):
        words.astype(dtype)
|
||||
|
||||
def test_astype_cast_object_int(self):
|
||||
arr = Series(['1', '2', '3', '4'], dtype=object)
|
||||
result = arr.astype(int)
|
||||
|
||||
tm.assert_series_equal(result, Series(np.arange(1, 5)))
|
||||
|
||||
def test_astype_datetime(self):
    """Datetime-like Series cast to ``'O'`` should report object dtype."""
    # All-NaT series built from the iNaT sentinel.
    all_nat = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5))
    assert all_nat.astype('O').dtype == np.object_

    # Single datetime value.
    single = Series([datetime(2001, 1, 2, 0, 0)])
    assert single.astype('O').dtype == np.object_

    # Assigning NaN keeps the series datetime64 before the cast.
    with_gap = Series([datetime(2001, 1, 2, 0, 0) for _ in range(3)])
    with_gap[1] = np.nan
    assert with_gap.dtype == 'M8[ns]'
    assert with_gap.astype('O').dtype == np.object_
|
||||
|
||||
def test_astype_datetime64tz(self):
    """Exercise ``astype`` to and from tz-aware datetime64 dtypes."""
    tz_series = Series(date_range('20130101', periods=3, tz='US/Eastern'))

    # astype
    as_object = tz_series.astype(object)
    tm.assert_series_equal(
        as_object, Series(tz_series.astype(object), dtype=object))

    round_tripped = (Series(tz_series.values)
                     .dt.tz_localize('UTC')
                     .dt.tz_convert(tz_series.dt.tz))
    tm.assert_series_equal(round_tripped, tz_series)

    # astype - object, preserves on construction
    rebuilt = Series(tz_series.astype(object))
    tm.assert_series_equal(rebuilt, tz_series.astype(object))

    # astype - datetime64[ns, tz]
    for target in ('datetime64[ns, US/Eastern]', tz_series.dtype):
        localized = Series(tz_series.values).astype(target)
        tm.assert_series_equal(localized, tz_series)

    in_cet = tz_series.astype('datetime64[ns, CET]')
    expected = Series(date_range('20130101 06:00:00', periods=3, tz='CET'))
    tm.assert_series_equal(in_cet, expected)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [compat.text_type, np.str_])
@pytest.mark.parametrize("series", [Series([string.digits * 10,
                                            tm.rands(63),
                                            tm.rands(64),
                                            tm.rands(1000)]),
                                    Series([string.digits * 10,
                                            tm.rands(63),
                                            tm.rands(64), nan, 1.0])])
def test_astype_str_map(self, dtype, series):
    """``astype`` to a text dtype should equal mapping ``text_type``."""
    # see gh-4405
    converted = series.astype(dtype)
    mapped = series.map(compat.text_type)
    tm.assert_series_equal(converted, mapped)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [str, compat.text_type])
def test_astype_str_cast(self, dtype):
    """Check the text produced when casting Timestamp/Timedelta Series."""
    # see gh-9757: test str and unicode on python 2.x
    # and just str on python 3.x
    cases = [
        (Series([Timestamp('2010-01-04 00:00:00')]),
         '2010-01-04'),
        (Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')]),
         '2010-01-04 00:00:00-05:00'),
        (Series([Timedelta(1, unit='d')]),
         '1 days 00:00:00.000000000'),
    ]

    for source, text in cases:
        converted = source.astype(dtype)
        tm.assert_series_equal(converted, Series([dtype(text)]))
|
||||
|
||||
def test_astype_unicode(self):
    """``astype("unicode")`` should equal mapping ``compat.text_type``.

    On Python 2 the interpreter default encoding is temporarily switched
    to utf-8 so an extra byte-string case can be added.  The previous
    encoding is restored in a ``finally`` block, so a failing assertion
    does not leave the changed default encoding behind.
    """
    # see gh-7758: A bit of magic is required to set
    # default encoding to utf-8
    digits = string.digits
    test_series = [
        Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
        Series([u('データーサイエンス、お前はもう死んでいる')]),
    ]

    former_encoding = None

    if not compat.PY3:
        # In Python 2, we can force the default encoding for this test
        former_encoding = sys.getdefaultencoding()
        reload(sys)  # noqa

        sys.setdefaultencoding("utf-8")

    try:
        if sys.getdefaultencoding() == "utf-8":
            test_series.append(Series([u('野菜食べないとやばい')
                                       .encode("utf-8")]))

        for s in test_series:
            res = s.astype("unicode")
            expec = s.map(compat.text_type)
            tm.assert_series_equal(res, expec)
    finally:
        # Restore the former encoding, even when an assertion above failed
        if former_encoding is not None and former_encoding != "utf-8":
            reload(sys)  # noqa
            sys.setdefaultencoding(former_encoding)
|
||||
|
||||
@pytest.mark.parametrize("dtype_class", [dict, Series])
def test_astype_dict_like(self, dtype_class):
    """``astype`` with a name-to-dtype mapping, valid and invalid."""
    # see gh-7271
    s = Series(range(0, 10, 2), name='abc')

    as_text = s.astype(dtype_class({'abc': str}))
    tm.assert_series_equal(
        as_text, Series(['0', '2', '4', '6', '8'], name='abc'))

    as_float = s.astype(dtype_class({'abc': 'float64'}))
    tm.assert_series_equal(
        as_float,
        Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype='float64', name='abc'))

    # Each of these mappings is expected to be rejected with KeyError:
    # an extra key, a key that is not the series name, and (GH16717)
    # an empty mapping.
    rejected = [{'abc': str, 'def': str}, {0: str}, {}]
    for mapping in rejected:
        bad_dtype = dtype_class(mapping)
        with pytest.raises(KeyError):
            s.astype(bad_dtype)
|
||||
|
||||
def test_astype_categories_deprecation(self):
    """Passing ``categories``/``ordered`` keywords to ``astype`` warns."""
    # deprecated 17636
    letters = Series(['a', 'b', 'a'])
    via_dtype = letters.astype(CategoricalDtype(['a', 'b'], ordered=True))

    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        via_keywords = letters.astype('category', categories=['a', 'b'],
                                      ordered=True)
    tm.assert_series_equal(via_keywords, via_dtype)
|
||||
|
||||
def test_astype_from_categorical(self):
|
||||
l = ["a", "b", "c", "a"]
|
||||
s = Series(l)
|
||||
exp = Series(Categorical(l))
|
||||
res = s.astype('category')
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
l = [1, 2, 3, 1]
|
||||
s = Series(l)
|
||||
exp = Series(Categorical(l))
|
||||
res = s.astype('category')
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
df = DataFrame({"cats": [1, 2, 3, 4, 5, 6],
|
||||
"vals": [1, 2, 3, 4, 5, 6]})
|
||||
cats = Categorical([1, 2, 3, 4, 5, 6])
|
||||
exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]})
|
||||
df["cats"] = df["cats"].astype("category")
|
||||
tm.assert_frame_equal(exp_df, df)
|
||||
|
||||
df = DataFrame({"cats": ['a', 'b', 'b', 'a', 'a', 'd'],
|
||||
"vals": [1, 2, 3, 4, 5, 6]})
|
||||
cats = Categorical(['a', 'b', 'b', 'a', 'a', 'd'])
|
||||
exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]})
|
||||
df["cats"] = df["cats"].astype("category")
|
||||
tm.assert_frame_equal(exp_df, df)
|
||||
|
||||
# with keywords
|
||||
l = ["a", "b", "c", "a"]
|
||||
s = Series(l)
|
||||
exp = Series(Categorical(l, ordered=True))
|
||||
res = s.astype(CategoricalDtype(None, ordered=True))
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
exp = Series(Categorical(l, categories=list('abcdef'), ordered=True))
|
||||
res = s.astype(CategoricalDtype(list('abcdef'), ordered=True))
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
def test_astype_categorical_to_other(self):
    """Cast a categorical Series to other dtypes, valid and invalid."""
    frame = DataFrame({'value': np.random.randint(0, 10000, 100)})
    labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
    cat_labels = Categorical(labels, labels)

    frame = frame.sort_values(by=['value'], ascending=True)
    frame['value_group'] = pd.cut(frame.value, range(0, 10500, 500),
                                  right=False, labels=cat_labels)

    s = frame['value_group']
    tm.assert_series_equal(s.astype('category'), s)
    tm.assert_series_equal(s.astype(CategoricalDtype()), s)
    with pytest.raises(ValueError):
        s.astype('float64')

    letters = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
    tm.assert_series_equal(Series(Categorical(letters)).astype('str'),
                           Series(letters))

    digit_cats = Series(Categorical(['1', '2', '3', '4']))
    tm.assert_series_equal(digit_cats.astype('int'),
                           Series([1, 2, 3, 4]).astype(int))

    # object don't sort correctly, so just compare that we have the same
    # values
    def assert_same_unique_values(a, b):
        tm.assert_almost_equal(
            np.sort(np.unique(a)), np.sort(np.unique(b)))

    as_plain = Series(np.array(s.values), name='value_group')
    assert_same_unique_values(s.astype('object'), as_plain)
    assert_same_unique_values(s.astype(np.object_), as_plain)

    # array conversion
    tm.assert_almost_equal(np.array(s), np.array(s.values))

    # valid conversion
    valid_casts = [
        lambda x: x.astype('category'),
        lambda x: x.astype(CategoricalDtype()),
        lambda x: x.astype('object').astype('category'),
        lambda x: x.astype('object').astype(CategoricalDtype()),
    ]
    for cast in valid_casts:
        # compare series values
        # internal .categories can't be compared because it is sorted
        tm.assert_series_equal(cast(s), s, check_categorical=False)

    # invalid conversion (these are NOT a dtype)
    invalid_casts = [
        lambda x: x.astype(Categorical),
        lambda x: x.astype('object').astype(Categorical),
    ]
    for cast in invalid_casts:
        with pytest.raises(TypeError):
            cast(s)
|
||||
|
||||
@pytest.mark.parametrize('name', [None, 'foo'])
@pytest.mark.parametrize('dtype_ordered', [True, False])
@pytest.mark.parametrize('series_ordered', [True, False])
def test_astype_categorical_to_categorical(self, name, dtype_ordered,
                                           series_ordered):
    """Cast between categorical dtypes, via dtype objects and keywords."""
    # GH 10696/18593
    s_data = list('abcaacbab')
    s_dtype = CategoricalDtype(list('bac'), ordered=series_ordered)
    s = Series(s_data, dtype=s_dtype, name=name)

    def via_keywords(**kwargs):
        # The keyword form of the cast is expected to emit a FutureWarning.
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            return s.astype('category', **kwargs)

    # unspecified categories
    result = s.astype(CategoricalDtype(ordered=dtype_ordered))
    exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered)
    expected = Series(s_data, name=name, dtype=exp_dtype)
    tm.assert_series_equal(result, expected)

    result = via_keywords(ordered=dtype_ordered)
    tm.assert_series_equal(result, expected)

    # different categories
    new_dtype = CategoricalDtype(list('adc'), dtype_ordered)
    result = s.astype(new_dtype)
    expected = Series(s_data, name=name, dtype=new_dtype)
    tm.assert_series_equal(result, expected)

    result = via_keywords(categories=list('adc'), ordered=dtype_ordered)
    tm.assert_series_equal(result, expected)

    if dtype_ordered is False:
        # not specifying ordered, so only test once
        tm.assert_series_equal(s.astype('category'), s)
|
||||
|
||||
def test_astype_categoricaldtype(self):
|
||||
s = Series(['a', 'b', 'a'])
|
||||
result = s.astype(CategoricalDtype(['a', 'b'], ordered=True))
|
||||
expected = Series(Categorical(['a', 'b', 'a'], ordered=True))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s.astype(CategoricalDtype(['a', 'b'], ordered=False))
|
||||
expected = Series(Categorical(['a', 'b', 'a'], ordered=False))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s.astype(CategoricalDtype(['a', 'b', 'c'], ordered=False))
|
||||
expected = Series(Categorical(['a', 'b', 'a'],
|
||||
categories=['a', 'b', 'c'],
|
||||
ordered=False))
|
||||
tm.assert_series_equal(result, expected)
|
||||
tm.assert_index_equal(result.cat.categories, Index(['a', 'b', 'c']))
|
||||
|
||||
def test_astype_categoricaldtype_with_args(self):
    """A ``CategoricalDtype`` combined with extra keywords raises TypeError."""
    s = Series(['a', 'b'])
    type_ = CategoricalDtype(['a', 'b'])

    conflicting_kwargs = [
        dict(ordered=True),
        dict(categories=['a', 'b']),
        dict(categories=['a', 'b'], ordered=False),
    ]
    for kwargs in conflicting_kwargs:
        with pytest.raises(TypeError):
            s.astype(type_, **kwargs)
|
||||
|
||||
def test_astype_generic_timestamp_deprecated(self):
    """Casting to unit-less ``np.datetime64``/``np.timedelta64`` warns."""
    # see gh-15524
    data = [1]

    for dtype in (np.datetime64, np.timedelta64):
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            s = Series(data)
            result = s.astype(dtype)
            expected = Series(data, dtype=dtype)
            tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("dtype", np.typecodes['All'])
def test_astype_empty_constructor_equality(self, dtype):
    """An empty Series built with a dtype equals an empty Series cast to it."""
    # see gh-15524

    if dtype in ('S', 'V'):  # poor support (if any) currently
        return

    with warnings.catch_warnings(record=True):
        # Generic timestamp dtypes ('M' and 'm') are deprecated,
        # but we test that already in series/test_constructors.py
        constructed = Series([], dtype=dtype)
        converted = Series([]).astype(dtype)
        tm.assert_series_equal(constructed, converted)
|
||||
|
||||
def test_complex(self):
    """Read and assign ``real``/``imag`` on a complex-valued Series."""
    # see gh-4819: complex access for ndarray compat
    real_part = np.arange(5, dtype=np.float64)
    imag_part = 4 * real_part
    b = Series(real_part + 4j * real_part)

    tm.assert_numpy_array_equal(real_part, b.real)
    tm.assert_numpy_array_equal(imag_part, b.imag)

    # Overwriting the real component must leave the imaginary one alone.
    b.real = np.arange(5) + 5
    tm.assert_numpy_array_equal(real_part + 5, b.real)
    tm.assert_numpy_array_equal(imag_part, b.imag)
|
||||
|
||||
def test_arg_for_errors_in_astype(self):
    """Check the ``errors`` and deprecated ``raise_on_error`` arguments."""
    # see gh-14878
    ints = Series([1, 2, 3])

    # A non-string ``errors`` value is rejected.
    with pytest.raises(ValueError):
        ints.astype(np.float64, errors=False)

    # The old keyword still works but warns.
    with tm.assert_produces_warning(FutureWarning):
        ints.astype(np.int8, raise_on_error=True)

    # The supported spelling runs without raising.
    ints.astype(np.int8, errors='raise')
|
||||
|
||||
def test_intercept_astype_object(self):
|
||||
series = Series(date_range('1/1/2000', periods=10))
|
||||
|
||||
# This test no longer makes sense, as
|
||||
# Series is by default already M8[ns].
|
||||
expected = series.astype('object')
|
||||
|
||||
df = DataFrame({'a': series,
|
||||
'b': np.random.randn(len(series))})
|
||||
exp_dtypes = Series([np.dtype('datetime64[ns]'),
|
||||
np.dtype('float64')], index=['a', 'b'])
|
||||
tm.assert_series_equal(df.dtypes, exp_dtypes)
|
||||
|
||||
result = df.values.squeeze()
|
||||
assert (result[:, 0] == expected.values).all()
|
||||
|
||||
df = DataFrame({'a': series, 'b': ['foo'] * len(series)})
|
||||
|
||||
result = df.values.squeeze()
|
||||
assert (result[:, 0] == expected.values).all()
|
||||
|
||||
def test_series_to_categorical(self):
|
||||
# see gh-16524: test conversion of Series to Categorical
|
||||
series = Series(['a', 'b', 'c'])
|
||||
|
||||
result = Series(series, dtype='category')
|
||||
expected = Series(['a', 'b', 'c'], dtype='category')
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_infer_objects_series(self):
|
||||
# GH 11221
|
||||
actual = Series(np.array([1, 2, 3], dtype='O')).infer_objects()
|
||||
expected = Series([1, 2, 3])
|
||||
tm.assert_series_equal(actual, expected)
|
||||
|
||||
actual = Series(np.array([1, 2, 3, None], dtype='O')).infer_objects()
|
||||
expected = Series([1., 2., 3., np.nan])
|
||||
tm.assert_series_equal(actual, expected)
|
||||
|
||||
# only soft conversions, unconvertable pass thru unchanged
|
||||
actual = (Series(np.array([1, 2, 3, None, 'a'], dtype='O'))
|
||||
.infer_objects())
|
||||
expected = Series([1, 2, 3, None, 'a'])
|
||||
|
||||
assert actual.dtype == 'object'
|
||||
tm.assert_series_equal(actual, expected)
|
||||
@@ -1,311 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import pytest
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from numpy import nan
|
||||
import numpy as np
|
||||
|
||||
from pandas import Series
|
||||
from pandas.core.indexes.datetimes import Timestamp
|
||||
import pandas._libs.lib as lib
|
||||
|
||||
from pandas.util.testing import assert_series_equal
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSeriesInternals(object):
|
||||
|
||||
def test_convert_objects(self):
    """Exercise the deprecated ``Series.convert_objects``.

    Every call is expected to emit a ``FutureWarning``; the local
    ``converted`` helper wraps the call in that check.  The result of
    ``convert_dates='coerce', convert_numeric=False`` on the plain
    datetime series is now asserted as well (it was previously
    overwritten before being checked).
    """

    def converted(series, **kwargs):
        # Run convert_objects while asserting the deprecation warning.
        with tm.assert_produces_warning(FutureWarning):
            return series.convert_objects(**kwargs)

    s = Series([1., 2, 3], index=['a', 'b', 'c'])
    result = converted(s, convert_dates=False, convert_numeric=True)
    assert_series_equal(result, s)

    # force numeric conversion
    for text in ('1', '1.'):
        r = s.copy().astype('O')
        r['a'] = text
        result = converted(r, convert_dates=False, convert_numeric=True)
        assert_series_equal(result, s)

    r = s.copy().astype('O')
    r['a'] = 'garbled'
    expected = s.copy()
    expected['a'] = np.nan
    result = converted(r, convert_dates=False, convert_numeric=True)
    assert_series_equal(result, expected)

    # GH 4119, not converting a mixed type (e.g.floats and object)
    for placeholder in ('na', ''):
        s = Series([1, placeholder, 3, 4])
        result = converted(s, convert_numeric=True)
        expected = Series([1, np.nan, 3, 4])
        assert_series_equal(result, expected)

    # dates
    s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
                datetime(2001, 1, 3, 0, 0)])
    s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
                 datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1,
                 Timestamp('20010104'), '20010105'],
                dtype='O')
    result = converted(s, convert_dates=True, convert_numeric=False)
    expected = Series([Timestamp('20010101'), Timestamp('20010102'),
                       Timestamp('20010103')], dtype='M8[ns]')
    assert_series_equal(result, expected)

    result = converted(s, convert_dates='coerce', convert_numeric=False)
    assert_series_equal(result, expected)
    result = converted(s, convert_dates='coerce', convert_numeric=True)
    assert_series_equal(result, expected)

    expected = Series([Timestamp('20010101'), Timestamp('20010102'),
                       Timestamp('20010103'),
                       lib.NaT, lib.NaT, lib.NaT, Timestamp('20010104'),
                       Timestamp('20010105')], dtype='M8[ns]')
    result = converted(s2, convert_dates='coerce', convert_numeric=False)
    assert_series_equal(result, expected)
    result = converted(s2, convert_dates='coerce', convert_numeric=True)
    assert_series_equal(result, expected)

    # preserve all-nans (if convert_dates='coerce')
    s = Series(['foo', 'bar', 1, 1.0], dtype='O')
    result = converted(s, convert_dates='coerce', convert_numeric=False)
    expected = Series([lib.NaT] * 2 + [Timestamp(1)] * 2)
    assert_series_equal(result, expected)

    # preserve if non-object
    s = Series([1], dtype='float32')
    result = converted(s, convert_dates='coerce', convert_numeric=False)
    assert_series_equal(result, s)

    # r = s.copy()
    # r[0] = np.nan
    # result = r.convert_objects(convert_dates=True,convert_numeric=False)
    # assert result.dtype == 'M8[ns]'

    # dateutil parses some single letters into today's value as a date
    for x in 'abcdefghijklmnopqrstuvwxyz':
        for letter in (x, x.upper()):
            s = Series([letter])
            result = converted(s, convert_dates='coerce')
            assert_series_equal(result, s)
|
||||
|
||||
def test_convert_objects_preserve_bool(self):
    """Mixed int/bool objects convert to int64 via ``convert_objects``."""
    mixed = Series([1, True, 3, 5], dtype=object)
    with tm.assert_produces_warning(FutureWarning):
        converted = mixed.convert_objects(convert_numeric=True)
    expected = Series([1, 1, 3, 5], dtype='i8')
    tm.assert_series_equal(converted, expected)
|
||||
|
||||
def test_convert_objects_preserve_all_bool(self):
    """All-bool objects convert to bool dtype via ``convert_objects``."""
    flags = [False, True, False, False]
    as_objects = Series(flags, dtype=object)
    with tm.assert_produces_warning(FutureWarning):
        converted = as_objects.convert_objects(convert_numeric=True)
    expected = Series([False, True, False, False], dtype=bool)
    tm.assert_series_equal(converted, expected)
|
||||
|
||||
# GH 10265
def test_convert(self):
    """Exercise ``Series._convert`` across datetime/numeric/timedelta flags."""
    # Tests: All to nans, coerce, true
    # Test coercion returns correct type
    letters = Series(['a', 'b', 'c'])
    assert_series_equal(letters._convert(datetime=True, coerce=True),
                        Series([lib.NaT] * 3))

    assert_series_equal(letters._convert(numeric=True, coerce=True),
                        Series([np.nan] * 3))

    expected = Series([lib.NaT] * 3, dtype=np.dtype('m8[ns]'))
    assert_series_equal(letters._convert(timedelta=True, coerce=True),
                        expected)

    dt = datetime(2001, 1, 1, 0, 0)
    td = dt - datetime(2000, 1, 1, 0, 0)

    # Test coercion with mixed types
    mixed = Series(['a', '3.1415', dt, td])
    assert_series_equal(mixed._convert(datetime=True, coerce=True),
                        Series([lib.NaT, lib.NaT, dt, lib.NaT]))

    assert_series_equal(mixed._convert(numeric=True, coerce=True),
                        Series([nan, 3.1415, nan, nan]))

    expected = Series([lib.NaT, lib.NaT, lib.NaT, td],
                      dtype=np.dtype('m8[ns]'))
    assert_series_equal(mixed._convert(timedelta=True, coerce=True),
                        expected)

    # Test standard conversion returns original
    assert_series_equal(mixed._convert(datetime=True), mixed)
    assert_series_equal(mixed._convert(numeric=True),
                        Series([nan, 3.1415, nan, nan]))
    assert_series_equal(mixed._convert(timedelta=True), mixed)

    # test pass-through and non-conversion when other types selected
    numeric_text = Series(['1.0', '2.0', '3.0'])
    results = numeric_text._convert(datetime=True, numeric=True,
                                    timedelta=True)
    assert_series_equal(results, Series([1.0, 2.0, 3.0]))
    assert_series_equal(numeric_text._convert(True, False, True),
                        numeric_text)

    boxed_dates = Series([datetime(2001, 1, 1, 0, 0),
                          datetime(2001, 1, 1, 0, 0)], dtype='O')
    results = boxed_dates._convert(datetime=True, numeric=True,
                                   timedelta=True)
    expected = Series([datetime(2001, 1, 1, 0, 0),
                       datetime(2001, 1, 1, 0, 0)])
    assert_series_equal(results, expected)
    results = boxed_dates._convert(datetime=False, numeric=True,
                                   timedelta=True)
    assert_series_equal(results, boxed_dates)

    td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0)
    boxed_deltas = Series([td, td], dtype='O')
    results = boxed_deltas._convert(datetime=True, numeric=True,
                                    timedelta=True)
    assert_series_equal(results, Series([td, td]))
    assert_series_equal(boxed_deltas._convert(True, True, False),
                        boxed_deltas)

    s = Series([1., 2, 3], index=['a', 'b', 'c'])
    assert_series_equal(s._convert(numeric=True), s)

    # force numeric conversion
    for text in ('1', '1.'):
        r = s.copy().astype('O')
        r['a'] = text
        assert_series_equal(r._convert(numeric=True), s)

    r = s.copy().astype('O')
    r['a'] = 'garbled'
    result = r._convert(numeric=True)
    expected = s.copy()
    expected['a'] = nan
    assert_series_equal(result, expected)

    # GH 4119, not converting a mixed type (e.g.floats and object)
    expected = Series([1, nan, 3, 4])
    for placeholder in ('na', ''):
        s = Series([1, placeholder, 3, 4])
        result = s._convert(datetime=True, numeric=True)
        assert_series_equal(result, expected)

    # dates
    s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
                datetime(2001, 1, 3, 0, 0)])
    s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
                 datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1,
                 Timestamp('20010104'), '20010105'], dtype='O')

    expected = Series([Timestamp('20010101'), Timestamp('20010102'),
                       Timestamp('20010103')], dtype='M8[ns]')
    assert_series_equal(s._convert(datetime=True), expected)
    assert_series_equal(s._convert(datetime=True, coerce=True), expected)

    expected = Series([Timestamp('20010101'), Timestamp('20010102'),
                       Timestamp('20010103'), lib.NaT, lib.NaT, lib.NaT,
                       Timestamp('20010104'), Timestamp('20010105')],
                      dtype='M8[ns]')
    result = s2._convert(datetime=True, numeric=False, timedelta=False,
                         coerce=True)
    assert_series_equal(result, expected)
    assert_series_equal(s2._convert(datetime=True, coerce=True), expected)

    s = Series(['foo', 'bar', 1, 1.0], dtype='O')
    result = s._convert(datetime=True, coerce=True)
    expected = Series([lib.NaT] * 2 + [Timestamp(1)] * 2)
    assert_series_equal(result, expected)

    # preserver if non-object
    s = Series([1], dtype='float32')
    assert_series_equal(s._convert(datetime=True, coerce=True), s)

    # r = s.copy()
    # r[0] = np.nan
    # result = r._convert(convert_dates=True,convert_numeric=False)
    # assert result.dtype == 'M8[ns]'

    # dateutil parses some single letters into today's value as a date
    expected = Series([lib.NaT])
    for x in 'abcdefghijklmnopqrstuvwxyz':
        for letter in (x, x.upper()):
            result = Series([letter])._convert(datetime=True, coerce=True)
            assert_series_equal(result, expected)
|
||||
|
||||
def test_convert_no_arg_error(self):
|
||||
s = Series(['1.0', '2'])
|
||||
pytest.raises(ValueError, s._convert)
|
||||
|
||||
def test_convert_preserve_bool(self):
|
||||
s = Series([1, True, 3, 5], dtype=object)
|
||||
r = s._convert(datetime=True, numeric=True)
|
||||
e = Series([1, 1, 3, 5], dtype='i8')
|
||||
tm.assert_series_equal(r, e)
|
||||
|
||||
def test_convert_preserve_all_bool(self):
|
||||
s = Series([False, True, False, False], dtype=object)
|
||||
r = s._convert(datetime=True, numeric=True)
|
||||
e = Series([False, True, False, False], dtype=bool)
|
||||
tm.assert_series_equal(r, e)
|
||||
@@ -1,249 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
from datetime import datetime
|
||||
import collections
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from pandas import Series, DataFrame
|
||||
|
||||
from pandas.compat import StringIO, u
|
||||
from pandas.io.common import _get_handle
|
||||
from pandas.util.testing import (assert_series_equal, assert_almost_equal,
|
||||
assert_frame_equal, ensure_clean)
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class TestSeriesToCSV(TestData):
|
||||
|
||||
def read_csv(self, path, **kwargs):
|
||||
params = dict(squeeze=True, index_col=0,
|
||||
header=None, parse_dates=True)
|
||||
params.update(**kwargs)
|
||||
|
||||
header = params.get("header")
|
||||
out = pd.read_csv(path, **params)
|
||||
|
||||
if header is None:
|
||||
out.name = out.index.name = None
|
||||
|
||||
return out
|
||||
|
||||
def test_from_csv_deprecation(self):
|
||||
# see gh-17812
|
||||
with ensure_clean() as path:
|
||||
self.ts.to_csv(path)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning,
|
||||
check_stacklevel=False):
|
||||
ts = self.read_csv(path)
|
||||
depr_ts = Series.from_csv(path)
|
||||
assert_series_equal(depr_ts, ts)
|
||||
|
||||
def test_from_csv(self):
|
||||
|
||||
with ensure_clean() as path:
|
||||
self.ts.to_csv(path)
|
||||
ts = self.read_csv(path)
|
||||
assert_series_equal(self.ts, ts, check_names=False)
|
||||
|
||||
assert ts.name is None
|
||||
assert ts.index.name is None
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning,
|
||||
check_stacklevel=False):
|
||||
depr_ts = Series.from_csv(path)
|
||||
assert_series_equal(depr_ts, ts)
|
||||
|
||||
# see gh-10483
|
||||
self.ts.to_csv(path, header=True)
|
||||
ts_h = self.read_csv(path, header=0)
|
||||
assert ts_h.name == "ts"
|
||||
|
||||
self.series.to_csv(path)
|
||||
series = self.read_csv(path)
|
||||
assert_series_equal(self.series, series, check_names=False)
|
||||
|
||||
assert series.name is None
|
||||
assert series.index.name is None
|
||||
|
||||
self.series.to_csv(path, header=True)
|
||||
series_h = self.read_csv(path, header=0)
|
||||
assert series_h.name == "series"
|
||||
|
||||
outfile = open(path, "w")
|
||||
outfile.write("1998-01-01|1.0\n1999-01-01|2.0")
|
||||
outfile.close()
|
||||
|
||||
series = self.read_csv(path, sep="|")
|
||||
check_series = Series({datetime(1998, 1, 1): 1.0,
|
||||
datetime(1999, 1, 1): 2.0})
|
||||
assert_series_equal(check_series, series)
|
||||
|
||||
series = self.read_csv(path, sep="|", parse_dates=False)
|
||||
check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0})
|
||||
assert_series_equal(check_series, series)
|
||||
|
||||
def test_to_csv(self):
|
||||
import io
|
||||
|
||||
with ensure_clean() as path:
|
||||
self.ts.to_csv(path)
|
||||
|
||||
with io.open(path, newline=None) as f:
|
||||
lines = f.readlines()
|
||||
assert (lines[1] != '\n')
|
||||
|
||||
self.ts.to_csv(path, index=False)
|
||||
arr = np.loadtxt(path)
|
||||
assert_almost_equal(arr, self.ts.values)
|
||||
|
||||
def test_to_csv_unicode_index(self):
|
||||
buf = StringIO()
|
||||
s = Series([u("\u05d0"), "d2"], index=[u("\u05d0"), u("\u05d1")])
|
||||
|
||||
s.to_csv(buf, encoding="UTF-8")
|
||||
buf.seek(0)
|
||||
|
||||
s2 = self.read_csv(buf, index_col=0, encoding="UTF-8")
|
||||
assert_series_equal(s, s2)
|
||||
|
||||
def test_to_csv_float_format(self):
|
||||
|
||||
with ensure_clean() as filename:
|
||||
ser = Series([0.123456, 0.234567, 0.567567])
|
||||
ser.to_csv(filename, float_format="%.2f")
|
||||
|
||||
rs = self.read_csv(filename)
|
||||
xp = Series([0.12, 0.23, 0.57])
|
||||
assert_series_equal(rs, xp)
|
||||
|
||||
def test_to_csv_list_entries(self):
|
||||
s = Series(['jack and jill', 'jesse and frank'])
|
||||
|
||||
split = s.str.split(r'\s+and\s+')
|
||||
|
||||
buf = StringIO()
|
||||
split.to_csv(buf)
|
||||
|
||||
def test_to_csv_path_is_none(self):
|
||||
# GH 8215
|
||||
# Series.to_csv() was returning None, inconsistent with
|
||||
# DataFrame.to_csv() which returned string
|
||||
s = Series([1, 2, 3])
|
||||
csv_str = s.to_csv(path=None)
|
||||
assert isinstance(csv_str, str)
|
||||
|
||||
@pytest.mark.parametrize('s,encoding', [
|
||||
(Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
|
||||
name='X'), None),
|
||||
# GH 21241, 21118
|
||||
(Series(['abc', 'def', 'ghi'], name='X'), 'ascii'),
|
||||
(Series(["123", u"你好", u"世界"], name=u"中文"), 'gb2312'),
|
||||
(Series(["123", u"Γειά σου", u"Κόσμε"], name=u"Ελληνικά"), 'cp737')
|
||||
])
|
||||
def test_to_csv_compression(self, s, encoding, compression):
|
||||
|
||||
with ensure_clean() as filename:
|
||||
|
||||
s.to_csv(filename, compression=compression, encoding=encoding,
|
||||
header=True)
|
||||
# test the round trip - to_csv -> read_csv
|
||||
result = pd.read_csv(filename, compression=compression,
|
||||
encoding=encoding, index_col=0, squeeze=True)
|
||||
assert_series_equal(s, result)
|
||||
|
||||
# test the round trip using file handle - to_csv -> read_csv
|
||||
f, _handles = _get_handle(filename, 'w', compression=compression,
|
||||
encoding=encoding)
|
||||
with f:
|
||||
s.to_csv(f, encoding=encoding, header=True)
|
||||
result = pd.read_csv(filename, compression=compression,
|
||||
encoding=encoding, index_col=0, squeeze=True)
|
||||
assert_series_equal(s, result)
|
||||
|
||||
# explicitly ensure file was compressed
|
||||
with tm.decompress_file(filename, compression) as fh:
|
||||
text = fh.read().decode(encoding or 'utf8')
|
||||
assert s.name in text
|
||||
|
||||
with tm.decompress_file(filename, compression) as fh:
|
||||
assert_series_equal(s, pd.read_csv(fh,
|
||||
index_col=0,
|
||||
squeeze=True,
|
||||
encoding=encoding))
|
||||
|
||||
|
||||
class TestSeriesIO(TestData):
|
||||
|
||||
def test_to_frame(self):
|
||||
self.ts.name = None
|
||||
rs = self.ts.to_frame()
|
||||
xp = pd.DataFrame(self.ts.values, index=self.ts.index)
|
||||
assert_frame_equal(rs, xp)
|
||||
|
||||
self.ts.name = 'testname'
|
||||
rs = self.ts.to_frame()
|
||||
xp = pd.DataFrame(dict(testname=self.ts.values), index=self.ts.index)
|
||||
assert_frame_equal(rs, xp)
|
||||
|
||||
rs = self.ts.to_frame(name='testdifferent')
|
||||
xp = pd.DataFrame(
|
||||
dict(testdifferent=self.ts.values), index=self.ts.index)
|
||||
assert_frame_equal(rs, xp)
|
||||
|
||||
def test_timeseries_periodindex(self):
|
||||
# GH2891
|
||||
from pandas import period_range
|
||||
prng = period_range('1/1/2011', '1/1/2012', freq='M')
|
||||
ts = Series(np.random.randn(len(prng)), prng)
|
||||
new_ts = tm.round_trip_pickle(ts)
|
||||
assert new_ts.index.freq == 'M'
|
||||
|
||||
def test_pickle_preserve_name(self):
|
||||
for n in [777, 777., 'name', datetime(2001, 11, 11), (1, 2)]:
|
||||
unpickled = self._pickle_roundtrip_name(tm.makeTimeSeries(name=n))
|
||||
assert unpickled.name == n
|
||||
|
||||
def _pickle_roundtrip_name(self, obj):
|
||||
|
||||
with ensure_clean() as path:
|
||||
obj.to_pickle(path)
|
||||
unpickled = pd.read_pickle(path)
|
||||
return unpickled
|
||||
|
||||
def test_to_frame_expanddim(self):
|
||||
# GH 9762
|
||||
|
||||
class SubclassedSeries(Series):
|
||||
|
||||
@property
|
||||
def _constructor_expanddim(self):
|
||||
return SubclassedFrame
|
||||
|
||||
class SubclassedFrame(DataFrame):
|
||||
pass
|
||||
|
||||
s = SubclassedSeries([1, 2, 3], name='X')
|
||||
result = s.to_frame()
|
||||
assert isinstance(result, SubclassedFrame)
|
||||
expected = SubclassedFrame({'X': [1, 2, 3]})
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('mapping', (
|
||||
dict,
|
||||
collections.defaultdict(list),
|
||||
collections.OrderedDict))
|
||||
def test_to_dict(self, mapping):
|
||||
# GH16122
|
||||
ts = TestData().ts
|
||||
tm.assert_series_equal(
|
||||
Series(ts.to_dict(mapping), name='ts'), ts)
|
||||
from_method = Series(ts.to_dict(collections.Counter))
|
||||
from_constructor = Series(collections.Counter(ts.iteritems()))
|
||||
tm.assert_series_equal(from_method, from_constructor)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,169 +0,0 @@
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
import pandas.core.indexes.period as period
|
||||
from pandas import Series, period_range, DataFrame
|
||||
|
||||
|
||||
def _permute(obj):
|
||||
return obj.take(np.random.permutation(len(obj)))
|
||||
|
||||
|
||||
class TestSeriesPeriod(object):
|
||||
|
||||
def setup_method(self, method):
|
||||
self.series = Series(period_range('2000-01-01', periods=10, freq='D'))
|
||||
|
||||
def test_auto_conversion(self):
|
||||
series = Series(list(period_range('2000-01-01', periods=10, freq='D')))
|
||||
assert series.dtype == 'object'
|
||||
|
||||
series = pd.Series([pd.Period('2011-01-01', freq='D'),
|
||||
pd.Period('2011-02-01', freq='D')])
|
||||
assert series.dtype == 'object'
|
||||
|
||||
def test_getitem(self):
|
||||
assert self.series[1] == pd.Period('2000-01-02', freq='D')
|
||||
|
||||
result = self.series[[2, 4]]
|
||||
exp = pd.Series([pd.Period('2000-01-03', freq='D'),
|
||||
pd.Period('2000-01-05', freq='D')],
|
||||
index=[2, 4])
|
||||
tm.assert_series_equal(result, exp)
|
||||
assert result.dtype == 'object'
|
||||
|
||||
def test_isna(self):
|
||||
# GH 13737
|
||||
s = Series([pd.Period('2011-01', freq='M'),
|
||||
pd.Period('NaT', freq='M')])
|
||||
tm.assert_series_equal(s.isna(), Series([False, True]))
|
||||
tm.assert_series_equal(s.notna(), Series([True, False]))
|
||||
|
||||
def test_fillna(self):
|
||||
# GH 13737
|
||||
s = Series([pd.Period('2011-01', freq='M'),
|
||||
pd.Period('NaT', freq='M')])
|
||||
|
||||
res = s.fillna(pd.Period('2012-01', freq='M'))
|
||||
exp = Series([pd.Period('2011-01', freq='M'),
|
||||
pd.Period('2012-01', freq='M')])
|
||||
tm.assert_series_equal(res, exp)
|
||||
assert res.dtype == 'object'
|
||||
|
||||
res = s.fillna('XXX')
|
||||
exp = Series([pd.Period('2011-01', freq='M'), 'XXX'])
|
||||
tm.assert_series_equal(res, exp)
|
||||
assert res.dtype == 'object'
|
||||
|
||||
def test_dropna(self):
|
||||
# GH 13737
|
||||
s = Series([pd.Period('2011-01', freq='M'),
|
||||
pd.Period('NaT', freq='M')])
|
||||
tm.assert_series_equal(s.dropna(),
|
||||
Series([pd.Period('2011-01', freq='M')]))
|
||||
|
||||
def test_between(self):
|
||||
left, right = self.series[[2, 7]]
|
||||
result = self.series.between(left, right)
|
||||
expected = (self.series >= left) & (self.series <= right)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# NaT support
|
||||
|
||||
"""
|
||||
# ToDo: Enable when support period dtype
|
||||
def test_NaT_scalar(self):
|
||||
series = Series([0, 1000, 2000, iNaT], dtype='period[D]')
|
||||
|
||||
val = series[3]
|
||||
assert isna(val)
|
||||
|
||||
series[2] = val
|
||||
assert isna(series[2])
|
||||
|
||||
def test_NaT_cast(self):
|
||||
result = Series([np.nan]).astype('period[D]')
|
||||
expected = Series([NaT])
|
||||
tm.assert_series_equal(result, expected)
|
||||
"""
|
||||
|
||||
def test_set_none_nan(self):
|
||||
# currently Period is stored as object dtype, not as NaT
|
||||
self.series[3] = None
|
||||
assert self.series[3] is None
|
||||
|
||||
self.series[3:5] = None
|
||||
assert self.series[4] is None
|
||||
|
||||
self.series[5] = np.nan
|
||||
assert np.isnan(self.series[5])
|
||||
|
||||
self.series[5:7] = np.nan
|
||||
assert np.isnan(self.series[6])
|
||||
|
||||
def test_intercept_astype_object(self):
|
||||
expected = self.series.astype('object')
|
||||
|
||||
df = DataFrame({'a': self.series,
|
||||
'b': np.random.randn(len(self.series))})
|
||||
|
||||
result = df.values.squeeze()
|
||||
assert (result[:, 0] == expected.values).all()
|
||||
|
||||
df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)})
|
||||
|
||||
result = df.values.squeeze()
|
||||
assert (result[:, 0] == expected.values).all()
|
||||
|
||||
def test_add_series(self):
|
||||
rng = period_range('1/1/2000', '1/1/2010', freq='A')
|
||||
ts = Series(np.random.randn(len(rng)), index=rng)
|
||||
|
||||
result = ts + ts[::2]
|
||||
expected = ts + ts
|
||||
expected[1::2] = np.nan
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = ts + _permute(ts[::2])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)"
|
||||
with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
|
||||
ts + ts.asfreq('D', how="end")
|
||||
|
||||
def test_align_series(self, join_type):
|
||||
rng = period_range('1/1/2000', '1/1/2010', freq='A')
|
||||
ts = Series(np.random.randn(len(rng)), index=rng)
|
||||
|
||||
ts.align(ts[::2], join=join_type)
|
||||
|
||||
def test_truncate(self):
|
||||
# GH 17717
|
||||
idx1 = pd.PeriodIndex([
|
||||
pd.Period('2017-09-02'),
|
||||
pd.Period('2017-09-02'),
|
||||
pd.Period('2017-09-03')
|
||||
])
|
||||
series1 = pd.Series([1, 2, 3], index=idx1)
|
||||
result1 = series1.truncate(after='2017-09-02')
|
||||
|
||||
expected_idx1 = pd.PeriodIndex([
|
||||
pd.Period('2017-09-02'),
|
||||
pd.Period('2017-09-02')
|
||||
])
|
||||
tm.assert_series_equal(result1, pd.Series([1, 2], index=expected_idx1))
|
||||
|
||||
idx2 = pd.PeriodIndex([
|
||||
pd.Period('2017-09-03'),
|
||||
pd.Period('2017-09-02'),
|
||||
pd.Period('2017-09-03')
|
||||
])
|
||||
series2 = pd.Series([1, 2, 3], index=idx2)
|
||||
result2 = series2.sort_index().truncate(after='2017-09-02')
|
||||
|
||||
expected_idx2 = pd.PeriodIndex([
|
||||
pd.Period('2017-09-02')
|
||||
])
|
||||
tm.assert_series_equal(result2, pd.Series([2], index=expected_idx2))
|
||||
@@ -1,184 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from pandas import Index, Series
|
||||
from pandas.core.indexes.datetimes import Timestamp
|
||||
from pandas.core.dtypes.common import is_integer
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class TestSeriesQuantile(TestData):
|
||||
|
||||
def test_quantile(self):
|
||||
|
||||
q = self.ts.quantile(0.1)
|
||||
assert q == np.percentile(self.ts.dropna(), 10)
|
||||
|
||||
q = self.ts.quantile(0.9)
|
||||
assert q == np.percentile(self.ts.dropna(), 90)
|
||||
|
||||
# object dtype
|
||||
q = Series(self.ts, dtype=object).quantile(0.9)
|
||||
assert q == np.percentile(self.ts.dropna(), 90)
|
||||
|
||||
# datetime64[ns] dtype
|
||||
dts = self.ts.index.to_series()
|
||||
q = dts.quantile(.2)
|
||||
assert q == Timestamp('2000-01-10 19:12:00')
|
||||
|
||||
# timedelta64[ns] dtype
|
||||
tds = dts.diff()
|
||||
q = tds.quantile(.25)
|
||||
assert q == pd.to_timedelta('24:00:00')
|
||||
|
||||
# GH7661
|
||||
result = Series([np.timedelta64('NaT')]).sum()
|
||||
assert result == pd.Timedelta(0)
|
||||
|
||||
msg = 'percentiles should all be in the interval \\[0, 1\\]'
|
||||
for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
|
||||
with tm.assert_raises_regex(ValueError, msg):
|
||||
self.ts.quantile(invalid)
|
||||
|
||||
def test_quantile_multi(self):
|
||||
|
||||
qs = [.1, .9]
|
||||
result = self.ts.quantile(qs)
|
||||
expected = pd.Series([np.percentile(self.ts.dropna(), 10),
|
||||
np.percentile(self.ts.dropna(), 90)],
|
||||
index=qs, name=self.ts.name)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
dts = self.ts.index.to_series()
|
||||
dts.name = 'xxx'
|
||||
result = dts.quantile((.2, .2))
|
||||
expected = Series([Timestamp('2000-01-10 19:12:00'),
|
||||
Timestamp('2000-01-10 19:12:00')],
|
||||
index=[.2, .2], name='xxx')
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = self.ts.quantile([])
|
||||
expected = pd.Series([], name=self.ts.name, index=Index(
|
||||
[], dtype=float))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_quantile_interpolation(self):
|
||||
# see gh-10174
|
||||
|
||||
# interpolation = linear (default case)
|
||||
q = self.ts.quantile(0.1, interpolation='linear')
|
||||
assert q == np.percentile(self.ts.dropna(), 10)
|
||||
q1 = self.ts.quantile(0.1)
|
||||
assert q1 == np.percentile(self.ts.dropna(), 10)
|
||||
|
||||
# test with and without interpolation keyword
|
||||
assert q == q1
|
||||
|
||||
def test_quantile_interpolation_dtype(self):
|
||||
# GH #10174
|
||||
|
||||
# interpolation = linear (default case)
|
||||
q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='lower')
|
||||
assert q == np.percentile(np.array([1, 3, 4]), 50)
|
||||
assert is_integer(q)
|
||||
|
||||
q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='higher')
|
||||
assert q == np.percentile(np.array([1, 3, 4]), 50)
|
||||
assert is_integer(q)
|
||||
|
||||
def test_quantile_nan(self):
|
||||
|
||||
# GH 13098
|
||||
s = pd.Series([1, 2, 3, 4, np.nan])
|
||||
result = s.quantile(0.5)
|
||||
expected = 2.5
|
||||
assert result == expected
|
||||
|
||||
# all nan/empty
|
||||
cases = [Series([]), Series([np.nan, np.nan])]
|
||||
|
||||
for s in cases:
|
||||
res = s.quantile(0.5)
|
||||
assert np.isnan(res)
|
||||
|
||||
res = s.quantile([0.5])
|
||||
tm.assert_series_equal(res, pd.Series([np.nan], index=[0.5]))
|
||||
|
||||
res = s.quantile([0.2, 0.3])
|
||||
tm.assert_series_equal(res, pd.Series([np.nan, np.nan],
|
||||
index=[0.2, 0.3]))
|
||||
|
||||
def test_quantile_box(self):
|
||||
cases = [[pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'),
|
||||
pd.Timestamp('2011-01-03')],
|
||||
[pd.Timestamp('2011-01-01', tz='US/Eastern'),
|
||||
pd.Timestamp('2011-01-02', tz='US/Eastern'),
|
||||
pd.Timestamp('2011-01-03', tz='US/Eastern')],
|
||||
[pd.Timedelta('1 days'), pd.Timedelta('2 days'),
|
||||
pd.Timedelta('3 days')],
|
||||
# NaT
|
||||
[pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'),
|
||||
pd.Timestamp('2011-01-03'), pd.NaT],
|
||||
[pd.Timestamp('2011-01-01', tz='US/Eastern'),
|
||||
pd.Timestamp('2011-01-02', tz='US/Eastern'),
|
||||
pd.Timestamp('2011-01-03', tz='US/Eastern'), pd.NaT],
|
||||
[pd.Timedelta('1 days'), pd.Timedelta('2 days'),
|
||||
pd.Timedelta('3 days'), pd.NaT]]
|
||||
|
||||
for case in cases:
|
||||
s = pd.Series(case, name='XXX')
|
||||
res = s.quantile(0.5)
|
||||
assert res == case[1]
|
||||
|
||||
res = s.quantile([0.5])
|
||||
exp = pd.Series([case[1]], index=[0.5], name='XXX')
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
def test_datetime_timedelta_quantiles(self):
|
||||
# covers #9694
|
||||
assert pd.isna(Series([], dtype='M8[ns]').quantile(.5))
|
||||
assert pd.isna(Series([], dtype='m8[ns]').quantile(.5))
|
||||
|
||||
def test_quantile_nat(self):
|
||||
res = Series([pd.NaT, pd.NaT]).quantile(0.5)
|
||||
assert res is pd.NaT
|
||||
|
||||
res = Series([pd.NaT, pd.NaT]).quantile([0.5])
|
||||
tm.assert_series_equal(res, pd.Series([pd.NaT], index=[0.5]))
|
||||
|
||||
def test_quantile_empty(self):
|
||||
|
||||
# floats
|
||||
s = Series([], dtype='float64')
|
||||
|
||||
res = s.quantile(0.5)
|
||||
assert np.isnan(res)
|
||||
|
||||
res = s.quantile([0.5])
|
||||
exp = Series([np.nan], index=[0.5])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
# int
|
||||
s = Series([], dtype='int64')
|
||||
|
||||
res = s.quantile(0.5)
|
||||
assert np.isnan(res)
|
||||
|
||||
res = s.quantile([0.5])
|
||||
exp = Series([np.nan], index=[0.5])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
# datetime
|
||||
s = Series([], dtype='datetime64[ns]')
|
||||
|
||||
res = s.quantile(0.5)
|
||||
assert res is pd.NaT
|
||||
|
||||
res = s.quantile([0.5])
|
||||
exp = Series([pd.NaT], index=[0.5])
|
||||
tm.assert_series_equal(res, exp)
|
||||
@@ -1,487 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from pandas import compat, Timestamp
|
||||
|
||||
import pytest
|
||||
|
||||
from distutils.version import LooseVersion
|
||||
from numpy import nan
|
||||
import numpy as np
|
||||
|
||||
from pandas import Series, date_range, NaT
|
||||
from pandas.api.types import CategoricalDtype
|
||||
|
||||
from pandas.compat import product
|
||||
from pandas.util.testing import assert_series_equal
|
||||
import pandas.util.testing as tm
|
||||
from pandas.tests.series.common import TestData
|
||||
from pandas._libs.tslib import iNaT
|
||||
from pandas._libs.algos import Infinity, NegInfinity
|
||||
from itertools import chain
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
|
||||
class TestSeriesRank(TestData):
|
||||
s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3])
|
||||
|
||||
results = {
|
||||
'average': np.array([1.5, 5.5, 7.0, 3.5, nan,
|
||||
3.5, 1.5, 8.0, nan, 5.5]),
|
||||
'min': np.array([1, 5, 7, 3, nan, 3, 1, 8, nan, 5]),
|
||||
'max': np.array([2, 6, 7, 4, nan, 4, 2, 8, nan, 6]),
|
||||
'first': np.array([1, 5, 7, 3, nan, 4, 2, 8, nan, 6]),
|
||||
'dense': np.array([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]),
|
||||
}
|
||||
|
||||
def test_rank(self):
|
||||
pytest.importorskip('scipy.stats.special')
|
||||
rankdata = pytest.importorskip('scipy.stats.rankdata')
|
||||
|
||||
self.ts[::2] = np.nan
|
||||
self.ts[:10][::3] = 4.
|
||||
|
||||
ranks = self.ts.rank()
|
||||
oranks = self.ts.astype('O').rank()
|
||||
|
||||
assert_series_equal(ranks, oranks)
|
||||
|
||||
mask = np.isnan(self.ts)
|
||||
filled = self.ts.fillna(np.inf)
|
||||
|
||||
# rankdata returns a ndarray
|
||||
exp = Series(rankdata(filled), index=filled.index, name='ts')
|
||||
exp[mask] = np.nan
|
||||
|
||||
tm.assert_series_equal(ranks, exp)
|
||||
|
||||
iseries = Series(np.arange(5).repeat(2))
|
||||
|
||||
iranks = iseries.rank()
|
||||
exp = iseries.astype(float).rank()
|
||||
assert_series_equal(iranks, exp)
|
||||
iseries = Series(np.arange(5)) + 1.0
|
||||
exp = iseries / 5.0
|
||||
iranks = iseries.rank(pct=True)
|
||||
|
||||
assert_series_equal(iranks, exp)
|
||||
|
||||
iseries = Series(np.repeat(1, 100))
|
||||
exp = Series(np.repeat(0.505, 100))
|
||||
iranks = iseries.rank(pct=True)
|
||||
assert_series_equal(iranks, exp)
|
||||
|
||||
iseries[1] = np.nan
|
||||
exp = Series(np.repeat(50.0 / 99.0, 100))
|
||||
exp[1] = np.nan
|
||||
iranks = iseries.rank(pct=True)
|
||||
assert_series_equal(iranks, exp)
|
||||
|
||||
iseries = Series(np.arange(5)) + 1.0
|
||||
iseries[4] = np.nan
|
||||
exp = iseries / 4.0
|
||||
iranks = iseries.rank(pct=True)
|
||||
assert_series_equal(iranks, exp)
|
||||
|
||||
iseries = Series(np.repeat(np.nan, 100))
|
||||
exp = iseries.copy()
|
||||
iranks = iseries.rank(pct=True)
|
||||
assert_series_equal(iranks, exp)
|
||||
|
||||
iseries = Series(np.arange(5)) + 1
|
||||
iseries[4] = np.nan
|
||||
exp = iseries / 4.0
|
||||
iranks = iseries.rank(pct=True)
|
||||
assert_series_equal(iranks, exp)
|
||||
|
||||
rng = date_range('1/1/1990', periods=5)
|
||||
iseries = Series(np.arange(5), rng) + 1
|
||||
iseries.iloc[4] = np.nan
|
||||
exp = iseries / 4.0
|
||||
iranks = iseries.rank(pct=True)
|
||||
assert_series_equal(iranks, exp)
|
||||
|
||||
iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1])
|
||||
exp = Series([2, 1, 3, 5, 4, 6.0])
|
||||
iranks = iseries.rank()
|
||||
assert_series_equal(iranks, exp)
|
||||
|
||||
# GH 5968
|
||||
iseries = Series(['3 day', '1 day 10m', '-2 day', NaT],
|
||||
dtype='m8[ns]')
|
||||
exp = Series([3, 2, 1, np.nan])
|
||||
iranks = iseries.rank()
|
||||
assert_series_equal(iranks, exp)
|
||||
|
||||
values = np.array(
|
||||
[-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40
|
||||
], dtype='float64')
|
||||
random_order = np.random.permutation(len(values))
|
||||
iseries = Series(values[random_order])
|
||||
exp = Series(random_order + 1.0, dtype='float64')
|
||||
iranks = iseries.rank()
|
||||
assert_series_equal(iranks, exp)
|
||||
|
||||
def test_rank_categorical(self):
|
||||
# GH issue #15420 rank incorrectly orders ordered categories
|
||||
|
||||
# Test ascending/descending ranking for ordered categoricals
|
||||
exp = Series([1., 2., 3., 4., 5., 6.])
|
||||
exp_desc = Series([6., 5., 4., 3., 2., 1.])
|
||||
ordered = Series(
|
||||
['first', 'second', 'third', 'fourth', 'fifth', 'sixth']
|
||||
).astype(CategoricalDtype(categories=['first', 'second', 'third',
|
||||
'fourth', 'fifth', 'sixth'],
|
||||
ordered=True))
|
||||
assert_series_equal(ordered.rank(), exp)
|
||||
assert_series_equal(ordered.rank(ascending=False), exp_desc)
|
||||
|
||||
# Unordered categoricals should be ranked as objects
|
||||
unordered = Series(['first', 'second', 'third', 'fourth',
|
||||
'fifth', 'sixth']).astype(
|
||||
CategoricalDtype(categories=['first', 'second', 'third',
|
||||
'fourth', 'fifth', 'sixth'],
|
||||
ordered=False))
|
||||
exp_unordered = Series([2., 4., 6., 3., 1., 5.])
|
||||
res = unordered.rank()
|
||||
assert_series_equal(res, exp_unordered)
|
||||
|
||||
unordered1 = Series(
|
||||
[1, 2, 3, 4, 5, 6],
|
||||
).astype(CategoricalDtype([1, 2, 3, 4, 5, 6], False))
|
||||
exp_unordered1 = Series([1., 2., 3., 4., 5., 6.])
|
||||
res1 = unordered1.rank()
|
||||
assert_series_equal(res1, exp_unordered1)
|
||||
|
||||
# Test na_option for rank data
|
||||
na_ser = Series(
|
||||
['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN]
|
||||
).astype(CategoricalDtype(['first', 'second', 'third', 'fourth',
|
||||
'fifth', 'sixth', 'seventh'], True))
|
||||
|
||||
exp_top = Series([2., 3., 4., 5., 6., 7., 1.])
|
||||
exp_bot = Series([1., 2., 3., 4., 5., 6., 7.])
|
||||
exp_keep = Series([1., 2., 3., 4., 5., 6., np.NaN])
|
||||
|
||||
assert_series_equal(na_ser.rank(na_option='top'), exp_top)
|
||||
assert_series_equal(na_ser.rank(na_option='bottom'), exp_bot)
|
||||
assert_series_equal(na_ser.rank(na_option='keep'), exp_keep)
|
||||
|
||||
# Test na_option for rank data with ascending False
|
||||
exp_top = Series([7., 6., 5., 4., 3., 2., 1.])
|
||||
exp_bot = Series([6., 5., 4., 3., 2., 1., 7.])
|
||||
exp_keep = Series([6., 5., 4., 3., 2., 1., np.NaN])
|
||||
|
||||
assert_series_equal(
|
||||
na_ser.rank(na_option='top', ascending=False),
|
||||
exp_top
|
||||
)
|
||||
assert_series_equal(
|
||||
na_ser.rank(na_option='bottom', ascending=False),
|
||||
exp_bot
|
||||
)
|
||||
assert_series_equal(
|
||||
na_ser.rank(na_option='keep', ascending=False),
|
||||
exp_keep
|
||||
)
|
||||
|
||||
# Test with pct=True
|
||||
na_ser = Series(['first', 'second', 'third', 'fourth', np.NaN]).astype(
|
||||
CategoricalDtype(['first', 'second', 'third', 'fourth'], True))
|
||||
exp_top = Series([0.4, 0.6, 0.8, 1., 0.2])
|
||||
exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.])
|
||||
exp_keep = Series([0.25, 0.5, 0.75, 1., np.NaN])
|
||||
|
||||
assert_series_equal(na_ser.rank(na_option='top', pct=True), exp_top)
|
||||
assert_series_equal(na_ser.rank(na_option='bottom', pct=True), exp_bot)
|
||||
assert_series_equal(na_ser.rank(na_option='keep', pct=True), exp_keep)
|
||||
|
||||
def test_rank_signature(self):
|
||||
s = Series([0, 1])
|
||||
s.rank(method='average')
|
||||
pytest.raises(ValueError, s.rank, 'average')
|
||||
|
||||
@pytest.mark.parametrize('contents,dtype', [
|
||||
([-np.inf, -50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10,
|
||||
2, 40, np.inf],
|
||||
'float64'),
|
||||
([-np.inf, -50, -1, -1e-20, -1e-25, -1e-45, 0, 1e-40, 1e-20, 1e-10,
|
||||
2, 40, np.inf],
|
||||
'float32'),
|
||||
([np.iinfo(np.uint8).min, 1, 2, 100, np.iinfo(np.uint8).max],
|
||||
'uint8'),
|
||||
pytest.param([np.iinfo(np.int64).min, -100, 0, 1, 9999, 100000,
|
||||
1e10, np.iinfo(np.int64).max],
|
||||
'int64',
|
||||
marks=pytest.mark.xfail(
|
||||
reason="iNaT is equivalent to minimum value of dtype"
|
||||
"int64 pending issue #16674")),
|
||||
([NegInfinity(), '1', 'A', 'BA', 'Ba', 'C', Infinity()],
|
||||
'object')
|
||||
])
|
||||
def test_rank_inf(self, contents, dtype):
|
||||
dtype_na_map = {
|
||||
'float64': np.nan,
|
||||
'float32': np.nan,
|
||||
'int64': iNaT,
|
||||
'object': None
|
||||
}
|
||||
# Insert nans at random positions if underlying dtype has missing
|
||||
# value. Then adjust the expected order by adding nans accordingly
|
||||
# This is for testing whether rank calculation is affected
|
||||
# when values are interwined with nan values.
|
||||
values = np.array(contents, dtype=dtype)
|
||||
exp_order = np.array(range(len(values)), dtype='float64') + 1.0
|
||||
if dtype in dtype_na_map:
|
||||
na_value = dtype_na_map[dtype]
|
||||
nan_indices = np.random.choice(range(len(values)), 5)
|
||||
values = np.insert(values, nan_indices, na_value)
|
||||
exp_order = np.insert(exp_order, nan_indices, np.nan)
|
||||
# shuffle the testing array and expected results in the same way
|
||||
random_order = np.random.permutation(len(values))
|
||||
iseries = Series(values[random_order])
|
||||
exp = Series(exp_order[random_order], dtype='float64')
|
||||
iranks = iseries.rank()
|
||||
assert_series_equal(iranks, exp)
|
||||
|
||||
def test_rank_tie_methods(self):
|
||||
s = self.s
|
||||
|
||||
def _check(s, expected, method='average'):
|
||||
result = s.rank(method=method)
|
||||
tm.assert_series_equal(result, Series(expected))
|
||||
|
||||
dtypes = [None, object]
|
||||
disabled = set([(object, 'first')])
|
||||
results = self.results
|
||||
|
||||
for method, dtype in product(results, dtypes):
|
||||
if (dtype, method) in disabled:
|
||||
continue
|
||||
series = s if dtype is None else s.astype(dtype)
|
||||
_check(series, results[method], method=method)
|
||||
|
||||
@td.skip_if_no_scipy
|
||||
@pytest.mark.parametrize('ascending', [True, False])
|
||||
@pytest.mark.parametrize('method', ['average', 'min', 'max', 'first',
|
||||
'dense'])
|
||||
@pytest.mark.parametrize('na_option', ['top', 'bottom', 'keep'])
|
||||
def test_rank_tie_methods_on_infs_nans(self, method, na_option, ascending):
|
||||
dtypes = [('object', None, Infinity(), NegInfinity()),
|
||||
('float64', np.nan, np.inf, -np.inf)]
|
||||
chunk = 3
|
||||
disabled = set([('object', 'first')])
|
||||
|
||||
def _check(s, method, na_option, ascending):
|
||||
exp_ranks = {
|
||||
'average': ([2, 2, 2], [5, 5, 5], [8, 8, 8]),
|
||||
'min': ([1, 1, 1], [4, 4, 4], [7, 7, 7]),
|
||||
'max': ([3, 3, 3], [6, 6, 6], [9, 9, 9]),
|
||||
'first': ([1, 2, 3], [4, 5, 6], [7, 8, 9]),
|
||||
'dense': ([1, 1, 1], [2, 2, 2], [3, 3, 3])
|
||||
}
|
||||
ranks = exp_ranks[method]
|
||||
if na_option == 'top':
|
||||
order = [ranks[1], ranks[0], ranks[2]]
|
||||
elif na_option == 'bottom':
|
||||
order = [ranks[0], ranks[2], ranks[1]]
|
||||
else:
|
||||
order = [ranks[0], [np.nan] * chunk, ranks[1]]
|
||||
expected = order if ascending else order[::-1]
|
||||
expected = list(chain.from_iterable(expected))
|
||||
result = s.rank(method=method, na_option=na_option,
|
||||
ascending=ascending)
|
||||
tm.assert_series_equal(result, Series(expected, dtype='float64'))
|
||||
|
||||
for dtype, na_value, pos_inf, neg_inf in dtypes:
|
||||
in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk
|
||||
iseries = Series(in_arr, dtype=dtype)
|
||||
if (dtype, method) in disabled:
|
||||
continue
|
||||
_check(iseries, method, na_option, ascending)
|
||||
|
||||
def test_rank_desc_mix_nans_infs(self):
|
||||
# GH 19538
|
||||
# check descending ranking when mix nans and infs
|
||||
iseries = Series([1, np.nan, np.inf, -np.inf, 25])
|
||||
result = iseries.rank(ascending=False)
|
||||
exp = Series([3, np.nan, 1, 4, 2], dtype='float64')
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
def test_rank_methods_series(self):
    # Cross-check Series.rank against scipy.stats.rankdata for every tie
    # method, on data containing duplicates at three different scales.
    #
    # pytest.importorskip only accepts importable *module* names.  The
    # previous targets ('scipy.stats.special', 'scipy.stats.rankdata') are
    # not modules, so the import always failed and the test was silently
    # skipped.  Import the real module and take the function from it.
    stats = pytest.importorskip('scipy.stats')
    rankdata = stats.rankdata
    import scipy

    xs = np.random.randn(9)
    xs = np.concatenate([xs[i:] for i in range(0, 9, 2)])  # add duplicates
    np.random.shuffle(xs)

    index = [chr(ord('a') + i) for i in range(len(xs))]

    for vals in [xs, xs + 1e6, xs * 1e-6]:
        ts = Series(vals, index=index)

        for m in ['average', 'min', 'max', 'first', 'dense']:
            result = ts.rank(method=m)
            # scipy calls the order-of-appearance tie-break 'ordinal'
            sprank = rankdata(vals, m if m != 'first' else 'ordinal')
            expected = Series(sprank, index=index)

            if LooseVersion(scipy.__version__) >= LooseVersion('0.17.0'):
                expected = expected.astype('float64')
            tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_rank_dense_method(self):
|
||||
dtypes = ['O', 'f8', 'i8']
|
||||
in_out = [([1], [1]),
|
||||
([2], [1]),
|
||||
([0], [1]),
|
||||
([2, 2], [1, 1]),
|
||||
([1, 2, 3], [1, 2, 3]),
|
||||
([4, 2, 1], [3, 2, 1],),
|
||||
([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]),
|
||||
([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5])]
|
||||
|
||||
for ser, exp in in_out:
|
||||
for dtype in dtypes:
|
||||
s = Series(ser).astype(dtype)
|
||||
result = s.rank(method='dense')
|
||||
expected = Series(exp).astype(result.dtype)
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
def test_rank_descending(self):
    # rank(ascending=False) must agree with ranking the values mirrored
    # around the maximum, both for the default method and for each method
    # listed in self.results.
    for dtype, method in product(['O', 'f8', 'i8'], self.results):
        s = self.s.dropna() if 'i' in dtype else self.s.astype(dtype)
        mirrored = s.max() - s

        assert_series_equal(s.rank(ascending=False), mirrored.rank())

        # 'first' is skipped for object dtype
        if dtype == 'O' and method == 'first':
            continue

        assert_series_equal(s.rank(method=method, ascending=False),
                            mirrored.rank(method=method))
|
||||
|
||||
def test_rank_int(self):
    # Integer input (NaNs dropped first) must produce the non-NaN part of
    # each expected ranking stored in self.results.
    ints = self.s.dropna().astype('i8')

    for method, ranks in compat.iteritems(self.results):
        ranked = ints.rank(method=method)
        wanted = Series(ranks).dropna()
        # align labels: dropping NaNs leaves a gapped index on `ints`
        wanted.index = ranked.index
        assert_series_equal(ranked, wanted)
|
||||
|
||||
def test_rank_object_bug(self):
|
||||
# GH 13445
|
||||
|
||||
# smoke tests
|
||||
Series([np.nan] * 32).astype(object).rank(ascending=True)
|
||||
Series([np.nan] * 32).astype(object).rank(ascending=False)
|
||||
|
||||
def test_rank_modify_inplace(self):
|
||||
# GH 18521
|
||||
# Check rank does not mutate series
|
||||
s = Series([Timestamp('2017-01-05 10:20:27.569000'), NaT])
|
||||
expected = s.copy()
|
||||
|
||||
s.rank()
|
||||
result = s
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
|
||||
# GH15630, pct should be on 100% basis when method='dense'
@pytest.mark.parametrize('dtype', ['O', 'f8', 'i8'])
@pytest.mark.parametrize('ser, exp', [
    ([1], [1.]),
    ([1, 2], [1. / 2, 2. / 2]),
    ([2, 2], [1., 1.]),
    ([1, 2, 3], [1. / 3, 2. / 3, 3. / 3]),
    ([1, 2, 2], [1. / 2, 2. / 2, 2. / 2]),
    ([4, 2, 1], [3. / 3, 2. / 3, 1. / 3],),
    ([1, 1, 5, 5, 3], [1. / 3, 1. / 3, 3. / 3, 3. / 3, 2. / 3]),
    ([1, 1, 3, 3, 5, 5], [1. / 3, 1. / 3, 2. / 3, 2. / 3, 3. / 3, 3. / 3]),
    ([-5, -4, -3, -2, -1], [1. / 5, 2. / 5, 3. / 5, 4. / 5, 5. / 5])])
def test_rank_dense_pct(dtype, ser, exp):
    # Percentile ranks with method='dense' are scaled by the number of
    # distinct values, so the largest value always maps to 1.0.
    ranked = Series(ser).astype(dtype).rank(method='dense', pct=True)
    assert_series_equal(ranked, Series(exp).astype(ranked.dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtype', ['O', 'f8', 'i8'])
@pytest.mark.parametrize('ser, exp', [
    ([1], [1.]),
    ([1, 2], [1. / 2, 2. / 2]),
    ([2, 2], [1. / 2, 1. / 2]),
    ([1, 2, 3], [1. / 3, 2. / 3, 3. / 3]),
    ([1, 2, 2], [1. / 3, 2. / 3, 2. / 3]),
    ([4, 2, 1], [3. / 3, 2. / 3, 1. / 3],),
    ([1, 1, 5, 5, 3], [1. / 5, 1. / 5, 4. / 5, 4. / 5, 3. / 5]),
    ([1, 1, 3, 3, 5, 5], [1. / 6, 1. / 6, 3. / 6, 3. / 6, 5. / 6, 5. / 6]),
    ([-5, -4, -3, -2, -1], [1. / 5, 2. / 5, 3. / 5, 4. / 5, 5. / 5])])
def test_rank_min_pct(dtype, ser, exp):
    # Percentile ranks with method='min': tied values share the lowest
    # rank of their group, scaled by the series length.
    ranked = Series(ser).astype(dtype).rank(method='min', pct=True)
    assert_series_equal(ranked, Series(exp).astype(ranked.dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtype', ['O', 'f8', 'i8'])
@pytest.mark.parametrize('ser, exp', [
    ([1], [1.]),
    ([1, 2], [1. / 2, 2. / 2]),
    ([2, 2], [1., 1.]),
    ([1, 2, 3], [1. / 3, 2. / 3, 3. / 3]),
    ([1, 2, 2], [1. / 3, 3. / 3, 3. / 3]),
    ([4, 2, 1], [3. / 3, 2. / 3, 1. / 3],),
    ([1, 1, 5, 5, 3], [2. / 5, 2. / 5, 5. / 5, 5. / 5, 3. / 5]),
    ([1, 1, 3, 3, 5, 5], [2. / 6, 2. / 6, 4. / 6, 4. / 6, 6. / 6, 6. / 6]),
    ([-5, -4, -3, -2, -1], [1. / 5, 2. / 5, 3. / 5, 4. / 5, 5. / 5])])
def test_rank_max_pct(dtype, ser, exp):
    # Percentile ranks with method='max': tied values share the highest
    # rank of their group, scaled by the series length.
    ranked = Series(ser).astype(dtype).rank(method='max', pct=True)
    assert_series_equal(ranked, Series(exp).astype(ranked.dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtype', ['O', 'f8', 'i8'])
@pytest.mark.parametrize('ser, exp', [
    ([1], [1.]),
    ([1, 2], [1. / 2, 2. / 2]),
    ([2, 2], [1.5 / 2, 1.5 / 2]),
    ([1, 2, 3], [1. / 3, 2. / 3, 3. / 3]),
    ([1, 2, 2], [1. / 3, 2.5 / 3, 2.5 / 3]),
    ([4, 2, 1], [3. / 3, 2. / 3, 1. / 3],),
    ([1, 1, 5, 5, 3], [1.5 / 5, 1.5 / 5, 4.5 / 5, 4.5 / 5, 3. / 5]),
    ([1, 1, 3, 3, 5, 5],
     [1.5 / 6, 1.5 / 6, 3.5 / 6, 3.5 / 6, 5.5 / 6, 5.5 / 6]),
    ([-5, -4, -3, -2, -1], [1. / 5, 2. / 5, 3. / 5, 4. / 5, 5. / 5])])
def test_rank_average_pct(dtype, ser, exp):
    # Percentile ranks with method='average': tied values share the mean
    # rank of their group, scaled by the series length.
    ranked = Series(ser).astype(dtype).rank(method='average', pct=True)
    assert_series_equal(ranked, Series(exp).astype(ranked.dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtype', ['f8', 'i8'])
@pytest.mark.parametrize('ser, exp', [
    ([1], [1.]),
    ([1, 2], [1. / 2, 2. / 2]),
    ([2, 2], [1. / 2, 2. / 2.]),
    ([1, 2, 3], [1. / 3, 2. / 3, 3. / 3]),
    ([1, 2, 2], [1. / 3, 2. / 3, 3. / 3]),
    ([4, 2, 1], [3. / 3, 2. / 3, 1. / 3],),
    ([1, 1, 5, 5, 3], [1. / 5, 2. / 5, 4. / 5, 5. / 5, 3. / 5]),
    ([1, 1, 3, 3, 5, 5], [1. / 6, 2. / 6, 3. / 6, 4. / 6, 5. / 6, 6. / 6]),
    ([-5, -4, -3, -2, -1], [1. / 5, 2. / 5, 3. / 5, 4. / 5, 5. / 5])])
def test_rank_first_pct(dtype, ser, exp):
    # Percentile ranks with method='first': ties are broken by order of
    # appearance.  Object dtype is not parametrized here.
    ranked = Series(ser).astype(dtype).rank(method='first', pct=True)
    assert_series_equal(ranked, Series(exp).astype(ranked.dtype))
|
||||
@@ -1,251 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pandas._libs.lib as lib
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class TestSeriesReplace(TestData):
|
||||
def test_replace(self):
    # Broad coverage of Series.replace: float data, object data holding
    # strings, mixed containers, datetime-like data and malformed input.
    size = 100
    floats = pd.Series(np.random.randn(size))
    floats[0:4] = np.nan
    floats[6:10] = 0

    # one-element list of targets with a scalar replacement
    floats.replace([np.nan], -1, inplace=True)
    tm.assert_series_equal(floats, floats.fillna(-1))

    zeros_gone = floats.replace(0., np.nan)
    floats[floats == 0.] = np.nan
    tm.assert_series_equal(zeros_gone, floats)

    mixed = pd.Series(np.fabs(np.random.randn(size)),
                      tm.makeDateIndex(size), dtype=object)
    mixed[:5] = np.nan
    mixed[6:10] = 'foo'
    mixed[20:30] = 'bar'
    # the three stretches that were overwritten above
    spans = (slice(None, 5), slice(6, 10), slice(20, 30))

    # several targets, one replacement value
    same_value = mixed.replace([np.nan, 'foo', 'bar'], -1)
    for span in spans:
        assert (same_value[span] == -1).all()
    # the source series must be left alone
    assert (pd.isna(mixed[:5])).all()

    # one replacement value per target, given as a dict
    by_dict = mixed.replace({np.nan: -1, 'foo': -2, 'bar': -3})
    for span, value in zip(spans, (-1, -2, -3)):
        assert (by_dict[span] == value).all()
    assert (pd.isna(mixed[:5])).all()

    # the same mapping given as two parallel lists
    by_lists = mixed.replace([np.nan, 'foo', 'bar'], [-1, -2, -3])
    tm.assert_series_equal(by_dict, by_lists)

    # in-place variant
    mixed.replace([np.nan, 'foo', 'bar'], -1, inplace=True)
    for span in spans:
        assert (mixed[span] == -1).all()

    numeric = pd.Series([np.nan, 0, np.inf])
    tm.assert_series_equal(numeric.replace(np.nan, 0), numeric.fillna(0))

    hetero = pd.Series([np.nan, 0, 'foo', 'bar', np.inf, None, lib.NaT])
    tm.assert_series_equal(hetero.replace(np.nan, 0), hetero.fillna(0))
    inf_zeroed = hetero.copy()
    inf_zeroed[4] = 0
    tm.assert_series_equal(hetero.replace(np.inf, 0), inf_zeroed)

    stamps = pd.Series(self.ts.index)
    tm.assert_series_equal(stamps.replace(np.nan, 0), stamps.fillna(0))

    # malformed: target and value lists of different lengths
    with pytest.raises(ValueError):
        stamps.replace([1, 2, 3], [np.nan, 0])

    # make sure that we aren't just masking a TypeError because bools don't
    # implement indexing
    with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'):
        stamps.replace([1, 2], [np.nan, 0])

    ints = pd.Series([0, 1, 2, 3, 4])
    tm.assert_series_equal(ints.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0]),
                           pd.Series([4, 3, 2, 1, 0]))
|
||||
|
||||
def test_replace_gh5319(self):
    # API change from 0.12?
    # GH 5319
    # replace() called with only a target (list or scalar form) is
    # expected to give the same result as ffill().
    for target in ([np.nan], np.nan):
        gappy = pd.Series([0, np.nan, 2, 3, 4])
        tm.assert_series_equal(gappy.replace(target), gappy.ffill())

    # GH 5797
    # Timestamps can be replaced through a dict or a scalar pair.
    dates = pd.Series(pd.date_range('20130101', periods=5))
    wanted = dates.copy()
    wanted.loc[2] = pd.Timestamp('20120101')

    via_dict = dates.replace({pd.Timestamp('20130103'):
                              pd.Timestamp('20120101')})
    tm.assert_series_equal(via_dict, wanted)

    via_scalars = dates.replace(pd.Timestamp('20130103'),
                                pd.Timestamp('20120101'))
    tm.assert_series_equal(via_scalars, wanted)
|
||||
|
||||
def test_replace_with_single_list(self):
    # replace() given only a list of targets, both out of place and in
    # place, plus a failing call that must not corrupt the series.
    base = pd.Series([0, 1, 2, 3, 4])
    tm.assert_series_equal(base.replace([1, 2, 3]),
                           pd.Series([0, 0, 0, 0, 4]))

    in_place = base.copy()
    in_place.replace([1, 2, 3], inplace=True)
    tm.assert_series_equal(in_place, pd.Series([0, 0, 0, 0, 4]))

    # make sure things don't get corrupted when fillna call fails
    untouched = base.copy()
    with pytest.raises(ValueError):
        untouched.replace([1, 2, 3], inplace=True, method='crash_cymbal')
    tm.assert_series_equal(untouched, base)
|
||||
|
||||
def test_replace_mixed_types(self):
    # Replacing int64 values with values of other types must upcast the
    # result as needed, identically for the copying and in-place forms.
    base = pd.Series(np.arange(5), dtype='int64')

    # (targets, replacement values, expected result)
    cases = [
        # MUST upcast to float
        ([3], [3.0], pd.Series([0., 1., 2., 3., 4.])),
        # MUST upcast to float
        ([3], [3.5], pd.Series([0, 1, 2, 3.5, 4])),
        # casts to object
        ([3, 4], [3.5, 'a'], pd.Series([0, 1, 2, 3.5, 'a'])),
        # again casts to object
        ([3, 4], [3.5, pd.Timestamp('20130101')],
         pd.Series([0, 1, 2, 3.5, pd.Timestamp('20130101')])),
        # casts to object
        ([3, 4], [3.5, True],
         pd.Series([0, 1, 2, 3.5, True], dtype='object')),
    ]
    for to_rep, val, expected in cases:
        copied = base.replace(to_rep, val)
        in_place = base.copy()
        in_place.replace(to_rep, val, inplace=True)
        tm.assert_series_equal(expected, copied)
        tm.assert_series_equal(expected, in_place)

    # test an object with dates + floats + integers + strings
    days = pd.date_range('1/1/2001', '1/10/2001',
                         freq='D').to_series().reset_index(drop=True)
    swapped = days.astype(object).replace(
        [days[0], days[1], days[2]], [1.0, 2, 'a'])
    wanted = pd.Series([1.0, 2, 'a'] + days[3:].tolist(), dtype=object)
    tm.assert_series_equal(swapped, wanted)
|
||||
|
||||
def test_replace_bool_with_string_no_op(self):
|
||||
s = pd.Series([True, False, True])
|
||||
result = s.replace('fun', 'in-the-sun')
|
||||
tm.assert_series_equal(s, result)
|
||||
|
||||
def test_replace_bool_with_string(self):
|
||||
# nonexistent elements
|
||||
s = pd.Series([True, False, True])
|
||||
result = s.replace(True, '2u')
|
||||
expected = pd.Series(['2u', False, '2u'])
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
def test_replace_bool_with_bool(self):
|
||||
s = pd.Series([True, False, True])
|
||||
result = s.replace(True, False)
|
||||
expected = pd.Series([False] * len(s))
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
def test_replace_with_dict_with_bool_keys(self):
    # A replacement dict mixing string and boolean keys must be rejected
    # with a TypeError on a boolean series.
    flags = pd.Series([True, False, True])
    mapping = {'asdf': 'asdb', True: 'yes'}
    with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'):
        flags.replace(mapping)
|
||||
|
||||
def test_replace2(self):
    # Object-dtype series holding NaN and strings: replace by one value,
    # by a dict, by parallel lists, and in place.
    size = 100
    mixed = pd.Series(np.fabs(np.random.randn(size)),
                      tm.makeDateIndex(size), dtype=object)
    mixed[:5] = np.nan
    mixed[6:10] = 'foo'
    mixed[20:30] = 'bar'
    # the three stretches that were overwritten above
    spans = (slice(None, 5), slice(6, 10), slice(20, 30))

    # several targets, one replacement value
    same_value = mixed.replace([np.nan, 'foo', 'bar'], -1)
    for span in spans:
        assert (same_value[span] == -1).all()
    # the source series must be left alone
    assert (pd.isna(mixed[:5])).all()

    # one replacement value per target, given as a dict
    by_dict = mixed.replace({np.nan: -1, 'foo': -2, 'bar': -3})
    for span, value in zip(spans, (-1, -2, -3)):
        assert (by_dict[span] == value).all()
    assert (pd.isna(mixed[:5])).all()

    # the same mapping given as two parallel lists
    by_lists = mixed.replace([np.nan, 'foo', 'bar'], [-1, -2, -3])
    tm.assert_series_equal(by_dict, by_lists)

    # in-place variant
    mixed.replace([np.nan, 'foo', 'bar'], -1, inplace=True)
    for span in spans:
        assert (mixed[span] == -1).all()
|
||||
|
||||
def test_replace_with_empty_dictlike(self):
    # GH 15289
    # An empty dict or empty Series as the mapping is a no-op.
    letters = pd.Series(list('abcd'))
    for empty_mapping in (dict(), pd.Series([])):
        tm.assert_series_equal(letters, letters.replace(empty_mapping))
|
||||
|
||||
def test_replace_string_with_number(self):
|
||||
# GH 15743
|
||||
s = pd.Series([1, 2, 3])
|
||||
result = s.replace('2', np.nan)
|
||||
expected = pd.Series([1, 2, 3])
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
def test_replace_unicode_with_number(self):
|
||||
# GH 15743
|
||||
s = pd.Series([1, 2, 3])
|
||||
result = s.replace(u'2', np.nan)
|
||||
expected = pd.Series([1, 2, 3])
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
def test_replace_mixed_types_with_string(self):
    # Testing mixed
    # Only the int 2 and the string '4' are targeted; the int 4 stays.
    mixed = pd.Series([1, 2, 3, '4', 4, 5])
    wanted = pd.Series([1, np.nan, 3, np.nan, 4, 5])
    tm.assert_series_equal(wanted, mixed.replace([2, '4'], np.nan))
|
||||
@@ -1,478 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from pandas import (Index, Series, DataFrame, date_range, option_context,
|
||||
Categorical, period_range, timedelta_range)
|
||||
from pandas.core.index import MultiIndex
|
||||
from pandas.core.base import StringMixin
|
||||
|
||||
from pandas.compat import lrange, range, u
|
||||
from pandas import compat
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class TestSeriesRepr(TestData):
|
||||
|
||||
def test_multilevel_name_print(self):
|
||||
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
|
||||
'three']],
|
||||
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
|
||||
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
|
||||
names=['first', 'second'])
|
||||
s = Series(lrange(0, len(index)), index=index, name='sth')
|
||||
expected = ["first second", "foo one 0",
|
||||
" two 1", " three 2",
|
||||
"bar one 3", " two 4",
|
||||
"baz two 5", " three 6",
|
||||
"qux one 7", " two 8",
|
||||
" three 9", "Name: sth, dtype: int64"]
|
||||
expected = "\n".join(expected)
|
||||
assert repr(s) == expected
|
||||
|
||||
def test_name_printing(self):
    # The repr footer shows "Name: ..." exactly when a name is set.
    # Checked on a small series and on a big one (diff code path).
    for sample in (Series([0, 1, 2]), Series(lrange(0, 1000))):
        sample.name = "test"
        assert "Name: test" in repr(sample)

        sample.name = None
        assert "Name:" not in repr(sample)

    # index-only construction with a name
    dated = Series(index=date_range('20010101', '20020101'), name='test')
    assert "Name: test" in repr(dated)
|
||||
|
||||
def test_repr(self):
    # Mostly smoke tests: str()/repr() must not raise on a variety of
    # series, plus a few checks on the text that is produced.
    str(self.ts)
    str(self.series)
    str(self.series.astype(int))
    str(self.objSeries)

    # long series, ascending and descending integer index
    for labels in (np.arange(1000), np.arange(1000, 0, step=-1)):
        str(Series(tm.randn(1000), index=labels))

    # empty
    str(self.empty)

    # with NaNs
    self.series[5:7] = np.NaN
    str(self.series)

    # with Nones
    with_nones = self.ts.astype('O')
    with_nones[::2] = None
    repr(with_nones)

    # various names
    names = ['', 1, 1.2, 'foo', u('\u03B1\u03B2\u03B3'),
             'loooooooooooooooooooooooooooooooooooooooooooooooooooong',
             ('foo', 'bar', 'baz'), (1, 2), ('foo', 1, 2.3),
             (u('\u03B1'), u('\u03B2'), u('\u03B3')),
             (u('\u03B1'), 'bar')]
    for name in names:
        self.series.name = name
        repr(self.series)

    repr(Series(tm.randn(1000), index=np.arange(1000),
                name=('foo', 'bar', 'baz')))

    # 0 as name, then the same on a series long enough for the tidy repr
    for length in (100, 1001):
        assert "Name: 0" in repr(Series(np.random.randn(length), name=0))

    # control characters in values, name and index must not leak through
    escaped = repr(Series(["a\n\r\tb"], name="a\n\r\td",
                          index=["a\n\r\tf"]))
    for raw in ("\t", "\r", "a\n"):
        assert raw not in escaped

    # with empty series (#4651)
    assert (repr(Series([], dtype=np.int64, name='foo')) ==
            'Series([], Name: foo, dtype: int64)')
    assert (repr(Series([], dtype=np.int64, name=None)) ==
            'Series([], dtype: int64)')
|
||||
|
||||
def test_tidy_repr(self):
    # Smoke test: a long, named series of non-ASCII strings must be
    # representable.
    hebrew = Series([u("\u05d0")] * 1000)
    hebrew.name = 'title1'
    repr(hebrew)  # should not raise exception
|
||||
|
||||
@tm.capture_stderr
def test_repr_bool_fails(self):
    # A series whose elements are DataFrames must repr without writing
    # anything to stderr.
    frames = Series([DataFrame(np.random.randn(2, 2)) for i in range(5)])

    # It works (with no Cython exception barf)!
    repr(frames)

    assert sys.stderr.getvalue() == ''
|
||||
|
||||
def test_repr_name_iterable_indexable(self):
    # Smoke test: repr must cope with a numpy scalar name and with a
    # tuple-of-unicode name.
    named = Series([1, 2, 3], name=np.int64(3))

    # it works!
    repr(named)

    named.name = (u("\u05d0"), ) * 2
    repr(named)
|
||||
|
||||
def test_repr_should_return_str(self):
    # https://docs.python.org/3/reference/datamodel.html#object.__repr__
    # ...The return value must be a string object.

    # (str on py2.x, str (unicode) on py3)

    data = [8, 5, 3, 5]
    index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"), u("\u03c6")]
    df = Series(data, index=index1)
    # The closing parenthesis used to sit after `== str`, so the assert
    # evaluated type(<bool>), which is always truthy, and could never
    # fail.  Check the type of the repr result itself.
    assert type(df.__repr__()) == str  # both py2 / 3
|
||||
|
||||
def test_repr_max_rows(self):
|
||||
# GH 6863
|
||||
with pd.option_context('max_rows', None):
|
||||
str(Series(range(1001))) # should not raise exception
|
||||
|
||||
def test_unicode_string_with_unicode(self):
    # Smoke test: converting a series with non-ASCII data and name to the
    # interpreter's text type must not raise.
    df = Series([u("\u05d0")], name=u("\u05d1"))
    to_text = str if compat.PY3 else compat.text_type
    to_text(df)
|
||||
|
||||
def test_bytestring_with_unicode(self):
    # Smoke test: converting a series with non-ASCII data and name to the
    # interpreter's byte-string type must not raise.
    df = Series([u("\u05d0")], name=u("\u05d1"))
    to_bytes = bytes if compat.PY3 else str
    to_bytes(df)
|
||||
|
||||
def test_timeseries_repr_object_dtype(self):
    # repr of time series: object-dtype datetime index, a regular time
    # series (footer starts with 'Freq:'), and a random re-sample of it.
    start = datetime(2000, 1, 1)
    stamps = Index([start + timedelta(i) for i in range(1000)],
                   dtype=object)
    repr(Series(np.random.randn(len(stamps)), stamps))

    regular = tm.makeTimeSeries(1000)
    footer = repr(regular).splitlines()[-1]
    assert footer.startswith('Freq:')

    # smoke only: the last line of this repr is computed but not checked
    picks = np.random.randint(0, len(regular) - 1, 400)
    resampled = regular.iloc[picks]
    repr(resampled).splitlines()[-1]
|
||||
|
||||
def test_latex_repr(self):
|
||||
result = r"""\begin{tabular}{ll}
|
||||
\toprule
|
||||
{} & 0 \\
|
||||
\midrule
|
||||
0 & $\alpha$ \\
|
||||
1 & b \\
|
||||
2 & c \\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
"""
|
||||
with option_context('display.latex.escape', False,
|
||||
'display.latex.repr', True):
|
||||
s = Series([r'$\alpha$', 'b', 'c'])
|
||||
assert result == s._repr_latex_()
|
||||
|
||||
assert s._repr_latex_() is None
|
||||
|
||||
|
||||
class TestCategoricalRepr(object):
|
||||
|
||||
def test_categorical_repr_unicode(self):
    # GH#21002 if len(index) > 60, sys.getdefaultencoding()=='ascii',
    # and we are working in PY2, then rendering a Categorical could raise
    # UnicodeDecodeError by trying to decode when it shouldn't

    class County(StringMixin):
        name = u'San Sebastián'
        state = u'PR'

        def __unicode__(self):
            return self.name + u', ' + self.state

    counties = pd.Categorical([County() for n in range(61)])
    ser = pd.Index(counties).to_series()

    if not compat.PY3:
        # set sys.defaultencoding to ascii, then change it back after
        # the test
        with tm.set_defaultencoding('ascii'):
            repr(ser)
            str(ser)
        return

    # no reloading of sys, just check that the default (utf8) works
    # as expected
    repr(ser)
    str(ser)
|
||||
|
||||
def test_categorical_repr(self):
|
||||
a = Series(Categorical([1, 2, 3, 4]))
|
||||
exp = u("0 1\n1 2\n2 3\n3 4\n" +
|
||||
"dtype: category\nCategories (4, int64): [1, 2, 3, 4]")
|
||||
|
||||
assert exp == a.__unicode__()
|
||||
|
||||
a = Series(Categorical(["a", "b"] * 25))
|
||||
exp = u("0 a\n1 b\n" + " ..\n" + "48 a\n49 b\n" +
|
||||
"Length: 50, dtype: category\nCategories (2, object): [a, b]")
|
||||
with option_context("display.max_rows", 5):
|
||||
assert exp == repr(a)
|
||||
|
||||
levs = list("abcdefghijklmnopqrstuvwxyz")
|
||||
a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
|
||||
exp = u("0 a\n1 b\n" + "dtype: category\n"
|
||||
"Categories (26, object): [a < b < c < d ... w < x < y < z]")
|
||||
assert exp == a.__unicode__()
|
||||
|
||||
def test_categorical_series_repr(self):
|
||||
s = Series(Categorical([1, 2, 3]))
|
||||
exp = """0 1
|
||||
1 2
|
||||
2 3
|
||||
dtype: category
|
||||
Categories (3, int64): [1, 2, 3]"""
|
||||
|
||||
assert repr(s) == exp
|
||||
|
||||
s = Series(Categorical(np.arange(10)))
|
||||
exp = """0 0
|
||||
1 1
|
||||
2 2
|
||||
3 3
|
||||
4 4
|
||||
5 5
|
||||
6 6
|
||||
7 7
|
||||
8 8
|
||||
9 9
|
||||
dtype: category
|
||||
Categories (10, int64): [0, 1, 2, 3, ..., 6, 7, 8, 9]"""
|
||||
|
||||
assert repr(s) == exp
|
||||
|
||||
def test_categorical_series_repr_ordered(self):
|
||||
s = Series(Categorical([1, 2, 3], ordered=True))
|
||||
exp = """0 1
|
||||
1 2
|
||||
2 3
|
||||
dtype: category
|
||||
Categories (3, int64): [1 < 2 < 3]"""
|
||||
|
||||
assert repr(s) == exp
|
||||
|
||||
s = Series(Categorical(np.arange(10), ordered=True))
|
||||
exp = """0 0
|
||||
1 1
|
||||
2 2
|
||||
3 3
|
||||
4 4
|
||||
5 5
|
||||
6 6
|
||||
7 7
|
||||
8 8
|
||||
9 9
|
||||
dtype: category
|
||||
Categories (10, int64): [0 < 1 < 2 < 3 ... 6 < 7 < 8 < 9]"""
|
||||
|
||||
assert repr(s) == exp
|
||||
|
||||
def test_categorical_series_repr_datetime(self):
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
s = Series(Categorical(idx))
|
||||
exp = """0 2011-01-01 09:00:00
|
||||
1 2011-01-01 10:00:00
|
||||
2 2011-01-01 11:00:00
|
||||
3 2011-01-01 12:00:00
|
||||
4 2011-01-01 13:00:00
|
||||
dtype: category
|
||||
Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00,
|
||||
2011-01-01 12:00:00, 2011-01-01 13:00:00]""" # noqa
|
||||
|
||||
assert repr(s) == exp
|
||||
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5,
|
||||
tz='US/Eastern')
|
||||
s = Series(Categorical(idx))
|
||||
exp = """0 2011-01-01 09:00:00-05:00
|
||||
1 2011-01-01 10:00:00-05:00
|
||||
2 2011-01-01 11:00:00-05:00
|
||||
3 2011-01-01 12:00:00-05:00
|
||||
4 2011-01-01 13:00:00-05:00
|
||||
dtype: category
|
||||
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,
|
||||
2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,
|
||||
2011-01-01 13:00:00-05:00]""" # noqa
|
||||
|
||||
assert repr(s) == exp
|
||||
|
||||
def test_categorical_series_repr_datetime_ordered(self):
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
s = Series(Categorical(idx, ordered=True))
|
||||
exp = """0 2011-01-01 09:00:00
|
||||
1 2011-01-01 10:00:00
|
||||
2 2011-01-01 11:00:00
|
||||
3 2011-01-01 12:00:00
|
||||
4 2011-01-01 13:00:00
|
||||
dtype: category
|
||||
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
|
||||
2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa
|
||||
|
||||
assert repr(s) == exp
|
||||
|
||||
idx = date_range('2011-01-01 09:00', freq='H', periods=5,
|
||||
tz='US/Eastern')
|
||||
s = Series(Categorical(idx, ordered=True))
|
||||
exp = """0 2011-01-01 09:00:00-05:00
|
||||
1 2011-01-01 10:00:00-05:00
|
||||
2 2011-01-01 11:00:00-05:00
|
||||
3 2011-01-01 12:00:00-05:00
|
||||
4 2011-01-01 13:00:00-05:00
|
||||
dtype: category
|
||||
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
|
||||
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
|
||||
2011-01-01 13:00:00-05:00]""" # noqa
|
||||
|
||||
assert repr(s) == exp
|
||||
|
||||
def test_categorical_series_repr_period(self):
|
||||
idx = period_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
s = Series(Categorical(idx))
|
||||
exp = """0 2011-01-01 09:00
|
||||
1 2011-01-01 10:00
|
||||
2 2011-01-01 11:00
|
||||
3 2011-01-01 12:00
|
||||
4 2011-01-01 13:00
|
||||
dtype: category
|
||||
Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
|
||||
2011-01-01 13:00]""" # noqa
|
||||
|
||||
assert repr(s) == exp
|
||||
|
||||
idx = period_range('2011-01', freq='M', periods=5)
|
||||
s = Series(Categorical(idx))
|
||||
exp = """0 2011-01
|
||||
1 2011-02
|
||||
2 2011-03
|
||||
3 2011-04
|
||||
4 2011-05
|
||||
dtype: category
|
||||
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
|
||||
|
||||
assert repr(s) == exp
|
||||
|
||||
def test_categorical_series_repr_period_ordered(self):
|
||||
idx = period_range('2011-01-01 09:00', freq='H', periods=5)
|
||||
s = Series(Categorical(idx, ordered=True))
|
||||
exp = """0 2011-01-01 09:00
|
||||
1 2011-01-01 10:00
|
||||
2 2011-01-01 11:00
|
||||
3 2011-01-01 12:00
|
||||
4 2011-01-01 13:00
|
||||
dtype: category
|
||||
Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
|
||||
2011-01-01 13:00]""" # noqa
|
||||
|
||||
assert repr(s) == exp
|
||||
|
||||
idx = period_range('2011-01', freq='M', periods=5)
|
||||
s = Series(Categorical(idx, ordered=True))
|
||||
exp = """0 2011-01
|
||||
1 2011-02
|
||||
2 2011-03
|
||||
3 2011-04
|
||||
4 2011-05
|
||||
dtype: category
|
||||
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
|
||||
|
||||
assert repr(s) == exp
|
||||
|
||||
def test_categorical_series_repr_timedelta(self):
|
||||
idx = timedelta_range('1 days', periods=5)
|
||||
s = Series(Categorical(idx))
|
||||
exp = """0 1 days
|
||||
1 2 days
|
||||
2 3 days
|
||||
3 4 days
|
||||
4 5 days
|
||||
dtype: category
|
||||
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""
|
||||
|
||||
assert repr(s) == exp
|
||||
|
||||
idx = timedelta_range('1 hours', periods=10)
|
||||
s = Series(Categorical(idx))
|
||||
exp = """0 0 days 01:00:00
|
||||
1 1 days 01:00:00
|
||||
2 2 days 01:00:00
|
||||
3 3 days 01:00:00
|
||||
4 4 days 01:00:00
|
||||
5 5 days 01:00:00
|
||||
6 6 days 01:00:00
|
||||
7 7 days 01:00:00
|
||||
8 8 days 01:00:00
|
||||
9 9 days 01:00:00
|
||||
dtype: category
|
||||
Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
|
||||
3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00,
|
||||
8 days 01:00:00, 9 days 01:00:00]""" # noqa
|
||||
|
||||
assert repr(s) == exp
|
||||
|
||||
def test_categorical_series_repr_timedelta_ordered(self):
|
||||
idx = timedelta_range('1 days', periods=5)
|
||||
s = Series(Categorical(idx, ordered=True))
|
||||
exp = """0 1 days
|
||||
1 2 days
|
||||
2 3 days
|
||||
3 4 days
|
||||
4 5 days
|
||||
dtype: category
|
||||
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa
|
||||
|
||||
assert repr(s) == exp
|
||||
|
||||
idx = timedelta_range('1 hours', periods=10)
|
||||
s = Series(Categorical(idx, ordered=True))
|
||||
exp = """0 0 days 01:00:00
|
||||
1 1 days 01:00:00
|
||||
2 2 days 01:00:00
|
||||
3 3 days 01:00:00
|
||||
4 4 days 01:00:00
|
||||
5 5 days 01:00:00
|
||||
6 6 days 01:00:00
|
||||
7 7 days 01:00:00
|
||||
8 8 days 01:00:00
|
||||
9 9 days 01:00:00
|
||||
dtype: category
|
||||
Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
|
||||
3 days 01:00:00 ... 6 days 01:00:00 < 7 days 01:00:00 <
|
||||
8 days 01:00:00 < 9 days 01:00:00]""" # noqa
|
||||
|
||||
assert repr(s) == exp
|
||||
@@ -1,267 +0,0 @@
|
||||
# coding=utf-8
|
||||
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
from pandas import DataFrame, Series, MultiIndex, IntervalIndex, Categorical
|
||||
|
||||
from pandas.util.testing import assert_series_equal, assert_almost_equal
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from .common import TestData
|
||||
|
||||
|
||||
class TestSeriesSorting(TestData):
|
||||
|
||||
def test_sortlevel_deprecated(self):
    # see gh-9816
    # Series.sortlevel must emit a FutureWarning.
    series = self.ts.copy()
    with tm.assert_produces_warning(FutureWarning):
        series.sortlevel()
|
||||
|
||||
def test_sort_values(self):
|
||||
|
||||
# check indexes are reordered corresponding with the values
|
||||
ser = Series([3, 2, 4, 1], ['A', 'B', 'C', 'D'])
|
||||
expected = Series([1, 2, 3, 4], ['D', 'B', 'A', 'C'])
|
||||
result = ser.sort_values()
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
ts = self.ts.copy()
|
||||
ts[:5] = np.NaN
|
||||
vals = ts.values
|
||||
|
||||
result = ts.sort_values()
|
||||
assert np.isnan(result[-5:]).all()
|
||||
tm.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:]))
|
||||
|
||||
# na_position
|
||||
result = ts.sort_values(na_position='first')
|
||||
assert np.isnan(result[:5]).all()
|
||||
tm.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:]))
|
||||
|
||||
# something object-type
|
||||
ser = Series(['A', 'B'], [1, 2])
|
||||
# no failure
|
||||
ser.sort_values()
|
||||
|
||||
# ascending=False
|
||||
ordered = ts.sort_values(ascending=False)
|
||||
expected = np.sort(ts.dropna().values)[::-1]
|
||||
assert_almost_equal(expected, ordered.dropna().values)
|
||||
ordered = ts.sort_values(ascending=False, na_position='first')
|
||||
assert_almost_equal(expected, ordered.dropna().values)
|
||||
|
||||
# ascending=[False] should behave the same as ascending=False
|
||||
ordered = ts.sort_values(ascending=[False])
|
||||
expected = ts.sort_values(ascending=False)
|
||||
assert_series_equal(expected, ordered)
|
||||
ordered = ts.sort_values(ascending=[False], na_position='first')
|
||||
expected = ts.sort_values(ascending=False, na_position='first')
|
||||
assert_series_equal(expected, ordered)
|
||||
|
||||
pytest.raises(ValueError,
|
||||
lambda: ts.sort_values(ascending=None))
|
||||
pytest.raises(ValueError,
|
||||
lambda: ts.sort_values(ascending=[]))
|
||||
pytest.raises(ValueError,
|
||||
lambda: ts.sort_values(ascending=[1, 2, 3]))
|
||||
pytest.raises(ValueError,
|
||||
lambda: ts.sort_values(ascending=[False, False]))
|
||||
pytest.raises(ValueError,
|
||||
lambda: ts.sort_values(ascending='foobar'))
|
||||
|
||||
# inplace=True
|
||||
ts = self.ts.copy()
|
||||
ts.sort_values(ascending=False, inplace=True)
|
||||
tm.assert_series_equal(ts, self.ts.sort_values(ascending=False))
|
||||
tm.assert_index_equal(ts.index,
|
||||
self.ts.sort_values(ascending=False).index)
|
||||
|
||||
# GH 5856/5853
|
||||
# Series.sort_values operating on a view
|
||||
df = DataFrame(np.random.randn(10, 4))
|
||||
s = df.iloc[:, 0]
|
||||
|
||||
def f():
|
||||
s.sort_values(inplace=True)
|
||||
|
||||
pytest.raises(ValueError, f)
|
||||
|
||||
def test_sort_index(self):
|
||||
rindex = list(self.ts.index)
|
||||
random.shuffle(rindex)
|
||||
|
||||
random_order = self.ts.reindex(rindex)
|
||||
sorted_series = random_order.sort_index()
|
||||
assert_series_equal(sorted_series, self.ts)
|
||||
|
||||
# descending
|
||||
sorted_series = random_order.sort_index(ascending=False)
|
||||
assert_series_equal(sorted_series,
|
||||
self.ts.reindex(self.ts.index[::-1]))
|
||||
|
||||
# compat on level
|
||||
sorted_series = random_order.sort_index(level=0)
|
||||
assert_series_equal(sorted_series, self.ts)
|
||||
|
||||
# compat on axis
|
||||
sorted_series = random_order.sort_index(axis=0)
|
||||
assert_series_equal(sorted_series, self.ts)
|
||||
|
||||
pytest.raises(ValueError, lambda: random_order.sort_values(axis=1))
|
||||
|
||||
sorted_series = random_order.sort_index(level=0, axis=0)
|
||||
assert_series_equal(sorted_series, self.ts)
|
||||
|
||||
pytest.raises(ValueError,
|
||||
lambda: random_order.sort_index(level=0, axis=1))
|
||||
|
||||
def test_sort_index_inplace(self):
|
||||
|
||||
# For #11402
|
||||
rindex = list(self.ts.index)
|
||||
random.shuffle(rindex)
|
||||
|
||||
# descending
|
||||
random_order = self.ts.reindex(rindex)
|
||||
result = random_order.sort_index(ascending=False, inplace=True)
|
||||
|
||||
assert result is None
|
||||
tm.assert_series_equal(random_order, self.ts.reindex(
|
||||
self.ts.index[::-1]))
|
||||
|
||||
# ascending
|
||||
random_order = self.ts.reindex(rindex)
|
||||
result = random_order.sort_index(ascending=True, inplace=True)
|
||||
|
||||
assert result is None
|
||||
tm.assert_series_equal(random_order, self.ts)
|
||||
|
||||
@pytest.mark.parametrize("level", ['A', 0]) # GH 21052
|
||||
def test_sort_index_multiindex(self, level):
|
||||
|
||||
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
|
||||
s = Series([1, 2], mi)
|
||||
backwards = s.iloc[[1, 0]]
|
||||
|
||||
# implicit sort_remaining=True
|
||||
res = s.sort_index(level=level)
|
||||
assert_series_equal(backwards, res)
|
||||
|
||||
# GH13496
|
||||
# sort has no effect without remaining lvls
|
||||
res = s.sort_index(level=level, sort_remaining=False)
|
||||
assert_series_equal(s, res)
|
||||
|
||||
def test_sort_index_kind(self):
|
||||
# GH #14444 & #13589: Add support for sort algo choosing
|
||||
series = Series(index=[3, 2, 1, 4, 3])
|
||||
expected_series = Series(index=[1, 2, 3, 3, 4])
|
||||
|
||||
index_sorted_series = series.sort_index(kind='mergesort')
|
||||
assert_series_equal(expected_series, index_sorted_series)
|
||||
|
||||
index_sorted_series = series.sort_index(kind='quicksort')
|
||||
assert_series_equal(expected_series, index_sorted_series)
|
||||
|
||||
index_sorted_series = series.sort_index(kind='heapsort')
|
||||
assert_series_equal(expected_series, index_sorted_series)
|
||||
|
||||
def test_sort_index_na_position(self):
|
||||
series = Series(index=[3, 2, 1, 4, 3, np.nan])
|
||||
|
||||
expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4])
|
||||
index_sorted_series = series.sort_index(na_position='first')
|
||||
assert_series_equal(expected_series_first, index_sorted_series)
|
||||
|
||||
expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan])
|
||||
index_sorted_series = series.sort_index(na_position='last')
|
||||
assert_series_equal(expected_series_last, index_sorted_series)
|
||||
|
||||
def test_sort_index_intervals(self):
|
||||
s = Series([np.nan, 1, 2, 3], IntervalIndex.from_arrays(
|
||||
[0, 1, 2, 3],
|
||||
[1, 2, 3, 4]))
|
||||
|
||||
result = s.sort_index()
|
||||
expected = s
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
result = s.sort_index(ascending=False)
|
||||
expected = Series([3, 2, 1, np.nan], IntervalIndex.from_arrays(
|
||||
[3, 2, 1, 0],
|
||||
[4, 3, 2, 1]))
|
||||
assert_series_equal(result, expected)
|
||||
|
||||
def test_sort_values_categorical(self):
|
||||
|
||||
c = Categorical(["a", "b", "b", "a"], ordered=False)
|
||||
cat = Series(c.copy())
|
||||
|
||||
# sort in the categories order
|
||||
expected = Series(
|
||||
Categorical(["a", "a", "b", "b"],
|
||||
ordered=False), index=[0, 3, 1, 2])
|
||||
result = cat.sort_values()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
cat = Series(Categorical(["a", "c", "b", "d"], ordered=True))
|
||||
res = cat.sort_values()
|
||||
exp = np.array(["a", "b", "c", "d"], dtype=np.object_)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
|
||||
cat = Series(Categorical(["a", "c", "b", "d"], categories=[
|
||||
"a", "b", "c", "d"], ordered=True))
|
||||
res = cat.sort_values()
|
||||
exp = np.array(["a", "b", "c", "d"], dtype=np.object_)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
|
||||
res = cat.sort_values(ascending=False)
|
||||
exp = np.array(["d", "c", "b", "a"], dtype=np.object_)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
|
||||
raw_cat1 = Categorical(["a", "b", "c", "d"],
|
||||
categories=["a", "b", "c", "d"], ordered=False)
|
||||
raw_cat2 = Categorical(["a", "b", "c", "d"],
|
||||
categories=["d", "c", "b", "a"], ordered=True)
|
||||
s = ["a", "b", "c", "d"]
|
||||
df = DataFrame({"unsort": raw_cat1,
|
||||
"sort": raw_cat2,
|
||||
"string": s,
|
||||
"values": [1, 2, 3, 4]})
|
||||
|
||||
# Cats must be sorted in a dataframe
|
||||
res = df.sort_values(by=["string"], ascending=False)
|
||||
exp = np.array(["d", "c", "b", "a"], dtype=np.object_)
|
||||
tm.assert_numpy_array_equal(res["sort"].values.__array__(), exp)
|
||||
assert res["sort"].dtype == "category"
|
||||
|
||||
res = df.sort_values(by=["sort"], ascending=False)
|
||||
exp = df.sort_values(by=["string"], ascending=True)
|
||||
tm.assert_series_equal(res["values"], exp["values"])
|
||||
assert res["sort"].dtype == "category"
|
||||
assert res["unsort"].dtype == "category"
|
||||
|
||||
# unordered cat, but we allow this
|
||||
df.sort_values(by=["unsort"], ascending=False)
|
||||
|
||||
# multi-columns sort
|
||||
# GH 7848
|
||||
df = DataFrame({"id": [6, 5, 4, 3, 2, 1],
|
||||
"raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']})
|
||||
df["grade"] = Categorical(df["raw_grade"], ordered=True)
|
||||
df['grade'] = df['grade'].cat.set_categories(['b', 'e', 'a'])
|
||||
|
||||
# sorts 'grade' according to the order of the categories
|
||||
result = df.sort_values(by=['grade'])
|
||||
expected = df.iloc[[1, 2, 5, 0, 3, 4]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# multi
|
||||
result = df.sort_values(by=['grade', 'id'])
|
||||
expected = df.iloc[[2, 1, 5, 4, 3, 0]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
@@ -1,107 +0,0 @@
|
||||
# coding=utf-8
|
||||
# pylint: disable-msg=E1101,W0612
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestSeriesSubclassing(object):
    """Check that ``Series`` subclasses are returned by common operations."""

    def test_indexing_sliced(self):
        """List-based ``loc``/``iloc`` selection yields the subclass."""
        s = tm.SubclassedSeries([1, 2, 3, 4], index=list('abcd'))

        # (selected result, expected values, expected labels); the ``loc``
        # selection is deliberately checked twice, as in the original test.
        checks = [
            (s.loc[['a', 'b']], [1, 2], 'ab'),
            (s.iloc[[2, 3]], [3, 4], 'cd'),
            (s.loc[['a', 'b']], [1, 2], 'ab'),
        ]
        for res, values, labels in checks:
            exp = tm.SubclassedSeries(values, index=list(labels))
            tm.assert_series_equal(res, exp)

    def test_to_frame(self):
        """``to_frame`` produces the subclassed DataFrame type."""
        labels = list('abcd')
        s = tm.SubclassedSeries([1, 2, 3, 4], index=labels, name='xxx')

        exp = tm.SubclassedDataFrame({'xxx': [1, 2, 3, 4]}, index=labels)
        tm.assert_frame_equal(s.to_frame(), exp)

    def test_subclass_unstack(self):
        """``unstack`` produces the subclassed DataFrame type."""
        # GH 15564
        s = tm.SubclassedSeries(
            [1, 2, 3, 4], index=[list('aabb'), list('xyxy')])

        exp = tm.SubclassedDataFrame(
            {'x': [1, 3], 'y': [2, 4]}, index=['a', 'b'])

        tm.assert_frame_equal(s.unstack(), exp)
|
||||
|
||||
|
||||
class TestSparseSeriesSubclassing(object):
    """Check that sparse ``Series`` subclasses survive common operations."""

    def test_subclass_sparse_slice(self):
        """Slicing keeps the subclass and the dtype (int64 and float64)."""
        cases = [
            ([1, 2, 3, 4, 5], np.int64),         # int64
            ([1., 2., 3., 4., 5.], np.float64),  # float64
        ]
        for values, dtype in cases:
            s = tm.SubclassedSparseSeries(values)

            # label-based slice: both end points are included
            exp = tm.SubclassedSparseSeries(values[1:4], index=[1, 2, 3])
            tm.assert_sp_series_equal(s.loc[1:3], exp)
            assert s.loc[1:3].dtype == dtype

            # positional slice: the stop position is excluded
            exp = tm.SubclassedSparseSeries(values[1:3], index=[1, 2])
            tm.assert_sp_series_equal(s.iloc[1:3], exp)
            assert s.iloc[1:3].dtype == dtype

            exp = tm.SubclassedSparseSeries(values[1:3], index=[1, 2])
            tm.assert_sp_series_equal(s[1:3], exp)
            assert s[1:3].dtype == dtype

    def test_subclass_sparse_addition(self):
        """Adding two subclassed sparse series yields the subclass."""
        operands = [
            ([1, 3, 5], [-2, 5, 12], [-1, 8, 17]),
            ([4.0, 5.0, 6.0], [1.0, 2.0, 3.0], [5., 7., 9.]),
        ]
        for left, right, total in operands:
            s1 = tm.SubclassedSparseSeries(left)
            s2 = tm.SubclassedSparseSeries(right)
            exp = tm.SubclassedSparseSeries(total)
            tm.assert_sp_series_equal(s1 + s2, exp)

    def test_subclass_sparse_to_frame(self):
        """``to_frame`` yields the subclassed sparse DataFrame."""
        labels = list('abcd')

        s = tm.SubclassedSparseSeries([1, 2], index=labels, name='xxx')
        res = s.to_frame()

        exp_arr = pd.SparseArray([1, 2], dtype=np.int64, kind='block',
                                 fill_value=0)
        exp = tm.SubclassedSparseDataFrame({'xxx': exp_arr},
                                           index=labels,
                                           default_fill_value=0)
        tm.assert_sp_frame_equal(res, exp)

        # create from int dict
        res = tm.SubclassedSparseDataFrame({'xxx': [1, 2]},
                                           index=labels,
                                           default_fill_value=0)
        tm.assert_sp_frame_equal(res, exp)

        s = tm.SubclassedSparseSeries([1.1, 2.1], index=labels,
                                      name='xxx')
        res = s.to_frame()
        exp = tm.SubclassedSparseDataFrame({'xxx': [1.1, 2.1]},
                                           index=labels)
        tm.assert_sp_frame_equal(res, exp)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,302 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Tests for Series timezone-related methods
|
||||
"""
|
||||
from datetime import datetime
|
||||
|
||||
import pytest
|
||||
import pytz
|
||||
import numpy as np
|
||||
from dateutil.tz import tzoffset
|
||||
|
||||
import pandas.util.testing as tm
|
||||
from pandas._libs import tslib
|
||||
from pandas._libs.tslibs import timezones
|
||||
from pandas.compat import lrange
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
from pandas import Series, Timestamp, DatetimeIndex, Index
|
||||
|
||||
|
||||
class TestSeriesTimezones(object):
    """Tests for Series timezone-related methods."""

    # -----------------------------------------------------------------
    # Series.tz_localize
    def test_series_tz_localize(self):
        """A naive index can be localized; an aware one raises TypeError."""
        rng = date_range('1/1/2011', periods=100, freq='H')
        ts = Series(1, index=rng)

        result = ts.tz_localize('utc')
        assert result.index.tz.zone == 'UTC'

        # Can't localize if already tz-aware
        rng = date_range('1/1/2011', periods=100, freq='H', tz='utc')
        ts = Series(1, index=rng)
        tm.assert_raises_regex(TypeError, 'Already tz-aware',
                               ts.tz_localize, 'US/Eastern')

    def test_series_tz_localize_ambiguous_bool(self):
        """Boolean ``ambiguous`` values select one of the two offsets."""
        # make sure that we are correctly accepting bool values as ambiguous

        # GH#14402
        ts = Timestamp('2015-11-01 01:00:03')
        expected0 = Timestamp('2015-11-01 01:00:03-0500', tz='US/Central')
        expected1 = Timestamp('2015-11-01 01:00:03-0600', tz='US/Central')

        ser = Series([ts])
        expected0 = Series([expected0])
        expected1 = Series([expected1])

        # without a hint the ambiguous wall time is rejected
        with pytest.raises(pytz.AmbiguousTimeError):
            ser.dt.tz_localize('US/Central')

        result = ser.dt.tz_localize('US/Central', ambiguous=True)
        tm.assert_series_equal(result, expected0)

        result = ser.dt.tz_localize('US/Central', ambiguous=[True])
        tm.assert_series_equal(result, expected0)

        result = ser.dt.tz_localize('US/Central', ambiguous=False)
        tm.assert_series_equal(result, expected1)

        result = ser.dt.tz_localize('US/Central', ambiguous=[False])
        tm.assert_series_equal(result, expected1)

    @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern'])
    def test_series_tz_localize_empty(self, tzstr):
        """Localizing an empty Series sets the timezone on its index."""
        # GH#2248
        ser = Series()

        ser2 = ser.tz_localize('utc')
        assert ser2.index.tz == pytz.utc

        ser2 = ser.tz_localize(tzstr)
        # The comparison result used to be discarded, so the timezone was
        # never actually verified; assert it.
        assert timezones.tz_compare(ser2.index.tz,
                                    timezones.maybe_get_tz(tzstr))

    # -----------------------------------------------------------------
    # Series.tz_convert

    def test_series_tz_convert(self):
        """An aware index can be converted; a naive one raises TypeError."""
        rng = date_range('1/1/2011', periods=200, freq='D', tz='US/Eastern')
        ts = Series(1, index=rng)

        result = ts.tz_convert('Europe/Berlin')
        assert result.index.tz.zone == 'Europe/Berlin'

        # can't convert tz-naive
        rng = date_range('1/1/2011', periods=200, freq='D')
        ts = Series(1, index=rng)
        tm.assert_raises_regex(TypeError, "Cannot convert tz-naive",
                               ts.tz_convert, 'US/Eastern')

    def test_series_tz_convert_to_utc(self):
        """Adding series with different timezones aligns them on UTC."""
        base = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
                             tz='UTC')
        idx1 = base.tz_convert('Asia/Tokyo')[:2]
        idx2 = base.tz_convert('US/Eastern')[1:]

        res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2)
        tm.assert_series_equal(res, Series([np.nan, 3, np.nan], index=base))

    # -----------------------------------------------------------------
    # Series.append

    def test_series_append_aware(self):
        """Appending aware series keeps a shared tz, else gives objects."""
        rng1 = date_range('1/1/2011 01:00', periods=1, freq='H',
                          tz='US/Eastern')
        rng2 = date_range('1/1/2011 02:00', periods=1, freq='H',
                          tz='US/Eastern')
        ser1 = Series([1], index=rng1)
        ser2 = Series([2], index=rng2)
        ts_result = ser1.append(ser2)

        exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'],
                                  tz='US/Eastern')
        exp = Series([1, 2], index=exp_index)
        tm.assert_series_equal(ts_result, exp)
        assert ts_result.index.tz == rng1.tz

        rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', tz='UTC')
        rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', tz='UTC')
        ser1 = Series([1], index=rng1)
        ser2 = Series([2], index=rng2)
        ts_result = ser1.append(ser2)

        exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'],
                                  tz='UTC')
        exp = Series([1, 2], index=exp_index)
        tm.assert_series_equal(ts_result, exp)
        utc = rng1.tz
        assert utc == ts_result.index.tz

        # GH#7795
        # different tz coerces to object dtype, not UTC
        rng1 = date_range('1/1/2011 01:00', periods=1, freq='H',
                          tz='US/Eastern')
        rng2 = date_range('1/1/2011 02:00', periods=1, freq='H',
                          tz='US/Central')
        ser1 = Series([1], index=rng1)
        ser2 = Series([2], index=rng2)
        ts_result = ser1.append(ser2)
        exp_index = Index([Timestamp('1/1/2011 01:00', tz='US/Eastern'),
                           Timestamp('1/1/2011 02:00', tz='US/Central')])
        exp = Series([1, 2], index=exp_index)
        tm.assert_series_equal(ts_result, exp)

    def test_series_append_aware_naive(self):
        """Appending naive to aware (or integer) indexes gives objects."""
        rng1 = date_range('1/1/2011 01:00', periods=1, freq='H')
        rng2 = date_range('1/1/2011 02:00', periods=1, freq='H',
                          tz='US/Eastern')
        ser1 = Series(np.random.randn(len(rng1)), index=rng1)
        ser2 = Series(np.random.randn(len(rng2)), index=rng2)
        ts_result = ser1.append(ser2)

        expected = ser1.index.astype(object).append(ser2.index.astype(object))
        assert ts_result.index.equals(expected)

        # mixed
        rng1 = date_range('1/1/2011 01:00', periods=1, freq='H')
        rng2 = lrange(100)
        ser1 = Series(np.random.randn(len(rng1)), index=rng1)
        ser2 = Series(np.random.randn(len(rng2)), index=rng2)
        ts_result = ser1.append(ser2)

        expected = ser1.index.astype(object).append(ser2.index)
        assert ts_result.index.equals(expected)

    def test_series_append_dst(self):
        """Appending January and August ranges keeps the shared timezone."""
        rng1 = date_range('1/1/2016 01:00', periods=3, freq='H',
                          tz='US/Eastern')
        rng2 = date_range('8/1/2016 01:00', periods=3, freq='H',
                          tz='US/Eastern')
        ser1 = Series([1, 2, 3], index=rng1)
        ser2 = Series([10, 11, 12], index=rng2)
        ts_result = ser1.append(ser2)

        exp_index = DatetimeIndex(['2016-01-01 01:00', '2016-01-01 02:00',
                                   '2016-01-01 03:00', '2016-08-01 01:00',
                                   '2016-08-01 02:00', '2016-08-01 03:00'],
                                  tz='US/Eastern')
        exp = Series([1, 2, 3, 10, 11, 12], index=exp_index)
        tm.assert_series_equal(ts_result, exp)
        assert ts_result.index.tz == rng1.tz

    # -----------------------------------------------------------------

    def test_dateutil_tzoffset_support(self):
        """A ``dateutil`` fixed offset is preserved on the Series index."""
        values = [188.5, 328.25]
        tzinfo = tzoffset(None, 7200)
        index = [datetime(2012, 5, 11, 11, tzinfo=tzinfo),
                 datetime(2012, 5, 11, 12, tzinfo=tzinfo)]
        series = Series(data=values, index=index)

        assert series.index.tz == tzinfo

        # it works! #2443
        repr(series.index[0])

    @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern'])
    def test_tz_aware_asfreq(self, tz):
        """``asfreq`` on a tz-aware index must not raise (smoke test)."""
        dr = date_range('2011-12-01', '2012-07-20', freq='D', tz=tz)

        ser = Series(np.random.randn(len(dr)), index=dr)

        # it works!
        ser.asfreq('T')

    @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern'])
    def test_string_index_alias_tz_aware(self, tz):
        """A date string looks up the matching row of a tz-aware index."""
        rng = date_range('1/1/2000', periods=10, tz=tz)
        ser = Series(np.random.randn(len(rng)), index=rng)

        result = ser['1/3/2000']
        tm.assert_almost_equal(result, ser[2])

    # TODO: De-duplicate with test below
    def test_series_add_tz_mismatch_converts_to_utc_duplicate(self):
        """Adding series with different timezones gives a UTC index."""
        rng = date_range('1/1/2011', periods=10, freq='H', tz='US/Eastern')
        ser = Series(np.random.randn(len(rng)), index=rng)

        ts_moscow = ser.tz_convert('Europe/Moscow')

        result = ser + ts_moscow
        assert result.index.tz is pytz.utc

        result = ts_moscow + ser
        assert result.index.tz is pytz.utc

    def test_series_add_tz_mismatch_converts_to_utc(self):
        """Mismatched-tz addition equals addition after converting to UTC."""
        rng = date_range('1/1/2011', periods=100, freq='H', tz='utc')

        perm = np.random.permutation(100)[:90]
        ser1 = Series(np.random.randn(90),
                      index=rng.take(perm).tz_convert('US/Eastern'))

        perm = np.random.permutation(100)[:90]
        ser2 = Series(np.random.randn(90),
                      index=rng.take(perm).tz_convert('Europe/Berlin'))

        result = ser1 + ser2

        uts1 = ser1.tz_convert('utc')
        uts2 = ser2.tz_convert('utc')
        expected = uts1 + uts2

        assert result.index.tz == pytz.UTC
        tm.assert_series_equal(result, expected)

    def test_series_add_aware_naive_raises(self):
        """Adding a tz-naive and a tz-aware series must raise."""
        rng = date_range('1/1/2011', periods=10, freq='H')
        ser = Series(np.random.randn(len(rng)), index=rng)

        ser_utc = ser.tz_localize('utc')

        # NOTE(review): ``Exception`` is very broad; narrow it once the
        # concrete exception type raised here has been confirmed.
        with pytest.raises(Exception):
            ser + ser_utc

        with pytest.raises(Exception):
            ser_utc + ser

    def test_series_align_aware(self):
        """Aligning series with different timezones converts both to UTC."""
        idx1 = date_range('2001', periods=5, freq='H', tz='US/Eastern')
        ser = Series(np.random.randn(len(idx1)), index=idx1)
        ser_central = ser.tz_convert('US/Central')
        # different timezones convert to UTC

        new1, new2 = ser.align(ser_central)
        assert new1.index.tz == pytz.UTC
        assert new2.index.tz == pytz.UTC

    @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern'])
    def test_localized_at_time_between_time(self, tzstr):
        """``at_time``/``between_time`` agree before and after localizing."""
        from datetime import time
        tz = timezones.maybe_get_tz(tzstr)

        rng = date_range('4/16/2012', '5/1/2012', freq='H')
        ts = Series(np.random.randn(len(rng)), index=rng)

        ts_local = ts.tz_localize(tzstr)

        result = ts_local.at_time(time(10, 0))
        expected = ts.at_time(time(10, 0)).tz_localize(tzstr)
        tm.assert_series_equal(result, expected)
        assert timezones.tz_compare(result.index.tz, tz)

        t1, t2 = time(10, 0), time(11, 0)
        result = ts_local.between_time(t1, t2)
        expected = ts.between_time(t1, t2).tz_localize(tzstr)
        tm.assert_series_equal(result, expected)
        assert timezones.tz_compare(result.index.tz, tz)

    @pytest.mark.parametrize('tzstr', ['Europe/Berlin',
                                       'dateutil/Europe/Berlin'])
    def test_getitem_pydatetime_tz(self, tzstr):
        """A ``Timestamp`` and a localized ``datetime`` select the same row."""
        tz = timezones.maybe_get_tz(tzstr)

        index = date_range(start='2012-12-24 16:00', end='2012-12-24 18:00',
                           freq='H', tz=tzstr)
        ts = Series(index=index, data=index.hour)
        time_pandas = Timestamp('2012-12-24 17:00', tz=tzstr)

        dt = datetime(2012, 12, 24, 17, 0)
        time_datetime = tslib._localize_pydatetime(dt, tz)
        assert ts[time_pandas] == ts[time_datetime]
|
||||
@@ -1,27 +0,0 @@
|
||||
from pandas.core.series import Series
|
||||
|
||||
import pytest
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def series():
    """Provide a five-element integer ``Series`` holding 1 through 5."""
    values = [1, 2, 3, 4, 5]
    return Series(values)
|
||||
|
||||
|
||||
class TestSeriesValidate(object):
    """Tests for error handling related to data types of method arguments."""

    @pytest.mark.parametrize("func", ["reset_index", "_set_name",
                                      "sort_values", "sort_index",
                                      "rename", "dropna"])
    @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0])
    def test_validate_bool_args(self, series, func, inplace):
        """Each listed method rejects a non-bool ``inplace`` argument."""
        call_kwargs = {"inplace": inplace}

        # ``_set_name`` is the only method here that gets an extra ``name``
        if func == "_set_name":
            call_kwargs["name"] = "hello"

        expected_message = "For argument \"inplace\" expected type bool"
        with tm.assert_raises_regex(ValueError, expected_message):
            method = getattr(series, func)
            method(**call_kwargs)
|
||||
Reference in New Issue
Block a user