demo + utils venv

This commit is contained in:
d3m1g0d
2019-02-03 13:40:10 +01:00
parent 5fa112490b
commit cfa9c8ea23
5994 changed files with 1353819 additions and 0 deletions
@@ -0,0 +1,142 @@
from datetime import datetime
import numpy as np
import pytest
from pandas import DataFrame, Series
from pandas.core.indexes.datetimes import date_range
from pandas.core.indexes.period import period_range
# The various methods we support
downsample_methods = ['min', 'max', 'first', 'last', 'sum', 'mean', 'sem',
'median', 'prod', 'var', 'std', 'ohlc', 'quantile']
upsample_methods = ['count', 'size']
series_methods = ['nunique']
resample_methods = downsample_methods + upsample_methods + series_methods
@pytest.fixture(params=downsample_methods)
def downsample_method(request):
"""Fixture for parametrization of Grouper downsample methods."""
return request.param
@pytest.fixture(params=upsample_methods)
def upsample_method(request):
"""Fixture for parametrization of Grouper upsample methods."""
return request.param
@pytest.fixture(params=resample_methods)
def resample_method(request):
"""Fixture for parametrization of Grouper resample methods."""
return request.param
@pytest.fixture
def simple_date_range_series():
"""
Series with date range index and random data for test purposes.
"""
def _simple_date_range_series(start, end, freq='D'):
rng = date_range(start, end, freq=freq)
return Series(np.random.randn(len(rng)), index=rng)
return _simple_date_range_series
@pytest.fixture
def simple_period_range_series():
"""
Series with period range index and random data for test purposes.
"""
def _simple_period_range_series(start, end, freq='D'):
rng = period_range(start, end, freq=freq)
return Series(np.random.randn(len(rng)), index=rng)
return _simple_period_range_series
@pytest.fixture
def _index_start():
"""Fixture for parametrization of index, series and frame."""
return datetime(2005, 1, 1)
@pytest.fixture
def _index_end():
"""Fixture for parametrization of index, series and frame."""
return datetime(2005, 1, 10)
@pytest.fixture
def _index_freq():
"""Fixture for parametrization of index, series and frame."""
return 'D'
@pytest.fixture
def _index_name():
"""Fixture for parametrization of index, series and frame."""
return None
@pytest.fixture
def index(_index_factory, _index_start, _index_end, _index_freq, _index_name):
"""Fixture for parametrization of date_range, period_range and
timedelta_range indexes"""
return _index_factory(
_index_start, _index_end, freq=_index_freq, name=_index_name)
@pytest.fixture
def _static_values(index):
"""Fixture for parametrization of values used in parametrization of
Series and DataFrames with date_range, period_range and
timedelta_range indexes"""
return np.arange(len(index))
@pytest.fixture
def _series_name():
"""Fixture for parametrization of Series name for Series used with
date_range, period_range and timedelta_range indexes"""
return None
@pytest.fixture
def series(index, _series_name, _static_values):
"""Fixture for parametrization of Series with date_range, period_range and
timedelta_range indexes"""
return Series(_static_values, index=index, name=_series_name)
@pytest.fixture
def empty_series(series):
"""Fixture for parametrization of empty Series with date_range,
period_range and timedelta_range indexes"""
return series[:0]
@pytest.fixture
def frame(index, _series_name, _static_values):
"""Fixture for parametrization of DataFrame with date_range, period_range
and timedelta_range indexes"""
# _series_name is intentionally unused
return DataFrame({'value': _static_values}, index=index)
@pytest.fixture
def empty_frame(series):
"""Fixture for parametrization of empty DataFrame with date_range,
period_range and timedelta_range indexes"""
index = series.index[:0]
return DataFrame(index=index)
@pytest.fixture(params=[Series, DataFrame])
def series_and_frame(request, series, frame):
"""Fixture for parametrization of Series and DataFrame with date_range,
period_range and timedelta_range indexes"""
if request.param == Series:
return series
if request.param == DataFrame:
return frame
@@ -0,0 +1,228 @@
from datetime import datetime, timedelta
import numpy as np
import pytest
from pandas.compat import range, zip
import pandas as pd
from pandas import DataFrame, Series
from pandas.core.groupby.groupby import DataError
from pandas.core.indexes.datetimes import date_range
from pandas.core.indexes.period import PeriodIndex, period_range
from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range
from pandas.core.resample import TimeGrouper
import pandas.util.testing as tm
from pandas.util.testing import (
assert_almost_equal, assert_frame_equal, assert_index_equal,
assert_series_equal)
# a fixture value can be overridden by the test parameter value. Note that the
# value of the fixture can be overridden this way even if the test doesn't use
# it directly (doesn't mention it in the function prototype).
# see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa
# in this module we override the fixture values defined in conftest.py
# tuples of '_index_factory,_series_name,_index_start,_index_end'
DATE_RANGE = (date_range, 'dti', datetime(2005, 1, 1), datetime(2005, 1, 10))
PERIOD_RANGE = (
period_range, 'pi', datetime(2005, 1, 1), datetime(2005, 1, 10))
TIMEDELTA_RANGE = (timedelta_range, 'tdi', '1 day', '10 day')
ALL_TIMESERIES_INDEXES = [DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE]
def pytest_generate_tests(metafunc):
# called once per each test function
if metafunc.function.__name__.endswith('_all_ts'):
metafunc.parametrize(
'_index_factory,_series_name,_index_start,_index_end',
ALL_TIMESERIES_INDEXES)
@pytest.fixture
def create_index(_index_factory):
def _create_index(*args, **kwargs):
""" return the _index_factory created using the args, kwargs """
return _index_factory(*args, **kwargs)
return _create_index
@pytest.mark.parametrize('freq', ['2D', '1H'])
@pytest.mark.parametrize(
'_index_factory,_series_name,_index_start,_index_end',
[DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq(series_and_frame, freq, create_index):
obj = series_and_frame
result = obj.resample(freq).asfreq()
new_index = create_index(obj.index[0], obj.index[-1], freq=freq)
expected = obj.reindex(new_index)
assert_almost_equal(result, expected)
@pytest.mark.parametrize(
'_index_factory,_series_name,_index_start,_index_end',
[DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq_fill_value(series, create_index):
# test for fill value during resampling, issue 3715
s = series
result = s.resample('1H').asfreq()
new_index = create_index(s.index[0], s.index[-1], freq='1H')
expected = s.reindex(new_index)
assert_series_equal(result, expected)
frame = s.to_frame('value')
frame.iloc[1] = None
result = frame.resample('1H').asfreq(fill_value=4.0)
new_index = create_index(frame.index[0],
frame.index[-1], freq='1H')
expected = frame.reindex(new_index, fill_value=4.0)
assert_frame_equal(result, expected)
def test_resample_interpolate_all_ts(frame):
# # 12925
df = frame
assert_frame_equal(
df.resample('1T').asfreq().interpolate(),
df.resample('1T').interpolate())
def test_raises_on_non_datetimelike_index():
# this is a non datetimelike index
xp = DataFrame()
pytest.raises(TypeError, lambda: xp.resample('A').mean())
@pytest.mark.parametrize('freq', ['M', 'D', 'H'])
def test_resample_empty_series_all_ts(freq, empty_series, resample_method):
# GH12771 & GH12868
if resample_method == 'ohlc':
pytest.skip('need to test for ohlc from GH13083')
s = empty_series
result = getattr(s.resample(freq), resample_method)()
expected = s.copy()
if isinstance(s.index, PeriodIndex):
expected.index = s.index.asfreq(freq=freq)
else:
expected.index = s.index._shallow_copy(freq=freq)
assert_index_equal(result.index, expected.index)
assert result.index.freq == expected.index.freq
assert_series_equal(result, expected, check_dtype=False)
@pytest.mark.parametrize('freq', ['M', 'D', 'H'])
def test_resample_empty_dataframe_all_ts(empty_frame, freq, resample_method):
# GH13212
df = empty_frame
# count retains dimensions too
result = getattr(df.resample(freq), resample_method)()
if resample_method != 'size':
expected = df.copy()
else:
# GH14962
expected = Series([])
if isinstance(df.index, PeriodIndex):
expected.index = df.index.asfreq(freq=freq)
else:
expected.index = df.index._shallow_copy(freq=freq)
assert_index_equal(result.index, expected.index)
assert result.index.freq == expected.index.freq
assert_almost_equal(result, expected, check_dtype=False)
# test size for GH13212 (currently stays as df)
@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
@pytest.mark.parametrize(
"dtype",
[np.float, np.int, np.object, 'datetime64[ns]'])
def test_resample_empty_dtypes(index, dtype, resample_method):
# Empty series were sometimes causing a segfault (for the functions
# with Cython bounds-checking disabled) or an IndexError. We just run
# them to ensure they no longer do. (GH #10228)
empty_series = Series([], index, dtype)
try:
getattr(empty_series.resample('d'), resample_method)()
except DataError:
# Ignore these since some combinations are invalid
# (ex: doing mean with dtype of np.object)
pass
def test_resample_loffset_arg_type_all_ts(frame, create_index):
# GH 13218, 15002
df = frame
expected_means = [df.values[i:i + 2].mean()
for i in range(0, len(df.values), 2)]
expected_index = create_index(df.index[0],
periods=len(df.index) / 2,
freq='2D')
# loffset coerces PeriodIndex to DateTimeIndex
if isinstance(expected_index, PeriodIndex):
expected_index = expected_index.to_timestamp()
expected_index += timedelta(hours=2)
expected = DataFrame({'value': expected_means}, index=expected_index)
for arg in ['mean', {'value': 'mean'}, ['mean']]:
result_agg = df.resample('2D', loffset='2H').agg(arg)
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result_how = df.resample('2D', how=arg, loffset='2H')
if isinstance(arg, list):
expected.columns = pd.MultiIndex.from_tuples([('value',
'mean')])
# GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex
if isinstance(expected.index, TimedeltaIndex):
with pytest.raises(AssertionError):
assert_frame_equal(result_agg, expected)
assert_frame_equal(result_how, expected)
else:
assert_frame_equal(result_agg, expected)
assert_frame_equal(result_how, expected)
def test_apply_to_empty_series_all_ts(empty_series):
# GH 14313
s = empty_series
for freq in ['M', 'D', 'H']:
result = s.resample(freq).apply(lambda x: 1)
expected = s.resample(freq).apply(np.sum)
assert_series_equal(result, expected, check_dtype=False)
def test_resampler_is_iterable_all_ts(series):
# GH 15314
freq = 'H'
tg = TimeGrouper(freq, convention='start')
grouped = series.groupby(tg)
resampled = series.resample(freq)
for (rk, rv), (gk, gv) in zip(resampled, grouped):
assert rk == gk
assert_series_equal(rv, gv)
def test_resample_quantile_all_ts(series):
# GH 15023
s = series
q = 0.75
freq = 'H'
result = s.resample(freq).quantile(q)
expected = s.resample(freq).agg(lambda x: x.quantile(q)).rename(s.name)
tm.assert_series_equal(result, expected)
@@ -0,0 +1,759 @@
from datetime import datetime, timedelta
import dateutil
import numpy as np
import pytest
import pytz
from pandas._libs.tslibs.ccalendar import DAYS, MONTHS
from pandas._libs.tslibs.period import IncompatibleFrequency
from pandas.compat import lrange, range, zip
import pandas as pd
from pandas import DataFrame, Series, Timestamp
from pandas.core.indexes.datetimes import date_range
from pandas.core.indexes.period import Period, PeriodIndex, period_range
from pandas.core.resample import _get_period_range_edges
import pandas.util.testing as tm
from pandas.util.testing import (
assert_almost_equal, assert_frame_equal, assert_series_equal)
import pandas.tseries.offsets as offsets
@pytest.fixture()
def _index_factory():
return period_range
@pytest.fixture
def _series_name():
return 'pi'
class TestPeriodIndex(object):
@pytest.mark.parametrize('freq', ['2D', '1H', '2H'])
@pytest.mark.parametrize('kind', ['period', None, 'timestamp'])
def test_asfreq(self, series_and_frame, freq, kind):
# GH 12884, 15944
# make sure .asfreq() returns PeriodIndex (except kind='timestamp')
obj = series_and_frame
if kind == 'timestamp':
expected = obj.to_timestamp().resample(freq).asfreq()
else:
start = obj.index[0].to_timestamp(how='start')
end = (obj.index[-1] + obj.index.freq).to_timestamp(how='start')
new_index = date_range(start=start, end=end, freq=freq,
closed='left')
expected = obj.to_timestamp().reindex(new_index).to_period(freq)
result = obj.resample(freq, kind=kind).asfreq()
assert_almost_equal(result, expected)
def test_asfreq_fill_value(self, series):
# test for fill value during resampling, issue 3715
s = series
new_index = date_range(s.index[0].to_timestamp(how='start'),
(s.index[-1]).to_timestamp(how='start'),
freq='1H')
expected = s.to_timestamp().reindex(new_index, fill_value=4.0)
result = s.resample('1H', kind='timestamp').asfreq(fill_value=4.0)
assert_series_equal(result, expected)
frame = s.to_frame('value')
new_index = date_range(frame.index[0].to_timestamp(how='start'),
(frame.index[-1]).to_timestamp(how='start'),
freq='1H')
expected = frame.to_timestamp().reindex(new_index, fill_value=3.0)
result = frame.resample('1H', kind='timestamp').asfreq(fill_value=3.0)
assert_frame_equal(result, expected)
@pytest.mark.parametrize('freq', ['H', '12H', '2D', 'W'])
@pytest.mark.parametrize('kind', [None, 'period', 'timestamp'])
def test_selection(self, index, freq, kind):
# This is a bug, these should be implemented
# GH 14008
rng = np.arange(len(index), dtype=np.int64)
df = DataFrame({'date': index, 'a': rng},
index=pd.MultiIndex.from_arrays([rng, index],
names=['v', 'd']))
with pytest.raises(NotImplementedError):
df.resample(freq, on='date', kind=kind)
with pytest.raises(NotImplementedError):
df.resample(freq, level='d', kind=kind)
@pytest.mark.parametrize('month', MONTHS)
@pytest.mark.parametrize('meth', ['ffill', 'bfill'])
@pytest.mark.parametrize('conv', ['start', 'end'])
@pytest.mark.parametrize('targ', ['D', 'B', 'M'])
def test_annual_upsample_cases(self, targ, conv, meth, month,
simple_period_range_series):
ts = simple_period_range_series(
'1/1/1990', '12/31/1991', freq='A-%s' % month)
result = getattr(ts.resample(targ, convention=conv), meth)()
expected = result.to_timestamp(targ, how=conv)
expected = expected.asfreq(targ, meth).to_period()
assert_series_equal(result, expected)
def test_basic_downsample(self, simple_period_range_series):
ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='M')
result = ts.resample('a-dec').mean()
expected = ts.groupby(ts.index.year).mean()
expected.index = period_range('1/1/1990', '6/30/1995', freq='a-dec')
assert_series_equal(result, expected)
# this is ok
assert_series_equal(ts.resample('a-dec').mean(), result)
assert_series_equal(ts.resample('a').mean(), result)
def test_not_subperiod(self, simple_period_range_series):
# These are incompatible period rules for resampling
ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='w-wed')
pytest.raises(ValueError, lambda: ts.resample('a-dec').mean())
pytest.raises(ValueError, lambda: ts.resample('q-mar').mean())
pytest.raises(ValueError, lambda: ts.resample('M').mean())
pytest.raises(ValueError, lambda: ts.resample('w-thu').mean())
@pytest.mark.parametrize('freq', ['D', '2D'])
def test_basic_upsample(self, freq, simple_period_range_series):
ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='M')
result = ts.resample('a-dec').mean()
resampled = result.resample(freq, convention='end').ffill()
expected = result.to_timestamp(freq, how='end')
expected = expected.asfreq(freq, 'ffill').to_period(freq)
assert_series_equal(resampled, expected)
def test_upsample_with_limit(self):
rng = period_range('1/1/2000', periods=5, freq='A')
ts = Series(np.random.randn(len(rng)), rng)
result = ts.resample('M', convention='end').ffill(limit=2)
expected = ts.asfreq('M').reindex(result.index, method='ffill',
limit=2)
assert_series_equal(result, expected)
def test_annual_upsample(self, simple_period_range_series):
ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='A-DEC')
df = DataFrame({'a': ts})
rdf = df.resample('D').ffill()
exp = df['a'].resample('D').ffill()
assert_series_equal(rdf['a'], exp)
rng = period_range('2000', '2003', freq='A-DEC')
ts = Series([1, 2, 3, 4], index=rng)
result = ts.resample('M').ffill()
ex_index = period_range('2000-01', '2003-12', freq='M')
expected = ts.asfreq('M', how='start').reindex(ex_index,
method='ffill')
assert_series_equal(result, expected)
@pytest.mark.parametrize('month', MONTHS)
@pytest.mark.parametrize('target', ['D', 'B', 'M'])
@pytest.mark.parametrize('convention', ['start', 'end'])
def test_quarterly_upsample(self, month, target, convention,
simple_period_range_series):
freq = 'Q-{month}'.format(month=month)
ts = simple_period_range_series('1/1/1990', '12/31/1995', freq=freq)
result = ts.resample(target, convention=convention).ffill()
expected = result.to_timestamp(target, how=convention)
expected = expected.asfreq(target, 'ffill').to_period()
assert_series_equal(result, expected)
@pytest.mark.parametrize('target', ['D', 'B'])
@pytest.mark.parametrize('convention', ['start', 'end'])
def test_monthly_upsample(self, target, convention,
simple_period_range_series):
ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='M')
result = ts.resample(target, convention=convention).ffill()
expected = result.to_timestamp(target, how=convention)
expected = expected.asfreq(target, 'ffill').to_period()
assert_series_equal(result, expected)
def test_resample_basic(self):
# GH3609
s = Series(range(100), index=date_range(
'20130101', freq='s', periods=100, name='idx'), dtype='float')
s[10:30] = np.nan
index = PeriodIndex([
Period('2013-01-01 00:00', 'T'),
Period('2013-01-01 00:01', 'T')], name='idx')
expected = Series([34.5, 79.5], index=index)
result = s.to_period().resample('T', kind='period').mean()
assert_series_equal(result, expected)
result2 = s.resample('T', kind='period').mean()
assert_series_equal(result2, expected)
@pytest.mark.parametrize('freq,expected_vals', [('M', [31, 29, 31, 9]),
('2M', [31 + 29, 31 + 9])])
def test_resample_count(self, freq, expected_vals):
# GH12774
series = Series(1, index=pd.period_range(start='2000', periods=100))
result = series.resample(freq).count()
expected_index = pd.period_range(start='2000', freq=freq,
periods=len(expected_vals))
expected = Series(expected_vals, index=expected_index)
assert_series_equal(result, expected)
def test_resample_same_freq(self, resample_method):
# GH12770
series = Series(range(3), index=pd.period_range(
start='2000', periods=3, freq='M'))
expected = series
result = getattr(series.resample('M'), resample_method)()
assert_series_equal(result, expected)
def test_resample_incompat_freq(self):
with pytest.raises(IncompatibleFrequency):
Series(range(3), index=pd.period_range(
start='2000', periods=3, freq='M')).resample('W').mean()
def test_with_local_timezone_pytz(self):
# see gh-5430
local_timezone = pytz.timezone('America/Los_Angeles')
start = datetime(year=2013, month=11, day=1, hour=0, minute=0,
tzinfo=pytz.utc)
# 1 day later
end = datetime(year=2013, month=11, day=2, hour=0, minute=0,
tzinfo=pytz.utc)
index = pd.date_range(start, end, freq='H')
series = Series(1, index=index)
series = series.tz_convert(local_timezone)
result = series.resample('D', kind='period').mean()
# Create the expected series
# Index is moved back a day with the timezone conversion from UTC to
# Pacific
expected_index = (pd.period_range(start=start, end=end, freq='D') -
offsets.Day())
expected = Series(1, index=expected_index)
assert_series_equal(result, expected)
def test_resample_with_pytz(self):
# GH 13238
s = Series(2, index=pd.date_range('2017-01-01', periods=48, freq="H",
tz="US/Eastern"))
result = s.resample("D").mean()
expected = Series(2, index=pd.DatetimeIndex(['2017-01-01',
'2017-01-02'],
tz="US/Eastern"))
assert_series_equal(result, expected)
# Especially assert that the timezone is LMT for pytz
assert result.index.tz == pytz.timezone('US/Eastern')
def test_with_local_timezone_dateutil(self):
# see gh-5430
local_timezone = 'dateutil/America/Los_Angeles'
start = datetime(year=2013, month=11, day=1, hour=0, minute=0,
tzinfo=dateutil.tz.tzutc())
# 1 day later
end = datetime(year=2013, month=11, day=2, hour=0, minute=0,
tzinfo=dateutil.tz.tzutc())
index = pd.date_range(start, end, freq='H', name='idx')
series = Series(1, index=index)
series = series.tz_convert(local_timezone)
result = series.resample('D', kind='period').mean()
# Create the expected series
# Index is moved back a day with the timezone conversion from UTC to
# Pacific
expected_index = (pd.period_range(start=start, end=end, freq='D',
name='idx') - offsets.Day())
expected = Series(1, index=expected_index)
assert_series_equal(result, expected)
def test_resample_nonexistent_time_bin_edge(self):
# GH 19375
index = date_range('2017-03-12', '2017-03-12 1:45:00', freq='15T')
s = Series(np.zeros(len(index)), index=index)
expected = s.tz_localize('US/Pacific')
result = expected.resample('900S').mean()
tm.assert_series_equal(result, expected)
# GH 23742
index = date_range(start='2017-10-10', end='2017-10-20', freq='1H')
index = index.tz_localize('UTC').tz_convert('America/Sao_Paulo')
df = DataFrame(data=list(range(len(index))), index=index)
result = df.groupby(pd.Grouper(freq='1D')).count()
expected = date_range(start='2017-10-09', end='2017-10-20', freq='D',
tz="America/Sao_Paulo",
nonexistent='shift_forward', closed='left')
tm.assert_index_equal(result.index, expected)
def test_resample_ambiguous_time_bin_edge(self):
# GH 10117
idx = pd.date_range("2014-10-25 22:00:00", "2014-10-26 00:30:00",
freq="30T", tz="Europe/London")
expected = Series(np.zeros(len(idx)), index=idx)
result = expected.resample('30T').mean()
tm.assert_series_equal(result, expected)
def test_fill_method_and_how_upsample(self):
# GH2073
s = Series(np.arange(9, dtype='int64'),
index=date_range('2010-01-01', periods=9, freq='Q'))
last = s.resample('M').ffill()
both = s.resample('M').ffill().resample('M').last().astype('int64')
assert_series_equal(last, both)
@pytest.mark.parametrize('day', DAYS)
@pytest.mark.parametrize('target', ['D', 'B'])
@pytest.mark.parametrize('convention', ['start', 'end'])
def test_weekly_upsample(self, day, target, convention,
simple_period_range_series):
freq = 'W-{day}'.format(day=day)
ts = simple_period_range_series('1/1/1990', '12/31/1995', freq=freq)
result = ts.resample(target, convention=convention).ffill()
expected = result.to_timestamp(target, how=convention)
expected = expected.asfreq(target, 'ffill').to_period()
assert_series_equal(result, expected)
def test_resample_to_timestamps(self, simple_period_range_series):
ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='M')
result = ts.resample('A-DEC', kind='timestamp').mean()
expected = ts.to_timestamp(how='start').resample('A-DEC').mean()
assert_series_equal(result, expected)
def test_resample_to_quarterly(self, simple_period_range_series):
for month in MONTHS:
ts = simple_period_range_series(
'1990', '1992', freq='A-%s' % month)
quar_ts = ts.resample('Q-%s' % month).ffill()
stamps = ts.to_timestamp('D', how='start')
qdates = period_range(ts.index[0].asfreq('D', 'start'),
ts.index[-1].asfreq('D', 'end'),
freq='Q-%s' % month)
expected = stamps.reindex(qdates.to_timestamp('D', 's'),
method='ffill')
expected.index = qdates
assert_series_equal(quar_ts, expected)
# conforms, but different month
ts = simple_period_range_series('1990', '1992', freq='A-JUN')
for how in ['start', 'end']:
result = ts.resample('Q-MAR', convention=how).ffill()
expected = ts.asfreq('Q-MAR', how=how)
expected = expected.reindex(result.index, method='ffill')
# .to_timestamp('D')
# expected = expected.resample('Q-MAR').ffill()
assert_series_equal(result, expected)
def test_resample_fill_missing(self):
rng = PeriodIndex([2000, 2005, 2007, 2009], freq='A')
s = Series(np.random.randn(4), index=rng)
stamps = s.to_timestamp()
filled = s.resample('A').ffill()
expected = stamps.resample('A').ffill().to_period('A')
assert_series_equal(filled, expected)
def test_cant_fill_missing_dups(self):
rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq='A')
s = Series(np.random.randn(5), index=rng)
pytest.raises(Exception, lambda: s.resample('A').ffill())
@pytest.mark.parametrize('freq', ['5min'])
@pytest.mark.parametrize('kind', ['period', None, 'timestamp'])
def test_resample_5minute(self, freq, kind):
rng = period_range('1/1/2000', '1/5/2000', freq='T')
ts = Series(np.random.randn(len(rng)), index=rng)
expected = ts.to_timestamp().resample(freq).mean()
if kind != 'timestamp':
expected = expected.to_period(freq)
result = ts.resample(freq, kind=kind).mean()
assert_series_equal(result, expected)
def test_upsample_daily_business_daily(self, simple_period_range_series):
ts = simple_period_range_series('1/1/2000', '2/1/2000', freq='B')
result = ts.resample('D').asfreq()
expected = ts.asfreq('D').reindex(period_range('1/3/2000', '2/1/2000'))
assert_series_equal(result, expected)
ts = simple_period_range_series('1/1/2000', '2/1/2000')
result = ts.resample('H', convention='s').asfreq()
exp_rng = period_range('1/1/2000', '2/1/2000 23:00', freq='H')
expected = ts.asfreq('H', how='s').reindex(exp_rng)
assert_series_equal(result, expected)
def test_resample_irregular_sparse(self):
dr = date_range(start='1/1/2012', freq='5min', periods=1000)
s = Series(np.array(100), index=dr)
# subset the data.
subset = s[:'2012-01-04 06:55']
result = subset.resample('10min').apply(len)
expected = s.resample('10min').apply(len).loc[result.index]
assert_series_equal(result, expected)
def test_resample_weekly_all_na(self):
rng = date_range('1/1/2000', periods=10, freq='W-WED')
ts = Series(np.random.randn(len(rng)), index=rng)
result = ts.resample('W-THU').asfreq()
assert result.isna().all()
result = ts.resample('W-THU').asfreq().ffill()[:-1]
expected = ts.asfreq('W-THU').ffill()
assert_series_equal(result, expected)
def test_resample_tz_localized(self):
dr = date_range(start='2012-4-13', end='2012-5-1')
ts = Series(lrange(len(dr)), dr)
ts_utc = ts.tz_localize('UTC')
ts_local = ts_utc.tz_convert('America/Los_Angeles')
result = ts_local.resample('W').mean()
ts_local_naive = ts_local.copy()
ts_local_naive.index = [x.replace(tzinfo=None)
for x in ts_local_naive.index.to_pydatetime()]
exp = ts_local_naive.resample(
'W').mean().tz_localize('America/Los_Angeles')
assert_series_equal(result, exp)
# it works
result = ts_local.resample('D').mean()
# #2245
idx = date_range('2001-09-20 15:59', '2001-09-20 16:00', freq='T',
tz='Australia/Sydney')
s = Series([1, 2], index=idx)
result = s.resample('D', closed='right', label='right').mean()
ex_index = date_range('2001-09-21', periods=1, freq='D',
tz='Australia/Sydney')
expected = Series([1.5], index=ex_index)
assert_series_equal(result, expected)
# for good measure
result = s.resample('D', kind='period').mean()
ex_index = period_range('2001-09-20', periods=1, freq='D')
expected = Series([1.5], index=ex_index)
assert_series_equal(result, expected)
# GH 6397
# comparing an offset that doesn't propagate tz's
rng = date_range('1/1/2011', periods=20000, freq='H')
rng = rng.tz_localize('EST')
ts = DataFrame(index=rng)
ts['first'] = np.random.randn(len(rng))
ts['second'] = np.cumsum(np.random.randn(len(rng)))
expected = DataFrame(
{
'first': ts.resample('A').sum()['first'],
'second': ts.resample('A').mean()['second']},
columns=['first', 'second'])
result = ts.resample(
'A').agg({'first': np.sum,
'second': np.mean}).reindex(columns=['first', 'second'])
assert_frame_equal(result, expected)
def test_closed_left_corner(self):
# #1465
s = Series(np.random.randn(21),
index=date_range(start='1/1/2012 9:30',
freq='1min', periods=21))
s[0] = np.nan
result = s.resample('10min', closed='left', label='right').mean()
exp = s[1:].resample('10min', closed='left', label='right').mean()
assert_series_equal(result, exp)
result = s.resample('10min', closed='left', label='left').mean()
exp = s[1:].resample('10min', closed='left', label='left').mean()
ex_index = date_range(start='1/1/2012 9:30', freq='10min', periods=3)
tm.assert_index_equal(result.index, ex_index)
assert_series_equal(result, exp)
def test_quarterly_resampling(self):
rng = period_range('2000Q1', periods=10, freq='Q-DEC')
ts = Series(np.arange(10), index=rng)
result = ts.resample('A').mean()
exp = ts.to_timestamp().resample('A').mean().to_period()
assert_series_equal(result, exp)
def test_resample_weekly_bug_1726(self):
# 8/6/12 is a Monday
ind = date_range(start="8/6/2012", end="8/26/2012", freq="D")
n = len(ind)
data = [[x] * 5 for x in range(n)]
df = DataFrame(data, columns=['open', 'high', 'low', 'close', 'vol'],
index=ind)
# it works!
df.resample('W-MON', closed='left', label='left').first()
def test_resample_with_dst_time_change(self):
# GH 15549
index = (
pd.DatetimeIndex([1457537600000000000, 1458059600000000000])
.tz_localize("UTC").tz_convert('America/Chicago')
)
df = pd.DataFrame([1, 2], index=index)
result = df.resample('12h', closed='right',
label='right').last().ffill()
expected_index_values = ['2016-03-09 12:00:00-06:00',
'2016-03-10 00:00:00-06:00',
'2016-03-10 12:00:00-06:00',
'2016-03-11 00:00:00-06:00',
'2016-03-11 12:00:00-06:00',
'2016-03-12 00:00:00-06:00',
'2016-03-12 12:00:00-06:00',
'2016-03-13 00:00:00-06:00',
'2016-03-13 13:00:00-05:00',
'2016-03-14 01:00:00-05:00',
'2016-03-14 13:00:00-05:00',
'2016-03-15 01:00:00-05:00',
'2016-03-15 13:00:00-05:00']
index = pd.to_datetime(expected_index_values, utc=True).tz_convert(
'America/Chicago')
expected = pd.DataFrame([1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 2.0], index=index)
assert_frame_equal(result, expected)
def test_resample_bms_2752(self):
# GH2753
foo = Series(index=pd.bdate_range('20000101', '20000201'))
res1 = foo.resample("BMS").mean()
res2 = foo.resample("BMS").mean().resample("B").mean()
assert res1.index[0] == Timestamp('20000103')
assert res1.index[0] == res2.index[0]
# def test_monthly_convention_span(self):
# rng = period_range('2000-01', periods=3, freq='M')
# ts = Series(np.arange(3), index=rng)
# # hacky way to get same thing
# exp_index = period_range('2000-01-01', '2000-03-31', freq='D')
# expected = ts.asfreq('D', how='end').reindex(exp_index)
# expected = expected.fillna(method='bfill')
# result = ts.resample('D', convention='span').mean()
# assert_series_equal(result, expected)
def test_default_right_closed_label(self):
end_freq = ['D', 'Q', 'M', 'D']
end_types = ['M', 'A', 'Q', 'W']
for from_freq, to_freq in zip(end_freq, end_types):
idx = date_range(start='8/15/2012', periods=100, freq=from_freq)
df = DataFrame(np.random.randn(len(idx), 2), idx)
resampled = df.resample(to_freq).mean()
assert_frame_equal(resampled, df.resample(to_freq, closed='right',
label='right').mean())
def test_default_left_closed_label(self):
others = ['MS', 'AS', 'QS', 'D', 'H']
others_freq = ['D', 'Q', 'M', 'H', 'T']
for from_freq, to_freq in zip(others_freq, others):
idx = date_range(start='8/15/2012', periods=100, freq=from_freq)
df = DataFrame(np.random.randn(len(idx), 2), idx)
resampled = df.resample(to_freq).mean()
assert_frame_equal(resampled, df.resample(to_freq, closed='left',
label='left').mean())
def test_all_values_single_bin(self):
# 2070
index = period_range(start="2012-01-01", end="2012-12-31", freq="M")
s = Series(np.random.randn(len(index)), index=index)
result = s.resample("A").mean()
tm.assert_almost_equal(result[0], s.mean())
def test_evenly_divisible_with_no_extra_bins(self):
# 4076
# when the frequency is evenly divisible, sometimes extra bins
df = DataFrame(np.random.randn(9, 3),
index=date_range('2000-1-1', periods=9))
result = df.resample('5D').mean()
expected = pd.concat(
[df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T
expected.index = [Timestamp('2000-1-1'), Timestamp('2000-1-6')]
assert_frame_equal(result, expected)
index = date_range(start='2001-5-4', periods=28)
df = DataFrame(
[{'REST_KEY': 1, 'DLY_TRN_QT': 80, 'DLY_SLS_AMT': 90,
'COOP_DLY_TRN_QT': 30, 'COOP_DLY_SLS_AMT': 20}] * 28 +
[{'REST_KEY': 2, 'DLY_TRN_QT': 70, 'DLY_SLS_AMT': 10,
'COOP_DLY_TRN_QT': 50, 'COOP_DLY_SLS_AMT': 20}] * 28,
index=index.append(index)).sort_index()
index = date_range('2001-5-4', periods=4, freq='7D')
expected = DataFrame(
[{'REST_KEY': 14, 'DLY_TRN_QT': 14, 'DLY_SLS_AMT': 14,
'COOP_DLY_TRN_QT': 14, 'COOP_DLY_SLS_AMT': 14}] * 4,
index=index)
result = df.resample('7D').count()
assert_frame_equal(result, expected)
expected = DataFrame(
[{'REST_KEY': 21, 'DLY_TRN_QT': 1050, 'DLY_SLS_AMT': 700,
'COOP_DLY_TRN_QT': 560, 'COOP_DLY_SLS_AMT': 280}] * 4,
index=index)
result = df.resample('7D').sum()
assert_frame_equal(result, expected)
@pytest.mark.parametrize('kind', ['period', None, 'timestamp'])
@pytest.mark.parametrize('agg_arg', ['mean', {'value': 'mean'}, ['mean']])
def test_loffset_returns_datetimeindex(self, frame, kind, agg_arg):
# make sure passing loffset returns DatetimeIndex in all cases
# basic method taken from Base.test_resample_loffset_arg_type()
df = frame
expected_means = [df.values[i:i + 2].mean()
for i in range(0, len(df.values), 2)]
expected_index = period_range(
df.index[0], periods=len(df.index) / 2, freq='2D')
# loffset coerces PeriodIndex to DateTimeIndex
expected_index = expected_index.to_timestamp()
expected_index += timedelta(hours=2)
expected = DataFrame({'value': expected_means}, index=expected_index)
result_agg = df.resample('2D', loffset='2H', kind=kind).agg(agg_arg)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result_how = df.resample('2D', how=agg_arg, loffset='2H',
kind=kind)
if isinstance(agg_arg, list):
expected.columns = pd.MultiIndex.from_tuples([('value', 'mean')])
assert_frame_equal(result_agg, expected)
assert_frame_equal(result_how, expected)
@pytest.mark.parametrize('freq, period_mult', [('H', 24), ('12H', 2)])
@pytest.mark.parametrize('kind', [None, 'period'])
def test_upsampling_ohlc(self, freq, period_mult, kind):
# GH 13083
pi = period_range(start='2000', freq='D', periods=10)
s = Series(range(len(pi)), index=pi)
expected = s.to_timestamp().resample(freq).ohlc().to_period(freq)
# timestamp-based resampling doesn't include all sub-periods
# of the last original period, so extend accordingly:
new_index = period_range(start='2000', freq=freq,
periods=period_mult * len(pi))
expected = expected.reindex(new_index)
result = s.resample(freq, kind=kind).ohlc()
assert_frame_equal(result, expected)
@pytest.mark.parametrize('periods, values',
[([pd.NaT, '1970-01-01 00:00:00', pd.NaT,
'1970-01-01 00:00:02', '1970-01-01 00:00:03'],
[2, 3, 5, 7, 11]),
([pd.NaT, pd.NaT, '1970-01-01 00:00:00', pd.NaT,
pd.NaT, pd.NaT, '1970-01-01 00:00:02',
'1970-01-01 00:00:03', pd.NaT, pd.NaT],
[1, 2, 3, 5, 6, 8, 7, 11, 12, 13])])
@pytest.mark.parametrize('freq, expected_values',
[('1s', [3, np.NaN, 7, 11]),
('2s', [3, int((7 + 11) / 2)]),
('3s', [int((3 + 7) / 2), 11])])
def test_resample_with_nat(self, periods, values, freq, expected_values):
# GH 13224
index = PeriodIndex(periods, freq='S')
frame = DataFrame(values, index=index)
expected_index = period_range('1970-01-01 00:00:00',
periods=len(expected_values), freq=freq)
expected = DataFrame(expected_values, index=expected_index)
result = frame.resample(freq).mean()
assert_frame_equal(result, expected)
def test_resample_with_only_nat(self):
# GH 13224
pi = PeriodIndex([pd.NaT] * 3, freq='S')
frame = DataFrame([2, 3, 5], index=pi)
expected_index = PeriodIndex(data=[], freq=pi.freq)
expected = DataFrame([], index=expected_index)
result = frame.resample('1s').mean()
assert_frame_equal(result, expected)
@pytest.mark.parametrize('start,end,start_freq,end_freq,base', [
('19910905', '19910909 03:00', 'H', '24H', 10),
('19910905', '19910909 12:00', 'H', '24H', 10),
('19910905', '19910909 23:00', 'H', '24H', 10),
('19910905 10:00', '19910909', 'H', '24H', 10),
('19910905 10:00', '19910909 10:00', 'H', '24H', 10),
('19910905', '19910909 10:00', 'H', '24H', 10),
('19910905 12:00', '19910909', 'H', '24H', 10),
('19910905 12:00', '19910909 03:00', 'H', '24H', 10),
('19910905 12:00', '19910909 12:00', 'H', '24H', 10),
('19910905 12:00', '19910909 12:00', 'H', '24H', 34),
('19910905 12:00', '19910909 12:00', 'H', '17H', 10),
('19910905 12:00', '19910909 12:00', 'H', '17H', 3),
('19910905 12:00', '19910909 1:00', 'H', 'M', 3),
('19910905', '19910913 06:00', '2H', '24H', 10),
('19910905', '19910905 01:39', 'Min', '5Min', 3),
('19910905', '19910905 03:18', '2Min', '5Min', 3),
])
def test_resample_with_non_zero_base(self, start, end, start_freq,
end_freq, base):
# GH 23882
s = pd.Series(0, index=pd.period_range(start, end, freq=start_freq))
s = s + np.arange(len(s))
result = s.resample(end_freq, base=base).mean()
result = result.to_timestamp(end_freq)
# to_timestamp casts 24H -> D
result = result.asfreq(end_freq) if end_freq == '24H' else result
expected = s.to_timestamp().resample(end_freq, base=base).mean()
assert_series_equal(result, expected)
@pytest.mark.parametrize('first,last,offset,exp_first,exp_last', [
('19910905', '19920406', 'D', '19910905', '19920406'),
('19910905 00:00', '19920406 06:00', 'D', '19910905', '19920406'),
('19910905 06:00', '19920406 06:00', 'H', '19910905 06:00',
'19920406 06:00'),
('19910906', '19920406', 'M', '1991-09', '1992-04'),
('19910831', '19920430', 'M', '1991-08', '1992-04'),
('1991-08', '1992-04', 'M', '1991-08', '1992-04'),
])
def test_get_period_range_edges(self, first, last, offset,
exp_first, exp_last):
first = pd.Period(first)
last = pd.Period(last)
exp_first = pd.Period(exp_first, freq=offset)
exp_last = pd.Period(exp_last, freq=offset)
offset = pd.tseries.frequencies.to_offset(offset)
result = _get_period_range_edges(first, last, offset)
expected = (exp_first, exp_last)
assert result == expected
@@ -0,0 +1,544 @@
# pylint: disable=E1101
from datetime import datetime
import numpy as np
import pytest
from pandas.compat import OrderedDict, range
import pandas as pd
from pandas import DataFrame, Series
from pandas.core.indexes.datetimes import date_range
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal
dti = date_range(start=datetime(2005, 1, 1),
end=datetime(2005, 1, 10), freq='Min')
test_series = Series(np.random.rand(len(dti)), dti)
test_frame = DataFrame(
{'A': test_series, 'B': test_series, 'C': np.arange(len(dti))})
def test_str():
r = test_series.resample('H')
assert ('DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, '
'label=left, convention=start, base=0]' in str(r))
def test_api():
r = test_series.resample('H')
result = r.mean()
assert isinstance(result, Series)
assert len(result) == 217
r = test_series.to_frame().resample('H')
result = r.mean()
assert isinstance(result, DataFrame)
assert len(result) == 217
def test_groupby_resample_api():
# GH 12448
# .groupby(...).resample(...) hitting warnings
# when appropriate
df = DataFrame({'date': pd.date_range(start='2016-01-01',
periods=4,
freq='W'),
'group': [1, 1, 2, 2],
'val': [5, 6, 7, 8]}).set_index('date')
# replication step
i = pd.date_range('2016-01-03', periods=8).tolist() + \
pd.date_range('2016-01-17', periods=8).tolist()
index = pd.MultiIndex.from_arrays([[1] * 8 + [2] * 8, i],
names=['group', 'date'])
expected = DataFrame({'val': [5] * 7 + [6] + [7] * 7 + [8]},
index=index)
result = df.groupby('group').apply(
lambda x: x.resample('1D').ffill())[['val']]
assert_frame_equal(result, expected)
def test_groupby_resample_on_api():
# GH 15021
# .groupby(...).resample(on=...) results in an unexpected
# keyword warning.
df = DataFrame({'key': ['A', 'B'] * 5,
'dates': pd.date_range('2016-01-01', periods=10),
'values': np.random.randn(10)})
expected = df.set_index('dates').groupby('key').resample('D').mean()
result = df.groupby('key').resample('D', on='dates').mean()
assert_frame_equal(result, expected)
def test_pipe():
# GH17905
# series
r = test_series.resample('H')
expected = r.max() - r.mean()
result = r.pipe(lambda x: x.max() - x.mean())
tm.assert_series_equal(result, expected)
# dataframe
r = test_frame.resample('H')
expected = r.max() - r.mean()
result = r.pipe(lambda x: x.max() - x.mean())
tm.assert_frame_equal(result, expected)
def test_getitem():
r = test_frame.resample('H')
tm.assert_index_equal(r._selected_obj.columns, test_frame.columns)
r = test_frame.resample('H')['B']
assert r._selected_obj.name == test_frame.columns[1]
# technically this is allowed
r = test_frame.resample('H')['A', 'B']
tm.assert_index_equal(r._selected_obj.columns,
test_frame.columns[[0, 1]])
r = test_frame.resample('H')['A', 'B']
tm.assert_index_equal(r._selected_obj.columns,
test_frame.columns[[0, 1]])
def test_select_bad_cols():
g = test_frame.resample('H')
pytest.raises(KeyError, g.__getitem__, ['D'])
pytest.raises(KeyError, g.__getitem__, ['A', 'D'])
with pytest.raises(KeyError, match='^[^A]+$'):
# A should not be referenced as a bad column...
# will have to rethink regex if you change message!
g[['A', 'D']]
def test_attribute_access():
r = test_frame.resample('H')
tm.assert_series_equal(r.A.sum(), r['A'].sum())
def test_api_compat_before_use():
# make sure that we are setting the binner
# on these attributes
for attr in ['groups', 'ngroups', 'indices']:
rng = pd.date_range('1/1/2012', periods=100, freq='S')
ts = Series(np.arange(len(rng)), index=rng)
rs = ts.resample('30s')
# before use
getattr(rs, attr)
# after grouper is initialized is ok
rs.mean()
getattr(rs, attr)
def tests_skip_nuisance():
df = test_frame
df['D'] = 'foo'
r = df.resample('H')
result = r[['A', 'B']].sum()
expected = pd.concat([r.A.sum(), r.B.sum()], axis=1)
assert_frame_equal(result, expected)
expected = r[['A', 'B', 'C']].sum()
result = r.sum()
assert_frame_equal(result, expected)
def test_downsample_but_actually_upsampling():
# this is reindex / asfreq
rng = pd.date_range('1/1/2012', periods=100, freq='S')
ts = Series(np.arange(len(rng), dtype='int64'), index=rng)
result = ts.resample('20s').asfreq()
expected = Series([0, 20, 40, 60, 80],
index=pd.date_range('2012-01-01 00:00:00',
freq='20s',
periods=5))
assert_series_equal(result, expected)
def test_combined_up_downsampling_of_irregular():
# since we are reallydoing an operation like this
# ts2.resample('2s').mean().ffill()
# preserve these semantics
rng = pd.date_range('1/1/2012', periods=100, freq='S')
ts = Series(np.arange(len(rng)), index=rng)
ts2 = ts.iloc[[0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]]
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = ts2.resample('2s', how='mean', fill_method='ffill')
expected = ts2.resample('2s').mean().ffill()
assert_series_equal(result, expected)
def test_transform():
r = test_series.resample('20min')
expected = test_series.groupby(
pd.Grouper(freq='20min')).transform('mean')
result = r.transform('mean')
assert_series_equal(result, expected)
def test_fillna():
# need to upsample here
rng = pd.date_range('1/1/2012', periods=10, freq='2S')
ts = Series(np.arange(len(rng), dtype='int64'), index=rng)
r = ts.resample('s')
expected = r.ffill()
result = r.fillna(method='ffill')
assert_series_equal(result, expected)
expected = r.bfill()
result = r.fillna(method='bfill')
assert_series_equal(result, expected)
with pytest.raises(ValueError):
r.fillna(0)
def test_apply_without_aggregation():
# both resample and groupby should work w/o aggregation
r = test_series.resample('20min')
g = test_series.groupby(pd.Grouper(freq='20min'))
for t in [g, r]:
result = t.apply(lambda x: x)
assert_series_equal(result, test_series)
def test_agg_consistency():
# make sure that we are consistent across
# similar aggregations with and w/o selection list
df = DataFrame(np.random.randn(1000, 3),
index=pd.date_range('1/1/2012', freq='S', periods=1000),
columns=['A', 'B', 'C'])
r = df.resample('3T')
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
expected = r[['A', 'B', 'C']].agg({'r1': 'mean', 'r2': 'sum'})
result = r.agg({'r1': 'mean', 'r2': 'sum'})
assert_frame_equal(result, expected)
# TODO: once GH 14008 is fixed, move these tests into
# `Base` test class
def test_agg():
# test with all three Resampler apis and TimeGrouper
np.random.seed(1234)
index = date_range(datetime(2005, 1, 1),
datetime(2005, 1, 10), freq='D')
index.name = 'date'
df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index)
df_col = df.reset_index()
df_mult = df_col.copy()
df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index],
names=['index', 'date'])
r = df.resample('2D')
cases = [
r,
df_col.resample('2D', on='date'),
df_mult.resample('2D', level='date'),
df.groupby(pd.Grouper(freq='2D'))
]
a_mean = r['A'].mean()
a_std = r['A'].std()
a_sum = r['A'].sum()
b_mean = r['B'].mean()
b_std = r['B'].std()
b_sum = r['B'].sum()
expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
expected.columns = pd.MultiIndex.from_product([['A', 'B'],
['mean', 'std']])
for t in cases:
result = t.aggregate([np.mean, np.std])
assert_frame_equal(result, expected)
expected = pd.concat([a_mean, b_std], axis=1)
for t in cases:
result = t.aggregate({'A': np.mean,
'B': np.std})
assert_frame_equal(result, expected, check_like=True)
expected = pd.concat([a_mean, a_std], axis=1)
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
('A', 'std')])
for t in cases:
result = t.aggregate({'A': ['mean', 'std']})
assert_frame_equal(result, expected)
expected = pd.concat([a_mean, a_sum], axis=1)
expected.columns = ['mean', 'sum']
for t in cases:
result = t['A'].aggregate(['mean', 'sum'])
assert_frame_equal(result, expected)
expected = pd.concat([a_mean, a_sum], axis=1)
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
('A', 'sum')])
for t in cases:
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}})
assert_frame_equal(result, expected, check_like=True)
expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
('A', 'sum'),
('B', 'mean2'),
('B', 'sum2')])
for t in cases:
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'},
'B': {'mean2': 'mean', 'sum2': 'sum'}})
assert_frame_equal(result, expected, check_like=True)
expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
('A', 'std'),
('B', 'mean'),
('B', 'std')])
for t in cases:
result = t.aggregate({'A': ['mean', 'std'],
'B': ['mean', 'std']})
assert_frame_equal(result, expected, check_like=True)
expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
expected.columns = pd.MultiIndex.from_tuples([('r1', 'A', 'mean'),
('r1', 'A', 'sum'),
('r2', 'B', 'mean'),
('r2', 'B', 'sum')])
def test_agg_misc():
# test with all three Resampler apis and TimeGrouper
np.random.seed(1234)
index = date_range(datetime(2005, 1, 1),
datetime(2005, 1, 10), freq='D')
index.name = 'date'
df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index)
df_col = df.reset_index()
df_mult = df_col.copy()
df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index],
names=['index', 'date'])
r = df.resample('2D')
cases = [
r,
df_col.resample('2D', on='date'),
df_mult.resample('2D', level='date'),
df.groupby(pd.Grouper(freq='2D'))
]
# passed lambda
for t in cases:
result = t.agg({'A': np.sum,
'B': lambda x: np.std(x, ddof=1)})
rcustom = t['B'].apply(lambda x: np.std(x, ddof=1))
expected = pd.concat([r['A'].sum(), rcustom], axis=1)
assert_frame_equal(result, expected, check_like=True)
# agg with renamers
expected = pd.concat([t['A'].sum(),
t['B'].sum(),
t['A'].mean(),
t['B'].mean()],
axis=1)
expected.columns = pd.MultiIndex.from_tuples([('result1', 'A'),
('result1', 'B'),
('result2', 'A'),
('result2', 'B')])
for t in cases:
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = t[['A', 'B']].agg(OrderedDict([('result1', np.sum),
('result2', np.mean)]))
assert_frame_equal(result, expected, check_like=True)
# agg with different hows
expected = pd.concat([t['A'].sum(),
t['A'].std(),
t['B'].mean(),
t['B'].std()],
axis=1)
expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'),
('A', 'std'),
('B', 'mean'),
('B', 'std')])
for t in cases:
result = t.agg(OrderedDict([('A', ['sum', 'std']),
('B', ['mean', 'std'])]))
assert_frame_equal(result, expected, check_like=True)
# equivalent of using a selection list / or not
for t in cases:
result = t[['A', 'B']].agg({'A': ['sum', 'std'],
'B': ['mean', 'std']})
assert_frame_equal(result, expected, check_like=True)
# series like aggs
for t in cases:
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = t['A'].agg({'A': ['sum', 'std']})
expected = pd.concat([t['A'].sum(),
t['A'].std()],
axis=1)
expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'),
('A', 'std')])
assert_frame_equal(result, expected, check_like=True)
expected = pd.concat([t['A'].agg(['sum', 'std']),
t['A'].agg(['mean', 'std'])],
axis=1)
expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'),
('A', 'std'),
('B', 'mean'),
('B', 'std')])
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = t['A'].agg({'A': ['sum', 'std'],
'B': ['mean', 'std']})
assert_frame_equal(result, expected, check_like=True)
# errors
# invalid names in the agg specification
for t in cases:
with pytest.raises(KeyError):
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
t[['A']].agg({'A': ['sum', 'std'],
'B': ['mean', 'std']})
def test_agg_nested_dicts():
np.random.seed(1234)
index = date_range(datetime(2005, 1, 1),
datetime(2005, 1, 10), freq='D')
index.name = 'date'
df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index)
df_col = df.reset_index()
df_mult = df_col.copy()
df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index],
names=['index', 'date'])
r = df.resample('2D')
cases = [
r,
df_col.resample('2D', on='date'),
df_mult.resample('2D', level='date'),
df.groupby(pd.Grouper(freq='2D'))
]
for t in cases:
def f():
t.aggregate({'r1': {'A': ['mean', 'sum']},
'r2': {'B': ['mean', 'sum']}})
pytest.raises(ValueError, f)
for t in cases:
expected = pd.concat([t['A'].mean(), t['A'].std(), t['B'].mean(),
t['B'].std()], axis=1)
expected.columns = pd.MultiIndex.from_tuples([('ra', 'mean'), (
'ra', 'std'), ('rb', 'mean'), ('rb', 'std')])
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = t[['A', 'B']].agg({'A': {'ra': ['mean', 'std']},
'B': {'rb': ['mean', 'std']}})
assert_frame_equal(result, expected, check_like=True)
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = t.agg({'A': {'ra': ['mean', 'std']},
'B': {'rb': ['mean', 'std']}})
assert_frame_equal(result, expected, check_like=True)
def test_try_aggregate_non_existing_column():
# GH 16766
data = [
{'dt': datetime(2017, 6, 1, 0), 'x': 1.0, 'y': 2.0},
{'dt': datetime(2017, 6, 1, 1), 'x': 2.0, 'y': 2.0},
{'dt': datetime(2017, 6, 1, 2), 'x': 3.0, 'y': 1.5}
]
df = DataFrame(data).set_index('dt')
# Error as we don't have 'z' column
with pytest.raises(KeyError):
df.resample('30T').agg({'x': ['mean'],
'y': ['median'],
'z': ['sum']})
def test_selection_api_validation():
# GH 13500
index = date_range(datetime(2005, 1, 1),
datetime(2005, 1, 10), freq='D')
rng = np.arange(len(index), dtype=np.int64)
df = DataFrame({'date': index, 'a': rng},
index=pd.MultiIndex.from_arrays([rng, index],
names=['v', 'd']))
df_exp = DataFrame({'a': rng}, index=index)
# non DatetimeIndex
with pytest.raises(TypeError):
df.resample('2D', level='v')
with pytest.raises(ValueError):
df.resample('2D', on='date', level='d')
with pytest.raises(TypeError):
df.resample('2D', on=['a', 'date'])
with pytest.raises(KeyError):
df.resample('2D', level=['a', 'date'])
# upsampling not allowed
with pytest.raises(ValueError):
df.resample('2D', level='d').asfreq()
with pytest.raises(ValueError):
df.resample('2D', on='date').asfreq()
exp = df_exp.resample('2D').sum()
exp.index.name = 'date'
assert_frame_equal(exp, df.resample('2D', on='date').sum())
exp.index.name = 'd'
assert_frame_equal(exp, df.resample('2D', level='d').sum())
@@ -0,0 +1,260 @@
# pylint: disable=E1101
from textwrap import dedent
import numpy as np
from pandas.compat import range
import pandas as pd
from pandas import DataFrame, Series, Timestamp
from pandas.core.indexes.datetimes import date_range
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal
test_frame = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8,
'B': np.arange(40)},
index=date_range('1/1/2000',
freq='s',
periods=40))
def test_tab_complete_ipython6_warning(ip):
from IPython.core.completer import provisionalcompleter
code = dedent("""\
import pandas.util.testing as tm
s = tm.makeTimeSeries()
rs = s.resample("D")
""")
ip.run_code(code)
with tm.assert_produces_warning(None):
with provisionalcompleter('ignore'):
list(ip.Completer.completions('rs.', 1))
def test_deferred_with_groupby():
# GH 12486
# support deferred resample ops with groupby
data = [['2010-01-01', 'A', 2], ['2010-01-02', 'A', 3],
['2010-01-05', 'A', 8], ['2010-01-10', 'A', 7],
['2010-01-13', 'A', 3], ['2010-01-01', 'B', 5],
['2010-01-03', 'B', 2], ['2010-01-04', 'B', 1],
['2010-01-11', 'B', 7], ['2010-01-14', 'B', 3]]
df = DataFrame(data, columns=['date', 'id', 'score'])
df.date = pd.to_datetime(df.date)
def f(x):
return x.set_index('date').resample('D').asfreq()
expected = df.groupby('id').apply(f)
result = df.set_index('date').groupby('id').resample('D').asfreq()
assert_frame_equal(result, expected)
df = DataFrame({'date': pd.date_range(start='2016-01-01',
periods=4,
freq='W'),
'group': [1, 1, 2, 2],
'val': [5, 6, 7, 8]}).set_index('date')
def f(x):
return x.resample('1D').ffill()
expected = df.groupby('group').apply(f)
result = df.groupby('group').resample('1D').ffill()
assert_frame_equal(result, expected)
def test_getitem():
g = test_frame.groupby('A')
expected = g.B.apply(lambda x: x.resample('2s').mean())
result = g.resample('2s').B.mean()
assert_series_equal(result, expected)
result = g.B.resample('2s').mean()
assert_series_equal(result, expected)
result = g.resample('2s').mean().B
assert_series_equal(result, expected)
def test_getitem_multiple():
# GH 13174
# multiple calls after selection causing an issue with aliasing
data = [{'id': 1, 'buyer': 'A'}, {'id': 2, 'buyer': 'B'}]
df = DataFrame(data, index=pd.date_range('2016-01-01', periods=2))
r = df.groupby('id').resample('1D')
result = r['buyer'].count()
expected = Series([1, 1],
index=pd.MultiIndex.from_tuples(
[(1, Timestamp('2016-01-01')),
(2, Timestamp('2016-01-02'))],
names=['id', None]),
name='buyer')
assert_series_equal(result, expected)
result = r['buyer'].count()
assert_series_equal(result, expected)
def test_groupby_resample_on_api_with_getitem():
# GH 17813
df = pd.DataFrame({'id': list('aabbb'),
'date': pd.date_range('1-1-2016', periods=5),
'data': 1})
exp = df.set_index('date').groupby('id').resample('2D')['data'].sum()
result = df.groupby('id').resample('2D', on='date')['data'].sum()
assert_series_equal(result, exp)
def test_nearest():
# GH 17496
# Resample nearest
index = pd.date_range('1/1/2000', periods=3, freq='T')
result = Series(range(3), index=index).resample('20s').nearest()
expected = Series(
[0, 0, 1, 1, 1, 2, 2],
index=pd.DatetimeIndex(
['2000-01-01 00:00:00', '2000-01-01 00:00:20',
'2000-01-01 00:00:40', '2000-01-01 00:01:00',
'2000-01-01 00:01:20', '2000-01-01 00:01:40',
'2000-01-01 00:02:00'],
dtype='datetime64[ns]',
freq='20S'))
assert_series_equal(result, expected)
def test_methods():
g = test_frame.groupby('A')
r = g.resample('2s')
for f in ['first', 'last', 'median', 'sem', 'sum', 'mean',
'min', 'max']:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample('2s'), f)())
assert_frame_equal(result, expected)
for f in ['size']:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample('2s'), f)())
assert_series_equal(result, expected)
for f in ['count']:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample('2s'), f)())
assert_frame_equal(result, expected)
# series only
for f in ['nunique']:
result = getattr(r.B, f)()
expected = g.B.apply(lambda x: getattr(x.resample('2s'), f)())
assert_series_equal(result, expected)
for f in ['nearest', 'backfill', 'ffill', 'asfreq']:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample('2s'), f)())
assert_frame_equal(result, expected)
result = r.ohlc()
expected = g.apply(lambda x: x.resample('2s').ohlc())
assert_frame_equal(result, expected)
for f in ['std', 'var']:
result = getattr(r, f)(ddof=1)
expected = g.apply(lambda x: getattr(x.resample('2s'), f)(ddof=1))
assert_frame_equal(result, expected)
def test_apply():
g = test_frame.groupby('A')
r = g.resample('2s')
# reduction
expected = g.resample('2s').sum()
def f(x):
return x.resample('2s').sum()
result = r.apply(f)
assert_frame_equal(result, expected)
def f(x):
return x.resample('2s').apply(lambda y: y.sum())
result = g.apply(f)
assert_frame_equal(result, expected)
def test_apply_with_mutated_index():
# GH 15169
index = pd.date_range('1-1-2015', '12-31-15', freq='D')
df = DataFrame(data={'col1': np.random.rand(len(index))}, index=index)
def f(x):
s = Series([1, 2], index=['a', 'b'])
return s
expected = df.groupby(pd.Grouper(freq='M')).apply(f)
result = df.resample('M').apply(f)
assert_frame_equal(result, expected)
# A case for series
expected = df['col1'].groupby(pd.Grouper(freq='M')).apply(f)
result = df['col1'].resample('M').apply(f)
assert_series_equal(result, expected)
def test_resample_groupby_with_label():
# GH 13235
index = date_range('2000-01-01', freq='2D', periods=5)
df = DataFrame(index=index,
data={'col0': [0, 0, 1, 1, 2], 'col1': [1, 1, 1, 1, 1]}
)
result = df.groupby('col0').resample('1W', label='left').sum()
mi = [np.array([0, 0, 1, 2]),
pd.to_datetime(np.array(['1999-12-26', '2000-01-02',
'2000-01-02', '2000-01-02'])
)
]
mindex = pd.MultiIndex.from_arrays(mi, names=['col0', None])
expected = DataFrame(data={'col0': [0, 0, 2, 2], 'col1': [1, 1, 2, 1]},
index=mindex
)
assert_frame_equal(result, expected)
def test_consistency_with_window():
# consistent return values with window
df = test_frame
expected = pd.Int64Index([1, 2, 3], name='A')
result = df.groupby('A').resample('2s').mean()
assert result.index.nlevels == 2
tm.assert_index_equal(result.index.levels[0], expected)
result = df.groupby('A').rolling(20).mean()
assert result.index.nlevels == 2
tm.assert_index_equal(result.index.levels[0], expected)
def test_median_duplicate_columns():
# GH 14233
df = DataFrame(np.random.randn(20, 3),
columns=list('aaa'),
index=pd.date_range('2012-01-01', periods=20, freq='s'))
df2 = df.copy()
df2.columns = ['a', 'b', 'c']
expected = df2.resample('5s').median()
result = df.resample('5s').median()
expected.columns = result.columns
assert_frame_equal(result, expected)
@@ -0,0 +1,287 @@
from datetime import datetime
from operator import methodcaller
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Panel, Series
from pandas.core.indexes.datetimes import date_range
from pandas.core.resample import TimeGrouper
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal
test_series = Series(np.random.randn(1000),
index=date_range('1/1/2000', periods=1000))
def test_apply():
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
grouper = pd.TimeGrouper(freq='A', label='right', closed='right')
grouped = test_series.groupby(grouper)
def f(x):
return x.sort_values()[-3:]
applied = grouped.apply(f)
expected = test_series.groupby(lambda x: x.year).apply(f)
applied.index = applied.index.droplevel(0)
expected.index = expected.index.droplevel(0)
assert_series_equal(applied, expected)
def test_count():
test_series[::3] = np.nan
expected = test_series.groupby(lambda x: x.year).count()
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
grouper = pd.TimeGrouper(freq='A', label='right', closed='right')
result = test_series.groupby(grouper).count()
expected.index = result.index
assert_series_equal(result, expected)
result = test_series.resample('A').count()
expected.index = result.index
assert_series_equal(result, expected)
def test_numpy_reduction():
result = test_series.resample('A', closed='right').prod()
expected = test_series.groupby(lambda x: x.year).agg(np.prod)
expected.index = result.index
assert_series_equal(result, expected)
def test_apply_iteration():
# #2300
N = 1000
ind = pd.date_range(start="2000-01-01", freq="D", periods=N)
df = DataFrame({'open': 1, 'close': 2}, index=ind)
tg = TimeGrouper('M')
_, grouper, _ = tg._get_grouper(df)
# Errors
grouped = df.groupby(grouper, group_keys=False)
def f(df):
return df['close'] / df['open']
# it works!
result = grouped.apply(f)
tm.assert_index_equal(result.index, df.index)
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_panel_aggregation():
ind = pd.date_range('1/1/2000', periods=100)
data = np.random.randn(2, len(ind), 4)
wp = Panel(data, items=['Item1', 'Item2'], major_axis=ind,
minor_axis=['A', 'B', 'C', 'D'])
tg = TimeGrouper('M', axis=1)
_, grouper, _ = tg._get_grouper(wp)
bingrouped = wp.groupby(grouper)
binagg = bingrouped.mean()
def f(x):
assert (isinstance(x, Panel))
return x.mean(1)
result = bingrouped.agg(f)
tm.assert_panel_equal(result, binagg)
@pytest.mark.parametrize('name, func', [
('Int64Index', tm.makeIntIndex),
('Index', tm.makeUnicodeIndex),
('Float64Index', tm.makeFloatIndex),
('MultiIndex', lambda m: tm.makeCustomIndex(m, 2))
])
def test_fails_on_no_datetime_index(name, func):
n = 2
index = func(n)
df = DataFrame({'a': np.random.randn(n)}, index=index)
msg = ("Only valid with DatetimeIndex, TimedeltaIndex "
"or PeriodIndex, but got an instance of %r" % name)
with pytest.raises(TypeError, match=msg):
df.groupby(TimeGrouper('D'))
def test_aaa_group_order():
# GH 12840
# check TimeGrouper perform stable sorts
n = 20
data = np.random.randn(n, 4)
df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2),
datetime(2013, 1, 3), datetime(2013, 1, 4),
datetime(2013, 1, 5)] * 4
grouped = df.groupby(TimeGrouper(key='key', freq='D'))
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)),
df[::5])
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 2)),
df[1::5])
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 3)),
df[2::5])
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 4)),
df[3::5])
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)),
df[4::5])
def test_aggregate_normal(resample_method):
"""Check TimeGrouper's aggregation is identical as normal groupby."""
if resample_method == 'ohlc':
pytest.xfail(reason='DataError: No numeric types to aggregate')
data = np.random.randn(20, 4)
normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
normal_df['key'] = [1, 2, 3, 4, 5] * 4
dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2),
datetime(2013, 1, 3), datetime(2013, 1, 4),
datetime(2013, 1, 5)] * 4
normal_grouped = normal_df.groupby('key')
dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))
expected = getattr(normal_grouped, resample_method)()
dt_result = getattr(dt_grouped, resample_method)()
expected.index = date_range(start='2013-01-01', freq='D',
periods=5, name='key')
tm.assert_equal(expected, dt_result)
# if TimeGrouper is used included, 'nth' doesn't work yet
"""
for func in ['nth']:
expected = getattr(normal_grouped, func)(3)
expected.index = date_range(start='2013-01-01',
freq='D', periods=5, name='key')
dt_result = getattr(dt_grouped, func)(3)
assert_frame_equal(expected, dt_result)
"""
@pytest.mark.parametrize('method, method_args, unit', [
('sum', dict(), 0),
('sum', dict(min_count=0), 0),
('sum', dict(min_count=1), np.nan),
('prod', dict(), 1),
('prod', dict(min_count=0), 1),
('prod', dict(min_count=1), np.nan)
])
def test_resample_entirly_nat_window(method, method_args, unit):
s = pd.Series([0] * 2 + [np.nan] * 2,
index=pd.date_range('2017', periods=4))
result = methodcaller(method, **method_args)(s.resample("2d"))
expected = pd.Series([0.0, unit],
index=pd.to_datetime(['2017-01-01',
'2017-01-03']))
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize('func, fill_value', [
('min', np.nan),
('max', np.nan),
('sum', 0),
('prod', 1),
('count', 0),
])
def test_aggregate_with_nat(func, fill_value):
# check TimeGrouper's aggregation is identical as normal groupby
# if NaT is included, 'var', 'std', 'mean', 'first','last'
# and 'nth' doesn't work yet
n = 20
data = np.random.randn(n, 4).astype('int64')
normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
normal_df['key'] = [1, 2, np.nan, 4, 5] * 4
dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT,
datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4
normal_grouped = normal_df.groupby('key')
dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))
normal_result = getattr(normal_grouped, func)()
dt_result = getattr(dt_grouped, func)()
pad = DataFrame([[fill_value] * 4], index=[3],
columns=['A', 'B', 'C', 'D'])
expected = normal_result.append(pad)
expected = expected.sort_index()
expected.index = date_range(start='2013-01-01', freq='D',
periods=5, name='key')
assert_frame_equal(expected, dt_result)
assert dt_result.index.name == 'key'
def test_aggregate_with_nat_size():
# GH 9925
n = 20
data = np.random.randn(n, 4).astype('int64')
normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
normal_df['key'] = [1, 2, np.nan, 4, 5] * 4
dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT,
datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4
normal_grouped = normal_df.groupby('key')
dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))
normal_result = normal_grouped.size()
dt_result = dt_grouped.size()
pad = Series([0], index=[3])
expected = normal_result.append(pad)
expected = expected.sort_index()
expected.index = date_range(start='2013-01-01', freq='D',
periods=5, name='key')
assert_series_equal(expected, dt_result)
assert dt_result.index.name == 'key'
def test_repr():
# GH18203
result = repr(TimeGrouper(key='A', freq='H'))
expected = ("TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, "
"closed='left', label='left', how='mean', "
"convention='e', base=0)")
assert result == expected
@pytest.mark.parametrize('method, method_args, expected_values', [
('sum', dict(), [1, 0, 1]),
('sum', dict(min_count=0), [1, 0, 1]),
('sum', dict(min_count=1), [1, np.nan, 1]),
('sum', dict(min_count=2), [np.nan, np.nan, np.nan]),
('prod', dict(), [1, 1, 1]),
('prod', dict(min_count=0), [1, 1, 1]),
('prod', dict(min_count=1), [1, np.nan, 1]),
('prod', dict(min_count=2), [np.nan, np.nan, np.nan]),
])
def test_upsample_sum(method, method_args, expected_values):
s = pd.Series(1, index=pd.date_range("2017", periods=2, freq="H"))
resampled = s.resample("30T")
index = pd.to_datetime(['2017-01-01T00:00:00',
'2017-01-01T00:30:00',
'2017-01-01T01:00:00'])
result = methodcaller(method, **method_args)(resampled)
expected = pd.Series(expected_values, index=index)
tm.assert_series_equal(result, expected)
@@ -0,0 +1,128 @@
from datetime import timedelta
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from pandas.core.indexes.timedeltas import timedelta_range
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal
def test_asfreq_bug():
df = DataFrame(data=[1, 3],
index=[timedelta(), timedelta(minutes=3)])
result = df.resample('1T').asfreq()
expected = DataFrame(data=[1, np.nan, np.nan, 3],
index=timedelta_range('0 day',
periods=4,
freq='1T'))
assert_frame_equal(result, expected)
def test_resample_with_nat():
# GH 13223
index = pd.to_timedelta(['0s', pd.NaT, '2s'])
result = DataFrame({'value': [2, 3, 5]}, index).resample('1s').mean()
expected = DataFrame({'value': [2.5, np.nan, 5.0]},
index=timedelta_range('0 day',
periods=3,
freq='1S'))
assert_frame_equal(result, expected)
def test_resample_as_freq_with_subperiod():
# GH 13022
index = timedelta_range('00:00:00', '00:10:00', freq='5T')
df = DataFrame(data={'value': [1, 5, 10]}, index=index)
result = df.resample('2T').asfreq()
expected_data = {'value': [1, np.nan, np.nan, np.nan, np.nan, 10]}
expected = DataFrame(data=expected_data,
index=timedelta_range('00:00:00',
'00:10:00', freq='2T'))
tm.assert_frame_equal(result, expected)
def test_resample_with_timedeltas():
expected = DataFrame({'A': np.arange(1480)})
expected = expected.groupby(expected.index // 30).sum()
expected.index = pd.timedelta_range('0 days', freq='30T', periods=50)
df = DataFrame({'A': np.arange(1480)}, index=pd.to_timedelta(
np.arange(1480), unit='T'))
result = df.resample('30T').sum()
assert_frame_equal(result, expected)
s = df['A']
result = s.resample('30T').sum()
assert_series_equal(result, expected['A'])
def test_resample_single_period_timedelta():
s = Series(list(range(5)), index=pd.timedelta_range(
'1 day', freq='s', periods=5))
result = s.resample('2s').sum()
expected = Series([1, 5, 4], index=pd.timedelta_range(
'1 day', freq='2s', periods=3))
assert_series_equal(result, expected)
def test_resample_timedelta_idempotency():
# GH 12072
index = pd.timedelta_range('0', periods=9, freq='10L')
series = Series(range(9), index=index)
result = series.resample('10L').mean()
expected = series
assert_series_equal(result, expected)
def test_resample_base_with_timedeltaindex():
# GH 10530
rng = timedelta_range(start='0s', periods=25, freq='s')
ts = Series(np.random.randn(len(rng)), index=rng)
with_base = ts.resample('2s', base=5).mean()
without_base = ts.resample('2s').mean()
exp_without_base = timedelta_range(start='0s', end='25s', freq='2s')
exp_with_base = timedelta_range(start='5s', end='29s', freq='2s')
tm.assert_index_equal(without_base.index, exp_without_base)
tm.assert_index_equal(with_base.index, exp_with_base)
def test_resample_categorical_data_with_timedeltaindex():
# GH #12169
df = DataFrame({'Group_obj': 'A'},
index=pd.to_timedelta(list(range(20)), unit='s'))
df['Group'] = df['Group_obj'].astype('category')
result = df.resample('10s').agg(lambda x: (x.value_counts().index[0]))
expected = DataFrame({'Group_obj': ['A', 'A'],
'Group': ['A', 'A']},
index=pd.to_timedelta([0, 10], unit='s'))
expected = expected.reindex(['Group_obj', 'Group'], axis=1)
expected['Group'] = expected['Group_obj'].astype('category')
tm.assert_frame_equal(result, expected)
def test_resample_timedelta_values():
# GH 13119
# check that timedelta dtype is preserved when NaT values are
# introduced by the resampling
times = timedelta_range('1 day', '4 day', freq='4D')
df = DataFrame({'time': times}, index=times)
times2 = timedelta_range('1 day', '4 day', freq='2D')
exp = Series(times2, index=times2, name='time')
exp.iloc[1] = pd.NaT
res = df.resample('2D').first()['time']
tm.assert_series_equal(res, exp)
res = df['time'].resample('2D').first()
tm.assert_series_equal(res, exp)