demo + utils venv
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,142 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, Series
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
from pandas.core.indexes.period import period_range
|
||||
|
||||
# Resampler method names, grouped by the fixture that parametrizes over them.
downsample_methods = [
    'min', 'max', 'first', 'last', 'sum', 'mean', 'sem', 'median', 'prod',
    'var', 'std', 'ohlc', 'quantile',
]
upsample_methods = ['count', 'size']
series_methods = ['nunique']
# Union of the three lists, in the order downsample, upsample, series-only.
resample_methods = downsample_methods + upsample_methods + series_methods
|
||||
|
||||
|
||||
@pytest.fixture(params=downsample_methods)
def downsample_method(request):
    """Provide each name in ``downsample_methods`` as one fixture value."""
    method_name = request.param
    return method_name
|
||||
|
||||
|
||||
@pytest.fixture(params=upsample_methods)
def upsample_method(request):
    """Provide each name in ``upsample_methods`` as one fixture value."""
    method_name = request.param
    return method_name
|
||||
|
||||
|
||||
@pytest.fixture(params=resample_methods)
def resample_method(request):
    """Provide each name in ``resample_methods`` as one fixture value."""
    method_name = request.param
    return method_name
|
||||
|
||||
|
||||
@pytest.fixture
def simple_date_range_series():
    """
    Factory fixture returning a builder of random-valued Series.

    The builder takes ``start``, ``end`` and ``freq`` (default ``'D'``),
    creates a ``date_range`` index from them and fills it with
    ``np.random.randn`` values.
    """
    def _simple_date_range_series(start, end, freq='D'):
        dates = date_range(start, end, freq=freq)
        values = np.random.randn(len(dates))
        return Series(values, index=dates)

    return _simple_date_range_series
|
||||
|
||||
|
||||
@pytest.fixture
def simple_period_range_series():
    """
    Factory fixture returning a builder of random-valued Series.

    The builder takes ``start``, ``end`` and ``freq`` (default ``'D'``),
    creates a ``period_range`` index from them and fills it with
    ``np.random.randn`` values.
    """
    def _simple_period_range_series(start, end, freq='D'):
        periods = period_range(start, end, freq=freq)
        values = np.random.randn(len(periods))
        return Series(values, index=periods)

    return _simple_period_range_series
|
||||
|
||||
|
||||
@pytest.fixture
def _index_start():
    """Start bound handed to the index factory by the ``index`` fixture."""
    return datetime(year=2005, month=1, day=1)
|
||||
|
||||
|
||||
@pytest.fixture
def _index_end():
    """End bound handed to the index factory by the ``index`` fixture."""
    return datetime(year=2005, month=1, day=10)
|
||||
|
||||
|
||||
@pytest.fixture
def _index_freq():
    """Frequency string handed to the index factory by ``index``."""
    freq = 'D'
    return freq
|
||||
|
||||
|
||||
@pytest.fixture
def _index_name():
    """Name handed to the index factory by ``index``; ``None`` here."""
    name = None
    return name
|
||||
|
||||
|
||||
@pytest.fixture
def index(_index_factory, _index_start, _index_end, _index_freq, _index_name):
    """
    Build the index under test by calling ``_index_factory`` with the
    start/end bounds and the ``freq`` / ``name`` keyword arguments taken
    from the sibling fixtures.
    """
    factory_kwargs = {'freq': _index_freq, 'name': _index_name}
    return _index_factory(_index_start, _index_end, **factory_kwargs)
|
||||
|
||||
|
||||
@pytest.fixture
def _static_values(index):
    """
    Deterministic values for the ``series`` / ``frame`` fixtures:
    ``0 .. len(index) - 1`` as a NumPy array.
    """
    length = len(index)
    return np.arange(length)
|
||||
|
||||
|
||||
@pytest.fixture
def _series_name():
    """Name given to the Series built by ``series``; ``None`` here."""
    name = None
    return name
|
||||
|
||||
|
||||
@pytest.fixture
def series(index, _series_name, _static_values):
    """
    Series holding ``_static_values`` on the ``index`` fixture, named
    ``_series_name``.
    """
    return Series(data=_static_values, index=index, name=_series_name)
|
||||
|
||||
|
||||
@pytest.fixture
def empty_series(series):
    """Zero-length leading slice of the ``series`` fixture."""
    nothing = slice(None, 0)
    return series[nothing]
|
||||
|
||||
|
||||
@pytest.fixture
def frame(index, _series_name, _static_values):
    """
    DataFrame with a single ``'value'`` column holding ``_static_values``
    on the ``index`` fixture.
    """
    # _series_name is intentionally unused
    columns = {'value': _static_values}
    return DataFrame(data=columns, index=index)
|
||||
|
||||
|
||||
@pytest.fixture
def empty_frame(series):
    """
    DataFrame with no columns whose index is the zero-length leading
    slice of the ``series`` fixture's index.
    """
    return DataFrame(index=series.index[:0])
|
||||
|
||||
|
||||
@pytest.fixture(params=[Series, DataFrame])
def series_and_frame(request, series, frame):
    """
    Provide the ``series`` fixture when parametrized with ``Series`` and
    the ``frame`` fixture when parametrized with ``DataFrame``.
    """
    by_type = {Series: series, DataFrame: frame}
    # .get keeps the implicit ``None`` for any other param value.
    return by_type.get(request.param)
|
||||
@@ -0,0 +1,228 @@
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import range, zip
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series
|
||||
from pandas.core.groupby.groupby import DataError
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
from pandas.core.indexes.period import PeriodIndex, period_range
|
||||
from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range
|
||||
from pandas.core.resample import TimeGrouper
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import (
|
||||
assert_almost_equal, assert_frame_equal, assert_index_equal,
|
||||
assert_series_equal)
|
||||
|
||||
# a fixture value can be overridden by the test parameter value. Note that the
|
||||
# value of the fixture can be overridden this way even if the test doesn't use
|
||||
# it directly (doesn't mention it in the function prototype).
|
||||
# see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa
|
||||
# in this module we override the fixture values defined in conftest.py
|
||||
# tuples of '_index_factory,_series_name,_index_start,_index_end'
|
||||
DATE_RANGE = (
    date_range, 'dti', datetime(2005, 1, 1), datetime(2005, 1, 10),
)
# The period spec uses the same start/end datetimes as the date spec.
PERIOD_RANGE = (period_range, 'pi') + DATE_RANGE[2:]
TIMEDELTA_RANGE = (timedelta_range, 'tdi', '1 day', '10 day')

ALL_TIMESERIES_INDEXES = [DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE]
|
||||
|
||||
|
||||
def pytest_generate_tests(metafunc):
    """
    Parametrize tests whose function name ends in ``_all_ts`` over every
    entry of ``ALL_TIMESERIES_INDEXES``; other tests are left untouched.
    """
    # called once per each test function
    test_name = metafunc.function.__name__
    if not test_name.endswith('_all_ts'):
        return
    metafunc.parametrize(
        '_index_factory,_series_name,_index_start,_index_end',
        ALL_TIMESERIES_INDEXES)
|
||||
|
||||
|
||||
@pytest.fixture
def create_index(_index_factory):
    """Expose the current ``_index_factory`` as a callable fixture."""

    def _create_index(*args, **kwargs):
        """ forward args and kwargs unchanged to _index_factory """
        new_index = _index_factory(*args, **kwargs)
        return new_index

    return _create_index
|
||||
|
||||
|
||||
@pytest.mark.parametrize('freq', ['2D', '1H'])
@pytest.mark.parametrize(
    '_index_factory,_series_name,_index_start,_index_end',
    [DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq(series_and_frame, freq, create_index):
    """``resample(freq).asfreq()`` matches a reindex onto the new index."""
    obj = series_and_frame
    result = obj.resample(freq).asfreq()

    first, last = obj.index[0], obj.index[-1]
    expected = obj.reindex(create_index(first, last, freq=freq))
    assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    '_index_factory,_series_name,_index_start,_index_end',
    [DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq_fill_value(series, create_index):
    """Check ``asfreq`` with and without ``fill_value`` (issue 3715)."""
    s = series

    # Without a fill value: plain reindex onto the hourly index.
    hourly = create_index(s.index[0], s.index[-1], freq='1H')
    result = s.resample('1H').asfreq()
    assert_series_equal(result, s.reindex(hourly))

    # With a fill value: only rows introduced by upsampling get 4.0.
    frame = s.to_frame('value')
    frame.iloc[1] = None
    result = frame.resample('1H').asfreq(fill_value=4.0)
    hourly = create_index(frame.index[0], frame.index[-1], freq='1H')
    expected = frame.reindex(hourly, fill_value=4.0)
    assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_interpolate_all_ts(frame):
    """Resampler ``interpolate`` equals ``asfreq().interpolate()`` (GH 12925)."""
    df = frame
    via_asfreq = df.resample('1T').asfreq().interpolate()
    direct = df.resample('1T').interpolate()
    assert_frame_equal(via_asfreq, direct)
|
||||
|
||||
|
||||
def test_raises_on_non_datetimelike_index():
    """Resampling a frame without a datetime-like index raises TypeError."""
    # an empty DataFrame has a non datetimelike index
    xp = DataFrame()
    with pytest.raises(TypeError):
        xp.resample('A').mean()
|
||||
|
||||
|
||||
@pytest.mark.parametrize('freq', ['M', 'D', 'H'])
def test_resample_empty_series_all_ts(freq, empty_series, resample_method):
    """
    Each resample method on an empty Series returns an empty Series whose
    index carries the target frequency (GH12771 & GH12868).
    """
    if resample_method == 'ohlc':
        pytest.skip('need to test for ohlc from GH13083')

    s = empty_series
    aggregate = getattr(s.resample(freq), resample_method)
    result = aggregate()

    expected = s.copy()
    source_index = s.index
    if not isinstance(source_index, PeriodIndex):
        expected.index = source_index._shallow_copy(freq=freq)
    else:
        expected.index = source_index.asfreq(freq=freq)

    assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
    assert_series_equal(result, expected, check_dtype=False)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('freq', ['M', 'D', 'H'])
def test_resample_empty_dataframe_all_ts(empty_frame, freq, resample_method):
    """
    Each resample method on an empty DataFrame returns an empty result
    whose index carries the target frequency (GH13212).
    """
    df = empty_frame
    # count retains dimensions too
    result = getattr(df.resample(freq), resample_method)()

    # GH14962: 'size' is compared against an empty Series instead
    expected = Series([]) if resample_method == 'size' else df.copy()

    source_index = df.index
    if not isinstance(source_index, PeriodIndex):
        expected.index = source_index._shallow_copy(freq=freq)
    else:
        expected.index = source_index.asfreq(freq=freq)

    assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
    assert_almost_equal(result, expected, check_dtype=False)
|
||||
|
||||
# test size for GH13212 (currently stays as df)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
@pytest.mark.parametrize(
    "dtype",
    # The builtins are used instead of np.float / np.int / np.object:
    # those names were plain aliases of the builtins, were deprecated in
    # NumPy 1.20 and removed in 1.24, where they raise AttributeError
    # at collection time.
    [float, int, object, 'datetime64[ns]'])
def test_resample_empty_dtypes(index, dtype, resample_method):
    """
    Smoke test: run every resample method on an empty Series of each dtype.

    Empty series were sometimes causing a segfault (for the functions
    with Cython bounds-checking disabled) or an IndexError. We just run
    them to ensure they no longer do. (GH #10228)
    """
    empty_series = Series([], index, dtype)
    try:
        getattr(empty_series.resample('d'), resample_method)()
    except DataError:
        # Ignore these since some combinations are invalid
        # (ex: doing mean with dtype of object)
        pass
|
||||
|
||||
|
||||
def test_resample_loffset_arg_type_all_ts(frame, create_index):
    """
    ``loffset`` shifts the result index the same way for string, dict and
    list aggregation arguments (GH 13218, 15002).
    """
    df = frame
    values = df.values
    expected_means = [values[start:start + 2].mean()
                      for start in range(0, len(values), 2)]
    expected_index = create_index(
        df.index[0], periods=len(df.index) / 2, freq='2D')

    # loffset coerces PeriodIndex to DateTimeIndex
    if isinstance(expected_index, PeriodIndex):
        expected_index = expected_index.to_timestamp()

    expected_index += timedelta(hours=2)
    expected = DataFrame({'value': expected_means}, index=expected_index)

    for arg in ['mean', {'value': 'mean'}, ['mean']]:
        result_agg = df.resample('2D', loffset='2H').agg(arg)

        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result_how = df.resample('2D', how=arg, loffset='2H')

        if isinstance(arg, list):
            expected.columns = pd.MultiIndex.from_tuples(
                [('value', 'mean')])

        if not isinstance(expected.index, TimedeltaIndex):
            assert_frame_equal(result_agg, expected)
            assert_frame_equal(result_how, expected)
            continue

        # GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex
        with pytest.raises(AssertionError):
            assert_frame_equal(result_agg, expected)
            assert_frame_equal(result_how, expected)
|
||||
|
||||
|
||||
def test_apply_to_empty_series_all_ts(empty_series):
    """
    Applying a constant function and ``np.sum`` to an empty resampled
    Series give equal results for each frequency (GH 14313).
    """
    s = empty_series
    for freq in ('M', 'D', 'H'):
        expected = s.resample(freq).apply(np.sum)
        result = s.resample(freq).apply(lambda x: 1)
        assert_series_equal(result, expected, check_dtype=False)
|
||||
|
||||
|
||||
def test_resampler_is_iterable_all_ts(series):
    """
    Iterating a resampler yields the same (key, group) pairs as iterating
    the equivalent ``TimeGrouper`` groupby (GH 15314).
    """
    freq = 'H'
    grouped = series.groupby(TimeGrouper(freq, convention='start'))
    resampled = series.resample(freq)
    for resampled_item, grouped_item in zip(resampled, grouped):
        rk, rv = resampled_item
        gk, gv = grouped_item
        assert rk == gk
        assert_series_equal(rv, gv)
|
||||
|
||||
|
||||
def test_resample_quantile_all_ts(series):
    """
    Resampler ``quantile(q)`` equals aggregating with each group's own
    ``quantile(q)`` (GH 15023).
    """
    s = series
    q, freq = 0.75, 'H'
    result = s.resample(freq).quantile(q)
    aggregated = s.resample(freq).agg(lambda x: x.quantile(q))
    expected = aggregated.rename(s.name)
    tm.assert_series_equal(result, expected)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,759 @@
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import dateutil
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas._libs.tslibs.ccalendar import DAYS, MONTHS
|
||||
from pandas._libs.tslibs.period import IncompatibleFrequency
|
||||
from pandas.compat import lrange, range, zip
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series, Timestamp
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
from pandas.core.indexes.period import Period, PeriodIndex, period_range
|
||||
from pandas.core.resample import _get_period_range_edges
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import (
|
||||
assert_almost_equal, assert_frame_equal, assert_series_equal)
|
||||
|
||||
import pandas.tseries.offsets as offsets
|
||||
|
||||
|
||||
@pytest.fixture()
def _index_factory():
    """Index factory for this module: ``period_range``."""
    factory = period_range
    return factory
|
||||
|
||||
|
||||
@pytest.fixture
def _series_name():
    """Series name used by this module's ``series`` fixture: ``'pi'``."""
    name = 'pi'
    return name
|
||||
|
||||
|
||||
class TestPeriodIndex(object):
|
||||
|
||||
@pytest.mark.parametrize('freq', ['2D', '1H', '2H'])
@pytest.mark.parametrize('kind', ['period', None, 'timestamp'])
def test_asfreq(self, series_and_frame, freq, kind):
    """
    GH 12884, 15944: ``.asfreq()`` on a period-indexed object returns a
    PeriodIndex unless ``kind='timestamp'``.
    """
    obj = series_and_frame
    stamped = obj.to_timestamp()
    if kind != 'timestamp':
        start = obj.index[0].to_timestamp(how='start')
        # one period past the last label, so the last period is covered
        end = (obj.index[-1] + obj.index.freq).to_timestamp(how='start')
        new_index = date_range(start=start, end=end, freq=freq,
                               closed='left')
        expected = stamped.reindex(new_index).to_period(freq)
    else:
        expected = stamped.resample(freq).asfreq()
    result = obj.resample(freq, kind=kind).asfreq()
    assert_almost_equal(result, expected)
|
||||
|
||||
def test_asfreq_fill_value(self, series):
    """``asfreq(fill_value=...)`` with ``kind='timestamp'`` (issue 3715)."""
    s = series

    # Series case, fill value 4.0
    first = s.index[0].to_timestamp(how='start')
    last = (s.index[-1]).to_timestamp(how='start')
    hourly = date_range(first, last, freq='1H')
    expected = s.to_timestamp().reindex(hourly, fill_value=4.0)
    result = s.resample('1H', kind='timestamp').asfreq(fill_value=4.0)
    assert_series_equal(result, expected)

    # DataFrame case, fill value 3.0
    frame = s.to_frame('value')
    first = frame.index[0].to_timestamp(how='start')
    last = (frame.index[-1]).to_timestamp(how='start')
    hourly = date_range(first, last, freq='1H')
    expected = frame.to_timestamp().reindex(hourly, fill_value=3.0)
    result = frame.resample('1H', kind='timestamp').asfreq(fill_value=3.0)
    assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('freq', ['H', '12H', '2D', 'W'])
@pytest.mark.parametrize('kind', [None, 'period', 'timestamp'])
def test_selection(self, index, freq, kind):
    """
    Resampling with ``on=`` or ``level=`` raises NotImplementedError here.

    This is a bug, these should be implemented (GH 14008).
    """
    rng = np.arange(len(index), dtype=np.int64)
    multi = pd.MultiIndex.from_arrays([rng, index], names=['v', 'd'])
    df = DataFrame({'date': index, 'a': rng}, index=multi)
    for selector in ({'on': 'date'}, {'level': 'd'}):
        with pytest.raises(NotImplementedError):
            df.resample(freq, kind=kind, **selector)
|
||||
|
||||
@pytest.mark.parametrize('month', MONTHS)
@pytest.mark.parametrize('meth', ['ffill', 'bfill'])
@pytest.mark.parametrize('conv', ['start', 'end'])
@pytest.mark.parametrize('targ', ['D', 'B', 'M'])
def test_annual_upsample_cases(self, targ, conv, meth, month,
                               simple_period_range_series):
    """
    Upsample an annual period Series and compare with the
    timestamp-based ``asfreq`` fill of the same result.
    """
    annual_freq = 'A-%s' % month
    ts = simple_period_range_series('1/1/1990', '12/31/1991',
                                    freq=annual_freq)

    fill = getattr(ts.resample(targ, convention=conv), meth)
    result = fill()
    stamped = result.to_timestamp(targ, how=conv)
    expected = stamped.asfreq(targ, meth).to_period()
    assert_series_equal(result, expected)
|
||||
|
||||
def test_basic_downsample(self, simple_period_range_series):
    """Monthly-to-annual mean equals a groupby on the index year."""
    ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='M')
    result = ts.resample('a-dec').mean()

    expected = ts.groupby(ts.index.year).mean()
    expected.index = period_range('1/1/1990', '6/30/1995', freq='a-dec')
    assert_series_equal(result, expected)

    # this is ok
    for alias in ('a-dec', 'a'):
        assert_series_equal(ts.resample(alias).mean(), result)
|
||||
|
||||
def test_not_subperiod(self, simple_period_range_series):
    """Resampling weekly periods to these rules raises ValueError."""
    # These are incompatible period rules for resampling
    ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='w-wed')
    for rule in ('a-dec', 'q-mar', 'M', 'w-thu'):
        with pytest.raises(ValueError):
            ts.resample(rule).mean()
|
||||
|
||||
@pytest.mark.parametrize('freq', ['D', '2D'])
def test_basic_upsample(self, freq, simple_period_range_series):
    """
    Forward-fill upsampling of annual means equals the timestamp-based
    ``asfreq`` forward fill converted back to periods.
    """
    ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='M')
    annual = ts.resample('a-dec').mean()

    resampled = annual.resample(freq, convention='end').ffill()
    stamped = annual.to_timestamp(freq, how='end')
    expected = stamped.asfreq(freq, 'ffill').to_period(freq)
    assert_series_equal(resampled, expected)
|
||||
|
||||
def test_upsample_with_limit(self):
    """``ffill(limit=2)`` on a resampler equals a limited reindex fill."""
    rng = period_range('1/1/2000', periods=5, freq='A')
    values = np.random.randn(len(rng))
    ts = Series(values, rng)

    result = ts.resample('M', convention='end').ffill(limit=2)
    monthly = ts.asfreq('M')
    expected = monthly.reindex(result.index, method='ffill', limit=2)
    assert_series_equal(result, expected)
|
||||
|
||||
def test_annual_upsample(self, simple_period_range_series):
    """Annual-to-daily and annual-to-monthly forward-fill upsampling."""
    # A frame column and the bare Series upsample identically.
    ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='A-DEC')
    df = DataFrame({'a': ts})
    exp = df['a'].resample('D').ffill()
    rdf = df.resample('D').ffill()
    assert_series_equal(rdf['a'], exp)

    # Monthly upsample equals a forward-filled reindex of asfreq('M').
    rng = period_range('2000', '2003', freq='A-DEC')
    ts = Series([1, 2, 3, 4], index=rng)
    result = ts.resample('M').ffill()

    ex_index = period_range('2000-01', '2003-12', freq='M')
    monthly_start = ts.asfreq('M', how='start')
    expected = monthly_start.reindex(ex_index, method='ffill')
    assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('month', MONTHS)
@pytest.mark.parametrize('target', ['D', 'B', 'M'])
@pytest.mark.parametrize('convention', ['start', 'end'])
def test_quarterly_upsample(self, month, target, convention,
                            simple_period_range_series):
    """
    Forward-fill upsampling of a quarterly Series equals the
    timestamp-based ``asfreq`` forward fill of the same result.
    """
    freq = 'Q-{month}'.format(month=month)
    ts = simple_period_range_series('1/1/1990', '12/31/1995', freq=freq)
    result = ts.resample(target, convention=convention).ffill()
    stamped = result.to_timestamp(target, how=convention)
    expected = stamped.asfreq(target, 'ffill').to_period()
    assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('target', ['D', 'B'])
@pytest.mark.parametrize('convention', ['start', 'end'])
def test_monthly_upsample(self, target, convention,
                          simple_period_range_series):
    """
    Forward-fill upsampling of a monthly Series equals the
    timestamp-based ``asfreq`` forward fill of the same result.
    """
    ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='M')
    result = ts.resample(target, convention=convention).ffill()
    stamped = result.to_timestamp(target, how=convention)
    expected = stamped.asfreq(target, 'ffill').to_period()
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_basic(self):
    """Per-minute mean with ``kind='period'`` skips NaN values (GH3609)."""
    seconds = date_range('20130101', freq='s', periods=100, name='idx')
    s = Series(range(100), index=seconds, dtype='float')
    s[10:30] = np.nan

    minutes = [Period('2013-01-01 00:00', 'T'),
               Period('2013-01-01 00:01', 'T')]
    expected = Series([34.5, 79.5],
                      index=PeriodIndex(minutes, name='idx'))

    # from a period-indexed Series
    result = s.to_period().resample('T', kind='period').mean()
    assert_series_equal(result, expected)

    # straight from the datetime-indexed Series
    result2 = s.resample('T', kind='period').mean()
    assert_series_equal(result2, expected)
|
||||
|
||||
@pytest.mark.parametrize('freq,expected_vals', [('M', [31, 29, 31, 9]),
                                                ('2M', [31 + 29, 31 + 9])])
def test_resample_count(self, freq, expected_vals):
    """Counts per bin for 100 daily periods starting in 2000 (GH12774)."""
    daily = pd.period_range(start='2000', periods=100)
    series = Series(1, index=daily)
    result = series.resample(freq).count()

    n_bins = len(expected_vals)
    expected_index = pd.period_range(start='2000', freq=freq,
                                     periods=n_bins)
    expected = Series(expected_vals, index=expected_index)
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_same_freq(self, resample_method):
    """Resampling to the Series' own frequency is a no-op (GH12770)."""
    monthly = pd.period_range(start='2000', periods=3, freq='M')
    series = Series(range(3), index=monthly)
    expected = series

    aggregate = getattr(series.resample('M'), resample_method)
    result = aggregate()
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_incompat_freq(self):
    """Monthly periods resampled to weekly raise IncompatibleFrequency."""
    with pytest.raises(IncompatibleFrequency):
        # Everything stays inside the context manager, as before.
        monthly = pd.period_range(start='2000', periods=3, freq='M')
        Series(range(3), index=monthly).resample('W').mean()
|
||||
|
||||
def test_with_local_timezone_pytz(self):
    """Daily period resample of a tz-converted Series (pytz); see gh-5430."""
    local_timezone = pytz.timezone('America/Los_Angeles')

    utc_midnight = dict(hour=0, minute=0, tzinfo=pytz.utc)
    start = datetime(year=2013, month=11, day=1, **utc_midnight)
    # 1 day later
    end = datetime(year=2013, month=11, day=2, **utc_midnight)

    hourly = pd.date_range(start, end, freq='H')
    series = Series(1, index=hourly).tz_convert(local_timezone)
    result = series.resample('D', kind='period').mean()

    # Create the expected series
    # Index is moved back a day with the timezone conversion from UTC to
    # Pacific
    daily = pd.period_range(start=start, end=end, freq='D')
    expected = Series(1, index=daily - offsets.Day())
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_with_pytz(self):
    """Daily mean of a tz-aware hourly Series keeps its tz (GH 13238)."""
    hourly = pd.date_range('2017-01-01', periods=48, freq="H",
                           tz="US/Eastern")
    s = Series(2, index=hourly)
    result = s.resample("D").mean()

    days = pd.DatetimeIndex(['2017-01-01', '2017-01-02'], tz="US/Eastern")
    expected = Series(2, index=days)
    assert_series_equal(result, expected)
    # Especially assert that the timezone is LMT for pytz
    assert result.index.tz == pytz.timezone('US/Eastern')
|
||||
|
||||
def test_with_local_timezone_dateutil(self):
    """Daily period resample of a tz-converted Series (dateutil); see gh-5430."""
    local_timezone = 'dateutil/America/Los_Angeles'

    start = datetime(year=2013, month=11, day=1, hour=0, minute=0,
                     tzinfo=dateutil.tz.tzutc())
    # 1 day later
    end = datetime(year=2013, month=11, day=2, hour=0, minute=0,
                   tzinfo=dateutil.tz.tzutc())

    hourly = pd.date_range(start, end, freq='H', name='idx')
    series = Series(1, index=hourly).tz_convert(local_timezone)
    result = series.resample('D', kind='period').mean()

    # Create the expected series
    # Index is moved back a day with the timezone conversion from UTC to
    # Pacific
    daily = pd.period_range(start=start, end=end, freq='D', name='idx')
    expected = Series(1, index=daily - offsets.Day())
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_nonexistent_time_bin_edge(self):
    """Resampling and grouping tz-aware data in two zones (GH 19375, GH 23742)."""
    # GH 19375
    quarter_hours = date_range('2017-03-12', '2017-03-12 1:45:00',
                               freq='15T')
    naive = Series(np.zeros(len(quarter_hours)), index=quarter_hours)
    expected = naive.tz_localize('US/Pacific')
    result = expected.resample('900S').mean()
    tm.assert_series_equal(result, expected)

    # GH 23742
    hours = date_range(start='2017-10-10', end='2017-10-20', freq='1H')
    hours = hours.tz_localize('UTC').tz_convert('America/Sao_Paulo')
    df = DataFrame(data=list(range(len(hours))), index=hours)
    result = df.groupby(pd.Grouper(freq='1D')).count()
    expected = date_range(start='2017-10-09', end='2017-10-20', freq='D',
                          tz="America/Sao_Paulo",
                          nonexistent='shift_forward', closed='left')
    tm.assert_index_equal(result.index, expected)
|
||||
|
||||
def test_resample_ambiguous_time_bin_edge(self):
    """Same-frequency resample of a Europe/London Series (GH 10117)."""
    idx = pd.date_range("2014-10-25 22:00:00", "2014-10-26 00:30:00",
                        freq="30T", tz="Europe/London")
    zeros = np.zeros(len(idx))
    expected = Series(zeros, index=idx)
    result = expected.resample('30T').mean()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_fill_method_and_how_upsample(self):
    """Forward fill, then ``last`` at the same frequency, changes nothing (GH2073)."""
    quarters = date_range('2010-01-01', periods=9, freq='Q')
    s = Series(np.arange(9, dtype='int64'), index=quarters)

    last = s.resample('M').ffill()
    refilled = s.resample('M').ffill()
    both = refilled.resample('M').last().astype('int64')
    assert_series_equal(last, both)
|
||||
|
||||
@pytest.mark.parametrize('day', DAYS)
@pytest.mark.parametrize('target', ['D', 'B'])
@pytest.mark.parametrize('convention', ['start', 'end'])
def test_weekly_upsample(self, day, target, convention,
                         simple_period_range_series):
    """
    Forward-fill upsampling of a weekly Series equals the
    timestamp-based ``asfreq`` forward fill of the same result.
    """
    freq = 'W-{day}'.format(day=day)
    ts = simple_period_range_series('1/1/1990', '12/31/1995', freq=freq)
    result = ts.resample(target, convention=convention).ffill()
    stamped = result.to_timestamp(target, how=convention)
    expected = stamped.asfreq(target, 'ffill').to_period()
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_to_timestamps(self, simple_period_range_series):
    """``kind='timestamp'`` equals converting to timestamps first."""
    ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='M')

    stamped = ts.to_timestamp(how='start')
    expected = stamped.resample('A-DEC').mean()
    result = ts.resample('A-DEC', kind='timestamp').mean()
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_to_quarterly(self, simple_period_range_series):
    """Annual-to-quarterly forward-fill upsampling for each year-end month."""
    for month in MONTHS:
        quarterly_freq = 'Q-%s' % month
        ts = simple_period_range_series(
            '1990', '1992', freq='A-%s' % month)
        quar_ts = ts.resample(quarterly_freq).ffill()

        stamps = ts.to_timestamp('D', how='start')
        first_day = ts.index[0].asfreq('D', 'start')
        last_day = ts.index[-1].asfreq('D', 'end')
        qdates = period_range(first_day, last_day, freq=quarterly_freq)

        expected = stamps.reindex(qdates.to_timestamp('D', 's'),
                                  method='ffill')
        expected.index = qdates

        assert_series_equal(quar_ts, expected)

    # conforms, but different month
    ts = simple_period_range_series('1990', '1992', freq='A-JUN')

    for how in ('start', 'end'):
        result = ts.resample('Q-MAR', convention=how).ffill()
        converted = ts.asfreq('Q-MAR', how=how)
        expected = converted.reindex(result.index, method='ffill')
        assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_fill_missing(self):
    """Annual forward fill over gaps equals the timestamp-based fill."""
    rng = PeriodIndex([2000, 2005, 2007, 2009], freq='A')
    s = Series(np.random.randn(4), index=rng)

    stamps = s.to_timestamp()
    filled = s.resample('A').ffill()
    expected = stamps.resample('A').ffill().to_period('A')
    assert_series_equal(filled, expected)
|
||||
|
||||
def test_cant_fill_missing_dups(self):
    """Forward-filling a period index with duplicate labels raises."""
    rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq='A')
    s = Series(np.random.randn(5), index=rng)
    with pytest.raises(Exception):
        s.resample('A').ffill()
|
||||
|
||||
@pytest.mark.parametrize('freq', ['5min'])
@pytest.mark.parametrize('kind', ['period', None, 'timestamp'])
def test_resample_5minute(self, freq, kind):
    """Mean of minutely periods per ``freq`` bin, for each ``kind``."""
    rng = period_range('1/1/2000', '1/5/2000', freq='T')
    ts = Series(np.random.randn(len(rng)), index=rng)

    expected = ts.to_timestamp().resample(freq).mean()
    wants_periods = kind != 'timestamp'
    if wants_periods:
        expected = expected.to_period(freq)

    result = ts.resample(freq, kind=kind).mean()
    assert_series_equal(result, expected)
|
||||
|
||||
def test_upsample_daily_business_daily(self, simple_period_range_series):
    """Upsample business-daily to daily, and daily to hourly, via ``asfreq``."""
    # business-daily -> daily
    ts = simple_period_range_series('1/1/2000', '2/1/2000', freq='B')
    result = ts.resample('D').asfreq()
    daily = period_range('1/3/2000', '2/1/2000')
    expected = ts.asfreq('D').reindex(daily)
    assert_series_equal(result, expected)

    # daily -> hourly, start convention
    ts = simple_period_range_series('1/1/2000', '2/1/2000')
    result = ts.resample('H', convention='s').asfreq()
    exp_rng = period_range('1/1/2000', '2/1/2000 23:00', freq='H')
    expected = ts.asfreq('H', how='s').reindex(exp_rng)
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_irregular_sparse(self):
    """Bin sizes of a leading subset match the full Series' bins."""
    dr = date_range(start='1/1/2012', freq='5min', periods=1000)
    s = Series(np.array(100), index=dr)
    # subset the data.
    subset = s[:'2012-01-04 06:55']

    result = subset.resample('10min').apply(len)
    full_counts = s.resample('10min').apply(len)
    expected = full_counts.loc[result.index]
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_weekly_all_na(self):
    """Resampling W-WED data to W-THU with ``asfreq`` gives all NaN."""
    rng = date_range('1/1/2000', periods=10, freq='W-WED')
    ts = Series(np.random.randn(len(rng)), index=rng)

    shifted = ts.resample('W-THU').asfreq()
    assert shifted.isna().all()

    filled = ts.resample('W-THU').asfreq().ffill()
    result = filled[:-1]
    expected = ts.asfreq('W-THU').ffill()
    assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_tz_localized(self):
    """Resampling tz-aware data: weekly/daily bins, #2245 and GH 6397."""
    days = date_range(start='2012-4-13', end='2012-5-1')
    series = Series(lrange(len(days)), days)

    utc_series = series.tz_localize('UTC')
    la_series = utc_series.tz_convert('America/Los_Angeles')

    result = la_series.resample('W').mean()

    # Expectation: drop the tzinfo, resample the naive wall-clock values,
    # then attach the zone again.
    la_naive = la_series.copy()
    naive_stamps = []
    for stamp in la_naive.index.to_pydatetime():
        naive_stamps.append(stamp.replace(tzinfo=None))
    la_naive.index = naive_stamps

    weekly_naive = la_naive.resample('W').mean()
    exp = weekly_naive.tz_localize('America/Los_Angeles')
    assert_series_equal(result, exp)

    # it works
    result = la_series.resample('D').mean()

    # #2245
    sydney = date_range('2001-09-20 15:59', '2001-09-20 16:00', freq='T',
                        tz='Australia/Sydney')
    two_points = Series([1, 2], index=sydney)

    result = two_points.resample('D', closed='right', label='right').mean()
    expected = Series(
        [1.5],
        index=date_range('2001-09-21', periods=1, freq='D',
                         tz='Australia/Sydney'))
    assert_series_equal(result, expected)

    # for good measure
    result = two_points.resample('D', kind='period').mean()
    expected = Series(
        [1.5], index=period_range('2001-09-20', periods=1, freq='D'))
    assert_series_equal(result, expected)

    # GH 6397
    # comparing an offset that doesn't propagate tz's
    hours = date_range('1/1/2011', periods=20000, freq='H')
    hours = hours.tz_localize('EST')
    frame = DataFrame(index=hours)
    frame['first'] = np.random.randn(len(hours))
    frame['second'] = np.cumsum(np.random.randn(len(hours)))

    annual_sum = frame.resample('A').sum()
    annual_mean = frame.resample('A').mean()
    expected = DataFrame({'first': annual_sum['first'],
                          'second': annual_mean['second']},
                         columns=['first', 'second'])

    aggregated = frame.resample('A').agg({'first': np.sum,
                                          'second': np.mean})
    result = aggregated.reindex(columns=['first', 'second'])
    assert_frame_equal(result, expected)
|
||||
|
||||
def test_closed_left_corner(self):
    """A leading NaN does not change left-closed 10-minute bins (#1465)."""
    # #1465
    minute_index = date_range(start='1/1/2012 9:30', freq='1min',
                              periods=21)
    s = Series(np.random.randn(21), index=minute_index)
    s[0] = np.nan
    without_first = s[1:]

    # right labels
    result = s.resample('10min', closed='left', label='right').mean()
    exp = without_first.resample('10min', closed='left',
                                 label='right').mean()
    assert_series_equal(result, exp)

    # left labels, additionally pinning the bin edges
    result = s.resample('10min', closed='left', label='left').mean()
    exp = without_first.resample('10min', closed='left',
                                 label='left').mean()

    ex_index = date_range(start='1/1/2012 9:30', freq='10min', periods=3)

    tm.assert_index_equal(result.index, ex_index)
    assert_series_equal(result, exp)
|
||||
|
||||
def test_quarterly_resampling(self):
    """Quarterly periods resampled annually match the timestamp route."""
    quarters = period_range('2000Q1', periods=10, freq='Q-DEC')
    series = Series(np.arange(10), index=quarters)

    result = series.resample('A').mean()

    annual_by_timestamp = series.to_timestamp().resample('A').mean()
    exp = annual_by_timestamp.to_period()
    assert_series_equal(result, exp)
|
||||
|
||||
def test_resample_weekly_bug_1726(self):
    """Smoke test: left-closed, left-labelled W-MON resample runs."""
    # 8/6/12 is a Monday
    days = date_range(start="8/6/2012", end="8/26/2012", freq="D")
    columns = ['open', 'high', 'low', 'close', 'vol']
    # row i holds the value i in every column
    rows = [[value] * 5 for value in range(len(days))]
    df = DataFrame(rows, columns=columns, index=days)

    # it works!
    df.resample('W-MON', closed='left', label='left').first()
|
||||
|
||||
def test_resample_with_dst_time_change(self):
    """12-hour bins across a Chicago DST change keep local labels."""
    # GH 15549
    utc_index = pd.DatetimeIndex(
        [1457537600000000000, 1458059600000000000]).tz_localize("UTC")
    index = utc_index.tz_convert('America/Chicago')
    df = pd.DataFrame([1, 2], index=index)

    resampled = df.resample('12h', closed='right', label='right')
    result = resampled.last().ffill()

    # the offset switches from -06:00 to -05:00 part-way through
    expected_index_values = [
        '2016-03-09 12:00:00-06:00',
        '2016-03-10 00:00:00-06:00',
        '2016-03-10 12:00:00-06:00',
        '2016-03-11 00:00:00-06:00',
        '2016-03-11 12:00:00-06:00',
        '2016-03-12 00:00:00-06:00',
        '2016-03-12 12:00:00-06:00',
        '2016-03-13 00:00:00-06:00',
        '2016-03-13 13:00:00-05:00',
        '2016-03-14 01:00:00-05:00',
        '2016-03-14 13:00:00-05:00',
        '2016-03-15 01:00:00-05:00',
        '2016-03-15 13:00:00-05:00',
    ]
    expected_index = pd.to_datetime(expected_index_values, utc=True)
    expected_index = expected_index.tz_convert('America/Chicago')

    # twelve forward-filled 1.0 values followed by the final 2.0
    expected = pd.DataFrame([1.0] * 12 + [2.0], index=expected_index)
    assert_frame_equal(result, expected)
|
||||
|
||||
def test_resample_bms_2752(self):
    """BMS resampling starts on the first business day of the month."""
    # GH2753
    business_days = pd.bdate_range('20000101', '20000201')
    empty_valued = Series(index=business_days)

    month_starts = empty_valued.resample("BMS").mean()
    back_to_daily = empty_valued.resample("BMS").mean().resample("B").mean()

    first_label = month_starts.index[0]
    assert first_label == Timestamp('20000103')
    assert first_label == back_to_daily.index[0]
|
||||
|
||||
# def test_monthly_convention_span(self):
|
||||
# rng = period_range('2000-01', periods=3, freq='M')
|
||||
# ts = Series(np.arange(3), index=rng)
|
||||
|
||||
# # hacky way to get same thing
|
||||
# exp_index = period_range('2000-01-01', '2000-03-31', freq='D')
|
||||
# expected = ts.asfreq('D', how='end').reindex(exp_index)
|
||||
# expected = expected.fillna(method='bfill')
|
||||
|
||||
# result = ts.resample('D', convention='span').mean()
|
||||
|
||||
# assert_series_equal(result, expected)
|
||||
|
||||
def test_default_right_closed_label(self):
    """For M/A/Q/W targets the defaults equal closed='right', label='right'."""
    # (source frequency, target frequency) pairs
    freq_pairs = [('D', 'M'), ('Q', 'A'), ('M', 'Q'), ('D', 'W')]

    for source_freq, target_freq in freq_pairs:
        idx = date_range(start='8/15/2012', periods=100, freq=source_freq)
        df = DataFrame(np.random.randn(len(idx), 2), idx)

        by_default = df.resample(target_freq).mean()
        explicit = df.resample(target_freq, closed='right',
                               label='right').mean()
        assert_frame_equal(by_default, explicit)
|
||||
|
||||
def test_default_left_closed_label(self):
    """For MS/AS/QS/D/H targets the defaults equal closed/label 'left'."""
    # (source frequency, target frequency) pairs
    freq_pairs = [('D', 'MS'), ('Q', 'AS'), ('M', 'QS'),
                  ('H', 'D'), ('T', 'H')]

    for source_freq, target_freq in freq_pairs:
        idx = date_range(start='8/15/2012', periods=100, freq=source_freq)
        df = DataFrame(np.random.randn(len(idx), 2), idx)

        by_default = df.resample(target_freq).mean()
        explicit = df.resample(target_freq, closed='left',
                               label='left').mean()
        assert_frame_equal(by_default, explicit)
|
||||
|
||||
def test_all_values_single_bin(self):
    """Twelve monthly periods of one year land in a single annual bin."""
    # 2070
    months = period_range(start="2012-01-01", end="2012-12-31", freq="M")
    monthly = Series(np.random.randn(len(months)), index=months)

    annual = monthly.resample("A").mean()
    # the only annual value must be the mean over all months
    tm.assert_almost_equal(annual[0], monthly.mean())
|
||||
|
||||
def test_evenly_divisible_with_no_extra_bins(self):
    """Resampling produces exactly the expected bins, with no extras."""
    # 4076
    # when the frequency is evenly divisible, sometimes extra bins
    nine_days = DataFrame(np.random.randn(9, 3),
                          index=date_range('2000-1-1', periods=9))
    result = nine_days.resample('5D').mean()

    first_bin = nine_days.iloc[0:5].mean()
    second_bin = nine_days.iloc[5:].mean()
    expected = pd.concat([first_bin, second_bin], axis=1).T
    expected.index = [Timestamp('2000-1-1'), Timestamp('2000-1-6')]
    assert_frame_equal(result, expected)

    # 28 days, two records per day
    days = date_range(start='2001-5-4', periods=28)
    key_one = {'REST_KEY': 1, 'DLY_TRN_QT': 80, 'DLY_SLS_AMT': 90,
               'COOP_DLY_TRN_QT': 30, 'COOP_DLY_SLS_AMT': 20}
    key_two = {'REST_KEY': 2, 'DLY_TRN_QT': 70, 'DLY_SLS_AMT': 10,
               'COOP_DLY_TRN_QT': 50, 'COOP_DLY_SLS_AMT': 20}
    df = DataFrame([key_one] * 28 + [key_two] * 28,
                   index=days.append(days)).sort_index()

    weeks = date_range('2001-5-4', periods=4, freq='7D')

    # each 7-day bin holds 14 rows
    weekly_count = {'REST_KEY': 14, 'DLY_TRN_QT': 14, 'DLY_SLS_AMT': 14,
                    'COOP_DLY_TRN_QT': 14, 'COOP_DLY_SLS_AMT': 14}
    expected = DataFrame([weekly_count] * 4, index=weeks)
    result = df.resample('7D').count()
    assert_frame_equal(result, expected)

    weekly_total = {'REST_KEY': 21, 'DLY_TRN_QT': 1050, 'DLY_SLS_AMT': 700,
                    'COOP_DLY_TRN_QT': 560, 'COOP_DLY_SLS_AMT': 280}
    expected = DataFrame([weekly_total] * 4, index=weeks)
    result = df.resample('7D').sum()
    assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('kind', ['period', None, 'timestamp'])
@pytest.mark.parametrize('agg_arg', ['mean', {'value': 'mean'}, ['mean']])
def test_loffset_returns_datetimeindex(self, frame, kind, agg_arg):
    """Passing ``loffset`` yields a DatetimeIndex for every ``kind``.

    The same expectation is checked for ``.agg(agg_arg)`` and for the
    deprecated ``how=`` keyword, which must emit a FutureWarning.
    """
    # make sure passing loffset returns DatetimeIndex in all cases
    # basic method taken from Base.test_resample_loffset_arg_type()
    df = frame
    expected_means = [df.values[i:i + 2].mean()
                      for i in range(0, len(df.values), 2)]
    # floor division: `periods` is a count and must be an integer
    expected_index = period_range(
        df.index[0], periods=len(df.index) // 2, freq='2D')

    # loffset coerces PeriodIndex to DateTimeIndex
    expected_index = expected_index.to_timestamp()
    expected_index += timedelta(hours=2)
    expected = DataFrame({'value': expected_means}, index=expected_index)

    result_agg = df.resample('2D', loffset='2H', kind=kind).agg(agg_arg)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result_how = df.resample('2D', how=agg_arg, loffset='2H',
                                 kind=kind)
    if isinstance(agg_arg, list):
        # a list of functions produces two-level columns
        expected.columns = pd.MultiIndex.from_tuples([('value', 'mean')])
    assert_frame_equal(result_agg, expected)
    assert_frame_equal(result_how, expected)
|
||||
|
||||
@pytest.mark.parametrize('freq, period_mult', [('H', 24), ('12H', 2)])
@pytest.mark.parametrize('kind', [None, 'period'])
def test_upsampling_ohlc(self, freq, period_mult, kind):
    """OHLC on upsampled daily periods spans all sub-periods (GH 13083)."""
    # GH 13083
    daily = period_range(start='2000', freq='D', periods=10)
    series = Series(range(len(daily)), index=daily)

    ohlc_by_timestamp = series.to_timestamp().resample(freq).ohlc()
    expected = ohlc_by_timestamp.to_period(freq)

    # timestamp-based resampling doesn't include all sub-periods
    # of the last original period, so extend accordingly:
    full_index = period_range(start='2000', freq=freq,
                              periods=period_mult * len(daily))
    expected = expected.reindex(full_index)

    result = series.resample(freq, kind=kind).ohlc()
    assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('periods, values',
                         [([pd.NaT, '1970-01-01 00:00:00', pd.NaT,
                            '1970-01-01 00:00:02', '1970-01-01 00:00:03'],
                           [2, 3, 5, 7, 11]),
                          ([pd.NaT, pd.NaT, '1970-01-01 00:00:00', pd.NaT,
                            pd.NaT, pd.NaT, '1970-01-01 00:00:02',
                            '1970-01-01 00:00:03', pd.NaT, pd.NaT],
                           [1, 2, 3, 5, 6, 8, 7, 11, 12, 13])])
@pytest.mark.parametrize('freq, expected_values',
                         [('1s', [3, np.nan, 7, 11]),
                          ('2s', [3, int((7 + 11) / 2)]),
                          ('3s', [int((3 + 7) / 2), 11])])
def test_resample_with_nat(self, periods, values, freq, expected_values):
    """Rows whose period is NaT are ignored when resampling (GH 13224).

    ``np.nan`` is used in the parametrization instead of the ``np.NaN``
    alias, which newer NumPy releases no longer provide.
    """
    # GH 13224
    index = PeriodIndex(periods, freq='S')
    frame = DataFrame(values, index=index)

    expected_index = period_range('1970-01-01 00:00:00',
                                  periods=len(expected_values), freq=freq)
    expected = DataFrame(expected_values, index=expected_index)
    result = frame.resample(freq).mean()
    assert_frame_equal(result, expected)
|
||||
|
||||
def test_resample_with_only_nat(self):
    """An all-NaT PeriodIndex resamples to an empty frame (GH 13224)."""
    # GH 13224
    all_nat = PeriodIndex([pd.NaT] * 3, freq='S')
    frame = DataFrame([2, 3, 5], index=all_nat)

    empty_index = PeriodIndex(data=[], freq=all_nat.freq)
    expected = DataFrame([], index=empty_index)

    result = frame.resample('1s').mean()
    assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('start,end,start_freq,end_freq,base', [
    ('19910905', '19910909 03:00', 'H', '24H', 10),
    ('19910905', '19910909 12:00', 'H', '24H', 10),
    ('19910905', '19910909 23:00', 'H', '24H', 10),
    ('19910905 10:00', '19910909', 'H', '24H', 10),
    ('19910905 10:00', '19910909 10:00', 'H', '24H', 10),
    ('19910905', '19910909 10:00', 'H', '24H', 10),
    ('19910905 12:00', '19910909', 'H', '24H', 10),
    ('19910905 12:00', '19910909 03:00', 'H', '24H', 10),
    ('19910905 12:00', '19910909 12:00', 'H', '24H', 10),
    ('19910905 12:00', '19910909 12:00', 'H', '24H', 34),
    ('19910905 12:00', '19910909 12:00', 'H', '17H', 10),
    ('19910905 12:00', '19910909 12:00', 'H', '17H', 3),
    ('19910905 12:00', '19910909 1:00', 'H', 'M', 3),
    ('19910905', '19910913 06:00', '2H', '24H', 10),
    ('19910905', '19910905 01:39', 'Min', '5Min', 3),
    ('19910905', '19910905 03:18', '2Min', '5Min', 3),
])
def test_resample_with_non_zero_base(self, start, end, start_freq,
                                     end_freq, base):
    """Period resampling with ``base`` matches the timestamp equivalent."""
    # GH 23882
    periods = pd.period_range(start, end, freq=start_freq)
    s = pd.Series(0, index=periods)
    s = s + np.arange(len(s))

    result = s.resample(end_freq, base=base).mean()
    result = result.to_timestamp(end_freq)
    # to_timestamp casts 24H -> D
    if end_freq == '24H':
        result = result.asfreq(end_freq)

    expected = s.to_timestamp().resample(end_freq, base=base).mean()
    assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('first,last,offset,exp_first,exp_last', [
    ('19910905', '19920406', 'D', '19910905', '19920406'),
    ('19910905 00:00', '19920406 06:00', 'D', '19910905', '19920406'),
    ('19910905 06:00', '19920406 06:00', 'H', '19910905 06:00',
     '19920406 06:00'),
    ('19910906', '19920406', 'M', '1991-09', '1992-04'),
    ('19910831', '19920430', 'M', '1991-08', '1992-04'),
    ('1991-08', '1992-04', 'M', '1991-08', '1992-04'),
])
def test_get_period_range_edges(self, first, last, offset,
                                exp_first, exp_last):
    """``_get_period_range_edges`` returns the expected pair of periods."""
    first_period = pd.Period(first)
    last_period = pd.Period(last)

    # expected edges are expressed at the target frequency
    expected = (pd.Period(exp_first, freq=offset),
                pd.Period(exp_last, freq=offset))

    as_offset = pd.tseries.frequencies.to_offset(offset)
    result = _get_period_range_edges(first_period, last_period, as_offset)
    assert result == expected
|
||||
@@ -0,0 +1,544 @@
|
||||
# pylint: disable=E1101
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import OrderedDict, range
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_frame_equal, assert_series_equal
|
||||
|
||||
# Shared module-level data: a minutely index covering 2005-01-01 through
# 2005-01-10, a random series on it, and a three-column frame built from
# that series plus an integer counter.
dti = date_range(
    start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq='Min')

test_series = Series(np.random.rand(len(dti)), dti)
test_frame = DataFrame({
    'A': test_series,
    'B': test_series,
    'C': np.arange(len(dti)),
})
|
||||
|
||||
|
||||
def test_str():
    """The Resampler's string form lists its configuration."""
    expected_fragment = (
        'DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, '
        'label=left, convention=start, base=0]')

    rendered = str(test_series.resample('H'))
    assert expected_fragment in rendered
|
||||
|
||||
|
||||
def test_api():
    """Hourly mean keeps the container type and yields 217 bins."""
    # series in -> Series out
    series_result = test_series.resample('H').mean()
    assert isinstance(series_result, Series)
    assert len(series_result) == 217

    # frame in -> DataFrame out
    frame_result = test_series.to_frame().resample('H').mean()
    assert isinstance(frame_result, DataFrame)
    assert len(frame_result) == 217
|
||||
|
||||
|
||||
def test_groupby_resample_api():
    """Resampling inside ``groupby.apply`` forward-fills per group."""
    # GH 12448
    # .groupby(...).resample(...) hitting warnings
    # when appropriate
    weekly_dates = pd.date_range(start='2016-01-01', periods=4, freq='W')
    df = DataFrame({'date': weekly_dates,
                    'group': [1, 1, 2, 2],
                    'val': [5, 6, 7, 8]}).set_index('date')

    # replication step
    group_one_days = pd.date_range('2016-01-03', periods=8).tolist()
    group_two_days = pd.date_range('2016-01-17', periods=8).tolist()
    all_days = group_one_days + group_two_days
    index = pd.MultiIndex.from_arrays([[1] * 8 + [2] * 8, all_days],
                                      names=['group', 'date'])
    expected = DataFrame({'val': [5] * 7 + [6] + [7] * 7 + [8]},
                         index=index)

    upsampled = df.groupby('group').apply(
        lambda x: x.resample('1D').ffill())
    result = upsampled[['val']]
    assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_api():
    """``groupby().resample(on=...)`` equals set_index + groupby + resample."""
    # GH 15021
    # .groupby(...).resample(on=...) results in an unexpected
    # keyword warning.
    df = DataFrame({'key': ['A', 'B'] * 5,
                    'dates': pd.date_range('2016-01-01', periods=10),
                    'values': np.random.randn(10)})

    indexed = df.set_index('dates')
    expected = indexed.groupby('key').resample('D').mean()

    result = df.groupby('key').resample('D', on='dates').mean()
    assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_pipe():
    """``Resampler.pipe`` returns whatever the piped callable returns."""
    # GH17905

    def spread(resampler):
        # max minus mean, per bin
        return resampler.max() - resampler.mean()

    # series
    series_resampler = test_series.resample('H')
    expected = series_resampler.max() - series_resampler.mean()
    result = series_resampler.pipe(spread)
    tm.assert_series_equal(result, expected)

    # dataframe
    frame_resampler = test_frame.resample('H')
    expected = frame_resampler.max() - frame_resampler.mean()
    result = frame_resampler.pipe(spread)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem():
    """Column selection on a Resampler narrows ``_selected_obj``.

    Checks the unselected resampler, a single column, a tuple of columns
    and a list of columns.
    """
    r = test_frame.resample('H')
    tm.assert_index_equal(r._selected_obj.columns, test_frame.columns)

    r = test_frame.resample('H')['B']
    assert r._selected_obj.name == test_frame.columns[1]

    # technically this is allowed
    r = test_frame.resample('H')['A', 'B']
    tm.assert_index_equal(r._selected_obj.columns,
                          test_frame.columns[[0, 1]])

    # list-based selection (previously a verbatim repeat of the tuple
    # case, which added no coverage)
    r = test_frame.resample('H')[['A', 'B']]
    tm.assert_index_equal(r._selected_obj.columns,
                          test_frame.columns[[0, 1]])
|
||||
|
||||
|
||||
def test_select_bad_cols():
    """Selecting a missing column from a Resampler raises KeyError."""
    g = test_frame.resample('H')

    with pytest.raises(KeyError):
        g[['D']]

    with pytest.raises(KeyError):
        g[['A', 'D']]

    with pytest.raises(KeyError, match='^[^A]+$'):
        # A should not be referenced as a bad column...
        # will have to rethink regex if you change message!
        g[['A', 'D']]
|
||||
|
||||
|
||||
def test_attribute_access():
    """Attribute access on a Resampler selects the same column as getitem."""
    resampler = test_frame.resample('H')
    by_attribute = resampler.A.sum()
    by_item = resampler['A'].sum()
    tm.assert_series_equal(by_attribute, by_item)
|
||||
|
||||
|
||||
def test_api_compat_before_use():
    """Grouping attributes are readable before and after an aggregation."""
    # make sure that we are setting the binner
    # on these attributes
    for attribute in ('groups', 'ngroups', 'indices'):
        # a fresh resampler per attribute, so nothing is initialised yet
        seconds = pd.date_range('1/1/2012', periods=100, freq='S')
        series = Series(np.arange(len(seconds)), index=seconds)
        resampler = series.resample('30s')

        # before use
        getattr(resampler, attribute)

        # after grouper is initialized is ok
        resampler.mean()
        getattr(resampler, attribute)
|
||||
|
||||
|
||||
def tests_skip_nuisance():
    """Non-numeric ("nuisance") columns are dropped from ``sum``.

    A string column 'D' is added to a private copy of the shared frame so
    the module-level ``test_frame`` is not modified for other tests.
    """
    # copy: assigning 'D' on the shared frame would leak into other tests
    df = test_frame.copy()
    df['D'] = 'foo'
    r = df.resample('H')

    # explicit selection of numeric columns
    result = r[['A', 'B']].sum()
    expected = pd.concat([r.A.sum(), r.B.sum()], axis=1)
    assert_frame_equal(result, expected)

    # no selection: 'D' must be skipped, leaving A, B and C
    expected = r[['A', 'B', 'C']].sum()
    result = r.sum()
    assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_downsample_but_actually_upsampling():
    """``resample('20s').asfreq()`` picks every 20th one-second value."""
    # this is reindex / asfreq
    seconds = pd.date_range('1/1/2012', periods=100, freq='S')
    series = Series(np.arange(len(seconds), dtype='int64'), index=seconds)

    result = series.resample('20s').asfreq()

    every_20s = pd.date_range('2012-01-01 00:00:00', freq='20s', periods=5)
    expected = Series([0, 20, 40, 60, 80], index=every_20s)
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_combined_up_downsampling_of_irregular():
    """Deprecated ``how``/``fill_method`` match ``.mean().ffill()``."""
    # since we are really doing an operation like this
    # ts2.resample('2s').mean().ffill()
    # preserve these semantics

    seconds = pd.date_range('1/1/2012', periods=100, freq='S')
    regular = Series(np.arange(len(seconds)), index=seconds)
    # irregularly spaced subset of the regular series
    positions = [0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]
    irregular = regular.iloc[positions]

    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        result = irregular.resample('2s', how='mean', fill_method='ffill')

    expected = irregular.resample('2s').mean().ffill()
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform():
    """``Resampler.transform`` matches the equivalent Grouper transform."""
    resampler = test_series.resample('20min')

    grouped = test_series.groupby(pd.Grouper(freq='20min'))
    expected = grouped.transform('mean')

    result = resampler.transform('mean')
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_fillna():
    """``fillna(method=...)`` mirrors ffill/bfill; a scalar is rejected."""
    # need to upsample here
    two_seconds = pd.date_range('1/1/2012', periods=10, freq='2S')
    series = Series(np.arange(len(two_seconds), dtype='int64'),
                    index=two_seconds)
    resampler = series.resample('s')

    # forward fill
    expected = resampler.ffill()
    result = resampler.fillna(method='ffill')
    assert_series_equal(result, expected)

    # backward fill
    expected = resampler.bfill()
    result = resampler.fillna(method='bfill')
    assert_series_equal(result, expected)

    # a fill value instead of a method is an error
    with pytest.raises(ValueError):
        resampler.fillna(0)
|
||||
|
||||
|
||||
def test_apply_without_aggregation():
    """An identity ``apply`` returns the original series unchanged."""
    # both resample and groupby should work w/o aggregation
    resampler = test_series.resample('20min')
    grouped = test_series.groupby(pd.Grouper(freq='20min'))

    for binned in (grouped, resampler):
        result = binned.apply(lambda x: x)
        assert_series_equal(result, test_series)
|
||||
|
||||
|
||||
def test_agg_consistency():
    """Renaming-dict agg gives the same frame with or without selection."""
    # make sure that we are consistent across
    # similar aggregations with and w/o selection list
    seconds = pd.date_range('1/1/2012', freq='S', periods=1000)
    df = DataFrame(np.random.randn(1000, 3), index=seconds,
                   columns=['A', 'B', 'C'])

    resampler = df.resample('3T')
    renamers = {'r1': 'mean', 'r2': 'sum'}

    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        expected = resampler[['A', 'B', 'C']].agg(renamers)
        result = resampler.agg(renamers)
    assert_frame_equal(result, expected)
|
||||
|
||||
# TODO: once GH 14008 is fixed, move these tests into
|
||||
# `Base` test class
|
||||
|
||||
|
||||
def test_agg():
    """``aggregate`` gives identical results across all resampling APIs.

    The same 2-day aggregation is run through a plain resample, a resample
    on a column, a resample on a MultiIndex level and a Grouper-based
    groupby, using lists, dicts and (deprecated) nested renaming dicts.
    """
    # test with all three Resampler apis and TimeGrouper

    np.random.seed(1234)
    index = date_range(datetime(2005, 1, 1),
                       datetime(2005, 1, 10), freq='D')
    index.name = 'date'
    df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index)
    df_col = df.reset_index()
    df_mult = df_col.copy()
    df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index],
                                              names=['index', 'date'])
    r = df.resample('2D')
    cases = [
        r,
        df_col.resample('2D', on='date'),
        df_mult.resample('2D', level='date'),
        df.groupby(pd.Grouper(freq='2D'))
    ]

    # reference pieces, all computed from the plain resampler
    a_mean = r['A'].mean()
    a_std = r['A'].std()
    a_sum = r['A'].sum()
    b_mean = r['B'].mean()
    b_std = r['B'].std()
    b_sum = r['B'].sum()

    # list of functions applied to every column
    expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
    expected.columns = pd.MultiIndex.from_product([['A', 'B'],
                                                   ['mean', 'std']])
    for t in cases:
        result = t.aggregate([np.mean, np.std])
        assert_frame_equal(result, expected)

    # one function per column
    expected = pd.concat([a_mean, b_std], axis=1)
    for t in cases:
        result = t.aggregate({'A': np.mean,
                              'B': np.std})
        assert_frame_equal(result, expected, check_like=True)

    # several functions for a single column
    expected = pd.concat([a_mean, a_std], axis=1)
    expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
                                                  ('A', 'std')])
    for t in cases:
        result = t.aggregate({'A': ['mean', 'std']})
        assert_frame_equal(result, expected)

    # list of functions on a selected column gives flat columns
    expected = pd.concat([a_mean, a_sum], axis=1)
    expected.columns = ['mean', 'sum']
    for t in cases:
        result = t['A'].aggregate(['mean', 'sum'])
        assert_frame_equal(result, expected)

    # nested renaming dict for one column (deprecated, must warn)
    expected = pd.concat([a_mean, a_sum], axis=1)
    expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
                                                  ('A', 'sum')])
    for t in cases:
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}})
        assert_frame_equal(result, expected, check_like=True)

    # nested renaming dicts for both columns (deprecated, must warn)
    expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
    expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
                                                  ('A', 'sum'),
                                                  ('B', 'mean2'),
                                                  ('B', 'sum2')])
    for t in cases:
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'},
                                  'B': {'mean2': 'mean', 'sum2': 'sum'}})
        assert_frame_equal(result, expected, check_like=True)

    # lists of functions per column
    expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
    expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
                                                  ('A', 'std'),
                                                  ('B', 'mean'),
                                                  ('B', 'std')])
    for t in cases:
        result = t.aggregate({'A': ['mean', 'std'],
                              'B': ['mean', 'std']})
        assert_frame_equal(result, expected, check_like=True)
|
||||
|
||||
|
||||
def test_agg_misc():
    """Miscellaneous ``agg`` forms agree across all resampling APIs.

    Covers lambdas, renaming dicts, per-column function lists, selection
    lists, series-style aggregation and an invalid-column error.
    Expectations shared by several cases are built from the plain
    resampler ``r`` rather than from a loop variable left over from an
    earlier ``for``.
    """
    # test with all three Resampler apis and TimeGrouper

    np.random.seed(1234)
    index = date_range(datetime(2005, 1, 1),
                       datetime(2005, 1, 10), freq='D')
    index.name = 'date'
    df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index)
    df_col = df.reset_index()
    df_mult = df_col.copy()
    df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index],
                                              names=['index', 'date'])

    r = df.resample('2D')
    cases = [
        r,
        df_col.resample('2D', on='date'),
        df_mult.resample('2D', level='date'),
        df.groupby(pd.Grouper(freq='2D'))
    ]

    # passed lambda
    for t in cases:
        result = t.agg({'A': np.sum,
                        'B': lambda x: np.std(x, ddof=1)})
        rcustom = t['B'].apply(lambda x: np.std(x, ddof=1))
        expected = pd.concat([r['A'].sum(), rcustom], axis=1)
        assert_frame_equal(result, expected, check_like=True)

    # agg with renamers
    expected = pd.concat([r['A'].sum(),
                          r['B'].sum(),
                          r['A'].mean(),
                          r['B'].mean()],
                         axis=1)
    expected.columns = pd.MultiIndex.from_tuples([('result1', 'A'),
                                                  ('result1', 'B'),
                                                  ('result2', 'A'),
                                                  ('result2', 'B')])

    for t in cases:
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = t[['A', 'B']].agg(OrderedDict([('result1', np.sum),
                                                    ('result2', np.mean)]))
        assert_frame_equal(result, expected, check_like=True)

    # agg with different hows
    expected = pd.concat([r['A'].sum(),
                          r['A'].std(),
                          r['B'].mean(),
                          r['B'].std()],
                         axis=1)
    expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'),
                                                  ('A', 'std'),
                                                  ('B', 'mean'),
                                                  ('B', 'std')])
    for t in cases:
        result = t.agg(OrderedDict([('A', ['sum', 'std']),
                                    ('B', ['mean', 'std'])]))
        assert_frame_equal(result, expected, check_like=True)

    # equivalent of using a selection list / or not
    for t in cases:
        result = t[['A', 'B']].agg({'A': ['sum', 'std'],
                                    'B': ['mean', 'std']})
        assert_frame_equal(result, expected, check_like=True)

    # series like aggs
    for t in cases:
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = t['A'].agg({'A': ['sum', 'std']})
        expected = pd.concat([t['A'].sum(),
                              t['A'].std()],
                             axis=1)
        expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'),
                                                      ('A', 'std')])
        assert_frame_equal(result, expected, check_like=True)

        # both keys are applied to the selected column 'A'
        expected = pd.concat([t['A'].agg(['sum', 'std']),
                              t['A'].agg(['mean', 'std'])],
                             axis=1)
        expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'),
                                                      ('A', 'std'),
                                                      ('B', 'mean'),
                                                      ('B', 'std')])
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = t['A'].agg({'A': ['sum', 'std'],
                                 'B': ['mean', 'std']})
        assert_frame_equal(result, expected, check_like=True)

    # errors
    # invalid names in the agg specification
    for t in cases:
        with pytest.raises(KeyError):
            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                t[['A']].agg({'A': ['sum', 'std'],
                              'B': ['mean', 'std']})
|
||||
|
||||
|
||||
def test_agg_nested_dicts():
    """Nested renaming dicts: doubly nested raises, single level warns."""
    np.random.seed(1234)
    index = date_range(datetime(2005, 1, 1),
                       datetime(2005, 1, 10), freq='D')
    index.name = 'date'
    df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index)
    df_col = df.reset_index()
    df_mult = df_col.copy()
    df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index],
                                              names=['index', 'date'])
    r = df.resample('2D')
    cases = [
        r,
        df_col.resample('2D', on='date'),
        df_mult.resample('2D', level='date'),
        df.groupby(pd.Grouper(freq='2D'))
    ]

    # renamer -> column -> functions is not a supported nesting
    for t in cases:
        with pytest.raises(ValueError):
            t.aggregate({'r1': {'A': ['mean', 'sum']},
                         'r2': {'B': ['mean', 'sum']}})

    renamed_columns = pd.MultiIndex.from_tuples([
        ('ra', 'mean'), ('ra', 'std'), ('rb', 'mean'), ('rb', 'std')])
    nested_spec = {'A': {'ra': ['mean', 'std']},
                   'B': {'rb': ['mean', 'std']}}

    for t in cases:
        expected = pd.concat([t['A'].mean(), t['A'].std(), t['B'].mean(),
                              t['B'].std()], axis=1)
        expected.columns = renamed_columns

        # with an explicit selection list
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = t[['A', 'B']].agg({'A': {'ra': ['mean', 'std']},
                                        'B': {'rb': ['mean', 'std']}})
        assert_frame_equal(result, expected, check_like=True)

        # without a selection list
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = t.agg(nested_spec)
        assert_frame_equal(result, expected, check_like=True)
|
||||
|
||||
|
||||
def test_try_aggregate_non_existing_column():
    """Aggregating a column that does not exist raises KeyError."""
    # GH 16766
    records = [
        {'dt': datetime(2017, 6, 1, 0), 'x': 1.0, 'y': 2.0},
        {'dt': datetime(2017, 6, 1, 1), 'x': 2.0, 'y': 2.0},
        {'dt': datetime(2017, 6, 1, 2), 'x': 3.0, 'y': 1.5}
    ]
    df = DataFrame(records).set_index('dt')

    spec = {'x': ['mean'],
            'y': ['median'],
            'z': ['sum']}

    # Error as we don't have 'z' column
    with pytest.raises(KeyError):
        df.resample('30T').agg(spec)
|
||||
|
||||
|
||||
def test_selection_api_validation():
    """Validation of the ``on`` and ``level`` arguments of ``resample``."""
    # GH 13500
    index = date_range(datetime(2005, 1, 1),
                       datetime(2005, 1, 10), freq='D')

    rng = np.arange(len(index), dtype=np.int64)
    multi = pd.MultiIndex.from_arrays([rng, index], names=['v', 'd'])
    df = DataFrame({'date': index, 'a': rng}, index=multi)
    df_exp = DataFrame({'a': rng}, index=index)

    # non DatetimeIndex
    with pytest.raises(TypeError):
        df.resample('2D', level='v')

    # `on` and `level` together
    with pytest.raises(ValueError):
        df.resample('2D', on='date', level='d')

    # list passed to `on`
    with pytest.raises(TypeError):
        df.resample('2D', on=['a', 'date'])

    # list passed to `level`
    with pytest.raises(KeyError):
        df.resample('2D', level=['a', 'date'])

    # upsampling not allowed
    with pytest.raises(ValueError):
        df.resample('2D', level='d').asfreq()

    with pytest.raises(ValueError):
        df.resample('2D', on='date').asfreq()

    # valid selections: the result index takes the selection's name
    exp = df_exp.resample('2D').sum()

    exp.index.name = 'date'
    by_column = df.resample('2D', on='date').sum()
    assert_frame_equal(exp, by_column)

    exp.index.name = 'd'
    by_level = df.resample('2D', level='d').sum()
    assert_frame_equal(exp, by_level)
|
||||
+260
@@ -0,0 +1,260 @@
|
||||
# pylint: disable=E1101
|
||||
|
||||
from textwrap import dedent
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.compat import range
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series, Timestamp
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_frame_equal, assert_series_equal
|
||||
|
||||
# Shared input for the tests below: 40 rows at one-second spacing starting
# at 1/1/2000; 'A' holds the labels 1, 2, 3 and 'B' counts 0..39.
test_frame = DataFrame(
    {'A': [1] * 20 + [2] * 12 + [3] * 8, 'B': np.arange(40)},
    index=date_range('1/1/2000', freq='s', periods=40))
|
||||
|
||||
|
||||
def test_tab_complete_ipython6_warning(ip):
    """Completing on a resampler inside IPython must not emit a warning."""
    from IPython.core.completer import provisionalcompleter

    setup = dedent("""\
    import pandas.util.testing as tm
    s = tm.makeTimeSeries()
    rs = s.resample("D")
    """)
    ip.run_code(setup)

    # ``None`` means: fail if any warning is produced inside the block.
    with tm.assert_produces_warning(None), provisionalcompleter('ignore'):
        list(ip.Completer.completions('rs.', 1))
|
||||
|
||||
|
||||
def test_deferred_with_groupby():
    """Deferred resample ops on a groupby match an explicit per-group apply.

    GH 12486
    """
    records = [['2010-01-01', 'A', 2], ['2010-01-02', 'A', 3],
               ['2010-01-05', 'A', 8], ['2010-01-10', 'A', 7],
               ['2010-01-13', 'A', 3], ['2010-01-01', 'B', 5],
               ['2010-01-03', 'B', 2], ['2010-01-04', 'B', 1],
               ['2010-01-11', 'B', 7], ['2010-01-14', 'B', 3]]

    scores = DataFrame(records, columns=['date', 'id', 'score'])
    scores['date'] = pd.to_datetime(scores['date'])

    def daily_asfreq(group):
        return group.set_index('date').resample('D').asfreq()

    expected = scores.groupby('id').apply(daily_asfreq)
    result = scores.set_index('date').groupby('id').resample('D').asfreq()
    assert_frame_equal(result, expected)

    weekly_dates = pd.date_range(start='2016-01-01', periods=4, freq='W')
    weekly = DataFrame({'date': weekly_dates,
                        'group': [1, 1, 2, 2],
                        'val': [5, 6, 7, 8]}).set_index('date')

    def daily_ffill(group):
        return group.resample('1D').ffill()

    expected = weekly.groupby('group').apply(daily_ffill)
    result = weekly.groupby('group').resample('1D').ffill()
    assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem():
    """Selecting column 'B' before, between or after resample/mean agrees."""
    grouped = test_frame.groupby('A')

    expected = grouped.B.apply(lambda x: x.resample('2s').mean())

    # Each spelling is evaluated lazily so that it is computed and checked
    # in the same order as written here.
    spellings = (
        lambda: grouped.resample('2s').B.mean(),
        lambda: grouped.B.resample('2s').mean(),
        lambda: grouped.resample('2s').mean().B,
    )
    for compute in spellings:
        assert_series_equal(compute(), expected)
|
||||
|
||||
|
||||
def test_getitem_multiple():
    """Repeating a selection on the same grouped resampler is stable.

    GH 13174: multiple calls after selection caused an issue with aliasing.
    """
    records = [{'id': 1, 'buyer': 'A'}, {'id': 2, 'buyer': 'B'}]
    frame = DataFrame(records, index=pd.date_range('2016-01-01', periods=2))
    resampler = frame.groupby('id').resample('1D')

    expected_index = pd.MultiIndex.from_tuples(
        [(1, Timestamp('2016-01-01')),
         (2, Timestamp('2016-01-02'))],
        names=['id', None])
    expected = Series([1, 1], index=expected_index, name='buyer')

    # Select and count twice: the second pass must give the same answer.
    for _ in (0, 1):
        result = resampler['buyer'].count()
        assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_api_with_getitem():
    """``resample(on=...)`` after groupby matches resampling a set index.

    GH 17813
    """
    frame = pd.DataFrame({'id': list('aabbb'),
                          'date': pd.date_range('1-1-2016', periods=5),
                          'data': 1})

    indexed_resampler = frame.set_index('date').groupby('id').resample('2D')
    expected = indexed_resampler['data'].sum()

    on_resampler = frame.groupby('id').resample('2D', on='date')
    result = on_resampler['data'].sum()

    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_nearest():
    """Upsample a minutely series to 20 seconds with ``nearest``.

    GH 17496
    """
    minutely = pd.date_range('1/1/2000', periods=3, freq='T')
    series = Series(range(3), index=minutely)
    result = series.resample('20s').nearest()

    stamps = ['2000-01-01 00:00:00', '2000-01-01 00:00:20',
              '2000-01-01 00:00:40', '2000-01-01 00:01:00',
              '2000-01-01 00:01:20', '2000-01-01 00:01:40',
              '2000-01-01 00:02:00']
    expected_index = pd.DatetimeIndex(stamps,
                                      dtype='datetime64[ns]',
                                      freq='20S')
    expected = Series([0, 0, 1, 1, 1, 2, 2], index=expected_index)

    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_methods():
    """Resampler methods on a groupby equal the per-group resample result."""
    grouped = test_frame.groupby('A')
    resampler = grouped.resample('2s')

    def check(name, deferred, groups, compare, **kwargs):
        # Call ``name`` on the deferred resampler, then on each group's own
        # '2s' resampler through ``apply``, and compare the two outcomes.
        result = getattr(deferred, name)(**kwargs)
        expected = groups.apply(
            lambda x: getattr(x.resample('2s'), name)(**kwargs))
        compare(result, expected)

    for name in ['first', 'last', 'median', 'sem', 'sum', 'mean',
                 'min', 'max']:
        check(name, resampler, grouped, assert_frame_equal)

    check('size', resampler, grouped, assert_series_equal)
    check('count', resampler, grouped, assert_frame_equal)

    # series only
    check('nunique', resampler.B, grouped.B, assert_series_equal)

    for name in ['nearest', 'backfill', 'ffill', 'asfreq']:
        check(name, resampler, grouped, assert_frame_equal)

    check('ohlc', resampler, grouped, assert_frame_equal)

    for name in ['std', 'var']:
        check(name, resampler, grouped, assert_frame_equal, ddof=1)
|
||||
|
||||
|
||||
def test_apply():
    """``apply`` on the grouped resampler and on the groupby both reduce
    to the same frame as the deferred ``sum``."""
    grouped = test_frame.groupby('A')
    resampler = grouped.resample('2s')

    # reduction
    expected = grouped.resample('2s').sum()

    def resample_sum(x):
        return x.resample('2s').sum()

    assert_frame_equal(resampler.apply(resample_sum), expected)

    def resample_apply_sum(x):
        return x.resample('2s').apply(lambda y: y.sum())

    assert_frame_equal(grouped.apply(resample_apply_sum), expected)
|
||||
|
||||
|
||||
def test_apply_with_mutated_index():
    """``resample('M').apply`` matches ``groupby(Grouper(freq='M')).apply``
    when the applied function returns a differently indexed Series.

    GH 15169
    """
    days = pd.date_range('1-1-2015', '12-31-15', freq='D')
    frame = DataFrame(data={'col1': np.random.rand(len(days))}, index=days)

    def constant(x):
        # The input is ignored; only the shape of the output matters here.
        return Series([1, 2], index=['a', 'b'])

    expected = frame.groupby(pd.Grouper(freq='M')).apply(constant)
    result = frame.resample('M').apply(constant)
    assert_frame_equal(result, expected)

    # A case for series
    column = frame['col1']
    expected = column.groupby(pd.Grouper(freq='M')).apply(constant)
    result = frame['col1'].resample('M').apply(constant)
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_groupby_with_label():
    """Weekly resample on a groupby with ``label='left'``.

    GH 13235
    """
    two_daily = date_range('2000-01-01', freq='2D', periods=5)
    frame = DataFrame(index=two_daily,
                      data={'col0': [0, 0, 1, 1, 2],
                            'col1': [1, 1, 1, 1, 1]})
    result = frame.groupby('col0').resample('1W', label='left').sum()

    group_level = np.array([0, 0, 1, 2])
    week_level = pd.to_datetime(np.array(['1999-12-26', '2000-01-02',
                                          '2000-01-02', '2000-01-02']))
    expected_index = pd.MultiIndex.from_arrays([group_level, week_level],
                                               names=['col0', None])
    expected = DataFrame(data={'col0': [0, 0, 2, 2],
                               'col1': [1, 1, 2, 1]},
                         index=expected_index)

    assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_consistency_with_window():
    """Grouped resample and grouped rolling return the same index layout:
    two levels, the first holding the group keys."""
    frame = test_frame
    group_keys = pd.Int64Index([1, 2, 3], name='A')

    def check_layout(result):
        assert result.index.nlevels == 2
        tm.assert_index_equal(result.index.levels[0], group_keys)

    check_layout(frame.groupby('A').resample('2s').mean())
    check_layout(frame.groupby('A').rolling(20).mean())
|
||||
|
||||
|
||||
def test_median_duplicate_columns():
    """Resampled median with duplicate column labels equals the result
    computed with unique labels.

    GH 14233
    """
    seconds = pd.date_range('2012-01-01', periods=20, freq='s')
    duplicated = DataFrame(np.random.randn(20, 3),
                           columns=list('aaa'),
                           index=seconds)

    # Same values, but with unambiguous labels for the reference result.
    unique = duplicated.copy()
    unique.columns = ['a', 'b', 'c']

    expected = unique.resample('5s').median()
    result = duplicated.resample('5s').median()

    # Only the values are compared, so align the labels first.
    expected.columns = result.columns
    assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,287 @@
|
||||
from datetime import datetime
|
||||
from operator import methodcaller
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Panel, Series
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
from pandas.core.resample import TimeGrouper
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_frame_equal, assert_series_equal
|
||||
|
||||
# Shared input for the tests below: 1000 random values on the default
# date_range frequency starting at 1/1/2000.
test_series = Series(
    np.random.randn(1000),
    index=date_range('1/1/2000', periods=1000))
|
||||
|
||||
|
||||
def test_apply():
    """Applying through an annual ``pd.TimeGrouper`` matches grouping by
    the calendar year of each timestamp."""
    # Constructing pd.TimeGrouper is expected to emit a FutureWarning.
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        annual = pd.TimeGrouper(freq='A', label='right', closed='right')

    def top_three(x):
        return x.sort_values()[-3:]

    applied = test_series.groupby(annual).apply(top_three)
    expected = test_series.groupby(lambda x: x.year).apply(top_three)

    # Drop the outer (group key) level before comparing.
    applied.index = applied.index.droplevel(0)
    expected.index = expected.index.droplevel(0)
    assert_series_equal(applied, expected)
|
||||
|
||||
|
||||
def test_count():
    """``count`` through an annual TimeGrouper and through ``resample('A')``
    matches counting after grouping by calendar year.

    Every third value is blanked first so that ``count`` has missing data
    to skip.
    """
    # Work on a private copy: ``test_series`` is module-level state shared
    # with the other tests in this file, and blanking it in place would make
    # their input depend on the order in which the tests run.
    series = test_series.copy()
    series[::3] = np.nan

    expected = series.groupby(lambda x: x.year).count()

    # Constructing pd.TimeGrouper is expected to emit a FutureWarning.
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        grouper = pd.TimeGrouper(freq='A', label='right', closed='right')
    result = series.groupby(grouper).count()
    # The reference is keyed by integer year; reuse the result's index so
    # that only the counts are compared.
    expected.index = result.index
    assert_series_equal(result, expected)

    result = series.resample('A').count()
    expected.index = result.index
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_numpy_reduction():
    """Annual ``prod`` via resample equals ``np.prod`` aggregated per
    calendar year."""
    by_year = test_series.resample('A', closed='right')
    result = by_year.prod()

    expected = test_series.groupby(lambda x: x.year).agg(np.prod)
    # Reuse the resampled index so that only the values are compared.
    expected.index = result.index

    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_iteration():
    """Applying over a monthly TimeGrouper's grouper keeps the frame's
    original index when ``group_keys=False``.

    #2300
    """
    N = 1000
    daily = pd.date_range(start="2000-01-01", freq="D", periods=N)
    frame = DataFrame({'open': 1, 'close': 2}, index=daily)

    _, grouper, _ = TimeGrouper('M')._get_grouper(frame)

    # Errors
    grouped = frame.groupby(grouper, group_keys=False)

    def close_over_open(chunk):
        return chunk['close'] / chunk['open']

    # it works!
    result = grouped.apply(close_over_open)
    tm.assert_index_equal(result.index, frame.index)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_panel_aggregation():
    """Aggregating a grouped Panel with a custom mean equals ``.mean()``."""
    major = pd.date_range('1/1/2000', periods=100)
    values = np.random.randn(2, len(major), 4)

    panel = Panel(values, items=['Item1', 'Item2'], major_axis=major,
                  minor_axis=['A', 'B', 'C', 'D'])

    _, grouper, _ = TimeGrouper('M', axis=1)._get_grouper(panel)
    bingrouped = panel.groupby(grouper)
    binagg = bingrouped.mean()

    def mean_along_axis_1(x):
        # Every chunk handed to the aggregator must itself be a Panel.
        assert (isinstance(x, Panel))
        return x.mean(1)

    result = bingrouped.agg(mean_along_axis_1)
    tm.assert_panel_equal(result, binagg)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('name, func', [
    ('Int64Index', tm.makeIntIndex),
    ('Index', tm.makeUnicodeIndex),
    ('Float64Index', tm.makeFloatIndex),
    ('MultiIndex', lambda m: tm.makeCustomIndex(m, 2))
])
def test_fails_on_no_datetime_index(name, func):
    """Grouping by a TimeGrouper raises TypeError, naming the index class,
    when the index is not datetime-like."""
    size = 2
    frame = DataFrame({'a': np.random.randn(size)}, index=func(size))

    template = ("Only valid with DatetimeIndex, TimedeltaIndex "
                "or PeriodIndex, but got an instance of %r")
    msg = template % name
    with pytest.raises(TypeError, match=msg):
        frame.groupby(TimeGrouper('D'))
|
||||
|
||||
|
||||
def test_aaa_group_order():
    """Check that TimeGrouper performs stable sorts.

    GH 12840
    """
    n = 20
    days = [datetime(2013, 1, number) for number in range(1, 6)]

    frame = DataFrame(np.random.randn(n, 4), columns=['A', 'B', 'C', 'D'])
    # The five days repeat in order, so day k owns rows k, k + 5, k + 10...
    frame['key'] = days * 4
    grouped = frame.groupby(TimeGrouper(key='key', freq='D'))

    for offset, day in enumerate(days):
        tm.assert_frame_equal(grouped.get_group(day), frame[offset::5])
|
||||
|
||||
|
||||
def test_aggregate_normal(resample_method):
    """Check TimeGrouper's aggregation is identical as normal groupby."""
    if resample_method == 'ohlc':
        pytest.xfail(reason='DataError: No numeric types to aggregate')

    columns = ['A', 'B', 'C', 'D']
    values = np.random.randn(20, 4)

    # Same values keyed twice: once by integer, once by calendar day.
    normal_df = DataFrame(values, columns=columns)
    normal_df['key'] = [1, 2, 3, 4, 5] * 4

    dt_df = DataFrame(values, columns=columns)
    dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2),
                    datetime(2013, 1, 3), datetime(2013, 1, 4),
                    datetime(2013, 1, 5)] * 4

    normal_grouped = normal_df.groupby('key')
    dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))

    expected = getattr(normal_grouped, resample_method)()
    dt_result = getattr(dt_grouped, resample_method)()
    expected.index = date_range(start='2013-01-01', freq='D',
                                periods=5, name='key')
    tm.assert_equal(expected, dt_result)

    # 'nth' doesn't work yet when a TimeGrouper is used, so the analogous
    # check stays disabled: normal_grouped.nth(3), re-indexed with the same
    # date_range as above, compared against dt_grouped.nth(3) using
    # assert_frame_equal.
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method, method_args, unit', [
    ('sum', dict(), 0),
    ('sum', dict(min_count=0), 0),
    ('sum', dict(min_count=1), np.nan),
    ('prod', dict(), 1),
    ('prod', dict(min_count=0), 1),
    ('prod', dict(min_count=1), np.nan)
])
def test_resample_entirly_nat_window(method, method_args, unit):
    """A two-day window holding only NaN reduces to ``unit``."""
    values = [0] * 2 + [np.nan] * 2
    series = pd.Series(values, index=pd.date_range('2017', periods=4))

    reduce = methodcaller(method, **method_args)
    result = reduce(series.resample("2d"))

    window_starts = pd.to_datetime(['2017-01-01', '2017-01-03'])
    expected = pd.Series([0.0, unit], index=window_starts)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('func, fill_value', [
    ('min', np.nan),
    ('max', np.nan),
    ('sum', 0),
    ('prod', 1),
    ('count', 0),
])
def test_aggregate_with_nat(func, fill_value):
    """Check TimeGrouper's aggregation is identical as normal groupby when
    the key contains NaT.

    If NaT is included, 'var', 'std', 'mean', 'first', 'last' and 'nth'
    don't work yet, so they are not parametrized here.
    """
    n = 20
    columns = ['A', 'B', 'C', 'D']
    values = np.random.randn(n, 4).astype('int64')

    normal_df = DataFrame(values, columns=columns)
    normal_df['key'] = [1, 2, np.nan, 4, 5] * 4

    dt_df = DataFrame(values, columns=columns)
    dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT,
                    datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4

    normal_grouped = normal_df.groupby('key')
    dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))

    normal_result = getattr(normal_grouped, func)()
    dt_result = getattr(dt_grouped, func)()

    # Add a row labelled 3 (the key missing from the integer grouping),
    # filled with ``fill_value``, before relabelling with the five days.
    pad = DataFrame([[fill_value] * 4], index=[3], columns=columns)
    expected = normal_result.append(pad).sort_index()
    expected.index = date_range(start='2013-01-01', freq='D',
                                periods=5, name='key')

    assert_frame_equal(expected, dt_result)
    assert dt_result.index.name == 'key'
|
||||
|
||||
|
||||
def test_aggregate_with_nat_size():
    """``size`` through a TimeGrouper matches a normal groupby when the key
    contains NaT.

    GH 9925
    """
    n = 20
    columns = ['A', 'B', 'C', 'D']
    values = np.random.randn(n, 4).astype('int64')

    normal_df = DataFrame(values, columns=columns)
    normal_df['key'] = [1, 2, np.nan, 4, 5] * 4

    dt_df = DataFrame(values, columns=columns)
    dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT,
                    datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4

    normal_result = normal_df.groupby('key').size()
    dt_result = dt_df.groupby(TimeGrouper(key='key', freq='D')).size()

    # Add a zero-sized entry labelled 3 (the key missing from the integer
    # grouping) before relabelling with the five days.
    pad = Series([0], index=[3])
    expected = normal_result.append(pad).sort_index()
    expected.index = date_range(start='2013-01-01', freq='D',
                                periods=5, name='key')

    assert_series_equal(expected, dt_result)
    assert dt_result.index.name == 'key'
|
||||
|
||||
|
||||
def test_repr():
    """``repr`` of a TimeGrouper spells out its settings.

    GH18203
    """
    grouper = TimeGrouper(key='A', freq='H')
    expected = ("TimeGrouper(key='A', freq=<Hour>, "
                "axis=0, sort=True, closed='left', "
                "label='left', how='mean', convention='e', "
                "base=0)")
    assert repr(grouper) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method, method_args, expected_values', [
    ('sum', dict(), [1, 0, 1]),
    ('sum', dict(min_count=0), [1, 0, 1]),
    ('sum', dict(min_count=1), [1, np.nan, 1]),
    ('sum', dict(min_count=2), [np.nan, np.nan, np.nan]),
    ('prod', dict(), [1, 1, 1]),
    ('prod', dict(min_count=0), [1, 1, 1]),
    ('prod', dict(min_count=1), [1, np.nan, 1]),
    ('prod', dict(min_count=2), [np.nan, np.nan, np.nan]),
])
def test_upsample_sum(method, method_args, expected_values):
    """Reduce an hourly series of ones after upsampling to 30 minutes."""
    hourly = pd.Series(1, index=pd.date_range("2017", periods=2, freq="H"))
    resampled = hourly.resample("30T")

    half_hours = pd.to_datetime(['2017-01-01T00:00:00',
                                 '2017-01-01T00:30:00',
                                 '2017-01-01T01:00:00'])

    reduce = methodcaller(method, **method_args)
    result = reduce(resampled)
    expected = pd.Series(expected_values, index=half_hours)
    tm.assert_series_equal(result, expected)
|
||||
@@ -0,0 +1,128 @@
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series
|
||||
from pandas.core.indexes.timedeltas import timedelta_range
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_frame_equal, assert_series_equal
|
||||
|
||||
|
||||
def test_asfreq_bug():
    """``asfreq`` on a timedelta-indexed frame inserts NaN rows for the
    minutes that have no observation."""
    observed = [timedelta(), timedelta(minutes=3)]
    frame = DataFrame(data=[1, 3], index=observed)
    result = frame.resample('1T').asfreq()

    minutes = timedelta_range('0 day', periods=4, freq='1T')
    expected = DataFrame(data=[1, np.nan, np.nan, 3], index=minutes)
    assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_with_nat():
    """Resample a frame whose timedelta index contains NaT.

    GH 13223
    """
    index = pd.to_timedelta(['0s', pd.NaT, '2s'])
    frame = DataFrame({'value': [2, 3, 5]}, index)
    result = frame.resample('1s').mean()

    seconds = timedelta_range('0 day', periods=3, freq='1S')
    expected = DataFrame({'value': [2.5, np.nan, 5.0]}, index=seconds)
    assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_as_freq_with_subperiod():
    """``asfreq`` from a 5-minute to a 2-minute timedelta grid.

    GH 13022
    """
    five_minutes = timedelta_range('00:00:00', '00:10:00', freq='5T')
    frame = DataFrame(data={'value': [1, 5, 10]}, index=five_minutes)
    result = frame.resample('2T').asfreq()

    two_minutes = timedelta_range('00:00:00', '00:10:00', freq='2T')
    expected = DataFrame(
        data={'value': [1, np.nan, np.nan, np.nan, np.nan, 10]},
        index=two_minutes)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_with_timedeltas():
    """30-minute sums over a minutely timedelta index equal sums over
    blocks of 30 consecutive rows, for both the frame and its column."""
    counts = np.arange(1480)

    # Reference: integer-divide the positional index into blocks of 30.
    expected = DataFrame({'A': np.arange(1480)})
    expected = expected.groupby(expected.index // 30).sum()
    expected.index = pd.timedelta_range('0 days', freq='30T', periods=50)

    minutes = pd.to_timedelta(counts, unit='T')
    frame = DataFrame({'A': np.arange(1480)}, index=minutes)

    assert_frame_equal(frame.resample('30T').sum(), expected)

    column = frame['A']
    assert_series_equal(column.resample('30T').sum(), expected['A'])
|
||||
|
||||
|
||||
def test_resample_single_period_timedelta():
    """Two-second sums over five one-second timedelta observations."""
    seconds = pd.timedelta_range('1 day', freq='s', periods=5)
    series = Series(list(range(5)), index=seconds)
    result = series.resample('2s').sum()

    # 0 + 1, 2 + 3, and the trailing 4 on its own.
    two_seconds = pd.timedelta_range('1 day', freq='2s', periods=3)
    expected = Series([1, 5, 4], index=two_seconds)
    assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_timedelta_idempotency():
    """Resampling at the index's own frequency returns the series as is.

    GH 12072
    """
    ten_millis = pd.timedelta_range('0', periods=9, freq='10L')
    series = Series(range(9), index=ten_millis)

    result = series.resample('10L').mean()

    assert_series_equal(result, series)
|
||||
|
||||
|
||||
def test_resample_base_with_timedeltaindex():
    """The ``base`` argument shifts the resampled timedelta index.

    GH 10530
    """
    seconds = timedelta_range(start='0s', periods=25, freq='s')
    series = Series(np.random.randn(len(seconds)), index=seconds)

    with_base = series.resample('2s', base=5).mean()
    without_base = series.resample('2s').mean()

    expected_plain = timedelta_range(start='0s', end='25s', freq='2s')
    expected_shifted = timedelta_range(start='5s', end='29s', freq='2s')

    tm.assert_index_equal(without_base.index, expected_plain)
    tm.assert_index_equal(with_base.index, expected_shifted)
|
||||
|
||||
|
||||
def test_resample_categorical_data_with_timedeltaindex():
    """Aggregate object and categorical columns over a timedelta index.

    GH #12169
    """
    seconds = pd.to_timedelta(list(range(20)), unit='s')
    frame = DataFrame({'Group_obj': 'A'}, index=seconds)
    frame['Group'] = frame['Group_obj'].astype('category')

    # Reduce each window to the first entry of its value counts.
    result = frame.resample('10s').agg(
        lambda x: (x.value_counts().index[0]))

    windows = pd.to_timedelta([0, 10], unit='s')
    expected = DataFrame({'Group_obj': ['A', 'A'],
                          'Group': ['A', 'A']},
                         index=windows)
    expected = expected.reindex(['Group_obj', 'Group'], axis=1)
    expected['Group'] = expected['Group_obj'].astype('category')
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_timedelta_values():
    """Timedelta dtype is preserved when resampling introduces NaT values.

    GH 13119
    """
    sparse = timedelta_range('1 day', '4 day', freq='4D')
    frame = DataFrame({'time': sparse}, index=sparse)

    dense = timedelta_range('1 day', '4 day', freq='2D')
    expected = Series(dense, index=dense, name='time')
    expected.iloc[1] = pd.NaT

    # Selecting the column after or before resampling must agree.
    from_frame = frame.resample('2D').first()['time']
    tm.assert_series_equal(from_frame, expected)

    from_column = frame['time'].resample('2D').first()
    tm.assert_series_equal(from_column, expected)
|
||||
Reference in New Issue
Block a user