pruned venvs
21 binary files changed (contents not shown).
@@ -1,289 +0,0 @@
# -*- coding: utf-8 -*-

"""
test .agg behavior / note that .apply is tested generally in test_groupby.py
"""

import numpy as np
import pytest

from pandas.compat import OrderedDict

import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series, concat
from pandas.core.base import SpecificationError
from pandas.core.groupby.grouper import Grouping
import pandas.util.testing as tm


def test_agg_regression1(tsframe):
    grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month])
    result = grouped.agg(np.mean)
    expected = grouped.mean()
    tm.assert_frame_equal(result, expected)


def test_agg_must_agg(df):
    grouped = df.groupby('A')['C']

    msg = "Must produce aggregated value"
    with pytest.raises(Exception, match=msg):
        grouped.agg(lambda x: x.describe())
    with pytest.raises(Exception, match=msg):
        grouped.agg(lambda x: x.index[:2])


def test_agg_ser_multi_key(df):
    # TODO(wesm): unused
    ser = df.C  # noqa

    f = lambda x: x.sum()
    results = df.C.groupby([df.A, df.B]).aggregate(f)
    expected = df.groupby(['A', 'B']).sum()['C']
    tm.assert_series_equal(results, expected)


def test_groupby_aggregation_mixed_dtype():

    # GH 6212
    expected = DataFrame({
        'v1': [5, 5, 7, np.nan, 3, 3, 4, 1],
        'v2': [55, 55, 77, np.nan, 33, 33, 44, 11]},
        index=MultiIndex.from_tuples([(1, 95), (1, 99), (2, 95), (2, 99),
                                      ('big', 'damp'),
                                      ('blue', 'dry'),
                                      ('red', 'red'), ('red', 'wet')],
                                     names=['by1', 'by2']))

    df = DataFrame({
        'v1': [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9],
        'v2': [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99],
        'by1': ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan,
                12],
        'by2': ["wet", "dry", 99, 95, np.nan, "damp", 95, 99, "red", 99,
                np.nan, np.nan]
    })

    g = df.groupby(['by1', 'by2'])
    result = g[['v1', 'v2']].mean()
    tm.assert_frame_equal(result, expected)


def test_agg_apply_corner(ts, tsframe):
    # nothing to group, all NA
    grouped = ts.groupby(ts * np.nan)
    assert ts.dtype == np.float64

    # groupby float64 values results in Float64Index
    exp = Series([], dtype=np.float64,
                 index=pd.Index([], dtype=np.float64))
    tm.assert_series_equal(grouped.sum(), exp)
    tm.assert_series_equal(grouped.agg(np.sum), exp)
    tm.assert_series_equal(grouped.apply(np.sum), exp,
                           check_index_type=False)

    # DataFrame
    grouped = tsframe.groupby(tsframe['A'] * np.nan)
    exp_df = DataFrame(columns=tsframe.columns, dtype=float,
                       index=pd.Index([], dtype=np.float64))
    tm.assert_frame_equal(grouped.sum(), exp_df, check_names=False)
    tm.assert_frame_equal(grouped.agg(np.sum), exp_df, check_names=False)
    tm.assert_frame_equal(grouped.apply(np.sum), exp_df.iloc[:, :0],
                          check_names=False)


def test_agg_grouping_is_list_tuple(ts):
    df = tm.makeTimeDataFrame()

    grouped = df.groupby(lambda x: x.year)
    grouper = grouped.grouper.groupings[0].grouper
    grouped.grouper.groupings[0] = Grouping(ts.index, list(grouper))

    result = grouped.agg(np.mean)
    expected = grouped.mean()
    tm.assert_frame_equal(result, expected)

    grouped.grouper.groupings[0] = Grouping(ts.index, tuple(grouper))

    result = grouped.agg(np.mean)
    expected = grouped.mean()
    tm.assert_frame_equal(result, expected)


def test_agg_python_multiindex(mframe):
    grouped = mframe.groupby(['A', 'B'])

    result = grouped.agg(np.mean)
    expected = grouped.mean()
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize('groupbyfunc', [
    lambda x: x.weekday(),
    [lambda x: x.month, lambda x: x.weekday()],
])
def test_aggregate_str_func(tsframe, groupbyfunc):
    grouped = tsframe.groupby(groupbyfunc)

    # single series
    result = grouped['A'].agg('std')
    expected = grouped['A'].std()
    tm.assert_series_equal(result, expected)

    # group frame by function name
    result = grouped.aggregate('var')
    expected = grouped.var()
    tm.assert_frame_equal(result, expected)

    # group frame by function dict
    result = grouped.agg(OrderedDict([['A', 'var'],
                                      ['B', 'std'],
                                      ['C', 'mean'],
                                      ['D', 'sem']]))
    expected = DataFrame(OrderedDict([['A', grouped['A'].var()],
                                      ['B', grouped['B'].std()],
                                      ['C', grouped['C'].mean()],
                                      ['D', grouped['D'].sem()]]))
    tm.assert_frame_equal(result, expected)


def test_aggregate_item_by_item(df):
    grouped = df.groupby('A')

    aggfun = lambda ser: ser.size
    result = grouped.agg(aggfun)
    foo = (df.A == 'foo').sum()
    bar = (df.A == 'bar').sum()
    K = len(result.columns)

    # GH5782
    # odd comparisons can result here, so cast to make easy
    exp = pd.Series(np.array([foo] * K), index=list('BCD'),
                    dtype=np.float64, name='foo')
    tm.assert_series_equal(result.xs('foo'), exp)

    exp = pd.Series(np.array([bar] * K), index=list('BCD'),
                    dtype=np.float64, name='bar')
    tm.assert_almost_equal(result.xs('bar'), exp)

    def aggfun(ser):
        return ser.size

    result = DataFrame().groupby(df.A).agg(aggfun)
    assert isinstance(result, DataFrame)
    assert len(result) == 0


def test_wrap_agg_out(three_group):
    grouped = three_group.groupby(['A', 'B'])

    def func(ser):
        if ser.dtype == np.object:
            raise TypeError
        else:
            return ser.sum()

    result = grouped.aggregate(func)
    exp_grouped = three_group.loc[:, three_group.columns != 'C']
    expected = exp_grouped.groupby(['A', 'B']).aggregate(func)
    tm.assert_frame_equal(result, expected)


def test_agg_multiple_functions_maintain_order(df):
    # GH #610
    funcs = [('mean', np.mean), ('max', np.max), ('min', np.min)]
    result = df.groupby('A')['C'].agg(funcs)
    exp_cols = Index(['mean', 'max', 'min'])

    tm.assert_index_equal(result.columns, exp_cols)


def test_multiple_functions_tuples_and_non_tuples(df):
    # #1359
    funcs = [('foo', 'mean'), 'std']
    ex_funcs = [('foo', 'mean'), ('std', 'std')]

    result = df.groupby('A')['C'].agg(funcs)
    expected = df.groupby('A')['C'].agg(ex_funcs)
    tm.assert_frame_equal(result, expected)

    result = df.groupby('A').agg(funcs)
    expected = df.groupby('A').agg(ex_funcs)
    tm.assert_frame_equal(result, expected)


def test_agg_multiple_functions_too_many_lambdas(df):
    grouped = df.groupby('A')
    funcs = ['mean', lambda x: x.mean(), lambda x: x.std()]

    msg = 'Function names must be unique, found multiple named <lambda>'
    with pytest.raises(SpecificationError, match=msg):
        grouped.agg(funcs)


def test_more_flexible_frame_multi_function(df):
    grouped = df.groupby('A')

    exmean = grouped.agg(OrderedDict([['C', np.mean], ['D', np.mean]]))
    exstd = grouped.agg(OrderedDict([['C', np.std], ['D', np.std]]))

    expected = concat([exmean, exstd], keys=['mean', 'std'], axis=1)
    expected = expected.swaplevel(0, 1, axis=1).sort_index(level=0, axis=1)

    d = OrderedDict([['C', [np.mean, np.std]], ['D', [np.mean, np.std]]])
    result = grouped.aggregate(d)

    tm.assert_frame_equal(result, expected)

    # be careful
    result = grouped.aggregate(OrderedDict([['C', np.mean],
                                            ['D', [np.mean, np.std]]]))
    expected = grouped.aggregate(OrderedDict([['C', np.mean],
                                              ['D', [np.mean, np.std]]]))
    tm.assert_frame_equal(result, expected)

    def foo(x):
        return np.mean(x)

    def bar(x):
        return np.std(x, ddof=1)

    # this uses column selection & renaming
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        d = OrderedDict([['C', np.mean],
                         ['D', OrderedDict([['foo', np.mean],
                                            ['bar', np.std]])]])
        result = grouped.aggregate(d)

        d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]])
        expected = grouped.aggregate(d)

        tm.assert_frame_equal(result, expected)


def test_multi_function_flexible_mix(df):
    # GH #1268
    grouped = df.groupby('A')

    # Expected
    d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
                     ['D', {'sum': 'sum'}]])
    # this uses column selection & renaming
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        expected = grouped.aggregate(d)

    # Test 1
    d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
                     ['D', 'sum']])
    # this uses column selection & renaming
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = grouped.aggregate(d)
    tm.assert_frame_equal(result, expected)

    # Test 2
    d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
                     ['D', ['sum']]])
    # this uses column selection & renaming
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = grouped.aggregate(d)
    tm.assert_frame_equal(result, expected)
@@ -1,218 +0,0 @@
# -*- coding: utf-8 -*-

"""
test cython .agg behavior
"""

from __future__ import print_function

import numpy as np
import pytest

import pandas as pd
from pandas import (
    DataFrame, Index, NaT, Series, Timedelta, Timestamp, bdate_range)
from pandas.core.groupby.groupby import DataError
import pandas.util.testing as tm


@pytest.mark.parametrize('op_name', [
    'count',
    'sum',
    'std',
    'var',
    'sem',
    'mean',
    pytest.param('median',
                 # ignore mean of empty slice
                 # and all-NaN
                 marks=[pytest.mark.filterwarnings(
                     "ignore::RuntimeWarning"
                 )]),
    'prod',
    'min',
    'max',
])
def test_cythonized_aggers(op_name):
    data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., np.nan, np.nan],
            'B': ['A', 'B'] * 6,
            'C': np.random.randn(12)}
    df = DataFrame(data)
    df.loc[2:10:2, 'C'] = np.nan

    op = lambda x: getattr(x, op_name)()

    # single column
    grouped = df.drop(['B'], axis=1).groupby('A')
    exp = {cat: op(group['C']) for cat, group in grouped}
    exp = DataFrame({'C': exp})
    exp.index.name = 'A'
    result = op(grouped)
    tm.assert_frame_equal(result, exp)

    # multiple columns
    grouped = df.groupby(['A', 'B'])
    expd = {}
    for (cat1, cat2), group in grouped:
        expd.setdefault(cat1, {})[cat2] = op(group['C'])
    exp = DataFrame(expd).T.stack(dropna=False)
    exp.index.names = ['A', 'B']
    exp.name = 'C'

    result = op(grouped)['C']
    if op_name in ['sum', 'prod']:
        tm.assert_series_equal(result, exp)


def test_cython_agg_boolean():
    frame = DataFrame({'a': np.random.randint(0, 5, 50),
                       'b': np.random.randint(0, 2, 50).astype('bool')})
    result = frame.groupby('a')['b'].mean()
    expected = frame.groupby('a')['b'].agg(np.mean)

    tm.assert_series_equal(result, expected)


def test_cython_agg_nothing_to_agg():
    frame = DataFrame({'a': np.random.randint(0, 5, 50),
                       'b': ['foo', 'bar'] * 25})
    msg = "No numeric types to aggregate"

    with pytest.raises(DataError, match=msg):
        frame.groupby('a')['b'].mean()

    frame = DataFrame({'a': np.random.randint(0, 5, 50),
                       'b': ['foo', 'bar'] * 25})
    with pytest.raises(DataError, match=msg):
        frame[['b']].groupby(frame['a']).mean()


def test_cython_agg_nothing_to_agg_with_dates():
    frame = DataFrame({'a': np.random.randint(0, 5, 50),
                       'b': ['foo', 'bar'] * 25,
                       'dates': pd.date_range('now', periods=50, freq='T')})
    msg = "No numeric types to aggregate"
    with pytest.raises(DataError, match=msg):
        frame.groupby('b').dates.mean()


def test_cython_agg_frame_columns():
    # #2113
    df = DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]})

    df.groupby(level=0, axis='columns').mean()
    df.groupby(level=0, axis='columns').mean()
    df.groupby(level=0, axis='columns').mean()
    df.groupby(level=0, axis='columns').mean()


def test_cython_agg_return_dict():
    # GH 16741
    df = DataFrame(
        {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
         'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
         'C': np.random.randn(8),
         'D': np.random.randn(8)})

    ts = df.groupby('A')['B'].agg(lambda x: x.value_counts().to_dict())
    expected = Series([{'two': 1, 'one': 1, 'three': 1},
                       {'two': 2, 'one': 2, 'three': 1}],
                      index=Index(['bar', 'foo'], name='A'),
                      name='B')
    tm.assert_series_equal(ts, expected)


def test_cython_fail_agg():
    dr = bdate_range('1/1/2000', periods=50)
    ts = Series(['A', 'B', 'C', 'D', 'E'] * 10, index=dr)

    grouped = ts.groupby(lambda x: x.month)
    summed = grouped.sum()
    expected = grouped.agg(np.sum)
    tm.assert_series_equal(summed, expected)


@pytest.mark.parametrize('op, targop', [
    ('mean', np.mean),
    ('median', np.median),
    ('var', np.var),
    ('add', np.sum),
    ('prod', np.prod),
    ('min', np.min),
    ('max', np.max),
    ('first', lambda x: x.iloc[0]),
    ('last', lambda x: x.iloc[-1]),
])
def test__cython_agg_general(op, targop):
    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    result = df.groupby(labels)._cython_agg_general(op)
    expected = df.groupby(labels).agg(targop)
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize('op, targop', [
    ('mean', np.mean),
    ('median', lambda x: np.median(x) if len(x) > 0 else np.nan),
    ('var', lambda x: np.var(x, ddof=1)),
    ('min', np.min),
    ('max', np.max),
])
def test_cython_agg_empty_buckets(op, targop, observed):
    df = pd.DataFrame([11, 12, 13])
    grps = range(0, 55, 5)

    # calling _cython_agg_general directly, instead of via the user API
    # which sets different values for min_count, so do that here.
    g = df.groupby(pd.cut(df[0], grps), observed=observed)
    result = g._cython_agg_general(op)

    g = df.groupby(pd.cut(df[0], grps), observed=observed)
    expected = g.agg(lambda x: targop(x))
    tm.assert_frame_equal(result, expected)


def test_cython_agg_empty_buckets_nanops(observed):
    # GH-18869 can't call nanops on empty groups, so hardcode expected
    # for these
    df = pd.DataFrame([11, 12, 13], columns=['a'])
    grps = range(0, 25, 5)
    # add / sum
    result = df.groupby(pd.cut(df['a'], grps),
                        observed=observed)._cython_agg_general('add')
    intervals = pd.interval_range(0, 20, freq=5)
    expected = pd.DataFrame(
        {"a": [0, 0, 36, 0]},
        index=pd.CategoricalIndex(intervals, name='a', ordered=True))
    if observed:
        expected = expected[expected.a != 0]

    tm.assert_frame_equal(result, expected)

    # prod
    result = df.groupby(pd.cut(df['a'], grps),
                        observed=observed)._cython_agg_general('prod')
    expected = pd.DataFrame(
        {"a": [1, 1, 1716, 1]},
        index=pd.CategoricalIndex(intervals, name='a', ordered=True))
    if observed:
        expected = expected[expected.a != 1]

    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize('op', ['first', 'last', 'max', 'min'])
@pytest.mark.parametrize('data', [
    Timestamp('2016-10-14 21:00:44.557'),
    Timedelta('17088 days 21:00:44.557'), ])
def test_cython_with_timestamp_and_nat(op, data):
    # https://github.com/pandas-dev/pandas/issues/19526
    df = DataFrame({'a': [0, 1], 'b': [data, NaT]})
    index = Index([0, 1], name='a')

    # We will group by a and test the cython aggregations
    expected = DataFrame({'b': [data, NaT]}, index=index)

    result = df.groupby('a').aggregate(op)
    tm.assert_frame_equal(expected, result)
@@ -1,514 +0,0 @@
# -*- coding: utf-8 -*-

"""
test all other .agg behavior
"""

from __future__ import print_function

from collections import OrderedDict
import datetime as dt
from functools import partial

import numpy as np
import pytest

import pandas as pd
from pandas import (
    DataFrame, Index, MultiIndex, PeriodIndex, Series, date_range,
    period_range)
from pandas.core.groupby.groupby import SpecificationError
import pandas.util.testing as tm

from pandas.io.formats.printing import pprint_thing


def test_agg_api():
    # GH 6337
    # http://stackoverflow.com/questions/21706030/pandas-groupby-agg-function-column-dtype-error
    # different api for agg when passed custom function with mixed frame

    df = DataFrame({'data1': np.random.randn(5),
                    'data2': np.random.randn(5),
                    'key1': ['a', 'a', 'b', 'b', 'a'],
                    'key2': ['one', 'two', 'one', 'two', 'one']})
    grouped = df.groupby('key1')

    def peak_to_peak(arr):
        return arr.max() - arr.min()

    expected = grouped.agg([peak_to_peak])
    expected.columns = ['data1', 'data2']
    result = grouped.agg(peak_to_peak)
    tm.assert_frame_equal(result, expected)


def test_agg_datetimes_mixed():
    data = [[1, '2012-01-01', 1.0],
            [2, '2012-01-02', 2.0],
            [3, None, 3.0]]

    df1 = DataFrame({'key': [x[0] for x in data],
                     'date': [x[1] for x in data],
                     'value': [x[2] for x in data]})

    data = [[row[0],
             (dt.datetime.strptime(row[1], '%Y-%m-%d').date()
              if row[1] else None),
             row[2]]
            for row in data]

    df2 = DataFrame({'key': [x[0] for x in data],
                     'date': [x[1] for x in data],
                     'value': [x[2] for x in data]})

    df1['weights'] = df1['value'] / df1['value'].sum()
    gb1 = df1.groupby('date').aggregate(np.sum)

    df2['weights'] = df1['value'] / df1['value'].sum()
    gb2 = df2.groupby('date').aggregate(np.sum)

    assert (len(gb1) == len(gb2))


def test_agg_period_index():
    prng = period_range('2012-1-1', freq='M', periods=3)
    df = DataFrame(np.random.randn(3, 2), index=prng)
    rs = df.groupby(level=0).sum()
    assert isinstance(rs.index, PeriodIndex)

    # GH 3579
    index = period_range(start='1999-01', periods=5, freq='M')
    s1 = Series(np.random.rand(len(index)), index=index)
    s2 = Series(np.random.rand(len(index)), index=index)
    series = [('s1', s1), ('s2', s2)]
    df = DataFrame.from_dict(OrderedDict(series))
    grouped = df.groupby(df.index.month)
    list(grouped)


def test_agg_dict_parameter_cast_result_dtypes():
    # GH 12821

    df = DataFrame({'class': ['A', 'A', 'B', 'B', 'C', 'C', 'D', 'D'],
                    'time': date_range('1/1/2011', periods=8, freq='H')})
    df.loc[[0, 1, 2, 5], 'time'] = None

    # test for `first` function
    exp = df.loc[[0, 3, 4, 6]].set_index('class')
    grouped = df.groupby('class')
    tm.assert_frame_equal(grouped.first(), exp)
    tm.assert_frame_equal(grouped.agg('first'), exp)
    tm.assert_frame_equal(grouped.agg({'time': 'first'}), exp)
    tm.assert_series_equal(grouped.time.first(), exp['time'])
    tm.assert_series_equal(grouped.time.agg('first'), exp['time'])

    # test for `last` function
    exp = df.loc[[0, 3, 4, 7]].set_index('class')
    grouped = df.groupby('class')
    tm.assert_frame_equal(grouped.last(), exp)
    tm.assert_frame_equal(grouped.agg('last'), exp)
    tm.assert_frame_equal(grouped.agg({'time': 'last'}), exp)
    tm.assert_series_equal(grouped.time.last(), exp['time'])
    tm.assert_series_equal(grouped.time.agg('last'), exp['time'])

    # count
    exp = pd.Series([2, 2, 2, 2],
                    index=Index(list('ABCD'), name='class'),
                    name='time')
    tm.assert_series_equal(grouped.time.agg(len), exp)
    tm.assert_series_equal(grouped.time.size(), exp)

    exp = pd.Series([0, 1, 1, 2],
                    index=Index(list('ABCD'), name='class'),
                    name='time')
    tm.assert_series_equal(grouped.time.count(), exp)


def test_agg_cast_results_dtypes():
    # similar to GH12821
    # xref #11444
    u = [dt.datetime(2015, x + 1, 1) for x in range(12)]
    v = list('aaabbbbbbccd')
    df = pd.DataFrame({'X': v, 'Y': u})

    result = df.groupby('X')['Y'].agg(len)
    expected = df.groupby('X')['Y'].count()
    tm.assert_series_equal(result, expected)


def test_aggregate_float64_no_int64():
    # see gh-11199
    df = DataFrame({"a": [1, 2, 3, 4, 5],
                    "b": [1, 2, 2, 4, 5],
                    "c": [1, 2, 3, 4, 5]})

    expected = DataFrame({"a": [1, 2.5, 4, 5]}, index=[1, 2, 4, 5])
    expected.index.name = "b"

    result = df.groupby("b")[["a"]].mean()
    tm.assert_frame_equal(result, expected)

    expected = DataFrame({"a": [1, 2.5, 4, 5], "c": [1, 2.5, 4, 5]},
                         index=[1, 2, 4, 5])
    expected.index.name = "b"

    result = df.groupby("b")[["a", "c"]].mean()
    tm.assert_frame_equal(result, expected)


def test_aggregate_api_consistency():
    # GH 9052
    # make sure that the aggregates via dict
    # are consistent
    df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                          'foo', 'bar', 'foo', 'foo'],
                    'B': ['one', 'one', 'two', 'two',
                          'two', 'two', 'one', 'two'],
                    'C': np.random.randn(8) + 1.0,
                    'D': np.arange(8)})

    grouped = df.groupby(['A', 'B'])
    c_mean = grouped['C'].mean()
    c_sum = grouped['C'].sum()
    d_mean = grouped['D'].mean()
    d_sum = grouped['D'].sum()

    result = grouped['D'].agg(['sum', 'mean'])
    expected = pd.concat([d_sum, d_mean], axis=1)
    expected.columns = ['sum', 'mean']
    tm.assert_frame_equal(result, expected, check_like=True)

    result = grouped.agg([np.sum, np.mean])
    expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1)
    expected.columns = MultiIndex.from_product([['C', 'D'],
                                                ['sum', 'mean']])
    tm.assert_frame_equal(result, expected, check_like=True)

    result = grouped[['D', 'C']].agg([np.sum, np.mean])
    expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1)
    expected.columns = MultiIndex.from_product([['D', 'C'],
                                                ['sum', 'mean']])
    tm.assert_frame_equal(result, expected, check_like=True)

    result = grouped.agg({'C': 'mean', 'D': 'sum'})
    expected = pd.concat([d_sum, c_mean], axis=1)
    tm.assert_frame_equal(result, expected, check_like=True)

    result = grouped.agg({'C': ['mean', 'sum'],
                          'D': ['mean', 'sum']})
    expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1)
    expected.columns = MultiIndex.from_product([['C', 'D'],
                                                ['mean', 'sum']])

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = grouped[['D', 'C']].agg({'r': np.sum,
                                          'r2': np.mean})
    expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1)
    expected.columns = MultiIndex.from_product([['r', 'r2'],
                                                ['D', 'C']])
    tm.assert_frame_equal(result, expected, check_like=True)


def test_agg_dict_renaming_deprecation():
    # 15931
    df = pd.DataFrame({'A': [1, 1, 1, 2, 2],
                       'B': range(5),
                       'C': range(5)})

    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False) as w:
        df.groupby('A').agg({'B': {'foo': ['sum', 'max']},
                             'C': {'bar': ['count', 'min']}})
        assert "using a dict with renaming" in str(w[0].message)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        df.groupby('A')[['B', 'C']].agg({'ma': 'max'})

    with tm.assert_produces_warning(FutureWarning) as w:
        df.groupby('A').B.agg({'foo': 'count'})
        assert "using a dict on a Series for aggregation" in str(w[0].message)


def test_agg_compat():
    # GH 12334
    df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                          'foo', 'bar', 'foo', 'foo'],
                    'B': ['one', 'one', 'two', 'two',
                          'two', 'two', 'one', 'two'],
                    'C': np.random.randn(8) + 1.0,
                    'D': np.arange(8)})

    g = df.groupby(['A', 'B'])

    expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1)
    expected.columns = MultiIndex.from_tuples([('C', 'sum'),
                                               ('C', 'std')])
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = g['D'].agg({'C': ['sum', 'std']})
    tm.assert_frame_equal(result, expected, check_like=True)

    expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1)
    expected.columns = ['C', 'D']

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = g['D'].agg({'C': 'sum', 'D': 'std'})
    tm.assert_frame_equal(result, expected, check_like=True)


def test_agg_nested_dicts():
    # API change for disallowing these types of nested dicts
    df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                          'foo', 'bar', 'foo', 'foo'],
                    'B': ['one', 'one', 'two', 'two',
                          'two', 'two', 'one', 'two'],
                    'C': np.random.randn(8) + 1.0,
                    'D': np.arange(8)})

    g = df.groupby(['A', 'B'])

    msg = r'cannot perform renaming for r[1-2] with a nested dictionary'
    with pytest.raises(SpecificationError, match=msg):
        g.aggregate({'r1': {'C': ['mean', 'sum']},
                     'r2': {'D': ['mean', 'sum']}})

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = g.agg({'C': {'ra': ['mean', 'std']},
                        'D': {'rb': ['mean', 'std']}})
    expected = pd.concat([g['C'].mean(), g['C'].std(),
                          g['D'].mean(), g['D'].std()],
                         axis=1)
    expected.columns = pd.MultiIndex.from_tuples(
        [('ra', 'mean'), ('ra', 'std'),
         ('rb', 'mean'), ('rb', 'std')])
    tm.assert_frame_equal(result, expected, check_like=True)

    # same name as the original column
    # GH9052
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        expected = g['D'].agg({'result1': np.sum, 'result2': np.mean})
        expected = expected.rename(columns={'result1': 'D'})

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = g['D'].agg({'D': np.sum, 'result2': np.mean})
    tm.assert_frame_equal(result, expected, check_like=True)


def test_agg_item_by_item_raise_typeerror():
    df = DataFrame(np.random.randint(10, size=(20, 10)))

    def raiseException(df):
        pprint_thing('----------------------------------------')
        pprint_thing(df.to_string())
        raise TypeError('test')

    with pytest.raises(TypeError, match='test'):
        df.groupby(0).agg(raiseException)


def test_series_agg_multikey():
    ts = tm.makeTimeSeries()
    grouped = ts.groupby([lambda x: x.year, lambda x: x.month])

    result = grouped.agg(np.sum)
    expected = grouped.sum()
    tm.assert_series_equal(result, expected)


def test_series_agg_multi_pure_python():
    data = DataFrame(
        {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
               'foo', 'foo', 'foo'],
         'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two',
               'two', 'two', 'one'],
         'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
               'dull', 'shiny', 'shiny', 'shiny'],
         'D': np.random.randn(11),
         'E': np.random.randn(11),
         'F': np.random.randn(11)})

    def bad(x):
        assert (len(x.values.base) > 0)
        return 'foo'

    result = data.groupby(['A', 'B']).agg(bad)
    expected = data.groupby(['A', 'B']).agg(lambda x: 'foo')
    tm.assert_frame_equal(result, expected)


def test_agg_consistency():
    # agg with ([]) and () not consistent
    # GH 6715
    def P1(a):
        try:
            return np.percentile(a.dropna(), q=1)
        except Exception:
            return np.nan

    df = DataFrame({'col1': [1, 2, 3, 4],
                    'col2': [10, 25, 26, 31],
                    'date': [dt.date(2013, 2, 10), dt.date(2013, 2, 10),
                             dt.date(2013, 2, 11), dt.date(2013, 2, 11)]})

    g = df.groupby('date')

    expected = g.agg([P1])
    expected.columns = expected.columns.levels[0]

    result = g.agg(P1)
    tm.assert_frame_equal(result, expected)


def test_agg_callables():
    # GH 7929
    df = DataFrame({'foo': [1, 2], 'bar': [3, 4]}).astype(np.int64)

    class fn_class(object):

        def __call__(self, x):
            return sum(x)

    equiv_callables = [sum,
                       np.sum,
                       lambda x: sum(x),
                       lambda x: x.sum(),
                       partial(sum),
                       fn_class()]

    expected = df.groupby("foo").agg(sum)
    for ecall in equiv_callables:
        result = df.groupby('foo').agg(ecall)
        tm.assert_frame_equal(result, expected)


def test_agg_over_numpy_arrays():
    # GH 3788
    df = pd.DataFrame([[1, np.array([10, 20, 30])],
                       [1, np.array([40, 50, 60])],
                       [2, np.array([20, 30, 40])]],
                      columns=['category', 'arraydata'])
    result = df.groupby('category').agg(sum)

    expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]]
    expected_index = pd.Index([1, 2], name='category')
    expected_column = ['arraydata']
    expected = pd.DataFrame(expected_data,
                            index=expected_index,
                            columns=expected_column)

    tm.assert_frame_equal(result, expected)


def test_agg_timezone_round_trip():
    # GH 15426
    ts = pd.Timestamp("2016-01-01 12:00:00", tz='US/Pacific')
    df = pd.DataFrame({'a': 1,
                       'b': [ts + dt.timedelta(minutes=nn)
                             for nn in range(10)]})

    result1 = df.groupby('a')['b'].agg(np.min).iloc[0]
    result2 = df.groupby('a')['b'].agg(lambda x: np.min(x)).iloc[0]
    result3 = df.groupby('a')['b'].min().iloc[0]

    assert result1 == ts
    assert result2 == ts
    assert result3 == ts

    dates = [pd.Timestamp("2016-01-0%d 12:00:00" % i, tz='US/Pacific')
             for i in range(1, 5)]
    df = pd.DataFrame({'A': ['a', 'b'] * 2, 'B': dates})
    grouped = df.groupby('A')

    ts = df['B'].iloc[0]
    assert ts == grouped.nth(0)['B'].iloc[0]
    assert ts == grouped.head(1)['B'].iloc[0]
    assert ts == grouped.first()['B'].iloc[0]
    assert ts == grouped.apply(lambda x: x.iloc[0])[0]

    ts = df['B'].iloc[2]
    assert ts == grouped.last()['B'].iloc[0]
    assert ts == grouped.apply(lambda x: x.iloc[-1])[0]


def test_sum_uint64_overflow():
    # see gh-14758
    # Convert to uint64 and don't overflow
    df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], dtype=object)
    df = df + 9223372036854775807

    index = pd.Index([9223372036854775808,
                      9223372036854775810,
                      9223372036854775812],
                     dtype=np.uint64)
    expected = pd.DataFrame({1: [9223372036854775809,
                                 9223372036854775811,
                                 9223372036854775813]},
                            index=index)

    expected.index.name = 0
    result = df.groupby(0).sum()
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("structure, expected", [
    (tuple, pd.DataFrame({'C': {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}})),
    (list, pd.DataFrame({'C': {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}})),
    (lambda x: tuple(x), pd.DataFrame({'C': {(1, 1): (1, 1, 1),
                                             (3, 4): (3, 4, 4)}})),
    (lambda x: list(x), pd.DataFrame({'C': {(1, 1): [1, 1, 1],
                                            (3, 4): [3, 4, 4]}}))
])
def test_agg_structs_dataframe(structure, expected):
    df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3],
                       'B': [1, 1, 1, 4, 4, 4],
                       'C': [1, 1, 1, 3, 4, 4]})

    result = df.groupby(['A', 'B']).aggregate(structure)
    expected.index.names = ['A', 'B']
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("structure, expected", [
    (tuple, pd.Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name='C')),
    (list, pd.Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name='C')),
    (lambda x: tuple(x), pd.Series([(1, 1, 1), (3, 4, 4)],
                                   index=[1, 3], name='C')),
    (lambda x: list(x), pd.Series([[1, 1, 1], [3, 4, 4]],
                                  index=[1, 3], name='C'))
])
def test_agg_structs_series(structure, expected):
    # Issue #18079
    df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3],
                       'B': [1, 1, 1, 4, 4, 4],
                       'C': [1, 1, 1, 3, 4, 4]})

    result = df.groupby('A')['C'].aggregate(structure)
    expected.index.name = 'A'
    tm.assert_series_equal(result, expected)


def test_agg_category_nansum(observed):
    categories = ['a', 'b', 'c']
    df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'],
                                           categories=categories),
                       'B': [1, 2, 3]})
    result = df.groupby("A", observed=observed).B.agg(np.nansum)
    expected = pd.Series([3, 3, 0],
                         index=pd.CategoricalIndex(['a', 'b', 'c'],
                                                   categories=categories,
                                                   name='A'),
                         name='B')
    if observed:
        expected = expected[expected != 0]
    tm.assert_series_equal(result, expected)


def test_agg_list_like_func():
    # GH 18473
    df = pd.DataFrame({'A': [str(x) for x in range(3)],
                       'B': [str(x) for x in range(3)]})
    grouped = df.groupby('A', as_index=False, sort=False)
    result = grouped.agg({'B': lambda x: list(x)})
    expected = pd.DataFrame({'A': [str(x) for x in range(3)],
                             'B': [[str(x)] for x in range(3)]})
    tm.assert_frame_equal(result, expected)
@@ -1,78 +0,0 @@
import numpy as np
import pytest

from pandas import DataFrame, MultiIndex
from pandas.util import testing as tm


@pytest.fixture
def mframe():
    index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
                                                              'three']],
                       codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                              [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                       names=['first', 'second'])
    return DataFrame(np.random.randn(10, 3), index=index,
                     columns=['A', 'B', 'C'])


@pytest.fixture
def df():
    return DataFrame(
        {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
         'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
         'C': np.random.randn(8),
         'D': np.random.randn(8)})


@pytest.fixture
def ts():
    return tm.makeTimeSeries()


@pytest.fixture
def seriesd():
    return tm.getSeriesData()


@pytest.fixture
def tsd():
    return tm.getTimeSeriesData()


@pytest.fixture
def frame(seriesd):
    return DataFrame(seriesd)


@pytest.fixture
def tsframe(tsd):
    return DataFrame(tsd)


@pytest.fixture
def df_mixed_floats():
    return DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                            'foo', 'bar', 'foo', 'foo'],
                      'B': ['one', 'one', 'two', 'three',
                            'two', 'two', 'one', 'three'],
                      'C': np.random.randn(8),
                      'D': np.array(
                          np.random.randn(8), dtype='float32')})


@pytest.fixture
def three_group():
    return DataFrame({'A': ['foo', 'foo', 'foo',
                            'foo', 'bar', 'bar',
                            'bar', 'bar',
                            'foo', 'foo', 'foo'],
                      'B': ['one', 'one', 'one',
                            'two', 'one', 'one', 'one', 'two',
                            'two', 'two', 'one'],
                      'C': ['dull', 'dull', 'shiny',
                            'dull', 'dull', 'shiny', 'shiny',
                            'dull', 'shiny', 'shiny', 'shiny'],
                      'D': np.random.randn(11),
                      'E': np.random.randn(11),
                      'F': np.random.randn(11)})
@@ -1,542 +0,0 @@
from datetime import datetime

import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series, bdate_range, compat
from pandas.util import testing as tm


def test_apply_issues():
    # GH 5788

    s = """2011.05.16,00:00,1.40893
2011.05.16,01:00,1.40760
2011.05.16,02:00,1.40750
2011.05.16,03:00,1.40649
2011.05.17,02:00,1.40893
2011.05.17,03:00,1.40760
2011.05.17,04:00,1.40750
2011.05.17,05:00,1.40649
2011.05.18,02:00,1.40893
2011.05.18,03:00,1.40760
2011.05.18,04:00,1.40750
2011.05.18,05:00,1.40649"""

    df = pd.read_csv(
        compat.StringIO(s), header=None, names=['date', 'time', 'value'],
        parse_dates=[['date', 'time']])
    df = df.set_index('date_time')

    expected = df.groupby(df.index.date).idxmax()
    result = df.groupby(df.index.date).apply(lambda x: x.idxmax())
    tm.assert_frame_equal(result, expected)

    # GH 5789
    # don't auto coerce dates
    df = pd.read_csv(
        compat.StringIO(s), header=None, names=['date', 'time', 'value'])
    exp_idx = pd.Index(['2011.05.16', '2011.05.17', '2011.05.18'],
                       dtype=object, name='date')
    expected = Series(['00:00', '02:00', '02:00'], index=exp_idx)
    result = df.groupby('date').apply(
        lambda x: x['time'][x['value'].idxmax()])
    tm.assert_series_equal(result, expected)


def test_apply_trivial():
    # GH 20066
    # trivial apply: ignore input and return a constant dataframe.
    df = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'],
                       'data': [1.0, 2.0, 3.0, 4.0, 5.0]},
                      columns=['key', 'data'])
    expected = pd.concat([df.iloc[1:], df.iloc[1:]],
                         axis=1, keys=['float64', 'object'])
    result = df.groupby([str(x) for x in df.dtypes],
                        axis=1).apply(lambda x: df.iloc[1:])

    tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(reason="GH#20066; function passed into apply "
                          "returns a DataFrame with the same index "
                          "as the one to create GroupBy object.")
def test_apply_trivial_fail():
    # GH 20066
    # trivial apply fails if the constant dataframe has the same index
    # with the one used to create GroupBy object.
    df = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'],
                       'data': [1.0, 2.0, 3.0, 4.0, 5.0]},
                      columns=['key', 'data'])
    expected = pd.concat([df, df],
                         axis=1, keys=['float64', 'object'])
    result = df.groupby([str(x) for x in df.dtypes],
                        axis=1).apply(lambda x: df)

    tm.assert_frame_equal(result, expected)


def test_fast_apply():
    # make sure that fast apply is correctly called
    # rather than raising any kind of error
    # otherwise the python path will be called
    # which slows things down
    N = 1000
    labels = np.random.randint(0, 2000, size=N)
    labels2 = np.random.randint(0, 3, size=N)
    df = DataFrame({'key': labels,
                    'key2': labels2,
                    'value1': np.random.randn(N),
                    'value2': ['foo', 'bar', 'baz', 'qux'] * (N // 4)})

    def f(g):
        return 1

    g = df.groupby(['key', 'key2'])

    grouper = g.grouper

    splitter = grouper._get_splitter(g._selected_obj, axis=g.axis)
    group_keys = grouper._get_group_keys()

    values, mutated = splitter.fast_apply(f, group_keys)
    assert not mutated


def test_apply_with_mixed_dtype():
    # GH3480, apply with mixed dtype on axis=1 breaks in 0.11
    df = DataFrame({'foo1': np.random.randn(6),
                    'foo2': ['one', 'two', 'two', 'three', 'one', 'two']})
    result = df.apply(lambda x: x, axis=1)
    tm.assert_series_equal(df.get_dtype_counts(), result.get_dtype_counts())

    # GH 3610 incorrect dtype conversion with as_index=False
    df = DataFrame({"c1": [1, 2, 6, 6, 8]})
    df["c2"] = df.c1 / 2.0
    result1 = df.groupby("c2").mean().reset_index().c2
    result2 = df.groupby("c2", as_index=False).mean().c2
    tm.assert_series_equal(result1, result2)


def test_groupby_as_index_apply(df):
    # GH #4648 and #3417
    df = DataFrame({'item_id': ['b', 'b', 'a', 'c', 'a', 'b'],
                    'user_id': [1, 2, 1, 1, 3, 1],
                    'time': range(6)})

    g_as = df.groupby('user_id', as_index=True)
    g_not_as = df.groupby('user_id', as_index=False)

    res_as = g_as.head(2).index
    res_not_as = g_not_as.head(2).index
    exp = Index([0, 1, 2, 4])
    tm.assert_index_equal(res_as, exp)
    tm.assert_index_equal(res_not_as, exp)

    res_as_apply = g_as.apply(lambda x: x.head(2)).index
    res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index

    # apply doesn't maintain the original ordering
    # changed in GH5610 as the as_index=False returns a MI here
    exp_not_as_apply = MultiIndex.from_tuples([(0, 0), (0, 2), (1, 1),
                                               (2, 4)])
    tp = [(1, 0), (1, 2), (2, 1), (3, 4)]
    exp_as_apply = MultiIndex.from_tuples(tp, names=['user_id', None])

    tm.assert_index_equal(res_as_apply, exp_as_apply)
    tm.assert_index_equal(res_not_as_apply, exp_not_as_apply)

    ind = Index(list('abcde'))
    df = DataFrame([[1, 2], [2, 3], [1, 4], [1, 5], [2, 6]], index=ind)
    res = df.groupby(0, as_index=False).apply(lambda x: x).index
    tm.assert_index_equal(res, ind)


def test_apply_concat_preserve_names(three_group):
    grouped = three_group.groupby(['A', 'B'])

    def desc(group):
        result = group.describe()
        result.index.name = 'stat'
        return result

    def desc2(group):
        result = group.describe()
        result.index.name = 'stat'
        result = result[:len(group)]
        # weirdo
        return result

    def desc3(group):
        result = group.describe()

        # names are different
        result.index.name = 'stat_%d' % len(group)

        result = result[:len(group)]
        # weirdo
        return result

    result = grouped.apply(desc)
    assert result.index.names == ('A', 'B', 'stat')

    result2 = grouped.apply(desc2)
    assert result2.index.names == ('A', 'B', 'stat')

    result3 = grouped.apply(desc3)
    assert result3.index.names == ('A', 'B', None)


def test_apply_series_to_frame():
    def f(piece):
        with np.errstate(invalid='ignore'):
            logged = np.log(piece)
        return DataFrame({'value': piece,
                          'demeaned': piece - piece.mean(),
                          'logged': logged})

    dr = bdate_range('1/1/2000', periods=100)
    ts = Series(np.random.randn(100), index=dr)

    grouped = ts.groupby(lambda x: x.month)
    result = grouped.apply(f)

    assert isinstance(result, DataFrame)
    tm.assert_index_equal(result.index, ts.index)


def test_apply_series_yield_constant(df):
    result = df.groupby(['A', 'B'])['C'].apply(len)
    assert result.index.names[:2] == ('A', 'B')


def test_apply_frame_yield_constant(df):
    # GH13568
    result = df.groupby(['A', 'B']).apply(len)
    assert isinstance(result, Series)
    assert result.name is None

    result = df.groupby(['A', 'B'])[['C', 'D']].apply(len)
    assert isinstance(result, Series)
    assert result.name is None


def test_apply_frame_to_series(df):
    grouped = df.groupby(['A', 'B'])
    result = grouped.apply(len)
    expected = grouped.count()['C']
    tm.assert_index_equal(result.index, expected.index)
    tm.assert_numpy_array_equal(result.values, expected.values)


def test_apply_frame_concat_series():
    def trans(group):
        return group.groupby('B')['C'].sum().sort_values()[:2]

    def trans2(group):
        grouped = group.groupby(df.reindex(group.index)['B'])
        return grouped.sum().sort_values()[:2]

    df = DataFrame({'A': np.random.randint(0, 5, 1000),
                    'B': np.random.randint(0, 5, 1000),
                    'C': np.random.randn(1000)})

    result = df.groupby('A').apply(trans)
    exp = df.groupby('A')['C'].apply(trans2)
    tm.assert_series_equal(result, exp, check_names=False)
    assert result.name == 'C'


def test_apply_transform(ts):
    grouped = ts.groupby(lambda x: x.month)
    result = grouped.apply(lambda x: x * 2)
    expected = grouped.transform(lambda x: x * 2)
    tm.assert_series_equal(result, expected)


def test_apply_multikey_corner(tsframe):
    grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month])

    def f(group):
        return group.sort_values('A')[-5:]

    result = grouped.apply(f)
    for key, group in grouped:
        tm.assert_frame_equal(result.loc[key], f(group))


def test_apply_chunk_view():
    # Low level tinkering could be unsafe, make sure not
    df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3],
                    'value': compat.lrange(9)})

    result = df.groupby('key', group_keys=False).apply(lambda x: x[:2])
    expected = df.take([0, 1, 3, 4, 6, 7])
    tm.assert_frame_equal(result, expected)


def test_apply_no_name_column_conflict():
    df = DataFrame({'name': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2],
                    'name2': [0, 0, 0, 1, 1, 1, 0, 0, 1, 1],
                    'value': compat.lrange(10)[::-1]})

    # it works! #2605
    grouped = df.groupby(['name', 'name2'])
    grouped.apply(lambda x: x.sort_values('value', inplace=True))


def test_apply_typecast_fail():
    df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
                    'c': np.tile(['a', 'b', 'c'], 2),
                    'v': np.arange(1., 7.)})

    def f(group):
        v = group['v']
        group['v2'] = (v - v.min()) / (v.max() - v.min())
        return group

    result = df.groupby('d').apply(f)

    expected = df.copy()
    expected['v2'] = np.tile([0., 0.5, 1], 2)

    tm.assert_frame_equal(result, expected)


def test_apply_multiindex_fail():
    index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]])
    df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
                    'c': np.tile(['a', 'b', 'c'], 2),
                    'v': np.arange(1., 7.)}, index=index)

    def f(group):
        v = group['v']
        group['v2'] = (v - v.min()) / (v.max() - v.min())
        return group

    result = df.groupby('d').apply(f)

    expected = df.copy()
    expected['v2'] = np.tile([0., 0.5, 1], 2)

    tm.assert_frame_equal(result, expected)


def test_apply_corner(tsframe):
    result = tsframe.groupby(lambda x: x.year).apply(lambda x: x * 2)
    expected = tsframe * 2
    tm.assert_frame_equal(result, expected)


def test_apply_without_copy():
    # GH 5545
    # returning a non-copy in an applied function fails

    data = DataFrame({'id_field': [100, 100, 200, 300],
                      'category': ['a', 'b', 'c', 'c'],
                      'value': [1, 2, 3, 4]})

    def filt1(x):
        if x.shape[0] == 1:
            return x.copy()
        else:
            return x[x.category == 'c']

    def filt2(x):
        if x.shape[0] == 1:
            return x
        else:
            return x[x.category == 'c']

    expected = data.groupby('id_field').apply(filt1)
    result = data.groupby('id_field').apply(filt2)
    tm.assert_frame_equal(result, expected)


def test_apply_corner_cases():
    # #535, can't use sliding iterator

    N = 1000
    labels = np.random.randint(0, 100, size=N)
    df = DataFrame({'key': labels,
                    'value1': np.random.randn(N),
                    'value2': ['foo', 'bar', 'baz', 'qux'] * (N // 4)})

    grouped = df.groupby('key')

    def f(g):
        g['value3'] = g['value1'] * 2
        return g

    result = grouped.apply(f)
    assert 'value3' in result


def test_apply_numeric_coercion_when_datetime():
    # In the past, group-by/apply operations have been over-eager
    # in converting dtypes to numeric, in the presence of datetime
    # columns. Various GH issues were filed, the reproductions
    # for which are here.

    # GH 15670
    df = pd.DataFrame({'Number': [1, 2],
                       'Date': ["2017-03-02"] * 2,
                       'Str': ["foo", "inf"]})
    expected = df.groupby(['Number']).apply(lambda x: x.iloc[0])
    df.Date = pd.to_datetime(df.Date)
    result = df.groupby(['Number']).apply(lambda x: x.iloc[0])
    tm.assert_series_equal(result['Str'], expected['Str'])

    # GH 15421
    df = pd.DataFrame({'A': [10, 20, 30],
                       'B': ['foo', '3', '4'],
                       'T': [pd.Timestamp("12:31:22")] * 3})

    def get_B(g):
        return g.iloc[0][['B']]
    result = df.groupby('A').apply(get_B)['B']
    expected = df.B
    expected.index = df.A
    tm.assert_series_equal(result, expected)

    # GH 14423
    def predictions(tool):
        out = pd.Series(index=['p1', 'p2', 'useTime'], dtype=object)
        if 'step1' in list(tool.State):
            out['p1'] = str(tool[tool.State == 'step1'].Machine.values[0])
        if 'step2' in list(tool.State):
            out['p2'] = str(tool[tool.State == 'step2'].Machine.values[0])
            out['useTime'] = str(
                tool[tool.State == 'step2'].oTime.values[0])
        return out
    df1 = pd.DataFrame({'Key': ['B', 'B', 'A', 'A'],
                        'State': ['step1', 'step2', 'step1', 'step2'],
                        'oTime': ['', '2016-09-19 05:24:33',
                                  '', '2016-09-19 23:59:04'],
                        'Machine': ['23', '36L', '36R', '36R']})
    df2 = df1.copy()
    df2.oTime = pd.to_datetime(df2.oTime)
    expected = df1.groupby('Key').apply(predictions).p1
    result = df2.groupby('Key').apply(predictions).p1
    tm.assert_series_equal(expected, result)


def test_time_field_bug():
    # Test a fix for the following error related to GH issue 11324 When
    # non-key fields in a group-by dataframe contained time-based fields
    # that were not returned by the apply function, an exception would be
    # raised.

    df = pd.DataFrame({'a': 1, 'b': [datetime.now() for nn in range(10)]})

    def func_with_no_date(batch):
        return pd.Series({'c': 2})

    def func_with_date(batch):
        return pd.Series({'b': datetime(2015, 1, 1), 'c': 2})

    dfg_no_conversion = df.groupby(by=['a']).apply(func_with_no_date)
    dfg_no_conversion_expected = pd.DataFrame({'c': 2}, index=[1])
    dfg_no_conversion_expected.index.name = 'a'

    dfg_conversion = df.groupby(by=['a']).apply(func_with_date)
    dfg_conversion_expected = pd.DataFrame(
        {'b': datetime(2015, 1, 1),
         'c': 2}, index=[1])
    dfg_conversion_expected.index.name = 'a'

    tm.assert_frame_equal(dfg_no_conversion, dfg_no_conversion_expected)
    tm.assert_frame_equal(dfg_conversion, dfg_conversion_expected)


def test_gb_apply_list_of_unequal_len_arrays():

    # GH1738
    df = DataFrame({'group1': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'a',
                               'b', 'b', 'b'],
                    'group2': ['c', 'c', 'd', 'd', 'd', 'e', 'c', 'c', 'd',
                               'd', 'd', 'e'],
                    'weight': [1.1, 2, 3, 4, 5, 6, 2, 4, 6, 8, 1, 2],
                    'value': [7.1, 8, 9, 10, 11, 12, 8, 7, 6, 5, 4, 3]})
    df = df.set_index(['group1', 'group2'])
    df_grouped = df.groupby(level=['group1', 'group2'], sort=True)

    def noddy(value, weight):
        out = np.array(value * weight).repeat(3)
        return out

    # the kernel function returns arrays of unequal length
    # pandas sniffs the first one, sees it's an array and not
    # a list, and assumes the rest are of equal length
    # and so tries a vstack

    # don't die
    df_grouped.apply(lambda x: noddy(x.value, x.weight))


def test_groupby_apply_all_none():
    # Tests to make sure no errors if apply function returns all None
    # values. Issue 9684.
    test_df = DataFrame({'groups': [0, 0, 1, 1],
                         'random_vars': [8, 7, 4, 5]})

    def test_func(x):
        pass

    result = test_df.groupby('groups').apply(test_func)
    expected = DataFrame()
    tm.assert_frame_equal(result, expected)


def test_groupby_apply_none_first():
    # GH 12824. Tests if apply returns None first.
    test_df1 = DataFrame({'groups': [1, 1, 1, 2], 'vars': [0, 1, 2, 3]})
    test_df2 = DataFrame({'groups': [1, 2, 2, 2], 'vars': [0, 1, 2, 3]})

    def test_func(x):
        if x.shape[0] < 2:
            return None
        return x.iloc[[0, -1]]

    result1 = test_df1.groupby('groups').apply(test_func)
    result2 = test_df2.groupby('groups').apply(test_func)
    index1 = MultiIndex.from_arrays([[1, 1], [0, 2]],
                                    names=['groups', None])
    index2 = MultiIndex.from_arrays([[2, 2], [1, 3]],
                                    names=['groups', None])
    expected1 = DataFrame({'groups': [1, 1], 'vars': [0, 2]},
                          index=index1)
    expected2 = DataFrame({'groups': [2, 2], 'vars': [1, 3]},
                          index=index2)
    tm.assert_frame_equal(result1, expected1)
    tm.assert_frame_equal(result2, expected2)


def test_groupby_apply_return_empty_chunk():
    # GH 22221: apply filter which returns some empty groups
    df = pd.DataFrame(dict(value=[0, 1], group=['filled', 'empty']))
    groups = df.groupby('group')
    result = groups.apply(lambda group: group[group.value != 1]['value'])
    expected = pd.Series([0], name='value',
                         index=MultiIndex.from_product([['empty', 'filled'],
                                                        [0]],
                                                       names=['group', None]
                                                       ).drop('empty'))
    tm.assert_series_equal(result, expected)


def test_apply_with_mixed_types():
    # gh-20949
    df = pd.DataFrame({'A': 'a a b'.split(), 'B': [1, 2, 3], 'C': [4, 6, 5]})
    g = df.groupby('A')

    result = g.transform(lambda x: x / x.sum())
    expected = pd.DataFrame({'B': [1 / 3., 2 / 3., 1], 'C': [0.4, 0.6, 1.0]})
    tm.assert_frame_equal(result, expected)

    result = g.apply(lambda x: x / x.sum())
    tm.assert_frame_equal(result, expected)
@@ -1,157 +0,0 @@
# -*- coding: utf-8 -*-

import numpy as np
from numpy import nan
import pytest

from pandas._libs import groupby, lib, reduction

from pandas.core.dtypes.common import ensure_int64

from pandas import Index, isna
from pandas.core.groupby.ops import generate_bins_generic
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal


def test_series_grouper():
    from pandas import Series
    obj = Series(np.random.randn(10))
    dummy = obj[:0]

    labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64)

    grouper = reduction.SeriesGrouper(obj, np.mean, labels, 2, dummy)
    result, counts = grouper.get_result()

    expected = np.array([obj[3:6].mean(), obj[6:].mean()])
    assert_almost_equal(result, expected)

    exp_counts = np.array([3, 4], dtype=np.int64)
    assert_almost_equal(counts, exp_counts)
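
    # In the label array above, -1 marks rows that belong to no group, so
    # obj[:3] is skipped entirely; that is why `expected` only covers
    # obj[3:6] and obj[6:] and the counts are [3, 4] rather than [3, 3, 4].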


def test_series_bin_grouper():
    from pandas import Series
    obj = Series(np.random.randn(10))
    dummy = obj[:0]

    bins = np.array([3, 6])

    grouper = reduction.SeriesBinGrouper(obj, np.mean, bins, dummy)
    result, counts = grouper.get_result()

    expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()])
    assert_almost_equal(result, expected)

    exp_counts = np.array([3, 3, 4], dtype=np.int64)
    assert_almost_equal(counts, exp_counts)


class TestBinGroupers(object):

    def setup_method(self, method):
        self.obj = np.random.randn(10, 1)
        self.labels = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2],
                               dtype=np.int64)
        self.bins = np.array([3, 6], dtype=np.int64)

    def test_generate_bins(self):
        values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
        binner = np.array([0, 3, 6, 9], dtype=np.int64)

        for func in [lib.generate_bins_dt64, generate_bins_generic]:
            bins = func(values, binner, closed='left')
            assert ((bins == np.array([2, 5, 6])).all())

            bins = func(values, binner, closed='right')
            assert ((bins == np.array([3, 6, 6])).all())

        for func in [lib.generate_bins_dt64, generate_bins_generic]:
            values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
            binner = np.array([0, 3, 6], dtype=np.int64)

            bins = func(values, binner, closed='right')
            assert ((bins == np.array([3, 6])).all())

        msg = "Invalid length for values or for binner"
        with pytest.raises(ValueError, match=msg):
            generate_bins_generic(values, [], 'right')
        with pytest.raises(ValueError, match=msg):
            generate_bins_generic(values[:0], binner, 'right')

        msg = "Values falls before first bin"
        with pytest.raises(ValueError, match=msg):
            generate_bins_generic(values, [4], 'right')
        msg = "Values falls after last bin"
        with pytest.raises(ValueError, match=msg):
            generate_bins_generic(values, [-3, -1], 'right')


def test_group_ohlc():
    def _check(dtype):
        obj = np.array(np.random.randn(20), dtype=dtype)

        bins = np.array([6, 12, 20])
        out = np.zeros((3, 4), dtype)
        counts = np.zeros(len(out), dtype=np.int64)
        labels = ensure_int64(np.repeat(np.arange(3),
                                        np.diff(np.r_[0, bins])))

        func = getattr(groupby, 'group_ohlc_%s' % dtype)
        func(out, counts, obj[:, None], labels)

        def _ohlc(group):
            if isna(group).all():
                return np.repeat(nan, 4)
            return [group[0], group.max(), group.min(), group[-1]]

        expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]),
                             _ohlc(obj[12:])])

        assert_almost_equal(out, expected)
        tm.assert_numpy_array_equal(counts,
                                    np.array([6, 6, 8], dtype=np.int64))

        obj[:6] = nan
        func(out, counts, obj[:, None], labels)
        expected[0] = nan
        assert_almost_equal(out, expected)

    _check('float32')
    _check('float64')
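
    # The four output columns per bin are open/high/low/close, matching the
    # _ohlc helper above: first value, max, min, last value; a group that is
    # entirely NaN yields a NaN row.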


class TestMoments(object):
    pass


class TestReducer(object):

    def test_int_index(self):
        from pandas.core.series import Series

        arr = np.random.randn(100, 4)
        result = reduction.reduce(arr, np.sum, labels=Index(np.arange(4)))
        expected = arr.sum(0)
        assert_almost_equal(result, expected)

        result = reduction.reduce(arr, np.sum, axis=1,
                                  labels=Index(np.arange(100)))
        expected = arr.sum(1)
        assert_almost_equal(result, expected)

        dummy = Series(0., index=np.arange(100))
        result = reduction.reduce(arr, np.sum, dummy=dummy,
                                  labels=Index(np.arange(4)))
        expected = arr.sum(0)
        assert_almost_equal(result, expected)

        dummy = Series(0., index=np.arange(4))
        result = reduction.reduce(arr, np.sum, axis=1, dummy=dummy,
                                  labels=Index(np.arange(100)))
        expected = arr.sum(1)
        assert_almost_equal(result, expected)

        result = reduction.reduce(arr, np.sum, axis=1, dummy=dummy,
                                  labels=Index(np.arange(100)))
        assert_almost_equal(result, expected)
@@ -1,936 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import print_function

from datetime import datetime

import numpy as np
import pytest

from pandas.compat import PY37

import pandas as pd
from pandas import (
    Categorical, CategoricalIndex, DataFrame, Index, MultiIndex, Series, qcut)
import pandas.util.testing as tm
from pandas.util.testing import (
    assert_equal, assert_frame_equal, assert_series_equal)


def cartesian_product_for_groupers(result, args, names):
    """ Reindex to a cartesian product of the groupers,
    preserving the nature (Categorical) of each grouper """

    def f(a):
        if isinstance(a, (CategoricalIndex, Categorical)):
            categories = a.categories
            a = Categorical.from_codes(np.arange(len(categories)),
                                       categories=categories,
                                       ordered=a.ordered)
        return a

    index = pd.MultiIndex.from_product(map(f, args), names=names)
    return result.reindex(index).sort_index()
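

# A minimal usage sketch (a hypothetical extra test, assuming only the
# helper signature above): expanding two observed combinations onto the
# full 3x3 category grid yields 9 rows, with unobserved cells NaN-filled.
def test_cartesian_product_for_groupers_sketch():
    cat = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
    res = DataFrame({'values': [1, 2]},
                    index=pd.MultiIndex.from_arrays([cat, cat],
                                                    names=['x', 'y']))
    full = cartesian_product_for_groupers(res, [cat, cat], ['x', 'y'])
    assert len(full) == 9
    assert full['values'].count() == 2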


def test_apply_use_categorical_name(df):
    cats = qcut(df.C, 4)

    def get_stats(group):
        return {'min': group.min(),
                'max': group.max(),
                'count': group.count(),
                'mean': group.mean()}

    result = df.groupby(cats, observed=False).D.apply(get_stats)
    assert result.index.names[0] == 'C'


def test_basic():

    cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"],
                       categories=["a", "b", "c", "d"], ordered=True)
    data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats})

    exp_index = CategoricalIndex(list('abcd'), name='b', ordered=True)
    expected = DataFrame({'a': [1, 2, 4, np.nan]}, index=exp_index)
    result = data.groupby("b", observed=False).mean()
    tm.assert_frame_equal(result, expected)

    cat1 = Categorical(["a", "a", "b", "b"],
                       categories=["a", "b", "z"], ordered=True)
    cat2 = Categorical(["c", "d", "c", "d"],
                       categories=["c", "d", "y"], ordered=True)
    df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})

    # single grouper
    gb = df.groupby("A", observed=False)
    exp_idx = CategoricalIndex(['a', 'b', 'z'], name='A', ordered=True)
    expected = DataFrame({'values': Series([3, 7, 0], index=exp_idx)})
    result = gb.sum()
    tm.assert_frame_equal(result, expected)

    # GH 8623
    x = DataFrame([[1, 'John P. Doe'], [2, 'Jane Dove'],
                   [1, 'John P. Doe']],
                  columns=['person_id', 'person_name'])
    x['person_name'] = Categorical(x.person_name)

    g = x.groupby(['person_id'], observed=False)
    result = g.transform(lambda x: x)
    tm.assert_frame_equal(result, x[['person_name']])

    result = x.drop_duplicates('person_name')
    expected = x.iloc[[0, 1]]
    tm.assert_frame_equal(result, expected)

    def f(x):
        return x.drop_duplicates('person_name').iloc[0]

    result = g.apply(f)
    expected = x.iloc[[0, 1]].copy()
    expected.index = Index([1, 2], name='person_id')
    expected['person_name'] = expected['person_name'].astype('object')
    tm.assert_frame_equal(result, expected)

    # GH 9921
    # Monotonic
    df = DataFrame({"a": [5, 15, 25]})
    c = pd.cut(df.a, bins=[0, 10, 20, 30, 40])

    result = df.a.groupby(c, observed=False).transform(sum)
    tm.assert_series_equal(result, df['a'])

    tm.assert_series_equal(
        df.a.groupby(c, observed=False).transform(lambda xs: np.sum(xs)),
        df['a'])
    tm.assert_frame_equal(
        df.groupby(c, observed=False).transform(sum),
        df[['a']])
    tm.assert_frame_equal(
        df.groupby(c, observed=False).transform(lambda xs: np.max(xs)),
        df[['a']])

    # Filter
    tm.assert_series_equal(
        df.a.groupby(c, observed=False).filter(np.all),
        df['a'])
    tm.assert_frame_equal(
        df.groupby(c, observed=False).filter(np.all),
        df)

    # Non-monotonic
    df = DataFrame({"a": [5, 15, 25, -5]})
    c = pd.cut(df.a, bins=[-10, 0, 10, 20, 30, 40])

    result = df.a.groupby(c, observed=False).transform(sum)
    tm.assert_series_equal(result, df['a'])

    tm.assert_series_equal(
        df.a.groupby(c, observed=False).transform(lambda xs: np.sum(xs)),
        df['a'])
    tm.assert_frame_equal(
        df.groupby(c, observed=False).transform(sum),
        df[['a']])
    tm.assert_frame_equal(
        df.groupby(c, observed=False).transform(lambda xs: np.sum(xs)),
        df[['a']])

    # GH 9603
    df = DataFrame({'a': [1, 0, 0, 0]})
    c = pd.cut(df.a, [0, 1, 2, 3, 4], labels=Categorical(list('abcd')))
    result = df.groupby(c, observed=False).apply(len)

    exp_index = CategoricalIndex(
        c.values.categories, ordered=c.values.ordered)
    expected = Series([1, 0, 0, 0], index=exp_index)
    expected.index.name = 'a'
    tm.assert_series_equal(result, expected)

    # more basic
    levels = ['foo', 'bar', 'baz', 'qux']
    codes = np.random.randint(0, 4, size=100)

    cats = Categorical.from_codes(codes, levels, ordered=True)

    data = DataFrame(np.random.randn(100, 4))

    result = data.groupby(cats, observed=False).mean()

    expected = data.groupby(np.asarray(cats), observed=False).mean()
    exp_idx = CategoricalIndex(levels, categories=cats.categories,
                               ordered=True)
    expected = expected.reindex(exp_idx)

    assert_frame_equal(result, expected)

    grouped = data.groupby(cats, observed=False)
    desc_result = grouped.describe()

    idx = cats.codes.argsort()
    ord_labels = np.asarray(cats).take(idx)
    ord_data = data.take(idx)

    exp_cats = Categorical(ord_labels, ordered=True,
                           categories=['foo', 'bar', 'baz', 'qux'])
    expected = ord_data.groupby(
        exp_cats, sort=False, observed=False).describe()
    assert_frame_equal(desc_result, expected)

    # GH 10460
    expc = Categorical.from_codes(np.arange(4).repeat(8),
                                  levels, ordered=True)
    exp = CategoricalIndex(expc)
    tm.assert_index_equal((desc_result.stack().index
                           .get_level_values(0)), exp)
    exp = Index(['count', 'mean', 'std', 'min', '25%', '50%',
                 '75%', 'max'] * 4)
    tm.assert_index_equal((desc_result.stack().index
                           .get_level_values(1)), exp)


def test_level_get_group(observed):
    # GH15155
    df = DataFrame(data=np.arange(2, 22, 2),
                   index=MultiIndex(
                       levels=[pd.CategoricalIndex(["a", "b"]), range(10)],
                       codes=[[0] * 5 + [1] * 5, range(10)],
                       names=["Index1", "Index2"]))
    g = df.groupby(level=["Index1"], observed=observed)

    # expected should equal test.loc[["a"]]
    # GH15166
    expected = DataFrame(data=np.arange(2, 12, 2),
                         index=pd.MultiIndex(
                             levels=[pd.CategoricalIndex(["a", "b"]),
                                     range(5)],
                             codes=[[0] * 5, range(5)],
                             names=["Index1", "Index2"]))
    result = g.get_group('a')

    assert_frame_equal(result, expected)


@pytest.mark.xfail(PY37, reason="flaky on 3.7, xref gh-21636", strict=False)
@pytest.mark.parametrize('ordered', [True, False])
def test_apply(ordered):
    # GH 10138

    dense = Categorical(list('abc'), ordered=ordered)

    # 'b' is in the categories but not in the list
    missing = Categorical(
        list('aaa'), categories=['a', 'b'], ordered=ordered)
    values = np.arange(len(dense))
    df = DataFrame({'missing': missing,
                    'dense': dense,
                    'values': values})
    grouped = df.groupby(['missing', 'dense'], observed=True)

    # missing category 'b' should still exist in the output index
    idx = MultiIndex.from_arrays(
        [missing, dense], names=['missing', 'dense'])
    expected = DataFrame([0, 1, 2.],
                         index=idx,
                         columns=['values'])

    result = grouped.apply(lambda x: np.mean(x))
    assert_frame_equal(result, expected)

    # we coerce back to ints
    expected = expected.astype('int')
    result = grouped.mean()
    assert_frame_equal(result, expected)

    result = grouped.agg(np.mean)
    assert_frame_equal(result, expected)

    # but for transform we should still get back the original index
    idx = MultiIndex.from_arrays([missing, dense],
                                 names=['missing', 'dense'])
    expected = Series(1, index=idx)
    result = grouped.apply(lambda x: 1)
    assert_series_equal(result, expected)


def test_observed(observed):
    # multiple groupers, don't re-expand the output space
    # of the grouper
    # gh-14942 (implement)
    # gh-10132 (back-compat)
    # gh-8138 (back-compat)
    # gh-8869

    cat1 = Categorical(["a", "a", "b", "b"],
                       categories=["a", "b", "z"], ordered=True)
    cat2 = Categorical(["c", "d", "c", "d"],
                       categories=["c", "d", "y"], ordered=True)
    df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
    df['C'] = ['foo', 'bar'] * 2

    # multiple groupers with a non-cat
    gb = df.groupby(['A', 'B', 'C'], observed=observed)
    exp_index = pd.MultiIndex.from_arrays(
        [cat1, cat2, ['foo', 'bar'] * 2],
        names=['A', 'B', 'C'])
    expected = DataFrame({'values': Series(
        [1, 2, 3, 4], index=exp_index)}).sort_index()
    result = gb.sum()
    if not observed:
        expected = cartesian_product_for_groupers(
            expected,
            [cat1, cat2, ['foo', 'bar']],
            list('ABC'))

    tm.assert_frame_equal(result, expected)

    gb = df.groupby(['A', 'B'], observed=observed)
    exp_index = pd.MultiIndex.from_arrays(
        [cat1, cat2],
        names=['A', 'B'])
    expected = DataFrame({'values': [1, 2, 3, 4]},
                         index=exp_index)
    result = gb.sum()
    if not observed:
        expected = cartesian_product_for_groupers(
            expected,
            [cat1, cat2],
            list('AB'))

    tm.assert_frame_equal(result, expected)

    # https://github.com/pandas-dev/pandas/issues/8138
    d = {'cat':
         pd.Categorical(["a", "b", "a", "b"], categories=["a", "b", "c"],
                        ordered=True),
         'ints': [1, 1, 2, 2],
         'val': [10, 20, 30, 40]}
    df = pd.DataFrame(d)

    # Grouping on a single column
    groups_single_key = df.groupby("cat", observed=observed)
    result = groups_single_key.mean()

    exp_index = pd.CategoricalIndex(list('ab'), name="cat",
                                    categories=list('abc'),
                                    ordered=True)
    expected = DataFrame({"ints": [1.5, 1.5], "val": [20., 30]},
                         index=exp_index)
    if not observed:
        index = pd.CategoricalIndex(list('abc'), name="cat",
                                    categories=list('abc'),
                                    ordered=True)
        expected = expected.reindex(index)

    tm.assert_frame_equal(result, expected)

    # Grouping on two columns
    groups_double_key = df.groupby(["cat", "ints"], observed=observed)
    result = groups_double_key.agg('mean')
    expected = DataFrame(
        {"val": [10, 30, 20, 40],
         "cat": pd.Categorical(['a', 'a', 'b', 'b'],
                               categories=['a', 'b', 'c'],
                               ordered=True),
         "ints": [1, 2, 1, 2]}).set_index(["cat", "ints"])
    if not observed:
        expected = cartesian_product_for_groupers(
            expected,
            [df.cat.values, [1, 2]],
            ['cat', 'ints'])

    tm.assert_frame_equal(result, expected)

    # GH 10132
    for key in [('a', 1), ('b', 2), ('b', 1), ('a', 2)]:
        c, i = key
        result = groups_double_key.get_group(key)
        expected = df[(df.cat == c) & (df.ints == i)]
        assert_frame_equal(result, expected)

    # gh-8869
    # with as_index
    d = {'foo': [10, 8, 4, 8, 4, 1, 1], 'bar': [10, 20, 30, 40, 50, 60, 70],
         'baz': ['d', 'c', 'e', 'a', 'a', 'd', 'c']}
    df = pd.DataFrame(d)
    cat = pd.cut(df['foo'], np.linspace(0, 10, 3))
    df['range'] = cat
    groups = df.groupby(['range', 'baz'], as_index=False, observed=observed)
    result = groups.agg('mean')

    groups2 = df.groupby(['range', 'baz'], as_index=True, observed=observed)
    expected = groups2.agg('mean').reset_index()
    tm.assert_frame_equal(result, expected)


def test_observed_codes_remap(observed):
    d = {'C1': [3, 3, 4, 5], 'C2': [1, 2, 3, 4], 'C3': [10, 100, 200, 34]}
    df = pd.DataFrame(d)
    values = pd.cut(df['C1'], [1, 2, 3, 6])
    values.name = "cat"
    groups_double_key = df.groupby([values, 'C2'], observed=observed)

    idx = MultiIndex.from_arrays([values, [1, 2, 3, 4]],
                                 names=["cat", "C2"])
    expected = DataFrame({"C1": [3, 3, 4, 5],
                          "C3": [10, 100, 200, 34]}, index=idx)
    if not observed:
        expected = cartesian_product_for_groupers(
            expected,
            [values.values, [1, 2, 3, 4]],
            ['cat', 'C2'])

    result = groups_double_key.agg('mean')
    tm.assert_frame_equal(result, expected)


def test_observed_perf():
    # we create a cartesian product, so this is
    # non-performant if we don't use observed values
    # gh-14942
    df = DataFrame({
        'cat': np.random.randint(0, 255, size=30000),
        'int_id': np.random.randint(0, 255, size=30000),
        'other_id': np.random.randint(0, 10000, size=30000),
        'foo': 0})
    df['cat'] = df.cat.astype(str).astype('category')

    grouped = df.groupby(['cat', 'int_id', 'other_id'], observed=True)
    result = grouped.count()
    assert result.index.levels[0].nunique() == df.cat.nunique()
    assert result.index.levels[1].nunique() == df.int_id.nunique()
    assert result.index.levels[2].nunique() == df.other_id.nunique()
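
    # Rough scale of the grid a full re-expansion would imply here (an
    # estimate, assuming every level combination gets an index slot):
    # up to 255 * 255 * 10000 ~= 650 million entries, versus at most 30000
    # observed rows, which is why only per-level cardinalities are checked.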


def test_observed_groups(observed):
    # gh-20583
    # test that we have the appropriate groups

    cat = pd.Categorical(['a', 'c', 'a'], categories=['a', 'b', 'c'])
    df = pd.DataFrame({'cat': cat, 'vals': [1, 2, 3]})
    g = df.groupby('cat', observed=observed)

    result = g.groups
    if observed:
        expected = {'a': Index([0, 2], dtype='int64'),
                    'c': Index([1], dtype='int64')}
    else:
        expected = {'a': Index([0, 2], dtype='int64'),
                    'b': Index([], dtype='int64'),
                    'c': Index([1], dtype='int64')}

    tm.assert_dict_equal(result, expected)


def test_observed_groups_with_nan(observed):
    # GH 24740
    df = pd.DataFrame({'cat': pd.Categorical(['a', np.nan, 'a'],
                                             categories=['a', 'b', 'd']),
                       'vals': [1, 2, 3]})
    g = df.groupby('cat', observed=observed)
    result = g.groups
    if observed:
        expected = {'a': Index([0, 2], dtype='int64')}
    else:
        expected = {'a': Index([0, 2], dtype='int64'),
                    'b': Index([], dtype='int64'),
                    'd': Index([], dtype='int64')}
    tm.assert_dict_equal(result, expected)


def test_dataframe_categorical_with_nan(observed):
    # GH 21151
    s1 = pd.Categorical([np.nan, 'a', np.nan, 'a'],
                        categories=['a', 'b', 'c'])
    s2 = pd.Series([1, 2, 3, 4])
    df = pd.DataFrame({'s1': s1, 's2': s2})
    result = df.groupby('s1', observed=observed).first().reset_index()
    if observed:
        expected = DataFrame({'s1': pd.Categorical(
            ['a'], categories=['a', 'b', 'c']), 's2': [2]})
    else:
        expected = DataFrame({'s1': pd.Categorical(
            ['a', 'b', 'c'], categories=['a', 'b', 'c']),
            's2': [2, np.nan, np.nan]})
    tm.assert_frame_equal(result, expected)


def test_datetime():
    # GH9049: ensure backward compatibility
    levels = pd.date_range('2014-01-01', periods=4)
    codes = np.random.randint(0, 4, size=100)

    cats = Categorical.from_codes(codes, levels, ordered=True)

    data = DataFrame(np.random.randn(100, 4))
    result = data.groupby(cats, observed=False).mean()

    expected = data.groupby(np.asarray(cats), observed=False).mean()
    expected = expected.reindex(levels)
    expected.index = CategoricalIndex(expected.index,
                                      categories=expected.index,
                                      ordered=True)

    assert_frame_equal(result, expected)

    grouped = data.groupby(cats, observed=False)
    desc_result = grouped.describe()

    idx = cats.codes.argsort()
    ord_labels = cats.take_nd(idx)
    ord_data = data.take(idx)
    expected = ord_data.groupby(ord_labels, observed=False).describe()
    assert_frame_equal(desc_result, expected)
    tm.assert_index_equal(desc_result.index, expected.index)
    tm.assert_index_equal(
        desc_result.index.get_level_values(0),
        expected.index.get_level_values(0))

    # GH 10460
    expc = Categorical.from_codes(
        np.arange(4).repeat(8), levels, ordered=True)
    exp = CategoricalIndex(expc)
    tm.assert_index_equal((desc_result.stack().index
                           .get_level_values(0)), exp)
    exp = Index(['count', 'mean', 'std', 'min', '25%', '50%',
                 '75%', 'max'] * 4)
    tm.assert_index_equal((desc_result.stack().index
                           .get_level_values(1)), exp)


def test_categorical_index():

    s = np.random.RandomState(12345)
    levels = ['foo', 'bar', 'baz', 'qux']
    codes = s.randint(0, 4, size=20)
    cats = Categorical.from_codes(codes, levels, ordered=True)
    df = DataFrame(
        np.repeat(
            np.arange(20), 4).reshape(-1, 4), columns=list('abcd'))
    df['cats'] = cats

    # with a cat index
    result = df.set_index('cats').groupby(level=0, observed=False).sum()
    expected = df[list('abcd')].groupby(cats.codes, observed=False).sum()
    expected.index = CategoricalIndex(
        Categorical.from_codes(
            [0, 1, 2, 3], levels, ordered=True), name='cats')
    assert_frame_equal(result, expected)

    # with a cat column, should produce a cat index
    result = df.groupby('cats', observed=False).sum()
    expected = df[list('abcd')].groupby(cats.codes, observed=False).sum()
    expected.index = CategoricalIndex(
        Categorical.from_codes(
            [0, 1, 2, 3], levels, ordered=True), name='cats')
    assert_frame_equal(result, expected)


def test_describe_categorical_columns():
    # GH 11558
    cats = pd.CategoricalIndex(['qux', 'foo', 'baz', 'bar'],
                               categories=['foo', 'bar', 'baz', 'qux'],
                               ordered=True)
    df = DataFrame(np.random.randn(20, 4), columns=cats)
    result = df.groupby([1, 2, 3, 4] * 5).describe()

    tm.assert_index_equal(result.stack().columns, cats)
    tm.assert_categorical_equal(result.stack().columns.values, cats.values)


def test_unstack_categorical():
    # GH11558 (example is taken from the original issue)
    df = pd.DataFrame({'a': range(10),
                       'medium': ['A', 'B'] * 5,
                       'artist': list('XYXXY') * 2})
    df['medium'] = df['medium'].astype('category')

    gcat = df.groupby(
        ['artist', 'medium'], observed=False)['a'].count().unstack()
    result = gcat.describe()

    exp_columns = pd.CategoricalIndex(['A', 'B'], ordered=False,
                                      name='medium')
    tm.assert_index_equal(result.columns, exp_columns)
    tm.assert_categorical_equal(result.columns.values, exp_columns.values)

    result = gcat['A'] + gcat['B']
    expected = pd.Series([6, 4], index=pd.Index(['X', 'Y'], name='artist'))
    tm.assert_series_equal(result, expected)


def test_bins_unequal_len():
    # GH3011
    series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4])
    bins = pd.cut(series.dropna().values, 4)

    # len(bins) != len(series) here
    with pytest.raises(ValueError):
        series.groupby(bins).mean()


def test_as_index():
    # GH13204
    df = DataFrame({'cat': Categorical([1, 2, 2], [1, 2, 3]),
                    'A': [10, 11, 11],
                    'B': [101, 102, 103]})
    result = df.groupby(['cat', 'A'], as_index=False, observed=True).sum()
    expected = DataFrame(
        {'cat': Categorical([1, 2], categories=df.cat.cat.categories),
         'A': [10, 11],
         'B': [101, 205]},
        columns=['cat', 'A', 'B'])
    tm.assert_frame_equal(result, expected)

    # function grouper
    f = lambda r: df.loc[r, 'A']
    result = df.groupby(['cat', f], as_index=False, observed=True).sum()
    expected = DataFrame(
        {'cat': Categorical([1, 2], categories=df.cat.cat.categories),
         'A': [10, 22],
         'B': [101, 205]},
        columns=['cat', 'A', 'B'])
    tm.assert_frame_equal(result, expected)

    # another not in-axis grouper (conflicting names in index)
    s = Series(['a', 'b', 'b'], name='cat')
    result = df.groupby(['cat', s], as_index=False, observed=True).sum()
    tm.assert_frame_equal(result, expected)

    # is original index dropped?
    group_columns = ['cat', 'A']
    expected = DataFrame(
        {'cat': Categorical([1, 2], categories=df.cat.cat.categories),
         'A': [10, 11],
         'B': [101, 205]},
        columns=['cat', 'A', 'B'])

    for name in [None, 'X', 'B']:
        df.index = Index(list("abc"), name=name)
        result = df.groupby(group_columns, as_index=False,
                            observed=True).sum()

        tm.assert_frame_equal(result, expected)


def test_preserve_categories():
    # GH-13179
    categories = list('abc')

    # ordered=True
    df = DataFrame({'A': pd.Categorical(list('ba'),
                                        categories=categories,
                                        ordered=True)})
    index = pd.CategoricalIndex(categories, categories, ordered=True)
    tm.assert_index_equal(
        df.groupby('A', sort=True, observed=False).first().index, index)
    tm.assert_index_equal(
        df.groupby('A', sort=False, observed=False).first().index, index)

    # ordered=False
    df = DataFrame({'A': pd.Categorical(list('ba'),
                                        categories=categories,
                                        ordered=False)})
    sort_index = pd.CategoricalIndex(categories, categories, ordered=False)
    nosort_index = pd.CategoricalIndex(list('bac'), list('bac'),
                                       ordered=False)
    tm.assert_index_equal(
        df.groupby('A', sort=True, observed=False).first().index,
        sort_index)
    tm.assert_index_equal(
        df.groupby('A', sort=False, observed=False).first().index,
        nosort_index)


def test_preserve_categorical_dtype():
    # GH13743, GH13854
    df = DataFrame({'A': [1, 2, 1, 1, 2],
                    'B': [10, 16, 22, 28, 34],
                    'C1': Categorical(list("abaab"),
                                      categories=list("bac"),
                                      ordered=False),
                    'C2': Categorical(list("abaab"),
                                      categories=list("bac"),
                                      ordered=True)})
    # single grouper
    exp_full = DataFrame({'A': [2.0, 1.0, np.nan],
                          'B': [25.0, 20.0, np.nan],
                          'C1': Categorical(list("bac"),
                                            categories=list("bac"),
                                            ordered=False),
                          'C2': Categorical(list("bac"),
                                            categories=list("bac"),
                                            ordered=True)})
    for col in ['C1', 'C2']:
        result1 = df.groupby(by=col, as_index=False, observed=False).mean()
        result2 = df.groupby(
            by=col, as_index=True, observed=False).mean().reset_index()
        expected = exp_full.reindex(columns=result1.columns)
        tm.assert_frame_equal(result1, expected)
        tm.assert_frame_equal(result2, expected)


def test_categorical_no_compress():
    data = Series(np.random.randn(9))

    codes = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])
    cats = Categorical.from_codes(codes, [0, 1, 2], ordered=True)

    result = data.groupby(cats, observed=False).mean()
    exp = data.groupby(codes, observed=False).mean()

    exp.index = CategoricalIndex(exp.index, categories=cats.categories,
                                 ordered=cats.ordered)
    assert_series_equal(result, exp)

    codes = np.array([0, 0, 0, 1, 1, 1, 3, 3, 3])
    cats = Categorical.from_codes(codes, [0, 1, 2, 3], ordered=True)

    result = data.groupby(cats, observed=False).mean()
    exp = data.groupby(codes, observed=False).mean().reindex(cats.categories)
    exp.index = CategoricalIndex(exp.index, categories=cats.categories,
                                 ordered=cats.ordered)
    assert_series_equal(result, exp)

    cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"],
                       categories=["a", "b", "c", "d"], ordered=True)
    data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats})

    result = data.groupby("b", observed=False).mean()
    result = result["a"].values
    exp = np.array([1, 2, 4, np.nan])
    tm.assert_numpy_array_equal(result, exp)


def test_sort():

    # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby  # noqa: flake8
    # This should result in a properly sorted Series so that the plot
    # has a sorted x axis
    # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar')

    df = DataFrame({'value': np.random.randint(0, 10000, 100)})
    labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
    cat_labels = Categorical(labels, labels)

    df = df.sort_values(by=['value'], ascending=True)
    df['value_group'] = pd.cut(df.value, range(0, 10500, 500),
                               right=False, labels=cat_labels)

    res = df.groupby(['value_group'], observed=False)['value_group'].count()
    exp = res[sorted(res.index, key=lambda x: float(x.split()[0]))]
    exp.index = CategoricalIndex(exp.index, name=exp.index.name)
    tm.assert_series_equal(res, exp)
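
    # The sort key above reads the numeric left edge out of labels like
    # "0 - 499", which coincides with the categorical order of cat_labels,
    # so groupby(sort=True) should already return `res` in that order.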


def test_sort2():
    # dataframe groupby sort was being ignored # GH 8868
    df = DataFrame([['(7.5, 10]', 10, 10],
                    ['(7.5, 10]', 8, 20],
                    ['(2.5, 5]', 5, 30],
                    ['(5, 7.5]', 6, 40],
                    ['(2.5, 5]', 4, 50],
                    ['(0, 2.5]', 1, 60],
                    ['(5, 7.5]', 7, 70]], columns=['range', 'foo', 'bar'])
    df['range'] = Categorical(df['range'], ordered=True)
    index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]',
                              '(7.5, 10]'], name='range', ordered=True)
    expected_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]],
                              columns=['foo', 'bar'], index=index)

    col = 'range'
    result_sort = df.groupby(col, sort=True, observed=False).first()
    assert_frame_equal(result_sort, expected_sort)

    # when categories is ordered, group is ordered by category's order
    expected_sort = result_sort
    result_sort = df.groupby(col, sort=False, observed=False).first()
    assert_frame_equal(result_sort, expected_sort)

    df['range'] = Categorical(df['range'], ordered=False)
    index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]',
                              '(7.5, 10]'], name='range')
    expected_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]],
                              columns=['foo', 'bar'], index=index)

    index = CategoricalIndex(['(7.5, 10]', '(2.5, 5]', '(5, 7.5]',
                              '(0, 2.5]'],
                             categories=['(7.5, 10]', '(2.5, 5]',
                                         '(5, 7.5]', '(0, 2.5]'],
                             name='range')
    expected_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]],
                                index=index, columns=['foo', 'bar'])

    col = 'range'

    # this is an unordered categorical, but we allow this ####
    result_sort = df.groupby(col, sort=True, observed=False).first()
    assert_frame_equal(result_sort, expected_sort)

    result_nosort = df.groupby(col, sort=False, observed=False).first()
    assert_frame_equal(result_nosort, expected_nosort)


def test_sort_datetimelike():
    # GH10505

    # use the same data as test_groupby_sort_categorical, whose categories
    # correspond to datetime.month
    df = DataFrame({'dt': [datetime(2011, 7, 1), datetime(2011, 7, 1),
                           datetime(2011, 2, 1), datetime(2011, 5, 1),
                           datetime(2011, 2, 1), datetime(2011, 1, 1),
                           datetime(2011, 5, 1)],
                    'foo': [10, 8, 5, 6, 4, 1, 7],
                    'bar': [10, 20, 30, 40, 50, 60, 70]},
                   columns=['dt', 'foo', 'bar'])

    # ordered=True
    df['dt'] = Categorical(df['dt'], ordered=True)
    index = [datetime(2011, 1, 1), datetime(2011, 2, 1),
             datetime(2011, 5, 1), datetime(2011, 7, 1)]
    result_sort = DataFrame(
        [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar'])
    result_sort.index = CategoricalIndex(index, name='dt', ordered=True)

    index = [datetime(2011, 7, 1), datetime(2011, 2, 1),
             datetime(2011, 5, 1), datetime(2011, 1, 1)]
    result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]],
                              columns=['foo', 'bar'])
    result_nosort.index = CategoricalIndex(index, categories=index,
                                           name='dt', ordered=True)

    col = 'dt'
    assert_frame_equal(
        result_sort, df.groupby(col, sort=True, observed=False).first())

    # when categories is ordered, group is ordered by category's order
    assert_frame_equal(
        result_sort, df.groupby(col, sort=False, observed=False).first())

    # ordered = False
    df['dt'] = Categorical(df['dt'], ordered=False)
    index = [datetime(2011, 1, 1), datetime(2011, 2, 1),
             datetime(2011, 5, 1), datetime(2011, 7, 1)]
    result_sort = DataFrame(
        [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar'])
    result_sort.index = CategoricalIndex(index, name='dt')

    index = [datetime(2011, 7, 1), datetime(2011, 2, 1),
             datetime(2011, 5, 1), datetime(2011, 1, 1)]
    result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]],
                              columns=['foo', 'bar'])
    result_nosort.index = CategoricalIndex(index, categories=index,
                                           name='dt')

    col = 'dt'
    assert_frame_equal(
        result_sort, df.groupby(col, sort=True, observed=False).first())
    assert_frame_equal(
        result_nosort, df.groupby(col, sort=False, observed=False).first())


def test_empty_sum():
    # https://github.com/pandas-dev/pandas/issues/18678
    df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'],
                                           categories=['a', 'b', 'c']),
                       'B': [1, 2, 1]})
    expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A')

    # 0 by default
    result = df.groupby("A", observed=False).B.sum()
    expected = pd.Series([3, 1, 0], expected_idx, name='B')
    tm.assert_series_equal(result, expected)

    # min_count=0
    result = df.groupby("A", observed=False).B.sum(min_count=0)
    expected = pd.Series([3, 1, 0], expected_idx, name='B')
    tm.assert_series_equal(result, expected)

    # min_count=1
    result = df.groupby("A", observed=False).B.sum(min_count=1)
    expected = pd.Series([3, 1, np.nan], expected_idx, name='B')
    tm.assert_series_equal(result, expected)

    # min_count>1
    result = df.groupby("A", observed=False).B.sum(min_count=2)
    expected = pd.Series([3, np.nan, np.nan], expected_idx, name='B')
    tm.assert_series_equal(result, expected)
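
    # min_count in one line: a group contributes a sum only if it has at
    # least min_count non-NA values, otherwise it gets NaN; empty category
    # 'c' fails every threshold >= 1 and single-valued 'b' fails >= 2.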


def test_empty_prod():
    # https://github.com/pandas-dev/pandas/issues/18678
    df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'],
                                           categories=['a', 'b', 'c']),
                       'B': [1, 2, 1]})

    expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A')

    # 1 by default
    result = df.groupby("A", observed=False).B.prod()
    expected = pd.Series([2, 1, 1], expected_idx, name='B')
    tm.assert_series_equal(result, expected)

    # min_count=0
    result = df.groupby("A", observed=False).B.prod(min_count=0)
    expected = pd.Series([2, 1, 1], expected_idx, name='B')
    tm.assert_series_equal(result, expected)

    # min_count=1
    result = df.groupby("A", observed=False).B.prod(min_count=1)
    expected = pd.Series([2, 1, np.nan], expected_idx, name='B')
    tm.assert_series_equal(result, expected)


def test_groupby_multiindex_categorical_datetime():
    # https://github.com/pandas-dev/pandas/issues/21390

    df = pd.DataFrame({
        'key1': pd.Categorical(list('abcbabcba')),
        'key2': pd.Categorical(
            list(pd.date_range('2018-06-01 00', freq='1T', periods=3)) * 3),
        'values': np.arange(9),
    })
    result = df.groupby(['key1', 'key2']).mean()

    idx = pd.MultiIndex.from_product(
        [pd.Categorical(['a', 'b', 'c']),
         pd.Categorical(pd.date_range('2018-06-01 00', freq='1T',
                                      periods=3))],
        names=['key1', 'key2'])
    expected = pd.DataFrame(
        {'values': [0, 4, 8, 3, 4, 5, 6, np.nan, 2]}, index=idx)
    assert_frame_equal(result, expected)


@pytest.mark.parametrize("as_index, expected", [
    (True, pd.Series(
        index=pd.MultiIndex.from_arrays(
            [pd.Series([1, 1, 2], dtype='category'),
             [1, 2, 2]], names=['a', 'b']
        ),
        data=[1, 2, 3], name='x'
    )),
    (False, pd.DataFrame({
        'a': pd.Series([1, 1, 2], dtype='category'),
        'b': [1, 2, 2],
        'x': [1, 2, 3]
    }))
])
def test_groupby_agg_observed_true_single_column(as_index, expected):
    # GH-23970
    df = pd.DataFrame({
        'a': pd.Series([1, 1, 2], dtype='category'),
        'b': [1, 2, 2],
        'x': [1, 2, 3]
    })

    result = df.groupby(
        ['a', 'b'], as_index=as_index, observed=True)['x'].sum()

    assert_equal(result, expected)


@pytest.mark.parametrize('fill_value', [None, np.nan, pd.NaT])
def test_shift(fill_value):
    ct = pd.Categorical(['a', 'b', 'c', 'd'],
                        categories=['a', 'b', 'c', 'd'], ordered=False)
    expected = pd.Categorical([None, 'a', 'b', 'c'],
                              categories=['a', 'b', 'c', 'd'], ordered=False)
    res = ct.shift(1, fill_value=fill_value)
    assert_equal(res, expected)
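
    # For a Categorical, every null-like fill_value in the parametrization
    # (None, np.nan, NaT) is treated the same way, as "missing", so all
    # three cases shift a single NaN slot in at the front rather than
    # adding a new category.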
@@ -1,224 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import print_function

import numpy as np
import pytest

from pandas.compat import product as cart_product, range

from pandas import DataFrame, MultiIndex, Period, Series, Timedelta, Timestamp
from pandas.util.testing import assert_frame_equal, assert_series_equal


class TestCounting(object):

    def test_cumcount(self):
        df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'])
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 1, 2, 0, 3])

        assert_series_equal(expected, g.cumcount())
        assert_series_equal(expected, sg.cumcount())
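
        # Worked by hand for the frame above: 'a' occurs at positions
        # 0, 1, 2, 4 (running counts 0, 1, 2, 3) and 'b' once at position 3
        # (running count 0), giving [0, 1, 2, 0, 3].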

    def test_cumcount_empty(self):
        ge = DataFrame().groupby(level=0)
        se = Series().groupby(level=0)

        # edge case, as this is usually considered float
        e = Series(dtype='int64')

        assert_series_equal(e, ge.cumcount())
        assert_series_equal(e, se.cumcount())

    def test_cumcount_dupe_index(self):
        df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'],
                       index=[0] * 5)
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 1, 2, 0, 3], index=[0] * 5)

        assert_series_equal(expected, g.cumcount())
        assert_series_equal(expected, sg.cumcount())

    def test_cumcount_mi(self):
        mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]])
        df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'],
                       index=mi)
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 1, 2, 0, 3], index=mi)

        assert_series_equal(expected, g.cumcount())
        assert_series_equal(expected, sg.cumcount())

    def test_cumcount_groupby_not_col(self):
        df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'],
                       index=[0] * 5)
        g = df.groupby([0, 0, 0, 1, 0])
        sg = g.A

        expected = Series([0, 1, 2, 0, 3], index=[0] * 5)

        assert_series_equal(expected, g.cumcount())
        assert_series_equal(expected, sg.cumcount())

    def test_ngroup(self):
        df = DataFrame({'A': list('aaaba')})
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 0, 0, 1, 0])

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_distinct(self):
        df = DataFrame({'A': list('abcde')})
        g = df.groupby('A')
        sg = g.A

        expected = Series(range(5), dtype='int64')

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_one_group(self):
        df = DataFrame({'A': [0] * 5})
        g = df.groupby('A')
        sg = g.A

        expected = Series([0] * 5)

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_empty(self):
        ge = DataFrame().groupby(level=0)
        se = Series().groupby(level=0)

        # edge case, as this is usually considered float
        e = Series(dtype='int64')

        assert_series_equal(e, ge.ngroup())
        assert_series_equal(e, se.ngroup())

    def test_ngroup_series_matches_frame(self):
        df = DataFrame({'A': list('aaaba')})
        s = Series(list('aaaba'))

        assert_series_equal(df.groupby(s).ngroup(),
                            s.groupby(s).ngroup())

    def test_ngroup_dupe_index(self):
        df = DataFrame({'A': list('aaaba')}, index=[0] * 5)
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 0, 0, 1, 0], index=[0] * 5)

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_mi(self):
        mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]])
        df = DataFrame({'A': list('aaaba')}, index=mi)
        g = df.groupby('A')
        sg = g.A
        expected = Series([0, 0, 0, 1, 0], index=mi)

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_groupby_not_col(self):
        df = DataFrame({'A': list('aaaba')}, index=[0] * 5)
        g = df.groupby([0, 0, 0, 1, 0])
        sg = g.A

        expected = Series([0, 0, 0, 1, 0], index=[0] * 5)

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_descending(self):
        df = DataFrame(['a', 'a', 'b', 'a', 'b'], columns=['A'])
        g = df.groupby(['A'])

        ascending = Series([0, 0, 1, 0, 1])
        descending = Series([1, 1, 0, 1, 0])

        assert_series_equal(descending, (g.ngroups - 1) - ascending)
        assert_series_equal(ascending, g.ngroup(ascending=True))
        assert_series_equal(descending, g.ngroup(ascending=False))
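
        # The first assertion states the general identity: with k groups,
        # reverse numbering is (k - 1) - ngroup(ascending=True); here k == 2.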

    def test_ngroup_matches_cumcount(self):
        # verify one manually-worked out case works
        df = DataFrame([['a', 'x'], ['a', 'y'], ['b', 'x'],
                        ['a', 'x'], ['b', 'y']], columns=['A', 'X'])
        g = df.groupby(['A', 'X'])
        g_ngroup = g.ngroup()
        g_cumcount = g.cumcount()
        expected_ngroup = Series([0, 1, 2, 0, 3])
        expected_cumcount = Series([0, 0, 0, 1, 0])

        assert_series_equal(g_ngroup, expected_ngroup)
        assert_series_equal(g_cumcount, expected_cumcount)

    def test_ngroup_cumcount_pair(self):
        # brute force comparison for all small series
        for p in cart_product(range(3), repeat=4):
            df = DataFrame({'a': p})
            g = df.groupby(['a'])

            order = sorted(set(p))
            ngroupd = [order.index(val) for val in p]
            cumcounted = [p[:i].count(val) for i, val in enumerate(p)]

            assert_series_equal(g.ngroup(), Series(ngroupd))
            assert_series_equal(g.cumcount(), Series(cumcounted))

    def test_ngroup_respects_groupby_order(self):
        np.random.seed(0)
        df = DataFrame({'a': np.random.choice(list('abcdef'), 100)})
        for sort_flag in (False, True):
            g = df.groupby(['a'], sort=sort_flag)
            df['group_id'] = -1
            df['group_index'] = -1

            for i, (_, group) in enumerate(g):
                df.loc[group.index, 'group_id'] = i
                for j, ind in enumerate(group.index):
                    df.loc[ind, 'group_index'] = j

            assert_series_equal(Series(df['group_id'].values),
                                g.ngroup())
            assert_series_equal(Series(df['group_index'].values),
                                g.cumcount())

    @pytest.mark.parametrize('datetimelike', [
        [Timestamp('2016-05-%02d 20:09:25+00:00' % i) for i in range(1, 4)],
        [Timestamp('2016-05-%02d 20:09:25' % i) for i in range(1, 4)],
        [Timedelta(x, unit="h") for x in range(1, 4)],
        [Period(freq="2W", year=2017, month=x) for x in range(1, 4)]])
    def test_count_with_datetimelike(self, datetimelike):
        # test for #13393, where DataFrameGroupBy.count() fails
        # when counting a datetimelike column.

        df = DataFrame({'x': ['a', 'a', 'b'], 'y': datetimelike})
        res = df.groupby('x').count()
        expected = DataFrame({'y': [2, 1]}, index=['a', 'b'])
        expected.index.name = "x"
        assert_frame_equal(expected, res)

    def test_count_with_only_nans_in_first_group(self):
        # GH21956
        df = DataFrame({'A': [np.nan, np.nan], 'B': ['a', 'b'], 'C': [1, 2]})
        result = df.groupby(['A', 'B']).C.count()
        mi = MultiIndex(levels=[[], ['a', 'b']],
                        codes=[[], []],
                        names=['A', 'B'])
        expected = Series([], index=mi, dtype=np.int64, name='C')
        assert_series_equal(result, expected, check_index_type=False)
@@ -1,588 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import print_function

import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Series, Timestamp
import pandas.util.testing as tm


def test_filter_series():
    s = pd.Series([1, 3, 20, 5, 22, 24, 7])
    expected_odd = pd.Series([1, 3, 5, 7], index=[0, 1, 3, 6])
    expected_even = pd.Series([20, 22, 24], index=[2, 4, 5])
    grouper = s.apply(lambda x: x % 2)
    grouped = s.groupby(grouper)
    tm.assert_series_equal(
        grouped.filter(lambda x: x.mean() < 10), expected_odd)
    tm.assert_series_equal(
        grouped.filter(lambda x: x.mean() > 10), expected_even)
    # Test dropna=False.
    tm.assert_series_equal(
        grouped.filter(lambda x: x.mean() < 10, dropna=False),
        expected_odd.reindex(s.index))
    tm.assert_series_equal(
        grouped.filter(lambda x: x.mean() > 10, dropna=False),
        expected_even.reindex(s.index))
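
    # dropna=False keeps the original shape: rows from rejected groups stay
    # in place as NaN rather than being dropped, which is exactly what
    # reindexing the expectations against s.index encodes above.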


def test_filter_single_column_df():
    df = pd.DataFrame([1, 3, 20, 5, 22, 24, 7])
    expected_odd = pd.DataFrame([1, 3, 5, 7], index=[0, 1, 3, 6])
    expected_even = pd.DataFrame([20, 22, 24], index=[2, 4, 5])
    grouper = df[0].apply(lambda x: x % 2)
    grouped = df.groupby(grouper)
    tm.assert_frame_equal(
        grouped.filter(lambda x: x.mean() < 10), expected_odd)
    tm.assert_frame_equal(
        grouped.filter(lambda x: x.mean() > 10), expected_even)
    # Test dropna=False.
    tm.assert_frame_equal(
        grouped.filter(lambda x: x.mean() < 10, dropna=False),
        expected_odd.reindex(df.index))
    tm.assert_frame_equal(
        grouped.filter(lambda x: x.mean() > 10, dropna=False),
        expected_even.reindex(df.index))


def test_filter_multi_column_df():
    df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': [1, 1, 1, 1]})
    grouper = df['A'].apply(lambda x: x % 2)
    grouped = df.groupby(grouper)
    expected = pd.DataFrame({'A': [12, 12], 'B': [1, 1]}, index=[1, 2])
    tm.assert_frame_equal(
        grouped.filter(lambda x: x['A'].sum() - x['B'].sum() > 10),
        expected)


def test_filter_mixed_df():
    df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()})
    grouper = df['A'].apply(lambda x: x % 2)
    grouped = df.groupby(grouper)
    expected = pd.DataFrame({'A': [12, 12], 'B': ['b', 'c']}, index=[1, 2])
    tm.assert_frame_equal(
        grouped.filter(lambda x: x['A'].sum() > 10), expected)


def test_filter_out_all_groups():
    s = pd.Series([1, 3, 20, 5, 22, 24, 7])
    grouper = s.apply(lambda x: x % 2)
    grouped = s.groupby(grouper)
    tm.assert_series_equal(grouped.filter(lambda x: x.mean() > 1000), s[[]])
    df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()})
    grouper = df['A'].apply(lambda x: x % 2)
    grouped = df.groupby(grouper)
    tm.assert_frame_equal(
        grouped.filter(lambda x: x['A'].sum() > 1000), df.loc[[]])


def test_filter_out_no_groups():
    s = pd.Series([1, 3, 20, 5, 22, 24, 7])
    grouper = s.apply(lambda x: x % 2)
    grouped = s.groupby(grouper)
    filtered = grouped.filter(lambda x: x.mean() > 0)
    tm.assert_series_equal(filtered, s)
    df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()})
    grouper = df['A'].apply(lambda x: x % 2)
    grouped = df.groupby(grouper)
    filtered = grouped.filter(lambda x: x['A'].mean() > 0)
    tm.assert_frame_equal(filtered, df)


def test_filter_out_all_groups_in_df():
    # GH12768
    df = pd.DataFrame({'a': [1, 1, 2], 'b': [1, 2, 0]})
    res = df.groupby('a')
    res = res.filter(lambda x: x['b'].sum() > 5, dropna=False)
    expected = pd.DataFrame({'a': [np.nan] * 3, 'b': [np.nan] * 3})
    tm.assert_frame_equal(expected, res)

    df = pd.DataFrame({'a': [1, 1, 2], 'b': [1, 2, 0]})
    res = df.groupby('a')
    res = res.filter(lambda x: x['b'].sum() > 5, dropna=True)
    expected = pd.DataFrame({'a': [], 'b': []}, dtype="int64")
    tm.assert_frame_equal(expected, res)


def test_filter_condition_raises():
    def raise_if_sum_is_zero(x):
        if x.sum() == 0:
            raise ValueError
        else:
            return x.sum() > 0

    s = pd.Series([-1, 0, 1, 2])
    grouper = s.apply(lambda x: x % 2)
    grouped = s.groupby(grouper)
    msg = "the filter must return a boolean result"
    with pytest.raises(TypeError, match=msg):
        grouped.filter(raise_if_sum_is_zero)


def test_filter_with_axis_in_groupby():
    # issue 11041
    index = pd.MultiIndex.from_product([range(10), [0, 1]])
    data = pd.DataFrame(
        np.arange(100).reshape(-1, 20), columns=index, dtype='int64')
    result = data.groupby(level=0,
                          axis=1).filter(lambda x: x.iloc[0, 0] > 10)
    expected = data.iloc[:, 12:20]
    tm.assert_frame_equal(result, expected)


def test_filter_bad_shapes():
    df = DataFrame({'A': np.arange(8),
                    'B': list('aabbbbcc'),
                    'C': np.arange(8)})
    s = df['B']
    g_df = df.groupby('B')
    g_s = s.groupby(s)

    f = lambda x: x
    msg = "filter function returned a DataFrame, but expected a scalar bool"
    with pytest.raises(TypeError, match=msg):
        g_df.filter(f)
    msg = "the filter must return a boolean result"
    with pytest.raises(TypeError, match=msg):
        g_s.filter(f)

    f = lambda x: x == 1
    msg = "filter function returned a DataFrame, but expected a scalar bool"
    with pytest.raises(TypeError, match=msg):
        g_df.filter(f)
    msg = "the filter must return a boolean result"
    with pytest.raises(TypeError, match=msg):
        g_s.filter(f)

    f = lambda x: np.outer(x, x)
    msg = "can't multiply sequence by non-int of type 'str'"
    with pytest.raises(TypeError, match=msg):
        g_df.filter(f)
    msg = "the filter must return a boolean result"
    with pytest.raises(TypeError, match=msg):
        g_s.filter(f)


def test_filter_nan_is_false():
    df = DataFrame({'A': np.arange(8),
                    'B': list('aabbbbcc'),
                    'C': np.arange(8)})
    s = df['B']
    g_df = df.groupby(df['B'])
    g_s = s.groupby(s)

    f = lambda x: np.nan
    tm.assert_frame_equal(g_df.filter(f), df.loc[[]])
    tm.assert_series_equal(g_s.filter(f), s[[]])


def test_filter_against_workaround():
    np.random.seed(0)
    # Series of ints
    s = Series(np.random.randint(0, 100, 1000))
    grouper = s.apply(lambda x: np.round(x, -1))
    grouped = s.groupby(grouper)
    f = lambda x: x.mean() > 10

    old_way = s[grouped.transform(f).astype('bool')]
    new_way = grouped.filter(f)
    tm.assert_series_equal(new_way.sort_values(), old_way.sort_values())

    # Series of floats
    s = 100 * Series(np.random.random(1000))
    grouper = s.apply(lambda x: np.round(x, -1))
    grouped = s.groupby(grouper)
    f = lambda x: x.mean() > 10
    old_way = s[grouped.transform(f).astype('bool')]
    new_way = grouped.filter(f)
    tm.assert_series_equal(new_way.sort_values(), old_way.sort_values())

    # Set up DataFrame of ints, floats, strings.
    from string import ascii_lowercase
    letters = np.array(list(ascii_lowercase))
    N = 1000
    random_letters = letters.take(np.random.randint(0, 26, N))
    df = DataFrame({'ints': Series(np.random.randint(0, 100, N)),
                    'floats': N / 10 * Series(np.random.random(N)),
                    'letters': Series(random_letters)})

    # Group by ints; filter on floats.
    grouped = df.groupby('ints')
    old_way = df[grouped.floats.
                 transform(lambda x: x.mean() > N / 20).astype('bool')]
    new_way = grouped.filter(lambda x: x['floats'].mean() > N / 20)
    tm.assert_frame_equal(new_way, old_way)

    # Group by floats (rounded); filter on strings.
    grouper = df.floats.apply(lambda x: np.round(x, -1))
    grouped = df.groupby(grouper)
    old_way = df[grouped.letters.
                 transform(lambda x: len(x) < N / 10).astype('bool')]
    new_way = grouped.filter(lambda x: len(x.letters) < N / 10)
    tm.assert_frame_equal(new_way, old_way)

    # Group by strings; filter on ints.
    grouped = df.groupby('letters')
    old_way = df[grouped.ints.
                 transform(lambda x: x.mean() > N / 20).astype('bool')]
    new_way = grouped.filter(lambda x: x['ints'].mean() > N / 20)
    tm.assert_frame_equal(new_way, old_way)
|
||||
|
||||
|
||||
def test_filter_using_len():
    # BUG GH4447
    df = DataFrame({'A': np.arange(8),
                    'B': list('aabbbbcc'),
                    'C': np.arange(8)})
    grouped = df.groupby('B')
    actual = grouped.filter(lambda x: len(x) > 2)
    expected = DataFrame(
        {'A': np.arange(2, 6),
         'B': list('bbbb'),
         'C': np.arange(2, 6)}, index=np.arange(2, 6))
    tm.assert_frame_equal(actual, expected)

    actual = grouped.filter(lambda x: len(x) > 4)
    expected = df.loc[[]]
    tm.assert_frame_equal(actual, expected)

    # Series have always worked properly, but we'll test anyway.
    s = df['B']
    grouped = s.groupby(s)
    actual = grouped.filter(lambda x: len(x) > 2)
    expected = Series(4 * ['b'], index=np.arange(2, 6), name='B')
    tm.assert_series_equal(actual, expected)

    actual = grouped.filter(lambda x: len(x) > 4)
    expected = s[[]]
    tm.assert_series_equal(actual, expected)


def test_filter_maintains_ordering():
    # Simple case: index is sequential. #4621
    df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
                    'tag': [23, 45, 62, 24, 45, 34, 25, 62]})
    s = df['pid']
    grouped = df.groupby('tag')
    actual = grouped.filter(lambda x: len(x) > 1)
    expected = df.iloc[[1, 2, 4, 7]]
    tm.assert_frame_equal(actual, expected)

    grouped = s.groupby(df['tag'])
    actual = grouped.filter(lambda x: len(x) > 1)
    expected = s.iloc[[1, 2, 4, 7]]
    tm.assert_series_equal(actual, expected)

    # Now index is sequentially decreasing.
    df.index = np.arange(len(df) - 1, -1, -1)
    s = df['pid']
    grouped = df.groupby('tag')
    actual = grouped.filter(lambda x: len(x) > 1)
    expected = df.iloc[[1, 2, 4, 7]]
    tm.assert_frame_equal(actual, expected)

    grouped = s.groupby(df['tag'])
    actual = grouped.filter(lambda x: len(x) > 1)
    expected = s.iloc[[1, 2, 4, 7]]
    tm.assert_series_equal(actual, expected)

    # Index is shuffled.
    SHUFFLED = [4, 6, 7, 2, 1, 0, 5, 3]
    df.index = df.index[SHUFFLED]
    s = df['pid']
    grouped = df.groupby('tag')
    actual = grouped.filter(lambda x: len(x) > 1)
    expected = df.iloc[[1, 2, 4, 7]]
    tm.assert_frame_equal(actual, expected)

    grouped = s.groupby(df['tag'])
    actual = grouped.filter(lambda x: len(x) > 1)
    expected = s.iloc[[1, 2, 4, 7]]
    tm.assert_series_equal(actual, expected)


def test_filter_multiple_timestamp():
    # GH 10114
    df = DataFrame({'A': np.arange(5, dtype='int64'),
                    'B': ['foo', 'bar', 'foo', 'bar', 'bar'],
                    'C': Timestamp('20130101')})

    grouped = df.groupby(['B', 'C'])

    result = grouped['A'].filter(lambda x: True)
    tm.assert_series_equal(df['A'], result)

    result = grouped['A'].transform(len)
    expected = Series([2, 3, 2, 3, 3], name='A')
    tm.assert_series_equal(result, expected)

    result = grouped.filter(lambda x: True)
    tm.assert_frame_equal(df, result)

    result = grouped.transform('sum')
    expected = DataFrame({'A': [2, 8, 2, 8, 8]})
    tm.assert_frame_equal(result, expected)

    result = grouped.transform(len)
    expected = DataFrame({'A': [2, 3, 2, 3, 3]})
    tm.assert_frame_equal(result, expected)


def test_filter_and_transform_with_non_unique_int_index():
    # GH4620
    index = [1, 1, 1, 2, 1, 1, 0, 1]
    df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
                    'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index)
    grouped_df = df.groupby('tag')
    ser = df['pid']
    grouped_ser = ser.groupby(df['tag'])
    expected_indexes = [1, 2, 4, 7]

    # Filter DataFrame
    actual = grouped_df.filter(lambda x: len(x) > 1)
    expected = df.iloc[expected_indexes]
    tm.assert_frame_equal(actual, expected)

    actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
    expected = df.copy()
    expected.iloc[[0, 3, 5, 6]] = np.nan
    tm.assert_frame_equal(actual, expected)

    # Filter Series
    actual = grouped_ser.filter(lambda x: len(x) > 1)
    expected = ser.take(expected_indexes)
    tm.assert_series_equal(actual, expected)

    actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
    NA = np.nan
    expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid')
    # ^ made manually because this can get confusing!
    tm.assert_series_equal(actual, expected)

    # Transform Series
    actual = grouped_ser.transform(len)
    expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid')
    tm.assert_series_equal(actual, expected)

    # Transform (a column from) DataFrameGroupBy
    actual = grouped_df.pid.transform(len)
    tm.assert_series_equal(actual, expected)


def test_filter_and_transform_with_multiple_non_unique_int_index():
    # GH4620
    index = [1, 1, 1, 2, 0, 0, 0, 1]
    df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
                    'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index)
    grouped_df = df.groupby('tag')
    ser = df['pid']
    grouped_ser = ser.groupby(df['tag'])
    expected_indexes = [1, 2, 4, 7]

    # Filter DataFrame
    actual = grouped_df.filter(lambda x: len(x) > 1)
    expected = df.iloc[expected_indexes]
    tm.assert_frame_equal(actual, expected)

    actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
    expected = df.copy()
    expected.iloc[[0, 3, 5, 6]] = np.nan
    tm.assert_frame_equal(actual, expected)

    # Filter Series
    actual = grouped_ser.filter(lambda x: len(x) > 1)
    expected = ser.take(expected_indexes)
    tm.assert_series_equal(actual, expected)

    actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
    NA = np.nan
    expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid')
    # ^ made manually because this can get confusing!
    tm.assert_series_equal(actual, expected)

    # Transform Series
    actual = grouped_ser.transform(len)
    expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid')
    tm.assert_series_equal(actual, expected)

    # Transform (a column from) DataFrameGroupBy
    actual = grouped_df.pid.transform(len)
    tm.assert_series_equal(actual, expected)


def test_filter_and_transform_with_non_unique_float_index():
    # GH4620
    index = np.array([1, 1, 1, 2, 1, 1, 0, 1], dtype=float)
    df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
                    'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index)
    grouped_df = df.groupby('tag')
    ser = df['pid']
    grouped_ser = ser.groupby(df['tag'])
    expected_indexes = [1, 2, 4, 7]

    # Filter DataFrame
    actual = grouped_df.filter(lambda x: len(x) > 1)
    expected = df.iloc[expected_indexes]
    tm.assert_frame_equal(actual, expected)

    actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
    expected = df.copy()
    expected.iloc[[0, 3, 5, 6]] = np.nan
    tm.assert_frame_equal(actual, expected)

    # Filter Series
    actual = grouped_ser.filter(lambda x: len(x) > 1)
    expected = ser.take(expected_indexes)
    tm.assert_series_equal(actual, expected)

    actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
    NA = np.nan
    expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid')
    # ^ made manually because this can get confusing!
    tm.assert_series_equal(actual, expected)

    # Transform Series
    actual = grouped_ser.transform(len)
    expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid')
    tm.assert_series_equal(actual, expected)

    # Transform (a column from) DataFrameGroupBy
    actual = grouped_df.pid.transform(len)
    tm.assert_series_equal(actual, expected)


def test_filter_and_transform_with_non_unique_timestamp_index():
    # GH4620
    t0 = Timestamp('2013-09-30 00:05:00')
    t1 = Timestamp('2013-10-30 00:05:00')
    t2 = Timestamp('2013-11-30 00:05:00')
    index = [t1, t1, t1, t2, t1, t1, t0, t1]
    df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
                    'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index)
    grouped_df = df.groupby('tag')
    ser = df['pid']
    grouped_ser = ser.groupby(df['tag'])
    expected_indexes = [1, 2, 4, 7]

    # Filter DataFrame
    actual = grouped_df.filter(lambda x: len(x) > 1)
    expected = df.iloc[expected_indexes]
    tm.assert_frame_equal(actual, expected)

    actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
    expected = df.copy()
    expected.iloc[[0, 3, 5, 6]] = np.nan
    tm.assert_frame_equal(actual, expected)

    # Filter Series
    actual = grouped_ser.filter(lambda x: len(x) > 1)
    expected = ser.take(expected_indexes)
    tm.assert_series_equal(actual, expected)

    actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
    NA = np.nan
    expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid')
    # ^ made manually because this can get confusing!
    tm.assert_series_equal(actual, expected)

    # Transform Series
    actual = grouped_ser.transform(len)
    expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid')
    tm.assert_series_equal(actual, expected)

    # Transform (a column from) DataFrameGroupBy
    actual = grouped_df.pid.transform(len)
    tm.assert_series_equal(actual, expected)


def test_filter_and_transform_with_non_unique_string_index():
    # GH4620
    index = list('bbbcbbab')
    df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
                    'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index)
    grouped_df = df.groupby('tag')
    ser = df['pid']
    grouped_ser = ser.groupby(df['tag'])
    expected_indexes = [1, 2, 4, 7]

    # Filter DataFrame
    actual = grouped_df.filter(lambda x: len(x) > 1)
    expected = df.iloc[expected_indexes]
    tm.assert_frame_equal(actual, expected)

    actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
    expected = df.copy()
    expected.iloc[[0, 3, 5, 6]] = np.nan
    tm.assert_frame_equal(actual, expected)

    # Filter Series
    actual = grouped_ser.filter(lambda x: len(x) > 1)
    expected = ser.take(expected_indexes)
    tm.assert_series_equal(actual, expected)

    actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
    NA = np.nan
    expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid')
    # ^ made manually because this can get confusing!
    tm.assert_series_equal(actual, expected)

    # Transform Series
    actual = grouped_ser.transform(len)
    expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid')
    tm.assert_series_equal(actual, expected)

    # Transform (a column from) DataFrameGroupBy
    actual = grouped_df.pid.transform(len)
    tm.assert_series_equal(actual, expected)


def test_filter_has_access_to_grouped_cols():
    df = DataFrame([[1, 2], [1, 3], [5, 6]], columns=['A', 'B'])
    g = df.groupby('A')
    # previously didn't have access to col A #????
    filt = g.filter(lambda x: x['A'].sum() == 2)
    tm.assert_frame_equal(filt, df.iloc[[0, 1]])


def test_filter_enforces_scalarness():
    df = pd.DataFrame([
        ['best', 'a', 'x'],
        ['worst', 'b', 'y'],
        ['best', 'c', 'x'],
        ['best', 'd', 'y'],
        ['worst', 'd', 'y'],
        ['worst', 'd', 'y'],
        ['best', 'd', 'z'],
    ], columns=['a', 'b', 'c'])
    with pytest.raises(TypeError, match='filter function returned a.*'):
        df.groupby('c').filter(lambda g: g['a'] == 'best')


def test_filter_non_bool_raises():
    df = pd.DataFrame([
        ['best', 'a', 1],
        ['worst', 'b', 1],
        ['best', 'c', 1],
        ['best', 'd', 1],
        ['worst', 'd', 1],
        ['worst', 'd', 1],
        ['best', 'd', 1],
    ], columns=['a', 'b', 'c'])
    with pytest.raises(TypeError, match='filter function returned a.*'):
        df.groupby('a').filter(lambda g: g.c.mean())


def test_filter_dropna_with_empty_groups():
    # GH 10780
    data = pd.Series(np.random.rand(9), index=np.repeat([1, 2, 3], 3))
    grouped = data.groupby(level=0)
    result_false = grouped.filter(lambda x: x.mean() > 1, dropna=False)
    expected_false = pd.Series([np.nan] * 9,
                               index=np.repeat([1, 2, 3], 3))
    tm.assert_series_equal(result_false, expected_false)

    result_true = grouped.filter(lambda x: x.mean() > 1, dropna=True)
    expected_true = pd.Series(index=pd.Index([], dtype=int))
    tm.assert_series_equal(result_true, expected_true)
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,838 +0,0 @@
# -*- coding: utf-8 -*-

""" test where we are determining what we are grouping, or getting groups """

import numpy as np
import pytest

from pandas.compat import long, lrange

import pandas as pd
from pandas import (
    CategoricalIndex, DataFrame, Index, MultiIndex, Series, Timestamp, compat,
    date_range)
from pandas.core.groupby.grouper import Grouping
import pandas.util.testing as tm
from pandas.util.testing import (
    assert_almost_equal, assert_frame_equal, assert_panel_equal,
    assert_series_equal)

# selection
# --------------------------------


class TestSelection(object):

    def test_select_bad_cols(self):
        df = DataFrame([[1, 2]], columns=['A', 'B'])
        g = df.groupby('A')
        with pytest.raises(KeyError, match='"Columns not found: \'C\'"'):
            g[['C']]

        with pytest.raises(KeyError, match='^[^A]+$'):
            # A should not be referenced as a bad column...
            # will have to rethink regex if you change message!
            g[['A', 'C']]

    def test_groupby_duplicated_column_errormsg(self):
        # GH7511
        df = DataFrame(columns=['A', 'B', 'A', 'C'],
                       data=[range(4), range(2, 6), range(0, 8, 2)])

        msg = "Grouper for 'A' not 1-dimensional"
        with pytest.raises(ValueError, match=msg):
            df.groupby('A')
        with pytest.raises(ValueError, match=msg):
            df.groupby(['A', 'B'])

        grouped = df.groupby('B')
        c = grouped.count()
        assert c.columns.nlevels == 1
        assert c.columns.size == 3

    def test_column_select_via_attr(self, df):
        result = df.groupby('A').C.sum()
        expected = df.groupby('A')['C'].sum()
        assert_series_equal(result, expected)

        df['mean'] = 1.5
        result = df.groupby('A').mean()
        expected = df.groupby('A').agg(np.mean)
        assert_frame_equal(result, expected)

    def test_getitem_list_of_columns(self):
        df = DataFrame(
            {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
             'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
             'C': np.random.randn(8),
             'D': np.random.randn(8),
             'E': np.random.randn(8)})

        result = df.groupby('A')[['C', 'D']].mean()
        result2 = df.groupby('A')['C', 'D'].mean()
        result3 = df.groupby('A')[df.columns[2:4]].mean()

        expected = df.loc[:, ['A', 'C', 'D']].groupby('A').mean()

        assert_frame_equal(result, expected)
        assert_frame_equal(result2, expected)
        assert_frame_equal(result3, expected)

    def test_getitem_numeric_column_names(self):
        # GH #13731
        df = DataFrame({0: list('abcd') * 2,
                        2: np.random.randn(8),
                        4: np.random.randn(8),
                        6: np.random.randn(8)})
        result = df.groupby(0)[df.columns[1:3]].mean()
        result2 = df.groupby(0)[2, 4].mean()
        result3 = df.groupby(0)[[2, 4]].mean()

        expected = df.loc[:, [0, 2, 4]].groupby(0).mean()

        assert_frame_equal(result, expected)
        assert_frame_equal(result2, expected)
        assert_frame_equal(result3, expected)


# grouping
# --------------------------------

class TestGrouping():

    def test_grouper_index_types(self):
        # related GH5375
        # groupby misbehaving when using a Floatlike index
        df = DataFrame(np.arange(10).reshape(5, 2), columns=list('AB'))
        for index in [tm.makeFloatIndex, tm.makeStringIndex,
                      tm.makeUnicodeIndex, tm.makeIntIndex, tm.makeDateIndex,
                      tm.makePeriodIndex]:

            df.index = index(len(df))
            df.groupby(list('abcde')).apply(lambda x: x)

            df.index = list(reversed(df.index.tolist()))
            df.groupby(list('abcde')).apply(lambda x: x)

    def test_grouper_multilevel_freq(self):

        # GH 7885
        # with level and freq specified in a pd.Grouper
        from datetime import date, timedelta
        d0 = date.today() - timedelta(days=14)
        dates = date_range(d0, date.today())
        date_index = pd.MultiIndex.from_product(
            [dates, dates], names=['foo', 'bar'])
        df = pd.DataFrame(np.random.randint(0, 100, 225), index=date_index)

        # Check string level
        expected = df.reset_index().groupby([pd.Grouper(
            key='foo', freq='W'), pd.Grouper(key='bar', freq='W')]).sum()
        # reset index changes columns dtype to object
        expected.columns = pd.Index([0], dtype='int64')

        result = df.groupby([pd.Grouper(level='foo', freq='W'), pd.Grouper(
            level='bar', freq='W')]).sum()
        assert_frame_equal(result, expected)

        # Check integer level
        result = df.groupby([pd.Grouper(level=0, freq='W'), pd.Grouper(
            level=1, freq='W')]).sum()
        assert_frame_equal(result, expected)

    def test_grouper_creation_bug(self):

        # GH 8795
        df = DataFrame({'A': [0, 0, 1, 1, 2, 2], 'B': [1, 2, 3, 4, 5, 6]})
        g = df.groupby('A')
        expected = g.sum()

        g = df.groupby(pd.Grouper(key='A'))
        result = g.sum()
        assert_frame_equal(result, expected)

        result = g.apply(lambda x: x.sum())
        assert_frame_equal(result, expected)

        g = df.groupby(pd.Grouper(key='A', axis=0))
        result = g.sum()
        assert_frame_equal(result, expected)

        # GH14334
        # pd.Grouper(key=...) may be passed in a list
        df = DataFrame({'A': [0, 0, 0, 1, 1, 1],
                        'B': [1, 1, 2, 2, 3, 3],
                        'C': [1, 2, 3, 4, 5, 6]})
        # Group by single column
        expected = df.groupby('A').sum()
        g = df.groupby([pd.Grouper(key='A')])
        result = g.sum()
        assert_frame_equal(result, expected)

        # Group by two columns
        # using a combination of strings and Grouper objects
        expected = df.groupby(['A', 'B']).sum()

        # Group with two Grouper objects
        g = df.groupby([pd.Grouper(key='A'), pd.Grouper(key='B')])
        result = g.sum()
        assert_frame_equal(result, expected)

        # Group with a string and a Grouper object
        g = df.groupby(['A', pd.Grouper(key='B')])
        result = g.sum()
        assert_frame_equal(result, expected)

        # Group with a Grouper object and a string
        g = df.groupby([pd.Grouper(key='A'), 'B'])
        result = g.sum()
        assert_frame_equal(result, expected)

        # GH8866
        s = Series(np.arange(8, dtype='int64'),
                   index=pd.MultiIndex.from_product(
                       [list('ab'), range(2),
                        date_range('20130101', periods=2)],
                       names=['one', 'two', 'three']))
        result = s.groupby(pd.Grouper(level='three', freq='M')).sum()
        expected = Series([28], index=Index(
            [Timestamp('2013-01-31')], freq='M', name='three'))
        assert_series_equal(result, expected)

        # just specifying a level breaks
        result = s.groupby(pd.Grouper(level='one')).sum()
        expected = s.groupby(level='one').sum()
        assert_series_equal(result, expected)

    def test_grouper_column_and_index(self):
        # GH 14327

        # Grouping a multi-index frame by a column and an index level should
        # be equivalent to resetting the index and grouping by two columns
        idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3),
                                         ('b', 1), ('b', 2), ('b', 3)])
        idx.names = ['outer', 'inner']
        df_multi = pd.DataFrame({"A": np.arange(6),
                                 'B': ['one', 'one', 'two',
                                       'two', 'one', 'one']},
                                index=idx)
        result = df_multi.groupby(['B', pd.Grouper(level='inner')]).mean()
        expected = df_multi.reset_index().groupby(['B', 'inner']).mean()
        assert_frame_equal(result, expected)

        # Test the reverse grouping order
        result = df_multi.groupby([pd.Grouper(level='inner'), 'B']).mean()
        expected = df_multi.reset_index().groupby(['inner', 'B']).mean()
        assert_frame_equal(result, expected)

        # Grouping a single-index frame by a column and the index should
        # be equivalent to resetting the index and grouping by two columns
        df_single = df_multi.reset_index('outer')
        result = df_single.groupby(['B', pd.Grouper(level='inner')]).mean()
        expected = df_single.reset_index().groupby(['B', 'inner']).mean()
        assert_frame_equal(result, expected)

        # Test the reverse grouping order
        result = df_single.groupby([pd.Grouper(level='inner'), 'B']).mean()
        expected = df_single.reset_index().groupby(['inner', 'B']).mean()
        assert_frame_equal(result, expected)

    def test_groupby_levels_and_columns(self):
        # GH9344, GH9049
        idx_names = ['x', 'y']
        idx = pd.MultiIndex.from_tuples(
            [(1, 1), (1, 2), (3, 4), (5, 6)], names=idx_names)
        df = pd.DataFrame(np.arange(12).reshape(-1, 3), index=idx)

        by_levels = df.groupby(level=idx_names).mean()
        # reset_index changes columns dtype to object
        by_columns = df.reset_index().groupby(idx_names).mean()

        tm.assert_frame_equal(by_levels, by_columns, check_column_type=False)

        by_columns.columns = pd.Index(by_columns.columns, dtype=np.int64)
        tm.assert_frame_equal(by_levels, by_columns)

    def test_groupby_categorical_index_and_columns(self, observed):
        # GH18432
        columns = ['A', 'B', 'A', 'B']
        categories = ['B', 'A']
        data = np.ones((5, 4), int)
        cat_columns = CategoricalIndex(columns,
                                       categories=categories,
                                       ordered=True)
        df = DataFrame(data=data, columns=cat_columns)
        result = df.groupby(axis=1, level=0, observed=observed).sum()
        expected_data = 2 * np.ones((5, 2), int)

        if observed:
            # if we are not-observed we undergo a reindex
            # so need to adjust the output as our expected sets us up
            # to be non-observed
            expected_columns = CategoricalIndex(['A', 'B'],
                                                categories=categories,
                                                ordered=True)
        else:
            expected_columns = CategoricalIndex(categories,
                                                categories=categories,
                                                ordered=True)
        expected = DataFrame(data=expected_data, columns=expected_columns)
        assert_frame_equal(result, expected)

        # test transposed version
        df = DataFrame(data.T, index=cat_columns)
        result = df.groupby(axis=0, level=0, observed=observed).sum()
        expected = DataFrame(data=expected_data.T, index=expected_columns)
        assert_frame_equal(result, expected)

    def test_grouper_getting_correct_binner(self):

        # GH 10063
        # using a non-time-based grouper and a time-based grouper
        # and specifying levels
        df = DataFrame({'A': 1}, index=pd.MultiIndex.from_product(
            [list('ab'), date_range('20130101', periods=80)], names=['one',
                                                                     'two']))
        result = df.groupby([pd.Grouper(level='one'), pd.Grouper(
            level='two', freq='M')]).sum()
        expected = DataFrame({'A': [31, 28, 21, 31, 28, 21]},
                             index=MultiIndex.from_product(
                                 [list('ab'),
                                  date_range('20130101', freq='M', periods=3)],
                                 names=['one', 'two']))
        assert_frame_equal(result, expected)

    def test_grouper_iter(self, df):
        assert sorted(df.groupby('A').grouper) == ['bar', 'foo']

    def test_empty_groups(self, df):
        # see gh-1048
        with pytest.raises(ValueError, match="No group keys passed!"):
            df.groupby([])

    def test_groupby_grouper(self, df):
        grouped = df.groupby('A')

        result = df.groupby(grouped.grouper).mean()
        expected = grouped.mean()
        tm.assert_frame_equal(result, expected)

    def test_groupby_dict_mapping(self):
        # GH #679
        from pandas import Series
        s = Series({'T1': 5})
        result = s.groupby({'T1': 'T2'}).agg(sum)
        expected = s.groupby(['T2']).agg(sum)
        assert_series_equal(result, expected)

        s = Series([1., 2., 3., 4.], index=list('abcd'))
        mapping = {'a': 0, 'b': 0, 'c': 1, 'd': 1}

        result = s.groupby(mapping).mean()
        result2 = s.groupby(mapping).agg(np.mean)
        expected = s.groupby([0, 0, 1, 1]).mean()
        expected2 = s.groupby([0, 0, 1, 1]).mean()
        assert_series_equal(result, expected)
        assert_series_equal(result, result2)
        assert_series_equal(result, expected2)

    def test_groupby_grouper_f_sanity_checked(self):
        dates = date_range('01-Jan-2013', periods=12, freq='MS')
        ts = Series(np.random.randn(12), index=dates)

        # GH3035
        # index.map is used to apply grouper to the index
        # if it fails on the elements, map tries it on the entire index as
        # a sequence. That can yield invalid results that cause trouble
        # down the line.
        # the surprise comes from using key[0:6] rather than str(key)[0:6]
        # when the elements are Timestamp.
        # the result is Index[0:6], very confusing.

        msg = r"Grouper result violates len\(labels\) == len\(data\)"
        with pytest.raises(AssertionError, match=msg):
            ts.groupby(lambda key: key[0:6])

    def test_grouping_error_on_multidim_input(self, df):
        msg = ("Grouper for '<class 'pandas.core.frame.DataFrame'>'"
               " not 1-dimensional")
        with pytest.raises(ValueError, match=msg):
            Grouping(df.index, df[['A', 'A']])

    def test_multiindex_passthru(self):

        # GH 7997
        # regression from 0.14.1
        df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        df.columns = pd.MultiIndex.from_tuples([(0, 1), (1, 1), (2, 1)])

        result = df.groupby(axis=1, level=[0, 1]).first()
        assert_frame_equal(result, df)

    def test_multiindex_negative_level(self, mframe):
        # GH 13901
        result = mframe.groupby(level=-1).sum()
        expected = mframe.groupby(level='second').sum()
        assert_frame_equal(result, expected)

        result = mframe.groupby(level=-2).sum()
        expected = mframe.groupby(level='first').sum()
        assert_frame_equal(result, expected)

        result = mframe.groupby(level=[-2, -1]).sum()
        expected = mframe
        assert_frame_equal(result, expected)

        result = mframe.groupby(level=[-1, 'first']).sum()
        expected = mframe.groupby(level=['second', 'first']).sum()
        assert_frame_equal(result, expected)

    def test_multifunc_select_col_integer_cols(self, df):
        df.columns = np.arange(len(df.columns))

        # it works!
        df.groupby(1, as_index=False)[2].agg({'Q': np.mean})

    def test_multiindex_columns_empty_level(self):
        lst = [['count', 'values'], ['to filter', '']]
        midx = MultiIndex.from_tuples(lst)

        df = DataFrame([[long(1), 'A']], columns=midx)

        grouped = df.groupby('to filter').groups
        assert grouped['A'] == [0]

        grouped = df.groupby([('to filter', '')]).groups
        assert grouped['A'] == [0]

        df = DataFrame([[long(1), 'A'], [long(2), 'B']], columns=midx)

        expected = df.groupby('to filter').groups
        result = df.groupby([('to filter', '')]).groups
        assert result == expected

        df = DataFrame([[long(1), 'A'], [long(2), 'A']], columns=midx)

        expected = df.groupby('to filter').groups
        result = df.groupby([('to filter', '')]).groups
        tm.assert_dict_equal(result, expected)

    def test_groupby_multiindex_tuple(self):
        # GH 17979
        df = pd.DataFrame([[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
                          columns=pd.MultiIndex.from_arrays(
                              [['a', 'b', 'b', 'c'],
                               [1, 1, 2, 2]]))
        expected = df.groupby([('b', 1)]).groups
        result = df.groupby(('b', 1)).groups
        tm.assert_dict_equal(expected, result)

        df2 = pd.DataFrame(df.values,
                           columns=pd.MultiIndex.from_arrays(
                               [['a', 'b', 'b', 'c'],
                                ['d', 'd', 'e', 'e']]))
        expected = df2.groupby([('b', 'd')]).groups
        result = df.groupby(('b', 1)).groups
        tm.assert_dict_equal(expected, result)

        df3 = pd.DataFrame(df.values,
                           columns=[('a', 'd'), ('b', 'd'), ('b', 'e'), 'c'])
        expected = df3.groupby([('b', 'd')]).groups
        result = df.groupby(('b', 1)).groups
        tm.assert_dict_equal(expected, result)

    @pytest.mark.parametrize('sort', [True, False])
    def test_groupby_level(self, sort, mframe, df):
        # GH 17537
        frame = mframe
        deleveled = frame.reset_index()

        result0 = frame.groupby(level=0, sort=sort).sum()
        result1 = frame.groupby(level=1, sort=sort).sum()

        expected0 = frame.groupby(deleveled['first'].values, sort=sort).sum()
        expected1 = frame.groupby(deleveled['second'].values, sort=sort).sum()

        expected0.index.name = 'first'
        expected1.index.name = 'second'

        assert result0.index.name == 'first'
        assert result1.index.name == 'second'

        assert_frame_equal(result0, expected0)
        assert_frame_equal(result1, expected1)
        assert result0.index.name == frame.index.names[0]
        assert result1.index.name == frame.index.names[1]

        # groupby level name
        result0 = frame.groupby(level='first', sort=sort).sum()
        result1 = frame.groupby(level='second', sort=sort).sum()
        assert_frame_equal(result0, expected0)
        assert_frame_equal(result1, expected1)

        # axis=1

        result0 = frame.T.groupby(level=0, axis=1, sort=sort).sum()
        result1 = frame.T.groupby(level=1, axis=1, sort=sort).sum()
        assert_frame_equal(result0, expected0.T)
        assert_frame_equal(result1, expected1.T)

        # raise exception for non-MultiIndex
        msg = "level > 0 or level < -1 only valid with MultiIndex"
        with pytest.raises(ValueError, match=msg):
            df.groupby(level=1)

    def test_groupby_level_index_names(self):
        # GH4014 this used to raise ValueError since 'exp'>1 (in py2)
        df = DataFrame({'exp': ['A'] * 3 + ['B'] * 3,
                        'var1': lrange(6), }).set_index('exp')
        df.groupby(level='exp')
        msg = "level name foo is not the name of the index"
        with pytest.raises(ValueError, match=msg):
            df.groupby(level='foo')

    @pytest.mark.parametrize('sort', [True, False])
    def test_groupby_level_with_nas(self, sort):
        # GH 17537
        index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]],
                           codes=[[1, 1, 1, 1, 0, 0, 0, 0], [0, 1, 2, 3, 0, 1,
                                                             2, 3]])

        # factorizing doesn't confuse things
        s = Series(np.arange(8.), index=index)
        result = s.groupby(level=0, sort=sort).sum()
        expected = Series([6., 22.], index=[0, 1])
        assert_series_equal(result, expected)

        index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]],
                           codes=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0,
                                                              1, 2, 3]])

        # factorizing doesn't confuse things
        s = Series(np.arange(8.), index=index)
        result = s.groupby(level=0, sort=sort).sum()
        expected = Series([6., 18.], index=[0.0, 1.0])
        assert_series_equal(result, expected)

    def test_groupby_args(self, mframe):
        # PR8618 and issue 8015
        frame = mframe

        msg = "You have to supply one of 'by' and 'level'"
        with pytest.raises(TypeError, match=msg):
            frame.groupby()

        msg = "You have to supply one of 'by' and 'level'"
        with pytest.raises(TypeError, match=msg):
            frame.groupby(by=None, level=None)

    @pytest.mark.parametrize('sort,labels', [
        [True, [2, 2, 2, 0, 0, 1, 1, 3, 3, 3]],
        [False, [0, 0, 0, 1, 1, 2, 2, 3, 3, 3]]
    ])
    def test_level_preserve_order(self, sort, labels, mframe):
        # GH 17537
        grouped = mframe.groupby(level=0, sort=sort)
        exp_labels = np.array(labels, np.intp)
        assert_almost_equal(grouped.grouper.labels[0], exp_labels)

    def test_grouping_labels(self, mframe):
        grouped = mframe.groupby(mframe.index.get_level_values(0))
        exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], dtype=np.intp)
        assert_almost_equal(grouped.grouper.labels[0], exp_labels)

    def test_list_grouper_with_nat(self):
        # GH 14715
        df = pd.DataFrame({'date': pd.date_range('1/1/2011',
                                                 periods=365, freq='D')})
        df.iloc[-1] = pd.NaT
        grouper = pd.Grouper(key='date', freq='AS')

        # Grouper in a list grouping
        result = df.groupby([grouper])
        expected = {pd.Timestamp('2011-01-01'): pd.Index(list(range(364)))}
        tm.assert_dict_equal(result.groups, expected)

        # Test case without a list
        result = df.groupby(grouper)
        expected = {pd.Timestamp('2011-01-01'): 365}
        tm.assert_dict_equal(result.groups, expected)


# get_group
# --------------------------------

class TestGetGroup():

    @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
    def test_get_group(self):
        wp = tm.makePanel()
        grouped = wp.groupby(lambda x: x.month, axis='major')

        gp = grouped.get_group(1)
        expected = wp.reindex(
            major=[x for x in wp.major_axis if x.month == 1])
        assert_panel_equal(gp, expected)

        # GH 5267
        # be datelike friendly
        df = DataFrame({'DATE': pd.to_datetime(
            ['10-Oct-2013', '10-Oct-2013', '10-Oct-2013', '11-Oct-2013',
             '11-Oct-2013', '11-Oct-2013']),
            'label': ['foo', 'foo', 'bar', 'foo', 'foo', 'bar'],
            'VAL': [1, 2, 3, 4, 5, 6]})

        g = df.groupby('DATE')
        key = list(g.groups)[0]
        result1 = g.get_group(key)
        result2 = g.get_group(Timestamp(key).to_pydatetime())
        result3 = g.get_group(str(Timestamp(key)))
        assert_frame_equal(result1, result2)
        assert_frame_equal(result1, result3)

        g = df.groupby(['DATE', 'label'])

        key = list(g.groups)[0]
        result1 = g.get_group(key)
        result2 = g.get_group((Timestamp(key[0]).to_pydatetime(), key[1]))
        result3 = g.get_group((str(Timestamp(key[0])), key[1]))
        assert_frame_equal(result1, result2)
        assert_frame_equal(result1, result3)

        # must pass a same-length tuple with multiple keys
        msg = "must supply a tuple to get_group with multiple grouping keys"
        with pytest.raises(ValueError, match=msg):
            g.get_group('foo')
        with pytest.raises(ValueError, match=msg):
            g.get_group(('foo'))
        msg = ("must supply a same-length tuple to get_group with multiple"
               " grouping keys")
        with pytest.raises(ValueError, match=msg):
            g.get_group(('foo', 'bar', 'baz'))

    def test_get_group_empty_bins(self, observed):

        d = pd.DataFrame([3, 1, 7, 6])
        bins = [0, 5, 10, 15]
        g = d.groupby(pd.cut(d[0], bins), observed=observed)

        # TODO: should probably allow a str of Interval to work as well
        # IOW '(0, 5]'
        result = g.get_group(pd.Interval(0, 5))
        expected = DataFrame([3, 1], index=[0, 1])
        assert_frame_equal(result, expected)

        msg = r"Interval\(10, 15, closed='right'\)"
        with pytest.raises(KeyError, match=msg):
            g.get_group(pd.Interval(10, 15))

    def test_get_group_grouped_by_tuple(self):
        # GH 8121
        df = DataFrame([[(1, ), (1, 2), (1, ), (1, 2)]], index=['ids']).T
        gr = df.groupby('ids')
        expected = DataFrame({'ids': [(1, ), (1, )]}, index=[0, 2])
        result = gr.get_group((1, ))
        assert_frame_equal(result, expected)

        dt = pd.to_datetime(['2010-01-01', '2010-01-02', '2010-01-01',
                             '2010-01-02'])
        df = DataFrame({'ids': [(x, ) for x in dt]})
        gr = df.groupby('ids')
        result = gr.get_group(('2010-01-01', ))
        expected = DataFrame({'ids': [(dt[0], ), (dt[0], )]}, index=[0, 2])
        assert_frame_equal(result, expected)

    def test_groupby_with_empty(self):
        index = pd.DatetimeIndex(())
        data = ()
        series = pd.Series(data, index)
        grouper = pd.Grouper(freq='D')
        grouped = series.groupby(grouper)
        assert next(iter(grouped), None) is None

    def test_groupby_with_single_column(self):
        df = pd.DataFrame({'a': list('abssbab')})
        tm.assert_frame_equal(df.groupby('a').get_group('a'), df.iloc[[0, 5]])
        # GH 13530
        exp = pd.DataFrame([], index=pd.Index(['a', 'b', 's'], name='a'))
        tm.assert_frame_equal(df.groupby('a').count(), exp)
        tm.assert_frame_equal(df.groupby('a').sum(), exp)
        tm.assert_frame_equal(df.groupby('a').nth(1), exp)

    def test_gb_key_len_equal_axis_len(self):
        # GH16843
        # test ensures that index and column keys are recognized correctly
        # when number of keys equals axis length of groupby
        df = pd.DataFrame([['foo', 'bar', 'B', 1],
                           ['foo', 'bar', 'B', 2],
                           ['foo', 'baz', 'C', 3]],
                          columns=['first', 'second', 'third', 'one'])
        df = df.set_index(['first', 'second'])
        df = df.groupby(['first', 'second', 'third']).size()
        assert df.loc[('foo', 'bar', 'B')] == 2
        assert df.loc[('foo', 'baz', 'C')] == 1


# groups & iteration
# --------------------------------

class TestIteration():

    def test_groups(self, df):
        grouped = df.groupby(['A'])
        groups = grouped.groups
        assert groups is grouped.groups  # caching works

        for k, v in compat.iteritems(grouped.groups):
            assert (df.loc[v]['A'] == k).all()

        grouped = df.groupby(['A', 'B'])
        groups = grouped.groups
        assert groups is grouped.groups  # caching works

        for k, v in compat.iteritems(grouped.groups):
            assert (df.loc[v]['A'] == k[0]).all()
            assert (df.loc[v]['B'] == k[1]).all()

    def test_grouping_is_iterable(self, tsframe):
        # this code path isn't used anywhere else
        # not sure it's useful
        grouped = tsframe.groupby([lambda x: x.weekday(), lambda x: x.year])

        # test it works
        for g in grouped.grouper.groupings[0]:
            pass

    def test_multi_iter(self):
        s = Series(np.arange(6))
        k1 = np.array(['a', 'a', 'a', 'b', 'b', 'b'])
        k2 = np.array(['1', '2', '1', '2', '1', '2'])

        grouped = s.groupby([k1, k2])

        iterated = list(grouped)
        expected = [('a', '1', s[[0, 2]]), ('a', '2', s[[1]]),
                    ('b', '1', s[[4]]), ('b', '2', s[[3, 5]])]
        for i, ((one, two), three) in enumerate(iterated):
            e1, e2, e3 = expected[i]
            assert e1 == one
            assert e2 == two
            assert_series_equal(three, e3)

    def test_multi_iter_frame(self, three_group):
        k1 = np.array(['b', 'b', 'b', 'a', 'a', 'a'])
        k2 = np.array(['1', '2', '1', '2', '1', '2'])
        df = DataFrame({'v1': np.random.randn(6),
                        'v2': np.random.randn(6),
                        'k1': k1, 'k2': k2},
                       index=['one', 'two', 'three', 'four', 'five', 'six'])

        grouped = df.groupby(['k1', 'k2'])

        # things get sorted!
        iterated = list(grouped)
        idx = df.index
        expected = [('a', '1', df.loc[idx[[4]]]),
                    ('a', '2', df.loc[idx[[3, 5]]]),
                    ('b', '1', df.loc[idx[[0, 2]]]),
                    ('b', '2', df.loc[idx[[1]]])]
        for i, ((one, two), three) in enumerate(iterated):
            e1, e2, e3 = expected[i]
            assert e1 == one
            assert e2 == two
            assert_frame_equal(three, e3)

        # don't iterate through groups with no data
        df['k1'] = np.array(['b', 'b', 'b', 'a', 'a', 'a'])
        df['k2'] = np.array(['1', '1', '1', '2', '2', '2'])
        grouped = df.groupby(['k1', 'k2'])
        groups = {key: gp for key, gp in grouped}
        assert len(groups) == 2

        # axis = 1
        three_levels = three_group.groupby(['A', 'B', 'C']).mean()
        grouped = three_levels.T.groupby(axis=1, level=(1, 2))
        for key, group in grouped:
            pass

    @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
    def test_multi_iter_panel(self):
        wp = tm.makePanel()
        grouped = wp.groupby([lambda x: x.month, lambda x: x.weekday()],
                             axis=1)

        for (month, wd), group in grouped:
            exp_axis = [x
                        for x in wp.major_axis
                        if x.month == month and x.weekday() == wd]
            expected = wp.reindex(major=exp_axis)
            assert_panel_equal(group, expected)

    def test_dictify(self, df):
        dict(iter(df.groupby('A')))
        dict(iter(df.groupby(['A', 'B'])))
        dict(iter(df['C'].groupby(df['A'])))
        dict(iter(df['C'].groupby([df['A'], df['B']])))
        dict(iter(df.groupby('A')['C']))
        dict(iter(df.groupby(['A', 'B'])['C']))

    def test_groupby_with_small_elem(self):
        # GH 8542
        # length=2
        df = pd.DataFrame({'event': ['start', 'start'],
                           'change': [1234, 5678]},
                          index=pd.DatetimeIndex(['2014-09-10', '2013-10-10']))
        grouped = df.groupby([pd.Grouper(freq='M'), 'event'])
        assert len(grouped.groups) == 2
        assert grouped.ngroups == 2
        assert (pd.Timestamp('2014-09-30'), 'start') in grouped.groups
        assert (pd.Timestamp('2013-10-31'), 'start') in grouped.groups

        res = grouped.get_group((pd.Timestamp('2014-09-30'), 'start'))
        tm.assert_frame_equal(res, df.iloc[[0], :])
        res = grouped.get_group((pd.Timestamp('2013-10-31'), 'start'))
        tm.assert_frame_equal(res, df.iloc[[1], :])

        df = pd.DataFrame({'event': ['start', 'start', 'start'],
                           'change': [1234, 5678, 9123]},
                          index=pd.DatetimeIndex(['2014-09-10', '2013-10-10',
                                                  '2014-09-15']))
        grouped = df.groupby([pd.Grouper(freq='M'), 'event'])
        assert len(grouped.groups) == 2
        assert grouped.ngroups == 2
        assert (pd.Timestamp('2014-09-30'), 'start') in grouped.groups
        assert (pd.Timestamp('2013-10-31'), 'start') in grouped.groups

        res = grouped.get_group((pd.Timestamp('2014-09-30'), 'start'))
        tm.assert_frame_equal(res, df.iloc[[0, 2], :])
        res = grouped.get_group((pd.Timestamp('2013-10-31'), 'start'))
        tm.assert_frame_equal(res, df.iloc[[1], :])

        # length=3
        df = pd.DataFrame({'event': ['start', 'start', 'start'],
                           'change': [1234, 5678, 9123]},
                          index=pd.DatetimeIndex(['2014-09-10', '2013-10-10',
                                                  '2014-08-05']))
        grouped = df.groupby([pd.Grouper(freq='M'), 'event'])
        assert len(grouped.groups) == 3
        assert grouped.ngroups == 3
        assert (pd.Timestamp('2014-09-30'), 'start') in grouped.groups
        assert (pd.Timestamp('2013-10-31'), 'start') in grouped.groups
        assert (pd.Timestamp('2014-08-31'), 'start') in grouped.groups

        res = grouped.get_group((pd.Timestamp('2014-09-30'), 'start'))
        tm.assert_frame_equal(res, df.iloc[[0], :])
        res = grouped.get_group((pd.Timestamp('2013-10-31'), 'start'))
        tm.assert_frame_equal(res, df.iloc[[1], :])
        res = grouped.get_group((pd.Timestamp('2014-08-31'), 'start'))
        tm.assert_frame_equal(res, df.iloc[[2], :])

    def test_grouping_string_repr(self):
        # GH 13394
        mi = MultiIndex.from_arrays([list("AAB"), list("aba")])
        df = DataFrame([[1, 2, 3]], columns=mi)
        gr = df.groupby(df[('A', 'a')])

        result = gr.grouper.groupings[0].__repr__()
        expected = "Grouping(('A', 'a'))"
        assert result == expected
@@ -1,68 +0,0 @@
import numpy as np
import pytest

import pandas as pd
from pandas.util.testing import assert_frame_equal, assert_series_equal


@pytest.fixture(params=[['inner'], ['inner', 'outer']])
def frame(request):
    levels = request.param
    df = pd.DataFrame({'outer': ['a', 'a', 'a', 'b', 'b', 'b'],
                       'inner': [1, 2, 3, 1, 2, 3],
                       'A': np.arange(6),
                       'B': ['one', 'one', 'two', 'two', 'one', 'one']})
    if levels:
        df = df.set_index(levels)

    return df


@pytest.fixture()
def series():
    df = pd.DataFrame({'outer': ['a', 'a', 'a', 'b', 'b', 'b'],
                       'inner': [1, 2, 3, 1, 2, 3],
                       'A': np.arange(6),
                       'B': ['one', 'one', 'two', 'two', 'one', 'one']})
    s = df.set_index(['outer', 'inner', 'B'])['A']

    return s


@pytest.mark.parametrize('key_strs,groupers', [
    ('inner',  # Index name
     pd.Grouper(level='inner')
     ),
    (['inner'],  # List of index name
     [pd.Grouper(level='inner')]
     ),
    (['B', 'inner'],  # Column and index
     ['B', pd.Grouper(level='inner')]
     ),
    (['inner', 'B'],  # Index and column
     [pd.Grouper(level='inner'), 'B'])])
def test_grouper_index_level_as_string(frame, key_strs, groupers):
    result = frame.groupby(key_strs).mean()
    expected = frame.groupby(groupers).mean()
    assert_frame_equal(result, expected)


@pytest.mark.parametrize('levels', [
    'inner', 'outer', 'B',
    ['inner'], ['outer'], ['B'],
    ['inner', 'outer'], ['outer', 'inner'],
    ['inner', 'outer', 'B'], ['B', 'outer', 'inner']
])
def test_grouper_index_level_as_string_series(series, levels):

    # Compute expected result
    if isinstance(levels, list):
        groupers = [pd.Grouper(level=lv) for lv in levels]
    else:
        groupers = pd.Grouper(level=levels)

    expected = series.groupby(groupers).mean()

    # Compute and check result
    result = series.groupby(levels).mean()
    assert_series_equal(result, expected)
@@ -1,416 +0,0 @@
import numpy as np
import pytest

from pandas.compat import lrange

import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, isna
from pandas.util.testing import (
    assert_frame_equal, assert_produces_warning, assert_series_equal)


def test_first_last_nth(df):
    # tests for first / last / nth
    grouped = df.groupby('A')
    first = grouped.first()
    expected = df.loc[[1, 0], ['B', 'C', 'D']]
    expected.index = Index(['bar', 'foo'], name='A')
    expected = expected.sort_index()
    assert_frame_equal(first, expected)

    nth = grouped.nth(0)
    assert_frame_equal(nth, expected)

    last = grouped.last()
    expected = df.loc[[5, 7], ['B', 'C', 'D']]
    expected.index = Index(['bar', 'foo'], name='A')
    assert_frame_equal(last, expected)

    nth = grouped.nth(-1)
    assert_frame_equal(nth, expected)

    nth = grouped.nth(1)
    expected = df.loc[[2, 3], ['B', 'C', 'D']].copy()
    expected.index = Index(['foo', 'bar'], name='A')
    expected = expected.sort_index()
    assert_frame_equal(nth, expected)

    # it works!
    grouped['B'].first()
    grouped['B'].last()
    grouped['B'].nth(0)

    df.loc[df['A'] == 'foo', 'B'] = np.nan
    assert isna(grouped['B'].first()['foo'])
    assert isna(grouped['B'].last()['foo'])
    assert isna(grouped['B'].nth(0)['foo'])

    # v0.14.0 whatsnew
    df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
    g = df.groupby('A')
    result = g.first()
    expected = df.iloc[[1, 2]].set_index('A')
    assert_frame_equal(result, expected)

    expected = df.iloc[[1, 2]].set_index('A')
    result = g.nth(0, dropna='any')
    assert_frame_equal(result, expected)


def test_first_last_nth_dtypes(df_mixed_floats):

    df = df_mixed_floats.copy()
    df['E'] = True
    df['F'] = 1

    # tests for first / last / nth
    grouped = df.groupby('A')
    first = grouped.first()
    expected = df.loc[[1, 0], ['B', 'C', 'D', 'E', 'F']]
    expected.index = Index(['bar', 'foo'], name='A')
    expected = expected.sort_index()
    assert_frame_equal(first, expected)

    last = grouped.last()
    expected = df.loc[[5, 7], ['B', 'C', 'D', 'E', 'F']]
    expected.index = Index(['bar', 'foo'], name='A')
    expected = expected.sort_index()
    assert_frame_equal(last, expected)

    nth = grouped.nth(1)
    expected = df.loc[[3, 2], ['B', 'C', 'D', 'E', 'F']]
    expected.index = Index(['bar', 'foo'], name='A')
    expected = expected.sort_index()
    assert_frame_equal(nth, expected)

    # GH 2763, first/last shifting dtypes
    idx = lrange(10)
    idx.append(9)
    s = Series(data=lrange(11), index=idx, name='IntCol')
    assert s.dtype == 'int64'
    f = s.groupby(level=0).first()
    assert f.dtype == 'int64'


def test_nth():
    df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
    g = df.groupby('A')

    assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index('A'))
    assert_frame_equal(g.nth(1), df.iloc[[1]].set_index('A'))
    assert_frame_equal(g.nth(2), df.loc[[]].set_index('A'))
    assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index('A'))
    assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index('A'))
    assert_frame_equal(g.nth(-3), df.loc[[]].set_index('A'))
    assert_series_equal(g.B.nth(0), df.set_index('A').B.iloc[[0, 2]])
    assert_series_equal(g.B.nth(1), df.set_index('A').B.iloc[[1]])
    assert_frame_equal(g[['B']].nth(0),
                       df.loc[[0, 2], ['A', 'B']].set_index('A'))

    exp = df.set_index('A')
    assert_frame_equal(g.nth(0, dropna='any'), exp.iloc[[1, 2]])
    assert_frame_equal(g.nth(-1, dropna='any'), exp.iloc[[1, 2]])

    exp['B'] = np.nan
    assert_frame_equal(g.nth(7, dropna='any'), exp.iloc[[1, 2]])
    assert_frame_equal(g.nth(2, dropna='any'), exp.iloc[[1, 2]])

    # out of bounds, regression from 0.13.1
    # GH 6621
    df = DataFrame({'color': {0: 'green',
                              1: 'green',
                              2: 'red',
                              3: 'red',
                              4: 'red'},
                    'food': {0: 'ham',
                             1: 'eggs',
                             2: 'eggs',
                             3: 'ham',
                             4: 'pork'},
                    'two': {0: 1.5456590000000001,
                            1: -0.070345000000000005,
                            2: -2.4004539999999999,
                            3: 0.46206000000000003,
                            4: 0.52350799999999997},
                    'one': {0: 0.56573799999999996,
                            1: -0.9742360000000001,
                            2: 1.033801,
                            3: -0.78543499999999999,
                            4: 0.70422799999999997}}).set_index(['color',
                                                                 'food'])

    result = df.groupby(level=0, as_index=False).nth(2)
    expected = df.iloc[[-1]]
    assert_frame_equal(result, expected)

    result = df.groupby(level=0, as_index=False).nth(3)
    expected = df.loc[[]]
    assert_frame_equal(result, expected)

    # GH 7559
    # from the vbench
    df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype='int64')
    s = df[1]
    g = df[0]
    expected = s.groupby(g).first()
    expected2 = s.groupby(g).apply(lambda x: x.iloc[0])
    assert_series_equal(expected2, expected, check_names=False)
    assert expected.name == 1
    assert expected2.name == 1

    # validate first
    v = s[g == 1].iloc[0]
    assert expected.iloc[0] == v
    assert expected2.iloc[0] == v

    # this is NOT the same as .first (as sorted is default!)
    # as it keeps the order in the series (and not the group order)
    # related GH 7287
    expected = s.groupby(g, sort=False).first()
    result = s.groupby(g, sort=False).nth(0, dropna='all')
    assert_series_equal(result, expected)

    # doc example
    df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
    g = df.groupby('A')
    # PR 17493, related to issue 11038
    # test Series.nth with True for dropna produces FutureWarning
    with assert_produces_warning(FutureWarning):
        result = g.B.nth(0, dropna=True)
    expected = g.B.first()
    assert_series_equal(result, expected)

    # test multiple nth values
    df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]],
                   columns=['A', 'B'])
    g = df.groupby('A')

    assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index('A'))
    assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index('A'))
    assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index('A'))
    assert_frame_equal(
        g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index('A'))
    assert_frame_equal(
        g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index('A'))
    assert_frame_equal(
        g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index('A'))
    assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index('A'))
    assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index('A'))

    business_dates = pd.date_range(start='4/1/2014', end='6/30/2014',
                                   freq='B')
    df = DataFrame(1, index=business_dates, columns=['a', 'b'])
    # get the first, fourth and last two business days for each month
    key = [df.index.year, df.index.month]
    result = df.groupby(key, as_index=False).nth([0, 3, -2, -1])
    expected_dates = pd.to_datetime(
        ['2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', '2014/5/1',
         '2014/5/6', '2014/5/29', '2014/5/30', '2014/6/2', '2014/6/5',
         '2014/6/27', '2014/6/30'])
    expected = DataFrame(1, columns=['a', 'b'], index=expected_dates)
    assert_frame_equal(result, expected)


def test_nth_multi_index(three_group):
|
||||
# PR 9090, related to issue 8979
|
||||
# test nth on MultiIndex, should match .first()
|
||||
grouped = three_group.groupby(['A', 'B'])
|
||||
result = grouped.nth(0)
|
||||
expected = grouped.first()
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
|
||||


@pytest.mark.parametrize('data, expected_first, expected_last', [
    ({'id': ['A'],
      'time': Timestamp('2012-02-01 14:00:00', tz='US/Central'),
      'foo': [1]},
     {'id': ['A'],
      'time': Timestamp('2012-02-01 14:00:00', tz='US/Central'),
      'foo': [1]},
     {'id': ['A'],
      'time': Timestamp('2012-02-01 14:00:00', tz='US/Central'),
      'foo': [1]}),
    ({'id': ['A', 'B', 'A'],
      'time': [Timestamp('2012-01-01 13:00:00', tz='America/New_York'),
               Timestamp('2012-02-01 14:00:00', tz='US/Central'),
               Timestamp('2012-03-01 12:00:00', tz='Europe/London')],
      'foo': [1, 2, 3]},
     {'id': ['A', 'B'],
      'time': [Timestamp('2012-01-01 13:00:00', tz='America/New_York'),
               Timestamp('2012-02-01 14:00:00', tz='US/Central')],
      'foo': [1, 2]},
     {'id': ['A', 'B'],
      'time': [Timestamp('2012-03-01 12:00:00', tz='Europe/London'),
               Timestamp('2012-02-01 14:00:00', tz='US/Central')],
      'foo': [3, 2]})
])
def test_first_last_tz(data, expected_first, expected_last):
    # GH 15884
    # Test that the timezone is retained when calling first
    # or last on groupby with as_index=False

    df = DataFrame(data)

    result = df.groupby('id', as_index=False).first()
    expected = DataFrame(expected_first)
    cols = ['id', 'time', 'foo']
    assert_frame_equal(result[cols], expected[cols])

    result = df.groupby('id', as_index=False)['time'].first()
    assert_frame_equal(result, expected[['id', 'time']])

    result = df.groupby('id', as_index=False).last()
    expected = DataFrame(expected_last)
    cols = ['id', 'time', 'foo']
    assert_frame_equal(result[cols], expected[cols])

    result = df.groupby('id', as_index=False)['time'].last()
    assert_frame_equal(result, expected[['id', 'time']])
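

# A brief sketch, assuming the imports above: first()/last() keep the
# timezone-aware dtype instead of converting to UTC or dropping the tz:
#
#   >>> times = pd.to_datetime(['2012-01-01', '2012-01-02'])
#   >>> df = pd.DataFrame({'id': ['A', 'A'],
#   ...                    'time': times.tz_localize('US/Central')})
#   >>> df.groupby('id', as_index=False).first()['time'].dtype
#   datetime64[ns, US/Central]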


def test_nth_multi_index_as_expected():
    # PR 9090, related to issue 8979
    # test nth on MultiIndex
    three_group = DataFrame(
        {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
               'foo', 'foo', 'foo'],
         'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two',
               'two', 'two', 'one'],
         'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
               'dull', 'shiny', 'shiny', 'shiny']})
    grouped = three_group.groupby(['A', 'B'])
    result = grouped.nth(0)
    expected = DataFrame(
        {'C': ['dull', 'dull', 'dull', 'dull']},
        index=MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo'],
                                      ['one', 'two', 'one', 'two']],
                                     names=['A', 'B']))
    assert_frame_equal(result, expected)


def test_groupby_head_tail():
    df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
    g_as = df.groupby('A', as_index=True)
    g_not_as = df.groupby('A', as_index=False)

    # as_index=False, much easier
    assert_frame_equal(df.loc[[0, 2]], g_not_as.head(1))
    assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1))

    empty_not_as = DataFrame(columns=df.columns,
                             index=pd.Index([], dtype=df.index.dtype))
    empty_not_as['A'] = empty_not_as['A'].astype(df.A.dtype)
    empty_not_as['B'] = empty_not_as['B'].astype(df.B.dtype)
    assert_frame_equal(empty_not_as, g_not_as.head(0))
    assert_frame_equal(empty_not_as, g_not_as.tail(0))
    assert_frame_equal(empty_not_as, g_not_as.head(-1))
    assert_frame_equal(empty_not_as, g_not_as.tail(-1))

    assert_frame_equal(df, g_not_as.head(7))  # contains all
    assert_frame_equal(df, g_not_as.tail(7))

    # as_index=True (used to be different)
    df_as = df

    assert_frame_equal(df_as.loc[[0, 2]], g_as.head(1))
    assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1))

    empty_as = DataFrame(index=df_as.index[:0], columns=df.columns)
    empty_as['A'] = empty_not_as['A'].astype(df.A.dtype)
    empty_as['B'] = empty_not_as['B'].astype(df.B.dtype)
    assert_frame_equal(empty_as, g_as.head(0))
    assert_frame_equal(empty_as, g_as.tail(0))
    assert_frame_equal(empty_as, g_as.head(-1))
    assert_frame_equal(empty_as, g_as.tail(-1))

    assert_frame_equal(df_as, g_as.head(7))  # contains all
    assert_frame_equal(df_as, g_as.tail(7))

    # test with selection
    assert_frame_equal(g_as[[]].head(1), df_as.loc[[0, 2], []])
    assert_frame_equal(g_as[['A']].head(1), df_as.loc[[0, 2], ['A']])
    assert_frame_equal(g_as[['B']].head(1), df_as.loc[[0, 2], ['B']])
    assert_frame_equal(g_as[['A', 'B']].head(1), df_as.loc[[0, 2]])

    assert_frame_equal(g_not_as[[]].head(1), df_as.loc[[0, 2], []])
    assert_frame_equal(g_not_as[['A']].head(1), df_as.loc[[0, 2], ['A']])
    assert_frame_equal(g_not_as[['B']].head(1), df_as.loc[[0, 2], ['B']])
    assert_frame_equal(g_not_as[['A', 'B']].head(1), df_as.loc[[0, 2]])
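

# A short usage sketch, assuming the imports above: unlike nth()/first(),
# head() and tail() keep the original row order and index, and they ignore
# the as_index setting:
#
#   >>> df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
#   >>> df.groupby('A').head(1)   # rows 0 and 2, index unchanged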


def test_group_selection_cache():
    # GH 12839: nth, head, and tail should return same result consistently
    df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
    expected = df.iloc[[0, 2]].set_index('A')

    g = df.groupby('A')
    result1 = g.head(n=2)
    result2 = g.nth(0)
    assert_frame_equal(result1, df)
    assert_frame_equal(result2, expected)

    g = df.groupby('A')
    result1 = g.tail(n=2)
    result2 = g.nth(0)
    assert_frame_equal(result1, df)
    assert_frame_equal(result2, expected)

    g = df.groupby('A')
    result1 = g.nth(0)
    result2 = g.head(n=2)
    assert_frame_equal(result1, expected)
    assert_frame_equal(result2, df)

    g = df.groupby('A')
    result1 = g.nth(0)
    result2 = g.tail(n=2)
    assert_frame_equal(result1, expected)
    assert_frame_equal(result2, df)


def test_nth_empty():
    # GH 16064
    df = DataFrame(index=[0], columns=['a', 'b', 'c'])
    result = df.groupby('a').nth(10)
    expected = DataFrame(index=Index([], name='a'), columns=['b', 'c'])
    assert_frame_equal(result, expected)

    result = df.groupby(['a', 'b']).nth(10)
    expected = DataFrame(index=MultiIndex([[], []], [[], []],
                                          names=['a', 'b']),
                         columns=['c'])
    assert_frame_equal(result, expected)


def test_nth_column_order():
    # GH 20760
    # Check that nth preserves column order
    df = DataFrame([[1, 'b', 100],
                    [1, 'a', 50],
                    [1, 'a', np.nan],
                    [2, 'c', 200],
                    [2, 'd', 150]],
                   columns=['A', 'C', 'B'])
    result = df.groupby('A').nth(0)
    expected = DataFrame([['b', 100.0],
                          ['c', 200.0]],
                         columns=['C', 'B'],
                         index=Index([1, 2], name='A'))
    assert_frame_equal(result, expected)

    result = df.groupby('A').nth(-1, dropna='any')
    expected = DataFrame([['a', 50.0],
                          ['d', 150.0]],
                         columns=['C', 'B'],
                         index=Index([1, 2], name='A'))
    assert_frame_equal(result, expected)

@@ -1,306 +0,0 @@
import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Series, concat
from pandas.util import testing as tm


def test_rank_apply():
    lev1 = tm.rands_array(10, 100)
    lev2 = tm.rands_array(10, 130)
    lab1 = np.random.randint(0, 100, size=500)
    lab2 = np.random.randint(0, 130, size=500)

    df = DataFrame({'value': np.random.randn(500),
                    'key1': lev1.take(lab1),
                    'key2': lev2.take(lab2)})

    result = df.groupby(['key1', 'key2']).value.rank()

    expected = [piece.value.rank()
                for key, piece in df.groupby(['key1', 'key2'])]
    expected = concat(expected, axis=0)
    expected = expected.reindex(result.index)
    tm.assert_series_equal(result, expected)

    result = df.groupby(['key1', 'key2']).value.rank(pct=True)

    expected = [piece.value.rank(pct=True)
                for key, piece in df.groupby(['key1', 'key2'])]
    expected = concat(expected, axis=0)
    expected = expected.reindex(result.index)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("grps", [
    ['qux'], ['qux', 'quux']])
@pytest.mark.parametrize("vals", [
    [2, 2, 8, 2, 6],
    [pd.Timestamp('2018-01-02'), pd.Timestamp('2018-01-02'),
     pd.Timestamp('2018-01-08'), pd.Timestamp('2018-01-02'),
     pd.Timestamp('2018-01-06')]])
@pytest.mark.parametrize("ties_method,ascending,pct,exp", [
    ('average', True, False, [2., 2., 5., 2., 4.]),
    ('average', True, True, [0.4, 0.4, 1.0, 0.4, 0.8]),
    ('average', False, False, [4., 4., 1., 4., 2.]),
    ('average', False, True, [.8, .8, .2, .8, .4]),
    ('min', True, False, [1., 1., 5., 1., 4.]),
    ('min', True, True, [0.2, 0.2, 1.0, 0.2, 0.8]),
    ('min', False, False, [3., 3., 1., 3., 2.]),
    ('min', False, True, [.6, .6, .2, .6, .4]),
    ('max', True, False, [3., 3., 5., 3., 4.]),
    ('max', True, True, [0.6, 0.6, 1.0, 0.6, 0.8]),
    ('max', False, False, [5., 5., 1., 5., 2.]),
    ('max', False, True, [1., 1., .2, 1., .4]),
    ('first', True, False, [1., 2., 5., 3., 4.]),
    ('first', True, True, [0.2, 0.4, 1.0, 0.6, 0.8]),
    ('first', False, False, [3., 4., 1., 5., 2.]),
    ('first', False, True, [.6, .8, .2, 1., .4]),
    ('dense', True, False, [1., 1., 3., 1., 2.]),
    ('dense', True, True, [1. / 3., 1. / 3., 3. / 3., 1. / 3., 2. / 3.]),
    ('dense', False, False, [3., 3., 1., 3., 2.]),
    ('dense', False, True, [3. / 3., 3. / 3., 1. / 3., 3. / 3., 2. / 3.]),
])
def test_rank_args(grps, vals, ties_method, ascending, pct, exp):
    key = np.repeat(grps, len(vals))
    vals = vals * len(grps)
    df = DataFrame({'key': key, 'val': vals})
    result = df.groupby('key').rank(method=ties_method,
                                    ascending=ascending, pct=pct)

    exp_df = DataFrame(exp * len(grps), columns=['val'])
    tm.assert_frame_equal(result, exp_df)
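

# A brief sketch of the rank methods exercised above, assuming the imports
# at the top of this file:
#
#   >>> df = pd.DataFrame({'key': ['q'] * 5, 'val': [2, 2, 8, 2, 6]})
#   >>> df.groupby('key').rank(method='min')['val'].tolist()
#   [1.0, 1.0, 5.0, 1.0, 4.0]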


@pytest.mark.parametrize("grps", [
    ['qux'], ['qux', 'quux']])
@pytest.mark.parametrize("vals", [
    [-np.inf, -np.inf, np.nan, 1., np.nan, np.inf, np.inf],
])
@pytest.mark.parametrize("ties_method,ascending,na_option,exp", [
    ('average', True, 'keep', [1.5, 1.5, np.nan, 3, np.nan, 4.5, 4.5]),
    ('average', True, 'top', [3.5, 3.5, 1.5, 5., 1.5, 6.5, 6.5]),
    ('average', True, 'bottom', [1.5, 1.5, 6.5, 3., 6.5, 4.5, 4.5]),
    ('average', False, 'keep', [4.5, 4.5, np.nan, 3, np.nan, 1.5, 1.5]),
    ('average', False, 'top', [6.5, 6.5, 1.5, 5., 1.5, 3.5, 3.5]),
    ('average', False, 'bottom', [4.5, 4.5, 6.5, 3., 6.5, 1.5, 1.5]),
    ('min', True, 'keep', [1., 1., np.nan, 3., np.nan, 4., 4.]),
    ('min', True, 'top', [3., 3., 1., 5., 1., 6., 6.]),
    ('min', True, 'bottom', [1., 1., 6., 3., 6., 4., 4.]),
    ('min', False, 'keep', [4., 4., np.nan, 3., np.nan, 1., 1.]),
    ('min', False, 'top', [6., 6., 1., 5., 1., 3., 3.]),
    ('min', False, 'bottom', [4., 4., 6., 3., 6., 1., 1.]),
    ('max', True, 'keep', [2., 2., np.nan, 3., np.nan, 5., 5.]),
    ('max', True, 'top', [4., 4., 2., 5., 2., 7., 7.]),
    ('max', True, 'bottom', [2., 2., 7., 3., 7., 5., 5.]),
    ('max', False, 'keep', [5., 5., np.nan, 3., np.nan, 2., 2.]),
    ('max', False, 'top', [7., 7., 2., 5., 2., 4., 4.]),
    ('max', False, 'bottom', [5., 5., 7., 3., 7., 2., 2.]),
    ('first', True, 'keep', [1., 2., np.nan, 3., np.nan, 4., 5.]),
    ('first', True, 'top', [3., 4., 1., 5., 2., 6., 7.]),
    ('first', True, 'bottom', [1., 2., 6., 3., 7., 4., 5.]),
    ('first', False, 'keep', [4., 5., np.nan, 3., np.nan, 1., 2.]),
    ('first', False, 'top', [6., 7., 1., 5., 2., 3., 4.]),
    ('first', False, 'bottom', [4., 5., 6., 3., 7., 1., 2.]),
    ('dense', True, 'keep', [1., 1., np.nan, 2., np.nan, 3., 3.]),
    ('dense', True, 'top', [2., 2., 1., 3., 1., 4., 4.]),
    ('dense', True, 'bottom', [1., 1., 4., 2., 4., 3., 3.]),
    ('dense', False, 'keep', [3., 3., np.nan, 2., np.nan, 1., 1.]),
    ('dense', False, 'top', [4., 4., 1., 3., 1., 2., 2.]),
    ('dense', False, 'bottom', [3., 3., 4., 2., 4., 1., 1.])
])
def test_infs_n_nans(grps, vals, ties_method, ascending, na_option, exp):
    # GH 20561
    key = np.repeat(grps, len(vals))
    vals = vals * len(grps)
    df = DataFrame({'key': key, 'val': vals})
    result = df.groupby('key').rank(method=ties_method,
                                    ascending=ascending,
                                    na_option=na_option)
    exp_df = DataFrame(exp * len(grps), columns=['val'])
    tm.assert_frame_equal(result, exp_df)
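

# A minimal sketch of na_option, assuming the imports above: 'keep' leaves
# missing ranks as NaN, while 'top'/'bottom' rank missing values first/last:
#
#   >>> s = pd.Series([np.nan, 1., 2.])
#   >>> s.groupby([0, 0, 0]).rank(na_option='bottom').tolist()
#   [3.0, 1.0, 2.0]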


@pytest.mark.parametrize("grps", [
    ['qux'], ['qux', 'quux']])
@pytest.mark.parametrize("vals", [
    [2, 2, np.nan, 8, 2, 6, np.nan, np.nan],
    [pd.Timestamp('2018-01-02'), pd.Timestamp('2018-01-02'), np.nan,
     pd.Timestamp('2018-01-08'), pd.Timestamp('2018-01-02'),
     pd.Timestamp('2018-01-06'), np.nan, np.nan]
])
@pytest.mark.parametrize("ties_method,ascending,na_option,pct,exp", [
    ('average', True, 'keep', False,
     [2., 2., np.nan, 5., 2., 4., np.nan, np.nan]),
    ('average', True, 'keep', True,
     [0.4, 0.4, np.nan, 1.0, 0.4, 0.8, np.nan, np.nan]),
    ('average', False, 'keep', False,
     [4., 4., np.nan, 1., 4., 2., np.nan, np.nan]),
    ('average', False, 'keep', True,
     [.8, 0.8, np.nan, 0.2, 0.8, 0.4, np.nan, np.nan]),
    ('min', True, 'keep', False,
     [1., 1., np.nan, 5., 1., 4., np.nan, np.nan]),
    ('min', True, 'keep', True,
     [0.2, 0.2, np.nan, 1.0, 0.2, 0.8, np.nan, np.nan]),
    ('min', False, 'keep', False,
     [3., 3., np.nan, 1., 3., 2., np.nan, np.nan]),
    ('min', False, 'keep', True,
     [.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]),
    ('max', True, 'keep', False,
     [3., 3., np.nan, 5., 3., 4., np.nan, np.nan]),
    ('max', True, 'keep', True,
     [0.6, 0.6, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]),
    ('max', False, 'keep', False,
     [5., 5., np.nan, 1., 5., 2., np.nan, np.nan]),
    ('max', False, 'keep', True,
     [1., 1., np.nan, 0.2, 1., 0.4, np.nan, np.nan]),
    ('first', True, 'keep', False,
     [1., 2., np.nan, 5., 3., 4., np.nan, np.nan]),
    ('first', True, 'keep', True,
     [0.2, 0.4, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]),
    ('first', False, 'keep', False,
     [3., 4., np.nan, 1., 5., 2., np.nan, np.nan]),
    ('first', False, 'keep', True,
     [.6, 0.8, np.nan, 0.2, 1., 0.4, np.nan, np.nan]),
    ('dense', True, 'keep', False,
     [1., 1., np.nan, 3., 1., 2., np.nan, np.nan]),
    ('dense', True, 'keep', True,
     [1. / 3., 1. / 3., np.nan, 3. / 3., 1. / 3., 2. / 3., np.nan, np.nan]),
    ('dense', False, 'keep', False,
     [3., 3., np.nan, 1., 3., 2., np.nan, np.nan]),
    ('dense', False, 'keep', True,
     [3. / 3., 3. / 3., np.nan, 1. / 3., 3. / 3., 2. / 3., np.nan, np.nan]),
    ('average', True, 'bottom', False, [2., 2., 7., 5., 2., 4., 7., 7.]),
    ('average', True, 'bottom', True,
     [0.25, 0.25, 0.875, 0.625, 0.25, 0.5, 0.875, 0.875]),
    ('average', False, 'bottom', False, [4., 4., 7., 1., 4., 2., 7., 7.]),
    ('average', False, 'bottom', True,
     [0.5, 0.5, 0.875, 0.125, 0.5, 0.25, 0.875, 0.875]),
    ('min', True, 'bottom', False, [1., 1., 6., 5., 1., 4., 6., 6.]),
    ('min', True, 'bottom', True,
     [0.125, 0.125, 0.75, 0.625, 0.125, 0.5, 0.75, 0.75]),
    ('min', False, 'bottom', False, [3., 3., 6., 1., 3., 2., 6., 6.]),
    ('min', False, 'bottom', True,
     [0.375, 0.375, 0.75, 0.125, 0.375, 0.25, 0.75, 0.75]),
    ('max', True, 'bottom', False, [3., 3., 8., 5., 3., 4., 8., 8.]),
    ('max', True, 'bottom', True,
     [0.375, 0.375, 1., 0.625, 0.375, 0.5, 1., 1.]),
    ('max', False, 'bottom', False, [5., 5., 8., 1., 5., 2., 8., 8.]),
    ('max', False, 'bottom', True,
     [0.625, 0.625, 1., 0.125, 0.625, 0.25, 1., 1.]),
    ('first', True, 'bottom', False, [1., 2., 6., 5., 3., 4., 7., 8.]),
    ('first', True, 'bottom', True,
     [0.125, 0.25, 0.75, 0.625, 0.375, 0.5, 0.875, 1.]),
    ('first', False, 'bottom', False, [3., 4., 6., 1., 5., 2., 7., 8.]),
    ('first', False, 'bottom', True,
     [0.375, 0.5, 0.75, 0.125, 0.625, 0.25, 0.875, 1.]),
    ('dense', True, 'bottom', False, [1., 1., 4., 3., 1., 2., 4., 4.]),
    ('dense', True, 'bottom', True,
     [0.25, 0.25, 1., 0.75, 0.25, 0.5, 1., 1.]),
    ('dense', False, 'bottom', False, [3., 3., 4., 1., 3., 2., 4., 4.]),
    ('dense', False, 'bottom', True,
     [0.75, 0.75, 1., 0.25, 0.75, 0.5, 1., 1.])
])
def test_rank_args_missing(grps, vals, ties_method, ascending,
                           na_option, pct, exp):
    key = np.repeat(grps, len(vals))
    vals = vals * len(grps)
    df = DataFrame({'key': key, 'val': vals})
    result = df.groupby('key').rank(method=ties_method,
                                    ascending=ascending,
                                    na_option=na_option, pct=pct)

    exp_df = DataFrame(exp * len(grps), columns=['val'])
    tm.assert_frame_equal(result, exp_df)


@pytest.mark.parametrize("pct,exp", [
    (False, [3., 3., 3., 3., 3.]),
    (True, [.6, .6, .6, .6, .6])])
def test_rank_resets_each_group(pct, exp):
    df = DataFrame(
        {'key': ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b'],
         'val': [1] * 10}
    )
    result = df.groupby('key').rank(pct=pct)
    exp_df = DataFrame(exp * 2, columns=['val'])
    tm.assert_frame_equal(result, exp_df)


def test_rank_avg_even_vals():
    df = DataFrame({'key': ['a'] * 4, 'val': [1] * 4})
    result = df.groupby('key').rank()
    exp_df = DataFrame([2.5, 2.5, 2.5, 2.5], columns=['val'])
    tm.assert_frame_equal(result, exp_df)


@pytest.mark.parametrize("ties_method", [
    'average', 'min', 'max', 'first', 'dense'])
@pytest.mark.parametrize("ascending", [True, False])
@pytest.mark.parametrize("na_option", ["keep", "top", "bottom"])
@pytest.mark.parametrize("pct", [True, False])
@pytest.mark.parametrize("vals", [
    ['bar', 'bar', 'foo', 'bar', 'baz'],
    ['bar', np.nan, 'foo', np.nan, 'baz']
])
def test_rank_object_raises(ties_method, ascending, na_option,
                            pct, vals):
    df = DataFrame({'key': ['foo'] * 5, 'val': vals})

    with pytest.raises(TypeError, match="not callable"):
        df.groupby('key').rank(method=ties_method,
                               ascending=ascending,
                               na_option=na_option, pct=pct)


@pytest.mark.parametrize("na_option", [True, "bad", 1])
@pytest.mark.parametrize("ties_method", [
    'average', 'min', 'max', 'first', 'dense'])
@pytest.mark.parametrize("ascending", [True, False])
@pytest.mark.parametrize("pct", [True, False])
@pytest.mark.parametrize("vals", [
    ['bar', 'bar', 'foo', 'bar', 'baz'],
    ['bar', np.nan, 'foo', np.nan, 'baz'],
    [1, np.nan, 2, np.nan, 3]
])
def test_rank_naoption_raises(ties_method, ascending, na_option, pct, vals):
    df = DataFrame({'key': ['foo'] * 5, 'val': vals})
    msg = "na_option must be one of 'keep', 'top', or 'bottom'"

    with pytest.raises(ValueError, match=msg):
        df.groupby('key').rank(method=ties_method,
                               ascending=ascending,
                               na_option=na_option, pct=pct)


def test_rank_empty_group():
    # see gh-22519
    column = "A"
    df = DataFrame({
        "A": [0, 1, 0],
        "B": [1., np.nan, 2.]
    })

    result = df.groupby(column).B.rank(pct=True)
    expected = Series([0.5, np.nan, 1.0], name="B")
    tm.assert_series_equal(result, expected)

    result = df.groupby(column).rank(pct=True)
    expected = DataFrame({"B": [0.5, np.nan, 1.0]})
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("input_key,input_value,output_value", [
    ([1, 2], [1, 1], [1.0, 1.0]),
    ([1, 1, 2, 2], [1, 2, 1, 2], [0.5, 1.0, 0.5, 1.0]),
    ([1, 1, 2, 2], [1, 2, 1, np.nan], [0.5, 1.0, 1.0, np.nan]),
    ([1, 1, 2], [1, 2, np.nan], [0.5, 1.0, np.nan])
])
def test_rank_zero_div(input_key, input_value, output_value):
    # GH 23666
    df = DataFrame({"A": input_key, "B": input_value})

    result = df.groupby("A").rank(method="dense", pct=True)
    expected = DataFrame({"B": output_value})
    tm.assert_frame_equal(result, expected)

@@ -1,652 +0,0 @@
""" test with the TimeGrouper / grouping with datetimes """

from datetime import datetime

import numpy as np
from numpy import nan
import pytest
import pytz

from pandas.compat import StringIO

import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range
from pandas.core.groupby.ops import BinGrouper
from pandas.util import testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal


class TestGroupBy(object):

    def test_groupby_with_timegrouper(self):
        # GH 4161
        # TimeGrouper requires a sorted index
        # also verifies that the resultant index has the correct name
        df_original = DataFrame({
            'Buyer': 'Carl Carl Carl Carl Joe Carl'.split(),
            'Quantity': [18, 3, 5, 1, 9, 3],
            'Date': [
                datetime(2013, 9, 1, 13, 0),
                datetime(2013, 9, 1, 13, 5),
                datetime(2013, 10, 1, 20, 0),
                datetime(2013, 10, 3, 10, 0),
                datetime(2013, 12, 2, 12, 0),
                datetime(2013, 9, 2, 14, 0),
            ]
        })

        # GH 6908 change target column's order
        df_reordered = df_original.sort_values(by='Quantity')

        for df in [df_original, df_reordered]:
            df = df.set_index(['Date'])

            expected = DataFrame(
                {'Quantity': 0},
                index=date_range('20130901',
                                 '20131205', freq='5D',
                                 name='Date', closed='left'))
            expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype='int64')

            result1 = df.resample('5D').sum()
            assert_frame_equal(result1, expected)

            df_sorted = df.sort_index()
            result2 = df_sorted.groupby(pd.Grouper(freq='5D')).sum()
            assert_frame_equal(result2, expected)

            result3 = df.groupby(pd.Grouper(freq='5D')).sum()
            assert_frame_equal(result3, expected)
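
    # A minimal usage sketch, assuming the imports above: for a sorted
    # DatetimeIndex, grouping with pd.Grouper(freq=...) bins dates the same
    # way resample() does:
    #
    #   >>> s = pd.Series(1, index=pd.date_range('2013-09-01', periods=6))
    #   >>> s.groupby(pd.Grouper(freq='2D')).sum()
    #   ... # identical to s.resample('2D').sum()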

    @pytest.mark.parametrize("should_sort", [True, False])
    def test_groupby_with_timegrouper_methods(self, should_sort):
        # GH 3881
        # make sure API of timegrouper conforms

        df = pd.DataFrame({
            'Branch': 'A A A A A B'.split(),
            'Buyer': 'Carl Mark Carl Joe Joe Carl'.split(),
            'Quantity': [1, 3, 5, 8, 9, 3],
            'Date': [
                datetime(2013, 1, 1, 13, 0),
                datetime(2013, 1, 1, 13, 5),
                datetime(2013, 10, 1, 20, 0),
                datetime(2013, 10, 2, 10, 0),
                datetime(2013, 12, 2, 12, 0),
                datetime(2013, 12, 2, 14, 0),
            ]
        })

        if should_sort:
            df = df.sort_values(by='Quantity', ascending=False)

        df = df.set_index('Date', drop=False)
        g = df.groupby(pd.Grouper(freq='6M'))
        assert g.group_keys

        assert isinstance(g.grouper, BinGrouper)
        groups = g.groups
        assert isinstance(groups, dict)
        assert len(groups) == 3

    def test_timegrouper_with_reg_groups(self):

        # GH 3794
        # allow combination of timegrouper/reg groups

        df_original = DataFrame({
            'Branch': 'A A A A A A A B'.split(),
            'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(),
            'Quantity': [1, 3, 5, 1, 8, 1, 9, 3],
            'Date': [
                datetime(2013, 1, 1, 13, 0),
                datetime(2013, 1, 1, 13, 5),
                datetime(2013, 10, 1, 20, 0),
                datetime(2013, 10, 2, 10, 0),
                datetime(2013, 10, 1, 20, 0),
                datetime(2013, 10, 2, 10, 0),
                datetime(2013, 12, 2, 12, 0),
                datetime(2013, 12, 2, 14, 0),
            ]
        }).set_index('Date')

        df_sorted = df_original.sort_values(by='Quantity', ascending=False)

        for df in [df_original, df_sorted]:
            expected = DataFrame({
                'Buyer': 'Carl Joe Mark'.split(),
                'Quantity': [10, 18, 3],
                'Date': [
                    datetime(2013, 12, 31, 0, 0),
                    datetime(2013, 12, 31, 0, 0),
                    datetime(2013, 12, 31, 0, 0),
                ]
            }).set_index(['Date', 'Buyer'])

            result = df.groupby([pd.Grouper(freq='A'), 'Buyer']).sum()
            assert_frame_equal(result, expected)

            expected = DataFrame({
                'Buyer': 'Carl Mark Carl Joe'.split(),
                'Quantity': [1, 3, 9, 18],
                'Date': [
                    datetime(2013, 1, 1, 0, 0),
                    datetime(2013, 1, 1, 0, 0),
                    datetime(2013, 7, 1, 0, 0),
                    datetime(2013, 7, 1, 0, 0),
                ]
            }).set_index(['Date', 'Buyer'])
            result = df.groupby([pd.Grouper(freq='6MS'), 'Buyer']).sum()
            assert_frame_equal(result, expected)

        df_original = DataFrame({
            'Branch': 'A A A A A A A B'.split(),
            'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(),
            'Quantity': [1, 3, 5, 1, 8, 1, 9, 3],
            'Date': [
                datetime(2013, 10, 1, 13, 0),
                datetime(2013, 10, 1, 13, 5),
                datetime(2013, 10, 1, 20, 0),
                datetime(2013, 10, 2, 10, 0),
                datetime(2013, 10, 1, 20, 0),
                datetime(2013, 10, 2, 10, 0),
                datetime(2013, 10, 2, 12, 0),
                datetime(2013, 10, 2, 14, 0),
            ]
        }).set_index('Date')

        df_sorted = df_original.sort_values(by='Quantity', ascending=False)
        for df in [df_original, df_sorted]:

            expected = DataFrame({
                'Buyer': 'Carl Joe Mark Carl Joe'.split(),
                'Quantity': [6, 8, 3, 4, 10],
                'Date': [
                    datetime(2013, 10, 1, 0, 0),
                    datetime(2013, 10, 1, 0, 0),
                    datetime(2013, 10, 1, 0, 0),
                    datetime(2013, 10, 2, 0, 0),
                    datetime(2013, 10, 2, 0, 0),
                ]
            }).set_index(['Date', 'Buyer'])

            result = df.groupby([pd.Grouper(freq='1D'), 'Buyer']).sum()
            assert_frame_equal(result, expected)

            result = df.groupby([pd.Grouper(freq='1M'), 'Buyer']).sum()
            expected = DataFrame({
                'Buyer': 'Carl Joe Mark'.split(),
                'Quantity': [10, 18, 3],
                'Date': [
                    datetime(2013, 10, 31, 0, 0),
                    datetime(2013, 10, 31, 0, 0),
                    datetime(2013, 10, 31, 0, 0),
                ]
            }).set_index(['Date', 'Buyer'])
            assert_frame_equal(result, expected)

            # passing the name
            df = df.reset_index()
            result = df.groupby([pd.Grouper(freq='1M', key='Date'),
                                 'Buyer']).sum()
            assert_frame_equal(result, expected)

            with pytest.raises(KeyError):
                df.groupby([pd.Grouper(freq='1M', key='foo'),
                            'Buyer']).sum()

            # passing the level
            df = df.set_index('Date')
            result = df.groupby([pd.Grouper(freq='1M', level='Date'),
                                 'Buyer']).sum()
            assert_frame_equal(result, expected)
            result = df.groupby([pd.Grouper(freq='1M', level=0),
                                 'Buyer']).sum()
            assert_frame_equal(result, expected)

            with pytest.raises(ValueError):
                df.groupby([pd.Grouper(freq='1M', level='foo'),
                            'Buyer']).sum()

            # multi names
            df = df.copy()
            df['Date'] = df.index + pd.offsets.MonthEnd(2)
            result = df.groupby([pd.Grouper(freq='1M', key='Date'),
                                 'Buyer']).sum()
            expected = DataFrame({
                'Buyer': 'Carl Joe Mark'.split(),
                'Quantity': [10, 18, 3],
                'Date': [
                    datetime(2013, 11, 30, 0, 0),
                    datetime(2013, 11, 30, 0, 0),
                    datetime(2013, 11, 30, 0, 0),
                ]
            }).set_index(['Date', 'Buyer'])
            assert_frame_equal(result, expected)

            # error as we have both a level and a name!
            with pytest.raises(ValueError):
                df.groupby([pd.Grouper(freq='1M', key='Date',
                                       level='Date'), 'Buyer']).sum()

            # single groupers
            expected = DataFrame(
                {'Quantity': [31],
                 'Date': [datetime(2013, 10, 31, 0, 0)]}).set_index('Date')
            result = df.groupby(pd.Grouper(freq='1M')).sum()
            assert_frame_equal(result, expected)

            result = df.groupby([pd.Grouper(freq='1M')]).sum()
            assert_frame_equal(result, expected)

            expected = DataFrame(
                {'Quantity': [31],
                 'Date': [datetime(2013, 11, 30, 0, 0)]}).set_index('Date')
            result = df.groupby(pd.Grouper(freq='1M', key='Date')).sum()
            assert_frame_equal(result, expected)

            result = df.groupby([pd.Grouper(freq='1M', key='Date')]).sum()
            assert_frame_equal(result, expected)
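
    # A brief sketch, assuming the imports above: a Grouper can name its
    # target either with key= (a column) or level= (an index level);
    # passing both raises ValueError when the groupby is evaluated:
    #
    #   >>> df.groupby([pd.Grouper(freq='1M', key='Date'), 'Buyer']).sum()
    #   >>> df.groupby(pd.Grouper(freq='1M', key='Date', level='Date'))
    #   ... # ValueError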

    @pytest.mark.parametrize('freq', ['D', 'M', 'A', 'Q-APR'])
    def test_timegrouper_with_reg_groups_freq(self, freq):
        # GH 6764 multiple grouping with/without sort
        df = DataFrame({
            'date': pd.to_datetime([
                '20121002', '20121007', '20130130', '20130202', '20130305',
                '20121002', '20121207', '20130130', '20130202', '20130305',
                '20130202', '20130305'
            ]),
            'user_id': [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5],
            'whole_cost': [1790, 364, 280, 259, 201, 623, 90, 312, 359, 301,
                           359, 801],
            'cost1': [12, 15, 10, 24, 39, 1, 0, 90, 45, 34, 1, 12]
        }).set_index('date')

        expected = (
            df.groupby('user_id')['whole_cost']
            .resample(freq)
            .sum(min_count=1)  # XXX
            .dropna()
            .reorder_levels(['date', 'user_id'])
            .sort_index()
            .astype('int64')
        )
        expected.name = 'whole_cost'

        result1 = df.sort_index().groupby([pd.Grouper(freq=freq),
                                           'user_id'])['whole_cost'].sum()
        assert_series_equal(result1, expected)

        result2 = df.groupby([pd.Grouper(freq=freq), 'user_id'])[
            'whole_cost'].sum()
        assert_series_equal(result2, expected)

    def test_timegrouper_get_group(self):
        # GH 6914

        df_original = DataFrame({
            'Buyer': 'Carl Joe Joe Carl Joe Carl'.split(),
            'Quantity': [18, 3, 5, 1, 9, 3],
            'Date': [datetime(2013, 9, 1, 13, 0),
                     datetime(2013, 9, 1, 13, 5),
                     datetime(2013, 10, 1, 20, 0),
                     datetime(2013, 10, 3, 10, 0),
                     datetime(2013, 12, 2, 12, 0),
                     datetime(2013, 9, 2, 14, 0)]
        })
        df_reordered = df_original.sort_values(by='Quantity')

        # single grouping
        expected_list = [df_original.iloc[[0, 1, 5]],
                         df_original.iloc[[2, 3]],
                         df_original.iloc[[4]]]
        dt_list = ['2013-09-30', '2013-10-31', '2013-12-31']

        for df in [df_original, df_reordered]:
            grouped = df.groupby(pd.Grouper(freq='M', key='Date'))
            for t, expected in zip(dt_list, expected_list):
                dt = pd.Timestamp(t)
                result = grouped.get_group(dt)
                assert_frame_equal(result, expected)

        # multiple grouping
        expected_list = [df_original.iloc[[1]], df_original.iloc[[3]],
                         df_original.iloc[[4]]]
        g_list = [('Joe', '2013-09-30'), ('Carl', '2013-10-31'),
                  ('Joe', '2013-12-31')]

        for df in [df_original, df_reordered]:
            grouped = df.groupby(['Buyer', pd.Grouper(freq='M', key='Date')])
            for (b, t), expected in zip(g_list, expected_list):
                dt = pd.Timestamp(t)
                result = grouped.get_group((b, dt))
                assert_frame_equal(result, expected)

        # with index
        df_original = df_original.set_index('Date')
        df_reordered = df_original.sort_values(by='Quantity')

        expected_list = [df_original.iloc[[0, 1, 5]],
                         df_original.iloc[[2, 3]],
                         df_original.iloc[[4]]]

        for df in [df_original, df_reordered]:
            grouped = df.groupby(pd.Grouper(freq='M'))
            for t, expected in zip(dt_list, expected_list):
                dt = pd.Timestamp(t)
                result = grouped.get_group(dt)
                assert_frame_equal(result, expected)

    def test_timegrouper_apply_return_type_series(self):
        # Using `apply` with the `TimeGrouper` should give the
        # same return type as an `apply` with a `Grouper`.
        # Issue #11742
        df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
                           'value': [10, 13]})
        df_dt = df.copy()
        df_dt['date'] = pd.to_datetime(df_dt['date'])

        def sumfunc_series(x):
            return pd.Series([x['value'].sum()], ('sum',))

        expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_series)
        result = (df_dt.groupby(pd.Grouper(freq='M', key='date'))
                  .apply(sumfunc_series))
        assert_frame_equal(result.reset_index(drop=True),
                           expected.reset_index(drop=True))

    def test_timegrouper_apply_return_type_value(self):
        # Using `apply` with the `TimeGrouper` should give the
        # same return type as an `apply` with a `Grouper`.
        # Issue #11742
        df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
                           'value': [10, 13]})
        df_dt = df.copy()
        df_dt['date'] = pd.to_datetime(df_dt['date'])

        def sumfunc_value(x):
            return x.value.sum()

        expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value)
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date'))
                      .apply(sumfunc_value))
        assert_series_equal(result.reset_index(drop=True),
                            expected.reset_index(drop=True))

    def test_groupby_groups_datetimeindex(self):
        # GH#1430
        periods = 1000
        ind = pd.date_range(start='2012/1/1', freq='5min', periods=periods)
        df = DataFrame({'high': np.arange(periods),
                        'low': np.arange(periods)}, index=ind)
        grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day))

        # it works!
        groups = grouped.groups
        assert isinstance(list(groups.keys())[0], datetime)

        # GH#11442
        index = pd.date_range('2015/01/01', periods=5, name='date')
        df = pd.DataFrame({'A': [5, 6, 7, 8, 9],
                           'B': [1, 2, 3, 4, 5]}, index=index)
        result = df.groupby(level='date').groups
        dates = ['2015-01-05', '2015-01-04', '2015-01-03',
                 '2015-01-02', '2015-01-01']
        expected = {pd.Timestamp(date): pd.DatetimeIndex([date], name='date')
                    for date in dates}
        tm.assert_dict_equal(result, expected)

        grouped = df.groupby(level='date')
        for date in dates:
            result = grouped.get_group(date)
            data = [[df.loc[date, 'A'], df.loc[date, 'B']]]
            expected_index = pd.DatetimeIndex([date], name='date')
            expected = pd.DataFrame(data,
                                    columns=list('AB'),
                                    index=expected_index)
            tm.assert_frame_equal(result, expected)

    def test_groupby_groups_datetimeindex_tz(self):
        # GH 3950
        dates = ['2011-07-19 07:00:00', '2011-07-19 08:00:00',
                 '2011-07-19 09:00:00', '2011-07-19 07:00:00',
                 '2011-07-19 08:00:00', '2011-07-19 09:00:00']
        df = DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'],
                        'datetime': dates,
                        'value1': np.arange(6, dtype='int64'),
                        'value2': [1, 2] * 3})
        df['datetime'] = df['datetime'].apply(
            lambda d: Timestamp(d, tz='US/Pacific'))

        exp_idx1 = pd.DatetimeIndex(['2011-07-19 07:00:00',
                                     '2011-07-19 07:00:00',
                                     '2011-07-19 08:00:00',
                                     '2011-07-19 08:00:00',
                                     '2011-07-19 09:00:00',
                                     '2011-07-19 09:00:00'],
                                    tz='US/Pacific', name='datetime')
        exp_idx2 = Index(['a', 'b'] * 3, name='label')
        exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2])
        expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5],
                              'value2': [1, 2, 2, 1, 1, 2]},
                             index=exp_idx, columns=['value1', 'value2'])

        result = df.groupby(['datetime', 'label']).sum()
        assert_frame_equal(result, expected)

        # by level
        didx = pd.DatetimeIndex(dates, tz='Asia/Tokyo')
        df = DataFrame({'value1': np.arange(6, dtype='int64'),
                        'value2': [1, 2, 3, 1, 2, 3]},
                       index=didx)

        exp_idx = pd.DatetimeIndex(['2011-07-19 07:00:00',
                                    '2011-07-19 08:00:00',
                                    '2011-07-19 09:00:00'], tz='Asia/Tokyo')
        expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]},
                             index=exp_idx, columns=['value1', 'value2'])

        result = df.groupby(level=0).sum()
        assert_frame_equal(result, expected)

    def test_frame_datetime64_handling_groupby(self):
        # it works!
        df = DataFrame([(3, np.datetime64('2012-07-03')),
                        (3, np.datetime64('2012-07-04'))],
                       columns=['a', 'date'])
        result = df.groupby('a').first()
        assert result['date'][3] == Timestamp('2012-07-03')

    def test_groupby_multi_timezone(self):

        # combining multiple / different timezones yields UTC

        data = """0,2000-01-28 16:47:00,America/Chicago
1,2000-01-29 16:48:00,America/Chicago
2,2000-01-30 16:49:00,America/Los_Angeles
3,2000-01-31 16:50:00,America/Chicago
4,2000-01-01 16:50:00,America/New_York"""

        df = pd.read_csv(StringIO(data), header=None,
                         names=['value', 'date', 'tz'])
        result = df.groupby('tz').date.apply(
            lambda x: pd.to_datetime(x).dt.tz_localize(x.name))

        expected = Series([Timestamp('2000-01-28 16:47:00-0600',
                                     tz='America/Chicago'),
                           Timestamp('2000-01-29 16:48:00-0600',
                                     tz='America/Chicago'),
                           Timestamp('2000-01-30 16:49:00-0800',
                                     tz='America/Los_Angeles'),
                           Timestamp('2000-01-31 16:50:00-0600',
                                     tz='America/Chicago'),
                           Timestamp('2000-01-01 16:50:00-0500',
                                     tz='America/New_York')],
                          name='date',
                          dtype=object)
        assert_series_equal(result, expected)

        tz = 'America/Chicago'
        res_values = df.groupby('tz').date.get_group(tz)
        result = pd.to_datetime(res_values).dt.tz_localize(tz)
        exp_values = Series(['2000-01-28 16:47:00', '2000-01-29 16:48:00',
                             '2000-01-31 16:50:00'],
                            index=[0, 1, 3], name='date')
        expected = pd.to_datetime(exp_values).dt.tz_localize(tz)
        assert_series_equal(result, expected)
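
    # A short sketch, assuming the imports above: localizing each group to
    # its own zone via apply yields an object-dtype Series of tz-aware
    # Timestamps rather than a single tz-aware datetime64 column:
    #
    #   >>> df.groupby('tz').date.apply(
    #   ...     lambda x: pd.to_datetime(x).dt.tz_localize(x.name)).dtype
    #   dtype('O')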

    def test_groupby_groups_periods(self):
        dates = ['2011-07-19 07:00:00', '2011-07-19 08:00:00',
                 '2011-07-19 09:00:00', '2011-07-19 07:00:00',
                 '2011-07-19 08:00:00', '2011-07-19 09:00:00']
        df = DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'],
                        'period': [pd.Period(d, freq='H') for d in dates],
                        'value1': np.arange(6, dtype='int64'),
                        'value2': [1, 2] * 3})

        exp_idx1 = pd.PeriodIndex(['2011-07-19 07:00:00',
                                   '2011-07-19 07:00:00',
                                   '2011-07-19 08:00:00',
                                   '2011-07-19 08:00:00',
                                   '2011-07-19 09:00:00',
                                   '2011-07-19 09:00:00'],
                                  freq='H', name='period')
        exp_idx2 = Index(['a', 'b'] * 3, name='label')
        exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2])
        expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5],
                              'value2': [1, 2, 2, 1, 1, 2]},
                             index=exp_idx, columns=['value1', 'value2'])

        result = df.groupby(['period', 'label']).sum()
        assert_frame_equal(result, expected)

        # by level
        didx = pd.PeriodIndex(dates, freq='H')
        df = DataFrame({'value1': np.arange(6, dtype='int64'),
                        'value2': [1, 2, 3, 1, 2, 3]},
                       index=didx)

        exp_idx = pd.PeriodIndex(['2011-07-19 07:00:00',
                                  '2011-07-19 08:00:00',
                                  '2011-07-19 09:00:00'], freq='H')
        expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]},
                             index=exp_idx, columns=['value1', 'value2'])

        result = df.groupby(level=0).sum()
        assert_frame_equal(result, expected)

    def test_groupby_first_datetime64(self):
        df = DataFrame([(1, 1351036800000000000), (2, 1351036800000000000)])
        df[1] = df[1].view('M8[ns]')

        assert issubclass(df[1].dtype.type, np.datetime64)

        result = df.groupby(level=0).first()
        got_dt = result[1].dtype
        assert issubclass(got_dt.type, np.datetime64)

        result = df[1].groupby(level=0).first()
        got_dt = result.dtype
        assert issubclass(got_dt.type, np.datetime64)

    def test_groupby_max_datetime64(self):
        # GH 5869
        # datetimelike dtype conversion from int
        df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5)))
        expected = df.groupby('A')['A'].apply(lambda x: x.max())
        result = df.groupby('A')['A'].max()
        assert_series_equal(result, expected)

    def test_groupby_datetime64_32_bit(self):
        # GH 6410 / numpy 4328
        # 32-bit under 1.9-dev indexing issue

        df = DataFrame({"A": range(2), "B": [pd.Timestamp('2000-01-1')] * 2})
        result = df.groupby("A")["B"].transform(min)
        expected = Series([pd.Timestamp('2000-01-1')] * 2, name='B')
        assert_series_equal(result, expected)

    def test_groupby_with_timezone_selection(self):
        # GH 11616
        # Test that column selection returns output in correct timezone.
        np.random.seed(42)
        df = pd.DataFrame({
            'factor': np.random.randint(0, 3, size=60),
            'time': pd.date_range('01/01/2000 00:00', periods=60,
                                  freq='s', tz='UTC')
        })
        df1 = df.groupby('factor').max()['time']
        df2 = df.groupby('factor')['time'].max()
        tm.assert_series_equal(df1, df2)

    def test_timezone_info(self):
        # see gh-11682: Timezone info lost when broadcasting
        # scalar datetime to DataFrame

        df = pd.DataFrame({'a': [1], 'b': [datetime.now(pytz.utc)]})
        assert df['b'][0].tzinfo == pytz.utc
        df = pd.DataFrame({'a': [1, 2, 3]})
        df['b'] = datetime.now(pytz.utc)
        assert df['b'][0].tzinfo == pytz.utc

    def test_datetime_count(self):
        df = DataFrame({'a': [1, 2, 3] * 2,
                        'dates': pd.date_range('now', periods=6, freq='T')})
        result = df.groupby('a').dates.count()
        expected = Series([2, 2, 2],
                          index=Index([1, 2, 3], name='a'), name='dates')
        tm.assert_series_equal(result, expected)

    def test_first_last_max_min_on_time_data(self):
        # GH 10295
        # Verify that NaT is not in the result of max, min, first and last on
        # DataFrame with datetime or timedelta values.
        from datetime import timedelta as td
        df_test = DataFrame(
            {'dt': [nan, '2015-07-24 10:10', '2015-07-25 11:11',
                    '2015-07-23 12:12', nan],
             'td': [nan, td(days=1), td(days=2), td(days=3), nan]})
        df_test.dt = pd.to_datetime(df_test.dt)
        df_test['group'] = 'A'
        df_ref = df_test[df_test.dt.notna()]

        grouped_test = df_test.groupby('group')
        grouped_ref = df_ref.groupby('group')

        assert_frame_equal(grouped_ref.max(), grouped_test.max())
        assert_frame_equal(grouped_ref.min(), grouped_test.min())
        assert_frame_equal(grouped_ref.first(), grouped_test.first())
        assert_frame_equal(grouped_ref.last(), grouped_test.last())

    def test_nunique_with_timegrouper_and_nat(self):
        # GH 17575
        test = pd.DataFrame({
            'time': [Timestamp('2016-06-28 09:35:35'),
                     pd.NaT,
                     Timestamp('2016-06-28 16:46:28')],
            'data': ['1', '2', '3']})

        grouper = pd.Grouper(key='time', freq='h')
        result = test.groupby(grouper)['data'].nunique()
        expected = test[test.time.notnull()].groupby(grouper)['data'].nunique()
        tm.assert_series_equal(result, expected)

    def test_scalar_call_versus_list_call(self):
        # Issue: 17530
        data_frame = {
            'location': ['shanghai', 'beijing', 'shanghai'],
            'time': pd.Series(['2017-08-09 13:32:23', '2017-08-11 23:23:15',
                               '2017-08-11 22:23:15'],
                              dtype='datetime64[ns]'),
            'value': [1, 2, 3]
        }
        data_frame = pd.DataFrame(data_frame).set_index('time')
        grouper = pd.Grouper(freq='D')

        grouped = data_frame.groupby(grouper)
        result = grouped.count()
        grouped = data_frame.groupby([grouper])
        expected = grouped.count()

        assert_frame_equal(result, expected)

@@ -1,836 +0,0 @@
""" test with the .transform """

import numpy as np
import pytest

from pandas._libs import groupby
from pandas.compat import StringIO

from pandas.core.dtypes.common import ensure_platform_int, is_timedelta64_dtype

import pandas as pd
from pandas import DataFrame, MultiIndex, Series, Timestamp, concat, date_range
from pandas.core.config import option_context
from pandas.core.groupby.groupby import DataError
from pandas.util import testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal


def assert_fp_equal(a, b):
    assert (np.abs(a - b) < 1e-12).all()


def test_transform():
    data = Series(np.arange(9) // 3, index=np.arange(9))

    index = np.arange(9)
    np.random.shuffle(index)
    data = data.reindex(index)

    grouped = data.groupby(lambda x: x // 3)

    transformed = grouped.transform(lambda x: x * x.sum())
    assert transformed[7] == 12

    # GH 8046
    # make sure that we preserve the input order
    df = DataFrame(
        np.arange(6, dtype='int64').reshape(3, 2),
        columns=["a", "b"], index=[0, 2, 1])
    key = [0, 0, 1]
    expected = df.sort_index().groupby(key).transform(
        lambda x: x - x.mean()).groupby(key).mean()
    result = df.groupby(key).transform(lambda x: x - x.mean()).groupby(
        key).mean()
    assert_frame_equal(result, expected)

    def demean(arr):
        return arr - arr.mean()

    people = DataFrame(np.random.randn(5, 5),
                       columns=['a', 'b', 'c', 'd', 'e'],
                       index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis'])
    key = ['one', 'two', 'one', 'two', 'one']
    result = people.groupby(key).transform(demean).groupby(key).mean()
    expected = people.groupby(key).apply(demean).groupby(key).mean()
    assert_frame_equal(result, expected)

    # GH 8430
    df = tm.makeTimeDataFrame()
    g = df.groupby(pd.Grouper(freq='M'))
    g.transform(lambda x: x - 1)

    # GH 9700
    df = DataFrame({'a': range(5, 10), 'b': range(5)})
    result = df.groupby('a').transform(max)
    expected = DataFrame({'b': range(5)})
    tm.assert_frame_equal(result, expected)
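

# A minimal sketch of the broadcast contract checked above, assuming the
# imports at the top of this file: transform returns an object indexed like
# its input, with each group's reduction repeated across the group's rows:
#
#   >>> s = pd.Series([1, 2, 10], index=[0, 2, 1])
#   >>> s.groupby([0, 0, 1]).transform('sum').tolist()
#   [3, 3, 10]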


def test_transform_fast():

    df = DataFrame({'id': np.arange(100000) / 3,
                    'val': np.random.randn(100000)})

    grp = df.groupby('id')['val']

    values = np.repeat(grp.mean().values,
                       ensure_platform_int(grp.count().values))
    expected = pd.Series(values, index=df.index, name='val')

    result = grp.transform(np.mean)
    assert_series_equal(result, expected)

    result = grp.transform('mean')
    assert_series_equal(result, expected)

    # GH 12737
    df = pd.DataFrame({'grouping': [0, 1, 1, 3], 'f': [1.1, 2.1, 3.1, 4.5],
                       'd': pd.date_range('2014-1-1', '2014-1-4'),
                       'i': [1, 2, 3, 4]},
                      columns=['grouping', 'f', 'i', 'd'])
    result = df.groupby('grouping').transform('first')

    dates = [pd.Timestamp('2014-1-1'), pd.Timestamp('2014-1-2'),
             pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-4')]
    expected = pd.DataFrame({'f': [1.1, 2.1, 2.1, 4.5],
                             'd': dates,
                             'i': [1, 2, 2, 4]},
                            columns=['f', 'i', 'd'])
    assert_frame_equal(result, expected)

    # selection
    result = df.groupby('grouping')[['f', 'i']].transform('first')
    expected = expected[['f', 'i']]
    assert_frame_equal(result, expected)

    # dup columns
    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['g', 'a', 'a'])
    result = df.groupby('g').transform('first')
    expected = df.drop('g', axis=1)
    assert_frame_equal(result, expected)


def test_transform_broadcast(tsframe, ts):
    grouped = ts.groupby(lambda x: x.month)
    result = grouped.transform(np.mean)

    tm.assert_index_equal(result.index, ts.index)
    for _, gp in grouped:
        assert_fp_equal(result.reindex(gp.index), gp.mean())

    grouped = tsframe.groupby(lambda x: x.month)
    result = grouped.transform(np.mean)
    tm.assert_index_equal(result.index, tsframe.index)
    for _, gp in grouped:
        agged = gp.mean()
        res = result.reindex(gp.index)
        for col in tsframe:
            assert_fp_equal(res[col], agged[col])

    # group columns
    grouped = tsframe.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1},
                              axis=1)
    result = grouped.transform(np.mean)
    tm.assert_index_equal(result.index, tsframe.index)
    tm.assert_index_equal(result.columns, tsframe.columns)
    for _, gp in grouped:
        agged = gp.mean(1)
        res = result.reindex(columns=gp.columns)
        for idx in gp.index:
            assert_fp_equal(res.xs(idx), agged[idx])


def test_transform_axis(tsframe):

    # make sure that we are setting the axes
    # correctly when on axis=0 or 1
    # in the presence of a non-monotonic indexer
    # GH 12713

    base = tsframe.iloc[0:5]
    r = len(base.index)
    c = len(base.columns)
    tso = DataFrame(np.random.randn(r, c),
                    index=base.index,
                    columns=base.columns,
                    dtype='float64')
    # monotonic
    ts = tso
    grouped = ts.groupby(lambda x: x.weekday())
    result = ts - grouped.transform('mean')
    expected = grouped.apply(lambda x: x - x.mean())
    assert_frame_equal(result, expected)

    ts = ts.T
    grouped = ts.groupby(lambda x: x.weekday(), axis=1)
    result = ts - grouped.transform('mean')
    expected = grouped.apply(lambda x: (x.T - x.mean(1)).T)
    assert_frame_equal(result, expected)

    # non-monotonic
    ts = tso.iloc[[1, 0] + list(range(2, len(base)))]
    grouped = ts.groupby(lambda x: x.weekday())
    result = ts - grouped.transform('mean')
    expected = grouped.apply(lambda x: x - x.mean())
    assert_frame_equal(result, expected)

    ts = ts.T
    grouped = ts.groupby(lambda x: x.weekday(), axis=1)
    result = ts - grouped.transform('mean')
    expected = grouped.apply(lambda x: (x.T - x.mean(1)).T)
    assert_frame_equal(result, expected)


def test_transform_dtype():
    # GH 9807
    # Check transform dtype output is preserved
    df = DataFrame([[1, 3], [2, 3]])
    result = df.groupby(1).transform('mean')
    expected = DataFrame([[1.5], [1.5]])
    assert_frame_equal(result, expected)


def test_transform_bug():
    # GH 5712
    # transforming on a datetime column
    df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5)))
    result = df.groupby('A')['B'].transform(
        lambda x: x.rank(ascending=False))
    expected = Series(np.arange(5, 0, step=-1), name='B')
    assert_series_equal(result, expected)


def test_transform_numeric_to_boolean():
    # GH 16875
    # inconsistency in transforming boolean values
    expected = pd.Series([True, True], name='A')

    df = pd.DataFrame({'A': [1.1, 2.2], 'B': [1, 2]})
    result = df.groupby('B').A.transform(lambda x: True)
    assert_series_equal(result, expected)

    df = pd.DataFrame({'A': [1, 2], 'B': [1, 2]})
    result = df.groupby('B').A.transform(lambda x: True)
    assert_series_equal(result, expected)


def test_transform_datetime_to_timedelta():
    # GH 15429
    # transforming a datetime to timedelta
    df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5)))
    expected = pd.Series([
        Timestamp('20130101') - Timestamp('20130101')] * 5, name='A')

    # this does date math without changing result type in transform
    base_time = df['A'][0]
    result = df.groupby('A')['A'].transform(
        lambda x: x.max() - x.min() + base_time) - base_time
    assert_series_equal(result, expected)

    # this does date math and causes the transform to return timedelta
    result = df.groupby('A')['A'].transform(lambda x: x.max() - x.min())
    assert_series_equal(result, expected)


def test_transform_datetime_to_numeric():
    # GH 10972
    # convert dt to float
    df = DataFrame({
        'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')})
    result = df.groupby('a').b.transform(
        lambda x: x.dt.dayofweek - x.dt.dayofweek.mean())

    expected = Series([-0.5, 0.5], name='b')
    assert_series_equal(result, expected)

    # convert dt to int
    df = DataFrame({
        'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')})
    result = df.groupby('a').b.transform(
        lambda x: x.dt.dayofweek - x.dt.dayofweek.min())

    expected = Series([0, 1], name='b')
    assert_series_equal(result, expected)


def test_transform_casting():
    # GH 13046
    data = """
    idx     A         ID3              DATETIME
    0   B-028  b76cd912ff "2014-10-08 13:43:27"
    1   B-054  4a57ed0b02 "2014-10-08 14:26:19"
    2   B-076  1a682034f8 "2014-10-08 14:29:01"
    3   B-023  b76cd912ff "2014-10-08 18:39:34"
    4   B-023  f88g8d7sds "2014-10-08 18:40:18"
    5   B-033  b76cd912ff "2014-10-08 18:44:30"
    6   B-032  b76cd912ff "2014-10-08 18:46:00"
    7   B-037  b76cd912ff "2014-10-08 18:52:15"
    8   B-046  db959faf02 "2014-10-08 18:59:59"
    9   B-053  b76cd912ff "2014-10-08 19:17:48"
    10  B-065  b76cd912ff "2014-10-08 19:21:38"
    """
    df = pd.read_csv(StringIO(data), sep=r'\s+',
                     index_col=[0], parse_dates=['DATETIME'])

    result = df.groupby('ID3')['DATETIME'].transform(lambda x: x.diff())
    assert is_timedelta64_dtype(result.dtype)

    result = df[['ID3', 'DATETIME']].groupby('ID3').transform(
        lambda x: x.diff())
    assert is_timedelta64_dtype(result.DATETIME.dtype)


def test_transform_multiple(ts):
    grouped = ts.groupby([lambda x: x.year, lambda x: x.month])

    grouped.transform(lambda x: x * 2)
    grouped.transform(np.mean)


def test_dispatch_transform(tsframe):
    df = tsframe[::5].reindex(tsframe.index)

    grouped = df.groupby(lambda x: x.month)

    filled = grouped.fillna(method='pad')
    fillit = lambda x: x.fillna(method='pad')
    expected = df.groupby(lambda x: x.month).transform(fillit)
    assert_frame_equal(filled, expected)


def test_transform_select_columns(df):
    f = lambda x: x.mean()
    result = df.groupby('A')['C', 'D'].transform(f)

    selection = df[['C', 'D']]
    expected = selection.groupby(df['A']).transform(f)

    assert_frame_equal(result, expected)


def test_transform_exclude_nuisance(df):

    # this also tests orderings in transform between
    # series/frame to make sure it's consistent
    expected = {}
    grouped = df.groupby('A')
    expected['C'] = grouped['C'].transform(np.mean)
    expected['D'] = grouped['D'].transform(np.mean)
    expected = DataFrame(expected)
    result = df.groupby('A').transform(np.mean)

    assert_frame_equal(result, expected)


def test_transform_function_aliases(df):
    result = df.groupby('A').transform('mean')
    expected = df.groupby('A').transform(np.mean)
    assert_frame_equal(result, expected)

    result = df.groupby('A')['C'].transform('mean')
    expected = df.groupby('A')['C'].transform(np.mean)
    assert_series_equal(result, expected)


def test_series_fast_transform_date():
    # GH 13191
    df = pd.DataFrame({'grouping': [np.nan, 1, 1, 3],
                       'd': pd.date_range('2014-1-1', '2014-1-4')})
    result = df.groupby('grouping')['d'].transform('first')
    dates = [pd.NaT, pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-2'),
             pd.Timestamp('2014-1-4')]
    expected = pd.Series(dates, name='d')
    assert_series_equal(result, expected)
def test_transform_length():
    # GH 9697
    df = pd.DataFrame({'col1': [1, 1, 2, 2], 'col2': [1, 2, 3, np.nan]})
    expected = pd.Series([3.0] * 4)

    def nsum(x):
        return np.nansum(x)

    results = [df.groupby('col1').transform(sum)['col2'],
               df.groupby('col1')['col2'].transform(sum),
               df.groupby('col1').transform(nsum)['col2'],
               df.groupby('col1')['col2'].transform(nsum)]
    for result in results:
        assert_series_equal(result, expected, check_names=False)

def test_transform_coercion():

    # 14457
    # when we are transforming be sure to not coerce
    # via assignment
    df = pd.DataFrame(dict(A=['a', 'a'], B=[0, 1]))
    g = df.groupby('A')

    expected = g.transform(np.mean)
    result = g.transform(lambda x: np.mean(x))
    assert_frame_equal(result, expected)

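def _demo_transform_no_coercion():  # editor's sketch, hypothetical name
    # Illustrative sketch, not part of the original test file: the "no
    # coercion" behavior exercised above means an integer column transformed
    # by a mean-like UDF comes back as float64 instead of being cast back
    # to int.
    df = pd.DataFrame({'A': ['a', 'a', 'b'], 'B': [0, 1, 5]})
    out = df.groupby('A').transform(lambda x: np.mean(x))
    # group means: a -> 0.5, b -> 5.0, broadcast back to the original rows
    assert out['B'].dtype == np.float64
    assert out['B'].tolist() == [0.5, 0.5, 5.0]
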
def test_groupby_transform_with_int():

    # GH 3740, make sure that we upcast on item-by-item transform
    # when the result requires it

    # floats
    df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=Series(1, dtype='float64'),
                        C=Series(
                            [1, 2, 3, 1, 2, 3], dtype='float64'), D='foo'))
    with np.errstate(all='ignore'):
        result = df.groupby('A').transform(
            lambda x: (x - x.mean()) / x.std())
    expected = DataFrame(dict(B=np.nan, C=Series(
        [-1, 0, 1, -1, 0, 1], dtype='float64')))
    assert_frame_equal(result, expected)

    # int case
    df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1,
                        C=[1, 2, 3, 1, 2, 3], D='foo'))
    with np.errstate(all='ignore'):
        result = df.groupby('A').transform(
            lambda x: (x - x.mean()) / x.std())
    expected = DataFrame(dict(B=np.nan, C=[-1, 0, 1, -1, 0, 1]))
    assert_frame_equal(result, expected)

    # int that needs float conversion
    s = Series([2, 3, 4, 10, 5, -1])
    df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, C=s, D='foo'))
    with np.errstate(all='ignore'):
        result = df.groupby('A').transform(
            lambda x: (x - x.mean()) / x.std())

    s1 = s.iloc[0:3]
    s1 = (s1 - s1.mean()) / s1.std()
    s2 = s.iloc[3:6]
    s2 = (s2 - s2.mean()) / s2.std()
    expected = DataFrame(dict(B=np.nan, C=concat([s1, s2])))
    assert_frame_equal(result, expected)

    # int downcasting
    result = df.groupby('A').transform(lambda x: x * 2 / 2)
    expected = DataFrame(dict(B=1, C=[2, 3, 4, 10, 5, -1]))
    assert_frame_equal(result, expected)

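def _demo_transform_upcast():  # editor's sketch, hypothetical name
    # Illustrative sketch, not part of the original test file: a compact
    # analogue of the "int that needs float conversion" case above -- int
    # columns are upcast to float when the UDF result requires it (here a
    # z-score).
    df = pd.DataFrame({'A': [1, 1, 2, 2], 'C': [1, 2, 3, 4]})
    out = df.groupby('A').transform(lambda x: (x - x.mean()) / x.std())
    assert out['C'].dtype == np.float64
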
def test_groupby_transform_with_nan_group():
    # GH 9941
    df = pd.DataFrame({'a': range(10),
                       'b': [1, 1, 2, 3, np.nan, 4, 4, 5, 5, 5]})
    result = df.groupby(df.b)['a'].transform(max)
    expected = pd.Series([1., 1., 2., 3., np.nan, 6., 6., 9., 9., 9.],
                         name='a')
    assert_series_equal(result, expected)

def test_transform_mixed_type():
    index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]
                                    ])
    df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
                    'c': np.tile(['a', 'b', 'c'], 2),
                    'v': np.arange(1., 7.)}, index=index)

    def f(group):
        group['g'] = group['d'] * 2
        return group[:1]

    grouped = df.groupby('c')
    result = grouped.apply(f)

    assert result['d'].dtype == np.float64

    # this is by definition a mutating operation!
    with option_context('mode.chained_assignment', None):
        for key, group in grouped:
            res = f(group)
            assert_frame_equal(res, result.loc[key])

def _check_cython_group_transform_cumulative(pd_op, np_op, dtype):
    """
    Check a group transform that executes a cumulative function.

    Parameters
    ----------
    pd_op : callable
        The pandas cumulative function.
    np_op : callable
        The analogous one in NumPy.
    dtype : type
        The specified dtype of the data.
    """

    is_datetimelike = False

    data = np.array([[1], [2], [3], [4]], dtype=dtype)
    ans = np.zeros_like(data)

    labels = np.array([0, 0, 0, 0], dtype=np.int64)
    pd_op(ans, data, labels, is_datetimelike)

    tm.assert_numpy_array_equal(np_op(data), ans[:, 0],
                                check_dtype=False)

def test_cython_group_transform_cumsum(any_real_dtype):
    # see gh-4095
    dtype = np.dtype(any_real_dtype).type
    pd_op, np_op = groupby.group_cumsum, np.cumsum
    _check_cython_group_transform_cumulative(pd_op, np_op, dtype)

def test_cython_group_transform_cumprod():
    # see gh-4095
    dtype = np.float64
    pd_op, np_op = groupby.group_cumprod_float64, np.cumprod
    _check_cython_group_transform_cumulative(pd_op, np_op, dtype)

def test_cython_group_transform_algos():
    # see gh-4095
    is_datetimelike = False

    # with nans
    labels = np.array([0, 0, 0, 0, 0], dtype=np.int64)

    data = np.array([[1], [2], [3], [np.nan], [4]], dtype='float64')
    actual = np.zeros_like(data)
    actual.fill(np.nan)
    groupby.group_cumprod_float64(actual, data, labels, is_datetimelike)
    expected = np.array([1, 2, 6, np.nan, 24], dtype='float64')
    tm.assert_numpy_array_equal(actual[:, 0], expected)

    actual = np.zeros_like(data)
    actual.fill(np.nan)
    groupby.group_cumsum(actual, data, labels, is_datetimelike)
    expected = np.array([1, 3, 6, np.nan, 10], dtype='float64')
    tm.assert_numpy_array_equal(actual[:, 0], expected)

    # timedelta
    is_datetimelike = True
    data = np.array([np.timedelta64(1, 'ns')] * 5, dtype='m8[ns]')[:, None]
    actual = np.zeros_like(data, dtype='int64')
    groupby.group_cumsum(actual, data.view('int64'), labels,
                         is_datetimelike)
    expected = np.array([np.timedelta64(1, 'ns'), np.timedelta64(
        2, 'ns'), np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'),
        np.timedelta64(5, 'ns')])
    tm.assert_numpy_array_equal(actual[:, 0].view('m8[ns]'), expected)

@pytest.mark.parametrize(
    "op, args, targop",
    [('cumprod', (), lambda x: x.cumprod()),
     ('cumsum', (), lambda x: x.cumsum()),
     ('shift', (-1, ), lambda x: x.shift(-1)),
     ('shift', (1, ), lambda x: x.shift())])
def test_cython_transform_series(op, args, targop):
    # GH 4095
    s = Series(np.random.randn(1000))
    s_missing = s.copy()
    s_missing.iloc[2:10] = np.nan
    labels = np.random.randint(0, 50, size=1000).astype(float)

    # series
    for data in [s, s_missing]:
        expected = data.groupby(labels).transform(targop)

        tm.assert_series_equal(
            expected,
            data.groupby(labels).transform(op, *args))
        tm.assert_series_equal(expected, getattr(
            data.groupby(labels), op)(*args))

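def _demo_transform_string_op():  # editor's sketch, hypothetical name
    # Illustrative sketch, not part of the original test file: the
    # equivalence the parametrized test above relies on -- passing a
    # cythonized op name to ``transform`` matches calling the groupby
    # method directly.
    s = Series([1.0, 2.0, 3.0, 4.0])
    labels = [0, 0, 1, 1]
    via_transform = s.groupby(labels).transform('cumsum')
    via_method = s.groupby(labels).cumsum()
    tm.assert_series_equal(via_transform, via_method)
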
@pytest.mark.parametrize("op", ['cumprod', 'cumsum'])
@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize('input, exp', [
    # When everything is NaN
    ({'key': ['b'] * 10, 'value': np.nan},
     pd.Series([np.nan] * 10, name='value')),
    # When there is a single NaN
    ({'key': ['b'] * 10 + ['a'] * 2,
      'value': [3] * 3 + [np.nan] + [3] * 8},
     {('cumprod', False): [3.0, 9.0, 27.0] + [np.nan] * 7 + [3.0, 9.0],
      ('cumprod', True): [3.0, 9.0, 27.0, np.nan, 81., 243., 729.,
                          2187., 6561., 19683., 3.0, 9.0],
      ('cumsum', False): [3.0, 6.0, 9.0] + [np.nan] * 7 + [3.0, 6.0],
      ('cumsum', True): [3.0, 6.0, 9.0, np.nan, 12., 15., 18.,
                         21., 24., 27., 3.0, 6.0]})])
def test_groupby_cum_skipna(op, skipna, input, exp):
    df = pd.DataFrame(input)
    result = df.groupby('key')['value'].transform(op, skipna=skipna)
    if isinstance(exp, dict):
        expected = exp[(op, skipna)]
    else:
        expected = exp
    expected = pd.Series(expected, name='value')
    tm.assert_series_equal(expected, result)

@pytest.mark.parametrize(
    "op, args, targop",
    [('cumprod', (), lambda x: x.cumprod()),
     ('cumsum', (), lambda x: x.cumsum()),
     ('shift', (-1, ), lambda x: x.shift(-1)),
     ('shift', (1, ), lambda x: x.shift())])
def test_cython_transform_frame(op, args, targop):
    s = Series(np.random.randn(1000))
    s_missing = s.copy()
    s_missing.iloc[2:10] = np.nan
    labels = np.random.randint(0, 50, size=1000).astype(float)
    strings = list('qwertyuiopasdfghjklz')
    strings_missing = strings[:]
    strings_missing[5] = np.nan
    df = DataFrame({'float': s,
                    'float_missing': s_missing,
                    'int': [1, 1, 1, 1, 2] * 200,
                    'datetime': pd.date_range('1990-1-1', periods=1000),
                    'timedelta': pd.timedelta_range(1, freq='s',
                                                    periods=1000),
                    'string': strings * 50,
                    'string_missing': strings_missing * 50},
                   columns=['float', 'float_missing', 'int', 'datetime',
                            'timedelta', 'string', 'string_missing'])
    df['cat'] = df['string'].astype('category')

    df2 = df.copy()
    df2.index = pd.MultiIndex.from_product([range(100), range(10)])

    # DataFrame - Single and MultiIndex,
    # group by values, index level, columns
    for df in [df, df2]:
        for gb_target in [dict(by=labels), dict(level=0), dict(by='string')
                          ]:  # dict(by='string_missing')]:
            # dict(by=['int','string'])]:

            gb = df.groupby(**gb_target)
            # whitelisted methods set the selection before applying
            # a bit of a hack to make sure the cythonized shift
            # is equivalent to pre 0.17.1 behavior
            if op == 'shift':
                gb._set_group_selection()

            if op != 'shift' and 'int' not in gb_target:
                # numeric apply fastpath promotes dtype so have
                # to apply separately and concat
                i = gb[['int']].apply(targop)
                f = gb[['float', 'float_missing']].apply(targop)
                expected = pd.concat([f, i], axis=1)
            else:
                expected = gb.apply(targop)

            expected = expected.sort_index(axis=1)
            tm.assert_frame_equal(expected,
                                  gb.transform(op, *args).sort_index(
                                      axis=1))
            tm.assert_frame_equal(
                expected,
                getattr(gb, op)(*args).sort_index(axis=1))
            # individual columns
            for c in df:
                if c not in ['float', 'int', 'float_missing'
                             ] and op != 'shift':
                    msg = "No numeric types to aggregate"
                    with pytest.raises(DataError, match=msg):
                        gb[c].transform(op)
                    with pytest.raises(DataError, match=msg):
                        getattr(gb[c], op)()
                else:
                    expected = gb[c].apply(targop)
                    expected.name = c
                    tm.assert_series_equal(expected,
                                           gb[c].transform(op, *args))
                    tm.assert_series_equal(expected,
                                           getattr(gb[c], op)(*args))

def test_transform_with_non_scalar_group():
    # GH 10165
    cols = pd.MultiIndex.from_tuples([
        ('syn', 'A'), ('mis', 'A'), ('non', 'A'),
        ('syn', 'C'), ('mis', 'C'), ('non', 'C'),
        ('syn', 'T'), ('mis', 'T'), ('non', 'T'),
        ('syn', 'G'), ('mis', 'G'), ('non', 'G')])
    df = pd.DataFrame(np.random.randint(1, 10, (4, 12)),
                      columns=cols,
                      index=['A', 'C', 'G', 'T'])

    msg = 'transform must return a scalar value for each group.*'
    with pytest.raises(ValueError, match=msg):
        df.groupby(axis=1, level=1).transform(
            lambda z: z.div(z.sum(axis=1), axis=0))

@pytest.mark.parametrize('cols,exp,comp_func', [
    ('a', pd.Series([1, 1, 1], name='a'), tm.assert_series_equal),
    (['a', 'c'], pd.DataFrame({'a': [1, 1, 1], 'c': [1, 1, 1]}),
     tm.assert_frame_equal)
])
@pytest.mark.parametrize('agg_func', [
    'count', 'rank', 'size'])
def test_transform_numeric_ret(cols, exp, comp_func, agg_func):
    if agg_func == 'size' and isinstance(cols, list):
        pytest.xfail("'size' transformation not supported with "
                     "NDFrameGroupBy")

    # GH 19200
    df = pd.DataFrame(
        {'a': pd.date_range('2018-01-01', periods=3),
         'b': range(3),
         'c': range(7, 10)})

    result = df.groupby('b')[cols].transform(agg_func)

    if agg_func == 'rank':
        exp = exp.astype('float')

    comp_func(result, exp)

@pytest.mark.parametrize("mix_groupings", [True, False])
|
||||
@pytest.mark.parametrize("as_series", [True, False])
|
||||
@pytest.mark.parametrize("val1,val2", [
|
||||
('foo', 'bar'), (1, 2), (1., 2.)])
|
||||
@pytest.mark.parametrize("fill_method,limit,exp_vals", [
|
||||
("ffill", None,
|
||||
[np.nan, np.nan, 'val1', 'val1', 'val1', 'val2', 'val2', 'val2']),
|
||||
("ffill", 1,
|
||||
[np.nan, np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan]),
|
||||
("bfill", None,
|
||||
['val1', 'val1', 'val1', 'val2', 'val2', 'val2', np.nan, np.nan]),
|
||||
("bfill", 1,
|
||||
[np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan, np.nan])
|
||||
])
|
||||
def test_group_fill_methods(mix_groupings, as_series, val1, val2,
|
||||
fill_method, limit, exp_vals):
|
||||
vals = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan]
|
||||
_exp_vals = list(exp_vals)
|
||||
# Overwrite placeholder values
|
||||
for index, exp_val in enumerate(_exp_vals):
|
||||
if exp_val == 'val1':
|
||||
_exp_vals[index] = val1
|
||||
elif exp_val == 'val2':
|
||||
_exp_vals[index] = val2
|
||||
|
||||
# Need to modify values and expectations depending on the
|
||||
# Series / DataFrame that we ultimately want to generate
|
||||
if mix_groupings: # ['a', 'b', 'a, 'b', ...]
|
||||
keys = ['a', 'b'] * len(vals)
|
||||
|
||||
def interweave(list_obj):
|
||||
temp = list()
|
||||
for x in list_obj:
|
||||
temp.extend([x, x])
|
||||
|
||||
return temp
|
||||
|
||||
_exp_vals = interweave(_exp_vals)
|
||||
vals = interweave(vals)
|
||||
else: # ['a', 'a', 'a', ... 'b', 'b', 'b']
|
||||
keys = ['a'] * len(vals) + ['b'] * len(vals)
|
||||
_exp_vals = _exp_vals * 2
|
||||
vals = vals * 2
|
||||
|
||||
df = DataFrame({'key': keys, 'val': vals})
|
||||
if as_series:
|
||||
result = getattr(
|
||||
df.groupby('key')['val'], fill_method)(limit=limit)
|
||||
exp = Series(_exp_vals, name='val')
|
||||
assert_series_equal(result, exp)
|
||||
else:
|
||||
result = getattr(df.groupby('key'), fill_method)(limit=limit)
|
||||
exp = DataFrame({'key': keys, 'val': _exp_vals})
|
||||
assert_frame_equal(result, exp)
|
||||
|
||||
|
||||
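def _demo_group_ffill_limit():  # editor's sketch, hypothetical name
    # Illustrative sketch, not part of the original test file: within each
    # group, ``ffill(limit=1)`` propagates a value at most one row forward
    # and never across group boundaries -- a minimal analogue of the
    # parametrized cases above.
    df = DataFrame({'key': ['a', 'a', 'a', 'b', 'b'],
                    'val': [1.0, np.nan, np.nan, np.nan, 2.0]})
    result = df.groupby('key')['val'].ffill(limit=1)
    exp = Series([1.0, 1.0, np.nan, np.nan, 2.0], name='val')
    assert_series_equal(result, exp)
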
@pytest.mark.parametrize("fill_method", ['ffill', 'bfill'])
|
||||
def test_pad_stable_sorting(fill_method):
|
||||
# GH 21207
|
||||
x = [0] * 20
|
||||
y = [np.nan] * 10 + [1] * 10
|
||||
|
||||
if fill_method == 'bfill':
|
||||
y = y[::-1]
|
||||
|
||||
df = pd.DataFrame({'x': x, 'y': y})
|
||||
expected = df.copy()
|
||||
|
||||
result = getattr(df.groupby('x'), fill_method)()
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_series", [True, False])
|
||||
@pytest.mark.parametrize("freq", [
|
||||
None,
|
||||
pytest.param('D', marks=pytest.mark.xfail(
|
||||
reason='GH#23918 before method uses freq in vectorized approach'))])
|
||||
@pytest.mark.parametrize("periods,fill_method,limit", [
|
||||
(1, 'ffill', None), (1, 'ffill', 1),
|
||||
(1, 'bfill', None), (1, 'bfill', 1),
|
||||
(-1, 'ffill', None), (-1, 'ffill', 1),
|
||||
(-1, 'bfill', None), (-1, 'bfill', 1),
|
||||
])
|
||||
def test_pct_change(test_series, freq, periods, fill_method, limit):
|
||||
# GH 21200, 21621
|
||||
vals = [3, np.nan, np.nan, np.nan, 1, 2, 4, 10, np.nan, 4]
|
||||
keys = ['a', 'b']
|
||||
key_v = np.repeat(keys, len(vals))
|
||||
df = DataFrame({'key': key_v, 'vals': vals * 2})
|
||||
|
||||
df_g = getattr(df.groupby('key'), fill_method)(limit=limit)
|
||||
grp = df_g.groupby('key')
|
||||
|
||||
expected = grp['vals'].obj / grp['vals'].shift(periods) - 1
|
||||
|
||||
if test_series:
|
||||
result = df.groupby('key')['vals'].pct_change(
|
||||
periods=periods, fill_method=fill_method, limit=limit, freq=freq)
|
||||
tm.assert_series_equal(result, expected)
|
||||
else:
|
||||
result = df.groupby('key').pct_change(
|
||||
periods=periods, fill_method=fill_method, limit=limit, freq=freq)
|
||||
tm.assert_frame_equal(result, expected.to_frame('vals'))
|
||||
|
||||
|
||||
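def _demo_groupby_pct_change():  # editor's sketch, hypothetical name
    # Illustrative sketch, not part of the original test file: groupwise
    # ``pct_change`` is just x / x.shift(periods) - 1 computed per group,
    # which is the identity the test above checks against.
    df = DataFrame({'key': ['a', 'a', 'a', 'b', 'b'],
                    'vals': [1.0, 2.0, 4.0, 10.0, 15.0]})
    result = df.groupby('key')['vals'].pct_change()
    expected = df['vals'] / df.groupby('key')['vals'].shift(1) - 1
    tm.assert_series_equal(result, expected)
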
@pytest.mark.parametrize("func", [np.any, np.all])
|
||||
def test_any_all_np_func(func):
|
||||
# GH 20653
|
||||
df = pd.DataFrame([['foo', True],
|
||||
[np.nan, True],
|
||||
['foo', True]], columns=['key', 'val'])
|
||||
|
||||
exp = pd.Series([True, np.nan, True], name='val')
|
||||
|
||||
res = df.groupby('key')['val'].transform(func)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
def test_groupby_transform_rename():
    # https://github.com/pandas-dev/pandas/issues/23461
    def demean_rename(x):
        result = x - x.mean()

        if isinstance(x, pd.Series):
            return result

        result = result.rename(
            columns={c: '{}_demeaned'.format(c) for c in result.columns})

        return result

    df = pd.DataFrame({'group': list('ababa'),
                       'value': [1, 1, 1, 2, 2]})
    expected = pd.DataFrame({'value': [-1. / 3, -0.5, -1. / 3, 0.5, 2. / 3]})

    result = df.groupby('group').transform(demean_rename)
    tm.assert_frame_equal(result, expected)
    result_single = df.groupby('group').value.transform(demean_rename)
    tm.assert_series_equal(result_single, expected['value'])
@@ -1,76 +0,0 @@
"""
|
||||
these are systematically testing all of the args to value_counts
|
||||
with different size combinations. This is to ensure stability of the sorting
|
||||
and proper parameter handling
|
||||
"""
|
||||
|
||||
from itertools import product
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, MultiIndex, Series, date_range
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
# our starting frame
|
||||
def seed_df(seed_nans, n, m):
    np.random.seed(1234)
    days = date_range('2015-08-24', periods=10)

    frame = DataFrame({
        '1st': np.random.choice(
            list('abcd'), n),
        '2nd': np.random.choice(days, n),
        '3rd': np.random.randint(1, m + 1, n)
    })

    if seed_nans:
        frame.loc[1::11, '1st'] = np.nan
        frame.loc[3::17, '2nd'] = np.nan
        frame.loc[7::19, '3rd'] = np.nan
        frame.loc[8::19, '3rd'] = np.nan
        frame.loc[9::19, '3rd'] = np.nan

    return frame

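def _demo_value_counts_args():  # editor's sketch, hypothetical name
    # Illustrative sketch, not part of the original test file, of the knobs
    # the parametrized test below sweeps: ``normalize`` turns counts into
    # fractions, ``bins`` buckets numeric values before counting, and
    # ``dropna`` keeps or drops NaN keys.
    s = Series([1, 1, 2, np.nan])
    assert s.value_counts().loc[1] == 2
    assert len(s.value_counts(dropna=False)) == 3  # NaN kept as a key
    assert abs(s.value_counts(normalize=True).loc[1] - 2. / 3) < 1e-12
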
# create input df, keys, and the bins
binned = []
ids = []
for seed_nans in [True, False]:
    for n, m in product((100, 1000), (5, 20)):

        df = seed_df(seed_nans, n, m)
        bins = None, np.arange(0, max(5, df['3rd'].max()) + 1, 2)
        keys = '1st', '2nd', ['1st', '2nd']
        for k, b in product(keys, bins):
            binned.append((df, k, b, n, m))
            ids.append("{}-{}-{}".format(k, n, m))

@pytest.mark.slow
@pytest.mark.parametrize("df, keys, bins, n, m", binned, ids=ids)
def test_series_groupby_value_counts(df, keys, bins, n, m):

    def rebuild_index(df):
        arr = list(map(df.index.get_level_values, range(df.index.nlevels)))
        df.index = MultiIndex.from_arrays(arr, names=df.index.names)
        return df

    for isort, normalize, sort, ascending, dropna \
            in product((False, True), repeat=5):

        kwargs = dict(normalize=normalize, sort=sort,
                      ascending=ascending, dropna=dropna, bins=bins)

        gr = df.groupby(keys, sort=isort)
        left = gr['3rd'].value_counts(**kwargs)

        gr = df.groupby(keys, sort=isort)
        right = gr['3rd'].apply(Series.value_counts, **kwargs)
        right.index.names = right.index.names[:-1] + ['3rd']

        # have to sort on index because of unstable sort on values
        left, right = map(rebuild_index, (left, right))  # xref GH9212
        tm.assert_series_equal(left.sort_index(), right.sort_index())
@@ -1,297 +0,0 @@
"""
|
||||
test methods relating to generic function evaluation
|
||||
the so-called white/black lists
|
||||
"""
|
||||
|
||||
from string import ascii_lowercase
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame, Index, MultiIndex, Series, compat, date_range
|
||||
from pandas.util import testing as tm
|
||||
|
||||
AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew',
|
||||
'mad', 'std', 'var', 'sem']
|
||||
AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad']
|
||||
|
||||
df_whitelist = [
    'quantile',
    'fillna',
    'mad',
    'take',
    'idxmax',
    'idxmin',
    'tshift',
    'skew',
    'plot',
    'hist',
    'dtypes',
    'corrwith',
    'corr',
    'cov',
    'diff',
]


@pytest.fixture(params=df_whitelist)
def df_whitelist_fixture(request):
    return request.param

s_whitelist = [
    'quantile',
    'fillna',
    'mad',
    'take',
    'idxmax',
    'idxmin',
    'tshift',
    'skew',
    'plot',
    'hist',
    'dtype',
    'corr',
    'cov',
    'diff',
    'unique',
    'nlargest',
    'nsmallest',
    'is_monotonic_increasing',
    'is_monotonic_decreasing',
]


@pytest.fixture(params=s_whitelist)
def s_whitelist_fixture(request):
    return request.param

@pytest.fixture
def mframe():
    index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
                                                              'three']],
                       codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                              [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                       names=['first', 'second'])
    return DataFrame(np.random.randn(10, 3), index=index,
                     columns=['A', 'B', 'C'])

@pytest.fixture
def df():
    return DataFrame(
        {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
         'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
         'C': np.random.randn(8),
         'D': np.random.randn(8)})

@pytest.fixture
def df_letters():
    letters = np.array(list(ascii_lowercase))
    N = 10
    random_letters = letters.take(np.random.randint(0, 26, N))
    df = DataFrame({'floats': N / 10 * Series(np.random.random(N)),
                    'letters': Series(random_letters)})
    return df

@pytest.mark.parametrize("whitelist", [df_whitelist, s_whitelist])
|
||||
def test_groupby_whitelist(df_letters, whitelist):
|
||||
df = df_letters
|
||||
if whitelist == df_whitelist:
|
||||
# dataframe
|
||||
obj = df_letters
|
||||
else:
|
||||
obj = df_letters['floats']
|
||||
|
||||
gb = obj.groupby(df.letters)
|
||||
|
||||
assert set(whitelist) == set(gb._apply_whitelist)
|
||||
|
||||
|
||||
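def _demo_whitelist_dispatch():  # editor's sketch, hypothetical name
    # Illustrative sketch, not part of the original test file, of what the
    # whitelist means in practice: listed methods are reachable on the
    # groupby object and dispatch per group, while blacklisted ones raise
    # AttributeError (see test_groupby_blacklist below).
    df = DataFrame({'letters': ['a', 'a', 'b'], 'floats': [1.0, 2.0, 3.0]})
    gb = df.groupby('letters')
    gb.fillna(0)  # 'fillna' is whitelisted
    try:
        gb.eval  # 'eval' is not allowed on a groupby
    except AttributeError:
        pass
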
def check_whitelist(obj, df, m):
    # check the obj for a particular whitelist m

    gb = obj.groupby(df.letters)

    f = getattr(type(gb), m)

    # name
    try:
        n = f.__name__
    except AttributeError:
        return
    assert n == m

    # qualname
    if compat.PY3:
        try:
            n = f.__qualname__
        except AttributeError:
            return
        assert n.endswith(m)

def test_groupby_series_whitelist(df_letters, s_whitelist_fixture):
    m = s_whitelist_fixture
    df = df_letters
    check_whitelist(df.letters, df, m)


def test_groupby_frame_whitelist(df_letters, df_whitelist_fixture):
    m = df_whitelist_fixture
    df = df_letters
    check_whitelist(df, df, m)

@pytest.fixture
def raw_frame():
    index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
                                                              'three']],
                       codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                              [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                       names=['first', 'second'])
    raw_frame = DataFrame(np.random.randn(10, 3), index=index,
                          columns=Index(['A', 'B', 'C'], name='exp'))
    raw_frame.iloc[1, [1, 2]] = np.nan
    raw_frame.iloc[7, [0, 1]] = np.nan
    return raw_frame

@pytest.mark.parametrize('op', AGG_FUNCTIONS)
@pytest.mark.parametrize('level', [0, 1])
@pytest.mark.parametrize('axis', [0, 1])
@pytest.mark.parametrize('skipna', [True, False])
@pytest.mark.parametrize('sort', [True, False])
def test_regression_whitelist_methods(
        raw_frame, op, level,
        axis, skipna, sort):
    # GH6944
    # GH 17537
    # explicitly test the whitelist methods

    if axis == 0:
        frame = raw_frame
    else:
        frame = raw_frame.T

    if op in AGG_FUNCTIONS_WITH_SKIPNA:
        grouped = frame.groupby(level=level, axis=axis, sort=sort)
        result = getattr(grouped, op)(skipna=skipna)
        expected = getattr(frame, op)(level=level, axis=axis,
                                      skipna=skipna)
        if sort:
            expected = expected.sort_index(axis=axis, level=level)
        tm.assert_frame_equal(result, expected)
    else:
        grouped = frame.groupby(level=level, axis=axis, sort=sort)
        result = getattr(grouped, op)()
        expected = getattr(frame, op)(level=level, axis=axis)
        if sort:
            expected = expected.sort_index(axis=axis, level=level)
        tm.assert_frame_equal(result, expected)

def test_groupby_blacklist(df_letters):
    df = df_letters
    s = df_letters.floats

    blacklist = [
        'eval', 'query', 'abs', 'where',
        'mask', 'align', 'groupby', 'clip', 'astype',
        'at', 'combine', 'consolidate', 'convert_objects',
    ]
    to_methods = [method for method in dir(df) if method.startswith('to_')]

    blacklist.extend(to_methods)

    # e.g., to_csv
    defined_but_not_allowed = ("(?:^Cannot.+{0!r}.+{1!r}.+try using the "
                               "'apply' method$)")

    # e.g., query, eval
    not_defined = "(?:^{1!r} object has no attribute {0!r}$)"
    fmt = defined_but_not_allowed + '|' + not_defined
    for bl in blacklist:
        for obj in (df, s):
            gb = obj.groupby(df.letters)
            msg = fmt.format(bl, type(gb).__name__)
            with pytest.raises(AttributeError, match=msg):
                getattr(gb, bl)

def test_tab_completion(mframe):
    grp = mframe.groupby(level='second')
    results = {v for v in dir(grp) if not v.startswith('_')}
    expected = {
        'A', 'B', 'C', 'agg', 'aggregate', 'apply', 'boxplot', 'filter',
        'first', 'get_group', 'groups', 'hist', 'indices', 'last', 'max',
        'mean', 'median', 'min', 'ngroups', 'nth', 'ohlc', 'plot',
        'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count',
        'nunique', 'head', 'describe', 'cummax', 'quantile',
        'rank', 'cumprod', 'tail', 'resample', 'cummin', 'fillna',
        'cumsum', 'cumcount', 'ngroup', 'all', 'shift', 'skew',
        'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith',
        'cov', 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin',
        'ffill', 'bfill', 'pad', 'backfill', 'rolling', 'expanding', 'pipe',
    }
    assert results == expected

def test_groupby_function_rename(mframe):
    grp = mframe.groupby(level='second')
    for name in ['sum', 'prod', 'min', 'max', 'first', 'last']:
        f = getattr(grp, name)
        assert f.__name__ == name

def test_groupby_selection_with_methods(df):
    # some methods which require DatetimeIndex
    rng = date_range('2014', periods=len(df))
    df.index = rng

    g = df.groupby(['A'])[['C']]
    g_exp = df[['C']].groupby(df['A'])
    # TODO check groupby with > 1 col ?

    # methods which are called as .foo()
    methods = ['count',
               'corr',
               'cummax',
               'cummin',
               'cumprod',
               'describe',
               'rank',
               'quantile',
               'diff',
               'shift',
               'all',
               'any',
               'idxmin',
               'idxmax',
               'ffill',
               'bfill',
               'pct_change',
               'tshift']

    for m in methods:
        res = getattr(g, m)()
        exp = getattr(g_exp, m)()

        # should always be frames!
        tm.assert_frame_equal(res, exp)

    # methods which aren't just .foo()
    tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0))
    tm.assert_frame_equal(g.dtypes, g_exp.dtypes)
    tm.assert_frame_equal(g.apply(lambda x: x.sum()),
                          g_exp.apply(lambda x: x.sum()))

    tm.assert_frame_equal(g.resample('D').mean(), g_exp.resample('D').mean())
    tm.assert_frame_equal(g.resample('D').ohlc(),
                          g_exp.resample('D').ohlc())

    tm.assert_frame_equal(g.filter(lambda x: len(x) == 3),
                          g_exp.filter(lambda x: len(x) == 3))