Static code analysis and corrections
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,858 @@
|
||||
# pylint: disable=E1103
|
||||
|
||||
from warnings import catch_warnings
|
||||
|
||||
import numpy as np
|
||||
from numpy.random import randn
|
||||
import pytest
|
||||
|
||||
from pandas._libs import join as libjoin
|
||||
import pandas.compat as compat
|
||||
from pandas.compat import lrange
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, MultiIndex, Series, concat, merge
|
||||
from pandas.tests.reshape.merge.test_merge import NGROUPS, N, get_test_data
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_frame_equal
|
||||
|
||||
a_ = np.array
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
|
||||
class TestJoin(object):
|
||||
|
||||
def setup_method(self, method):
    """Build the shared fixtures: two key/value frames (df, df2) for
    merge-style tests and a mixed-type target/source pair for
    join-on-column tests."""
    # aggregate multiple columns
    self.df = DataFrame({'key1': get_test_data(),
                         'key2': get_test_data(),
                         'data1': np.random.randn(N),
                         'data2': np.random.randn(N)})

    # exclude a couple keys for fun
    self.df = self.df[self.df['key2'] > 1]

    self.df2 = DataFrame({'key1': get_test_data(n=N // 5),
                          'key2': get_test_data(ngroups=NGROUPS // 2,
                                                n=N // 5),
                          'value': np.random.randn(N // 5)})

    index, data = tm.getMixedTypeDict()
    self.target = DataFrame(data, index=index)

    # Join on string value
    self.source = DataFrame({'MergedA': data['A'], 'MergedD': data['D']},
                            index=data['C'])
|
||||
|
||||
def test_cython_left_outer_join(self):
    """Exercise libjoin.left_outer_join directly against hand-computed,
    mergesort-stable take-indexers (-1 marks an unmatched left row)."""
    left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64)
    right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64)
    max_group = 5

    ls, rs = libjoin.left_outer_join(left, right, max_group)

    # stable sort so equal keys keep their original relative order
    exp_ls = left.argsort(kind='mergesort')
    exp_rs = right.argsort(kind='mergesort')

    exp_li = a_([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5,
                 6, 6, 7, 7, 8, 8, 9, 10])
    exp_ri = a_([0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3,
                 4, 5, 4, 5, 4, 5, -1, -1])

    exp_ls = exp_ls.take(exp_li)
    exp_ls[exp_li == -1] = -1

    exp_rs = exp_rs.take(exp_ri)
    exp_rs[exp_ri == -1] = -1

    tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
    tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
|
||||
|
||||
def test_cython_right_outer_join(self):
    """A right outer join is left_outer_join with the operands swapped;
    verify against hand-computed indexers."""
    left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64)
    right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64)
    max_group = 5

    rs, ls = libjoin.left_outer_join(right, left, max_group)

    exp_ls = left.argsort(kind='mergesort')
    exp_rs = right.argsort(kind='mergesort')

    #            0        1    1    1
    exp_li = a_([0, 1, 2, 3, 4, 5, 3, 4, 5, 3, 4, 5,
                 #            2        2           4
                 6, 7, 8, 6, 7, 8, -1])
    exp_ri = a_([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3,
                 4, 4, 4, 5, 5, 5, 6])

    exp_ls = exp_ls.take(exp_li)
    exp_ls[exp_li == -1] = -1

    exp_rs = exp_rs.take(exp_ri)
    exp_rs[exp_ri == -1] = -1

    tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
    tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
|
||||
|
||||
def test_cython_inner_join(self):
    """Exercise libjoin.inner_join against hand-computed indexers; note
    the unmatched key 4 on the right is dropped entirely."""
    left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64)
    right = a_([1, 1, 0, 4, 2, 2, 1, 4], dtype=np.int64)
    max_group = 5

    ls, rs = libjoin.inner_join(left, right, max_group)

    exp_ls = left.argsort(kind='mergesort')
    exp_rs = right.argsort(kind='mergesort')

    exp_li = a_([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5,
                 6, 6, 7, 7, 8, 8])
    exp_ri = a_([0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3,
                 4, 5, 4, 5, 4, 5])

    exp_ls = exp_ls.take(exp_li)
    exp_ls[exp_li == -1] = -1

    exp_rs = exp_rs.take(exp_ri)
    exp_rs[exp_ri == -1] = -1

    tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
    tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
|
||||
|
||||
def test_left_outer_join(self):
    """Left-join expectations checked for a single-key merge and for the
    default merge on both shared keys."""
    for keys, merge_kwargs in ((['key2'], {'on': 'key2'}),
                               (['key1', 'key2'], {})):
        joined = merge(self.df, self.df2, **merge_kwargs)
        _check_join(self.df, self.df2, joined, keys, how='left')
|
||||
|
||||
def test_right_outer_join(self):
    """Right-join expectations checked for single-key and both-key merges."""
    for keys, merge_kwargs in ((['key2'], {'on': 'key2'}),
                               (['key1', 'key2'], {})):
        joined = merge(self.df, self.df2, how='right', **merge_kwargs)
        _check_join(self.df, self.df2, joined, keys, how='right')
|
||||
|
||||
def test_full_outer_join(self):
    """Outer-join expectations checked for single-key and both-key merges."""
    for keys, merge_kwargs in ((['key2'], {'on': 'key2'}),
                               (['key1', 'key2'], {})):
        joined = merge(self.df, self.df2, how='outer', **merge_kwargs)
        _check_join(self.df, self.df2, joined, keys, how='outer')
|
||||
|
||||
def test_inner_join(self):
    """Inner-join expectations checked for single-key and both-key merges."""
    for keys, merge_kwargs in ((['key2'], {'on': 'key2'}),
                               (['key1', 'key2'], {})):
        joined = merge(self.df, self.df2, how='inner', **merge_kwargs)
        _check_join(self.df, self.df2, joined, keys, how='inner')
|
||||
|
||||
def test_handle_overlap(self):
    """Overlapping non-key columns receive the caller-supplied suffixes."""
    merged = merge(self.df, self.df2, on='key2',
                   suffixes=['.foo', '.bar'])

    for suffixed in ('key1.foo', 'key1.bar'):
        assert suffixed in merged
|
||||
|
||||
def test_handle_overlap_arbitrary_key(self):
    """Suffixes are also applied when joining on differently named keys."""
    merged = merge(self.df, self.df2,
                   left_on='key2', right_on='key1',
                   suffixes=['.foo', '.bar'])
    assert all(col in merged for col in ('key1.foo', 'key2.bar'))
|
||||
|
||||
def test_join_on(self):
    """join(on=...) basics: value propagation, duplicate keys, missing
    keys producing NaN, a missing column raising KeyError, and a dtype
    mismatch between key column and index raising ValueError."""
    target = self.target
    source = self.source

    merged = target.join(source, on='C')
    tm.assert_series_equal(merged['MergedA'], target['A'],
                           check_names=False)
    tm.assert_series_equal(merged['MergedD'], target['D'],
                           check_names=False)

    # join with duplicates (fix regression from DataFrame/Matrix merge)
    df = DataFrame({'key': ['a', 'a', 'b', 'b', 'c']})
    df2 = DataFrame({'value': [0, 1, 2]}, index=['a', 'b', 'c'])
    joined = df.join(df2, on='key')
    expected = DataFrame({'key': ['a', 'a', 'b', 'b', 'c'],
                          'value': [0, 0, 1, 1, 2]})
    assert_frame_equal(joined, expected)

    # Test when some are missing
    df_a = DataFrame([[1], [2], [3]], index=['a', 'b', 'c'],
                     columns=['one'])
    df_b = DataFrame([['foo'], ['bar']], index=[1, 2],
                     columns=['two'])
    df_c = DataFrame([[1], [2]], index=[1, 2],
                     columns=['three'])
    joined = df_a.join(df_b, on='one')
    joined = joined.join(df_c, on='one')
    # 'c' has key 3, absent from df_b/df_c -> NaN in both joined columns
    assert np.isnan(joined['two']['c'])
    assert np.isnan(joined['three']['c'])

    # merge column not present
    with pytest.raises(KeyError, match="^'E'$"):
        target.join(source, on='E')

    # overlap: key column dtype (float64) incompatible with index (object)
    source_copy = source.copy()
    source_copy['A'] = 0
    msg = ("You are trying to merge on float64 and object columns. If"
           " you wish to proceed you should use pd.concat")
    with pytest.raises(ValueError, match=msg):
        target.join(source_copy, on='A')
|
||||
|
||||
def test_join_on_fails_with_different_right_index(self):
    """A scalar left_on against a 2-level right index must raise."""
    df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
                    'b': np.random.randn(3)})
    df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
                     'b': np.random.randn(10)},
                    index=tm.makeCustomIndex(10, 2))
    msg = (r'len\(left_on\) must equal the number of levels in the index'
           ' of "right"')
    with pytest.raises(ValueError, match=msg):
        merge(df, df2, left_on='a', right_index=True)
|
||||
|
||||
def test_join_on_fails_with_different_left_index(self):
    """A scalar right_on against a 2-level left index must raise."""
    df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
                    'b': np.random.randn(3)},
                   index=tm.makeCustomIndex(3, 2))
    df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
                     'b': np.random.randn(10)})
    msg = (r'len\(right_on\) must equal the number of levels in the index'
           ' of "left"')
    with pytest.raises(ValueError, match=msg):
        merge(df, df2, right_on='b', left_index=True)
|
||||
|
||||
def test_join_on_fails_with_different_column_counts(self):
    """left_on and right_on of different lengths must raise."""
    df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
                    'b': np.random.randn(3)})
    df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
                     'b': np.random.randn(10)},
                    index=tm.makeCustomIndex(10, 2))
    msg = r"len\(right_on\) must equal len\(left_on\)"
    with pytest.raises(ValueError, match=msg):
        merge(df, df2, right_on='a', left_on=['a', 'b'])
|
||||
|
||||
@pytest.mark.parametrize("wrong_type", [2, 'str', None, np.array([0, 1])])
def test_join_on_fails_with_wrong_object_type(self, wrong_type):
    """merge() rejects non-(Series|DataFrame) operands on either side
    with a TypeError naming the offending type."""
    # GH12081 - original issue

    # GH21220 - merging of Series and DataFrame is now allowed
    # Edited test to remove the Series object from test parameters

    df = DataFrame({'a': [1, 1]})
    msg = ("Can only merge Series or DataFrame objects, a {} was passed"
           .format(str(type(wrong_type))))
    with pytest.raises(TypeError, match=msg):
        merge(wrong_type, df, left_on='a', right_on='a')
    with pytest.raises(TypeError, match=msg):
        merge(df, wrong_type, left_on='a', right_on='a')
|
||||
|
||||
def test_join_on_pass_vector(self):
    """Passing the key values as a Series behaves like on='C' by name."""
    expected = self.target.join(self.source, on='C')
    del expected['C']

    # pop the key column out so only the vector itself can be used
    key_values = self.target.pop('C')
    result = self.target.join(self.source, on=key_values)
    assert_frame_equal(result, expected)
|
||||
|
||||
def test_join_with_len0(self):
    """Joining an emptied source: left join gives all-NA source columns,
    inner join gives an empty frame with the same columns."""
    # nothing to merge
    merged = self.target.join(self.source.reindex([]), on='C')
    for col in self.source:
        assert col in merged
        assert merged[col].isna().all()

    merged2 = self.target.join(self.source.reindex([]), on='C',
                               how='inner')
    tm.assert_index_equal(merged2.columns, merged.columns)
    assert len(merged2) == 0
|
||||
|
||||
def test_join_on_inner(self):
    """Inner join(on=...) equals the left join filtered to matched rows."""
    df = DataFrame({'key': ['a', 'a', 'd', 'b', 'b', 'c']})
    df2 = DataFrame({'value': [0, 1]}, index=['a', 'b'])

    joined = df.join(df2, on='key', how='inner')

    expected = df.join(df2, on='key')
    expected = expected[expected['value'].notna()]
    tm.assert_series_equal(joined['key'], expected['key'],
                           check_dtype=False)
    tm.assert_series_equal(joined['value'], expected['value'],
                           check_dtype=False)
    tm.assert_index_equal(joined.index, expected.index)
|
||||
|
||||
def test_join_on_singlekey_list(self):
    """A one-element key list must be equivalent to the bare key name."""
    left = DataFrame({'key': ['a', 'a', 'b', 'b', 'c']})
    right = DataFrame({'value': [0, 1, 2]}, index=['a', 'b', 'c'])

    # corner case: list form vs scalar form of the same key
    via_list = left.join(right, on=['key'])
    via_scalar = left.join(right, on='key')

    assert_frame_equal(via_list, via_scalar)
|
||||
|
||||
def test_join_on_series(self):
    """Joining a Series equals joining the one-column DataFrame form."""
    as_series = self.target.join(self.source['MergedA'], on='C')
    as_frame = self.target.join(self.source[['MergedA']], on='C')
    assert_frame_equal(as_series, as_frame)
|
||||
|
||||
def test_join_on_series_buglet(self):
    """GH #638: join(on=...) against a named Series broadcasts its values."""
    left = DataFrame({'a': [1, 1]})
    keyed = Series([2], index=[1], name='b')
    result = left.join(keyed, on='a')
    expected = DataFrame({'a': [1, 1],
                          'b': [2, 2]}, index=left.index)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_join_index_mixed(self, join_type):
    """Index joins across frames with disjoint dtypes (no shared blocks),
    both directions, validated against the _join_by_hand reference."""
    # no overlapping blocks
    df1 = DataFrame(index=np.arange(10))
    df1['bool'] = True
    df1['string'] = 'foo'

    df2 = DataFrame(index=np.arange(5, 15))
    df2['int'] = 1
    df2['float'] = 1.

    joined = df1.join(df2, how=join_type)
    expected = _join_by_hand(df1, df2, how=join_type)
    assert_frame_equal(joined, expected)

    joined = df2.join(df1, how=join_type)
    expected = _join_by_hand(df2, df1, how=join_type)
    assert_frame_equal(joined, expected)
|
||||
|
||||
def test_join_index_mixed_overlap(self):
    """Index join with fully overlapping column names: every column
    carries a suffix; checked against the _join_by_hand reference."""
    df1 = DataFrame({'A': 1., 'B': 2, 'C': 'foo', 'D': True},
                    index=np.arange(10),
                    columns=['A', 'B', 'C', 'D'])
    assert df1['B'].dtype == np.int64
    assert df1['D'].dtype == np.bool_

    df2 = DataFrame({'A': 1., 'B': 2, 'C': 'foo', 'D': True},
                    index=np.arange(0, 10, 2),
                    columns=['A', 'B', 'C', 'D'])

    # overlap
    joined = df1.join(df2, lsuffix='_one', rsuffix='_two')
    expected_columns = ['A_one', 'B_one', 'C_one', 'D_one',
                        'A_two', 'B_two', 'C_two', 'D_two']
    df1.columns = expected_columns[:4]
    df2.columns = expected_columns[4:]
    expected = _join_by_hand(df1, df2)
    assert_frame_equal(joined, expected)
|
||||
|
||||
def test_join_empty_bug(self):
|
||||
# generated an exception in 0.4.3
|
||||
x = DataFrame()
|
||||
x.join(DataFrame([3], index=[0], columns=['A']), how='outer')
|
||||
|
||||
def test_join_unconsolidated(self):
|
||||
# GH #331
|
||||
a = DataFrame(randn(30, 2), columns=['a', 'b'])
|
||||
c = Series(randn(30))
|
||||
a['c'] = c
|
||||
d = DataFrame(randn(30, 1), columns=['q'])
|
||||
|
||||
# it works!
|
||||
a.join(d)
|
||||
d.join(a)
|
||||
|
||||
def test_join_multiindex(self):
    """Outer join of two MultiIndexed frames: names survive and the
    result matches explicit reindex-then-join, under sorting by either
    index level."""
    index1 = MultiIndex.from_arrays([['a', 'a', 'a', 'b', 'b', 'b'],
                                     [1, 2, 3, 1, 2, 3]],
                                    names=['first', 'second'])

    index2 = MultiIndex.from_arrays([['b', 'b', 'b', 'c', 'c', 'c'],
                                     [1, 2, 3, 1, 2, 3]],
                                    names=['first', 'second'])

    df1 = DataFrame(data=np.random.randn(6), index=index1,
                    columns=['var X'])
    df2 = DataFrame(data=np.random.randn(6), index=index2,
                    columns=['var Y'])

    df1 = df1.sort_index(level=0)
    df2 = df2.sort_index(level=0)

    joined = df1.join(df2, how='outer')
    # union of tuples, then re-join both frames on that union
    ex_index = Index(index1.values).union(Index(index2.values))
    expected = df1.reindex(ex_index).join(df2.reindex(ex_index))
    expected.index.names = index1.names
    assert_frame_equal(joined, expected)
    assert joined.index.names == index1.names

    # repeat with the frames sorted on the second level
    df1 = df1.sort_index(level=1)
    df2 = df2.sort_index(level=1)

    joined = df1.join(df2, how='outer').sort_index(level=0)
    ex_index = Index(index1.values).union(Index(index2.values))
    expected = df1.reindex(ex_index).join(df2.reindex(ex_index))
    expected.index.names = index1.names

    assert_frame_equal(joined, expected)
    assert joined.index.names == index1.names
|
||||
|
||||
def test_join_inner_multiindex(self):
    """Inner join(on=[k1, k2]) against a MultiIndexed frame equals the
    corresponding two-key merge."""
    key1 = ['bar', 'bar', 'bar', 'foo', 'foo', 'baz', 'baz', 'qux',
            'qux', 'snap']
    key2 = ['two', 'one', 'three', 'one', 'two', 'one', 'two', 'two',
            'three', 'one']

    data = np.random.randn(len(key1))
    data = DataFrame({'key1': key1, 'key2': key2,
                      'data': data})

    index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                               ['one', 'two', 'three']],
                       codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                              [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                       names=['first', 'second'])
    to_join = DataFrame(np.random.randn(10, 3), index=index,
                        columns=['j_one', 'j_two', 'j_three'])

    joined = data.join(to_join, on=['key1', 'key2'], how='inner')
    expected = merge(data, to_join.reset_index(),
                     left_on=['key1', 'key2'],
                     right_on=['first', 'second'], how='inner',
                     sort=False)

    # cross-check the mirror-image merge as well
    expected2 = merge(to_join, data,
                      right_on=['key1', 'key2'], left_index=True,
                      how='inner', sort=False)
    assert_frame_equal(joined, expected2.reindex_like(joined))

    expected2 = merge(to_join, data, right_on=['key1', 'key2'],
                      left_index=True, how='inner', sort=False)

    expected = expected.drop(['first', 'second'], axis=1)
    expected.index = joined.index

    assert joined.index.is_monotonic
    assert_frame_equal(joined, expected)

    # _assert_same_contents(expected, expected2.loc[:, expected.columns])
|
||||
|
||||
def test_join_hierarchical_mixed(self):
    """GH 2024: merging a frame with hierarchical columns against a flat
    one works and warns (GH 9455, 12219) about the column-level merge."""
    df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=['a', 'b', 'c'])
    # aggregation yields MultiIndex columns ('b', 'mean') / ('b', 'sum')
    new_df = df.groupby(['a']).agg({'b': [np.mean, np.sum]})
    other_df = DataFrame(
        [(1, 2, 3), (7, 10, 6)], columns=['a', 'b', 'd'])
    other_df.set_index('a', inplace=True)
    # GH 9455, 12219
    with tm.assert_produces_warning(UserWarning):
        result = merge(new_df, other_df, left_index=True, right_index=True)
    assert ('b', 'mean') in result
    assert 'b' in result
|
||||
|
||||
def test_join_float64_float32(self):
    """Joins and merges must preserve each column's original dtype
    (no silent float32 -> float64 upcasting)."""
    a = DataFrame(randn(10, 2), columns=['a', 'b'], dtype=np.float64)
    b = DataFrame(randn(10, 1), columns=['c'], dtype=np.float32)
    joined = a.join(b)
    assert joined.dtypes['a'] == 'float64'
    assert joined.dtypes['b'] == 'float64'
    assert joined.dtypes['c'] == 'float32'

    a = np.random.randint(0, 5, 100).astype('int64')
    b = np.random.random(100).astype('float64')
    c = np.random.random(100).astype('float32')
    df = DataFrame({'a': a, 'b': b, 'c': c})
    xpdf = DataFrame({'a': a, 'b': b, 'c': c})
    s = DataFrame(np.random.random(5).astype('float32'), columns=['md'])
    rs = df.merge(s, left_on='a', right_index=True)
    assert rs.dtypes['a'] == 'int64'
    assert rs.dtypes['b'] == 'float64'
    assert rs.dtypes['c'] == 'float32'
    assert rs.dtypes['md'] == 'float32'

    xp = xpdf.merge(s, left_on='a', right_index=True)
    assert_frame_equal(rs, xp)
|
||||
|
||||
def test_join_many_non_unique_index(self):
    """Multi-frame join on a non-unique MultiIndex (outer and inner),
    cross-checked against chained pairwise merges; plus GH 11519 where
    all join flavors coincide on a fully matching duplicated index."""
    df1 = DataFrame({"a": [1, 1], "b": [1, 1], "c": [10, 20]})
    df2 = DataFrame({"a": [1, 1], "b": [1, 2], "d": [100, 200]})
    df3 = DataFrame({"a": [1, 1], "b": [1, 2], "e": [1000, 2000]})
    idf1 = df1.set_index(["a", "b"])
    idf2 = df2.set_index(["a", "b"])
    idf3 = df3.set_index(["a", "b"])

    result = idf1.join([idf2, idf3], how='outer')

    df_partially_merged = merge(df1, df2, on=['a', 'b'], how='outer')
    expected = merge(df_partially_merged, df3, on=['a', 'b'], how='outer')

    result = result.reset_index()
    expected = expected[result.columns]
    # outer merge upcasts the keys; realign dtypes for comparison
    expected['a'] = expected.a.astype('int64')
    expected['b'] = expected.b.astype('int64')
    assert_frame_equal(result, expected)

    df1 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 1], "c": [10, 20, 30]})
    df2 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 2], "d": [100, 200, 300]})
    df3 = DataFrame(
        {"a": [1, 1, 1], "b": [1, 1, 2], "e": [1000, 2000, 3000]})
    idf1 = df1.set_index(["a", "b"])
    idf2 = df2.set_index(["a", "b"])
    idf3 = df3.set_index(["a", "b"])
    result = idf1.join([idf2, idf3], how='inner')

    df_partially_merged = merge(df1, df2, on=['a', 'b'], how='inner')
    expected = merge(df_partially_merged, df3, on=['a', 'b'], how='inner')

    result = result.reset_index()

    assert_frame_equal(result, expected.loc[:, result.columns])

    # GH 11519
    df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                          'foo', 'bar', 'foo', 'foo'],
                    'B': ['one', 'one', 'two', 'three',
                          'two', 'two', 'one', 'three'],
                    'C': np.random.randn(8),
                    'D': np.random.randn(8)})
    s = Series(np.repeat(np.arange(8), 2),
               index=np.repeat(np.arange(8), 2), name='TEST')
    inner = df.join(s, how='inner')
    outer = df.join(s, how='outer')
    left = df.join(s, how='left')
    right = df.join(s, how='right')
    # every key matches, so all four join flavors agree
    assert_frame_equal(inner, outer)
    assert_frame_equal(inner, left)
    assert_frame_equal(inner, right)
|
||||
|
||||
def test_join_sort(self):
    """sort=True orders the result by key values; sort=False keeps the
    left frame's original row order."""
    left = DataFrame({'key': ['foo', 'bar', 'baz', 'foo'],
                      'value': [1, 2, 3, 4]})
    right = DataFrame({'value2': ['a', 'b', 'c']},
                      index=['bar', 'baz', 'foo'])

    joined = left.join(right, on='key', sort=True)
    expected = DataFrame({'key': ['bar', 'baz', 'foo', 'foo'],
                          'value': [2, 3, 1, 4],
                          'value2': ['a', 'b', 'c', 'c']},
                         index=[1, 2, 0, 3])
    assert_frame_equal(joined, expected)

    # smoke test
    joined = left.join(right, on='key', sort=False)
    tm.assert_index_equal(joined.index, pd.Index(lrange(4)))
|
||||
|
||||
def test_join_mixed_non_unique_index(self):
    """GH 12814: joins with unorderable (mixed int/str) and duplicated
    index entries must still align correctly."""
    left = DataFrame({'a': [1, 2, 3, 4]}, index=[1, 2, 3, 'a'])
    right = DataFrame({'b': [5, 6, 7, 8]}, index=[1, 3, 3, 4])
    expected = DataFrame({'a': [1, 2, 3, 3, 4],
                          'b': [5, np.nan, 6, 7, np.nan]},
                         index=[1, 2, 3, 3, 'a'])
    tm.assert_frame_equal(left.join(right), expected)

    # duplicates on the left side instead of the right
    left = DataFrame({'a': [1, 2, 3, 4]}, index=[1, 2, 2, 'a'])
    right = DataFrame({'b': [5, 6, 7, 8]}, index=[1, 2, 3, 4])
    expected = DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 6, np.nan]},
                         index=[1, 2, 2, 'a'])
    tm.assert_frame_equal(left.join(right), expected)
|
||||
|
||||
def test_join_non_unique_period_index(self):
    """GH #16871: inner self-join on a non-unique PeriodIndex."""
    index = pd.period_range('2016-01-01', periods=16, freq='M')
    # list(range(...)) replaces the redundant identity comprehension
    # `[i for i in range(len(index))]` -- same values, idiomatic form
    df = DataFrame(list(range(len(index))),
                   index=index, columns=['pnum'])
    df2 = concat([df, df])
    result = df.join(df2, how='inner', rsuffix='_df2')
    # each row matches twice, so both columns repeat every value
    expected = DataFrame(
        np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2),
        columns=['pnum', 'pnum_df2'], index=df2.sort_index().index)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_mixed_type_join_with_suffix(self):
    """GH #916: joining grouped aggregates of a mixed-dtype frame with an
    rsuffix must not raise."""
    frame = DataFrame(np.random.randn(20, 6),
                      columns=['a', 'b', 'c', 'd', 'e', 'f'])
    frame.insert(0, 'id', 0)
    frame.insert(5, 'dt', 'foo')

    by_id = frame.groupby('id')
    mean_agg = by_id.mean()
    count_agg = by_id.count()

    # it works!
    mean_agg.join(count_agg, rsuffix='_right')
|
||||
|
||||
def test_join_many(self):
    """Joining a list of frames: exact reassembly when slices align,
    outer/left/inner semantics when they do not, and a ValueError when
    on= is combined with a list of frames."""
    df = DataFrame(np.random.randn(10, 6), columns=list('abcdef'))
    df_list = [df[['a', 'b']], df[['c', 'd']], df[['e', 'f']]]

    joined = df_list[0].join(df_list[1:])
    tm.assert_frame_equal(joined, df)

    # overlapping but unequal row slices
    df_list = [df[['a', 'b']][:-2],
               df[['c', 'd']][2:], df[['e', 'f']][1:9]]

    def _check_diff_index(df_list, result, exp_index):
        # reference: reindex every piece onto the expected index first
        reindexed = [x.reindex(exp_index) for x in df_list]
        expected = reindexed[0].join(reindexed[1:])
        tm.assert_frame_equal(result, expected)

    # different join types
    joined = df_list[0].join(df_list[1:], how='outer')
    _check_diff_index(df_list, joined, df.index)

    joined = df_list[0].join(df_list[1:])
    _check_diff_index(df_list, joined, df_list[0].index)

    joined = df_list[0].join(df_list[1:], how='inner')
    _check_diff_index(df_list, joined, df.index[2:8])

    msg = "Joining multiple DataFrames only supported for joining on index"
    with pytest.raises(ValueError, match=msg):
        df_list[0].join(df_list[1:], on='a')
|
||||
|
||||
def test_join_many_mixed(self):
    """Joining a list of column-slices reassembles the original frame."""
    frame = DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])
    frame['key'] = ['foo', 'bar'] * 4

    pieces = [frame.loc[:, ['A', 'B']],
              frame.loc[:, ['C', 'D']],
              frame.loc[:, ['key']]]

    reassembled = pieces[0].join(pieces[1:])
    assert_frame_equal(reassembled, frame)
|
||||
|
||||
def test_join_dups(self):
    """Self-join of a frame with duplicate column labels, plus GH 4975:
    chained merges on frames that all share the same column names."""
    # joining dups
    df = concat([DataFrame(np.random.randn(10, 4),
                           columns=['A', 'A', 'B', 'B']),
                 DataFrame(np.random.randint(0, 10, size=20)
                           .reshape(10, 2),
                           columns=['A', 'C'])],
                axis=1)

    expected = concat([df, df], axis=1)
    result = df.join(df, rsuffix='_2')
    # suffixing of duplicated labels differs from plain concat, so only
    # the values are compared after realigning the column labels
    result.columns = expected.columns
    assert_frame_equal(result, expected)

    # GH 4975, invalid join on dups
    w = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
    x = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
    y = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
    z = DataFrame(np.random.randn(4, 2), columns=["x", "y"])

    dta = x.merge(y, left_index=True, right_index=True).merge(
        z, left_index=True, right_index=True, how="outer")
    dta = dta.merge(w, left_index=True, right_index=True)
    expected = concat([x, y, z, w], axis=1)
    expected.columns = ['x_x', 'y_x', 'x_y',
                        'y_y', 'x_x', 'y_x', 'x_y', 'y_y']
    assert_frame_equal(dta, expected)
|
||||
|
||||
def test_panel_join(self):
    """Legacy Panel.join for all four `how` values, compared against
    hand-built expectations (Panel is deprecated; warnings captured)."""
    with catch_warnings(record=True):
        panel = tm.makePanel()
        tm.add_nans(panel)

        p1 = panel.iloc[:2, :10, :3]
        p2 = panel.iloc[2:, 5:, 2:]

        # left join
        result = p1.join(p2)
        expected = p1.copy()
        expected['ItemC'] = p2['ItemC']
        tm.assert_panel_equal(result, expected)

        # right join
        result = p1.join(p2, how='right')
        expected = p2.copy()
        expected['ItemA'] = p1['ItemA']
        expected['ItemB'] = p1['ItemB']
        expected = expected.reindex(items=['ItemA', 'ItemB', 'ItemC'])
        tm.assert_panel_equal(result, expected)

        # inner join
        result = p1.join(p2, how='inner')
        expected = panel.iloc[:, 5:10, 2:3]
        tm.assert_panel_equal(result, expected)

        # outer join
        result = p1.join(p2, how='outer')
        expected = p1.reindex(major=panel.major_axis,
                              minor=panel.minor_axis)
        expected = expected.join(p2.reindex(major=panel.major_axis,
                                            minor=panel.minor_axis))
        tm.assert_panel_equal(result, expected)
|
||||
|
||||
def test_panel_join_overlap(self):
    """Legacy Panel.join with overlapping items gets lsuffix/rsuffix
    applied to the shared item names."""
    with catch_warnings(record=True):
        panel = tm.makePanel()
        tm.add_nans(panel)

        p1 = panel.loc[['ItemA', 'ItemB', 'ItemC']]
        p2 = panel.loc[['ItemB', 'ItemC']]

        # Expected index is
        #
        # ItemA, ItemB_p1, ItemC_p1, ItemB_p2, ItemC_p2
        joined = p1.join(p2, lsuffix='_p1', rsuffix='_p2')
        p1_suf = p1.loc[['ItemB', 'ItemC']].add_suffix('_p1')
        p2_suf = p2.loc[['ItemB', 'ItemC']].add_suffix('_p2')
        no_overlap = panel.loc[['ItemA']]
        expected = no_overlap.join(p1_suf.join(p2_suf))
        tm.assert_panel_equal(joined, expected)
|
||||
|
||||
def test_panel_join_many(self):
    """Legacy Panel.join with a list of panels: exact reassembly, inner
    and outer semantics via Panel.from_dict, and unsupported-option
    errors (suffixes, how='right')."""
    with catch_warnings(record=True):
        # temporarily widen tm's default panel width
        tm.K = 10
        panel = tm.makePanel()
        tm.K = 4

        panels = [panel.iloc[:2], panel.iloc[2:6], panel.iloc[6:]]

        joined = panels[0].join(panels[1:])
        tm.assert_panel_equal(joined, panel)

        panels = [panel.iloc[:2, :-5],
                  panel.iloc[2:6, 2:],
                  panel.iloc[6:, 5:-7]]

        data_dict = {}
        for p in panels:
            data_dict.update(p.iteritems())

        joined = panels[0].join(panels[1:], how='inner')
        expected = pd.Panel.from_dict(data_dict, intersect=True)
        tm.assert_panel_equal(joined, expected)

        joined = panels[0].join(panels[1:], how='outer')
        expected = pd.Panel.from_dict(data_dict, intersect=False)
        tm.assert_panel_equal(joined, expected)

        # edge cases
        msg = "Suffixes not supported when passing multiple panels"
        with pytest.raises(ValueError, match=msg):
            panels[0].join(panels[1:], how='outer', lsuffix='foo',
                           rsuffix='bar')
        msg = "Right join not supported with multiple panels"
        with pytest.raises(ValueError, match=msg):
            panels[0].join(panels[1:], how='right')
|
||||
|
||||
def test_join_multi_to_multi(self, join_type):
    """GH 20475: join(on=[...]) between two MultiIndexed frames equals
    the reset_index/merge round trip; a partial key list must raise."""
    leftindex = MultiIndex.from_product([list('abc'), list('xy'), [1, 2]],
                                        names=['abc', 'xy', 'num'])
    left = DataFrame({'v1': range(12)}, index=leftindex)

    rightindex = MultiIndex.from_product([list('abc'), list('xy')],
                                         names=['abc', 'xy'])
    right = DataFrame({'v2': [100 * i for i in range(1, 7)]},
                      index=rightindex)

    result = left.join(right, on=['abc', 'xy'], how=join_type)
    expected = (left.reset_index()
                    .merge(right.reset_index(),
                           on=['abc', 'xy'], how=join_type)
                    .set_index(['abc', 'xy', 'num'])
                )
    assert_frame_equal(expected, result)

    msg = (r'len\(left_on\) must equal the number of levels in the index'
           ' of "right"')
    with pytest.raises(ValueError, match=msg):
        left.join(right, on='xy', how=join_type)

    with pytest.raises(ValueError, match=msg):
        right.join(left, on=['abc', 'xy'], how=join_type)
|
||||
|
||||
|
||||
def _check_join(left, right, result, join_col, how='left',
                lsuffix='_x', rsuffix='_y'):
    """Validate a merge *result* group-by-group against its inputs.

    For every key group of *result*, the rows contributed by *left* and
    *right* (suffixes stripped) must match the corresponding input group;
    a key absent from one side must produce all-NA columns from that side,
    which is only legal for joins that keep unmatched keys.
    """
    # some smoke tests
    for c in join_col:
        assert(result[c].notna().all())

    left_grouped = left.groupby(join_col)
    right_grouped = right.groupby(join_col)

    for group_key, group in result.groupby(join_col):
        l_joined = _restrict_to_columns(group, left.columns, lsuffix)
        r_joined = _restrict_to_columns(group, right.columns, rsuffix)

        try:
            lgroup = left_grouped.get_group(group_key)
        except KeyError:
            # key missing from left: only allowed for right/outer joins
            if how in ('left', 'inner'):
                raise AssertionError('key %s should not have been in the join'
                                     % str(group_key))

            _assert_all_na(l_joined, left.columns, join_col)
        else:
            _assert_same_contents(l_joined, lgroup)

        try:
            rgroup = right_grouped.get_group(group_key)
        except KeyError:
            # key missing from right: only allowed for left/outer joins
            if how in ('right', 'inner'):
                raise AssertionError('key %s should not have been in the join'
                                     % str(group_key))

            _assert_all_na(r_joined, right.columns, join_col)
        else:
            _assert_same_contents(r_joined, rgroup)
|
||||
|
||||
|
||||
def _restrict_to_columns(group, columns, suffix):
|
||||
found = [c for c in group.columns
|
||||
if c in columns or c.replace(suffix, '') in columns]
|
||||
|
||||
# filter
|
||||
group = group.loc[:, found]
|
||||
|
||||
# get rid of suffixes, if any
|
||||
group = group.rename(columns=lambda x: x.replace(suffix, ''))
|
||||
|
||||
# put in the right order...
|
||||
group = group.loc[:, columns]
|
||||
|
||||
return group
|
||||
|
||||
|
||||
def _assert_same_contents(join_chunk, source):
|
||||
NA_SENTINEL = -1234567 # drop_duplicates not so NA-friendly...
|
||||
|
||||
jvalues = join_chunk.fillna(NA_SENTINEL).drop_duplicates().values
|
||||
svalues = source.fillna(NA_SENTINEL).drop_duplicates().values
|
||||
|
||||
rows = {tuple(row) for row in jvalues}
|
||||
assert(len(rows) == len(source))
|
||||
assert(all(tuple(row) in rows for row in svalues))
|
||||
|
||||
|
||||
def _assert_all_na(join_chunk, source_columns, join_col):
|
||||
for c in source_columns:
|
||||
if c in join_col:
|
||||
continue
|
||||
assert(join_chunk[c].isna().all())
|
||||
|
||||
|
||||
def _join_by_hand(a, b, how='left'):
|
||||
join_index = a.index.join(b.index, how=how)
|
||||
|
||||
a_re = a.reindex(join_index)
|
||||
b_re = b.reindex(join_index)
|
||||
|
||||
result_columns = a.columns.append(b.columns)
|
||||
|
||||
for col, s in compat.iteritems(b_re):
|
||||
a_re[col] = s
|
||||
return a_re.reindex(columns=result_columns)
|
||||
File diff suppressed because it is too large
Load Diff
+1038
File diff suppressed because it is too large
Load Diff
+177
@@ -0,0 +1,177 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame
|
||||
from pandas.util.testing import assert_frame_equal
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df1():
|
||||
return DataFrame(dict(
|
||||
outer=[1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4],
|
||||
inner=[1, 2, 3, 1, 2, 3, 4, 1, 2, 1, 2],
|
||||
v1=np.linspace(0, 1, 11)))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df2():
|
||||
return DataFrame(dict(
|
||||
outer=[1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3],
|
||||
inner=[1, 2, 2, 3, 3, 4, 2, 3, 1, 1, 2, 3],
|
||||
v2=np.linspace(10, 11, 12)))
|
||||
|
||||
|
||||
@pytest.fixture(params=[[], ['outer'], ['outer', 'inner']])
|
||||
def left_df(request, df1):
|
||||
""" Construct left test DataFrame with specified levels
|
||||
(any of 'outer', 'inner', and 'v1')"""
|
||||
levels = request.param
|
||||
if levels:
|
||||
df1 = df1.set_index(levels)
|
||||
|
||||
return df1
|
||||
|
||||
|
||||
@pytest.fixture(params=[[], ['outer'], ['outer', 'inner']])
|
||||
def right_df(request, df2):
|
||||
""" Construct right test DataFrame with specified levels
|
||||
(any of 'outer', 'inner', and 'v2')"""
|
||||
levels = request.param
|
||||
|
||||
if levels:
|
||||
df2 = df2.set_index(levels)
|
||||
|
||||
return df2
|
||||
|
||||
|
||||
def compute_expected(df_left, df_right,
|
||||
on=None, left_on=None, right_on=None, how=None):
|
||||
"""
|
||||
Compute the expected merge result for the test case.
|
||||
|
||||
This method computes the expected result of merging two DataFrames on
|
||||
a combination of their columns and index levels. It does so by
|
||||
explicitly dropping/resetting their named index levels, performing a
|
||||
merge on their columns, and then finally restoring the appropriate
|
||||
index in the result.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
df_left : DataFrame
|
||||
The left DataFrame (may have zero or more named index levels)
|
||||
df_right : DataFrame
|
||||
The right DataFrame (may have zero or more named index levels)
|
||||
on : list of str
|
||||
The on parameter to the merge operation
|
||||
left_on : list of str
|
||||
The left_on parameter to the merge operation
|
||||
right_on : list of str
|
||||
The right_on parameter to the merge operation
|
||||
how : str
|
||||
The how parameter to the merge operation
|
||||
|
||||
Returns
|
||||
-------
|
||||
DataFrame
|
||||
The expected merge result
|
||||
"""
|
||||
|
||||
# Handle on param if specified
|
||||
if on is not None:
|
||||
left_on, right_on = on, on
|
||||
|
||||
# Compute input named index levels
|
||||
left_levels = [n for n in df_left.index.names if n is not None]
|
||||
right_levels = [n for n in df_right.index.names if n is not None]
|
||||
|
||||
# Compute output named index levels
|
||||
output_levels = [i for i in left_on
|
||||
if i in right_levels and i in left_levels]
|
||||
|
||||
# Drop index levels that aren't involved in the merge
|
||||
drop_left = [n for n in left_levels if n not in left_on]
|
||||
if drop_left:
|
||||
df_left = df_left.reset_index(drop_left, drop=True)
|
||||
|
||||
drop_right = [n for n in right_levels if n not in right_on]
|
||||
if drop_right:
|
||||
df_right = df_right.reset_index(drop_right, drop=True)
|
||||
|
||||
# Convert remaining index levels to columns
|
||||
reset_left = [n for n in left_levels if n in left_on]
|
||||
if reset_left:
|
||||
df_left = df_left.reset_index(level=reset_left)
|
||||
|
||||
reset_right = [n for n in right_levels if n in right_on]
|
||||
if reset_right:
|
||||
df_right = df_right.reset_index(level=reset_right)
|
||||
|
||||
# Perform merge
|
||||
expected = df_left.merge(df_right,
|
||||
left_on=left_on,
|
||||
right_on=right_on,
|
||||
how=how)
|
||||
|
||||
# Restore index levels
|
||||
if output_levels:
|
||||
expected = expected.set_index(output_levels)
|
||||
|
||||
return expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize('on,how',
|
||||
[(['outer'], 'inner'),
|
||||
(['inner'], 'left'),
|
||||
(['outer', 'inner'], 'right'),
|
||||
(['inner', 'outer'], 'outer')])
|
||||
def test_merge_indexes_and_columns_on(left_df, right_df, on, how):
|
||||
|
||||
# Construct expected result
|
||||
expected = compute_expected(left_df, right_df, on=on, how=how)
|
||||
|
||||
# Perform merge
|
||||
result = left_df.merge(right_df, on=on, how=how)
|
||||
assert_frame_equal(result, expected, check_like=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('left_on,right_on,how',
|
||||
[(['outer'], ['outer'], 'inner'),
|
||||
(['inner'], ['inner'], 'right'),
|
||||
(['outer', 'inner'], ['outer', 'inner'], 'left'),
|
||||
(['inner', 'outer'], ['inner', 'outer'], 'outer')])
|
||||
def test_merge_indexes_and_columns_lefton_righton(
|
||||
left_df, right_df, left_on, right_on, how):
|
||||
|
||||
# Construct expected result
|
||||
expected = compute_expected(left_df, right_df,
|
||||
left_on=left_on,
|
||||
right_on=right_on,
|
||||
how=how)
|
||||
|
||||
# Perform merge
|
||||
result = left_df.merge(right_df,
|
||||
left_on=left_on, right_on=right_on, how=how)
|
||||
assert_frame_equal(result, expected, check_like=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('left_index',
|
||||
['inner', ['inner', 'outer']])
|
||||
def test_join_indexes_and_columns_on(df1, df2, left_index, join_type):
|
||||
|
||||
# Construct left_df
|
||||
left_df = df1.set_index(left_index)
|
||||
|
||||
# Construct right_df
|
||||
right_df = df2.set_index(['outer', 'inner'])
|
||||
|
||||
# Result
|
||||
expected = (left_df.reset_index()
|
||||
.join(right_df, on=['outer', 'inner'], how=join_type,
|
||||
lsuffix='_x', rsuffix='_y')
|
||||
.set_index(left_index))
|
||||
|
||||
# Perform join
|
||||
result = left_df.join(right_df, on=['outer', 'inner'], how=join_type,
|
||||
lsuffix='_x', rsuffix='_y')
|
||||
|
||||
assert_frame_equal(result, expected, check_like=True)
|
||||
+103
@@ -0,0 +1,103 @@
|
||||
from numpy import nan
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, merge_ordered
|
||||
from pandas.util.testing import assert_frame_equal
|
||||
|
||||
|
||||
class TestMergeOrdered(object):
|
||||
|
||||
def setup_method(self, method):
|
||||
self.left = DataFrame({'key': ['a', 'c', 'e'],
|
||||
'lvalue': [1, 2., 3]})
|
||||
|
||||
self.right = DataFrame({'key': ['b', 'c', 'd', 'f'],
|
||||
'rvalue': [1, 2, 3., 4]})
|
||||
|
||||
def test_basic(self):
|
||||
result = merge_ordered(self.left, self.right, on='key')
|
||||
expected = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'f'],
|
||||
'lvalue': [1, nan, 2, nan, 3, nan],
|
||||
'rvalue': [nan, 1, 2, 3, nan, 4]})
|
||||
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
def test_ffill(self):
|
||||
result = merge_ordered(
|
||||
self.left, self.right, on='key', fill_method='ffill')
|
||||
expected = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'f'],
|
||||
'lvalue': [1., 1, 2, 2, 3, 3.],
|
||||
'rvalue': [nan, 1, 2, 3, 3, 4]})
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
def test_multigroup(self):
|
||||
left = pd.concat([self.left, self.left], ignore_index=True)
|
||||
|
||||
left['group'] = ['a'] * 3 + ['b'] * 3
|
||||
|
||||
result = merge_ordered(left, self.right, on='key', left_by='group',
|
||||
fill_method='ffill')
|
||||
expected = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'f'] * 2,
|
||||
'lvalue': [1., 1, 2, 2, 3, 3.] * 2,
|
||||
'rvalue': [nan, 1, 2, 3, 3, 4] * 2})
|
||||
expected['group'] = ['a'] * 6 + ['b'] * 6
|
||||
|
||||
assert_frame_equal(result, expected.loc[:, result.columns])
|
||||
|
||||
result2 = merge_ordered(self.right, left, on='key', right_by='group',
|
||||
fill_method='ffill')
|
||||
assert_frame_equal(result, result2.loc[:, result.columns])
|
||||
|
||||
result = merge_ordered(left, self.right, on='key', left_by='group')
|
||||
assert result['group'].notna().all()
|
||||
|
||||
def test_merge_type(self):
|
||||
class NotADataFrame(DataFrame):
|
||||
|
||||
@property
|
||||
def _constructor(self):
|
||||
return NotADataFrame
|
||||
|
||||
nad = NotADataFrame(self.left)
|
||||
result = nad.merge(self.right, on='key')
|
||||
|
||||
assert isinstance(result, NotADataFrame)
|
||||
|
||||
def test_empty_sequence_concat(self):
|
||||
# GH 9157
|
||||
empty_pat = "[Nn]o objects"
|
||||
none_pat = "objects.*None"
|
||||
test_cases = [
|
||||
((), empty_pat),
|
||||
([], empty_pat),
|
||||
({}, empty_pat),
|
||||
([None], none_pat),
|
||||
([None, None], none_pat)
|
||||
]
|
||||
for df_seq, pattern in test_cases:
|
||||
with pytest.raises(ValueError, match=pattern):
|
||||
pd.concat(df_seq)
|
||||
|
||||
pd.concat([pd.DataFrame()])
|
||||
pd.concat([None, pd.DataFrame()])
|
||||
pd.concat([pd.DataFrame(), None])
|
||||
|
||||
def test_doc_example(self):
|
||||
left = DataFrame({'group': list('aaabbb'),
|
||||
'key': ['a', 'c', 'e', 'a', 'c', 'e'],
|
||||
'lvalue': [1, 2, 3] * 2,
|
||||
})
|
||||
|
||||
right = DataFrame({'key': ['b', 'c', 'd'],
|
||||
'rvalue': [1, 2, 3]})
|
||||
|
||||
result = merge_ordered(left, right, fill_method='ffill',
|
||||
left_by='group')
|
||||
|
||||
expected = DataFrame({'group': list('aaaaabbbbb'),
|
||||
'key': ['a', 'b', 'c', 'd', 'e'] * 2,
|
||||
'lvalue': [1, 1, 2, 2, 3] * 2,
|
||||
'rvalue': [nan, 1, 2, 3, 3] * 2})
|
||||
|
||||
assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,668 @@
|
||||
# pylint: disable=E1103
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
import numpy as np
|
||||
from numpy import nan
|
||||
from numpy.random import randn
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Index, MultiIndex, Series
|
||||
from pandas.core.reshape.concat import concat
|
||||
from pandas.core.reshape.merge import merge
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def left():
|
||||
"""left dataframe (not multi-indexed) for multi-index join tests"""
|
||||
# a little relevant example with NAs
|
||||
key1 = ['bar', 'bar', 'bar', 'foo', 'foo', 'baz', 'baz', 'qux',
|
||||
'qux', 'snap']
|
||||
key2 = ['two', 'one', 'three', 'one', 'two', 'one', 'two', 'two',
|
||||
'three', 'one']
|
||||
|
||||
data = np.random.randn(len(key1))
|
||||
return DataFrame({'key1': key1, 'key2': key2, 'data': data})
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def right():
|
||||
"""right dataframe (multi-indexed) for multi-index join tests"""
|
||||
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
|
||||
['one', 'two', 'three']],
|
||||
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
|
||||
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
|
||||
names=['key1', 'key2'])
|
||||
|
||||
return DataFrame(np.random.randn(10, 3), index=index,
|
||||
columns=['j_one', 'j_two', 'j_three'])
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def left_multi():
|
||||
return (
|
||||
DataFrame(
|
||||
dict(Origin=['A', 'A', 'B', 'B', 'C'],
|
||||
Destination=['A', 'B', 'A', 'C', 'A'],
|
||||
Period=['AM', 'AM', 'IP', 'AM', 'OP'],
|
||||
TripPurp=['hbw', 'nhb', 'hbo', 'nhb', 'hbw'],
|
||||
Trips=[1987, 3647, 2470, 4296, 4444]),
|
||||
columns=['Origin', 'Destination', 'Period',
|
||||
'TripPurp', 'Trips'])
|
||||
.set_index(['Origin', 'Destination', 'Period', 'TripPurp']))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def right_multi():
|
||||
return (
|
||||
DataFrame(
|
||||
dict(Origin=['A', 'A', 'B', 'B', 'C', 'C', 'E'],
|
||||
Destination=['A', 'B', 'A', 'B', 'A', 'B', 'F'],
|
||||
Period=['AM', 'AM', 'IP', 'AM', 'OP', 'IP', 'AM'],
|
||||
LinkType=['a', 'b', 'c', 'b', 'a', 'b', 'a'],
|
||||
Distance=[100, 80, 90, 80, 75, 35, 55]),
|
||||
columns=['Origin', 'Destination', 'Period',
|
||||
'LinkType', 'Distance'])
|
||||
.set_index(['Origin', 'Destination', 'Period', 'LinkType']))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def on_cols_multi():
|
||||
return ['Origin', 'Destination', 'Period']
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def idx_cols_multi():
|
||||
return ['Origin', 'Destination', 'Period', 'TripPurp', 'LinkType']
|
||||
|
||||
|
||||
class TestMergeMulti(object):
|
||||
|
||||
def setup_method(self):
|
||||
self.index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
|
||||
['one', 'two', 'three']],
|
||||
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
|
||||
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
|
||||
names=['first', 'second'])
|
||||
self.to_join = DataFrame(np.random.randn(10, 3), index=self.index,
|
||||
columns=['j_one', 'j_two', 'j_three'])
|
||||
|
||||
# a little relevant example with NAs
|
||||
key1 = ['bar', 'bar', 'bar', 'foo', 'foo', 'baz', 'baz', 'qux',
|
||||
'qux', 'snap']
|
||||
key2 = ['two', 'one', 'three', 'one', 'two', 'one', 'two', 'two',
|
||||
'three', 'one']
|
||||
|
||||
data = np.random.randn(len(key1))
|
||||
self.data = DataFrame({'key1': key1, 'key2': key2,
|
||||
'data': data})
|
||||
|
||||
def test_merge_on_multikey(self, left, right, join_type):
|
||||
on_cols = ['key1', 'key2']
|
||||
result = (left.join(right, on=on_cols, how=join_type)
|
||||
.reset_index(drop=True))
|
||||
|
||||
expected = pd.merge(left, right.reset_index(),
|
||||
on=on_cols, how=join_type)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = (left.join(right, on=on_cols, how=join_type, sort=True)
|
||||
.reset_index(drop=True))
|
||||
|
||||
expected = pd.merge(left, right.reset_index(),
|
||||
on=on_cols, how=join_type, sort=True)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("sort", [False, True])
|
||||
def test_left_join_multi_index(self, left, right, sort):
|
||||
icols = ['1st', '2nd', '3rd']
|
||||
|
||||
def bind_cols(df):
|
||||
iord = lambda a: 0 if a != a else ord(a)
|
||||
f = lambda ts: ts.map(iord) - ord('a')
|
||||
return (f(df['1st']) + f(df['3rd']) * 1e2 +
|
||||
df['2nd'].fillna(0) * 1e4)
|
||||
|
||||
def run_asserts(left, right, sort):
|
||||
res = left.join(right, on=icols, how='left', sort=sort)
|
||||
|
||||
assert len(left) < len(res) + 1
|
||||
assert not res['4th'].isna().any()
|
||||
assert not res['5th'].isna().any()
|
||||
|
||||
tm.assert_series_equal(
|
||||
res['4th'], - res['5th'], check_names=False)
|
||||
result = bind_cols(res.iloc[:, :-2])
|
||||
tm.assert_series_equal(res['4th'], result, check_names=False)
|
||||
assert result.name is None
|
||||
|
||||
if sort:
|
||||
tm.assert_frame_equal(
|
||||
res, res.sort_values(icols, kind='mergesort'))
|
||||
|
||||
out = merge(left, right.reset_index(), on=icols,
|
||||
sort=sort, how='left')
|
||||
|
||||
res.index = np.arange(len(res))
|
||||
tm.assert_frame_equal(out, res)
|
||||
|
||||
lc = list(map(chr, np.arange(ord('a'), ord('z') + 1)))
|
||||
left = DataFrame(np.random.choice(lc, (5000, 2)),
|
||||
columns=['1st', '3rd'])
|
||||
left.insert(1, '2nd', np.random.randint(0, 1000, len(left)))
|
||||
|
||||
i = np.random.permutation(len(left))
|
||||
right = left.iloc[i].copy()
|
||||
|
||||
left['4th'] = bind_cols(left)
|
||||
right['5th'] = - bind_cols(right)
|
||||
right.set_index(icols, inplace=True)
|
||||
|
||||
run_asserts(left, right, sort)
|
||||
|
||||
# inject some nulls
|
||||
left.loc[1::23, '1st'] = np.nan
|
||||
left.loc[2::37, '2nd'] = np.nan
|
||||
left.loc[3::43, '3rd'] = np.nan
|
||||
left['4th'] = bind_cols(left)
|
||||
|
||||
i = np.random.permutation(len(left))
|
||||
right = left.iloc[i, :-1]
|
||||
right['5th'] = - bind_cols(right)
|
||||
right.set_index(icols, inplace=True)
|
||||
|
||||
run_asserts(left, right, sort)
|
||||
|
||||
@pytest.mark.parametrize("sort", [False, True])
|
||||
def test_merge_right_vs_left(self, left, right, sort):
|
||||
# compare left vs right merge with multikey
|
||||
on_cols = ['key1', 'key2']
|
||||
merged_left_right = left.merge(right,
|
||||
left_on=on_cols, right_index=True,
|
||||
how='left', sort=sort)
|
||||
|
||||
merge_right_left = right.merge(left,
|
||||
right_on=on_cols, left_index=True,
|
||||
how='right', sort=sort)
|
||||
|
||||
# Reorder columns
|
||||
merge_right_left = merge_right_left[merged_left_right.columns]
|
||||
|
||||
tm.assert_frame_equal(merged_left_right, merge_right_left)
|
||||
|
||||
def test_compress_group_combinations(self):
|
||||
|
||||
# ~ 40000000 possible unique groups
|
||||
key1 = tm.rands_array(10, 10000)
|
||||
key1 = np.tile(key1, 2)
|
||||
key2 = key1[::-1]
|
||||
|
||||
df = DataFrame({'key1': key1, 'key2': key2,
|
||||
'value1': np.random.randn(20000)})
|
||||
|
||||
df2 = DataFrame({'key1': key1[::2], 'key2': key2[::2],
|
||||
'value2': np.random.randn(10000)})
|
||||
|
||||
# just to hit the label compression code path
|
||||
merge(df, df2, how='outer')
|
||||
|
||||
def test_left_join_index_preserve_order(self):
|
||||
|
||||
on_cols = ['k1', 'k2']
|
||||
left = DataFrame({'k1': [0, 1, 2] * 8,
|
||||
'k2': ['foo', 'bar'] * 12,
|
||||
'v': np.array(np.arange(24), dtype=np.int64)})
|
||||
|
||||
index = MultiIndex.from_tuples([(2, 'bar'), (1, 'foo')])
|
||||
right = DataFrame({'v2': [5, 7]}, index=index)
|
||||
|
||||
result = left.join(right, on=on_cols)
|
||||
|
||||
expected = left.copy()
|
||||
expected['v2'] = np.nan
|
||||
expected.loc[(expected.k1 == 2) & (expected.k2 == 'bar'), 'v2'] = 5
|
||||
expected.loc[(expected.k1 == 1) & (expected.k2 == 'foo'), 'v2'] = 7
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result.sort_values(on_cols, kind='mergesort', inplace=True)
|
||||
expected = left.join(right, on=on_cols, sort=True)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# test join with multi dtypes blocks
|
||||
left = DataFrame({'k1': [0, 1, 2] * 8,
|
||||
'k2': ['foo', 'bar'] * 12,
|
||||
'k3': np.array([0, 1, 2] * 8, dtype=np.float32),
|
||||
'v': np.array(np.arange(24), dtype=np.int32)})
|
||||
|
||||
index = MultiIndex.from_tuples([(2, 'bar'), (1, 'foo')])
|
||||
right = DataFrame({'v2': [5, 7]}, index=index)
|
||||
|
||||
result = left.join(right, on=on_cols)
|
||||
|
||||
expected = left.copy()
|
||||
expected['v2'] = np.nan
|
||||
expected.loc[(expected.k1 == 2) & (expected.k2 == 'bar'), 'v2'] = 5
|
||||
expected.loc[(expected.k1 == 1) & (expected.k2 == 'foo'), 'v2'] = 7
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = result.sort_values(on_cols, kind='mergesort')
|
||||
expected = left.join(right, on=on_cols, sort=True)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_left_join_index_multi_match_multiindex(self):
|
||||
left = DataFrame([
|
||||
['X', 'Y', 'C', 'a'],
|
||||
['W', 'Y', 'C', 'e'],
|
||||
['V', 'Q', 'A', 'h'],
|
||||
['V', 'R', 'D', 'i'],
|
||||
['X', 'Y', 'D', 'b'],
|
||||
['X', 'Y', 'A', 'c'],
|
||||
['W', 'Q', 'B', 'f'],
|
||||
['W', 'R', 'C', 'g'],
|
||||
['V', 'Y', 'C', 'j'],
|
||||
['X', 'Y', 'B', 'd']],
|
||||
columns=['cola', 'colb', 'colc', 'tag'],
|
||||
index=[3, 2, 0, 1, 7, 6, 4, 5, 9, 8])
|
||||
|
||||
right = (DataFrame([
|
||||
['W', 'R', 'C', 0],
|
||||
['W', 'Q', 'B', 3],
|
||||
['W', 'Q', 'B', 8],
|
||||
['X', 'Y', 'A', 1],
|
||||
['X', 'Y', 'A', 4],
|
||||
['X', 'Y', 'B', 5],
|
||||
['X', 'Y', 'C', 6],
|
||||
['X', 'Y', 'C', 9],
|
||||
['X', 'Q', 'C', -6],
|
||||
['X', 'R', 'C', -9],
|
||||
['V', 'Y', 'C', 7],
|
||||
['V', 'R', 'D', 2],
|
||||
['V', 'R', 'D', -1],
|
||||
['V', 'Q', 'A', -3]],
|
||||
columns=['col1', 'col2', 'col3', 'val'])
|
||||
.set_index(['col1', 'col2', 'col3']))
|
||||
|
||||
result = left.join(right, on=['cola', 'colb', 'colc'], how='left')
|
||||
|
||||
expected = DataFrame([
|
||||
['X', 'Y', 'C', 'a', 6],
|
||||
['X', 'Y', 'C', 'a', 9],
|
||||
['W', 'Y', 'C', 'e', nan],
|
||||
['V', 'Q', 'A', 'h', -3],
|
||||
['V', 'R', 'D', 'i', 2],
|
||||
['V', 'R', 'D', 'i', -1],
|
||||
['X', 'Y', 'D', 'b', nan],
|
||||
['X', 'Y', 'A', 'c', 1],
|
||||
['X', 'Y', 'A', 'c', 4],
|
||||
['W', 'Q', 'B', 'f', 3],
|
||||
['W', 'Q', 'B', 'f', 8],
|
||||
['W', 'R', 'C', 'g', 0],
|
||||
['V', 'Y', 'C', 'j', 7],
|
||||
['X', 'Y', 'B', 'd', 5]],
|
||||
columns=['cola', 'colb', 'colc', 'tag', 'val'],
|
||||
index=[3, 3, 2, 0, 1, 1, 7, 6, 6, 4, 4, 5, 9, 8])
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = left.join(right, on=['cola', 'colb', 'colc'],
|
||||
how='left', sort=True)
|
||||
|
||||
expected = expected.sort_values(['cola', 'colb', 'colc'],
|
||||
kind='mergesort')
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_left_join_index_multi_match(self):
|
||||
left = DataFrame([
|
||||
['c', 0],
|
||||
['b', 1],
|
||||
['a', 2],
|
||||
['b', 3]],
|
||||
columns=['tag', 'val'],
|
||||
index=[2, 0, 1, 3])
|
||||
|
||||
right = (DataFrame([
|
||||
['a', 'v'],
|
||||
['c', 'w'],
|
||||
['c', 'x'],
|
||||
['d', 'y'],
|
||||
['a', 'z'],
|
||||
['c', 'r'],
|
||||
['e', 'q'],
|
||||
['c', 's']],
|
||||
columns=['tag', 'char'])
|
||||
.set_index('tag'))
|
||||
|
||||
result = left.join(right, on='tag', how='left')
|
||||
|
||||
expected = DataFrame([
|
||||
['c', 0, 'w'],
|
||||
['c', 0, 'x'],
|
||||
['c', 0, 'r'],
|
||||
['c', 0, 's'],
|
||||
['b', 1, nan],
|
||||
['a', 2, 'v'],
|
||||
['a', 2, 'z'],
|
||||
['b', 3, nan]],
|
||||
columns=['tag', 'val', 'char'],
|
||||
index=[2, 2, 2, 2, 0, 1, 1, 3])
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = left.join(right, on='tag', how='left', sort=True)
|
||||
expected2 = expected.sort_values('tag', kind='mergesort')
|
||||
|
||||
tm.assert_frame_equal(result, expected2)
|
||||
|
||||
# GH7331 - maintain left frame order in left merge
|
||||
result = merge(left, right.reset_index(), how='left', on='tag')
|
||||
expected.index = np.arange(len(expected))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_left_merge_na_buglet(self):
|
||||
left = DataFrame({'id': list('abcde'), 'v1': randn(5),
|
||||
'v2': randn(5), 'dummy': list('abcde'),
|
||||
'v3': randn(5)},
|
||||
columns=['id', 'v1', 'v2', 'dummy', 'v3'])
|
||||
right = DataFrame({'id': ['a', 'b', np.nan, np.nan, np.nan],
|
||||
'sv3': [1.234, 5.678, np.nan, np.nan, np.nan]})
|
||||
|
||||
result = merge(left, right, on='id', how='left')
|
||||
|
||||
rdf = right.drop(['id'], axis=1)
|
||||
expected = left.join(rdf)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_merge_na_keys(self):
|
||||
data = [[1950, "A", 1.5],
|
||||
[1950, "B", 1.5],
|
||||
[1955, "B", 1.5],
|
||||
[1960, "B", np.nan],
|
||||
[1970, "B", 4.],
|
||||
[1950, "C", 4.],
|
||||
[1960, "C", np.nan],
|
||||
[1965, "C", 3.],
|
||||
[1970, "C", 4.]]
|
||||
|
||||
frame = DataFrame(data, columns=["year", "panel", "data"])
|
||||
|
||||
other_data = [[1960, 'A', np.nan],
|
||||
[1970, 'A', np.nan],
|
||||
[1955, 'A', np.nan],
|
||||
[1965, 'A', np.nan],
|
||||
[1965, 'B', np.nan],
|
||||
[1955, 'C', np.nan]]
|
||||
other = DataFrame(other_data, columns=['year', 'panel', 'data'])
|
||||
|
||||
result = frame.merge(other, how='outer')
|
||||
|
||||
expected = frame.fillna(-999).merge(other.fillna(-999), how='outer')
|
||||
expected = expected.replace(-999, np.nan)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("klass", [None, np.asarray, Series, Index])
|
||||
def test_merge_datetime_index(self, klass):
|
||||
# see gh-19038
|
||||
df = DataFrame([1, 2, 3],
|
||||
["2016-01-01", "2017-01-01", "2018-01-01"],
|
||||
columns=["a"])
|
||||
df.index = pd.to_datetime(df.index)
|
||||
on_vector = df.index.year
|
||||
|
||||
if klass is not None:
|
||||
on_vector = klass(on_vector)
|
||||
|
||||
expected = DataFrame(
|
||||
OrderedDict([
|
||||
("a", [1, 2, 3]),
|
||||
("key_1", [2016, 2017, 2018]),
|
||||
])
|
||||
)
|
||||
|
||||
result = df.merge(df, on=["a", on_vector], how="inner")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = DataFrame(
|
||||
OrderedDict([
|
||||
("key_0", [2016, 2017, 2018]),
|
||||
("a_x", [1, 2, 3]),
|
||||
("a_y", [1, 2, 3]),
|
||||
])
|
||||
)
|
||||
|
||||
result = df.merge(df, on=[df.index.year], how="inner")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_join_multi_levels(self):
|
||||
|
||||
# GH 3662
|
||||
# merge multi-levels
|
||||
household = (
|
||||
DataFrame(
|
||||
dict(household_id=[1, 2, 3],
|
||||
male=[0, 1, 0],
|
||||
wealth=[196087.3, 316478.7, 294750]),
|
||||
columns=['household_id', 'male', 'wealth'])
|
||||
.set_index('household_id'))
|
||||
portfolio = (
|
||||
DataFrame(
|
||||
dict(household_id=[1, 2, 2, 3, 3, 3, 4],
|
||||
asset_id=["nl0000301109", "nl0000289783", "gb00b03mlx29",
|
||||
"gb00b03mlx29", "lu0197800237", "nl0000289965",
|
||||
np.nan],
|
||||
name=["ABN Amro", "Robeco", "Royal Dutch Shell",
|
||||
"Royal Dutch Shell",
|
||||
"AAB Eastern Europe Equity Fund",
|
||||
"Postbank BioTech Fonds", np.nan],
|
||||
share=[1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0]),
|
||||
columns=['household_id', 'asset_id', 'name', 'share'])
|
||||
.set_index(['household_id', 'asset_id']))
|
||||
result = household.join(portfolio, how='inner')
|
||||
expected = (
|
||||
DataFrame(
|
||||
dict(male=[0, 1, 1, 0, 0, 0],
|
||||
wealth=[196087.3, 316478.7, 316478.7,
|
||||
294750.0, 294750.0, 294750.0],
|
||||
name=['ABN Amro', 'Robeco', 'Royal Dutch Shell',
|
||||
'Royal Dutch Shell',
|
||||
'AAB Eastern Europe Equity Fund',
|
||||
'Postbank BioTech Fonds'],
|
||||
share=[1.00, 0.40, 0.60, 0.15, 0.60, 0.25],
|
||||
household_id=[1, 2, 2, 3, 3, 3],
|
||||
asset_id=['nl0000301109', 'nl0000289783', 'gb00b03mlx29',
|
||||
'gb00b03mlx29', 'lu0197800237',
|
||||
'nl0000289965']))
|
||||
.set_index(['household_id', 'asset_id'])
|
||||
.reindex(columns=['male', 'wealth', 'name', 'share']))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# equivalency
|
||||
result = (merge(household.reset_index(), portfolio.reset_index(),
|
||||
on=['household_id'], how='inner')
|
||||
.set_index(['household_id', 'asset_id']))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = household.join(portfolio, how='outer')
|
||||
expected = (concat([
|
||||
expected,
|
||||
(DataFrame(
|
||||
dict(share=[1.00]),
|
||||
index=MultiIndex.from_tuples(
|
||||
[(4, np.nan)],
|
||||
names=['household_id', 'asset_id'])))
|
||||
], axis=0, sort=True).reindex(columns=expected.columns))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# invalid cases
|
||||
household.index.name = 'foo'
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
household.join(portfolio, how='inner')
|
||||
|
||||
portfolio2 = portfolio.copy()
|
||||
portfolio2.index.set_names(['household_id', 'foo'])
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
portfolio2.join(portfolio, how='inner')
|
||||
|
||||
def test_join_multi_levels2(self):
|
||||
|
||||
# some more advanced merges
|
||||
# GH6360
|
||||
household = (
|
||||
DataFrame(
|
||||
dict(household_id=[1, 2, 2, 3, 3, 3, 4],
|
||||
asset_id=["nl0000301109", "nl0000301109", "gb00b03mlx29",
|
||||
"gb00b03mlx29", "lu0197800237", "nl0000289965",
|
||||
np.nan],
|
||||
share=[1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0]),
|
||||
columns=['household_id', 'asset_id', 'share'])
|
||||
.set_index(['household_id', 'asset_id']))
|
||||
|
||||
log_return = DataFrame(dict(
|
||||
asset_id=["gb00b03mlx29", "gb00b03mlx29",
|
||||
"gb00b03mlx29", "lu0197800237", "lu0197800237"],
|
||||
t=[233, 234, 235, 180, 181],
|
||||
log_return=[.09604978, -.06524096, .03532373, .03025441, .036997]
|
||||
)).set_index(["asset_id", "t"])
|
||||
|
||||
expected = (
|
||||
DataFrame(dict(
|
||||
household_id=[2, 2, 2, 3, 3, 3, 3, 3],
|
||||
asset_id=["gb00b03mlx29", "gb00b03mlx29",
|
||||
"gb00b03mlx29", "gb00b03mlx29",
|
||||
"gb00b03mlx29", "gb00b03mlx29",
|
||||
"lu0197800237", "lu0197800237"],
|
||||
t=[233, 234, 235, 233, 234, 235, 180, 181],
|
||||
share=[0.6, 0.6, 0.6, 0.15, 0.15, 0.15, 0.6, 0.6],
|
||||
log_return=[.09604978, -.06524096, .03532373,
|
||||
.09604978, -.06524096, .03532373,
|
||||
.03025441, .036997]
|
||||
))
|
||||
.set_index(["household_id", "asset_id", "t"])
|
||||
.reindex(columns=['share', 'log_return']))
|
||||
|
||||
# this is the equivalency
|
||||
result = (merge(household.reset_index(), log_return.reset_index(),
|
||||
on=['asset_id'], how='inner')
|
||||
.set_index(['household_id', 'asset_id', 't']))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = (
|
||||
DataFrame(dict(
|
||||
household_id=[1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4],
|
||||
asset_id=["nl0000301109", "nl0000301109", "gb00b03mlx29",
|
||||
"gb00b03mlx29", "gb00b03mlx29",
|
||||
"gb00b03mlx29", "gb00b03mlx29", "gb00b03mlx29",
|
||||
"lu0197800237", "lu0197800237",
|
||||
"nl0000289965", None],
|
||||
t=[None, None, 233, 234, 235, 233, 234,
|
||||
235, 180, 181, None, None],
|
||||
share=[1.0, 0.4, 0.6, 0.6, 0.6, 0.15,
|
||||
0.15, 0.15, 0.6, 0.6, 0.25, 1.0],
|
||||
log_return=[None, None, .09604978, -.06524096, .03532373,
|
||||
.09604978, -.06524096, .03532373,
|
||||
.03025441, .036997, None, None]
|
||||
))
|
||||
.set_index(["household_id", "asset_id", "t"])
|
||||
.reindex(columns=['share', 'log_return']))
|
||||
|
||||
result = (merge(household.reset_index(), log_return.reset_index(),
|
||||
on=['asset_id'], how='outer')
|
||||
.set_index(['household_id', 'asset_id', 't']))
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class TestJoinMultiMulti(object):
    """Joins between two MultiIndexed frames, validated against an
    equivalent explicit ``pd.merge`` on the index columns."""

    def test_join_multi_multi(self, left_multi, right_multi, join_type,
                              on_cols_multi, idx_cols_multi):
        # A MultiIndex-on-MultiIndex join must agree with a column-wise
        # merge followed by re-indexing on the same key columns.
        merged = pd.merge(left_multi.reset_index(),
                          right_multi.reset_index(),
                          how=join_type, on=on_cols_multi)
        expected = merged.set_index(idx_cols_multi).sort_index()

        result = left_multi.join(right_multi, how=join_type).sort_index()
        tm.assert_frame_equal(result, expected)

    def test_join_multi_empty_frames(self, left_multi, right_multi, join_type,
                                     on_cols_multi, idx_cols_multi):
        # Same equivalence, but with all data columns dropped so only the
        # index structure participates in the join.
        left_multi = left_multi.drop(columns=left_multi.columns)
        right_multi = right_multi.drop(columns=right_multi.columns)

        merged = pd.merge(left_multi.reset_index(),
                          right_multi.reset_index(),
                          how=join_type, on=on_cols_multi)
        expected = merged.set_index(idx_cols_multi).sort_index()

        result = left_multi.join(right_multi, how=join_type).sort_index()
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("box", [None, np.asarray, Series, Index])
    def test_merge_datetime_index(self, box):
        # see gh-19038
        df = DataFrame([1, 2, 3],
                       ["2016-01-01", "2017-01-01", "2018-01-01"],
                       columns=["a"])
        df.index = pd.to_datetime(df.index)
        on_vector = df.index.year

        if box is not None:
            on_vector = box(on_vector)

        expected = DataFrame(
            OrderedDict([
                ("a", [1, 2, 3]),
                ("key_1", [2016, 2017, 2018]),
            ])
        )

        result = df.merge(df, on=["a", on_vector], how="inner")
        tm.assert_frame_equal(result, expected)

        expected = DataFrame(
            OrderedDict([
                ("key_0", [2016, 2017, 2018]),
                ("a_x", [1, 2, 3]),
                ("a_y", [1, 2, 3]),
            ])
        )

        result = df.merge(df, on=[df.index.year], how="inner")
        tm.assert_frame_equal(result, expected)

    def test_single_common_level(self):
        # Only the 'key' level is shared; join aligns on it and keeps the
        # remaining levels of both sides in the result index.
        index_left = pd.MultiIndex.from_tuples([('K0', 'X0'), ('K0', 'X1'),
                                                ('K1', 'X2')],
                                               names=['key', 'X'])

        left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
                             'B': ['B0', 'B1', 'B2']},
                            index=index_left)

        index_right = pd.MultiIndex.from_tuples([('K0', 'Y0'), ('K1', 'Y1'),
                                                 ('K2', 'Y2'), ('K2', 'Y3')],
                                                names=['key', 'Y'])

        right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'],
                              'D': ['D0', 'D1', 'D2', 'D3']},
                             index=index_right)

        result = left.join(right)
        expected = (pd.merge(left.reset_index(), right.reset_index(),
                             on=['key'], how='inner')
                    .set_index(['key', 'X', 'Y']))

        tm.assert_frame_equal(result, expected)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,458 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical, DataFrame, DatetimeIndex, Index, Interval, IntervalIndex,
|
||||
Series, TimedeltaIndex, Timestamp, cut, date_range, isna, qcut,
|
||||
timedelta_range, to_datetime)
|
||||
from pandas.api.types import CategoricalDtype as CDT
|
||||
import pandas.core.reshape.tile as tmod
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
def test_simple():
    """cut with labels=False returns integer bin codes, not intervals."""
    data = np.ones(5, dtype="int64")
    result = cut(data, 4, labels=False)

    expected = np.array([1, 1, 1, 1, 1])
    tm.assert_numpy_array_equal(result, expected, check_dtype=False)


def test_bins():
    """retbins=True also returns the computed bin edges."""
    data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1])
    result, bins = cut(data, 3, retbins=True)

    intervals = IntervalIndex.from_breaks(bins.round(3)).take([0, 0, 0,
                                                               1, 2, 0])
    expected = Categorical(intervals, ordered=True)

    tm.assert_categorical_equal(result, expected)
    tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667,
                                           6.53333333, 9.7]))


def test_right():
    """With right=True the bins are right-closed."""
    data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575])
    result, bins = cut(data, 4, right=True, retbins=True)

    intervals = IntervalIndex.from_breaks(bins.round(3))
    expected = Categorical(intervals, ordered=True).take([0, 0, 0, 2,
                                                          3, 0, 0])

    tm.assert_categorical_equal(result, expected)
    tm.assert_almost_equal(bins, np.array([0.1905, 2.575, 4.95, 7.325, 9.7]))


def test_no_right():
    """With right=False the bins are left-closed instead."""
    data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575])
    result, bins = cut(data, 4, right=False, retbins=True)

    intervals = IntervalIndex.from_breaks(bins.round(3), closed="left")
    intervals = intervals.take([0, 0, 0, 2, 3, 0, 1])
    expected = Categorical(intervals, ordered=True)

    tm.assert_categorical_equal(result, expected)
    tm.assert_almost_equal(bins, np.array([0.2, 2.575, 4.95, 7.325, 9.7095]))


def test_array_like():
    """A plain Python list is treated like an ndarray input."""
    data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1]
    result, bins = cut(data, 3, retbins=True)

    intervals = IntervalIndex.from_breaks(bins.round(3)).take([0, 0, 0,
                                                               1, 2, 0])
    expected = Categorical(intervals, ordered=True)

    tm.assert_categorical_equal(result, expected)
    tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667,
                                           6.53333333, 9.7]))
|
||||
|
||||
|
||||
def test_bins_from_interval_index():
|
||||
c = cut(range(5), 3)
|
||||
expected = c
|
||||
result = cut(range(5), bins=expected.categories)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
expected = Categorical.from_codes(np.append(c.codes, -1),
|
||||
categories=c.categories,
|
||||
ordered=True)
|
||||
result = cut(range(6), bins=expected.categories)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
|
||||
def test_bins_from_interval_index_doc_example():
|
||||
# Make sure we preserve the bins.
|
||||
ages = np.array([10, 15, 13, 12, 23, 25, 28, 59, 60])
|
||||
c = cut(ages, bins=[0, 18, 35, 70])
|
||||
expected = IntervalIndex.from_tuples([(0, 18), (18, 35), (35, 70)])
|
||||
tm.assert_index_equal(c.categories, expected)
|
||||
|
||||
result = cut([25, 20, 50], bins=c.categories)
|
||||
tm.assert_index_equal(result.categories, expected)
|
||||
tm.assert_numpy_array_equal(result.codes,
|
||||
np.array([1, 1, 2], dtype="int8"))
|
||||
|
||||
|
||||
def test_bins_not_overlapping_from_interval_index():
|
||||
# see gh-23980
|
||||
msg = "Overlapping IntervalIndex is not accepted"
|
||||
ii = IntervalIndex.from_tuples([(0, 10), (2, 12), (4, 14)])
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
cut([5, 6], bins=ii)
|
||||
|
||||
|
||||
def test_bins_not_monotonic():
    """Non-monotonic explicit bin edges are rejected."""
    msg = "bins must increase monotonically"
    data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1]

    with pytest.raises(ValueError, match=msg):
        cut(data, [0.1, 1.5, 1, 10])


def test_wrong_num_labels():
    """len(labels) must be exactly len(bins) - 1."""
    msg = "Bin labels must be one fewer than the number of bin edges"
    data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1]

    with pytest.raises(ValueError, match=msg):
        cut(data, [0, 1, 10], labels=["foo", "bar", "baz"])


@pytest.mark.parametrize("x,bins,msg", [
    ([], 2, "Cannot cut empty array"),
    ([1, 2, 3], 0.5, "`bins` should be a positive integer")
])
def test_cut_corner(x, bins, msg):
    """Degenerate inputs raise an informative ValueError."""
    with pytest.raises(ValueError, match=msg):
        cut(x, bins)


@pytest.mark.parametrize("arg", [2, np.eye(2), DataFrame(np.eye(2))])
@pytest.mark.parametrize("cut_func", [cut, qcut])
def test_cut_not_1d_arg(arg, cut_func):
    """cut/qcut only accept one-dimensional input."""
    msg = "Input array must be 1 dimensional"
    with pytest.raises(ValueError, match=msg):
        cut_func(arg, 2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('data', [
    [0, 1, 2, 3, 4, np.inf],
    [-np.inf, 0, 1, 2, 3, 4],
    [-np.inf, 0, 1, 2, 3, 4, np.inf]])
def test_int_bins_with_inf(data):
    """An integer bin count cannot be combined with infinite data (GH 24314)."""
    msg = 'cannot specify integer `bins` when input data contains infinity'
    with pytest.raises(ValueError, match=msg):
        cut(data, bins=3)
|
||||
|
||||
|
||||
def test_cut_out_of_range_more():
    """Values outside the bin range become NaN; the name survives (gh-1511)."""
    name = "x"

    ser = Series([0, -1, 0, 1, -3], name=name)
    ind = cut(ser, [0, 1], labels=False)

    exp = Series([np.nan, np.nan, np.nan, 0, np.nan], name=name)
    tm.assert_series_equal(ind, exp)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("right,breaks,closed", [
    (True, [-1e-3, 0.25, 0.5, 0.75, 1], "right"),
    (False, [0, 0.25, 0.5, 0.75, 1 + 1e-3], "left")
])
def test_labels(right, breaks, closed):
    """The `right` flag controls which side of each bin is closed."""
    arr = np.tile(np.arange(0, 1.01, 0.1), 4)

    result, bins = cut(arr, 4, retbins=True, right=right)
    ex_levels = IntervalIndex.from_breaks(breaks, closed=closed)
    tm.assert_index_equal(result.categories, ex_levels)
|
||||
|
||||
|
||||
def test_cut_pass_series_name_to_factor():
    """The input Series name is propagated to the categorical result."""
    name = "foo"
    ser = Series(np.random.randn(100), name=name)

    factor = cut(ser, 4)
    assert factor.name == name


def test_label_precision():
    """`precision` controls the rounding of the generated bin edges."""
    arr = np.arange(0, 0.73, 0.01)
    result = cut(arr, 4, precision=2)

    ex_levels = IntervalIndex.from_breaks([-0.00072, 0.18, 0.36, 0.54, 0.72])
    tm.assert_index_equal(result.categories, ex_levels)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("labels", [None, False])
def test_na_handling(labels):
    """NaNs in the input stay NaN in the output, with or without labels."""
    arr = np.arange(0, 0.75, 0.01)
    arr[::3] = np.nan

    result = cut(arr, 4, labels=labels)
    result = np.asarray(result)

    # Every position that was NaN on input must be NaN on output; all
    # other positions are compared against themselves (always equal).
    expected = np.where(isna(arr), np.nan, result)
    tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
def test_inf_handling():
    """Infinite bin edges yield unbounded intervals for array and Series."""
    data = np.arange(6)
    data_ser = Series(data, dtype="int64")

    bins = [-np.inf, 2, 4, np.inf]
    result = cut(data, bins)
    result_ser = cut(data_ser, bins)

    ex_uniques = IntervalIndex.from_breaks(bins)
    tm.assert_index_equal(result.categories, ex_uniques)

    # The extreme values fall into the open-ended first/last intervals.
    assert result[5] == Interval(4, np.inf)
    assert result[0] == Interval(-np.inf, 2)
    assert result_ser[5] == Interval(4, np.inf)
    assert result_ser[0] == Interval(-np.inf, 2)


def test_cut_out_of_bounds():
    """Values left of the first or right of the last edge become NaN."""
    arr = np.random.randn(100)
    result = cut(arr, [-1, 0, 1])

    mask = isna(result)
    ex_mask = (arr < -1) | (arr > 1)
    tm.assert_numpy_array_equal(mask, ex_mask)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("get_labels,get_expected", [
    (lambda labels: labels,
     lambda labels: Categorical(["Medium"] + 4 * ["Small"] +
                                ["Medium", "Large"],
                                categories=labels, ordered=True)),
    (lambda labels: Categorical.from_codes([0, 1, 2], labels),
     lambda labels: Categorical.from_codes([1] + 4 * [0] + [1, 2], labels))
])
def test_cut_pass_labels(get_labels, get_expected):
    """Explicit labels (list or Categorical) are attached to the bins."""
    bins = [0, 25, 50, 100]
    arr = [50, 5, 10, 15, 20, 30, 70]
    labels = ["Small", "Medium", "Large"]

    result = cut(arr, bins, labels=get_labels(labels))
    tm.assert_categorical_equal(result, get_expected(labels))
|
||||
|
||||
|
||||
def test_cut_pass_labels_compat():
    """A plain list of labels behaves like an ordered Categorical (gh-16459)."""
    arr = [50, 5, 10, 15, 20, 30, 70]
    labels = ["Good", "Medium", "Bad"]

    result = cut(arr, 3, labels=labels)
    exp = cut(arr, 3, labels=Categorical(labels, categories=labels,
                                         ordered=True))
    tm.assert_categorical_equal(result, exp)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("x", [np.arange(11.), np.arange(11.) / 1e10])
def test_round_frac_just_works(x):
    """Smoke test: edge rounding copes with tiny magnitudes."""
    cut(x, 2)


@pytest.mark.parametrize("val,precision,expected", [
    (-117.9998, 3, -118),
    (117.9998, 3, 118),
    (117.9998, 2, 118),
    (0.000123456, 2, 0.00012)
])
def test_round_frac(val, precision, expected):
    """_round_frac keeps `precision` significant fraction digits (gh-1979)."""
    result = tmod._round_frac(val, precision=precision)
    assert result == expected
|
||||
|
||||
|
||||
def test_cut_return_intervals():
    """By default cut returns an ordered categorical of intervals."""
    ser = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
    result = cut(ser, 3)

    exp_bins = np.linspace(0, 8, num=4).round(3)
    exp_bins[0] -= 0.008  # first edge is nudged left so the minimum is included

    expected = Series(IntervalIndex.from_breaks(exp_bins, closed="right").take(
        [0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(CDT(ordered=True))
    tm.assert_series_equal(result, expected)


def test_series_ret_bins():
    """retbins=True works for Series input as well (gh-8589)."""
    ser = Series(np.arange(4))
    result, bins = cut(ser, 2, retbins=True)

    expected = Series(IntervalIndex.from_breaks(
        [-0.003, 1.5, 3], closed="right").repeat(2)).astype(CDT(ordered=True))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kwargs,msg", [
    (dict(duplicates="drop"), None),
    (dict(), "Bin edges must be unique"),
    (dict(duplicates="raise"), "Bin edges must be unique"),
    (dict(duplicates="foo"), "invalid value for 'duplicates' parameter")
])
def test_cut_duplicates_bin(kwargs, msg):
    """Duplicate bin edges raise unless duplicates='drop' (gh-20947)."""
    bins = [0, 2, 4, 6, 10, 10]
    values = Series(np.array([1, 3, 5, 7, 9]), index=["a", "b", "c", "d", "e"])

    if msg is not None:
        with pytest.raises(ValueError, match=msg):
            cut(values, bins, **kwargs)
    else:
        # duplicates='drop' must behave as if the edges were de-duplicated.
        result = cut(values, bins, **kwargs)
        expected = cut(values, pd.unique(bins))
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("data", [9.0, -9.0, 0.0])
@pytest.mark.parametrize("length", [1, 2])
def test_single_bin(data, length):
    """A constant column can still be cut into one bin (gh-14652, gh-15428)."""
    ser = Series([data] * length)
    result = cut(ser, 1, labels=False)

    expected = Series([0] * length)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "array_1_writeable,array_2_writeable",
    [(True, True), (True, False), (False, False)])
def test_cut_read_only(array_1_writeable, array_2_writeable):
    """cut must not attempt to mutate read-only bin arrays (issue 18773)."""
    array_1 = np.arange(0, 100, 10)
    array_1.flags.writeable = array_1_writeable

    array_2 = np.arange(0, 100, 10)
    array_2.flags.writeable = array_2_writeable

    hundred_elements = np.arange(100)
    tm.assert_categorical_equal(cut(hundred_elements, array_1),
                                cut(hundred_elements, array_2))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("conv", [
    lambda v: Timestamp(v),
    lambda v: to_datetime(v),
    lambda v: np.datetime64(v),
    lambda v: Timestamp(v).to_pydatetime(),
])
def test_datetime_bin(conv):
    """Datetime-like bin edges are accepted in several representations."""
    data = [np.datetime64("2012-12-13"), np.datetime64("2012-12-15")]
    bin_data = ["2012-12-12", "2012-12-14", "2012-12-16"]

    expected = Series(IntervalIndex([
        Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])),
        Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2]))])).astype(
        CDT(ordered=True))

    bins = [conv(v) for v in bin_data]
    result = Series(cut(data, bins=bins))
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("data", [
    to_datetime(Series(["2013-01-01", "2013-01-02", "2013-01-03"])),
    [np.datetime64("2013-01-01"), np.datetime64("2013-01-02"),
     np.datetime64("2013-01-03")],
    np.array([np.datetime64("2013-01-01"), np.datetime64("2013-01-02"),
              np.datetime64("2013-01-03")]),
    DatetimeIndex(["2013-01-01", "2013-01-02", "2013-01-03"])
])
def test_datetime_cut(data):
    """Cutting time data works across collection types (gh-14714)."""
    result, _ = cut(data, 3, retbins=True)
    expected = Series(IntervalIndex([
        Interval(Timestamp("2012-12-31 23:57:07.200000"),
                 Timestamp("2013-01-01 16:00:00")),
        Interval(Timestamp("2013-01-01 16:00:00"),
                 Timestamp("2013-01-02 08:00:00")),
        Interval(Timestamp("2013-01-02 08:00:00"),
                 Timestamp("2013-01-03 00:00:00"))])).astype(CDT(ordered=True))
    tm.assert_series_equal(Series(result), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("bins", [
    3, [Timestamp("2013-01-01 04:57:07.200000"),
        Timestamp("2013-01-01 21:00:00"),
        Timestamp("2013-01-02 13:00:00"),
        Timestamp("2013-01-03 05:00:00")]])
@pytest.mark.parametrize("box", [list, np.array, Index, Series])
def test_datetime_tz_cut(bins, box):
    """tz-aware data keeps its timezone through cut (gh-19872)."""
    tz = "US/Eastern"
    s = Series(date_range("20130101", periods=3, tz=tz))

    if not isinstance(bins, int):
        bins = box(bins)

    result = cut(s, bins)
    expected = Series(IntervalIndex([
        Interval(Timestamp("2012-12-31 23:57:07.200000", tz=tz),
                 Timestamp("2013-01-01 16:00:00", tz=tz)),
        Interval(Timestamp("2013-01-01 16:00:00", tz=tz),
                 Timestamp("2013-01-02 08:00:00", tz=tz)),
        Interval(Timestamp("2013-01-02 08:00:00", tz=tz),
                 Timestamp("2013-01-03 00:00:00", tz=tz))])).astype(
        CDT(ordered=True))
    tm.assert_series_equal(result, expected)


def test_datetime_nan_error():
    """Numeric bins cannot be applied to datetime data."""
    msg = "bins must be of datetime64 dtype"

    with pytest.raises(ValueError, match=msg):
        cut(date_range("20130101", periods=3), bins=[0, 2, 4])
|
||||
|
||||
|
||||
def test_datetime_nan_mask():
    """Out-of-range datetimes become NaN without poisoning the categories."""
    result = cut(date_range("20130102", periods=5),
                 bins=date_range("20130101", periods=2))

    # The single generated category itself must not be NaN ...
    mask = result.categories.isna()
    tm.assert_numpy_array_equal(mask, np.array([False]))

    # ... while every value past the last bin edge is missing.
    mask = result.isna()
    tm.assert_numpy_array_equal(mask, np.array([False, True, True,
                                                True, True]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
def test_datetime_cut_roundtrip(tz):
    """Bins returned by cut reproduce the same cut when fed back (gh-19891)."""
    ser = Series(date_range("20180101", periods=3, tz=tz))
    result, result_bins = cut(ser, 2, retbins=True)

    expected = cut(ser, result_bins)
    tm.assert_series_equal(result, expected)

    expected_bins = DatetimeIndex(["2017-12-31 23:57:07.200000",
                                   "2018-01-02 00:00:00",
                                   "2018-01-03 00:00:00"])
    expected_bins = expected_bins.tz_localize(tz)
    tm.assert_index_equal(result_bins, expected_bins)
|
||||
|
||||
|
||||
def test_timedelta_cut_roundtrip():
    """Same roundtrip guarantee for timedelta data (gh-19891)."""
    ser = Series(timedelta_range("1day", periods=3))
    result, result_bins = cut(ser, 2, retbins=True)

    expected = cut(ser, result_bins)
    tm.assert_series_equal(result, expected)

    expected_bins = TimedeltaIndex(["0 days 23:57:07.200000",
                                    "2 days 00:00:00",
                                    "3 days 00:00:00"])
    tm.assert_index_equal(result_bins, expected_bins)
|
||||
@@ -0,0 +1,718 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable-msg=W0612,E1101
|
||||
|
||||
import numpy as np
|
||||
from numpy import nan
|
||||
import pytest
|
||||
|
||||
from pandas.compat import range
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, lreshape, melt, wide_to_long
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestMelt(object):
    """DataFrame.melt / pd.melt: column naming, MultiIndex handling,
    dtype preservation, and error reporting for missing columns."""

    def setup_method(self, method):
        # 10-row time frame plus two boolean-derived id columns.
        self.df = tm.makeTimeDataFrame()[:10]
        self.df['id1'] = (self.df['A'] > 0).astype(np.int64)
        self.df['id2'] = (self.df['B'] > 0).astype(np.int64)

        self.var_name = 'var'
        self.value_name = 'val'

        # Small frame with a two-level ('CAP', 'low') column MultiIndex.
        self.df1 = pd.DataFrame([[1.067683, -1.110463, 0.20867],
                                 [-1.321405, 0.368915, -1.055342],
                                 [-0.807333, 0.08298, -0.873361]])
        self.df1.columns = [list('ABC'), list('abc')]
        self.df1.columns.names = ['CAP', 'low']

    def test_top_level_method(self):
        result = melt(self.df)
        assert result.columns.tolist() == ['variable', 'value']

    def test_method_signatures(self):
        # The DataFrame method and the top-level function must agree.
        tm.assert_frame_equal(self.df.melt(),
                              melt(self.df))

        tm.assert_frame_equal(self.df.melt(id_vars=['id1', 'id2'],
                                           value_vars=['A', 'B']),
                              melt(self.df,
                                   id_vars=['id1', 'id2'],
                                   value_vars=['A', 'B']))

        tm.assert_frame_equal(self.df.melt(var_name=self.var_name,
                                           value_name=self.value_name),
                              melt(self.df,
                                   var_name=self.var_name,
                                   value_name=self.value_name))

        tm.assert_frame_equal(self.df1.melt(col_level=0),
                              melt(self.df1, col_level=0))

    def test_default_col_names(self):
        result = self.df.melt()
        assert result.columns.tolist() == ['variable', 'value']

        result1 = self.df.melt(id_vars=['id1'])
        assert result1.columns.tolist() == ['id1', 'variable', 'value']

        result2 = self.df.melt(id_vars=['id1', 'id2'])
        assert result2.columns.tolist() == ['id1', 'id2', 'variable', 'value']

    def test_value_vars(self):
        result3 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A')
        assert len(result3) == 10

        result4 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'])
        expected4 = DataFrame({'id1': self.df['id1'].tolist() * 2,
                               'id2': self.df['id2'].tolist() * 2,
                               'variable': ['A'] * 10 + ['B'] * 10,
                               'value': (self.df['A'].tolist() +
                                         self.df['B'].tolist())},
                              columns=['id1', 'id2', 'variable', 'value'])
        tm.assert_frame_equal(result4, expected4)

    def test_value_vars_types(self):
        # GH 15348: tuple, list and ndarray are all valid value_vars.
        expected = DataFrame({'id1': self.df['id1'].tolist() * 2,
                              'id2': self.df['id2'].tolist() * 2,
                              'variable': ['A'] * 10 + ['B'] * 10,
                              'value': (self.df['A'].tolist() +
                                        self.df['B'].tolist())},
                             columns=['id1', 'id2', 'variable', 'value'])

        for type_ in (tuple, list, np.array):
            result = self.df.melt(id_vars=['id1', 'id2'],
                                  value_vars=type_(('A', 'B')))
            tm.assert_frame_equal(result, expected)

    def test_vars_work_with_multiindex(self):
        expected = DataFrame({
            ('A', 'a'): self.df1[('A', 'a')],
            'CAP': ['B'] * len(self.df1),
            'low': ['b'] * len(self.df1),
            'value': self.df1[('B', 'b')],
        }, columns=[('A', 'a'), 'CAP', 'low', 'value'])

        result = self.df1.melt(id_vars=[('A', 'a')], value_vars=[('B', 'b')])
        tm.assert_frame_equal(result, expected)

    def test_single_vars_work_with_multiindex(self):
        expected = DataFrame({
            'A': {0: 1.067683, 1: -1.321405, 2: -0.807333},
            'CAP': {0: 'B', 1: 'B', 2: 'B'},
            'value': {0: -1.110463, 1: 0.368915, 2: 0.08298}})
        result = self.df1.melt(['A'], ['B'], col_level=0)
        tm.assert_frame_equal(result, expected)

    def test_tuple_vars_fail_with_multiindex(self):
        # melt should fail with an informative error message if
        # the columns have a MultiIndex and a tuple is passed
        # for id_vars or value_vars.
        tuple_a = ('A', 'a')
        list_a = [tuple_a]
        tuple_b = ('B', 'b')
        list_b = [tuple_b]

        msg = (r"(id|value)_vars must be a list of tuples when columns are"
               " a MultiIndex")
        for id_vars, value_vars in ((tuple_a, list_b), (list_a, tuple_b),
                                    (tuple_a, tuple_b)):
            with pytest.raises(ValueError, match=msg):
                self.df1.melt(id_vars=id_vars, value_vars=value_vars)

    def test_custom_var_name(self):
        result5 = self.df.melt(var_name=self.var_name)
        assert result5.columns.tolist() == ['var', 'value']

        result6 = self.df.melt(id_vars=['id1'], var_name=self.var_name)
        assert result6.columns.tolist() == ['id1', 'var', 'value']

        result7 = self.df.melt(id_vars=['id1', 'id2'], var_name=self.var_name)
        assert result7.columns.tolist() == ['id1', 'id2', 'var', 'value']

        result8 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A',
                               var_name=self.var_name)
        assert result8.columns.tolist() == ['id1', 'id2', 'var', 'value']

        result9 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'],
                               var_name=self.var_name)
        expected9 = DataFrame({'id1': self.df['id1'].tolist() * 2,
                               'id2': self.df['id2'].tolist() * 2,
                               self.var_name: ['A'] * 10 + ['B'] * 10,
                               'value': (self.df['A'].tolist() +
                                         self.df['B'].tolist())},
                              columns=['id1', 'id2', self.var_name, 'value'])
        tm.assert_frame_equal(result9, expected9)

    def test_custom_value_name(self):
        result10 = self.df.melt(value_name=self.value_name)
        assert result10.columns.tolist() == ['variable', 'val']

        result11 = self.df.melt(id_vars=['id1'], value_name=self.value_name)
        assert result11.columns.tolist() == ['id1', 'variable', 'val']

        result12 = self.df.melt(id_vars=['id1', 'id2'],
                                value_name=self.value_name)
        assert result12.columns.tolist() == ['id1', 'id2', 'variable', 'val']

        result13 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A',
                                value_name=self.value_name)
        assert result13.columns.tolist() == ['id1', 'id2', 'variable', 'val']

        result14 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'],
                                value_name=self.value_name)
        expected14 = DataFrame({'id1': self.df['id1'].tolist() * 2,
                                'id2': self.df['id2'].tolist() * 2,
                                'variable': ['A'] * 10 + ['B'] * 10,
                                self.value_name: (self.df['A'].tolist() +
                                                  self.df['B'].tolist())},
                               columns=['id1', 'id2', 'variable',
                                        self.value_name])
        tm.assert_frame_equal(result14, expected14)

    def test_custom_var_and_value_name(self):

        result15 = self.df.melt(var_name=self.var_name,
                                value_name=self.value_name)
        assert result15.columns.tolist() == ['var', 'val']

        result16 = self.df.melt(id_vars=['id1'], var_name=self.var_name,
                                value_name=self.value_name)
        assert result16.columns.tolist() == ['id1', 'var', 'val']

        result17 = self.df.melt(id_vars=['id1', 'id2'],
                                var_name=self.var_name,
                                value_name=self.value_name)
        assert result17.columns.tolist() == ['id1', 'id2', 'var', 'val']

        result18 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A',
                                var_name=self.var_name,
                                value_name=self.value_name)
        assert result18.columns.tolist() == ['id1', 'id2', 'var', 'val']

        result19 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'],
                                var_name=self.var_name,
                                value_name=self.value_name)
        expected19 = DataFrame({'id1': self.df['id1'].tolist() * 2,
                                'id2': self.df['id2'].tolist() * 2,
                                self.var_name: ['A'] * 10 + ['B'] * 10,
                                self.value_name: (self.df['A'].tolist() +
                                                  self.df['B'].tolist())},
                               columns=['id1', 'id2', self.var_name,
                                        self.value_name])
        tm.assert_frame_equal(result19, expected19)

        # A named columns Index supplies the default var_name.
        df20 = self.df.copy()
        df20.columns.name = 'foo'
        result20 = df20.melt()
        assert result20.columns.tolist() == ['foo', 'value']

    def test_col_level(self):
        res1 = self.df1.melt(col_level=0)
        res2 = self.df1.melt(col_level='CAP')
        assert res1.columns.tolist() == ['CAP', 'value']
        assert res2.columns.tolist() == ['CAP', 'value']

    def test_multiindex(self):
        res = self.df1.melt()
        assert res.columns.tolist() == ['CAP', 'low', 'value']

    @pytest.mark.parametrize("col", [
        pd.Series(pd.date_range('2010', periods=5, tz='US/Pacific')),
        pd.Series(["a", "b", "c", "a", "d"], dtype="category"),
        pd.Series([0, 1, 0, 0, 0])])
    def test_pandas_dtypes(self, col):
        # GH 15785: extension dtypes survive melting.
        df = DataFrame({'klass': range(5),
                        'col': col,
                        'attr1': [1, 0, 0, 0, 0],
                        'attr2': col})
        expected_value = pd.concat([pd.Series([1, 0, 0, 0, 0]), col],
                                   ignore_index=True)
        result = melt(df, id_vars=['klass', 'col'], var_name='attribute',
                      value_name='value')
        expected = DataFrame({0: list(range(5)) * 2,
                              1: pd.concat([col] * 2, ignore_index=True),
                              2: ['attr1'] * 5 + ['attr2'] * 5,
                              3: expected_value})
        expected.columns = ['klass', 'col', 'attribute', 'value']
        tm.assert_frame_equal(result, expected)

    def test_melt_missing_columns_raises(self):
        # GH-23575
        # This test is to ensure that pandas raises an error if melting is
        # attempted with column names absent from the dataframe

        # Generate data
        df = pd.DataFrame(np.random.randn(5, 4), columns=list('abcd'))

        # Try to melt with missing `value_vars` column name
        msg = "The following '{Var}' are not present in the DataFrame: {Col}"
        with pytest.raises(
                KeyError,
                match=msg.format(Var='value_vars', Col="\\['C'\\]")):
            df.melt(['a', 'b'], ['C', 'd'])

        # Try to melt with missing `id_vars` column name
        with pytest.raises(
                KeyError,
                match=msg.format(Var='id_vars', Col="\\['A'\\]")):
            df.melt(['A', 'b'], ['c', 'd'])

        # Multiple missing
        with pytest.raises(
                KeyError,
                match=msg.format(Var='id_vars',
                                 Col="\\['not_here', 'or_there'\\]")):
            df.melt(['a', 'b', 'not_here', 'or_there'], ['c', 'd'])

        # Multiindex melt fails if column is missing from multilevel melt
        multi = df.copy()
        multi.columns = [list('ABCD'), list('abcd')]
        with pytest.raises(
                KeyError,
                match=msg.format(Var='id_vars',
                                 Col="\\['E'\\]")):
            multi.melt([('E', 'a')], [('B', 'b')])
        # Multiindex fails if column is missing from single level melt
        with pytest.raises(
                KeyError,
                match=msg.format(Var='value_vars',
                                 Col="\\['F'\\]")):
            multi.melt(['A'], ['F'], col_level=0)
|
||||
|
||||
|
||||
class TestLreshape(object):
    """lreshape: long-to-wide column groups with and without NaN dropping."""

    def test_pairs(self):
        data = {'birthdt': ['08jan2009', '20dec2008', '30dec2008', '21dec2008',
                            '11jan2009'],
                'birthwt': [1766, 3301, 1454, 3139, 4133],
                'id': [101, 102, 103, 104, 105],
                'sex': ['Male', 'Female', 'Female', 'Female', 'Female'],
                'visitdt1': ['11jan2009', '22dec2008', '04jan2009',
                             '29dec2008', '20jan2009'],
                'visitdt2':
                ['21jan2009', nan, '22jan2009', '31dec2008', '03feb2009'],
                'visitdt3': ['05feb2009', nan, nan, '02jan2009', '15feb2009'],
                'wt1': [1823, 3338, 1549, 3298, 4306],
                'wt2': [2011.0, nan, 1892.0, 3338.0, 4575.0],
                'wt3': [2293.0, nan, nan, 3377.0, 4805.0]}

        df = DataFrame(data)

        # Default: rows where the stacked columns are all-NaN are dropped.
        spec = {'visitdt': ['visitdt%d' % i for i in range(1, 4)],
                'wt': ['wt%d' % i for i in range(1, 4)]}
        result = lreshape(df, spec)

        exp_data = {'birthdt':
                    ['08jan2009', '20dec2008', '30dec2008', '21dec2008',
                     '11jan2009', '08jan2009', '30dec2008', '21dec2008',
                     '11jan2009', '08jan2009', '21dec2008', '11jan2009'],
                    'birthwt': [1766, 3301, 1454, 3139, 4133, 1766, 1454, 3139,
                                4133, 1766, 3139, 4133],
                    'id': [101, 102, 103, 104, 105, 101, 103, 104, 105, 101,
                           104, 105],
                    'sex': ['Male', 'Female', 'Female', 'Female', 'Female',
                            'Male', 'Female', 'Female', 'Female', 'Male',
                            'Female', 'Female'],
                    'visitdt': ['11jan2009', '22dec2008', '04jan2009',
                                '29dec2008', '20jan2009', '21jan2009',
                                '22jan2009', '31dec2008', '03feb2009',
                                '05feb2009', '02jan2009', '15feb2009'],
                    'wt': [1823.0, 3338.0, 1549.0, 3298.0, 4306.0, 2011.0,
                           1892.0, 3338.0, 4575.0, 2293.0, 3377.0, 4805.0]}
        exp = DataFrame(exp_data, columns=result.columns)
        tm.assert_frame_equal(result, exp)

        # dropna=False keeps every (id, visit) combination.
        result = lreshape(df, spec, dropna=False)
        exp_data = {'birthdt':
                    ['08jan2009', '20dec2008', '30dec2008', '21dec2008',
                     '11jan2009', '08jan2009', '20dec2008', '30dec2008',
                     '21dec2008', '11jan2009', '08jan2009', '20dec2008',
                     '30dec2008', '21dec2008', '11jan2009'],
                    'birthwt': [1766, 3301, 1454, 3139, 4133, 1766, 3301, 1454,
                                3139, 4133, 1766, 3301, 1454, 3139, 4133],
                    'id': [101, 102, 103, 104, 105, 101, 102, 103, 104, 105,
                           101, 102, 103, 104, 105],
                    'sex': ['Male', 'Female', 'Female', 'Female', 'Female',
                            'Male', 'Female', 'Female', 'Female', 'Female',
                            'Male', 'Female', 'Female', 'Female', 'Female'],
                    'visitdt': ['11jan2009', '22dec2008', '04jan2009',
                                '29dec2008', '20jan2009', '21jan2009', nan,
                                '22jan2009', '31dec2008', '03feb2009',
                                '05feb2009', nan, nan, '02jan2009',
                                '15feb2009'],
                    'wt': [1823.0, 3338.0, 1549.0, 3298.0, 4306.0, 2011.0, nan,
                           1892.0, 3338.0, 4575.0, 2293.0, nan, nan, 3377.0,
                           4805.0]}
        exp = DataFrame(exp_data, columns=result.columns)
        tm.assert_frame_equal(result, exp)

        # Column groups of unequal length cannot be reshaped together.
        spec = {'visitdt': ['visitdt%d' % i for i in range(1, 3)],
                'wt': ['wt%d' % i for i in range(1, 4)]}
        msg = "All column lists must be same length"
        with pytest.raises(ValueError, match=msg):
            lreshape(df, spec)
|
||||
|
||||
|
||||
class TestWideToLong(object):
    """Tests for pd.wide_to_long: reshaping a wide DataFrame whose columns are
    stubname+suffix (e.g. A1970, A1980) into a long frame indexed by the id
    column(s) ``i`` and the extracted suffix column ``j``."""

    def test_simple(self):
        # Canonical example: stubs A/B with year suffixes 1970/1980.
        np.random.seed(123)
        x = np.random.randn(3)
        df = pd.DataFrame({"A1970": {0: "a",
                                     1: "b",
                                     2: "c"},
                           "A1980": {0: "d",
                                     1: "e",
                                     2: "f"},
                           "B1970": {0: 2.5,
                                     1: 1.2,
                                     2: .7},
                           "B1980": {0: 3.2,
                                     1: 1.3,
                                     2: .1},
                           "X": dict(zip(
                               range(3), x))})
        df["id"] = df.index
        # Non-stub column X is repeated once per suffix value.
        exp_data = {"X": x.tolist() + x.tolist(),
                    "A": ['a', 'b', 'c', 'd', 'e', 'f'],
                    "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1],
                    "year": [1970, 1970, 1970, 1980, 1980, 1980],
                    "id": [0, 1, 2, 0, 1, 2]}
        expected = DataFrame(exp_data)
        expected = expected.set_index(['id', 'year'])[["X", "A", "B"]]
        result = wide_to_long(df, ["A", "B"], i="id", j="year")
        tm.assert_frame_equal(result, expected)

    def test_stubs(self):
        # GH9204: wide_to_long must not mutate the caller's stubnames list.
        df = pd.DataFrame([[0, 1, 2, 3, 8], [4, 5, 6, 7, 9]])
        df.columns = ['id', 'inc1', 'inc2', 'edu1', 'edu2']
        stubs = ['inc', 'edu']

        # TODO: unused?
        df_long = pd.wide_to_long(df, stubs, i='id', j='age')  # noqa

        assert stubs == ['inc', 'edu']

    def test_separating_character(self):
        # GH14779: columns like "A.1970" split on an explicit sep.
        np.random.seed(123)
        x = np.random.randn(3)
        df = pd.DataFrame({"A.1970": {0: "a",
                                      1: "b",
                                      2: "c"},
                           "A.1980": {0: "d",
                                      1: "e",
                                      2: "f"},
                           "B.1970": {0: 2.5,
                                      1: 1.2,
                                      2: .7},
                           "B.1980": {0: 3.2,
                                      1: 1.3,
                                      2: .1},
                           "X": dict(zip(
                               range(3), x))})
        df["id"] = df.index
        exp_data = {"X": x.tolist() + x.tolist(),
                    "A": ['a', 'b', 'c', 'd', 'e', 'f'],
                    "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1],
                    "year": [1970, 1970, 1970, 1980, 1980, 1980],
                    "id": [0, 1, 2, 0, 1, 2]}
        expected = DataFrame(exp_data)
        expected = expected.set_index(['id', 'year'])[["X", "A", "B"]]
        result = wide_to_long(df, ["A", "B"], i="id", j="year", sep=".")
        tm.assert_frame_equal(result, expected)

    def test_escapable_characters(self):
        # Stubnames containing regex metacharacters (parentheses) must be
        # escaped internally rather than treated as a pattern.
        np.random.seed(123)
        x = np.random.randn(3)
        df = pd.DataFrame({"A(quarterly)1970": {0: "a",
                                                1: "b",
                                                2: "c"},
                           "A(quarterly)1980": {0: "d",
                                                1: "e",
                                                2: "f"},
                           "B(quarterly)1970": {0: 2.5,
                                                1: 1.2,
                                                2: .7},
                           "B(quarterly)1980": {0: 3.2,
                                                1: 1.3,
                                                2: .1},
                           "X": dict(zip(
                               range(3), x))})
        df["id"] = df.index
        exp_data = {"X": x.tolist() + x.tolist(),
                    "A(quarterly)": ['a', 'b', 'c', 'd', 'e', 'f'],
                    "B(quarterly)": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1],
                    "year": [1970, 1970, 1970, 1980, 1980, 1980],
                    "id": [0, 1, 2, 0, 1, 2]}
        expected = DataFrame(exp_data)
        expected = expected.set_index(
            ['id', 'year'])[["X", "A(quarterly)", "B(quarterly)"]]
        result = wide_to_long(df, ["A(quarterly)", "B(quarterly)"],
                              i="id", j="year")
        tm.assert_frame_equal(result, expected)

    def test_unbalanced(self):
        # test that we can have a varying amount of time variables
        # (B has no 2011 column, so B is NaN for year 2011).
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': ['X1', 'X1', 'X2', 'X2'],
                    'A': [1.0, 3.0, 2.0, 4.0],
                    'B': [5.0, np.nan, 6.0, np.nan],
                    'id': [0, 0, 1, 1],
                    'year': [2010, 2011, 2010, 2011]}
        expected = pd.DataFrame(exp_data)
        expected = expected.set_index(['id', 'year'])[["X", "A", "B"]]
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result, expected)

    def test_character_overlap(self):
        # Test we handle overlapping characters in both id_vars and value_vars
        # (stub 'B' is a prefix of stub 'BB' and of id columns 'BBBX'/'BBBZ').
        df = pd.DataFrame({
            'A11': ['a11', 'a22', 'a33'],
            'A12': ['a21', 'a22', 'a23'],
            'B11': ['b11', 'b12', 'b13'],
            'B12': ['b21', 'b22', 'b23'],
            'BB11': [1, 2, 3],
            'BB12': [4, 5, 6],
            'BBBX': [91, 92, 93],
            'BBBZ': [91, 92, 93]
        })
        df['id'] = df.index
        expected = pd.DataFrame({
            'BBBX': [91, 92, 93, 91, 92, 93],
            'BBBZ': [91, 92, 93, 91, 92, 93],
            'A': ['a11', 'a22', 'a33', 'a21', 'a22', 'a23'],
            'B': ['b11', 'b12', 'b13', 'b21', 'b22', 'b23'],
            'BB': [1, 2, 3, 4, 5, 6],
            'id': [0, 1, 2, 0, 1, 2],
            'year': [11, 11, 11, 12, 12, 12]})
        expected = expected.set_index(['id', 'year'])[
            ['BBBX', 'BBBZ', 'A', 'B', 'BB']]
        result = wide_to_long(df, ['A', 'B', 'BB'], i='id', j='year')
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1))

    def test_invalid_separator(self):
        # if an invalid separator is supplied a empty data frame is returned
        sep = 'nope!'
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'A2010': [],
                    'A2011': [],
                    'B2010': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})
        expected = expected.set_index(['id', 'year'])[[
            'X', 'A2010', 'A2011', 'B2010', 'A', 'B']]
        # index levels must still reflect the original id values
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep)
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1))

    def test_num_string_disambiguation(self):
        # Test that we can disambiguate number value_vars from
        # string value_vars ('Arating' must not be consumed by stub 'A').
        df = pd.DataFrame({
            'A11': ['a11', 'a22', 'a33'],
            'A12': ['a21', 'a22', 'a23'],
            'B11': ['b11', 'b12', 'b13'],
            'B12': ['b21', 'b22', 'b23'],
            'BB11': [1, 2, 3],
            'BB12': [4, 5, 6],
            'Arating': [91, 92, 93],
            'Arating_old': [91, 92, 93]
        })
        df['id'] = df.index
        expected = pd.DataFrame({
            'Arating': [91, 92, 93, 91, 92, 93],
            'Arating_old': [91, 92, 93, 91, 92, 93],
            'A': ['a11', 'a22', 'a33', 'a21', 'a22', 'a23'],
            'B': ['b11', 'b12', 'b13', 'b21', 'b22', 'b23'],
            'BB': [1, 2, 3, 4, 5, 6],
            'id': [0, 1, 2, 0, 1, 2],
            'year': [11, 11, 11, 12, 12, 12]})
        expected = expected.set_index(['id', 'year'])[
            ['Arating', 'Arating_old', 'A', 'B', 'BB']]
        result = wide_to_long(df, ['A', 'B', 'BB'], i='id', j='year')
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1))

    def test_invalid_suffixtype(self):
        # If all stubs names end with a string, but a numeric suffix is
        # assumed, an empty data frame is returned
        df = pd.DataFrame({'Aone': [1.0, 2.0],
                           'Atwo': [3.0, 4.0],
                           'Bone': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'Aone': [],
                    'Atwo': [],
                    'Bone': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})

        expected = expected.set_index(['id', 'year'])
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1))

    def test_multiple_id_columns(self):
        # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm
        df = pd.DataFrame({
            'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
            'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
            'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
            'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
        })
        expected = pd.DataFrame({
            'ht': [2.8, 3.4, 2.9, 3.8, 2.2, 2.9, 2.0, 3.2, 1.8,
                   2.8, 1.9, 2.4, 2.2, 3.3, 2.3, 3.4, 2.1, 2.9],
            'famid': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3],
            'birth': [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3],
            'age': [1, 2, 1, 2, 1, 2, 1, 2, 1,
                    2, 1, 2, 1, 2, 1, 2, 1, 2]
        })
        expected = expected.set_index(['famid', 'birth', 'age'])[['ht']]
        result = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age')
        tm.assert_frame_equal(result, expected)

    def test_non_unique_idvars(self):
        # GH16382
        # Raise an error message if non unique id vars (i) are passed
        df = pd.DataFrame({
            'A_A1': [1, 2, 3, 4, 5],
            'B_B1': [1, 2, 3, 4, 5],
            'x': [1, 1, 1, 1, 1]
        })
        msg = "the id variables need to uniquely identify each row"
        with pytest.raises(ValueError, match=msg):
            wide_to_long(df, ['A_A', 'B_B'], i='x', j='colname')

    def test_cast_j_int(self):
        # Numeric suffixes parsed from strings are cast to int in index level j.
        df = pd.DataFrame({
            'actor_1': ['CCH Pounder', 'Johnny Depp', 'Christoph Waltz'],
            'actor_2': ['Joel David Moore', 'Orlando Bloom', 'Rory Kinnear'],
            'actor_fb_likes_1': [1000.0, 40000.0, 11000.0],
            'actor_fb_likes_2': [936.0, 5000.0, 393.0],
            'title': ['Avatar', "Pirates of the Caribbean", 'Spectre']})

        expected = pd.DataFrame({
            'actor': ['CCH Pounder',
                      'Johnny Depp',
                      'Christoph Waltz',
                      'Joel David Moore',
                      'Orlando Bloom',
                      'Rory Kinnear'],
            'actor_fb_likes': [1000.0, 40000.0, 11000.0, 936.0, 5000.0, 393.0],
            'num': [1, 1, 1, 2, 2, 2],
            'title': ['Avatar',
                      'Pirates of the Caribbean',
                      'Spectre',
                      'Avatar',
                      'Pirates of the Caribbean',
                      'Spectre']}).set_index(['title', 'num'])
        result = wide_to_long(df, ['actor', 'actor_fb_likes'],
                              i='title', j='num', sep='_')

        tm.assert_frame_equal(result, expected)

    def test_identical_stubnames(self):
        # A stubname equal to an existing column name is ambiguous -> error.
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'A': ['X1', 'X2']})
        msg = "stubname can't be identical to a column name"
        with pytest.raises(ValueError, match=msg):
            wide_to_long(df, ['A', 'B'], i='A', j='colname')

    def test_nonnumeric_suffix(self):
        # suffix regex '[a-z]+' allows purely textual suffixes.
        df = pd.DataFrame({'treatment_placebo': [1.0, 2.0],
                           'treatment_test': [3.0, 4.0],
                           'result_placebo': [5.0, 6.0],
                           'A': ['X1', 'X2']})
        expected = pd.DataFrame({
            'A': ['X1', 'X1', 'X2', 'X2'],
            'colname': ['placebo', 'test', 'placebo', 'test'],
            'result': [5.0, np.nan, 6.0, np.nan],
            'treatment': [1.0, 3.0, 2.0, 4.0]})
        expected = expected.set_index(['A', 'colname'])
        result = wide_to_long(df, ['result', 'treatment'],
                              i='A', j='colname', suffix='[a-z]+', sep='_')
        tm.assert_frame_equal(result, expected)

    def test_mixed_type_suffix(self):
        # suffix '.+' matches both numeric ('1') and textual ('foo') suffixes;
        # suffixes stay as strings in that case.
        df = pd.DataFrame({
            'A': ['X1', 'X2'],
            'result_1': [0, 9],
            'result_foo': [5.0, 6.0],
            'treatment_1': [1.0, 2.0],
            'treatment_foo': [3.0, 4.0]})
        expected = pd.DataFrame({
            'A': ['X1', 'X2', 'X1', 'X2'],
            'colname': ['1', '1', 'foo', 'foo'],
            'result': [0.0, 9.0, 5.0, 6.0],
            'treatment': [1.0, 2.0, 3.0, 4.0]}).set_index(['A', 'colname'])
        result = wide_to_long(df, ['result', 'treatment'],
                              i='A', j='colname', suffix='.+', sep='_')
        tm.assert_frame_equal(result, expected)

    def test_float_suffix(self):
        # suffix '[0-9.]+' parses mixed int/float suffixes as floats.
        df = pd.DataFrame({
            'treatment_1.1': [1.0, 2.0],
            'treatment_2.1': [3.0, 4.0],
            'result_1.2': [5.0, 6.0],
            'result_1': [0, 9],
            'A': ['X1', 'X2']})
        expected = pd.DataFrame({
            'A': ['X1', 'X1', 'X1', 'X1', 'X2', 'X2', 'X2', 'X2'],
            'colname': [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1],
            'result': [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan],
            'treatment': [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0]})
        expected = expected.set_index(['A', 'colname'])
        result = wide_to_long(df, ['result', 'treatment'],
                              i='A', j='colname', suffix='[0-9.]+', sep='_')
        tm.assert_frame_equal(result, expected)

    def test_col_substring_of_stubname(self):
        # GH22468
        # Don't raise ValueError when a column name is a substring
        # of a stubname that's been passed as a string
        wide_data = {'node_id': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4},
                     'A': {0: 0.80, 1: 0.0, 2: 0.25, 3: 1.0, 4: 0.81},
                     'PA0': {0: 0.74, 1: 0.56, 2: 0.56, 3: 0.98, 4: 0.6},
                     'PA1': {0: 0.77, 1: 0.64, 2: 0.52, 3: 0.98, 4: 0.67},
                     'PA3': {0: 0.34, 1: 0.70, 2: 0.52, 3: 0.98, 4: 0.67}
                     }
        wide_df = pd.DataFrame.from_dict(wide_data)
        # Passing the stub as a one-element list and as a bare string must
        # give identical results.
        expected = pd.wide_to_long(wide_df,
                                   stubnames=['PA'],
                                   i=['node_id', 'A'],
                                   j='time')
        result = pd.wide_to_long(wide_df,
                                 stubnames='PA',
                                 i=['node_id', 'A'],
                                 j='time')
        tm.assert_frame_equal(result, expected)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,199 @@
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import zip
|
||||
|
||||
from pandas import (
|
||||
Categorical, DatetimeIndex, Interval, IntervalIndex, NaT, Series,
|
||||
TimedeltaIndex, Timestamp, cut, date_range, isna, qcut, timedelta_range)
|
||||
from pandas.api.types import CategoricalDtype as CDT
|
||||
from pandas.core.algorithms import quantile
|
||||
import pandas.util.testing as tm
|
||||
|
||||
from pandas.tseries.offsets import Day, Nano
|
||||
|
||||
|
||||
def test_qcut():
    """qcut with an integer bin count should place edges at the quantiles."""
    arr = np.random.randn(1000)

    # We store the bins as Index that have been
    # rounded to comparisons are a bit tricky.
    labels, bins = qcut(arr, 4, retbins=True)
    ex_bins = quantile(arr, [0, .25, .5, .75, 1.])

    # Left edges match the lower quantiles (within rounding tolerance).
    result = labels.categories.left.values
    assert np.allclose(result, ex_bins[:-1], atol=1e-2)

    # Right edges match the upper quantiles.
    result = labels.categories.right.values
    assert np.allclose(result, ex_bins[1:], atol=1e-2)

    # qcut over quantile edges is equivalent to cut with those edges.
    ex_levels = cut(arr, ex_bins, include_lowest=True)
    tm.assert_categorical_equal(labels, ex_levels)
|
||||
|
||||
|
||||
def test_qcut_bounds():
    """With ``labels=False``, qcut emits exactly one integer code per bin."""
    sample = np.random.randn(1000)
    codes = qcut(sample, 10, labels=False)
    distinct_codes = np.unique(codes)
    assert distinct_codes.size == 10
|
||||
|
||||
|
||||
def test_qcut_specify_quantiles():
    """Explicit quartile edges must equal qcut with an integer bin count of 4."""
    arr = np.random.randn(100)
    factor = qcut(arr, [0, .25, .5, .75, 1.])

    expected = qcut(arr, 4)
    tm.assert_categorical_equal(factor, expected)


def test_qcut_all_bins_same():
    """All-identical input collapses every quantile edge -> must raise."""
    with pytest.raises(ValueError, match="edges.*unique"):
        qcut([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3)


def test_qcut_include_lowest():
    """The first interval is nudged slightly below the minimum so the lowest
    value is included (left edge -0.001 for data starting at 0)."""
    values = np.arange(10)
    ii = qcut(values, 4)

    ex_levels = IntervalIndex([Interval(-0.001, 2.25), Interval(2.25, 4.5),
                               Interval(4.5, 6.75), Interval(6.75, 9)])
    tm.assert_index_equal(ii.categories, ex_levels)


def test_qcut_nas():
    """NaN input values stay NaN in the binned output."""
    arr = np.random.randn(100)
    arr[:20] = np.nan

    result = qcut(arr, 4)
    assert isna(result[:20]).all()


def test_qcut_index():
    """Two values / two bins produces one interval per value."""
    result = qcut([0, 2], 2)
    intervals = [Interval(-0.001, 1), Interval(1, 2)]

    expected = Categorical(intervals, ordered=True)
    tm.assert_categorical_equal(result, expected)
|
||||
|
||||
|
||||
def test_qcut_binning_issues(datapath):
    # see gh-1978, gh-1979
    # Regression data file exercises edge rounding: bins must be non-empty,
    # strictly increasing, and non-overlapping.
    cut_file = datapath(os.path.join("reshape", "data", "cut_data.csv"))
    arr = np.loadtxt(cut_file)
    result = qcut(arr, 20)

    starts = []
    ends = []

    for lev in np.unique(result):
        s = lev.left
        e = lev.right
        # No degenerate (zero-width) interval.
        assert s != e

        starts.append(float(s))
        ends.append(float(e))

    # Consecutive intervals are ordered and do not overlap.
    for (sp, sn), (ep, en) in zip(zip(starts[:-1], starts[1:]),
                                  zip(ends[:-1], ends[1:])):
        assert sp < sn
        assert ep < en
        assert ep <= sn


def test_qcut_return_intervals():
    """Default qcut output is an ordered Categorical of Interval objects."""
    ser = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
    res = qcut(ser, [0, 0.333, 0.666, 1])

    exp_levels = np.array([Interval(-0.001, 2.664),
                           Interval(2.664, 5.328), Interval(5.328, 8)])
    exp = Series(exp_levels.take([0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(
        CDT(ordered=True))
    tm.assert_series_equal(res, exp)


@pytest.mark.parametrize("kwargs,msg", [
    (dict(duplicates="drop"), None),
    (dict(), "Bin edges must be unique"),
    (dict(duplicates="raise"), "Bin edges must be unique"),
    (dict(duplicates="foo"), "invalid value for 'duplicates' parameter")
])
def test_qcut_duplicates_bin(kwargs, msg):
    # see gh-7751
    # Repeated values collapse quantile edges; behavior is governed by the
    # `duplicates` keyword ("raise" is the default, "drop" merges bins).
    values = [0, 0, 0, 0, 1, 2, 3]

    if msg is not None:
        with pytest.raises(ValueError, match=msg):
            qcut(values, 3, **kwargs)
    else:
        result = qcut(values, 3, **kwargs)
        expected = IntervalIndex([Interval(-0.001, 1), Interval(1, 3)])
        tm.assert_index_equal(result.categories, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("data,start,end", [
    (9.0, 8.999, 9.0),
    (0.0, -0.001, 0.0),
    (-9.0, -9.001, -9.0),
])
@pytest.mark.parametrize("length", [1, 2])
@pytest.mark.parametrize("labels", [None, False])
def test_single_quantile(data, start, end, length, labels):
    # see gh-15431
    # A constant series with a single quantile yields one slightly widened
    # interval (or code 0 everywhere when labels=False).
    ser = Series([data] * length)
    result = qcut(ser, 1, labels=labels)

    if labels is None:
        intervals = IntervalIndex([Interval(start, end)] *
                                  length, closed="right")
        expected = Series(intervals).astype(CDT(ordered=True))
    else:
        expected = Series([0] * length)

    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("ser", [
    Series(DatetimeIndex(["20180101", NaT, "20180103"])),
    Series(TimedeltaIndex(["0 days", NaT, "2 days"]))],
    ids=lambda x: str(x.dtype))
def test_qcut_nat(ser):
    # see gh-19768
    # NaT entries map to NaN bins; datetime/timedelta edges are nudged by
    # one nanosecond on the left, mirroring the numeric -0.001 adjustment.
    intervals = IntervalIndex.from_tuples([
        (ser[0] - Nano(), ser[2] - Day()),
        np.nan, (ser[2] - Day(), ser[2])])
    expected = Series(Categorical(intervals, ordered=True))

    result = qcut(ser, 2)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("bins", [3, np.linspace(0, 1, 4)])
def test_datetime_tz_qcut(bins):
    # see gh-19872
    # Timezone-aware datetimes keep their tz in the resulting intervals,
    # whether bins are given as a count or as explicit quantiles.
    tz = "US/Eastern"
    ser = Series(date_range("20130101", periods=3, tz=tz))

    result = qcut(ser, bins)
    expected = Series(IntervalIndex([
        Interval(Timestamp("2012-12-31 23:59:59.999999999", tz=tz),
                 Timestamp("2013-01-01 16:00:00", tz=tz)),
        Interval(Timestamp("2013-01-01 16:00:00", tz=tz),
                 Timestamp("2013-01-02 08:00:00", tz=tz)),
        Interval(Timestamp("2013-01-02 08:00:00", tz=tz),
                 Timestamp("2013-01-03 00:00:00", tz=tz))])).astype(
        CDT(ordered=True))
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("arg,expected_bins", [
    [timedelta_range("1day", periods=3),
     TimedeltaIndex(["1 days", "2 days", "3 days"])],
    [date_range("20180101", periods=3),
     DatetimeIndex(["2018-01-01", "2018-01-02", "2018-01-03"])]])
def test_date_like_qcut_bins(arg, expected_bins):
    # see gh-19891
    # retbins=True returns datetime-like bins with the input's own index type.
    ser = Series(arg)
    result, result_bins = qcut(ser, 2, retbins=True)
    tm.assert_index_equal(result_bins, expected_bins)
|
||||
@@ -0,0 +1,621 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable-msg=W0612,E1101
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
import numpy as np
|
||||
from numpy import nan
|
||||
import pytest
|
||||
|
||||
from pandas.compat import u
|
||||
|
||||
from pandas.core.dtypes.common import is_integer_dtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Categorical, DataFrame, Index, Series, get_dummies
|
||||
from pandas.core.sparse.api import SparseArray, SparseDtype
|
||||
import pandas.util.testing as tm
|
||||
from pandas.util.testing import assert_frame_equal
|
||||
|
||||
|
||||
class TestGetDummies(object):
|
||||
|
||||
    @pytest.fixture
    def df(self):
        # Small mixed frame: two object columns to dummify plus one numeric
        # pass-through column.
        return DataFrame({'A': ['a', 'b', 'a'],
                          'B': ['b', 'b', 'c'],
                          'C': [1, 2, 3]})

    @pytest.fixture(params=['uint8', 'i8', np.float64, bool, None])
    def dtype(self, request):
        # Requested output dtype for the dummy columns; None means the
        # get_dummies default.
        return np.dtype(request.param)

    @pytest.fixture(params=['dense', 'sparse'])
    def sparse(self, request):
        # params are strings to simplify reading test results,
        # e.g. TestGetDummies::test_basic[uint8-sparse] instead of [uint8-True]
        return request.param == 'sparse'

    def effective_dtype(self, dtype):
        # Resolve the dtype fixture: None falls back to uint8, the
        # get_dummies default output dtype.
        if dtype is None:
            return np.uint8
        return dtype

    def test_raises_on_dtype_object(self, df):
        # object is not a valid dummy dtype.
        with pytest.raises(ValueError):
            get_dummies(df, dtype='object')

    def test_basic(self, sparse, dtype):
        # One indicator column per distinct value, for list, Series and
        # Series-with-custom-index inputs alike.
        s_list = list('abc')
        s_series = Series(s_list)
        s_series_index = Series(s_list, list('ABC'))

        expected = DataFrame({'a': [1, 0, 0],
                              'b': [0, 1, 0],
                              'c': [0, 0, 1]},
                             dtype=self.effective_dtype(dtype))
        if sparse:
            expected = expected.apply(pd.SparseArray, fill_value=0.0)
        result = get_dummies(s_list, sparse=sparse, dtype=dtype)
        assert_frame_equal(result, expected)

        result = get_dummies(s_series, sparse=sparse, dtype=dtype)
        assert_frame_equal(result, expected)

        # A custom Series index is carried through to the dummy frame.
        expected.index = list('ABC')
        result = get_dummies(s_series_index, sparse=sparse, dtype=dtype)
        assert_frame_equal(result, expected)
|
||||
|
||||
    def test_basic_types(self, sparse, dtype):
        # GH 10531
        # Resulting dtypes must honor the requested dtype (and sparse storage)
        # for list, Series and DataFrame inputs.
        s_list = list('abc')
        s_series = Series(s_list)
        s_df = DataFrame({'a': [0, 1, 0, 1, 2],
                          'b': ['A', 'A', 'B', 'C', 'C'],
                          'c': [2, 3, 3, 3, 2]})

        expected = DataFrame({'a': [1, 0, 0],
                              'b': [0, 1, 0],
                              'c': [0, 0, 1]},
                             dtype=self.effective_dtype(dtype),
                             columns=list('abc'))
        if sparse:
            # fill_value must match the dtype family of the dummies.
            if is_integer_dtype(dtype):
                fill_value = 0
            elif dtype == bool:
                fill_value = False
            else:
                fill_value = 0.0

            expected = expected.apply(SparseArray, fill_value=fill_value)
        result = get_dummies(s_list, sparse=sparse, dtype=dtype)
        tm.assert_frame_equal(result, expected)

        result = get_dummies(s_series, sparse=sparse, dtype=dtype)
        tm.assert_frame_equal(result, expected)

        result = get_dummies(s_df, columns=s_df.columns,
                             sparse=sparse, dtype=dtype)
        if sparse:
            dtype_name = 'Sparse[{}, {}]'.format(
                self.effective_dtype(dtype).name,
                fill_value
            )
        else:
            dtype_name = self.effective_dtype(dtype).name

        # Dummifying every column yields 8 columns of the requested dtype.
        expected = Series({dtype_name: 8})
        tm.assert_series_equal(result.get_dtype_counts(), expected)

        # Dummifying only 'a' leaves 'b' (object) and 'c' (int64) untouched.
        result = get_dummies(s_df, columns=['a'], sparse=sparse, dtype=dtype)

        expected_counts = {'int64': 1, 'object': 1}
        expected_counts[dtype_name] = 3 + expected_counts.get(dtype_name, 0)

        expected = Series(expected_counts).sort_index()
        tm.assert_series_equal(result.get_dtype_counts().sort_index(),
                               expected)

    def test_just_na(self, sparse):
        # All-NaN input produces an empty dummy frame but keeps the index.
        just_na_list = [np.nan]
        just_na_series = Series(just_na_list)
        just_na_series_index = Series(just_na_list, index=['A'])

        res_list = get_dummies(just_na_list, sparse=sparse)
        res_series = get_dummies(just_na_series, sparse=sparse)
        res_series_index = get_dummies(just_na_series_index, sparse=sparse)

        assert res_list.empty
        assert res_series.empty
        assert res_series_index.empty

        assert res_list.index.tolist() == [0]
        assert res_series.index.tolist() == [0]
        assert res_series_index.index.tolist() == ['A']
|
||||
|
||||
    def test_include_na(self, sparse, dtype):
        # By default NaN rows get all-zero dummies; with dummy_na=True a
        # dedicated NaN indicator column is appended.
        s = ['a', 'b', np.nan]
        res = get_dummies(s, sparse=sparse, dtype=dtype)
        exp = DataFrame({'a': [1, 0, 0],
                         'b': [0, 1, 0]},
                        dtype=self.effective_dtype(dtype))
        if sparse:
            exp = exp.apply(pd.SparseArray, fill_value=0.0)
        assert_frame_equal(res, exp)

        # Sparse dataframes do not allow nan labelled columns, see #GH8822
        res_na = get_dummies(s, dummy_na=True, sparse=sparse, dtype=dtype)
        exp_na = DataFrame({nan: [0, 0, 1],
                            'a': [1, 0, 0],
                            'b': [0, 1, 0]},
                           dtype=self.effective_dtype(dtype))
        exp_na = exp_na.reindex(['a', 'b', nan], axis=1)
        # hack (NaN handling in assert_index_equal)
        exp_na.columns = res_na.columns
        if sparse:
            exp_na = exp_na.apply(pd.SparseArray, fill_value=0.0)
        assert_frame_equal(res_na, exp_na)

        res_just_na = get_dummies([nan], dummy_na=True,
                                  sparse=sparse, dtype=dtype)
        exp_just_na = DataFrame(Series(1, index=[0]), columns=[nan],
                                dtype=self.effective_dtype(dtype))
        tm.assert_numpy_array_equal(res_just_na.values, exp_just_na.values)

    def test_unicode(self, sparse):
        # See GH 6885 - get_dummies chokes on unicode values
        import unicodedata
        e = 'e'
        eacute = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE')
        s = [e, eacute, eacute]
        res = get_dummies(s, prefix='letter', sparse=sparse)
        exp = DataFrame({'letter_e': [1, 0, 0],
                         u('letter_%s') % eacute: [0, 1, 1]},
                        dtype=np.uint8)
        if sparse:
            exp = exp.apply(pd.SparseArray, fill_value=0)
        assert_frame_equal(res, exp)
|
||||
|
||||
    def test_dataframe_dummies_all_obj(self, df, sparse):
        # All-object frame: every column is dummified, names are
        # '<col>_<value>'.
        df = df[['A', 'B']]
        result = get_dummies(df, sparse=sparse)
        expected = DataFrame({'A_a': [1, 0, 1],
                              'A_b': [0, 1, 0],
                              'B_b': [1, 1, 0],
                              'B_c': [0, 0, 1]},
                             dtype=np.uint8)
        if sparse:
            expected = pd.DataFrame({
                "A_a": pd.SparseArray([1, 0, 1], dtype='uint8'),
                "A_b": pd.SparseArray([0, 1, 0], dtype='uint8'),
                "B_b": pd.SparseArray([1, 1, 0], dtype='uint8'),
                "B_c": pd.SparseArray([0, 0, 1], dtype='uint8'),
            })

        assert_frame_equal(result, expected)

    def test_dataframe_dummies_mix_default(self, df, sparse, dtype):
        # Mixed frame: only object columns are dummified; numeric 'C' is
        # passed through first.
        result = get_dummies(df, sparse=sparse, dtype=dtype)
        if sparse:
            arr = SparseArray
            typ = SparseDtype(dtype, 0)
        else:
            arr = np.array
            typ = dtype
        expected = DataFrame({'C': [1, 2, 3],
                              'A_a': arr([1, 0, 1], dtype=typ),
                              'A_b': arr([0, 1, 0], dtype=typ),
                              'B_b': arr([1, 1, 0], dtype=typ),
                              'B_c': arr([0, 0, 1], dtype=typ)})
        expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c']]
        assert_frame_equal(result, expected)

    def test_dataframe_dummies_prefix_list(self, df, sparse):
        # A list of prefixes maps positionally onto the dummified columns.
        prefixes = ['from_A', 'from_B']
        result = get_dummies(df, prefix=prefixes, sparse=sparse)
        expected = DataFrame({'C': [1, 2, 3],
                              'from_A_a': [1, 0, 1],
                              'from_A_b': [0, 1, 0],
                              'from_B_b': [1, 1, 0],
                              'from_B_c': [0, 0, 1]},
                             dtype=np.uint8)
        expected[['C']] = df[['C']]
        cols = ['from_A_a', 'from_A_b', 'from_B_b', 'from_B_c']
        expected = expected[['C'] + cols]

        typ = pd.SparseArray if sparse else pd.Series
        expected[cols] = expected[cols].apply(lambda x: typ(x))
        assert_frame_equal(result, expected)

    def test_dataframe_dummies_prefix_str(self, df, sparse):
        # not that you should do this...
        # A single string prefix is applied to every column, which can
        # produce duplicate column names (bad_b appears twice).
        result = get_dummies(df, prefix='bad', sparse=sparse)
        bad_columns = ['bad_a', 'bad_b', 'bad_b', 'bad_c']
        expected = DataFrame([[1, 1, 0, 1, 0],
                              [2, 0, 1, 1, 0],
                              [3, 1, 0, 0, 1]],
                             columns=['C'] + bad_columns,
                             dtype=np.uint8)
        expected = expected.astype({"C": np.int64})
        if sparse:
            # work around astyping & assigning with duplicate columns
            # https://github.com/pandas-dev/pandas/issues/14427
            expected = pd.concat([
                pd.Series([1, 2, 3], name='C'),
                pd.Series([1, 0, 1], name='bad_a', dtype='Sparse[uint8]'),
                pd.Series([0, 1, 0], name='bad_b', dtype='Sparse[uint8]'),
                pd.Series([1, 1, 0], name='bad_b', dtype='Sparse[uint8]'),
                pd.Series([0, 0, 1], name='bad_c', dtype='Sparse[uint8]'),
            ], axis=1)

        assert_frame_equal(result, expected)
|
||||
|
||||
    def test_dataframe_dummies_subset(self, df, sparse):
        # columns= restricts dummification to the listed columns only.
        result = get_dummies(df, prefix=['from_A'], columns=['A'],
                             sparse=sparse)
        expected = DataFrame({'B': ['b', 'b', 'c'],
                              'C': [1, 2, 3],
                              'from_A_a': [1, 0, 1],
                              'from_A_b': [0, 1, 0]}, dtype=np.uint8)
        expected[['C']] = df[['C']]
        if sparse:
            cols = ['from_A_a', 'from_A_b']
            expected[cols] = expected[cols].apply(lambda x: pd.SparseSeries(x))
        assert_frame_equal(result, expected)

    def test_dataframe_dummies_prefix_sep(self, df, sparse):
        # prefix_sep accepts a single string, a positional list, or a
        # per-column dict.
        result = get_dummies(df, prefix_sep='..', sparse=sparse)
        expected = DataFrame({'C': [1, 2, 3],
                              'A..a': [1, 0, 1],
                              'A..b': [0, 1, 0],
                              'B..b': [1, 1, 0],
                              'B..c': [0, 0, 1]},
                             dtype=np.uint8)
        expected[['C']] = df[['C']]
        expected = expected[['C', 'A..a', 'A..b', 'B..b', 'B..c']]
        if sparse:
            cols = ['A..a', 'A..b', 'B..b', 'B..c']
            expected[cols] = expected[cols].apply(lambda x: pd.SparseSeries(x))

        assert_frame_equal(result, expected)

        result = get_dummies(df, prefix_sep=['..', '__'], sparse=sparse)
        expected = expected.rename(columns={'B..b': 'B__b', 'B..c': 'B__c'})
        assert_frame_equal(result, expected)

        result = get_dummies(df, prefix_sep={'A': '..', 'B': '__'},
                             sparse=sparse)
        assert_frame_equal(result, expected)

    def test_dataframe_dummies_prefix_bad_length(self, df, sparse):
        # Prefix list shorter than the number of encoded columns -> error.
        with pytest.raises(ValueError):
            get_dummies(df, prefix=['too few'], sparse=sparse)

    def test_dataframe_dummies_prefix_sep_bad_length(self, df, sparse):
        # Same length check applies to prefix_sep lists.
        with pytest.raises(ValueError):
            get_dummies(df, prefix_sep=['bad'], sparse=sparse)

    def test_dataframe_dummies_prefix_dict(self, sparse):
        # Dict prefixes map by column name rather than position.
        prefixes = {'A': 'from_A', 'B': 'from_B'}
        df = DataFrame({'C': [1, 2, 3],
                        'A': ['a', 'b', 'a'],
                        'B': ['b', 'b', 'c']})
        result = get_dummies(df, prefix=prefixes, sparse=sparse)

        expected = DataFrame({'C': [1, 2, 3],
                              'from_A_a': [1, 0, 1],
                              'from_A_b': [0, 1, 0],
                              'from_B_b': [1, 1, 0],
                              'from_B_c': [0, 0, 1]})

        columns = ['from_A_a', 'from_A_b', 'from_B_b', 'from_B_c']
        expected[columns] = expected[columns].astype(np.uint8)
        if sparse:
            expected[columns] = expected[columns].apply(
                lambda x: pd.SparseSeries(x)
            )

        assert_frame_equal(result, expected)
|
||||
|
||||
    def test_dataframe_dummies_with_na(self, df, sparse, dtype):
        # dummy_na=True adds an A_nan/B_nan indicator for the NaN row;
        # dummy_na=False drops those columns again.
        df.loc[3, :] = [np.nan, np.nan, np.nan]
        result = get_dummies(df, dummy_na=True,
                             sparse=sparse, dtype=dtype).sort_index(axis=1)

        if sparse:
            arr = SparseArray
            typ = SparseDtype(dtype, 0)
        else:
            arr = np.array
            typ = dtype

        expected = DataFrame({'C': [1, 2, 3, np.nan],
                              'A_a': arr([1, 0, 1, 0], dtype=typ),
                              'A_b': arr([0, 1, 0, 0], dtype=typ),
                              'A_nan': arr([0, 0, 0, 1], dtype=typ),
                              'B_b': arr([1, 1, 0, 0], dtype=typ),
                              'B_c': arr([0, 0, 1, 0], dtype=typ),
                              'B_nan': arr([0, 0, 0, 1], dtype=typ)
                              }).sort_index(axis=1)

        assert_frame_equal(result, expected)

        result = get_dummies(df, dummy_na=False, sparse=sparse, dtype=dtype)
        expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c']]
        assert_frame_equal(result, expected)

    def test_dataframe_dummies_with_categorical(self, df, sparse, dtype):
        # Categorical columns are dummified alongside object columns.
        df['cat'] = pd.Categorical(['x', 'y', 'y'])
        result = get_dummies(df, sparse=sparse, dtype=dtype).sort_index(axis=1)
        if sparse:
            arr = SparseArray
            typ = SparseDtype(dtype, 0)
        else:
            arr = np.array
            typ = dtype

        expected = DataFrame({'C': [1, 2, 3],
                              'A_a': arr([1, 0, 1], dtype=typ),
                              'A_b': arr([0, 1, 0], dtype=typ),
                              'B_b': arr([1, 1, 0], dtype=typ),
                              'B_c': arr([0, 0, 1], dtype=typ),
                              'cat_x': arr([1, 0, 0], dtype=typ),
                              'cat_y': arr([0, 1, 1], dtype=typ)
                              }).sort_index(axis=1)

        assert_frame_equal(result, expected)

    @pytest.mark.parametrize('get_dummies_kwargs,expected', [
        ({'data': pd.DataFrame(({u'ä': ['a']}))},
         pd.DataFrame({u'ä_a': [1]}, dtype=np.uint8)),

        ({'data': pd.DataFrame({'x': [u'ä']})},
         pd.DataFrame({u'x_ä': [1]}, dtype=np.uint8)),

        ({'data': pd.DataFrame({'x': [u'a']}), 'prefix':u'ä'},
         pd.DataFrame({u'ä_a': [1]}, dtype=np.uint8)),

        ({'data': pd.DataFrame({'x': [u'a']}), 'prefix_sep':u'ä'},
         pd.DataFrame({u'xäa': [1]}, dtype=np.uint8))])
    def test_dataframe_dummies_unicode(self, get_dummies_kwargs, expected):
        # GH22084 pd.get_dummies incorrectly encodes unicode characters
        # in dataframe column names
        result = get_dummies(**get_dummies_kwargs)
        assert_frame_equal(result, expected)
|
||||
|
||||
def test_basic_drop_first(self, sparse):
|
||||
# GH12402 Add a new parameter `drop_first` to avoid collinearity
|
||||
# Basic case
|
||||
s_list = list('abc')
|
||||
s_series = Series(s_list)
|
||||
s_series_index = Series(s_list, list('ABC'))
|
||||
|
||||
expected = DataFrame({'b': [0, 1, 0],
|
||||
'c': [0, 0, 1]},
|
||||
dtype=np.uint8)
|
||||
|
||||
result = get_dummies(s_list, drop_first=True, sparse=sparse)
|
||||
if sparse:
|
||||
expected = expected.apply(pd.SparseArray, fill_value=0)
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
result = get_dummies(s_series, drop_first=True, sparse=sparse)
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
expected.index = list('ABC')
|
||||
result = get_dummies(s_series_index, drop_first=True, sparse=sparse)
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
def test_basic_drop_first_one_level(self, sparse):
|
||||
# Test the case that categorical variable only has one level.
|
||||
s_list = list('aaa')
|
||||
s_series = Series(s_list)
|
||||
s_series_index = Series(s_list, list('ABC'))
|
||||
|
||||
expected = DataFrame(index=np.arange(3))
|
||||
|
||||
result = get_dummies(s_list, drop_first=True, sparse=sparse)
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
result = get_dummies(s_series, drop_first=True, sparse=sparse)
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
expected = DataFrame(index=list('ABC'))
|
||||
result = get_dummies(s_series_index, drop_first=True, sparse=sparse)
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
def test_basic_drop_first_NA(self, sparse):
|
||||
# Test NA handling together with drop_first
|
||||
s_NA = ['a', 'b', np.nan]
|
||||
res = get_dummies(s_NA, drop_first=True, sparse=sparse)
|
||||
exp = DataFrame({'b': [0, 1, 0]}, dtype=np.uint8)
|
||||
if sparse:
|
||||
exp = exp.apply(pd.SparseArray, fill_value=0)
|
||||
|
||||
assert_frame_equal(res, exp)
|
||||
|
||||
res_na = get_dummies(s_NA, dummy_na=True, drop_first=True,
|
||||
sparse=sparse)
|
||||
exp_na = DataFrame(
|
||||
{'b': [0, 1, 0],
|
||||
nan: [0, 0, 1]},
|
||||
dtype=np.uint8).reindex(['b', nan], axis=1)
|
||||
if sparse:
|
||||
exp_na = exp_na.apply(pd.SparseArray, fill_value=0)
|
||||
assert_frame_equal(res_na, exp_na)
|
||||
|
||||
res_just_na = get_dummies([nan], dummy_na=True, drop_first=True,
|
||||
sparse=sparse)
|
||||
exp_just_na = DataFrame(index=np.arange(1))
|
||||
assert_frame_equal(res_just_na, exp_just_na)
|
||||
|
||||
def test_dataframe_dummies_drop_first(self, df, sparse):
|
||||
df = df[['A', 'B']]
|
||||
result = get_dummies(df, drop_first=True, sparse=sparse)
|
||||
expected = DataFrame({'A_b': [0, 1, 0],
|
||||
'B_c': [0, 0, 1]},
|
||||
dtype=np.uint8)
|
||||
if sparse:
|
||||
expected = expected.apply(pd.SparseArray, fill_value=0)
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
def test_dataframe_dummies_drop_first_with_categorical(
|
||||
self, df, sparse, dtype):
|
||||
df['cat'] = pd.Categorical(['x', 'y', 'y'])
|
||||
result = get_dummies(df, drop_first=True, sparse=sparse)
|
||||
expected = DataFrame({'C': [1, 2, 3],
|
||||
'A_b': [0, 1, 0],
|
||||
'B_c': [0, 0, 1],
|
||||
'cat_y': [0, 1, 1]})
|
||||
cols = ['A_b', 'B_c', 'cat_y']
|
||||
expected[cols] = expected[cols].astype(np.uint8)
|
||||
expected = expected[['C', 'A_b', 'B_c', 'cat_y']]
|
||||
if sparse:
|
||||
for col in cols:
|
||||
expected[col] = pd.SparseSeries(expected[col])
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
def test_dataframe_dummies_drop_first_with_na(self, df, sparse):
|
||||
df.loc[3, :] = [np.nan, np.nan, np.nan]
|
||||
result = get_dummies(df, dummy_na=True, drop_first=True,
|
||||
sparse=sparse).sort_index(axis=1)
|
||||
expected = DataFrame({'C': [1, 2, 3, np.nan],
|
||||
'A_b': [0, 1, 0, 0],
|
||||
'A_nan': [0, 0, 0, 1],
|
||||
'B_c': [0, 0, 1, 0],
|
||||
'B_nan': [0, 0, 0, 1]})
|
||||
cols = ['A_b', 'A_nan', 'B_c', 'B_nan']
|
||||
expected[cols] = expected[cols].astype(np.uint8)
|
||||
expected = expected.sort_index(axis=1)
|
||||
if sparse:
|
||||
for col in cols:
|
||||
expected[col] = pd.SparseSeries(expected[col])
|
||||
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
result = get_dummies(df, dummy_na=False, drop_first=True,
|
||||
sparse=sparse)
|
||||
expected = expected[['C', 'A_b', 'B_c']]
|
||||
assert_frame_equal(result, expected)
|
||||
|
||||
def test_int_int(self):
|
||||
data = Series([1, 2, 1])
|
||||
result = pd.get_dummies(data)
|
||||
expected = DataFrame([[1, 0],
|
||||
[0, 1],
|
||||
[1, 0]],
|
||||
columns=[1, 2],
|
||||
dtype=np.uint8)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
data = Series(pd.Categorical(['a', 'b', 'a']))
|
||||
result = pd.get_dummies(data)
|
||||
expected = DataFrame([[1, 0],
|
||||
[0, 1],
|
||||
[1, 0]],
|
||||
columns=pd.Categorical(['a', 'b']),
|
||||
dtype=np.uint8)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_int_df(self, dtype):
|
||||
data = DataFrame(
|
||||
{'A': [1, 2, 1],
|
||||
'B': pd.Categorical(['a', 'b', 'a']),
|
||||
'C': [1, 2, 1],
|
||||
'D': [1., 2., 1.]
|
||||
}
|
||||
)
|
||||
columns = ['C', 'D', 'A_1', 'A_2', 'B_a', 'B_b']
|
||||
expected = DataFrame([
|
||||
[1, 1., 1, 0, 1, 0],
|
||||
[2, 2., 0, 1, 0, 1],
|
||||
[1, 1., 1, 0, 1, 0]
|
||||
], columns=columns)
|
||||
expected[columns[2:]] = expected[columns[2:]].astype(dtype)
|
||||
result = pd.get_dummies(data, columns=['A', 'B'], dtype=dtype)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_dataframe_dummies_preserve_categorical_dtype(self, dtype):
|
||||
# GH13854
|
||||
for ordered in [False, True]:
|
||||
cat = pd.Categorical(list("xy"), categories=list("xyz"),
|
||||
ordered=ordered)
|
||||
result = get_dummies(cat, dtype=dtype)
|
||||
|
||||
data = np.array([[1, 0, 0], [0, 1, 0]],
|
||||
dtype=self.effective_dtype(dtype))
|
||||
cols = pd.CategoricalIndex(cat.categories,
|
||||
categories=cat.categories,
|
||||
ordered=ordered)
|
||||
expected = DataFrame(data, columns=cols,
|
||||
dtype=self.effective_dtype(dtype))
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize('sparse', [True, False])
|
||||
def test_get_dummies_dont_sparsify_all_columns(self, sparse):
|
||||
# GH18914
|
||||
df = DataFrame.from_dict(OrderedDict([('GDP', [1, 2]),
|
||||
('Nation', ['AB', 'CD'])]))
|
||||
df = get_dummies(df, columns=['Nation'], sparse=sparse)
|
||||
df2 = df.reindex(columns=['GDP'])
|
||||
|
||||
tm.assert_frame_equal(df[['GDP']], df2)
|
||||
|
||||
def test_get_dummies_duplicate_columns(self, df):
|
||||
# GH20839
|
||||
df.columns = ["A", "A", "A"]
|
||||
result = get_dummies(df).sort_index(axis=1)
|
||||
|
||||
expected = DataFrame([[1, 1, 0, 1, 0],
|
||||
[2, 0, 1, 1, 0],
|
||||
[3, 1, 0, 0, 1]],
|
||||
columns=['A', 'A_a', 'A_b', 'A_b', 'A_c'],
|
||||
dtype=np.uint8).sort_index(axis=1)
|
||||
|
||||
expected = expected.astype({"A": np.int64})
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class TestCategoricalReshape(object):
    """Reshaping operations on frames holding categorical data."""

    @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
    def test_reshaping_panel_categorical(self):
        # unstacking a categorical column of a Panel-derived frame must
        # keep the categorical dtype in every resulting column
        p = tm.makePanel()
        p['str'] = 'foo'
        df = p.to_frame()

        df['category'] = df['str'].astype('category')
        result = df['category'].unstack()

        c = Categorical(['foo'] * len(p.major_axis))
        expected = DataFrame({'A': c.copy(),
                              'B': c.copy(),
                              'C': c.copy(),
                              'D': c.copy()},
                             columns=Index(list('ABCD'), name='minor'),
                             index=p.major_axis.set_names('major'))
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class TestMakeAxisDummies(object):
    """Tests for the internal ``make_axis_dummies`` helper."""

    def test_preserve_categorical_dtype(self):
        # GH13854: the categorical dtype of the minor axis (including
        # ordered-ness) must carry through to the dummy columns
        for ordered in [False, True]:
            cidx = pd.CategoricalIndex(list("xyz"), ordered=ordered)
            midx = pd.MultiIndex(levels=[['a'], cidx],
                                 codes=[[0, 0], [0, 1]])
            df = DataFrame([[10, 11]], index=midx)

            expected = DataFrame([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]],
                                 index=midx, columns=cidx)

            from pandas.core.reshape.reshape import make_axis_dummies
            result = make_axis_dummies(df)
            tm.assert_frame_equal(result, expected)

            # an identity transform must not change the result
            result = make_axis_dummies(df, transform=lambda x: x)
            tm.assert_frame_equal(result, expected)
|
||||
+346
@@ -0,0 +1,346 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.concat import union_categoricals
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Categorical, CategoricalIndex, Series
|
||||
from pandas.util import testing as tm
|
||||
|
||||
|
||||
class TestUnionCategoricals(object):
    """Tests for ``pandas.core.dtypes.concat.union_categoricals``."""

    def test_union_categorical(self):
        # GH 13361: unions across a spread of category dtypes and boxes
        data = [
            (list('abc'), list('abd'), list('abcabd')),
            ([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]),
            ([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]),

            (['b', 'b', np.nan, 'a'], ['a', np.nan, 'c'],
             ['b', 'b', np.nan, 'a', 'a', np.nan, 'c']),

            (pd.date_range('2014-01-01', '2014-01-05'),
             pd.date_range('2014-01-06', '2014-01-07'),
             pd.date_range('2014-01-01', '2014-01-07')),

            (pd.date_range('2014-01-01', '2014-01-05', tz='US/Central'),
             pd.date_range('2014-01-06', '2014-01-07', tz='US/Central'),
             pd.date_range('2014-01-01', '2014-01-07', tz='US/Central')),

            (pd.period_range('2014-01-01', '2014-01-05'),
             pd.period_range('2014-01-06', '2014-01-07'),
             pd.period_range('2014-01-01', '2014-01-07')),
        ]

        for left, right, combined in data:
            for box in [Categorical, CategoricalIndex, Series]:
                result = union_categoricals([box(Categorical(left)),
                                             box(Categorical(right))])
                expected = Categorical(combined)
                tm.assert_categorical_equal(result, expected,
                                            check_category_order=True)

        # new categories are ordered by appearance
        s = Categorical(['x', 'y', 'z'])
        s2 = Categorical(['a', 'b', 'c'])
        result = union_categoricals([s, s2])
        expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'],
                               categories=['x', 'y', 'z', 'a', 'b', 'c'])
        tm.assert_categorical_equal(result, expected)

        # ordered categoricals with identical categories concatenate
        s = Categorical([0, 1.2, 2], ordered=True)
        s2 = Categorical([0, 1.2, 2], ordered=True)
        result = union_categoricals([s, s2])
        expected = Categorical([0, 1.2, 2, 0, 1.2, 2], ordered=True)
        tm.assert_categorical_equal(result, expected)

        # category dtypes must match exactly
        s = Categorical([0, 1.2, 2])
        s2 = Categorical([2, 3, 4])
        msg = 'dtype of categories must be the same'
        with pytest.raises(TypeError, match=msg):
            union_categoricals([s, s2])

        # an empty input list is a ValueError
        msg = 'No Categoricals to union'
        with pytest.raises(ValueError, match=msg):
            union_categoricals([])

    def test_union_categoricals_nan(self):
        # GH 13759: NaN values survive the union
        res = union_categoricals([pd.Categorical([1, 2, np.nan]),
                                  pd.Categorical([3, 2, np.nan])])
        exp = Categorical([1, 2, np.nan, 3, 2, np.nan])
        tm.assert_categorical_equal(res, exp)

        res = union_categoricals([pd.Categorical(['A', 'B']),
                                  pd.Categorical(['B', 'B', np.nan])])
        exp = Categorical(['A', 'B', 'B', 'B', np.nan])
        tm.assert_categorical_equal(res, exp)

        val1 = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-03-01'),
                pd.NaT]
        val2 = [pd.NaT, pd.Timestamp('2011-01-01'),
                pd.Timestamp('2011-02-01')]

        res = union_categoricals([pd.Categorical(val1), pd.Categorical(val2)])
        exp = Categorical(val1 + val2,
                          categories=[pd.Timestamp('2011-01-01'),
                                      pd.Timestamp('2011-03-01'),
                                      pd.Timestamp('2011-02-01')])
        tm.assert_categorical_equal(res, exp)

        # all-NaN inputs
        res = union_categoricals([pd.Categorical(np.array([np.nan, np.nan],
                                                          dtype=object)),
                                  pd.Categorical(['X'])])
        exp = Categorical([np.nan, np.nan, 'X'])
        tm.assert_categorical_equal(res, exp)

        res = union_categoricals([pd.Categorical([np.nan, np.nan]),
                                  pd.Categorical([np.nan, np.nan])])
        exp = Categorical([np.nan, np.nan, np.nan, np.nan])
        tm.assert_categorical_equal(res, exp)

    def test_union_categoricals_empty(self):
        # GH 13759: empty inputs union cleanly
        res = union_categoricals([pd.Categorical([]),
                                  pd.Categorical([])])
        exp = Categorical([])
        tm.assert_categorical_equal(res, exp)

        res = union_categoricals([Categorical([]),
                                  Categorical(['1'])])
        exp = Categorical(['1'])
        tm.assert_categorical_equal(res, exp)

    def test_union_categorical_same_category(self):
        # identical categories exercise the fastpath
        c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4])
        c2 = Categorical([3, 2, 1, np.nan], categories=[1, 2, 3, 4])
        res = union_categoricals([c1, c2])
        exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan],
                          categories=[1, 2, 3, 4])
        tm.assert_categorical_equal(res, exp)

        c1 = Categorical(['z', 'z', 'z'], categories=['x', 'y', 'z'])
        c2 = Categorical(['x', 'x', 'x'], categories=['x', 'y', 'z'])
        res = union_categoricals([c1, c2])
        exp = Categorical(['z', 'z', 'z', 'x', 'x', 'x'],
                          categories=['x', 'y', 'z'])
        tm.assert_categorical_equal(res, exp)

    def test_union_categorical_same_categories_different_order(self):
        # https://github.com/pandas-dev/pandas/issues/19096
        c1 = Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c'])
        c2 = Categorical(['a', 'b', 'c'], categories=['b', 'a', 'c'])
        result = union_categoricals([c1, c2])
        expected = Categorical(['a', 'b', 'c', 'a', 'b', 'c'],
                               categories=['a', 'b', 'c'])
        tm.assert_categorical_equal(result, expected)

    def test_union_categoricals_ordered(self):
        # mixing ordered and unordered is an error
        c1 = Categorical([1, 2, 3], ordered=True)
        c2 = Categorical([1, 2, 3], ordered=False)

        msg = 'Categorical.ordered must be the same'
        with pytest.raises(TypeError, match=msg):
            union_categoricals([c1, c2])

        res = union_categoricals([c1, c1])
        exp = Categorical([1, 2, 3, 1, 2, 3], ordered=True)
        tm.assert_categorical_equal(res, exp)

        c1 = Categorical([1, 2, 3, np.nan], ordered=True)
        c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True)

        res = union_categoricals([c1, c2])
        exp = Categorical([1, 2, 3, np.nan, 3, 2], ordered=True)
        tm.assert_categorical_equal(res, exp)

        # ordered unions require identical category order
        c1 = Categorical([1, 2, 3], ordered=True)
        c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)

        msg = "to union ordered Categoricals, all categories must be the same"
        with pytest.raises(TypeError, match=msg):
            union_categoricals([c1, c2])

    def test_union_categoricals_ignore_order(self):
        # GH 15219: ignore_order=True discards ordered-ness
        c1 = Categorical([1, 2, 3], ordered=True)
        c2 = Categorical([1, 2, 3], ordered=False)

        res = union_categoricals([c1, c2], ignore_order=True)
        exp = Categorical([1, 2, 3, 1, 2, 3])
        tm.assert_categorical_equal(res, exp)

        msg = 'Categorical.ordered must be the same'
        with pytest.raises(TypeError, match=msg):
            union_categoricals([c1, c2], ignore_order=False)

        res = union_categoricals([c1, c1], ignore_order=True)
        exp = Categorical([1, 2, 3, 1, 2, 3])
        tm.assert_categorical_equal(res, exp)

        res = union_categoricals([c1, c1], ignore_order=False)
        exp = Categorical([1, 2, 3, 1, 2, 3],
                          categories=[1, 2, 3], ordered=True)
        tm.assert_categorical_equal(res, exp)

        c1 = Categorical([1, 2, 3, np.nan], ordered=True)
        c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True)

        res = union_categoricals([c1, c2], ignore_order=True)
        exp = Categorical([1, 2, 3, np.nan, 3, 2])
        tm.assert_categorical_equal(res, exp)

        c1 = Categorical([1, 2, 3], ordered=True)
        c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)

        res = union_categoricals([c1, c2], ignore_order=True)
        exp = Categorical([1, 2, 3, 1, 2, 3])
        tm.assert_categorical_equal(res, exp)

        res = union_categoricals([c2, c1], ignore_order=True,
                                 sort_categories=True)
        exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3])
        tm.assert_categorical_equal(res, exp)

        c1 = Categorical([1, 2, 3], ordered=True)
        c2 = Categorical([4, 5, 6], ordered=True)
        result = union_categoricals([c1, c2], ignore_order=True)
        expected = Categorical([1, 2, 3, 4, 5, 6])
        tm.assert_categorical_equal(result, expected)

        # without ignore_order, mismatched ordered categories still raise
        msg = "to union ordered Categoricals, all categories must be the same"
        with pytest.raises(TypeError, match=msg):
            union_categoricals([c1, c2], ignore_order=False)

        with pytest.raises(TypeError, match=msg):
            union_categoricals([c1, c2])

    def test_union_categoricals_sort(self):
        # GH 13846: sort_categories=True sorts the resulting categories
        c1 = Categorical(['x', 'y', 'z'])
        c2 = Categorical(['a', 'b', 'c'])
        result = union_categoricals([c1, c2], sort_categories=True)
        expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'],
                               categories=['a', 'b', 'c', 'x', 'y', 'z'])
        tm.assert_categorical_equal(result, expected)

        # fastpath
        c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c'])
        c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c'])
        result = union_categoricals([c1, c2], sort_categories=True)
        expected = Categorical(['a', 'b', 'b', 'c'],
                               categories=['a', 'b', 'c'])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical(['a', 'b'], categories=['c', 'a', 'b'])
        c2 = Categorical(['b', 'c'], categories=['c', 'a', 'b'])
        result = union_categoricals([c1, c2], sort_categories=True)
        expected = Categorical(['a', 'b', 'b', 'c'],
                               categories=['a', 'b', 'c'])
        tm.assert_categorical_equal(result, expected)

        # fastpath - categories already sorted, resort skipped
        c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
        c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c'])
        result = union_categoricals([c1, c2], sort_categories=True)
        expected = Categorical(['a', 'b', 'b', 'c'],
                               categories=['a', 'b', 'c'])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical(['x', np.nan])
        c2 = Categorical([np.nan, 'b'])
        result = union_categoricals([c1, c2], sort_categories=True)
        expected = Categorical(['x', np.nan, np.nan, 'b'],
                               categories=['b', 'x'])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical([np.nan])
        c2 = Categorical([np.nan])
        result = union_categoricals([c1, c2], sort_categories=True)
        expected = Categorical([np.nan, np.nan])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical([])
        c2 = Categorical([])
        result = union_categoricals([c1, c2], sort_categories=True)
        expected = Categorical([])
        tm.assert_categorical_equal(result, expected)

        # sorting ordered categoricals is not supported
        c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True)
        c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True)
        with pytest.raises(TypeError):
            union_categoricals([c1, c2], sort_categories=True)

    def test_union_categoricals_sort_false(self):
        # GH 13846: sort_categories=False keeps appearance order
        c1 = Categorical(['x', 'y', 'z'])
        c2 = Categorical(['a', 'b', 'c'])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'],
                               categories=['x', 'y', 'z', 'a', 'b', 'c'])
        tm.assert_categorical_equal(result, expected)

        # fastpath
        c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c'])
        c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c'])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(['a', 'b', 'b', 'c'],
                               categories=['b', 'a', 'c'])
        tm.assert_categorical_equal(result, expected)

        # fastpath - categories already sorted, resort skipped
        c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
        c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c'])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(['a', 'b', 'b', 'c'],
                               categories=['a', 'b', 'c'])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical(['x', np.nan])
        c2 = Categorical([np.nan, 'b'])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(['x', np.nan, np.nan, 'b'],
                               categories=['x', 'b'])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical([np.nan])
        c2 = Categorical([np.nan])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical([np.nan, np.nan])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical([])
        c2 = Categorical([])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical([])
        tm.assert_categorical_equal(result, expected)

        # ordered inputs are allowed when no resort is requested
        c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True)
        c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True)
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(['b', 'a', 'a', 'c'],
                               categories=['b', 'a', 'c'], ordered=True)
        tm.assert_categorical_equal(result, expected)

    def test_union_categorical_unwrap(self):
        # GH 14173: Series/CategoricalIndex inputs are unwrapped
        c1 = Categorical(['a', 'b'])
        c2 = pd.Series(['b', 'c'], dtype='category')
        result = union_categoricals([c1, c2])
        expected = Categorical(['a', 'b', 'b', 'c'])
        tm.assert_categorical_equal(result, expected)

        c2 = CategoricalIndex(c2)
        result = union_categoricals([c1, c2])
        tm.assert_categorical_equal(result, expected)

        c1 = Series(c1)
        result = union_categoricals([c1, c2])
        tm.assert_categorical_equal(result, expected)

        # plain lists are not accepted
        with pytest.raises(TypeError):
            union_categoricals([c1, ['a', 'b', 'c']])
|
||||
@@ -0,0 +1,53 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Index, date_range
|
||||
from pandas.core.reshape.util import cartesian_product
|
||||
import pandas.util.testing as tm
|
||||
|
||||
|
||||
class TestCartesianProduct(object):
    """Tests for ``pandas.core.reshape.util.cartesian_product``."""

    def test_simple(self):
        # the first factor varies slowest, the second fastest
        letters, numbers = list('ABC'), [1, 22]
        result1, result2 = cartesian_product([letters, numbers])
        expected1 = np.array(['A', 'A', 'B', 'B', 'C', 'C'])
        expected2 = np.array([1, 22, 1, 22, 1, 22])
        tm.assert_numpy_array_equal(result1, expected1)
        tm.assert_numpy_array_equal(result2, expected2)

    def test_datetimeindex(self):
        # regression test for GitHub issue #6439:
        # ordering over a DatetimeIndex must stay consistent
        x = date_range('2000-01-01', periods=2)
        result1, result2 = [Index(y).day for y in cartesian_product([x, x])]
        expected1 = Index([1, 1, 2, 2])
        expected2 = Index([1, 2, 1, 2])
        tm.assert_index_equal(result1, expected1)
        tm.assert_index_equal(result2, expected2)

    def test_empty(self):
        # any empty factor makes the whole product empty
        X = [[], [0, 1], []]
        Y = [[], [], ['a', 'b', 'c']]
        for x, y in zip(X, Y):
            expected1 = np.array([], dtype=np.asarray(x).dtype)
            expected2 = np.array([], dtype=np.asarray(y).dtype)
            result1, result2 = cartesian_product([x, y])
            tm.assert_numpy_array_equal(result1, expected1)
            tm.assert_numpy_array_equal(result2, expected2)

        # an empty input list yields an empty product
        result = cartesian_product([])
        expected = []
        assert result == expected

    @pytest.mark.parametrize("X", [
        1, [1], [1, 2], [[1], 2],
        'a', ['a'], ['a', 'b'], [['a'], 'b']
    ])
    def test_invalid_input(self, X):
        # non list-of-list-likes inputs are rejected
        msg = "Input must be a list-like of list-likes"

        with pytest.raises(TypeError, match=msg):
            cartesian_product(X=X)
|
||||
Reference in New Issue
Block a user