started work on backend

This commit is contained in:
d3m1g0d
2019-01-21 17:36:00 +01:00
parent a1a8bca34b
commit 9f9a7e4974
4032 changed files with 745079 additions and 0 deletions
@@ -0,0 +1,292 @@
import pytest
import datetime
from warnings import catch_warnings
import numpy as np
import pandas as pd
from pandas import DataFrame, Series, Index, MultiIndex
from pandas.util import hash_array, hash_pandas_object
from pandas.core.util.hashing import hash_tuples, hash_tuple, _hash_scalar
import pandas.util.testing as tm
class TestHashing(object):
    """Tests for the pandas hashing utilities: ``hash_array``,
    ``hash_pandas_object``, ``hash_tuples``/``hash_tuple`` and the
    private ``_hash_scalar`` helper."""

    def setup_method(self, method):
        # Fixture frame exercising every major dtype the hashers must
        # support: int32, float32 (with missing), categorical, object,
        # bool, datetime, tz-aware datetime and timedelta.
        self.df = DataFrame(
            {'i32': np.array([1, 2, 3] * 3, dtype='int32'),
             'f32': np.array([None, 2.5, 3.5] * 3, dtype='float32'),
             'cat': Series(['a', 'b', 'c'] * 3).astype('category'),
             'obj': Series(['d', 'e', 'f'] * 3),
             'bool': np.array([True, False, True] * 3),
             'dt': Series(pd.date_range('20130101', periods=9)),
             'dt_tz': Series(pd.date_range('20130101', periods=9,
                                           tz='US/Eastern')),
             'td': Series(pd.timedelta_range('2000', periods=9))})

    def test_consistency(self):
        # check that our hash doesn't change because of a mistake
        # in the actual code; this is the ground truth
        result = hash_pandas_object(Index(['foo', 'bar', 'baz']))
        expected = Series(np.array([3600424527151052760, 1374399572096150070,
                                    477881037637427054], dtype='uint64'),
                          index=['foo', 'bar', 'baz'])
        tm.assert_series_equal(result, expected)

    def test_hash_array(self):
        # Hashing the same ndarray twice must be deterministic,
        # for every dtype in the fixture frame.
        for name, s in self.df.iteritems():
            a = s.values
            tm.assert_numpy_array_equal(hash_array(a), hash_array(a))

    def test_hash_array_mixed(self):
        # Mixed int/str data hashes via the string representation, so
        # these three spellings of the same values must all agree.
        result1 = hash_array(np.array([3, 4, 'All']))
        result2 = hash_array(np.array(['3', '4', 'All']))
        result3 = hash_array(np.array([3, 4, 'All'], dtype=object))
        tm.assert_numpy_array_equal(result1, result2)
        tm.assert_numpy_array_equal(result1, result3)

    def test_hash_array_errors(self):
        # hash_array requires an ndarray; scalars raise TypeError.
        for val in [5, 'foo', pd.Timestamp('20130101')]:
            pytest.raises(TypeError, hash_array, val)

    def check_equal(self, obj, **kwargs):
        # Helper: hashing the same object twice is deterministic, both
        # with the caller's kwargs and again without any 'index' kwarg.
        a = hash_pandas_object(obj, **kwargs)
        b = hash_pandas_object(obj, **kwargs)
        tm.assert_series_equal(a, b)
        kwargs.pop('index', None)
        a = hash_pandas_object(obj, **kwargs)
        b = hash_pandas_object(obj, **kwargs)
        tm.assert_series_equal(a, b)

    def check_not_equal_with_index(self, obj):
        # check that we are not hashing the same if
        # we include the index
        if not isinstance(obj, Index):
            a = hash_pandas_object(obj, index=True)
            b = hash_pandas_object(obj, index=False)
            if len(obj):
                assert not (a == b).all()

    def test_hash_tuples(self):
        # hash_tuples on a list of tuples must match hashing the
        # equivalent MultiIndex; a single tuple hashes like its
        # one-element list counterpart.
        tups = [(1, 'one'), (1, 'two'), (2, 'one')]
        result = hash_tuples(tups)
        expected = hash_pandas_object(MultiIndex.from_tuples(tups)).values
        tm.assert_numpy_array_equal(result, expected)
        result = hash_tuples(tups[0])
        assert result == expected[0]

    def test_hash_tuple(self):
        # test equivalence between hash_tuples and hash_tuple
        for tup in [(1, 'one'), (1, np.nan), (1.0, pd.NaT, 'A'),
                    ('A', pd.Timestamp("2012-01-01"))]:
            result = hash_tuple(tup)
            expected = hash_tuples([tup])[0]
            assert result == expected

    def test_hash_scalar(self):
        # _hash_scalar must agree with hash_array on a one-element
        # object array, across numeric, bytes/str, datetime-like
        # (naive and tz-aware), period, interval and null scalars.
        for val in [1, 1.4, 'A', b'A', u'A', pd.Timestamp("2012-01-01"),
                    pd.Timestamp("2012-01-01", tz='Europe/Brussels'),
                    datetime.datetime(2012, 1, 1),
                    pd.Timestamp("2012-01-01", tz='EST').to_pydatetime(),
                    pd.Timedelta('1 days'), datetime.timedelta(1),
                    pd.Period('2012-01-01', freq='D'), pd.Interval(0, 1),
                    np.nan, pd.NaT, None]:
            result = _hash_scalar(val)
            expected = hash_array(np.array([val], dtype=object),
                                  categorize=True)
            assert result[0] == expected[0]

    def test_hash_tuples_err(self):
        # Non-list-of-tuples inputs to hash_tuples are rejected.
        for val in [5, 'foo', pd.Timestamp('20130101')]:
            pytest.raises(TypeError, hash_tuples, val)

    def test_multiindex_unique(self):
        # A unique MultiIndex must hash to unique values.
        mi = MultiIndex.from_tuples([(118, 472), (236, 118),
                                     (51, 204), (102, 51)])
        assert mi.is_unique
        result = hash_pandas_object(mi)
        assert result.is_unique

    def test_multiindex_objects(self):
        # Hashing a MultiIndex and its monotonic-sorted reconstruction:
        # both must hash consistently with _hashed_values, and the two
        # hash sets must match up to ordering.
        mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]],
                        labels=[[0, 1, 0, 2], [2, 0, 0, 1]],
                        names=['col1', 'col2'])
        recons = mi._sort_levels_monotonic()
        # these are equal
        assert mi.equals(recons)
        assert Index(mi.values).equals(Index(recons.values))
        # _hashed_values and hash_pandas_object(..., index=False)
        # equivalency
        expected = hash_pandas_object(
            mi, index=False).values
        result = mi._hashed_values
        tm.assert_numpy_array_equal(result, expected)
        expected = hash_pandas_object(
            recons, index=False).values
        result = recons._hashed_values
        tm.assert_numpy_array_equal(result, expected)
        expected = mi._hashed_values
        result = recons._hashed_values
        # values should match, but in different order
        tm.assert_numpy_array_equal(np.sort(result),
                                    np.sort(expected))

    def test_hash_pandas_object(self):
        # Determinism plus index sensitivity across a wide spread of
        # Series/Index/DataFrame/MultiIndex inputs.
        for obj in [Series([1, 2, 3]),
                    Series([1.0, 1.5, 3.2]),
                    Series([1.0, 1.5, np.nan]),
                    Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]),
                    Series(['a', 'b', 'c']),
                    Series(['a', np.nan, 'c']),
                    Series(['a', None, 'c']),
                    Series([True, False, True]),
                    Series(),
                    Index([1, 2, 3]),
                    Index([True, False, True]),
                    DataFrame({'x': ['a', 'b', 'c'], 'y': [1, 2, 3]}),
                    DataFrame(),
                    tm.makeMissingDataframe(),
                    tm.makeMixedDataFrame(),
                    tm.makeTimeDataFrame(),
                    tm.makeTimeSeries(),
                    tm.makeTimedeltaIndex(),
                    tm.makePeriodIndex(),
                    Series(tm.makePeriodIndex()),
                    Series(pd.date_range('20130101',
                                         periods=3, tz='US/Eastern')),
                    MultiIndex.from_product(
                        [range(5),
                         ['foo', 'bar', 'baz'],
                         pd.date_range('20130101', periods=2)]),
                    MultiIndex.from_product(
                        [pd.CategoricalIndex(list('aabc')),
                         range(3)])]:
            self.check_equal(obj)
            self.check_not_equal_with_index(obj)

    def test_hash_pandas_object2(self):
        # Same checks, column by column, on the dtype-spanning fixture.
        for name, s in self.df.iteritems():
            self.check_equal(s)
            self.check_not_equal_with_index(s)

    def test_hash_pandas_empty_object(self):
        for obj in [Series([], dtype='float64'),
                    Series([], dtype='object'),
                    Index([])]:
            self.check_equal(obj)
            # these are by-definition the same with
            # or w/o the index as the data is empty

    def test_categorical_consistency(self):
        # GH15143
        # Check that categoricals hash consistent with their values, not codes
        # This should work for categoricals of any dtype
        for s1 in [Series(['a', 'b', 'c', 'd']),
                   Series([1000, 2000, 3000, 4000]),
                   Series(pd.date_range(0, periods=4))]:
            s2 = s1.astype('category').cat.set_categories(s1)
            s3 = s2.cat.set_categories(list(reversed(s1)))
            for categorize in [True, False]:
                # These should all hash identically
                h1 = hash_pandas_object(s1, categorize=categorize)
                h2 = hash_pandas_object(s2, categorize=categorize)
                h3 = hash_pandas_object(s3, categorize=categorize)
                tm.assert_series_equal(h1, h2)
                tm.assert_series_equal(h1, h3)

    def test_categorical_with_nan_consistency(self):
        # Hashes of shared values (including the -1/NaN code) must fall
        # in the same value-based hash set regardless of category list.
        c = pd.Categorical.from_codes(
            [-1, 0, 1, 2, 3, 4],
            categories=pd.date_range('2012-01-01', periods=5, name='B'))
        expected = hash_array(c, categorize=False)
        c = pd.Categorical.from_codes(
            [-1, 0],
            categories=[pd.Timestamp('2012-01-01')])
        result = hash_array(c, categorize=False)
        assert result[0] in expected
        assert result[1] in expected

    def test_pandas_errors(self):
        # Scalars and Panel objects cannot be hashed.
        for obj in [pd.Timestamp('20130101')]:
            with pytest.raises(TypeError):
                hash_pandas_object(obj)
        # Panel construction is deprecated; suppress its warning.
        with catch_warnings(record=True):
            obj = tm.makePanel()
        with pytest.raises(TypeError):
            hash_pandas_object(obj)

    def test_hash_keys(self):
        # using different hash keys, should have different hashes
        # for the same data
        # this only matters for object dtypes
        obj = Series(list('abc'))
        a = hash_pandas_object(obj, hash_key='9876543210123456')
        b = hash_pandas_object(obj, hash_key='9876543210123465')
        assert (a != b).all()

    def test_invalid_key(self):
        # this only matters for object dtypes
        def f():
            hash_pandas_object(Series(list('abc')), hash_key='foo')
        pytest.raises(ValueError, f)

    def test_alread_encoded(self):
        # if already encoded then ok
        obj = Series(list('abc')).str.encode('utf8')
        self.check_equal(obj)

    def test_alternate_encoding(self):
        # Hashing must also work with a non-default string encoding.
        obj = Series(list('abc'))
        self.check_equal(obj, encoding='ascii')

    def test_same_len_hash_collisions(self):
        # Distinct random strings of identical length must not collide,
        # for both odd and power-of-two lengths.
        for l in range(8):
            length = 2**(l + 8) + 1
            s = tm.rands_array(length, 2)
            result = hash_array(s, 'utf8')
            assert not result[0] == result[1]
        for l in range(8):
            length = 2**(l + 8)
            s = tm.rands_array(length, 2)
            result = hash_array(s, 'utf8')
            assert not result[0] == result[1]

    def test_hash_collisions(self):
        # hash collisions are bad
        # https://github.com/pandas-dev/pandas/issues/14711#issuecomment-264885726
        L = ['Ingrid-9Z9fKIZmkO7i7Cn51Li34pJm44fgX6DYGBNj3VPlOH50m7HnBlPxfIwFMrcNJNMP6PSgLmwWnInciMWrCSAlLEvt7JkJl4IxiMrVbXSa8ZQoVaq5xoQPjltuJEfwdNlO6jo8qRRHvD8sBEBMQASrRa6TsdaPTPCBo3nwIBpE7YzzmyH0vMBhjQZLx1aCT7faSEx7PgFxQhHdKFWROcysamgy9iVj8DO2Fmwg1NNl93rIAqC3mdqfrCxrzfvIY8aJdzin2cHVzy3QUJxZgHvtUtOLxoqnUHsYbNTeq0xcLXpTZEZCxD4PGubIuCNf32c33M7HFsnjWSEjE2yVdWKhmSVodyF8hFYVmhYnMCztQnJrt3O8ZvVRXd5IKwlLexiSp4h888w7SzAIcKgc3g5XQJf6MlSMftDXm9lIsE1mJNiJEv6uY6pgvC3fUPhatlR5JPpVAHNSbSEE73MBzJrhCAbOLXQumyOXigZuPoME7QgJcBalliQol7YZ9', # noqa
             'Tim-b9MddTxOWW2AT1Py6vtVbZwGAmYCjbp89p8mxsiFoVX4FyDOF3wFiAkyQTUgwg9sVqVYOZo09Dh1AzhFHbgij52ylF0SEwgzjzHH8TGY8Lypart4p4onnDoDvVMBa0kdthVGKl6K0BDVGzyOXPXKpmnMF1H6rJzqHJ0HywfwS4XYpVwlAkoeNsiicHkJUFdUAhG229INzvIAiJuAHeJDUoyO4DCBqtoZ5TDend6TK7Y914yHlfH3g1WZu5LksKv68VQHJriWFYusW5e6ZZ6dKaMjTwEGuRgdT66iU5nqWTHRH8WSzpXoCFwGcTOwyuqPSe0fTe21DVtJn1FKj9F9nEnR9xOvJUO7E0piCIF4Ad9yAIDY4DBimpsTfKXCu1vdHpKYerzbndfuFe5AhfMduLYZJi5iAw8qKSwR5h86ttXV0Mc0QmXz8dsRvDgxjXSmupPxBggdlqUlC828hXiTPD7am0yETBV0F3bEtvPiNJfremszcV8NcqAoARMe'] # noqa
        # these should be different!
        result1 = hash_array(np.asarray(L[0:1], dtype=object), 'utf8')
        expected1 = np.array([14963968704024874985], dtype=np.uint64)
        tm.assert_numpy_array_equal(result1, expected1)
        result2 = hash_array(np.asarray(L[1:2], dtype=object), 'utf8')
        expected2 = np.array([16428432627716348016], dtype=np.uint64)
        tm.assert_numpy_array_equal(result2, expected2)
        result = hash_array(np.asarray(L, dtype=object), 'utf8')
        tm.assert_numpy_array_equal(
            result, np.concatenate([expected1, expected2], axis=0))
@@ -0,0 +1,856 @@
# -*- coding: utf-8 -*-
import os
import pandas as pd
import pytest
import numpy as np
import sys
from pandas import Series, DataFrame
import pandas.util.testing as tm
import pandas.util._test_decorators as td
from pandas.util.testing import (assert_almost_equal, raise_with_traceback,
assert_index_equal, assert_series_equal,
assert_frame_equal, assert_numpy_array_equal,
RNGContext)
class TestAssertAlmostEqual(object):
    """Tests for ``pandas.util.testing.assert_almost_equal`` across
    scalars, containers, strings, nulls and pandas objects."""

    def _assert_almost_equal_both(self, a, b, **kwargs):
        # Helper: equality must hold in both argument orders.
        assert_almost_equal(a, b, **kwargs)
        assert_almost_equal(b, a, **kwargs)

    def _assert_not_almost_equal_both(self, a, b, **kwargs):
        # Helper: inequality must raise in both argument orders.
        pytest.raises(AssertionError, assert_almost_equal, a, b, **kwargs)
        pytest.raises(AssertionError, assert_almost_equal, b, a, **kwargs)

    def test_assert_almost_equal_numbers(self):
        # Close floats and cross-dtype numerics compare equal; a bool
        # is not "almost equal" to a float.
        self._assert_almost_equal_both(1.1, 1.1)
        self._assert_almost_equal_both(1.1, 1.100001)
        self._assert_almost_equal_both(np.int16(1), 1.000001)
        self._assert_almost_equal_both(np.float64(1.1), 1.1)
        self._assert_almost_equal_both(np.uint32(5), 5)
        self._assert_not_almost_equal_both(1.1, 1)
        self._assert_not_almost_equal_both(1.1, True)
        self._assert_not_almost_equal_both(1, 2)
        self._assert_not_almost_equal_both(1.0001, np.int16(1))

    def test_assert_almost_equal_numbers_with_zeros(self):
        # Zero comparisons use absolute, not relative, tolerance.
        self._assert_almost_equal_both(0, 0)
        self._assert_almost_equal_both(0, 0.0)
        self._assert_almost_equal_both(0, np.float64(0))
        self._assert_almost_equal_both(0.000001, 0)
        self._assert_not_almost_equal_both(0.001, 0)
        self._assert_not_almost_equal_both(1, 0)

    def test_assert_almost_equal_numbers_with_mixed(self):
        # Numbers never compare almost-equal to non-numeric objects.
        self._assert_not_almost_equal_both(1, 'abc')
        self._assert_not_almost_equal_both(1, [1, ])
        self._assert_not_almost_equal_both(1, object())

    @pytest.mark.parametrize(
        "left_dtype",
        ['M8[ns]', 'm8[ns]', 'float64', 'int64', 'object'])
    @pytest.mark.parametrize(
        "right_dtype",
        ['M8[ns]', 'm8[ns]', 'float64', 'int64', 'object'])
    def test_assert_almost_equal_edge_case_ndarrays(
            self, left_dtype, right_dtype):
        # empty compare
        self._assert_almost_equal_both(np.array([], dtype=left_dtype),
                                       np.array([], dtype=right_dtype),
                                       check_dtype=False)

    def test_assert_almost_equal_dicts(self):
        # Dicts compare by matching keys and almost-equal values.
        self._assert_almost_equal_both({'a': 1, 'b': 2}, {'a': 1, 'b': 2})
        self._assert_not_almost_equal_both({'a': 1, 'b': 2}, {'a': 1, 'b': 3})
        self._assert_not_almost_equal_both({'a': 1, 'b': 2},
                                           {'a': 1, 'b': 2, 'c': 3})
        self._assert_not_almost_equal_both({'a': 1}, 1)
        self._assert_not_almost_equal_both({'a': 1}, 'abc')
        self._assert_not_almost_equal_both({'a': 1}, [1, ])

    def test_assert_almost_equal_dict_like_object(self):
        # Anything exposing keys()/__getitem__ is compared like a dict.
        class DictLikeObj(object):
            def keys(self):
                return ('a', )

            def __getitem__(self, item):
                if item == 'a':
                    return 1

        self._assert_almost_equal_both({'a': 1}, DictLikeObj(),
                                       check_dtype=False)
        self._assert_not_almost_equal_both({'a': 2}, DictLikeObj(),
                                           check_dtype=False)

    def test_assert_almost_equal_strings(self):
        self._assert_almost_equal_both('abc', 'abc')
        self._assert_not_almost_equal_both('abc', 'abcd')
        self._assert_not_almost_equal_both('abc', 'abd')
        self._assert_not_almost_equal_both('abc', 1)
        self._assert_not_almost_equal_both('abc', [1, ])

    def test_assert_almost_equal_iterables(self):
        self._assert_almost_equal_both([1, 2, 3], [1, 2, 3])
        self._assert_almost_equal_both(np.array([1, 2, 3]),
                                       np.array([1, 2, 3]))
        # class / dtype are different
        self._assert_not_almost_equal_both(np.array([1, 2, 3]), [1, 2, 3])
        self._assert_not_almost_equal_both(np.array([1, 2, 3]),
                                           np.array([1., 2., 3.]))
        # Can't compare generators
        self._assert_not_almost_equal_both(iter([1, 2, 3]), [1, 2, 3])
        self._assert_not_almost_equal_both([1, 2, 3], [1, 2, 4])
        self._assert_not_almost_equal_both([1, 2, 3], [1, 2, 3, 4])
        self._assert_not_almost_equal_both([1, 2, 3], 1)

    def test_assert_almost_equal_null(self):
        # None equals only None; NaN is distinct from None and 0.
        self._assert_almost_equal_both(None, None)
        self._assert_not_almost_equal_both(None, np.NaN)
        self._assert_not_almost_equal_both(None, 0)
        self._assert_not_almost_equal_both(np.NaN, 0)

    def test_assert_almost_equal_inf(self):
        # Infinities compare equal to themselves (scalar and in arrays);
        # object arrays treat None/NaN as interchangeable nulls here.
        self._assert_almost_equal_both(np.inf, np.inf)
        self._assert_almost_equal_both(np.inf, float("inf"))
        self._assert_not_almost_equal_both(np.inf, 0)
        self._assert_almost_equal_both(np.array([np.inf, np.nan, -np.inf]),
                                       np.array([np.inf, np.nan, -np.inf]))
        self._assert_almost_equal_both(np.array([np.inf, None, -np.inf],
                                                dtype=np.object_),
                                       np.array([np.inf, np.nan, -np.inf],
                                                dtype=np.object_))

    def test_assert_almost_equal_pandas(self):
        # Index/Series/DataFrame inputs dispatch to the pandas-aware
        # comparison paths.
        tm.assert_almost_equal(pd.Index([1., 1.1]),
                               pd.Index([1., 1.100001]))
        tm.assert_almost_equal(pd.Series([1., 1.1]),
                               pd.Series([1., 1.100001]))
        tm.assert_almost_equal(pd.DataFrame({'a': [1., 1.1]}),
                               pd.DataFrame({'a': [1., 1.100001]}))

    def test_assert_almost_equal_object(self):
        # Object lists of identical Timestamps compare equal.
        a = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-01')]
        b = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-01')]
        self._assert_almost_equal_both(a, b)
class TestUtilTesting(object):
    """Tests for ``pandas.util.testing.raise_with_traceback``."""

    def test_raise_with_traceback(self):
        # Replace an in-flight exception with a new one, letting
        # raise_with_traceback pick up the active traceback itself.
        with tm.assert_raises_regex(LookupError, "error_text"):
            try:
                raise ValueError("THIS IS AN ERROR")
            except ValueError as exc:
                exc = LookupError("error_text")
                raise_with_traceback(exc)

        # Same replacement, but hand the traceback over explicitly.
        with tm.assert_raises_regex(LookupError, "error_text"):
            try:
                raise ValueError("This is another error")
            except ValueError:
                new_err = LookupError("error_text")
                _, _, active_tb = sys.exc_info()
                raise_with_traceback(new_err, active_tb)
class TestAssertNumpyArrayEqual(object):
    """Tests that ``assert_numpy_array_equal`` / ``assert_almost_equal``
    raise AssertionError with the expected, regex-matched messages.

    The ``expected`` literals are regex patterns (brackets/parens are
    escaped) matched against the raised message text.
    """

    @td.skip_if_windows
    def test_numpy_array_equal_message(self):
        # Shape mismatch.
        expected = """numpy array are different
numpy array shapes are different
\\[left\\]: \\(2,\\)
\\[right\\]: \\(3,\\)"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]))

        # scalar comparison
        expected = """Expected type """
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(1, 2)
        expected = """expected 2\\.00000 but got 1\\.00000, with decimal 5"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(1, 2)

        # array / scalar array comparison
        expected = """numpy array are different
numpy array classes are different
\\[left\\]: ndarray
\\[right\\]: int"""
        with tm.assert_raises_regex(AssertionError, expected):
            # numpy_array_equal only accepts np.ndarray
            assert_numpy_array_equal(np.array([1]), 1)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([1]), 1)

        # scalar / array comparison
        expected = """numpy array are different
numpy array classes are different
\\[left\\]: int
\\[right\\]: ndarray"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(1, np.array([1]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(1, np.array([1]))

        # Value mismatch including NaNs (NaN != NaN positionally here).
        expected = """numpy array are different
numpy array values are different \\(66\\.66667 %\\)
\\[left\\]: \\[nan, 2\\.0, 3\\.0\\]
\\[right\\]: \\[1\\.0, nan, 3\\.0\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([np.nan, 2, 3]),
                                     np.array([1, np.nan, 3]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([np.nan, 2, 3]),
                                np.array([1, np.nan, 3]))

        # Plain integer value mismatch.
        expected = """numpy array are different
numpy array values are different \\(50\\.0 %\\)
\\[left\\]: \\[1, 2\\]
\\[right\\]: \\[1, 3\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([1, 2]), np.array([1, 3]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([1, 2]), np.array([1, 3]))

        # Tiny float difference: exact comparison fails, almost-equal
        # passes.
        expected = """numpy array are different
numpy array values are different \\(50\\.0 %\\)
\\[left\\]: \\[1\\.1, 2\\.000001\\]
\\[right\\]: \\[1\\.1, 2.0\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(
                np.array([1.1, 2.000001]), np.array([1.1, 2.0]))
        # must pass
        assert_almost_equal(np.array([1.1, 2.000001]), np.array([1.1, 2.0]))

        # 2-D mismatch, one of six elements differing.
        expected = """numpy array are different
numpy array values are different \\(16\\.66667 %\\)
\\[left\\]: \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\]
\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([[1, 2], [3, 4], [5, 6]]),
                                     np.array([[1, 3], [3, 4], [5, 6]]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([[1, 2], [3, 4], [5, 6]]),
                                np.array([[1, 3], [3, 4], [5, 6]]))

        # 2-D mismatch, one of four elements differing.
        expected = """numpy array are different
numpy array values are different \\(25\\.0 %\\)
\\[left\\]: \\[\\[1, 2\\], \\[3, 4\\]\\]
\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([[1, 2], [3, 4]]),
                                     np.array([[1, 3], [3, 4]]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([[1, 2], [3, 4]]),
                                np.array([[1, 3], [3, 4]]))

        # allow to overwrite message
        expected = """Index are different
Index shapes are different
\\[left\\]: \\(2,\\)
\\[right\\]: \\(3,\\)"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]),
                                     obj='Index')
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]),
                                obj='Index')

    def test_numpy_array_equal_unicode_message(self):
        # Test ensures that `assert_numpy_array_equals` raises the right
        # exception when comparing np.arrays containing differing
        # unicode objects (#20503)
        expected = """numpy array are different
numpy array values are different \\(33\\.33333 %\\)
\\[left\\]: \\[á, à, ä\\]
\\[right\\]: \\[á, à, å\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([u'á', u'à', u'ä']),
                                     np.array([u'á', u'à', u'å']))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([u'á', u'à', u'ä']),
                                np.array([u'á', u'à', u'å']))

    @td.skip_if_windows
    def test_numpy_array_equal_object_message(self):
        # Object arrays of Timestamps report differing values.
        a = np.array([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-01')])
        b = np.array([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')])
        expected = """numpy array are different
numpy array values are different \\(50\\.0 %\\)
\\[left\\]: \\[2011-01-01 00:00:00, 2011-01-01 00:00:00\\]
\\[right\\]: \\[2011-01-01 00:00:00, 2011-01-02 00:00:00\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(a, b)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(a, b)

    def test_numpy_array_equal_copy_flag(self):
        # check_same='same' requires identity; 'copy' forbids it.
        a = np.array([1, 2, 3])
        b = a.copy()
        c = a.view()
        expected = r'array\(\[1, 2, 3\]\) is not array\(\[1, 2, 3\]\)'
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(a, b, check_same='same')
        expected = r'array\(\[1, 2, 3\]\) is array\(\[1, 2, 3\]\)'
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(a, c, check_same='copy')

    def test_assert_almost_equal_iterable_message(self):
        # Plain iterables get the 'Iterable' message variants.
        expected = """Iterable are different
Iterable length are different
\\[left\\]: 2
\\[right\\]: 3"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal([1, 2], [3, 4, 5])
        expected = """Iterable are different
Iterable values are different \\(50\\.0 %\\)
\\[left\\]: \\[1, 2\\]
\\[right\\]: \\[1, 3\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal([1, 2], [1, 3])
class TestAssertIndexEqual(object):
    """Tests for ``assert_index_equal`` failure messages and for its
    ``exact`` / ``check_exact`` / ``check_less_precise`` /
    ``check_categorical`` options.

    The ``expected`` literals are regex patterns matched against the
    raised AssertionError text; ``u?`` tolerates py2/py3 repr
    differences.
    """

    def test_index_equal_message(self):
        # Different number of levels (flat Index vs MultiIndex).
        expected = """Index are different
Index levels are different
\\[left\\]: 1, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\)
\\[right\\]: 2, MultiIndex\\(levels=\\[\\[u?'A', u?'B'\\], \\[1, 2, 3, 4\\]\\],
labels=\\[\\[0, 0, 1, 1\\], \\[0, 1, 2, 3\\]\\]\\)"""
        idx1 = pd.Index([1, 2, 3])
        idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2),
                                          ('B', 3), ('B', 4)])
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2, exact=False)

        # Value mismatch surfaces at the differing MultiIndex level.
        expected = """MultiIndex level \\[1\\] are different
MultiIndex level \\[1\\] values are different \\(25\\.0 %\\)
\\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\)
\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)"""
        idx1 = pd.MultiIndex.from_tuples([('A', 2), ('A', 2),
                                          ('B', 3), ('B', 4)])
        idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2),
                                          ('B', 3), ('B', 4)])
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2, check_exact=False)

        # Length mismatch.
        expected = """Index are different
Index length are different
\\[left\\]: 3, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\)
\\[right\\]: 4, Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)"""
        idx1 = pd.Index([1, 2, 3])
        idx2 = pd.Index([1, 2, 3, 4])
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2, check_exact=False)

        # Class mismatch (Int64Index vs Float64Index) under exact=True.
        expected = """Index are different
Index classes are different
\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\)
\\[right\\]: Float64Index\\(\\[1\\.0, 2\\.0, 3\\.0\\], dtype='float64'\\)"""
        idx1 = pd.Index([1, 2, 3])
        idx2 = pd.Index([1, 2, 3.0])
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2, exact=True)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2, exact=True, check_exact=False)

        # Tiny float difference: exact fails, check_exact=False passes.
        expected = """Index are different
Index values are different \\(33\\.33333 %\\)
\\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\)
\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0000000001\\], dtype='float64'\\)"""
        idx1 = pd.Index([1, 2, 3.])
        idx2 = pd.Index([1, 2, 3.0000000001])
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        # must success
        assert_index_equal(idx1, idx2, check_exact=False)

        # Larger float difference: only check_less_precise passes.
        expected = """Index are different
Index values are different \\(33\\.33333 %\\)
\\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\)
\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0001\\], dtype='float64'\\)"""
        idx1 = pd.Index([1, 2, 3.])
        idx2 = pd.Index([1, 2, 3.0001])
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2, check_exact=False)
        # must success
        assert_index_equal(idx1, idx2, check_exact=False,
                           check_less_precise=True)

        # Integer value mismatch fails regardless of precision options.
        expected = """Index are different
Index values are different \\(33\\.33333 %\\)
\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\)
\\[right\\]: Int64Index\\(\\[1, 2, 4\\], dtype='int64'\\)"""
        idx1 = pd.Index([1, 2, 3])
        idx2 = pd.Index([1, 2, 4])
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2, check_less_precise=True)

        # MultiIndex level value mismatch with check_exact=False.
        expected = """MultiIndex level \\[1\\] are different
MultiIndex level \\[1\\] values are different \\(25\\.0 %\\)
\\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\)
\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)"""
        idx1 = pd.MultiIndex.from_tuples([('A', 2), ('A', 2),
                                          ('B', 3), ('B', 4)])
        idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2),
                                          ('B', 3), ('B', 4)])
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2, check_exact=False)

    def test_index_equal_metadata_message(self):
        # Differing 'names' metadata is reported.
        expected = """Index are different
Attribute "names" are different
\\[left\\]: \\[None\\]
\\[right\\]: \\[u?'x'\\]"""
        idx1 = pd.Index([1, 2, 3])
        idx2 = pd.Index([1, 2, 3], name='x')
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2)

        # same name, should pass
        assert_index_equal(pd.Index([1, 2, 3], name=np.nan),
                           pd.Index([1, 2, 3], name=np.nan))
        assert_index_equal(pd.Index([1, 2, 3], name=pd.NaT),
                           pd.Index([1, 2, 3], name=pd.NaT))

        # nan and NaT are distinct name values.
        expected = """Index are different
Attribute "names" are different
\\[left\\]: \\[nan\\]
\\[right\\]: \\[NaT\\]"""
        idx1 = pd.Index([1, 2, 3], name=np.nan)
        idx2 = pd.Index([1, 2, 3], name=pd.NaT)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2)

    def test_categorical_index_equality(self):
        # Differing category sets produce a dtype mismatch.
        expected = """Index are different
Attribute "dtype" are different
\\[left\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b'\\], ordered=False\\)
\\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], \
ordered=False\\)"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])),
                               pd.Index(pd.Categorical(['a', 'b'],
                                        categories=['a', 'b', 'c'])))

    def test_categorical_index_equality_relax_categories_check(self):
        # check_categorical=False ignores the differing category sets.
        assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])),
                           pd.Index(pd.Categorical(['a', 'b'],
                                    categories=['a', 'b', 'c'])),
                           check_categorical=False)
class TestAssertSeriesEqual(object):
    """Tests for ``assert_series_equal``: symmetric equality helpers,
    precision options, index dtype checks and failure messages."""

    def _assert_equal(self, x, y, **kwargs):
        # Helper: equality must hold in both argument orders.
        assert_series_equal(x, y, **kwargs)
        assert_series_equal(y, x, **kwargs)

    def _assert_not_equal(self, a, b, **kwargs):
        # Helper: inequality must raise in both argument orders.
        pytest.raises(AssertionError, assert_series_equal, a, b, **kwargs)
        pytest.raises(AssertionError, assert_series_equal, b, a, **kwargs)

    def test_equal(self):
        self._assert_equal(Series(range(3)), Series(range(3)))
        self._assert_equal(Series(list('abc')), Series(list('abc')))
        self._assert_equal(Series(list(u'áàä')), Series(list(u'áàä')))

    def test_not_equal(self):
        # Value, dtype and index differences must all be detected.
        self._assert_not_equal(Series(range(3)), Series(range(3)) + 1)
        self._assert_not_equal(Series(list('abc')), Series(list('xyz')))
        self._assert_not_equal(Series(list(u'áàä')), Series(list(u'éèë')))
        self._assert_not_equal(Series(list(u'áàä')), Series(list(b'aaa')))
        self._assert_not_equal(Series(range(3)), Series(range(4)))
        self._assert_not_equal(
            Series(range(3)), Series(
                range(3), dtype='float64'))
        self._assert_not_equal(
            Series(range(3)), Series(
                range(3), index=[1, 2, 4]))
        # ATM meta data is not checked in assert_series_equal
        # self._assert_not_equal(Series(range(3)),Series(range(3),name='foo'),check_names=True)

    def test_less_precise(self):
        # check_less_precise relaxes float comparison; note the bare
        # positional 10 below is passed through to assert_series_equal
        # and still triggers a failure.
        s1 = Series([0.12345], dtype='float64')
        s2 = Series([0.12346], dtype='float64')
        pytest.raises(AssertionError, assert_series_equal, s1, s2)
        self._assert_equal(s1, s2, check_less_precise=True)
        for i in range(4):
            self._assert_equal(s1, s2, check_less_precise=i)
        pytest.raises(AssertionError, assert_series_equal, s1, s2, 10)

        s1 = Series([0.12345], dtype='float32')
        s2 = Series([0.12346], dtype='float32')
        pytest.raises(AssertionError, assert_series_equal, s1, s2)
        self._assert_equal(s1, s2, check_less_precise=True)
        for i in range(4):
            self._assert_equal(s1, s2, check_less_precise=i)
        pytest.raises(AssertionError, assert_series_equal, s1, s2, 10)

        # even less than less precise
        s1 = Series([0.1235], dtype='float32')
        s2 = Series([0.1236], dtype='float32')
        pytest.raises(AssertionError, assert_series_equal, s1, s2)
        pytest.raises(AssertionError, assert_series_equal, s1, s2, True)

    def test_index_dtype(self):
        # int vs float index dtype fails under check_index_type.
        df1 = DataFrame.from_records(
            {'a': [1, 2], 'c': ['l1', 'l2']}, index=['a'])
        df2 = DataFrame.from_records(
            {'a': [1.0, 2.0], 'c': ['l1', 'l2']}, index=['a'])
        self._assert_not_equal(df1.c, df2.c, check_index_type=True)

    def test_multiindex_dtype(self):
        # Same, but the dtype difference is in one MultiIndex level.
        df1 = DataFrame.from_records(
            {'a': [1, 2], 'b': [2.1, 1.5],
             'c': ['l1', 'l2']}, index=['a', 'b'])
        df2 = DataFrame.from_records(
            {'a': [1.0, 2.0], 'b': [2.1, 1.5],
             'c': ['l1', 'l2']}, index=['a', 'b'])
        self._assert_not_equal(df1.c, df2.c, check_index_type=True)

    def test_series_equal_message(self):
        # Length mismatch message.
        expected = """Series are different
Series length are different
\\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\)
\\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 3, 4]))

        # Value mismatch message; integer mismatch fails even with
        # check_less_precise.
        expected = """Series are different
Series values are different \\(33\\.33333 %\\)
\\[left\\]: \\[1, 2, 3\\]
\\[right\\]: \\[1, 2, 4\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]),
                                check_less_precise=True)

    def test_categorical_series_equality(self):
        # Differing category sets produce a dtype mismatch.
        expected = """Attributes are different
Attribute "dtype" are different
\\[left\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b'\\], ordered=False\\)
\\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], \
ordered=False\\)"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])),
                                pd.Series(pd.Categorical(['a', 'b'],
                                          categories=['a', 'b', 'c'])))

    def test_categorical_series_equality_relax_categories_check(self):
        # check_categorical=False ignores the differing category sets.
        assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])),
                            pd.Series(pd.Categorical(['a', 'b'],
                                      categories=['a', 'b', 'c'])),
                            check_categorical=False)
class TestAssertFrameEqual(object):
    # Tests for assert_frame_equal: symmetric (in)equality helpers plus
    # checks that failure messages identify exactly what differs.  The
    # ``expected`` strings below are regular expressions, hence the doubled
    # backslashes escaping brackets, parentheses and dots.

    def _assert_equal(self, x, y, **kwargs):
        # Equality must hold regardless of argument order.
        assert_frame_equal(x, y, **kwargs)
        assert_frame_equal(y, x, **kwargs)

    def _assert_not_equal(self, a, b, **kwargs):
        # Inequality must raise AssertionError regardless of argument order.
        pytest.raises(AssertionError, assert_frame_equal, a, b, **kwargs)
        pytest.raises(AssertionError, assert_frame_equal, b, a, **kwargs)

    def test_equal_with_different_row_order(self):
        # check_like=True ignores row-column orderings
        df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
                           index=['a', 'b', 'c'])
        df2 = pd.DataFrame({'A': [3, 2, 1], 'B': [6, 5, 4]},
                           index=['c', 'b', 'a'])

        self._assert_equal(df1, df2, check_like=True)
        self._assert_not_equal(df1, df2)

    def test_not_equal_with_different_shape(self):
        # Frames with different numbers of rows can never compare equal.
        self._assert_not_equal(pd.DataFrame({'A': [1, 2, 3]}),
                               pd.DataFrame({'A': [1, 2, 3, 4]}))

    def test_index_dtype(self):
        # int64 vs float64 index dtype differs under check_index_type=True.
        df1 = DataFrame.from_records(
            {'a': [1, 2], 'c': ['l1', 'l2']}, index=['a'])
        df2 = DataFrame.from_records(
            {'a': [1.0, 2.0], 'c': ['l1', 'l2']}, index=['a'])
        self._assert_not_equal(df1, df2, check_index_type=True)

    def test_multiindex_dtype(self):
        # Same dtype check as above, with a two-level index.
        df1 = DataFrame.from_records(
            {'a': [1, 2], 'b': [2.1, 1.5],
             'c': ['l1', 'l2']}, index=['a', 'b'])
        df2 = DataFrame.from_records(
            {'a': [1.0, 2.0], 'b': [2.1, 1.5],
             'c': ['l1', 'l2']}, index=['a', 'b'])
        self._assert_not_equal(df1, df2, check_index_type=True)

    def test_empty_dtypes(self):
        # For empty frames, dtype differences only matter with
        # check_dtype=True.
        df1 = pd.DataFrame(columns=["col1", "col2"])
        df1["col1"] = df1["col1"].astype('int64')
        df2 = pd.DataFrame(columns=["col1", "col2"])
        self._assert_equal(df1, df2, check_dtype=False)
        self._assert_not_equal(df1, df2, check_dtype=True)

    def test_frame_equal_message(self):
        # Shape mismatch is reported before any value comparison.
        expected = """DataFrame are different
DataFrame shape mismatch
\\[left\\]: \\(3, 2\\)
\\[right\\]: \\(3, 1\\)"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}),
                               pd.DataFrame({'A': [1, 2, 3]}))

        # Differing index values, with the mismatch percentage.
        expected = """DataFrame\\.index are different
DataFrame\\.index values are different \\(33\\.33333 %\\)
\\[left\\]: Index\\(\\[u?'a', u?'b', u?'c'\\], dtype='object'\\)
\\[right\\]: Index\\(\\[u?'a', u?'b', u?'d'\\], dtype='object'\\)"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
                                            index=['a', 'b', 'c']),
                               pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
                                            index=['a', 'b', 'd']))

        # Differing column labels.
        expected = """DataFrame\\.columns are different
DataFrame\\.columns values are different \\(50\\.0 %\\)
\\[left\\]: Index\\(\\[u?'A', u?'B'\\], dtype='object'\\)
\\[right\\]: Index\\(\\[u?'A', u?'b'\\], dtype='object'\\)"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
                                            index=['a', 'b', 'c']),
                               pd.DataFrame({'A': [1, 2, 3], 'b': [4, 5, 6]},
                                            index=['a', 'b', 'c']))

        # Differing cell values, reported per column; the same message
        # must appear whether or not the comparison is done block-wise.
        expected = """DataFrame\\.iloc\\[:, 1\\] are different
DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\)
\\[left\\]: \\[4, 5, 6\\]
\\[right\\]: \\[4, 5, 7\\]"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}),
                               pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]}))

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}),
                               pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]}),
                               by_blocks=True)

    def test_frame_equal_message_unicode(self):
        # Test ensures that `assert_frame_equals` raises the right
        # exception when comparing DataFrames containing differing
        # unicode objects (#20503)
        expected = """DataFrame\\.iloc\\[:, 1\\] are different
DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\)
\\[left\\]: \\[é, è, ë\\]
\\[right\\]: \\[é, è, e̊\\]"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'],
                                             'E': [u'é', u'è', u'ë']}),
                               pd.DataFrame({'A': [u'á', u'à', u'ä'],
                                             'E': [u'é', u'è', u'e̊']}))

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'],
                                             'E': [u'é', u'è', u'ë']}),
                               pd.DataFrame({'A': [u'á', u'à', u'ä'],
                                             'E': [u'é', u'è', u'e̊']}),
                               by_blocks=True)

        # Same check with plain-ASCII values on the right-hand side.
        expected = """DataFrame\\.iloc\\[:, 0\\] are different
DataFrame\\.iloc\\[:, 0\\] values are different \\(100\\.0 %\\)
\\[left\\]: \\[á, à, ä\\]
\\[right\\]: \\[a, a, a\\]"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'],
                                             'E': [u'é', u'è', u'ë']}),
                               pd.DataFrame({'A': ['a', 'a', 'a'],
                                             'E': ['e', 'e', 'e']}))

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'],
                                             'E': [u'é', u'è', u'ë']}),
                               pd.DataFrame({'A': ['a', 'a', 'a'],
                                             'E': ['e', 'e', 'e']}),
                               by_blocks=True)
class TestAssertCategoricalEqual(object):
    # Failure messages from tm.assert_categorical_equal must pinpoint which
    # component differs: the categories, the codes, or ``ordered``.  The
    # ``expected`` strings are regexes (escaped brackets/parens/dots).

    def test_categorical_equal_message(self):
        # Different category sets.
        expected = """Categorical\\.categories are different
Categorical\\.categories values are different \\(25\\.0 %\\)
\\[left\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)
\\[right\\]: Int64Index\\(\\[1, 2, 3, 5\\], dtype='int64'\\)"""

        a = pd.Categorical([1, 2, 3, 4])
        b = pd.Categorical([1, 2, 3, 5])
        with tm.assert_raises_regex(AssertionError, expected):
            tm.assert_categorical_equal(a, b)

        # Same categories, different codes (value order differs).
        expected = """Categorical\\.codes are different
Categorical\\.codes values are different \\(50\\.0 %\\)
\\[left\\]: \\[0, 1, 3, 2\\]
\\[right\\]: \\[0, 1, 2, 3\\]"""

        a = pd.Categorical([1, 2, 4, 3], categories=[1, 2, 3, 4])
        b = pd.Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4])
        with tm.assert_raises_regex(AssertionError, expected):
            tm.assert_categorical_equal(a, b)

        # Same values, different ``ordered`` attribute.
        expected = """Categorical are different
Attribute "ordered" are different
\\[left\\]: False
\\[right\\]: True"""

        a = pd.Categorical([1, 2, 3, 4], ordered=False)
        b = pd.Categorical([1, 2, 3, 4], ordered=True)
        with tm.assert_raises_regex(AssertionError, expected):
            tm.assert_categorical_equal(a, b)
class TestRNGContext(object):
    """Nested RNGContext managers restore the outer seed's stream on exit."""

    def test_RNGContext(self):
        outer_first = 1.764052345967664   # first randn() under seed 0
        inner_first = 1.6243453636632417  # first randn() under seed 1

        with RNGContext(0):
            with RNGContext(1):
                assert np.random.randn() == inner_first
            # leaving the inner context must resume seed 0's stream
            assert np.random.randn() == outer_first
class TestLocale(object):
    """Smoke test that locale enumeration works on non-Windows platforms."""

    def test_locale(self):
        # GH9744
        if sys.platform == 'win32':
            pytest.skip(
                "skipping on win platforms as locale not available")

        # every non-Windows system should expose at least one locale
        available = tm.get_locales()
        assert len(available) >= 1
def test_datapath_missing(datapath, request):
    """The datapath fixture raises for missing files under strict mode."""
    if not request.config.getoption("--strict-data-files"):
        pytest.skip("Need to set '--strict-data-files'")

    # a nonexistent file must raise rather than return a bogus path
    with pytest.raises(ValueError):
        datapath('not_a_file')

    # an existing data file resolves to its repo-relative location
    found = datapath('data', 'iris.csv')
    assert found == os.path.join('pandas', 'tests', 'data', 'iris.csv')
# @@ -0,0 +1,512 @@  -- stray git-diff hunk header (boundary of a second concatenated file); not valid Python, commented out
# -*- coding: utf-8 -*-
import os
import locale
import codecs
import sys
from uuid import uuid4
from collections import OrderedDict
import pytest
from pandas.compat import intern, PY3
import pandas.core.common as com
from pandas.util._move import move_into_mutable_buffer, BadMove, stolenbuf
from pandas.util._decorators import deprecate_kwarg, make_signature
from pandas.util._validators import (validate_args, validate_kwargs,
validate_args_and_kwargs,
validate_bool_kwarg)
import pandas.util.testing as tm
import pandas.util._test_decorators as td
class TestDecorators(object):
    """Exercise ``deprecate_kwarg`` in each of its supported modes."""

    def setup_method(self, method):
        # One decorated function per mode: plain rename, dict mapping,
        # callable mapping, and outright removal (new name is None).

        @deprecate_kwarg('old', 'new')
        def _f1(new=False):
            return new

        @deprecate_kwarg('old', 'new', {'yes': True, 'no': False})
        def _f2(new=False):
            return new

        @deprecate_kwarg('old', 'new', lambda x: x + 1)
        def _f3(new=0):
            return new

        @deprecate_kwarg('old', None)
        def _f4(old=True, unchanged=True):
            return old

        self.f1, self.f2, self.f3, self.f4 = _f1, _f2, _f3, _f4

    def test_deprecate_kwarg(self):
        # The old keyword warns but the value is forwarded unchanged.
        val = 78
        with tm.assert_produces_warning(FutureWarning):
            result = self.f1(old=val)
        assert result is val

        # The new keyword passes through silently.
        with tm.assert_produces_warning(None):
            self.f1(new=val)

    def test_dict_deprecate_kwarg(self):
        # 'yes' is translated to True by the mapping dict.
        with tm.assert_produces_warning(FutureWarning):
            result = self.f2(old='yes')
        assert result

    def test_missing_deprecate_kwarg(self):
        # Values absent from the mapping dict pass through unchanged.
        with tm.assert_produces_warning(FutureWarning):
            result = self.f2(old='bogus')
        assert result == 'bogus'

    def test_callable_deprecate_kwarg(self):
        # The callable mapping transforms the deprecated value ...
        num = 5
        with tm.assert_produces_warning(FutureWarning):
            result = self.f3(old=num)
        assert result == num + 1

        # ... and errors raised by the callable propagate.
        with pytest.raises(TypeError):
            self.f3(old='hello')

    def test_bad_deprecate_kwarg(self):
        # A mapping that is neither dict nor callable is rejected at
        # decoration time.
        with pytest.raises(TypeError):

            @deprecate_kwarg('old', 'new', 0)
            def f4(new=None):
                pass

    def test_deprecate_keyword(self):
        # new_arg_name=None deprecates the keyword outright.
        val = 9
        with tm.assert_produces_warning(FutureWarning):
            result = self.f4(old=val)
        assert result is val

        with tm.assert_produces_warning(None):
            result = self.f4(unchanged=val)
        assert result is True
def test_rands():
    # rands(n) produces a random string of exactly n characters.
    generated = tm.rands(10)
    assert len(generated) == 10
def test_rands_array():
    # 1-D: shape follows ``size`` and each entry has the requested width.
    strings = tm.rands_array(5, size=10)
    assert strings.shape == (10,)
    assert len(strings[0]) == 5

    # 2-D behaves the same way.
    strings = tm.rands_array(7, size=(10, 10))
    assert strings.shape == (10, 10)
    assert len(strings[1, 1]) == 7
class TestValidateArgs(object):
    """Tests for pandas.util._validators.validate_args."""

    # function name reported in generated error messages
    fname = 'func'

    def test_bad_min_fname_arg_count(self):
        # A negative max_fname_arg_count is itself invalid.
        msg = "'max_fname_arg_count' must be non-negative"
        with tm.assert_raises_regex(ValueError, msg):
            validate_args(self.fname, (None,), -1, 'foo')

    def test_bad_arg_length_max_value_single(self):
        args = (None, None)
        compat_args = ('foo',)
        min_count = 0
        allowed = len(compat_args) + min_count
        given = len(args) + min_count

        # Singular "argument" when only one is permitted.
        msg = (r"{fname}\(\) takes at most {max_length} "
               r"argument \({actual_length} given\)"
               .format(fname=self.fname, max_length=allowed,
                       actual_length=given))
        with tm.assert_raises_regex(TypeError, msg):
            validate_args(self.fname, args, min_count, compat_args)

    def test_bad_arg_length_max_value_multiple(self):
        args = (None, None)
        compat_args = dict(foo=None)
        min_count = 2
        allowed = len(compat_args) + min_count
        given = len(args) + min_count

        # Plural "arguments" when more than one is permitted.
        msg = (r"{fname}\(\) takes at most {max_length} "
               r"arguments \({actual_length} given\)"
               .format(fname=self.fname, max_length=allowed,
                       actual_length=given))
        with tm.assert_raises_regex(TypeError, msg):
            validate_args(self.fname, args, min_count, compat_args)

    def test_not_all_defaults(self):
        bad_arg = 'foo'
        msg = ("the '{arg}' parameter is not supported "
               r"in the pandas implementation of {func}\(\)".
               format(arg=bad_arg, func=self.fname))

        compat_args = OrderedDict([('foo', 2), ('bar', -1), ('baz', 3)])
        arg_vals = (1, -1, 3)

        # Every prefix that includes the non-default 'foo' value must fail.
        for i in range(1, 3):
            with tm.assert_raises_regex(ValueError, msg):
                validate_args(self.fname, arg_vals[:i], 2, compat_args)

    def test_validation(self):
        # No exceptions should be thrown
        validate_args(self.fname, (None,), 2, dict(out=None))

        compat_args = OrderedDict([('axis', 1), ('out', None)])
        validate_args(self.fname, (1, None), 2, compat_args)
class TestValidateKwargs(object):
    """Tests for validate_kwargs and validate_bool_kwarg."""

    # function name reported in generated error messages
    fname = 'func'

    def test_bad_kwarg(self):
        goodarg = 'f'
        badarg = goodarg + 'o'

        # compat_args knows 'f' and 'foo'; the supplied 'fo' is unexpected.
        compat_args = OrderedDict([(goodarg, 'foo'), (badarg + 'o', 'bar')])
        kwargs = {goodarg: 'foo', badarg: 'bar'}

        msg = (r"{fname}\(\) got an unexpected "
               r"keyword argument '{arg}'".format(
                   fname=self.fname, arg=badarg))
        with tm.assert_raises_regex(TypeError, msg):
            validate_kwargs(self.fname, kwargs, compat_args)

    def test_not_all_none(self):
        bad_arg = 'foo'
        msg = (r"the '{arg}' parameter is not supported "
               r"in the pandas implementation of {func}\(\)".
               format(arg=bad_arg, func=self.fname))

        compat_args = OrderedDict([('foo', 1), ('bar', 's'), ('baz', None)])
        kwarg_keys = ('foo', 'bar', 'baz')
        kwarg_vals = (2, 's', None)

        # Every kwarg subset containing the non-default 'foo' must fail.
        for i in range(1, 3):
            kwargs = dict(zip(kwarg_keys[:i], kwarg_vals[:i]))
            with tm.assert_raises_regex(ValueError, msg):
                validate_kwargs(self.fname, kwargs, compat_args)

    def test_validation(self):
        # No exceptions should be thrown
        compat_args = OrderedDict([('f', None), ('b', 1), ('ba', 's')])
        validate_kwargs(self.fname, dict(f=None, b=1), compat_args)

    def test_validate_bool_kwarg(self):
        arg_names = ['inplace', 'copy']
        invalid_values = [1, "True", [1, 2, 3], 5.0]
        valid_values = [True, False, None]

        for name in arg_names:
            # Non-bool (and non-None) values must be rejected ...
            for value in invalid_values:
                msg = ("For argument \"%s\" expected type bool, "
                       "received type %s" % (name, type(value).__name__))
                with tm.assert_raises_regex(ValueError, msg):
                    validate_bool_kwarg(value, name)
            # ... while True/False/None are returned unchanged.
            for value in valid_values:
                assert validate_bool_kwarg(value, name) == value
class TestValidateKwargsAndArgs(object):
    """Tests for validate_args_and_kwargs."""

    # function name reported in generated error messages
    fname = 'func'

    def test_invalid_total_length_max_length_one(self):
        compat_args = ('foo',)
        kwargs = {'foo': 'FOO'}
        args = ('FoO', 'BaZ')

        min_count = 0
        allowed = len(compat_args) + min_count
        given = len(kwargs) + len(args) + min_count

        # Singular "argument" when only one is permitted.
        msg = (r"{fname}\(\) takes at most {max_length} "
               r"argument \({actual_length} given\)"
               .format(fname=self.fname, max_length=allowed,
                       actual_length=given))
        with tm.assert_raises_regex(TypeError, msg):
            validate_args_and_kwargs(self.fname, args, kwargs,
                                     min_count, compat_args)

    def test_invalid_total_length_max_length_multiple(self):
        compat_args = ('foo', 'bar', 'baz')
        kwargs = {'foo': 'FOO', 'bar': 'BAR'}
        args = ('FoO', 'BaZ')

        min_count = 2
        allowed = len(compat_args) + min_count
        given = len(kwargs) + len(args) + min_count

        # Plural "arguments" when more than one is permitted.
        msg = (r"{fname}\(\) takes at most {max_length} "
               r"arguments \({actual_length} given\)"
               .format(fname=self.fname, max_length=allowed,
                       actual_length=given))
        with tm.assert_raises_regex(TypeError, msg):
            validate_args_and_kwargs(self.fname, args, kwargs,
                                     min_count, compat_args)

    def test_no_args_with_kwargs(self):
        bad_arg = 'bar'
        min_count = 2

        compat_args = OrderedDict([('foo', -5), (bad_arg, 1)])
        msg = (r"the '{arg}' parameter is not supported "
               r"in the pandas implementation of {func}\(\)".
               format(arg=bad_arg, func=self.fname))

        # Non-default value supplied via kwargs ...
        with tm.assert_raises_regex(ValueError, msg):
            validate_args_and_kwargs(self.fname, (), {'foo': -5, bad_arg: 2},
                                     min_count, compat_args)

        # ... or positionally: both must be rejected.
        with tm.assert_raises_regex(ValueError, msg):
            validate_args_and_kwargs(self.fname, (-5, 2), {},
                                     min_count, compat_args)

    def test_duplicate_argument(self):
        min_count = 2
        compat_args = OrderedDict([('foo', None), ('bar', None),
                                   ('baz', None)])
        kwargs = {'foo': None, 'bar': None}
        args = (None,)  # duplicate value for 'foo'

        msg = (r"{fname}\(\) got multiple values for keyword "
               r"argument '{arg}'".format(fname=self.fname, arg='foo'))
        with tm.assert_raises_regex(TypeError, msg):
            validate_args_and_kwargs(self.fname, args, kwargs,
                                     min_count, compat_args)

    def test_validation(self):
        # No exceptions should be thrown
        compat_args = OrderedDict([('foo', 1), ('bar', None), ('baz', -2)])
        validate_args_and_kwargs(self.fname, (1, None), {'baz': -2},
                                 2, compat_args)
class TestMove(object):
    # Tests for pandas.util._move.move_into_mutable_buffer.  A move is only
    # legal when the source bytes object holds exactly one reference, so the
    # exact expression shapes below matter: binding intermediates would add
    # references and change the outcome.  Edit with care.

    def test_cannot_create_instance_of_stolenbuffer(self):
        """Stolen buffers need to be created through the smart constructor
        ``move_into_mutable_buffer`` which has a bunch of checks in it.
        """
        msg = "cannot create 'pandas.util._move.stolenbuf' instances"
        with tm.assert_raises_regex(TypeError, msg):
            stolenbuf()

    def test_more_than_one_ref(self):
        """Test case for when we try to use ``move_into_mutable_buffer`` when
        the object being moved has other references.
        """
        b = b'testing'

        with pytest.raises(BadMove) as e:
            def handle_success(type_, value, tb):
                # BadMove must carry the very object that could not be moved
                # (identity check, not equality).
                assert value.args[0] is b
                return type(e).handle_success(e, type_, value, tb)  # super

            e.handle_success = handle_success
            move_into_mutable_buffer(b)

    def test_exactly_one_ref(self):
        """Test case for when the object being moved has exactly one reference.
        """
        b = b'testing'

        # We need to pass an expression on the stack to ensure that there are
        # not extra references hanging around. We cannot rewrite this test as
        #   buf = b[:-3]
        #   as_stolen_buf = move_into_mutable_buffer(buf)
        # because then we would have more than one reference to buf.
        as_stolen_buf = move_into_mutable_buffer(b[:-3])

        # materialize as bytearray to show that it is mutable
        assert bytearray(as_stolen_buf) == b'test'

    @pytest.mark.skipif(PY3, reason='bytes objects cannot be interned in py3')
    def test_interned(self):
        # Interned strings must never be moved, even at refcount one.
        salt = uuid4().hex

        def make_string():
            # We need to actually create a new string so that it has refcount
            # one. We use a uuid so that we know the string could not already
            # be in the intern table.
            return ''.join(('testing: ', salt))

        # This should work, the string has one reference on the stack.
        move_into_mutable_buffer(make_string())

        refcount = [None]  # nonlocal

        def ref_capture(ob):
            # Subtract two because those are the references owned by this
            # frame:
            #   1. The local variables of this stack frame.
            #   2. The python data stack of this stack frame.
            refcount[0] = sys.getrefcount(ob) - 2
            return ob

        with pytest.raises(BadMove):
            # If we intern the string it will still have one reference but now
            # it is in the intern table so if other people intern the same
            # string while the mutable buffer holds the first string they will
            # be the same instance.
            move_into_mutable_buffer(ref_capture(intern(make_string())))  # noqa

        assert refcount[0] == 1
def test_numpy_errstate_is_default():
    """Importing pandas.compat.numpy must not alter numpy's error state."""
    # These have been numpy's global defaults since numpy 1.6.0.
    defaults = {'over': 'warn', 'divide': 'warn', 'invalid': 'warn',
                'under': 'ignore'}
    import numpy as np
    from pandas.compat import numpy  # noqa
    # The errstate should be unchanged after that import.
    assert np.geterr() == defaults
@td.skip_if_windows
class TestLocaleUtils(object):
    # Tests for the locale helpers in pandas.util.testing; skipped wholesale
    # on Windows where the required locales are unavailable.

    @classmethod
    def setup_class(cls):
        # Capture the available locales and the current one so individual
        # tests can skip when the environment cannot support them.
        cls.locales = tm.get_locales()
        cls.current_locale = locale.getlocale()

        if not cls.locales:
            pytest.skip("No locales found")

    @classmethod
    def teardown_class(cls):
        del cls.locales
        del cls.current_locale

    def test_get_locales(self):
        # all systems should have at least a single locale
        assert len(tm.get_locales()) > 0

    def test_get_locales_prefix(self):
        if len(self.locales) == 1:
            pytest.skip("Only a single locale found, no point in "
                        "trying to test filtering locale prefixes")
        first_locale = self.locales[0]
        # filtering by the language prefix must return at least one match
        assert len(tm.get_locales(prefix=first_locale[:2])) > 0

    def test_set_locale(self):
        if len(self.locales) == 1:
            pytest.skip("Only a single locale found, no point in "
                        "trying to test setting another locale")

        if com._all_none(*self.current_locale):
            # Not sure why, but on some travis runs with pytest,
            # getlocale() returned (None, None).
            pytest.skip("Current locale is not set.")

        # LOCALE_OVERRIDE lets CI pin the target locale; otherwise default
        # to Swiss Italian (UTF-8) as a locale unlikely to be current.
        locale_override = os.environ.get('LOCALE_OVERRIDE', None)

        if locale_override is None:
            lang, enc = 'it_CH', 'UTF-8'
        elif locale_override == 'C':
            lang, enc = 'en_US', 'ascii'
        else:
            lang, enc = locale_override.split('.')

        # normalize the encoding name (e.g. 'UTF-8' -> 'utf-8')
        enc = codecs.lookup(enc).name
        new_locale = lang, enc

        if not tm._can_set_locale(new_locale):
            # unsupported locales must raise rather than half-apply
            with pytest.raises(locale.Error):
                with tm.set_locale(new_locale):
                    pass
        else:
            with tm.set_locale(new_locale) as normalized_locale:
                new_lang, new_enc = normalized_locale.split('.')
                # NOTE(review): this normalizes ``enc`` (already normalized
                # above), not ``new_enc`` from the split -- possibly meant to
                # be ``codecs.lookup(new_enc).name``.  The two usually
                # coincide, so confirm before changing.
                new_enc = codecs.lookup(enc).name
                normalized_locale = new_lang, new_enc
                assert normalized_locale == new_locale

        # leaving the context must restore the original locale
        current_locale = locale.getlocale()
        assert current_locale == self.current_locale
def test_make_signature():
    # See GH 17608
    # Without default kwargs, both halves of the signature are identical.
    assert make_signature(validate_kwargs) == (
        ['fname', 'kwargs', 'compat_args'],
        ['fname', 'kwargs', 'compat_args'])

    # With defaults, the first half carries the 'name=default' spellings
    # while the second lists bare parameter names.
    assert make_signature(deprecate_kwarg) == (
        ['old_arg_name', 'new_arg_name', 'mapping=None', 'stacklevel=2'],
        ['old_arg_name', 'new_arg_name', 'mapping', 'stacklevel'])
def test_safe_import(monkeypatch):
    """safe_import returns falsy for missing modules or unmet versions."""
    # Unknown module name, and an impossible version requirement.
    assert not td.safe_import("foo")
    assert not td.safe_import("pandas", min_version="99.99.99")

    # Create dummy module to be imported
    import types
    import sys
    mod_name = "hello123"
    dummy = types.ModuleType(mod_name)
    dummy.__version__ = "1.5"

    # Not importable until registered in sys.modules.
    assert not td.safe_import(mod_name)

    monkeypatch.setitem(sys.modules, mod_name, dummy)
    assert not td.safe_import(mod_name, min_version="2.0")
    assert td.safe_import(mod_name, min_version="1.0")