started work on backend

This commit is contained in:
d3m1g0d
2019-01-21 17:36:00 +01:00
parent a1a8bca34b
commit 9f9a7e4974
4032 changed files with 745079 additions and 0 deletions
@@ -0,0 +1,292 @@
import pytest
import datetime
from warnings import catch_warnings
import numpy as np
import pandas as pd
from pandas import DataFrame, Series, Index, MultiIndex
from pandas.util import hash_array, hash_pandas_object
from pandas.core.util.hashing import hash_tuples, hash_tuple, _hash_scalar
import pandas.util.testing as tm
class TestHashing(object):
    """Tests for the pandas hashing utilities: ``hash_array``,
    ``hash_pandas_object``, ``hash_tuples``/``hash_tuple`` and the
    private ``_hash_scalar`` helper."""

    def setup_method(self, method):
        # Fixture frame exercising every major dtype the hashers must
        # support: int32, float32 (with missing), categorical, object,
        # bool, datetime, tz-aware datetime and timedelta.
        self.df = DataFrame(
            {'i32': np.array([1, 2, 3] * 3, dtype='int32'),
             'f32': np.array([None, 2.5, 3.5] * 3, dtype='float32'),
             'cat': Series(['a', 'b', 'c'] * 3).astype('category'),
             'obj': Series(['d', 'e', 'f'] * 3),
             'bool': np.array([True, False, True] * 3),
             'dt': Series(pd.date_range('20130101', periods=9)),
             'dt_tz': Series(pd.date_range('20130101', periods=9,
                                           tz='US/Eastern')),
             'td': Series(pd.timedelta_range('2000', periods=9))})

    def test_consistency(self):
        # check that our hash doesn't change because of a mistake
        # in the actual code; this is the ground truth
        result = hash_pandas_object(Index(['foo', 'bar', 'baz']))
        expected = Series(np.array([3600424527151052760, 1374399572096150070,
                                    477881037637427054], dtype='uint64'),
                          index=['foo', 'bar', 'baz'])
        tm.assert_series_equal(result, expected)

    def test_hash_array(self):
        # Hashing the same ndarray twice must be deterministic,
        # for every dtype in the fixture frame.
        for name, s in self.df.iteritems():
            a = s.values
            tm.assert_numpy_array_equal(hash_array(a), hash_array(a))

    def test_hash_array_mixed(self):
        # Mixed int/str data hashes via the string representation, so
        # these three spellings of the same values must all agree.
        result1 = hash_array(np.array([3, 4, 'All']))
        result2 = hash_array(np.array(['3', '4', 'All']))
        result3 = hash_array(np.array([3, 4, 'All'], dtype=object))
        tm.assert_numpy_array_equal(result1, result2)
        tm.assert_numpy_array_equal(result1, result3)

    def test_hash_array_errors(self):
        # hash_array requires an ndarray; scalars raise TypeError.
        for val in [5, 'foo', pd.Timestamp('20130101')]:
            pytest.raises(TypeError, hash_array, val)

    def check_equal(self, obj, **kwargs):
        # Helper: hashing the same object twice is deterministic, both
        # with the caller's kwargs and again without any 'index' kwarg.
        a = hash_pandas_object(obj, **kwargs)
        b = hash_pandas_object(obj, **kwargs)
        tm.assert_series_equal(a, b)
        kwargs.pop('index', None)
        a = hash_pandas_object(obj, **kwargs)
        b = hash_pandas_object(obj, **kwargs)
        tm.assert_series_equal(a, b)

    def check_not_equal_with_index(self, obj):
        # check that we are not hashing the same if
        # we include the index
        if not isinstance(obj, Index):
            a = hash_pandas_object(obj, index=True)
            b = hash_pandas_object(obj, index=False)
            if len(obj):
                assert not (a == b).all()

    def test_hash_tuples(self):
        # hash_tuples on a list of tuples must match hashing the
        # equivalent MultiIndex; a single tuple hashes like its
        # one-element list counterpart.
        tups = [(1, 'one'), (1, 'two'), (2, 'one')]
        result = hash_tuples(tups)
        expected = hash_pandas_object(MultiIndex.from_tuples(tups)).values
        tm.assert_numpy_array_equal(result, expected)
        result = hash_tuples(tups[0])
        assert result == expected[0]

    def test_hash_tuple(self):
        # test equivalence between hash_tuples and hash_tuple
        for tup in [(1, 'one'), (1, np.nan), (1.0, pd.NaT, 'A'),
                    ('A', pd.Timestamp("2012-01-01"))]:
            result = hash_tuple(tup)
            expected = hash_tuples([tup])[0]
            assert result == expected

    def test_hash_scalar(self):
        # _hash_scalar must agree with hash_array on a one-element
        # object array, across numeric, bytes/str, datetime-like
        # (naive and tz-aware), period, interval and null scalars.
        for val in [1, 1.4, 'A', b'A', u'A', pd.Timestamp("2012-01-01"),
                    pd.Timestamp("2012-01-01", tz='Europe/Brussels'),
                    datetime.datetime(2012, 1, 1),
                    pd.Timestamp("2012-01-01", tz='EST').to_pydatetime(),
                    pd.Timedelta('1 days'), datetime.timedelta(1),
                    pd.Period('2012-01-01', freq='D'), pd.Interval(0, 1),
                    np.nan, pd.NaT, None]:
            result = _hash_scalar(val)
            expected = hash_array(np.array([val], dtype=object),
                                  categorize=True)
            assert result[0] == expected[0]

    def test_hash_tuples_err(self):
        # Non-list-of-tuples inputs to hash_tuples are rejected.
        for val in [5, 'foo', pd.Timestamp('20130101')]:
            pytest.raises(TypeError, hash_tuples, val)

    def test_multiindex_unique(self):
        # A unique MultiIndex must hash to unique values.
        mi = MultiIndex.from_tuples([(118, 472), (236, 118),
                                     (51, 204), (102, 51)])
        assert mi.is_unique
        result = hash_pandas_object(mi)
        assert result.is_unique

    def test_multiindex_objects(self):
        # Hashing a MultiIndex and its monotonic-sorted reconstruction:
        # both must hash consistently with _hashed_values, and the two
        # hash sets must match up to ordering.
        mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]],
                        labels=[[0, 1, 0, 2], [2, 0, 0, 1]],
                        names=['col1', 'col2'])
        recons = mi._sort_levels_monotonic()
        # these are equal
        assert mi.equals(recons)
        assert Index(mi.values).equals(Index(recons.values))
        # _hashed_values and hash_pandas_object(..., index=False)
        # equivalency
        expected = hash_pandas_object(
            mi, index=False).values
        result = mi._hashed_values
        tm.assert_numpy_array_equal(result, expected)
        expected = hash_pandas_object(
            recons, index=False).values
        result = recons._hashed_values
        tm.assert_numpy_array_equal(result, expected)
        expected = mi._hashed_values
        result = recons._hashed_values
        # values should match, but in different order
        tm.assert_numpy_array_equal(np.sort(result),
                                    np.sort(expected))

    def test_hash_pandas_object(self):
        # Determinism plus index sensitivity across a wide spread of
        # Series/Index/DataFrame/MultiIndex inputs.
        for obj in [Series([1, 2, 3]),
                    Series([1.0, 1.5, 3.2]),
                    Series([1.0, 1.5, np.nan]),
                    Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]),
                    Series(['a', 'b', 'c']),
                    Series(['a', np.nan, 'c']),
                    Series(['a', None, 'c']),
                    Series([True, False, True]),
                    Series(),
                    Index([1, 2, 3]),
                    Index([True, False, True]),
                    DataFrame({'x': ['a', 'b', 'c'], 'y': [1, 2, 3]}),
                    DataFrame(),
                    tm.makeMissingDataframe(),
                    tm.makeMixedDataFrame(),
                    tm.makeTimeDataFrame(),
                    tm.makeTimeSeries(),
                    tm.makeTimedeltaIndex(),
                    tm.makePeriodIndex(),
                    Series(tm.makePeriodIndex()),
                    Series(pd.date_range('20130101',
                                         periods=3, tz='US/Eastern')),
                    MultiIndex.from_product(
                        [range(5),
                         ['foo', 'bar', 'baz'],
                         pd.date_range('20130101', periods=2)]),
                    MultiIndex.from_product(
                        [pd.CategoricalIndex(list('aabc')),
                         range(3)])]:
            self.check_equal(obj)
            self.check_not_equal_with_index(obj)

    def test_hash_pandas_object2(self):
        # Same checks, column by column, on the dtype-spanning fixture.
        for name, s in self.df.iteritems():
            self.check_equal(s)
            self.check_not_equal_with_index(s)

    def test_hash_pandas_empty_object(self):
        for obj in [Series([], dtype='float64'),
                    Series([], dtype='object'),
                    Index([])]:
            self.check_equal(obj)
            # these are by-definition the same with
            # or w/o the index as the data is empty

    def test_categorical_consistency(self):
        # GH15143
        # Check that categoricals hash consistent with their values, not codes
        # This should work for categoricals of any dtype
        for s1 in [Series(['a', 'b', 'c', 'd']),
                   Series([1000, 2000, 3000, 4000]),
                   Series(pd.date_range(0, periods=4))]:
            s2 = s1.astype('category').cat.set_categories(s1)
            s3 = s2.cat.set_categories(list(reversed(s1)))
            for categorize in [True, False]:
                # These should all hash identically
                h1 = hash_pandas_object(s1, categorize=categorize)
                h2 = hash_pandas_object(s2, categorize=categorize)
                h3 = hash_pandas_object(s3, categorize=categorize)
                tm.assert_series_equal(h1, h2)
                tm.assert_series_equal(h1, h3)

    def test_categorical_with_nan_consistency(self):
        # Hashes of shared values (including the -1/NaN code) must fall
        # in the same value-based hash set regardless of category list.
        c = pd.Categorical.from_codes(
            [-1, 0, 1, 2, 3, 4],
            categories=pd.date_range('2012-01-01', periods=5, name='B'))
        expected = hash_array(c, categorize=False)
        c = pd.Categorical.from_codes(
            [-1, 0],
            categories=[pd.Timestamp('2012-01-01')])
        result = hash_array(c, categorize=False)
        assert result[0] in expected
        assert result[1] in expected

    def test_pandas_errors(self):
        # Scalars and Panel objects cannot be hashed.
        for obj in [pd.Timestamp('20130101')]:
            with pytest.raises(TypeError):
                hash_pandas_object(obj)
        # Panel construction is deprecated; suppress its warning.
        with catch_warnings(record=True):
            obj = tm.makePanel()
        with pytest.raises(TypeError):
            hash_pandas_object(obj)

    def test_hash_keys(self):
        # using different hash keys, should have different hashes
        # for the same data
        # this only matters for object dtypes
        obj = Series(list('abc'))
        a = hash_pandas_object(obj, hash_key='9876543210123456')
        b = hash_pandas_object(obj, hash_key='9876543210123465')
        assert (a != b).all()

    def test_invalid_key(self):
        # this only matters for object dtypes
        def f():
            hash_pandas_object(Series(list('abc')), hash_key='foo')
        pytest.raises(ValueError, f)

    def test_alread_encoded(self):
        # if already encoded then ok
        obj = Series(list('abc')).str.encode('utf8')
        self.check_equal(obj)

    def test_alternate_encoding(self):
        # Hashing must also work with a non-default string encoding.
        obj = Series(list('abc'))
        self.check_equal(obj, encoding='ascii')

    def test_same_len_hash_collisions(self):
        # Distinct random strings of identical length must not collide,
        # for both odd and power-of-two lengths.
        for l in range(8):
            length = 2**(l + 8) + 1
            s = tm.rands_array(length, 2)
            result = hash_array(s, 'utf8')
            assert not result[0] == result[1]
        for l in range(8):
            length = 2**(l + 8)
            s = tm.rands_array(length, 2)
            result = hash_array(s, 'utf8')
            assert not result[0] == result[1]

    def test_hash_collisions(self):
        # hash collisions are bad
        # https://github.com/pandas-dev/pandas/issues/14711#issuecomment-264885726
        L = ['Ingrid-9Z9fKIZmkO7i7Cn51Li34pJm44fgX6DYGBNj3VPlOH50m7HnBlPxfIwFMrcNJNMP6PSgLmwWnInciMWrCSAlLEvt7JkJl4IxiMrVbXSa8ZQoVaq5xoQPjltuJEfwdNlO6jo8qRRHvD8sBEBMQASrRa6TsdaPTPCBo3nwIBpE7YzzmyH0vMBhjQZLx1aCT7faSEx7PgFxQhHdKFWROcysamgy9iVj8DO2Fmwg1NNl93rIAqC3mdqfrCxrzfvIY8aJdzin2cHVzy3QUJxZgHvtUtOLxoqnUHsYbNTeq0xcLXpTZEZCxD4PGubIuCNf32c33M7HFsnjWSEjE2yVdWKhmSVodyF8hFYVmhYnMCztQnJrt3O8ZvVRXd5IKwlLexiSp4h888w7SzAIcKgc3g5XQJf6MlSMftDXm9lIsE1mJNiJEv6uY6pgvC3fUPhatlR5JPpVAHNSbSEE73MBzJrhCAbOLXQumyOXigZuPoME7QgJcBalliQol7YZ9', # noqa
             'Tim-b9MddTxOWW2AT1Py6vtVbZwGAmYCjbp89p8mxsiFoVX4FyDOF3wFiAkyQTUgwg9sVqVYOZo09Dh1AzhFHbgij52ylF0SEwgzjzHH8TGY8Lypart4p4onnDoDvVMBa0kdthVGKl6K0BDVGzyOXPXKpmnMF1H6rJzqHJ0HywfwS4XYpVwlAkoeNsiicHkJUFdUAhG229INzvIAiJuAHeJDUoyO4DCBqtoZ5TDend6TK7Y914yHlfH3g1WZu5LksKv68VQHJriWFYusW5e6ZZ6dKaMjTwEGuRgdT66iU5nqWTHRH8WSzpXoCFwGcTOwyuqPSe0fTe21DVtJn1FKj9F9nEnR9xOvJUO7E0piCIF4Ad9yAIDY4DBimpsTfKXCu1vdHpKYerzbndfuFe5AhfMduLYZJi5iAw8qKSwR5h86ttXV0Mc0QmXz8dsRvDgxjXSmupPxBggdlqUlC828hXiTPD7am0yETBV0F3bEtvPiNJfremszcV8NcqAoARMe'] # noqa
        # these should be different!
        result1 = hash_array(np.asarray(L[0:1], dtype=object), 'utf8')
        expected1 = np.array([14963968704024874985], dtype=np.uint64)
        tm.assert_numpy_array_equal(result1, expected1)
        result2 = hash_array(np.asarray(L[1:2], dtype=object), 'utf8')
        expected2 = np.array([16428432627716348016], dtype=np.uint64)
        tm.assert_numpy_array_equal(result2, expected2)
        result = hash_array(np.asarray(L, dtype=object), 'utf8')
        tm.assert_numpy_array_equal(
            result, np.concatenate([expected1, expected2], axis=0))
@@ -0,0 +1,856 @@
# -*- coding: utf-8 -*-
import os
import pandas as pd
import pytest
import numpy as np
import sys
from pandas import Series, DataFrame
import pandas.util.testing as tm
import pandas.util._test_decorators as td
from pandas.util.testing import (assert_almost_equal, raise_with_traceback,
assert_index_equal, assert_series_equal,
assert_frame_equal, assert_numpy_array_equal,
RNGContext)
class TestAssertAlmostEqual(object):
    """Tests for ``pandas.util.testing.assert_almost_equal`` across
    scalars, containers, strings, nulls and pandas objects."""

    def _assert_almost_equal_both(self, a, b, **kwargs):
        # Helper: equality must hold in both argument orders.
        assert_almost_equal(a, b, **kwargs)
        assert_almost_equal(b, a, **kwargs)

    def _assert_not_almost_equal_both(self, a, b, **kwargs):
        # Helper: inequality must raise in both argument orders.
        pytest.raises(AssertionError, assert_almost_equal, a, b, **kwargs)
        pytest.raises(AssertionError, assert_almost_equal, b, a, **kwargs)

    def test_assert_almost_equal_numbers(self):
        # Close floats and cross-dtype numerics compare equal; a bool
        # is not "almost equal" to a float.
        self._assert_almost_equal_both(1.1, 1.1)
        self._assert_almost_equal_both(1.1, 1.100001)
        self._assert_almost_equal_both(np.int16(1), 1.000001)
        self._assert_almost_equal_both(np.float64(1.1), 1.1)
        self._assert_almost_equal_both(np.uint32(5), 5)
        self._assert_not_almost_equal_both(1.1, 1)
        self._assert_not_almost_equal_both(1.1, True)
        self._assert_not_almost_equal_both(1, 2)
        self._assert_not_almost_equal_both(1.0001, np.int16(1))

    def test_assert_almost_equal_numbers_with_zeros(self):
        # Zero comparisons use absolute, not relative, tolerance.
        self._assert_almost_equal_both(0, 0)
        self._assert_almost_equal_both(0, 0.0)
        self._assert_almost_equal_both(0, np.float64(0))
        self._assert_almost_equal_both(0.000001, 0)
        self._assert_not_almost_equal_both(0.001, 0)
        self._assert_not_almost_equal_both(1, 0)

    def test_assert_almost_equal_numbers_with_mixed(self):
        # Numbers never compare almost-equal to non-numeric objects.
        self._assert_not_almost_equal_both(1, 'abc')
        self._assert_not_almost_equal_both(1, [1, ])
        self._assert_not_almost_equal_both(1, object())

    @pytest.mark.parametrize(
        "left_dtype",
        ['M8[ns]', 'm8[ns]', 'float64', 'int64', 'object'])
    @pytest.mark.parametrize(
        "right_dtype",
        ['M8[ns]', 'm8[ns]', 'float64', 'int64', 'object'])
    def test_assert_almost_equal_edge_case_ndarrays(
            self, left_dtype, right_dtype):
        # empty compare
        self._assert_almost_equal_both(np.array([], dtype=left_dtype),
                                       np.array([], dtype=right_dtype),
                                       check_dtype=False)

    def test_assert_almost_equal_dicts(self):
        # Dicts compare by matching keys and almost-equal values.
        self._assert_almost_equal_both({'a': 1, 'b': 2}, {'a': 1, 'b': 2})
        self._assert_not_almost_equal_both({'a': 1, 'b': 2}, {'a': 1, 'b': 3})
        self._assert_not_almost_equal_both({'a': 1, 'b': 2},
                                           {'a': 1, 'b': 2, 'c': 3})
        self._assert_not_almost_equal_both({'a': 1}, 1)
        self._assert_not_almost_equal_both({'a': 1}, 'abc')
        self._assert_not_almost_equal_both({'a': 1}, [1, ])

    def test_assert_almost_equal_dict_like_object(self):
        # Anything exposing keys()/__getitem__ is compared like a dict.
        class DictLikeObj(object):
            def keys(self):
                return ('a', )

            def __getitem__(self, item):
                if item == 'a':
                    return 1

        self._assert_almost_equal_both({'a': 1}, DictLikeObj(),
                                       check_dtype=False)
        self._assert_not_almost_equal_both({'a': 2}, DictLikeObj(),
                                           check_dtype=False)

    def test_assert_almost_equal_strings(self):
        self._assert_almost_equal_both('abc', 'abc')
        self._assert_not_almost_equal_both('abc', 'abcd')
        self._assert_not_almost_equal_both('abc', 'abd')
        self._assert_not_almost_equal_both('abc', 1)
        self._assert_not_almost_equal_both('abc', [1, ])

    def test_assert_almost_equal_iterables(self):
        self._assert_almost_equal_both([1, 2, 3], [1, 2, 3])
        self._assert_almost_equal_both(np.array([1, 2, 3]),
                                       np.array([1, 2, 3]))
        # class / dtype are different
        self._assert_not_almost_equal_both(np.array([1, 2, 3]), [1, 2, 3])
        self._assert_not_almost_equal_both(np.array([1, 2, 3]),
                                           np.array([1., 2., 3.]))
        # Can't compare generators
        self._assert_not_almost_equal_both(iter([1, 2, 3]), [1, 2, 3])
        self._assert_not_almost_equal_both([1, 2, 3], [1, 2, 4])
        self._assert_not_almost_equal_both([1, 2, 3], [1, 2, 3, 4])
        self._assert_not_almost_equal_both([1, 2, 3], 1)

    def test_assert_almost_equal_null(self):
        # None equals only None; NaN is distinct from None and 0.
        self._assert_almost_equal_both(None, None)
        self._assert_not_almost_equal_both(None, np.NaN)
        self._assert_not_almost_equal_both(None, 0)
        self._assert_not_almost_equal_both(np.NaN, 0)

    def test_assert_almost_equal_inf(self):
        # Infinities compare equal to themselves (scalar and in arrays);
        # object arrays treat None/NaN as interchangeable nulls here.
        self._assert_almost_equal_both(np.inf, np.inf)
        self._assert_almost_equal_both(np.inf, float("inf"))
        self._assert_not_almost_equal_both(np.inf, 0)
        self._assert_almost_equal_both(np.array([np.inf, np.nan, -np.inf]),
                                       np.array([np.inf, np.nan, -np.inf]))
        self._assert_almost_equal_both(np.array([np.inf, None, -np.inf],
                                                dtype=np.object_),
                                       np.array([np.inf, np.nan, -np.inf],
                                                dtype=np.object_))

    def test_assert_almost_equal_pandas(self):
        # Index/Series/DataFrame inputs dispatch to the pandas-aware
        # comparison paths.
        tm.assert_almost_equal(pd.Index([1., 1.1]),
                               pd.Index([1., 1.100001]))
        tm.assert_almost_equal(pd.Series([1., 1.1]),
                               pd.Series([1., 1.100001]))
        tm.assert_almost_equal(pd.DataFrame({'a': [1., 1.1]}),
                               pd.DataFrame({'a': [1., 1.100001]}))

    def test_assert_almost_equal_object(self):
        # Object lists of identical Timestamps compare equal.
        a = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-01')]
        b = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-01')]
        self._assert_almost_equal_both(a, b)
class TestUtilTesting(object):
    """Tests for ``pandas.util.testing.raise_with_traceback``."""

    def test_raise_with_traceback(self):
        # Replace an in-flight exception with a new one, letting
        # raise_with_traceback pick up the active traceback itself.
        with tm.assert_raises_regex(LookupError, "error_text"):
            try:
                raise ValueError("THIS IS AN ERROR")
            except ValueError as exc:
                exc = LookupError("error_text")
                raise_with_traceback(exc)

        # Same replacement, but hand the traceback over explicitly.
        with tm.assert_raises_regex(LookupError, "error_text"):
            try:
                raise ValueError("This is another error")
            except ValueError:
                new_err = LookupError("error_text")
                _, _, active_tb = sys.exc_info()
                raise_with_traceback(new_err, active_tb)
class TestAssertNumpyArrayEqual(object):
    """Tests that ``assert_numpy_array_equal`` / ``assert_almost_equal``
    raise AssertionError with the expected, regex-matched messages.

    The ``expected`` literals are regex patterns (brackets/parens are
    escaped) matched against the raised message text.
    """

    @td.skip_if_windows
    def test_numpy_array_equal_message(self):
        # Shape mismatch.
        expected = """numpy array are different
numpy array shapes are different
\\[left\\]: \\(2,\\)
\\[right\\]: \\(3,\\)"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]))

        # scalar comparison
        expected = """Expected type """
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(1, 2)
        expected = """expected 2\\.00000 but got 1\\.00000, with decimal 5"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(1, 2)

        # array / scalar array comparison
        expected = """numpy array are different
numpy array classes are different
\\[left\\]: ndarray
\\[right\\]: int"""
        with tm.assert_raises_regex(AssertionError, expected):
            # numpy_array_equal only accepts np.ndarray
            assert_numpy_array_equal(np.array([1]), 1)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([1]), 1)

        # scalar / array comparison
        expected = """numpy array are different
numpy array classes are different
\\[left\\]: int
\\[right\\]: ndarray"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(1, np.array([1]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(1, np.array([1]))

        # Value mismatch including NaNs (NaN != NaN positionally here).
        expected = """numpy array are different
numpy array values are different \\(66\\.66667 %\\)
\\[left\\]: \\[nan, 2\\.0, 3\\.0\\]
\\[right\\]: \\[1\\.0, nan, 3\\.0\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([np.nan, 2, 3]),
                                     np.array([1, np.nan, 3]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([np.nan, 2, 3]),
                                np.array([1, np.nan, 3]))

        # Plain integer value mismatch.
        expected = """numpy array are different
numpy array values are different \\(50\\.0 %\\)
\\[left\\]: \\[1, 2\\]
\\[right\\]: \\[1, 3\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([1, 2]), np.array([1, 3]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([1, 2]), np.array([1, 3]))

        # Tiny float difference: exact comparison fails, almost-equal
        # passes.
        expected = """numpy array are different
numpy array values are different \\(50\\.0 %\\)
\\[left\\]: \\[1\\.1, 2\\.000001\\]
\\[right\\]: \\[1\\.1, 2.0\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(
                np.array([1.1, 2.000001]), np.array([1.1, 2.0]))
        # must pass
        assert_almost_equal(np.array([1.1, 2.000001]), np.array([1.1, 2.0]))

        # 2-D mismatch, one of six elements differing.
        expected = """numpy array are different
numpy array values are different \\(16\\.66667 %\\)
\\[left\\]: \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\]
\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([[1, 2], [3, 4], [5, 6]]),
                                     np.array([[1, 3], [3, 4], [5, 6]]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([[1, 2], [3, 4], [5, 6]]),
                                np.array([[1, 3], [3, 4], [5, 6]]))

        # 2-D mismatch, one of four elements differing.
        expected = """numpy array are different
numpy array values are different \\(25\\.0 %\\)
\\[left\\]: \\[\\[1, 2\\], \\[3, 4\\]\\]
\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([[1, 2], [3, 4]]),
                                     np.array([[1, 3], [3, 4]]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([[1, 2], [3, 4]]),
                                np.array([[1, 3], [3, 4]]))

        # allow to overwrite message
        expected = """Index are different
Index shapes are different
\\[left\\]: \\(2,\\)
\\[right\\]: \\(3,\\)"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]),
                                     obj='Index')
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]),
                                obj='Index')

    def test_numpy_array_equal_unicode_message(self):
        # Test ensures that `assert_numpy_array_equals` raises the right
        # exception when comparing np.arrays containing differing
        # unicode objects (#20503)
        expected = """numpy array are different
numpy array values are different \\(33\\.33333 %\\)
\\[left\\]: \\[á, à, ä\\]
\\[right\\]: \\[á, à, å\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(np.array([u'á', u'à', u'ä']),
                                     np.array([u'á', u'à', u'å']))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(np.array([u'á', u'à', u'ä']),
                                np.array([u'á', u'à', u'å']))

    @td.skip_if_windows
    def test_numpy_array_equal_object_message(self):
        # Object arrays of Timestamps report differing values.
        a = np.array([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-01')])
        b = np.array([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')])
        expected = """numpy array are different
numpy array values are different \\(50\\.0 %\\)
\\[left\\]: \\[2011-01-01 00:00:00, 2011-01-01 00:00:00\\]
\\[right\\]: \\[2011-01-01 00:00:00, 2011-01-02 00:00:00\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(a, b)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal(a, b)

    def test_numpy_array_equal_copy_flag(self):
        # check_same='same' requires identity; 'copy' forbids it.
        a = np.array([1, 2, 3])
        b = a.copy()
        c = a.view()
        expected = r'array\(\[1, 2, 3\]\) is not array\(\[1, 2, 3\]\)'
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(a, b, check_same='same')
        expected = r'array\(\[1, 2, 3\]\) is array\(\[1, 2, 3\]\)'
        with tm.assert_raises_regex(AssertionError, expected):
            assert_numpy_array_equal(a, c, check_same='copy')

    def test_assert_almost_equal_iterable_message(self):
        # Plain iterables get the 'Iterable' message variants.
        expected = """Iterable are different
Iterable length are different
\\[left\\]: 2
\\[right\\]: 3"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal([1, 2], [3, 4, 5])
        expected = """Iterable are different
Iterable values are different \\(50\\.0 %\\)
\\[left\\]: \\[1, 2\\]
\\[right\\]: \\[1, 3\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_almost_equal([1, 2], [1, 3])
class TestAssertIndexEqual(object):
    """Tests for ``assert_index_equal`` failure messages and for its
    ``exact`` / ``check_exact`` / ``check_less_precise`` /
    ``check_categorical`` options.

    The ``expected`` literals are regex patterns matched against the
    raised AssertionError text; ``u?`` tolerates py2/py3 repr
    differences.
    """

    def test_index_equal_message(self):
        # Different number of levels (flat Index vs MultiIndex).
        expected = """Index are different
Index levels are different
\\[left\\]: 1, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\)
\\[right\\]: 2, MultiIndex\\(levels=\\[\\[u?'A', u?'B'\\], \\[1, 2, 3, 4\\]\\],
labels=\\[\\[0, 0, 1, 1\\], \\[0, 1, 2, 3\\]\\]\\)"""
        idx1 = pd.Index([1, 2, 3])
        idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2),
                                          ('B', 3), ('B', 4)])
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2, exact=False)

        # Value mismatch surfaces at the differing MultiIndex level.
        expected = """MultiIndex level \\[1\\] are different
MultiIndex level \\[1\\] values are different \\(25\\.0 %\\)
\\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\)
\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)"""
        idx1 = pd.MultiIndex.from_tuples([('A', 2), ('A', 2),
                                          ('B', 3), ('B', 4)])
        idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2),
                                          ('B', 3), ('B', 4)])
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2, check_exact=False)

        # Length mismatch.
        expected = """Index are different
Index length are different
\\[left\\]: 3, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\)
\\[right\\]: 4, Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)"""
        idx1 = pd.Index([1, 2, 3])
        idx2 = pd.Index([1, 2, 3, 4])
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2, check_exact=False)

        # Class mismatch (Int64Index vs Float64Index) under exact=True.
        expected = """Index are different
Index classes are different
\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\)
\\[right\\]: Float64Index\\(\\[1\\.0, 2\\.0, 3\\.0\\], dtype='float64'\\)"""
        idx1 = pd.Index([1, 2, 3])
        idx2 = pd.Index([1, 2, 3.0])
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2, exact=True)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2, exact=True, check_exact=False)

        # Tiny float difference: exact fails, check_exact=False passes.
        expected = """Index are different
Index values are different \\(33\\.33333 %\\)
\\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\)
\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0000000001\\], dtype='float64'\\)"""
        idx1 = pd.Index([1, 2, 3.])
        idx2 = pd.Index([1, 2, 3.0000000001])
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        # must success
        assert_index_equal(idx1, idx2, check_exact=False)

        # Larger float difference: only check_less_precise passes.
        expected = """Index are different
Index values are different \\(33\\.33333 %\\)
\\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\)
\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0001\\], dtype='float64'\\)"""
        idx1 = pd.Index([1, 2, 3.])
        idx2 = pd.Index([1, 2, 3.0001])
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2, check_exact=False)
        # must success
        assert_index_equal(idx1, idx2, check_exact=False,
                           check_less_precise=True)

        # Integer value mismatch fails regardless of precision options.
        expected = """Index are different
Index values are different \\(33\\.33333 %\\)
\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\)
\\[right\\]: Int64Index\\(\\[1, 2, 4\\], dtype='int64'\\)"""
        idx1 = pd.Index([1, 2, 3])
        idx2 = pd.Index([1, 2, 4])
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2, check_less_precise=True)

        # MultiIndex level value mismatch with check_exact=False.
        expected = """MultiIndex level \\[1\\] are different
MultiIndex level \\[1\\] values are different \\(25\\.0 %\\)
\\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\)
\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)"""
        idx1 = pd.MultiIndex.from_tuples([('A', 2), ('A', 2),
                                          ('B', 3), ('B', 4)])
        idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2),
                                          ('B', 3), ('B', 4)])
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2, check_exact=False)

    def test_index_equal_metadata_message(self):
        # Differing 'names' metadata is reported.
        expected = """Index are different
Attribute "names" are different
\\[left\\]: \\[None\\]
\\[right\\]: \\[u?'x'\\]"""
        idx1 = pd.Index([1, 2, 3])
        idx2 = pd.Index([1, 2, 3], name='x')
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2)

        # same name, should pass
        assert_index_equal(pd.Index([1, 2, 3], name=np.nan),
                           pd.Index([1, 2, 3], name=np.nan))
        assert_index_equal(pd.Index([1, 2, 3], name=pd.NaT),
                           pd.Index([1, 2, 3], name=pd.NaT))

        # nan and NaT are distinct name values.
        expected = """Index are different
Attribute "names" are different
\\[left\\]: \\[nan\\]
\\[right\\]: \\[NaT\\]"""
        idx1 = pd.Index([1, 2, 3], name=np.nan)
        idx2 = pd.Index([1, 2, 3], name=pd.NaT)
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(idx1, idx2)

    def test_categorical_index_equality(self):
        # Differing category sets produce a dtype mismatch.
        expected = """Index are different
Attribute "dtype" are different
\\[left\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b'\\], ordered=False\\)
\\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], \
ordered=False\\)"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])),
                               pd.Index(pd.Categorical(['a', 'b'],
                                        categories=['a', 'b', 'c'])))

    def test_categorical_index_equality_relax_categories_check(self):
        # check_categorical=False ignores the differing category sets.
        assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])),
                           pd.Index(pd.Categorical(['a', 'b'],
                                    categories=['a', 'b', 'c'])),
                           check_categorical=False)
class TestAssertSeriesEqual(object):
    """Tests for ``assert_series_equal``: symmetric equality helpers,
    precision options, index dtype checks and failure messages."""

    def _assert_equal(self, x, y, **kwargs):
        # Helper: equality must hold in both argument orders.
        assert_series_equal(x, y, **kwargs)
        assert_series_equal(y, x, **kwargs)

    def _assert_not_equal(self, a, b, **kwargs):
        # Helper: inequality must raise in both argument orders.
        pytest.raises(AssertionError, assert_series_equal, a, b, **kwargs)
        pytest.raises(AssertionError, assert_series_equal, b, a, **kwargs)

    def test_equal(self):
        self._assert_equal(Series(range(3)), Series(range(3)))
        self._assert_equal(Series(list('abc')), Series(list('abc')))
        self._assert_equal(Series(list(u'áàä')), Series(list(u'áàä')))

    def test_not_equal(self):
        # Value, dtype and index differences must all be detected.
        self._assert_not_equal(Series(range(3)), Series(range(3)) + 1)
        self._assert_not_equal(Series(list('abc')), Series(list('xyz')))
        self._assert_not_equal(Series(list(u'áàä')), Series(list(u'éèë')))
        self._assert_not_equal(Series(list(u'áàä')), Series(list(b'aaa')))
        self._assert_not_equal(Series(range(3)), Series(range(4)))
        self._assert_not_equal(
            Series(range(3)), Series(
                range(3), dtype='float64'))
        self._assert_not_equal(
            Series(range(3)), Series(
                range(3), index=[1, 2, 4]))
        # ATM meta data is not checked in assert_series_equal
        # self._assert_not_equal(Series(range(3)),Series(range(3),name='foo'),check_names=True)

    def test_less_precise(self):
        # check_less_precise relaxes float comparison; note the bare
        # positional 10 below is passed through to assert_series_equal
        # and still triggers a failure.
        s1 = Series([0.12345], dtype='float64')
        s2 = Series([0.12346], dtype='float64')
        pytest.raises(AssertionError, assert_series_equal, s1, s2)
        self._assert_equal(s1, s2, check_less_precise=True)
        for i in range(4):
            self._assert_equal(s1, s2, check_less_precise=i)
        pytest.raises(AssertionError, assert_series_equal, s1, s2, 10)

        s1 = Series([0.12345], dtype='float32')
        s2 = Series([0.12346], dtype='float32')
        pytest.raises(AssertionError, assert_series_equal, s1, s2)
        self._assert_equal(s1, s2, check_less_precise=True)
        for i in range(4):
            self._assert_equal(s1, s2, check_less_precise=i)
        pytest.raises(AssertionError, assert_series_equal, s1, s2, 10)

        # even less than less precise
        s1 = Series([0.1235], dtype='float32')
        s2 = Series([0.1236], dtype='float32')
        pytest.raises(AssertionError, assert_series_equal, s1, s2)
        pytest.raises(AssertionError, assert_series_equal, s1, s2, True)

    def test_index_dtype(self):
        # int vs float index dtype fails under check_index_type.
        df1 = DataFrame.from_records(
            {'a': [1, 2], 'c': ['l1', 'l2']}, index=['a'])
        df2 = DataFrame.from_records(
            {'a': [1.0, 2.0], 'c': ['l1', 'l2']}, index=['a'])
        self._assert_not_equal(df1.c, df2.c, check_index_type=True)

    def test_multiindex_dtype(self):
        # Same, but the dtype difference is in one MultiIndex level.
        df1 = DataFrame.from_records(
            {'a': [1, 2], 'b': [2.1, 1.5],
             'c': ['l1', 'l2']}, index=['a', 'b'])
        df2 = DataFrame.from_records(
            {'a': [1.0, 2.0], 'b': [2.1, 1.5],
             'c': ['l1', 'l2']}, index=['a', 'b'])
        self._assert_not_equal(df1.c, df2.c, check_index_type=True)

    def test_series_equal_message(self):
        # Length mismatch message.
        expected = """Series are different
Series length are different
\\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\)
\\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 3, 4]))

        # Value mismatch message; integer mismatch fails even with
        # check_less_precise.
        expected = """Series are different
Series values are different \\(33\\.33333 %\\)
\\[left\\]: \\[1, 2, 3\\]
\\[right\\]: \\[1, 2, 4\\]"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]))
        with tm.assert_raises_regex(AssertionError, expected):
            assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]),
                                check_less_precise=True)

    def test_categorical_series_equality(self):
        # Differing category sets produce a dtype mismatch.
        expected = """Attributes are different
Attribute "dtype" are different
\\[left\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b'\\], ordered=False\\)
\\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], \
ordered=False\\)"""
        with tm.assert_raises_regex(AssertionError, expected):
            assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])),
                                pd.Series(pd.Categorical(['a', 'b'],
                                          categories=['a', 'b', 'c'])))

    def test_categorical_series_equality_relax_categories_check(self):
        # check_categorical=False ignores the differing category sets.
        assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])),
                            pd.Series(pd.Categorical(['a', 'b'],
                                      categories=['a', 'b', 'c'])),
                            check_categorical=False)
class TestAssertFrameEqual(object):
    # Tests for assert_frame_equal: symmetric (in)equality helpers plus
    # checks that failure messages identify exactly what differs.  The
    # ``expected`` strings below are regular expressions, hence the doubled
    # backslashes escaping brackets, parentheses and dots.

    def _assert_equal(self, x, y, **kwargs):
        # Equality must hold regardless of argument order.
        assert_frame_equal(x, y, **kwargs)
        assert_frame_equal(y, x, **kwargs)

    def _assert_not_equal(self, a, b, **kwargs):
        # Inequality must raise AssertionError regardless of argument order.
        pytest.raises(AssertionError, assert_frame_equal, a, b, **kwargs)
        pytest.raises(AssertionError, assert_frame_equal, b, a, **kwargs)

    def test_equal_with_different_row_order(self):
        # check_like=True ignores row-column orderings
        df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
                           index=['a', 'b', 'c'])
        df2 = pd.DataFrame({'A': [3, 2, 1], 'B': [6, 5, 4]},
                           index=['c', 'b', 'a'])

        self._assert_equal(df1, df2, check_like=True)
        self._assert_not_equal(df1, df2)

    def test_not_equal_with_different_shape(self):
        # Frames with different numbers of rows can never compare equal.
        self._assert_not_equal(pd.DataFrame({'A': [1, 2, 3]}),
                               pd.DataFrame({'A': [1, 2, 3, 4]}))

    def test_index_dtype(self):
        # int64 vs float64 index dtype differs under check_index_type=True.
        df1 = DataFrame.from_records(
            {'a': [1, 2], 'c': ['l1', 'l2']}, index=['a'])
        df2 = DataFrame.from_records(
            {'a': [1.0, 2.0], 'c': ['l1', 'l2']}, index=['a'])
        self._assert_not_equal(df1, df2, check_index_type=True)

    def test_multiindex_dtype(self):
        # Same dtype check as above, with a two-level index.
        df1 = DataFrame.from_records(
            {'a': [1, 2], 'b': [2.1, 1.5],
             'c': ['l1', 'l2']}, index=['a', 'b'])
        df2 = DataFrame.from_records(
            {'a': [1.0, 2.0], 'b': [2.1, 1.5],
             'c': ['l1', 'l2']}, index=['a', 'b'])
        self._assert_not_equal(df1, df2, check_index_type=True)

    def test_empty_dtypes(self):
        # For empty frames, dtype differences only matter with
        # check_dtype=True.
        df1 = pd.DataFrame(columns=["col1", "col2"])
        df1["col1"] = df1["col1"].astype('int64')
        df2 = pd.DataFrame(columns=["col1", "col2"])
        self._assert_equal(df1, df2, check_dtype=False)
        self._assert_not_equal(df1, df2, check_dtype=True)

    def test_frame_equal_message(self):
        # Shape mismatch is reported before any value comparison.
        expected = """DataFrame are different
DataFrame shape mismatch
\\[left\\]: \\(3, 2\\)
\\[right\\]: \\(3, 1\\)"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}),
                               pd.DataFrame({'A': [1, 2, 3]}))

        # Differing index values, with the mismatch percentage.
        expected = """DataFrame\\.index are different
DataFrame\\.index values are different \\(33\\.33333 %\\)
\\[left\\]: Index\\(\\[u?'a', u?'b', u?'c'\\], dtype='object'\\)
\\[right\\]: Index\\(\\[u?'a', u?'b', u?'d'\\], dtype='object'\\)"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
                                            index=['a', 'b', 'c']),
                               pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
                                            index=['a', 'b', 'd']))

        # Differing column labels.
        expected = """DataFrame\\.columns are different
DataFrame\\.columns values are different \\(50\\.0 %\\)
\\[left\\]: Index\\(\\[u?'A', u?'B'\\], dtype='object'\\)
\\[right\\]: Index\\(\\[u?'A', u?'b'\\], dtype='object'\\)"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
                                            index=['a', 'b', 'c']),
                               pd.DataFrame({'A': [1, 2, 3], 'b': [4, 5, 6]},
                                            index=['a', 'b', 'c']))

        # Differing cell values, reported per column; the same message
        # must appear whether or not the comparison is done block-wise.
        expected = """DataFrame\\.iloc\\[:, 1\\] are different
DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\)
\\[left\\]: \\[4, 5, 6\\]
\\[right\\]: \\[4, 5, 7\\]"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}),
                               pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]}))

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}),
                               pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]}),
                               by_blocks=True)

    def test_frame_equal_message_unicode(self):
        # Test ensures that `assert_frame_equals` raises the right
        # exception when comparing DataFrames containing differing
        # unicode objects (#20503)
        expected = """DataFrame\\.iloc\\[:, 1\\] are different
DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\)
\\[left\\]: \\[é, è, ë\\]
\\[right\\]: \\[é, è, e̊\\]"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'],
                                             'E': [u'é', u'è', u'ë']}),
                               pd.DataFrame({'A': [u'á', u'à', u'ä'],
                                             'E': [u'é', u'è', u'e̊']}))

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'],
                                             'E': [u'é', u'è', u'ë']}),
                               pd.DataFrame({'A': [u'á', u'à', u'ä'],
                                             'E': [u'é', u'è', u'e̊']}),
                               by_blocks=True)

        # Same check with plain-ASCII values on the right-hand side.
        expected = """DataFrame\\.iloc\\[:, 0\\] are different
DataFrame\\.iloc\\[:, 0\\] values are different \\(100\\.0 %\\)
\\[left\\]: \\[á, à, ä\\]
\\[right\\]: \\[a, a, a\\]"""

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'],
                                             'E': [u'é', u'è', u'ë']}),
                               pd.DataFrame({'A': ['a', 'a', 'a'],
                                             'E': ['e', 'e', 'e']}))

        with tm.assert_raises_regex(AssertionError, expected):
            assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'],
                                             'E': [u'é', u'è', u'ë']}),
                               pd.DataFrame({'A': ['a', 'a', 'a'],
                                             'E': ['e', 'e', 'e']}),
                               by_blocks=True)
class TestAssertCategoricalEqual(object):
    # Failure messages from tm.assert_categorical_equal must pinpoint which
    # component differs: the categories, the codes, or ``ordered``.  The
    # ``expected`` strings are regexes (escaped brackets/parens/dots).

    def test_categorical_equal_message(self):
        # Different category sets.
        expected = """Categorical\\.categories are different
Categorical\\.categories values are different \\(25\\.0 %\\)
\\[left\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)
\\[right\\]: Int64Index\\(\\[1, 2, 3, 5\\], dtype='int64'\\)"""

        a = pd.Categorical([1, 2, 3, 4])
        b = pd.Categorical([1, 2, 3, 5])
        with tm.assert_raises_regex(AssertionError, expected):
            tm.assert_categorical_equal(a, b)

        # Same categories, different codes (value order differs).
        expected = """Categorical\\.codes are different
Categorical\\.codes values are different \\(50\\.0 %\\)
\\[left\\]: \\[0, 1, 3, 2\\]
\\[right\\]: \\[0, 1, 2, 3\\]"""

        a = pd.Categorical([1, 2, 4, 3], categories=[1, 2, 3, 4])
        b = pd.Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4])
        with tm.assert_raises_regex(AssertionError, expected):
            tm.assert_categorical_equal(a, b)

        # Same values, different ``ordered`` attribute.
        expected = """Categorical are different
Attribute "ordered" are different
\\[left\\]: False
\\[right\\]: True"""

        a = pd.Categorical([1, 2, 3, 4], ordered=False)
        b = pd.Categorical([1, 2, 3, 4], ordered=True)
        with tm.assert_raises_regex(AssertionError, expected):
            tm.assert_categorical_equal(a, b)
class TestRNGContext(object):
    """Nested RNGContext managers restore the outer seed's stream on exit."""

    def test_RNGContext(self):
        outer_first = 1.764052345967664   # first randn() under seed 0
        inner_first = 1.6243453636632417  # first randn() under seed 1

        with RNGContext(0):
            with RNGContext(1):
                assert np.random.randn() == inner_first
            # leaving the inner context must resume seed 0's stream
            assert np.random.randn() == outer_first
class TestLocale(object):
    """Smoke test that locale enumeration works on non-Windows platforms."""

    def test_locale(self):
        # GH9744
        if sys.platform == 'win32':
            pytest.skip(
                "skipping on win platforms as locale not available")

        # every non-Windows system should expose at least one locale
        available = tm.get_locales()
        assert len(available) >= 1
def test_datapath_missing(datapath, request):
    """The datapath fixture raises for missing files under strict mode."""
    if not request.config.getoption("--strict-data-files"):
        pytest.skip("Need to set '--strict-data-files'")

    # a nonexistent file must raise rather than return a bogus path
    with pytest.raises(ValueError):
        datapath('not_a_file')

    # an existing data file resolves to its repo-relative location
    found = datapath('data', 'iris.csv')
    assert found == os.path.join('pandas', 'tests', 'data', 'iris.csv')
# @@ -0,0 +1,512 @@  -- stray git-diff hunk header (boundary of a second concatenated file); not valid Python, commented out
# -*- coding: utf-8 -*-
import os
import locale
import codecs
import sys
from uuid import uuid4
from collections import OrderedDict
import pytest
from pandas.compat import intern, PY3
import pandas.core.common as com
from pandas.util._move import move_into_mutable_buffer, BadMove, stolenbuf
from pandas.util._decorators import deprecate_kwarg, make_signature
from pandas.util._validators import (validate_args, validate_kwargs,
validate_args_and_kwargs,
validate_bool_kwarg)
import pandas.util.testing as tm
import pandas.util._test_decorators as td
class TestDecorators(object):
    """Exercise ``deprecate_kwarg`` in each of its supported modes."""

    def setup_method(self, method):
        # One decorated function per mode: plain rename, dict mapping,
        # callable mapping, and outright removal (new name is None).

        @deprecate_kwarg('old', 'new')
        def _f1(new=False):
            return new

        @deprecate_kwarg('old', 'new', {'yes': True, 'no': False})
        def _f2(new=False):
            return new

        @deprecate_kwarg('old', 'new', lambda x: x + 1)
        def _f3(new=0):
            return new

        @deprecate_kwarg('old', None)
        def _f4(old=True, unchanged=True):
            return old

        self.f1, self.f2, self.f3, self.f4 = _f1, _f2, _f3, _f4

    def test_deprecate_kwarg(self):
        # The old keyword warns but the value is forwarded unchanged.
        val = 78
        with tm.assert_produces_warning(FutureWarning):
            result = self.f1(old=val)
        assert result is val

        # The new keyword passes through silently.
        with tm.assert_produces_warning(None):
            self.f1(new=val)

    def test_dict_deprecate_kwarg(self):
        # 'yes' is translated to True by the mapping dict.
        with tm.assert_produces_warning(FutureWarning):
            result = self.f2(old='yes')
        assert result

    def test_missing_deprecate_kwarg(self):
        # Values absent from the mapping dict pass through unchanged.
        with tm.assert_produces_warning(FutureWarning):
            result = self.f2(old='bogus')
        assert result == 'bogus'

    def test_callable_deprecate_kwarg(self):
        # The callable mapping transforms the deprecated value ...
        num = 5
        with tm.assert_produces_warning(FutureWarning):
            result = self.f3(old=num)
        assert result == num + 1

        # ... and errors raised by the callable propagate.
        with pytest.raises(TypeError):
            self.f3(old='hello')

    def test_bad_deprecate_kwarg(self):
        # A mapping that is neither dict nor callable is rejected at
        # decoration time.
        with pytest.raises(TypeError):

            @deprecate_kwarg('old', 'new', 0)
            def f4(new=None):
                pass

    def test_deprecate_keyword(self):
        # new_arg_name=None deprecates the keyword outright.
        val = 9
        with tm.assert_produces_warning(FutureWarning):
            result = self.f4(old=val)
        assert result is val

        with tm.assert_produces_warning(None):
            result = self.f4(unchanged=val)
        assert result is True
def test_rands():
    # rands(n) produces a random string of exactly n characters.
    generated = tm.rands(10)
    assert len(generated) == 10
def test_rands_array():
    # 1-D: shape follows ``size`` and each entry has the requested width.
    strings = tm.rands_array(5, size=10)
    assert strings.shape == (10,)
    assert len(strings[0]) == 5

    # 2-D behaves the same way.
    strings = tm.rands_array(7, size=(10, 10))
    assert strings.shape == (10, 10)
    assert len(strings[1, 1]) == 7
class TestValidateArgs(object):
    """Tests for pandas.util._validators.validate_args."""

    # function name reported in generated error messages
    fname = 'func'

    def test_bad_min_fname_arg_count(self):
        # A negative max_fname_arg_count is itself invalid.
        msg = "'max_fname_arg_count' must be non-negative"
        with tm.assert_raises_regex(ValueError, msg):
            validate_args(self.fname, (None,), -1, 'foo')

    def test_bad_arg_length_max_value_single(self):
        args = (None, None)
        compat_args = ('foo',)
        min_count = 0
        allowed = len(compat_args) + min_count
        given = len(args) + min_count

        # Singular "argument" when only one is permitted.
        msg = (r"{fname}\(\) takes at most {max_length} "
               r"argument \({actual_length} given\)"
               .format(fname=self.fname, max_length=allowed,
                       actual_length=given))
        with tm.assert_raises_regex(TypeError, msg):
            validate_args(self.fname, args, min_count, compat_args)

    def test_bad_arg_length_max_value_multiple(self):
        args = (None, None)
        compat_args = dict(foo=None)
        min_count = 2
        allowed = len(compat_args) + min_count
        given = len(args) + min_count

        # Plural "arguments" when more than one is permitted.
        msg = (r"{fname}\(\) takes at most {max_length} "
               r"arguments \({actual_length} given\)"
               .format(fname=self.fname, max_length=allowed,
                       actual_length=given))
        with tm.assert_raises_regex(TypeError, msg):
            validate_args(self.fname, args, min_count, compat_args)

    def test_not_all_defaults(self):
        bad_arg = 'foo'
        msg = ("the '{arg}' parameter is not supported "
               r"in the pandas implementation of {func}\(\)".
               format(arg=bad_arg, func=self.fname))

        compat_args = OrderedDict([('foo', 2), ('bar', -1), ('baz', 3)])
        arg_vals = (1, -1, 3)

        # Every prefix that includes the non-default 'foo' value must fail.
        for i in range(1, 3):
            with tm.assert_raises_regex(ValueError, msg):
                validate_args(self.fname, arg_vals[:i], 2, compat_args)

    def test_validation(self):
        # No exceptions should be thrown
        validate_args(self.fname, (None,), 2, dict(out=None))

        compat_args = OrderedDict([('axis', 1), ('out', None)])
        validate_args(self.fname, (1, None), 2, compat_args)
class TestValidateKwargs(object):
    """Tests for validate_kwargs and validate_bool_kwarg."""

    # function name reported in generated error messages
    fname = 'func'

    def test_bad_kwarg(self):
        goodarg = 'f'
        badarg = goodarg + 'o'

        # compat_args knows 'f' and 'foo'; the supplied 'fo' is unexpected.
        compat_args = OrderedDict([(goodarg, 'foo'), (badarg + 'o', 'bar')])
        kwargs = {goodarg: 'foo', badarg: 'bar'}

        msg = (r"{fname}\(\) got an unexpected "
               r"keyword argument '{arg}'".format(
                   fname=self.fname, arg=badarg))
        with tm.assert_raises_regex(TypeError, msg):
            validate_kwargs(self.fname, kwargs, compat_args)

    def test_not_all_none(self):
        bad_arg = 'foo'
        msg = (r"the '{arg}' parameter is not supported "
               r"in the pandas implementation of {func}\(\)".
               format(arg=bad_arg, func=self.fname))

        compat_args = OrderedDict([('foo', 1), ('bar', 's'), ('baz', None)])
        kwarg_keys = ('foo', 'bar', 'baz')
        kwarg_vals = (2, 's', None)

        # Every kwarg subset containing the non-default 'foo' must fail.
        for i in range(1, 3):
            kwargs = dict(zip(kwarg_keys[:i], kwarg_vals[:i]))
            with tm.assert_raises_regex(ValueError, msg):
                validate_kwargs(self.fname, kwargs, compat_args)

    def test_validation(self):
        # No exceptions should be thrown
        compat_args = OrderedDict([('f', None), ('b', 1), ('ba', 's')])
        validate_kwargs(self.fname, dict(f=None, b=1), compat_args)

    def test_validate_bool_kwarg(self):
        arg_names = ['inplace', 'copy']
        invalid_values = [1, "True", [1, 2, 3], 5.0]
        valid_values = [True, False, None]

        for name in arg_names:
            # Non-bool (and non-None) values must be rejected ...
            for value in invalid_values:
                msg = ("For argument \"%s\" expected type bool, "
                       "received type %s" % (name, type(value).__name__))
                with tm.assert_raises_regex(ValueError, msg):
                    validate_bool_kwarg(value, name)
            # ... while True/False/None are returned unchanged.
            for value in valid_values:
                assert validate_bool_kwarg(value, name) == value
class TestValidateKwargsAndArgs(object):
    """Tests for validate_args_and_kwargs."""

    # function name reported in generated error messages
    fname = 'func'

    def test_invalid_total_length_max_length_one(self):
        compat_args = ('foo',)
        kwargs = {'foo': 'FOO'}
        args = ('FoO', 'BaZ')

        min_count = 0
        allowed = len(compat_args) + min_count
        given = len(kwargs) + len(args) + min_count

        # Singular "argument" when only one is permitted.
        msg = (r"{fname}\(\) takes at most {max_length} "
               r"argument \({actual_length} given\)"
               .format(fname=self.fname, max_length=allowed,
                       actual_length=given))
        with tm.assert_raises_regex(TypeError, msg):
            validate_args_and_kwargs(self.fname, args, kwargs,
                                     min_count, compat_args)

    def test_invalid_total_length_max_length_multiple(self):
        compat_args = ('foo', 'bar', 'baz')
        kwargs = {'foo': 'FOO', 'bar': 'BAR'}
        args = ('FoO', 'BaZ')

        min_count = 2
        allowed = len(compat_args) + min_count
        given = len(kwargs) + len(args) + min_count

        # Plural "arguments" when more than one is permitted.
        msg = (r"{fname}\(\) takes at most {max_length} "
               r"arguments \({actual_length} given\)"
               .format(fname=self.fname, max_length=allowed,
                       actual_length=given))
        with tm.assert_raises_regex(TypeError, msg):
            validate_args_and_kwargs(self.fname, args, kwargs,
                                     min_count, compat_args)

    def test_no_args_with_kwargs(self):
        bad_arg = 'bar'
        min_count = 2

        compat_args = OrderedDict([('foo', -5), (bad_arg, 1)])
        msg = (r"the '{arg}' parameter is not supported "
               r"in the pandas implementation of {func}\(\)".
               format(arg=bad_arg, func=self.fname))

        # Non-default value supplied via kwargs ...
        with tm.assert_raises_regex(ValueError, msg):
            validate_args_and_kwargs(self.fname, (), {'foo': -5, bad_arg: 2},
                                     min_count, compat_args)

        # ... or positionally: both must be rejected.
        with tm.assert_raises_regex(ValueError, msg):
            validate_args_and_kwargs(self.fname, (-5, 2), {},
                                     min_count, compat_args)

    def test_duplicate_argument(self):
        min_count = 2
        compat_args = OrderedDict([('foo', None), ('bar', None),
                                   ('baz', None)])
        kwargs = {'foo': None, 'bar': None}
        args = (None,)  # duplicate value for 'foo'

        msg = (r"{fname}\(\) got multiple values for keyword "
               r"argument '{arg}'".format(fname=self.fname, arg='foo'))
        with tm.assert_raises_regex(TypeError, msg):
            validate_args_and_kwargs(self.fname, args, kwargs,
                                     min_count, compat_args)

    def test_validation(self):
        # No exceptions should be thrown
        compat_args = OrderedDict([('foo', 1), ('bar', None), ('baz', -2)])
        validate_args_and_kwargs(self.fname, (1, None), {'baz': -2},
                                 2, compat_args)
class TestMove(object):
    # Tests for pandas.util._move.move_into_mutable_buffer.  A move is only
    # legal when the source bytes object holds exactly one reference, so the
    # exact expression shapes below matter: binding intermediates would add
    # references and change the outcome.  Edit with care.

    def test_cannot_create_instance_of_stolenbuffer(self):
        """Stolen buffers need to be created through the smart constructor
        ``move_into_mutable_buffer`` which has a bunch of checks in it.
        """
        msg = "cannot create 'pandas.util._move.stolenbuf' instances"
        with tm.assert_raises_regex(TypeError, msg):
            stolenbuf()

    def test_more_than_one_ref(self):
        """Test case for when we try to use ``move_into_mutable_buffer`` when
        the object being moved has other references.
        """
        b = b'testing'

        with pytest.raises(BadMove) as e:
            def handle_success(type_, value, tb):
                # BadMove must carry the very object that could not be moved
                # (identity check, not equality).
                assert value.args[0] is b
                return type(e).handle_success(e, type_, value, tb)  # super

            e.handle_success = handle_success
            move_into_mutable_buffer(b)

    def test_exactly_one_ref(self):
        """Test case for when the object being moved has exactly one reference.
        """
        b = b'testing'

        # We need to pass an expression on the stack to ensure that there are
        # not extra references hanging around. We cannot rewrite this test as
        #   buf = b[:-3]
        #   as_stolen_buf = move_into_mutable_buffer(buf)
        # because then we would have more than one reference to buf.
        as_stolen_buf = move_into_mutable_buffer(b[:-3])

        # materialize as bytearray to show that it is mutable
        assert bytearray(as_stolen_buf) == b'test'

    @pytest.mark.skipif(PY3, reason='bytes objects cannot be interned in py3')
    def test_interned(self):
        # Interned strings must never be moved, even at refcount one.
        salt = uuid4().hex

        def make_string():
            # We need to actually create a new string so that it has refcount
            # one. We use a uuid so that we know the string could not already
            # be in the intern table.
            return ''.join(('testing: ', salt))

        # This should work, the string has one reference on the stack.
        move_into_mutable_buffer(make_string())

        refcount = [None]  # nonlocal

        def ref_capture(ob):
            # Subtract two because those are the references owned by this
            # frame:
            #   1. The local variables of this stack frame.
            #   2. The python data stack of this stack frame.
            refcount[0] = sys.getrefcount(ob) - 2
            return ob

        with pytest.raises(BadMove):
            # If we intern the string it will still have one reference but now
            # it is in the intern table so if other people intern the same
            # string while the mutable buffer holds the first string they will
            # be the same instance.
            move_into_mutable_buffer(ref_capture(intern(make_string())))  # noqa

        assert refcount[0] == 1
def test_numpy_errstate_is_default():
    """Importing pandas.compat.numpy must not alter numpy's error state."""
    # These have been numpy's global defaults since numpy 1.6.0.
    defaults = {'over': 'warn', 'divide': 'warn', 'invalid': 'warn',
                'under': 'ignore'}
    import numpy as np
    from pandas.compat import numpy  # noqa
    # The errstate should be unchanged after that import.
    assert np.geterr() == defaults
@td.skip_if_windows
class TestLocaleUtils(object):
    # Tests for the locale helpers in pandas.util.testing; skipped wholesale
    # on Windows where the required locales are unavailable.

    @classmethod
    def setup_class(cls):
        # Capture the available locales and the current one so individual
        # tests can skip when the environment cannot support them.
        cls.locales = tm.get_locales()
        cls.current_locale = locale.getlocale()

        if not cls.locales:
            pytest.skip("No locales found")

    @classmethod
    def teardown_class(cls):
        del cls.locales
        del cls.current_locale

    def test_get_locales(self):
        # all systems should have at least a single locale
        assert len(tm.get_locales()) > 0

    def test_get_locales_prefix(self):
        if len(self.locales) == 1:
            pytest.skip("Only a single locale found, no point in "
                        "trying to test filtering locale prefixes")
        first_locale = self.locales[0]
        # filtering by the language prefix must return at least one match
        assert len(tm.get_locales(prefix=first_locale[:2])) > 0

    def test_set_locale(self):
        if len(self.locales) == 1:
            pytest.skip("Only a single locale found, no point in "
                        "trying to test setting another locale")

        if com._all_none(*self.current_locale):
            # Not sure why, but on some travis runs with pytest,
            # getlocale() returned (None, None).
            pytest.skip("Current locale is not set.")

        # LOCALE_OVERRIDE lets CI pin the target locale; otherwise default
        # to Swiss Italian (UTF-8) as a locale unlikely to be current.
        locale_override = os.environ.get('LOCALE_OVERRIDE', None)

        if locale_override is None:
            lang, enc = 'it_CH', 'UTF-8'
        elif locale_override == 'C':
            lang, enc = 'en_US', 'ascii'
        else:
            lang, enc = locale_override.split('.')

        # normalize the encoding name (e.g. 'UTF-8' -> 'utf-8')
        enc = codecs.lookup(enc).name
        new_locale = lang, enc

        if not tm._can_set_locale(new_locale):
            # unsupported locales must raise rather than half-apply
            with pytest.raises(locale.Error):
                with tm.set_locale(new_locale):
                    pass
        else:
            with tm.set_locale(new_locale) as normalized_locale:
                new_lang, new_enc = normalized_locale.split('.')
                # NOTE(review): this normalizes ``enc`` (already normalized
                # above), not ``new_enc`` from the split -- possibly meant to
                # be ``codecs.lookup(new_enc).name``.  The two usually
                # coincide, so confirm before changing.
                new_enc = codecs.lookup(enc).name
                normalized_locale = new_lang, new_enc
                assert normalized_locale == new_locale

        # leaving the context must restore the original locale
        current_locale = locale.getlocale()
        assert current_locale == self.current_locale
def test_make_signature():
    # See GH 17608
    # Without default kwargs, both halves of the signature are identical.
    assert make_signature(validate_kwargs) == (
        ['fname', 'kwargs', 'compat_args'],
        ['fname', 'kwargs', 'compat_args'])

    # With defaults, the first half carries the 'name=default' spellings
    # while the second lists bare parameter names.
    assert make_signature(deprecate_kwarg) == (
        ['old_arg_name', 'new_arg_name', 'mapping=None', 'stacklevel=2'],
        ['old_arg_name', 'new_arg_name', 'mapping', 'stacklevel'])
def test_safe_import(monkeypatch):
    """safe_import returns falsy for missing modules or unmet versions."""
    # Unknown module name, and an impossible version requirement.
    assert not td.safe_import("foo")
    assert not td.safe_import("pandas", min_version="99.99.99")

    # Create dummy module to be imported
    import types
    import sys
    mod_name = "hello123"
    dummy = types.ModuleType(mod_name)
    dummy.__version__ = "1.5"

    # Not importable until registered in sys.modules.
    assert not td.safe_import(mod_name)

    monkeypatch.setitem(sys.modules, mod_name, dummy)
    assert not td.safe_import(mod_name, min_version="2.0")
    assert td.safe_import(mod_name, min_version="1.0")