pruned venvs
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -1,178 +0,0 @@
|
||||
"""Test extension array for storing nested data in a pandas container.
|
||||
|
||||
The JSONArray stores lists of dictionaries. The storage mechanism is a list,
|
||||
not an ndarray.
|
||||
|
||||
Note:
|
||||
|
||||
We currently store lists of UserDicts (Py3 only). Pandas has a few places
|
||||
internally that specifically check for dicts, and does non-scalar things
|
||||
in that case. We *want* the dictionaries to be treated as scalars, so we
|
||||
hack around pandas by using UserDicts.
|
||||
"""
|
||||
import collections
|
||||
import itertools
|
||||
import numbers
|
||||
import random
|
||||
import string
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.base import ExtensionDtype
|
||||
from pandas.core.arrays import ExtensionArray
|
||||
|
||||
|
||||
class JSONDtype(ExtensionDtype):
    """Extension dtype whose scalars are mappings (dicts / UserDicts)."""
    # BUG FIX: the ABCs were removed from the ``collections`` namespace in
    # Python 3.10; use ``collections.abc`` and fall back for Python 2,
    # which only has ``collections.Mapping``.
    try:
        type = collections.abc.Mapping
    except AttributeError:  # Python 2
        type = collections.Mapping
    name = 'json'
    try:
        # An empty UserDict is the NA sentinel so pandas treats it as a
        # scalar rather than unpacking it like a plain dict.
        na_value = collections.UserDict()
    except AttributeError:
        # source compatibility with Py2.
        na_value = {}

    @classmethod
    def construct_from_string(cls, string):
        """Construct an instance from its string name.

        Parameters
        ----------
        string : str
            Must equal ``cls.name`` ('json').

        Raises
        ------
        TypeError
            If ``string`` does not match ``cls.name``.
        """
        if string == cls.name:
            return cls()
        else:
            raise TypeError("Cannot construct a '{}' from "
                            "'{}'".format(cls, string))
|
||||
|
||||
|
||||
class JSONArray(ExtensionArray):
    """List-backed ExtensionArray whose scalars are mappings.

    Storage is a plain Python list (``self.data``) rather than an
    ndarray, because NumPy mis-handles object arrays of equal-length
    dicts (it tries to build a 2-D array from them).
    """
    dtype = JSONDtype()

    def __init__(self, values):
        # Every element must be a mapping (dict/UserDict).
        for val in values:
            if not isinstance(val, self.dtype.type):
                raise TypeError
        self.data = values

        # Some aliases for common attribute names to ensure pandas supports
        # these
        self._items = self._data = self.data
        # those aliases are currently not working due to assumptions
        # in internal code (GH-20735)
        # self._values = self.values = self.data

    @classmethod
    def _from_sequence(cls, scalars):
        return cls(scalars)

    @classmethod
    def _from_factorized(cls, values, original):
        # ``values`` are the frozen items-tuples produced by
        # _values_for_factorize; the empty tuple is the NA sentinel and
        # is dropped.
        return cls([collections.UserDict(x) for x in values if x != ()])

    def __getitem__(self, item):
        if isinstance(item, numbers.Integral):
            return self.data[item]
        elif isinstance(item, np.ndarray) and item.dtype == 'bool':
            # boolean mask
            return self._from_sequence([x for x, m in zip(self, item) if m])
        elif isinstance(item, collections.abc.Iterable):
            # fancy indexing
            # BUG FIX: ``collections.Iterable`` was removed in Python
            # 3.10; the abc path is Py3-only, which is fine since the
            # accompanying tests are skipped on Py2.
            return type(self)([self.data[i] for i in item])
        else:
            # slice
            return type(self)(self.data[item])

    def __setitem__(self, key, value):
        if isinstance(key, numbers.Integral):
            self.data[key] = value
        else:
            if not isinstance(value, (type(self),
                                      collections.abc.Sequence)):
                # broadcast value
                value = itertools.cycle([value])

            if isinstance(key, np.ndarray) and key.dtype == 'bool':
                # masking
                for i, (k, v) in enumerate(zip(key, value)):
                    if k:
                        assert isinstance(v, self.dtype.type)
                        self.data[i] = v
            else:
                for k, v in zip(key, value):
                    assert isinstance(v, self.dtype.type)
                    self.data[k] = v

    def __len__(self):
        return len(self.data)

    def __repr__(self):
        # BUG FIX: the class name was misspelled 'JSONArary'.
        return 'JSONArray({!r})'.format(self.data)

    @property
    def nbytes(self):
        # Shallow size of the backing list only; the contained mappings
        # are not counted.
        return sys.getsizeof(self.data)

    def isna(self):
        # NA is defined as equality with the dtype's na_value (an empty
        # mapping).
        return np.array([x == self.dtype.na_value for x in self.data],
                        dtype=bool)

    def take(self, indexer, allow_fill=False, fill_value=None):
        # re-implement here, since NumPy has trouble setting
        # sized objects like UserDicts into scalar slots of
        # an ndarray.
        indexer = np.asarray(indexer)
        msg = ("Index is out of bounds or cannot do a "
               "non-empty take from an empty array.")

        if allow_fill:
            if fill_value is None:
                fill_value = self.dtype.na_value
            # bounds check: -1 means "fill", anything lower is invalid
            if (indexer < -1).any():
                raise ValueError
            try:
                output = [self.data[loc] if loc != -1 else fill_value
                          for loc in indexer]
            except IndexError:
                raise IndexError(msg)
        else:
            try:
                output = [self.data[loc] for loc in indexer]
            except IndexError:
                raise IndexError(msg)

        return self._from_sequence(output)

    def copy(self, deep=False):
        # NOTE(review): ``deep`` is accepted but only a shallow list copy
        # is made; the mappings themselves are shared — confirm callers
        # don't rely on deep semantics.
        return type(self)(self.data[:])

    def astype(self, dtype, copy=True):
        # NumPy has issues when all the dicts are the same length.
        # np.array([UserDict(...), UserDict(...)]) fails,
        # but np.array([{...}, {...}]) works, so cast.
        return np.array([dict(x) for x in self], dtype=dtype, copy=copy)

    def unique(self):
        # Parent method doesn't work since np.array will try to infer
        # a 2-dim object.
        return type(self)([
            dict(x) for x in list(set(tuple(d.items()) for d in self.data))
        ])

    @classmethod
    def _concat_same_type(cls, to_concat):
        data = list(itertools.chain.from_iterable([x.data for x in to_concat]))
        return cls(data)

    def _values_for_factorize(self):
        frozen = self._values_for_argsort()
        return frozen, ()

    def _values_for_argsort(self):
        # Disable NumPy's shape inference by including an empty tuple...
        # If all the elements of self are the same size P, NumPy will
        # cast them to an (N, P) array, instead of an (N,) array of tuples.
        frozen = [()] + list(tuple(x.items()) for x in self)
        return np.array(frozen, dtype=object)[1:]
|
||||
|
||||
|
||||
def make_data():
    """Return 100 UserDicts with 0-10 random (letter, int) entries each."""
    # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer
    records = []
    for _ in range(100):
        n_entries = random.randint(0, 10)
        pairs = []
        for _ in range(n_entries):
            key = random.choice(string.ascii_letters)
            value = random.randint(0, 100)
            pairs.append((key, value))
        records.append(collections.UserDict(pairs))
    return records
|
||||
@@ -1,232 +0,0 @@
|
||||
import operator
|
||||
import collections
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas.util.testing as tm
|
||||
from pandas.compat import PY2, PY36
|
||||
from pandas.tests.extension import base
|
||||
|
||||
from .array import JSONArray, JSONDtype, make_data
|
||||
|
||||
pytestmark = pytest.mark.skipif(PY2, reason="Py2 doesn't have a UserDict")
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """A fresh instance of the dtype under test."""
    json_dtype = JSONDtype()
    return json_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Length-100 JSONArray for semantics tests."""
    data = make_data()

    # Why the while loop? NumPy is unable to construct an ndarray from
    # equal-length dicts. Many of our operations involve coercing the
    # EA to an ndarray of objects. To avoid random test failures, we ensure
    # that our data is coercable to an ndarray. Several tests deal with only
    # the first two elements, so that's what we'll check.

    while len(data[0]) == len(data[1]):
        data = make_data()

    return JSONArray(data)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """Length-2 array: the empty dict is NA, the second element is valid."""
    missing_then_valid = [{}, {'a': 10}]
    return JSONArray(missing_then_valid)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting():
    """Three distinct mappings in unsorted order for argsort/sort tests."""
    unsorted_values = [{'b': 1}, {'c': 4}, {'a': 2, 'c': 3}]
    return JSONArray(unsorted_values)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting():
    """Three elements with NA (the empty dict) in the middle position."""
    values_with_na = [{'b': 1}, {}, {'a': 4}]
    return JSONArray(values_with_na)
|
||||
|
||||
|
||||
@pytest.fixture
def na_value(dtype):
    """The dtype's missing-value sentinel."""
    sentinel = dtype.na_value
    return sentinel
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Comparator for NA scalars: plain equality, since NAs are empty dicts."""
    return operator.eq
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping():
    """Values for groupby tests: duplicated groups, an NA pair, singletons."""
    grouping_values = [
        {'b': 1}, {'b': 1},
        {}, {},
        {'a': 0, 'c': 2}, {'a': 0, 'c': 2},
        {'b': 1},
        {'c': 2},
    ]
    return JSONArray(grouping_values)
|
||||
|
||||
|
||||
class BaseJSON(object):
    # NumPy doesn't handle an array of equal-length UserDicts.
    # The default assert_series_equal eventually does a
    # Series.values, which raises. We work around it by
    # converting the UserDicts to dicts.
    def assert_series_equal(self, left, right, **kwargs):
        """Compare two Series, coercing json-dtype UserDicts to plain dicts."""
        if left.dtype.name == 'json':
            assert left.dtype == right.dtype
            # Rebuild both sides from plain dicts so tm's ndarray
            # coercion does not choke on UserDicts.
            left = pd.Series(JSONArray(left.values.astype(object)),
                             index=left.index, name=left.name)
            right = pd.Series(JSONArray(right.values.astype(object)),
                              index=right.index, name=right.name)
        tm.assert_series_equal(left, right, **kwargs)

    def assert_frame_equal(self, left, right, *args, **kwargs):
        """Compare two DataFrames, routing json columns through the
        custom series comparison and the rest through tm.assert_frame_equal.
        """
        tm.assert_index_equal(
            left.columns, right.columns,
            exact=kwargs.get('check_column_type', 'equiv'),
            check_names=kwargs.get('check_names', True),
            check_exact=kwargs.get('check_exact', False),
            check_categorical=kwargs.get('check_categorical', True),
            obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame')))

        # BUG FIX: ``(left.dtypes == 'json').index`` is the index of the
        # boolean Series, i.e. ALL columns — the mask was discarded.
        # Filter with the mask so only json columns take the special path.
        dtypes = left.dtypes
        jsons = dtypes[dtypes == 'json'].index

        for col in jsons:
            self.assert_series_equal(left[col], right[col],
                                     *args, **kwargs)

        left = left.drop(columns=jsons)
        right = right.drop(columns=jsons)
        tm.assert_frame_equal(left, right, *args, **kwargs)
|
||||
|
||||
|
||||
class TestDtype(BaseJSON, base.BaseDtypeTests):
    """Run the shared dtype test suite against JSONDtype."""
    pass
|
||||
|
||||
|
||||
class TestInterface(BaseJSON, base.BaseInterfaceTests):
    def test_custom_asserts(self):
        """Exercise BaseJSON's custom asserts on equal-length UserDicts.

        Without the custom asserts, comparing these would always trigger
        the KeyError from trying to put an array of equal-length
        UserDicts inside an ndarray.
        """
        records = [collections.UserDict(mapping)
                   for mapping in ({'a': 1}, {'b': 2}, {'c': 3})]
        arr = JSONArray(records)
        ser = pd.Series(arr)
        self.assert_series_equal(ser, ser)
        self.assert_frame_equal(ser.to_frame(), ser.to_frame())

        other = pd.Series(arr.take([0, 0, 1]))
        with pytest.raises(AssertionError):
            self.assert_series_equal(ser, other)

        with pytest.raises(AssertionError):
            self.assert_frame_equal(ser.to_frame(), other.to_frame())
|
||||
|
||||
|
||||
class TestConstructors(BaseJSON, base.BaseConstructorsTests):
    """Run the shared constructor tests against JSONArray."""
    pass
|
||||
|
||||
|
||||
class TestReshaping(BaseJSON, base.BaseReshapingTests):
    """Run the shared reshaping (concat/merge) tests against JSONArray."""
    pass
|
||||
|
||||
|
||||
class TestGetitem(BaseJSON, base.BaseGetitemTests):
    """Run the shared __getitem__/take tests against JSONArray."""
    pass
|
||||
|
||||
|
||||
class TestMissing(BaseJSON, base.BaseMissingTests):
    """Missing-value tests; filling with a dict-valued scalar is unsupported."""

    @pytest.mark.xfail(reason="Setting a dict as a scalar")
    def test_fillna_series(self):
        """We treat dictionaries as a mapping in fillna, not a scalar."""

    @pytest.mark.xfail(reason="Setting a dict as a scalar")
    def test_fillna_frame(self):
        """We treat dictionaries as a mapping in fillna, not a scalar."""
|
||||
|
||||
|
||||
# Shared markers: dict scalars are unhashable, and dict ordering is only
# guaranteed from Python 3.6 on, so order-sensitive tests are skipped on
# older interpreters.
unhashable = pytest.mark.skip(reason="Unhashable")
unstable = pytest.mark.skipif(not PY36,  # 3.6 or higher
                              reason="Dictionary order unstable")
|
||||
|
||||
|
||||
class TestMethods(BaseJSON, base.BaseMethodsTests):
    """Shared method tests, skipping those needing hashable/ordered scalars.

    The explicit ``super(TestMethods, self)`` form is kept for Py2 import
    compatibility (the module is skipped, not unimportable, on Py2).
    """

    @unhashable
    def test_value_counts(self, all_data, dropna):
        # Counting requires hashable values; dicts are not.
        pass

    @unhashable
    def test_sort_values_frame(self):
        # TODO (EA.factorize): see if _values_for_factorize allows this.
        pass

    @unstable
    def test_argsort(self, data_for_sorting):
        super(TestMethods, self).test_argsort(data_for_sorting)

    @unstable
    def test_argsort_missing(self, data_missing_for_sorting):
        super(TestMethods, self).test_argsort_missing(
            data_missing_for_sorting)

    @unstable
    @pytest.mark.parametrize('ascending', [True, False])
    def test_sort_values(self, data_for_sorting, ascending):
        super(TestMethods, self).test_sort_values(
            data_for_sorting, ascending)

    @unstable
    @pytest.mark.parametrize('ascending', [True, False])
    def test_sort_values_missing(self, data_missing_for_sorting, ascending):
        super(TestMethods, self).test_sort_values_missing(
            data_missing_for_sorting, ascending)
|
||||
|
||||
|
||||
class TestCasting(BaseJSON, base.BaseCastingTests):
    # str-casting is blocked by NumPy's handling of dict-valued scalars.
    @pytest.mark.xfail
    def test_astype_str(self):
        """This currently fails in NumPy on np.array(self, dtype=str) with

        *** ValueError: setting an array element with a sequence
        """
|
||||
|
||||
|
||||
# We intentionally don't run base.BaseSetitemTests because pandas'
|
||||
# internals has trouble setting sequences of values into scalar positions.
|
||||
|
||||
|
||||
class TestGroupby(BaseJSON, base.BaseGroupbyTests):
    """Groupby tests; paths that hash the dict scalars are skipped."""

    @unhashable
    def test_groupby_extension_transform(self):
        """
        This currently fails in Series.name.setter, since the
        name must be hashable, but the value is a dictionary.
        I think this is what we want, i.e. `.name` should be the original
        values, and not the values for factorization.
        """

    @unhashable
    def test_groupby_extension_apply(self):
        """
        This fails in Index._do_unique_check with

        >   hash(val)
        E   TypeError: unhashable type: 'UserDict' with

        I suspect that once we support Index[ExtensionArray],
        we'll be able to dispatch unique.
        """

    @unstable
    @pytest.mark.parametrize('as_index', [True, False])
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
        super(TestGroupby, self).test_groupby_extension_agg(
            as_index, data_for_grouping
        )
|
||||
Reference in New Issue
Block a user