Static code analysis and corrections

Kristjan Komlosi
2019-07-17 16:06:09 +02:00
parent 674692c2fc
commit 21bfae9fbc
10086 changed files with 2102103 additions and 51 deletions
@@ -0,0 +1,470 @@
"""
compat
======
Cross-compatible functions for Python 2 and 3.
Key items to import for 2/3 compatible code:
* iterators: range(), map(), zip(), filter(), reduce()
* lists: lrange(), lmap(), lzip(), lfilter()
* unicode: u() [no unicode builtin in Python 3]
* longs: long (int in Python 3)
* callable
* iterable method compatibility: iteritems, iterkeys, itervalues
* Uses the original method if available, otherwise uses items, keys, values.
* types:
* text_type: unicode in Python 2, str in Python 3
* binary_type: str in Python 2, bytes in Python 3
* string_types: basestring in Python 2, str in Python 3
* bind_method: binds functions to classes
* add_metaclass(metaclass) - class decorator that recreates class with the
given metaclass instead (and avoids intermediary class creation)
Other items:
* platform checker
"""
# pylint: disable=W0611
# flake8: noqa
import re
import functools
import itertools
from distutils.version import LooseVersion
from itertools import product
import sys
import platform
import types
from unicodedata import east_asian_width
import struct
import inspect
from collections import namedtuple
import collections
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] >= 3
PY35 = sys.version_info >= (3, 5)
PY36 = sys.version_info >= (3, 6)
PY37 = sys.version_info >= (3, 7)
PYPY = platform.python_implementation() == 'PyPy'
try:
import __builtin__ as builtins
# not writeable when instantiated with string, doesn't handle unicode well
from cStringIO import StringIO as cStringIO
# always writeable
from StringIO import StringIO
BytesIO = StringIO
import cPickle
import httplib
except ImportError:
import builtins
from io import StringIO, BytesIO
cStringIO = StringIO
import pickle as cPickle
import http.client as httplib
from pandas.compat.chainmap import DeepChainMap
if PY3:
def isidentifier(s):
return s.isidentifier()
def str_to_bytes(s, encoding=None):
return s.encode(encoding or 'ascii')
def bytes_to_str(b, encoding=None):
return b.decode(encoding or 'utf-8')
# The signature version below is directly copied from Django,
# https://github.com/django/django/pull/4846
def signature(f):
sig = inspect.signature(f)
args = [
p.name for p in sig.parameters.values()
if p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD
]
varargs = [
p.name for p in sig.parameters.values()
if p.kind == inspect.Parameter.VAR_POSITIONAL
]
varargs = varargs[0] if varargs else None
keywords = [
p.name for p in sig.parameters.values()
if p.kind == inspect.Parameter.VAR_KEYWORD
]
keywords = keywords[0] if keywords else None
defaults = [
p.default for p in sig.parameters.values()
if p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD
and p.default is not p.empty
] or None
argspec = namedtuple('Signature', ['args', 'defaults',
'varargs', 'keywords'])
return argspec(args, defaults, varargs, keywords)
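# A hedged usage sketch of the signature() shim above; the function f is
# hypothetical, not part of pandas:
#
#   >>> def f(a, b=1, *args, **kwargs): pass
#   >>> signature(f)
#   Signature(args=['a', 'b'], defaults=[1], varargs='args', keywords='kwargs')
#
# i.e. it returns an argspec-like namedtuple comparable to what the PY2
# branch gets from inspect.getargspec().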
def get_range_parameters(data):
"""Gets the start, stop, and step parameters from a range object"""
return data.start, data.stop, data.step
# have to explicitly put builtins into the namespace
range = range
map = map
zip = zip
filter = filter
intern = sys.intern
reduce = functools.reduce
long = int
unichr = chr
# This was introduced in Python 3.3, but we don't support
# Python 3.x < 3.5, so checking PY3 is safe.
FileNotFoundError = FileNotFoundError
# list-producing versions of the major Python iterating functions
def lrange(*args, **kwargs):
return list(range(*args, **kwargs))
def lzip(*args, **kwargs):
return list(zip(*args, **kwargs))
def lmap(*args, **kwargs):
return list(map(*args, **kwargs))
def lfilter(*args, **kwargs):
return list(filter(*args, **kwargs))
from importlib import reload
reload = reload
Hashable = collections.abc.Hashable
Iterable = collections.abc.Iterable
Mapping = collections.abc.Mapping
MutableMapping = collections.abc.MutableMapping
Sequence = collections.abc.Sequence
Sized = collections.abc.Sized
Set = collections.abc.Set
else:
# Python 2
_name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$")
FileNotFoundError = IOError
def isidentifier(s, dotted=False):
return bool(_name_re.match(s))
def str_to_bytes(s, encoding='ascii'):
return s
def bytes_to_str(b, encoding='ascii'):
return b
def signature(f):
return inspect.getargspec(f)
def get_range_parameters(data):
"""Gets the start, stop, and step parameters from a range object"""
# seems we only have indexing ops to infer
# rather than direct accessors
if len(data) > 1:
step = data[1] - data[0]
stop = data[-1] + step
start = data[0]
elif len(data):
start = data[0]
stop = data[0] + 1
step = 1
else:
start = stop = 0
step = 1
return start, stop, step
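# Worked example of the inference above (illustrative only): on Python 2,
# xrange(2, 11, 3) yields 2, 5, 8, so step = 5 - 2 = 3, start = 2 and
# stop = 8 + 3 = 11, recovering the original constructor arguments even
# though xrange exposes no .start/.stop/.step attributes.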
# import iterator versions of these functions
range = xrange
intern = intern
zip = itertools.izip
filter = itertools.ifilter
map = itertools.imap
reduce = reduce
long = long
unichr = unichr
# Python 2-builtin ranges produce lists
lrange = builtins.range
lzip = builtins.zip
lmap = builtins.map
lfilter = builtins.filter
reload = builtins.reload
Hashable = collections.Hashable
Iterable = collections.Iterable
Mapping = collections.Mapping
MutableMapping = collections.MutableMapping
Sequence = collections.Sequence
Sized = collections.Sized
Set = collections.Set
if PY2:
def iteritems(obj, **kw):
return obj.iteritems(**kw)
def iterkeys(obj, **kw):
return obj.iterkeys(**kw)
def itervalues(obj, **kw):
return obj.itervalues(**kw)
next = lambda it: it.next()
else:
def iteritems(obj, **kw):
return iter(obj.items(**kw))
def iterkeys(obj, **kw):
return iter(obj.keys(**kw))
def itervalues(obj, **kw):
return iter(obj.values(**kw))
next = next
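# Hedged usage sketch: these wrappers keep call sites identical on both
# interpreters, e.g.
#
#   >>> d = {'a': 1}
#   >>> list(iteritems(d))
#   [('a', 1)]
#
# On PY2 this defers to dict.iteritems(); on PY3 it wraps dict.items() in
# iter(), so either way the caller gets a plain iterator.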
def bind_method(cls, name, func):
"""Bind a method to class, python 2 and python 3 compatible.
Parameters
----------
cls : type
class to receive bound method
name : basestring
name of method on class instance
func : function
function to be bound as method
Returns
-------
None
"""
# only python 2 has bound/unbound method issue
if not PY3:
setattr(cls, name, types.MethodType(func, None, cls))
else:
setattr(cls, name, func)
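# Illustrative sketch (the class and function below are hypothetical):
#
#   >>> class A(object): pass
#   >>> def describe(self): return type(self).__name__
#   >>> bind_method(A, 'describe', describe)
#   >>> A().describe()
#   'A'
#
# On PY2 the function is wrapped in types.MethodType so it becomes an
# unbound method; on PY3 a plain function attribute already behaves as one.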
# ----------------------------------------------------------------------------
# functions largely based / taken from the six module
# Much of the code in this module comes from Benjamin Peterson's six library.
# The license for this library can be found in LICENSES/SIX and the code can be
# found at https://bitbucket.org/gutworth/six
# Definition of East Asian Width
# http://unicode.org/reports/tr11/
# Ambiguous width can be changed by option
_EAW_MAP = {'Na': 1, 'N': 1, 'W': 2, 'F': 2, 'H': 1}
if PY3:
string_types = str,
integer_types = int,
class_types = type,
text_type = str
binary_type = bytes
def u(s):
return s
def u_safe(s):
return s
def to_str(s):
"""
Convert bytes and non-string into Python 3 str
"""
if isinstance(s, binary_type):
s = bytes_to_str(s)
elif not isinstance(s, string_types):
s = str(s)
return s
def strlen(data, encoding=None):
# encoding is for compat with PY2
return len(data)
def east_asian_len(data, encoding=None, ambiguous_width=1):
"""
Calculate display width considering unicode East Asian Width
"""
if isinstance(data, text_type):
return sum(_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data)
else:
return len(data)
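# Hedged example of the width calculation above: wide ('W') and fullwidth
# ('F') characters count as 2 columns, narrow ones as 1:
#
#   >>> east_asian_len(u'abc')
#   3
#   >>> east_asian_len(u'\u3042\u3044')  # two wide hiragana characters
#   4
#
# ambiguous_width only applies to characters whose East Asian Width is 'A'.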
def import_lzma():
""" import lzma from the std library """
import lzma
return lzma
def set_function_name(f, name, cls):
""" Bind the name/qualname attributes of the function """
f.__name__ = name
f.__qualname__ = '{klass}.{name}'.format(
klass=cls.__name__,
name=name)
f.__module__ = cls.__module__
return f
ResourceWarning = ResourceWarning
else:
string_types = basestring,
integer_types = (int, long)
class_types = (type, types.ClassType)
text_type = unicode
binary_type = str
def u(s):
return unicode(s, "unicode_escape")
def u_safe(s):
try:
return unicode(s, "unicode_escape")
except:
return s
def to_str(s):
"""
Convert unicode and non-string into Python 2 str
"""
if not isinstance(s, string_types):
s = str(s)
return s
def strlen(data, encoding=None):
try:
data = data.decode(encoding)
except UnicodeError:
pass
return len(data)
def east_asian_len(data, encoding=None, ambiguous_width=1):
"""
Calculate display width considering unicode East Asian Width
"""
if isinstance(data, text_type):
try:
data = data.decode(encoding)
except UnicodeError:
pass
return sum(_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data)
else:
return len(data)
def import_lzma():
""" import the backported lzma library
or raise ImportError if not available """
from backports import lzma
return lzma
def set_function_name(f, name, cls):
""" Bind the name attributes of the function """
f.__name__ = name
return f
class ResourceWarning(Warning):
pass
string_and_binary_types = string_types + (binary_type,)
try:
# callable reintroduced in later versions of Python
callable = callable
except NameError:
def callable(obj):
return any("__call__" in klass.__dict__ for klass in type(obj).__mro__)
if PY2:
# In PY2 functools.wraps doesn't provide metadata pytest needs to generate
# decorated tests using parametrization. See pytest GH issue #2782
def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS,
updated=functools.WRAPPER_UPDATES):
def wrapper(f):
f = functools.wraps(wrapped, assigned, updated)(f)
f.__wrapped__ = wrapped
return f
return wrapper
else:
wraps = functools.wraps
def add_metaclass(metaclass):
"""Class decorator for creating a class with a metaclass."""
def wrapper(cls):
orig_vars = cls.__dict__.copy()
orig_vars.pop('__dict__', None)
orig_vars.pop('__weakref__', None)
for slots_var in orig_vars.get('__slots__', ()):
orig_vars.pop(slots_var)
return metaclass(cls.__name__, cls.__bases__, orig_vars)
return wrapper
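# Minimal sketch of the decorator above (Meta and Base are hypothetical):
#
#   >>> class Meta(type): pass
#   >>> @add_metaclass(Meta)
#   ... class Base(object): pass
#   >>> type(Base) is Meta
#   True
#
# The class is rebuilt directly by the metaclass, so no intermediary class
# ends up in the MRO.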
from collections import OrderedDict, Counter
if PY3:
def raise_with_traceback(exc, traceback=Ellipsis):
if traceback == Ellipsis:
_, _, traceback = sys.exc_info()
raise exc.with_traceback(traceback)
else:
# this version of raise is a syntax error in Python 3
exec("""
def raise_with_traceback(exc, traceback=Ellipsis):
if traceback == Ellipsis:
_, _, traceback = sys.exc_info()
raise exc, None, traceback
""")
raise_with_traceback.__doc__ = """Raise exception with existing traceback.
If traceback is not passed, uses sys.exc_info() to get traceback."""
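# Hedged usage sketch: re-raise as a different exception type while keeping
# the traceback of the exception currently being handled, e.g.
#
#   try:
#       {}['missing']
#   except KeyError:
#       raise_with_traceback(ValueError('bad key'))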
# dateutil minimum version
import dateutil
if LooseVersion(dateutil.__version__) < LooseVersion('2.5'):
raise ImportError('dateutil 2.5.0 is the minimum required version')
from dateutil import parser as _date_parser
parse_date = _date_parser.parse
# In Python 3.7, the private re._pattern_type was removed.
# Python 3.5+ has typing.re.Pattern
if PY36:
import typing
re_type = typing.re.Pattern
else:
re_type = type(re.compile(''))
# https://github.com/pandas-dev/pandas/pull/9123
def is_platform_little_endian():
""" am I little endian """
return sys.byteorder == 'little'
def is_platform_windows():
return sys.platform == 'win32' or sys.platform == 'cygwin'
def is_platform_linux():
return sys.platform == 'linux' or sys.platform == 'linux2'
def is_platform_mac():
return sys.platform == 'darwin'
def is_platform_32bit():
return struct.calcsize("P") * 8 < 64
@@ -0,0 +1,27 @@
try:
from collections import ChainMap
except ImportError:
from pandas.compat.chainmap_impl import ChainMap
class DeepChainMap(ChainMap):
def __setitem__(self, key, value):
for mapping in self.maps:
if key in mapping:
mapping[key] = value
return
self.maps[0][key] = value
def __delitem__(self, key):
for mapping in self.maps:
if key in mapping:
del mapping[key]
return
raise KeyError(key)
# override because the m parameter was introduced in Python 3.4
def new_child(self, m=None):
if m is None:
m = {}
return self.__class__(m, *self.maps)
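# Hedged sketch of the write semantics above: unlike a plain ChainMap,
# assignment updates the first mapping that already contains the key rather
# than always writing to maps[0]:
#
#   >>> d = DeepChainMap({'x': 1}, {'y': 2})
#   >>> d['y'] = 20
#   >>> d.maps
#   [{'x': 1}, {'y': 20}]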
@@ -0,0 +1,157 @@
import sys
PY3 = sys.version_info[0] >= 3
if PY3:
from collections.abc import MutableMapping
else:
from collections import MutableMapping
try:
from thread import get_ident
except ImportError:
from _thread import get_ident
def recursive_repr(fillvalue='...'):
'Decorator to make a repr function return fillvalue for a recursive call'
def decorating_function(user_function):
repr_running = set()
def wrapper(self):
key = id(self), get_ident()
if key in repr_running:
return fillvalue
repr_running.add(key)
try:
result = user_function(self)
finally:
repr_running.discard(key)
return result
# Can't use functools.wraps() here because of bootstrap issues
wrapper.__module__ = getattr(user_function, '__module__')
wrapper.__doc__ = getattr(user_function, '__doc__')
wrapper.__name__ = getattr(user_function, '__name__')
return wrapper
return decorating_function
class ChainMap(MutableMapping):
""" A ChainMap groups multiple dicts (or other mappings) together
to create a single, updatable view.
The underlying mappings are stored in a list. That list is public and can
be accessed / updated using the *maps* attribute. There is no other state.
Lookups search the underlying mappings successively until a key is found.
In contrast, writes, updates, and deletions only operate on the first
mapping.
"""
def __init__(self, *maps):
"""Initialize a ChainMap by setting *maps* to the given mappings.
If no mappings are provided, a single empty dictionary is used.
"""
self.maps = list(maps) or [{}] # always at least one map
def __missing__(self, key):
raise KeyError(key)
def __getitem__(self, key):
for mapping in self.maps:
try:
# can't use 'key in mapping' with defaultdict
return mapping[key]
except KeyError:
pass
# support subclasses that define __missing__
return self.__missing__(key)
def get(self, key, default=None):
return self[key] if key in self else default
def __len__(self):
# reuses stored hash values if possible
return len(set().union(*self.maps))
def __iter__(self):
return iter(set().union(*self.maps))
def __contains__(self, key):
return any(key in m for m in self.maps)
def __bool__(self):
return any(self.maps)
@recursive_repr()
def __repr__(self):
return '{0.__class__.__name__}({1})'.format(
self, ', '.join(repr(m) for m in self.maps))
@classmethod
def fromkeys(cls, iterable, *args):
'Create a ChainMap with a single dict created from the iterable.'
return cls(dict.fromkeys(iterable, *args))
def copy(self):
"""
New ChainMap or subclass with a new copy of maps[0] and refs to
maps[1:]
"""
return self.__class__(self.maps[0].copy(), *self.maps[1:])
__copy__ = copy
def new_child(self, m=None): # like Django's Context.push()
"""
New ChainMap with a new map followed by all previous maps. If no
map is provided, an empty dict is used.
"""
if m is None:
m = {}
return self.__class__(m, *self.maps)
@property
def parents(self): # like Django's Context.pop()
'New ChainMap from maps[1:].'
return self.__class__(*self.maps[1:])
def __setitem__(self, key, value):
self.maps[0][key] = value
def __delitem__(self, key):
try:
del self.maps[0][key]
except KeyError:
raise KeyError('Key not found in the first mapping: {!r}'
.format(key))
def popitem(self):
"""
Remove and return an item pair from maps[0]. Raise KeyError if maps[0]
is empty.
"""
try:
return self.maps[0].popitem()
except KeyError:
raise KeyError('No keys found in the first mapping.')
def pop(self, key, *args):
"""
Remove *key* from maps[0] and return its value. Raise KeyError if
*key* not in maps[0].
"""
try:
return self.maps[0].pop(key, *args)
except KeyError:
raise KeyError('Key not found in the first mapping: {!r}'
.format(key))
def clear(self):
'Clear maps[0], leaving maps[1:] intact.'
self.maps[0].clear()
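# Hedged usage sketch of the backported ChainMap (values are illustrative):
#
#   >>> cm = ChainMap({'a': 1}, {'a': 10, 'b': 2})
#   >>> cm['a'], cm['b']   # lookups search the maps left to right
#   (1, 2)
#   >>> cm['c'] = 3        # writes always go to maps[0]
#   >>> cm.maps[0]
#   {'a': 1, 'c': 3}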
@@ -0,0 +1,68 @@
""" support numpy compatiblitiy across versions """
import re
import numpy as np
from distutils.version import LooseVersion
from pandas.compat import string_types, string_and_binary_types
# numpy versioning
_np_version = np.__version__
_nlv = LooseVersion(_np_version)
_np_version_under1p13 = _nlv < LooseVersion('1.13')
_np_version_under1p14 = _nlv < LooseVersion('1.14')
_np_version_under1p15 = _nlv < LooseVersion('1.15')
if _nlv < '1.12':
raise ImportError('this version of pandas is incompatible with '
'numpy < 1.12.0\n'
'your numpy version is {0}.\n'
'Please upgrade numpy to >= 1.12.0 to use '
'this pandas version'.format(_np_version))
_tz_regex = re.compile('[+-]0000$')
def tz_replacer(s):
if isinstance(s, string_types):
if s.endswith('Z'):
s = s[:-1]
elif _tz_regex.search(s):
s = s[:-5]
return s
def np_datetime64_compat(s, *args, **kwargs):
"""
provide compat for constructing numpy datetime64 values from strings;
tz changes in numpy 1.11 make '2015-01-01 09:00:00Z' show a deprecation
warning, so '2015-01-01 09:00:00' needs to be passed instead
"""
s = tz_replacer(s)
return np.datetime64(s, *args, **kwargs)
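# Hedged example: the trailing timezone designator is stripped before the
# value reaches numpy, so the 'Z' spelling no longer triggers the warning:
#
#   >>> np_datetime64_compat('2015-01-01 09:00:00Z')
#   numpy.datetime64('2015-01-01T09:00:00')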
def np_array_datetime64_compat(arr, *args, **kwargs):
"""
provide compat for constructing an array of strings into a
np.array(..., dtype=np.datetime64(..));
tz changes in numpy 1.11 make '2015-01-01 09:00:00Z' show a deprecation
warning, so '2015-01-01 09:00:00' needs to be passed instead
"""
# is_list_like
if (hasattr(arr, '__iter__')
and not isinstance(arr, string_and_binary_types)):
arr = [tz_replacer(s) for s in arr]
else:
arr = tz_replacer(arr)
return np.array(arr, *args, **kwargs)
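# Hedged sketch for the array version: each string in a list-like input gets
# the same tz stripping before np.array() is called:
#
#   >>> np_array_datetime64_compat(['2015-01-01 09:00:00Z'],
#   ...                            dtype='datetime64[ns]')
#   array(['2015-01-01T09:00:00.000000000'], dtype='datetime64[ns]')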
__all__ = ['np',
'_np_version_under1p13',
'_np_version_under1p14',
'_np_version_under1p15'
]
@@ -0,0 +1,402 @@
"""
For compatibility with numpy libraries, pandas functions or
methods have to accept '*args' and '**kwargs' parameters to
accommodate numpy arguments that are not actually used or
respected in the pandas implementation.
To ensure that users do not abuse these parameters, validation
is performed in 'validators.py' to make sure that any extra
parameters passed correspond ONLY to those in the numpy signature.
Part of that validation includes whether or not the user attempted
to pass in non-default values for these extraneous parameters. As we
want to discourage users from relying on these parameters when calling
the pandas implementation, we want them only to pass in the default values
for these parameters.
This module provides a set of commonly used default arguments for functions
and methods that are spread throughout the codebase. This module will make it
easier to adjust to future upstream changes in the analogous numpy signatures.
"""
from numpy import ndarray
from pandas.compat import OrderedDict
from pandas.errors import UnsupportedFunctionCall
from pandas.util._validators import (
validate_args, validate_args_and_kwargs, validate_kwargs)
from pandas.core.dtypes.common import is_bool, is_integer
class CompatValidator(object):
def __init__(self, defaults, fname=None, method=None,
max_fname_arg_count=None):
self.fname = fname
self.method = method
self.defaults = defaults
self.max_fname_arg_count = max_fname_arg_count
def __call__(self, args, kwargs, fname=None,
max_fname_arg_count=None, method=None):
if args or kwargs:
fname = self.fname if fname is None else fname
max_fname_arg_count = (self.max_fname_arg_count if
max_fname_arg_count is None
else max_fname_arg_count)
method = self.method if method is None else method
if method == 'args':
validate_args(fname, args, max_fname_arg_count, self.defaults)
elif method == 'kwargs':
validate_kwargs(fname, kwargs, self.defaults)
elif method == 'both':
validate_args_and_kwargs(fname, args, kwargs,
max_fname_arg_count,
self.defaults)
else:
raise ValueError("invalid validation method "
"'{method}'".format(method=method))
ARGMINMAX_DEFAULTS = dict(out=None)
validate_argmin = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmin',
method='both', max_fname_arg_count=1)
validate_argmax = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmax',
method='both', max_fname_arg_count=1)
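# Hedged usage sketch: a validator passes when only the numpy defaults are
# supplied and raises once a shadowed keyword gets a non-default value:
#
#   >>> validate_argmin((), {'out': None})      # defaults only -> no-op
#   >>> validate_argmin((), {'out': 'buffer'})  # raises
#   ValueError: the 'out' parameter is not supported ...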
def process_skipna(skipna, args):
if isinstance(skipna, ndarray) or skipna is None:
args = (skipna,) + args
skipna = True
return skipna, args
def validate_argmin_with_skipna(skipna, args, kwargs):
"""
If 'Series.argmin' is called via the 'numpy' library,
the third parameter in its signature is 'out', which
takes either an ndarray or 'None', so check if the
'skipna' parameter is either an instance of ndarray or
is None, since 'skipna' itself should be a boolean
"""
skipna, args = process_skipna(skipna, args)
validate_argmin(args, kwargs)
return skipna
def validate_argmax_with_skipna(skipna, args, kwargs):
"""
If 'Series.argmax' is called via the 'numpy' library,
the third parameter in its signature is 'out', which
takes either an ndarray or 'None', so check if the
'skipna' parameter is either an instance of ndarray or
is None, since 'skipna' itself should be a boolean
"""
skipna, args = process_skipna(skipna, args)
validate_argmax(args, kwargs)
return skipna
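# Hedged sketch of the swap above: when numpy dispatches to Series.argmin,
# 'out' can land in the 'skipna' slot, so a None/ndarray value is pushed back
# into args and skipna is reset to its boolean default:
#
#   >>> validate_argmin_with_skipna(None, (), {})
#   True
#   >>> validate_argmin_with_skipna(False, (), {})
#   False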
ARGSORT_DEFAULTS = OrderedDict()
ARGSORT_DEFAULTS['axis'] = -1
ARGSORT_DEFAULTS['kind'] = 'quicksort'
ARGSORT_DEFAULTS['order'] = None
validate_argsort = CompatValidator(ARGSORT_DEFAULTS, fname='argsort',
max_fname_arg_count=0, method='both')
# argsort has two different signatures; this second validation is
# for when the `kind` param is supported
ARGSORT_DEFAULTS_KIND = OrderedDict()
ARGSORT_DEFAULTS_KIND['axis'] = -1
ARGSORT_DEFAULTS_KIND['order'] = None
validate_argsort_kind = CompatValidator(ARGSORT_DEFAULTS_KIND, fname='argsort',
max_fname_arg_count=0, method='both')
def validate_argsort_with_ascending(ascending, args, kwargs):
"""
If 'Categorical.argsort' is called via the 'numpy' library, the
first parameter in its signature is 'axis', which takes either
an integer or 'None', so check if the 'ascending' parameter has
either integer type or is None, since 'ascending' itself should
be a boolean
"""
if is_integer(ascending) or ascending is None:
args = (ascending,) + args
ascending = True
validate_argsort_kind(args, kwargs, max_fname_arg_count=3)
return ascending
CLIP_DEFAULTS = dict(out=None)
validate_clip = CompatValidator(CLIP_DEFAULTS, fname='clip',
method='both', max_fname_arg_count=3)
def validate_clip_with_axis(axis, args, kwargs):
"""
If 'NDFrame.clip' is called via the numpy library, the third
parameter in its signature is 'out', which can take an ndarray,
so check if the 'axis' parameter is an instance of ndarray, since
'axis' itself should either be an integer or None
"""
if isinstance(axis, ndarray):
args = (axis,) + args
axis = None
validate_clip(args, kwargs)
return axis
COMPRESS_DEFAULTS = OrderedDict()
COMPRESS_DEFAULTS['axis'] = None
COMPRESS_DEFAULTS['out'] = None
validate_compress = CompatValidator(COMPRESS_DEFAULTS, fname='compress',
method='both', max_fname_arg_count=1)
CUM_FUNC_DEFAULTS = OrderedDict()
CUM_FUNC_DEFAULTS['dtype'] = None
CUM_FUNC_DEFAULTS['out'] = None
validate_cum_func = CompatValidator(CUM_FUNC_DEFAULTS, method='both',
max_fname_arg_count=1)
validate_cumsum = CompatValidator(CUM_FUNC_DEFAULTS, fname='cumsum',
method='both', max_fname_arg_count=1)
def validate_cum_func_with_skipna(skipna, args, kwargs, name):
"""
If this function is called via the 'numpy' library, the third
parameter in its signature is 'dtype', which takes either a
'numpy' dtype or 'None', so check if the 'skipna' parameter is
a boolean or not
"""
if not is_bool(skipna):
args = (skipna,) + args
skipna = True
validate_cum_func(args, kwargs, fname=name)
return skipna
ALLANY_DEFAULTS = OrderedDict()
ALLANY_DEFAULTS['dtype'] = None
ALLANY_DEFAULTS['out'] = None
ALLANY_DEFAULTS['keepdims'] = False
validate_all = CompatValidator(ALLANY_DEFAULTS, fname='all',
method='both', max_fname_arg_count=1)
validate_any = CompatValidator(ALLANY_DEFAULTS, fname='any',
method='both', max_fname_arg_count=1)
LOGICAL_FUNC_DEFAULTS = dict(out=None, keepdims=False)
validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method='kwargs')
MINMAX_DEFAULTS = dict(out=None, keepdims=False)
validate_min = CompatValidator(MINMAX_DEFAULTS, fname='min',
method='both', max_fname_arg_count=1)
validate_max = CompatValidator(MINMAX_DEFAULTS, fname='max',
method='both', max_fname_arg_count=1)
RESHAPE_DEFAULTS = dict(order='C')
validate_reshape = CompatValidator(RESHAPE_DEFAULTS, fname='reshape',
method='both', max_fname_arg_count=1)
REPEAT_DEFAULTS = dict(axis=None)
validate_repeat = CompatValidator(REPEAT_DEFAULTS, fname='repeat',
method='both', max_fname_arg_count=1)
ROUND_DEFAULTS = dict(out=None)
validate_round = CompatValidator(ROUND_DEFAULTS, fname='round',
method='both', max_fname_arg_count=1)
SORT_DEFAULTS = OrderedDict()
SORT_DEFAULTS['axis'] = -1
SORT_DEFAULTS['kind'] = 'quicksort'
SORT_DEFAULTS['order'] = None
validate_sort = CompatValidator(SORT_DEFAULTS, fname='sort',
method='kwargs')
STAT_FUNC_DEFAULTS = OrderedDict()
STAT_FUNC_DEFAULTS['dtype'] = None
STAT_FUNC_DEFAULTS['out'] = None
PROD_DEFAULTS = SUM_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
SUM_DEFAULTS['keepdims'] = False
SUM_DEFAULTS['initial'] = None
MEDIAN_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
MEDIAN_DEFAULTS['overwrite_input'] = False
MEDIAN_DEFAULTS['keepdims'] = False
STAT_FUNC_DEFAULTS['keepdims'] = False
validate_stat_func = CompatValidator(STAT_FUNC_DEFAULTS,
method='kwargs')
validate_sum = CompatValidator(SUM_DEFAULTS, fname='sum',
method='both', max_fname_arg_count=1)
validate_prod = CompatValidator(PROD_DEFAULTS, fname="prod",
method="both", max_fname_arg_count=1)
validate_mean = CompatValidator(STAT_FUNC_DEFAULTS, fname='mean',
method='both', max_fname_arg_count=1)
validate_median = CompatValidator(MEDIAN_DEFAULTS, fname='median',
method='both', max_fname_arg_count=1)
STAT_DDOF_FUNC_DEFAULTS = OrderedDict()
STAT_DDOF_FUNC_DEFAULTS['dtype'] = None
STAT_DDOF_FUNC_DEFAULTS['out'] = None
STAT_DDOF_FUNC_DEFAULTS['keepdims'] = False
validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS,
method='kwargs')
TAKE_DEFAULTS = OrderedDict()
TAKE_DEFAULTS['out'] = None
TAKE_DEFAULTS['mode'] = 'raise'
validate_take = CompatValidator(TAKE_DEFAULTS, fname='take',
method='kwargs')
def validate_take_with_convert(convert, args, kwargs):
"""
If this function is called via the 'numpy' library, the third
parameter in its signature is 'axis', which takes either an
ndarray or 'None', so check if the 'convert' parameter is either
an instance of ndarray or is None
"""
if isinstance(convert, ndarray) or convert is None:
args = (convert,) + args
convert = True
validate_take(args, kwargs, max_fname_arg_count=3, method='both')
return convert
TRANSPOSE_DEFAULTS = dict(axes=None)
validate_transpose = CompatValidator(TRANSPOSE_DEFAULTS, fname='transpose',
method='both', max_fname_arg_count=0)
def validate_transpose_for_generic(inst, kwargs):
try:
validate_transpose(tuple(), kwargs)
except ValueError as e:
klass = type(inst).__name__
msg = str(e)
# the Panel class actually relies on the 'axes' parameter if called
# via the 'numpy' library, so let's make sure the error is specific
# about saying that the parameter is not supported for particular
# implementations of 'transpose'
if "the 'axes' parameter is not supported" in msg:
msg += " for {klass} instances".format(klass=klass)
raise ValueError(msg)
def validate_window_func(name, args, kwargs):
numpy_args = ('axis', 'dtype', 'out')
msg = ("numpy operations are not "
"valid with window objects. "
"Use .{func}() directly instead ".format(func=name))
if len(args) > 0:
raise UnsupportedFunctionCall(msg)
for arg in numpy_args:
if arg in kwargs:
raise UnsupportedFunctionCall(msg)
def validate_rolling_func(name, args, kwargs):
numpy_args = ('axis', 'dtype', 'out')
msg = ("numpy operations are not "
"valid with window objects. "
"Use .rolling(...).{func}() instead ".format(func=name))
if len(args) > 0:
raise UnsupportedFunctionCall(msg)
for arg in numpy_args:
if arg in kwargs:
raise UnsupportedFunctionCall(msg)
def validate_expanding_func(name, args, kwargs):
numpy_args = ('axis', 'dtype', 'out')
msg = ("numpy operations are not "
"valid with window objects. "
"Use .expanding(...).{func}() instead ".format(func=name))
if len(args) > 0:
raise UnsupportedFunctionCall(msg)
for arg in numpy_args:
if arg in kwargs:
raise UnsupportedFunctionCall(msg)
def validate_groupby_func(name, args, kwargs, allowed=None):
"""
'args' and 'kwargs' should be empty, except for allowed
kwargs because all of
their necessary parameters are explicitly listed in
the function signature
"""
if allowed is None:
allowed = []
kwargs = set(kwargs) - set(allowed)
if len(args) + len(kwargs) > 0:
raise UnsupportedFunctionCall((
"numpy operations are not valid "
"with groupby. Use .groupby(...)."
"{func}() instead".format(func=name)))
RESAMPLER_NUMPY_OPS = ('min', 'max', 'sum', 'prod',
'mean', 'std', 'var')
def validate_resampler_func(method, args, kwargs):
"""
'args' and 'kwargs' should be empty because all of
their necessary parameters are explicitly listed in
the function signature
"""
if len(args) + len(kwargs) > 0:
if method in RESAMPLER_NUMPY_OPS:
raise UnsupportedFunctionCall((
"numpy operations are not valid "
"with resample. Use .resample(...)."
"{func}() instead".format(func=method)))
else:
raise TypeError("too many arguments passed in")
def validate_minmax_axis(axis):
"""
Ensure that the axis argument passed to min, max, argmin, or argmax is
zero or None, as otherwise it will be incorrectly ignored.
Parameters
----------
axis : int or None
Raises
------
ValueError
"""
ndim = 1 # hard-coded for Index
if axis is None:
return
if axis >= ndim or (axis < 0 and ndim + axis < 0):
raise ValueError("`axis` must be fewer than the number of "
"dimensions ({ndim})".format(ndim=ndim))
@@ -0,0 +1,229 @@
"""
Support pre-0.12 series pickle compatibility.
"""
import copy
import pickle as pkl
import sys
from pandas.compat import string_types, u # noqa
import pandas # noqa
from pandas import Index, compat
def load_reduce(self):
stack = self.stack
args = stack.pop()
func = stack[-1]
if len(args) and type(args[0]) is type:
n = args[0].__name__ # noqa
try:
stack[-1] = func(*args)
return
except Exception as e:
# If we have a deprecated function,
# try to replace and try again.
msg = '_reconstruct: First argument must be a sub-type of ndarray'
if msg in str(e):
try:
cls = args[0]
stack[-1] = object.__new__(cls)
return
except TypeError:
pass
# try to re-encode the arguments
if getattr(self, 'encoding', None) is not None:
args = tuple(arg.encode(self.encoding)
if isinstance(arg, string_types)
else arg for arg in args)
try:
stack[-1] = func(*args)
return
except TypeError:
pass
# unknown exception, re-raise
if getattr(self, 'is_verbose', None):
print(sys.exc_info())
print(func, args)
raise
# If classes are moved, provide compat here.
_class_locations_map = {
('pandas.core.sparse.array', 'SparseArray'):
('pandas.core.arrays', 'SparseArray'),
# 15477
#
# TODO: When FrozenNDArray is removed, add
# the following lines for compat:
#
# ('pandas.core.base', 'FrozenNDArray'):
# ('numpy', 'ndarray'),
# ('pandas.core.indexes.frozen', 'FrozenNDArray'):
# ('numpy', 'ndarray'),
#
# Afterwards, remove the current entry
# for `pandas.core.base.FrozenNDArray`.
('pandas.core.base', 'FrozenNDArray'):
('pandas.core.indexes.frozen', 'FrozenNDArray'),
('pandas.core.base', 'FrozenList'):
('pandas.core.indexes.frozen', 'FrozenList'),
# 10890
('pandas.core.series', 'TimeSeries'):
('pandas.core.series', 'Series'),
('pandas.sparse.series', 'SparseTimeSeries'):
('pandas.core.sparse.series', 'SparseSeries'),
# 12588, extensions moving
('pandas._sparse', 'BlockIndex'):
('pandas._libs.sparse', 'BlockIndex'),
('pandas.tslib', 'Timestamp'):
('pandas._libs.tslib', 'Timestamp'),
# 18543 moving period
('pandas._period', 'Period'): ('pandas._libs.tslibs.period', 'Period'),
('pandas._libs.period', 'Period'):
('pandas._libs.tslibs.period', 'Period'),
# 18014 moved __nat_unpickle from _libs.tslib-->_libs.tslibs.nattype
('pandas.tslib', '__nat_unpickle'):
('pandas._libs.tslibs.nattype', '__nat_unpickle'),
('pandas._libs.tslib', '__nat_unpickle'):
('pandas._libs.tslibs.nattype', '__nat_unpickle'),
# 15998 top-level dirs moving
('pandas.sparse.array', 'SparseArray'):
('pandas.core.arrays.sparse', 'SparseArray'),
('pandas.sparse.series', 'SparseSeries'):
('pandas.core.sparse.series', 'SparseSeries'),
('pandas.sparse.frame', 'SparseDataFrame'):
('pandas.core.sparse.frame', 'SparseDataFrame'),
('pandas.indexes.base', '_new_Index'):
('pandas.core.indexes.base', '_new_Index'),
('pandas.indexes.base', 'Index'):
('pandas.core.indexes.base', 'Index'),
('pandas.indexes.numeric', 'Int64Index'):
('pandas.core.indexes.numeric', 'Int64Index'),
('pandas.indexes.range', 'RangeIndex'):
('pandas.core.indexes.range', 'RangeIndex'),
('pandas.indexes.multi', 'MultiIndex'):
('pandas.core.indexes.multi', 'MultiIndex'),
('pandas.tseries.index', '_new_DatetimeIndex'):
('pandas.core.indexes.datetimes', '_new_DatetimeIndex'),
('pandas.tseries.index', 'DatetimeIndex'):
('pandas.core.indexes.datetimes', 'DatetimeIndex'),
('pandas.tseries.period', 'PeriodIndex'):
('pandas.core.indexes.period', 'PeriodIndex'),
# 19269, arrays moving
('pandas.core.categorical', 'Categorical'):
('pandas.core.arrays', 'Categorical'),
# 19939, add timedeltaindex, float64index compat from 15998 move
('pandas.tseries.tdi', 'TimedeltaIndex'):
('pandas.core.indexes.timedeltas', 'TimedeltaIndex'),
('pandas.indexes.numeric', 'Float64Index'):
('pandas.core.indexes.numeric', 'Float64Index'),
}
# our Unpickler sub-class to override methods and some dispatcher
# functions for compat
if compat.PY3:
class Unpickler(pkl._Unpickler):
def find_class(self, module, name):
# override superclass
key = (module, name)
module, name = _class_locations_map.get(key, key)
return super(Unpickler, self).find_class(module, name)
else:
class Unpickler(pkl.Unpickler):
def find_class(self, module, name):
# override superclass
key = (module, name)
module, name = _class_locations_map.get(key, key)
__import__(module)
mod = sys.modules[module]
klass = getattr(mod, name)
return klass
Unpickler.dispatch = copy.copy(Unpickler.dispatch)
Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce
def load_newobj(self):
args = self.stack.pop()
cls = self.stack[-1]
# compat
if issubclass(cls, Index):
obj = object.__new__(cls)
else:
obj = cls.__new__(cls, *args)
self.stack[-1] = obj
Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj
def load_newobj_ex(self):
kwargs = self.stack.pop()
args = self.stack.pop()
cls = self.stack.pop()
# compat
if issubclass(cls, Index):
obj = object.__new__(cls)
else:
obj = cls.__new__(cls, *args, **kwargs)
self.append(obj)
try:
Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex
except (AttributeError, KeyError):
pass
def load(fh, encoding=None, compat=False, is_verbose=False):
"""load a pickle, with a provided encoding
if compat is True:
fake the old class hierarchy
if it works, then return the new type objects
Parameters
----------
fh : a filelike object
encoding : an optional encoding
compat : provide Series compatibility mode, boolean, default False
is_verbose : show exception output
"""
try:
fh.seek(0)
if encoding is not None:
up = Unpickler(fh, encoding=encoding)
else:
up = Unpickler(fh)
up.is_verbose = is_verbose
return up.load()
except (ValueError, TypeError):
raise