Static code analysis and corrections

This commit is contained in:
Kristjan Komlosi
2019-07-17 16:06:09 +02:00
parent 674692c2fc
commit 21bfae9fbc
10086 changed files with 2102103 additions and 51 deletions
@@ -0,0 +1,8 @@
"""
Timeseries API
"""
# flake8: noqa
from pandas.tseries.frequencies import infer_freq
import pandas.tseries.offsets as offsets
@@ -0,0 +1,20 @@
# flake8: noqa
import warnings
from pandas.plotting._converter import (time2num,
TimeConverter, TimeFormatter,
PeriodConverter, get_datevalue,
DatetimeConverter,
PandasAutoDateFormatter,
PandasAutoDateLocator,
MilliSecondLocator, get_finder,
TimeSeries_DateLocator,
TimeSeries_DateFormatter)
def register():
from pandas.plotting._converter import register as register_
msg = ("'pandas.tseries.converter.register' has been moved and renamed to "
"'pandas.plotting.register_matplotlib_converters'. ")
warnings.warn(msg, FutureWarning, stacklevel=2)
register_()
@@ -0,0 +1,270 @@
# -*- coding: utf-8 -*-
from datetime import timedelta
from pandas.compat import zip
from pandas import compat
import re
import numpy as np
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.common import (
is_period_arraylike,
is_timedelta64_dtype,
is_datetime64_dtype)
from pandas.tseries.offsets import DateOffset
from pandas._libs.tslib import Timedelta
import pandas._libs.tslibs.frequencies as libfreqs
from pandas._libs.tslibs.frequencies import ( # noqa, semi-public API
get_freq, get_base_alias, get_to_timestamp_base, get_freq_code,
FreqGroup,
is_subperiod, is_superperiod)
from pandas._libs.tslibs.resolution import (Resolution,
_FrequencyInferer,
_TimedeltaFrequencyInferer)
from pytz import AmbiguousTimeError
RESO_NS = 0
RESO_US = 1
RESO_MS = 2
RESO_SEC = 3
RESO_MIN = 4
RESO_HR = 5
RESO_DAY = 6
# ---------------------------------------------------------------------
# Offset names ("time rules") and related functions
from pandas._libs.tslibs.offsets import _offset_to_period_map # noqa:E402
from pandas.tseries.offsets import (Nano, Micro, Milli, Second, # noqa
Minute, Hour,
Day, BDay, CDay, Week, MonthBegin,
MonthEnd, BMonthBegin, BMonthEnd,
QuarterBegin, QuarterEnd, BQuarterBegin,
BQuarterEnd, YearBegin, YearEnd,
BYearBegin, BYearEnd, prefix_mapping)
try:
cday = CDay()
except NotImplementedError:
cday = None
#: cache of previously seen offsets
_offset_map = {}
def get_period_alias(offset_str):
""" alias to closest period strings BQ->Q etc"""
return _offset_to_period_map.get(offset_str, None)
_name_to_offset_map = {'days': Day(1),
'hours': Hour(1),
'minutes': Minute(1),
'seconds': Second(1),
'milliseconds': Milli(1),
'microseconds': Micro(1),
'nanoseconds': Nano(1)}
def to_offset(freq):
"""
Return DateOffset object from string or tuple representation
or datetime.timedelta object
Parameters
----------
freq : str, tuple, datetime.timedelta, DateOffset or None
Returns
-------
delta : DateOffset
None if freq is None
Raises
------
ValueError
If freq is an invalid frequency
See Also
--------
pandas.DateOffset
Examples
--------
>>> to_offset('5min')
<5 * Minutes>
>>> to_offset('1D1H')
<25 * Hours>
>>> to_offset(('W', 2))
<2 * Weeks: weekday=6>
>>> to_offset((2, 'B'))
<2 * BusinessDays>
>>> to_offset(datetime.timedelta(days=1))
<Day>
>>> to_offset(Hour())
<Hour>
"""
if freq is None:
return None
if isinstance(freq, DateOffset):
return freq
if isinstance(freq, tuple):
name = freq[0]
stride = freq[1]
if isinstance(stride, compat.string_types):
name, stride = stride, name
name, _ = libfreqs._base_and_stride(name)
delta = get_offset(name) * stride
elif isinstance(freq, timedelta):
delta = None
freq = Timedelta(freq)
try:
for name in freq.components._fields:
offset = _name_to_offset_map[name]
stride = getattr(freq.components, name)
if stride != 0:
offset = stride * offset
if delta is None:
delta = offset
else:
delta = delta + offset
except Exception:
raise ValueError(libfreqs._INVALID_FREQ_ERROR.format(freq))
else:
delta = None
stride_sign = None
try:
splitted = re.split(libfreqs.opattern, freq)
if splitted[-1] != '' and not splitted[-1].isspace():
# the last element must be blank
raise ValueError('last element must be blank')
for sep, stride, name in zip(splitted[0::4], splitted[1::4],
splitted[2::4]):
if sep != '' and not sep.isspace():
raise ValueError('separator must be spaces')
prefix = libfreqs._lite_rule_alias.get(name) or name
if stride_sign is None:
stride_sign = -1 if stride.startswith('-') else 1
if not stride:
stride = 1
if prefix in Resolution._reso_str_bump_map.keys():
stride, name = Resolution.get_stride_from_decimal(
float(stride), prefix
)
stride = int(stride)
offset = get_offset(name)
offset = offset * int(np.fabs(stride) * stride_sign)
if delta is None:
delta = offset
else:
delta = delta + offset
except Exception:
raise ValueError(libfreqs._INVALID_FREQ_ERROR.format(freq))
if delta is None:
raise ValueError(libfreqs._INVALID_FREQ_ERROR.format(freq))
return delta
def get_offset(name):
"""
Return DateOffset object associated with rule name
Examples
--------
get_offset('EOM') --> BMonthEnd(1)
"""
if name not in libfreqs._dont_uppercase:
name = name.upper()
name = libfreqs._lite_rule_alias.get(name, name)
name = libfreqs._lite_rule_alias.get(name.lower(), name)
else:
name = libfreqs._lite_rule_alias.get(name, name)
if name not in _offset_map:
try:
split = name.split('-')
klass = prefix_mapping[split[0]]
# handles case where there's no suffix (and will TypeError if too
# many '-')
offset = klass._from_name(*split[1:])
except (ValueError, TypeError, KeyError):
# bad prefix or suffix
raise ValueError(libfreqs._INVALID_FREQ_ERROR.format(name))
# cache
_offset_map[name] = offset
# do not return cache because it's mutable
return _offset_map[name].copy()
getOffset = get_offset
# ---------------------------------------------------------------------
# Period codes
def infer_freq(index, warn=True):
"""
Infer the most likely frequency given the input index. If the frequency is
uncertain, a warning will be printed.
Parameters
----------
index : DatetimeIndex or TimedeltaIndex
if passed a Series will use the values of the series (NOT THE INDEX)
warn : boolean, default True
Returns
-------
freq : string or None
None if no discernible frequency
TypeError if the index is not datetime-like
ValueError if there are less than three values.
"""
import pandas as pd
if isinstance(index, ABCSeries):
values = index._values
if not (is_datetime64_dtype(values) or
is_timedelta64_dtype(values) or
values.dtype == object):
raise TypeError("cannot infer freq from a non-convertible dtype "
"on a Series of {dtype}".format(dtype=index.dtype))
index = values
if is_period_arraylike(index):
raise TypeError("PeriodIndex given. Check the `freq` attribute "
"instead of using infer_freq.")
elif isinstance(index, pd.TimedeltaIndex):
inferer = _TimedeltaFrequencyInferer(index, warn=warn)
return inferer.get_freq()
if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex):
if isinstance(index, (pd.Int64Index, pd.Float64Index)):
raise TypeError("cannot infer freq from a non-convertible index "
"type {type}".format(type=type(index)))
index = index.values
if not isinstance(index, pd.DatetimeIndex):
try:
index = pd.DatetimeIndex(index)
except AmbiguousTimeError:
index = pd.DatetimeIndex(index.asi8)
inferer = _FrequencyInferer(index, warn=warn)
return inferer.get_freq()
@@ -0,0 +1,509 @@
import warnings
from pandas import DateOffset, DatetimeIndex, Series, Timestamp
from pandas.compat import add_metaclass
from datetime import datetime, timedelta
from dateutil.relativedelta import MO, TU, WE, TH, FR, SA, SU # noqa
from pandas.tseries.offsets import Easter, Day
import numpy as np
def next_monday(dt):
"""
If holiday falls on Saturday, use following Monday instead;
if holiday falls on Sunday, use Monday instead
"""
if dt.weekday() == 5:
return dt + timedelta(2)
elif dt.weekday() == 6:
return dt + timedelta(1)
return dt
def next_monday_or_tuesday(dt):
"""
For second holiday of two adjacent ones!
If holiday falls on Saturday, use following Monday instead;
if holiday falls on Sunday or Monday, use following Tuesday instead
(because Monday is already taken by adjacent holiday on the day before)
"""
dow = dt.weekday()
if dow == 5 or dow == 6:
return dt + timedelta(2)
elif dow == 0:
return dt + timedelta(1)
return dt
def previous_friday(dt):
"""
If holiday falls on Saturday or Sunday, use previous Friday instead.
"""
if dt.weekday() == 5:
return dt - timedelta(1)
elif dt.weekday() == 6:
return dt - timedelta(2)
return dt
def sunday_to_monday(dt):
"""
If holiday falls on Sunday, use day thereafter (Monday) instead.
"""
if dt.weekday() == 6:
return dt + timedelta(1)
return dt
def weekend_to_monday(dt):
"""
If holiday falls on Sunday or Saturday,
use day thereafter (Monday) instead.
Needed for holidays such as Christmas observation in Europe
"""
if dt.weekday() == 6:
return dt + timedelta(1)
elif dt.weekday() == 5:
return dt + timedelta(2)
return dt
def nearest_workday(dt):
"""
If holiday falls on Saturday, use day before (Friday) instead;
if holiday falls on Sunday, use day thereafter (Monday) instead.
"""
if dt.weekday() == 5:
return dt - timedelta(1)
elif dt.weekday() == 6:
return dt + timedelta(1)
return dt
def next_workday(dt):
"""
returns next weekday used for observances
"""
dt += timedelta(days=1)
while dt.weekday() > 4:
# Mon-Fri are 0-4
dt += timedelta(days=1)
return dt
def previous_workday(dt):
"""
returns previous weekday used for observances
"""
dt -= timedelta(days=1)
while dt.weekday() > 4:
# Mon-Fri are 0-4
dt -= timedelta(days=1)
return dt
def before_nearest_workday(dt):
"""
returns previous workday after nearest workday
"""
return previous_workday(nearest_workday(dt))
def after_nearest_workday(dt):
"""
returns next workday after nearest workday
needed for Boxing day or multiple holidays in a series
"""
return next_workday(nearest_workday(dt))
class Holiday(object):
"""
Class that defines a holiday with start/end dates and rules
for observance.
"""
def __init__(self, name, year=None, month=None, day=None, offset=None,
observance=None, start_date=None, end_date=None,
days_of_week=None):
"""
Parameters
----------
name : str
Name of the holiday , defaults to class name
offset : array of pandas.tseries.offsets or
class from pandas.tseries.offsets
computes offset from date
observance: function
computes when holiday is given a pandas Timestamp
days_of_week:
provide a tuple of days e.g (0,1,2,3,) for Monday Through Thursday
Monday=0,..,Sunday=6
Examples
--------
>>> from pandas.tseries.holiday import Holiday, nearest_workday
>>> from pandas import DateOffset
>>> from dateutil.relativedelta import MO
>>> USMemorialDay = Holiday('MemorialDay', month=5, day=24,
offset=DateOffset(weekday=MO(1)))
>>> USLaborDay = Holiday('Labor Day', month=9, day=1,
offset=DateOffset(weekday=MO(1)))
>>> July3rd = Holiday('July 3rd', month=7, day=3,)
>>> NewYears = Holiday('New Years Day', month=1, day=1,
observance=nearest_workday),
>>> July3rd = Holiday('July 3rd', month=7, day=3,
days_of_week=(0, 1, 2, 3))
"""
if offset is not None and observance is not None:
raise NotImplementedError("Cannot use both offset and observance.")
self.name = name
self.year = year
self.month = month
self.day = day
self.offset = offset
self.start_date = Timestamp(
start_date) if start_date is not None else start_date
self.end_date = Timestamp(
end_date) if end_date is not None else end_date
self.observance = observance
assert (days_of_week is None or type(days_of_week) == tuple)
self.days_of_week = days_of_week
def __repr__(self):
info = ''
if self.year is not None:
info += 'year={year}, '.format(year=self.year)
info += 'month={mon}, day={day}, '.format(mon=self.month, day=self.day)
if self.offset is not None:
info += 'offset={offset}'.format(offset=self.offset)
if self.observance is not None:
info += 'observance={obs}'.format(obs=self.observance)
repr = 'Holiday: {name} ({info})'.format(name=self.name, info=info)
return repr
def dates(self, start_date, end_date, return_name=False):
"""
Calculate holidays observed between start date and end date
Parameters
----------
start_date : starting date, datetime-like, optional
end_date : ending date, datetime-like, optional
return_name : bool, optional, default=False
If True, return a series that has dates and holiday names.
False will only return dates.
"""
start_date = Timestamp(start_date)
end_date = Timestamp(end_date)
filter_start_date = start_date
filter_end_date = end_date
if self.year is not None:
dt = Timestamp(datetime(self.year, self.month, self.day))
if return_name:
return Series(self.name, index=[dt])
else:
return [dt]
dates = self._reference_dates(start_date, end_date)
holiday_dates = self._apply_rule(dates)
if self.days_of_week is not None:
holiday_dates = holiday_dates[np.in1d(holiday_dates.dayofweek,
self.days_of_week)]
if self.start_date is not None:
filter_start_date = max(self.start_date.tz_localize(
filter_start_date.tz), filter_start_date)
if self.end_date is not None:
filter_end_date = min(self.end_date.tz_localize(
filter_end_date.tz), filter_end_date)
holiday_dates = holiday_dates[(holiday_dates >= filter_start_date) &
(holiday_dates <= filter_end_date)]
if return_name:
return Series(self.name, index=holiday_dates)
return holiday_dates
def _reference_dates(self, start_date, end_date):
"""
Get reference dates for the holiday.
Return reference dates for the holiday also returning the year
prior to the start_date and year following the end_date. This ensures
that any offsets to be applied will yield the holidays within
the passed in dates.
"""
if self.start_date is not None:
start_date = self.start_date.tz_localize(start_date.tz)
if self.end_date is not None:
end_date = self.end_date.tz_localize(start_date.tz)
year_offset = DateOffset(years=1)
reference_start_date = Timestamp(
datetime(start_date.year - 1, self.month, self.day))
reference_end_date = Timestamp(
datetime(end_date.year + 1, self.month, self.day))
# Don't process unnecessary holidays
dates = DatetimeIndex(start=reference_start_date,
end=reference_end_date,
freq=year_offset, tz=start_date.tz)
return dates
def _apply_rule(self, dates):
"""
Apply the given offset/observance to a DatetimeIndex of dates.
Parameters
----------
dates : DatetimeIndex
Dates to apply the given offset/observance rule
Returns
-------
Dates with rules applied
"""
if self.observance is not None:
return dates.map(lambda d: self.observance(d))
if self.offset is not None:
if not isinstance(self.offset, list):
offsets = [self.offset]
else:
offsets = self.offset
for offset in offsets:
# if we are adding a non-vectorized value
# ignore the PerformanceWarnings:
with warnings.catch_warnings(record=True):
dates += offset
return dates
holiday_calendars = {}
def register(cls):
try:
name = cls.name
except:
name = cls.__name__
holiday_calendars[name] = cls
def get_calendar(name):
"""
Return an instance of a calendar based on its name.
Parameters
----------
name : str
Calendar name to return an instance of
"""
return holiday_calendars[name]()
class HolidayCalendarMetaClass(type):
def __new__(cls, clsname, bases, attrs):
calendar_class = super(HolidayCalendarMetaClass, cls).__new__(
cls, clsname, bases, attrs)
register(calendar_class)
return calendar_class
@add_metaclass(HolidayCalendarMetaClass)
class AbstractHolidayCalendar(object):
"""
Abstract interface to create holidays following certain rules.
"""
__metaclass__ = HolidayCalendarMetaClass
rules = []
start_date = Timestamp(datetime(1970, 1, 1))
end_date = Timestamp(datetime(2030, 12, 31))
_cache = None
def __init__(self, name=None, rules=None):
"""
Initializes holiday object with a given set a rules. Normally
classes just have the rules defined within them.
Parameters
----------
name : str
Name of the holiday calendar, defaults to class name
rules : array of Holiday objects
A set of rules used to create the holidays.
"""
super(AbstractHolidayCalendar, self).__init__()
if name is None:
name = self.__class__.__name__
self.name = name
if rules is not None:
self.rules = rules
def rule_from_name(self, name):
for rule in self.rules:
if rule.name == name:
return rule
return None
def holidays(self, start=None, end=None, return_name=False):
"""
Returns a curve with holidays between start_date and end_date
Parameters
----------
start : starting date, datetime-like, optional
end : ending date, datetime-like, optional
return_name : bool, optional
If True, return a series that has dates and holiday names.
False will only return a DatetimeIndex of dates.
Returns
-------
DatetimeIndex of holidays
"""
if self.rules is None:
raise Exception('Holiday Calendar {name} does not have any '
'rules specified'.format(name=self.name))
if start is None:
start = AbstractHolidayCalendar.start_date
if end is None:
end = AbstractHolidayCalendar.end_date
start = Timestamp(start)
end = Timestamp(end)
holidays = None
# If we don't have a cache or the dates are outside the prior cache, we
# get them again
if (self._cache is None or start < self._cache[0] or
end > self._cache[1]):
for rule in self.rules:
rule_holidays = rule.dates(start, end, return_name=True)
if holidays is None:
holidays = rule_holidays
else:
holidays = holidays.append(rule_holidays)
self._cache = (start, end, holidays.sort_index())
holidays = self._cache[2]
holidays = holidays[start:end]
if return_name:
return holidays
else:
return holidays.index
@staticmethod
def merge_class(base, other):
"""
Merge holiday calendars together. The base calendar
will take precedence to other. The merge will be done
based on each holiday's name.
Parameters
----------
base : AbstractHolidayCalendar
instance/subclass or array of Holiday objects
other : AbstractHolidayCalendar
instance/subclass or array of Holiday objects
"""
try:
other = other.rules
except:
pass
if not isinstance(other, list):
other = [other]
other_holidays = {holiday.name: holiday for holiday in other}
try:
base = base.rules
except:
pass
if not isinstance(base, list):
base = [base]
base_holidays = {holiday.name: holiday for holiday in base}
other_holidays.update(base_holidays)
return list(other_holidays.values())
def merge(self, other, inplace=False):
"""
Merge holiday calendars together. The caller's class
rules take precedence. The merge will be done
based on each holiday's name.
Parameters
----------
other : holiday calendar
inplace : bool (default=False)
If True set rule_table to holidays, else return array of Holidays
"""
holidays = self.merge_class(self, other)
if inplace:
self.rules = holidays
else:
return holidays
USMemorialDay = Holiday('MemorialDay', month=5, day=31,
offset=DateOffset(weekday=MO(-1)))
USLaborDay = Holiday('Labor Day', month=9, day=1,
offset=DateOffset(weekday=MO(1)))
USColumbusDay = Holiday('Columbus Day', month=10, day=1,
offset=DateOffset(weekday=MO(2)))
USThanksgivingDay = Holiday('Thanksgiving', month=11, day=1,
offset=DateOffset(weekday=TH(4)))
USMartinLutherKingJr = Holiday('Dr. Martin Luther King Jr.',
start_date=datetime(1986, 1, 1), month=1, day=1,
offset=DateOffset(weekday=MO(3)))
USPresidentsDay = Holiday('President''s Day', month=2, day=1,
offset=DateOffset(weekday=MO(3)))
GoodFriday = Holiday("Good Friday", month=1, day=1, offset=[Easter(), Day(-2)])
EasterMonday = Holiday("Easter Monday", month=1, day=1,
offset=[Easter(), Day(1)])
class USFederalHolidayCalendar(AbstractHolidayCalendar):
"""
US Federal Government Holiday Calendar based on rules specified by:
https://www.opm.gov/policy-data-oversight/
snow-dismissal-procedures/federal-holidays/
"""
rules = [
Holiday('New Years Day', month=1, day=1, observance=nearest_workday),
USMartinLutherKingJr,
USPresidentsDay,
USMemorialDay,
Holiday('July 4th', month=7, day=4, observance=nearest_workday),
USLaborDay,
USColumbusDay,
Holiday('Veterans Day', month=11, day=11, observance=nearest_workday),
USThanksgivingDay,
Holiday('Christmas', month=12, day=25, observance=nearest_workday)
]
def HolidayCalendarFactory(name, base, other,
base_class=AbstractHolidayCalendar):
rules = AbstractHolidayCalendar.merge_class(base, other)
calendar_class = type(name, (base_class,), {"rules": rules, "name": name})
return calendar_class
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,3 @@
# flake8: noqa
from pandas.plotting._timeseries import tsplot