demo + utils venv
@@ -0,0 +1,497 @@
# -*- coding: utf-8 -*-
from datetime import timedelta
import re

import numpy as np
from pytz import AmbiguousTimeError

from pandas._libs.algos import unique_deltas
from pandas._libs.tslibs import Timedelta, Timestamp
from pandas._libs.tslibs.ccalendar import MONTH_ALIASES, int_to_weekday
from pandas._libs.tslibs.conversion import tz_convert
from pandas._libs.tslibs.fields import build_field_sarray
import pandas._libs.tslibs.frequencies as libfreqs
from pandas._libs.tslibs.offsets import _offset_to_period_map
import pandas._libs.tslibs.resolution as libresolution
from pandas._libs.tslibs.resolution import Resolution
from pandas._libs.tslibs.timezones import UTC
import pandas.compat as compat
from pandas.compat import zip
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.common import (
    is_datetime64_dtype, is_period_arraylike, is_timedelta64_dtype)
from pandas.core.dtypes.generic import ABCSeries

from pandas.core.algorithms import unique

from pandas.tseries.offsets import (
    DateOffset, Day, Hour, Micro, Milli, Minute, Nano, Second, prefix_mapping)

_ONE_MICRO = 1000
_ONE_MILLI = (_ONE_MICRO * 1000)
_ONE_SECOND = (_ONE_MILLI * 1000)
_ONE_MINUTE = (60 * _ONE_SECOND)
_ONE_HOUR = (60 * _ONE_MINUTE)
_ONE_DAY = (24 * _ONE_HOUR)
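
# The _ONE_* constants above are nanosecond counts: ``.asi8`` on a
# DatetimeIndex/TimedeltaIndex yields int64 nanoseconds, so an hourly index,
# for example, has a unique delta of _ONE_HOUR == 3600 * 10**9.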

# ---------------------------------------------------------------------
# Offset names ("time rules") and related functions

#: cache of previously seen offsets
_offset_map = {}


def get_period_alias(offset_str):
    """Alias to the closest period string, e.g. BQ -> Q."""
    return _offset_to_period_map.get(offset_str, None)


_name_to_offset_map = {'days': Day(1),
                       'hours': Hour(1),
                       'minutes': Minute(1),
                       'seconds': Second(1),
                       'milliseconds': Milli(1),
                       'microseconds': Micro(1),
                       'nanoseconds': Nano(1)}


def to_offset(freq):
    """
    Return a DateOffset object from a string, tuple, datetime.timedelta
    or DateOffset representation.

    Parameters
    ----------
    freq : str, tuple, datetime.timedelta, DateOffset or None

    Returns
    -------
    delta : DateOffset
        None if freq is None.

    Raises
    ------
    ValueError
        If freq is an invalid frequency.

    See Also
    --------
    pandas.DateOffset

    Examples
    --------
    >>> to_offset('5min')
    <5 * Minutes>

    >>> to_offset('1D1H')
    <25 * Hours>

    >>> to_offset(('W', 2))
    <2 * Weeks: weekday=6>

    >>> to_offset((2, 'B'))
    <2 * BusinessDays>

    >>> to_offset(datetime.timedelta(days=1))
    <Day>

    >>> to_offset(Hour())
    <Hour>
    """
    if freq is None:
        return None

    if isinstance(freq, DateOffset):
        return freq

    if isinstance(freq, tuple):
        name = freq[0]
        stride = freq[1]
        if isinstance(stride, compat.string_types):
            name, stride = stride, name
        name, _ = libfreqs._base_and_stride(name)
        delta = get_offset(name) * stride

    elif isinstance(freq, timedelta):
        delta = None
        freq = Timedelta(freq)
        try:
            for name in freq.components._fields:
                offset = _name_to_offset_map[name]
                stride = getattr(freq.components, name)
                if stride != 0:
                    offset = stride * offset
                    if delta is None:
                        delta = offset
                    else:
                        delta = delta + offset
        except Exception:
            raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq))

    else:
        delta = None
        stride_sign = None
        try:
            splitted = re.split(libfreqs.opattern, freq)
            if splitted[-1] != '' and not splitted[-1].isspace():
                # the last element must be blank
                raise ValueError('last element must be blank')
            for sep, stride, name in zip(splitted[0::4], splitted[1::4],
                                         splitted[2::4]):
                if sep != '' and not sep.isspace():
                    raise ValueError('separator must be spaces')
                prefix = libfreqs._lite_rule_alias.get(name) or name
                if stride_sign is None:
                    stride_sign = -1 if stride.startswith('-') else 1
                if not stride:
                    stride = 1
                if prefix in Resolution._reso_str_bump_map.keys():
                    stride, name = Resolution.get_stride_from_decimal(
                        float(stride), prefix
                    )
                stride = int(stride)
                offset = get_offset(name)
                offset = offset * int(np.fabs(stride) * stride_sign)
                if delta is None:
                    delta = offset
                else:
                    delta = delta + offset
        except Exception:
            raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq))

    if delta is None:
        raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq))

    return delta


def get_offset(name):
    """
    Return the DateOffset object associated with a rule name.

    Examples
    --------
    get_offset('EOM') --> BMonthEnd(1)
    """
    if name not in libfreqs._dont_uppercase:
        name = name.upper()
        name = libfreqs._lite_rule_alias.get(name, name)
        name = libfreqs._lite_rule_alias.get(name.lower(), name)
    else:
        name = libfreqs._lite_rule_alias.get(name, name)

    if name not in _offset_map:
        try:
            split = name.split('-')
            klass = prefix_mapping[split[0]]
            # handles case where there's no suffix (and will TypeError if too
            # many '-')
            offset = klass._from_name(*split[1:])
        except (ValueError, TypeError, KeyError):
            # bad prefix or suffix
            raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(name))
        # cache
        _offset_map[name] = offset

    return _offset_map[name]
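
# A rough usage sketch for get_offset (kept as comments so importing the
# module is unaffected):
#
#   >>> get_offset('H')
#   <Hour>
#   >>> get_offset('W-SUN')
#   <Week: weekday=6>
#
# Both results are memoized in ``_offset_map``, so repeated lookups return
# the same cached DateOffset instance.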

# ---------------------------------------------------------------------
# Period codes


def infer_freq(index, warn=True):
    """
    Infer the most likely frequency given the input index. If the frequency
    is uncertain, a warning will be printed.

    Parameters
    ----------
    index : DatetimeIndex or TimedeltaIndex
        If passed a Series, will use the values of the series (NOT THE INDEX).
    warn : boolean, default True

    Returns
    -------
    freq : string or None
        None if no discernible frequency.

    Raises
    ------
    TypeError
        If the index is not datetime-like.
    ValueError
        If there are fewer than three values.
    """
    import pandas as pd

    if isinstance(index, ABCSeries):
        values = index._values
        if not (is_datetime64_dtype(values) or
                is_timedelta64_dtype(values) or
                values.dtype == object):
            raise TypeError("cannot infer freq from a non-convertible dtype "
                            "on a Series of {dtype}".format(dtype=index.dtype))
        index = values

    if is_period_arraylike(index):
        raise TypeError("PeriodIndex given. Check the `freq` attribute "
                        "instead of using infer_freq.")
    elif is_timedelta64_dtype(index):
        # Allow TimedeltaIndex and TimedeltaArray
        inferer = _TimedeltaFrequencyInferer(index, warn=warn)
        return inferer.get_freq()

    if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex):
        if isinstance(index, (pd.Int64Index, pd.Float64Index)):
            raise TypeError("cannot infer freq from a non-convertible index "
                            "type {type}".format(type=type(index)))
        index = index.values

    if not isinstance(index, pd.DatetimeIndex):
        try:
            index = pd.DatetimeIndex(index)
        except AmbiguousTimeError:
            index = pd.DatetimeIndex(index.asi8)

    inferer = _FrequencyInferer(index, warn=warn)
    return inferer.get_freq()
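
# A minimal usage sketch for infer_freq (comments only, assuming a regular
# daily index):
#
#   >>> import pandas as pd
#   >>> infer_freq(pd.date_range('2019-01-01', periods=5, freq='D'))
#   'D'
#
# Fewer than three timestamps raise ValueError in the inferer below, and a
# PeriodIndex is rejected with TypeError as documented above.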


class _FrequencyInferer(object):
    """
    Not sure if I can avoid the state machine here
    """

    def __init__(self, index, warn=True):
        self.index = index
        self.values = index.asi8

        # This moves the values, which are implicitly in UTC, to the
        # index's timezone so they are in local time
        if hasattr(index, 'tz'):
            if index.tz is not None:
                self.values = tz_convert(self.values, UTC, index.tz)

        self.warn = warn

        if len(index) < 3:
            raise ValueError('Need at least 3 dates to infer frequency')

        self.is_monotonic = (self.index._is_monotonic_increasing or
                             self.index._is_monotonic_decreasing)

    @cache_readonly
    def deltas(self):
        return unique_deltas(self.values)

    @cache_readonly
    def deltas_asi8(self):
        return unique_deltas(self.index.asi8)

    @cache_readonly
    def is_unique(self):
        return len(self.deltas) == 1

    @cache_readonly
    def is_unique_asi8(self):
        return len(self.deltas_asi8) == 1

    def get_freq(self):
        """
        Find the appropriate frequency string to describe the inferred
        frequency of self.values.

        Returns
        -------
        freqstr : str or None
        """
        if not self.is_monotonic or not self.index._is_unique:
            return None

        delta = self.deltas[0]
        if _is_multiple(delta, _ONE_DAY):
            return self._infer_daily_rule()

        # Business hourly, maybe. 17: one day / 65: one weekend
        if self.hour_deltas in ([1, 17], [1, 65], [1, 17, 65]):
            return 'BH'
        # Possibly intraday frequency.  Here we use the
        # original .asi8 values as the modified values
        # will not work around DST transitions.  See #8772
        elif not self.is_unique_asi8:
            return None

        delta = self.deltas_asi8[0]
        if _is_multiple(delta, _ONE_HOUR):
            # Hours
            return _maybe_add_count('H', delta / _ONE_HOUR)
        elif _is_multiple(delta, _ONE_MINUTE):
            # Minutes
            return _maybe_add_count('T', delta / _ONE_MINUTE)
        elif _is_multiple(delta, _ONE_SECOND):
            # Seconds
            return _maybe_add_count('S', delta / _ONE_SECOND)
        elif _is_multiple(delta, _ONE_MILLI):
            # Milliseconds
            return _maybe_add_count('L', delta / _ONE_MILLI)
        elif _is_multiple(delta, _ONE_MICRO):
            # Microseconds
            return _maybe_add_count('U', delta / _ONE_MICRO)
        else:
            # Nanoseconds
            return _maybe_add_count('N', delta)
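
    # Note on the cascade in get_freq: calendar-level rules (annual,
    # quarterly, monthly, weekly, business daily) are tried first when the
    # spacing is a whole number of days; otherwise the raw nanosecond delta
    # is mapped onto fixed sub-daily units (H, T, S, L, U, N).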

    @cache_readonly
    def day_deltas(self):
        return [x / _ONE_DAY for x in self.deltas]

    @cache_readonly
    def hour_deltas(self):
        return [x / _ONE_HOUR for x in self.deltas]

    @cache_readonly
    def fields(self):
        return build_field_sarray(self.values)

    @cache_readonly
    def rep_stamp(self):
        return Timestamp(self.values[0])

    def month_position_check(self):
        return libresolution.month_position_check(self.fields,
                                                  self.index.dayofweek)

    @cache_readonly
    def mdiffs(self):
        nmonths = self.fields['Y'] * 12 + self.fields['M']
        return unique_deltas(nmonths.astype('i8'))

    @cache_readonly
    def ydiffs(self):
        return unique_deltas(self.fields['Y'].astype('i8'))

    def _infer_daily_rule(self):
        annual_rule = self._get_annual_rule()
        if annual_rule:
            nyears = self.ydiffs[0]
            month = MONTH_ALIASES[self.rep_stamp.month]
            alias = '{prefix}-{month}'.format(prefix=annual_rule, month=month)
            return _maybe_add_count(alias, nyears)

        quarterly_rule = self._get_quarterly_rule()
        if quarterly_rule:
            nquarters = self.mdiffs[0] / 3
            mod_dict = {0: 12, 2: 11, 1: 10}
            month = MONTH_ALIASES[mod_dict[self.rep_stamp.month % 3]]
            alias = '{prefix}-{month}'.format(prefix=quarterly_rule,
                                              month=month)
            return _maybe_add_count(alias, nquarters)

        monthly_rule = self._get_monthly_rule()
        if monthly_rule:
            return _maybe_add_count(monthly_rule, self.mdiffs[0])

        if self.is_unique:
            days = self.deltas[0] / _ONE_DAY
            if days % 7 == 0:
                # Weekly
                day = int_to_weekday[self.rep_stamp.weekday()]
                return _maybe_add_count(
                    'W-{day}'.format(day=day), days / 7)
            else:
                return _maybe_add_count('D', days)

        if self._is_business_daily():
            return 'B'

        wom_rule = self._get_wom_rule()
        if wom_rule:
            return wom_rule

    def _get_annual_rule(self):
        if len(self.ydiffs) > 1:
            return None

        if len(unique(self.fields['M'])) > 1:
            return None

        pos_check = self.month_position_check()
        return {'cs': 'AS', 'bs': 'BAS',
                'ce': 'A', 'be': 'BA'}.get(pos_check)

    def _get_quarterly_rule(self):
        if len(self.mdiffs) > 1:
            return None

        if not self.mdiffs[0] % 3 == 0:
            return None

        pos_check = self.month_position_check()
        return {'cs': 'QS', 'bs': 'BQS',
                'ce': 'Q', 'be': 'BQ'}.get(pos_check)

    def _get_monthly_rule(self):
        if len(self.mdiffs) > 1:
            return None
        pos_check = self.month_position_check()
        return {'cs': 'MS', 'bs': 'BMS',
                'ce': 'M', 'be': 'BM'}.get(pos_check)

    def _is_business_daily(self):
        # quick check: cannot be business daily
        if self.day_deltas != [1, 3]:
            return False

        # probably business daily, but need to confirm
        first_weekday = self.index[0].weekday()
        shifts = np.diff(self.index.asi8)
        shifts = np.floor_divide(shifts, _ONE_DAY)
        weekdays = np.mod(first_weekday + np.cumsum(shifts), 7)
        return np.all(((weekdays == 0) & (shifts == 3)) |
                      ((weekdays > 0) & (weekdays <= 4) & (shifts == 1)))
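
    # Reading of the check above: a business-daily index has day deltas of
    # exactly [1, 3] (consecutive weekdays plus the Friday -> Monday gap),
    # every 3-day shift must land on a Monday, and every 1-day shift must
    # stay within Monday through Friday.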

    def _get_wom_rule(self):
        # wdiffs = unique(np.diff(self.index.week))
        # We also need -47, -49, -48 to catch index spanning year boundary
        # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all():
        #     return None

        weekdays = unique(self.index.weekday)
        if len(weekdays) > 1:
            return None

        week_of_months = unique((self.index.day - 1) // 7)
        # Only attempt to infer up to WOM-4. See #9425
        week_of_months = week_of_months[week_of_months < 4]
        if len(week_of_months) == 0 or len(week_of_months) > 1:
            return None

        # get which week
        week = week_of_months[0] + 1
        wd = int_to_weekday[weekdays[0]]

        return 'WOM-{week}{weekday}'.format(week=week, weekday=wd)
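
    # Illustration (comments only): a "third Friday of every month" index,
    # e.g. one built with freq='WOM-3FRI', has a single weekday (Friday) and
    # a single week-of-month bucket (2), so the rule above yields 'WOM-3FRI'.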


class _TimedeltaFrequencyInferer(_FrequencyInferer):

    def _infer_daily_rule(self):
        if self.is_unique:
            days = self.deltas[0] / _ONE_DAY
            if days % 7 == 0:
                # Weekly
                wd = int_to_weekday[self.rep_stamp.weekday()]
                alias = 'W-{weekday}'.format(weekday=wd)
                return _maybe_add_count(alias, days / 7)
            else:
                return _maybe_add_count('D', days)


def _is_multiple(us, mult):
    return us % mult == 0


def _maybe_add_count(base, count):
    if count != 1:
        assert count == int(count)
        count = int(count)
        return '{count}{base}'.format(count=count, base=base)
    else:
        return base
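
# Behaviour sketch for the helpers above (comments only):
#
#   _is_multiple(7200 * 10**9, _ONE_HOUR)  -> True   (exactly two hours)
#   _maybe_add_count('H', 2.0)             -> '2H'
#   _maybe_add_count('D', 1)               -> 'D'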