pruned venvs

This commit is contained in:
d3m1g0d
2019-03-12 21:57:16 +01:00
parent 33f0511081
commit e441f4f7f7
5988 changed files with 0 additions and 1353666 deletions
@@ -1,907 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas._libs.tslib import iNaT
import pandas.compat as compat
from pandas.compat import PY3
from pandas.core.dtypes.dtypes import CategoricalDtype
import pandas as pd
from pandas import (
CategoricalIndex, DatetimeIndex, Float64Index, Index, Int64Index,
IntervalIndex, MultiIndex, PeriodIndex, RangeIndex, Series, TimedeltaIndex,
UInt64Index, isna)
from pandas.core.indexes.base import InvalidIndexError
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
import pandas.util.testing as tm
class Base(object):
""" base class for index sub-class tests """
_holder = None
_compat_props = ['shape', 'ndim', 'size', 'nbytes']
def setup_indices(self):
    """Expose every entry of ``self.indices`` as an attribute of the test."""
    for attr_name, index_obj in self.indices.items():
        setattr(self, attr_name, index_obj)
def test_pickle_compat_construction(self):
    """Calling the index class without any argument raises TypeError."""
    # need an object to create with
    with pytest.raises(TypeError):
        self._holder()
def test_to_series(self):
    """``to_series`` must produce a copy of the index, not share it."""
    index = self.create_index()
    series = index.to_series()

    # neither the values nor the series' own index may be the same object
    assert series.values is not index.values
    assert series.index is not index
    assert series.name == index.name
def test_to_series_with_arguments(self):
    """Check the ``index`` and ``name`` keywords of ``to_series`` (GH18699)."""
    # index kwarg: the passed index object is used as-is
    source = self.create_index()
    as_series = source.to_series(index=source)
    assert as_series.values is not source.values
    assert as_series.index is source
    assert as_series.name == source.name

    # name kwarg: the series gets the new name, not the index's
    source = self.create_index()
    as_series = source.to_series(name='__test')
    assert as_series.values is not source.values
    assert as_series.index is not source
    assert as_series.name != source.name
@pytest.mark.parametrize("name", [None, "new_name"])
def test_to_frame(self, name):
    """``to_frame`` yields a single-column frame (GH-15230, GH-22580)."""
    index = self.create_index()
    # fall back to the index's own name, or to 0 when it has none
    column = name if name else (index.name or 0)

    frame = index.to_frame(name=column)
    assert frame.index is index
    assert len(frame.columns) == 1
    assert frame.columns[0] == column
    assert frame[column].values is not index.values

    # with index=False the frame must not reuse the index object
    frame = index.to_frame(index=False, name=column)
    assert frame.index is not index
def test_shift(self):
    """``shift`` on the base class raises NotImplementedError (GH8083)."""
    index = self.create_index()
    with pytest.raises(NotImplementedError):
        index.shift(1)
    with pytest.raises(NotImplementedError):
        index.shift(1, 2)
def test_create_index_existing_name(self):
    """An index built from an existing index inherits its name(s).

    GH11193: when an existing index is passed and no new name is given,
    the new index keeps the previous object's name; an explicit name or
    names argument overrides it.
    """
    expected = self.create_index()
    if isinstance(expected, MultiIndex):
        tuples = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
                  ('baz', 'two'), ('qux', 'one'), ('qux', 'two')]
        expected.names = ['foo', 'bar']

        result = pd.Index(expected)
        tm.assert_index_equal(
            result,
            Index(Index(tuples, dtype='object'), names=['foo', 'bar']))

        result = pd.Index(expected, names=['A', 'B'])
        tm.assert_index_equal(
            result,
            Index(Index(tuples, dtype='object'), names=['A', 'B']))
    else:
        expected.name = 'foo'
        tm.assert_index_equal(pd.Index(expected), expected)

        renamed = pd.Index(expected, name='bar')
        expected.name = 'bar'
        tm.assert_index_equal(renamed, expected)
def test_numeric_compat(self):
    """Arithmetic operators on the index raise TypeError naming the dunder."""
    index = self.create_index()

    with pytest.raises(TypeError, match="cannot perform __mul__"):
        index * 1
    with pytest.raises(TypeError, match="cannot perform __rmul__"):
        1 * index

    # the division dunder is named differently on Python 2 and Python 3
    if PY3:
        forward_msg = "cannot perform __truediv__"
    else:
        forward_msg = "cannot perform __div__"
    reflected_msg = forward_msg.replace(' __', ' __r')
    with pytest.raises(TypeError, match=forward_msg):
        index / 1
    with pytest.raises(TypeError, match=reflected_msg):
        1 / index

    with pytest.raises(TypeError, match="cannot perform __floordiv__"):
        index // 1
    with pytest.raises(TypeError, match="cannot perform __rfloordiv__"):
        1 // index
def test_logical_compat(self):
    """``all`` and ``any`` raise TypeError on this index."""
    index = self.create_index()

    with pytest.raises(TypeError, match='cannot perform all'):
        index.all()

    with pytest.raises(TypeError, match='cannot perform any'):
        index.any()
def test_boolean_context_compat(self):
    """Using the index in a boolean context raises ValueError."""
    index = self.create_index()
    with pytest.raises(ValueError, match='The truth value of a'):
        # the truth test itself is what must raise
        if index:
            pass
def test_reindex_base(self):
    """``get_indexer`` against itself gives 0..n-1; bad methods are rejected."""
    index = self.create_index()

    positions = index.get_indexer(index)
    tm.assert_numpy_array_equal(np.arange(index.size, dtype=np.intp),
                                positions)

    with pytest.raises(ValueError, match='Invalid fill method'):
        index.get_indexer(index, method='invalid')
def test_get_indexer_consistency(self):
# Checks the return type/dtype of get_indexer and get_indexer_non_unique
# for each entry of self.indices.
# See GH 16819
for name, index in self.indices.items():
# IntervalIndex entries are skipped by this test
if isinstance(index, IntervalIndex):
continue
if index.is_unique or isinstance(index, CategoricalIndex):
indexer = index.get_indexer(index[0:2])
assert isinstance(indexer, np.ndarray)
assert indexer.dtype == np.intp
else:
# otherwise get_indexer is expected to raise InvalidIndexError
e = "Reindexing only valid with uniquely valued Index objects"
with pytest.raises(InvalidIndexError, match=e):
index.get_indexer(index[0:2])
# get_indexer_non_unique returns a pair; only the first item is checked
indexer, _ = index.get_indexer_non_unique(index[0:2])
assert isinstance(indexer, np.ndarray)
assert indexer.dtype == np.intp
def test_ndarray_compat_properties(self):
    """ndarray-like properties of the index agree with its ``values``."""
    index = self.create_index()
    assert index.T.equals(index)
    assert index.transpose().equals(index)

    raw = index.values
    # every name in _compat_props must read the same on index and values
    for attr in self._compat_props:
        assert getattr(index, attr) == getattr(raw, attr)

    # test for validity
    index.nbytes
    index.values.nbytes
def test_repr_roundtrip(self):
    """Evaluating ``repr(index)`` rebuilds an equal index."""
    index = self.create_index()
    # eval is applied only to the repr of an index built by this test
    rebuilt = eval(repr(index))
    tm.assert_index_equal(rebuilt, index)
def test_str(self):
    """The string form shows the index name and the class name."""
    index = self.create_index()
    index.name = 'foo'
    text = str(index)
    assert "'foo'" in text
    assert index.__class__.__name__ in text
def test_repr_max_seq_item_setting(self):
    """With ``display.max_seq_items`` set to None nothing is elided (GH10182)."""
    repeated = self.create_index().repeat(50)
    with pd.option_context("display.max_seq_items", None):
        repr(repeated)
        assert '...' not in str(repeated)
def test_copy_name(self):
# For each non-MultiIndex entry of self.indices, rebuild it through its own
# class with name='mario' and check that the name is kept.
# gh-12309: Check that the "name" argument
# passed at initialization is honored.
for name, index in compat.iteritems(self.indices):
if isinstance(index, MultiIndex):
continue
first = index.__class__(index, copy=True, name='mario')
second = first.__class__(first, copy=False)
# Even though "copy=False", we want a new object.
assert first is not second
# Not using tm.assert_index_equal() since names differ.
assert index.equals(first)
assert first.name == 'mario'
assert second.name == 'mario'
# second[:-1] drops the last label, so the two series' indexes differ
s1 = Series(2, index=first)
s2 = Series(3, index=second[:-1])
if not isinstance(index, CategoricalIndex):
# See gh-13365
s3 = s1 * s2
assert s3.index.name == 'mario'
def test_ensure_copied_data(self):
# Check the "copy" argument of each Index.__new__ is honoured
# GH12309
for name, index in compat.iteritems(self.indices):
# extra keyword arguments needed to rebuild this index type
init_kwargs = {}
if isinstance(index, PeriodIndex):
# Needs "freq" specification:
init_kwargs['freq'] = index.freq
elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)):
# RangeIndex cannot be initialized from data
# MultiIndex and CategoricalIndex are tested separately
continue
index_type = index.__class__
# copy=True: equal index, _ndarray_values checked with check_same='copy'
result = index_type(index.values, copy=True, **init_kwargs)
tm.assert_index_equal(index, result)
tm.assert_numpy_array_equal(index._ndarray_values,
result._ndarray_values,
check_same='copy')
if isinstance(index, PeriodIndex):
# .values an object array of Period, thus copied
result = index_type(ordinal=index.asi8, copy=False,
**init_kwargs)
tm.assert_numpy_array_equal(index._ndarray_values,
result._ndarray_values,
check_same='same')
elif isinstance(index, IntervalIndex):
# checked in test_interval.py
pass
else:
# copy=False: both values and _ndarray_values checked with
# check_same='same'
result = index_type(index.values, copy=False, **init_kwargs)
tm.assert_numpy_array_equal(index.values, result.values,
check_same='same')
tm.assert_numpy_array_equal(index._ndarray_values,
result._ndarray_values,
check_same='same')
def test_memory_usage(self):
# Compares memory_usage() before and after a get_loc call, and against
# memory_usage(deep=True), for each entry of self.indices.
for name, index in compat.iteritems(self.indices):
result = index.memory_usage()
if len(index):
# get_loc is called between the two measurements
index.get_loc(index[0])
result2 = index.memory_usage()
result3 = index.memory_usage(deep=True)
# RangeIndex, IntervalIndex
# don't have engines
if not isinstance(index, (RangeIndex, IntervalIndex)):
assert result2 > result
if index.inferred_type == 'object':
assert result3 > result2
else:
# we report 0 for no-length
assert result == 0
def test_argsort(self):
    """``Index.argsort`` agrees with argsort of the index as an ndarray."""
    for key, index in self.indices.items():
        # 'catIndex' is separately tested
        if key not in ['catIndex']:
            expected = np.array(index).argsort()
            tm.assert_numpy_array_equal(index.argsort(), expected,
                                        check_dtype=False)
def test_numpy_argsort(self):
    """``np.argsort`` on an index matches ``Index.argsort``.

    For CategoricalIndex and RangeIndex the numpy-only ``axis``, ``kind``
    and ``order`` arguments must additionally be rejected with ValueError.
    """
    for ind in self.indices.values():
        result = np.argsort(ind)
        expected = ind.argsort()
        tm.assert_numpy_array_equal(result, expected)

        # these are the only two types that perform
        # pandas compatibility input validation - the
        # rest already perform separate (or no) such
        # validation via their 'values' attribute as
        # defined in pandas.core.indexes/base.py - they
        # cannot be changed at the moment due to
        # backwards compatibility concerns
        #
        # Test the instance, not its class: ``isinstance(type(ind), ...)``
        # is always False, which left the checks below unreachable.
        if isinstance(ind, (CategoricalIndex, RangeIndex)):
            msg = "the 'axis' parameter is not supported"
            with pytest.raises(ValueError, match=msg):
                np.argsort(ind, axis=1)

            msg = "the 'kind' parameter is not supported"
            with pytest.raises(ValueError, match=msg):
                np.argsort(ind, kind='mergesort')

            msg = "the 'order' parameter is not supported"
            with pytest.raises(ValueError, match=msg):
                np.argsort(ind, order=('a', 'b'))
def test_take(self):
# take() with a list of positions must equal indexing with the same list.
indexer = [4, 3, 0, 2]
for k, ind in self.indices.items():
# separate
if k in ['boolIndex', 'tuples', 'empty']:
continue
result = ind.take(indexer)
expected = ind[indexer]
assert result.equals(expected)
# for other index types, accessing .freq is expected to raise
if not isinstance(ind,
(DatetimeIndex, PeriodIndex, TimedeltaIndex)):
# GH 10791
with pytest.raises(AttributeError):
ind.freq
def test_take_invalid_kwargs(self):
    """``take`` rejects unknown keywords and the numpy-only ones."""
    index = self.create_index()
    positions = [1, 2]

    unknown_kw = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=unknown_kw):
        index.take(positions, foo=2)

    out_kw = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=out_kw):
        index.take(positions, out=positions)

    mode_kw = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=mode_kw):
        index.take(positions, mode='clip')
def test_repeat(self):
    """``repeat`` matches repeating the values, for scalar and array counts."""
    # scalar repeat count
    index = self.create_index()
    by_scalar = pd.Index(index.values.repeat(2), name=index.name)
    tm.assert_index_equal(index.repeat(2), by_scalar)

    # one repeat count per element: 0, 1, 2, ...
    index = self.create_index()
    counts = np.arange(len(index))
    by_array = pd.Index(index.values.repeat(counts), name=index.name)
    tm.assert_index_equal(index.repeat(counts), by_array)
def test_numpy_repeat(self):
    """``np.repeat`` matches ``Index.repeat`` and rejects ``axis``."""
    count = 2
    index = self.create_index()
    tm.assert_index_equal(np.repeat(index, count), index.repeat(count))

    axis_msg = "the 'axis' parameter is not supported"
    with pytest.raises(ValueError, match=axis_msg):
        np.repeat(index, count, axis=0)
@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
def test_where(self, klass):
    """``where`` keeps True positions and puts the NA value elsewhere."""
    index = self.create_index()

    # all-True condition: the index comes back unchanged
    keep_all = [True] * len(index)
    tm.assert_index_equal(index.where(klass(keep_all)), index)

    # first position False: it is replaced by the index's NA value
    drop_first = [False] + [True] * len(index[1:])
    expected = pd.Index([index._na_value] + index[1:].tolist(),
                        dtype=index.dtype)
    tm.assert_index_equal(index.where(klass(drop_first)), expected)
@pytest.mark.parametrize("case", [0.5, "xxx"])
@pytest.mark.parametrize("method", ["intersection", "union",
                                    "difference", "symmetric_difference"])
def test_set_ops_error_cases(self, case, method):
    """Set operations raise TypeError for a non-iterable argument."""
    expected_msg = "Input must be Index or array-like"
    for _, index in compat.iteritems(self.indices):
        set_op = getattr(index, method)
        with pytest.raises(TypeError, match=expected_msg):
            set_op(case)
def test_intersection_base(self):
# Intersects idx[:5] with idx[:3], the latter also passed as ndarray,
# Series and list.
for name, idx in compat.iteritems(self.indices):
first = idx[:5]
second = idx[:3]
intersect = first.intersection(second)
# no content check is made for CategoricalIndex
if isinstance(idx, CategoricalIndex):
pass
else:
assert tm.equalContents(intersect, second)
# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
if isinstance(idx, PeriodIndex):
msg = "can only call with other PeriodIndex-ed objects"
with pytest.raises(ValueError, match=msg):
first.intersection(case)
elif isinstance(idx, CategoricalIndex):
pass
else:
result = first.intersection(case)
assert tm.equalContents(result, second)
# a MultiIndex must reject a plain list of scalars
if isinstance(idx, MultiIndex):
msg = "other must be a MultiIndex or a list of tuples"
with pytest.raises(TypeError, match=msg):
first.intersection([1, 2, 3])
def test_union_base(self):
# The union of idx[3:] and idx[:5] must have the same contents as idx.
for name, idx in compat.iteritems(self.indices):
first = idx[3:]
second = idx[:5]
everything = idx
union = first.union(second)
assert tm.equalContents(union, everything)
# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
if isinstance(idx, PeriodIndex):
msg = "can only call with other PeriodIndex-ed objects"
with pytest.raises(ValueError, match=msg):
first.union(case)
elif isinstance(idx, CategoricalIndex):
# no check is made for CategoricalIndex with array-like input
pass
else:
result = first.union(case)
assert tm.equalContents(result, everything)
# a MultiIndex must reject a plain list of scalars
if isinstance(idx, MultiIndex):
msg = "other must be a MultiIndex or a list of tuples"
with pytest.raises(TypeError, match=msg):
first.union([1, 2, 3])
@pytest.mark.parametrize("sort", [True, False])
def test_difference_base(self, sort):
# idx[2:] minus idx[:4] is compared against idx[4:]; `sort` is passed
# positionally to difference().
for name, idx in compat.iteritems(self.indices):
first = idx[2:]
second = idx[:4]
answer = idx[4:]
result = first.difference(second, sort)
if isinstance(idx, CategoricalIndex):
pass
else:
assert tm.equalContents(result, answer)
# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
if isinstance(idx, PeriodIndex):
msg = "can only call with other PeriodIndex-ed objects"
with pytest.raises(ValueError, match=msg):
first.difference(case, sort)
elif isinstance(idx, CategoricalIndex):
pass
elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)):
# NOTE(review): this branch does not call difference(case, ...);
# `result` still holds an earlier value here - confirm intended
assert result.__class__ == answer.__class__
tm.assert_numpy_array_equal(result.sort_values().asi8,
answer.sort_values().asi8)
else:
result = first.difference(case, sort)
assert tm.equalContents(result, answer)
# a MultiIndex must reject a plain list of scalars
if isinstance(idx, MultiIndex):
msg = "other must be a MultiIndex or a list of tuples"
with pytest.raises(TypeError, match=msg):
first.difference([1, 2, 3], sort)
def test_symmetric_difference(self):
# idx[1:] and idx[:-1] differ only in the first and last labels of idx.
for name, idx in compat.iteritems(self.indices):
first = idx[1:]
second = idx[:-1]
if isinstance(idx, CategoricalIndex):
pass
else:
# expected result: the first and last labels of idx
answer = idx[[0, -1]]
result = first.symmetric_difference(second)
assert tm.equalContents(result, answer)
# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
if isinstance(idx, PeriodIndex):
msg = "can only call with other PeriodIndex-ed objects"
with pytest.raises(ValueError, match=msg):
first.symmetric_difference(case)
elif isinstance(idx, CategoricalIndex):
pass
else:
result = first.symmetric_difference(case)
assert tm.equalContents(result, answer)
# a MultiIndex must reject a plain list of scalars
if isinstance(idx, MultiIndex):
msg = "other must be a MultiIndex or a list of tuples"
with pytest.raises(TypeError, match=msg):
first.symmetric_difference([1, 2, 3])
def test_insert_base(self):
    """Inserting ``idx[0]`` in front of ``idx[1:4]`` rebuilds ``idx[0:4]``."""
    for _, index in compat.iteritems(self.indices):
        tail = index[1:4]

        # nothing to insert for an empty index
        if len(index):
            # test 0th element
            assert index[0:4].equals(tail.insert(0, index[0]))
def test_delete_base(self):
    """Deleting the first or last position equals the matching slice."""
    for _, index in compat.iteritems(self.indices):
        # empty indexes are skipped; RangeIndex is tested in its own class
        if not len(index) or isinstance(index, RangeIndex):
            continue

        for position, expected in [(0, index[1:]), (-1, index[:-1])]:
            result = index.delete(position)
            assert result.equals(expected)
            assert result.name == expected.name

        with pytest.raises((IndexError, ValueError)):
            # either depending on numpy version
            index.delete(len(index))
def test_equals(self):
# equals() must accept the index itself, a copy and an object-dtype cast,
# and reject a plain list or ndarray holding the same elements.
for name, idx in compat.iteritems(self.indices):
assert idx.equals(idx)
assert idx.equals(idx.copy())
assert idx.equals(idx.astype(object))
assert not idx.equals(list(idx))
assert not idx.equals(np.array(idx))
# Cannot pass in non-int64 dtype to RangeIndex
if not isinstance(idx, RangeIndex):
# equality with an object-dtype Index is checked in both directions
same_values = Index(idx, dtype=object)
assert idx.equals(same_values)
assert same_values.equals(idx)
if idx.nlevels == 1:
# do not test MultiIndex
assert not idx.equals(pd.Series(idx))
def test_equals_op(self):
# Element-wise == between the index and other indexes, ndarrays and Series.
# GH9947, GH10637
index_a = self.create_index()
if isinstance(index_a, PeriodIndex):
pytest.skip('Skip check for PeriodIndex')
n = len(index_a)
# index_b: one element shorter than index_a
# index_c: same length as index_a, last label replaced by the 2nd to last
# index_d: first element only
index_b = index_a[0:-1]
index_c = index_a[0:-1].append(index_a[-2:-1])
index_d = index_a[0:1]
msg = "Lengths must match|could not be broadcast"
with pytest.raises(ValueError, match=msg):
index_a == index_b
# expected1: all True; expected2: all True except the last position
expected1 = np.array([True] * n)
expected2 = np.array([True] * (n - 1) + [False])
tm.assert_numpy_array_equal(index_a == index_a, expected1)
tm.assert_numpy_array_equal(index_a == index_c, expected2)
# test comparisons with numpy arrays
array_a = np.array(index_a)
array_b = np.array(index_a[0:-1])
array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
array_d = np.array(index_a[0:1])
with pytest.raises(ValueError, match=msg):
index_a == array_b
tm.assert_numpy_array_equal(index_a == array_a, expected1)
tm.assert_numpy_array_equal(index_a == array_c, expected2)
# test comparisons with Series
series_a = Series(array_a)
series_b = Series(array_b)
series_c = Series(array_c)
series_d = Series(array_d)
with pytest.raises(ValueError, match=msg):
index_a == series_b
tm.assert_numpy_array_equal(index_a == series_a, expected1)
tm.assert_numpy_array_equal(index_a == series_c, expected2)
# cases where length is 1 for one of them
with pytest.raises(ValueError, match="Lengths must match"):
index_a == index_d
with pytest.raises(ValueError, match="Lengths must match"):
index_a == series_d
with pytest.raises(ValueError, match="Lengths must match"):
index_a == array_d
msg = "Can only compare identically-labeled Series objects"
with pytest.raises(ValueError, match=msg):
series_a == series_d
with pytest.raises(ValueError, match="Lengths must match"):
series_a == array_d
# comparing with a scalar should broadcast; note that we are excluding
# MultiIndex because in this case each item in the index is a tuple of
# length 2, and therefore is considered an array of length 2 in the
# comparison instead of a scalar
if not isinstance(index_a, MultiIndex):
# True only at the 2nd to last position
expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
# assuming the 2nd to last item is unique in the data
item = index_a[-2]
tm.assert_numpy_array_equal(index_a == item, expected3)
tm.assert_series_equal(series_a == item, Series(expected3))
def test_numpy_ufuncs(self):
# Applies a list of numpy ufuncs to each entry of self.indices and checks
# either the result type or that an exception is raised.
# test ufuncs of numpy, see:
# http://docs.scipy.org/doc/numpy/reference/ufuncs.html
for name, idx in compat.iteritems(self.indices):
# first group: ufuncs whose result is compared as an Index
for func in [np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10,
np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin,
np.arccos, np.arctan, np.sinh, np.cosh, np.tanh,
np.arcsinh, np.arccosh, np.arctanh, np.deg2rad,
np.rad2deg]:
if isinstance(idx, DatetimeIndexOpsMixin):
# raise TypeError or ValueError (PeriodIndex)
# PeriodIndex behavior should be changed in future version
with pytest.raises(Exception):
with np.errstate(all='ignore'):
func(idx)
elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)):
# coerces to float (e.g. np.sin)
with np.errstate(all='ignore'):
result = func(idx)
exp = Index(func(idx.values), name=idx.name)
tm.assert_index_equal(result, exp)
assert isinstance(result, pd.Float64Index)
else:
# raise AttributeError or TypeError
if len(idx) == 0:
continue
else:
with pytest.raises(Exception):
with np.errstate(all='ignore'):
func(idx)
# second group: predicate ufuncs; the result must be a plain ndarray
for func in [np.isfinite, np.isinf, np.isnan, np.signbit]:
if isinstance(idx, DatetimeIndexOpsMixin):
# raise TypeError or ValueError (PeriodIndex)
with pytest.raises(Exception):
func(idx)
elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)):
# Results in bool array
result = func(idx)
assert isinstance(result, np.ndarray)
assert not isinstance(result, Index)
else:
if len(idx) == 0:
continue
else:
with pytest.raises(Exception):
func(idx)
def test_hasnans_isnans(self):
# Checks _isnan / hasnans first on the unmodified index, then on a rebuilt
# index whose value at position 1 was replaced by a missing value.
# GH 11343, added tests for hasnans / isnans
for name, index in self.indices.items():
if isinstance(index, MultiIndex):
pass
else:
idx = index.copy()
# cases in indices doesn't include NaN
expected = np.array([False] * len(idx), dtype=bool)
tm.assert_numpy_array_equal(idx._isnan, expected)
assert idx.hasnans is False
idx = index.copy()
values = np.asarray(idx.values)
if len(index) == 0:
continue
elif isinstance(index, DatetimeIndexOpsMixin):
# datetime-like indexes get iNaT written at position 1
values[1] = iNaT
elif isinstance(index, (Int64Index, UInt64Index)):
# integer indexes are skipped for the missing-value part
continue
else:
values[1] = np.nan
if isinstance(index, PeriodIndex):
idx = index.__class__(values, freq=index.freq)
else:
idx = index.__class__(values)
# only position 1 is expected to be flagged
expected = np.array([False] * len(idx), dtype=bool)
expected[1] = True
tm.assert_numpy_array_equal(idx._isnan, expected)
assert idx.hasnans is True
def test_fillna(self):
# fillna on an index without missing values returns an equal but distinct
# index; a list value is rejected; MultiIndex raises NotImplementedError.
# GH 11343
for name, index in self.indices.items():
if len(index) == 0:
pass
elif isinstance(index, MultiIndex):
idx = index.copy()
msg = "isna is not defined for MultiIndex"
with pytest.raises(NotImplementedError, match=msg):
idx.fillna(idx[0])
else:
idx = index.copy()
result = idx.fillna(idx[0])
tm.assert_index_equal(result, idx)
assert result is not idx
msg = "'value' must be a scalar, passed: "
with pytest.raises(TypeError, match=msg):
idx.fillna([idx[0]])
idx = index.copy()
values = np.asarray(idx.values)
if isinstance(index, DatetimeIndexOpsMixin):
# datetime-like indexes get iNaT written at position 1
values[1] = iNaT
elif isinstance(index, (Int64Index, UInt64Index)):
# integer indexes are skipped for the missing-value part
continue
else:
values[1] = np.nan
if isinstance(index, PeriodIndex):
idx = index.__class__(values, freq=index.freq)
else:
idx = index.__class__(values)
# only position 1 is expected to be flagged
expected = np.array([False] * len(idx), dtype=bool)
expected[1] = True
tm.assert_numpy_array_equal(idx._isnan, expected)
assert idx.hasnans is True
def test_nulls(self):
# this is really a smoke test for the methods
# as these are adequately tested for function elsewhere
for name, index in self.indices.items():
if len(index) == 0:
# an empty index gives an empty boolean array
tm.assert_numpy_array_equal(
index.isna(), np.array([], dtype=bool))
elif isinstance(index, MultiIndex):
idx = index.copy()
msg = "isna is not defined for MultiIndex"
with pytest.raises(NotImplementedError, match=msg):
idx.isna()
else:
if not index.hasnans:
# no missing values: isna is all False, notna is all True
tm.assert_numpy_array_equal(
index.isna(), np.zeros(len(index), dtype=bool))
tm.assert_numpy_array_equal(
index.notna(), np.ones(len(index), dtype=bool))
else:
# otherwise the methods must agree with the top-level isna()
result = isna(index)
tm.assert_numpy_array_equal(index.isna(), result)
tm.assert_numpy_array_equal(index.notna(), ~result)
def test_empty(self):
    """``empty`` is False for the test index, True for a zero-length slice."""
    # GH 15270
    populated = self.create_index()
    assert not populated.empty
    assert populated[:0].empty
def test_join_self_unique(self, join_type):
    """Joining a unique index with itself returns the same labels."""
    index = self.create_index()
    # nothing is checked when the index has duplicates
    if not index.is_unique:
        return
    joined = index.join(index, how=join_type)
    assert (index == joined).all()
def test_map(self):
    """Mapping with the identity callable returns an equal index."""
    index = self.create_index()

    # we don't infer UInt64
    expected = (index.astype('int64')
                if isinstance(index, pd.UInt64Index) else index)

    mapped = index.map(lambda value: value)
    tm.assert_index_equal(mapped, expected)
@pytest.mark.parametrize(
    "mapper",
    [
        lambda values, index: dict(zip(index, values)),
        lambda values, index: pd.Series(values, index)])
def test_map_dictlike(self, mapper):
    """``map`` accepts a dict or a Series as the mapping."""
    index = self.create_index()
    if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)):
        pytest.skip("skipping tests for {}".format(type(index)))

    # mapping every label onto itself
    identity = mapper(index.values, index)

    # we don't infer to UInt64 for a dict
    uint_via_dict = (isinstance(index, pd.UInt64Index)
                     and isinstance(identity, dict))
    expected = index.astype('int64') if uint_via_dict else index
    tm.assert_index_equal(index.map(identity), expected)

    # empty mappable
    expected = pd.Index([np.nan] * len(index))
    tm.assert_index_equal(index.map(mapper(expected, index)), expected)
def test_putmask_with_wrong_mask(self):
    """``putmask`` raises ValueError for a mask of the wrong size or type.

    GH18368. The masks use the builtin ``bool`` dtype: the ``np.bool``
    alias is deprecated and no longer available in current NumPy releases.
    """
    index = self.create_index()

    # mask one element too long
    with pytest.raises(ValueError):
        index.putmask(np.ones(len(index) + 1, bool), 1)

    # mask one element too short
    with pytest.raises(ValueError):
        index.putmask(np.ones(len(index) - 1, bool), 1)

    # mask that is not a boolean array at all
    with pytest.raises(ValueError):
        index.putmask('foo', 1)
@pytest.mark.parametrize('copy', [True, False])
@pytest.mark.parametrize('name', [None, 'foo'])
@pytest.mark.parametrize('ordered', [True, False])
def test_astype_category(self, copy, name, ordered):
    """``astype`` to a categorical dtype matches building a CategoricalIndex."""
    # GH 18630
    index = self.create_index()
    if name:
        index = index.rename(name)

    # standard categories
    standard = CategoricalDtype(ordered=ordered)
    tm.assert_index_equal(
        index.astype(standard, copy=copy),
        CategoricalIndex(index.values, name=name, ordered=ordered))

    # non-standard categories: the last unique value is left out
    partial = CategoricalDtype(index.unique().tolist()[:-1], ordered)
    tm.assert_index_equal(
        index.astype(partial, copy=copy),
        CategoricalIndex(index.values, name=name, dtype=partial))

    if ordered is False:
        # dtype='category' defaults to ordered=False, so only test once
        tm.assert_index_equal(
            index.astype('category', copy=copy),
            CategoricalIndex(index.values, name=name))
@@ -1,49 +0,0 @@
import numpy as np
import pytest
from pandas.compat import long, lzip
import pandas as pd
from pandas.core.indexes.api import Index, MultiIndex
import pandas.util.testing as tm
@pytest.fixture(
    params=[
        tm.makeUnicodeIndex(100),
        tm.makeStringIndex(100),
        tm.makeDateIndex(100),
        tm.makePeriodIndex(100),
        tm.makeTimedeltaIndex(100),
        tm.makeIntIndex(100),
        tm.makeUIntIndex(100),
        tm.makeRangeIndex(100),
        tm.makeFloatIndex(100),
        Index([True, False]),
        tm.makeCategoricalIndex(100),
        Index([]),
        MultiIndex.from_tuples(lzip(['foo', 'bar', 'baz'], [1, 2, 3])),
        Index([0, 0, 1, 1, 2, 2]),
    ],
    ids=lambda param: type(param).__name__)
def indices(request):
    """Parametrized fixture returning one index object per listed param."""
    return request.param
@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
def one(request):
    """Fixture giving the value one as an int and as a zero-dim int64 array."""
    # zero-dim integer array behaves like an integer
    return request.param
# Zero values used to parametrize the ``zero`` fixture: length-5 vectors
# (Index and ndarray), zero-dim arrays, and plain scalars.
zeros = ([box([0] * 5, dtype=dtype)
          for box in [pd.Index, np.array]
          for dtype in [np.int64, np.uint64, np.float64]]
         + [np.array(0, dtype=dtype)
            for dtype in [np.int64, np.uint64, np.float64]]
         + [0, 0.0, long(0)])
@pytest.fixture(params=zeros)
def zero(request):
    """Fixture yielding each entry of ``zeros`` in turn.

    For testing division by (or of) zero for Index with length 5, this
    gives several scalar-zeros and length-5 vector-zeros.
    """
    return request.param
@@ -1,101 +0,0 @@
""" generic datetimelike tests """
import numpy as np
import pytest
import pandas as pd
import pandas.util.testing as tm
from .common import Base
class DatetimeLike(Base):
def test_argmax_axis_invalid(self):
    """Reductions reject an out-of-range ``axis`` with ValueError (GH#23081)."""
    rng = self.create_index()
    bad_calls = [(rng.argmax, 1), (rng.argmin, 2),
                 (rng.min, -2), (rng.max, -3)]
    for reducer, bad_axis in bad_calls:
        with pytest.raises(ValueError):
            reducer(axis=bad_axis)
def test_can_hold_identifiers(self):
    """The first label is not reported as an identifier held by the index."""
    index = self.create_index()
    first_label = index[0]
    assert index._can_hold_identifiers_and_holds_name(first_label) is False
def test_shift_identity(self):
    """Shifting by zero periods returns an equal index."""
    index = self.create_index()
    unshifted = index.shift(0)
    tm.assert_index_equal(index, unshifted)
def test_str(self):
    """The string form shows name, class, tz and freq, but no length."""
    # test the string repr
    idx = self.create_index()
    idx.name = 'foo'
    text = str(idx)

    assert "length={}".format(len(idx)) not in text
    assert "'foo'" in text
    assert idx.__class__.__name__ in text

    if hasattr(idx, 'tz') and idx.tz is not None:
        # idx.tz is a tzinfo object; it must be converted to text first,
        # otherwise the ``in`` test against a string raises TypeError
        assert str(idx.tz) in text
    if hasattr(idx, 'freq'):
        assert "freq='{}'".format(idx.freqstr) in text
def test_view(self):
    """Rebuilding through the holder equals the index and its holder view."""
    index = self.create_index()

    # the 'i8' view is only exercised; its result is not compared
    index.view('i8')
    tm.assert_index_equal(self._holder(index), index)

    holder_view = index.view(self._holder)
    tm.assert_index_equal(self._holder(index), holder_view)
def test_map_callable(self):
    """``map`` with a callable: adding the freq, and mapping a label to NaT."""
    index = self.index

    shifted = index.map(lambda stamp: stamp + stamp.freq)
    tm.assert_index_equal(shifted, index + index.freq)

    # map to NaT
    first = index[0]
    with_nat = index.map(lambda stamp: pd.NaT if stamp == first else stamp)
    tm.assert_index_equal(with_nat,
                          pd.Index([pd.NaT] + index[1:].tolist()))
@pytest.mark.parametrize(
    "mapper",
    [
        lambda values, index: dict(zip(index, values)),
        lambda values, index: pd.Series(values, index)])
def test_map_dictlike(self, mapper):
    """``map`` accepts a dict or a Series as the mapping."""
    index = self.index

    expected = index + index.freq
    # don't compare the freqs
    if isinstance(expected, pd.DatetimeIndex):
        expected.freq = None
    tm.assert_index_equal(index.map(mapper(expected, index)), expected)

    # first label mapped to NaT, the rest onto themselves
    expected = pd.Index([pd.NaT] + index[1:].tolist())
    tm.assert_index_equal(index.map(mapper(expected, index)), expected)

    # empty map; these map to np.nan because we cannot know
    # to re-infer things
    expected = pd.Index([np.nan] * len(index))
    tm.assert_index_equal(index.map(mapper([], [])), expected)
def test_asobject_deprecated(self):
    """Accessing ``asobject`` warns with FutureWarning (GH18572)."""
    index = self.create_index()
    with tm.assert_produces_warning(FutureWarning):
        as_object = index.asobject
    assert isinstance(as_object, pd.Index)
@@ -1,109 +0,0 @@
# -*- coding: utf-8 -*-
from datetime import datetime
import pytest
import pytz
from pandas.errors import NullFrequencyError
import pandas as pd
from pandas import DatetimeIndex, Series, date_range
import pandas.util.testing as tm
class TestDatetimeIndexArithmetic(object):
# -------------------------------------------------------------
# DatetimeIndex.shift is used in integer addition
def test_dti_shift_tzaware(self, tz_naive_fixture):
    """Hourly shifts keep name and tz, for empty and non-empty indexes."""
    # GH#9903
    tz = tz_naive_fixture

    def build(stamps):
        # every index in this test shares the same name and timezone
        return pd.DatetimeIndex(stamps, name='xxx', tz=tz)

    # shifting an empty index leaves it empty
    empty = build([])
    for periods in (0, 3):
        tm.assert_index_equal(empty.shift(periods, freq='H'), empty)

    idx = build(['2011-01-01 10:00', '2011-01-01 11:00',
                 '2011-01-01 12:00'])
    tm.assert_index_equal(idx.shift(0, freq='H'), idx)

    later = build(['2011-01-01 13:00', '2011-01-01 14:00',
                   '2011-01-01 15:00'])
    tm.assert_index_equal(idx.shift(3, freq='H'), later)

    earlier = build(['2011-01-01 07:00', '2011-01-01 08:00',
                     '2011-01-01 09:00'])
    tm.assert_index_equal(idx.shift(-3, freq='H'), earlier)
def test_dti_shift_freqs(self):
    """Shift a daily range by its own freq and by an explicit '2D' freq."""
    # test shift for DatetimeIndex and non DatetimeIndex
    # GH#8083
    drange = pd.date_range('20130101', periods=5)

    forward = pd.DatetimeIndex(
        ['2013-01-02', '2013-01-03', '2013-01-04', '2013-01-05',
         '2013-01-06'], freq='D')
    tm.assert_index_equal(drange.shift(1), forward)

    backward = pd.DatetimeIndex(
        ['2012-12-31', '2013-01-01', '2013-01-02', '2013-01-03',
         '2013-01-04'], freq='D')
    tm.assert_index_equal(drange.shift(-1), backward)

    # three steps of two days each
    six_days_on = pd.DatetimeIndex(
        ['2013-01-07', '2013-01-08', '2013-01-09', '2013-01-10',
         '2013-01-11'], freq='D')
    tm.assert_index_equal(drange.shift(3, freq='2D'), six_days_on)
def test_dti_shift_int(self):
# Adding/subtracting an integer must emit FutureWarning and give the same
# result as shift() by that many periods.
rng = date_range('1/1/2000', periods=20)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
# GH#22535
result = rng + 5
expected = rng.shift(5)
tm.assert_index_equal(result, expected)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
# GH#22535
result = rng - 5
expected = rng.shift(-5)
tm.assert_index_equal(result, expected)
def test_dti_shift_no_freq(self):
    """Shifting an index built with ``freq=None`` raises NullFrequencyError."""
    # GH#19147
    freqless = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01'],
                                freq=None)
    with pytest.raises(NullFrequencyError):
        freqless.shift(2)
@pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern'])
def test_dti_shift_localized(self, tzstr):
    """Shifting a localized range keeps its timezone."""
    naive = date_range('2011/1/1', '2012/1/1', freq='W-FRI')
    localized = naive.tz_localize(tzstr)

    shifted = localized.shift(1, '10T')
    assert shifted.tz == localized.tz
def test_dti_shift_across_dst(self):
    """Shift an hourly America/Chicago series starting 2013-11-03 by one hour."""
    # GH 8616
    hours = date_range('2013-11-03', tz='America/Chicago',
                       periods=7, freq='H')
    series = Series(index=hours[:-1])

    shifted = series.shift(freq='H')
    tm.assert_series_equal(shifted, Series(index=hours[1:]))
@pytest.mark.parametrize('shift, result_time', [
    [0, '2014-11-14 00:00:00'],
    [-1, '2014-11-13 23:00:00'],
    [1, '2014-11-14 01:00:00']])
def test_dti_shift_near_midnight(self, shift, result_time):
    """Hourly shifts of a single EST-localized midnight timestamp."""
    # GH 8616
    midnight = pytz.timezone('EST').localize(datetime(2014, 11, 14, 0))
    series = Series(data=[1], index=[midnight])

    shifted = series.shift(shift, freq='H')

    expected_index = DatetimeIndex([result_time], tz='EST')
    tm.assert_series_equal(shifted, Series(1, index=expected_index))
@@ -1,343 +0,0 @@
from datetime import datetime
import dateutil
from dateutil.tz import tzlocal
import numpy as np
import pytest
import pytz
import pandas as pd
from pandas import (
DatetimeIndex, Index, Int64Index, NaT, Period, Series, Timestamp,
date_range)
import pandas.util.testing as tm
class TestDatetimeIndex(object):
    """Casting a ``DatetimeIndex`` to other dtypes and to datetime arrays."""

    def test_astype(self):
        """Object and integer casts of an index that contains NaT."""
        # GH 13149, GH 13209
        with_nat = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])

        as_object = with_nat.astype(object)
        object_expected = Index([Timestamp('2016-05-16')] + [NaT] * 3,
                                dtype=object)
        tm.assert_index_equal(as_object, object_expected)

        as_int = with_nat.astype(int)
        # each of the three NaT entries is expected as -9223372036854775808
        int_values = [1463356800000000000] + [-9223372036854775808] * 3
        tm.assert_index_equal(as_int, Int64Index(int_values, dtype=np.int64))

        daily = date_range('1/1/2000', periods=10)
        as_i8 = daily.astype('i8')
        tm.assert_index_equal(as_i8, Index(daily.asi8))
        tm.assert_numpy_array_equal(as_i8.values, daily.asi8)

    def test_astype_uint(self):
        """Both ``uint64`` and ``uint32`` casts are compared to a UInt64Index."""
        dti = date_range('2000', periods=2)
        raw = np.array([946684800000000000, 946771200000000000],
                       dtype="uint64")
        expected = pd.UInt64Index(raw)
        for target in ("uint64", "uint32"):
            tm.assert_index_equal(dti.astype(target), expected)

    def test_astype_with_tz(self):
        """Casts that involve tz-aware data: to naive, to str, between zones."""
        # with tz
        eastern = date_range('1/1/2000', periods=10, tz='US/Eastern')
        as_naive = eastern.astype('datetime64[ns]')
        naive_expected = (date_range('1/1/2000', periods=10, tz='US/Eastern')
                          .tz_convert('UTC')
                          .tz_localize(None))
        tm.assert_index_equal(as_naive, naive_expected)

        # BUG#10442 : testing astype(str) is correct for Series/DatetimeIndex
        naive_ser = pd.Series(pd.date_range('2012-01-01', periods=3))
        naive_strs = naive_ser.astype(str)
        tm.assert_series_equal(
            naive_strs,
            pd.Series(['2012-01-01', '2012-01-02', '2012-01-03'],
                      dtype=object))

        aware_ser = Series(pd.date_range('2012-01-01', periods=3,
                                         tz='US/Eastern'))
        aware_strs = aware_ser.astype(str)
        aware_expected = Series(['2012-01-01 00:00:00-05:00',
                                 '2012-01-02 00:00:00-05:00',
                                 '2012-01-03 00:00:00-05:00'],
                                dtype=object)
        tm.assert_series_equal(aware_strs, aware_expected)

        # GH 18951: tz-aware to tz-aware
        pacific = date_range('20170101', periods=4, tz='US/Pacific')
        converted = pacific.astype('datetime64[ns, US/Eastern]')
        tm.assert_index_equal(
            converted,
            date_range('20170101 03:00:00', periods=4, tz='US/Eastern'))

        # GH 18951: tz-naive to tz-aware
        naive = date_range('20170101', periods=4)
        localized = naive.astype('datetime64[ns, US/Eastern]')
        tm.assert_index_equal(
            localized, date_range('20170101', periods=4, tz='US/Eastern'))

    def test_astype_str_compat(self):
        """``astype(str)`` renders missing entries as the string ``'NaT'``."""
        # GH 13149, GH 13209
        # verify that we are returning NaT as a string (and not unicode)
        with_nat = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])
        as_str = with_nat.astype(str)
        tm.assert_index_equal(
            as_str,
            Index(['2016-05-16', 'NaT', 'NaT', 'NaT'], dtype=object))

    def test_astype_str(self):
        """``astype(str)`` keeps the name for daily, hourly and tz-aware data."""
        # test astype string - #10442
        daily = date_range('2012-01-01', periods=4, name='test_name')
        daily_expected = Index(['2012-01-01', '2012-01-02', '2012-01-03',
                                '2012-01-04'], name='test_name', dtype=object)
        tm.assert_index_equal(daily.astype(str), daily_expected)

        # test astype string with tz and name
        daily_tz = date_range('2012-01-01', periods=3, name='test_name',
                              tz='US/Eastern')
        daily_tz_expected = Index(['2012-01-01 00:00:00-05:00',
                                   '2012-01-02 00:00:00-05:00',
                                   '2012-01-03 00:00:00-05:00'],
                                  name='test_name', dtype=object)
        tm.assert_index_equal(daily_tz.astype(str), daily_tz_expected)

        # test astype string with freqH and name
        hourly = date_range('1/1/2011', periods=3, freq='H',
                            name='test_name')
        hourly_expected = Index(['2011-01-01 00:00:00', '2011-01-01 01:00:00',
                                 '2011-01-01 02:00:00'],
                                name='test_name', dtype=object)
        tm.assert_index_equal(hourly.astype(str), hourly_expected)

        # test astype string with freqH and timezone
        hourly_tz = date_range('3/6/2012 00:00', periods=2, freq='H',
                               tz='Europe/London', name='test_name')
        hourly_tz_expected = Index(['2012-03-06 00:00:00+00:00',
                                    '2012-03-06 01:00:00+00:00'],
                                   dtype=object, name='test_name')
        tm.assert_index_equal(hourly_tz.astype(str), hourly_tz_expected)

    def test_astype_datetime64(self):
        """``datetime64[ns]`` cast: copy semantics and tz-aware to naive."""
        # GH 13149, GH 13209
        dti = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])

        copied = dti.astype('datetime64[ns]')
        tm.assert_index_equal(copied, dti)
        assert copied is not dti

        # copy=False is expected to hand back the very same object
        uncopied = dti.astype('datetime64[ns]', copy=False)
        tm.assert_index_equal(uncopied, dti)
        assert uncopied is dti

        est = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN], tz='EST')
        from_est = est.astype('datetime64[ns]')
        naive_expected = DatetimeIndex(
            ['2016-05-16 05:00:00', 'NaT', 'NaT', 'NaT'],
            dtype='datetime64[ns]')
        tm.assert_index_equal(from_est, naive_expected)

    def test_astype_object(self):
        """``astype('O')`` yields the same elements as iterating the index."""
        dti = date_range('1/1/2000', periods=20)

        as_object = dti.astype('O')
        elements = list(dti)

        tm.assert_index_equal(as_object, Index(elements, dtype=np.object_))
        assert as_object.tolist() == elements

    @pytest.mark.parametrize('tz', [None, 'Asia/Tokyo'])
    def test_astype_object_tz(self, tz):
        """Object cast of a month-end range, with and without a timezone."""
        monthly = pd.date_range(start='2013-01-01', periods=4, freq='M',
                                name='idx', tz=tz)
        stamps = [Timestamp(day, tz=tz)
                  for day in ('2013-01-31', '2013-02-28',
                              '2013-03-31', '2013-04-30')]
        expected = pd.Index(stamps, dtype=object, name='idx')

        tm.assert_index_equal(monthly.astype(object), expected)
        assert monthly.tolist() == stamps

    def test_astype_object_with_nat(self):
        """Object cast keeps ``NaT`` in place between real timestamps."""
        dti = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2),
                             pd.NaT, datetime(2013, 1, 4)], name='idx')
        stamps = [Timestamp('2013-01-01'),
                  Timestamp('2013-01-02'), pd.NaT,
                  Timestamp('2013-01-04')]
        expected = pd.Index(stamps, dtype=object, name='idx')

        tm.assert_index_equal(dti.astype(object), expected)
        assert dti.tolist() == stamps

    @pytest.mark.parametrize('dtype', [
        float, 'timedelta64', 'timedelta64[ns]', 'datetime64',
        'datetime64[D]'])
    def test_astype_raises(self, dtype):
        """Unsupported target dtypes raise ``TypeError``."""
        # GH 13149, GH 13209
        dti = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])
        with pytest.raises(TypeError,
                           match='Cannot cast DatetimeArray to dtype'):
            dti.astype(dtype)

    def test_index_convert_to_datetime_array(self):
        """``to_pydatetime`` round-trips naive and string-tz ranges."""

        def _assert_pydatetimes(dti):
            # every element must be a datetime equal to its Timestamp,
            # carrying the same tzinfo
            pydts = dti.to_pydatetime()
            assert isinstance(pydts, np.ndarray)
            for pydt, ts in zip(pydts, dti):
                assert isinstance(pydt, datetime)
                assert pydt == ts.to_pydatetime()
                assert pydt.tzinfo == ts.tzinfo

        naive = date_range('20090415', '20090519')
        eastern = date_range('20090415', '20090519', tz='US/Eastern')
        utc = date_range('20090415', '20090519', tz='utc')

        for dti in (naive, eastern, utc):
            _assert_pydatetimes(dti)

    def test_index_convert_to_datetime_array_explicit_pytz(self):
        """``to_pydatetime`` round-trips ranges built with pytz tz objects."""

        def _assert_pydatetimes(dti):
            # every element must be a datetime equal to its Timestamp,
            # carrying the same tzinfo
            pydts = dti.to_pydatetime()
            assert isinstance(pydts, np.ndarray)
            for pydt, ts in zip(pydts, dti):
                assert isinstance(pydt, datetime)
                assert pydt == ts.to_pydatetime()
                assert pydt.tzinfo == ts.tzinfo

        naive = date_range('20090415', '20090519')
        eastern = date_range('20090415', '20090519',
                             tz=pytz.timezone('US/Eastern'))
        utc = date_range('20090415', '20090519', tz=pytz.utc)

        for dti in (naive, eastern, utc):
            _assert_pydatetimes(dti)

    def test_index_convert_to_datetime_array_dateutil(self):
        """``to_pydatetime`` round-trips ranges built with dateutil zones."""

        def _assert_pydatetimes(dti):
            # every element must be a datetime equal to its Timestamp,
            # carrying the same tzinfo
            pydts = dti.to_pydatetime()
            assert isinstance(pydts, np.ndarray)
            for pydt, ts in zip(pydts, dti):
                assert isinstance(pydt, datetime)
                assert pydt == ts.to_pydatetime()
                assert pydt.tzinfo == ts.tzinfo

        naive = date_range('20090415', '20090519')
        eastern = date_range('20090415', '20090519',
                             tz='dateutil/US/Eastern')
        utc = date_range('20090415', '20090519', tz=dateutil.tz.tzutc())

        for dti in (naive, eastern, utc):
            _assert_pydatetimes(dti)

    @pytest.mark.parametrize('tz, dtype', [
        ['US/Pacific', 'datetime64[ns, US/Pacific]'],
        [None, 'datetime64[ns]']])
    def test_integer_index_astype_datetime(self, tz, dtype):
        """An integer Index of epoch nanoseconds casts to a DatetimeIndex."""
        # GH 20997, 20964, 24559
        epoch_ns = [pd.Timestamp('2018-01-01', tz=tz).value]
        as_datetime = pd.Index(epoch_ns).astype(dtype)
        tm.assert_index_equal(as_datetime,
                              pd.DatetimeIndex(["2018-01-01"], tz=tz))
class TestToPeriod(object):
    """Tests for ``DatetimeIndex.to_period`` plus two ``astype`` fallbacks."""

    def setup_method(self, method):
        """Store a two-element index of 'Z'-suffixed timestamps on ``self``."""
        data = [Timestamp('2007-01-01 10:11:12.123456Z'),
                Timestamp('2007-01-01 10:11:13.789123Z')]
        self.index = DatetimeIndex(data)

    def test_to_period_millisecond(self):
        """``to_period(freq='L')`` keeps millisecond resolution."""
        index = self.index

        with tm.assert_produces_warning(UserWarning):
            # warning that timezone info will be lost
            period = index.to_period(freq='L')
        assert 2 == len(period)
        assert period[0] == Period('2007-01-01 10:11:12.123Z', 'L')
        assert period[1] == Period('2007-01-01 10:11:13.789Z', 'L')

    def test_to_period_microsecond(self):
        """``to_period(freq='U')`` keeps microsecond resolution."""
        index = self.index

        with tm.assert_produces_warning(UserWarning):
            # warning that timezone info will be lost
            period = index.to_period(freq='U')
        assert 2 == len(period)
        assert period[0] == Period('2007-01-01 10:11:12.123456Z', 'U')
        assert period[1] == Period('2007-01-01 10:11:13.789123Z', 'U')

    @pytest.mark.parametrize('tz', [
        'US/Eastern', pytz.utc, tzlocal(), 'dateutil/US/Eastern',
        dateutil.tz.tzutc()])
    def test_to_period_tz(self, tz):
        """Index-level and element-level ``to_period`` agree for tz data."""
        ts = date_range('1/1/2000', '2/1/2000', tz=tz)

        with tm.assert_produces_warning(UserWarning):
            # GH#21333 warning that timezone info will be lost
            result = ts.to_period()[0]
            expected = ts[0].to_period()

        assert result == expected

        expected = date_range('1/1/2000', '2/1/2000').to_period()

        with tm.assert_produces_warning(UserWarning):
            # GH#21333 warning that timezone info will be lost
            result = ts.to_period()

        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('tz', ['Etc/GMT-1', 'Etc/GMT+1'])
    def test_to_period_tz_utc_offset_consistency(self, tz):
        """Index-level and element-level ``to_period`` agree per offset zone."""
        # GH 22905
        # Use the parametrized zone: the range was previously always built
        # with 'Etc/GMT-1', so the 'Etc/GMT+1' case never tested its own zone.
        ts = pd.date_range('1/1/2000', '2/1/2000', tz=tz)
        with tm.assert_produces_warning(UserWarning):
            result = ts.to_period()[0]
            expected = ts[0].to_period()
        assert result == expected

    def test_to_period_nofreq(self):
        """``to_period`` needs a frequency that is set or can be inferred."""
        # irregular spacing: no frequency available
        idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04'])
        with pytest.raises(ValueError):
            idx.to_period()

        idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'],
                            freq='infer')
        assert idx.freqstr == 'D'
        expected = pd.PeriodIndex(['2000-01-01', '2000-01-02',
                                   '2000-01-03'], freq='D')
        tm.assert_index_equal(idx.to_period(), expected)

        # GH 7606
        idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'])
        assert idx.freqstr is None
        tm.assert_index_equal(idx.to_period(), expected)

    @pytest.mark.parametrize('tz', [None, 'US/Central'])
    def test_astype_category(self, tz):
        """``astype('category')`` on the index and on its ``_data`` array."""
        obj = pd.date_range("2000", periods=2, tz=tz)
        result = obj.astype('category')
        expected = pd.CategoricalIndex([pd.Timestamp('2000-01-01', tz=tz),
                                        pd.Timestamp('2000-01-02', tz=tz)])
        tm.assert_index_equal(result, expected)

        result = obj._data.astype('category')
        expected = expected.values
        tm.assert_categorical_equal(result, expected)

    @pytest.mark.parametrize('tz', [None, 'US/Central'])
    def test_astype_array_fallback(self, tz):
        """``astype(bool)`` on the index and on its ``_data`` array."""
        obj = pd.date_range("2000", periods=2, tz=tz)
        result = obj.astype(bool)
        expected = pd.Index(np.array([True, True]))
        tm.assert_index_equal(result, expected)

        result = obj._data.astype(bool)
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(result, expected)
@@ -1,794 +0,0 @@
from datetime import timedelta
from functools import partial
from operator import attrgetter
import dateutil
import numpy as np
import pytest
import pytz
from pandas._libs.tslibs import OutOfBoundsDatetime, conversion
import pandas as pd
from pandas import (
DatetimeIndex, Index, Timestamp, date_range, datetime, offsets,
to_datetime)
from pandas.core.arrays import DatetimeArray, period_array
import pandas.util.testing as tm
class TestDatetimeIndex(object):
@pytest.mark.parametrize('dt_cls', [DatetimeIndex,
DatetimeArray._from_sequence])
def test_freq_validation_with_nat(self, dt_cls):
# GH#11587 make sure we get a useful error message when generate_range
# raises
msg = ("Inferred frequency None from passed values does not conform "
"to passed frequency D")
with pytest.raises(ValueError, match=msg):
dt_cls([pd.NaT, pd.Timestamp('2011-01-01')], freq='D')
with pytest.raises(ValueError, match=msg):
dt_cls([pd.NaT, pd.Timestamp('2011-01-01').value],
freq='D')
def test_categorical_preserves_tz(self):
# GH#18664 retain tz when going DTI-->Categorical-->DTI
# TODO: parametrize over DatetimeIndex/DatetimeArray
# once CategoricalIndex(DTA) works
dti = pd.DatetimeIndex(
[pd.NaT, '2015-01-01', '1999-04-06 15:14:13', '2015-01-01'],
tz='US/Eastern')
ci = pd.CategoricalIndex(dti)
carr = pd.Categorical(dti)
cser = pd.Series(ci)
for obj in [ci, carr, cser]:
result = pd.DatetimeIndex(obj)
tm.assert_index_equal(result, dti)
def test_dti_with_period_data_raises(self):
# GH#23675
data = pd.PeriodIndex(['2016Q1', '2016Q2'], freq='Q')
with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
DatetimeIndex(data)
with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
to_datetime(data)
with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
DatetimeIndex(period_array(data))
with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
to_datetime(period_array(data))
def test_dti_with_timedelta64_data_deprecation(self):
# GH#23675
data = np.array([0], dtype='m8[ns]')
with tm.assert_produces_warning(FutureWarning):
result = DatetimeIndex(data)
assert result[0] == Timestamp('1970-01-01')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = to_datetime(data)
assert result[0] == Timestamp('1970-01-01')
with tm.assert_produces_warning(FutureWarning):
result = DatetimeIndex(pd.TimedeltaIndex(data))
assert result[0] == Timestamp('1970-01-01')
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = to_datetime(pd.TimedeltaIndex(data))
assert result[0] == Timestamp('1970-01-01')
def test_construction_caching(self):
df = pd.DataFrame({'dt': pd.date_range('20130101', periods=3),
'dttz': pd.date_range('20130101', periods=3,
tz='US/Eastern'),
'dt_with_null': [pd.Timestamp('20130101'), pd.NaT,
pd.Timestamp('20130103')],
'dtns': pd.date_range('20130101', periods=3,
freq='ns')})
assert df.dttz.dtype.tz.zone == 'US/Eastern'
@pytest.mark.parametrize('kwargs', [
{'tz': 'dtype.tz'},
{'dtype': 'dtype'},
{'dtype': 'dtype', 'tz': 'dtype.tz'}])
def test_construction_with_alt(self, kwargs, tz_aware_fixture):
tz = tz_aware_fixture
i = pd.date_range('20130101', periods=5, freq='H', tz=tz)
kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}
result = DatetimeIndex(i, **kwargs)
tm.assert_index_equal(i, result)
@pytest.mark.parametrize('kwargs', [
{'tz': 'dtype.tz'},
{'dtype': 'dtype'},
{'dtype': 'dtype', 'tz': 'dtype.tz'}])
def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
tz = tz_aware_fixture
i = pd.date_range('20130101', periods=5, freq='H', tz=tz)
kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}
if str(tz) in ('UTC', 'tzutc()'):
warn = None
else:
warn = FutureWarning
with tm.assert_produces_warning(warn, check_stacklevel=False):
result = DatetimeIndex(i.tz_localize(None).asi8, **kwargs)
expected = DatetimeIndex(i, **kwargs)
tm.assert_index_equal(result, expected)
# localize into the provided tz
i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC')
expected = i.tz_localize(None).tz_localize('UTC')
tm.assert_index_equal(i2, expected)
# incompat tz/dtype
pytest.raises(ValueError, lambda: DatetimeIndex(
i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific'))
def test_construction_index_with_mixed_timezones(self):
# gh-11488: no tz results in DatetimeIndex
result = Index([Timestamp('2011-01-01'),
Timestamp('2011-01-02')], name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01'),
Timestamp('2011-01-02')], name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is None
# same tz results in DatetimeIndex
result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')],
name='idx')
exp = DatetimeIndex(
[Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00')
], tz='Asia/Tokyo', name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is not None
assert result.tz == exp.tz
# same tz results in DatetimeIndex (DST)
result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'),
Timestamp('2011-08-01 10:00', tz='US/Eastern')],
name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'),
Timestamp('2011-08-01 10:00')],
tz='US/Eastern', name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is not None
assert result.tz == exp.tz
# Different tz results in Index(dtype=object)
result = Index([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00', tz='US/Eastern')],
name='idx')
exp = Index([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00', tz='US/Eastern')],
dtype='object', name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert not isinstance(result, DatetimeIndex)
result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
Timestamp('2011-01-02 10:00', tz='US/Eastern')],
name='idx')
exp = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
Timestamp('2011-01-02 10:00', tz='US/Eastern')],
dtype='object', name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert not isinstance(result, DatetimeIndex)
# length = 1
result = Index([Timestamp('2011-01-01')], name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01')], name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is None
# length = 1 with tz
result = Index(
[Timestamp('2011-01-01 10:00', tz='Asia/Tokyo')], name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01 10:00')], tz='Asia/Tokyo',
name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is not None
assert result.tz == exp.tz
def test_construction_index_with_mixed_timezones_with_NaT(self):
# see gh-11488
result = Index([pd.NaT, Timestamp('2011-01-01'),
pd.NaT, Timestamp('2011-01-02')], name='idx')
exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01'),
pd.NaT, Timestamp('2011-01-02')], name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is None
# Same tz results in DatetimeIndex
result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
pd.NaT, Timestamp('2011-01-02 10:00',
tz='Asia/Tokyo')],
name='idx')
exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 10:00'),
pd.NaT, Timestamp('2011-01-02 10:00')],
tz='Asia/Tokyo', name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is not None
assert result.tz == exp.tz
# same tz results in DatetimeIndex (DST)
result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'),
pd.NaT,
Timestamp('2011-08-01 10:00', tz='US/Eastern')],
name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), pd.NaT,
Timestamp('2011-08-01 10:00')],
tz='US/Eastern', name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is not None
assert result.tz == exp.tz
# different tz results in Index(dtype=object)
result = Index([pd.NaT, Timestamp('2011-01-01 10:00'),
pd.NaT, Timestamp('2011-01-02 10:00',
tz='US/Eastern')],
name='idx')
exp = Index([pd.NaT, Timestamp('2011-01-01 10:00'),
pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')],
dtype='object', name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert not isinstance(result, DatetimeIndex)
result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
pd.NaT, Timestamp('2011-01-02 10:00',
tz='US/Eastern')], name='idx')
exp = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')],
dtype='object', name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert not isinstance(result, DatetimeIndex)
# all NaT
result = Index([pd.NaT, pd.NaT], name='idx')
exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is None
# all NaT with tz
result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx')
exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
assert result.tz is not None
assert result.tz == exp.tz
def test_construction_dti_with_mixed_timezones(self):
# GH 11488 (not changed, added explicit tests)
# no tz results in DatetimeIndex
result = DatetimeIndex(
[Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
exp = DatetimeIndex(
[Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
# same tz results in DatetimeIndex
result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
Timestamp('2011-01-02 10:00',
tz='Asia/Tokyo')],
name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00')],
tz='Asia/Tokyo', name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
# same tz results in DatetimeIndex (DST)
result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'),
Timestamp('2011-08-01 10:00',
tz='US/Eastern')],
name='idx')
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'),
Timestamp('2011-08-01 10:00')],
tz='US/Eastern', name='idx')
tm.assert_index_equal(result, exp, exact=True)
assert isinstance(result, DatetimeIndex)
# tz mismatch affecting to tz-aware raises TypeError/ValueError
with pytest.raises(ValueError):
DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
Timestamp('2011-01-02 10:00', tz='US/Eastern')],
name='idx')
msg = 'cannot be converted to datetime64'
with pytest.raises(ValueError, match=msg):
DatetimeIndex([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00', tz='US/Eastern')],
tz='Asia/Tokyo', name='idx')
with pytest.raises(ValueError):
DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
Timestamp('2011-01-02 10:00', tz='US/Eastern')],
tz='US/Eastern', name='idx')
with pytest.raises(ValueError, match=msg):
# passing tz should results in DatetimeIndex, then mismatch raises
# TypeError
Index([pd.NaT, Timestamp('2011-01-01 10:00'),
pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')],
tz='Asia/Tokyo', name='idx')
def test_construction_base_constructor(self):
arr = [pd.Timestamp('2011-01-01'), pd.NaT, pd.Timestamp('2011-01-03')]
tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr))
tm.assert_index_equal(pd.Index(np.array(arr)),
pd.DatetimeIndex(np.array(arr)))
arr = [np.nan, pd.NaT, pd.Timestamp('2011-01-03')]
tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr))
tm.assert_index_equal(pd.Index(np.array(arr)),
pd.DatetimeIndex(np.array(arr)))
def test_construction_outofbounds(self):
# GH 13663
dates = [datetime(3000, 1, 1), datetime(4000, 1, 1),
datetime(5000, 1, 1), datetime(6000, 1, 1)]
exp = Index(dates, dtype=object)
# coerces to object
tm.assert_index_equal(Index(dates), exp)
with pytest.raises(OutOfBoundsDatetime):
# can't create DatetimeIndex
DatetimeIndex(dates)
def test_construction_with_ndarray(self):
# GH 5152
dates = [datetime(2013, 10, 7),
datetime(2013, 10, 8),
datetime(2013, 10, 9)]
data = DatetimeIndex(dates, freq=pd.offsets.BDay()).values
result = DatetimeIndex(data, freq=pd.offsets.BDay())
expected = DatetimeIndex(['2013-10-07',
'2013-10-08',
'2013-10-09'],
freq='B')
tm.assert_index_equal(result, expected)
def test_verify_integrity_deprecated(self):
# GH#23919
with tm.assert_produces_warning(FutureWarning):
DatetimeIndex(['1/1/2000'], verify_integrity=False)
def test_range_kwargs_deprecated(self):
# GH#23919
with tm.assert_produces_warning(FutureWarning):
DatetimeIndex(start='1/1/2000', end='1/10/2000', freq='D')
def test_integer_values_and_tz_deprecated(self):
# GH-24559
values = np.array([946684800000000000])
with tm.assert_produces_warning(FutureWarning):
result = DatetimeIndex(values, tz='US/Central')
expected = pd.DatetimeIndex(['2000-01-01T00:00:00'], tz="US/Central")
tm.assert_index_equal(result, expected)
# but UTC is *not* deprecated.
with tm.assert_produces_warning(None):
result = DatetimeIndex(values, tz='UTC')
expected = pd.DatetimeIndex(['2000-01-01T00:00:00'], tz="US/Central")
def test_constructor_coverage(self):
rng = date_range('1/1/2000', periods=10.5)
exp = date_range('1/1/2000', periods=10)
tm.assert_index_equal(rng, exp)
msg = 'periods must be a number, got foo'
with pytest.raises(TypeError, match=msg):
date_range(start='1/1/2000', periods='foo', freq='D')
with pytest.raises(ValueError):
with tm.assert_produces_warning(FutureWarning):
DatetimeIndex(start='1/1/2000', end='1/10/2000')
with pytest.raises(TypeError):
DatetimeIndex('1/1/2000')
# generator expression
gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10))
result = DatetimeIndex(gen)
expected = DatetimeIndex([datetime(2000, 1, 1) + timedelta(i)
for i in range(10)])
tm.assert_index_equal(result, expected)
# NumPy string array
strings = np.array(['2000-01-01', '2000-01-02', '2000-01-03'])
result = DatetimeIndex(strings)
expected = DatetimeIndex(strings.astype('O'))
tm.assert_index_equal(result, expected)
from_ints = DatetimeIndex(expected.asi8)
tm.assert_index_equal(from_ints, expected)
# string with NaT
strings = np.array(['2000-01-01', '2000-01-02', 'NaT'])
result = DatetimeIndex(strings)
expected = DatetimeIndex(strings.astype('O'))
tm.assert_index_equal(result, expected)
from_ints = DatetimeIndex(expected.asi8)
tm.assert_index_equal(from_ints, expected)
# non-conforming
pytest.raises(ValueError, DatetimeIndex,
['2000-01-01', '2000-01-02', '2000-01-04'], freq='D')
pytest.raises(ValueError, date_range, start='2011-01-01',
freq='b')
pytest.raises(ValueError, date_range, end='2011-01-01',
freq='B')
pytest.raises(ValueError, date_range, periods=10, freq='D')
@pytest.mark.parametrize('freq', ['AS', 'W-SUN'])
def test_constructor_datetime64_tzformat(self, freq):
# see GH#6572: ISO 8601 format results in pytz.FixedOffset
idx = date_range('2013-01-01T00:00:00-05:00',
'2016-01-01T23:59:59-05:00', freq=freq)
expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59',
freq=freq, tz=pytz.FixedOffset(-300))
tm.assert_index_equal(idx, expected)
# Unable to use `US/Eastern` because of DST
expected_i8 = date_range('2013-01-01T00:00:00',
'2016-01-01T23:59:59', freq=freq,
tz='America/Lima')
tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
idx = date_range('2013-01-01T00:00:00+09:00',
'2016-01-01T23:59:59+09:00', freq=freq)
expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59',
freq=freq, tz=pytz.FixedOffset(540))
tm.assert_index_equal(idx, expected)
expected_i8 = date_range('2013-01-01T00:00:00',
'2016-01-01T23:59:59', freq=freq,
tz='Asia/Tokyo')
tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
# Non ISO 8601 format results in dateutil.tz.tzoffset
idx = date_range('2013/1/1 0:00:00-5:00', '2016/1/1 23:59:59-5:00',
freq=freq)
expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59',
freq=freq, tz=pytz.FixedOffset(-300))
tm.assert_index_equal(idx, expected)
# Unable to use `US/Eastern` because of DST
expected_i8 = date_range('2013-01-01T00:00:00',
'2016-01-01T23:59:59', freq=freq,
tz='America/Lima')
tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
idx = date_range('2013/1/1 0:00:00+9:00',
'2016/1/1 23:59:59+09:00', freq=freq)
expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59',
freq=freq, tz=pytz.FixedOffset(540))
tm.assert_index_equal(idx, expected)
expected_i8 = date_range('2013-01-01T00:00:00',
'2016-01-01T23:59:59', freq=freq,
tz='Asia/Tokyo')
tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
def test_constructor_dtype(self):
# passing a dtype with a tz should localize
idx = DatetimeIndex(['2013-01-01', '2013-01-02'],
dtype='datetime64[ns, US/Eastern]')
expected = DatetimeIndex(['2013-01-01', '2013-01-02']
).tz_localize('US/Eastern')
tm.assert_index_equal(idx, expected)
idx = DatetimeIndex(['2013-01-01', '2013-01-02'],
tz='US/Eastern')
tm.assert_index_equal(idx, expected)
# if we already have a tz and its not the same, then raise
idx = DatetimeIndex(['2013-01-01', '2013-01-02'],
dtype='datetime64[ns, US/Eastern]')
pytest.raises(ValueError,
lambda: DatetimeIndex(idx,
dtype='datetime64[ns]'))
# this is effectively trying to convert tz's
pytest.raises(TypeError,
lambda: DatetimeIndex(idx,
dtype='datetime64[ns, CET]'))
pytest.raises(ValueError,
lambda: DatetimeIndex(
idx, tz='CET',
dtype='datetime64[ns, US/Eastern]'))
result = DatetimeIndex(idx, dtype='datetime64[ns, US/Eastern]')
tm.assert_index_equal(idx, result)
def test_constructor_name(self):
idx = date_range(start='2000-01-01', periods=1, freq='A',
name='TEST')
assert idx.name == 'TEST'
def test_000constructor_resolution(self):
# 2252
t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1)
idx = DatetimeIndex([t1])
assert idx.nanosecond[0] == t1.nanosecond
def test_disallow_setting_tz(self):
# GH 3746
dti = DatetimeIndex(['2010'], tz='UTC')
with pytest.raises(AttributeError):
dti.tz = pytz.timezone('US/Pacific')
@pytest.mark.parametrize('tz', [
None, 'America/Los_Angeles', pytz.timezone('America/Los_Angeles'),
Timestamp('2000', tz='America/Los_Angeles').tz])
def test_constructor_start_end_with_tz(self, tz):
# GH 18595
start = Timestamp('2013-01-01 06:00:00', tz='America/Los_Angeles')
end = Timestamp('2013-01-02 06:00:00', tz='America/Los_Angeles')
result = date_range(freq='D', start=start, end=end, tz=tz)
expected = DatetimeIndex(['2013-01-01 06:00:00',
'2013-01-02 06:00:00'],
tz='America/Los_Angeles')
tm.assert_index_equal(result, expected)
# Especially assert that the timezone is consistent for pytz
assert pytz.timezone('America/Los_Angeles') is result.tz
@pytest.mark.parametrize('tz', ['US/Pacific', 'US/Eastern', 'Asia/Tokyo'])
def test_constructor_with_non_normalized_pytz(self, tz):
# GH 18595
non_norm_tz = Timestamp('2010', tz=tz).tz
result = DatetimeIndex(['2010'], tz=non_norm_tz)
assert pytz.timezone(tz) is result.tz
def test_constructor_timestamp_near_dst(self):
# GH 20854
ts = [Timestamp('2016-10-30 03:00:00+0300', tz='Europe/Helsinki'),
Timestamp('2016-10-30 03:00:00+0200', tz='Europe/Helsinki')]
result = DatetimeIndex(ts)
expected = DatetimeIndex([ts[0].to_pydatetime(),
ts[1].to_pydatetime()])
tm.assert_index_equal(result, expected)
# TODO(GH-24559): Remove the xfail for the tz-aware case.
@pytest.mark.parametrize('klass', [Index, DatetimeIndex])
@pytest.mark.parametrize('box', [
np.array, partial(np.array, dtype=object), list])
@pytest.mark.parametrize('tz, dtype', [
pytest.param('US/Pacific', 'datetime64[ns, US/Pacific]',
marks=[pytest.mark.xfail(),
pytest.mark.filterwarnings(
"ignore:\\n Passing:FutureWarning")]),
[None, 'datetime64[ns]'],
])
def test_constructor_with_int_tz(self, klass, box, tz, dtype):
# GH 20997, 20964
ts = Timestamp('2018-01-01', tz=tz)
result = klass(box([ts.value]), dtype=dtype)
expected = klass([ts])
assert result == expected
# This is the desired future behavior
@pytest.mark.xfail(reason="Future behavior", strict=False)
@pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning")
def test_construction_int_rountrip(self, tz_naive_fixture):
# GH 12619
# TODO(GH-24559): Remove xfail
tz = tz_naive_fixture
result = 1293858000000000000
expected = DatetimeIndex([1293858000000000000], tz=tz).asi8[0]
assert result == expected
def test_construction_from_replaced_timestamps_with_dst(self):
# GH 18785
index = pd.date_range(pd.Timestamp(2000, 1, 1),
pd.Timestamp(2005, 1, 1),
freq='MS', tz='Australia/Melbourne')
test = pd.DataFrame({'data': range(len(index))}, index=index)
test = test.resample('Y').mean()
result = pd.DatetimeIndex([x.replace(month=6, day=1)
for x in test.index])
expected = pd.DatetimeIndex(['2000-06-01 00:00:00',
'2001-06-01 00:00:00',
'2002-06-01 00:00:00',
'2003-06-01 00:00:00',
'2004-06-01 00:00:00',
'2005-06-01 00:00:00'],
tz='Australia/Melbourne')
tm.assert_index_equal(result, expected)
def test_construction_with_tz_and_tz_aware_dti(self):
# GH 23579
dti = date_range('2016-01-01', periods=3, tz='US/Central')
with pytest.raises(TypeError):
DatetimeIndex(dti, tz='Asia/Tokyo')
def test_construction_with_nat_and_tzlocal(self):
tz = dateutil.tz.tzlocal()
result = DatetimeIndex(['2018', 'NaT'], tz=tz)
expected = DatetimeIndex([Timestamp('2018', tz=tz), pd.NaT])
tm.assert_index_equal(result, expected)
def test_constructor_no_precision_warns(self):
    """A unit-less 'datetime64' dtype warns and yields the ns-dtype index.

    See GH-24753, GH-24739.
    """
    nano_index = pd.DatetimeIndex(['2000'], dtype='datetime64[ns]')

    # we set the stacklevel for DatetimeIndex
    with tm.assert_produces_warning(FutureWarning):
        from_dti = pd.DatetimeIndex(['2000'], dtype='datetime64')
    tm.assert_index_equal(from_dti, nano_index)

    # The stacklevel is not checked when going through pd.Index.
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        from_index = pd.Index(['2000'], dtype='datetime64')
    tm.assert_index_equal(from_index, nano_index)
def test_constructor_wrong_precision_raises(self):
    """Requesting dtype 'datetime64[us]' raises ValueError."""
    microsecond_dtype = 'datetime64[us]'
    with pytest.raises(ValueError):
        pd.DatetimeIndex(['2000'], dtype=microsecond_dtype)
class TestTimeSeries(object):
    """Tests for building ``DatetimeIndex`` objects from assorted inputs."""

    def test_dti_constructor_preserve_dti_freq(self):
        """A DatetimeIndex built from a date_range has an equal freq."""
        rng = date_range('1/1/2000', '1/2/2000', freq='5min')

        rng2 = DatetimeIndex(rng)
        assert rng.freq == rng2.freq

    def test_dti_constructor_years_only(self, tz_naive_fixture):
        """Year-only start/end strings match fully spelled-out endpoints."""
        tz = tz_naive_fixture
        # GH 6961
        rng1 = date_range('2014', '2015', freq='M', tz=tz)
        expected1 = date_range('2014-01-31', '2014-12-31', freq='M', tz=tz)

        rng2 = date_range('2014', '2015', freq='MS', tz=tz)
        expected2 = date_range('2014-01-01', '2015-01-01', freq='MS', tz=tz)

        rng3 = date_range('2014', '2020', freq='A', tz=tz)
        expected3 = date_range('2014-12-31', '2019-12-31', freq='A', tz=tz)

        rng4 = date_range('2014', '2020', freq='AS', tz=tz)
        expected4 = date_range('2014-01-01', '2020-01-01', freq='AS', tz=tz)

        for rng, expected in [(rng1, expected1), (rng2, expected2),
                              (rng3, expected3), (rng4, expected4)]:
            tm.assert_index_equal(rng, expected)

    def test_dti_constructor_small_int(self, any_int_dtype):
        """Small integers of any int dtype are read as nanosecond offsets."""
        # see gh-13721
        exp = DatetimeIndex(['1970-01-01 00:00:00.00000000',
                             '1970-01-01 00:00:00.00000001',
                             '1970-01-01 00:00:00.00000002'])

        arr = np.array([0, 10, 20], dtype=any_int_dtype)
        tm.assert_index_equal(DatetimeIndex(arr), exp)

    def test_ctor_str_intraday(self):
        """The seconds field of a parsed string is kept."""
        rng = DatetimeIndex(['1-1-2000 00:00:01'])
        assert rng[0].second == 1

    def test_is_(self):
        """``is_`` is true for the index and its view, false for a copy."""
        dti = date_range(start='1/1/2005', end='12/1/2005', freq='M')
        assert dti.is_(dti)
        assert dti.is_(dti.view())
        assert not dti.is_(dti.copy())

    def test_index_cast_datetime64_other_units(self):
        """Index values of an 'M8[D]' array equal its ns conversion."""
        arr = np.arange(0, 100, 10, dtype=np.int64).view('M8[D]')
        idx = Index(arr)

        assert (idx.values == conversion.ensure_datetime64ns(arr)).all()

    def test_constructor_int64_nocopy(self):
        """Input mutation shows in the index unless ``copy=True`` is passed."""
        # GH#1624
        arr = np.arange(1000, dtype=np.int64)
        index = DatetimeIndex(arr)

        # Without copy, writes to the source array show up in the index.
        arr[50:100] = -1
        assert (index.asi8[50:100] == -1).all()

        arr = np.arange(1000, dtype=np.int64)
        index = DatetimeIndex(arr, copy=True)

        # With copy=True the index is unaffected by the later write.
        arr[50:100] = -1
        assert (index.asi8[50:100] != -1).all()

    @pytest.mark.parametrize('freq', ['M', 'Q', 'A', 'D', 'B', 'BH',
                                      'T', 'S', 'L', 'U', 'H', 'N', 'C'])
    def test_from_freq_recreate_from_data(self, freq):
        """Re-wrapping a one-period range with the same freq is a no-op."""
        org = date_range(start='2001/02/01 09:00', freq=freq, periods=1)
        idx = DatetimeIndex(org, freq=freq)
        tm.assert_index_equal(idx, org)

        org = date_range(start='2001/02/01 09:00', freq=freq,
                         tz='US/Pacific', periods=1)
        idx = DatetimeIndex(org, freq=freq, tz='US/Pacific')
        tm.assert_index_equal(idx, org)

    def test_datetimeindex_constructor_misc(self):
        """Mixed input types parse identically; date_range freq variants."""
        arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04']
        # 'Jn 3, 2005' is not a parseable date; expect the specific
        # parsing error rather than accepting any exception.
        with pytest.raises(ValueError):
            DatetimeIndex(arr)

        arr = ['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04']
        idx1 = DatetimeIndex(arr)

        arr = [datetime(2005, 1, 1), '1/2/2005', '1/3/2005', '2005-01-04']
        idx2 = DatetimeIndex(arr)

        arr = [Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005',
               '2005-01-04']
        idx3 = DatetimeIndex(arr)

        arr = np.array(['1/1/2005', '1/2/2005', '1/3/2005',
                        '2005-01-04'], dtype='O')
        idx4 = DatetimeIndex(arr)

        arr = to_datetime(['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04'])
        idx5 = DatetimeIndex(arr)

        arr = to_datetime(['1/1/2005', '1/2/2005', 'Jan 3, 2005', '2005-01-04'
                           ])
        idx6 = DatetimeIndex(arr)

        idx7 = DatetimeIndex(['12/05/2007', '25/01/2008'], dayfirst=True)
        idx8 = DatetimeIndex(['2007/05/12', '2008/01/25'], dayfirst=False,
                             yearfirst=True)
        tm.assert_index_equal(idx7, idx8)

        # Every input flavour must produce the same underlying values.
        for other in [idx2, idx3, idx4, idx5, idx6]:
            assert (idx1.values == other.values).all()

        sdate = datetime(1999, 12, 25)
        edate = datetime(2000, 1, 1)
        idx = date_range(start=sdate, freq='1B', periods=20)
        assert len(idx) == 20
        assert idx[0] == sdate + 0 * offsets.BDay()
        assert idx.freq == 'B'

        idx = date_range(end=edate, freq=('D', 5), periods=20)
        assert len(idx) == 20
        assert idx[-1] == edate
        assert idx.freq == '5D'

        # String aliases and the equivalent offset objects must agree.
        idx1 = date_range(start=sdate, end=edate, freq='W-SUN')
        idx2 = date_range(start=sdate, end=edate,
                          freq=offsets.Week(weekday=6))
        assert len(idx1) == len(idx2)
        assert idx1.freq == idx2.freq

        idx1 = date_range(start=sdate, end=edate, freq='QS')
        idx2 = date_range(start=sdate, end=edate,
                          freq=offsets.QuarterBegin(startingMonth=1))
        assert len(idx1) == len(idx2)
        assert idx1.freq == idx2.freq

        idx1 = date_range(start=sdate, end=edate, freq='BQ')
        idx2 = date_range(start=sdate, end=edate,
                          freq=offsets.BQuarterEnd(startingMonth=12))
        assert len(idx1) == len(idx2)
        assert idx1.freq == idx2.freq
@@ -1,842 +0,0 @@
"""
test date_range, bdate_range construction from the convenience range functions
"""
from datetime import datetime, time, timedelta
import numpy as np
import pytest
import pytz
from pytz import timezone
import pandas.compat as compat
from pandas.errors import OutOfBoundsDatetime
import pandas.util._test_decorators as td
import pandas as pd
from pandas import DatetimeIndex, Timestamp, bdate_range, date_range, offsets
from pandas.tests.series.common import TestData
import pandas.util.testing as tm
from pandas.tseries.offsets import (
BDay, CDay, DateOffset, MonthEnd, generate_range, prefix_mapping)
# Shared start/end datetimes for the generate_range and bdate_range tests.
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
class TestTimestampEquivDateRange(object):
    """Check ``date_range(...)[0]`` against the ``Timestamp`` constructor.

    Older tests in TestTimeSeries constructed their `stamp` objects
    using `date_range` instead of the `Timestamp` constructor.
    These tests check that the two are equivalent in the pertinent cases.
    """

    def test_date_range_timestamp_equiv(self):
        """Timezone given as a plain string."""
        first = date_range('20090415', '20090519', tz='US/Eastern')[0]
        expected = Timestamp('20090415', tz='US/Eastern', freq='D')
        assert expected == first

    def test_date_range_timestamp_equiv_dateutil(self):
        """Timezone given as a 'dateutil/'-prefixed string."""
        first = date_range('20090415', '20090519',
                           tz='dateutil/US/Eastern')[0]
        expected = Timestamp('20090415', tz='dateutil/US/Eastern', freq='D')
        assert expected == first

    def test_date_range_timestamp_equiv_explicit_pytz(self):
        """Timezone given as a pytz timezone object."""
        first = date_range('20090415', '20090519',
                           tz=pytz.timezone('US/Eastern'))[0]
        expected = Timestamp('20090415', tz=pytz.timezone('US/Eastern'),
                             freq='D')
        assert expected == first

    @td.skip_if_windows_python_3
    def test_date_range_timestamp_equiv_explicit_dateutil(self):
        """Timezone given as an object returned by ``dateutil_gettz``."""
        from pandas._libs.tslibs.timezones import dateutil_gettz as gettz

        first = date_range('20090415', '20090519',
                           tz=gettz('US/Eastern'))[0]
        expected = Timestamp('20090415', tz=gettz('US/Eastern'), freq='D')
        assert expected == first

    def test_date_range_timestamp_equiv_from_datetime_instance(self):
        """Both are built from the same ``datetime`` instance."""
        datetime_instance = datetime(2014, 3, 4)
        # build a timestamp with a frequency, since then it supports
        # addition/subtraction of integers
        one_day_range = date_range(datetime_instance, periods=1, freq='D')
        timestamp_instance = one_day_range[0]

        expected = Timestamp(datetime_instance, freq='D')
        assert expected == timestamp_instance

    def test_date_range_timestamp_equiv_preserve_frequency(self):
        """Both are built from the same date string with freq 'D'."""
        one_day_range = date_range('2014-03-05', periods=1, freq='D')
        timestamp_instance = one_day_range[0]
        expected = Timestamp('2014-03-05', freq='D')

        assert timestamp_instance == expected
class TestDateRanges(TestData):
    """Tests for ``date_range``: argument validation, overflow handling,
    frequency aliases, time zones and the ``closed`` parameter."""

    def test_date_range_nat(self):
        """NaT as ``start`` or ``end`` raises ValueError."""
        # GH#11587
        msg = "Neither `start` nor `end` can be NaT"
        with pytest.raises(ValueError, match=msg):
            date_range(start='2016-01-01', end=pd.NaT, freq='D')
        with pytest.raises(ValueError, match=msg):
            date_range(start=pd.NaT, end='2016-01-01', freq='D')

    def test_date_range_multiplication_overflow(self):
        """Overflow in ``periods * stride`` is caught without warnings."""
        # GH#24255
        # check that overflows in calculating `addend = periods * stride`
        # are caught
        with tm.assert_produces_warning(None):
            # we should _not_ be seeing a overflow RuntimeWarning
            dti = date_range(start='1677-09-22', periods=213503, freq='D')

        assert dti[0] == Timestamp('1677-09-22')
        assert len(dti) == 213503

        msg = "Cannot generate range with"
        with pytest.raises(OutOfBoundsDatetime, match=msg):
            date_range('1969-05-04', periods=200000000, freq='30000D')

    def test_date_range_unsigned_overflow_handling(self):
        """start/periods and end/periods rebuild a near-full-span range."""
        # GH#24255
        # case where `addend = periods * stride` overflows int64 bounds
        # but not uint64 bounds
        dti = date_range(start='1677-09-22', end='2262-04-11', freq='D')

        dti2 = date_range(start=dti[0], periods=len(dti), freq='D')
        assert dti2.equals(dti)

        dti3 = date_range(end=dti[-1], periods=len(dti), freq='D')
        assert dti3.equals(dti)

    def test_date_range_int64_overflow_non_recoverable(self):
        """Ranges running past the representable bounds raise."""
        # GH#24255
        # case with start later than 1970-01-01, overflow int64 but not uint64
        msg = "Cannot generate range with"
        with pytest.raises(OutOfBoundsDatetime, match=msg):
            date_range(start='1970-02-01', periods=106752 * 24, freq='H')

        # case with end before 1970-01-01, overflow int64 but not uint64
        with pytest.raises(OutOfBoundsDatetime, match=msg):
            date_range(end='1969-11-14', periods=106752 * 24, freq='H')

    def test_date_range_int64_overflow_stride_endpoint_different_signs(self):
        """Negative-freq ranges rebuilt from one endpoint plus periods."""
        # cases where stride * periods overflow int64 and stride/endpoint
        # have different signs
        start = Timestamp('2262-02-23')
        end = Timestamp('1969-11-14')

        expected = date_range(start=start, end=end, freq='-1H')
        assert expected[0] == start
        assert expected[-1] == end

        dti = date_range(end=end, periods=len(expected), freq='-1H')
        tm.assert_index_equal(dti, expected)

        start2 = Timestamp('1970-02-01')
        end2 = Timestamp('1677-10-22')

        expected2 = date_range(start=start2, end=end2, freq='-1H')
        assert expected2[0] == start2
        assert expected2[-1] == end2

        dti2 = date_range(start=start2, periods=len(expected2), freq='-1H')
        tm.assert_index_equal(dti2, expected2)

    def test_date_range_out_of_bounds(self):
        """100000 daily periods from these endpoints raise."""
        # GH#14187
        with pytest.raises(OutOfBoundsDatetime):
            date_range('2016-01-01', periods=100000, freq='D')
        with pytest.raises(OutOfBoundsDatetime):
            date_range(end='1763-10-12', periods=100000, freq='D')

    def test_date_range_gen_error(self):
        """An 18-minute span at 5min frequency has four points."""
        rng = date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min')
        assert len(rng) == 4

    @pytest.mark.parametrize("freq", ["AS", "YS"])
    def test_begin_year_alias(self, freq):
        """Both year-begin aliases give January 1st of each year."""
        # see gh-9313
        rng = date_range("1/1/2013", "7/1/2017", freq=freq)
        exp = pd.DatetimeIndex(["2013-01-01", "2014-01-01",
                                "2015-01-01", "2016-01-01",
                                "2017-01-01"], freq=freq)
        tm.assert_index_equal(rng, exp)

    @pytest.mark.parametrize("freq", ["A", "Y"])
    def test_end_year_alias(self, freq):
        """Both year-end aliases give December 31st of each year."""
        # see gh-9313
        rng = date_range("1/1/2013", "7/1/2017", freq=freq)
        exp = pd.DatetimeIndex(["2013-12-31", "2014-12-31",
                                "2015-12-31", "2016-12-31"], freq=freq)
        tm.assert_index_equal(rng, exp)

    @pytest.mark.parametrize("freq", ["BA", "BY"])
    def test_business_end_year_alias(self, freq):
        """Both business-year-end aliases give the same dates."""
        # see gh-9313
        rng = date_range("1/1/2013", "7/1/2017", freq=freq)
        exp = pd.DatetimeIndex(["2013-12-31", "2014-12-31",
                                "2015-12-31", "2016-12-30"], freq=freq)
        tm.assert_index_equal(rng, exp)

    def test_date_range_negative_freq(self):
        """Negative multiples of 'A' and 'M' walk backwards in time."""
        # GH 11018
        rng = date_range('2011-12-31', freq='-2A', periods=3)
        exp = pd.DatetimeIndex(['2011-12-31', '2009-12-31',
                                '2007-12-31'], freq='-2A')
        tm.assert_index_equal(rng, exp)
        assert rng.freq == '-2A'

        rng = date_range('2011-01-31', freq='-2M', periods=3)
        exp = pd.DatetimeIndex(['2011-01-31', '2010-11-30',
                                '2010-09-30'], freq='-2M')
        tm.assert_index_equal(rng, exp)
        assert rng.freq == '-2M'

    def test_date_range_bms_bug(self):
        """A 'BMS' range starting 1/1/2000 begins on 2000-01-03."""
        # #1645
        rng = date_range('1/1/2000', periods=10, freq='BMS')

        ex_first = Timestamp('2000-01-03')
        assert rng[0] == ex_first

    def test_date_range_normalize(self):
        """With ``normalize=False`` the time of day is kept."""
        snap = datetime.today()
        n = 50

        rng = date_range(snap, periods=n, normalize=False, freq='2D')

        offset = timedelta(2)
        values = DatetimeIndex([snap + i * offset for i in range(n)])

        tm.assert_index_equal(rng, values)

        rng = date_range('1/1/2000 08:15', periods=n, normalize=False,
                         freq='B')
        the_time = time(8, 15)
        for val in rng:
            assert val.time() == the_time

    def test_date_range_fy5252(self):
        """An ``FY5253`` offset object is accepted as ``freq``."""
        dr = date_range(start="2013-01-01", periods=2, freq=offsets.FY5253(
            startingMonth=1, weekday=3, variation="nearest"))
        assert dr[0] == Timestamp('2013-01-31')
        assert dr[1] == Timestamp('2014-01-30')

    def test_date_range_ambiguous_arguments(self):
        """Supplying start, end, periods and freq together raises."""
        # #2538
        start = datetime(2011, 1, 1, 5, 3, 40)
        end = datetime(2011, 1, 1, 8, 9, 40)

        msg = ('Of the four parameters: start, end, periods, and '
               'freq, exactly three must be specified')
        with pytest.raises(ValueError, match=msg):
            date_range(start, end, periods=10, freq='s')

    def test_date_range_convenience_periods(self):
        """start/end/periods without freq gives evenly spaced points."""
        # GH 20808
        result = date_range('2018-04-24', '2018-04-27', periods=3)
        expected = DatetimeIndex(['2018-04-24 00:00:00',
                                  '2018-04-25 12:00:00',
                                  '2018-04-27 00:00:00'], freq=None)

        tm.assert_index_equal(result, expected)

        # Test if spacing remains linear if tz changes to dst in range
        result = date_range('2018-04-01 01:00:00',
                            '2018-04-01 04:00:00',
                            tz='Australia/Sydney',
                            periods=3)
        expected = DatetimeIndex([Timestamp('2018-04-01 01:00:00+1100',
                                            tz='Australia/Sydney'),
                                  Timestamp('2018-04-01 02:00:00+1000',
                                            tz='Australia/Sydney'),
                                  Timestamp('2018-04-01 04:00:00+1000',
                                            tz='Australia/Sydney')])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('start,end,result_tz', [
        ['20180101', '20180103', 'US/Eastern'],
        [datetime(2018, 1, 1), datetime(2018, 1, 3), 'US/Eastern'],
        [Timestamp('20180101'), Timestamp('20180103'), 'US/Eastern'],
        [Timestamp('20180101', tz='US/Eastern'),
         Timestamp('20180103', tz='US/Eastern'), 'US/Eastern'],
        [Timestamp('20180101', tz='US/Eastern'),
         Timestamp('20180103', tz='US/Eastern'), None]])
    def test_date_range_linspacing_tz(self, start, end, result_tz):
        """Linearly spaced ranges come out in US/Eastern for every input."""
        # GH 20983
        result = date_range(start, end, periods=3, tz=result_tz)
        expected = date_range('20180101', periods=3, freq='D', tz='US/Eastern')
        tm.assert_index_equal(result, expected)

    def test_date_range_businesshour(self):
        """'BH' ranges within a day, across a weekend and over three days."""
        idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00',
                             '2014-07-04 11:00',
                             '2014-07-04 12:00', '2014-07-04 13:00',
                             '2014-07-04 14:00',
                             '2014-07-04 15:00', '2014-07-04 16:00'],
                            freq='BH')
        rng = date_range('2014-07-04 09:00', '2014-07-04 16:00', freq='BH')
        tm.assert_index_equal(idx, rng)

        idx = DatetimeIndex(
            ['2014-07-04 16:00', '2014-07-07 09:00'], freq='BH')
        rng = date_range('2014-07-04 16:00', '2014-07-07 09:00', freq='BH')
        tm.assert_index_equal(idx, rng)

        idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00',
                             '2014-07-04 11:00',
                             '2014-07-04 12:00', '2014-07-04 13:00',
                             '2014-07-04 14:00',
                             '2014-07-04 15:00', '2014-07-04 16:00',
                             '2014-07-07 09:00', '2014-07-07 10:00',
                             '2014-07-07 11:00',
                             '2014-07-07 12:00', '2014-07-07 13:00',
                             '2014-07-07 14:00',
                             '2014-07-07 15:00', '2014-07-07 16:00',
                             '2014-07-08 09:00', '2014-07-08 10:00',
                             '2014-07-08 11:00',
                             '2014-07-08 12:00', '2014-07-08 13:00',
                             '2014-07-08 14:00',
                             '2014-07-08 15:00', '2014-07-08 16:00'],
                            freq='BH')
        rng = date_range('2014-07-04 09:00', '2014-07-08 16:00', freq='BH')
        tm.assert_index_equal(idx, rng)

    def test_range_misspecified(self):
        """Under-specified argument combinations raise ValueError."""
        # GH #1095
        msg = ('Of the four parameters: start, end, periods, and '
               'freq, exactly three must be specified')

        with pytest.raises(ValueError, match=msg):
            date_range(start='1/1/2000')

        with pytest.raises(ValueError, match=msg):
            date_range(end='1/1/2000')

        with pytest.raises(ValueError, match=msg):
            date_range(periods=10)

        with pytest.raises(ValueError, match=msg):
            date_range(start='1/1/2000', freq='H')

        with pytest.raises(ValueError, match=msg):
            date_range(end='1/1/2000', freq='H')

        with pytest.raises(ValueError, match=msg):
            date_range(periods=10, freq='H')

        with pytest.raises(ValueError, match=msg):
            date_range()

    @pytest.mark.parametrize('f', [compat.long, int])
    def test_compat_replace(self, f):
        """``periods`` may be passed as an int or a long."""
        # https://github.com/statsmodels/statsmodels/issues/3349
        # replace should take ints/longs for compat
        result = date_range(Timestamp('1960-04-01 00:00:00', freq='QS-JAN'),
                            periods=f(76), freq='QS-JAN')
        assert len(result) == 76

    def test_catch_infinite_loop(self):
        """An offset that never advances the date raises, not loops."""
        offset = offsets.DateOffset(minute=5)
        # blow up, don't loop forever
        # Expect ValueError specifically, the same type asserted for the
        # non-incrementing MonthEnd(0) offset, instead of any Exception.
        with pytest.raises(ValueError):
            date_range(datetime(2011, 11, 11), datetime(2011, 11, 12),
                       freq=offset)

    @pytest.mark.parametrize('periods', (1, 2))
    def test_wom_len(self, periods):
        """A 'WOM-1MON' range has exactly ``periods`` entries."""
        # https://github.com/pandas-dev/pandas/issues/20517
        res = date_range(start='20110101', periods=periods, freq='WOM-1MON')
        assert len(res) == periods

    def test_construct_over_dst(self):
        """Hourly range across the repeated 01:00 hour in US/Pacific."""
        # GH 20854
        pre_dst = Timestamp('2010-11-07 01:00:00').tz_localize('US/Pacific',
                                                               ambiguous=True)
        pst_dst = Timestamp('2010-11-07 01:00:00').tz_localize('US/Pacific',
                                                               ambiguous=False)
        expect_data = [Timestamp('2010-11-07 00:00:00', tz='US/Pacific'),
                       pre_dst,
                       pst_dst]
        expected = DatetimeIndex(expect_data)
        result = date_range(start='2010-11-7', periods=3,
                            freq='H', tz='US/Pacific')
        tm.assert_index_equal(result, expected)

    def test_construct_with_different_start_end_string_format(self):
        """Start and end strings may use different date separators."""
        # GH 12064
        result = date_range('2013-01-01 00:00:00+09:00',
                            '2013/01/01 02:00:00+09:00', freq='H')
        expected = DatetimeIndex([Timestamp('2013-01-01 00:00:00+09:00'),
                                  Timestamp('2013-01-01 01:00:00+09:00'),
                                  Timestamp('2013-01-01 02:00:00+09:00')])
        tm.assert_index_equal(result, expected)

    def test_error_with_zero_monthends(self):
        """``MonthEnd(0)`` as freq raises because it never increments."""
        msg = r'Offset <0 \* MonthEnds> did not increment date'
        with pytest.raises(ValueError, match=msg):
            date_range('1/1/2000', '1/1/2001', freq=MonthEnd(0))

    def test_range_bug(self):
        """A ``DateOffset(months=3)`` freq matches manual offset addition."""
        # GH #770
        offset = DateOffset(months=3)
        result = date_range("2011-1-1", "2012-1-31", freq=offset)

        start = datetime(2011, 1, 1)
        expected = DatetimeIndex([start + i * offset for i in range(5)])
        tm.assert_index_equal(result, expected)

    def test_range_tz_pytz(self):
        """pytz-localized endpoints carry their zone into the range."""
        # see gh-2906
        tz = timezone('US/Eastern')
        start = tz.localize(datetime(2011, 1, 1))
        end = tz.localize(datetime(2011, 1, 3))

        dr = date_range(start=start, periods=3)
        assert dr.tz.zone == tz.zone
        assert dr[0] == start
        assert dr[2] == end

        dr = date_range(end=end, periods=3)
        assert dr.tz.zone == tz.zone
        assert dr[0] == start
        assert dr[2] == end

        dr = date_range(start=start, end=end)
        assert dr.tz.zone == tz.zone
        assert dr[0] == start
        assert dr[2] == end

    @pytest.mark.parametrize('start, end', [
        [Timestamp(datetime(2014, 3, 6), tz='US/Eastern'),
         Timestamp(datetime(2014, 3, 12), tz='US/Eastern')],
        [Timestamp(datetime(2013, 11, 1), tz='US/Eastern'),
         Timestamp(datetime(2013, 11, 6), tz='US/Eastern')]
    ])
    def test_range_tz_dst_straddle_pytz(self, start, end):
        """Daily US/Eastern ranges keep their endpoints and hour 0."""
        dr = date_range(start, end, freq='D')
        assert dr[0] == start
        assert dr[-1] == end
        assert np.all(dr.hour == 0)

        dr = date_range(start, end, freq='D', tz='US/Eastern')
        assert dr[0] == start
        assert dr[-1] == end
        assert np.all(dr.hour == 0)

        # Naive endpoints combined with an explicit tz argument.
        dr = date_range(start.replace(tzinfo=None), end.replace(
            tzinfo=None), freq='D', tz='US/Eastern')
        assert dr[0] == start
        assert dr[-1] == end
        assert np.all(dr.hour == 0)

    def test_range_tz_dateutil(self):
        """dateutil-zoned endpoints carry their zone into the range."""
        # see gh-2906

        # Use maybe_get_tz to fix filename in tz under dateutil.
        from pandas._libs.tslibs.timezones import maybe_get_tz
        tz = lambda x: maybe_get_tz('dateutil/' + x)

        start = datetime(2011, 1, 1, tzinfo=tz('US/Eastern'))
        end = datetime(2011, 1, 3, tzinfo=tz('US/Eastern'))

        dr = date_range(start=start, periods=3)
        assert dr.tz == tz('US/Eastern')
        assert dr[0] == start
        assert dr[2] == end

        dr = date_range(end=end, periods=3)
        assert dr.tz == tz('US/Eastern')
        assert dr[0] == start
        assert dr[2] == end

        dr = date_range(start=start, end=end)
        assert dr.tz == tz('US/Eastern')
        assert dr[0] == start
        assert dr[2] == end

    @pytest.mark.parametrize('freq', ["1D", "3D", "2M", "7W", "3H", "A"])
    def test_range_closed(self, freq):
        """'left'/'right' ranges equal the closed=None range trimmed."""
        begin = datetime(2011, 1, 1)
        end = datetime(2014, 1, 1)

        closed = date_range(begin, end, closed=None, freq=freq)
        left = date_range(begin, end, closed="left", freq=freq)
        right = date_range(begin, end, closed="right", freq=freq)
        expected_left = left
        expected_right = right

        # Only trim when the endpoint is actually part of the full range.
        if end == closed[-1]:
            expected_left = closed[:-1]
        if begin == closed[0]:
            expected_right = closed[1:]

        tm.assert_index_equal(expected_left, left)
        tm.assert_index_equal(expected_right, right)

    def test_range_closed_with_tz_aware_start_end(self):
        """``closed`` trimming with tz-aware endpoints or a ``tz`` argument."""
        # GH12409, GH12684
        begin = Timestamp('2011/1/1', tz='US/Eastern')
        end = Timestamp('2014/1/1', tz='US/Eastern')

        for freq in ["1D", "3D", "2M", "7W", "3H", "A"]:
            closed = date_range(begin, end, closed=None, freq=freq)
            left = date_range(begin, end, closed="left", freq=freq)
            right = date_range(begin, end, closed="right", freq=freq)
            expected_left = left
            expected_right = right

            if end == closed[-1]:
                expected_left = closed[:-1]
            if begin == closed[0]:
                expected_right = closed[1:]

            tm.assert_index_equal(expected_left, left)
            tm.assert_index_equal(expected_right, right)

        # Same checks with naive endpoints and the zone passed via ``tz``.
        begin = Timestamp('2011/1/1')
        end = Timestamp('2014/1/1')
        begintz = Timestamp('2011/1/1', tz='US/Eastern')
        endtz = Timestamp('2014/1/1', tz='US/Eastern')

        for freq in ["1D", "3D", "2M", "7W", "3H", "A"]:
            closed = date_range(begin, end, closed=None, freq=freq,
                                tz='US/Eastern')
            left = date_range(begin, end, closed="left", freq=freq,
                              tz='US/Eastern')
            right = date_range(begin, end, closed="right", freq=freq,
                               tz='US/Eastern')
            expected_left = left
            expected_right = right

            if endtz == closed[-1]:
                expected_left = closed[:-1]
            if begintz == closed[0]:
                expected_right = closed[1:]

            tm.assert_index_equal(expected_left, left)
            tm.assert_index_equal(expected_right, right)

    @pytest.mark.parametrize('closed', ['right', 'left', None])
    def test_range_closed_boundary(self, closed):
        """``closed`` handling when an endpoint falls on the frequency."""
        # GH#11804
        right_boundary = date_range('2015-09-12', '2015-12-01',
                                    freq='QS-MAR', closed=closed)
        left_boundary = date_range('2015-09-01', '2015-09-12',
                                   freq='QS-MAR', closed=closed)
        both_boundary = date_range('2015-09-01', '2015-12-01',
                                   freq='QS-MAR', closed=closed)
        expected_right = expected_left = expected_both = both_boundary

        if closed == 'right':
            expected_left = both_boundary[1:]

        if closed == 'left':
            expected_right = both_boundary[:-1]

        if closed is None:
            expected_right = both_boundary[1:]
            expected_left = both_boundary[:-1]

        tm.assert_index_equal(right_boundary, expected_right)
        tm.assert_index_equal(left_boundary, expected_left)
        tm.assert_index_equal(both_boundary, expected_both)

    def test_years_only(self):
        """Year-only strings with monthly freq cover Jan 31 to Dec 31."""
        # GH 6961
        dr = date_range('2014', '2015', freq='M')
        assert dr[0] == datetime(2014, 1, 31)
        assert dr[-1] == datetime(2014, 12, 31)

    def test_freq_divides_end_in_nanos(self):
        """A '345min' freq gives the same two points on consecutive days."""
        # GH 10885
        result_1 = date_range('2005-01-12 10:00', '2005-01-12 16:00',
                              freq='345min')
        result_2 = date_range('2005-01-13 10:00', '2005-01-13 16:00',
                              freq='345min')
        expected_1 = DatetimeIndex(['2005-01-12 10:00:00',
                                    '2005-01-12 15:45:00'],
                                   dtype='datetime64[ns]', freq='345T',
                                   tz=None)
        expected_2 = DatetimeIndex(['2005-01-13 10:00:00',
                                    '2005-01-13 15:45:00'],
                                   dtype='datetime64[ns]', freq='345T',
                                   tz=None)
        tm.assert_index_equal(result_1, expected_1)
        tm.assert_index_equal(result_2, expected_2)

    def test_cached_range_bug(self):
        """A ``DateOffset(hours=6)`` freq gives the requested length/start."""
        rng = date_range('2010-09-01 05:00:00', periods=50,
                         freq=DateOffset(hours=6))
        assert len(rng) == 50
        assert rng[0] == datetime(2010, 9, 1, 5)

    def test_timezone_comparaison_bug(self):
        """A tz-aware start with the same ``tz`` argument works."""
        # smoke test
        start = Timestamp('20130220 10:00', tz='US/Eastern')
        result = date_range(start, periods=2, tz='US/Eastern')
        assert len(result) == 2

    def test_timezone_comparaison_assert(self):
        """A tz-aware start with a different ``tz`` argument raises."""
        start = Timestamp('20130220 10:00', tz='US/Eastern')
        msg = 'Inferred time zone not equal to passed time zone'
        with pytest.raises(AssertionError, match=msg):
            date_range(start, periods=2, tz='Europe/Berlin')

    def test_negative_non_tick_frequency_descending_dates(self,
                                                          tz_aware_fixture):
        """A '-1MS' range equals the reversed ascending '1MS' range."""
        # GH 23270
        tz = tz_aware_fixture
        result = pd.date_range(start='2011-06-01', end='2011-01-01',
                               freq='-1MS', tz=tz)
        expected = pd.date_range(end='2011-06-01', start='2011-01-01',
                                 freq='1MS', tz=tz)[::-1]
        tm.assert_index_equal(result, expected)
class TestGenRangeGeneration(object):
    """Tests for ``generate_range`` plus a few related ``date_range`` cases."""

    def test_generate(self):
        """A ``BDay()`` offset and the 'B' alias generate the same dates."""
        from_offset = list(generate_range(START, END, offset=BDay()))
        from_alias = list(generate_range(START, END, offset='B'))
        assert from_offset == from_alias

    def test_generate_cday(self):
        """A ``CDay()`` offset and the 'C' alias generate the same dates."""
        from_offset = list(generate_range(START, END, offset=CDay()))
        from_alias = list(generate_range(START, END, offset='C'))
        assert from_offset == from_alias

    def test_1(self):
        """start plus ``periods=2`` yields two consecutive days."""
        generated = list(generate_range(start=datetime(2009, 3, 25),
                                        periods=2))
        assert generated == [datetime(2009, 3, 25), datetime(2009, 3, 26)]

    def test_2(self):
        """start and end three days apart yield all three days."""
        generated = list(generate_range(start=datetime(2008, 1, 1),
                                        end=datetime(2008, 1, 3)))
        wanted = [datetime(2008, 1, day) for day in (1, 2, 3)]
        assert generated == wanted

    def test_3(self):
        """This start/end pair yields no dates at all."""
        generated = list(generate_range(start=datetime(2008, 1, 5),
                                        end=datetime(2008, 1, 6)))
        assert generated == []

    def test_precision_finer_than_offset(self):
        """A start with seconds keeps them under 'Q' and 'W' frequencies.

        See GH#9907.
        """
        quarterly = pd.date_range(start='2015-04-15 00:00:03',
                                  end='2016-04-22 00:00:00', freq='Q')
        weekly = pd.date_range(start='2015-04-15 00:00:03',
                               end='2015-06-22 00:00:04', freq='W')

        quarterly_labels = ['2015-06-30 00:00:03', '2015-09-30 00:00:03',
                            '2015-12-31 00:00:03', '2016-03-31 00:00:03']
        weekly_labels = ['2015-04-19 00:00:03', '2015-04-26 00:00:03',
                         '2015-05-03 00:00:03', '2015-05-10 00:00:03',
                         '2015-05-17 00:00:03', '2015-05-24 00:00:03',
                         '2015-05-31 00:00:03', '2015-06-07 00:00:03',
                         '2015-06-14 00:00:03', '2015-06-21 00:00:03']

        expected_quarterly = DatetimeIndex(quarterly_labels,
                                           dtype='datetime64[ns]',
                                           freq='Q-DEC', tz=None)
        expected_weekly = DatetimeIndex(weekly_labels,
                                        dtype='datetime64[ns]',
                                        freq='W-SUN', tz=None)

        tm.assert_index_equal(quarterly, expected_quarterly)
        tm.assert_index_equal(weekly, expected_weekly)

    # Dates and zones used to build the mismatching-tz parametrization.
    dt1, dt2 = '2017-01-01', '2017-01-01'
    tz1, tz2 = 'US/Eastern', 'Europe/London'

    @pytest.mark.parametrize("start,end", [
        (pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2)),
        (pd.Timestamp(dt1), pd.Timestamp(dt2, tz=tz2)),
        (pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2, tz=tz2)),
        (pd.Timestamp(dt1, tz=tz2), pd.Timestamp(dt2, tz=tz1))
    ])
    def test_mismatching_tz_raises_err(self, start, end):
        """Endpoints whose time zones disagree raise TypeError (issue 18488)."""
        with pytest.raises(TypeError):
            pd.date_range(start, end)

        business_day = BDay()
        with pytest.raises(TypeError):
            pd.date_range(start, end, freq=business_day)
class TestBusinessDateRange(object):
    """Tests for ``bdate_range`` and business-day ``date_range`` calls."""

    def test_constructor(self):
        """Valid argument combinations work; misplaced arguments raise."""
        bdate_range(START, END, freq=BDay())
        bdate_range(START, periods=20, freq=BDay())
        bdate_range(end=START, periods=20, freq=BDay())

        # 'B' in the third positional slot is taken as ``periods``.
        periods_msg = 'periods must be a number, got B'
        for range_func in (date_range, bdate_range):
            with pytest.raises(TypeError, match=periods_msg):
                range_func('2011-1-1', '2012-1-1', 'B')

        freq_msg = ('freq must be specified for bdate_range; '
                    'use date_range instead')
        with pytest.raises(TypeError, match=freq_msg):
            bdate_range(START, END, periods=10, freq=None)

    def test_naive_aware_conflicts(self):
        """Joining tz-naive and tz-aware ranges raises in both directions."""
        naive = bdate_range(START, END, freq=BDay(), tz=None)
        aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong")

        pattern = 'tz-naive.*tz-aware'
        for left, right in ((naive, aware), (aware, naive)):
            with pytest.raises(TypeError, match=pattern):
                left.join(right)

    def test_misc(self):
        """An end/periods range spans 19 business days back from ``end``."""
        end = datetime(2009, 5, 13)
        business_days = bdate_range(end=end, periods=20)
        first_date = end - 19 * BDay()

        assert len(business_days) == 20
        assert business_days[0] == first_date
        assert business_days[-1] == end

    def test_date_parse_failure(self):
        """A malformed date string raises ValueError wherever it is used."""
        badly_formed_date = '2007/100/1'

        with pytest.raises(ValueError):
            Timestamp(badly_formed_date)

        keyword_cases = ({'start': badly_formed_date, 'periods': 10},
                         {'end': badly_formed_date, 'periods': 10})
        for kwargs in keyword_cases:
            with pytest.raises(ValueError):
                bdate_range(**kwargs)

        with pytest.raises(ValueError):
            bdate_range(badly_formed_date, badly_formed_date)

    def test_daterange_bug_456(self):
        """The union of two business-day ranges is a DatetimeIndex (GH #456)."""
        single_day = bdate_range('12/5/2011', '12/5/2011')
        two_days = bdate_range('12/2/2011', '12/5/2011')
        two_days.freq = BDay()

        combined = single_day.union(two_days)
        assert isinstance(combined, DatetimeIndex)

    @pytest.mark.parametrize('closed', ['left', 'right'])
    def test_bdays_and_open_boundaries(self, closed):
        """Weekend endpoints with freq 'B' give Monday through Friday.

        See GH 6673.
        """
        saturday = '2018-07-21'
        sunday = '2018-07-29'
        result = pd.date_range(saturday, sunday, freq='B', closed=closed)

        monday = '2018-07-23'
        friday = '2018-07-27'
        expected = pd.date_range(monday, friday, freq='D')

        tm.assert_index_equal(result, expected)
class TestCustomDateRange(object):
    """Tests for ``bdate_range`` with custom business-day frequencies."""

    def test_constructor(self):
        """Valid argument combinations work; a misplaced 'C' raises."""
        bdate_range(START, END, freq=CDay())
        bdate_range(START, periods=20, freq=CDay())
        bdate_range(end=START, periods=20, freq=CDay())

        # 'C' in the third positional slot is taken as ``periods``.
        periods_msg = 'periods must be a number, got C'
        for range_func in (date_range, bdate_range):
            with pytest.raises(TypeError, match=periods_msg):
                range_func('2011-1-1', '2012-1-1', 'C')

    def test_misc(self):
        """An end/periods range spans 19 custom days back from ``end``."""
        end = datetime(2009, 5, 13)
        custom_days = bdate_range(end=end, periods=20, freq='C')
        first_date = end - 19 * CDay()

        assert len(custom_days) == 20
        assert custom_days[0] == first_date
        assert custom_days[-1] == end

    def test_daterange_bug_456(self):
        """The union of two custom-day ranges is a DatetimeIndex (GH #456)."""
        single_day = bdate_range('12/5/2011', '12/5/2011', freq='C')
        two_days = bdate_range('12/2/2011', '12/5/2011', freq='C')
        two_days.freq = CDay()

        combined = single_day.union(two_days)
        assert isinstance(combined, DatetimeIndex)

    def test_cdaterange(self):
        """Three custom business days starting from 2013-05-01."""
        produced = bdate_range('2013-05-01', periods=3, freq='C')
        wanted = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03'])
        tm.assert_index_equal(produced, wanted)

    def test_cdaterange_weekmask(self):
        """A Sun-Thu weekmask changes which days are produced."""
        produced = bdate_range('2013-05-01', periods=3, freq='C',
                               weekmask='Sun Mon Tue Wed Thu')
        wanted = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05'])
        tm.assert_index_equal(produced, wanted)

        # raise with non-custom freq
        error_text = ('a custom frequency string is required when holidays or '
                      'weekmask are passed, got frequency B')
        with pytest.raises(ValueError, match=error_text):
            bdate_range('2013-05-01', periods=3,
                        weekmask='Sun Mon Tue Wed Thu')

    def test_cdaterange_holidays(self):
        """A holiday on the start date pushes the range forward."""
        produced = bdate_range('2013-05-01', periods=3, freq='C',
                               holidays=['2013-05-01'])
        wanted = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06'])
        tm.assert_index_equal(produced, wanted)

        # raise with non-custom freq
        error_text = ('a custom frequency string is required when holidays or '
                      'weekmask are passed, got frequency B')
        with pytest.raises(ValueError, match=error_text):
            bdate_range('2013-05-01', periods=3, holidays=['2013-05-01'])

    def test_cdaterange_weekmask_and_holidays(self):
        """Weekmask and holidays can be combined."""
        produced = bdate_range('2013-05-01', periods=3, freq='C',
                               weekmask='Sun Mon Tue Wed Thu',
                               holidays=['2013-05-01'])
        wanted = DatetimeIndex(['2013-05-02', '2013-05-05', '2013-05-06'])
        tm.assert_index_equal(produced, wanted)

        # raise with non-custom freq
        error_text = ('a custom frequency string is required when holidays or '
                      'weekmask are passed, got frequency B')
        with pytest.raises(ValueError, match=error_text):
            bdate_range('2013-05-01', periods=3,
                        weekmask='Sun Mon Tue Wed Thu',
                        holidays=['2013-05-01'])

    @pytest.mark.parametrize('freq', [freq for freq in prefix_mapping
                                      if freq.startswith('C')])
    def test_all_custom_freq(self, freq):
        """Every 'C'-prefixed frequency is accepted; a mangled one raises."""
        # should not raise
        bdate_range(START, END, freq=freq, weekmask='Mon Wed Fri',
                    holidays=['2009-03-14'])

        bad_freq = freq + 'FOO'
        error_text = 'invalid custom frequency string: {freq}'.format(
            freq=bad_freq)
        with pytest.raises(ValueError, match=error_text):
            bdate_range(START, END, freq=bad_freq)

    @pytest.mark.parametrize('start_end', [
        ('2018-01-01T00:00:01.000Z', '2018-01-03T00:00:01.000Z'),
        ('2018-01-01T00:00:00.010Z', '2018-01-03T00:00:00.010Z'),
        ('2001-01-01T00:00:00.010Z', '2001-01-03T00:00:00.010Z')])
    def test_range_with_millisecond_resolution(self, start_end):
        """A two-period left-closed range contains only the start."""
        # https://github.com/pandas-dev/pandas/issues/24110
        first, last = start_end
        produced = pd.date_range(start=first, end=last, periods=2,
                                 closed='left')
        wanted = DatetimeIndex([first])
        tm.assert_index_equal(produced, wanted)
@@ -1,436 +0,0 @@
from datetime import date
import dateutil
import numpy as np
import pytest
from pandas.compat import lrange
import pandas as pd
from pandas import (
DataFrame, DatetimeIndex, Index, Timestamp, date_range, offsets)
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal
# Module-level shorthand for numpy's standard-normal sampler.
randn = np.random.randn
class TestDatetimeIndex(object):
def test_roundtrip_pickle_with_tz(self):
    """A named tz-aware index is equal after a pickle round trip.

    See GH 8367.
    """
    original = date_range('20130101', periods=3, tz='US/Eastern', name='foo')
    restored = tm.round_trip_pickle(original)
    tm.assert_index_equal(original, restored)
def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self):
# GH7774
index = date_range('20130101', periods=3, tz='US/Eastern')
assert str(index.reindex([])[0].tz) == 'US/Eastern'
assert str(index.reindex(np.array([]))[0].tz) == 'US/Eastern'
def test_time_loc(self):  # GH8667
    """Lookup by ``datetime.time`` for lengths just below and above
    ``_SIZE_CUTOFF``: ``get_loc``, ``__getitem__`` and in-place update."""
    from datetime import time
    from pandas._libs.index import _SIZE_CUTOFF

    key = time(15, 11, 30)
    first_hit = key.hour * 3600 + key.minute * 60 + key.second
    seconds_per_day = 24 * 3600

    for length in _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64):
        idx = pd.date_range('2014-11-26', periods=length, freq='S')
        ts = pd.Series(np.random.randn(length), index=idx)
        # second-frequency index: the key recurs once per day
        positions = np.arange(first_hit, length, seconds_per_day)

        tm.assert_numpy_array_equal(ts.index.get_loc(key), positions,
                                    check_dtype=False)
        tm.assert_series_equal(ts[key], ts.iloc[positions])

        by_label, by_position = ts.copy(), ts.copy()
        by_label[key] *= -10
        by_position.iloc[positions] *= -10
        tm.assert_series_equal(by_label, by_position)
def test_time_overflow_for_32bit_machines(self):
    """``date_range`` returns ``periods`` entries when ``periods`` is an
    ``np.int_``, anchored at either ``start`` or ``end``."""
    # GH8943. On some machines NumPy defaults to np.int32 (for example,
    # 32-bit Linux machines). In the function _generate_regular_range
    # found in tseries/index.py, `periods` gets multiplied by `strides`
    # (which has value 1e9) and since the max value for np.int32 is ~2e9,
    # and since those machines won't promote np.int32 to np.int64, we get
    # overflow.
    n_periods = np.int_(1000)

    for anchor in ('start', 'end'):
        idx = pd.date_range(periods=n_periods, freq='S', **{anchor: '2000'})
        assert len(idx) == n_periods
def test_nat(self):
assert DatetimeIndex([np.nan])[0] is pd.NaT
def test_week_of_month_frequency(self):
    """Append/union of Sunday dates and a ``WOM-1SAT`` range both work."""
    # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise
    early = date(2002, 9, 1)
    late = date(2013, 10, 27)
    middle = date(2012, 9, 30)
    left = DatetimeIndex([early, late])
    right = DatetimeIndex([middle])

    appended = left.append(right)
    tm.assert_index_equal(appended, DatetimeIndex([early, late, middle]))

    united = left.union(right)
    tm.assert_index_equal(united, DatetimeIndex([early, middle, late]))

    # GH 5115
    first_saturdays = ['2013-01-05', '2013-02-02', '2013-03-02', '2013-04-06']
    generated = date_range("2013-1-1", periods=4, freq='WOM-1SAT')
    tm.assert_index_equal(generated,
                          DatetimeIndex(first_saturdays, freq='WOM-1SAT'))
def test_hash_error(self):
    """Hashing the index raises ``TypeError`` naming its class."""
    index = date_range('20010101', periods=10)
    expected_msg = "unhashable type: %r" % type(index).__name__
    with pytest.raises(TypeError, match=expected_msg):
        hash(index)
def test_stringified_slice_with_tz(self):
    """Slicing a frame with an offset-bearing timestamp string must not raise."""
    # GH#2658
    import datetime

    daily = date_range(start=datetime.datetime.now(), freq="1d", periods=10)
    frame = DataFrame(lrange(10), index=daily)
    frame["2013-01-14 23:44:34.437768-05:00":]  # no exception here
def test_append_join_nondatetimeindex(self):
    """Appending/joining a string index keeps leading entries as Timestamps."""
    dates = date_range('1/1/2000', periods=10)
    labels = Index(['a', 'b', 'c', 'd'])

    combined = dates.append(labels)
    assert isinstance(combined[0], Timestamp)

    # it works
    dates.join(labels, how='outer')
def test_map(self):
    """``map`` with a formatter matches formatting each element by hand."""
    def as_compact_date(stamp):
        return stamp.strftime('%Y%m%d')

    rng = date_range('1/1/2000', periods=10)
    mapped = rng.map(as_compact_date)
    wanted = Index([as_compact_date(stamp) for stamp in rng], dtype='<U8')
    tm.assert_index_equal(mapped, wanted)
def test_map_fallthrough(self, capsys):
    """Mapping timestamps to Periods writes nothing to stderr."""
    # GH#22067, check we don't get warnings about silently ignored errors
    def to_month(stamp):
        return pd.Period(year=stamp.year, month=stamp.month, freq='M')

    dti = date_range('2017-01-01', '2018-01-01', freq='B')
    dti.map(to_month)

    assert capsys.readouterr().err == ''
def test_iteration_preserves_tz(self):
    """Iterating yields the same timestamps as positional access."""
    # see gh-8890
    index = date_range("2012-01-01", periods=3, freq='H', tz='US/Eastern')
    for pos, stamp in enumerate(index):
        assert stamp == index[pos]

    fixed_offset = date_range("2012-01-01", periods=3, freq='H',
                              tz=dateutil.tz.tzoffset(None, -28800))
    # 9100
    from_strings = pd.DatetimeIndex(['2014-12-01 03:32:39.987000-08:00',
                                     '2014-12-01 04:12:34.987000-08:00'])
    # For these two the repr base is compared as well as the value.
    for index in (fixed_offset, from_strings):
        for pos, stamp in enumerate(index):
            by_position = index[pos]
            assert stamp._repr_base == by_position._repr_base
            assert stamp == by_position
@pytest.mark.parametrize('periods', [0, 9999, 10000, 10001])
def test_iteration_over_chunksize(self, periods):
    """Iteration visits every element, in order, for each tested length."""
    # GH21012
    index = date_range('2000-01-01 00:00:00', periods=periods, freq='min')

    seen = 0
    for seen, stamp in enumerate(index, start=1):
        assert index[seen - 1] == stamp
    assert seen == len(index)
def test_misc_coverage(self):
rng = date_range('1/1/2000', periods=5)
result = rng.groupby(rng.day)
assert isinstance(list(result.values())[0][0], Timestamp)
idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02'])
assert not idx.equals(list(idx))
non_datetime = Index(list('abc'))
assert not idx.equals(list(non_datetime))
def test_string_index_series_name_converted(self):
# #1644
df = DataFrame(np.random.randn(10, 4),
index=date_range('1/1/2000', periods=10))
result = df.loc['1/3/2000']
assert result.name == df.index[2]
result = df.T['1/3/2000']
assert result.name == df.index[2]
def test_get_duplicates(self):
    """``get_duplicates`` warns and returns each repeated date once."""
    idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-02',
                         '2000-01-03', '2000-01-03', '2000-01-04'])

    with tm.assert_produces_warning(FutureWarning):
        # Deprecated - see GH20239
        dupes = idx.get_duplicates()

    tm.assert_index_equal(dupes, DatetimeIndex(['2000-01-02', '2000-01-03']))
def test_argmin_argmax(self):
idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02'])
assert idx.argmin() == 1
assert idx.argmax() == 0
def test_sort_values(self):
    """``sort_values`` in both directions, with and without the indexer."""
    idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02'])

    assert idx.sort_values().is_monotonic
    assert idx.sort_values(ascending=False)[::-1].is_monotonic

    ascending, indexer = idx.sort_values(return_indexer=True)
    assert ascending.is_monotonic
    tm.assert_numpy_array_equal(indexer,
                                np.array([1, 2, 0], dtype=np.intp))

    descending, indexer = idx.sort_values(return_indexer=True,
                                          ascending=False)
    assert descending[::-1].is_monotonic
    tm.assert_numpy_array_equal(indexer,
                                np.array([0, 2, 1], dtype=np.intp))
def test_map_bug_1677(self):
    """Mapping the index through its own bound ``asof`` matches a direct call."""
    index = DatetimeIndex(['2012-04-25 09:30:00.393000'])
    asof = index.asof

    mapped = index.map(asof)
    wanted = Index([asof(index[0])])
    tm.assert_index_equal(mapped, wanted)
def test_groupby_function_tuple_1677(self):
df = DataFrame(np.random.rand(100),
index=date_range("1/1/2000", periods=100))
monthly_group = df.groupby(lambda x: (x.year, x.month))
result = monthly_group.mean()
assert isinstance(result.index[0], tuple)
def test_append_numpy_bug_1681(self):
    """Appending a frame with a datetime column to an empty frame keeps it."""
    # another datetime64 bug
    dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI')
    populated = DataFrame({'A': 'foo', 'B': dr}, index=dr)

    combined = DataFrame().append(populated)
    assert (combined['B'] == dr).all()
def test_isin(self):
    """``isin`` against itself, a list of itself, and a mixed list."""
    index = tm.makeDateIndex(4)

    for candidates in (index, list(index)):
        assert index.isin(candidates).all()

    mixed = [index[2], 5]
    assert_almost_equal(index.isin(mixed),
                        np.array([False, False, True, False]))
def test_does_not_convert_mixed_integer(self):
    """Joining an object-dtype result back to datetime columns stays object."""
    def random_cell(*args, **kwargs):
        return randn()

    df = tm.makeCustomDataframe(10, 10, data_gen_f=random_cell,
                                r_idx_type='i', c_idx_type='dt')
    cols = df.columns.join(df.index, how='outer')
    rejoined = cols.join(df.columns)

    assert cols.dtype == np.dtype('O')
    assert cols.dtype == rejoined.dtype
    tm.assert_numpy_array_equal(cols.values, rejoined.values)
def test_join_self(self, join_type):
    """Joining an index with itself returns the very same object."""
    index = date_range('1/1/2000', periods=10)
    self_joined = index.join(index, how=join_type)
    assert index is self_joined
def assert_index_parameters(self, index):
    """Assert that both ``freq`` and ``inferred_freq`` equal ``'40960N'``."""
    expected_freq = '40960N'
    assert index.freq == expected_freq
    assert index.inferred_freq == expected_freq
def test_ns_index(self):
    """A nanosecond-frequency index keeps its freq, also when rebuilt
    via ``date_range`` from its own endpoints."""
    sample_count = 400
    step_ns = int(1e9 / 24414)
    origin = np.datetime64('2012-09-20T00:00:00')
    stamps = origin + np.arange(sample_count) * np.timedelta64(step_ns, 'ns')

    index = pd.DatetimeIndex(stamps, freq=step_ns * offsets.Nano(),
                             name='time')
    self.assert_index_parameters(index)

    rebuilt = pd.date_range(start=index[0], end=index[-1],
                            freq=index.freq)
    self.assert_index_parameters(rebuilt)
def test_join_with_period_index(self, join_type):
    """Joining period columns with a datetime index raises ``ValueError``."""
    def random_bit(*args):
        return np.random.randint(2)

    df = tm.makeCustomDataframe(10, 10, data_gen_f=random_bit,
                                c_idx_type='p', r_idx_type='dt')
    first_rows = df.iloc[:5, 0]

    expected_msg = 'can only call with other PeriodIndex-ed objects'
    with pytest.raises(ValueError, match=expected_msg):
        df.columns.join(first_rows.index, how=join_type)
def test_factorize(self):
    """``factorize`` codes and uniques, sorted and unsorted, with tz and freq."""
    def check(index, want_codes, want_uniques, **kwargs):
        # Factorize ``index`` and compare both outputs with the expectations.
        codes, uniques = index.factorize(**kwargs)
        tm.assert_numpy_array_equal(codes, want_codes)
        tm.assert_index_equal(uniques, want_uniques)

    idx1 = DatetimeIndex(['2014-01', '2014-01', '2014-02', '2014-02',
                          '2014-03', '2014-03'])
    exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
    exp_idx = DatetimeIndex(['2014-01', '2014-02', '2014-03'])
    check(idx1, exp_arr, exp_idx)
    check(idx1, exp_arr, exp_idx, sort=True)

    # tz must be preserved
    idx1 = idx1.tz_localize('Asia/Tokyo')
    exp_idx = exp_idx.tz_localize('Asia/Tokyo')
    check(idx1, exp_arr, exp_idx)

    idx2 = pd.DatetimeIndex(['2014-03', '2014-03', '2014-02', '2014-01',
                             '2014-03', '2014-01'])
    check(idx2,
          np.array([2, 2, 1, 0, 2, 0], dtype=np.intp),
          DatetimeIndex(['2014-01', '2014-02', '2014-03']),
          sort=True)
    check(idx2,
          np.array([0, 0, 1, 2, 0, 2], dtype=np.intp),
          DatetimeIndex(['2014-03', '2014-02', '2014-01']))

    # freq must be preserved
    idx3 = date_range('2000-01', periods=4, freq='M', tz='Asia/Tokyo')
    check(idx3, np.array([0, 1, 2, 3], dtype=np.intp), idx3)
def test_factorize_tz(self, tz_naive_fixture):
    """Factorizing repeated hourly stamps recovers the base index,
    for both the index and a Series built from it."""
    # GH#13750
    base = pd.date_range('2016-11-05', freq='H', periods=100,
                         tz=tz_naive_fixture)
    repeated = base.repeat(5)
    want_codes = np.arange(100, dtype=np.intp).repeat(5)

    for container in (repeated, pd.Series(repeated)):
        codes, uniques = container.factorize()
        tm.assert_numpy_array_equal(codes, want_codes)
        tm.assert_index_equal(uniques, base)
def test_factorize_dst(self):
    """Twelve distinct hourly US/Eastern stamps factorize to themselves,
    for two different start dates."""
    # GH 13750
    for start in ('2016-11-06', '2016-06-13'):
        idx = pd.date_range(start, freq='H', periods=12,
                            tz='US/Eastern')

        for container in (idx, pd.Series(idx)):
            codes, uniques = container.factorize()
            tm.assert_numpy_array_equal(codes,
                                        np.arange(12, dtype=np.intp))
            tm.assert_index_equal(uniques, idx)
@pytest.mark.parametrize('arr, expected', [
    (pd.DatetimeIndex(['2017', '2017']), pd.DatetimeIndex(['2017'])),
    (pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern'),
     pd.DatetimeIndex(['2017'], tz='US/Eastern')),
])
def test_unique(self, arr, expected):
    """``unique`` collapses duplicates, for naive and tz-aware input."""
    deduplicated = arr.unique()
    tm.assert_index_equal(deduplicated, expected)

    # GH 21737
    # Ensure the underlying data is consistent
    assert deduplicated[0] == expected[0]
def test_asarray_tz_naive(self):
    """``np.asarray`` on a naive index gives M8[ns], or objects on request,
    without any warning."""
    # This shouldn't produce a warning.
    idx = pd.date_range('2000', periods=2)

    # M8[ns] by default
    with tm.assert_produces_warning(None):
        as_datetime64 = np.asarray(idx)
    tm.assert_numpy_array_equal(
        as_datetime64,
        np.array(['2000-01-01', '2000-01-02'], dtype='M8[ns]'))

    # optionally, object
    with tm.assert_produces_warning(None):
        as_objects = np.asarray(idx, dtype=object)
    stamps = [pd.Timestamp('2000-01-01'), pd.Timestamp('2000-01-02')]
    tm.assert_numpy_array_equal(as_objects, np.array(stamps))
def test_asarray_tz_aware(self):
    """``np.asarray`` on a tz-aware index: FutureWarning by default,
    silent when an explicit dtype is given."""
    tz = 'US/Central'
    idx = pd.date_range('2000', periods=2, tz=tz)
    as_m8 = np.array(['2000-01-01T06', '2000-01-02T06'], dtype='M8[ns]')

    # We warn by default and return an ndarray[M8[ns]]
    with tm.assert_produces_warning(FutureWarning):
        converted = np.asarray(idx)
    tm.assert_numpy_array_equal(converted, as_m8)

    # Old behavior with no warning
    with tm.assert_produces_warning(None):
        converted = np.asarray(idx, dtype="M8[ns]")
    tm.assert_numpy_array_equal(converted, as_m8)

    # Future behavior with no warning
    as_objects = np.array([pd.Timestamp("2000-01-01", tz=tz),
                           pd.Timestamp("2000-01-02", tz=tz)])
    with tm.assert_produces_warning(None):
        converted = np.asarray(idx, dtype=object)
    tm.assert_numpy_array_equal(converted, as_objects)
@@ -1,31 +0,0 @@
""" generic tests from the Datetimelike class """
from pandas import DatetimeIndex, date_range
from pandas.util import testing as tm
from ..datetimelike import DatetimeLike
class TestDatetimeIndex(DatetimeLike):
    """Runs the inherited ``DatetimeLike`` tests with ``DatetimeIndex``."""

    _holder = DatetimeIndex

    def setup_method(self, method):
        """Build the fixture indices and hand them to ``setup_indices``."""
        self.indices = {
            'index': tm.makeDateIndex(10),
            'index_dec': date_range('20130110', periods=10, freq='-1D'),
        }
        self.setup_indices()

    def create_index(self):
        """Return a five-entry daily index starting 2013-01-01."""
        return date_range('20130101', periods=5)

    def test_shift(self):
        # handled in test_ops
        pass

    def test_pickle_compat_construction(self):
        # deliberately a no-op override
        pass

    def test_intersection(self):
        # handled in test_setops
        pass

    def test_union(self):
        # handled in test_setops
        pass
@@ -1,221 +0,0 @@
from datetime import datetime
import dateutil.tz
import numpy as np
import pytest
import pytz
import pandas as pd
from pandas import DatetimeIndex, Series
import pandas.util.testing as tm
def test_to_native_types():
    """``to_native_types``: defaults, ``na_rep``, slicing, date format, NaT."""
    def check(idx, wanted, *args, **kwargs):
        # Compare the conversion with an object array built from ``wanted``.
        expected = np.array(wanted, dtype=object)
        result = idx.to_native_types(*args, **kwargs)
        tm.assert_numpy_array_equal(result, expected)

    index = pd.date_range(freq='1D', periods=3, start='2017-01-01')
    all_days = ['2017-01-01', '2017-01-02', '2017-01-03']

    # First, with no arguments.
    check(index, all_days)

    # No NaN values, so na_rep has no effect
    check(index, all_days, na_rep='pandas')

    # Make sure slicing works
    check(index, ['2017-01-01', '2017-01-03'], [0, 2])

    # Make sure date formatting works
    check(index, ['01-2017-01', '01-2017-02', '01-2017-03'],
          date_format='%m-%Y-%d')

    # NULL object handling should work
    index = DatetimeIndex(['2017-01-01', pd.NaT, '2017-01-03'])
    check(index, ['2017-01-01', 'NaT', '2017-01-03'])
    check(index, ['2017-01-01', 'pandas', '2017-01-03'], na_rep='pandas')
class TestDatetimeIndexRendering(object):
def test_dti_repr_short(self):
dr = pd.date_range(start='1/1/2012', periods=1)
repr(dr)
dr = pd.date_range(start='1/1/2012', periods=2)
repr(dr)
dr = pd.date_range(start='1/1/2012', periods=3)
repr(dr)
@pytest.mark.parametrize('method', ['__repr__', '__unicode__', '__str__'])
def test_dti_representation(self, method):
    """Each string-conversion method renders the expected text."""
    indexes = [
        DatetimeIndex([], freq='D'),
        DatetimeIndex(['2011-01-01'], freq='D'),
        DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D'),
        DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
                      freq='D'),
        DatetimeIndex(
            ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'],
            freq='H', tz='Asia/Tokyo'),
        DatetimeIndex(
            ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT],
            tz='US/Eastern'),
        DatetimeIndex(
            ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='UTC'),
    ]

    renderings = [
        "DatetimeIndex([], dtype='datetime64[ns]', freq='D')",
        ("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', "
         "freq='D')"),
        ("DatetimeIndex(['2011-01-01', '2011-01-02'], "
         "dtype='datetime64[ns]', freq='D')"),
        ("DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], "
         "dtype='datetime64[ns]', freq='D')"),
        ("DatetimeIndex(['2011-01-01 09:00:00+09:00', "
         "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']"
         ", dtype='datetime64[ns, Asia/Tokyo]', freq='H')"),
        ("DatetimeIndex(['2011-01-01 09:00:00-05:00', "
         "'2011-01-01 10:00:00-05:00', 'NaT'], "
         "dtype='datetime64[ns, US/Eastern]', freq=None)"),
        ("DatetimeIndex(['2011-01-01 09:00:00+00:00', "
         "'2011-01-01 10:00:00+00:00', 'NaT'], "
         "dtype='datetime64[ns, UTC]', freq=None)"),
    ]

    with pd.option_context('display.width', 300):
        for index, wanted in zip(indexes, renderings):
            render = getattr(index, method)
            assert render() == wanted
def test_dti_representation_to_series(self):
    """``repr`` of a Series built from each index matches the expected text."""
    # Each entry pairs an index with the repr expected for Series(index).
    cases = [
        (DatetimeIndex([], freq='D'),
         "Series([], dtype: datetime64[ns])"),
        (DatetimeIndex(['2011-01-01'], freq='D'),
         ("0 2011-01-01\n"
          "dtype: datetime64[ns]")),
        (DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D'),
         ("0 2011-01-01\n"
          "1 2011-01-02\n"
          "dtype: datetime64[ns]")),
        (DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
                       freq='D'),
         ("0 2011-01-01\n"
          "1 2011-01-02\n"
          "2 2011-01-03\n"
          "dtype: datetime64[ns]")),
        (DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
                        '2011-01-01 11:00'], freq='H', tz='Asia/Tokyo'),
         ("0 2011-01-01 09:00:00+09:00\n"
          "1 2011-01-01 10:00:00+09:00\n"
          "2 2011-01-01 11:00:00+09:00\n"
          "dtype: datetime64[ns, Asia/Tokyo]")),
        (DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT],
                       tz='US/Eastern'),
         ("0 2011-01-01 09:00:00-05:00\n"
          "1 2011-01-01 10:00:00-05:00\n"
          "2 NaT\n"
          "dtype: datetime64[ns, US/Eastern]")),
        (DatetimeIndex(['2011-01-01 09:00', '2011-01-02 10:15']),
         ("0 2011-01-01 09:00:00\n"
          "1 2011-01-02 10:15:00\n"
          "dtype: datetime64[ns]")),
    ]

    with pd.option_context('display.width', 300):
        for index, wanted in cases:
            assert repr(Series(index)) == wanted
def test_dti_summary(self):
    """``_summary`` text for empty, short, tz-aware and NaT-ending indexes."""
    # GH#9116
    # Each entry pairs an index with its expected summary string.
    cases = [
        (DatetimeIndex([], freq='D'),
         ("DatetimeIndex: 0 entries\n"
          "Freq: D")),
        (DatetimeIndex(['2011-01-01'], freq='D'),
         ("DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01\n"
          "Freq: D")),
        (DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D'),
         ("DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02\n"
          "Freq: D")),
        (DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
                       freq='D'),
         ("DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03\n"
          "Freq: D")),
        (DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
                        '2011-01-01 11:00'],
                       freq='H', tz='Asia/Tokyo'),
         ("DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 "
          "to 2011-01-01 11:00:00+09:00\n"
          "Freq: H")),
        (DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT],
                       tz='US/Eastern'),
         "DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT"),
    ]

    for index, wanted in cases:
        assert index._summary() == wanted
def test_dti_business_repr(self):
# only really care that it works
repr(pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1)))
def test_dti_business_summary(self):
rng = pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1))
rng._summary()
rng[2:2]._summary()
def test_dti_business_summary_pytz(self):
    """``_summary`` runs on a business-day range with a pytz UTC timezone."""
    rng = pd.bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc)
    rng._summary()
def test_dti_business_summary_dateutil(self):
    """``_summary`` runs on a business-day range with a dateutil UTC timezone."""
    utc = dateutil.tz.tzutc()
    rng = pd.bdate_range('1/1/2005', '1/1/2009', tz=utc)
    rng._summary()
def test_dti_custom_business_repr(self):
# only really care that it works
repr(pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1),
freq='C'))
def test_dti_custom_business_summary(self):
rng = pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1),
freq='C')
rng._summary()
rng[2:2]._summary()
def test_dti_custom_business_summary_pytz(self):
    """``_summary`` runs on a custom-business range with a pytz UTC timezone."""
    rng = pd.bdate_range('1/1/2005', '1/1/2009', freq='C',
                         tz=pytz.utc)
    rng._summary()
def test_dti_custom_business_summary_dateutil(self):
    """``_summary`` runs on a custom-business range with a dateutil UTC tz."""
    utc = dateutil.tz.tzutc()
    rng = pd.bdate_range('1/1/2005', '1/1/2009', freq='C', tz=utc)
    rng._summary()
@@ -1,612 +0,0 @@
from datetime import datetime, time, timedelta
import numpy as np
import pytest
import pytz
import pandas.compat as compat
import pandas as pd
from pandas import DatetimeIndex, Index, Timestamp, date_range, notna
import pandas.util.testing as tm
from pandas.tseries.offsets import BDay, CDay
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
class TestGetItem(object):
def test_ellipsis(self):
# GH#21282
idx = pd.date_range('2011-01-01', '2011-01-31', freq='D',
tz='Asia/Tokyo', name='idx')
result = idx[...]
assert result.equals(idx)
assert result is not idx
def test_getitem(self):
    """Scalar and slice indexing, naive and tz-aware; slices keep a freq."""
    idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
    idx2 = pd.date_range('2011-01-01', '2011-01-31', freq='D',
                         tz='Asia/Tokyo', name='idx')

    # (slice to apply, first date, last date, freq of the expected range)
    forward_slices = [
        (slice(0, 5), '2011-01-01', '2011-01-05', 'D'),
        (slice(0, 10, 2), '2011-01-01', '2011-01-09', '2D'),
        (slice(-20, -5, 3), '2011-01-12', '2011-01-24', '3D'),
    ]

    for idx in [idx1, idx2]:
        assert idx[0] == Timestamp('2011-01-01', tz=idx.tz)

        for key, first, last, freq in forward_slices:
            result = idx[key]
            expected = pd.date_range(first, last, freq=freq,
                                     tz=idx.tz, name='idx')
            tm.assert_index_equal(result, expected)
            assert result.freq == expected.freq

        # A reversed slice is expected to carry a negative daily freq.
        result = idx[4::-1]
        expected = DatetimeIndex(['2011-01-05', '2011-01-04', '2011-01-03',
                                  '2011-01-02', '2011-01-01'],
                                 freq='-1D', tz=idx.tz, name='idx')
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq
def test_dti_business_getitem(self):
    """Slicing, striding, fancy and scalar indexing on a business-day range."""
    rng = pd.bdate_range(START, END)

    head = rng[:5]
    tm.assert_index_equal(head, DatetimeIndex(rng.view(np.ndarray)[:5]))
    assert head.freq == rng.freq

    every_fifth = rng[::5]
    assert every_fifth.freq == BDay() * 5

    reversed_head = rng[[4, 3, 2, 1, 0]]
    assert len(reversed_head) == 5
    assert isinstance(reversed_head, DatetimeIndex)
    assert reversed_head.freq is None

    # 32-bit vs. 64-bit platforms
    assert rng[4] == rng[np.int_(4)]
def test_dti_business_getitem_matplotlib_hackaround(self):
    """``rng[:, None]`` matches the same indexing on the underlying values."""
    rng = pd.bdate_range(START, END)
    as_column = rng[:, None]
    tm.assert_numpy_array_equal(as_column, rng.values[:, None])
def test_dti_custom_getitem(self):
    """Slicing, striding, fancy and scalar indexing on a custom-business range."""
    rng = pd.bdate_range(START, END, freq='C')

    head = rng[:5]
    tm.assert_index_equal(head, DatetimeIndex(rng.view(np.ndarray)[:5]))
    assert head.freq == rng.freq

    every_fifth = rng[::5]
    assert every_fifth.freq == CDay() * 5

    reversed_head = rng[[4, 3, 2, 1, 0]]
    assert len(reversed_head) == 5
    assert isinstance(reversed_head, DatetimeIndex)
    assert reversed_head.freq is None

    # 32-bit vs. 64-bit platforms
    assert rng[4] == rng[np.int_(4)]
def test_dti_custom_getitem_matplotlib_hackaround(self):
    """``rng[:, None]`` matches the same indexing on the underlying values."""
    rng = pd.bdate_range(START, END, freq='C')
    as_column = rng[:, None]
    tm.assert_numpy_array_equal(as_column, rng.values[:, None])
class TestWhere(object):
    """Tests for ``where`` on a tz-aware ``DatetimeIndex``."""

    def test_where_other(self):
        """``where`` accepts a NaN/NaT scalar, an Index, or an ndarray as
        ``other``."""
        # other is ndarray or Index
        i = pd.date_range('20130101', periods=3, tz='US/Eastern')

        # Pass each missing-value scalar in turn. ``i`` comes straight from
        # date_range, so the mask is all True and the index is expected back
        # unchanged.
        for arr in [np.nan, pd.NaT]:
            result = i.where(notna(i), other=arr)
            expected = i
            tm.assert_index_equal(result, expected)

        # First two entries replaced by NaT, the last one taken from ``i``.
        i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())

        result = i.where(notna(i2), i2)
        tm.assert_index_equal(result, i2)

        # Same replacement values supplied as a raw ndarray.
        result = i.where(notna(i2), i2.values)
        tm.assert_index_equal(result, i2)

    def test_where_tz(self):
        """``where`` without ``other`` returns a tz-aware result with NaT
        where the mask is False."""
        i = pd.date_range('20130101', periods=3, tz='US/Eastern')
        result = i.where(notna(i))
        expected = i
        tm.assert_index_equal(result, expected)

        i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
        result = i.where(notna(i2))
        expected = i2
        tm.assert_index_equal(result, expected)
class TestTake(object):
def test_take(self):
    """``take`` returns the requested entries and keeps a freq only when the
    positions are evenly spaced; checked for naive and tz-aware indexes."""
    # GH#10295
    idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
    idx2 = pd.date_range('2011-01-01', '2011-01-31', freq='D',
                         tz='Asia/Tokyo', name='idx')

    for idx in [idx1, idx2]:
        result = idx.take([0])
        # ``take`` returns an index, not a scalar: check length and element
        # explicitly rather than relying on the truthiness of a one-element
        # boolean array.
        assert len(result) == 1
        assert result[0] == Timestamp('2011-01-01', tz=idx.tz)

        result = idx.take([0, 1, 2])
        expected = pd.date_range('2011-01-01', '2011-01-03', freq='D',
                                 tz=idx.tz, name='idx')
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

        result = idx.take([0, 2, 4])
        expected = pd.date_range('2011-01-01', '2011-01-05', freq='2D',
                                 tz=idx.tz, name='idx')
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

        result = idx.take([7, 4, 1])
        expected = pd.date_range('2011-01-08', '2011-01-02', freq='-3D',
                                 tz=idx.tz, name='idx')
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

        # Unevenly spaced positions: no freq is expected.
        result = idx.take([3, 2, 5])
        expected = DatetimeIndex(['2011-01-04', '2011-01-03',
                                  '2011-01-06'],
                                 freq=None, tz=idx.tz, name='idx')
        tm.assert_index_equal(result, expected)
        assert result.freq is None

        # A negative position counts from the end (-3 -> 2011-01-29).
        result = idx.take([-3, 2, 5])
        expected = DatetimeIndex(['2011-01-29', '2011-01-03',
                                  '2011-01-06'],
                                 freq=None, tz=idx.tz, name='idx')
        tm.assert_index_equal(result, expected)
        assert result.freq is None
def test_take_invalid_kwargs(self):
    """Unknown keywords raise ``TypeError``; ``out``/``mode`` raise
    ``ValueError``."""
    idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
    indices = [1, 6, 5, 9, 10, 13, 15, 3]

    unknown_kwarg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=unknown_kwarg):
        idx.take(indices, foo=2)

    # (expected message, keyword arguments that should trigger it)
    unsupported = [
        ("the 'out' parameter is not supported", {'out': indices}),
        ("the 'mode' parameter is not supported", {'mode': 'clip'}),
    ]
    for msg, kwargs in unsupported:
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, **kwargs)
# TODO: This method came from test_datetime; de-dup with version above
@pytest.mark.parametrize('tz', [None, 'US/Eastern', 'Asia/Tokyo'])
def test_take2(self, tz):
    """``take`` and list indexing agree and drop the freq."""
    idx = pd.date_range(start='2010-01-01 09:00',
                        end='2010-02-01 09:00', freq='H', tz=tz,
                        name='idx')
    positions = [5, 6, 8, 12]
    # Hours expected at those positions of a range starting 09:00.
    picked_hours = [datetime(2010, 1, 1, 14), datetime(2010, 1, 1, 15),
                    datetime(2010, 1, 1, 17), datetime(2010, 1, 1, 21)]
    expected = DatetimeIndex(picked_hours, freq=None, name='idx', tz=tz)

    for taken in (idx.take(positions), idx[positions]):
        tm.assert_index_equal(taken, expected)
        assert isinstance(taken, DatetimeIndex)
        assert taken.freq is None
        assert taken.tz == expected.tz
        assert taken.name == expected.name
def test_take_fill_value(self):
    """``take`` with ``fill_value``/``allow_fill`` on a naive index."""
    # GH#12631
    idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'],
                           name='xxx')
    indexer = np.array([1, 0, -1])
    # -1 read as "last element"
    wrapped = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'],
                               name='xxx')
    # -1 read as "missing"
    filled = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 'NaT'],
                              name='xxx')

    tm.assert_index_equal(idx.take(indexer), wrapped)

    # fill_value
    tm.assert_index_equal(idx.take(indexer, fill_value=True), filled)

    # allow_fill=False
    tm.assert_index_equal(
        idx.take(indexer, allow_fill=False, fill_value=True), wrapped)

    msg = ('When allow_fill=True and fill_value is not None, '
           'all indices must be >= -1')
    for too_negative in (-2, -5):
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, too_negative]), fill_value=True)

    with pytest.raises(IndexError):
        idx.take(np.array([1, -5]))
def test_take_fill_value_with_timezone(self):
    """``take`` with ``fill_value``/``allow_fill`` on a US/Eastern index."""
    tz = 'US/Eastern'
    idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'],
                           name='xxx', tz=tz)
    indexer = np.array([1, 0, -1])
    # -1 read as "last element"
    wrapped = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'],
                               name='xxx', tz=tz)
    # -1 read as "missing"
    filled = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 'NaT'],
                              name='xxx', tz=tz)

    tm.assert_index_equal(idx.take(indexer), wrapped)

    # fill_value
    tm.assert_index_equal(idx.take(indexer, fill_value=True), filled)

    # allow_fill=False
    tm.assert_index_equal(
        idx.take(indexer, allow_fill=False, fill_value=True), wrapped)

    msg = ('When allow_fill=True and fill_value is not None, '
           'all indices must be >= -1')
    for too_negative in (-2, -5):
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, too_negative]), fill_value=True)

    with pytest.raises(IndexError):
        idx.take(np.array([1, -5]))
class TestDatetimeIndex(object):
@pytest.mark.parametrize('null', [None, np.nan, pd.NaT])
@pytest.mark.parametrize('tz', [None, 'UTC', 'US/Eastern'])
def test_insert_nat(self, tz, null):
    """Inserting any null value at the front yields a leading ``NaT``."""
    # GH#16537, GH#18295 (test missing)
    idx = pd.DatetimeIndex(['2017-01-01'], tz=tz)
    with_null = idx.insert(0, null)
    wanted = pd.DatetimeIndex(['NaT', '2017-01-01'], tz=tz)
    tm.assert_index_equal(with_null, wanted)
def test_insert(self):
# Exercises DatetimeIndex.insert and checks the resulting values, name,
# freq and tz in four groups: an unordered index, a monthly index, a
# tz-aware index given mismatching values, and tz-aware hourly indexes.
# NOTE(review): indentation is not preserved in this copy of the file, so
# the nesting of the loops below is inferred from context -- confirm
# against the upstream source before restructuring.
idx = DatetimeIndex(
['2000-01-04', '2000-01-01', '2000-01-02'], name='idx')
result = idx.insert(2, datetime(2000, 1, 5))
exp = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-05',
'2000-01-02'], name='idx')
tm.assert_index_equal(result, exp)
# insertion of non-datetime should coerce to object index
result = idx.insert(1, 'inserted')
expected = Index([datetime(2000, 1, 4), 'inserted',
datetime(2000, 1, 1),
datetime(2000, 1, 2)], name='idx')
assert not isinstance(result, DatetimeIndex)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
idx = date_range('1/1/2000', periods=3, freq='M', name='idx')
# preserve freq
expected_0 = DatetimeIndex(['1999-12-31', '2000-01-31', '2000-02-29',
'2000-03-31'], name='idx', freq='M')
expected_3 = DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31',
'2000-04-30'], name='idx', freq='M')
# reset freq to None
expected_1_nofreq = DatetimeIndex(['2000-01-31', '2000-01-31',
'2000-02-29',
'2000-03-31'], name='idx',
freq=None)
expected_3_nofreq = DatetimeIndex(['2000-01-31', '2000-02-29',
'2000-03-31',
'2000-01-02'], name='idx',
freq=None)
# Each case is (position, value to insert, expected index). Positions 0
# and -3 share one expectation for this three-element index.
cases = [(0, datetime(1999, 12, 31), expected_0),
(-3, datetime(1999, 12, 31), expected_0),
(3, datetime(2000, 4, 30), expected_3),
(1, datetime(2000, 1, 31), expected_1_nofreq),
(3, datetime(2000, 1, 2), expected_3_nofreq)]
for n, d, expected in cases:
result = idx.insert(n, d)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
# reset freq to None
# (repeats the last entry of ``cases`` as a standalone check)
result = idx.insert(3, datetime(2000, 1, 2))
expected = DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31',
'2000-01-02'], name='idx', freq=None)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq is None
# see gh-7299
# The index is Asia/Tokyo; each value below is either naive or US/Eastern
# and the insert is expected to raise ValueError.
idx = date_range('1/1/2000', periods=3, freq='D', tz='Asia/Tokyo',
name='idx')
with pytest.raises(ValueError):
idx.insert(3, pd.Timestamp('2000-01-04'))
with pytest.raises(ValueError):
idx.insert(3, datetime(2000, 1, 4))
with pytest.raises(ValueError):
idx.insert(3, pd.Timestamp('2000-01-04', tz='US/Eastern'))
with pytest.raises(ValueError):
idx.insert(3, datetime(2000, 1, 4,
tzinfo=pytz.timezone('US/Eastern')))
for tz in ['US/Pacific', 'Asia/Singapore']:
idx = date_range('1/1/2000 09:00', periods=6, freq='H', tz=tz,
name='idx')
# preserve freq
expected = date_range('1/1/2000 09:00', periods=7, freq='H', tz=tz,
name='idx')
# The value is supplied both as a pandas Timestamp and as a
# pytz-localized datetime.
for d in [pd.Timestamp('2000-01-01 15:00', tz=tz),
pytz.timezone(tz).localize(datetime(2000, 1, 1, 15))]:
result = idx.insert(6, d)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
assert result.tz == expected.tz
expected = DatetimeIndex(['2000-01-01 09:00', '2000-01-01 10:00',
'2000-01-01 11:00',
'2000-01-01 12:00', '2000-01-01 13:00',
'2000-01-01 14:00',
'2000-01-01 10:00'], name='idx',
tz=tz, freq=None)
# reset freq to None
# (10:00 is already present, so the result is no longer evenly spaced)
for d in [pd.Timestamp('2000-01-01 10:00', tz=tz),
pytz.timezone(tz).localize(datetime(2000, 1, 1, 10))]:
result = idx.insert(6, d)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.tz == expected.tz
assert result.freq is None
def test_delete(self):
    """Delete single positions and check which deletions keep ``freq``."""
    idx = date_range(start='2000-01-01', periods=5, freq='M', name='idx')

    # preserve freq: dropping the first or the last element
    without_first = date_range(start='2000-02-01', periods=4, freq='M',
                               name='idx')
    without_last = date_range(start='2000-01-01', periods=4, freq='M',
                              name='idx')

    # reset freq to None: dropping an interior element
    without_second = DatetimeIndex(['2000-01-31', '2000-03-31',
                                    '2000-04-30', '2000-05-31'],
                                   freq=None, name='idx')

    cases = {0: without_first,
             -5: without_first,
             -1: without_last,
             4: without_last,
             1: without_second}
    for position, expected in compat.iteritems(cases):
        result = idx.delete(position)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freq == expected.freq

    with pytest.raises((IndexError, ValueError)):
        # either depending on numpy version
        idx.delete(5)

    for tz in [None, 'Asia/Tokyo', 'US/Pacific']:
        idx = date_range(start='2000-01-01 09:00', periods=10, freq='H',
                         name='idx', tz=tz)

        # (position to delete, start of the remaining 9-period range)
        for position, start in [(0, '2000-01-01 10:00'),
                                (-1, '2000-01-01 09:00')]:
            expected = date_range(start=start, periods=9,
                                  freq='H', name='idx', tz=tz)
            result = idx.delete(position)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freqstr == 'H'
            assert result.tz == expected.tz
def test_delete_slice(self):
    """Delete runs of positions, given as a tuple, a slice, or via drop."""
    idx = date_range(start='2000-01-01', periods=10, freq='D', name='idx')

    # preserve freq: a leading or trailing run is removed
    head_removed = date_range(start='2000-01-04', periods=7, freq='D',
                              name='idx')
    tail_removed = date_range(start='2000-01-01', periods=7, freq='D',
                              name='idx')

    # reset freq to None: an interior run is removed
    middle_removed = DatetimeIndex(['2000-01-01', '2000-01-02',
                                    '2000-01-03', '2000-01-07',
                                    '2000-01-08', '2000-01-09',
                                    '2000-01-10'], freq=None, name='idx')

    cases = {(0, 1, 2): head_removed,
             (7, 8, 9): tail_removed,
             (3, 4, 5): middle_removed}
    for positions, expected in compat.iteritems(cases):
        # the same positions, first as a tuple and then as a slice
        as_slice = slice(positions[0], positions[-1] + 1)
        for loc in (positions, as_slice):
            result = idx.delete(loc)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

    for tz in [None, 'Asia/Tokyo', 'US/Pacific']:
        hourly = pd.date_range(
            '2000-01-01 09:00', periods=10, freq='H', name='idx', tz=tz)
        ts = pd.Series(1, index=hourly)

        # preserve freq
        result = ts.drop(ts.index[:5]).index
        expected = pd.date_range('2000-01-01 14:00', periods=5, freq='H',
                                 name='idx', tz=tz)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freq == expected.freq
        assert result.tz == expected.tz

        # reset freq to None
        result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index
        expected = DatetimeIndex(['2000-01-01 09:00', '2000-01-01 11:00',
                                  '2000-01-01 13:00',
                                  '2000-01-01 15:00', '2000-01-01 17:00'],
                                 freq=None, name='idx', tz=tz)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freq == expected.freq
        assert result.tz == expected.tz
def test_get_loc(self):
    """Exercise ``get_loc`` with methods, tolerances, strings and times."""
    idx = pd.date_range('2000-01-01', periods=3)

    for method in [None, 'pad', 'backfill', 'nearest']:
        # the same key as Timestamp, datetime and string
        for key in (idx[1], idx[1].to_pydatetime(), str(idx[1])):
            assert idx.get_loc(key, method) == 1
        if method is not None:
            assert idx.get_loc(idx[1], method,
                               tolerance=pd.Timedelta('0 days')) == 1

    assert idx.get_loc('2000-01-01', method='nearest') == 0
    assert idx.get_loc('2000-01-01T12', method='nearest') == 1

    # a one-day tolerance given in several spellings
    for tolerance in ('1 day',
                      pd.Timedelta('1D'),
                      np.timedelta64(1, 'D'),
                      timedelta(1)):
        assert idx.get_loc('2000-01-01T12', method='nearest',
                           tolerance=tolerance) == 1

    with pytest.raises(ValueError, match='unit abbreviation w/o a number'):
        idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo')
    with pytest.raises(KeyError):
        idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours')

    one_day = pd.Timedelta('1day').to_timedelta64()
    with pytest.raises(
            ValueError,
            match='tolerance size must match target index size'):
        idx.get_loc('2000-01-01', method='nearest',
                    tolerance=[one_day, one_day])

    # partial-string keys
    assert idx.get_loc('2000', method='nearest') == slice(0, 3)
    assert idx.get_loc('2000-01', method='nearest') == slice(0, 3)

    assert idx.get_loc('1999', method='nearest') == 0
    assert idx.get_loc('2001', method='nearest') == 2

    with pytest.raises(KeyError):
        idx.get_loc('1999', method='pad')
    with pytest.raises(KeyError):
        idx.get_loc('2001', method='backfill')

    with pytest.raises(KeyError):
        idx.get_loc('foobar')
    with pytest.raises(TypeError):
        idx.get_loc(slice(2))

    idx = pd.to_datetime(['2000-01-01', '2000-01-04'])
    assert idx.get_loc('2000-01-02', method='nearest') == 0
    assert idx.get_loc('2000-01-03', method='nearest') == 1
    assert idx.get_loc('2000-01', method='nearest') == slice(0, 2)

    # time indexing
    idx = pd.date_range('2000-01-01', periods=24, freq='H')
    at_noon = idx.get_loc(time(12))
    tm.assert_numpy_array_equal(at_noon, np.array([12]),
                                check_dtype=False)
    at_half_past = idx.get_loc(time(12, 30))
    tm.assert_numpy_array_equal(at_half_past, np.array([]),
                                check_dtype=False)
    with pytest.raises(NotImplementedError):
        idx.get_loc(time(12, 30), method='pad')
def test_get_indexer(self):
    """Check ``get_indexer`` for exact, filled and tolerance lookups."""
    idx = pd.date_range('2000-01-01', periods=3)
    identity = np.array([0, 1, 2], dtype=np.intp)
    tm.assert_numpy_array_equal(idx.get_indexer(idx), identity)

    deltas = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour'])
    target = idx[0] + deltas

    # fill method -> expected positions for ``target``
    for method, positions in [('pad', [-1, 0, 1]),
                              ('backfill', [0, 1, 2]),
                              ('nearest', [0, 1, 1])]:
        tm.assert_numpy_array_equal(idx.get_indexer(target, method),
                                    np.array(positions, dtype=np.intp))

    # scalar tolerance
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, 'nearest',
                        tolerance=pd.Timedelta('1 hour')),
        np.array([0, -1, 1], dtype=np.intp))

    # list-like tolerance, one entry per target element
    tol_raw = [pd.Timedelta('1 hour'),
               pd.Timedelta('1 hour'),
               pd.Timedelta('1 hour').to_timedelta64(), ]
    tolerances = [np.timedelta64(x) for x in tol_raw]
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, 'nearest', tolerance=tolerances),
        np.array([0, -1, 1], dtype=np.intp))

    tol_bad = [pd.Timedelta('2 hour').to_timedelta64(),
               pd.Timedelta('1 hour').to_timedelta64(),
               'foo', ]
    with pytest.raises(
            ValueError, match='abbreviation w/o a number'):
        idx.get_indexer(target, 'nearest', tolerance=tol_bad)

    with pytest.raises(ValueError):
        idx.get_indexer(idx[[0]], method='nearest', tolerance='foo')
def test_reasonable_key_error(self):
    """Looking up an absent date raises ``KeyError`` matching '2000'."""
    # GH#1062
    single = DatetimeIndex(['1/3/2000'])
    with pytest.raises(KeyError, match='2000'):
        single.get_loc('1/1/2000')
@pytest.mark.parametrize('key', [pd.Timedelta(0),
                                 pd.Timedelta(1),
                                 timedelta(0)])
def test_timedelta_invalid_key(self, key):
    """Timedelta-like keys are rejected by ``get_loc`` with ``TypeError``."""
    # GH#20464
    daily = pd.date_range('1970-01-01', periods=10)
    with pytest.raises(TypeError):
        daily.get_loc(key)
def test_get_loc_nat(self):
    """``pd.NaT`` is located at the position of the 'NaT' entry."""
    # GH#20464
    with_nat = DatetimeIndex(['1/3/2000', 'NaT'])
    position = with_nat.get_loc(pd.NaT)
    assert position == 1
@@ -1,312 +0,0 @@
import calendar
import locale
import unicodedata
import numpy as np
import pytest
import pandas as pd
from pandas import (
DatetimeIndex, Index, Timestamp, compat, date_range, datetime, offsets)
import pandas.util.testing as tm
class TestTimeSeries(object):
    """Construction checks for ``Index`` and ``date_range``."""

    def test_pass_datetimeindex_to_index(self):
        # Bugs in #1396
        rng = date_range('1/1/2000', '3/1/2000')
        as_object = Index(rng, dtype=object)

        from_pydatetimes = Index(rng.to_pydatetime(), dtype=object)

        tm.assert_numpy_array_equal(as_object.values,
                                    from_pydatetimes.values)

    def test_range_edges(self):
        # GH#13672
        # Each case: (start, end, freq, values the range must contain).
        cases = [
            ('1970-01-01 00:00:00.000000001',
             '1970-01-01 00:00:00.000000004',
             'N',
             ['1970-01-01 00:00:00.000000001',
              '1970-01-01 00:00:00.000000002',
              '1970-01-01 00:00:00.000000003',
              '1970-01-01 00:00:00.000000004']),
            # start after end -> empty index
            ('1970-01-01 00:00:00.000000004',
             '1970-01-01 00:00:00.000000001',
             'N',
             []),
            # start equal to end -> single element
            ('1970-01-01 00:00:00.000000001',
             '1970-01-01 00:00:00.000000001',
             'N',
             ['1970-01-01 00:00:00.000000001']),
            ('1970-01-01 00:00:00.000001',
             '1970-01-01 00:00:00.000004',
             'U',
             ['1970-01-01 00:00:00.000001',
              '1970-01-01 00:00:00.000002',
              '1970-01-01 00:00:00.000003',
              '1970-01-01 00:00:00.000004']),
            ('1970-01-01 00:00:00.001',
             '1970-01-01 00:00:00.004',
             'L',
             ['1970-01-01 00:00:00.001',
              '1970-01-01 00:00:00.002',
              '1970-01-01 00:00:00.003',
              '1970-01-01 00:00:00.004']),
            ('1970-01-01 00:00:01',
             '1970-01-01 00:00:04',
             'S',
             ['1970-01-01 00:00:01', '1970-01-01 00:00:02',
              '1970-01-01 00:00:03', '1970-01-01 00:00:04']),
            ('1970-01-01 00:01',
             '1970-01-01 00:04',
             'T',
             ['1970-01-01 00:01', '1970-01-01 00:02',
              '1970-01-01 00:03', '1970-01-01 00:04']),
            ('1970-01-01 01:00',
             '1970-01-01 04:00',
             'H',
             ['1970-01-01 01:00', '1970-01-01 02:00',
              '1970-01-01 03:00', '1970-01-01 04:00']),
            ('1970-01-01',
             '1970-01-04',
             'D',
             ['1970-01-01', '1970-01-02',
              '1970-01-03', '1970-01-04']),
        ]
        for start, end, freq, values in cases:
            idx = pd.date_range(start=Timestamp(start),
                                end=Timestamp(end),
                                freq=freq)
            exp = DatetimeIndex(values)
            tm.assert_index_equal(idx, exp)
class TestDatetime64(object):
def test_datetimeindex_accessors(self):
# Checks the field, boolean and name accessors of DatetimeIndex on a daily
# 365-period index starting 1998-01-01 (tz-naive and US/Eastern), then the
# is_*_start / is_*_end flags for a set of anchored frequencies.
dti_naive = pd.date_range(freq='D', start=datetime(1998, 1, 1),
periods=365)
# GH#13303
dti_tz = pd.date_range(freq='D', start=datetime(1998, 1, 1),
periods=365, tz='US/Eastern')
for dti in [dti_naive, dti_tz]:
# spot checks of individual field values at known positions
assert dti.year[0] == 1998
assert dti.month[0] == 1
assert dti.day[0] == 1
assert dti.hour[0] == 0
assert dti.minute[0] == 0
assert dti.second[0] == 0
assert dti.microsecond[0] == 0
assert dti.dayofweek[0] == 3
assert dti.dayofyear[0] == 1
assert dti.dayofyear[120] == 121
assert dti.weekofyear[0] == 1
assert dti.weekofyear[120] == 18
assert dti.quarter[0] == 1
assert dti.quarter[120] == 2
assert dti.days_in_month[0] == 31
assert dti.days_in_month[90] == 30
# boolean start/end flags at month, quarter and year boundaries
assert dti.is_month_start[0]
assert not dti.is_month_start[1]
assert dti.is_month_start[31]
assert dti.is_quarter_start[0]
assert dti.is_quarter_start[90]
assert dti.is_year_start[0]
assert not dti.is_year_start[364]
assert not dti.is_month_end[0]
assert dti.is_month_end[30]
assert not dti.is_month_end[31]
assert dti.is_month_end[364]
assert not dti.is_quarter_end[0]
assert not dti.is_quarter_end[30]
assert dti.is_quarter_end[89]
assert dti.is_quarter_end[364]
assert not dti.is_year_end[0]
assert dti.is_year_end[364]
# every accessor returns one value per element of the index
assert len(dti.year) == 365
assert len(dti.month) == 365
assert len(dti.day) == 365
assert len(dti.hour) == 365
assert len(dti.minute) == 365
assert len(dti.second) == 365
assert len(dti.microsecond) == 365
assert len(dti.dayofweek) == 365
assert len(dti.dayofyear) == 365
assert len(dti.weekofyear) == 365
assert len(dti.quarter) == 365
assert len(dti.is_month_start) == 365
assert len(dti.is_month_end) == 365
assert len(dti.is_quarter_start) == 365
assert len(dti.is_quarter_end) == 365
assert len(dti.is_year_start) == 365
assert len(dti.is_year_end) == 365
assert len(dti.weekday_name) == 365
# set a name so the checks below can verify it is carried over
dti.name = 'name'
# non boolean accessors -> return Index
for accessor in DatetimeIndex._field_ops:
res = getattr(dti, accessor)
assert len(res) == 365
assert isinstance(res, Index)
assert res.name == 'name'
# boolean accessors -> return array
for accessor in DatetimeIndex._bool_ops:
res = getattr(dti, accessor)
assert len(res) == 365
assert isinstance(res, np.ndarray)
# test boolean indexing
res = dti[dti.is_quarter_start]
exp = dti[[0, 90, 181, 273]]
tm.assert_index_equal(res, exp)
# no element is flagged as a leap year, so the selection is empty
res = dti[dti.is_leap_year]
exp = DatetimeIndex([], freq='D', tz=dti.tz, name='name')
tm.assert_index_equal(res, exp)
# business-quarter-end range anchored on February: flags are summed
dti = pd.date_range(freq='BQ-FEB', start=datetime(1998, 1, 1),
periods=4)
assert sum(dti.is_quarter_start) == 0
assert sum(dti.is_quarter_end) == 4
assert sum(dti.is_year_start) == 0
assert sum(dti.is_year_end) == 1
# Ensure is_start/end accessors throw ValueError for CustomBusinessDay
bday_egypt = offsets.CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu')
dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt)
pytest.raises(ValueError, lambda: dti.is_month_start)
dti = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'])
assert dti.is_month_start[0] == 1
# (accessor value already evaluated on a Timestamp, expected value);
# booleans are compared against 1 / 0
tests = [
(Timestamp('2013-06-01', freq='M').is_month_start, 1),
(Timestamp('2013-06-01', freq='BM').is_month_start, 0),
(Timestamp('2013-06-03', freq='M').is_month_start, 0),
(Timestamp('2013-06-03', freq='BM').is_month_start, 1),
(Timestamp('2013-02-28', freq='Q-FEB').is_month_end, 1),
(Timestamp('2013-02-28', freq='Q-FEB').is_quarter_end, 1),
(Timestamp('2013-02-28', freq='Q-FEB').is_year_end, 1),
(Timestamp('2013-03-01', freq='Q-FEB').is_month_start, 1),
(Timestamp('2013-03-01', freq='Q-FEB').is_quarter_start, 1),
(Timestamp('2013-03-01', freq='Q-FEB').is_year_start, 1),
(Timestamp('2013-03-31', freq='QS-FEB').is_month_end, 1),
(Timestamp('2013-03-31', freq='QS-FEB').is_quarter_end, 0),
(Timestamp('2013-03-31', freq='QS-FEB').is_year_end, 0),
(Timestamp('2013-02-01', freq='QS-FEB').is_month_start, 1),
(Timestamp('2013-02-01', freq='QS-FEB').is_quarter_start, 1),
(Timestamp('2013-02-01', freq='QS-FEB').is_year_start, 1),
(Timestamp('2013-06-30', freq='BQ').is_month_end, 0),
(Timestamp('2013-06-30', freq='BQ').is_quarter_end, 0),
(Timestamp('2013-06-30', freq='BQ').is_year_end, 0),
(Timestamp('2013-06-28', freq='BQ').is_month_end, 1),
(Timestamp('2013-06-28', freq='BQ').is_quarter_end, 1),
(Timestamp('2013-06-28', freq='BQ').is_year_end, 0),
(Timestamp('2013-06-30', freq='BQS-APR').is_month_end, 0),
(Timestamp('2013-06-30', freq='BQS-APR').is_quarter_end, 0),
(Timestamp('2013-06-30', freq='BQS-APR').is_year_end, 0),
(Timestamp('2013-06-28', freq='BQS-APR').is_month_end, 1),
(Timestamp('2013-06-28', freq='BQS-APR').is_quarter_end, 1),
(Timestamp('2013-03-29', freq='BQS-APR').is_year_end, 1),
(Timestamp('2013-11-01', freq='AS-NOV').is_year_start, 1),
(Timestamp('2013-10-31', freq='AS-NOV').is_year_end, 1),
(Timestamp('2012-02-01').days_in_month, 29),
(Timestamp('2013-02-01').days_in_month, 28)]
for ts, value in tests:
assert ts == value
# GH 6538: Check that DatetimeIndex and its TimeStamp elements
# return the same weekofyear accessor close to new year w/ tz
dates = ["2013/12/29", "2013/12/30", "2013/12/31"]
dates = DatetimeIndex(dates, tz="Europe/Brussels")
expected = [52, 1, 1]
assert dates.weekofyear.tolist() == expected
assert [d.weekofyear for d in dates] == expected
# GH 12806
@pytest.mark.parametrize('time_locale', [
    None] if tm.get_locales() is None else [None] + tm.get_locales())
def test_datetime_name_accessors(self, time_locale):
    """Check ``day_name`` / ``month_name`` (and ``weekday_name``).

    Runs once with ``time_locale=None`` (English names expected) and once
    per locale reported by ``tm.get_locales()``, where the expected names
    are taken from the ``calendar`` module under that locale.
    """
    # Test Monday -> Sunday and January -> December, in that sequence
    if time_locale is None:
        # If the time_locale is None, day-name and month_name should
        # return the english attributes
        expected_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                         'Friday', 'Saturday', 'Sunday']
        expected_months = ['January', 'February', 'March', 'April', 'May',
                           'June', 'July', 'August', 'September',
                           'October', 'November', 'December']
    else:
        with tm.set_locale(time_locale, locale.LC_TIME):
            expected_days = calendar.day_name[:]
            expected_months = calendar.month_name[1:]

    # GH#11128
    dti = pd.date_range(freq='D', start=datetime(1998, 1, 1),
                        periods=365)
    english_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                    'Friday', 'Saturday', 'Sunday']
    # positions 4..10 of ``dti`` and April 4..10, 2016 are both walked
    # against the Monday-first name lists
    for day, name, eng_name in zip(range(4, 11),
                                   expected_days,
                                   english_days):
        name = name.capitalize()
        assert dti.weekday_name[day] == eng_name
        assert dti.day_name(locale=time_locale)[day] == name
        ts = Timestamp(datetime(2016, 4, day))
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            assert ts.weekday_name == eng_name
        assert ts.day_name(locale=time_locale) == name
    dti = dti.append(DatetimeIndex([pd.NaT]))
    assert np.isnan(dti.day_name(locale=time_locale)[-1])
    ts = Timestamp(pd.NaT)
    assert np.isnan(ts.day_name(locale=time_locale))

    # GH#12805
    dti = pd.date_range(freq='M', start='2012', end='2013')
    result = dti.month_name(locale=time_locale)
    expected = Index([month.capitalize() for month in expected_months])

    # work around different normalization schemes
    # https://github.com/pandas-dev/pandas/issues/22342
    if not compat.PY2:
        result = result.str.normalize("NFD")
        expected = expected.str.normalize("NFD")

    tm.assert_index_equal(result, expected)

    for date, expected in zip(dti, expected_months):
        result = date.month_name(locale=time_locale)
        expected = expected.capitalize()

        if not compat.PY2:
            result = unicodedata.normalize("NFD", result)
            # Normalize ``expected`` itself. Normalizing ``result`` here
            # instead would make the assertion below always true.
            expected = unicodedata.normalize("NFD", expected)

        assert result == expected
    dti = dti.append(DatetimeIndex([pd.NaT]))
    assert np.isnan(dti.month_name(locale=time_locale)[-1])
def test_nanosecond_field(self):
    """An index built from the integers 0..9 reports them as nanoseconds."""
    dti = DatetimeIndex(np.arange(10))
    expected = pd.Index(np.arange(10, dtype=np.int64))

    tm.assert_index_equal(dti.nanosecond, expected)
@@ -1,52 +0,0 @@
import pytest
import pandas as pd
import pandas.util.testing as tm
class TestDatetimeIndex(object):
    """``fillna`` on tz-naive and tz-aware ``DatetimeIndex``."""

    @pytest.mark.parametrize('tz', ['US/Eastern', 'Asia/Tokyo'])
    def test_fillna_datetime64(self, tz):
        # GH 11343
        nine = '2011-01-01 09:00'
        ten = '2011-01-01 10:00'
        eleven = '2011-01-01 11:00'

        # ---- tz-naive index ----
        idx = pd.DatetimeIndex([nine, pd.NaT, eleven])

        exp = pd.DatetimeIndex([nine, ten, eleven])
        filled = idx.fillna(pd.Timestamp(ten))
        tm.assert_index_equal(filled, exp)

        # tz mismatch
        exp = pd.Index([pd.Timestamp(nine),
                        pd.Timestamp(ten, tz=tz),
                        pd.Timestamp(eleven)], dtype=object)
        filled = idx.fillna(pd.Timestamp(ten, tz=tz))
        tm.assert_index_equal(filled, exp)

        # object
        exp = pd.Index([pd.Timestamp(nine), 'x',
                        pd.Timestamp(eleven)], dtype=object)
        tm.assert_index_equal(idx.fillna('x'), exp)

        # ---- tz-aware index ----
        idx = pd.DatetimeIndex([nine, pd.NaT, eleven], tz=tz)

        exp = pd.DatetimeIndex([nine, ten, eleven], tz=tz)
        filled = idx.fillna(pd.Timestamp(ten, tz=tz))
        tm.assert_index_equal(filled, exp)

        # filling with a tz-naive value
        exp = pd.Index([pd.Timestamp(nine, tz=tz),
                        pd.Timestamp(ten),
                        pd.Timestamp(eleven, tz=tz)],
                       dtype=object)
        filled = idx.fillna(pd.Timestamp(ten))
        tm.assert_index_equal(filled, exp)

        # object
        exp = pd.Index([pd.Timestamp(nine, tz=tz),
                        'x',
                        pd.Timestamp(eleven, tz=tz)],
                       dtype=object)
        tm.assert_index_equal(idx.fillna('x'), exp)
@@ -1,498 +0,0 @@
from datetime import datetime
import warnings
import numpy as np
import pytest
from pandas.core.dtypes.generic import ABCDateOffset
import pandas as pd
from pandas import (
DatetimeIndex, Index, PeriodIndex, Series, Timestamp, bdate_range,
date_range)
from pandas.tests.test_base import Ops
import pandas.util.testing as tm
from pandas.tseries.offsets import BDay, BMonthEnd, CDay, Day, Hour
# Shared date bounds passed to bdate_range / date_range by the tests below.
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
class TestDatetimeIndexOps(Ops):
def setup_method(self, method):
    """Split ``self.objs`` into datetime/period indexes and everything else."""
    super(TestDatetimeIndexOps, self).setup_method(method)

    def is_datetimelike_index(obj):
        return isinstance(obj, (DatetimeIndex, PeriodIndex))

    self.is_valid_objs = [o for o in self.objs
                          if is_datetimelike_index(o)]
    self.not_valid_objs = [o for o in self.objs
                           if not is_datetimelike_index(o)]
def test_ops_properties(self):
    """Run the shared property checks for each group of accessors."""
    def is_datetime_index(obj):
        return isinstance(obj, DatetimeIndex)

    for ops in (DatetimeIndex._field_ops,
                DatetimeIndex._object_ops,
                DatetimeIndex._bool_ops):
        self.check_ops_properties(ops, is_datetime_index)
def test_ops_properties_basic(self):
    """Datetime field names are not attributes of a Series.

    The previous check passed ``lambda x: ...`` to ``pytest.raises``; the
    lambda was invoked with no argument, so the ``TypeError`` came from
    the missing parameter and the attribute access was never executed.
    """
    # sanity check that the behavior didn't change
    # GH#7206
    for op in ['year', 'day', 'second', 'weekday']:
        # NOTE(review): assumes self.dt_series (set up by the Ops base
        # class) is a Series, so the lookup fails with AttributeError --
        # the same failure asserted for ``s.weekday`` below.
        with pytest.raises(AttributeError):
            getattr(self.dt_series, op)

    # attribute access should still work!
    s = Series(dict(year=2000, month=1, day=10))
    assert s.year == 2000
    assert s.month == 1
    assert s.day == 10
    pytest.raises(AttributeError, lambda: s.weekday)
def test_repeat_range(self, tz_naive_fixture):
    """Repeating an index drops ``freq``, via both method and numpy."""
    tz = tz_naive_fixture
    rng = date_range('1/1/2000', '1/1/2001')

    result = rng.repeat(5)
    assert result.freq is None
    assert len(result) == 5 * len(rng)

    def check(index, reps, expected_values):
        # both spellings of repeat must agree and carry no freq
        exp = pd.DatetimeIndex(expected_values, tz=tz)
        for res in [index.repeat(reps), np.repeat(index, reps)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    check(pd.date_range('2001-01-01', periods=2, freq='D', tz=tz),
          2,
          ['2001-01-01', '2001-01-01',
           '2001-01-02', '2001-01-02'])

    check(pd.date_range('2001-01-01', periods=2, freq='2D', tz=tz),
          2,
          ['2001-01-01', '2001-01-01',
           '2001-01-03', '2001-01-03'])

    check(pd.DatetimeIndex(['2001-01-01', 'NaT', '2003-01-01'], tz=tz),
          3,
          ['2001-01-01', '2001-01-01', '2001-01-01',
           'NaT', 'NaT', 'NaT',
           '2003-01-01', '2003-01-01', '2003-01-01'])
def test_repeat(self, tz_naive_fixture):
    """``repeat`` duplicates each element; numpy's ``axis`` is rejected."""
    tz = tz_naive_fixture
    reps = 2
    msg = "the 'axis' parameter is not supported"

    rng = pd.date_range(start='2016-01-01', periods=2,
                        freq='30Min', tz=tz)

    stamps = ['2016-01-01 00:00:00',
              '2016-01-01 00:00:00',
              '2016-01-01 00:30:00',
              '2016-01-01 00:30:00']
    expected_rng = DatetimeIndex(
        [Timestamp(stamp, tz=tz, freq='30T') for stamp in stamps])

    res = rng.repeat(reps)
    tm.assert_index_equal(res, expected_rng)
    assert res.freq is None

    via_numpy = np.repeat(rng, reps)
    tm.assert_index_equal(via_numpy, expected_rng)
    with pytest.raises(ValueError, match=msg):
        np.repeat(rng, reps, axis=1)
def test_resolution(self, tz_naive_fixture):
    """``resolution`` of a range built with each frequency alias."""
    tz = tz_naive_fixture
    # (frequency alias, expected resolution string)
    expected_by_freq = [('A', 'day'),
                        ('Q', 'day'),
                        ('M', 'day'),
                        ('D', 'day'),
                        ('H', 'hour'),
                        ('T', 'minute'),
                        ('S', 'second'),
                        ('L', 'millisecond'),
                        ('U', 'microsecond')]
    for freq, expected in expected_by_freq:
        idx = pd.date_range(start='2013-04-01', periods=30, freq=freq,
                            tz=tz)
        assert idx.resolution == expected
def test_value_counts_unique(self, tz_naive_fixture):
    """``value_counts`` and ``unique`` on an index with repeats and NaT."""
    tz = tz_naive_fixture
    # GH 7735
    hourly = pd.date_range('2011-01-01 09:00', freq='H', periods=10)
    # create repeated values, 'n'th element is repeated by n+1 times
    repeat_counts = range(1, len(hourly) + 1)
    idx = DatetimeIndex(np.repeat(hourly.values, repeat_counts),
                        tz=tz)

    # the expected counts run from 10 down to 1, latest hour first
    exp_idx = pd.date_range('2011-01-01 18:00', freq='-1H', periods=10,
                            tz=tz)
    expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')

    for obj in [idx, Series(idx)]:
        tm.assert_series_equal(obj.value_counts(), expected)

    expected = pd.date_range('2011-01-01 09:00', freq='H', periods=10,
                             tz=tz)
    tm.assert_index_equal(idx.unique(), expected)

    idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00',
                         '2013-01-01 09:00', '2013-01-01 08:00',
                         '2013-01-01 08:00', pd.NaT], tz=tz)

    # default: NaT is not counted
    exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'],
                            tz=tz)
    expected = Series([3, 2], index=exp_idx)
    for obj in [idx, Series(idx)]:
        tm.assert_series_equal(obj.value_counts(), expected)

    # dropna=False: NaT gets its own row
    exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00',
                             pd.NaT], tz=tz)
    expected = Series([3, 2, 1], index=exp_idx)
    for obj in [idx, Series(idx)]:
        tm.assert_series_equal(obj.value_counts(dropna=False),
                               expected)

    tm.assert_index_equal(idx.unique(), exp_idx)
def test_nonunique_contains(self):
    """Membership works on indexes that contain duplicate values."""
    # GH 9512
    raw_inputs = ([0, 1, 0], [0, 0, -1], [0, -1, -1],
                  ['2015', '2015', '2016'], ['2015', '2015', '2014'])
    for values in raw_inputs:
        idx = DatetimeIndex(values)
        assert idx[0] in idx
@pytest.mark.parametrize('idx', [
    DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
                  freq='D', name='idx'),
    DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
                   '2011-01-01 11:00'],
                  freq='H', name='tzidx', tz='Asia/Tokyo'),
])
def test_order_with_freq(self, idx):
    """Sorting an index that has a freq keeps (or negates) that freq."""
    # ascending, values only
    ordered = idx.sort_values()
    tm.assert_index_equal(ordered, idx)
    assert ordered.freq == idx.freq

    # descending, values only
    ordered = idx.sort_values(ascending=False)
    reversed_idx = idx[::-1]
    tm.assert_index_equal(ordered, reversed_idx)
    assert ordered.freq == reversed_idx.freq
    assert ordered.freq.n == -1

    # ascending, with indexer
    ordered, indexer = idx.sort_values(return_indexer=True)
    tm.assert_index_equal(ordered, idx)
    tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]),
                                check_dtype=False)
    assert ordered.freq == idx.freq

    # descending, with indexer
    ordered, indexer = idx.sort_values(return_indexer=True,
                                       ascending=False)
    reversed_idx = idx[::-1]
    tm.assert_index_equal(ordered, reversed_idx)
    tm.assert_numpy_array_equal(indexer,
                                np.array([2, 1, 0]),
                                check_dtype=False)
    assert ordered.freq == reversed_idx.freq
    assert ordered.freq.n == -1
@pytest.mark.parametrize('index_dates,expected_dates', [
    (['2011-01-01', '2011-01-03', '2011-01-05',
      '2011-01-02', '2011-01-01'],
     ['2011-01-01', '2011-01-01', '2011-01-02',
      '2011-01-03', '2011-01-05']),
    (['2011-01-01', '2011-01-03', '2011-01-05',
      '2011-01-02', '2011-01-01'],
     ['2011-01-01', '2011-01-01', '2011-01-02',
      '2011-01-03', '2011-01-05']),
    ([pd.NaT, '2011-01-03', '2011-01-05',
      '2011-01-02', pd.NaT],
     [pd.NaT, pd.NaT, '2011-01-02', '2011-01-03',
      '2011-01-05'])
])
def test_order_without_freq(self, index_dates, expected_dates,
                            tz_naive_fixture):
    """Sorting an index without a freq yields no freq either way."""
    tz = tz_naive_fixture

    # without freq
    index = DatetimeIndex(index_dates, tz=tz, name='idx')
    expected = DatetimeIndex(expected_dates, tz=tz, name='idx')

    # values only, ascending then descending
    ordered = index.sort_values()
    tm.assert_index_equal(ordered, expected)
    assert ordered.freq is None

    ordered = index.sort_values(ascending=False)
    tm.assert_index_equal(ordered, expected[::-1])
    assert ordered.freq is None

    # with indexer, ascending
    ordered, indexer = index.sort_values(return_indexer=True)
    tm.assert_index_equal(ordered, expected)
    ascending_positions = np.array([0, 4, 3, 1, 2])
    tm.assert_numpy_array_equal(indexer, ascending_positions,
                                check_dtype=False)
    assert ordered.freq is None

    # with indexer, descending
    ordered, indexer = index.sort_values(return_indexer=True,
                                         ascending=False)
    tm.assert_index_equal(ordered, expected[::-1])
    descending_positions = np.array([2, 1, 3, 4, 0])
    tm.assert_numpy_array_equal(indexer, descending_positions,
                                check_dtype=False)
    assert ordered.freq is None
def test_drop_duplicates_metadata(self):
    """``drop_duplicates`` keeps freq only when the input had one."""
    # GH 10115
    idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
    deduped = idx.drop_duplicates()
    tm.assert_index_equal(idx, deduped)
    assert idx.freq == deduped.freq

    doubled = idx.append(idx)
    assert doubled.freq is None  # freq is reset
    deduped = doubled.drop_duplicates()
    tm.assert_index_equal(idx, deduped)
    assert deduped.freq is None
def test_drop_duplicates(self):
    """Index and Series agree for every ``keep`` option."""
    # to check Index/Series compat
    base = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
    # the first five dates appear twice
    idx = base.append(base[:5])

    # default keep: first occurrence wins
    from_index = idx.drop_duplicates()
    tm.assert_index_equal(from_index, base)
    from_series = Series(idx).drop_duplicates()
    tm.assert_series_equal(from_series, Series(base))

    # keep='last'
    from_index = idx.drop_duplicates(keep='last')
    exp = base[5:].append(base[:5])
    tm.assert_index_equal(from_index, exp)
    from_series = Series(idx).drop_duplicates(keep='last')
    tm.assert_series_equal(from_series,
                           Series(exp, index=np.arange(5, 36)))

    # keep=False: every duplicated value is dropped
    from_index = idx.drop_duplicates(keep=False)
    tm.assert_index_equal(from_index, base[5:])
    from_series = Series(idx).drop_duplicates(keep=False)
    tm.assert_series_equal(from_series,
                           Series(base[5:], index=np.arange(5, 31)))
@pytest.mark.parametrize('freq', [
    'A', '2A', '-2A', 'Q', '-1Q', 'M', '-1M', 'D', '3D',
    '-3D', 'W', '-1W', 'H', '2H', '-2H', 'T', '2T', 'S',
    '-3S'])
def test_infer_freq(self, freq):
    """``freq='infer'`` recovers the freq the values were built with."""
    # GH 11018
    idx = pd.date_range('2011-01-01 09:00:00', freq=freq, periods=10)
    rebuilt = pd.DatetimeIndex(idx.asi8, freq='infer')
    tm.assert_index_equal(idx, rebuilt)
    assert rebuilt.freq == freq
def test_nat(self, tz_naive_fixture):
    """NaT bookkeeping attributes with and without a NaT present."""
    tz = tz_naive_fixture
    assert pd.DatetimeIndex._na_value is pd.NaT
    assert pd.DatetimeIndex([])._na_value is pd.NaT

    # (values, expected _isnan mask, expected hasnans, expected _nan_idxs)
    cases = [
        (['2011-01-01', '2011-01-02'], [False, False], False, []),
        (['2011-01-01', 'NaT'], [False, True], True, [1]),
    ]
    for values, isnan_mask, has_nans, nan_positions in cases:
        idx = pd.DatetimeIndex(values, tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array(isnan_mask))
        assert idx.hasnans is has_nans
        tm.assert_numpy_array_equal(
            idx._nan_idxs, np.array(nan_positions, dtype=np.intp))
def test_equals(self):
    """``equals`` across copies, object dtype, containers and timezones."""
    # GH 13107
    idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'])
    assert idx.equals(idx)
    assert idx.equals(idx.copy())
    assert idx.equals(idx.astype(object))
    assert idx.astype(object).equals(idx)
    assert idx.astype(object).equals(idx.astype(object))
    assert not idx.equals(list(idx))
    assert not idx.equals(pd.Series(idx))

    def assert_differs_from(other):
        # ``idx`` must compare unequal to ``other`` in every form
        assert not idx.equals(other)
        assert not idx.equals(other.copy())
        assert not idx.equals(other.astype(object))
        assert not idx.astype(object).equals(other)
        assert not idx.equals(list(other))
        assert not idx.equals(pd.Series(other))

    idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'],
                            tz='US/Pacific')
    assert_differs_from(idx2)

    # same internal, different tz
    idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific')
    tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
    assert_differs_from(idx3)
@pytest.mark.parametrize('values', [
    ['20180101', '20180103', '20180105'], []])
@pytest.mark.parametrize('freq', [
    '2D', Day(2), '2B', BDay(2), '48H', Hour(48)])
@pytest.mark.parametrize('tz', [None, 'US/Eastern'])
def test_freq_setter(self, values, freq, tz):
    """``freq`` can be assigned a string or offset, and cleared again."""
    # GH 20678
    index = DatetimeIndex(values, tz=tz)

    # can set to an offset, converting from string if necessary
    index.freq = freq
    assert index.freq == freq
    assert isinstance(index.freq, ABCDateOffset)

    # can reset to None
    index.freq = None
    assert index.freq is None
def test_freq_setter_errors(self):
    """Assigning a non-conforming or unparseable freq raises ValueError."""
    # GH 20678
    every_other_day = DatetimeIndex(['20180101', '20180103', '20180105'])

    # setting with an incompatible freq
    msg = ('Inferred frequency 2D from passed values does not conform to '
           'passed frequency 5D')
    with pytest.raises(ValueError, match=msg):
        every_other_day.freq = '5D'

    # setting with non-freq string
    with pytest.raises(ValueError, match='Invalid frequency'):
        every_other_day.freq = 'foo'
def test_offset_deprecated(self):
    """Reading and writing ``offset`` both emit a ``FutureWarning``."""
    # GH 20716
    index = pd.DatetimeIndex(['20180101', '20180102'])

    # getter deprecated
    with tm.assert_produces_warning(FutureWarning):
        index.offset

    # setter deprecated
    with tm.assert_produces_warning(FutureWarning):
        index.offset = BDay()
class TestBusinessDatetimeIndex(object):
    """Basic operations on a ``bdate_range`` spanning START..END."""

    def setup_method(self, method):
        self.rng = bdate_range(START, END)

    def test_comparison(self):
        pivot = self.rng[10]

        later_than_pivot = self.rng > pivot
        assert later_than_pivot[11]
        assert not later_than_pivot[9]

    def test_pickle_unpickle(self):
        restored = tm.round_trip_pickle(self.rng)
        assert restored.freq is not None

    def test_copy(self):
        duplicate = self.rng.copy()
        repr(duplicate)
        tm.assert_index_equal(duplicate, self.rng)

    def test_shift(self):
        # (periods, position in the shifted index, matching original position)
        for periods, new_pos, old_pos in [(5, 0, 5), (-5, 5, 0), (0, 0, 0)]:
            shifted = self.rng.shift(periods)
            assert shifted[new_pos] == self.rng[old_pos]
            assert shifted.freq == self.rng.freq

        # shifting by a freq other than the index's own
        rng = date_range(START, END, freq=BMonthEnd())
        shifted = rng.shift(1, freq=BDay())
        assert shifted[0] == rng[0] + BDay()

    def test_equals(self):
        as_list = list(self.rng)
        assert not self.rng.equals(as_list)

    def test_identical(self):
        left = self.rng.copy()
        right = self.rng.copy()
        assert left.identical(right)

        # name
        left = left.rename('foo')
        assert left.equals(right)
        assert not left.identical(right)
        right = right.rename('foo')
        assert left.identical(right)

        # freq
        from_values = Index(right.values)
        assert left.equals(from_values)
        assert not left.identical(from_values)
class TestCustomDatetimeIndex(object):
    """Basic operations on a custom-business-day (``freq='C'``) range."""

    def setup_method(self, method):
        self.rng = bdate_range(START, END, freq='C')

    def test_comparison(self):
        pivot = self.rng[10]

        later_than_pivot = self.rng > pivot
        assert later_than_pivot[11]
        assert not later_than_pivot[9]

    def test_copy(self):
        duplicate = self.rng.copy()
        repr(duplicate)
        tm.assert_index_equal(duplicate, self.rng)

    def test_shift(self):
        # (periods, position in the shifted index, matching original position)
        for periods, new_pos, old_pos in [(5, 0, 5), (-5, 5, 0), (0, 0, 0)]:
            shifted = self.rng.shift(periods)
            assert shifted[new_pos] == self.rng[old_pos]
            assert shifted.freq == self.rng.freq

        with warnings.catch_warnings(record=True):
            warnings.simplefilter("ignore", pd.errors.PerformanceWarning)
            rng = date_range(START, END, freq=BMonthEnd())
            shifted = rng.shift(1, freq=CDay())
            assert shifted[0] == rng[0] + CDay()

    def test_shift_periods(self):
        # GH#22458 : argument 'n' was deprecated in favor of 'periods'
        idx = pd.date_range(start=START, end=END, periods=3)
        by_keyword = idx.shift(periods=0)
        tm.assert_index_equal(by_keyword, idx)
        by_position = idx.shift(0)
        tm.assert_index_equal(by_position, idx)
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=True):
            tm.assert_index_equal(idx.shift(n=0), idx)

    def test_pickle_unpickle(self):
        restored = tm.round_trip_pickle(self.rng)
        assert restored.freq is not None

    def test_equals(self):
        as_list = list(self.rng)
        assert not self.rng.equals(as_list)
@@ -1,388 +0,0 @@
""" test partial slicing on Series/Frame """
from datetime import datetime
import operator as op
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame, DatetimeIndex, Index, Series, Timedelta, Timestamp, date_range)
from pandas.core.indexing import IndexingError
from pandas.util import testing as tm
class TestSlicing(object):
def test_dti_slicing(self):
    """Fancy-indexing a month-end range picks the right stamps, drops freq."""
    monthly = date_range(start='1/1/2005', end='12/1/2005', freq='M')
    picked = monthly[[1, 3, 5]]

    wanted = ['2/28/2005', '4/30/2005', '6/30/2005']
    for pos, stamp in enumerate(wanted):
        assert picked[pos] == Timestamp(stamp)

    # An irregular selection must not carry the frequency along.
    assert picked.freq is None
def test_slice_keeps_name(self):
    """Slicing a named tz-aware hourly range keeps the name (GH4226)."""
    zone = 'America/Los_Angeles'
    first = pd.Timestamp('2013-07-01 00:00:00', tz=zone)
    last = pd.Timestamp('2013-07-02 00:00:00', tz=zone)
    hourly = pd.date_range(first, last, freq='H', name='timebucket')
    tail = hourly[1:]
    assert tail.name == hourly.name
def test_slice_with_negative_step(self):
    """Label slices with step -1 select the same rows as positional ones."""
    ts = Series(np.arange(20),
                date_range('2014-01-01', periods=20, freq='MS'))
    SLC = pd.IndexSlice

    def assert_slices_equivalent(l_slc, i_slc):
        # Label slice via [] and via .loc must both match the iloc slice.
        # (The original repeated the .loc assertion twice; once is enough.)
        tm.assert_series_equal(ts[l_slc], ts.iloc[i_slc])
        tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc])

    # Open-ended slices, with Timestamp and string bounds.
    assert_slices_equivalent(SLC[Timestamp('2014-10-01')::-1], SLC[9::-1])
    assert_slices_equivalent(SLC['2014-10-01'::-1], SLC[9::-1])

    assert_slices_equivalent(SLC[:Timestamp('2014-10-01'):-1], SLC[:8:-1])
    assert_slices_equivalent(SLC[:'2014-10-01':-1], SLC[:8:-1])

    # Both bounds given, in every string/Timestamp combination.
    assert_slices_equivalent(SLC['2015-02-01':'2014-10-01':-1],
                             SLC[13:8:-1])
    assert_slices_equivalent(SLC[Timestamp('2015-02-01'):Timestamp(
        '2014-10-01'):-1], SLC[13:8:-1])
    assert_slices_equivalent(SLC['2015-02-01':Timestamp('2014-10-01'):-1],
                             SLC[13:8:-1])
    assert_slices_equivalent(SLC[Timestamp('2015-02-01'):'2014-10-01':-1],
                             SLC[13:8:-1])

    # Bounds in ascending order with a negative step select nothing.
    assert_slices_equivalent(SLC['2014-10-01':'2015-02-01':-1], SLC[:0])
def test_slice_with_zero_step_raises(self):
    """A slice step of zero raises ValueError through ``[]`` and ``.loc``."""
    ts = Series(np.arange(20),
                date_range('2014-01-01', periods=20, freq='MS'))
    with pytest.raises(ValueError, match='slice step cannot be zero'):
        ts[::0]
    # The original checked .loc twice in a row; a single check is kept.
    with pytest.raises(ValueError, match='slice step cannot be zero'):
        ts.loc[::0]
def test_slice_bounds_empty(self):
    """Date-string slice bounds are cast even on an empty index (GH#14354)."""
    empty_idx = date_range(freq='1H', periods=0, end='2015')

    # (side, expected bound): right is the last ns of the day, left the first.
    cases = [('right', '2015-01-02 23:59:59.999999999'),
             ('left', '2015-01-02 00:00:00')]
    for side, stamp in cases:
        bound = empty_idx._maybe_cast_slice_bound('2015-01-02', side, 'loc')
        assert bound == Timestamp(stamp)
def test_slice_duplicate_monotonic(self):
    """Left bound is cast correctly on an index of duplicate stamps."""
    # https://github.com/pandas-dev/pandas/issues/16515
    duplicated = pd.DatetimeIndex(['2017', '2017'])
    bound = duplicated._maybe_cast_slice_bound('2017-01-01', 'left', 'loc')
    assert bound == Timestamp('2017-01-01')
def test_monotone_DTI_indexing_bug(self):
    """Partial string indexing on a monotonic descending index (GH 19362)."""
    # Assigning a descending datetime column must round-trip unchanged.
    frame = pd.DataFrame(list(range(5)))
    descending = pd.to_datetime(['2018-01-02', '2017-02-10', '2016-03-10',
                                 '2015-03-15', '2014-03-16'])
    frame['date'] = descending
    wanted = pd.DataFrame({0: list(range(5)), 'date': descending})
    tm.assert_frame_equal(frame, wanted)

    # Selecting the first element of a reversed daily index by date string.
    reversed_index = pd.date_range('20170101', periods=3)[::-1]
    frame = pd.DataFrame({'A': [1, 2, 3]}, index=reversed_index)
    wanted = pd.DataFrame({'A': 1},
                          index=pd.date_range('20170103', periods=1))
    tm.assert_frame_equal(frame.loc['2017-01-03'], wanted)
def test_slice_year(self):
    """A year string selects every row of that year."""
    bdays = date_range(freq='B', start=datetime(2005, 1, 1), periods=500)

    # Series: '2005' is the same as a boolean mask on index.year.
    ser = Series(np.arange(len(bdays)), index=bdays)
    tm.assert_series_equal(ser['2005'], ser[ser.index.year == 2005])

    # DataFrame: same selection through .loc.
    frame = DataFrame(np.random.rand(len(bdays), 5), index=bdays)
    tm.assert_frame_equal(frame.loc['2005'],
                          frame[frame.index.year == 2005])

    # Index.get_loc returns the positional slice covering the year.
    decade = date_range('1/1/2000', '1/1/2010')
    located = decade.get_loc('2009')
    assert located == slice(3288, 3653)
def test_slice_quarter(self):
    """Quarter strings ('2001Q1', '1Q01') select the 90 days of that quarter."""
    days = date_range(freq='D', start=datetime(2000, 6, 1), periods=500)

    ser = Series(np.arange(len(days)), index=days)
    assert len(ser['2001Q1']) == 90

    frame = DataFrame(np.random.rand(len(days), 5), index=days)
    assert len(frame.loc['1Q01']) == 90
def test_slice_month(self):
    """A year-month string selects the 30 days of November 2005."""
    days = date_range(freq='D', start=datetime(2005, 1, 1), periods=500)

    ser = Series(np.arange(len(days)), index=days)
    assert len(ser['2005-11']) == 30

    frame = DataFrame(np.random.rand(len(days), 5), index=days)
    assert len(frame.loc['2005-11']) == 30

    # Both spellings of the month give the same selection.
    tm.assert_series_equal(ser['2005-11'], ser['11-2005'])
def test_partial_slice(self):
    """Month-resolution slice bounds expand to whole months on a daily index."""
    rng = date_range(freq='D', start=datetime(2005, 1, 1), periods=500)
    s = Series(np.arange(len(rng)), index=rng)

    # Both bounds, then each bound alone.
    result = s['2005-05':'2006-02']
    expected = s['20050501':'20060228']
    tm.assert_series_equal(result, expected)

    result = s['2005-05':]
    expected = s['20050501':]
    tm.assert_series_equal(result, expected)

    result = s[:'2006-02']
    expected = s[:'20060228']
    tm.assert_series_equal(result, expected)

    # A day-resolution string on a daily index is an exact lookup.
    result = s['2005-1-1']
    assert result == s.iloc[0]

    # A date before the index start is a missing key. KeyError is expected
    # specifically; a bare Exception would also accept unrelated failures.
    with pytest.raises(KeyError):
        s['2004-12-31']
def test_partial_slice_daily(self):
    """A day string on an hourly index selects that day's rows."""
    rng = date_range(freq='H', start=datetime(2005, 1, 31), periods=500)
    s = Series(np.arange(len(rng)), index=rng)

    result = s['2005-1-31']
    tm.assert_series_equal(result, s.iloc[:24])

    # An hour before the index start is a missing key. KeyError is expected
    # specifically; a bare Exception would also accept unrelated failures.
    with pytest.raises(KeyError):
        s['2004-12-31 00']
def test_partial_slice_hourly(self):
    """Day and hour strings on a minutely index select the matching rows."""
    rng = date_range(freq='T', start=datetime(2005, 1, 1, 20, 0, 0),
                     periods=500)
    s = Series(np.arange(len(rng)), index=rng)

    # The index starts at 20:00, so the first day holds four hours of data.
    result = s['2005-1-1']
    tm.assert_series_equal(result, s.iloc[:60 * 4])

    result = s['2005-1-1 20']
    tm.assert_series_equal(result, s.iloc[:60])

    # Minute-resolution string on a minutely index is an exact lookup.
    assert s['2005-1-1 20:00'] == s.iloc[0]

    # A minute before the index start is a missing key. KeyError is expected
    # specifically; a bare Exception would also accept unrelated failures.
    with pytest.raises(KeyError):
        s['2004-12-31 00:15']
def test_partial_slice_minutely(self):
    """Minute and day strings on a secondly index select the matching rows."""
    rng = date_range(freq='S', start=datetime(2005, 1, 1, 23, 59, 0),
                     periods=500)
    s = Series(np.arange(len(rng)), index=rng)

    result = s['2005-1-1 23:59']
    tm.assert_series_equal(result, s.iloc[:60])

    # The index starts at 23:59:00, so only 60 rows fall on the first day.
    result = s['2005-1-1']
    tm.assert_series_equal(result, s.iloc[:60])

    assert s[Timestamp('2005-1-1 23:59:00')] == s.iloc[0]

    # A second before the index start is a missing key. KeyError is expected
    # specifically; a bare Exception would also accept unrelated failures.
    with pytest.raises(KeyError):
        s['2004-12-31 00:00:00']
def test_partial_slice_second_precision(self):
    """Minute/second strings slice a microsecond index around a minute edge."""
    # 20 microsecond steps: 10 before 00:01:00 and 10 from it onwards.
    first = datetime(2005, 1, 1, 0, 0, 59, microsecond=999990)
    micros = date_range(start=first, periods=20, freq='US')
    ser = Series(np.arange(20), micros)

    # (key, expected positional slice), checked in this order.
    cases = [('2005-1-1 00:00', slice(None, 10)),
             ('2005-1-1 00:00:59', slice(None, 10)),
             ('2005-1-1 00:01', slice(10, None)),
             ('2005-1-1 00:01:00', slice(10, None))]
    for key, positions in cases:
        tm.assert_series_equal(ser[key], ser.iloc[positions])

    assert ser[Timestamp('2005-1-1 00:00:59.999990')] == ser.iloc[0]

    # A second with no data in the index is a missing key.
    with pytest.raises(KeyError, match='2005-1-1 00:00:00'):
        ser['2005-1-1 00:00:00']
def test_partial_slicing_dataframe(self):
    """String resolution vs. index resolution decides slice or exact match.

    GH14856. A string less precise than the index is treated as a slice;
    a string as precise as, or more precise than, the index is treated as
    an exact key.
    """
    formats = ['%Y', '%Y-%m', '%Y-%m-%d', '%Y-%m-%d %H',
               '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S']
    resolutions = ['year', 'month', 'day', 'hour', 'minute', 'second']

    # Only 'day', 'hour', 'minute' and 'second' are exercised.
    for rnum, resolution in enumerate(resolutions[2:], 2):
        step = Timedelta("1 " + resolution)
        centre = datetime(2012, 1, 1, 0, 0, 0)
        index = DatetimeIndex([centre - step, centre, centre + step])
        values = [1, 2, 3]
        df = DataFrame({'a': values}, index, dtype=np.int64)
        assert df.index.resolution == resolution

        # Same resolution as the index: scalar from the Series,
        # KeyError from the frame.
        for stamp, wanted in zip(index, values):
            key = stamp.strftime(formats[rnum])
            got = df['a'][key]
            assert isinstance(got, np.int64)
            assert got == wanted
            with pytest.raises(KeyError):
                df[key]

        # Less precise than the index: both Series and frame return a slice.
        for fmt in formats[:rnum]:
            for position, positions in [[0, slice(None, 1)],
                                        [1, slice(1, None)]]:
                key = index[position].strftime(fmt)

                tm.assert_series_equal(df['a'][key], df['a'][positions])
                tm.assert_frame_equal(df[key], df[positions])

        # More precise than the index and matching an existing key:
        # scalar from the Series, KeyError from the frame.
        for fmt in formats[rnum + 1:]:
            key = index[1].strftime(fmt)
            got = df['a'][key]
            assert isinstance(got, np.int64)
            assert got == 2
            with pytest.raises(KeyError):
                df[key]

        # More precise than the index and matching nothing: KeyError from both.
        for fmt, res in list(zip(formats, resolutions))[rnum + 1:]:
            shifted = index[1] + Timedelta("1 " + res)
            key = shifted.strftime(fmt)
            column = df['a']
            with pytest.raises(KeyError):
                column[key]
            with pytest.raises(KeyError):
                df[key]
def test_partial_slicing_with_multiindex(self):
    """Datetime-string lookups on a datetime level of a MultiIndex."""
    # GH 4758: partial string indexing with a multi-index.
    stamps = date_range("2013-06-19 09:30:00", periods=4, freq='5T')
    flat = DataFrame({'ACCOUNT': ["ACCT1", "ACCT1", "ACCT1", "ACCT2"],
                      'TICKER': ["ABC", "MNP", "XYZ", "XYZ"],
                      'val': [1, 2, 3, 4]},
                     index=stamps)
    df_multi = flat.set_index(['ACCOUNT', 'TICKER'], append=True)

    # Two of three levels given: the remaining level indexes the result.
    wanted = DataFrame([[1]],
                       index=Index(['ABC'], name='TICKER'), columns=['val'])
    got = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1')]
    tm.assert_frame_equal(got, wanted)

    # All levels given: a string key matches the Timestamp key.
    wanted = df_multi.loc[
        (pd.Timestamp('2013-06-19 09:30:00', tz=None), 'ACCT1', 'ABC')]
    got = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1', 'ABC')]
    tm.assert_series_equal(got, wanted)

    # A date-only string across all levels is an IndexingError: partial
    # string selection is not done on multi-levels.
    with pytest.raises(IndexingError):
        df_multi.loc[('2013-06-19', 'ACCT1', 'ABC')]

    # GH 4294: partial slice on a Series with a MultiIndex.
    stacked = pd.DataFrame(np.random.rand(1000, 1000),
                           index=pd.date_range('2000-1-1',
                                               periods=1000)).stack()

    trimmed = stacked[:-1].copy()
    wanted = trimmed['2000-1-4']
    got = trimmed[pd.Timestamp('2000-1-4')]
    tm.assert_series_equal(got, wanted)

    got = stacked[pd.Timestamp('2000-1-4')]
    wanted = stacked['2000-1-4']
    tm.assert_series_equal(got, wanted)

    as_frame = pd.DataFrame(stacked)
    wanted = as_frame.xs('2000-1-4')
    got = as_frame.loc[pd.Timestamp('2000-1-4')]
    tm.assert_frame_equal(got, wanted)
def test_partial_slice_doesnt_require_monotonicity(self):
    """String slices work on a non-monotonic index; Timestamp slices raise."""
    # For historical reasons.
    daily = pd.Series(np.arange(10), pd.date_range('2014-01-01', periods=10))
    nonmonotonic = daily[[3, 5, 4]]
    nothing = nonmonotonic.iloc[:0]
    stamp = pd.Timestamp('2014-01-10')
    pattern = r"Timestamp\('2014-01-10 00:00:00'\)"

    # Same pair of checks through plain [] and through .loc.
    for indexer in (nonmonotonic, nonmonotonic.loc):
        tm.assert_series_equal(indexer['2014-01-10':], nothing)
        with pytest.raises(KeyError, match=pattern):
            indexer[stamp:]
def test_loc_datetime_length_one(self):
    """Open-ended ``.loc`` slices return the whole frame (GH16071)."""
    one_day = pd.date_range('2016-10-01T00:00:00', '2016-10-01T23:59:59')
    frame = pd.DataFrame(columns=['1'], index=one_day)

    # A datetime bound and the equivalent string bound behave the same.
    sliced = frame.loc[datetime(2016, 10, 1):]
    tm.assert_frame_equal(sliced, frame)

    sliced = frame.loc['2016-10-01T00:00:00':]
    tm.assert_frame_equal(sliced, frame)
@pytest.mark.parametrize('datetimelike', [
    Timestamp('20130101'), datetime(2013, 1, 1),
    np.datetime64('2013-01-01T00:00', 'ns')])
@pytest.mark.parametrize('op,expected', [
    (op.lt, [True, False, False, False]),
    (op.le, [True, True, False, False]),
    (op.eq, [False, True, False, False]),
    (op.gt, [False, False, False, True])])
def test_selection_by_datetimelike(self, datetimelike, op, expected):
    """A datetime64[ns] column compares against datetime-likes (GH #17965)."""
    # Inside the function ``op`` is the parametrized comparison callable,
    # not the ``operator`` module used in the decorator above.
    stamps = [pd.Timestamp('20120101'), pd.Timestamp('20130101'),
              np.nan, pd.Timestamp('20130103')]
    frame = DataFrame({'A': stamps})
    mask = op(frame.A, datetimelike)
    wanted = Series(expected, name='A')
    tm.assert_series_equal(mask, wanted)
@@ -1,280 +0,0 @@
# -*- coding: utf-8 -*-
"""
Tests for DatetimeIndex methods behaving like their Timestamp counterparts
"""
from datetime import datetime
import numpy as np
import pytest
import pandas as pd
from pandas import DatetimeIndex, Timestamp, date_range
import pandas.util.testing as tm
from pandas.tseries.frequencies import to_offset
class TestDatetimeIndexOps(object):
def test_dti_time(self):
    """``.time`` matches calling ``.time()`` on each element."""
    every_12_min = date_range('1/1/2000', freq='12min', periods=10)
    vectorized = pd.Index(every_12_min).time
    elementwise = [stamp.time() for stamp in every_12_min]
    assert (vectorized == elementwise).all()
def test_dti_date(self):
    """``.date`` matches calling ``.date()`` on each element."""
    every_12_hours = date_range('1/1/2000', freq='12H', periods=10)
    vectorized = pd.Index(every_12_hours).date
    elementwise = [stamp.date() for stamp in every_12_hours]
    assert (vectorized == elementwise).all()
def test_dti_date_out_of_range(self):
    """Year 1400, as string or datetime, is rejected with ValueError (GH#1475)."""
    for too_early in (['1400-01-01'], [datetime(1400, 1, 1)]):
        with pytest.raises(ValueError):
            DatetimeIndex(too_early)
@pytest.mark.parametrize('field', [
    'dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter',
    'days_in_month', 'is_month_start', 'is_month_end',
    'is_quarter_start', 'is_quarter_end', 'is_year_start',
    'is_year_end', 'weekday_name'])
def test_dti_timestamp_fields(self, field):
    """Index fields such as quarter and week agree with Timestamp fields."""
    idx = tm.makeDateIndex(100)
    from_index = getattr(idx, field)[-1]

    last = Timestamp(idx[-1])
    if field != 'weekday_name':
        from_stamp = getattr(last, field)
    else:
        # Reading weekday_name from a Timestamp must emit a FutureWarning.
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            from_stamp = getattr(last, field)
    assert from_stamp == from_index
def test_dti_timestamp_freq_fields(self):
    """A Timestamp built with the index freq reports the same freq/freqstr."""
    idx = tm.makeDateIndex(100)
    last = idx[-1]

    assert idx.freq == Timestamp(last, idx.freq).freq
    assert idx.freqstr == Timestamp(last, idx.freq).freqstr
# ----------------------------------------------------------------
# DatetimeIndex.round
def test_round_daily(self):
    """Rounding to 'D' gives midnight stamps, tz-naive and tz-aware."""
    dti = date_range('20130101 09:10:11', periods=5)
    tm.assert_index_equal(dti.round('D'), date_range('20130101', periods=5))

    # Same check after moving the index to US/Eastern.
    dti = dti.tz_localize('UTC').tz_convert('US/Eastern')
    midnight_eastern = date_range('20130101',
                                  periods=5).tz_localize('US/Eastern')
    tm.assert_index_equal(dti.round('D'), midnight_eastern)

    # Data is already at whole seconds, so rounding to 's' changes nothing.
    tm.assert_index_equal(dti.round('s'), dti)

    # Frequencies that round() rejects.
    for bad_freq in ['Y', 'M', 'foobar']:
        with pytest.raises(ValueError):
            dti.round(bad_freq)
def test_round(self, tz_naive_fixture):
    """``round`` on an index and on a single element, plus edge cases."""
    tz = tz_naive_fixture
    rng = date_range(start='2016-01-01', periods=5,
                     freq='30Min', tz=tz)
    elt = rng[1]

    hours = ['2016-01-01 00:00:00', '2016-01-01 00:00:00',
             '2016-01-01 01:00:00', '2016-01-01 02:00:00',
             '2016-01-01 02:00:00']
    expected_rng = DatetimeIndex(
        [Timestamp(stamp, tz=tz, freq='30T') for stamp in hours])
    expected_elt = expected_rng[1]

    tm.assert_index_equal(rng.round(freq='H'), expected_rng)
    assert elt.round(freq='H') == expected_elt

    # Invalid and non-fixed frequencies are rejected for both the index
    # and the scalar, each with its own message.
    rejected = [
        ('foo', pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG),
        ('M', "<MonthEnd> is a non-fixed frequency"),
    ]
    for bad_freq, msg in rejected:
        for target in (rng, elt):
            with pytest.raises(ValueError, match=msg):
                target.round(freq=bad_freq)

    # GH#14440 & GH#15578
    index = DatetimeIndex(['2016-10-17 12:00:00.0015'], tz=tz)
    rounded = index.round('ms')
    wanted = DatetimeIndex(['2016-10-17 12:00:00.002000'], tz=tz)
    tm.assert_index_equal(rounded, wanted)

    # Rounding to a unit finer than the data leaves it unchanged.
    for fine_freq in ['us', 'ns']:
        tm.assert_index_equal(index, index.round(fine_freq))

    index = DatetimeIndex(['2016-10-17 12:00:00.00149'], tz=tz)
    rounded = index.round('ms')
    wanted = DatetimeIndex(['2016-10-17 12:00:00.001000'], tz=tz)
    tm.assert_index_equal(rounded, wanted)

    index = DatetimeIndex(['2016-10-17 12:00:00.001501031'])
    rounded = index.round('10ns')
    wanted = DatetimeIndex(['2016-10-17 12:00:00.001501030'])
    tm.assert_index_equal(rounded, wanted)

    # Rounding to an odd nanosecond multiple must not warn.
    with tm.assert_produces_warning(False):
        ts = '2016-10-17 12:00:00.001501031'
        DatetimeIndex([ts]).round('1010ns')
def test_no_rounding_occurs(self, tz_naive_fixture):
    """Rounding a 2-minute range to '2T' leaves it unchanged (GH 21262)."""
    tz = tz_naive_fixture
    rng = date_range(start='2016-01-01', periods=5,
                     freq='2Min', tz=tz)

    minutes = ['2016-01-01 00:00:00', '2016-01-01 00:02:00',
               '2016-01-01 00:04:00', '2016-01-01 00:06:00',
               '2016-01-01 00:08:00']
    expected_rng = DatetimeIndex(
        [Timestamp(stamp, tz=tz, freq='2T') for stamp in minutes])

    tm.assert_index_equal(rng.round(freq='2T'), expected_rng)
@pytest.mark.parametrize('test_input, rounder, freq, expected', [
    (['2117-01-01 00:00:45'], 'floor', '15s', ['2117-01-01 00:00:45']),
    (['2117-01-01 00:00:45'], 'ceil', '15s', ['2117-01-01 00:00:45']),
    (['2117-01-01 00:00:45.000000012'], 'floor', '10ns',
     ['2117-01-01 00:00:45.000000010']),
    (['1823-01-01 00:00:01.000000012'], 'ceil', '10ns',
     ['1823-01-01 00:00:01.000000020']),
    (['1823-01-01 00:00:01'], 'floor', '1s', ['1823-01-01 00:00:01']),
    (['1823-01-01 00:00:01'], 'ceil', '1s', ['1823-01-01 00:00:01']),
    (['2018-01-01 00:15:00'], 'ceil', '15T', ['2018-01-01 00:15:00']),
    (['2018-01-01 00:15:00'], 'floor', '15T', ['2018-01-01 00:15:00']),
    (['1823-01-01 03:00:00'], 'ceil', '3H', ['1823-01-01 03:00:00']),
    (['1823-01-01 03:00:00'], 'floor', '3H', ['1823-01-01 03:00:00']),
    (('NaT', '1823-01-01 00:00:01'), 'floor', '1s',
     ('NaT', '1823-01-01 00:00:01')),
    (('NaT', '1823-01-01 00:00:01'), 'ceil', '1s',
     ('NaT', '1823-01-01 00:00:01'))
])
def test_ceil_floor_edge(self, test_input, rounder, freq, expected):
    """``floor``/``ceil`` on edge inputs, including NaT and far-off years."""
    source = DatetimeIndex(list(test_input))
    # ``rounder`` names the method to call: 'floor' or 'ceil'.
    rounded = getattr(source, rounder)(freq)
    wanted = DatetimeIndex(list(expected))
    assert wanted.equals(rounded)
@pytest.mark.parametrize('start, index_freq, periods', [
    ('2018-01-01', '12H', 25),
    ('2018-01-01 0:0:0.124999', '1ns', 1000),
])
@pytest.mark.parametrize('round_freq', [
    '2ns', '3ns', '4ns', '5ns', '6ns', '7ns',
    '250ns', '500ns', '750ns',
    '1us', '19us', '250us', '500us', '750us',
    '1s', '2s', '3s',
    '12H', '1D',
])
def test_round_int64(self, start, index_freq, periods, round_freq):
    """floor/ceil/round land on multiples of the unit, within one unit."""
    dt = date_range(start=start, freq=index_freq, periods=periods)
    unit = to_offset(round_freq).nanos
    original_ns = dt.asi8

    # floor: a multiple of unit, at most one unit below the input.
    floored_ns = dt.floor(round_freq).asi8
    below = original_ns - floored_ns
    remainder = floored_ns % unit
    assert (remainder == 0).all(), "floor not a {} multiple".format(
        round_freq)
    assert (0 <= below).all() and (below < unit).all(), "floor error"

    # ceil: a multiple of unit, at most one unit above the input.
    ceiled_ns = dt.ceil(round_freq).asi8
    above = ceiled_ns - original_ns
    remainder = ceiled_ns % unit
    assert (remainder == 0).all(), "ceil not a {} multiple".format(
        round_freq)
    assert (0 <= above).all() and (above < unit).all(), "ceil error"

    # round: a multiple of unit, at most half a unit away.
    rounded_ns = dt.round(round_freq).asi8
    distance = abs(rounded_ns - original_ns)
    remainder = rounded_ns % unit
    assert (remainder == 0).all(), "round not a {} multiple".format(
        round_freq)
    assert (distance <= unit // 2).all(), "round error"
    if unit % 2 == 0:
        # Exact halves must come out even.
        assert (
            rounded_ns[distance == unit // 2] % 2 == 0
        ).all(), "round half to even error"
# ----------------------------------------------------------------
# DatetimeIndex.normalize
def test_normalize(self):
    """``normalize`` resets times to midnight; ``is_normalized`` reports it."""
    rng = date_range('1/1/2000 9:30', periods=10, freq='D')

    result = rng.normalize()
    at_midnight = date_range('1/1/2000', periods=10, freq='D')
    tm.assert_index_equal(result, at_midnight)

    # Nanosecond-precision inputs built from raw integer values.
    raw_ns = np.array([1380585623454345752,
                       1380585612343234312]).astype("datetime64[ns]")
    normalized_ns = DatetimeIndex(raw_ns).normalize()

    midnight_ns = np.array([1380585600000000000,
                            1380585600000000000]).astype("datetime64[ns]")
    tm.assert_index_equal(normalized_ns, DatetimeIndex(midnight_ns))

    assert result.is_normalized
    assert not rng.is_normalized
def test_normalize_nat(self):
    """``normalize`` keeps NaT and resets the other entry to midnight."""
    with_nat = DatetimeIndex([pd.NaT, Timestamp('2018-01-01 01:00:00')])
    wanted = DatetimeIndex([pd.NaT, Timestamp('2018-01-01')])
    tm.assert_index_equal(with_nat.normalize(), wanted)
class TestDateTimeIndexToJulianDate(object):
    """``DatetimeIndex.to_julian_date`` agrees with the per-element method."""

    def _check_against_elementwise(self, start, freq):
        # Shared body: five periods from ``start`` at ``freq``. The index
        # method must return a Float64Index equal to the element-by-element
        # result.
        dr = date_range(start=Timestamp(start), periods=5, freq=freq)
        elementwise = pd.Index([stamp.to_julian_date() for stamp in dr])
        vectorized = dr.to_julian_date()
        assert isinstance(vectorized, pd.Float64Index)
        tm.assert_index_equal(elementwise, vectorized)

    def test_1700(self):
        """Daily range starting 1710-10-01."""
        self._check_against_elementwise('1710-10-01', 'D')

    def test_2000(self):
        """Daily range starting 2000-02-27."""
        self._check_against_elementwise('2000-02-27', 'D')

    def test_hour(self):
        """Hourly range starting 2000-02-27."""
        self._check_against_elementwise('2000-02-27', 'H')

    def test_minute(self):
        """Minutely range starting 2000-02-27."""
        self._check_against_elementwise('2000-02-27', 'T')

    def test_second(self):
        """Secondly range starting 2000-02-27."""
        self._check_against_elementwise('2000-02-27', 'S')
@@ -1,500 +0,0 @@
from datetime import datetime
import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
DataFrame, DatetimeIndex, Index, Int64Index, Series, bdate_range,
date_range, to_datetime)
import pandas.util.testing as tm
from pandas.tseries.offsets import BMonthEnd, Minute, MonthEnd
# Date bounds passed to bdate_range/date_range by the tests below.
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
class TestDatetimeIndexSetOps(object):
# Timezone values fed to @pytest.mark.parametrize("tz", tz) in this class.
tz = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/Asia/Singapore',
'dateutil/US/Pacific']
# TODO: moved from test_datetimelike; dedup with version below
def test_union2(self):
    """Union of two halves rebuilds the whole index, also for array-likes."""
    whole = tm.makeDateIndex(10)
    head, tail = whole[:5], whole[5:]
    assert tm.equalContents(head.union(tail), whole)

    # GH 10149: the other operand may be an ndarray, a Series or a list.
    for klass in [np.array, Series, list]:
        other = klass(tail.values)
        assert tm.equalContents(head.union(other), whole)
@pytest.mark.parametrize("tz", tz)
def test_union(self, tz):
    """Union with an adjacent, an overlapping and an empty index."""
    def daily(start, periods):
        # Daily range in the parametrized timezone.
        return pd.date_range(start, freq='D', periods=periods, tz=tz)

    # (left, right, expected union)
    cases = [
        (daily('1/1/2000', 5), daily('1/6/2000', 5), daily('1/1/2000', 10)),
        (daily('1/1/2000', 5), daily('1/4/2000', 5), daily('1/1/2000', 8)),
        (daily('1/1/2000', 5), pd.DatetimeIndex([], tz=tz),
         daily('1/1/2000', 5)),
    ]
    for left, right, wanted in cases:
        tm.assert_index_equal(left.union(right), wanted)
def test_union_coverage(self):
    """Union with an unsorted copy, and of an empty slice with the index."""
    unsorted = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02'])
    ordered = DatetimeIndex(unsorted.sort_values(), freq='infer')

    merged = ordered.union(unsorted)
    tm.assert_index_equal(merged, ordered)

    # Empty left operand: result equals the right one, freq included.
    merged = ordered[:0].union(ordered)
    tm.assert_index_equal(merged, ordered)
    assert merged.freq == ordered.freq
def test_union_bug_1730(self):
    """Union of 3-hourly and 4-hourly ranges equals the sorted set union."""
    every_3h = date_range('1/1/2012', periods=4, freq='3H')
    every_4h = date_range('1/1/2012', periods=4, freq='4H')

    merged = every_3h.union(every_4h)
    wanted = DatetimeIndex(sorted(set(every_3h) | set(every_4h)))
    tm.assert_index_equal(merged, wanted)
def test_union_bug_1745(self):
    """Union with an unsorted right operand equals the sorted set union."""
    left = DatetimeIndex(['2012-05-11 15:19:49.695000'])
    right = DatetimeIndex(['2012-05-29 13:04:21.322000',
                           '2012-05-11 15:27:24.873000',
                           '2012-05-11 15:31:05.350000'])

    merged = left.union(right)
    wanted = DatetimeIndex(sorted(set(left) | set(right)))
    tm.assert_index_equal(merged, wanted)
def test_union_bug_4564(self):
    """Union with a copy shifted by 15 minutes equals the sorted set union."""
    from pandas import DateOffset

    daily = date_range("2013-01-01", "2013-02-01")
    offset_by_15 = daily + DateOffset(minutes=15)

    merged = daily.union(offset_by_15)
    wanted = DatetimeIndex(sorted(set(daily) | set(offset_by_15)))
    tm.assert_index_equal(merged, wanted)
def test_union_freq_both_none(self):
    """Self-union of an index with freq cleared keeps freq None (GH11086)."""
    no_freq = bdate_range('20150101', periods=10)
    no_freq.freq = None

    merged = no_freq.union(no_freq)
    tm.assert_index_equal(merged, no_freq)
    assert merged.freq is None
def test_union_dataframe_index(self):
    """A frame built from two Series spans the union of their indexes."""
    later = date_range('1/1/1999', '1/1/2012', freq='MS')
    s1 = Series(np.random.randn(len(later)), later)

    earlier = date_range('1/1/1980', '12/1/2001', freq='MS')
    s2 = Series(np.random.randn(len(earlier)), earlier)

    frame = DataFrame({'s1': s1, 's2': s2})
    full_span = pd.date_range('1/1/1980', '1/1/2012', freq='MS')
    tm.assert_index_equal(frame.index, full_span)
def test_union_with_DatetimeIndex(self):
    """Union between an Int64Index and a DatetimeIndex, in both directions.

    Nothing is asserted on the results; the test passes when neither call
    raises.
    """
    ints = Int64Index(np.arange(0, 20, 2))
    dates = date_range(start='2012-01-03 00:00:00', periods=10, freq='D')

    ints.union(dates)
    # The original note on this call recorded it failing with
    # "AttributeError: can't set attribute".
    dates.union(ints)
# TODO: moved from test_datetimelike; de-duplicate with version below
def test_intersection2(self):
    """Intersection with a tail slice, with array-likes, and with strings."""
    whole = tm.makeDateIndex(10)
    tail = whole[5:]
    assert tm.equalContents(whole.intersection(tail), tail)

    # GH 10149: the other operand may be an ndarray, a Series or a list.
    for klass in [np.array, Series, list]:
        other = klass(tail.values)
        assert tm.equalContents(whole.intersection(other), tail)

    # Nothing in common with a string index: empty object-dtype Index.
    letters = Index(['a', 'b', 'c'])
    common = whole.intersection(letters)
    tm.assert_index_equal(common, pd.Index([], dtype=object))
@pytest.mark.parametrize("tz", [None, 'Asia/Tokyo', 'US/Eastern',
                                'dateutil/US/Pacific'])
@pytest.mark.parametrize("sort", [True, False])
def test_intersection(self, tz, sort):
    """Name, freq and tz of intersections for regular and irregular input."""
    # GH 4690 (with tz)
    base = date_range('6/1/2000', '6/30/2000', freq='D', name='idx')

    # (other, expected) pairs for the regular daily base.
    regular_cases = [
        # Same name on both sides: the name is kept.
        (date_range('5/15/2000', '6/20/2000', freq='D', name='idx'),
         date_range('6/1/2000', '6/20/2000', freq='D', name='idx')),
        # Different names: the result name is reset.
        (date_range('5/15/2000', '6/20/2000', freq='D', name='other'),
         date_range('6/1/2000', '6/20/2000', freq='D', name=None)),
        # No overlap: empty result.
        (date_range('7/1/2000', '7/31/2000', freq='D', name='idx'),
         DatetimeIndex([], name='idx')),
    ]
    for other, wanted in regular_cases:
        common = base.intersection(other)
        tm.assert_index_equal(common, wanted)
        assert common.name == wanted.name
        assert common.freq == wanted.freq
        assert common.tz == wanted.tz

    # Non-monotonic base in the parametrized timezone.
    base = DatetimeIndex(['2011-01-05', '2011-01-04',
                          '2011-01-02', '2011-01-03'],
                         tz=tz, name='idx')
    scattered = ['2011-01-04', '2011-01-02', '2011-02-02', '2011-02-03']
    overlap = ['2011-01-04', '2011-01-02']

    irregular_cases = [
        (DatetimeIndex(scattered, tz=tz, name='idx'),
         DatetimeIndex(overlap, tz=tz, name='idx')),
        (DatetimeIndex(scattered, tz=tz, name='other'),
         DatetimeIndex(overlap, tz=tz, name=None)),
        # GH 7880
        (date_range('7/1/2000', '7/31/2000', freq='D', tz=tz, name='idx'),
         DatetimeIndex([], tz=tz, name='idx')),
    ]
    for other, wanted in irregular_cases:
        common = base.intersection(other, sort=sort)
        if sort:
            wanted = wanted.sort_values()
        tm.assert_index_equal(common, wanted)
        assert common.name == wanted.name
        assert common.freq is None
        assert common.tz == wanted.tz
def test_intersection_empty(self):
    """Intersection with an empty same-freq slice is empty (GH2129)."""
    minutely = date_range('6/1/2000', '6/15/2000', freq='T')
    nothing = minutely[0:0]

    # Checked with the empty slice on either side.
    assert len(nothing.intersection(minutely)) == 0
    assert len(minutely.intersection(nothing)) == 0
def test_intersection_bug_1708(self):
    """``&`` of a 12-hourly range and its one-hour shift is empty."""
    from pandas import DateOffset

    every_12h = date_range('1/1/2012', periods=4, freq='12H')
    shifted = every_12h + DateOffset(hours=1)

    common = every_12h & shifted
    assert len(common) == 0
@pytest.mark.parametrize("tz", tz)
@pytest.mark.parametrize("sort", [True, False])
def test_difference(self, tz, sort):
    """Difference against a disjoint, an overlapping and an empty index."""
    rng_dates = ['1/2/2000', '1/3/2000', '1/1/2000', '1/4/2000',
                 '1/5/2000']

    def unsorted(dates):
        # DatetimeIndex in the parametrized timezone, order as given.
        return pd.DatetimeIndex(dates, tz=tz)

    # (left, right, expected difference)
    cases = [
        (unsorted(rng_dates),
         pd.date_range('1/6/2000', freq='D', periods=5, tz=tz),
         unsorted(rng_dates)),
        (unsorted(rng_dates),
         pd.date_range('1/4/2000', freq='D', periods=5, tz=tz),
         unsorted(rng_dates[:3])),
        (unsorted(rng_dates),
         pd.DatetimeIndex([], tz=tz),
         unsorted(rng_dates)),
    ]
    for left, right, wanted in cases:
        remaining = left.difference(right, sort)
        if sort:
            wanted = wanted.sort_values()
        tm.assert_index_equal(remaining, wanted)
@pytest.mark.parametrize("sort", [True, False])
def test_difference_freq(self, sort):
    """Difference of DatetimeIndex should not preserve frequency (GH14323)."""
    index = date_range("20160920", "20160925", freq="D")

    # Removing the middle leaves the two ends.
    middle = date_range("20160921", "20160924", freq="D")
    wanted = DatetimeIndex(["20160920", "20160925"], freq=None)
    remaining = index.difference(middle, sort)
    tm.assert_index_equal(remaining, wanted)
    tm.assert_attr_equal('freq', remaining, wanted)

    # Removing the end leaves the first two days.
    end = date_range("20160922", "20160925", freq="D")
    remaining = index.difference(end, sort)
    wanted = DatetimeIndex(["20160920", "20160921"], freq=None)
    tm.assert_index_equal(remaining, wanted)
    tm.assert_attr_equal('freq', remaining, wanted)
@pytest.mark.parametrize("sort", [True, False])
def test_datetimeindex_diff(self, sort):
    """Ranges of 100 and 98 quarters from one start differ by two entries."""
    first_quarter = datetime(1997, 12, 31)
    longer = date_range(freq='Q-JAN', start=first_quarter, periods=100)
    shorter = date_range(freq='Q-JAN', start=first_quarter, periods=98)
    leftover = longer.difference(shorter, sort)
    assert len(leftover) == 2
def test_datetimeindex_union_join_empty(self):
    """Union/join with an empty plain Index still gives a DatetimeIndex."""
    dti = date_range(start='1/1/2001', end='2/1/2001', freq='D')
    empty = Index([])

    result = dti.union(empty)
    assert isinstance(result, DatetimeIndex)
    # The original asserted ``result is result``, which is always true and
    # checks nothing. Adding no elements must leave the contents unchanged,
    # so the result is compared against the source index instead.
    tm.assert_index_equal(result, dti)

    result = dti.join(empty)
    assert isinstance(result, DatetimeIndex)
def test_join_nonunique(self):
    """Outer join of two indexes with duplicated stamps is monotonic."""
    later_twice = to_datetime(['2012-11-06 16:00:11.477563',
                               '2012-11-06 16:00:11.477563'])
    earlier_twice = to_datetime(['2012-11-06 15:11:09.006507',
                                 '2012-11-06 15:11:09.006507'])
    joined = later_twice.join(earlier_twice, how='outer')
    assert joined.is_monotonic
class TestBusinessDatetimeIndex(object):
def setup_method(self, method):
    """Build the business-day range shared by the tests in this class."""
    business_days = bdate_range(START, END)
    self.rng = business_days
def test_union(self):
    """Result types of unions between slices of the business-day range."""
    # Overlapping slices.
    merged = self.rng[:10].union(self.rng[5:10])
    assert isinstance(merged, DatetimeIndex)

    # Non-overlapping, with a gap in the middle.
    merged = self.rng[:5].union(self.rng[10:])
    assert isinstance(merged, Index)

    # Non-overlapping, no gap.
    head = self.rng[:5]
    tail = self.rng[5:10]
    merged = head.union(tail)
    assert isinstance(merged, DatetimeIndex)

    # Operand order does not matter.
    tm.assert_index_equal(tail.union(head), merged)

    # Overlapping, but with a different offset.
    month_ends = date_range(START, END, freq=BMonthEnd())
    merged = self.rng.union(month_ends)
    assert isinstance(merged, DatetimeIndex)
def test_outer_join(self):
# should just behave as union
# overlapping
left = self.rng[:10]
right = self.rng[5:10]
the_join = left.join(right, how='outer')
assert isinstance(the_join, DatetimeIndex)
# non-overlapping, gap in middle
left = self.rng[:5]
right = self.rng[10:]
the_join = left.join(right, how='outer')
assert isinstance(the_join, DatetimeIndex)
assert the_join.freq is None
# non-overlapping, no gap
left = self.rng[:5]
right = self.rng[5:10]
the_join = left.join(right, how='outer')
assert isinstance(the_join, DatetimeIndex)
# overlapping, but different offset
rng = date_range(START, END, freq=BMonthEnd())
the_join = self.rng.join(rng, how='outer')
assert isinstance(the_join, DatetimeIndex)
assert the_join.freq is None
def test_union_not_cacheable(self):
rng = date_range('1/1/2000', periods=50, freq=Minute())
rng1 = rng[10:]
rng2 = rng[:25]
the_union = rng1.union(rng2)
tm.assert_index_equal(the_union, rng)
rng1 = rng[10:]
rng2 = rng[15:35]
the_union = rng1.union(rng2)
expected = rng[10:]
tm.assert_index_equal(the_union, expected)
def test_intersection(self):
rng = date_range('1/1/2000', periods=50, freq=Minute())
rng1 = rng[10:]
rng2 = rng[:25]
the_int = rng1.intersection(rng2)
expected = rng[10:25]
tm.assert_index_equal(the_int, expected)
assert isinstance(the_int, DatetimeIndex)
assert the_int.freq == rng.freq
the_int = rng1.intersection(rng2.view(DatetimeIndex))
tm.assert_index_equal(the_int, expected)
# non-overlapping
the_int = rng[:10].intersection(rng[10:])
expected = DatetimeIndex([])
tm.assert_index_equal(the_int, expected)
def test_intersection_bug(self):
# GH #771
a = bdate_range('11/30/2011', '12/31/2011')
b = bdate_range('12/10/2011', '12/20/2011')
result = a.intersection(b)
tm.assert_index_equal(result, b)
def test_month_range_union_tz_pytz(self):
from pytz import timezone
tz = timezone('US/Eastern')
early_start = datetime(2011, 1, 1)
early_end = datetime(2011, 3, 1)
late_start = datetime(2011, 3, 1)
late_end = datetime(2011, 5, 1)
early_dr = date_range(start=early_start, end=early_end, tz=tz,
freq=MonthEnd())
late_dr = date_range(start=late_start, end=late_end, tz=tz,
freq=MonthEnd())
early_dr.union(late_dr)
@td.skip_if_windows_python_3
def test_month_range_union_tz_dateutil(self):
from pandas._libs.tslibs.timezones import dateutil_gettz
tz = dateutil_gettz('US/Eastern')
early_start = datetime(2011, 1, 1)
early_end = datetime(2011, 3, 1)
late_start = datetime(2011, 3, 1)
late_end = datetime(2011, 5, 1)
early_dr = date_range(start=early_start, end=early_end, tz=tz,
freq=MonthEnd())
late_dr = date_range(start=late_start, end=late_end, tz=tz,
freq=MonthEnd())
early_dr.union(late_dr)
class TestCustomDatetimeIndex(object):
    """Union / outer-join / intersection tests on a custom business-day range."""

    def setup_method(self, method):
        # Rebuilt before each test with the custom business-day frequency
        # 'C'; START and END are defined outside this class.
        self.rng = bdate_range(START, END, freq='C')

    def test_union(self):
        """Union of slices taken from the same custom business-day range."""
        cdays = self.rng

        # overlapping
        merged = cdays[:10].union(cdays[5:10])
        assert isinstance(merged, DatetimeIndex)

        # non-overlapping, gap in middle
        merged = cdays[:5].union(cdays[10:])
        assert isinstance(merged, Index)

        # non-overlapping, no gap
        head, tail = cdays[:5], cdays[5:10]
        merged = head.union(tail)
        assert isinstance(merged, DatetimeIndex)

        # order does not matter
        tm.assert_index_equal(tail.union(head), merged)

        # overlapping, but different offset
        month_ends = date_range(START, END, freq=BMonthEnd())
        merged = cdays.union(month_ends)
        assert isinstance(merged, DatetimeIndex)

    def test_outer_join(self):
        """Outer join should just behave as union."""
        cdays = self.rng

        # overlapping
        joined = cdays[:10].join(cdays[5:10], how='outer')
        assert isinstance(joined, DatetimeIndex)

        # non-overlapping, gap in middle
        joined = cdays[:5].join(cdays[10:], how='outer')
        assert isinstance(joined, DatetimeIndex)
        assert joined.freq is None

        # non-overlapping, no gap
        joined = cdays[:5].join(cdays[5:10], how='outer')
        assert isinstance(joined, DatetimeIndex)

        # overlapping, but different offset
        month_ends = date_range(START, END, freq=BMonthEnd())
        joined = cdays.join(month_ends, how='outer')
        assert isinstance(joined, DatetimeIndex)
        assert joined.freq is None

    def test_intersection_bug(self):
        # GH #771
        outer = bdate_range('11/30/2011', '12/31/2011', freq='C')
        inner = bdate_range('12/10/2011', '12/20/2011', freq='C')
        tm.assert_index_equal(outer.intersection(inner), inner)
@@ -1,206 +0,0 @@
from __future__ import division
import numpy as np
import pytest
from pandas.core.dtypes.dtypes import CategoricalDtype, IntervalDtype
from pandas import (
CategoricalIndex, Index, IntervalIndex, NaT, Timedelta, Timestamp,
interval_range)
import pandas.util.testing as tm
class Base(object):
    """Tests common to IntervalIndex with any subtype.

    Every test takes an ``index`` argument; the subclasses visible in this
    module provide it as a parametrized fixture.
    """

    def test_astype_idempotent(self, index):
        # Casting to the generic 'interval' string or to the index's own
        # dtype must give back an equal index.
        for target in ('interval', index.dtype):
            tm.assert_index_equal(index.astype(target), index)

    def test_astype_object(self, index):
        as_object = index.astype(object)
        tm.assert_index_equal(as_object, Index(index.values, dtype='object'))
        assert not as_object.equals(index)

    def test_astype_category(self, index):
        # default categorical dtype, given as a string and as an instance
        expected = CategoricalIndex(index.values)
        for target in ('category', CategoricalDtype()):
            tm.assert_index_equal(index.astype(target), expected)

        # non-default params
        categories = index.dropna().unique().values[:-1]
        cat_dtype = CategoricalDtype(categories=categories, ordered=True)
        expected = CategoricalIndex(
            index.values, categories=categories, ordered=True)
        tm.assert_index_equal(index.astype(cat_dtype), expected)

    @pytest.mark.parametrize('dtype', [
        'int64', 'uint64', 'float64', 'complex128', 'period[M]',
        'timedelta64', 'timedelta64[ns]', 'datetime64', 'datetime64[ns]',
        'datetime64[ns, US/Eastern]'])
    def test_astype_cannot_cast(self, index, dtype):
        with pytest.raises(TypeError,
                           match='Cannot cast IntervalIndex to dtype'):
            index.astype(dtype)

    def test_astype_invalid_dtype(self, index):
        with pytest.raises(TypeError,
                           match="data type 'fake_dtype' not understood"):
            index.astype('fake_dtype')
class TestIntSubtype(Base):
    """Tests specific to IntervalIndex with integer-like subtype"""

    # One int64-based and one uint64-based index; these are the params of
    # the ``index`` fixture below.
    indexes = [
        IntervalIndex.from_breaks(np.arange(-10, 11, dtype='int64')),
        IntervalIndex.from_breaks(
            np.arange(100, dtype='uint64'), closed='left'),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        return request.param

    @pytest.mark.parametrize('subtype', [
        'float64', 'datetime64[ns]', 'timedelta64[ns]'])
    def test_subtype_conversion(self, index, subtype):
        converted = index.astype(IntervalDtype(subtype))

        # Casting each side on its own must describe the same intervals.
        left = index.left.astype(subtype)
        right = index.right.astype(subtype)
        expected = IntervalIndex.from_arrays(left, right, closed=index.closed)
        tm.assert_index_equal(converted, expected)

    @pytest.mark.parametrize('subtype_start, subtype_end', [
        ('int64', 'uint64'), ('uint64', 'int64')])
    def test_subtype_integer(self, subtype_start, subtype_end):
        source = IntervalIndex.from_breaks(
            np.arange(100, dtype=subtype_start))
        converted = source.astype(IntervalDtype(subtype_end))

        left = source.left.astype(subtype_end)
        right = source.right.astype(subtype_end)
        expected = IntervalIndex.from_arrays(left, right, closed=source.closed)
        tm.assert_index_equal(converted, expected)

    @pytest.mark.xfail(reason='GH#15832')
    def test_subtype_integer_errors(self):
        # int64 -> uint64 fails with negative values
        signed = interval_range(-10, 10)
        with pytest.raises(ValueError):
            signed.astype(IntervalDtype('uint64'))
class TestFloatSubtype(Base):
    """Tests specific to IntervalIndex with float subtype"""

    # A plain float range and an index containing a NaN interval and a
    # repeated interval; these are the params of the ``index`` fixture below.
    indexes = [
        interval_range(-10.0, 10.0, closed='neither'),
        IntervalIndex.from_arrays([-1.5, np.nan, 0., 0., 1.5],
                                  [-0.5, np.nan, 1., 1., 3.],
                                  closed='both'),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        return request.param

    @pytest.mark.parametrize('subtype', ['int64', 'uint64'])
    def test_subtype_integer(self, subtype):
        floats = interval_range(0.0, 10.0)
        int_dtype = IntervalDtype(subtype)

        converted = floats.astype(int_dtype)
        left = floats.left.astype(subtype)
        right = floats.right.astype(subtype)
        expected = IntervalIndex.from_arrays(left, right, closed=floats.closed)
        tm.assert_index_equal(converted, expected)

        # raises with NA
        with pytest.raises(ValueError, match='Cannot convert NA to integer'):
            floats.insert(0, np.nan).astype(int_dtype)

    @pytest.mark.xfail(reason='GH#15832')
    def test_subtype_integer_errors(self):
        # float64 -> uint64 fails with negative values
        negative = interval_range(-10.0, 10.0)
        with pytest.raises(ValueError):
            negative.astype(IntervalDtype('uint64'))

        # float64 -> integer-like fails with non-integer valued floats
        fractional = interval_range(0.0, 10.0, freq=0.25)
        for subtype in ('int64', 'uint64'):
            with pytest.raises(ValueError):
                fractional.astype(IntervalDtype(subtype))

    @pytest.mark.parametrize('subtype', ['datetime64[ns]', 'timedelta64[ns]'])
    def test_subtype_datetimelike(self, index, subtype):
        target = IntervalDtype(subtype)
        with pytest.raises(
                TypeError,
                match='Cannot convert .* to .*; subtypes are incompatible'):
            index.astype(target)
class TestDatetimelikeSubtype(Base):
    """Tests specific to IntervalIndex with datetime-like subtype"""

    # Timestamp-based (naive, with NaT inserted, tz-aware) and
    # Timedelta-based (plain, with NaT inserted) indexes; these are the
    # params of the ``index`` fixture below.
    indexes = [
        interval_range(Timestamp('2018-01-01'), periods=10, closed='neither'),
        interval_range(Timestamp('2018-01-01'), periods=10).insert(2, NaT),
        interval_range(Timestamp('2018-01-01', tz='US/Eastern'), periods=10),
        interval_range(Timedelta('0 days'), periods=10, closed='both'),
        interval_range(Timedelta('0 days'), periods=10).insert(2, NaT),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        return request.param

    @pytest.mark.parametrize('subtype', ['int64', 'uint64'])
    def test_subtype_integer(self, index, subtype):
        converted = index.astype(IntervalDtype(subtype))

        left = index.left.astype(subtype)
        right = index.right.astype(subtype)
        expected = IntervalIndex.from_arrays(left, right, closed=index.closed)
        tm.assert_index_equal(converted, expected)

    def test_subtype_float(self, index):
        float_dtype = IntervalDtype('float64')
        with pytest.raises(
                TypeError,
                match='Cannot convert .* to .*; subtypes are incompatible'):
            index.astype(float_dtype)

    def test_subtype_datetimelike(self):
        msg = 'Cannot convert .* to .*; subtypes are incompatible'

        # datetime -> timedelta raises
        td_dtype = IntervalDtype('timedelta64[ns]')

        naive = interval_range(Timestamp('2018-01-01'), periods=10)
        with pytest.raises(TypeError, match=msg):
            naive.astype(td_dtype)

        aware = interval_range(Timestamp('2018-01-01', tz='CET'), periods=10)
        with pytest.raises(TypeError, match=msg):
            aware.astype(td_dtype)

        # timedelta -> datetime raises
        dt_dtype = IntervalDtype('datetime64[ns]')
        deltas = interval_range(Timedelta('0 days'), periods=10)
        with pytest.raises(TypeError, match=msg):
            deltas.astype(dt_dtype)
@@ -1,389 +0,0 @@
from __future__ import division
from functools import partial
import numpy as np
import pytest
from pandas.compat import lzip
from pandas.core.dtypes.common import is_categorical_dtype
from pandas.core.dtypes.dtypes import IntervalDtype
from pandas import (
Categorical, CategoricalIndex, Float64Index, Index, Int64Index, Interval,
IntervalIndex, date_range, notna, period_range, timedelta_range)
from pandas.core.arrays import IntervalArray
import pandas.core.common as com
import pandas.util.testing as tm
@pytest.fixture(params=[None, 'foo'])
def name(request):
    """Parametrized fixture yielding ``None`` and ``'foo'`` in turn."""
    current = request.param
    return current
class Base(object):
    """
    Common tests for all variations of IntervalIndex construction.  Input
    data is supplied in breaks format and converted by the subclass method
    get_kwargs_from_breaks into the keyword arguments that the constructor
    under test expects.
    """

    @pytest.mark.parametrize('breaks', [
        [3, 14, 15, 92, 653],
        np.arange(10, dtype='int64'),
        Int64Index(range(-10, 11)),
        Float64Index(np.arange(20, 30, 0.5)),
        date_range('20180101', periods=10),
        date_range('20180101', periods=10, tz='US/Eastern'),
        timedelta_range('1 day', periods=10)])
    def test_constructor(self, constructor, breaks, closed, name):
        kwargs = self.get_kwargs_from_breaks(breaks, closed)
        built = constructor(closed=closed, name=name, **kwargs)

        assert built.closed == closed
        assert built.name == name
        # inputs without a ``dtype`` attribute (the plain list) are compared
        # against 'int64'
        assert built.dtype.subtype == getattr(breaks, 'dtype', 'int64')

        left_breaks, right_breaks = breaks[:-1], breaks[1:]
        tm.assert_index_equal(built.left, Index(left_breaks))
        tm.assert_index_equal(built.right, Index(right_breaks))

    @pytest.mark.parametrize('breaks, subtype', [
        (Int64Index([0, 1, 2, 3, 4]), 'float64'),
        (Int64Index([0, 1, 2, 3, 4]), 'datetime64[ns]'),
        (Int64Index([0, 1, 2, 3, 4]), 'timedelta64[ns]'),
        (Float64Index([0, 1, 2, 3, 4]), 'int64'),
        (date_range('2017-01-01', periods=5), 'int64'),
        (timedelta_range('1 day', periods=5), 'int64')])
    def test_constructor_dtype(self, constructor, breaks, subtype):
        # GH 19262: conversion via dtype parameter
        converted_breaks = breaks.astype(subtype)
        expected = constructor(**self.get_kwargs_from_breaks(converted_breaks))

        raw_kwargs = self.get_kwargs_from_breaks(breaks)
        iv_dtype = IntervalDtype(subtype)

        # the dtype may be given as an object or as its string form
        for dtype in (iv_dtype, str(iv_dtype)):
            built = constructor(dtype=dtype, **raw_kwargs)
            tm.assert_index_equal(built, expected)

    @pytest.mark.parametrize('breaks', [
        [np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
    def test_constructor_nan(self, constructor, breaks, closed):
        # GH 18421
        kwargs = self.get_kwargs_from_breaks(breaks)
        built = constructor(closed=closed, **kwargs)

        assert built.closed == closed
        assert built.dtype.subtype == np.float64

        # n breaks describe n - 1 intervals
        expected_values = np.array(breaks[:-1], dtype=object)
        tm.assert_numpy_array_equal(built._ndarray_values, expected_values)

    @pytest.mark.parametrize('breaks', [
        [],
        np.array([], dtype='int64'),
        np.array([], dtype='float64'),
        np.array([], dtype='datetime64[ns]'),
        np.array([], dtype='timedelta64[ns]')])
    def test_constructor_empty(self, constructor, breaks, closed):
        # GH 18421
        kwargs = self.get_kwargs_from_breaks(breaks)
        built = constructor(closed=closed, **kwargs)

        assert built.empty
        assert built.closed == closed
        # the plain empty list has no dtype and is compared against int64
        assert built.dtype.subtype == getattr(breaks, 'dtype', np.int64)

        empty_objects = np.array([], dtype=object)
        tm.assert_numpy_array_equal(built._ndarray_values, empty_objects)

    @pytest.mark.parametrize('breaks', [
        tuple('0123456789'),
        list('abcdefghij'),
        np.array(list('abcdefghij'), dtype=object),
        np.array(list('abcdefghij'), dtype='<U1')])
    def test_constructor_string(self, constructor, breaks):
        # GH 19016
        expected_msg = ('category, object, and string subtypes are not '
                        'supported for IntervalIndex')
        with pytest.raises(TypeError, match=expected_msg):
            constructor(**self.get_kwargs_from_breaks(breaks))

    @pytest.mark.parametrize('cat_constructor', [
        Categorical, CategoricalIndex])
    def test_constructor_categorical_valid(self, constructor, cat_constructor):
        # GH 21243/21253
        is_index_partial = (isinstance(constructor, partial) and
                            constructor.func is Index)
        if is_index_partial:
            # Index is defined to create CategoricalIndex from categorical data
            pytest.skip()

        breaks = np.arange(10, dtype='int64')
        expected = IntervalIndex.from_breaks(breaks)

        kwargs = self.get_kwargs_from_breaks(cat_constructor(breaks))
        tm.assert_index_equal(constructor(**kwargs), expected)

    def test_generic_errors(self, constructor):
        # filler input data to be used when supplying invalid kwargs
        filler = self.get_kwargs_from_breaks(range(10))

        # invalid closed
        with pytest.raises(ValueError,
                           match="invalid option for 'closed': invalid"):
            constructor(closed='invalid', **filler)

        # unsupported dtype
        with pytest.raises(TypeError,
                           match='dtype must be an IntervalDtype, got int64'):
            constructor(dtype='int64', **filler)

        # invalid dtype
        with pytest.raises(TypeError,
                           match="data type 'invalid' not understood"):
            constructor(dtype='invalid', **filler)

        # no point in nesting periods in an IntervalIndex
        period_kwargs = self.get_kwargs_from_breaks(
            period_range('2000-01-01', periods=10))
        with pytest.raises(
                ValueError,
                match='Period dtypes are not supported, '
                      'use a PeriodIndex instead'):
            constructor(**period_kwargs)

        # decreasing values
        reversed_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))
        with pytest.raises(
                ValueError,
                match='left side of interval must be <= right side'):
            constructor(**reversed_kwargs)
class TestFromArrays(Base):
    """Tests specific to IntervalIndex.from_arrays"""

    @pytest.fixture
    def constructor(self):
        return IntervalIndex.from_arrays

    def get_kwargs_from_breaks(self, breaks, closed='right'):
        """
        Convert intervals in breaks format to the ``left``/``right`` kwargs
        expected by IntervalIndex.from_arrays.  ``closed`` is accepted for
        signature parity with the other subclasses and is not used here.
        """
        left_side, right_side = breaks[:-1], breaks[1:]
        return {'left': left_side, 'right': right_side}

    def test_constructor_errors(self):
        # GH 19016: categorical data
        cat = Categorical(list('01234abcde'), ordered=True)
        expected_msg = ('category, object, and string subtypes are not '
                        'supported for IntervalIndex')
        with pytest.raises(TypeError, match=expected_msg):
            IntervalIndex.from_arrays(cat[:-1], cat[1:])

        # unequal length
        with pytest.raises(ValueError,
                           match='left and right must have the same length'):
            IntervalIndex.from_arrays([0, 1, 2], [2, 3])

    @pytest.mark.parametrize('left_subtype, right_subtype', [
        (np.int64, np.float64), (np.float64, np.int64)])
    def test_mixed_float_int(self, left_subtype, right_subtype):
        """mixed int/float left/right results in float for both sides"""
        lefts = np.arange(9, dtype=left_subtype)
        rights = np.arange(1, 10, dtype=right_subtype)
        built = IntervalIndex.from_arrays(lefts, rights)

        tm.assert_index_equal(built.left, Float64Index(lefts))
        tm.assert_index_equal(built.right, Float64Index(rights))
        assert built.dtype.subtype == np.float64
class TestFromBreaks(Base):
    """Tests specific to IntervalIndex.from_breaks"""

    @pytest.fixture
    def constructor(self):
        return IntervalIndex.from_breaks

    def get_kwargs_from_breaks(self, breaks, closed='right'):
        """
        Wrap the breaks in the single ``breaks`` kwarg expected by
        IntervalIndex.from_breaks.  ``closed`` is accepted for signature
        parity with the other subclasses and is not used here.
        """
        return dict(breaks=breaks)

    def test_constructor_errors(self):
        # GH 19016: categorical data
        cat = Categorical(list('01234abcde'), ordered=True)
        expected_msg = ('category, object, and string subtypes are not '
                        'supported for IntervalIndex')
        with pytest.raises(TypeError, match=expected_msg):
            IntervalIndex.from_breaks(cat)

    def test_length_one(self):
        """breaks of length one produce an empty IntervalIndex"""
        single = IntervalIndex.from_breaks([0])
        empty = IntervalIndex.from_breaks([])
        tm.assert_index_equal(single, empty)
class TestFromTuples(Base):
    """Tests specific to IntervalIndex.from_tuples"""

    @pytest.fixture
    def constructor(self):
        return IntervalIndex.from_tuples

    def get_kwargs_from_breaks(self, breaks, closed='right'):
        """
        Convert intervals in breaks format to the ``data`` kwarg expected by
        IntervalIndex.from_tuples.

        Empty input is passed through unchanged.  Otherwise consecutive
        breaks are zipped into (left, right) tuples, which are returned as a
        list for list/tuple input, re-wrapped through ``breaks._constructor``
        for categorical input, and converted with ``com.asarray_tuplesafe``
        for anything else.  ``closed`` is accepted for signature parity with
        the other subclasses and is not used here.
        """
        if len(breaks) == 0:
            return {'data': breaks}

        tuples = lzip(breaks[:-1], breaks[1:])
        if isinstance(breaks, (list, tuple)):
            return {'data': tuples}
        elif is_categorical_dtype(breaks):
            return {'data': breaks._constructor(tuples)}
        return {'data': com.asarray_tuplesafe(tuples)}

    def test_constructor_errors(self):
        """Invalid items raise, and the message names the offending item."""
        # Local import: ``re`` is only needed to escape the tuple reprs.
        import re

        # non-tuple
        tuples = [(0, 1), 2, (3, 4)]
        msg = 'IntervalIndex.from_tuples received an invalid item, 2'
        with pytest.raises(TypeError, match=msg):
            IntervalIndex.from_tuples(tuples)

        # too few/many items
        # The pattern used to be built from the repr of the whole list, whose
        # unescaped ``[...]`` is a regex character class and therefore did not
        # check the reported tuple at all.  Format the offending tuple and
        # escape it so the full message is verified.
        msg = 'IntervalIndex.from_tuples requires tuples of length 2, got {t}'

        tuples = [(0, 1), (2,), (3, 4)]
        with pytest.raises(ValueError,
                           match=re.escape(msg.format(t=(2,)))):
            IntervalIndex.from_tuples(tuples)

        tuples = [(0, 1), (2, 3, 4), (5, 6)]
        with pytest.raises(ValueError,
                           match=re.escape(msg.format(t=(2, 3, 4)))):
            IntervalIndex.from_tuples(tuples)

    def test_na_tuples(self):
        # tuple (NA, NA) evaluates the same as NA as an element
        na_tuple = [(0, 1), (np.nan, np.nan), (2, 3)]
        idx_na_tuple = IntervalIndex.from_tuples(na_tuple)
        idx_na_element = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        tm.assert_index_equal(idx_na_tuple, idx_na_element)
class TestClassConstructors(Base):
    """Tests specific to the IntervalIndex/Index constructors"""

    @pytest.fixture(params=[IntervalIndex, partial(Index, dtype='interval')],
                    ids=['IntervalIndex', 'Index'])
    def constructor(self, request):
        return request.param

    def get_kwargs_from_breaks(self, breaks, closed='right'):
        """
        Convert intervals in breaks format to the ``data`` kwarg expected by
        the IntervalIndex/Index constructors: a collection of Interval
        objects built from consecutive breaks with the given ``closed``.
        """
        if len(breaks) == 0:
            return {'data': breaks}

        intervals = []
        for left, right in zip(breaks[:-1], breaks[1:]):
            # a missing left endpoint is passed through as-is instead of
            # being wrapped in an Interval
            intervals.append(
                Interval(left, right, closed) if notna(left) else left)

        if isinstance(breaks, list):
            data = intervals
        elif is_categorical_dtype(breaks):
            data = breaks._constructor(intervals)
        else:
            data = np.array(intervals, dtype=object)
        return {'data': data}

    def test_generic_errors(self, constructor):
        """
        override the base class implementation since errors are handled
        differently; checks unnecessary since caught at the Interval level
        """
        pass

    def test_constructor_errors(self, constructor):
        # mismatched closed within intervals with no constructor override
        mixed = [Interval(0, 1, closed='right'), Interval(2, 3, closed='left')]
        with pytest.raises(
                ValueError,
                match='intervals must all be closed on the same side'):
            constructor(mixed)

        # scalar
        scalar_msg = (r'IntervalIndex\(...\) must be called with a collection '
                      'of some kind, 5 was passed')
        with pytest.raises(TypeError, match=scalar_msg):
            constructor(5)

        # not an interval
        not_interval_msg = ("type <(class|type) 'numpy.int64'> with value 0 "
                            "is not an interval")
        with pytest.raises(TypeError, match=not_interval_msg):
            constructor([0, 1])

    @pytest.mark.parametrize('data, closed', [
        ([], 'both'),
        ([np.nan, np.nan], 'neither'),
        ([Interval(0, 3, closed='neither'),
          Interval(2, 5, closed='neither')], 'left'),
        ([Interval(0, 3, closed='left'),
          Interval(2, 5, closed='right')], 'neither'),
        (IntervalIndex.from_breaks(range(5), closed='both'), 'right')])
    def test_override_inferred_closed(self, constructor, data, closed):
        # GH 19370
        if isinstance(data, IntervalIndex):
            endpoints = data.to_tuples()
        else:
            endpoints = []
            for iv in data:
                # NA entries stay as they are; intervals become (left, right)
                endpoints.append((iv.left, iv.right) if notna(iv) else iv)

        expected = IntervalIndex.from_tuples(endpoints, closed=closed)
        built = constructor(data, closed=closed)
        tm.assert_index_equal(built, expected)

    @pytest.mark.parametrize('values_constructor', [
        list, np.array, IntervalIndex, IntervalArray])
    def test_index_object_dtype(self, values_constructor):
        # Index(intervals, dtype=object) is an Index (not an IntervalIndex)
        values = values_constructor(
            [Interval(0, 1), Interval(1, 2), Interval(2, 3)])
        as_object = Index(values, dtype=object)

        assert type(as_object) is Index
        tm.assert_numpy_array_equal(as_object.values, np.array(values))
class TestFromIntervals(TestClassConstructors):
    """
    Tests for IntervalIndex.from_intervals, which is deprecated in favor of
    the IntervalIndex constructor.  Runs the same tests as the IntervalIndex
    constructor plus a deprecation test.  Should only need to delete this
    class when from_intervals is removed.
    """

    @pytest.fixture
    def constructor(self):
        def from_intervals_ignore_warnings(*args, **kwargs):
            # Every call is expected to emit a FutureWarning; assert it here
            # so the inherited tests can use the constructor unchanged.
            expect_warning = tm.assert_produces_warning(
                FutureWarning, check_stacklevel=False)
            with expect_warning:
                return IntervalIndex.from_intervals(*args, **kwargs)

        return from_intervals_ignore_warnings

    def test_deprecated(self):
        intervals = [Interval(0, 1), Interval(1, 2)]
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            IntervalIndex.from_intervals(intervals)

    @pytest.mark.skip(reason='parent class test that is not applicable')
    def test_index_object_dtype(self):
        pass
@@ -1,271 +0,0 @@
from __future__ import division
import numpy as np
import pytest
from pandas import Int64Index, Interval, IntervalIndex
import pandas.util.testing as tm
# Module-level pytest marker: the skip below is applied to every test
# collected from this module, for the reason given.
pytestmark = pytest.mark.skip(reason="new indexing tests for issue 16316")
class TestIntervalIndex(object):
@pytest.mark.parametrize("side", ['right', 'left', 'both', 'neither'])
def test_get_loc_interval(self, closed, side):
idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
for bound in [[0, 1], [1, 2], [2, 3], [3, 4],
[0, 2], [2.5, 3], [-1, 4]]:
# if get_loc is supplied an interval, it should only search
# for exact matches, not overlaps or covers, else KeyError.
if closed == side:
if bound == [0, 1]:
assert idx.get_loc(Interval(0, 1, closed=side)) == 0
elif bound == [2, 3]:
assert idx.get_loc(Interval(2, 3, closed=side)) == 1
else:
with pytest.raises(KeyError):
idx.get_loc(Interval(*bound, closed=side))
else:
with pytest.raises(KeyError):
idx.get_loc(Interval(*bound, closed=side))
@pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5])
def test_get_loc_scalar(self, closed, scalar):
# correct = {side: {query: answer}}.
# If query is not in the dict, that query should raise a KeyError
correct = {'right': {0.5: 0, 1: 0, 2.5: 1, 3: 1},
'left': {0: 0, 0.5: 0, 2: 1, 2.5: 1},
'both': {0: 0, 0.5: 0, 1: 0, 2: 1, 2.5: 1, 3: 1},
'neither': {0.5: 0, 2.5: 1}}
idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
# if get_loc is supplied a scalar, it should return the index of
# the interval which contains the scalar, or KeyError.
if scalar in correct[closed].keys():
assert idx.get_loc(scalar) == correct[closed][scalar]
else:
pytest.raises(KeyError, idx.get_loc, scalar)
def test_slice_locs_with_interval(self):
# increasing monotonically
index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)])
assert index.slice_locs(
start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
assert index.slice_locs(end=Interval(0, 2)) == (0, 1)
assert index.slice_locs(
start=Interval(2, 4), end=Interval(0, 2)) == (2, 1)
# decreasing monotonically
index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)])
assert index.slice_locs(
start=Interval(0, 2), end=Interval(2, 4)) == (2, 1)
assert index.slice_locs(start=Interval(0, 2)) == (2, 3)
assert index.slice_locs(end=Interval(2, 4)) == (0, 1)
assert index.slice_locs(end=Interval(0, 2)) == (0, 3)
assert index.slice_locs(
start=Interval(2, 4), end=Interval(0, 2)) == (0, 3)
# sorted duplicates
index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)])
assert index.slice_locs(
start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
assert index.slice_locs(
start=Interval(2, 4), end=Interval(0, 2)) == (2, 2)
# unsorted duplicates
index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])
pytest.raises(KeyError, index.slice_locs(
start=Interval(0, 2), end=Interval(2, 4)))
pytest.raises(KeyError, index.slice_locs(start=Interval(0, 2)))
assert index.slice_locs(end=Interval(2, 4)) == (0, 2)
pytest.raises(KeyError, index.slice_locs(end=Interval(0, 2)))
pytest.raises(KeyError, index.slice_locs(
start=Interval(2, 4), end=Interval(0, 2)))
# another unsorted duplicates
index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)])
assert index.slice_locs(
start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
assert index.slice_locs(start=Interval(0, 2)) == (0, 4)
assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
assert index.slice_locs(
start=Interval(2, 4), end=Interval(0, 2)) == (2, 2)
def test_slice_locs_with_ints_and_floats_succeeds(self):
# increasing non-overlapping
index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])
assert index.slice_locs(0, 1) == (0, 1)
assert index.slice_locs(0, 2) == (0, 2)
assert index.slice_locs(0, 3) == (0, 2)
assert index.slice_locs(3, 1) == (2, 1)
assert index.slice_locs(3, 4) == (2, 3)
assert index.slice_locs(0, 4) == (0, 3)
# decreasing non-overlapping
index = IntervalIndex.from_tuples([(3, 4), (1, 2), (0, 1)])
assert index.slice_locs(0, 1) == (3, 2)
assert index.slice_locs(0, 2) == (3, 1)
assert index.slice_locs(0, 3) == (3, 1)
assert index.slice_locs(3, 1) == (1, 2)
assert index.slice_locs(3, 4) == (1, 0)
assert index.slice_locs(0, 4) == (3, 0)
@pytest.mark.parametrize("query", [
[0, 1], [0, 2], [0, 3], [3, 1], [3, 4], [0, 4]])
@pytest.mark.parametrize("tuples", [
[(0, 2), (1, 3), (2, 4)], [(2, 4), (1, 3), (0, 2)],
[(0, 2), (0, 2), (2, 4)], [(0, 2), (2, 4), (0, 2)],
[(0, 2), (0, 2), (2, 4), (1, 3)]])
def test_slice_locs_with_ints_and_floats_errors(self, tuples, query):
index = IntervalIndex.from_tuples(tuples)
with pytest.raises(KeyError):
index.slice_locs(query)
@pytest.mark.parametrize('query, expected', [
([Interval(1, 3, closed='right')], [1]),
([Interval(1, 3, closed='left')], [-1]),
([Interval(1, 3, closed='both')], [-1]),
([Interval(1, 3, closed='neither')], [-1]),
([Interval(1, 4, closed='right')], [-1]),
([Interval(0, 4, closed='right')], [-1]),
([Interval(1, 2, closed='right')], [-1]),
([Interval(2, 4, closed='right'), Interval(1, 3, closed='right')],
[2, 1]),
([Interval(1, 3, closed='right'), Interval(0, 2, closed='right')],
[1, -1]),
([Interval(1, 3, closed='right'), Interval(1, 3, closed='left')],
[1, -1])])
def test_get_indexer_with_interval(self, query, expected):
tuples = [(0, 2.5), (1, 3), (2, 4)]
index = IntervalIndex.from_tuples(tuples, closed='right')
result = index.get_indexer(query)
expected = np.array(expected, dtype='intp')
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize('query, expected', [
([-0.5], [-1]),
([0], [-1]),
([0.5], [0]),
([1], [0]),
([1.5], [1]),
([2], [1]),
([2.5], [-1]),
([3], [-1]),
([3.5], [2]),
([4], [2]),
([4.5], [-1]),
([1, 2], [0, 1]),
([1, 2, 3], [0, 1, -1]),
([1, 2, 3, 4], [0, 1, -1, 2]),
([1, 2, 3, 4, 2], [0, 1, -1, 2, 1])])
def test_get_indexer_with_int_and_float(self, query, expected):
tuples = [(0, 1), (1, 2), (3, 4)]
index = IntervalIndex.from_tuples(tuples, closed='right')
result = index.get_indexer(query)
expected = np.array(expected, dtype='intp')
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize('tuples, closed', [
([(0, 2), (1, 3), (3, 4)], 'neither'),
([(0, 5), (1, 4), (6, 7)], 'left'),
([(0, 1), (0, 1), (1, 2)], 'right'),
([(0, 1), (2, 3), (3, 4)], 'both')])
def test_get_indexer_errors(self, tuples, closed):
# IntervalIndex needs non-overlapping for uniqueness when querying
index = IntervalIndex.from_tuples(tuples, closed=closed)
msg = ('cannot handle overlapping indices; use '
'IntervalIndex.get_indexer_non_unique')
with pytest.raises(ValueError, match=msg):
index.get_indexer([0, 2])
@pytest.mark.parametrize('query, expected', [
([-0.5], ([-1], [0])),
([0], ([0], [])),
([0.5], ([0], [])),
([1], ([0, 1], [])),
([1.5], ([0, 1], [])),
([2], ([0, 1, 2], [])),
([2.5], ([1, 2], [])),
([3], ([2], [])),
([3.5], ([2], [])),
([4], ([-1], [0])),
([4.5], ([-1], [0])),
([1, 2], ([0, 1, 0, 1, 2], [])),
([1, 2, 3], ([0, 1, 0, 1, 2, 2], [])),
([1, 2, 3, 4], ([0, 1, 0, 1, 2, 2, -1], [3])),
([1, 2, 3, 4, 2], ([0, 1, 0, 1, 2, 2, -1, 0, 1, 2], [3]))])
def test_get_indexer_non_unique_with_int_and_float(self, query, expected):
tuples = [(0, 2.5), (1, 3), (2, 4)]
index = IntervalIndex.from_tuples(tuples, closed='left')
result_indexer, result_missing = index.get_indexer_non_unique(query)
expected_indexer = Int64Index(expected[0])
expected_missing = np.array(expected[1], dtype='intp')
tm.assert_index_equal(result_indexer, expected_indexer)
tm.assert_numpy_array_equal(result_missing, expected_missing)
# TODO we may also want to test get_indexer for the case when
# the intervals are duplicated, decreasing, non-monotonic, etc..
def test_contains(self):
index = IntervalIndex.from_arrays([0, 1], [1, 2], closed='right')
# __contains__ requires perfect matches to intervals.
assert 0 not in index
assert 1 not in index
assert 2 not in index
assert Interval(0, 1, closed='right') in index
assert Interval(0, 2, closed='right') not in index
assert Interval(0, 0.5, closed='right') not in index
assert Interval(3, 5, closed='right') not in index
assert Interval(-1, 0, closed='left') not in index
assert Interval(0, 1, closed='left') not in index
assert Interval(0, 1, closed='both') not in index
def test_contains_method(self):
    """``contains`` accepts points inside an interval and exact Intervals."""
    index = IntervalIndex.from_arrays([0, 1], [1, 2], closed='right')

    # 0 is the open left edge of the first interval
    assert not index.contains(0)
    for point in (0.1, 0.5, 1):
        assert index.contains(point)

    assert index.contains(Interval(0, 1, closed='right'))
    mismatched = [
        Interval(0, 1, closed='left'),
        Interval(0, 1, closed='both'),
        Interval(0, 2, closed='right'),
        Interval(0, 3, closed='right'),
        Interval(1, 3, closed='right'),
    ]
    for interval in mismatched:
        assert not index.contains(interval)

    # points far outside every interval
    for point in (20, -20):
        assert not index.contains(point)
@@ -1,316 +0,0 @@
from __future__ import division
from datetime import timedelta
import numpy as np
import pytest
from pandas.core.dtypes.common import is_integer
from pandas import (
DateOffset, Interval, IntervalIndex, Timedelta, Timestamp, date_range,
interval_range, timedelta_range)
import pandas.util.testing as tm
from pandas.tseries.offsets import Day
@pytest.fixture(scope='class', params=[None, 'foo'])
def name(request):
    """Class-scoped fixture yielding each index name under test."""
    current_name = request.param
    return current_name
class TestIntervalRange(object):
    """Tests for ``interval_range`` construction and argument validation."""

    @pytest.mark.parametrize('freq, periods', [
        (1, 100), (2.5, 40), (5, 20), (25, 4)])
    def test_constructor_numeric(self, closed, name, freq, periods):
        """Numeric ranges agree for every valid combination of arguments."""
        start, end = 0, 100
        expected = IntervalIndex.from_breaks(
            np.arange(101, step=freq), name=name, closed=closed)

        # each entry supplies three of start/end/periods/freq; the last one
        # (start/end/periods) is the linspace behavior from GH 20976
        combos = [
            dict(start=start, end=end, freq=freq),
            dict(start=start, periods=periods, freq=freq),
            dict(end=end, periods=periods, freq=freq),
            dict(start=start, end=end, periods=periods),
        ]
        for combo in combos:
            result = interval_range(name=name, closed=closed, **combo)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('tz', [None, 'US/Eastern'])
    @pytest.mark.parametrize('freq, periods', [
        ('D', 364), ('2D', 182), ('22D18H', 16), ('M', 11)])
    def test_constructor_timestamp(self, closed, name, freq, periods, tz):
        """Timestamp ranges agree for every valid combination of arguments."""
        start = Timestamp('20180101', tz=tz)
        end = Timestamp('20181231', tz=tz)
        breaks = date_range(start=start, end=end, freq=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        # defined from start/end/freq, start/periods/freq, end/periods/freq
        combos = [
            dict(start=start, end=end, freq=freq),
            dict(start=start, periods=periods, freq=freq),
            dict(end=end, periods=periods, freq=freq),
        ]
        for combo in combos:
            result = interval_range(name=name, closed=closed, **combo)
            tm.assert_index_equal(result, expected)

        # GH 20976: linspace behavior defined from start/end/periods
        if not breaks.freq.isAnchored() and tz is None:
            # matches expected only for non-anchored offsets and tz naive
            # (anchored/DST transitions cause unequal spacing in expected)
            result = interval_range(start=start, end=end, periods=periods,
                                    name=name, closed=closed)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('freq, periods', [
        ('D', 100), ('2D12H', 40), ('5D', 20), ('25D', 4)])
    def test_constructor_timedelta(self, closed, name, freq, periods):
        """Timedelta ranges agree for every valid combination of arguments."""
        start, end = Timedelta('0 days'), Timedelta('100 days')
        breaks = timedelta_range(start=start, end=end, freq=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        # the last combination (start/end/periods) is the linspace behavior
        # from GH 20976
        combos = [
            dict(start=start, end=end, freq=freq),
            dict(start=start, periods=periods, freq=freq),
            dict(end=end, periods=periods, freq=freq),
            dict(start=start, end=end, periods=periods),
        ]
        for combo in combos:
            result = interval_range(name=name, closed=closed, **combo)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('start, end, freq, expected_endpoint', [
        (0, 10, 3, 9),
        (0, 10, 1.5, 9),
        (0.5, 10, 3, 9.5),
        (Timedelta('0D'), Timedelta('10D'), '2D4H', Timedelta('8D16H')),
        (Timestamp('2018-01-01'),
         Timestamp('2018-02-09'),
         'MS',
         Timestamp('2018-02-01')),
        (Timestamp('2018-01-01', tz='US/Eastern'),
         Timestamp('2018-01-20', tz='US/Eastern'),
         '5D12H',
         Timestamp('2018-01-17 12:00:00', tz='US/Eastern'))])
    def test_early_truncation(self, start, end, freq, expected_endpoint):
        """The last right endpoint stops short of ``end`` when freq skips it."""
        # index truncates early if freq causes end to be skipped
        index = interval_range(start=start, end=end, freq=freq)
        last_right = index.right[-1]
        assert last_right == expected_endpoint

    @pytest.mark.parametrize('start, end, freq', [
        (0.5, None, None),
        (None, 4.5, None),
        (0.5, None, 1.5),
        (None, 6.5, 1.5)])
    def test_no_invalid_float_truncation(self, start, end, freq):
        """Float start/end/freq values keep their fractional breaks."""
        # GH 21161
        breaks = ([0.5, 1.5, 2.5, 3.5, 4.5] if freq is None
                  else [0.5, 2.0, 3.5, 5.0, 6.5])
        expected = IntervalIndex.from_breaks(breaks)

        result = interval_range(start=start, end=end, periods=4, freq=freq)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('start, mid, end', [
        (Timestamp('2018-03-10', tz='US/Eastern'),
         Timestamp('2018-03-10 23:30:00', tz='US/Eastern'),
         Timestamp('2018-03-12', tz='US/Eastern')),
        (Timestamp('2018-11-03', tz='US/Eastern'),
         Timestamp('2018-11-04 00:30:00', tz='US/Eastern'),
         Timestamp('2018-11-05', tz='US/Eastern'))])
    def test_linspace_dst_transition(self, start, mid, end):
        """Two periods across a DST change split at the given ``mid``."""
        # GH 20976: linspace behavior defined from start/end/periods
        # accounts for the hour gained/lost during DST transition
        expected = IntervalIndex.from_breaks([start, mid, end])
        result = interval_range(start=start, end=end, periods=2)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('freq', [2, 2.0])
    @pytest.mark.parametrize('end', [10, 10.0])
    @pytest.mark.parametrize('start', [0, 0.0])
    def test_float_subtype(self, start, end, freq):
        """The subtype is float64 as soon as one supplied argument is float."""
        # Has float subtype if any of start/end/freq are float, even if all
        # resulting endpoints can safely be upcast to integers
        # Each case pairs the interval_range arguments with the sum of the
        # numeric arguments that decide the subtype; the last case is the
        # GH 20976 linspace behavior defined from start/end/periods.
        cases = [
            (dict(start=start, end=end, freq=freq), start + end + freq),
            (dict(start=start, periods=5, freq=freq), start + freq),
            (dict(end=end, periods=5, freq=freq), end + freq),
            (dict(start=start, end=end, periods=5), start + end),
        ]
        for kwargs, total in cases:
            subtype = interval_range(**kwargs).dtype.subtype
            wanted = 'int64' if is_integer(total) else 'float64'
            assert subtype == wanted

    def test_constructor_coverage(self):
        """Equivalent spellings of periods/start/end/freq give equal results."""
        # float value for periods
        expected = interval_range(start=0, periods=10)
        result = interval_range(start=0, periods=10.5)
        tm.assert_index_equal(result, expected)

        # equivalent timestamp-like start/end
        start, end = Timestamp('2017-01-01'), Timestamp('2017-01-15')
        expected = interval_range(start=start, end=end)

        result = interval_range(start=start.to_pydatetime(),
                                end=end.to_pydatetime())
        tm.assert_index_equal(result, expected)

        result = interval_range(start=start.asm8, end=end.asm8)
        tm.assert_index_equal(result, expected)

        # equivalent freq with timestamp
        daily = ['D', Day(), Timedelta(days=1), timedelta(days=1),
                 DateOffset(days=1)]
        for freq in daily:
            result = interval_range(start=start, end=end, freq=freq)
            tm.assert_index_equal(result, expected)

        # equivalent timedelta-like start/end
        start, end = Timedelta(days=1), Timedelta(days=10)
        expected = interval_range(start=start, end=end)

        result = interval_range(start=start.to_pytimedelta(),
                                end=end.to_pytimedelta())
        tm.assert_index_equal(result, expected)

        result = interval_range(start=start.asm8, end=end.asm8)
        tm.assert_index_equal(result, expected)

        # equivalent freq with timedelta
        daily = ['D', Day(), Timedelta(days=1), timedelta(days=1)]
        for freq in daily:
            result = interval_range(start=start, end=end, freq=freq)
            tm.assert_index_equal(result, expected)

    def test_errors(self):
        """Invalid arguments raise the expected error type and message."""
        # not enough params (first four) / too many params (last)
        msg = ('Of the four parameters: start, end, periods, and freq, '
               'exactly three must be specified')
        wrong_count = [
            dict(start=0),
            dict(end=5),
            dict(periods=2),
            dict(),
            dict(start=0, end=5, periods=6, freq=1.5),
        ]
        for kwargs in wrong_count:
            with pytest.raises(ValueError, match=msg):
                interval_range(**kwargs)

        # mixed units
        msg = 'start, end, freq need to be type compatible'
        mixed_units = [
            dict(start=0, end=Timestamp('20130101'), freq=2),
            dict(start=0, end=Timedelta('1 day'), freq=2),
            dict(start=0, end=10, freq='D'),
            dict(start=Timestamp('20130101'), end=10, freq='D'),
            dict(start=Timestamp('20130101'),
                 end=Timedelta('1 day'), freq='D'),
            dict(start=Timestamp('20130101'),
                 end=Timestamp('20130110'), freq=2),
            dict(start=Timedelta('1 day'), end=10, freq='D'),
            dict(start=Timedelta('1 day'),
                 end=Timestamp('20130110'), freq='D'),
            dict(start=Timedelta('1 day'),
                 end=Timedelta('10 days'), freq=2),
        ]
        for kwargs in mixed_units:
            with pytest.raises(TypeError, match=msg):
                interval_range(**kwargs)

        # invalid periods
        msg = 'periods must be a number, got foo'
        with pytest.raises(TypeError, match=msg):
            interval_range(start=0, periods='foo')

        # invalid start
        msg = 'start must be numeric or datetime-like, got foo'
        with pytest.raises(ValueError, match=msg):
            interval_range(start='foo', periods=10)

        # invalid end
        msg = r'end must be numeric or datetime-like, got \(0, 1\]'
        with pytest.raises(ValueError, match=msg):
            interval_range(end=Interval(0, 1), periods=10)

        # invalid freq for datetime-like
        msg = 'freq must be numeric or convertible to DateOffset, got foo'
        bad_freq = [
            dict(start=0, end=10, freq='foo'),
            dict(start=Timestamp('20130101'), periods=10, freq='foo'),
            dict(end=Timedelta('1 day'), periods=10, freq='foo'),
        ]
        for kwargs in bad_freq:
            with pytest.raises(ValueError, match=msg):
                interval_range(**kwargs)

        # mixed tz
        eastern = Timestamp('2017-01-01', tz='US/Eastern')
        pacific = Timestamp('2017-01-07', tz='US/Pacific')
        msg = 'Start and end cannot both be tz-aware with different timezones'
        with pytest.raises(TypeError, match=msg):
            interval_range(start=eastern, end=pacific)
@@ -1,173 +0,0 @@
from __future__ import division
from itertools import permutations
import numpy as np
import pytest
from pandas._libs.interval import IntervalTree
from pandas import compat
import pandas.util.testing as tm
def skipif_32bit(param):
    """
    Wrap ``param`` so that parametrize skips it on 32bit platforms.

    Used in this module for the leaf_size values affected by GH 23440.
    """
    skip_mark = pytest.mark.skipif(
        compat.is_platform_32bit(),
        reason='GH 23440: int type mismatch on 32bit')
    return pytest.param(param, marks=skip_mark)
@pytest.fixture(
    scope='class', params=['int32', 'int64', 'float32', 'float64', 'uint64'])
def dtype(request):
    """Class-scoped fixture yielding each numeric dtype name under test."""
    dtype_name = request.param
    return dtype_name
@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10])
def leaf_size(request):
    """
    Fixture supplying the IntervalTree ``leaf_size`` argument; consumed by
    the ``tree`` fixture. Sizes 1 and 2 are skipped on 32bit platforms.
    """
    size = request.param
    return size
@pytest.fixture(params=[
    np.arange(5, dtype='int64'),
    np.arange(5, dtype='int32'),
    np.arange(5, dtype='uint64'),
    np.arange(5, dtype='float64'),
    np.arange(5, dtype='float32'),
    np.array([0, 1, 2, 3, 4, np.nan], dtype='float64'),
    np.array([0, 1, 2, 3, 4, np.nan], dtype='float32')])
def tree(request, leaf_size):
    """
    IntervalTree whose intervals span from each param value to value + 2,
    built with the ``leaf_size`` supplied by the leaf_size fixture.
    """
    lower = request.param
    upper = lower + 2
    return IntervalTree(lower, upper, leaf_size=leaf_size)
class TestIntervalTree(object):
    """Tests for the ``IntervalTree`` lookup structure."""

    def test_get_loc(self, tree):
        """get_loc returns every matching position or raises KeyError."""
        result = tree.get_loc(1)
        expected = np.array([0], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        # result order is not asserted, hence the sort
        result = np.sort(tree.get_loc(2))
        expected = np.array([0, 1], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        with pytest.raises(KeyError):
            tree.get_loc(-1)

    def test_get_indexer(self, tree):
        """get_indexer maps unmatched points to -1; 3.0 raises KeyError."""
        result = tree.get_indexer(np.array([1.0, 5.5, 6.5]))
        expected = np.array([0, 4, -1], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        # 3.0 lies inside more than one of the fixture's width-2 intervals
        with pytest.raises(KeyError):
            tree.get_indexer(np.array([3.0]))

    def test_get_indexer_non_unique(self, tree):
        """get_indexer_non_unique returns all matches plus missing targets."""
        indexer, missing = tree.get_indexer_non_unique(
            np.array([1.0, 2.0, 6.5]))

        # matches for 1.0
        result = indexer[:1]
        expected = np.array([0], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        # matches for 2.0 (order not asserted)
        result = np.sort(indexer[1:3])
        expected = np.array([0, 1], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        # 6.5 has no match
        result = np.sort(indexer[3:])
        expected = np.array([-1], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        # position (within the query) of the unmatched target
        result = missing
        expected = np.array([2], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

    def test_duplicates(self, dtype):
        """Three identical intervals are all returned by non-unique lookups."""
        left = np.array([0, 0, 0], dtype=dtype)
        tree = IntervalTree(left, left + 1)

        result = np.sort(tree.get_loc(0.5))
        expected = np.array([0, 1, 2], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        with pytest.raises(KeyError):
            tree.get_indexer(np.array([0.5]))

        indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))
        result = np.sort(indexer)
        expected = np.array([0, 1, 2], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        result = missing
        expected = np.array([], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

    def test_get_loc_closed(self, closed):
        """An endpoint is found only when its side of the interval is closed."""
        tree = IntervalTree([0], [1], closed=closed)
        for p, errors in [(0, tree.open_left),
                          (1, tree.open_right)]:
            if errors:
                with pytest.raises(KeyError):
                    tree.get_loc(p)
            else:
                result = tree.get_loc(p)
                expected = np.array([0], dtype='intp')
                tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize('leaf_size', [
        skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000])
    def test_get_indexer_closed(self, closed, leaf_size):
        """Endpoint lookups honour ``closed`` for each leaf_size."""
        x = np.arange(1000, dtype='float64')
        found = x.astype('intp')
        not_found = (-1 * np.ones(1000)).astype('intp')

        tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size)

        # interior points are always found
        tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25))

        # left endpoints
        expected = found if tree.closed_left else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0))

        # right endpoints
        expected = found if tree.closed_right else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5))

    @pytest.mark.parametrize('left, right, expected', [
        (np.array([0, 1, 4]), np.array([2, 3, 5]), True),
        (np.array([0, 1, 2]), np.array([5, 4, 3]), True),
        (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
        (np.array([0, 2, 4]), np.array([1, 3, 5]), False),
        (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False)])
    @pytest.mark.parametrize(
        'order', [list(perm) for perm in permutations(range(3))])
    def test_is_overlapping(self, closed, order, left, right, expected):
        """is_overlapping does not depend on the order of the intervals."""
        # GH 23309
        tree = IntervalTree(left[order], right[order], closed=closed)
        result = tree.is_overlapping
        assert result is expected

    @pytest.mark.parametrize(
        'order', [list(perm) for perm in permutations(range(3))])
    def test_is_overlapping_endpoints(self, closed, order):
        """shared endpoints are marked as overlapping"""
        # GH 23309
        left, right = np.arange(3), np.arange(1, 4)
        tree = IntervalTree(left[order], right[order], closed=closed)
        result = tree.is_overlapping
        # compare by value: ``is`` against a str literal depends on interning
        expected = closed == 'both'
        assert result is expected

    @pytest.mark.parametrize('left, right', [
        (np.array([], dtype='int64'), np.array([], dtype='int64')),
        (np.array([0], dtype='int64'), np.array([1], dtype='int64')),
        (np.array([np.nan]), np.array([np.nan])),
        (np.array([np.nan] * 3), np.array([np.nan] * 3))])
    def test_is_overlapping_trivial(self, closed, left, right):
        """Empty, single-interval and all-NaN trees never overlap."""
        # GH 23309
        tree = IntervalTree(left, right, closed=closed)
        assert tree.is_overlapping is False
@@ -1,56 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas import Index, MultiIndex
@pytest.fixture
def idx():
    """Two-level MultiIndex used to test general MultiIndex functionality."""
    levels = [Index(['foo', 'bar', 'baz', 'qux']), Index(['one', 'two'])]
    codes = [np.array([0, 0, 1, 2, 3, 3]), np.array([0, 1, 0, 1, 0, 1])]
    return MultiIndex(levels=levels,
                      codes=codes,
                      names=['first', 'second'],
                      verify_integrity=False)
@pytest.fixture
def idx_dup():
    """Two-level MultiIndex whose codes repeat some (first, second) pairs."""
    # compare tests/indexes/multi/conftest.py
    levels = [Index(['foo', 'bar', 'baz', 'qux']), Index(['one', 'two'])]
    codes = [np.array([0, 0, 1, 0, 1, 1]), np.array([0, 1, 0, 1, 0, 1])]
    return MultiIndex(levels=levels,
                      codes=codes,
                      names=['first', 'second'],
                      verify_integrity=False)
@pytest.fixture
def index_names():
    """Level names equal to those given to the ``idx`` fixture."""
    # used for testing equality of names assigned to the idx
    expected_names = ['first', 'second']
    return expected_names
@pytest.fixture
def holder():
    """The MultiIndex constructor, used by the pickle-compat tests."""
    constructor = MultiIndex
    return constructor
@pytest.fixture
def compat_props():
    """Names of the properties a MultiIndex must have."""
    required = ['shape', 'ndim', 'size']
    return required
@@ -1,321 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas.compat import lrange
import pandas as pd
from pandas import Index, MultiIndex, date_range, period_range
import pandas.util.testing as tm
def test_shift(idx):
    """shift raises NotImplementedError with one or two arguments."""
    # GH8083 test the base class for shift
    with pytest.raises(NotImplementedError):
        idx.shift(1)
    with pytest.raises(NotImplementedError):
        idx.shift(1, 2)
def test_groupby(idx):
    """groupby accepts an array of keys or the index itself."""
    grouped = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
    tuples = idx.get_values().tolist()
    expected = {1: tuples[:3], 2: tuples[3:]}
    tm.assert_dict_equal(grouped, expected)

    # GH5620
    grouped = idx.groupby(idx)
    expected = {key: [key] for key in idx}
    tm.assert_dict_equal(grouped, expected)
def test_truncate():
    """truncate trims the first level to the requested label range."""
    major_axis = Index(lrange(4))
    minor_axis = Index(lrange(2))

    major_codes = np.array([0, 0, 1, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])

    index = MultiIndex(levels=[major_axis, minor_axis],
                       codes=[major_codes, minor_codes])

    result = index.truncate(before=1)
    # the first level holds integers, so check the dropped label 0 rather
    # than a string that could never be present
    assert 0 not in result.levels[0]
    assert 1 in result.levels[0]

    result = index.truncate(after=1)
    assert 2 not in result.levels[0]
    assert 1 in result.levels[0]

    result = index.truncate(before=1, after=2)
    assert len(result.levels[0]) == 2

    # after < before
    with pytest.raises(ValueError):
        index.truncate(3, 1)
def test_where():
    """where raises NotImplementedError for a scalar condition."""
    mi = MultiIndex.from_tuples([('A', 1), ('A', 2)])

    with pytest.raises(NotImplementedError):
        mi.where(True)
def test_where_array_like():
    """where raises NotImplementedError for every array-like condition."""
    mi = MultiIndex.from_tuples([('A', 1), ('A', 2)])
    cond = [False, True]

    for container in (list, tuple, np.array, pd.Series):
        with pytest.raises(NotImplementedError):
            mi.where(container(cond))
# TODO: reshape
def test_reorder_levels(idx):
    """Asking for three levels raises IndexError ('Too many levels')."""
    # this blows up
    too_many = [2, 1, 0]
    with pytest.raises(IndexError, match='^Too many levels'):
        idx.reorder_levels(too_many)
def test_numpy_repeat():
    """np.repeat works on a MultiIndex but rejects the ``axis`` argument."""
    reps = 2
    numbers = [1, 2, 3]
    names = np.array(['foo', 'bar'])

    mi = MultiIndex.from_product([numbers, names], names=names)
    expected = MultiIndex.from_product(
        [numbers, names.repeat(reps)], names=names)
    repeated = np.repeat(mi, reps)
    tm.assert_index_equal(repeated, expected)

    msg = "the 'axis' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.repeat(mi, reps, axis=1)
def test_append_mixed_dtypes():
    """append concatenates each of six differently-typed levels."""
    # GH 13660
    dti = date_range('2011-01-01', freq='M', periods=3)
    dti_tz = date_range('2011-01-01', freq='M', periods=3, tz='US/Eastern')
    pi = period_range('2011-01', freq='M', periods=3)

    mi = MultiIndex.from_arrays([[1, 2, 3],
                                 [1.1, np.nan, 3.3],
                                 ['a', 'b', 'c'],
                                 dti, dti_tz, pi])
    assert mi.nlevels == 6

    # appended to itself
    doubled = mi.append(mi)
    expected = MultiIndex.from_arrays([
        [1, 2, 3, 1, 2, 3],
        [1.1, np.nan, 3.3, 1.1, np.nan, 3.3],
        ['a', 'b', 'c', 'a', 'b', 'c'],
        dti.append(dti),
        dti_tz.append(dti_tz),
        pi.append(pi)])
    tm.assert_index_equal(doubled, expected)

    # appended to an index whose six levels are all strings
    xyz = ['x', 'y', 'z']
    other = MultiIndex.from_arrays([xyz] * 6)
    xyz_index = pd.Index(xyz)

    mixed = mi.append(other)
    expected = MultiIndex.from_arrays([
        [1, 2, 3, 'x', 'y', 'z'],
        [1.1, np.nan, 3.3, 'x', 'y', 'z'],
        ['a', 'b', 'c', 'x', 'y', 'z'],
        dti.append(xyz_index),
        dti_tz.append(xyz_index),
        pi.append(xyz_index)])
    tm.assert_index_equal(mixed, expected)
def test_take(idx):
    """take matches positional indexing; ``freq`` is not an attribute."""
    positions = [4, 3, 0, 2]
    taken = idx.take(positions)
    assert taken.equals(idx[positions])

    # GH 10791
    with pytest.raises(AttributeError):
        idx.freq
def test_take_invalid_kwargs(idx):
    """take rejects unknown keywords and the numpy ``out``/``mode`` ones."""
    indices = [1, 2]

    # (exception type, message pattern, offending keyword arguments)
    cases = [
        (TypeError,
         r"take\(\) got an unexpected keyword argument 'foo'",
         dict(foo=2)),
        (ValueError, "the 'out' parameter is not supported",
         dict(out=indices)),
        (ValueError, "the 'mode' parameter is not supported",
         dict(mode='clip')),
    ]
    for exc_type, msg, kwargs in cases:
        with pytest.raises(exc_type, match=msg):
            idx.take(indices, **kwargs)
def test_take_fill_value():
    """take honours fill_value/allow_fill and validates negative indices."""
    # GH 12631
    names = ['str', 'dt']
    first, second = pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')
    idx = pd.MultiIndex.from_product(
        [['A', 'B'], [first, second]], names=names)

    # expected when -1 is treated as a plain position (the last element)
    wrapped = pd.MultiIndex.from_tuples(
        [('A', second), ('A', first), ('B', second)], names=names)

    result = idx.take(np.array([1, 0, -1]))
    tm.assert_index_equal(result, wrapped)

    # fill_value
    result = idx.take(np.array([1, 0, -1]), fill_value=True)
    filled = pd.MultiIndex.from_tuples(
        [('A', second), ('A', first), (np.nan, pd.NaT)], names=names)
    tm.assert_index_equal(result, filled)

    # allow_fill=False
    result = idx.take(np.array([1, 0, -1]), allow_fill=False,
                      fill_value=True)
    tm.assert_index_equal(result, wrapped)

    msg = ('When allow_fill=True and fill_value is not None, '
           'all indices must be >= -1')
    for bad_position in (-2, -5):
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, bad_position]), fill_value=True)

    with pytest.raises(IndexError):
        idx.take(np.array([1, -5]))
def test_iter(idx):
result = list(idx)
expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
('baz', 'two'), ('qux', 'one'), ('qux', 'two')]
assert result == expected
def test_sub(idx):
    """Subtraction involving a MultiIndex raises TypeError."""
    full = idx
    tail = idx[-3:]

    # - now raises (previously was set op difference)
    with pytest.raises(TypeError):
        full - tail
    with pytest.raises(TypeError):
        tail - full
    with pytest.raises(TypeError):
        tail - full.tolist()
    with pytest.raises(TypeError):
        full.tolist() - tail
def test_map(idx):
    """Mapping the identity callable returns an equal index."""
    # callable
    index = idx

    # we don't infer UInt64
    is_uint64 = isinstance(index, pd.UInt64Index)
    expected = index.astype('int64') if is_uint64 else index

    mapped = index.map(lambda x: x)
    tm.assert_index_equal(mapped, expected)
@pytest.mark.parametrize(
    "mapper",
    [
        lambda values, idx: {i: e for e, i in zip(values, idx)},
        lambda values, idx: pd.Series(values, idx)])
def test_map_dictlike(idx, mapper):
    """map accepts a dict or Series built by ``mapper`` as the mapping."""
    if isinstance(idx, (pd.CategoricalIndex, pd.IntervalIndex)):
        pytest.skip("skipping tests for {}".format(type(idx)))

    identity = mapper(idx.values, idx)

    # we don't infer to UInt64 for a dict
    to_int64 = isinstance(idx, pd.UInt64Index) and isinstance(identity, dict)
    expected = idx.astype('int64') if to_int64 else idx

    mapped = idx.map(identity)
    tm.assert_index_equal(mapped, expected)

    # empty mappable
    expected = pd.Index([np.nan] * len(idx))
    mapped = idx.map(mapper(expected, idx))
    tm.assert_index_equal(mapped, expected)
@pytest.mark.parametrize('func', [
    np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10,
    np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin,
    np.arccos, np.arctan, np.sinh, np.cosh, np.tanh,
    np.arcsinh, np.arccosh, np.arctanh, np.deg2rad,
    np.rad2deg
])
def test_numpy_ufuncs(func):
    """Applying a numpy math ufunc to a MultiIndex raises."""
    # test ufuncs of numpy. see:
    # http://docs.scipy.org/doc/numpy/reference/ufuncs.html

    # built inline (copied from the idx fixture) rather than requested
    # as a fixture
    levels = [Index(['foo', 'bar', 'baz', 'qux']), Index(['one', 'two'])]
    codes = [np.array([0, 0, 1, 2, 3, 3]), np.array([0, 1, 0, 1, 0, 1])]
    idx = MultiIndex(levels=levels, codes=codes,
                     names=['first', 'second'], verify_integrity=False)

    with pytest.raises(Exception):
        with np.errstate(all='ignore'):
            func(idx)
@pytest.mark.parametrize('func', [
    np.isfinite, np.isinf, np.isnan, np.signbit
])
def test_numpy_type_funcs(func):
    """Applying a numpy type-check function to a MultiIndex raises."""
    # built inline (copied from the idx fixture) rather than requested
    # as a fixture
    levels = [Index(['foo', 'bar', 'baz', 'qux']), Index(['one', 'two'])]
    codes = [np.array([0, 0, 1, 2, 3, 3]), np.array([0, 1, 0, 1, 0, 1])]
    idx = MultiIndex(levels=levels, codes=codes,
                     names=['first', 'second'], verify_integrity=False)

    with pytest.raises(Exception):
        func(idx)
@@ -1,32 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.util.testing import assert_copy
def test_astype(idx):
    """astype('O') copies levels and codes; an int dtype raises TypeError."""
    reference = idx.copy()
    converted = idx.astype('O')

    assert_copy(converted.levels, reference.levels)
    assert_copy(converted.codes, reference.codes)

    level_names = [level.name for level in converted.levels]
    assert level_names == list(reference.names)

    with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
        idx.astype(np.dtype(int))
@pytest.mark.parametrize('ordered', [True, False])
def test_astype_category(idx, ordered):
    """Casting a MultiIndex to a categorical dtype is not implemented."""
    # GH 18630
    msg = '> 1 ndim Categorical are not supported at this time'
    target = CategoricalDtype(ordered=ordered)
    with pytest.raises(NotImplementedError, match=msg):
        idx.astype(target)

    if not ordered:
        # dtype='category' defaults to ordered=False, so only test once
        with pytest.raises(NotImplementedError, match=msg):
            idx.astype('category')
@@ -1,131 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas.compat import PY3, long
from pandas import MultiIndex
import pandas.util.testing as tm
def test_numeric_compat(idx):
    """Arithmetic operators on a MultiIndex raise TypeError."""
    with pytest.raises(TypeError, match="cannot perform __mul__"):
        idx * 1
    with pytest.raises(TypeError, match="cannot perform __rmul__"):
        1 * idx

    # the dunder named in the message depends on the Python major version
    if PY3:
        div_err = "cannot perform __truediv__"
    else:
        div_err = "cannot perform __div__"
    with pytest.raises(TypeError, match=div_err):
        idx / 1

    # reflected variant: "__truediv__" -> "__rtruediv__"
    rdiv_err = div_err.replace(" __", " __r")
    with pytest.raises(TypeError, match=rdiv_err):
        1 / idx

    with pytest.raises(TypeError, match="cannot perform __floordiv__"):
        idx // 1
    with pytest.raises(TypeError, match="cannot perform __rfloordiv__"):
        1 // idx
@pytest.mark.parametrize("method", ["all", "any"])
def test_logical_compat(idx, method):
    """``all`` and ``any`` raise TypeError on a MultiIndex."""
    msg = "cannot perform {method}".format(method=method)
    reducer = getattr(idx, method)

    with pytest.raises(TypeError, match=msg):
        reducer()
def test_boolean_context_compat(idx):
    """Using a MultiIndex in a boolean context raises ValueError."""
    with pytest.raises(ValueError):
        bool(idx)
def test_boolean_context_compat2():
    """The result of an intersection also raises in a boolean context."""
    # boolean context compat
    # GH7897
    left = MultiIndex.from_tuples([('A', 1), ('A', 2)])
    right = MultiIndex.from_tuples([('A', 1), ('A', 3)])
    shared = left.intersection(right)

    with pytest.raises(ValueError):
        bool(shared)
def test_inplace_mutation_resets_values():
    """In-place set_levels/set_codes refresh ``values``; copies do not."""
    first_levels = [['a', 'b', 'c'], [4]]
    second_levels = [[1, 2, 3], ['a']]
    codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]

    mi1 = MultiIndex(levels=first_levels, codes=codes)
    mi2 = MultiIndex(levels=second_levels, codes=codes)
    vals = mi1.values.copy()
    vals2 = mi2.values.copy()

    assert mi1._tuples is not None

    # Make sure level setting works
    relevelled = mi1.set_levels(second_levels).values
    tm.assert_almost_equal(vals2, relevelled)

    # Non-inplace doesn't kill _tuples [implementation detail]
    tm.assert_almost_equal(mi1._tuples, vals)

    # ...and values is still same too
    tm.assert_almost_equal(mi1.values, vals)

    # Inplace should kill _tuples
    mi1.set_levels(second_levels, inplace=True)
    tm.assert_almost_equal(mi1.values, vals2)

    # Make sure label setting works too
    zero_codes = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
    exp_values = np.empty((6,), dtype=object)
    exp_values[:] = [(long(1), 'a')] * 6

    # Must be 1d array of tuples
    assert exp_values.shape == (6,)
    recoded = mi2.set_codes(zero_codes).values

    # Not inplace shouldn't change
    tm.assert_almost_equal(mi2._tuples, vals2)

    # Should have correct values
    tm.assert_almost_equal(exp_values, recoded)

    # ...and again setting inplace should kill _tuples, etc
    mi2.set_codes(zero_codes, inplace=True)
    tm.assert_almost_equal(mi2.values, recoded)
def test_ndarray_compat_properties(idx, compat_props):
assert idx.T.equals(idx)
assert idx.transpose().equals(idx)
values = idx.values
for prop in compat_props:
assert getattr(idx, prop) == getattr(values, prop)
# test for validity
idx.nbytes
idx.values.nbytes
def test_compat(indices):
assert indices.tolist() == list(indices)
def test_pickle_compat_construction(holder):
# this is testing for pickle compat
if holder is None:
return
# need an object to create with
pytest.raises(TypeError, holder)
@@ -1,577 +0,0 @@
# -*- coding: utf-8 -*-
from collections import OrderedDict
import re
import numpy as np
import pytest
from pandas._libs.tslib import Timestamp
from pandas.compat import lrange, range
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
import pandas as pd
from pandas import Index, MultiIndex, date_range
import pandas.util.testing as tm
def test_constructor_single_level():
    """A single level and codes list still builds a MultiIndex."""
    labels = ['foo', 'bar', 'baz', 'qux']
    result = MultiIndex(levels=[labels],
                        codes=[[0, 1, 2, 3]], names=['first'])
    assert isinstance(result, MultiIndex)

    expected_level = Index(labels, name='first')
    tm.assert_index_equal(result.levels[0], expected_level)
    assert result.names == ['first']
def test_constructor_no_levels():
    """Empty or missing levels/codes are rejected by the constructor."""
    msg = "non-zero number of levels/codes"
    with pytest.raises(ValueError, match=msg):
        MultiIndex(levels=[], codes=[])

    # only one of levels/codes supplied
    both_re = re.compile('Must pass both levels and codes')
    for kwargs in (dict(levels=[]), dict(codes=[])):
        with pytest.raises(TypeError, match=both_re):
            MultiIndex(**kwargs)
def test_constructor_nonhashable_names():
    """List-valued names raise TypeError however they are assigned."""
    # GH 20527
    levels = [[1, 2], [u'one', u'two']]
    codes = [[0, 0, 1, 1], [0, 1, 0, 1]]
    message = "MultiIndex.name must be a hashable type"

    with pytest.raises(TypeError, match=message):
        MultiIndex(levels=levels, codes=codes, names=(['foo'], ['bar']))

    mi = MultiIndex(levels=[[1, 2], [u'one', u'two']],
                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
                    names=('foo', 'bar'))
    unhashable = [['foor'], ['barr']]

    # With .rename()
    with pytest.raises(TypeError, match=message):
        mi.rename(names=unhashable)

    # With .set_names()
    with pytest.raises(TypeError, match=message):
        mi.set_names(names=unhashable)
def test_constructor_mismatched_codes_levels(idx):
    """Inconsistent ``levels``/``codes`` raise ``ValueError``."""
    with pytest.raises(ValueError,
                       match="Length of levels and codes must be the same"):
        MultiIndex(levels=["a"],
                   codes=[np.array([1]), np.array([2]), np.array([3])])

    code_too_large = re.compile('>= length of level')
    ragged_codes = re.compile(r'Unequal code lengths: \[4, 2\]')

    # important to check that it's looking at the right thing.
    with pytest.raises(ValueError, match=code_too_large):
        MultiIndex(levels=[['a'], ['b']],
                   codes=[[0, 1, 2, 3], [0, 3, 4, 1]])

    with pytest.raises(ValueError, match=ragged_codes):
        MultiIndex(levels=[['a'], ['b']], codes=[[0, 0, 0, 0], [0, 0]])

    # external API
    with pytest.raises(ValueError, match=code_too_large):
        idx.copy().set_levels([['a'], ['b']])

    with pytest.raises(ValueError, match=ragged_codes):
        idx.copy().set_codes([[0, 0, 0, 0], [0, 0]])
def test_labels_deprecated(idx):
    """The ``labels`` keyword and attribute both emit ``FutureWarning``."""
    # GH23752
    expect_warning = tm.assert_produces_warning

    # constructor keyword
    with expect_warning(FutureWarning):
        MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
                   labels=[[0, 1, 2, 3]],
                   names=['first'])

    # attribute access
    with expect_warning(FutureWarning):
        idx.labels
def test_copy_in_constructor():
    """With ``copy=True`` later edits to the inputs do not reach the index."""
    levels = np.array(["a", "b", "c"])
    codes = np.array([1, 1, 2, 0, 0, 1, 1])

    first_code = codes[0]
    mi = MultiIndex(levels=[levels, levels], codes=[codes, codes], copy=True)
    assert mi.codes[0][0] == first_code

    # overwrite the source codes after construction
    codes[0] = 15
    assert mi.codes[0][0] == first_code

    # overwrite the source levels after construction
    first_level = levels[0]
    levels[0] = "PANDA"
    assert mi.levels[0][0] == first_level
# ----------------------------------------------------------------------------
# from_arrays
# ----------------------------------------------------------------------------
def test_from_arrays(idx):
    """``from_arrays`` rebuilds ``idx`` and infers level values."""
    # list of arrays as input
    arrays = []
    for lev, level_codes in zip(idx.levels, idx.codes):
        arrays.append(np.asarray(lev).take(level_codes))
    rebuilt = MultiIndex.from_arrays(arrays, names=idx.names)
    tm.assert_index_equal(rebuilt, idx)

    # infer correctly
    inferred = MultiIndex.from_arrays(
        [[pd.NaT, Timestamp('20130101')], ['a', 'b']])
    assert inferred.levels[0].equals(Index([Timestamp('20130101')]))
    assert inferred.levels[1].equals(Index(['a', 'b']))
def test_from_arrays_iterator(idx):
    """``from_arrays`` accepts an iterator and rejects a non-iterable."""
    # GH 18434
    arrays = []
    for lev, level_codes in zip(idx.levels, idx.codes):
        arrays.append(np.asarray(lev).take(level_codes))

    # iterator as input
    rebuilt = MultiIndex.from_arrays(iter(arrays), names=idx.names)
    tm.assert_index_equal(rebuilt, idx)

    # invalid iterator input
    with pytest.raises(
            TypeError,
            match="Input must be a list / sequence of array-likes."):
        MultiIndex.from_arrays(0)
def test_from_arrays_index_series_datetimetz():
    """tz-aware levels survive ``from_arrays`` from Index or Series input."""
    idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3,
                         tz='US/Eastern')
    idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3,
                         tz='Asia/Tokyo')
    sources = (idx1, idx2)

    from_indexes = pd.MultiIndex.from_arrays([idx1, idx2])
    for position, source in enumerate(sources):
        tm.assert_index_equal(from_indexes.get_level_values(position),
                              source)

    from_series = pd.MultiIndex.from_arrays([pd.Series(idx1),
                                             pd.Series(idx2)])
    for position, source in enumerate(sources):
        tm.assert_index_equal(from_series.get_level_values(position),
                              source)

    tm.assert_index_equal(from_indexes, from_series)
def test_from_arrays_index_series_timedelta():
    """Timedelta levels survive ``from_arrays`` from Index or Series."""
    idx1 = pd.timedelta_range('1 days', freq='D', periods=3)
    idx2 = pd.timedelta_range('2 hours', freq='H', periods=3)
    sources = (idx1, idx2)

    from_indexes = pd.MultiIndex.from_arrays([idx1, idx2])
    for position, source in enumerate(sources):
        tm.assert_index_equal(from_indexes.get_level_values(position),
                              source)

    from_series = pd.MultiIndex.from_arrays([pd.Series(idx1),
                                             pd.Series(idx2)])
    for position, source in enumerate(sources):
        tm.assert_index_equal(from_series.get_level_values(position),
                              source)

    tm.assert_index_equal(from_indexes, from_series)
def test_from_arrays_index_series_period():
    """Period levels survive ``from_arrays`` from Index or Series input."""
    idx1 = pd.period_range('2011-01-01', freq='D', periods=3)
    idx2 = pd.period_range('2015-01-01', freq='H', periods=3)
    sources = (idx1, idx2)

    from_indexes = pd.MultiIndex.from_arrays([idx1, idx2])
    for position, source in enumerate(sources):
        tm.assert_index_equal(from_indexes.get_level_values(position),
                              source)

    from_series = pd.MultiIndex.from_arrays([pd.Series(idx1),
                                             pd.Series(idx2)])
    for position, source in enumerate(sources):
        tm.assert_index_equal(from_series.get_level_values(position),
                              source)

    tm.assert_index_equal(from_indexes, from_series)
def test_from_arrays_index_datetimelike_mixed():
    """Mixed datetime-like levels survive ``from_arrays`` unchanged."""
    idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3,
                         tz='US/Eastern')
    idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3)
    idx3 = pd.timedelta_range('1 days', freq='D', periods=3)
    idx4 = pd.period_range('2011-01-01', freq='D', periods=3)
    sources = (idx1, idx2, idx3, idx4)

    from_indexes = pd.MultiIndex.from_arrays([idx1, idx2, idx3, idx4])
    for position, source in enumerate(sources):
        tm.assert_index_equal(from_indexes.get_level_values(position),
                              source)

    from_series = pd.MultiIndex.from_arrays([pd.Series(idx1),
                                             pd.Series(idx2),
                                             pd.Series(idx3),
                                             pd.Series(idx4)])
    for position, source in enumerate(sources):
        tm.assert_index_equal(from_series.get_level_values(position),
                              source)

    tm.assert_index_equal(from_indexes, from_series)
def test_from_arrays_index_series_categorical():
    """Categorical levels survive ``from_arrays`` for three input kinds."""
    # GH13743
    idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
                               ordered=False)
    idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
                               ordered=True)

    # CategoricalIndex input
    from_indexes = pd.MultiIndex.from_arrays([idx1, idx2])
    tm.assert_index_equal(from_indexes.get_level_values(0), idx1)
    tm.assert_index_equal(from_indexes.get_level_values(1), idx2)

    # Series input
    from_series = pd.MultiIndex.from_arrays([pd.Series(idx1),
                                             pd.Series(idx2)])
    tm.assert_index_equal(from_series.get_level_values(0), idx1)
    tm.assert_index_equal(from_series.get_level_values(1), idx2)

    # underlying ``.values`` input
    from_values = pd.MultiIndex.from_arrays([idx1.values, idx2.values])
    tm.assert_index_equal(from_values.get_level_values(0), idx1)
    tm.assert_index_equal(from_values.get_level_values(1), idx2)
def test_from_arrays_empty():
    """``from_arrays`` with zero, one and several empty arrays."""
    # 0 levels
    with pytest.raises(ValueError,
                       match="Must pass non-zero number of levels/codes"):
        MultiIndex.from_arrays(arrays=[])

    # 1 level
    single = MultiIndex.from_arrays(arrays=[[]], names=['A'])
    assert isinstance(single, MultiIndex)
    tm.assert_index_equal(single.levels[0], Index([], name='A'))

    # N levels
    for n_levels in (2, 3):
        names = list('ABC')[:n_levels]
        built = MultiIndex.from_arrays(arrays=[[]] * n_levels, names=names)
        wanted = MultiIndex(levels=[[]] * n_levels,
                            codes=[[]] * n_levels,
                            names=names)
        tm.assert_index_equal(built, wanted)
@pytest.mark.parametrize('invalid_array', [
    1,
    [1],
    [1, 2],
    [[1], 2],
    'a',
    ['a'],
    ['a', 'b'],
    [['a'], 'b'],
])
def test_from_arrays_invalid_input(invalid_array):
    """``from_arrays`` raises ``TypeError`` for each malformed input.

    The parametrized value is passed straight through, so a failure is
    reported against the specific offending input rather than against a
    loop over a duplicated hard-coded list.
    """
    with pytest.raises(TypeError):
        MultiIndex.from_arrays(arrays=invalid_array)
@pytest.mark.parametrize('idx1, idx2', [
    ([1, 2, 3], ['a', 'b']),
    ([], ['a', 'b']),
    ([1, 2, 3], [])
])
def test_from_arrays_different_lengths(idx1, idx2):
    """Arrays of unequal length are rejected with ``ValueError``."""
    # see gh-13599
    unequal = [idx1, idx2]
    with pytest.raises(ValueError, match='^all arrays must be same length$'):
        MultiIndex.from_arrays(unequal)
# ----------------------------------------------------------------------------
# from_tuples
# ----------------------------------------------------------------------------
def test_from_tuples():
    """``from_tuples`` rejects an unnamed empty list and builds from tuples."""
    with pytest.raises(
            TypeError,
            match='Cannot infer number of levels from empty list'):
        MultiIndex.from_tuples([])

    # input tuples
    built = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b'])
    wanted = MultiIndex(levels=[[1, 3], [2, 4]],
                        codes=[[0, 1], [0, 1]],
                        names=['a', 'b'])
    tm.assert_index_equal(built, wanted)
def test_from_tuples_iterator():
    """``from_tuples`` accepts a ``zip`` input and rejects a non-iterable."""
    # GH 18434
    # input iterator for tuples
    pairs = zip([1, 3], [2, 4])
    built = MultiIndex.from_tuples(pairs, names=['a', 'b'])
    wanted = MultiIndex(levels=[[1, 3], [2, 4]],
                        codes=[[0, 1], [0, 1]],
                        names=['a', 'b'])
    tm.assert_index_equal(built, wanted)

    # input non-iterables
    with pytest.raises(
            TypeError,
            match='Input must be a list / sequence of tuple-likes.'):
        MultiIndex.from_tuples(0)
def test_from_tuples_empty():
    """An empty list plus names matches ``from_arrays`` of empty arrays."""
    # GH 16777
    names = ['a', 'b']
    from_tuples = MultiIndex.from_tuples([], names=names)
    from_arrays = MultiIndex.from_arrays(arrays=[[], []], names=names)
    tm.assert_index_equal(from_tuples, from_arrays)
def test_from_tuples_index_values(idx):
    """Feeding a MultiIndex to ``from_tuples`` preserves its values."""
    rebuilt = MultiIndex.from_tuples(idx)
    same = rebuilt.values == idx.values
    assert same.all()
def test_tuples_with_name_string():
    """``pd.Index`` on 3-tuples with a string ``name`` raises ValueError."""
    # GH 15110 and GH 14848
    li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
    for name in ('abc', 'a'):
        with pytest.raises(ValueError):
            pd.Index(li, name=name)
def test_from_tuples_with_tuple_label():
    """A tuple used as a label inside a tuple is kept as one label."""
    # GH 15457
    frame = pd.DataFrame([[2, 1, 2], [4, (1, 2), 3]],
                         columns=['a', 'b', 'c'])
    expected = frame.set_index(['a', 'b'])

    idx = pd.MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=('a', 'b'))
    result = pd.DataFrame([2, 3], columns=['c'], index=idx)
    tm.assert_frame_equal(expected, result)
# ----------------------------------------------------------------------------
# from_product
# ----------------------------------------------------------------------------
def test_from_product_empty_zero_levels():
    """``from_product`` with no iterables raises ``ValueError``."""
    # 0 levels
    with pytest.raises(ValueError,
                       match="Must pass non-zero number of levels/codes"):
        MultiIndex.from_product([])
def test_from_product_empty_one_level():
    """One empty iterable gives a single empty, named level."""
    mi = MultiIndex.from_product([[]], names=['A'])
    empty_level = pd.Index([], name='A')
    tm.assert_index_equal(mi.levels[0], empty_level)
@pytest.mark.parametrize('first, second', [
    ([], []),
    (['foo', 'bar', 'baz'], []),
    ([], ['a', 'b', 'c']),
])
def test_from_product_empty_two_levels(first, second):
    """A product with an empty factor keeps levels but has no codes."""
    names = ['A', 'B']
    built = MultiIndex.from_product([first, second], names=names)
    wanted = MultiIndex(levels=[first, second], codes=[[], []], names=names)
    tm.assert_index_equal(built, wanted)
@pytest.mark.parametrize('N', list(range(4)))
def test_from_product_empty_three_levels(N):
    """Three-level product with empty outer factors has no codes."""
    # GH12258
    names = ['A', 'B', 'C']
    middle = lrange(N)
    built = MultiIndex.from_product([[], middle, []], names=names)
    wanted = MultiIndex(levels=[[], middle, []],
                        codes=[[], [], []],
                        names=names)
    tm.assert_index_equal(built, wanted)
@pytest.mark.parametrize('invalid_input', [
    1,
    [1],
    [1, 2],
    [[1], 2],
    'a',
    ['a'],
    ['a', 'b'],
    [['a'], 'b'],
])
def test_from_product_invalid_input(invalid_input):
    """``from_product`` raises ``TypeError`` for each malformed input."""
    with pytest.raises(TypeError):
        MultiIndex.from_product(iterables=invalid_input)
def test_from_product_datetimeindex():
    """Product with a DatetimeIndex yields ``Timestamp`` tuples."""
    dt_index = date_range('2000-01-01', periods=2)
    mi = pd.MultiIndex.from_product([[1, 2], dt_index])

    first_day = '2000-01-01'
    second_day = '2000-01-02'
    etalon = construct_1d_object_array_from_listlike([
        (1, pd.Timestamp(first_day)),
        (1, pd.Timestamp(second_day)),
        (2, pd.Timestamp(first_day)),
        (2, pd.Timestamp(second_day)),
    ])
    tm.assert_numpy_array_equal(mi.values, etalon)
@pytest.mark.parametrize('ordered', [False, True])
@pytest.mark.parametrize('f', [
    lambda x: x,
    lambda x: pd.Series(x),
    lambda x: x.values
])
def test_from_product_index_series_categorical(ordered, f):
    """Categorical second factor stays categorical in the product."""
    # GH13743
    letters = list("abcaab")
    cats = list("bac")
    idx = pd.CategoricalIndex(letters, categories=cats, ordered=ordered)
    expected = pd.CategoricalIndex(letters + letters, categories=cats,
                                   ordered=ordered)

    mi = pd.MultiIndex.from_product([['foo', 'bar'], f(idx)])
    tm.assert_index_equal(mi.get_level_values(1), expected)
def test_from_product():
    """``from_product`` equals ``from_tuples`` on the spelled-out pairs."""
    first = ['foo', 'bar', 'buz']
    second = ['a', 'b', 'c']
    names = ['first', 'second']
    built = MultiIndex.from_product([first, second], names=names)

    # every pairing, outer factor varying slowest
    pairs = [(outer, inner) for outer in first for inner in second]
    wanted = MultiIndex.from_tuples(pairs, names=names)
    tm.assert_index_equal(built, wanted)
def test_from_product_iterator():
    """``from_product`` accepts an iterator and rejects a non-iterable."""
    # GH 18434
    first = ['foo', 'bar', 'buz']
    second = ['a', 'b', 'c']
    names = ['first', 'second']

    # every pairing, outer factor varying slowest
    pairs = [(outer, inner) for outer in first for inner in second]
    wanted = MultiIndex.from_tuples(pairs, names=names)

    # iterator as input
    built = MultiIndex.from_product(iter([first, second]), names=names)
    tm.assert_index_equal(built, wanted)

    # Invalid non-iterable input
    with pytest.raises(
            TypeError,
            match="Input must be a list / sequence of iterables."):
        MultiIndex.from_product(0)
def test_create_index_existing_name(idx):
    """``pd.Index`` on a MultiIndex keeps its names unless overridden."""
    # GH11193, when an existing index is passed, and a new name is not
    # specified, the new index should inherit the previous object name
    idx.names = ['foo', 'bar']

    def expected_with(names):
        # Index of the fixture's tuples wrapped with the given names
        tuples = Index([
            ('foo', 'one'), ('foo', 'two'),
            ('bar', 'one'), ('baz', 'two'),
            ('qux', 'one'), ('qux', 'two')],
            dtype='object'
        )
        return Index(tuples, names=names)

    # names inherited from the source index
    result = pd.Index(idx)
    tm.assert_index_equal(result, expected_with(['foo', 'bar']))

    # names given explicitly
    result = pd.Index(idx, names=['A', 'B'])
    tm.assert_index_equal(result, expected_with(['A', 'B']))
# ----------------------------------------------------------------------------
# from_frame
# ----------------------------------------------------------------------------
def test_from_frame():
    """``from_frame`` uses rows as tuples and column labels as names."""
    # GH 22420
    rows = [['a', 'a'], ['a', 'b'], ['b', 'a'], ['b', 'b']]
    df = pd.DataFrame(rows, columns=['L1', 'L2'])
    expected = pd.MultiIndex.from_tuples(
        [('a', 'a'), ('a', 'b'), ('b', 'a'), ('b', 'b')],
        names=['L1', 'L2'])
    result = pd.MultiIndex.from_frame(df)
    tm.assert_index_equal(expected, result)
@pytest.mark.parametrize('non_frame', [
    pd.Series([1, 2, 3, 4]),
    [1, 2, 3, 4],
    [[1, 2], [3, 4], [5, 6]],
    pd.Index([1, 2, 3, 4]),
    np.array([[1, 2], [3, 4], [5, 6]]),
    27
])
def test_from_frame_error(non_frame):
    """Anything that is not a DataFrame is rejected with ``TypeError``."""
    # GH 22420
    expected_message = 'Input must be a DataFrame'
    with pytest.raises(TypeError, match=expected_message):
        pd.MultiIndex.from_frame(non_frame)
def test_from_frame_dtype_fidelity():
    """Level dtypes after ``from_frame`` equal the column dtypes."""
    # GH 22420
    columns = OrderedDict([
        ('dates', pd.date_range('19910905', periods=6, tz='US/Eastern')),
        ('a', [1, 1, 1, 2, 2, 2]),
        ('b', pd.Categorical(['a', 'a', 'b', 'b', 'c', 'c'], ordered=True)),
        ('c', ['x', 'x', 'y', 'z', 'x', 'y'])
    ])
    df = pd.DataFrame(columns)
    original_dtypes = df.dtypes.to_dict()

    expected_mi = pd.MultiIndex.from_arrays([
        pd.date_range('19910905', periods=6, tz='US/Eastern'),
        [1, 1, 1, 2, 2, 2],
        pd.Categorical(['a', 'a', 'b', 'b', 'c', 'c'], ordered=True),
        ['x', 'x', 'y', 'z', 'x', 'y']
    ], names=['dates', 'a', 'b', 'c'])

    mi = pd.MultiIndex.from_frame(df)
    mi_dtypes = {}
    for position, name in enumerate(mi.names):
        mi_dtypes[name] = mi.levels[position].dtype

    tm.assert_index_equal(expected_mi, mi)
    assert original_dtypes == mi_dtypes
@pytest.mark.parametrize('names_in,names_out', [
    (None, [('L1', 'x'), ('L2', 'y')]),
    (['x', 'y'], ['x', 'y']),
])
def test_from_frame_valid_names(names_in, names_out):
    """``names`` overrides, or defaults to, the frame's column labels."""
    # GH 22420
    columns = pd.MultiIndex.from_tuples([('L1', 'x'), ('L2', 'y')])
    df = pd.DataFrame([['a', 'a'], ['a', 'b'], ['b', 'a'], ['b', 'b']],
                      columns=columns)
    mi = pd.MultiIndex.from_frame(df, names=names_in)
    assert mi.names == names_out
@pytest.mark.parametrize('names_in,names_out', [
    ('bad_input', ValueError("Names should be list-like for a MultiIndex")),
    (['a', 'b', 'c'], ValueError("Length of names must match number of "
                                 "levels in MultiIndex."))
])
def test_from_frame_invalid_names(names_in, names_out):
    """Bad ``names`` raise the exception type/message in ``names_out``."""
    # GH 22420
    columns = pd.MultiIndex.from_tuples([('L1', 'x'), ('L2', 'y')])
    df = pd.DataFrame([['a', 'a'], ['a', 'b'], ['b', 'a'], ['b', 'b']],
                      columns=columns)

    # ``names_out`` is an exception instance carrying the expected message
    error_type = type(names_out)
    error_text = names_out.args[0]
    with pytest.raises(error_type, match=error_text):
        pd.MultiIndex.from_frame(df, names=names_in)
@@ -1,97 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas.compat import PYPY
import pandas as pd
from pandas import MultiIndex
import pandas.util.testing as tm
def test_contains_top_level():
    """A first-level label is ``in`` the index but not in its engine."""
    product_index = MultiIndex.from_product([['A', 'B'], [1, 2]])
    label = 'A'
    assert label in product_index
    assert label not in product_index._engine
def test_contains_with_nat():
    """Membership works for every entry, including the missing-date row."""
    # MI with a NaT (code -1 in the date level)
    dates = pd.date_range('2012-01-01', periods=5)
    mi = MultiIndex(levels=[['C'], dates],
                    codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
                    names=[None, 'B'])

    assert ('C', pd.Timestamp('2012-01-01')) in mi
    for entry in mi.values:
        assert entry in mi
def test_contains(idx):
    """Full-tuple membership checks, plus ``None`` is never contained."""
    present = ('foo', 'two')
    absent = ('bar', 'two')
    assert present in idx
    assert absent not in idx
    assert None not in idx
@pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy")
def test_isin_nan_pypy():
    """On PyPy a tuple holding NaN is matched by ``isin``."""
    idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]])
    for nan in (np.nan, float('nan')):
        mask = idx.isin([('bar', nan)])
        tm.assert_numpy_array_equal(mask, np.array([False, True]))
def test_isin():
    """``isin`` matches whole tuples and returns bool for an empty index."""
    values = [('foo', 2), ('bar', 3), ('quux', 4)]

    idx = MultiIndex.from_arrays([
        ['qux', 'baz', 'foo', 'bar'],
        np.arange(4)
    ])
    mask = idx.isin(values)
    tm.assert_numpy_array_equal(mask, np.array([False, False, True, True]))

    # empty, return dtype bool
    empty = MultiIndex.from_arrays([[], []])
    mask = empty.isin(values)
    assert len(mask) == 0
    assert mask.dtype == np.bool_
@pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy")
def test_isin_nan_not_pypy():
    """Off PyPy a tuple holding NaN is not matched by ``isin``."""
    idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]])
    for nan in (np.nan, float('nan')):
        mask = idx.isin([('bar', nan)])
        tm.assert_numpy_array_equal(mask, np.array([False, False]))
def test_isin_level_kwarg():
    """``isin(level=...)`` by position and name, plus invalid levels."""
    idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'],
                                  np.arange(4)])

    vals_0 = ['foo', 'bar', 'quux']
    vals_1 = [2, 3, 10]
    expected = np.array([False, False, True, True])

    # positional levels, counted from either end
    for values, level in ((vals_0, 0), (vals_0, -2),
                          (vals_1, 1), (vals_1, -1)):
        tm.assert_numpy_array_equal(expected, idx.isin(values, level=level))

    # out-of-range positions
    for bad_level in (5, -5):
        with pytest.raises(IndexError):
            idx.isin(vals_0, level=bad_level)

    # non-integer / unknown level identifiers
    for values, bad_level in ((vals_0, 1.0), (vals_1, -1.0), (vals_1, 'A')):
        with pytest.raises(KeyError):
            idx.isin(values, level=bad_level)

    # named levels
    idx.names = ['A', 'B']
    tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level='A'))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B'))

    with pytest.raises(KeyError):
        idx.isin(vals_1, level='C')
@@ -1,224 +0,0 @@
# -*- coding: utf-8 -*-
from collections import OrderedDict
import numpy as np
import pytest
from pandas.compat import range
import pandas as pd
from pandas import DataFrame, MultiIndex, date_range
import pandas.util.testing as tm
def test_tolist(idx):
    """``tolist()`` equals ``list`` applied to ``.values``."""
    from_method = idx.tolist()
    from_values = list(idx.values)
    assert from_method == from_values
def test_to_numpy(idx):
    """``to_numpy()`` returns the same array content as ``.values``."""
    tm.assert_numpy_array_equal(idx.to_numpy(), idx.values)
def test_to_frame():
    """``to_frame`` with and without names, index and datetime levels."""
    tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]
    col_names = ['first', 'second']

    # unnamed levels
    index = MultiIndex.from_tuples(tuples)
    expected = DataFrame(tuples)
    tm.assert_frame_equal(index.to_frame(index=False), expected)

    expected.index = index
    tm.assert_frame_equal(index.to_frame(), expected)

    # named levels become the column labels
    index = MultiIndex.from_tuples(tuples, names=['first', 'second'])
    expected = DataFrame(tuples)
    expected.columns = col_names
    tm.assert_frame_equal(index.to_frame(index=False), expected)

    expected.index = index
    tm.assert_frame_equal(index.to_frame(), expected)

    # See GH-22580
    index = MultiIndex.from_tuples(tuples)
    expected = DataFrame(tuples)
    expected.columns = col_names
    tm.assert_frame_equal(
        index.to_frame(index=False, name=['first', 'second']), expected)

    result = index.to_frame(name=['first', 'second'])
    expected.index = index
    expected.columns = col_names
    tm.assert_frame_equal(result, expected)

    # invalid ``name`` arguments
    with pytest.raises(
            TypeError,
            match="'name' must be a list / sequence of column names."):
        index.to_frame(name='first')

    with pytest.raises(
            ValueError,
            match="'name' should have same length as number of levels "
                  "on index."):
        index.to_frame(name=['first'])

    # Tests for datetime index
    index = MultiIndex.from_product([range(5),
                                     pd.date_range('20130101', periods=3)])
    result = index.to_frame(index=False)
    expected = DataFrame(
        {0: np.repeat(np.arange(5, dtype='int64'), 3),
         1: np.tile(pd.date_range('20130101', periods=3), 5)})
    tm.assert_frame_equal(result, expected)

    expected.index = index
    tm.assert_frame_equal(index.to_frame(), expected)

    # See GH-22580
    result = index.to_frame(index=False, name=['first', 'second'])
    expected = DataFrame(
        {'first': np.repeat(np.arange(5, dtype='int64'), 3),
         'second': np.tile(pd.date_range('20130101', periods=3), 5)})
    tm.assert_frame_equal(result, expected)

    result = index.to_frame(name=['first', 'second'])
    expected.index = index
    tm.assert_frame_equal(result, expected)
def test_to_frame_dtype_fidelity():
    """Column dtypes after ``to_frame`` equal the level dtypes."""
    # GH 22420
    mi = pd.MultiIndex.from_arrays([
        pd.date_range('19910905', periods=6, tz='US/Eastern'),
        [1, 1, 1, 2, 2, 2],
        pd.Categorical(['a', 'a', 'b', 'b', 'c', 'c'], ordered=True),
        ['x', 'x', 'y', 'z', 'x', 'y']
    ], names=['dates', 'a', 'b', 'c'])

    original_dtypes = {}
    for position, name in enumerate(mi.names):
        original_dtypes[name] = mi.levels[position].dtype

    columns = OrderedDict([
        ('dates', pd.date_range('19910905', periods=6, tz='US/Eastern')),
        ('a', [1, 1, 1, 2, 2, 2]),
        ('b', pd.Categorical(['a', 'a', 'b', 'b', 'c', 'c'], ordered=True)),
        ('c', ['x', 'x', 'y', 'z', 'x', 'y'])
    ])
    expected_df = pd.DataFrame(columns)

    df = mi.to_frame(index=False)
    df_dtypes = df.dtypes.to_dict()

    tm.assert_frame_equal(df, expected_df)
    assert original_dtypes == df_dtypes
def test_to_frame_resulting_column_order():
    """Columns of ``to_frame`` follow level order, not sorted order."""
    # GH 22420
    level_names = ['z', 0, 'a']
    arrays = [['a', 'b', 'c'], ['x', 'y', 'z'], ['q', 'w', 'e']]
    mi = pd.MultiIndex.from_arrays(arrays, names=level_names)
    frame = mi.to_frame()
    assert frame.columns.tolist() == level_names
def test_to_hierarchical():
    """Deprecated ``to_hierarchical`` warns and repeats the entries."""
    index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'),
                                    (2, 'one'), (2, 'two')])
    levels = [[1, 2], ['one', 'two']]

    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        repeated = index.to_hierarchical(3)
    wanted = MultiIndex(levels=levels,
                        codes=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
                               [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]])
    tm.assert_index_equal(repeated, wanted)
    assert repeated.names == index.names

    # K > 1
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        repeated = index.to_hierarchical(3, 2)
    wanted = MultiIndex(levels=levels,
                        codes=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
                               [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]])
    tm.assert_index_equal(repeated, wanted)
    assert repeated.names == index.names

    # non-sorted
    index = MultiIndex.from_tuples([(2, 'c'), (1, 'b'),
                                    (2, 'a'), (2, 'b')],
                                   names=['N1', 'N2'])
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        repeated = index.to_hierarchical(2)
    wanted = MultiIndex.from_tuples([(2, 'c'), (2, 'c'),
                                     (1, 'b'), (1, 'b'),
                                     (2, 'a'), (2, 'a'),
                                     (2, 'b'), (2, 'b')],
                                    names=['N1', 'N2'])
    tm.assert_index_equal(repeated, wanted)
    assert repeated.names == index.names
def test_roundtrip_pickle_with_tz():
    """A MultiIndex with a tz-aware level keeps its levels through pickle.

    A stray ``return`` as the first statement previously made every line
    below it unreachable, so the test always passed without checking
    anything.  It has been removed so the round trip is exercised.
    """
    # GH 8367
    # round-trip of timezone
    index = MultiIndex.from_product(
        [[1, 2], ['a', 'b'], date_range('20130101', periods=3,
                                        tz='US/Eastern')
         ], names=['one', 'two', 'three'])
    unpickled = tm.round_trip_pickle(index)
    assert index.equal_levels(unpickled)
def test_pickle(indices):
    """An index equals its pickled copy, with its own name and a new one.

    A stray ``return`` as the first statement previously made the whole
    body unreachable; it has been removed.  The fixture's name is restored
    in a ``finally`` so a failing assertion cannot leave the renamed index
    behind.
    """
    unpickled = tm.round_trip_pickle(indices)
    assert indices.equals(unpickled)

    original_name = indices.name
    indices.name = 'foo'
    try:
        unpickled = tm.round_trip_pickle(indices)
        assert indices.equals(unpickled)
    finally:
        indices.name = original_name
def test_to_series(idx):
    """``to_series`` returns new values and index objects, same name."""
    # assert that we are creating a copy of the index
    series = idx.to_series()
    assert series.values is not idx.values
    assert series.index is not idx
    assert series.name == idx.name
def test_to_series_with_arguments(idx):
    """``to_series`` honours the ``index`` and ``name`` keywords."""
    # GH18699

    # index kwarg
    with_index = idx.to_series(index=idx)
    assert with_index.values is not idx.values
    assert with_index.index is idx
    assert with_index.name == idx.name

    # name kwarg
    with_name = idx.to_series(name='__test')
    assert with_name.values is not idx.values
    assert with_name.index is not idx
    assert with_name.name != idx.name
def test_to_flat_index(idx):
    """``to_flat_index`` gives a flat Index holding the entry tuples."""
    entries = (('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
               ('baz', 'two'), ('qux', 'one'), ('qux', 'two'))
    expected = pd.Index(entries, tupleize_cols=False)
    tm.assert_index_equal(idx.to_flat_index(), expected)
@@ -1,93 +0,0 @@
# -*- coding: utf-8 -*-
from copy import copy, deepcopy
import pytest
from pandas import MultiIndex
import pandas.util.testing as tm
def assert_multiindex_copied(copy, original):
    """Assert that ``copy`` carries the same content as ``original``.

    Checks levels, codes, names and sortorder.  Codes and names must be
    equal in value but must not be the very same object as on
    ``original``.

    Parameters
    ----------
    copy : MultiIndex
        The object produced by the copy operation under test.
    original : MultiIndex
        The index that was copied.
    """
    # Levels should be (at least, shallow copied)
    tm.assert_copy(copy.levels, original.levels)

    # Codes: it doesn't matter which way they were copied.  (The equality
    # check used to be issued twice in a row; once is sufficient.)
    tm.assert_almost_equal(copy.codes, original.codes)
    assert copy.codes is not original.codes

    # Names: it doesn't matter which way they were copied
    assert copy.names == original.names
    assert copy.names is not original.names

    # Sort order should be copied
    assert copy.sortorder == original.sortorder
def test_copy(idx):
    """``copy()`` passes the shared copied-MultiIndex assertions."""
    assert_multiindex_copied(idx.copy(), idx)
def test_shallow_copy(idx):
    """``_shallow_copy()`` passes the shared copied-MultiIndex assertions."""
    assert_multiindex_copied(idx._shallow_copy(), idx)
def test_labels_deprecated(idx):
    """Passing ``labels=`` to ``copy`` emits ``FutureWarning``."""
    # GH23752
    codes = idx.codes
    with tm.assert_produces_warning(FutureWarning):
        idx.copy(labels=codes)
def test_view(idx):
    """``view()`` passes the shared copied-MultiIndex assertions."""
    assert_multiindex_copied(idx.view(), idx)
@pytest.mark.parametrize('func', [copy, deepcopy])
def test_copy_and_deepcopy(func):
    """``copy.copy``/``copy.deepcopy`` give a distinct but equal index."""
    source = MultiIndex(
        levels=[['foo', 'bar'], ['fizz', 'buzz']],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=['first', 'second']
    )
    duplicate = func(source)
    assert duplicate is not source
    assert duplicate.equals(source)
@pytest.mark.parametrize('deep', [True, False])
def test_copy_method(deep):
    """``copy(deep=...)`` returns an index equal to the source."""
    source = MultiIndex(
        levels=[['foo', 'bar'], ['fizz', 'buzz']],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=['first', 'second']
    )
    duplicate = source.copy(deep=deep)
    assert duplicate.equals(source)
@pytest.mark.parametrize('deep', [True, False])
@pytest.mark.parametrize('kwarg, value', [
    ('names', ['thrid', 'fourth']),
    ('levels', [['foo2', 'bar2'], ['fizz2', 'buzz2']]),
    ('codes', [[1, 0, 0, 0], [1, 1, 0, 0]])
])
def test_copy_method_kwargs(deep, kwarg, value):
    """Keyword overrides passed to ``copy`` show up on the result.

    A stray ``return`` placed right after building the index previously
    made the call under test and both assertions unreachable; it has been
    removed so each parametrized case is checked.
    """
    # gh-12309: Check that the "name" argument as well other kwargs are honored
    idx = MultiIndex(
        levels=[['foo', 'bar'], ['fizz', 'buzz']],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=['first', 'second']
    )
    idx_copy = idx.copy(**{kwarg: value, 'deep': deep})
    if kwarg == 'names':
        assert getattr(idx_copy, kwarg) == value
    else:
        # levels / codes are sequences of array-likes; compare as lists
        assert [list(i) for i in getattr(idx_copy, kwarg)] == value
@@ -1,128 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas.compat import lrange
from pandas.errors import PerformanceWarning
import pandas as pd
from pandas import Index, MultiIndex
import pandas.util.testing as tm
def test_drop(idx):
    """``drop`` with full keys, partial keys, mixes and ``errors``."""
    # full keys, given as a list of tuples and as a MultiIndex
    by_tuples = idx.drop([('foo', 'two'), ('qux', 'one')])
    index = MultiIndex.from_tuples([('foo', 'two'), ('qux', 'one')])
    by_index = idx.drop(index)

    expected = idx[[0, 2, 3, 5]]
    tm.assert_index_equal(by_tuples, expected)
    tm.assert_index_equal(by_index, expected)

    # partial (first-level) keys
    tm.assert_index_equal(idx.drop(['bar']), idx[[0, 1, 3, 4, 5]])
    tm.assert_index_equal(idx.drop('foo'), idx[[2, 3, 4, 5]])

    # keys that are not present
    index = MultiIndex.from_tuples([('bar', 'two')])
    for missing in ([('bar', 'two')], index, ['foo', 'two']):
        with pytest.raises(KeyError):
            idx.drop(missing)

    # partially correct argument
    mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')])
    with pytest.raises(KeyError):
        idx.drop(mixed_index)

    # error='ignore'
    dropped = idx.drop(index, errors='ignore')
    tm.assert_index_equal(dropped, idx[[0, 1, 2, 3, 4, 5]])

    dropped = idx.drop(mixed_index, errors='ignore')
    tm.assert_index_equal(dropped, idx[[0, 1, 2, 3, 5]])

    dropped = idx.drop(['foo', 'two'], errors='ignore')
    tm.assert_index_equal(dropped, idx[[2, 3, 4, 5]])

    # mixed partial / full drop
    dropped = idx.drop(['foo', ('qux', 'one')])
    tm.assert_index_equal(dropped, idx[[2, 3, 5]])

    # mixed partial / full drop / error='ignore'
    mixed_index = ['foo', ('qux', 'one'), 'two']
    with pytest.raises(KeyError):
        idx.drop(mixed_index)
    dropped = idx.drop(mixed_index, errors='ignore')
    tm.assert_index_equal(dropped, idx[[2, 3, 5]])
def test_droplevel_with_names(idx):
    """``droplevel`` keeps the remaining names; name and position agree."""
    foo_rows = idx[idx.get_loc('foo')]
    assert foo_rows.droplevel(0).name == 'second'

    index = MultiIndex(
        levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))],
        codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]),
               np.array([0, 1, 0, 0, 0, 1, 0, 1]),
               np.array([1, 0, 1, 1, 0, 0, 1, 0])],
        names=['one', 'two', 'three'])
    assert index.droplevel(0).names == ('two', 'three')

    # dropping by name matches dropping by position
    by_name = index.droplevel('two')
    by_position = index.droplevel(1)
    assert by_name.equals(by_position)
def test_droplevel_list():
    """``droplevel`` with a list of names, an empty list and bad lists."""
    index = MultiIndex(
        levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))],
        codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]),
               np.array([0, 1, 0, 0, 0, 1, 0, 1]),
               np.array([1, 0, 1, 1, 0, 0, 1, 0])],
        names=['one', 'two', 'three'])

    # several names at once equals dropping them one by one
    together = index[:2].droplevel(['three', 'one'])
    one_by_one = index[:2].droplevel(2).droplevel(0)
    assert together.equals(one_by_one)

    # an empty list leaves the index as it was
    untouched = index[:2].droplevel([])
    assert untouched.equals(index[:2])

    # listing every level
    with pytest.raises(ValueError):
        index[:2].droplevel(['one', 'two', 'three'])

    # listing a name that does not exist
    with pytest.raises(KeyError):
        index[:2].droplevel(['one', 'four'])
def test_drop_not_lexsorted():
    """``drop`` on a non-lexsorted index warns and matches the sorted one."""
    # GH 12078

    # define the lexsorted version of the multi-index
    lexsorted_mi = MultiIndex.from_tuples(
        [('a', ''), ('b1', 'c1'), ('b2', 'c2')], names=['b', 'c'])
    assert lexsorted_mi.is_lexsorted()

    # and the not-lexsorted version
    frame = pd.DataFrame(columns=['a', 'b', 'c', 'd'],
                         data=[[1, 'b1', 'c1', 3], [1, 'b2', 'c2', 4]])
    frame = frame.pivot_table(index='a', columns=['b', 'c'], values='d')
    frame = frame.reset_index()
    not_lexsorted_mi = frame.columns
    assert not not_lexsorted_mi.is_lexsorted()

    # compare the results
    tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
    with tm.assert_produces_warning(PerformanceWarning):
        tm.assert_index_equal(lexsorted_mi.drop('a'),
                              not_lexsorted_mi.drop('a'))
@@ -1,266 +0,0 @@
# -*- coding: utf-8 -*-
from itertools import product
import numpy as np
import pytest
from pandas._libs import hashtable
from pandas.compat import range, u
from pandas import DatetimeIndex, MultiIndex
import pandas.util.testing as tm
@pytest.mark.parametrize('names', [None, ['first', 'second']])
def test_unique(names):
    """``unique()`` drops repeated entries and keeps the level names."""
    # (input arrays, arrays expected after de-duplication)
    cases = [
        ([[1, 2, 1, 2], [1, 1, 1, 2]], [[1, 2, 2], [1, 1, 2]]),
        ([list('aaaa'), list('abab')], [list('aa'), list('ab')]),
        ([list('aaaa'), list('aaaa')], [['a'], ['a']]),
    ]
    for arrays, unique_arrays in cases:
        mi = MultiIndex.from_arrays(arrays, names=names)
        res = mi.unique()
        exp = MultiIndex.from_arrays(unique_arrays, names=mi.names)
        tm.assert_index_equal(res, exp)

    # GH #20568 - empty MI
    empty = MultiIndex.from_arrays([[], []], names=names)
    tm.assert_index_equal(empty, empty.unique())
def test_unique_datetimelike():
    """``unique()`` on datetime levels, including NaT and a tz level."""
    naive = DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
                           '2015-01-01', 'NaT', 'NaT'])
    aware = DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-02',
                           '2015-01-02', 'NaT', '2015-01-01'],
                          tz='Asia/Tokyo')
    result = MultiIndex.from_arrays([naive, aware]).unique()

    unique_naive = DatetimeIndex(['2015-01-01', '2015-01-01', 'NaT', 'NaT'])
    unique_aware = DatetimeIndex(['2015-01-01', '2015-01-02',
                                  'NaT', '2015-01-01'],
                                 tz='Asia/Tokyo')
    exp = MultiIndex.from_arrays([unique_naive, unique_aware])
    tm.assert_index_equal(result, exp)
@pytest.mark.parametrize('level', [0, 'first', 1, 'second'])
def test_unique_level(idx, level):
    """``unique(level=...)`` matches de-duplicated ``get_level_values``.

    Covers the fixture index, an index whose levels are already unique,
    and an empty index.  The empty case previously computed ``result`` and
    ``expected`` but never compared them; the comparison is now made.
    """
    # GH #17896 - with level= argument
    result = idx.unique(level=level)
    expected = idx.get_level_values(level).unique()
    tm.assert_index_equal(result, expected)

    # With already unique level
    mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]],
                                names=['first', 'second'])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    tm.assert_index_equal(result, expected)

    # With empty MI
    mi = MultiIndex.from_arrays([[], []], names=['first', 'second'])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize('dropna', [True, False])
def test_get_unique_index(idx, dropna):
    """``_get_unique_index`` reduces a repeated selection to unique rows."""
    mi = idx[[0, 1, 0, 1, 1, 0, 0]]
    expected = mi._shallow_copy(mi[[0, 1]])

    result = mi._get_unique_index(dropna=dropna)
    # ``result.unique`` is a bound method and therefore always truthy;
    # ``is_unique`` is the attribute that actually reports uniqueness.
    assert result.is_unique
    tm.assert_index_equal(result, expected)
def test_duplicate_multiindex_codes():
    """Duplicate entries inside a level are rejected with ValueError."""
    # GH 17464
    # Make sure that a MultiIndex with duplicate levels throws a ValueError
    duplicated_levels = [['A'] * 10, range(10)]
    with pytest.raises(ValueError):
        MultiIndex(duplicated_levels, [[0] * 10, range(10)])

    # And that using set_levels with duplicate levels fails
    index = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'],
                                    [1, 2, 1, 2, 3]])
    bad_levels = [['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]]
    with pytest.raises(ValueError):
        index.set_levels(bad_levels, inplace=True)
@pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2],
                                   [1, 'a', 1]])
def test_duplicate_level_names(names):
    """Repeated level names survive construction and ``rename``."""
    # GH18872, GH19029
    at_construction = MultiIndex.from_product([[0, 1]] * 3, names=names)
    assert at_construction.names == names

    # With .rename()
    renamed = MultiIndex.from_product([[0, 1]] * 3).rename(names)
    assert renamed.names == names

    # With .rename(., level=)
    renamed.rename(names[1], level=1, inplace=True)
    outer_names = [names[0], names[2]]
    renamed = renamed.rename(outer_names, level=[0, 2])
    assert renamed.names == names
def test_duplicate_meta_data():
    """``drop_duplicates`` keeps the level names, whatever they are."""
    # GH 10115
    base = MultiIndex(
        levels=[[0, 1], [0, 1, 2]],
        codes=[[0, 0, 0, 0, 1, 1, 1],
               [0, 1, 2, 0, 0, 1, 2]])
    name_sets = ([None, None], [None, 'Num'], ['Upper', 'Num'])
    variants = [base] + [base.set_names(names) for names in name_sets]

    for variant in variants:
        assert variant.has_duplicates
        assert variant.drop_duplicates().names == variant.names
def test_has_duplicates(idx, idx_dup):
    """``is_unique`` and ``has_duplicates`` are real, opposite booleans."""
    repeated = MultiIndex(levels=[[0, 1], [0, 1, 2]],
                          codes=[[0, 0, 0, 0, 1, 1, 1],
                                 [0, 1, 2, 0, 0, 1, 2]])

    # see fixtures
    checks = [(idx, True), (idx_dup, False), (repeated, False)]
    for index, unique in checks:
        assert index.is_unique is unique
        assert index.has_duplicates is (not unique)
def test_has_duplicates_from_tuples():
    """Eight-level tuples that differ in two positions are not duplicates."""
    # GH 9075
    # only the 4th and 8th positions vary between the tuples
    varying = [(5, 169), (7, 119), (9, 135), (13, 145), (14, 158),
               (16, 122), (17, 160), (18, 180), (20, 143), (21, 128),
               (22, 129), (25, 111), (28, 114), (29, 121), (31, 126),
               (32, 155), (33, 123), (12, 144)]
    tuples = [(u('x'), u('out'), u('z'), first,
               u('y'), u('in'), u('z'), second)
              for first, second in varying]

    index = MultiIndex.from_tuples(tuples)
    assert not index.has_duplicates
def test_has_duplicates_overflow():
    """Check ``has_duplicates`` on MultiIndexes with 5 and 9 levels.

    ``check`` first builds an index that is expected to have no duplicate
    rows, then appends a copy of its first row and expects a duplicate to be
    reported; both are done with and without ``-1`` codes.
    """
    # handle int64 overflow if possible
    def check(nlevels, with_nulls):
        # 1000 codes: 0..499 repeated twice; the extra level appended below
        # (-1/1 or 0/1) differs between the two halves
        codes = np.tile(np.arange(500), 2)
        level = np.arange(500)

        if with_nulls:  # inject some null values
            codes[500] = -1  # common nan value
            codes = [codes.copy() for i in range(nlevels)]
            for i in range(nlevels):
                # one extra -1 per level, at a position that depends on i
                codes[i][500 + i - nlevels // 2] = -1

            codes += [np.array([-1, 1]).repeat(500)]
        else:
            codes = [codes] * nlevels + [np.arange(2).repeat(500)]

        levels = [level] * nlevels + [[0, 1]]

        # no dups
        mi = MultiIndex(levels=levels, codes=codes)
        assert not mi.has_duplicates

        # with a dup
        if with_nulls:
            def f(a):
                # append a copy of the first code at the end of each array
                return np.insert(a, 1000, a[0])
            codes = list(map(f, codes))
            mi = MultiIndex(levels=levels, codes=codes)
        else:
            values = mi.values.tolist()
            mi = MultiIndex.from_tuples(values + [values[0]])

        assert mi.has_duplicates

    # no overflow
    check(4, False)
    check(4, True)

    # overflow possible
    check(8, False)
    check(8, True)
@pytest.mark.parametrize('keep, expected', [
    ('first', np.array([False, False, False, True, True, False])),
    ('last', np.array([False, True, True, False, False, False])),
    (False, np.array([False, True, True, True, True, False]))
])
def test_duplicated(idx_dup, keep, expected):
    """``duplicated`` flags rows according to the ``keep`` policy."""
    mask = idx_dup.duplicated(keep=keep)
    tm.assert_numpy_array_equal(mask, expected)
@pytest.mark.parametrize('keep', ['first', 'last', False])
def test_duplicated_large(keep):
    """``duplicated`` agrees with the object hashtable on a large index."""
    # GH 9125
    n, k = 200, 5000
    n_rows = k * n
    levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)]
    # one random code array per level
    codes = [np.random.choice(n, n_rows) for _ in levels]
    index = MultiIndex(levels=levels, codes=codes)

    actual = index.duplicated(keep=keep)
    wanted = hashtable.duplicated_object(index.values, keep=keep)
    tm.assert_numpy_array_equal(actual, wanted)
def test_get_duplicates():
    """Indexes without repeated rows report no duplicates, NaN included."""
    empty = [[], []]

    def assert_no_duplicates(index):
        assert not index.has_duplicates

        with tm.assert_produces_warning(FutureWarning):
            # Deprecated - see GH20239
            assert index.get_duplicates().equals(
                MultiIndex.from_arrays(empty))

        tm.assert_numpy_array_equal(index.duplicated(),
                                    np.zeros(len(index), dtype='bool'))

    # GH5873
    for second in [101, 102]:
        assert_no_duplicates(
            MultiIndex.from_arrays([[101, second], [3.5, np.nan]]))

    # n is the 1st level shape, m the 2nd level shape
    for n, m in product(range(1, 6), range(1, 5)):
        # all possible unique combinations, including nan
        combos = list(product(range(-1, n), range(-1, m)))
        index = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]],
                           codes=np.random.permutation(combos).T)
        assert len(index) == (n + 1) * (m + 1)
        assert_no_duplicates(index)
@@ -1,221 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas.compat import lrange, lzip, range
import pandas as pd
from pandas import Index, MultiIndex, Series
import pandas.util.testing as tm
def test_equals(idx):
    """``equals`` accepts equal Index objects and rejects plain containers."""
    as_object_index = Index(idx, dtype=object)

    for same in (idx, idx.copy(), idx.astype(object), as_object_index):
        assert idx.equals(same)
    assert as_object_index.equals(idx)

    for container in (list(idx), np.array(idx)):
        assert not idx.equals(container)

    if idx.nlevels == 1:
        # do not test MultiIndex
        assert not idx.equals(pd.Series(idx))
def test_equals_op(idx):
    """Check element-wise ``==`` between an index and other containers.

    The index is compared with indexes, numpy arrays and Series of the same
    length (element-wise result expected) and of a different length
    (``ValueError`` expected).
    """
    # GH9947, GH10637
    index_a = idx
    n = len(index_a)
    index_b = index_a[0:-1]
    # same length as index_a, but the last element repeats the one before it
    index_c = index_a[0:-1].append(index_a[-2:-1])
    index_d = index_a[0:1]
    # the bare comparisons inside the ``raises`` blocks are deliberate: only
    # the exception matters, the result is discarded
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == index_b
    expected1 = np.array([True] * n)
    expected2 = np.array([True] * (n - 1) + [False])
    tm.assert_numpy_array_equal(index_a == index_a, expected1)
    tm.assert_numpy_array_equal(index_a == index_c, expected2)

    # test comparisons with numpy arrays
    array_a = np.array(index_a)
    array_b = np.array(index_a[0:-1])
    array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
    array_d = np.array(index_a[0:1])
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == array_b
    tm.assert_numpy_array_equal(index_a == array_a, expected1)
    tm.assert_numpy_array_equal(index_a == array_c, expected2)

    # test comparisons with Series
    series_a = Series(array_a)
    series_b = Series(array_b)
    series_c = Series(array_c)
    series_d = Series(array_d)
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == series_b

    tm.assert_numpy_array_equal(index_a == series_a, expected1)
    tm.assert_numpy_array_equal(index_a == series_c, expected2)

    # cases where length is 1 for one of them
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == index_d
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == series_d
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == array_d
    msg = "Can only compare identically-labeled Series objects"
    with pytest.raises(ValueError, match=msg):
        series_a == series_d
    with pytest.raises(ValueError, match="Lengths must match"):
        series_a == array_d

    # comparing with a scalar should broadcast; note that we are excluding
    # MultiIndex because in this case each item in the index is a tuple of
    # length 2, and therefore is considered an array of length 2 in the
    # comparison instead of a scalar
    if not isinstance(index_a, MultiIndex):
        expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
        # assuming the 2nd to last item is unique in the data
        item = index_a[-2]
        tm.assert_numpy_array_equal(index_a == item, expected3)
        tm.assert_series_equal(series_a == item, Series(expected3))
def test_equals_multi(idx):
    """``equals`` / ``equal_levels`` against self and differing indexes."""
    assert idx.equals(idx)
    assert not idx.equals(idx.values)
    assert idx.equals(Index(idx.values))

    assert idx.equal_levels(idx)
    assert not idx.equals(idx[:-1])
    assert not idx.equals(idx[-1])

    # different number of levels
    three_levels = MultiIndex(
        levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))],
        codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]),
               np.array([0, 1, 0, 0, 0, 1, 0, 1]),
               np.array([1, 0, 1, 1, 0, 0, 1, 0])])
    two_levels = MultiIndex(levels=three_levels.levels[:-1],
                            codes=three_levels.codes[:-1])
    assert not three_levels.equals(two_levels)
    assert not three_levels.equal_levels(two_levels)

    # levels are different
    numeric = MultiIndex(
        levels=[Index(lrange(4)), Index(lrange(2))],
        codes=[np.array([0, 0, 1, 2, 2, 3]),
               np.array([0, 1, 0, 0, 1, 0])])
    assert not idx.equals(numeric)
    assert not idx.equal_levels(numeric)

    # some of the labels are different
    relabelled = MultiIndex(
        levels=[Index(['foo', 'bar', 'baz', 'qux']), Index(['one', 'two'])],
        codes=[np.array([0, 0, 2, 2, 3, 3]),
               np.array([0, 1, 0, 1, 0, 1])])
    assert not idx.equals(relabelled)
def test_identical(idx):
    """``identical`` also compares names, while ``equals`` ignores them."""
    new_names = ['new1', 'new2']
    left = idx.copy()
    right = idx.copy()
    assert left.identical(right)

    left = left.set_names(new_names)
    assert left.equals(right)
    assert not left.identical(right)

    right = right.set_names(new_names)
    assert left.identical(right)

    tupleized = Index(left.tolist(), names=left.names)
    not_tupleized = Index(left.tolist(), names=left.names,
                          tupleize_cols=False)
    assert left.identical(tupleized)
    assert not left.identical(not_tupleized)
    assert left.equals(not_tupleized)
def test_equals_operator(idx):
# GH9785
assert (idx == idx).all()
def test_equals_missing_values():
    """A one-row slice never ``equals`` the scalar at the same position."""
    # make sure take is not using -1
    index = pd.MultiIndex.from_tuples([(0, pd.NaT),
                                       (0, pd.Timestamp('20130101'))])
    for position in (0, 1):
        one_row = index[position:position + 1]
        assert not one_row.equals(index[position])
def test_is_():
    """``is_`` survives views and renames but not ``set_levels``."""
    base = MultiIndex.from_tuples(lzip(range(10), range(10)))
    assert base.is_(base)
    assert base.is_(base.view())
    assert base.is_(base.view().view().view().view())

    renamed_view = base.view()
    # names are metadata, they don't change id
    renamed_view.names = ["A", "B"]
    assert renamed_view.is_(base)
    assert base.is_(renamed_view)

    assert base.is_(base.set_names(["C", "D"]))
    renamed_view = base.view()
    renamed_view.set_names(["E", "F"], inplace=True)
    assert base.is_(renamed_view)

    # levels are inherent properties, they change identity
    relevelled = renamed_view.set_levels([lrange(10), lrange(10)])
    assert not relevelled.is_(renamed_view)
    # shouldn't change
    assert renamed_view.is_(base)

    relevelled_view = relevelled.view()
    # GH 17464 - Remove duplicate MultiIndex levels
    relevelled_view.set_levels([lrange(10), lrange(10)], inplace=True)
    assert not relevelled_view.is_(relevelled)

    same_levels_view = base.view()
    same_levels_view.set_levels(same_levels_view.levels, inplace=True)
    assert not same_levels_view.is_(base)
def test_is_all_dates(idx):
    """The fixture index must not report ``is_all_dates``."""
    all_dates = idx.is_all_dates
    assert not all_dates
def test_is_numeric(idx):
    """``is_numeric()`` must be falsy for the fixture index."""
    # MultiIndex is never numeric
    numeric = idx.is_numeric()
    assert not numeric
def test_multiindex_compare():
    """Comparison operators work on a MultiIndex with a single level."""
    # GH 21149
    # Ensure comparison operations for MultiIndex with nlevels == 1
    # behave consistently with those for MultiIndex with nlevels > 1
    single_level = pd.MultiIndex.from_product([[0, 1]])

    # Equality self-test: MultiIndex object vs self
    equal_to_self = pd.Series(single_level == single_level)
    tm.assert_series_equal(equal_to_self, pd.Series([True, True]))

    # Greater than comparison: MultiIndex object vs self
    greater_than_self = pd.Series(single_level > single_level)
    tm.assert_series_equal(greater_than_self, pd.Series([False, False]))
@@ -1,132 +0,0 @@
# -*- coding: utf-8 -*-
import warnings
import pytest
from pandas.compat import PY3, range, u
import pandas as pd
from pandas import MultiIndex, compat
import pandas.util.testing as tm
def test_dtype_str(indices):
    """``dtype_str`` is a string equal to ``str(dtype)``."""
    dtype_name = indices.dtype_str
    assert isinstance(dtype_name, compat.string_types)
    assert dtype_name == str(indices.dtype)
def test_format(idx):
    """``format`` must not raise, not even on an empty slice."""
    for index in (idx, idx[:0]):
        index.format()
def test_format_integer_names():
    """``format(names=True)`` must not raise when level names are ints."""
    levels = [[0, 1], [0, 1]]
    codes = [[0, 0, 1, 1], [0, 1, 0, 1]]
    int_named = MultiIndex(levels=levels, codes=codes, names=[0, 1])
    int_named.format(names=True)
def test_format_sparse_config(idx):
    """``format`` repeats outer labels when ``display.multi_sparse`` is off.

    ``warnings.catch_warnings`` restores the warning filters on exit (a plain
    reference to ``warnings.filters`` aliases the list that
    ``filterwarnings`` mutates, so it cannot be used to restore anything),
    and the display options are reset even when ``format`` raises.
    """
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=FutureWarning,
                                module=".*format")
        # GH1538
        pd.set_option('display.multi_sparse', False)
        try:
            result = idx.format()
        finally:
            tm.reset_display_options()

    # NOTE(review): expected text kept verbatim; confirm its internal
    # spacing against the real format() output.
    assert result[1] == 'foo two'
def test_format_sparse_display():
    """Check the formatted fourth row of a four-level index."""
    levels = [[0, 1], [0, 1], [0, 1], [0]]
    codes = [[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1],
             [0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]]
    formatted = MultiIndex(levels=levels, codes=codes).format()
    # NOTE(review): expected text kept verbatim; confirm its internal
    # spacing against the real format() output.
    assert formatted[3] == '1 0 0 0'
def test_repr_with_unicode_data():
    """The repr of an index holding unicode data is not unicode-escaped."""
    with pd.core.config.option_context("display.encoding", 'UTF-8'):
        data = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
        frame = pd.DataFrame(data).set_index(["a", "b"])
        text = repr(frame.index)
        assert "\\u" not in text  # we don't want unicode-escaped
@pytest.mark.skip(reason="#22511 will remove this test")
def test_repr_roundtrip():
    """Check that ``eval(repr(mi))`` rebuilds an equal MultiIndex.

    Currently skipped (see the decorator). ``eval`` is only applied to the
    repr of indexes built inside this test.
    """
    mi = MultiIndex.from_product([list('ab'), range(3)],
                                 names=['first', 'second'])
    str(mi)

    if PY3:
        tm.assert_index_equal(eval(repr(mi)), mi, exact=True)
    else:
        result = eval(repr(mi))
        # string coerces to unicode
        tm.assert_index_equal(result, mi, exact=False)
        assert mi.get_level_values('first').inferred_type == 'string'
        assert result.get_level_values('first').inferred_type == 'unicode'

    # same round trip, starting from unicode labels
    mi_u = MultiIndex.from_product(
        [list(u'ab'), range(3)], names=['first', 'second'])
    result = eval(repr(mi_u))
    tm.assert_index_equal(result, mi_u, exact=True)

    # formatting
    if PY3:
        str(mi)
    else:
        compat.text_type(mi)

    # long format
    mi = MultiIndex.from_product([list('abcdefg'), range(10)],
                                 names=['first', 'second'])

    if PY3:
        tm.assert_index_equal(eval(repr(mi)), mi, exact=True)
    else:
        result = eval(repr(mi))
        # string coerces to unicode
        tm.assert_index_equal(result, mi, exact=False)
        assert mi.get_level_values('first').inferred_type == 'string'
        assert result.get_level_values('first').inferred_type == 'unicode'

    result = eval(repr(mi_u))
    tm.assert_index_equal(result, mi_u, exact=True)
def test_unicode_string_with_unicode():
    """Converting an index with unicode data to text must not raise."""
    data = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    index = pd.DataFrame(data).set_index(["a", "b"]).index

    to_text = str if PY3 else compat.text_type
    to_text(index)
def test_bytestring_with_unicode():
    """Converting an index with unicode data to bytes must not raise."""
    data = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    index = pd.DataFrame(data).set_index(["a", "b"]).index

    to_bytes = bytes if PY3 else str
    to_bytes(index)
def test_repr_max_seq_item_setting(idx):
    """With ``display.max_seq_items`` unset the text has no ellipsis."""
    # GH10182
    repeated = idx.repeat(50)
    with pd.option_context("display.max_seq_items", None):
        repr(repeated)
        text = str(repeated)
        assert '...' not in text
@@ -1,454 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas.compat import range
import pandas as pd
from pandas import CategoricalIndex, Index, MultiIndex
import pandas.util.testing as tm
def assert_matching(actual, expected, check_dtype=False):
    """Assert that two sequences of array-likes match pairwise.

    Both sequences must have the same length; each pair is converted with
    ``np.asarray`` and compared with ``tm.assert_numpy_array_equal``.
    """
    # avoid specifying internal representation
    # as much as possible
    assert len(actual) == len(expected)
    for got, want in zip(actual, expected):
        tm.assert_numpy_array_equal(np.asarray(got), np.asarray(want),
                                    check_dtype=check_dtype)
def test_get_level_number_integer(idx):
    """Integer level names are looked up as names before positions."""
    idx.names = [1, 0]
    for name, position in [(1, 0), (0, 1)]:
        assert idx._get_level_number(name) == position

    with pytest.raises(IndexError):
        idx._get_level_number(2)
    with pytest.raises(KeyError, match='Level fourth not found'):
        idx._get_level_number('fourth')
def test_get_level_values(idx):
    """``get_level_values`` by position, by name and with categoricals."""
    by_position = idx.get_level_values(0)
    wanted = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'],
                   name='first')
    tm.assert_index_equal(by_position, wanted)
    assert by_position.name == 'first'

    by_name = idx.get_level_values('first')
    tm.assert_index_equal(by_name, idx.get_level_values(0))

    # GH 10460
    categorical = MultiIndex(
        levels=[CategoricalIndex(['A', 'B']),
                CategoricalIndex([1, 2, 3])],
        codes=[np.array([0, 0, 0, 1, 1, 1]),
               np.array([0, 1, 2, 0, 1, 2])])
    per_level = [CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B']),
                 CategoricalIndex([1, 2, 3, 1, 2, 3])]
    for level, wanted in enumerate(per_level):
        tm.assert_index_equal(categorical.get_level_values(level), wanted)
def test_get_value_duplicates():
    """A repeated first-level key gives a slice from ``get_loc``."""
    levels = [['D', 'B', 'C'], [0, 26, 27, 37, 57, 67, 75, 82]]
    codes = [[0, 0, 0, 1, 2, 2, 2, 2, 2, 2],
             [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]]
    tagged = MultiIndex(levels=levels, codes=codes, names=['tag', 'day'])

    assert tagged.get_loc('D') == slice(0, 3)
    with pytest.raises(KeyError):
        tagged._engine.get_value(np.array([]), 'D')
def test_get_level_values_all_na():
    """Level values keep their dtype when a level holds only NaN."""
    # GH 17924 when level entirely consists of nan
    index = pd.MultiIndex.from_arrays(
        [[np.nan, np.nan, np.nan], ['a', np.nan, 1]])

    all_nan = index.get_level_values(0)
    tm.assert_index_equal(
        all_nan, pd.Index([np.nan, np.nan, np.nan], dtype=np.float64))

    mixed = index.get_level_values(1)
    tm.assert_index_equal(
        mixed, pd.Index(['a', np.nan, 1], dtype=object))
def test_get_level_values_int_with_na():
    """Integer levels containing NaN come back as a plain ``Index``."""
    # GH 17924
    first_level = ['a', 'b', 'b']
    for second_level in ([1, np.nan, 2], [np.nan, np.nan, 2]):
        index = pd.MultiIndex.from_arrays([first_level, second_level])
        values = index.get_level_values(1)
        tm.assert_index_equal(values, Index(second_level))
def test_get_level_values_na():
    """``get_level_values`` with NaN, NaT and empty levels."""
    with_nan = pd.MultiIndex.from_arrays(
        [[np.nan, np.nan, np.nan], ['a', np.nan, 1]])
    tm.assert_index_equal(with_nan.get_level_values(0),
                          pd.Index([np.nan, np.nan, np.nan]))
    tm.assert_index_equal(with_nan.get_level_values(1),
                          pd.Index(['a', np.nan, 1]))

    with_nat = pd.MultiIndex.from_arrays(
        [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])])
    tm.assert_index_equal(with_nat.get_level_values(1),
                          pd.DatetimeIndex([0, 1, pd.NaT]))

    empty = pd.MultiIndex.from_arrays([[], []])
    tm.assert_index_equal(empty.get_level_values(0),
                          pd.Index([], dtype=object))
def test_set_name_methods(idx, index_names):
    """``set_names`` with and without ``level`` / ``inplace``."""
    # so long as these are synonyms, we don't need to test set_names
    assert idx.rename == idx.set_names

    suffixed = [name + "SUFFIX" for name in index_names]
    suffixed_twice = [name + "SUFFIX2" for name in suffixed]

    renamed = idx.set_names(suffixed)
    assert idx.names == index_names
    assert renamed.names == suffixed
    with pytest.raises(ValueError, match="^Length"):
        renamed.set_names(suffixed + suffixed)
    outcome = renamed.set_names(suffixed_twice, inplace=True)
    assert outcome is None
    assert renamed.names == suffixed_twice

    # set names for specific level (# GH7792)
    renamed = idx.set_names(suffixed[0], level=0)
    assert idx.names == index_names
    assert renamed.names == [suffixed[0], index_names[1]]

    outcome = renamed.set_names(suffixed_twice[0], level=0, inplace=True)
    assert outcome is None
    assert renamed.names == [suffixed_twice[0], index_names[1]]

    # set names for multiple levels
    renamed = idx.set_names(suffixed, level=[0, 1])
    assert idx.names == index_names
    assert renamed.names == suffixed

    outcome = renamed.set_names(suffixed_twice, level=[0, 1], inplace=True)
    assert outcome is None
    assert renamed.names == suffixed_twice
def test_set_levels_codes_directly(idx):
    """Assigning to ``levels`` or ``codes`` raises AttributeError."""
    # setting levels/codes directly raises AttributeError
    new_levels = [[label + 'a' for label in level] for level in idx.levels]

    major, minor = idx.codes
    new_codes = [[(code + 1) % 3 for code in major],
                 [(code + 1) % 1 for code in minor]]

    with pytest.raises(AttributeError):
        idx.levels = new_levels
    with pytest.raises(AttributeError):
        idx.codes = new_codes
def test_set_levels(idx):
    """Exercise ``set_levels`` with and without ``level`` and ``inplace``.

    Every non-inplace call is followed by a check that ``idx`` itself still
    has its original levels; the final loop checks that rejected calls leave
    both levels and codes untouched.
    """
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    levels = idx.levels
    new_levels = [[lev + 'a' for lev in level] for level in levels]

    # level changing [w/o mutation]
    ind2 = idx.set_levels(new_levels)
    assert_matching(ind2.levels, new_levels)
    assert_matching(idx.levels, levels)

    # level changing [w/ mutation]
    ind2 = idx.copy()
    inplace_return = ind2.set_levels(new_levels, inplace=True)
    assert inplace_return is None
    assert_matching(ind2.levels, new_levels)

    # level changing specific level [w/o mutation]
    ind2 = idx.set_levels(new_levels[0], level=0)
    assert_matching(ind2.levels, [new_levels[0], levels[1]])
    assert_matching(idx.levels, levels)

    ind2 = idx.set_levels(new_levels[1], level=1)
    assert_matching(ind2.levels, [levels[0], new_levels[1]])
    assert_matching(idx.levels, levels)

    # level changing multiple levels [w/o mutation]
    ind2 = idx.set_levels(new_levels, level=[0, 1])
    assert_matching(ind2.levels, new_levels)
    assert_matching(idx.levels, levels)

    # level changing specific level [w/ mutation]
    ind2 = idx.copy()
    inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True)
    assert inplace_return is None
    assert_matching(ind2.levels, [new_levels[0], levels[1]])
    assert_matching(idx.levels, levels)

    ind2 = idx.copy()
    inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True)
    assert inplace_return is None
    assert_matching(ind2.levels, [levels[0], new_levels[1]])
    assert_matching(idx.levels, levels)

    # level changing multiple levels [w/ mutation]
    ind2 = idx.copy()
    inplace_return = ind2.set_levels(new_levels, level=[0, 1],
                                     inplace=True)
    assert inplace_return is None
    assert_matching(ind2.levels, new_levels)
    assert_matching(idx.levels, levels)

    # illegal level changing should not change levels
    # GH 13754
    original_index = idx.copy()
    for inplace in [True, False]:
        # each rejected call is followed by a check against the copy above
        with pytest.raises(ValueError, match="^On"):
            idx.set_levels(['c'], level=0, inplace=inplace)
        assert_matching(idx.levels, original_index.levels,
                        check_dtype=True)

        with pytest.raises(ValueError, match="^On"):
            idx.set_codes([0, 1, 2, 3, 4, 5], level=0,
                          inplace=inplace)
        assert_matching(idx.codes, original_index.codes,
                        check_dtype=True)

        with pytest.raises(TypeError, match="^Levels"):
            idx.set_levels('c', level=0, inplace=inplace)
        assert_matching(idx.levels, original_index.levels,
                        check_dtype=True)

        with pytest.raises(TypeError, match="^Codes"):
            idx.set_codes(1, level=0, inplace=inplace)
        assert_matching(idx.codes, original_index.codes,
                        check_dtype=True)
def test_set_codes(idx):
    """Exercise ``set_codes`` with and without ``level`` and ``inplace``.

    Every non-inplace call is followed by a check that ``idx`` itself still
    has its original codes. The last part uses a 130-row index and checks
    that the ``labels=`` keyword produces a FutureWarning.
    """
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    codes = idx.codes
    major_codes, minor_codes = codes
    major_codes = [(x + 1) % 3 for x in major_codes]
    # (x + 1) % 1 is 0 for every integer x
    minor_codes = [(x + 1) % 1 for x in minor_codes]
    new_codes = [major_codes, minor_codes]

    # changing codes w/o mutation
    ind2 = idx.set_codes(new_codes)
    assert_matching(ind2.codes, new_codes)
    assert_matching(idx.codes, codes)

    # changing label w/ mutation
    ind2 = idx.copy()
    inplace_return = ind2.set_codes(new_codes, inplace=True)
    assert inplace_return is None
    assert_matching(ind2.codes, new_codes)

    # codes changing specific level w/o mutation
    ind2 = idx.set_codes(new_codes[0], level=0)
    assert_matching(ind2.codes, [new_codes[0], codes[1]])
    assert_matching(idx.codes, codes)

    ind2 = idx.set_codes(new_codes[1], level=1)
    assert_matching(ind2.codes, [codes[0], new_codes[1]])
    assert_matching(idx.codes, codes)

    # codes changing multiple levels w/o mutation
    ind2 = idx.set_codes(new_codes, level=[0, 1])
    assert_matching(ind2.codes, new_codes)
    assert_matching(idx.codes, codes)

    # label changing specific level w/ mutation
    ind2 = idx.copy()
    inplace_return = ind2.set_codes(new_codes[0], level=0, inplace=True)
    assert inplace_return is None
    assert_matching(ind2.codes, [new_codes[0], codes[1]])
    assert_matching(idx.codes, codes)

    ind2 = idx.copy()
    inplace_return = ind2.set_codes(new_codes[1], level=1, inplace=True)
    assert inplace_return is None
    assert_matching(ind2.codes, [codes[0], new_codes[1]])
    assert_matching(idx.codes, codes)

    # codes changing multiple levels [w/ mutation]
    ind2 = idx.copy()
    inplace_return = ind2.set_codes(new_codes, level=[0, 1],
                                    inplace=True)
    assert inplace_return is None
    assert_matching(ind2.codes, new_codes)
    assert_matching(idx.codes, codes)

    # label changing for levels of different magnitude of categories
    ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)])
    # reversed codes for the second level: 129, 128, ..., 0
    new_codes = range(129, -1, -1)
    expected = pd.MultiIndex.from_tuples(
        [(0, i) for i in new_codes])

    # [w/o mutation]
    result = ind.set_codes(codes=new_codes, level=1)
    assert result.equals(expected)

    # [w/ mutation]
    result = ind.copy()
    result.set_codes(codes=new_codes, level=1, inplace=True)
    assert result.equals(expected)

    # the ``labels`` keyword is expected to produce a FutureWarning
    with tm.assert_produces_warning(FutureWarning):
        ind.set_codes(labels=new_codes, level=1)
def test_set_labels_deprecated():
    """``set_labels`` still works but emits a FutureWarning."""
    # GH23752
    original = pd.MultiIndex.from_tuples([(0, i) for i in range(130)])
    reversed_labels = range(129, -1, -1)
    wanted = pd.MultiIndex.from_tuples(
        [(0, i) for i in reversed_labels])

    # [w/o mutation]
    with tm.assert_produces_warning(FutureWarning):
        relabelled = original.set_labels(labels=reversed_labels, level=1)
    assert relabelled.equals(wanted)

    # [w/ mutation]
    relabelled = original.copy()
    with tm.assert_produces_warning(FutureWarning):
        relabelled.set_labels(labels=reversed_labels, level=1, inplace=True)
    assert relabelled.equals(wanted)
def test_set_levels_codes_names_bad_input(idx):
    """Malformed input to set_levels / set_codes / set_names is rejected."""
    levels, codes = idx.levels, idx.codes
    names = idx.names

    # (exception type, message pattern, call expected to raise), in order
    cases = [
        (ValueError, 'Length of levels',
         lambda: idx.set_levels([levels[0]])),
        (ValueError, 'Length of codes',
         lambda: idx.set_codes([codes[0]])),
        (ValueError, 'Length of names',
         lambda: idx.set_names([names[0]])),
        # shouldn't scalar data error, instead should demand list-like
        (TypeError, 'list of lists-like',
         lambda: idx.set_levels(levels[0])),
        (TypeError, 'list of lists-like',
         lambda: idx.set_codes(codes[0])),
        (TypeError, 'list-like',
         lambda: idx.set_names(names[0])),
        # should have equal lengths
        (TypeError, 'list of lists-like',
         lambda: idx.set_levels(levels[0], level=[0, 1])),
        (TypeError, 'list-like',
         lambda: idx.set_levels(levels, level=0)),
        (TypeError, 'list of lists-like',
         lambda: idx.set_codes(codes[0], level=[0, 1])),
        (TypeError, 'list-like',
         lambda: idx.set_codes(codes, level=0)),
        (ValueError, 'Length of names',
         lambda: idx.set_names(names[0], level=[0, 1])),
        (TypeError, 'Names must be a',
         lambda: idx.set_names(names, level=0)),
    ]
    for error, pattern, call in cases:
        with pytest.raises(error, match=pattern):
            call()
@pytest.mark.parametrize('inplace', [True, False])
def test_set_names_with_nlevel_1(inplace):
    """``set_names`` works on a MultiIndex that has a single level."""
    # GH 21149
    # Ensure that .set_names for MultiIndex with
    # nlevels == 1 does not raise any errors
    wanted = pd.MultiIndex(levels=[[0, 1]],
                           codes=[[0, 1]],
                           names=['first'])
    single_level = pd.MultiIndex.from_product([[0, 1]])
    returned = single_level.set_names('first', level=0, inplace=inplace)

    # with inplace=True the renamed object is the index itself
    actual = single_level if inplace else returned
    tm.assert_index_equal(actual, wanted)
@pytest.mark.parametrize('ordered', [True, False])
def test_set_levels_categorical(ordered):
    """``set_levels`` accepts a CategoricalIndex as the new level."""
    # GH13854
    index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])
    categories = CategoricalIndex(list("bac"), ordered=ordered)

    replaced = index.set_levels(categories, level=0)
    wanted = MultiIndex(levels=[categories, [0, 1, 2, 3]],
                        codes=index.codes)
    tm.assert_index_equal(replaced, wanted)

    wanted_values = CategoricalIndex(list("bacb"),
                                     categories=categories.categories,
                                     ordered=categories.ordered)
    tm.assert_index_equal(replaced.get_level_values(0), wanted_values)
def test_set_value_keeps_names():
    """Setting a value at a new key with ``.at`` keeps the index names."""
    # motivating example from #3742
    people = ['hans', 'hans', 'hans', 'grethe', 'grethe', 'grethe']
    numbers = ['1', '2', '3'] * 2
    index = pd.MultiIndex.from_arrays([people, numbers],
                                      names=['Name', 'Number'])
    frame = pd.DataFrame(
        np.random.randn(6, 4),
        columns=['one', 'two', 'three', 'four'],
        index=index).sort_index()

    def assert_unchanged_metadata():
        assert frame._is_copy is None
        assert frame.index.names == ('Name', 'Number')

    assert_unchanged_metadata()
    frame.at[('grethe', '4'), 'one'] = 99.34
    assert_unchanged_metadata()
def test_set_levels_with_iterable():
    """``set_levels`` accepts a one-shot iterable such as ``map``."""
    # GH23273
    names = ['size', 'color']
    colors = ['black'] * 3
    index = pd.MultiIndex.from_arrays([[1, 2, 3], colors], names=names)

    new_sizes = map(int, ['3', '2', '1'])
    actual = index.set_levels(new_sizes, level='size')

    wanted = pd.MultiIndex.from_arrays([[3, 2, 1], colors], names=names)
    tm.assert_index_equal(actual, wanted)
@@ -1,375 +0,0 @@
# -*- coding: utf-8 -*-
from datetime import timedelta
import numpy as np
import pytest
from pandas.compat import lrange
import pandas as pd
from pandas import (
Categorical, CategoricalIndex, Index, IntervalIndex, MultiIndex,
date_range)
from pandas.core.indexes.base import InvalidIndexError
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal
def test_slice_locs_partial(idx):
    """``slice_locs`` with tuple, open-ended and first-level-only bounds."""
    sorted_idx, _ = idx.sortlevel(0)

    # ((start, end), expected locations)
    cases = [
        ((('foo', 'two'), ('qux', 'one')), (1, 5)),
        ((None, ('qux', 'one')), (0, 5)),
        ((('foo', 'two'), None), (1, len(sorted_idx))),
        (('bar', 'baz'), (2, 4)),
    ]
    for (start, end), wanted in cases:
        located = sorted_idx.slice_locs(start, end)
        assert located == wanted
def test_slice_locs():
    """``slice_locs`` on a stacked time frame, on and between timestamps."""
    frame = tm.makeTimeDataFrame()
    stacked = frame.stack()
    index = stacked.index
    half_minute = timedelta(seconds=30)

    # bounds that are present in the first level
    start, stop = index.slice_locs(frame.index[5], frame.index[15])
    wanted = frame[5:16].stack()
    tm.assert_almost_equal(stacked[slice(start, stop)].values,
                           wanted.values)

    # bounds that fall between two timestamps
    start, stop = index.slice_locs(frame.index[5] + half_minute,
                                   frame.index[15] - half_minute)
    wanted = frame[6:15].stack()
    tm.assert_almost_equal(stacked[slice(start, stop)].values,
                           wanted.values)
def test_slice_locs_with_type_mismatch():
    """Bounds of the wrong type raise a 'Level type mismatch' TypeError."""
    mismatch = '^Level type mismatch'

    time_frame = tm.makeTimeDataFrame()
    time_index = time_frame.stack().index
    with pytest.raises(TypeError, match=mismatch):
        time_index.slice_locs((1, 3))
    with pytest.raises(TypeError, match=mismatch):
        time_index.slice_locs(
            time_frame.index[5] + timedelta(seconds=30), (5, 2))

    custom_frame = tm.makeCustomDataframe(5, 5)
    custom_index = custom_frame.stack().index
    with pytest.raises(TypeError, match=mismatch):
        custom_index.slice_locs(timedelta(seconds=30))
    # TODO: Try creating a UnicodeDecodeError in exception message
    with pytest.raises(TypeError, match=mismatch):
        custom_index.slice_locs(custom_frame.index[1], (16, "a"))
def test_slice_locs_not_sorted():
    """``slice_locs`` raises KeyError here until the index is sorted."""
    unsorted = MultiIndex(
        levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))],
        codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]),
               np.array([0, 1, 0, 0, 0, 1, 0, 1]),
               np.array([1, 0, 1, 1, 0, 0, 1, 0])])
    start, end = (1, 0, 1), (2, 1, 0)

    msg = "[Kk]ey length.*greater than MultiIndex lexsort depth"
    with pytest.raises(KeyError, match=msg):
        unsorted.slice_locs(start, end)

    # works
    sorted_index, _ = unsorted.sortlevel(0)
    # should there be a test case here???
    sorted_index.slice_locs(start, end)
def test_slice_locs_not_contained():
# some searchsorted action
index = MultiIndex(levels=[[0, 2, 4, 6], [0, 2, 4]],
codes=[[0, 0, 0, 1, 1, 2, 3, 3, 3],
[0, 1, 2, 1, 2, 2, 0, 1, 2]], sortorder=0)
result = index.slice_locs((1, 0), (5, 2))
assert result == (3, 6)
result = index.slice_locs(1, 5)
assert result == (3, 6)
result = index.slice_locs((2, 2), (5, 2))
assert result == (3, 6)
result = index.slice_locs(2, 5)
assert result == (3, 6)
result = index.slice_locs((1, 0), (6, 3))
assert result == (3, 8)
result = index.slice_locs(-1, 10)
assert result == (0, len(index))
def test_putmask_with_wrong_mask(idx):
    """``putmask`` rejects masks whose length differs from the index.

    Three bad masks are tried: one element too long, one element too short,
    and a string.
    """
    # GH18368
    # The builtin ``bool`` is used as dtype: ``np.bool`` was only an alias
    # for it and has been deprecated and later removed by NumPy.
    with pytest.raises(ValueError):
        idx.putmask(np.ones(len(idx) + 1, bool), 1)

    with pytest.raises(ValueError):
        idx.putmask(np.ones(len(idx) - 1, bool), 1)

    with pytest.raises(ValueError):
        idx.putmask('foo', 1)
def test_get_indexer():
    """Exercise get_indexer with exact, pad/ffill and backfill/bfill lookups."""
    mi = MultiIndex(
        levels=[Index(list(range(4))), Index(list(range(2)))],
        codes=[np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp),
               np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp)])
    head = mi[:5]
    picks = mi[[1, 3, 5]]

    exact = head.get_indexer(picks)
    assert_almost_equal(exact, np.array([1, 3, -1], dtype=np.intp))

    # 'pad' and 'ffill' must give the same answer
    padded = picks.get_indexer(head, method='pad')
    pad_expected = np.array([-1, 0, 0, 1, 1], dtype=np.intp)
    assert_almost_equal(padded, pad_expected)

    padded_rev = picks.get_indexer(head[::-1], method='pad')
    assert_almost_equal(padded_rev, pad_expected[::-1])

    ffilled = picks.get_indexer(head, method='ffill')
    assert_almost_equal(padded, ffilled)

    # 'backfill' and 'bfill' must give the same answer
    backfilled = picks.get_indexer(head, method='backfill')
    back_expected = np.array([0, 0, 1, 1, 2], dtype=np.intp)
    assert_almost_equal(backfilled, back_expected)

    backfilled_rev = picks.get_indexer(head[::-1], method='backfill')
    assert_almost_equal(backfilled_rev, back_expected[::-1])

    bfilled = picks.get_indexer(head, method='bfill')
    assert_almost_equal(backfilled, bfilled)

    # pass non-MultiIndex
    from_values = head.get_indexer(picks.values)
    from_index = head.get_indexer(picks)
    assert_almost_equal(from_values, from_index)

    unmatched = head.get_indexer([1, 2, 3])
    assert (unmatched == [-1, -1, -1]).all()

    # create index with duplicates
    duplicated = Index(list(range(10)) + list(range(10)))
    target = Index(list(range(20)))

    with pytest.raises(
            InvalidIndexError,
            match="Reindexing only valid with uniquely valued Index objects"):
        duplicated.get_indexer(target)
def test_get_indexer_nearest():
    """get_indexer raises NotImplementedError for 'nearest' and tolerance."""
    mi = MultiIndex.from_tuples([('a', 1), ('b', 2)])
    unsupported = (
        {'method': 'nearest'},
        {'method': 'pad', 'tolerance': 2},
    )
    for options in unsupported:
        with pytest.raises(NotImplementedError):
            mi.get_indexer(['a'], **options)
def test_getitem(idx):
# scalar
assert idx[2] == ('bar', 'one')
# slice
result = idx[2:5]
expected = idx[[2, 3, 4]]
assert result.equals(expected)
# boolean
result = idx[[True, False, True, False, True, True]]
result2 = idx[np.array([True, False, True, False, True, True])]
expected = idx[[0, 2, 4, 5]]
assert result.equals(expected)
assert result2.equals(expected)
def test_getitem_group_select(idx):
sorted_idx, _ = idx.sortlevel(0)
assert sorted_idx.get_loc('baz') == slice(3, 4)
assert sorted_idx.get_loc('foo') == slice(0, 2)
def test_get_indexer_consistency(idx):
# See GH 16819
if isinstance(idx, IntervalIndex):
pass
if idx.is_unique or isinstance(idx, CategoricalIndex):
indexer = idx.get_indexer(idx[0:2])
assert isinstance(indexer, np.ndarray)
assert indexer.dtype == np.intp
else:
e = "Reindexing only valid with uniquely valued Index objects"
with pytest.raises(InvalidIndexError, match=e):
idx.get_indexer(idx[0:2])
indexer, _ = idx.get_indexer_non_unique(idx[0:2])
assert isinstance(indexer, np.ndarray)
assert indexer.dtype == np.intp
@pytest.mark.parametrize('ind1', [[True] * 5, pd.Index([True] * 5)])
@pytest.mark.parametrize('ind2', [[True, False, True, False, False],
                                  pd.Index([True, False, True, False,
                                            False])])
def test_getitem_bool_index_all(ind1, ind2):
    """Boolean masks given as a list or as an Index select the same rows."""
    # GH#22533
    pairs = [(10, 1), (20, 2), (30, 3), (40, 4), (50, 5)]
    mi = MultiIndex.from_tuples(pairs)

    # all-True mask keeps everything
    tm.assert_index_equal(mi[ind1], mi)

    # partial mask keeps rows 0 and 2
    picked = MultiIndex.from_tuples([(10, 1), (30, 3)])
    tm.assert_index_equal(mi[ind2], picked)
@pytest.mark.parametrize('ind1', [[True], pd.Index([True])])
@pytest.mark.parametrize('ind2', [[False], pd.Index([False])])
def test_getitem_bool_index_single(ind1, ind2):
    """Boolean masks on a one-row index keep the row or leave it empty."""
    # GH#22533
    single = MultiIndex.from_tuples([(10, 1)])
    tm.assert_index_equal(single[ind1], single)

    no_values = [np.array([], dtype=np.int64),
                 np.array([], dtype=np.int64)]
    empty = pd.MultiIndex(levels=no_values, codes=[[], []])
    tm.assert_index_equal(single[ind2], empty)
def test_get_loc(idx):
assert idx.get_loc(('foo', 'two')) == 1
assert idx.get_loc(('baz', 'two')) == 3
pytest.raises(KeyError, idx.get_loc, ('bar', 'two'))
pytest.raises(KeyError, idx.get_loc, 'quux')
pytest.raises(NotImplementedError, idx.get_loc, 'foo',
method='nearest')
# 3 levels
index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index(
lrange(4))], codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])])
pytest.raises(KeyError, index.get_loc, (1, 1))
assert index.get_loc((2, 0)) == slice(3, 5)
def test_get_loc_duplicates():
    """get_loc gives a slice for a repeated label and an int for a unique one."""
    repeated = Index([2, 2, 2, 2])
    assert repeated.get_loc(2) == slice(0, 4)

    partly_repeated = Index(['c', 'a', 'a', 'b', 'b'])
    assert partly_repeated.get_loc('c') == 0
def test_get_loc_level():
    """get_loc_level returns the location and the index left after the key."""
    mi = MultiIndex(
        levels=[Index(list(range(4))) for _ in range(3)],
        codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]),
               np.array([0, 1, 0, 0, 0, 1, 0, 1]),
               np.array([1, 0, 1, 1, 0, 0, 1, 0])])

    # partial key: a slice plus the remaining level
    where, rest = mi.get_loc_level((0, 1))
    span = slice(1, 2)
    remaining = mi[span].droplevel(0).droplevel(0)
    assert where == span
    assert rest.equals(remaining)

    # full key: an integer position and no remaining index
    where, rest = mi.get_loc_level((0, 1, 0))
    assert where == 1
    assert rest is None

    with pytest.raises(KeyError):
        mi.get_loc_level((2, 2))

    # GH 22221: unused label
    # (the drop happens outside the raises block, as in the callable form)
    without_two = mi.drop(2)
    with pytest.raises(KeyError):
        without_two.get_loc_level(2)

    # Unused label on unsorted level:
    without_one = mi.drop(1, level=2)
    with pytest.raises(KeyError):
        without_one.get_loc_level(2, 2)

    mi = MultiIndex(levels=[[2000], list(range(4))],
                    codes=[np.array([0, 0, 0, 0]), np.array([0, 1, 2, 3])])
    where, rest = mi.get_loc_level((2000, slice(None, None)))
    assert where == slice(None, None)
    assert rest.equals(mi.droplevel(0))
@pytest.mark.parametrize('dtype1', [int, float, bool, str])
@pytest.mark.parametrize('dtype2', [int, float, bool, str])
def test_get_loc_multiple_dtypes(dtype1, dtype2):
    """get_loc finds an existing tuple whatever the two level dtypes are."""
    # GH 18520
    first = np.array([0, 1]).astype(dtype1)
    second = np.array([0, 1]).astype(dtype2)
    mi = pd.MultiIndex.from_product([first, second])
    position = 2
    assert mi.get_loc(mi[position]) == position
@pytest.mark.parametrize('level', [0, 1])
@pytest.mark.parametrize('dtypes', [[int, float], [float, int]])
def test_get_loc_implicit_cast(level, dtypes):
    """A key of one numeric type matches a level stored as the other."""
    # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa
    level_dtype, lookup_dtype = dtypes

    level_values = [['a', 'b'], ['c', 'd']]
    lookup = ['b', 'd']
    # swap the chosen level for numbers and look up 1 as the other type
    level_values[level] = np.array([0, 1], dtype=level_dtype)
    lookup[level] = lookup_dtype(1)

    mi = MultiIndex.from_product(level_values)
    assert mi.get_loc(tuple(lookup)) == 3
def test_get_loc_cast_bool():
# GH 19086 : int is casted to bool, but not vice-versa
levels = [[False, True], np.arange(2, dtype='int64')]
idx = MultiIndex.from_product(levels)
assert idx.get_loc((0, 1)) == 1
assert idx.get_loc((1, 0)) == 2
pytest.raises(KeyError, idx.get_loc, (False, True))
pytest.raises(KeyError, idx.get_loc, (True, False))
@pytest.mark.parametrize('level', [0, 1])
def test_get_loc_nan(level, nulls_fixture):
# GH 18485 : NaN in MultiIndex
levels = [['a', 'b'], ['c', 'd']]
key = ['b', 'd']
levels[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture))
key[level] = nulls_fixture
idx = MultiIndex.from_product(levels)
assert idx.get_loc(tuple(key)) == 3
def test_get_loc_missing_nan():
# GH 8569
idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]])
assert isinstance(idx.get_loc(1), slice)
pytest.raises(KeyError, idx.get_loc, 3)
pytest.raises(KeyError, idx.get_loc, np.nan)
pytest.raises(KeyError, idx.get_loc, [np.nan])
def test_get_indexer_categorical_time():
    """get_indexer of a categorical-level MultiIndex on itself is 0..8."""
    # https://github.com/pandas-dev/pandas/issues/21390
    letters = Categorical(['a', 'b', 'c'])
    stamps = Categorical(date_range("2012-01-01", periods=3, freq='H'))
    mi = MultiIndex.from_product([letters, stamps])

    positions = mi.get_indexer(mi)
    tm.assert_numpy_array_equal(positions, np.arange(9, dtype=np.intp))
@@ -1,293 +0,0 @@
# -*- coding: utf-8 -*-
import re
import numpy as np
import pytest
from pandas.compat import lrange, range
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
import pandas as pd
from pandas import IntervalIndex, MultiIndex, RangeIndex
import pandas.util.testing as tm
def test_labels_dtypes():
    """Codes use int8, int16 or int32 depending on the level size."""
    # GH 8456
    pair = MultiIndex.from_tuples([('A', 1), ('A', 2)])
    assert pair.codes[0].dtype == 'int8'
    assert pair.codes[1].dtype == 'int8'

    sized = ((40, 'int8'), (400, 'int16'), (40000, 'int32'))
    for n_values, code_dtype in sized:
        product = MultiIndex.from_product([['a'], range(n_values)])
        assert product.codes[1].dtype == code_dtype

    # codes must stay non-negative
    thousand = pd.MultiIndex.from_product([['a'], range(1000)])
    for level_codes in (thousand.codes[0], thousand.codes[1]):
        assert (level_codes >= 0).all()
def test_values_boxed():
    """MultiIndex.values matches the object array built from the tuples."""
    rows = [
        (1, pd.Timestamp('2000-01-01')),
        (2, pd.NaT),
        (3, pd.Timestamp('2000-01-03')),
        (1, pd.Timestamp('2000-01-04')),
        (2, pd.Timestamp('2000-01-02')),
        (3, pd.Timestamp('2000-01-03')),
    ]
    mi = pd.MultiIndex.from_tuples(rows)
    as_objects = construct_1d_object_array_from_listlike(rows)
    tm.assert_numpy_array_equal(mi.values, as_objects)

    # Check that code branches for boxed values produce identical results
    tm.assert_numpy_array_equal(mi.values[:4], mi[:4].values)
def test_values_multiindex_datetimeindex():
# Test to ensure we hit the boxing / nobox part of MI.values
ints = np.arange(10 ** 18, 10 ** 18 + 5)
naive = pd.DatetimeIndex(ints)
# TODO(GH-24559): Remove the FutureWarning
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
aware = pd.DatetimeIndex(ints, tz='US/Central')
idx = pd.MultiIndex.from_arrays([naive, aware])
result = idx.values
outer = pd.DatetimeIndex([x[0] for x in result])
tm.assert_index_equal(outer, naive)
inner = pd.DatetimeIndex([x[1] for x in result])
tm.assert_index_equal(inner, aware)
# n_lev > n_lab
result = idx[:2].values
outer = pd.DatetimeIndex([x[0] for x in result])
tm.assert_index_equal(outer, naive[:2])
inner = pd.DatetimeIndex([x[1] for x in result])
tm.assert_index_equal(inner, aware[:2])
def test_values_multiindex_periodindex():
# Test to ensure we hit the boxing / nobox part of MI.values
ints = np.arange(2007, 2012)
pidx = pd.PeriodIndex(ints, freq='D')
idx = pd.MultiIndex.from_arrays([ints, pidx])
result = idx.values
outer = pd.Int64Index([x[0] for x in result])
tm.assert_index_equal(outer, pd.Int64Index(ints))
inner = pd.PeriodIndex([x[1] for x in result])
tm.assert_index_equal(inner, pidx)
# n_lev > n_lab
result = idx[:2].values
outer = pd.Int64Index([x[0] for x in result])
tm.assert_index_equal(outer, pd.Int64Index(ints[:2]))
inner = pd.PeriodIndex([x[1] for x in result])
tm.assert_index_equal(inner, pidx[:2])
def test_consistency():
    """Build a large MultiIndex, then a duplicated one that is not unique."""
    # need to construct an overflow
    outer_values = list(range(70000))
    inner_values = list(range(10))
    level_values = [outer_values, inner_values]

    # the fact that is works means it's consistent
    mi = MultiIndex(levels=level_values,
                    codes=[np.arange(70000),
                           np.repeat(list(range(10)), 7000)])

    # inconsistent
    outer_codes = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3])
    inner_codes = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1])
    mi = MultiIndex(levels=level_values, codes=[outer_codes, inner_codes])

    assert mi.is_unique is False
def test_hash_collisions():
    """A 1000 x 1000 product index maps every key to its own position."""
    # non-smoke test that we don't get hash collisions
    mi = MultiIndex.from_product([np.arange(1000), np.arange(1000)],
                                 names=['one', 'two'])
    size = len(mi)

    positions = mi.get_indexer(mi.values)
    tm.assert_numpy_array_equal(positions, np.arange(size, dtype='intp'))

    # spot-check both ends with get_loc
    for position in (0, 1, size - 2, size - 1):
        assert mi.get_loc(mi[position]) == position
def test_dims():
    """Placeholder test: it has no body and checks nothing."""
def test_take_invalid_kwargs():
    """MultiIndex.take rejects unknown keywords and the 'out'/'mode' options.

    Raises (inside the test, each expected):
        TypeError for an unexpected keyword argument,
        ValueError for ``out`` and for ``mode``.
    """
    vals = [['A', 'B'],
            [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]]
    idx = pd.MultiIndex.from_product(vals, names=['str', 'dt'])
    indices = [1, 2]

    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=msg):
        idx.take(indices, foo=2)

    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, out=indices)

    msg = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, mode='clip')


# The original name had no ``test_`` prefix, so pytest never collected the
# function. The old name stays as an alias for any direct caller; pytest does
# not collect it a second time because it does not start with ``test``.
take_invalid_kwargs = test_take_invalid_kwargs
def test_isna_behavior(idx):
# should not segfault GH5123
# NOTE: if MI representation changes, may make sense to allow
# isna(MI)
with pytest.raises(NotImplementedError):
pd.isna(idx)
def test_large_multiindex_error():
    """Missing keys raise KeyError just below and just above 1,000,000 rows."""
    # GH12527
    for inner_size in (499999, 500001):
        mi = pd.MultiIndex.from_product([[1, 2], range(inner_size)])
        frame = pd.DataFrame(1, index=mi, columns=['dest'])
        for absent in ((-1, 0), (3, 0)):
            with pytest.raises(KeyError):
                frame.loc[absent, 'dest']
def test_million_record_attribute_error():
    """A missing Series attribute raises AttributeError on a 1M-row frame."""
    # GH 18165
    numbers = list(range(1000000))
    mi = pd.MultiIndex.from_tuples([(value, value) for value in numbers])
    frame = pd.DataFrame({'a': numbers, 'b': numbers}, index=mi)

    with pytest.raises(AttributeError,
                       match="'Series' object has no attribute 'foo'"):
        frame['a'].foo()
def test_can_hold_identifiers(idx):
key = idx[0]
assert idx._can_hold_identifiers_and_holds_name(key) is True
def test_metadata_immutable(idx):
levels, codes = idx.levels, idx.codes
# shouldn't be able to set at either the top level or base level
mutable_regex = re.compile('does not support mutable operations')
with pytest.raises(TypeError, match=mutable_regex):
levels[0] = levels[0]
with pytest.raises(TypeError, match=mutable_regex):
levels[0][0] = levels[0][0]
# ditto for labels
with pytest.raises(TypeError, match=mutable_regex):
codes[0] = codes[0]
with pytest.raises(TypeError, match=mutable_regex):
codes[0][0] = codes[0][0]
# and for names
names = idx.names
with pytest.raises(TypeError, match=mutable_regex):
names[0] = names[0]
def test_level_setting_resets_attributes():
ind = pd.MultiIndex.from_arrays([
['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
])
assert ind.is_monotonic
ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True)
# if this fails, probably didn't reset the cache correctly.
assert not ind.is_monotonic
def test_rangeindex_fallback_coercion_bug():
# GH 12893
foo = pd.DataFrame(np.arange(100).reshape((10, 10)))
bar = pd.DataFrame(np.arange(100).reshape((10, 10)))
df = pd.concat({'foo': foo.stack(), 'bar': bar.stack()}, axis=1)
df.index.names = ['fizz', 'buzz']
str(df)
expected = pd.DataFrame({'bar': np.arange(100),
'foo': np.arange(100)},
index=pd.MultiIndex.from_product(
[range(10), range(10)],
names=['fizz', 'buzz']))
tm.assert_frame_equal(df, expected, check_like=True)
result = df.index.get_level_values('fizz')
expected = pd.Int64Index(np.arange(10), name='fizz').repeat(10)
tm.assert_index_equal(result, expected)
result = df.index.get_level_values('buzz')
expected = pd.Int64Index(np.tile(np.arange(10), 10), name='buzz')
tm.assert_index_equal(result, expected)
def test_hash_error(indices):
index = indices
with pytest.raises(TypeError, match=("unhashable type: %r" %
type(index).__name__)):
hash(indices)
def test_mutability(indices):
if not len(indices):
return
pytest.raises(TypeError, indices.__setitem__, 0, indices[0])
def test_wrong_number_names(indices):
with pytest.raises(ValueError, match="^Length"):
indices.names = ["apple", "banana", "carrot"]
def test_memory_usage(idx):
result = idx.memory_usage()
if len(idx):
idx.get_loc(idx[0])
result2 = idx.memory_usage()
result3 = idx.memory_usage(deep=True)
# RangeIndex, IntervalIndex
# don't have engines
if not isinstance(idx, (RangeIndex, IntervalIndex)):
assert result2 > result
if idx.inferred_type == 'object':
assert result3 > result2
else:
# we report 0 for no-length
assert result == 0
def test_nlevels(idx):
assert idx.nlevels == 2
@@ -1,96 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
import pandas as pd
from pandas import Index, MultiIndex
import pandas.util.testing as tm
@pytest.mark.parametrize('other', [
Index(['three', 'one', 'two']),
Index(['one']),
Index(['one', 'three']),
])
def test_join_level(idx, other, join_type):
join_index, lidx, ridx = other.join(idx, how=join_type,
level='second',
return_indexers=True)
exp_level = other.join(idx.levels[1], how=join_type)
assert join_index.levels[0].equals(idx.levels[0])
assert join_index.levels[1].equals(exp_level)
# pare down levels
mask = np.array(
[x[1] in exp_level for x in idx], dtype=bool)
exp_values = idx.values[mask]
tm.assert_numpy_array_equal(join_index.values, exp_values)
if join_type in ('outer', 'inner'):
join_index2, ridx2, lidx2 = \
idx.join(other, how=join_type, level='second',
return_indexers=True)
assert join_index.equals(join_index2)
tm.assert_numpy_array_equal(lidx, lidx2)
tm.assert_numpy_array_equal(ridx, ridx2)
tm.assert_numpy_array_equal(join_index2.values, exp_values)
def test_join_level_corner_case(idx):
# some corner cases
index = Index(['three', 'one', 'two'])
result = index.join(idx, level='second')
assert isinstance(result, MultiIndex)
with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"):
idx.join(idx, level=1)
def test_join_self(idx, join_type):
joined = idx.join(idx, how=join_type)
assert idx is joined
def test_join_multi():
    """Join a two-level MultiIndex with a flat Index named after level 'b'."""
    # GH 10665
    midx = pd.MultiIndex.from_product(
        [np.arange(4), np.arange(4)], names=['a', 'b'])
    flat = pd.Index([1, 2, 5], name='b')

    # inner
    joined, multi_pos, flat_pos = midx.join(
        flat, how='inner', return_indexers=True)
    inner_index = pd.MultiIndex.from_product(
        [np.arange(4), [1, 2]], names=['a', 'b'])
    inner_multi_pos = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
    inner_flat_pos = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)
    tm.assert_index_equal(joined, inner_index)
    tm.assert_numpy_array_equal(multi_pos, inner_multi_pos)
    tm.assert_numpy_array_equal(flat_pos, inner_flat_pos)

    # flip
    joined, flat_pos, multi_pos = flat.join(
        midx, how='inner', return_indexers=True)
    tm.assert_index_equal(joined, inner_index)
    tm.assert_numpy_array_equal(multi_pos, inner_multi_pos)
    tm.assert_numpy_array_equal(flat_pos, inner_flat_pos)

    # keep MultiIndex
    joined, multi_pos, flat_pos = midx.join(
        flat, how='left', return_indexers=True)
    left_flat_pos = np.array(
        [-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1],
        dtype=np.intp)
    tm.assert_index_equal(joined, midx)
    assert multi_pos is None
    tm.assert_numpy_array_equal(flat_pos, left_flat_pos)

    # flip
    joined, flat_pos, multi_pos = flat.join(
        midx, how='right', return_indexers=True)
    tm.assert_index_equal(joined, midx)
    assert multi_pos is None
    tm.assert_numpy_array_equal(flat_pos, left_flat_pos)
def test_join_self_unique(idx, join_type):
if idx.is_unique:
joined = idx.join(idx, how=join_type)
assert (idx == joined).all()
@@ -1,129 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas._libs.tslib import iNaT
import pandas as pd
from pandas import Int64Index, MultiIndex, PeriodIndex, UInt64Index
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
import pandas.util.testing as tm
def test_fillna(idx):
    """Check fillna on the fixture index.

    A MultiIndex must raise NotImplementedError from fillna. Any other
    non-empty index goes through the generic checks in the final branch.
    """
    # GH 11343
    # TODO: Remove or Refactor. Not Implemented for MultiIndex
    # NOTE: the loop has a single entry and ``name`` is never used; the body
    # rebinds ``idx``, shadowing the parameter.
    for name, index in [('idx', idx), ]:
        if len(index) == 0:
            # nothing to check for an empty index
            pass
        elif isinstance(index, MultiIndex):
            idx = index.copy()
            msg = "isna is not defined for MultiIndex"
            with pytest.raises(NotImplementedError, match=msg):
                idx.fillna(idx[0])
        else:
            # filling returns an equal but distinct index
            idx = index.copy()
            result = idx.fillna(idx[0])
            tm.assert_index_equal(result, idx)
            assert result is not idx
            # a list is rejected as fill value
            msg = "'value' must be a scalar, passed: "
            with pytest.raises(TypeError, match=msg):
                idx.fillna([idx[0]])
            # put a missing value at position 1 and rebuild the index
            idx = index.copy()
            values = idx.values
            if isinstance(index, DatetimeIndexOpsMixin):
                values[1] = iNaT
            elif isinstance(index, (Int64Index, UInt64Index)):
                # skips the remaining checks for integer indexes
                continue
            else:
                values[1] = np.nan
            if isinstance(index, PeriodIndex):
                idx = index.__class__(values, freq=index.freq)
            else:
                idx = index.__class__(values)
            # only position 1 should be flagged as missing
            expected = np.array([False] * len(idx), dtype=bool)
            expected[1] = True
            tm.assert_numpy_array_equal(idx._isnan, expected)
            assert idx.hasnans is True
def test_dropna():
    """dropna removes rows with any (default) or all-missing entries."""
    # GH 6194
    mi = pd.MultiIndex.from_arrays([[1, np.nan, 3, np.nan, 5],
                                    [1, 2, np.nan, np.nan, 5],
                                    ['a', 'b', 'c', np.nan, 'e']])

    # default and how='any': only fully populated rows survive
    complete_rows = pd.MultiIndex.from_arrays([[1, 5],
                                               [1, 5],
                                               ['a', 'e']])
    tm.assert_index_equal(mi.dropna(), complete_rows)
    tm.assert_index_equal(mi.dropna(how='any'), complete_rows)

    # how='all': only the row that is missing everywhere is dropped
    partly_filled = pd.MultiIndex.from_arrays([[1, np.nan, 3, 5],
                                               [1, 2, np.nan, 5],
                                               ['a', 'b', 'c', 'e']])
    tm.assert_index_equal(mi.dropna(how='all'), partly_filled)

    with pytest.raises(ValueError, match="invalid how option: xxx"):
        mi.dropna(how='xxx')
def test_nulls(idx):
# this is really a smoke test for the methods
# as these are adequately tested for function elsewhere
msg = "isna is not defined for MultiIndex"
with pytest.raises(NotImplementedError, match=msg):
idx.isna()
@pytest.mark.xfail
def test_hasnans_isnans(idx):
    """Check _isnan / hasnans without and with a NaN at position 1."""
    # GH 11343, added tests for hasnans / isnans
    clean = idx.copy()

    # cases in indices doesn't include NaN
    no_flags = np.array([False] * len(clean), dtype=bool)
    tm.assert_numpy_array_equal(clean._isnan, no_flags)
    assert clean.hasnans is False

    # overwrite position 1 with NaN and rebuild the index
    source = idx.copy()
    raw = source.values
    raw[1] = np.nan
    with_nan = idx.__class__(raw)

    flags = np.array([False] * len(with_nan), dtype=bool)
    flags[1] = True
    tm.assert_numpy_array_equal(with_nan._isnan, flags)
    assert with_nan.hasnans is True
def test_nan_stays_float():
    """Missing second-level values stay null after a join and a subtraction."""
    # GH 7031
    with_missing = pd.MultiIndex(levels=[["A", "B"], []],
                                 codes=[[1, 0], [-1, -1]],
                                 names=[0, 1])
    complete = pd.MultiIndex(levels=[["C"], ["D"]],
                             codes=[[0], [0]],
                             names=[0, 1])

    joined = with_missing.join(complete, how='outer')
    assert pd.isna(with_missing.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(joined.get_level_values(1)[:-1]).all()

    left = pd.DataFrame([[1, 2]], index=with_missing)
    right = pd.DataFrame([[3, 4]], index=complete)
    difference = left - right
    assert pd.isna(left.index.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(difference.index.get_level_values(1)[:-1]).all()
@@ -1,213 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
import pandas as pd
from pandas import Index, IntervalIndex, MultiIndex
from pandas.api.types import is_scalar
def _check_monotonic_increasing(mi, expected, flat=True):
    """Assert the increasing flags of *mi* (and its flat form) are *expected*.

    When *flat* is true the same two flags are also checked on
    ``Index(mi.values)``.
    """
    assert mi.is_monotonic_increasing is expected
    assert mi._is_strictly_monotonic_increasing is expected
    if flat:
        as_index = Index(mi.values)
        assert as_index.is_monotonic_increasing is expected
        assert as_index._is_strictly_monotonic_increasing is expected


def test_is_monotonic_increasing():
    """Check is_monotonic_increasing and its strict variant on MultiIndex.

    The first group used to assert ``i._is_strictly_monotonic_increasing``
    twice and never checked the strict flag on ``Index(i.values)``; every
    group now goes through the same four checks. ``is_monotonic_increasing``
    is used instead of the ``is_monotonic`` alias, matching
    ``test_is_strictly_monotonic_increasing`` in this module.
    """
    # both levels ascending
    i = MultiIndex.from_product([np.arange(10),
                                 np.arange(10)], names=['one', 'two'])
    _check_monotonic_increasing(i, True)

    # first level descending
    i = MultiIndex.from_product([np.arange(10, 0, -1),
                                 np.arange(10)], names=['one', 'two'])
    _check_monotonic_increasing(i, False)

    # second level descending
    i = MultiIndex.from_product([np.arange(10),
                                 np.arange(10, 0, -1)],
                                names=['one', 'two'])
    _check_monotonic_increasing(i, False)

    # NaN inside the first level
    i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']])
    _check_monotonic_increasing(i, False)

    # string ordering
    i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                           ['one', 'two', 'three']],
                   codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                          [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                   names=['first', 'second'])
    _check_monotonic_increasing(i, False)

    i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'],
                           ['mom', 'next', 'zenith']],
                   codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                          [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                   names=['first', 'second'])
    _check_monotonic_increasing(i, True)

    # mixed levels, hits the TypeError
    # (only the MultiIndex itself is checked here, as before)
    i = MultiIndex(
        levels=[[1, 2, 3, 4], ['gb00b03mlx29', 'lu0197800237',
                               'nl0000289783',
                               'nl0000289965', 'nl0000301109']],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=['household_id', 'asset_id'])
    _check_monotonic_increasing(i, False, flat=False)

    # empty
    i = MultiIndex.from_arrays([[], []])
    _check_monotonic_increasing(i, True)
def _check_monotonic_decreasing(mi, expected, flat=True):
    """Assert the decreasing flags of *mi* (and its flat form) are *expected*.

    When *flat* is true the same two flags are also checked on
    ``Index(mi.values)``.
    """
    assert mi.is_monotonic_decreasing is expected
    assert mi._is_strictly_monotonic_decreasing is expected
    if flat:
        as_index = Index(mi.values)
        assert as_index.is_monotonic_decreasing is expected
        assert as_index._is_strictly_monotonic_decreasing is expected


def test_is_monotonic_decreasing():
    """Check is_monotonic_decreasing and its strict variant on MultiIndex.

    The first group used to assert ``i._is_strictly_monotonic_decreasing``
    twice and never checked the strict flag on ``Index(i.values)``; every
    group now goes through the same four checks.
    """
    # both levels descending
    i = MultiIndex.from_product([np.arange(9, -1, -1),
                                 np.arange(9, -1, -1)],
                                names=['one', 'two'])
    _check_monotonic_decreasing(i, True)

    # first level ascending
    i = MultiIndex.from_product([np.arange(10),
                                 np.arange(10, 0, -1)],
                                names=['one', 'two'])
    _check_monotonic_decreasing(i, False)

    # second level ascending
    i = MultiIndex.from_product([np.arange(10, 0, -1),
                                 np.arange(10)], names=['one', 'two'])
    _check_monotonic_decreasing(i, False)

    # NaN inside the first level
    i = MultiIndex.from_product([[2.0, np.nan, 1.0], ['c', 'b', 'a']])
    _check_monotonic_decreasing(i, False)

    # string ordering
    i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'],
                           ['three', 'two', 'one']],
                   codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                          [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                   names=['first', 'second'])
    _check_monotonic_decreasing(i, False)

    i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'],
                           ['zenith', 'next', 'mom']],
                   codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                          [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                   names=['first', 'second'])
    _check_monotonic_decreasing(i, True)

    # mixed levels, hits the TypeError
    # (only the MultiIndex itself is checked here, as before)
    i = MultiIndex(
        levels=[[4, 3, 2, 1], ['nl0000301109', 'nl0000289965',
                               'nl0000289783', 'lu0197800237',
                               'gb00b03mlx29']],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=['household_id', 'asset_id'])
    _check_monotonic_decreasing(i, False, flat=False)

    # empty
    i = MultiIndex.from_arrays([[], []])
    _check_monotonic_decreasing(i, True)
def test_is_strictly_monotonic_increasing():
    """A repeated entry keeps the index increasing but not strictly so."""
    level_values = [['bar', 'baz'], ['mom', 'next']]
    level_codes = [[0, 0, 1, 1], [0, 0, 0, 1]]
    mi = pd.MultiIndex(levels=level_values, codes=level_codes)
    assert mi.is_monotonic_increasing is True
    assert mi._is_strictly_monotonic_increasing is False
def test_is_strictly_monotonic_decreasing():
    """A repeated entry keeps the index decreasing but not strictly so."""
    level_values = [['baz', 'bar'], ['next', 'mom']]
    level_codes = [[0, 0, 1, 1], [0, 0, 0, 1]]
    mi = pd.MultiIndex(levels=level_values, codes=level_codes)
    assert mi.is_monotonic_decreasing is True
    assert mi._is_strictly_monotonic_decreasing is False
def test_searchsorted_monotonic(indices):
# GH17271
# not implemented for tuple searches in MultiIndex
# or Intervals searches in IntervalIndex
if isinstance(indices, (MultiIndex, IntervalIndex)):
return
# nothing to test if the index is empty
if indices.empty:
return
value = indices[0]
# determine the expected results (handle dupes for 'right')
expected_left, expected_right = 0, (indices == value).argmin()
if expected_right == 0:
# all values are the same, expected_right should be length
expected_right = len(indices)
# test _searchsorted_monotonic in all cases
# test searchsorted only for increasing
if indices.is_monotonic_increasing:
ssm_left = indices._searchsorted_monotonic(value, side='left')
assert is_scalar(ssm_left)
assert expected_left == ssm_left
ssm_right = indices._searchsorted_monotonic(value, side='right')
assert is_scalar(ssm_right)
assert expected_right == ssm_right
ss_left = indices.searchsorted(value, side='left')
assert is_scalar(ss_left)
assert expected_left == ss_left
ss_right = indices.searchsorted(value, side='right')
assert is_scalar(ss_right)
assert expected_right == ss_right
elif indices.is_monotonic_decreasing:
ssm_left = indices._searchsorted_monotonic(value, side='left')
assert is_scalar(ssm_left)
assert expected_left == ssm_left
ssm_right = indices._searchsorted_monotonic(value, side='right')
assert is_scalar(ssm_right)
assert expected_right == ssm_right
else:
# non-monotonic should raise.
with pytest.raises(ValueError):
indices._searchsorted_monotonic(value, side='left')
@@ -1,124 +0,0 @@
# -*- coding: utf-8 -*-
import pytest
import pandas as pd
from pandas import MultiIndex
import pandas.util.testing as tm
def check_level_names(index, names):
    """Assert that the names of ``index.levels`` equal *names*, in order."""
    observed = [level.name for level in index.levels]
    assert observed == list(names)
def test_slice_keep_name():
    """Slicing a MultiIndex keeps its level names."""
    rows = [('a', 'b'), (1, 2), ('c', 'd')]
    mi = MultiIndex.from_tuples(rows, names=['x', 'y'])
    tail = mi[1:]
    assert tail.names == mi.names
def test_index_name_retained():
    """Adding a row through .loc keeps the frame's index name 'z'."""
    # GH9857
    grown = pd.DataFrame({'x': [1, 2, 6],
                          'y': [2, 2, 8],
                          'z': [-5, 0, 5]}).set_index('z')
    grown.loc[10] = [9, 10]

    wanted = pd.DataFrame({'x': [1, 2, 6, 9],
                           'y': [2, 2, 8, 10],
                           'z': [-5, 0, 5, 10]}).set_index('z')
    tm.assert_frame_equal(grown, wanted)
def test_changing_names(idx):
# names should be applied to levels
level_names = [level.name for level in idx.levels]
check_level_names(idx, idx.names)
view = idx.view()
copy = idx.copy()
shallow_copy = idx._shallow_copy()
# changing names should change level names on object
new_names = [name + "a" for name in idx.names]
idx.names = new_names
check_level_names(idx, new_names)
# but not on copies
check_level_names(view, level_names)
check_level_names(copy, level_names)
check_level_names(shallow_copy, level_names)
# and copies shouldn't change original
shallow_copy.names = [name + "c" for name in shallow_copy.names]
check_level_names(idx, new_names)
def test_take_preserve_name(idx):
taken = idx.take([3, 0, 1])
assert taken.names == idx.names
def test_copy_names():
# Check that adding a "names" parameter to the copy is honored
# GH14302
multi_idx = pd.Index([(1, 2), (3, 4)], names=['MyName1', 'MyName2'])
multi_idx1 = multi_idx.copy()
assert multi_idx.equals(multi_idx1)
assert multi_idx.names == ['MyName1', 'MyName2']
assert multi_idx1.names == ['MyName1', 'MyName2']
multi_idx2 = multi_idx.copy(names=['NewName1', 'NewName2'])
assert multi_idx.equals(multi_idx2)
assert multi_idx.names == ['MyName1', 'MyName2']
assert multi_idx2.names == ['NewName1', 'NewName2']
multi_idx3 = multi_idx.copy(name=['NewName1', 'NewName2'])
assert multi_idx.equals(multi_idx3)
assert multi_idx.names == ['MyName1', 'MyName2']
assert multi_idx3.names == ['NewName1', 'NewName2']
def test_names(idx, index_names):
# names are assigned in setup
names = index_names
level_names = [level.name for level in idx.levels]
assert names == level_names
# setting bad names on existing
index = idx
with pytest.raises(ValueError, match="^Length of names"):
setattr(index, "names", list(index.names) + ["third"])
with pytest.raises(ValueError, match="^Length of names"):
setattr(index, "names", [])
# initializing with bad names (should always be equivalent)
major_axis, minor_axis = idx.levels
major_codes, minor_codes = idx.codes
with pytest.raises(ValueError, match="^Length of names"):
MultiIndex(levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes],
names=['first'])
with pytest.raises(ValueError, match="^Length of names"):
MultiIndex(levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes],
names=['first', 'second', 'third'])
# names are assigned
index.names = ["a", "b"]
ind_names = list(index.names)
level_names = [level.name for level in index.levels]
assert ind_names == level_names
def test_duplicate_level_names_access_raises(idx):
# GH19029
idx.names = ['foo', 'foo']
with pytest.raises(ValueError, match='name foo occurs multiple times'):
idx._get_level_number('foo')
@@ -1,98 +0,0 @@
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, MultiIndex, date_range
import pandas.util.testing as tm
def test_partial_string_timestamp_multiindex():
    """Partial-string datetime selection on a MultiIndexed frame (GH10331)."""
    stamps = pd.date_range('2016-01-01', '2016-01-03', freq='12H')
    letters = ['a', 'b', 'c']
    df = pd.DataFrame({'c1': range(0, 15)},
                      index=pd.MultiIndex.from_product([stamps, letters]))
    idx = pd.IndexSlice

    #                        c1
    # 2016-01-01 00:00:00 a   0
    #                     b   1
    #                     c   2
    # 2016-01-01 12:00:00 a   3
    #                     b   4
    #                     c   5
    # 2016-01-02 00:00:00 a   6
    #                     b   7
    #                     c   8
    # 2016-01-02 12:00:00 a   9
    #                     b  10
    #                     c  11
    # 2016-01-03 00:00:00 a  12
    #                     b  13
    #                     c  14

    # partial string matching on a single index
    swapped_frames = (df.swaplevel(),
                      df.swaplevel(0),
                      df.swaplevel(0, 1))
    for df_swap in swapped_frames:
        df_swap = df_swap.sort_index()
        result = df_swap.loc['a'].loc['2016-01-01']
        expected = df.loc[idx[:, 'a'], :].iloc[0:2]
        expected.index = expected.index.droplevel(1)
        tm.assert_frame_equal(result, expected)

    # NOTE: ``df_swap`` below is the last (sorted) frame produced by the loop.

    # indexing with IndexSlice
    tm.assert_frame_equal(df.loc[idx['2016-01-01':'2016-02-01', :], :], df)

    # match on secondary index
    result = df_swap.loc[idx[:, '2016-01-01':'2016-01-01'], :]
    tm.assert_frame_equal(result, df_swap.iloc[[0, 1, 5, 6, 10, 11]])

    # Even though this syntax works on a single index, this is somewhat
    # ambiguous and we don't want to extend this behavior forward to work
    # in multi-indexes. This would amount to selecting a scalar from a
    # column.
    with pytest.raises(KeyError):
        df['2016-01-01']

    # partial string match on year only
    tm.assert_frame_equal(df.loc['2016'], df)

    # partial string match on date
    tm.assert_frame_equal(df.loc['2016-01-01'], df.iloc[0:6])

    # partial string match on date and hour, from middle
    tm.assert_frame_equal(df.loc['2016-01-02 12'], df.iloc[9:12])

    # partial string match on secondary index
    result = df_swap.loc[idx[:, '2016-01-02'], :]
    tm.assert_frame_equal(result, df_swap.iloc[[2, 3, 7, 8, 12, 13]])

    # tuple selector with partial string match on date
    result = df.loc[('2016-01-01', 'a'), :]
    tm.assert_frame_equal(result, df.iloc[[0, 3]])

    # Slicing date on first level should break (of course)
    with pytest.raises(KeyError):
        df_swap.loc['2016-01-01']

    # GH12685 (partial string with daily resolution or below)
    days = date_range('2013-01-01', periods=100, freq='D')
    daily_index = MultiIndex.from_product([days, ['a', 'b']])
    df = DataFrame(np.random.randn(200, 1), columns=['A'], index=daily_index)

    result = df.loc[idx['2013-03':'2013-03', :], :]
    tm.assert_frame_equal(result, df.iloc[118:180])
@@ -1,108 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
import pandas as pd
from pandas import Index, MultiIndex
import pandas.util.testing as tm
def check_level_names(index, names):
    """Assert that the ``name`` of every level of *index* matches *names*."""
    actual = [lvl.name for lvl in index.levels]
    assert actual == list(names)
def test_reindex(idx):
    """Reindexing against a list of tuples yields a named MultiIndex."""
    head = idx[:4]
    result, indexer = idx.reindex(list(head))
    assert isinstance(result, MultiIndex)
    check_level_names(result, head.names)

    # reindexing against the full set of labels needs no indexer
    result, indexer = idx.reindex(list(idx))
    assert isinstance(result, MultiIndex)
    assert indexer is None
    check_level_names(result, idx.names)
def test_reindex_level(idx):
    """Level-wise reindex agrees with the equivalent level-wise join."""
    flat = Index(['one'])

    target, indexer = idx.reindex(flat, level='second')
    target2, indexer2 = flat.reindex(idx, level='second')

    joined_right = idx.join(flat, level='second', how='right')
    joined_left = idx.join(flat, level='second', how='left')

    assert target.equals(joined_right)
    tm.assert_numpy_array_equal(indexer, np.array([0, 2, 4]),
                                check_dtype=False)

    assert target2.equals(joined_left)
    tm.assert_numpy_array_equal(indexer2,
                                np.array([0, -1, 0, -1, 0, -1]),
                                check_dtype=False)

    # fill methods cannot be combined with ``level``
    unsupported = "Fill method not supported"
    with pytest.raises(TypeError, match=unsupported):
        idx.reindex(idx, method='pad', level='second')

    with pytest.raises(TypeError, match=unsupported):
        flat.reindex(flat, method='bfill', level='first')
def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx):
# GH6552
idx = idx.copy()
target = idx.copy()
idx.names = target.names = [None, None]
other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]])
# list & ndarray cases
assert idx.reindex([])[0].names == [None, None]
assert idx.reindex(np.array([]))[0].names == [None, None]
assert idx.reindex(target.tolist())[0].names == [None, None]
assert idx.reindex(target.values)[0].names == [None, None]
assert idx.reindex(other_dtype.tolist())[0].names == [None, None]
assert idx.reindex(other_dtype.values)[0].names == [None, None]
idx.names = ['foo', 'bar']
assert idx.reindex([])[0].names == ['foo', 'bar']
assert idx.reindex(np.array([]))[0].names == ['foo', 'bar']
assert idx.reindex(target.tolist())[0].names == ['foo', 'bar']
assert idx.reindex(target.values)[0].names == ['foo', 'bar']
assert idx.reindex(other_dtype.tolist())[0].names == ['foo', 'bar']
assert idx.reindex(other_dtype.values)[0].names == ['foo', 'bar']
def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
    """Level-wise reindex with an empty target keeps the names (GH7774)."""
    expected_names = ['foo', 'bar']
    idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']],
                                     names=['foo', 'bar'])
    for level in (0, 1):
        reindexed = idx.reindex([], level=level)[0]
        assert reindexed.names == expected_names
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array():
    """Level dtypes survive a level-wise reindex to an empty target (GH7774)."""
    idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']])
    expectations = ((0, np.int64), (1, np.object_))
    for level, scalar_type in expectations:
        reindexed = idx.reindex([], level=level)[0]
        assert reindexed.levels[level].dtype.type == scalar_type
def test_reindex_base(idx):
    """``get_indexer`` against itself is the identity; bad methods raise.

    The redundant ``idx = idx`` self-assignment from the original was dropped;
    it had no effect.
    """
    expected = np.arange(idx.size, dtype=np.intp)

    actual = idx.get_indexer(idx)
    tm.assert_numpy_array_equal(expected, actual)

    with pytest.raises(ValueError, match='Invalid fill method'):
        idx.get_indexer(idx, method='invalid')
def test_reindex_non_unique():
    """Reindexing a Series whose MultiIndex has duplicates must raise."""
    duplicated = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)])
    series = pd.Series(np.arange(4), index=duplicated)
    unique_target = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])

    with pytest.raises(ValueError,
                       match='cannot handle a non-unique multi-index!'):
        series.reindex(unique_target)
@@ -1,126 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
import pandas as pd
from pandas import Index, MultiIndex
import pandas.util.testing as tm
def test_insert(idx):
    """``insert`` extends levels as needed; also covers setting with enlargement."""
    # key contained in all levels
    known_key = ('bar', 'two')
    new_index = idx.insert(0, known_key)
    assert new_index.equal_levels(idx)
    assert new_index[0] == known_key

    # key not contained in all levels
    unknown_key = ('abc', 'three')
    new_index = idx.insert(0, unknown_key)
    level_names = ('first', 'second')
    for position in (0, 1):
        grown = Index(list(idx.levels[position]) + [unknown_key[position]],
                      name=level_names[position])
        tm.assert_index_equal(new_index.levels[position], grown)
    assert new_index[0] == unknown_key

    # key wrong length
    with pytest.raises(
            ValueError,
            match="Item must have length equal to number of levels"):
        idx.insert(0, ('foo2',))

    left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]],
                        columns=['1st', '2nd', '3rd'])
    left.set_index(['1st', '2nd'], inplace=True)
    ts = left['3rd'].copy(deep=True)

    # rows appended one at a time, in this exact order
    additions = [(('b', 'x'), 2), (('b', 'a'), -1), (('b', 'b'), 3),
                 (('a', 'x'), 4), (('a', 'w'), 5), (('a', 'a'), 6)]
    for key, value in additions:
        left.loc[key, '3rd'] = value
    for key, value in additions:
        ts.loc[key] = value

    rows = [['a', 'b', 0], ['b', 'd', 1]]
    rows.extend([key[0], key[1], value] for key, value in additions)
    right = pd.DataFrame(rows, columns=['1st', '2nd', '3rd'])
    right.set_index(['1st', '2nd'], inplace=True)
    # FIXME data types changes to float because
    # of intermediate nan insertion;
    tm.assert_frame_equal(left, right, check_dtype=False)
    tm.assert_series_equal(ts, right['3rd'])

    # GH9250
    tuples = ([('test1', i) for i in range(5)] +
              [('test2', i) for i in range(6)] +
              [('test', 17), ('test', 18)])

    left = pd.Series(np.linspace(0, 10, 11),
                     pd.MultiIndex.from_tuples(tuples[:-2]))

    left.loc[('test', 17)] = 11
    left.loc[('test', 18)] = 12

    right = pd.Series(np.linspace(0, 12, 13),
                      pd.MultiIndex.from_tuples(tuples))

    tm.assert_series_equal(left, right)
def test_append(idx):
result = idx[:3].append(idx[3:])
assert result.equals(idx)
foos = [idx[:1], idx[1:3], idx[3:]]
result = foos[0].append(foos[1:])
assert result.equals(idx)
# empty
result = idx.append([])
assert result.equals(idx)
def test_repeat():
reps = 2
numbers = [1, 2, 3]
names = np.array(['foo', 'bar'])
m = MultiIndex.from_product([
numbers, names], names=names)
expected = MultiIndex.from_product([
numbers, names.repeat(reps)], names=names)
tm.assert_index_equal(m.repeat(reps), expected)
def test_insert_base(idx):
result = idx[1:4]
# test 0th element
assert idx[0:4].equals(result.insert(0, idx[0]))
def test_delete_base(idx):
    """``delete`` drops one position; an out-of-bounds position raises."""
    # (position to delete, slice holding the expected remainder)
    scenarios = ((0, slice(1, None)), (-1, slice(None, -1)))
    for position, remainder in scenarios:
        expected = idx[remainder]
        result = idx.delete(position)
        assert result.equals(expected)
        assert result.name == expected.name

    with pytest.raises((IndexError, ValueError)):
        # Exception raised depends on NumPy version.
        idx.delete(len(idx))
@@ -1,251 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
import pandas as pd
from pandas import MultiIndex, Series
import pandas.util.testing as tm
@pytest.mark.parametrize("case", [0.5, "xxx"])
@pytest.mark.parametrize("sort", [True, False])
@pytest.mark.parametrize("method", ["intersection", "union",
                                    "difference", "symmetric_difference"])
def test_set_ops_error_cases(idx, case, sort, method):
    """Every set operation rejects a non-iterable ``other``."""
    set_op = getattr(idx, method)
    # non-iterable input
    with pytest.raises(TypeError, match="Input must be Index or array-like"):
        set_op(case, sort=sort)
@pytest.mark.parametrize("sort", [True, False])
def test_intersection_base(idx, sort):
    """Intersection of two overlapping slices, for several ``other`` types."""
    larger = idx[:5]
    smaller = idx[:3]

    common = larger.intersection(smaller, sort=sort)
    if sort:
        tm.assert_index_equal(common, smaller.sort_values())
    assert tm.equalContents(common, smaller)

    # GH 10149
    for container in (np.array, Series, list):
        other = container(smaller.values)
        common = larger.intersection(other, sort=sort)
        if sort:
            tm.assert_index_equal(common, smaller.sort_values())
        assert tm.equalContents(common, smaller)

    with pytest.raises(
            TypeError,
            match="other must be a MultiIndex or a list of tuples"):
        larger.intersection([1, 2, 3], sort=sort)
@pytest.mark.parametrize("sort", [True, False])
def test_union_base(idx, sort):
    """Union of two overlapping slices covers the index, for several ``other`` types."""
    tail = idx[3:]
    head = idx[:5]

    combined = tail.union(head, sort=sort)
    if sort:
        tm.assert_index_equal(combined, idx.sort_values())
    assert tm.equalContents(combined, idx)

    # GH 10149
    for container in (np.array, Series, list):
        other = container(head.values)
        combined = tail.union(other, sort=sort)
        if sort:
            tm.assert_index_equal(combined, idx.sort_values())
        assert tm.equalContents(combined, idx)

    with pytest.raises(
            TypeError,
            match="other must be a MultiIndex or a list of tuples"):
        tail.union([1, 2, 3], sort=sort)
@pytest.mark.parametrize("sort", [True, False])
def test_difference_base(idx, sort):
    """Removing the trailing slice leaves the leading one."""
    removed = idx[4:]
    remaining = idx[:4].sort_values() if sort else idx[:4]

    outcome = idx.difference(removed, sort=sort)
    assert outcome.equals(remaining)
    tm.assert_index_equal(outcome, remaining)

    # GH 10149
    for container in (np.array, Series, list):
        outcome = idx.difference(container(removed.values), sort=sort)
        tm.assert_index_equal(outcome, remaining)

    with pytest.raises(
            TypeError,
            match="other must be a MultiIndex or a list of tuples"):
        idx.difference([1, 2, 3], sort=sort)
@pytest.mark.parametrize("sort", [True, False])
def test_symmetric_difference(idx, sort):
    """Two slices offset by one differ only in the first and last entries."""
    without_first = idx[1:]
    without_last = idx[:-1]
    endpoints = idx[[-1, 0]]
    if sort:
        endpoints = endpoints.sort_values()

    outcome = without_first.symmetric_difference(without_last, sort=sort)
    tm.assert_index_equal(outcome, endpoints)

    # GH 10149
    for container in (np.array, Series, list):
        other = container(without_last.values)
        outcome = without_first.symmetric_difference(other, sort=sort)
        tm.assert_index_equal(outcome, endpoints)

    with pytest.raises(
            TypeError,
            match="other must be a MultiIndex or a list of tuples"):
        without_first.symmetric_difference([1, 2, 3], sort=sort)
def test_empty(idx):
# GH 15270
assert not idx.empty
assert idx[:0].empty
@pytest.mark.parametrize("sort", [True, False])
def test_difference(idx, sort):
    """Exercise ``MultiIndex.difference`` for regular and corner cases.

    Covers a plain difference, the various ways of getting an empty result,
    name propagation, array/list inputs, and rejection of non-tuple input.
    """
    first = idx
    result = first.difference(idx[-3:], sort=sort)
    vals = idx[:-3].values

    if sort:
        vals = sorted(vals)

    expected = MultiIndex.from_tuples(vals,
                                      sortorder=0,
                                      names=idx.names)

    assert isinstance(result, MultiIndex)
    assert result.equals(expected)
    assert result.names == idx.names
    tm.assert_index_equal(result, expected)

    # empty difference: reflexive
    result = idx.difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # empty difference: superset
    result = idx[-3:].difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # empty difference: degenerate
    result = idx[:0].difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # names not the same
    chunklet = idx[-3:]
    chunklet.names = ['foo', 'baz']
    result = first.difference(chunklet, sort=sort)
    assert result.names == (None, None)

    # empty, but non-equal
    result = idx.difference(idx.sortlevel(1)[0], sort=sort)
    assert len(result) == 0

    # a raw array of tuples (not a MultiIndex) is accepted; subtracting the
    # index's own values leaves nothing
    result = first.difference(first.values, sort=sort)
    assert result.equals(first[:0])

    # name from empty array
    result = first.difference([], sort=sort)
    assert first.equals(result)
    assert first.names == result.names

    # name from non-empty array
    result = first.difference([('foo', 'one')], sort=sort)
    expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), (
        'foo', 'two'), ('qux', 'one'), ('qux', 'two')])
    expected.names = first.names
    assert first.names == result.names
    # ``expected`` was previously built but never checked; compare contents
    # order-insensitively so the check holds for both values of ``sort``.
    assert tm.equalContents(result, expected)

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        first.difference([1, 2, 3, 4, 5], sort=sort)
@pytest.mark.parametrize("sort", [True, False])
def test_union(idx, sort):
    """Union of overlapping pieces covers the index; trivial unions return self."""
    reversed_head = idx[:5][::-1]
    tail = idx[3:]

    merged = reversed_head.union(tail, sort=sort)
    if sort:
        tm.assert_index_equal(merged, idx.sort_values())
    assert tm.equalContents(merged, idx)

    # corner case, pass self or empty thing:
    for trivial_other in (idx, idx[:0]):
        assert idx.union(trivial_other, sort=sort) is idx
# won't work in python 3
# tuples = _index.values
# result = _index[:4] | tuples[4:]
# assert result.equals(tuples)
# not valid for python 3
# def test_union_with_regular_index(self):
# other = Index(['A', 'B', 'C'])
# result = other.union(idx)
# assert ('foo', 'one') in result
# assert 'B' in result
# result2 = _index.union(other)
# assert result.equals(result2)
@pytest.mark.parametrize("sort", [True, False])
def test_intersection(idx, sort):
    """Intersection of overlapping pieces, of self, and of disjoint slices."""
    reversed_head = idx[:5][::-1]
    tail = idx[3:]
    overlap = idx[3:5]

    shared = reversed_head.intersection(tail, sort=sort)
    if sort:
        tm.assert_index_equal(shared, overlap)
    assert tm.equalContents(shared, overlap)

    # corner case, pass self
    assert idx.intersection(idx, sort=sort) is idx

    # empty intersection: disjoint
    nothing = idx[:2].intersection(idx[2:], sort=sort)
    assert nothing.equals(idx[:0])
# can't do in python 3
# tuples = _index.values
# result = _index & tuples
# assert result.equals(tuples)
@@ -1,266 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pandas.compat import lrange
from pandas.errors import PerformanceWarning, UnsortedIndexError
import pandas as pd
from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, RangeIndex
import pandas.util.testing as tm
def test_sortlevel(idx):
import random
tuples = list(idx)
random.shuffle(tuples)
index = MultiIndex.from_tuples(tuples)
sorted_idx, _ = index.sortlevel(0)
expected = MultiIndex.from_tuples(sorted(tuples))
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(0, ascending=False)
assert sorted_idx.equals(expected[::-1])
sorted_idx, _ = index.sortlevel(1)
by1 = sorted(tuples, key=lambda x: (x[1], x[0]))
expected = MultiIndex.from_tuples(by1)
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(1, ascending=False)
assert sorted_idx.equals(expected[::-1])
def test_sortlevel_not_sort_remaining():
    """With ``sort_remaining=False`` ties on level 'A' keep their order."""
    rows = [[1, 1, 3], [1, 1, 1]]
    mi = MultiIndex.from_tuples(rows, names=list('ABC'))
    by_a = mi.sortlevel('A', sort_remaining=False)[0]
    assert by_a.equals(mi)
def test_sortlevel_deterministic():
    """``sortlevel`` on a fixed, hand-shuffled set of tuples."""
    pairs = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'),
             ('foo', 'one'), ('baz', 'two'), ('qux', 'one')]
    index = MultiIndex.from_tuples(pairs)

    # level 0: plain lexicographic order of the tuples
    by_first = MultiIndex.from_tuples(sorted(pairs))
    ascending, _ = index.sortlevel(0)
    assert ascending.equals(by_first)
    descending, _ = index.sortlevel(0, ascending=False)
    assert descending.equals(by_first[::-1])

    # level 1: second element is the primary key, first breaks ties
    by_second = MultiIndex.from_tuples(
        sorted(pairs, key=lambda pair: (pair[1], pair[0])))
    ascending, _ = index.sortlevel(1)
    assert ascending.equals(by_second)
    descending, _ = index.sortlevel(1, ascending=False)
    assert descending.equals(by_second[::-1])
def test_sort(indices):
    """Calling ``sort`` on an index is expected to raise ``TypeError``."""
    sorter = indices.sort
    with pytest.raises(TypeError):
        sorter()
def test_numpy_argsort(idx):
    """``np.argsort`` on an index matches the index's own ``argsort``.

    For CategoricalIndex and RangeIndex inputs, additionally checks that the
    numpy-only keyword arguments are rejected.
    """
    result = np.argsort(idx)
    expected = idx.argsort()
    tm.assert_numpy_array_equal(result, expected)

    # these are the only two types that perform
    # pandas compatibility input validation - the
    # rest already perform separate (or no) such
    # validation via their 'values' attribute as
    # defined in pandas.core.indexes/base.py - they
    # cannot be changed at the moment due to
    # backwards compatibility concerns
    #
    # Fixed: the check used ``isinstance(type(idx), ...)``, which tests the
    # class object rather than the instance and is therefore always False.
    if isinstance(idx, (CategoricalIndex, RangeIndex)):
        msg = "the 'axis' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, axis=1)

        msg = "the 'kind' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, kind='mergesort')

        msg = "the 'order' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(idx, order=('a', 'b'))
def test_unsortedindex():
    """Slicing an unsorted MultiIndex raises; scalar lookups still work (GH 11897)."""
    pairs = [('z', 'a'), ('x', 'a'), ('y', 'b'),
             ('x', 'b'), ('y', 'a'), ('z', 'b')]
    mi = pd.MultiIndex.from_tuples(pairs, names=['one', 'two'])
    rows = [[i, 10 * i] for i in lrange(6)]
    df = pd.DataFrame(rows, index=mi, columns=['one', 'two'])

    # GH 16734: not sorted, but no real slicing
    row_z_a = df.loc(axis=0)['z', 'a']
    tm.assert_series_equal(row_z_a, df.iloc[0])

    with pytest.raises(UnsortedIndexError):
        df.loc(axis=0)['z', slice('a')]

    # after sorting, slicing works and missing keys raise KeyError
    df.sort_index(inplace=True)
    assert len(df.loc(axis=0)['z', :]) == 2

    with pytest.raises(KeyError):
        df.loc(axis=0)['q', :]
def test_unsortedindex_doc_examples():
    """Replays the 'sorting a MultiIndex' example from the advanced docs."""
    # http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex  # noqa
    columns = {'jim': [0, 0, 1, 1],
               'joe': ['x', 'x', 'z', 'y'],
               'jolie': np.random.rand(4)}
    dfm = DataFrame(columns).set_index(['jim', 'joe'])

    with tm.assert_produces_warning(PerformanceWarning):
        dfm.loc[(1, 'z')]

    with pytest.raises(UnsortedIndexError):
        dfm.loc[(0, 'y'):(1, 'z')]

    unsorted_index = dfm.index
    assert not unsorted_index.is_lexsorted()
    assert unsorted_index.lexsort_depth == 1

    # sort it
    dfm = dfm.sort_index()
    dfm.loc[(1, 'z')]
    dfm.loc[(0, 'y'):(1, 'z')]

    sorted_index = dfm.index
    assert sorted_index.is_lexsorted()
    assert sorted_index.lexsort_depth == 2
def test_reconstruct_sort():
# starts off lexsorted & monotonic
mi = MultiIndex.from_arrays([
['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
])
assert mi.is_lexsorted()
assert mi.is_monotonic
recons = mi._sort_levels_monotonic()
assert recons.is_lexsorted()
assert recons.is_monotonic
assert mi is recons
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
# cannot convert to lexsorted
mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'),
('x', 'b'), ('y', 'a'), ('z', 'b')],
names=['one', 'two'])
assert not mi.is_lexsorted()
assert not mi.is_monotonic
recons = mi._sort_levels_monotonic()
assert not recons.is_lexsorted()
assert not recons.is_monotonic
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
# cannot convert to lexsorted
mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]],
codes=[[0, 1, 0, 2], [2, 0, 0, 1]],
names=['col1', 'col2'])
assert not mi.is_lexsorted()
assert not mi.is_monotonic
recons = mi._sort_levels_monotonic()
assert not recons.is_lexsorted()
assert not recons.is_monotonic
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
def test_reconstruct_remove_unused():
# xref to GH 2770
df = DataFrame([['deleteMe', 1, 9],
['keepMe', 2, 9],
['keepMeToo', 3, 9]],
columns=['first', 'second', 'third'])
df2 = df.set_index(['first', 'second'], drop=False)
df2 = df2[df2['first'] != 'deleteMe']
# removed levels are there
expected = MultiIndex(levels=[['deleteMe', 'keepMe', 'keepMeToo'],
[1, 2, 3]],
codes=[[1, 2], [1, 2]],
names=['first', 'second'])
result = df2.index
tm.assert_index_equal(result, expected)
expected = MultiIndex(levels=[['keepMe', 'keepMeToo'],
[2, 3]],
codes=[[0, 1], [0, 1]],
names=['first', 'second'])
result = df2.index.remove_unused_levels()
tm.assert_index_equal(result, expected)
# idempotent
result2 = result.remove_unused_levels()
tm.assert_index_equal(result2, expected)
assert result2.is_(result)
@pytest.mark.parametrize('first_type,second_type', [
    ('int64', 'int64'),
    ('datetime64[D]', 'str')
])
def test_remove_unused_levels_large(first_type, second_type):
    """``remove_unused_levels`` on a large, heavily filtered index (GH16556)."""
    # because tests should be deterministic (and this test in particular
    # checks that levels are removed, which is not the case for every
    # random input):
    rng = np.random.RandomState(4)  # seed is arbitrary value that works

    size = 1 << 16
    # draw order (first, second, third) must be kept for reproducibility
    first = rng.randint(0, 1 << 13, size).astype(first_type)
    second = rng.randint(0, 1 << 10, size).astype(second_type)
    third = rng.rand(size)

    df = DataFrame(dict(first=first, second=second, third=third))
    df = df.groupby(['first', 'second']).sum()
    df = df[df.third < 0.1]

    filtered_index = df.index
    result = filtered_index.remove_unused_levels()
    for level in (0, 1):
        assert len(result.levels[level]) < len(filtered_index.levels[level])
    assert result.equals(filtered_index)

    expected = df.reset_index().set_index(['first', 'second']).index
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize('level0', [['a', 'd', 'b'],
                                    ['a', 'd', 'b', 'unused']])
@pytest.mark.parametrize('level1', [['w', 'x', 'y', 'z'],
                                    ['w', 'x', 'y', 'z', 'unused']])
def test_remove_unused_nan(level0, level1):
    """Unused labels are dropped even when codes contain -1 (GH 18417)."""
    codes = [[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]
    mi = pd.MultiIndex(levels=[level0, level1], codes=codes)

    pruned = mi.remove_unused_levels()
    tm.assert_index_equal(pruned, mi)
    for level_values in (pruned.levels[0], pruned.levels[1]):
        assert 'unused' not in level_values
def test_argsort(idx):
result = idx.argsort()
expected = idx.values.argsort()
tm.assert_numpy_array_equal(result, expected)
@@ -1,108 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
import pandas as pd
from pandas import PeriodIndex, period_range
import pandas.util.testing as tm
class TestPeriodIndexArithmetic(object):
    """Tests for ``PeriodIndex.shift``."""

    # ---------------------------------------------------------------
    # PeriodIndex.shift is used by __add__ and __sub__

    def test_pi_shift_ndarray(self):
        """Shifting by an integer array shifts element-wise; NaT stays NaT."""
        idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'],
                          freq='M', name='idx')
        result = idx.shift(np.array([1, 2, 3, 4]))
        expected = PeriodIndex(['2011-02', '2011-04', 'NaT', '2011-08'],
                               freq='M', name='idx')
        tm.assert_index_equal(result, expected)

        result = idx.shift(np.array([1, -2, 3, -4]))
        expected = PeriodIndex(['2011-02', '2010-12', 'NaT', '2010-12'],
                               freq='M', name='idx')
        tm.assert_index_equal(result, expected)

    def test_shift(self):
        """Shift by 0, +1 and -1 at annual, monthly and daily frequency."""
        pi1 = period_range(freq='A', start='1/1/2001', end='12/1/2009')
        pi2 = period_range(freq='A', start='1/1/2002', end='12/1/2010')

        tm.assert_index_equal(pi1.shift(0), pi1)

        assert len(pi1) == len(pi2)
        tm.assert_index_equal(pi1.shift(1), pi2)

        pi1 = period_range(freq='A', start='1/1/2001', end='12/1/2009')
        pi2 = period_range(freq='A', start='1/1/2000', end='12/1/2008')
        assert len(pi1) == len(pi2)
        tm.assert_index_equal(pi1.shift(-1), pi2)

        pi1 = period_range(freq='M', start='1/1/2001', end='12/1/2009')
        pi2 = period_range(freq='M', start='2/1/2001', end='1/1/2010')
        assert len(pi1) == len(pi2)
        tm.assert_index_equal(pi1.shift(1), pi2)

        pi1 = period_range(freq='M', start='1/1/2001', end='12/1/2009')
        pi2 = period_range(freq='M', start='12/1/2000', end='11/1/2009')
        assert len(pi1) == len(pi2)
        tm.assert_index_equal(pi1.shift(-1), pi2)

        pi1 = period_range(freq='D', start='1/1/2001', end='12/1/2009')
        pi2 = period_range(freq='D', start='1/2/2001', end='12/2/2009')
        assert len(pi1) == len(pi2)
        tm.assert_index_equal(pi1.shift(1), pi2)

        pi1 = period_range(freq='D', start='1/1/2001', end='12/1/2009')
        pi2 = period_range(freq='D', start='12/31/2000', end='11/30/2009')
        assert len(pi1) == len(pi2)
        tm.assert_index_equal(pi1.shift(-1), pi2)

    def test_shift_corner_cases(self):
        """Empty index, rejected ``freq`` keyword, and hourly shifts (GH#9903)."""
        idx = pd.PeriodIndex([], name='xxx', freq='H')

        with pytest.raises(TypeError):
            # period shift doesn't accept freq
            idx.shift(1, freq='H')

        tm.assert_index_equal(idx.shift(0), idx)
        tm.assert_index_equal(idx.shift(3), idx)

        # Fixed: each list below was missing the comma after its second
        # element, so Python concatenated the last two string literals into
        # one garbled entry instead of giving three hourly periods.
        idx = pd.PeriodIndex(['2011-01-01 10:00', '2011-01-01 11:00',
                              '2011-01-01 12:00'], name='xxx', freq='H')
        tm.assert_index_equal(idx.shift(0), idx)
        exp = pd.PeriodIndex(['2011-01-01 13:00', '2011-01-01 14:00',
                              '2011-01-01 15:00'], name='xxx', freq='H')
        tm.assert_index_equal(idx.shift(3), exp)
        exp = pd.PeriodIndex(['2011-01-01 07:00', '2011-01-01 08:00',
                              '2011-01-01 09:00'], name='xxx', freq='H')
        tm.assert_index_equal(idx.shift(-3), exp)

    def test_shift_nat(self):
        """A scalar shift leaves NaT entries as NaT and keeps the name."""
        idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'],
                          freq='M', name='idx')
        result = idx.shift(1)
        expected = PeriodIndex(['2011-02', '2011-03', 'NaT', '2011-05'],
                               freq='M', name='idx')
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name

    def test_shift_gh8083(self):
        """Shift a daily ``period_range`` forward by one day (GH#8083)."""
        # test shift for PeriodIndex
        # GH#8083
        drange = pd.period_range('20130101', periods=5, freq='D')
        result = drange.shift(1)
        expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04',
                                '2013-01-05', '2013-01-06'], freq='D')
        tm.assert_index_equal(result, expected)

    def test_shift_periods(self):
        """``periods`` is the supported keyword; ``n`` warns (GH #22458)."""
        # GH #22458 : argument 'n' was deprecated in favor of 'periods'
        idx = period_range(freq='A', start='1/1/2001', end='12/1/2009')
        tm.assert_index_equal(idx.shift(periods=0), idx)
        tm.assert_index_equal(idx.shift(0), idx)
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=True):
            tm.assert_index_equal(idx.shift(n=0), idx)
@@ -1,152 +0,0 @@
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, PeriodIndex, Series, period_range
from pandas.util import testing as tm
class TestPeriodIndex(object):
    """``asfreq`` conversions on PeriodIndex and period-indexed containers."""

    def test_asfreq(self):
        """Single-period indexes at seven frequencies convert into each other."""
        freqs = ['A', 'Q', 'M', 'D', 'H', 'Min', 'S']
        ends = ['1/1/2001', '1/1/2001', '1/1/2001', '1/1/2001',
                '1/1/2001 00:00', '1/1/2001 00:00', '1/1/2001 00:00:00']
        by_freq = {}
        for freq, end in zip(freqs, ends):
            by_freq[freq] = period_range(freq=freq, start='1/1/2001', end=end)

        # the annual index also exercises the accepted spellings of "start"
        annual = by_freq['A']
        spellings = [('Q', 'S'), ('Q', 's'), ('M', 'start'), ('D', 'StarT'),
                     ('H', 'beGIN'), ('Min', 'S'), ('S', 'S')]
        for target, how in spellings:
            assert annual.asfreq(target, how) == by_freq[target]

        # every other index converts to each of the remaining frequencies
        for source in freqs[1:]:
            for target in freqs:
                if target == source:
                    continue
                assert by_freq[source].asfreq(target, 'S') == by_freq[target]

        with pytest.raises(ValueError):
            by_freq['S'].asfreq('T', 'foo')

        result1 = annual.asfreq('3M')
        result2 = annual.asfreq('M')
        expected = period_range(freq='M', start='2001-12', end='2001-12')
        for converted, freqstr in ((result1, '3M'), (result2, 'M')):
            tm.assert_numpy_array_equal(converted.asi8, expected.asi8)
            assert converted.freqstr == freqstr

    def test_asfreq_nat(self):
        """NaT entries stay NaT through a frequency conversion."""
        monthly = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'],
                              freq='M')
        quarterly = PeriodIndex(['2011Q1', '2011Q1', 'NaT', '2011Q2'],
                                freq='Q')
        tm.assert_index_equal(monthly.asfreq(freq='Q'), quarterly)

    @pytest.mark.parametrize('freq', ['D', '3D'])
    def test_asfreq_mult_pi(self, freq):
        """A '2M' index converts to daily frequencies, end- and start-anchored."""
        pi = PeriodIndex(['2001-01', '2001-02', 'NaT', '2001-03'], freq='2M')

        # default anchoring
        at_end = pi.asfreq(freq)
        exp = PeriodIndex(['2001-02-28', '2001-03-31', 'NaT',
                           '2001-04-30'], freq=freq)
        tm.assert_index_equal(at_end, exp)
        assert at_end.freq == exp.freq

        # explicit start anchoring
        at_start = pi.asfreq(freq, how='S')
        exp = PeriodIndex(['2001-01-01', '2001-02-01', 'NaT',
                           '2001-03-01'], freq=freq)
        tm.assert_index_equal(at_start, exp)
        assert at_start.freq == exp.freq

    def test_asfreq_combined_pi(self):
        """Conversions to and from combined frequencies such as '1D1H'."""
        labels = ['2001-01-01 00:00', '2001-01-02 02:00', 'NaT']
        combined = ['1D1H', '1H1D']

        pi = pd.PeriodIndex(list(labels), freq='H')
        exp = PeriodIndex(list(labels), freq='25H')
        for freq, how in zip(combined, ['S', 'E']):
            result = pi.asfreq(freq, how=how)
            tm.assert_index_equal(result, exp)
            assert result.freq == exp.freq

        for freq in combined:
            # default anchoring
            pi = pd.PeriodIndex(list(labels), freq=freq)
            result = pi.asfreq('H')
            exp = PeriodIndex(['2001-01-02 00:00', '2001-01-03 02:00', 'NaT'],
                              freq='H')
            tm.assert_index_equal(result, exp)
            assert result.freq == exp.freq

            # explicit start anchoring
            pi = pd.PeriodIndex(list(labels), freq=freq)
            result = pi.asfreq('H', how='S')
            exp = PeriodIndex(list(labels), freq='H')
            tm.assert_index_equal(result, exp)
            assert result.freq == exp.freq

    def test_asfreq_ts(self):
        """Series/DataFrame ``asfreq`` converts the index like the index method."""
        index = period_range(freq='A', start='1/1/2001', end='12/31/2010')
        n_periods = len(index)
        ts = Series(np.random.randn(n_periods), index=index)
        df = DataFrame(np.random.randn(n_periods, 3), index=index)

        result = ts.asfreq('D', how='end')
        df_result = df.asfreq('D', how='end')
        exp_index = index.asfreq('D', how='end')
        assert len(result) == len(ts)
        tm.assert_index_equal(result.index, exp_index)
        tm.assert_index_equal(df_result.index, exp_index)

        result = ts.asfreq('D', how='start')
        assert len(result) == len(ts)
        tm.assert_index_equal(result.index, index.asfreq('D', how='start'))

    def test_astype_asfreq(self):
        """``astype('period[X]')`` agrees with ``asfreq('X')``."""
        pi1 = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'], freq='D')
        months = ['2011-01', '2011-02', '2011-03']
        for freq, dtype in (('M', 'period[M]'), ('3M', 'period[3M]')):
            exp = PeriodIndex(list(months), freq=freq)
            tm.assert_index_equal(pi1.asfreq(freq), exp)
            tm.assert_index_equal(pi1.astype(dtype), exp)
@@ -1,126 +0,0 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
import pandas as pd
from pandas import Index, Int64Index, NaT, Period, PeriodIndex, period_range
import pandas.util.testing as tm
class TestPeriodIndexAsType(object):
@pytest.mark.parametrize('dtype', [
    float, 'timedelta64', 'timedelta64[ns]'])
def test_astype_raises(self, dtype):
    """Casting to float or timedelta dtypes must raise (GH#13149, GH#13209)."""
    values = ['2016-05-16', 'NaT', NaT, np.NaN]
    idx = PeriodIndex(values, freq='D')
    with pytest.raises(TypeError, match='Cannot cast PeriodArray to dtype'):
        idx.astype(dtype)
def test_astype_conversion(self):
# GH#13149, GH#13209
idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D')
result = idx.astype(object)
expected = Index([Period('2016-05-16', freq='D')] +
[Period(NaT, freq='D')] * 3, dtype='object')
tm.assert_index_equal(result, expected)
result = idx.astype(np.int64)
expected = Int64Index([16937] + [-9223372036854775808] * 3,
dtype=np.int64)
tm.assert_index_equal(result, expected)
result = idx.astype(str)
expected = Index(str(x) for x in idx)
tm.assert_index_equal(result, expected)
idx = period_range('1990', '2009', freq='A')
result = idx.astype('i8')
tm.assert_index_equal(result, Index(idx.asi8))
tm.assert_numpy_array_equal(result.values, idx.asi8)
def test_astype_uint(self):
arr = period_range('2000', periods=2)
expected = pd.UInt64Index(np.array([10957, 10958], dtype='uint64'))
tm.assert_index_equal(arr.astype("uint64"), expected)
tm.assert_index_equal(arr.astype("uint32"), expected)
def test_astype_object(self):
idx = pd.PeriodIndex([], freq='M')
exp = np.array([], dtype=object)
tm.assert_numpy_array_equal(idx.astype(object).values, exp)
tm.assert_numpy_array_equal(idx._mpl_repr(), exp)
idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M')
exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object)
tm.assert_numpy_array_equal(idx.astype(object).values, exp)
tm.assert_numpy_array_equal(idx._mpl_repr(), exp)
exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT],
dtype=object)
idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D')
tm.assert_numpy_array_equal(idx.astype(object).values, exp)
tm.assert_numpy_array_equal(idx._mpl_repr(), exp)
# TODO: de-duplicate this version (from test_ops) with the one above
# (from test_period)
def test_astype_object2(self):
idx = pd.period_range(start='2013-01-01', periods=4, freq='M',
name='idx')
expected_list = [pd.Period('2013-01-31', freq='M'),
pd.Period('2013-02-28', freq='M'),
pd.Period('2013-03-31', freq='M'),
pd.Period('2013-04-30', freq='M')]
expected = pd.Index(expected_list, dtype=object, name='idx')
result = idx.astype(object)
assert isinstance(result, Index)
assert result.dtype == object
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert idx.tolist() == expected_list
idx = PeriodIndex(['2013-01-01', '2013-01-02', 'NaT',
'2013-01-04'], freq='D', name='idx')
expected_list = [pd.Period('2013-01-01', freq='D'),
pd.Period('2013-01-02', freq='D'),
pd.Period('NaT', freq='D'),
pd.Period('2013-01-04', freq='D')]
expected = pd.Index(expected_list, dtype=object, name='idx')
result = idx.astype(object)
assert isinstance(result, Index)
assert result.dtype == object
tm.assert_index_equal(result, expected)
for i in [0, 1, 3]:
assert result[i] == expected[i]
assert result[2] is pd.NaT
assert result.name == expected.name
result_list = idx.tolist()
for i in [0, 1, 3]:
assert result_list[i] == expected_list[i]
assert result_list[2] is pd.NaT
def test_astype_category(self):
obj = pd.period_range("2000", periods=2)
result = obj.astype('category')
expected = pd.CategoricalIndex([pd.Period('2000-01-01', freq="D"),
pd.Period('2000-01-02', freq="D")])
tm.assert_index_equal(result, expected)
result = obj._data.astype('category')
expected = expected.values
tm.assert_categorical_equal(result, expected)
def test_astype_array_fallback(self):
obj = pd.period_range("2000", periods=2)
result = obj.astype(bool)
expected = pd.Index(np.array([True, True]))
tm.assert_index_equal(result, expected)
result = obj._data.astype(bool)
expected = np.array([True, True])
tm.assert_numpy_array_equal(result, expected)
@@ -1,519 +0,0 @@
import numpy as np
import pytest
from pandas.compat import PY3, lmap, lrange, text_type
from pandas.core.dtypes.dtypes import PeriodDtype
import pandas as pd
from pandas import (
Index, Period, PeriodIndex, Series, date_range, offsets, period_range)
import pandas.core.indexes.period as period
import pandas.util.testing as tm
class TestPeriodIndex(object):
def setup_method(self, method):
    """No-op per-test hook; these tests need no shared fixture state."""
    return None
def test_construction_base_constructor(self):
# GH 13664
arr = [pd.Period('2011-01', freq='M'), pd.NaT,
pd.Period('2011-03', freq='M')]
tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr))
tm.assert_index_equal(pd.Index(np.array(arr)),
pd.PeriodIndex(np.array(arr)))
arr = [np.nan, pd.NaT, pd.Period('2011-03', freq='M')]
tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr))
tm.assert_index_equal(pd.Index(np.array(arr)),
pd.PeriodIndex(np.array(arr)))
arr = [pd.Period('2011-01', freq='M'), pd.NaT,
pd.Period('2011-03', freq='D')]
tm.assert_index_equal(pd.Index(arr), pd.Index(arr, dtype=object))
tm.assert_index_equal(pd.Index(np.array(arr)),
pd.Index(np.array(arr), dtype=object))
def test_constructor_use_start_freq(self):
# GH #1118
p = Period('4/2/2012', freq='B')
with tm.assert_produces_warning(FutureWarning):
index = PeriodIndex(start=p, periods=10)
expected = period_range(start='4/2/2012', periods=10, freq='B')
tm.assert_index_equal(index, expected)
index = period_range(start=p, periods=10)
tm.assert_index_equal(index, expected)
def test_constructor_field_arrays(self):
# GH #1264
years = np.arange(1990, 2010).repeat(4)[2:-2]
quarters = np.tile(np.arange(1, 5), 20)[2:-2]
index = PeriodIndex(year=years, quarter=quarters, freq='Q-DEC')
expected = period_range('1990Q3', '2009Q2', freq='Q-DEC')
tm.assert_index_equal(index, expected)
index2 = PeriodIndex(year=years, quarter=quarters, freq='2Q-DEC')
tm.assert_numpy_array_equal(index.asi8, index2.asi8)
index = PeriodIndex(year=years, quarter=quarters)
tm.assert_index_equal(index, expected)
years = [2007, 2007, 2007]
months = [1, 2]
pytest.raises(ValueError, PeriodIndex, year=years, month=months,
freq='M')
pytest.raises(ValueError, PeriodIndex, year=years, month=months,
freq='2M')
pytest.raises(ValueError, PeriodIndex, year=years, month=months,
freq='M', start=Period('2007-01', freq='M'))
years = [2007, 2007, 2007]
months = [1, 2, 3]
idx = PeriodIndex(year=years, month=months, freq='M')
exp = period_range('2007-01', periods=3, freq='M')
tm.assert_index_equal(idx, exp)
def test_constructor_U(self):
# U was used as undefined period
pytest.raises(ValueError, period_range, '2007-1-1', periods=500,
freq='X')
def test_constructor_nano(self):
idx = period_range(start=Period(ordinal=1, freq='N'),
end=Period(ordinal=4, freq='N'), freq='N')
exp = PeriodIndex([Period(ordinal=1, freq='N'),
Period(ordinal=2, freq='N'),
Period(ordinal=3, freq='N'),
Period(ordinal=4, freq='N')], freq='N')
tm.assert_index_equal(idx, exp)
def test_constructor_arrays_negative_year(self):
years = np.arange(1960, 2000, dtype=np.int64).repeat(4)
quarters = np.tile(np.array([1, 2, 3, 4], dtype=np.int64), 40)
pindex = PeriodIndex(year=years, quarter=quarters)
tm.assert_index_equal(pindex.year, pd.Index(years))
tm.assert_index_equal(pindex.quarter, pd.Index(quarters))
def test_constructor_invalid_quarters(self):
pytest.raises(ValueError, PeriodIndex, year=lrange(2000, 2004),
quarter=lrange(4), freq='Q-DEC')
def test_constructor_corner(self):
pytest.raises(ValueError, PeriodIndex, periods=10, freq='A')
start = Period('2007', freq='A-JUN')
end = Period('2010', freq='A-DEC')
pytest.raises(ValueError, PeriodIndex, start=start, end=end)
pytest.raises(ValueError, PeriodIndex, start=start)
pytest.raises(ValueError, PeriodIndex, end=end)
result = period_range('2007-01', periods=10.5, freq='M')
exp = period_range('2007-01', periods=10, freq='M')
tm.assert_index_equal(result, exp)
def test_constructor_fromarraylike(self):
idx = period_range('2007-01', periods=20, freq='M')
# values is an array of Period, thus can retrieve freq
tm.assert_index_equal(PeriodIndex(idx.values), idx)
tm.assert_index_equal(PeriodIndex(list(idx.values)), idx)
pytest.raises(ValueError, PeriodIndex, idx._ndarray_values)
pytest.raises(ValueError, PeriodIndex, list(idx._ndarray_values))
pytest.raises(TypeError, PeriodIndex,
data=Period('2007', freq='A'))
result = PeriodIndex(iter(idx))
tm.assert_index_equal(result, idx)
result = PeriodIndex(idx)
tm.assert_index_equal(result, idx)
result = PeriodIndex(idx, freq='M')
tm.assert_index_equal(result, idx)
result = PeriodIndex(idx, freq=offsets.MonthEnd())
tm.assert_index_equal(result, idx)
assert result.freq == 'M'
result = PeriodIndex(idx, freq='2M')
tm.assert_index_equal(result, idx.asfreq('2M'))
assert result.freq == '2M'
result = PeriodIndex(idx, freq=offsets.MonthEnd(2))
tm.assert_index_equal(result, idx.asfreq('2M'))
assert result.freq == '2M'
result = PeriodIndex(idx, freq='D')
exp = idx.asfreq('D', 'e')
tm.assert_index_equal(result, exp)
def test_constructor_datetime64arr(self):
vals = np.arange(100000, 100000 + 10000, 100, dtype=np.int64)
vals = vals.view(np.dtype('M8[us]'))
pytest.raises(ValueError, PeriodIndex, vals, freq='D')
@pytest.mark.parametrize('box', [None, 'series', 'index'])
def test_constructor_datetime64arr_ok(self, box):
# https://github.com/pandas-dev/pandas/issues/23438
data = pd.date_range('2017', periods=4, freq="M")
if box is None:
data = data._values
elif box == 'series':
data = pd.Series(data)
result = PeriodIndex(data, freq='D')
expected = PeriodIndex([
'2017-01-31', '2017-02-28', '2017-03-31', '2017-04-30'
], freq="D")
tm.assert_index_equal(result, expected)
def test_constructor_dtype(self):
# passing a dtype with a tz should localize
idx = PeriodIndex(['2013-01', '2013-03'], dtype='period[M]')
exp = PeriodIndex(['2013-01', '2013-03'], freq='M')
tm.assert_index_equal(idx, exp)
assert idx.dtype == 'period[M]'
idx = PeriodIndex(['2013-01-05', '2013-03-05'], dtype='period[3D]')
exp = PeriodIndex(['2013-01-05', '2013-03-05'], freq='3D')
tm.assert_index_equal(idx, exp)
assert idx.dtype == 'period[3D]'
# if we already have a freq and its not the same, then asfreq
# (not changed)
idx = PeriodIndex(['2013-01-01', '2013-01-02'], freq='D')
res = PeriodIndex(idx, dtype='period[M]')
exp = PeriodIndex(['2013-01', '2013-01'], freq='M')
tm.assert_index_equal(res, exp)
assert res.dtype == 'period[M]'
res = PeriodIndex(idx, freq='M')
tm.assert_index_equal(res, exp)
assert res.dtype == 'period[M]'
msg = 'specified freq and dtype are different'
with pytest.raises(period.IncompatibleFrequency, match=msg):
PeriodIndex(['2011-01'], freq='M', dtype='period[D]')
def test_constructor_empty(self):
idx = pd.PeriodIndex([], freq='M')
assert isinstance(idx, PeriodIndex)
assert len(idx) == 0
assert idx.freq == 'M'
with pytest.raises(ValueError, match='freq not specified'):
pd.PeriodIndex([])
def test_constructor_pi_nat(self):
idx = PeriodIndex([Period('2011-01', freq='M'), pd.NaT,
Period('2011-01', freq='M')])
exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M')
tm.assert_index_equal(idx, exp)
idx = PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT,
Period('2011-01', freq='M')]))
tm.assert_index_equal(idx, exp)
idx = PeriodIndex([pd.NaT, pd.NaT, Period('2011-01', freq='M'),
Period('2011-01', freq='M')])
exp = PeriodIndex(['NaT', 'NaT', '2011-01', '2011-01'], freq='M')
tm.assert_index_equal(idx, exp)
idx = PeriodIndex(np.array([pd.NaT, pd.NaT,
Period('2011-01', freq='M'),
Period('2011-01', freq='M')]))
tm.assert_index_equal(idx, exp)
idx = PeriodIndex([pd.NaT, pd.NaT, '2011-01', '2011-01'], freq='M')
tm.assert_index_equal(idx, exp)
with pytest.raises(ValueError, match='freq not specified'):
PeriodIndex([pd.NaT, pd.NaT])
with pytest.raises(ValueError, match='freq not specified'):
PeriodIndex(np.array([pd.NaT, pd.NaT]))
with pytest.raises(ValueError, match='freq not specified'):
PeriodIndex(['NaT', 'NaT'])
with pytest.raises(ValueError, match='freq not specified'):
PeriodIndex(np.array(['NaT', 'NaT']))
def test_constructor_incompat_freq(self):
msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)"
with pytest.raises(period.IncompatibleFrequency, match=msg):
PeriodIndex([Period('2011-01', freq='M'), pd.NaT,
Period('2011-01', freq='D')])
with pytest.raises(period.IncompatibleFrequency, match=msg):
PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT,
Period('2011-01', freq='D')]))
# first element is pd.NaT
with pytest.raises(period.IncompatibleFrequency, match=msg):
PeriodIndex([pd.NaT, Period('2011-01', freq='M'),
Period('2011-01', freq='D')])
with pytest.raises(period.IncompatibleFrequency, match=msg):
PeriodIndex(np.array([pd.NaT, Period('2011-01', freq='M'),
Period('2011-01', freq='D')]))
def test_constructor_mixed(self):
idx = PeriodIndex(['2011-01', pd.NaT, Period('2011-01', freq='M')])
exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M')
tm.assert_index_equal(idx, exp)
idx = PeriodIndex(['NaT', pd.NaT, Period('2011-01', freq='M')])
exp = PeriodIndex(['NaT', 'NaT', '2011-01'], freq='M')
tm.assert_index_equal(idx, exp)
idx = PeriodIndex([Period('2011-01-01', freq='D'), pd.NaT,
'2012-01-01'])
exp = PeriodIndex(['2011-01-01', 'NaT', '2012-01-01'], freq='D')
tm.assert_index_equal(idx, exp)
def test_constructor_simple_new(self):
idx = period_range('2007-01', name='p', periods=2, freq='M')
result = idx._simple_new(idx, name='p', freq=idx.freq)
tm.assert_index_equal(result, idx)
result = idx._simple_new(idx.astype('i8'), name='p', freq=idx.freq)
tm.assert_index_equal(result, idx)
def test_constructor_simple_new_empty(self):
# GH13079
idx = PeriodIndex([], freq='M', name='p')
result = idx._simple_new(idx, name='p', freq='M')
tm.assert_index_equal(result, idx)
@pytest.mark.parametrize('floats', [[1.1, 2.1], np.array([1.1, 2.1])])
def test_constructor_floats(self, floats):
with pytest.raises(TypeError):
pd.PeriodIndex._simple_new(floats, freq='M')
with pytest.raises(TypeError):
pd.PeriodIndex(floats, freq='M')
def test_constructor_nat(self):
pytest.raises(ValueError, period_range, start='NaT',
end='2011-01-01', freq='M')
pytest.raises(ValueError, period_range, start='2011-01-01',
end='NaT', freq='M')
def test_constructor_year_and_quarter(self):
year = pd.Series([2001, 2002, 2003])
quarter = year - 2000
idx = PeriodIndex(year=year, quarter=quarter)
strs = ['%dQ%d' % t for t in zip(quarter, year)]
lops = list(map(Period, strs))
p = PeriodIndex(lops)
tm.assert_index_equal(p, idx)
@pytest.mark.parametrize('func, warning', [
(PeriodIndex, FutureWarning),
(period_range, None)
])
def test_constructor_freq_mult(self, func, warning):
# GH #7811
with tm.assert_produces_warning(warning):
# must be the same, but for sure...
pidx = func(start='2014-01', freq='2M', periods=4)
expected = PeriodIndex(['2014-01', '2014-03',
'2014-05', '2014-07'], freq='2M')
tm.assert_index_equal(pidx, expected)
with tm.assert_produces_warning(warning):
pidx = func(start='2014-01-02', end='2014-01-15', freq='3D')
expected = PeriodIndex(['2014-01-02', '2014-01-05',
'2014-01-08', '2014-01-11',
'2014-01-14'], freq='3D')
tm.assert_index_equal(pidx, expected)
with tm.assert_produces_warning(warning):
pidx = func(end='2014-01-01 17:00', freq='4H', periods=3)
expected = PeriodIndex(['2014-01-01 09:00', '2014-01-01 13:00',
'2014-01-01 17:00'], freq='4H')
tm.assert_index_equal(pidx, expected)
msg = ('Frequency must be positive, because it'
' represents span: -1M')
with pytest.raises(ValueError, match=msg):
PeriodIndex(['2011-01'], freq='-1M')
msg = ('Frequency must be positive, because it' ' represents span: 0M')
with pytest.raises(ValueError, match=msg):
PeriodIndex(['2011-01'], freq='0M')
msg = ('Frequency must be positive, because it' ' represents span: 0M')
with pytest.raises(ValueError, match=msg):
period_range('2011-01', periods=3, freq='0M')
@pytest.mark.parametrize('freq', ['A', 'M', 'D', 'T', 'S'])
@pytest.mark.parametrize('mult', [1, 2, 3, 4, 5])
def test_constructor_freq_mult_dti_compat(self, mult, freq):
freqstr = str(mult) + freq
pidx = period_range(start='2014-04-01', freq=freqstr, periods=10)
expected = date_range(start='2014-04-01', freq=freqstr,
periods=10).to_period(freqstr)
tm.assert_index_equal(pidx, expected)
def test_constructor_freq_combined(self):
for freq in ['1D1H', '1H1D']:
pidx = PeriodIndex(['2016-01-01', '2016-01-02'], freq=freq)
expected = PeriodIndex(['2016-01-01 00:00', '2016-01-02 00:00'],
freq='25H')
for freq in ['1D1H', '1H1D']:
pidx = period_range(start='2016-01-01', periods=2, freq=freq)
expected = PeriodIndex(['2016-01-01 00:00', '2016-01-02 01:00'],
freq='25H')
tm.assert_index_equal(pidx, expected)
def test_constructor_range_based_deprecated(self):
with tm.assert_produces_warning(FutureWarning):
pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009')
assert len(pi) == 9
def test_constructor_range_based_deprecated_different_freq(self):
with tm.assert_produces_warning(FutureWarning) as m:
PeriodIndex(start='2000', periods=2)
warning, = m
assert 'freq="A-DEC"' in str(warning.message)
def test_constructor(self):
pi = period_range(freq='A', start='1/1/2001', end='12/1/2009')
assert len(pi) == 9
pi = period_range(freq='Q', start='1/1/2001', end='12/1/2009')
assert len(pi) == 4 * 9
pi = period_range(freq='M', start='1/1/2001', end='12/1/2009')
assert len(pi) == 12 * 9
pi = period_range(freq='D', start='1/1/2001', end='12/31/2009')
assert len(pi) == 365 * 9 + 2
pi = period_range(freq='B', start='1/1/2001', end='12/31/2009')
assert len(pi) == 261 * 9
pi = period_range(freq='H', start='1/1/2001', end='12/31/2001 23:00')
assert len(pi) == 365 * 24
pi = period_range(freq='Min', start='1/1/2001', end='1/1/2001 23:59')
assert len(pi) == 24 * 60
pi = period_range(freq='S', start='1/1/2001', end='1/1/2001 23:59:59')
assert len(pi) == 24 * 60 * 60
start = Period('02-Apr-2005', 'B')
i1 = period_range(start=start, periods=20)
assert len(i1) == 20
assert i1.freq == start.freq
assert i1[0] == start
end_intv = Period('2006-12-31', 'W')
i1 = period_range(end=end_intv, periods=10)
assert len(i1) == 10
assert i1.freq == end_intv.freq
assert i1[-1] == end_intv
end_intv = Period('2006-12-31', '1w')
i2 = period_range(end=end_intv, periods=10)
assert len(i1) == len(i2)
assert (i1 == i2).all()
assert i1.freq == i2.freq
end_intv = Period('2006-12-31', ('w', 1))
i2 = period_range(end=end_intv, periods=10)
assert len(i1) == len(i2)
assert (i1 == i2).all()
assert i1.freq == i2.freq
end_intv = Period('2005-05-01', 'B')
i1 = period_range(start=start, end=end_intv)
# infer freq from first element
i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')])
assert len(i2) == 2
assert i2[0] == end_intv
i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')]))
assert len(i2) == 2
assert i2[0] == end_intv
# Mixed freq should fail
vals = [end_intv, Period('2006-12-31', 'w')]
pytest.raises(ValueError, PeriodIndex, vals)
vals = np.array(vals)
pytest.raises(ValueError, PeriodIndex, vals)
def test_constructor_error(self):
start = Period('02-Apr-2005', 'B')
end_intv = Period('2006-12-31', ('w', 1))
msg = 'start and end must have same freq'
with pytest.raises(ValueError, match=msg):
PeriodIndex(start=start, end=end_intv)
msg = ('Of the three parameters: start, end, and periods, '
'exactly two must be specified')
with pytest.raises(ValueError, match=msg):
PeriodIndex(start=start)
@pytest.mark.parametrize('freq', ['M', 'Q', 'A', 'D', 'B',
'T', 'S', 'L', 'U', 'N', 'H'])
def test_recreate_from_data(self, freq):
org = period_range(start='2001/04/01', freq=freq, periods=1)
idx = PeriodIndex(org.values, freq=freq)
tm.assert_index_equal(idx, org)
def test_map_with_string_constructor(self):
raw = [2005, 2007, 2009]
index = PeriodIndex(raw, freq='A')
types = str,
if PY3:
# unicode
types += text_type,
for t in types:
expected = Index(lmap(t, raw))
res = index.map(t)
# should return an Index
assert isinstance(res, Index)
# preserve element types
assert all(isinstance(resi, t) for resi in res)
# lastly, values should compare equal
tm.assert_index_equal(res, expected)
class TestSeriesPeriod(object):
    """Series construction from period data."""

    def setup_method(self, method):
        # Fixture: ten consecutive daily periods starting 2000-01-01.
        daily = period_range('2000-01-01', periods=10, freq='D')
        self.series = Series(daily)

    def test_constructor_cant_cast_period(self):
        """Period data cannot be cast to float at construction."""
        with pytest.raises(TypeError):
            Series(period_range('2000-01-01', periods=10, freq='D'),
                   dtype=float)

    def test_constructor_cast_object(self):
        """An explicit ``PeriodDtype('D')`` matches the default inference."""
        explicit = Series(period_range('1/1/2000', periods=10),
                          dtype=PeriodDtype("D"))
        inferred = Series(period_range('1/1/2000', periods=10))
        tm.assert_series_equal(explicit, inferred)
@@ -1,220 +0,0 @@
import numpy as np
import pytest
import pandas as pd
from pandas import PeriodIndex
import pandas.util.testing as tm
def test_to_native_types():
index = PeriodIndex(['2017-01-01', '2017-01-02',
'2017-01-03'], freq='D')
# First, with no arguments.
expected = np.array(['2017-01-01', '2017-01-02',
'2017-01-03'], dtype='=U10')
result = index.to_native_types()
tm.assert_numpy_array_equal(result, expected)
# No NaN values, so na_rep has no effect
result = index.to_native_types(na_rep='pandas')
tm.assert_numpy_array_equal(result, expected)
# Make sure slicing works
expected = np.array(['2017-01-01', '2017-01-03'], dtype='=U10')
result = index.to_native_types([0, 2])
tm.assert_numpy_array_equal(result, expected)
# Make sure date formatting works
expected = np.array(['01-2017-01', '01-2017-02',
'01-2017-03'], dtype='=U10')
result = index.to_native_types(date_format='%m-%Y-%d')
tm.assert_numpy_array_equal(result, expected)
# NULL object handling should work
index = PeriodIndex(['2017-01-01', pd.NaT, '2017-01-03'], freq='D')
expected = np.array(['2017-01-01', 'NaT', '2017-01-03'], dtype=object)
result = index.to_native_types()
tm.assert_numpy_array_equal(result, expected)
expected = np.array(['2017-01-01', 'pandas',
'2017-01-03'], dtype=object)
result = index.to_native_types(na_rep='pandas')
tm.assert_numpy_array_equal(result, expected)
class TestPeriodIndexRendering(object):
def test_frame_repr(self):
    # Compares ``repr`` of a one-column, three-row DataFrame against a
    # fixed expected string.
    # NOTE(review): the index is built with ``pd.date_range`` although the
    # enclosing class is named for PeriodIndex rendering -- confirm whether
    # ``pd.period_range`` was intended.
    df = pd.DataFrame({"A": [1, 2, 3]},
                      index=pd.date_range('2000', periods=3))
    result = repr(df)
    # NOTE(review): the literal below separates label and value with a
    # single space; confirm it matches the real column padding and was not
    # whitespace-collapsed when this listing was produced.
    expected = (
        ' A\n'
        '2000-01-01 1\n'
        '2000-01-02 2\n'
        '2000-01-03 3')
    assert result == expected
@pytest.mark.parametrize('method', ['__repr__', '__unicode__', '__str__'])
def test_representation(self, method):
# GH#7601
idx1 = PeriodIndex([], freq='D')
idx2 = PeriodIndex(['2011-01-01'], freq='D')
idx3 = PeriodIndex(['2011-01-01', '2011-01-02'], freq='D')
idx4 = PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
freq='D')
idx5 = PeriodIndex(['2011', '2012', '2013'], freq='A')
idx6 = PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'],
freq='H')
idx7 = pd.period_range('2013Q1', periods=1, freq="Q")
idx8 = pd.period_range('2013Q1', periods=2, freq="Q")
idx9 = pd.period_range('2013Q1', periods=3, freq="Q")
idx10 = PeriodIndex(['2011-01-01', '2011-02-01'], freq='3D')
exp1 = """PeriodIndex([], dtype='period[D]', freq='D')"""
exp2 = """PeriodIndex(['2011-01-01'], dtype='period[D]', freq='D')"""
exp3 = ("PeriodIndex(['2011-01-01', '2011-01-02'], dtype='period[D]', "
"freq='D')")
exp4 = ("PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], "
"dtype='period[D]', freq='D')")
exp5 = ("PeriodIndex(['2011', '2012', '2013'], dtype='period[A-DEC]', "
"freq='A-DEC')")
exp6 = ("PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], "
"dtype='period[H]', freq='H')")
exp7 = ("PeriodIndex(['2013Q1'], dtype='period[Q-DEC]', "
"freq='Q-DEC')")
exp8 = ("PeriodIndex(['2013Q1', '2013Q2'], dtype='period[Q-DEC]', "
"freq='Q-DEC')")
exp9 = ("PeriodIndex(['2013Q1', '2013Q2', '2013Q3'], "
"dtype='period[Q-DEC]', freq='Q-DEC')")
exp10 = ("PeriodIndex(['2011-01-01', '2011-02-01'], "
"dtype='period[3D]', freq='3D')")
for idx, expected in zip([idx1, idx2, idx3, idx4, idx5,
idx6, idx7, idx8, idx9, idx10],
[exp1, exp2, exp3, exp4, exp5,
exp6, exp7, exp8, exp9, exp10]):
result = getattr(idx, method)()
assert result == expected
def test_representation_to_series(self):
    # Wraps each PeriodIndex in a Series and compares ``repr`` of that
    # Series against a fixed multi-line string.
    # GH#10971
    idx1 = PeriodIndex([], freq='D')
    idx2 = PeriodIndex(['2011-01-01'], freq='D')
    idx3 = PeriodIndex(['2011-01-01', '2011-01-02'], freq='D')
    idx4 = PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
                       freq='D')
    idx5 = PeriodIndex(['2011', '2012', '2013'], freq='A')
    idx6 = PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'],
                       freq='H')
    idx7 = pd.period_range('2013Q1', periods=1, freq="Q")
    idx8 = pd.period_range('2013Q1', periods=2, freq="Q")
    idx9 = pd.period_range('2013Q1', periods=3, freq="Q")
    # The continuation lines of the triple-quoted literals start at column
    # 0 on purpose: any leading whitespace would become part of the string.
    # NOTE(review): label and value are separated by a single space in
    # every literal below; confirm they match the real column padding and
    # were not whitespace-collapsed when this listing was produced.
    exp1 = """Series([], dtype: period[D])"""
    exp2 = """0 2011-01-01
dtype: period[D]"""
    exp3 = """0 2011-01-01
1 2011-01-02
dtype: period[D]"""
    exp4 = """0 2011-01-01
1 2011-01-02
2 2011-01-03
dtype: period[D]"""
    exp5 = """0 2011
1 2012
2 2013
dtype: period[A-DEC]"""
    exp6 = """0 2011-01-01 09:00
1 2012-02-01 10:00
2 NaT
dtype: period[H]"""
    exp7 = """0 2013Q1
dtype: period[Q-DEC]"""
    exp8 = """0 2013Q1
1 2013Q2
dtype: period[Q-DEC]"""
    exp9 = """0 2013Q1
1 2013Q2
2 2013Q3
dtype: period[Q-DEC]"""
    # Pair every index with its expected rendering and compare.
    for idx, expected in zip([idx1, idx2, idx3, idx4, idx5,
                              idx6, idx7, idx8, idx9],
                             [exp1, exp2, exp3, exp4, exp5,
                              exp6, exp7, exp8, exp9]):
        result = repr(pd.Series(idx))
        assert result == expected
def test_summary(self):
# GH#9116
idx1 = PeriodIndex([], freq='D')
idx2 = PeriodIndex(['2011-01-01'], freq='D')
idx3 = PeriodIndex(['2011-01-01', '2011-01-02'], freq='D')
idx4 = PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
freq='D')
idx5 = PeriodIndex(['2011', '2012', '2013'], freq='A')
idx6 = PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'],
freq='H')
idx7 = pd.period_range('2013Q1', periods=1, freq="Q")
idx8 = pd.period_range('2013Q1', periods=2, freq="Q")
idx9 = pd.period_range('2013Q1', periods=3, freq="Q")
exp1 = """PeriodIndex: 0 entries
Freq: D"""
exp2 = """PeriodIndex: 1 entries, 2011-01-01 to 2011-01-01
Freq: D"""
exp3 = """PeriodIndex: 2 entries, 2011-01-01 to 2011-01-02
Freq: D"""
exp4 = """PeriodIndex: 3 entries, 2011-01-01 to 2011-01-03
Freq: D"""
exp5 = """PeriodIndex: 3 entries, 2011 to 2013
Freq: A-DEC"""
exp6 = """PeriodIndex: 3 entries, 2011-01-01 09:00 to NaT
Freq: H"""
exp7 = """PeriodIndex: 1 entries, 2013Q1 to 2013Q1
Freq: Q-DEC"""
exp8 = """PeriodIndex: 2 entries, 2013Q1 to 2013Q2
Freq: Q-DEC"""
exp9 = """PeriodIndex: 3 entries, 2013Q1 to 2013Q3
Freq: Q-DEC"""
for idx, expected in zip([idx1, idx2, idx3, idx4, idx5,
idx6, idx7, idx8, idx9],
[exp1, exp2, exp3, exp4, exp5,
exp6, exp7, exp8, exp9]):
result = idx._summary()
assert result == expected
@@ -1,649 +0,0 @@
from datetime import datetime, timedelta
import numpy as np
import pytest
from pandas._libs.tslibs import period as libperiod
from pandas.compat import lrange
import pandas as pd
from pandas import (
DatetimeIndex, Period, PeriodIndex, Series, notna, period_range)
from pandas.util import testing as tm
class TestGetItem(object):
def test_ellipsis(self):
# GH#21282
idx = period_range('2011-01-01', '2011-01-31', freq='D',
name='idx')
result = idx[...]
assert result.equals(idx)
assert result is not idx
def test_getitem(self):
idx1 = pd.period_range('2011-01-01', '2011-01-31', freq='D',
name='idx')
for idx in [idx1]:
result = idx[0]
assert result == pd.Period('2011-01-01', freq='D')
result = idx[-1]
assert result == pd.Period('2011-01-31', freq='D')
result = idx[0:5]
expected = pd.period_range('2011-01-01', '2011-01-05', freq='D',
name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == 'D'
result = idx[0:10:2]
expected = pd.PeriodIndex(['2011-01-01', '2011-01-03',
'2011-01-05',
'2011-01-07', '2011-01-09'],
freq='D', name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == 'D'
result = idx[-20:-5:3]
expected = pd.PeriodIndex(['2011-01-12', '2011-01-15',
'2011-01-18',
'2011-01-21', '2011-01-24'],
freq='D', name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == 'D'
result = idx[4::-1]
expected = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-03',
'2011-01-02', '2011-01-01'],
freq='D', name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == 'D'
def test_getitem_index(self):
idx = period_range('2007-01', periods=10, freq='M', name='x')
result = idx[[1, 3, 5]]
exp = pd.PeriodIndex(['2007-02', '2007-04', '2007-06'],
freq='M', name='x')
tm.assert_index_equal(result, exp)
result = idx[[True, True, False, False, False,
True, True, False, False, False]]
exp = pd.PeriodIndex(['2007-01', '2007-02', '2007-06', '2007-07'],
freq='M', name='x')
tm.assert_index_equal(result, exp)
def test_getitem_partial(self):
rng = period_range('2007-01', periods=50, freq='M')
ts = Series(np.random.randn(len(rng)), rng)
pytest.raises(KeyError, ts.__getitem__, '2006')
result = ts['2008']
assert (result.index.year == 2008).all()
result = ts['2008':'2009']
assert len(result) == 24
result = ts['2008-1':'2009-12']
assert len(result) == 24
result = ts['2008Q1':'2009Q4']
assert len(result) == 24
result = ts[:'2009']
assert len(result) == 36
result = ts['2009':]
assert len(result) == 50 - 24
exp = result
result = ts[24:]
tm.assert_series_equal(exp, result)
ts = ts[10:].append(ts[10:])
msg = "left slice bound for non-unique label: '2008'"
with pytest.raises(KeyError, match=msg):
ts[slice('2008', '2009')]
def test_getitem_datetime(self):
rng = period_range(start='2012-01-01', periods=10, freq='W-MON')
ts = Series(lrange(len(rng)), index=rng)
dt1 = datetime(2011, 10, 2)
dt4 = datetime(2012, 4, 20)
rs = ts[dt1:dt4]
tm.assert_series_equal(rs, ts)
def test_getitem_nat(self):
idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M')
assert idx[0] == pd.Period('2011-01', freq='M')
assert idx[1] is pd.NaT
s = pd.Series([0, 1, 2], index=idx)
assert s[pd.NaT] == 1
s = pd.Series(idx, index=idx)
assert (s[pd.Period('2011-01', freq='M')] ==
pd.Period('2011-01', freq='M'))
assert s[pd.NaT] is pd.NaT
def test_getitem_list_periods(self):
# GH 7710
rng = period_range(start='2012-01-01', periods=10, freq='D')
ts = Series(lrange(len(rng)), index=rng)
exp = ts.iloc[[1]]
tm.assert_series_equal(ts[[Period('2012-01-02', freq='D')]], exp)
def test_getitem_seconds(self):
# GH#6716
didx = pd.date_range(start='2013/01/01 09:00:00', freq='S',
periods=4000)
pidx = period_range(start='2013/01/01 09:00:00', freq='S',
periods=4000)
for idx in [didx, pidx]:
# getitem against index should raise ValueError
values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H',
'2013/02/01 09:00']
for v in values:
# GH7116
# these show deprecations as we are trying
# to slice with non-integer indexers
# with pytest.raises(IndexError):
# idx[v]
continue
s = Series(np.random.rand(len(idx)), index=idx)
tm.assert_series_equal(s['2013/01/01 10:00'], s[3600:3660])
tm.assert_series_equal(s['2013/01/01 9H'], s[:3600])
for d in ['2013/01/01', '2013/01', '2013']:
tm.assert_series_equal(s[d], s)
def test_getitem_day(self):
# GH#6716
# Confirm DatetimeIndex and PeriodIndex works identically
didx = pd.date_range(start='2013/01/01', freq='D', periods=400)
pidx = period_range(start='2013/01/01', freq='D', periods=400)
for idx in [didx, pidx]:
# getitem against index should raise ValueError
values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H',
'2013/02/01 09:00']
for v in values:
# GH7116
# these show deprecations as we are trying
# to slice with non-integer indexers
# with pytest.raises(IndexError):
# idx[v]
continue
s = Series(np.random.rand(len(idx)), index=idx)
tm.assert_series_equal(s['2013/01'], s[0:31])
tm.assert_series_equal(s['2013/02'], s[31:59])
tm.assert_series_equal(s['2014'], s[365:])
invalid = ['2013/02/01 9H', '2013/02/01 09:00']
for v in invalid:
with pytest.raises(KeyError):
s[v]
class TestWhere(object):
    """Tests for ``PeriodIndex.where``."""

    @pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
    def test_where(self, klass):
        """``where`` accepts the condition as any of the parametrized
        list-like containers."""
        i = period_range('20130101', periods=5, freq='D')

        # All-True condition: the index comes back unchanged.
        cond = [True] * len(i)
        expected = i
        result = i.where(klass(cond))
        tm.assert_index_equal(result, expected)

        # First element masked out: it becomes NaT.
        cond = [False] + [True] * (len(i) - 1)
        expected = PeriodIndex([pd.NaT] + i[1:].tolist(), freq='D')
        result = i.where(klass(cond))
        tm.assert_index_equal(result, expected)

    def test_where_other(self):
        """``where`` with an explicit ``other`` argument."""
        i = period_range('20130101', periods=5, freq='D')

        # Each missing-value sentinel is passed as ``other`` in turn
        # (previously the loop variable was ignored and np.nan was
        # hard-coded, so pd.NaT was never exercised).
        for arr in [np.nan, pd.NaT]:
            result = i.where(notna(i), other=arr)
            expected = i
            tm.assert_index_equal(result, expected)

        i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(),
                            freq='D')

        # ``other`` given as a PeriodIndex ...
        result = i.where(notna(i2), i2)
        tm.assert_index_equal(result, i2)

        # ... and as its ``.values``.
        result = i.where(notna(i2), i2.values)
        tm.assert_index_equal(result, i2)
class TestTake(object):
    """Tests for ``PeriodIndex.take``."""

    def test_take(self):
        """Positional ``take`` returns the selected periods with freq 'D'."""
        # GH#10295
        idx = pd.period_range('2011-01-01', '2011-01-31', freq='D',
                              name='idx')

        # A one-element take is compared element-wise with a scalar Period.
        result = idx.take([0])
        assert result == pd.Period('2011-01-01', freq='D')

        result = idx.take([5])
        assert result == pd.Period('2011-01-06', freq='D')

        cases = [
            ([0, 1, 2],
             pd.period_range('2011-01-01', '2011-01-03', freq='D',
                             name='idx')),
            ([0, 2, 4],
             pd.PeriodIndex(['2011-01-01', '2011-01-03', '2011-01-05'],
                            freq='D', name='idx')),
            ([7, 4, 1],
             pd.PeriodIndex(['2011-01-08', '2011-01-05', '2011-01-02'],
                            freq='D', name='idx')),
            ([3, 2, 5],
             PeriodIndex(['2011-01-04', '2011-01-03', '2011-01-06'],
                         freq='D', name='idx')),
            ([-3, 2, 5],
             PeriodIndex(['2011-01-29', '2011-01-03', '2011-01-06'],
                         freq='D', name='idx')),
        ]
        for positions, expected in cases:
            result = idx.take(positions)
            tm.assert_index_equal(result, expected)
            assert result.freq == expected.freq
            assert result.freq == 'D'

    def test_take_misc(self):
        """``take`` and list-based ``__getitem__`` give the same result."""
        index = period_range(start='1/1/10', end='12/31/12', freq='D',
                             name='idx')
        expected = PeriodIndex([datetime(2010, 1, 6), datetime(2010, 1, 7),
                                datetime(2010, 1, 9), datetime(2010, 1, 13)],
                               freq='D', name='idx')

        positions = [5, 6, 8, 12]
        via_take = index.take(positions)
        via_getitem = index[positions]

        for taken in (via_take, via_getitem):
            tm.assert_index_equal(taken, expected)
            assert isinstance(taken, PeriodIndex)
            assert taken.freq == index.freq
            assert taken.name == expected.name

    def test_take_fill_value(self):
        """``take`` with ``fill_value`` / ``allow_fill`` combinations."""
        # GH#12631
        idx = pd.PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'],
                             name='xxx', freq='D')
        wrapped = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'],
                                 name='xxx', freq='D')
        filled = pd.PeriodIndex(['2011-02-01', '2011-01-01', 'NaT'],
                                name='xxx', freq='D')

        # No fill_value: -1 addresses the last element.
        result = idx.take(np.array([1, 0, -1]))
        tm.assert_index_equal(result, wrapped)

        # fill_value given: -1 yields NaT.
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        tm.assert_index_equal(result, filled)

        # allow_fill=False: -1 addresses the last element again.
        result = idx.take(np.array([1, 0, -1]), allow_fill=False,
                          fill_value=True)
        tm.assert_index_equal(result, wrapped)

        msg = ('When allow_fill=True and fill_value is not None, '
               'all indices must be >= -1')
        for bad in ([1, 0, -2], [1, 0, -5]):
            with pytest.raises(ValueError, match=msg):
                idx.take(np.array(bad), fill_value=True)

        with pytest.raises(IndexError):
            idx.take(np.array([1, -5]))
class TestIndexing(object):
def test_get_loc_msg(self):
    """``get_loc`` on a missing Period raises KeyError carrying that Period."""
    idx = period_range('2000-1-1', freq='A', periods=10)
    bad_period = Period('2012', 'A')

    # Capture the exception so the check on its payload always runs; a
    # bare try/except would pass silently if nothing were raised.
    with pytest.raises(KeyError) as excinfo:
        idx.get_loc(bad_period)
    assert excinfo.value.args[0] == bad_period
def test_get_loc_nat(self):
    """Every missing-value spelling locates the NaT entry at position 1."""
    dti = DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03'])
    pi = PeriodIndex(['2011-01-01', 'NaT', '2011-01-03'], freq='M')

    # check DatetimeIndex compat
    for index in (dti, pi):
        for missing in (pd.NaT, None, float('nan'), np.nan):
            assert index.get_loc(missing) == 1
def test_get_loc(self):
    """``get_loc`` on unique, duplicated-monotonic and unsorted indexes."""
    # GH 17717
    first = pd.Period('2017-09-01')
    second = pd.Period('2017-09-02')
    third = pd.Period('2017-09-03')

    # monotonic increasing: without duplicates, then with duplicates
    unique_idx = pd.PeriodIndex([first, second, third])
    dup_idx = pd.PeriodIndex([second, second, third])

    cases = [(unique_idx, 1, 2), (dup_idx, slice(0, 2), 2)]
    for index, loc_second, loc_third in cases:
        assert index.get_loc(second) == loc_second
        assert index.get_loc(str(second)) == loc_second
        assert index.get_loc(third) == loc_third
        assert index.get_loc(str(third)) == loc_third

        msg = "Cannot interpret 'foo' as period"
        with pytest.raises(KeyError, match=msg):
            index.get_loc('foo')
        pytest.raises(KeyError, index.get_loc, 1.1)
        pytest.raises(TypeError, index.get_loc, index)

    # non-monotonic with duplicates: the duplicated key gives a bool mask
    unsorted_idx = pd.PeriodIndex([third, second, third])
    loc_second = 1
    mask_third = np.array([True, False, True])

    assert unsorted_idx.get_loc(second) == loc_second
    assert unsorted_idx.get_loc(str(second)) == loc_second
    tm.assert_numpy_array_equal(unsorted_idx.get_loc(third), mask_third)
    tm.assert_numpy_array_equal(unsorted_idx.get_loc(str(third)),
                                mask_third)
def test_is_monotonic_increasing(self):
# GH 17717
p0 = pd.Period('2017-09-01')
p1 = pd.Period('2017-09-02')
p2 = pd.Period('2017-09-03')
idx_inc0 = pd.PeriodIndex([p0, p1, p2])
idx_inc1 = pd.PeriodIndex([p0, p1, p1])
idx_dec0 = pd.PeriodIndex([p2, p1, p0])
idx_dec1 = pd.PeriodIndex([p2, p1, p1])
idx = pd.PeriodIndex([p1, p2, p0])
assert idx_inc0.is_monotonic_increasing is True
assert idx_inc1.is_monotonic_increasing is True
assert idx_dec0.is_monotonic_increasing is False
assert idx_dec1.is_monotonic_increasing is False
assert idx.is_monotonic_increasing is False
def test_is_monotonic_decreasing(self):
# GH 17717
p0 = pd.Period('2017-09-01')
p1 = pd.Period('2017-09-02')
p2 = pd.Period('2017-09-03')
idx_inc0 = pd.PeriodIndex([p0, p1, p2])
idx_inc1 = pd.PeriodIndex([p0, p1, p1])
idx_dec0 = pd.PeriodIndex([p2, p1, p0])
idx_dec1 = pd.PeriodIndex([p2, p1, p1])
idx = pd.PeriodIndex([p1, p2, p0])
assert idx_inc0.is_monotonic_decreasing is False
assert idx_inc1.is_monotonic_decreasing is False
assert idx_dec0.is_monotonic_decreasing is True
assert idx_dec1.is_monotonic_decreasing is True
assert idx.is_monotonic_decreasing is False
def test_is_unique(self):
# GH 17717
p0 = pd.Period('2017-09-01')
p1 = pd.Period('2017-09-02')
p2 = pd.Period('2017-09-03')
idx0 = pd.PeriodIndex([p0, p1, p2])
assert idx0.is_unique is True
idx1 = pd.PeriodIndex([p1, p1, p2])
assert idx1.is_unique is False
def test_contains(self):
    """Membership via ``contains`` and ``in`` for Periods and strings."""
    # GH 17717
    members = [pd.Period('2017-09-01'),
               pd.Period('2017-09-02'),
               pd.Period('2017-09-03')]
    outsider = pd.Period('2017-09-04')
    index = pd.PeriodIndex(members)

    # Each member is found both as a Period and as its string form.
    for period in members:
        assert index.contains(period)
        assert period in index

        assert index.contains(str(period))
        assert str(period) in index

    # Strings at a different resolution than the index are also found.
    for text in ('2017-09-01 00:00:01', '2017-09'):
        assert index.contains(text)
        assert text in index

    assert not index.contains(outsider)
    assert outsider not in index
def test_get_value(self):
    """``get_value`` for a unique key and for duplicated keys."""
    # GH 17717
    a = pd.Period('2017-09-01')
    b = pd.Period('2017-09-02')
    c = pd.Period('2017-09-03')

    # unique key -> scalar
    unique_idx = pd.PeriodIndex([a, b, c])
    scalar = unique_idx.get_value(np.array([1, 2, 3]), b)
    assert scalar == 2

    # adjacent duplicates -> array of both values
    adjacent = pd.PeriodIndex([b, b, c])
    picked = adjacent.get_value(np.array([1, 2, 3]), b)
    tm.assert_numpy_array_equal(picked, np.array([1, 2]))

    # separated duplicates -> array of both values
    separated = pd.PeriodIndex([b, c, b])
    picked = separated.get_value(np.array([1, 2, 3]), b)
    tm.assert_numpy_array_equal(picked, np.array([1, 3]))
def test_get_indexer(self):
    """``get_indexer`` with exact, pad, backfill and nearest matching."""
    # GH 17717
    idx = pd.PeriodIndex([pd.Period('2017-09-01'),
                          pd.Period('2017-09-04'),
                          pd.Period('2017-09-07')])

    tm.assert_numpy_array_equal(idx.get_indexer(idx),
                                np.array([0, 1, 2], dtype=np.intp))

    target = pd.PeriodIndex([pd.Period('2017-08-31'),
                             pd.Period('2017-09-02'),
                             pd.Period('2017-09-05'),
                             pd.Period('2017-09-09')])

    expected_by_method = [
        ('pad', [-1, 0, 1, 2]),
        ('backfill', [0, 1, 2, -1]),
        ('nearest', [0, 0, 1, 2]),
    ]
    for method, positions in expected_by_method:
        tm.assert_numpy_array_equal(idx.get_indexer(target, method),
                                    np.array(positions, dtype=np.intp))

    # With a one-day tolerance the last target no longer matches.
    res = idx.get_indexer(target, 'nearest',
                          tolerance=pd.Timedelta('1 day'))
    tm.assert_numpy_array_equal(res,
                                np.array([0, 0, 1, -1], dtype=np.intp))
def test_get_indexer_non_unique(self):
    """``get_indexer_non_unique`` returns the indexer and missing positions."""
    # GH 17717
    d2 = pd.Period('2017-09-02')
    d3 = pd.Period('2017-09-03')
    d4 = pd.Period('2017-09-04')
    d5 = pd.Period('2017-09-05')

    source = pd.PeriodIndex([d2, d3, d2])
    target = pd.PeriodIndex([d3, d2, d4, d5])

    indexer, missing = source.get_indexer_non_unique(target)

    tm.assert_numpy_array_equal(
        indexer, np.array([1, 0, 2, -1, -1], dtype=np.intp))
    tm.assert_numpy_array_equal(
        missing, np.array([2, 3], dtype=np.int64))
# TODO: This method came from test_period; de-dup with version above
def test_get_loc2(self):
    """``get_loc`` with ``method`` and ``tolerance`` arguments."""
    idx = pd.period_range('2000-01-01', periods=3)
    middle = idx[1]

    # The same element is found for every fill method and key spelling.
    for method in [None, 'pad', 'backfill', 'nearest']:
        assert idx.get_loc(middle, method) == 1
        assert idx.get_loc(middle.asfreq('H', how='start'), method) == 1
        assert idx.get_loc(middle.to_timestamp(), method) == 1
        assert idx.get_loc(middle.to_timestamp().to_pydatetime(),
                           method) == 1
        assert idx.get_loc(str(middle), method) == 1

    idx = pd.period_range('2000-01-01', periods=5)[::2]

    # One day of tolerance, written four different ways.
    one_day_spellings = ['1 day', pd.Timedelta('1D'),
                         np.timedelta64(1, 'D'), timedelta(1)]
    for tol in one_day_spellings:
        assert idx.get_loc('2000-01-02T12', method='nearest',
                           tolerance=tol) == 1

    msg = 'unit abbreviation w/o a number'
    with pytest.raises(ValueError, match=msg):
        idx.get_loc('2000-01-10', method='nearest', tolerance='foo')

    msg = 'Input has different freq=None from PeriodArray\\(freq=D\\)'
    with pytest.raises(ValueError, match=msg):
        idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour')

    with pytest.raises(KeyError):
        idx.get_loc('2000-01-10', method='nearest', tolerance='1 day')

    size_msg = 'list-like tolerance size must match target index size'
    with pytest.raises(ValueError, match=size_msg):
        idx.get_loc('2000-01-10', method='nearest',
                    tolerance=[pd.Timedelta('1 day').to_timedelta64(),
                               pd.Timedelta('1 day').to_timedelta64()])
# TODO: This method came from test_period; de-dup with version above
def test_get_indexer2(self):
    """``get_indexer`` on an hourly index with scalar and list tolerances."""
    idx = pd.period_range('2000-01-01', periods=3).asfreq('H', how='start')
    tm.assert_numpy_array_equal(idx.get_indexer(idx),
                                np.array([0, 1, 2], dtype=np.intp))

    target = pd.PeriodIndex(['1999-12-31T23', '2000-01-01T12',
                             '2000-01-02T01'], freq='H')

    for method, positions in [('pad', [-1, 0, 1]),
                              ('backfill', [0, 1, 2]),
                              ('nearest', [0, 1, 1])]:
        tm.assert_numpy_array_equal(idx.get_indexer(target, method),
                                    np.array(positions, dtype=np.intp))

    res = idx.get_indexer(target, 'nearest', tolerance='1 hour')
    tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp))

    msg = 'Input has different freq=None from PeriodArray\\(freq=H\\)'
    with pytest.raises(ValueError, match=msg):
        idx.get_indexer(target, 'nearest', tolerance='1 minute')

    res = idx.get_indexer(target, 'nearest', tolerance='1 day')
    tm.assert_numpy_array_equal(res, np.array([0, 1, 1], dtype=np.intp))

    # One tolerance per target element.
    tol_raw = [pd.Timedelta('1 hour'),
               pd.Timedelta('1 hour'),
               np.timedelta64(1, 'D'), ]
    res = idx.get_indexer(target, 'nearest',
                          tolerance=[np.timedelta64(x) for x in tol_raw])
    tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp))

    tol_bad = [pd.Timedelta('2 hour').to_timedelta64(),
               pd.Timedelta('1 hour').to_timedelta64(),
               np.timedelta64(1, 'M'), ]
    with pytest.raises(libperiod.IncompatibleFrequency,
                       match='Input has different freq=None from'):
        idx.get_indexer(target, 'nearest', tolerance=tol_bad)
def test_indexing(self):
# GH 4390, iat incorrectly indexing
index = period_range('1/1/2001', periods=10)
s = Series(np.random.randn(10), index=index)
expected = s[index[0]]
result = s.iat[0]
assert expected == result
def test_period_index_indexer(self):
    """``DataFrame.loc`` accepts a PeriodIndex, a list of Periods, and a
    slice of the index as the row indexer."""
    # GH4125
    idx = pd.period_range('2002-01', '2003-12', freq='M')
    # Use numpy directly; the ``pd.np`` alias is deprecated.
    df = pd.DataFrame(np.random.randn(24, 10), index=idx)

    tm.assert_frame_equal(df, df.loc[idx])
    tm.assert_frame_equal(df, df.loc[list(idx)])
    tm.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]])
@@ -1,329 +0,0 @@
import numpy as np
import pytest
import pandas as pd
from pandas import DatetimeIndex, Index, NaT, PeriodIndex, Series
from pandas.core.arrays import PeriodArray
from pandas.tests.test_base import Ops
import pandas.util.testing as tm
class TestPeriodIndexOps(Ops):
def setup_method(self, method):
    """Split the inherited ``self.objs`` into datetime-like indexes and
    everything else."""
    super(TestPeriodIndexOps, self).setup_method(method)

    def _is_dt_or_period_index(obj):
        return isinstance(obj, (DatetimeIndex, PeriodIndex))

    self.is_valid_objs = [o for o in self.objs
                          if _is_dt_or_period_index(o)]
    self.not_valid_objs = [o for o in self.objs
                           if not _is_dt_or_period_index(o)]
def test_ops_properties(self):
    """Run ``check_ops_properties`` for each PeriodArray property group,
    restricted to PeriodIndex objects."""
    def only_period_index(obj):
        return isinstance(obj, PeriodIndex)

    for props in (PeriodArray._field_ops,
                  PeriodArray._object_ops,
                  PeriodArray._bool_ops):
        self.check_ops_properties(props, only_period_index)
def test_resolution(self):
    """``resolution`` string expected for each period frequency alias."""
    expected_by_freq = [
        ('A', 'day'), ('Q', 'day'), ('M', 'day'), ('D', 'day'),
        ('H', 'hour'), ('T', 'minute'), ('S', 'second'),
        ('L', 'millisecond'), ('U', 'microsecond'),
    ]
    for freq, expected in expected_by_freq:
        idx = pd.period_range(start='2013-04-01', periods=30, freq=freq)
        assert idx.resolution == expected
def test_value_counts_unique(self):
    """``value_counts`` and ``unique`` on a PeriodIndex and on a Series
    wrapping it, with and without NaT."""
    # GH 7735
    hourly = pd.period_range('2011-01-01 09:00', freq='H', periods=10)
    # create repeated values, 'n'th element is repeated by n+1 times
    repeats = range(1, len(hourly) + 1)
    idx = PeriodIndex(np.repeat(hourly._values, repeats), freq='H')

    exp_idx = PeriodIndex(['2011-01-01 18:00', '2011-01-01 17:00',
                           '2011-01-01 16:00', '2011-01-01 15:00',
                           '2011-01-01 14:00', '2011-01-01 13:00',
                           '2011-01-01 12:00', '2011-01-01 11:00',
                           '2011-01-01 10:00',
                           '2011-01-01 09:00'], freq='H')
    expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')
    for obj in [idx, Series(idx)]:
        tm.assert_series_equal(obj.value_counts(), expected)

    expected = pd.period_range('2011-01-01 09:00', freq='H', periods=10)
    tm.assert_index_equal(idx.unique(), expected)

    idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 09:00',
                       '2013-01-01 09:00', '2013-01-01 08:00',
                       '2013-01-01 08:00', NaT], freq='H')

    # Default value_counts leaves NaT out ...
    exp_idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 08:00'],
                          freq='H')
    expected = Series([3, 2], index=exp_idx)
    for obj in [idx, Series(idx)]:
        tm.assert_series_equal(obj.value_counts(), expected)

    # ... while dropna=False counts it.
    exp_idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 08:00',
                           NaT], freq='H')
    expected = Series([3, 2, 1], index=exp_idx)
    for obj in [idx, Series(idx)]:
        tm.assert_series_equal(obj.value_counts(dropna=False), expected)

    tm.assert_index_equal(idx.unique(), exp_idx)
def test_drop_duplicates_metadata(self):
    """``drop_duplicates`` returns an equal index with the same freq."""
    # GH 10115
    idx = pd.period_range('2011-01-01', '2011-01-31', freq='D', name='idx')

    deduped = idx.drop_duplicates()
    tm.assert_index_equal(idx, deduped)
    assert idx.freq == deduped.freq

    doubled = idx.append(idx)  # freq will not be reset
    deduped = doubled.drop_duplicates()
    tm.assert_index_equal(idx, deduped)
    assert idx.freq == deduped.freq
def test_drop_duplicates(self):
    """``drop_duplicates`` with each ``keep`` option, on the index and on a
    Series wrapping it."""
    # to check Index/Series compat
    base = pd.period_range('2011-01-01', '2011-01-31', freq='D',
                           name='idx')
    # The first five entries appear a second time at the end.
    idx = base.append(base[:5])

    # default keep: first occurrences survive
    tm.assert_index_equal(idx.drop_duplicates(), base)
    tm.assert_series_equal(Series(idx).drop_duplicates(), Series(base))

    # keep='last'
    exp = base[5:].append(base[:5])
    tm.assert_index_equal(idx.drop_duplicates(keep='last'), exp)
    tm.assert_series_equal(Series(idx).drop_duplicates(keep='last'),
                           Series(exp, index=np.arange(5, 36)))

    # keep=False: every duplicated value is dropped
    tm.assert_index_equal(idx.drop_duplicates(keep=False), base[5:])
    tm.assert_series_equal(Series(idx).drop_duplicates(keep=False),
                           Series(base[5:], index=np.arange(5, 31)))
def test_order_compat(self):
    """``sort_values`` on a PeriodIndex matches a plain Index of the same
    years, and keeps freq."""
    def _check_freq(index, expected_index):
        # Only PeriodIndex carries a freq to compare.
        if isinstance(index, PeriodIndex):
            assert index.freq == expected_index.freq

    # Already-sorted input.
    pidx = PeriodIndex(['2011', '2012', '2013'], name='pidx', freq='A')
    # for compatibility check
    iidx = Index([2011, 2012, 2013], name='idx')
    for idx in [pidx, iidx]:
        flipped = idx[::-1]

        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        _check_freq(ordered, idx)

        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, flipped)
        _check_freq(ordered, flipped)

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]),
                                    check_dtype=False)
        _check_freq(ordered, idx)

        ordered, indexer = idx.sort_values(return_indexer=True,
                                           ascending=False)
        tm.assert_index_equal(ordered, flipped)
        tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]),
                                    check_dtype=False)
        _check_freq(ordered, flipped)

    # Unsorted input with a repeated value.
    pidx = PeriodIndex(['2011', '2013', '2015', '2012',
                        '2011'], name='pidx', freq='A')
    pexpected = PeriodIndex(
        ['2011', '2011', '2012', '2013', '2015'], name='pidx', freq='A')
    # for compatibility check
    iidx = Index([2011, 2013, 2015, 2012, 2011], name='idx')
    iexpected = Index([2011, 2011, 2012, 2013, 2015], name='idx')
    for idx, expected in [(pidx, pexpected), (iidx, iexpected)]:
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, expected)
        _check_freq(ordered, idx)

        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        _check_freq(ordered, idx)

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, expected)
        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        _check_freq(ordered, idx)

        ordered, indexer = idx.sort_values(return_indexer=True,
                                           ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        exp = np.array([2, 1, 3, 4, 0])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        _check_freq(ordered, idx)

    # NaT sorts first ascending and last descending.
    pidx = PeriodIndex(['2011', '2013', 'NaT', '2011'], name='pidx',
                       freq='D')

    result = pidx.sort_values()
    expected = PeriodIndex(['NaT', '2011', '2011', '2013'],
                           name='pidx', freq='D')
    tm.assert_index_equal(result, expected)
    assert result.freq == 'D'

    result = pidx.sort_values(ascending=False)
    expected = PeriodIndex(
        ['2013', '2011', '2011', 'NaT'], name='pidx', freq='D')
    tm.assert_index_equal(result, expected)
    assert result.freq == 'D'
def test_order(self):
    """``sort_values`` keeps freq for sorted, unsorted and NaT-containing
    PeriodIndexes."""
    # Already-sorted input at several multiples of the daily freq.
    for freq in ['D', '2D', '4D']:
        idx = PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
                          freq=freq, name='idx')

        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        assert ordered.freq == idx.freq

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == expected.freq
        assert ordered.freq == freq

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]),
                                    check_dtype=False)
        assert ordered.freq == idx.freq
        assert ordered.freq == freq

        ordered, indexer = idx.sort_values(return_indexer=True,
                                           ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]),
                                    check_dtype=False)
        assert ordered.freq == expected.freq
        assert ordered.freq == freq

    # Unsorted inputs; the third one contains NaT.
    idx1 = PeriodIndex(['2011-01-01', '2011-01-03', '2011-01-05',
                        '2011-01-02', '2011-01-01'], freq='D', name='idx1')
    exp1 = PeriodIndex(['2011-01-01', '2011-01-01', '2011-01-02',
                        '2011-01-03', '2011-01-05'], freq='D', name='idx1')

    idx2 = PeriodIndex(['2011-01-01', '2011-01-03', '2011-01-05',
                        '2011-01-02', '2011-01-01'],
                       freq='D', name='idx2')
    exp2 = PeriodIndex(['2011-01-01', '2011-01-01', '2011-01-02',
                        '2011-01-03', '2011-01-05'],
                       freq='D', name='idx2')

    idx3 = PeriodIndex([NaT, '2011-01-03', '2011-01-05',
                        '2011-01-02', NaT], freq='D', name='idx3')
    exp3 = PeriodIndex([NaT, NaT, '2011-01-02', '2011-01-03',
                        '2011-01-05'], freq='D', name='idx3')

    asc_indexer = np.array([0, 4, 3, 1, 2])
    desc_indexer = np.array([2, 1, 3, 4, 0])
    for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]:
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == 'D'

        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq == 'D'

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer, asc_indexer,
                                    check_dtype=False)
        assert ordered.freq == 'D'

        ordered, indexer = idx.sort_values(return_indexer=True,
                                           ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        tm.assert_numpy_array_equal(indexer, desc_indexer,
                                    check_dtype=False)
        assert ordered.freq == 'D'
def test_shift(self):
# This is tested in test_arithmetic
pass
def test_nat(self):
    """NA-related attributes of PeriodIndex with and without a NaT entry."""
    assert pd.PeriodIndex._na_value is NaT
    assert pd.PeriodIndex([], freq='M')._na_value is NaT

    # No missing values.
    complete = pd.PeriodIndex(['2011-01-01', '2011-01-02'], freq='D')
    assert complete._can_hold_na
    tm.assert_numpy_array_equal(complete._isnan, np.array([False, False]))
    assert complete.hasnans is False
    tm.assert_numpy_array_equal(complete._nan_idxs,
                                np.array([], dtype=np.intp))

    # One missing value at position 1.
    with_nat = pd.PeriodIndex(['2011-01-01', 'NaT'], freq='D')
    assert with_nat._can_hold_na
    tm.assert_numpy_array_equal(with_nat._isnan, np.array([False, True]))
    assert with_nat.hasnans is True
    tm.assert_numpy_array_equal(with_nat._nan_idxs,
                                np.array([1], dtype=np.intp))
@pytest.mark.parametrize('freq', ['D', 'M'])
def test_equals(self, freq):
    """``equals`` is True for same-freq copies/object casts and False for
    lists, Series, and indexes with a different freq."""
    # GH#13107
    idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'],
                         freq=freq)
    as_object = idx.astype(object)

    assert idx.equals(idx)
    assert idx.equals(idx.copy())
    assert idx.equals(as_object)
    assert as_object.equals(idx)
    assert as_object.equals(as_object)
    assert not idx.equals(list(idx))
    assert not idx.equals(pd.Series(idx))

    def _assert_not_equal(other):
        # ``other`` differs from ``idx`` in every supported representation.
        assert not idx.equals(other)
        assert not idx.equals(other.copy())
        assert not idx.equals(other.astype(object))
        assert not idx.astype(object).equals(other)
        assert not idx.equals(list(other))
        assert not idx.equals(pd.Series(other))

    # Same strings parsed at hourly freq.
    idx2 = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'],
                          freq='H')
    _assert_not_equal(idx2)

    # Same underlying i8 values, but a different freq.
    idx3 = pd.PeriodIndex._simple_new(
        idx._values._simple_new(idx._values.asi8, freq="H")
    )
    tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
    _assert_not_equal(idx3)
def test_freq_setter_deprecated(self):
    """Reading ``freq`` is silent; assigning it emits a FutureWarning."""
    # GH 20678
    quarterly = pd.period_range('2018Q1', periods=4, freq='Q')

    # no warning for getter
    with tm.assert_produces_warning(None):
        quarterly.freq

    # warning for setter
    with tm.assert_produces_warning(FutureWarning):
        quarterly.freq = pd.offsets.Day()
@@ -1,132 +0,0 @@
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Period, Series, period_range
from pandas.util import testing as tm
class TestPeriodIndex(object):
def setup_method(self, method):
    """No per-test setup is performed."""
def test_slice_with_negative_step(self):
    """Label slices with step -1 select the same rows as the matching
    positional slices, for Period and string bounds alike."""
    ts = Series(np.arange(20),
                period_range('2014-01', periods=20, freq='M'))
    SLC = pd.IndexSlice

    def assert_slices_equivalent(l_slc, i_slc):
        # Label slice via __getitem__ and via .loc must both match the
        # positional slice (a duplicated .loc check was removed).
        tm.assert_series_equal(ts[l_slc], ts.iloc[i_slc])
        tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc])

    assert_slices_equivalent(SLC[Period('2014-10')::-1], SLC[9::-1])
    assert_slices_equivalent(SLC['2014-10'::-1], SLC[9::-1])

    assert_slices_equivalent(SLC[:Period('2014-10'):-1], SLC[:8:-1])
    assert_slices_equivalent(SLC[:'2014-10':-1], SLC[:8:-1])

    assert_slices_equivalent(SLC['2015-02':'2014-10':-1], SLC[13:8:-1])
    assert_slices_equivalent(SLC[Period('2015-02'):Period('2014-10'):-1],
                             SLC[13:8:-1])
    assert_slices_equivalent(SLC['2015-02':Period('2014-10'):-1],
                             SLC[13:8:-1])
    assert_slices_equivalent(SLC[Period('2015-02'):'2014-10':-1],
                             SLC[13:8:-1])

    # Start before stop with a negative step selects nothing.
    assert_slices_equivalent(SLC['2014-10':'2015-02':-1], SLC[:0])
def test_slice_with_zero_step_raises(self):
    """A zero slice step raises ValueError via ``__getitem__`` and ``.loc``."""
    ts = Series(np.arange(20),
                period_range('2014-01', periods=20, freq='M'))

    with pytest.raises(ValueError, match='slice step cannot be zero'):
        ts[::0]
    # The .loc check used to appear twice; once is sufficient.
    with pytest.raises(ValueError, match='slice step cannot be zero'):
        ts.loc[::0]
def test_slice_keep_name(self):
idx = period_range('20010101', periods=10, freq='D', name='bob')
assert idx.name == idx[1:].name
def test_pindex_slice_index(self):
    """A year string selects that year's twelve months of a monthly Series."""
    pi = period_range(start='1/1/10', end='12/31/12', freq='M')
    s = Series(np.random.rand(len(pi)), index=pi)

    for year, rows in [('2010', slice(0, 12)), ('2011', slice(12, 24))]:
        res = s[year]
        exp = s[rows]
        tm.assert_series_equal(res, exp)
def test_range_slice_day(self):
    """String range slices on daily DatetimeIndex/PeriodIndex-backed Series."""
    # GH#6716
    didx = pd.date_range(start='2013/01/01', freq='D', periods=400)
    pidx = period_range(start='2013/01/01', freq='D', periods=400)

    for index in (didx, pidx):
        # String slices applied to the index itself raise TypeError.
        values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H',
                  '2013/02/01 09:00']
        for key in values:
            with pytest.raises(TypeError):
                index[key:]

        ser = Series(np.random.rand(len(index)), index=index)

        tm.assert_series_equal(ser['2013/01/02':], ser[1:])
        tm.assert_series_equal(ser['2013/01/02':'2013/01/05'], ser[1:5])
        tm.assert_series_equal(ser['2013/02':], ser[31:])
        tm.assert_series_equal(ser['2014':], ser[365:])

        invalid = ['2013/02/01 9H', '2013/02/01 09:00']
        for key in invalid:
            with pytest.raises(TypeError):
                index[key:]
def test_range_slice_seconds(self):
    """String range slices on second-frequency index-backed Series."""
    # GH#6716
    didx = pd.date_range(start='2013/01/01 09:00:00', freq='S',
                         periods=4000)
    pidx = period_range(start='2013/01/01 09:00:00', freq='S',
                        periods=4000)

    for index in (didx, pidx):
        # String slices applied to the index itself raise TypeError.
        values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H',
                  '2013/02/01 09:00']
        for key in values:
            with pytest.raises(TypeError):
                index[key:]

        ser = Series(np.random.rand(len(index)), index=index)

        tm.assert_series_equal(ser['2013/01/01 09:05':'2013/01/01 09:10'],
                               ser[300:660])
        tm.assert_series_equal(ser['2013/01/01 10:00':'2013/01/01 10:05'],
                               ser[3600:3960])
        tm.assert_series_equal(ser['2013/01/01 10H':], ser[3600:])
        tm.assert_series_equal(ser[:'2013/01/01 09:30'], ser[:1860])

        # Open-ended slices from a coarser start cover everything.
        for start in ['2013/01/01', '2013/01', '2013']:
            tm.assert_series_equal(ser[start:], ser)
def test_range_slice_outofbounds(self):
    """String range slices partly or wholly outside the index range."""
    # GH#5407
    didx = pd.date_range(start='2013/10/01', freq='D', periods=10)
    pidx = period_range(start='2013/10/01', freq='D', periods=10)

    for index in (didx, pidx):
        df = DataFrame(dict(units=[100 + i for i in range(10)]),
                       index=index)

        # Empty frame with the same index class and an int64 column.
        empty = DataFrame(index=index.__class__([], freq='D'),
                          columns=['units'])
        empty['units'] = empty['units'].astype('int64')

        head2 = df.iloc[:2]
        tm.assert_frame_equal(df['2013/09/01':'2013/09/30'], empty)
        tm.assert_frame_equal(df['2013/09/30':'2013/10/02'], head2)
        tm.assert_frame_equal(df['2013/10/01':'2013/10/02'], head2)
        tm.assert_frame_equal(df['2013/10/02':'2013/09/30'], empty)
        tm.assert_frame_equal(df['2013/10/15':'2013/10/17'], empty)
        tm.assert_frame_equal(df['2013-06':'2013-09'], empty)
        tm.assert_frame_equal(df['2013-11':'2013-12'], empty)
@@ -1,578 +0,0 @@
import numpy as np
import pytest
from pandas._libs.tslibs.period import IncompatibleFrequency
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
DataFrame, DatetimeIndex, Index, NaT, Period, PeriodIndex, Series,
date_range, offsets, period_range)
from pandas.util import testing as tm
from ..datetimelike import DatetimeLike
class TestPeriodIndex(DatetimeLike):
_holder = PeriodIndex
def setup_method(self, method):
    """Register the fixture indexes and expose them via ``setup_indices``."""
    ascending = tm.makePeriodIndex(10)
    descending = period_range('20130101', periods=10, freq='D')[::-1]
    self.indices = {'index': ascending, 'index_dec': descending}
    self.setup_indices()
def create_index(self):
    """Return a five-element daily PeriodIndex starting 2013-01-01."""
    index = period_range('20130101', periods=5, freq='D')
    return index
def test_pickle_compat_construction(self):
pass
@pytest.mark.parametrize('freq', ['D', 'M', 'A'])
def test_pickle_round_trip(self, freq):
    """A PeriodIndex containing several NA spellings survives pickling."""
    # ``np.nan`` rather than the ``np.NaN`` alias, which NumPy 2.0 removed.
    idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.nan], freq=freq)
    result = tm.round_trip_pickle(idx)
    tm.assert_index_equal(result, idx)
def test_where(self):
# This is handled in test_indexing
pass
@pytest.mark.parametrize('use_numpy', [True, False])
@pytest.mark.parametrize('index', [
    pd.period_range('2000-01-01', periods=3, freq='D'),
    pd.period_range('2001-01-01', periods=3, freq='2D'),
    pd.PeriodIndex(['2001-01', 'NaT', '2003-01'], freq='M')])
def test_repeat_freqstr(self, index, use_numpy):
    """Repeating each element three times keeps ``freqstr``, via either
    ``np.repeat`` or ``Index.repeat``."""
    # GH10183
    tripled = [p for p in index for _ in range(3)]
    expected = PeriodIndex(tripled)

    if use_numpy:
        result = np.repeat(index, 3)
    else:
        result = index.repeat(3)

    tm.assert_index_equal(result, expected)
    assert result.freqstr == index.freqstr
def test_fillna_period(self):
    """``fillna`` keeps PeriodIndex for a same-freq Period and falls back
    to an object Index otherwise."""
    # GH 11343
    idx = pd.PeriodIndex(['2011-01-01 09:00', pd.NaT,
                          '2011-01-01 11:00'], freq='H')
    nine = pd.Period('2011-01-01 09:00', freq='H')
    eleven = pd.Period('2011-01-01 11:00', freq='H')

    # Same-freq Period fill value.
    filler = pd.Period('2011-01-01 10:00', freq='H')
    exp = pd.PeriodIndex(['2011-01-01 09:00', '2011-01-01 10:00',
                          '2011-01-01 11:00'], freq='H')
    tm.assert_index_equal(idx.fillna(filler), exp)

    # String fill value.
    exp = pd.Index([nine, 'x', eleven], dtype=object)
    tm.assert_index_equal(idx.fillna('x'), exp)

    # Period fill value with a different freq.
    daily = pd.Period('2011-01-01', freq='D')
    exp = pd.Index([nine, pd.Period('2011-01-01', freq='D'), eleven],
                   dtype=object)
    tm.assert_index_equal(idx.fillna(daily), exp)
def test_no_millisecond_field(self):
    """Neither the DatetimeIndex class nor an empty instance exposes a
    ``millisecond`` attribute."""
    for target in (DatetimeIndex, DatetimeIndex([])):
        with pytest.raises(AttributeError):
            target.millisecond
@pytest.mark.parametrize("sort", [True, False])
def test_difference_freq(self, sort):
    """``difference`` of two daily PeriodIndexes keeps the freq attribute."""
    # GH14323: difference of Period MUST preserve frequency
    # but the ability to union results must be preserved
    index = period_range("20160920", "20160925", freq="D")

    # Removing an interior run leaves both ends.
    interior = period_range("20160921", "20160924", freq="D")
    expected = PeriodIndex(["20160920", "20160925"], freq='D')
    idx_diff = index.difference(interior, sort)
    tm.assert_index_equal(idx_diff, expected)
    tm.assert_attr_equal('freq', idx_diff, expected)

    # Removing the tail leaves the first two periods.
    tail = period_range("20160922", "20160925", freq="D")
    idx_diff = index.difference(tail, sort)
    expected = PeriodIndex(["20160920", "20160921"], freq='D')
    tm.assert_index_equal(idx_diff, expected)
    tm.assert_attr_equal('freq', idx_diff, expected)
def test_hash_error(self):
    """Hashing a PeriodIndex raises TypeError naming the index type."""
    index = period_range('20010101', periods=10)
    expected_msg = "unhashable type: %r" % type(index).__name__
    with pytest.raises(TypeError, match=expected_msg):
        hash(index)
def test_make_time_series(self):
index = period_range(freq='A', start='1/1/2001', end='12/1/2009')
series = Series(1, index=index)
assert isinstance(series, Series)
def test_shallow_copy_empty(self):
    """``_shallow_copy`` of an empty PeriodIndex equals the original."""
    # GH13067
    empty = PeriodIndex([], freq='M')
    copied = empty._shallow_copy()
    tm.assert_index_equal(copied, empty)
def test_shallow_copy_i8(self):
    """``_shallow_copy`` from the i8 values plus freq rebuilds the index."""
    # GH-24391
    pi = period_range("2018-01-01", periods=3, freq="2D")
    ordinals = pi.asi8
    rebuilt = pi._shallow_copy(ordinals, freq=pi.freq)
    tm.assert_index_equal(rebuilt, pi)
def test_shallow_copy_changing_freq_raises(self):
    """``_shallow_copy`` with a different freq raises IncompatibleFrequency."""
    two_daily = period_range("2018-01-01", periods=3, freq="2D")
    with pytest.raises(IncompatibleFrequency, match="are different"):
        two_daily._shallow_copy(two_daily, freq="H")
def test_dtype_str(self):
    """``dtype_str`` matches both the literal name and ``str(dtype)``."""
    for freq, expected in [('M', 'period[M]'), ('3M', 'period[3M]')]:
        pi = pd.PeriodIndex([], freq=freq)
        assert pi.dtype_str == expected
        assert pi.dtype_str == str(pi.dtype)
def test_view_asi8(self):
idx = pd.PeriodIndex([], freq='M')
exp = np.array([], dtype=np.int64)
tm.assert_numpy_array_equal(idx.view('i8'), exp)
tm.assert_numpy_array_equal(idx.asi8, exp)
idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M')
exp = np.array([492, -9223372036854775808], dtype=np.int64)
tm.assert_numpy_array_equal(idx.view('i8'), exp)
tm.assert_numpy_array_equal(idx.asi8, exp)
exp = np.array([14975, -9223372036854775808], dtype=np.int64)
idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D')
tm.assert_numpy_array_equal(idx.view('i8'), exp)
tm.assert_numpy_array_equal(idx.asi8, exp)
def test_values(self):
idx = pd.PeriodIndex([], freq='M')
exp = np.array([], dtype=np.object)
tm.assert_numpy_array_equal(idx.values, exp)
tm.assert_numpy_array_equal(idx.get_values(), exp)
exp = np.array([], dtype=np.int64)
tm.assert_numpy_array_equal(idx._ndarray_values, exp)
idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M')
exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object)
tm.assert_numpy_array_equal(idx.values, exp)
tm.assert_numpy_array_equal(idx.get_values(), exp)
exp = np.array([492, -9223372036854775808], dtype=np.int64)
tm.assert_numpy_array_equal(idx._ndarray_values, exp)
idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D')
exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT],
dtype=object)
tm.assert_numpy_array_equal(idx.values, exp)
tm.assert_numpy_array_equal(idx.get_values(), exp)
exp = np.array([14975, -9223372036854775808], dtype=np.int64)
tm.assert_numpy_array_equal(idx._ndarray_values, exp)
def test_period_index_length(self):
pi = period_range(freq='A', start='1/1/2001', end='12/1/2009')
assert len(pi) == 9
pi = period_range(freq='Q', start='1/1/2001', end='12/1/2009')
assert len(pi) == 4 * 9
pi = period_range(freq='M', start='1/1/2001', end='12/1/2009')
assert len(pi) == 12 * 9
start = Period('02-Apr-2005', 'B')
i1 = period_range(start=start, periods=20)
assert len(i1) == 20
assert i1.freq == start.freq
assert i1[0] == start
end_intv = Period('2006-12-31', 'W')
i1 = period_range(end=end_intv, periods=10)
assert len(i1) == 10
assert i1.freq == end_intv.freq
assert i1[-1] == end_intv
end_intv = Period('2006-12-31', '1w')
i2 = period_range(end=end_intv, periods=10)
assert len(i1) == len(i2)
assert (i1 == i2).all()
assert i1.freq == i2.freq
end_intv = Period('2006-12-31', ('w', 1))
i2 = period_range(end=end_intv, periods=10)
assert len(i1) == len(i2)
assert (i1 == i2).all()
assert i1.freq == i2.freq
try:
period_range(start=start, end=end_intv)
raise AssertionError('Cannot allow mixed freq for start and end')
except ValueError:
pass
end_intv = Period('2005-05-01', 'B')
i1 = period_range(start=start, end=end_intv)
try:
period_range(start=start)
raise AssertionError(
'Must specify periods if missing start or end')
except ValueError:
pass
# infer freq from first element
i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')])
assert len(i2) == 2
assert i2[0] == end_intv
i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')]))
assert len(i2) == 2
assert i2[0] == end_intv
# Mixed freq should fail
vals = [end_intv, Period('2006-12-31', 'w')]
pytest.raises(ValueError, PeriodIndex, vals)
vals = np.array(vals)
pytest.raises(ValueError, PeriodIndex, vals)
def test_fields(self):
# year, month, day, hour, minute
# second, weekofyear, week, dayofweek, weekday, dayofyear, quarter
# qyear
pi = period_range(freq='A', start='1/1/2001', end='12/1/2005')
self._check_all_fields(pi)
pi = period_range(freq='Q', start='1/1/2001', end='12/1/2002')
self._check_all_fields(pi)
pi = period_range(freq='M', start='1/1/2001', end='1/1/2002')
self._check_all_fields(pi)
pi = period_range(freq='D', start='12/1/2001', end='6/1/2001')
self._check_all_fields(pi)
pi = period_range(freq='B', start='12/1/2001', end='6/1/2001')
self._check_all_fields(pi)
pi = period_range(freq='H', start='12/31/2001', end='1/1/2002 23:00')
self._check_all_fields(pi)
pi = period_range(freq='Min', start='12/31/2001', end='1/1/2002 00:20')
self._check_all_fields(pi)
pi = period_range(freq='S', start='12/31/2001 00:00:00',
end='12/31/2001 00:05:00')
self._check_all_fields(pi)
end_intv = Period('2006-12-31', 'W')
i1 = period_range(end=end_intv, periods=10)
self._check_all_fields(i1)
def _check_all_fields(self, periodindex):
fields = ['year', 'month', 'day', 'hour', 'minute', 'second',
'weekofyear', 'week', 'dayofweek', 'dayofyear',
'quarter', 'qyear', 'days_in_month']
periods = list(periodindex)
s = pd.Series(periodindex)
for field in fields:
field_idx = getattr(periodindex, field)
assert len(periodindex) == len(field_idx)
for x, val in zip(periods, field_idx):
assert getattr(x, field) == val
if len(s) == 0:
continue
field_s = getattr(s.dt, field)
assert len(periodindex) == len(field_s)
for x, val in zip(periods, field_s):
assert getattr(x, field) == val
def test_period_set_index_reindex(self):
# GH 6631
df = DataFrame(np.random.random(6))
idx1 = period_range('2011/01/01', periods=6, freq='M')
idx2 = period_range('2013', periods=6, freq='A')
df = df.set_index(idx1)
tm.assert_index_equal(df.index, idx1)
df = df.set_index(idx2)
tm.assert_index_equal(df.index, idx2)
def test_factorize(self):
idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02',
'2014-03', '2014-03'], freq='M')
exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
exp_idx = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M')
arr, idx = idx1.factorize()
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
arr, idx = idx1.factorize(sort=True)
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
idx2 = pd.PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01',
'2014-03', '2014-01'], freq='M')
exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp)
arr, idx = idx2.factorize(sort=True)
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
exp_idx = PeriodIndex(['2014-03', '2014-02', '2014-01'], freq='M')
arr, idx = idx2.factorize()
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
def test_is_(self):
create_index = lambda: period_range(freq='A', start='1/1/2001',
end='12/1/2009')
index = create_index()
assert index.is_(index)
assert not index.is_(create_index())
assert index.is_(index.view())
assert index.is_(index.view().view().view().view().view())
assert index.view().is_(index)
ind2 = index.view()
index.name = "Apple"
assert ind2.is_(index)
assert not index.is_(index[:])
assert not index.is_(index.asfreq('M'))
assert not index.is_(index.asfreq('A'))
assert not index.is_(index - 2)
assert not index.is_(index - 0)
def test_contains(self):
rng = period_range('2007-01', freq='M', periods=10)
assert Period('2007-01', freq='M') in rng
assert not Period('2007-01', freq='D') in rng
assert not Period('2007-01', freq='2M') in rng
def test_contains_nat(self):
# see gh-13582
idx = period_range('2007-01', freq='M', periods=10)
assert pd.NaT not in idx
assert None not in idx
assert float('nan') not in idx
assert np.nan not in idx
idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M')
assert pd.NaT in idx
assert None in idx
assert float('nan') in idx
assert np.nan in idx
def test_periods_number_check(self):
with pytest.raises(ValueError):
period_range('2011-1-1', '2012-1-1', 'B')
def test_start_time(self):
# GH 17157
index = period_range(freq='M', start='2016-01-01', end='2016-05-31')
expected_index = date_range('2016-01-01', end='2016-05-31', freq='MS')
tm.assert_index_equal(index.start_time, expected_index)
def test_end_time(self):
# GH 17157
index = period_range(freq='M', start='2016-01-01', end='2016-05-31')
expected_index = date_range('2016-01-01', end='2016-05-31', freq='M')
expected_index = expected_index.shift(1, freq='D').shift(-1, freq='ns')
tm.assert_index_equal(index.end_time, expected_index)
def test_index_duplicate_periods(self):
# monotonic
idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN')
ts = Series(np.random.randn(len(idx)), index=idx)
result = ts[2007]
expected = ts[1:3]
tm.assert_series_equal(result, expected)
result[:] = 1
assert (ts[1:3] == 1).all()
# not monotonic
idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq='A-JUN')
ts = Series(np.random.randn(len(idx)), index=idx)
result = ts[2007]
expected = ts[idx == 2007]
tm.assert_series_equal(result, expected)
def test_index_unique(self):
idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN')
expected = PeriodIndex([2000, 2007, 2009], freq='A-JUN')
tm.assert_index_equal(idx.unique(), expected)
assert idx.nunique() == 3
idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq='A-JUN',
tz='US/Eastern')
expected = PeriodIndex([2000, 2007, 2009], freq='A-JUN',
tz='US/Eastern')
tm.assert_index_equal(idx.unique(), expected)
assert idx.nunique() == 3
def test_shift(self):
# This is tested in test_arithmetic
pass
@td.skip_if_32bit
def test_ndarray_compat_properties(self):
    """Delegate to the parent implementation, under ``td.skip_if_32bit``."""
    parent = super(TestPeriodIndex, self)
    parent.test_ndarray_compat_properties()
def test_negative_ordinals(self):
Period(ordinal=-1000, freq='A')
Period(ordinal=0, freq='A')
idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq='A')
idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq='A')
tm.assert_index_equal(idx1, idx2)
def test_pindex_fieldaccessor_nat(self):
idx = PeriodIndex(['2011-01', '2011-02', 'NaT',
'2012-03', '2012-04'], freq='D', name='name')
exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name='name')
tm.assert_index_equal(idx.year, exp)
exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name='name')
tm.assert_index_equal(idx.month, exp)
def test_pindex_qaccess(self):
pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q')
s = Series(np.random.rand(len(pi)), index=pi).cumsum()
# Todo: fix these accessors!
assert s['05Q4'] == s[2]
def test_pindex_multiples(self):
with tm.assert_produces_warning(FutureWarning):
pi = PeriodIndex(start='1/1/11', end='12/31/11', freq='2M')
expected = PeriodIndex(['2011-01', '2011-03', '2011-05', '2011-07',
'2011-09', '2011-11'], freq='2M')
tm.assert_index_equal(pi, expected)
assert pi.freq == offsets.MonthEnd(2)
assert pi.freqstr == '2M'
pi = period_range(start='1/1/11', end='12/31/11', freq='2M')
tm.assert_index_equal(pi, expected)
assert pi.freq == offsets.MonthEnd(2)
assert pi.freqstr == '2M'
pi = period_range(start='1/1/11', periods=6, freq='2M')
tm.assert_index_equal(pi, expected)
assert pi.freq == offsets.MonthEnd(2)
assert pi.freqstr == '2M'
def test_iteration(self):
index = period_range(start='1/1/10', periods=4, freq='B')
result = list(index)
assert isinstance(result[0], Period)
assert result[0].freq == index.freq
def test_is_full(self):
index = PeriodIndex([2005, 2007, 2009], freq='A')
assert not index.is_full
index = PeriodIndex([2005, 2006, 2007], freq='A')
assert index.is_full
index = PeriodIndex([2005, 2005, 2007], freq='A')
assert not index.is_full
index = PeriodIndex([2005, 2005, 2006], freq='A')
assert index.is_full
index = PeriodIndex([2006, 2005, 2005], freq='A')
pytest.raises(ValueError, getattr, index, 'is_full')
assert index[:0].is_full
def test_with_multi_index(self):
# #1705
index = date_range('1/1/2012', periods=4, freq='12H')
index_as_arrays = [index.to_period(freq='D'), index.hour]
s = Series([0, 1, 2, 3], index_as_arrays)
assert isinstance(s.index.levels[0], PeriodIndex)
assert isinstance(s.index.values[0][0], Period)
def test_convert_array_of_periods(self):
rng = period_range('1/1/2000', periods=20, freq='D')
periods = list(rng)
result = pd.Index(periods)
assert isinstance(result, PeriodIndex)
def test_append_concat(self):
# #1815
d1 = date_range('12/31/1990', '12/31/1999', freq='A-DEC')
d2 = date_range('12/31/2000', '12/31/2009', freq='A-DEC')
s1 = Series(np.random.randn(10), d1)
s2 = Series(np.random.randn(10), d2)
s1 = s1.to_period()
s2 = s2.to_period()
# drops index
result = pd.concat([s1, s2])
assert isinstance(result.index, PeriodIndex)
assert result.index[0] == s1.index[0]
def test_pickle_freq(self):
# GH2891
prng = period_range('1/1/2011', '1/1/2012', freq='M')
new_prng = tm.round_trip_pickle(prng)
assert new_prng.freq == offsets.MonthEnd()
assert new_prng.freqstr == 'M'
def test_map(self):
# test_map_dictlike generally tests
index = PeriodIndex([2005, 2007, 2009], freq='A')
result = index.map(lambda x: x.ordinal)
exp = Index([x.ordinal for x in index])
tm.assert_index_equal(result, exp)
def test_join_self(self, join_type):
index = period_range('1/1/2000', periods=10)
joined = index.join(index, how=join_type)
assert index is joined
def test_insert(self):
# GH 18295 (test missing)
expected = PeriodIndex(
['2017Q1', pd.NaT, '2017Q2', '2017Q3', '2017Q4'], freq='Q')
for na in (np.nan, pd.NaT, None):
result = period_range('2017Q1', periods=4, freq='Q').insert(1, na)
tm.assert_index_equal(result, expected)
def test_maybe_convert_timedelta():
    """``_maybe_convert_timedelta`` maps day offsets and ints to a count."""
    pi = PeriodIndex(['2000', '2001'], freq='D')

    # A two-day offset and the plain integer 2 both convert to 2.
    two_days = offsets.Day(2)
    assert pi._maybe_convert_timedelta(two_days) == 2
    assert pi._maybe_convert_timedelta(2) == 2

    # A business-day offset is rejected for this daily index.
    business_day = offsets.BusinessDay()
    with pytest.raises(ValueError, match='freq'):
        pi._maybe_convert_timedelta(business_day)
@@ -1,95 +0,0 @@
import pytest
from pandas import NaT, Period, PeriodIndex, date_range, period_range
import pandas.util.testing as tm
class TestPeriodRange(object):
    """Tests for the ``period_range`` constructor function."""

    @pytest.mark.parametrize('freq', ['D', 'W', 'M', 'Q', 'A'])
    def test_construction_from_string(self, freq):
        """String endpoints give the same index as ``date_range().to_period()``."""
        # non-empty
        expected = date_range(start='2017-01-01', periods=5,
                              freq=freq, name='foo').to_period()
        start, end = str(expected[0]), str(expected[-1])

        # Any two of start / end / periods describe the same range.
        for kwargs in ({'start': start, 'end': end},
                       {'start': start, 'periods': 5},
                       {'end': end, 'periods': 5}):
            result = period_range(freq=freq, name='foo', **kwargs)
            tm.assert_index_equal(result, expected)

        # empty
        expected = PeriodIndex([], freq=freq, name='foo')

        for kwargs in ({'start': start, 'periods': 0},
                       {'end': end, 'periods': 0},
                       {'start': end, 'end': start}):
            result = period_range(freq=freq, name='foo', **kwargs)
            tm.assert_index_equal(result, expected)

    def test_construction_from_period(self):
        """``Period`` endpoints may be resampled to a different frequency."""
        # upsampling
        first, last = Period('2017Q1', freq='Q'), Period('2018Q1', freq='Q')
        wanted = date_range(start='2017-03-31', end='2018-03-31', freq='M',
                            name='foo').to_period()
        got = period_range(start=first, end=last, freq='M', name='foo')
        tm.assert_index_equal(got, wanted)

        # downsampling
        first, last = Period('2017-1', freq='M'), Period('2019-12', freq='M')
        wanted = date_range(start='2017-01-31', end='2019-12-31', freq='Q',
                            name='foo').to_period()
        got = period_range(start=first, end=last, freq='Q', name='foo')
        tm.assert_index_equal(got, wanted)

        # empty
        wanted = PeriodIndex([], freq='W', name='foo')

        got = period_range(start=first, periods=0, freq='W', name='foo')
        tm.assert_index_equal(got, wanted)

        got = period_range(end=last, periods=0, freq='W', name='foo')
        tm.assert_index_equal(got, wanted)

        got = period_range(start=last, end=first, freq='W', name='foo')
        tm.assert_index_equal(got, wanted)

    def test_errors(self):
        """Invalid argument combinations raise with the documented messages."""
        msg = ('Of the three parameters: start, end, and periods, '
               'exactly two must be specified')

        # not enough params
        too_few = ({'start': '2017Q1'}, {'end': '2017Q1'}, {'periods': 5}, {})
        for kwargs in too_few:
            with pytest.raises(ValueError, match=msg):
                period_range(**kwargs)

        # too many params
        with pytest.raises(ValueError, match=msg):
            period_range(start='2017Q1', end='2018Q1', periods=8, freq='Q')

        # start/end NaT
        msg = 'start and end must not be NaT'
        with pytest.raises(ValueError, match=msg):
            period_range(start=NaT, end='2018Q1')

        with pytest.raises(ValueError, match=msg):
            period_range(start='2017Q1', end=NaT)

        # invalid periods param
        msg = 'periods must be a number, got foo'
        with pytest.raises(TypeError, match=msg):
            period_range(start='2017Q1', periods='foo')
@@ -1,18 +0,0 @@
# -*- coding: utf-8 -*-
"""Tests for PeriodIndex behaving like a vectorized Period scalar"""
from pandas import Timedelta, date_range, period_range
import pandas.util.testing as tm
class TestPeriodIndexOps(object):
    """``start_time`` / ``end_time`` of a monthly PeriodIndex."""

    def test_start_time(self):
        """Start times coincide with month-start timestamps."""
        monthly = period_range(freq='M', start='2016-01-01',
                               end='2016-05-31')
        month_starts = date_range('2016-01-01', end='2016-05-31', freq='MS')
        tm.assert_index_equal(monthly.start_time, month_starts)

    def test_end_time(self):
        """End times are one nanosecond before the following month starts."""
        monthly = period_range(freq='M', start='2016-01-01',
                               end='2016-05-31')
        month_ends = date_range('2016-01-01', end='2016-05-31', freq='M')
        # One day forward minus one nanosecond.
        one_ns_short_of_a_day = Timedelta(1, 'D') - Timedelta(1, 'ns')
        month_ends += one_ns_short_of_a_day
        tm.assert_index_equal(monthly.end_time, month_ends)
@@ -1,281 +0,0 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Index, PeriodIndex, date_range, period_range
import pandas.core.indexes.period as period
import pandas.util.testing as tm
def _permute(obj):
return obj.take(np.random.permutation(len(obj)))
class TestPeriodIndex(object):
def test_joins(self, join_type):
index = period_range('1/1/2000', '1/20/2000', freq='D')
joined = index.join(index[:-5], how=join_type)
assert isinstance(joined, PeriodIndex)
assert joined.freq == index.freq
def test_join_self(self, join_type):
index = period_range('1/1/2000', '1/20/2000', freq='D')
res = index.join(index, how=join_type)
assert index is res
def test_join_does_not_recur(self):
df = tm.makeCustomDataframe(
3, 2, data_gen_f=lambda *args: np.random.randint(2),
c_idx_type='p', r_idx_type='dt')
s = df.iloc[:2, 0]
res = s.index.join(df.columns, how='outer')
expected = Index([s.index[0], s.index[1],
df.columns[0], df.columns[1]], object)
tm.assert_index_equal(res, expected)
@pytest.mark.parametrize("sort", [True, False])
def test_union(self, sort):
# union
other1 = pd.period_range('1/1/2000', freq='D', periods=5)
rng1 = pd.period_range('1/6/2000', freq='D', periods=5)
expected1 = pd.period_range('1/1/2000', freq='D', periods=10)
rng2 = pd.period_range('1/1/2000', freq='D', periods=5)
other2 = pd.period_range('1/4/2000', freq='D', periods=5)
expected2 = pd.period_range('1/1/2000', freq='D', periods=8)
rng3 = pd.period_range('1/1/2000', freq='D', periods=5)
other3 = pd.PeriodIndex([], freq='D')
expected3 = pd.period_range('1/1/2000', freq='D', periods=5)
rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5)
other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5)
expected4 = pd.PeriodIndex(['2000-01-01 09:00', '2000-01-01 10:00',
'2000-01-01 11:00', '2000-01-01 12:00',
'2000-01-01 13:00', '2000-01-02 09:00',
'2000-01-02 10:00', '2000-01-02 11:00',
'2000-01-02 12:00', '2000-01-02 13:00'],
freq='H')
rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03',
'2000-01-01 09:05'], freq='T')
other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05'
'2000-01-01 09:08'],
freq='T')
expected5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03',
'2000-01-01 09:05', '2000-01-01 09:08'],
freq='T')
rng6 = pd.period_range('2000-01-01', freq='M', periods=7)
other6 = pd.period_range('2000-04-01', freq='M', periods=7)
expected6 = pd.period_range('2000-01-01', freq='M', periods=10)
rng7 = pd.period_range('2003-01-01', freq='A', periods=5)
other7 = pd.period_range('1998-01-01', freq='A', periods=8)
expected7 = pd.period_range('1998-01-01', freq='A', periods=10)
rng8 = pd.PeriodIndex(['1/3/2000', '1/2/2000', '1/1/2000',
'1/5/2000', '1/4/2000'], freq='D')
other8 = pd.period_range('1/6/2000', freq='D', periods=5)
expected8 = pd.PeriodIndex(['1/3/2000', '1/2/2000', '1/1/2000',
'1/5/2000', '1/4/2000', '1/6/2000',
'1/7/2000', '1/8/2000', '1/9/2000',
'1/10/2000'], freq='D')
for rng, other, expected in [(rng1, other1, expected1),
(rng2, other2, expected2),
(rng3, other3, expected3),
(rng4, other4, expected4),
(rng5, other5, expected5),
(rng6, other6, expected6),
(rng7, other7, expected7),
(rng8, other8, expected8)]:
result_union = rng.union(other, sort=sort)
if sort:
expected = expected.sort_values()
tm.assert_index_equal(result_union, expected)
@pytest.mark.parametrize("sort", [True, False])
def test_union_misc(self, sort):
index = period_range('1/1/2000', '1/20/2000', freq='D')
result = index[:-5].union(index[10:], sort=sort)
tm.assert_index_equal(result, index)
# not in order
result = _permute(index[:-5]).union(_permute(index[10:]), sort=sort)
if sort:
tm.assert_index_equal(result, index)
assert tm.equalContents(result, index)
# raise if different frequencies
index = period_range('1/1/2000', '1/20/2000', freq='D')
index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED')
with pytest.raises(period.IncompatibleFrequency):
index.union(index2, sort=sort)
msg = 'can only call with other PeriodIndex-ed objects'
with pytest.raises(ValueError, match=msg):
index.join(index.to_timestamp())
index3 = period_range('1/1/2000', '1/20/2000', freq='2D')
with pytest.raises(period.IncompatibleFrequency):
index.join(index3)
def test_union_dataframe_index(self):
rng1 = pd.period_range('1/1/1999', '1/1/2012', freq='M')
s1 = pd.Series(np.random.randn(len(rng1)), rng1)
rng2 = pd.period_range('1/1/1980', '12/1/2001', freq='M')
s2 = pd.Series(np.random.randn(len(rng2)), rng2)
df = pd.DataFrame({'s1': s1, 's2': s2})
exp = pd.period_range('1/1/1980', '1/1/2012', freq='M')
tm.assert_index_equal(df.index, exp)
@pytest.mark.parametrize("sort", [True, False])
def test_intersection(self, sort):
index = period_range('1/1/2000', '1/20/2000', freq='D')
result = index[:-5].intersection(index[10:], sort=sort)
tm.assert_index_equal(result, index[10:-5])
# not in order
left = _permute(index[:-5])
right = _permute(index[10:])
result = left.intersection(right, sort=sort)
if sort:
tm.assert_index_equal(result, index[10:-5])
assert tm.equalContents(result, index[10:-5])
# raise if different frequencies
index = period_range('1/1/2000', '1/20/2000', freq='D')
index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED')
with pytest.raises(period.IncompatibleFrequency):
index.intersection(index2, sort=sort)
index3 = period_range('1/1/2000', '1/20/2000', freq='2D')
with pytest.raises(period.IncompatibleFrequency):
index.intersection(index3, sort=sort)
@pytest.mark.parametrize("sort", [True, False])
def test_intersection_cases(self, sort):
base = period_range('6/1/2000', '6/30/2000', freq='D', name='idx')
# if target has the same name, it is preserved
rng2 = period_range('5/15/2000', '6/20/2000', freq='D', name='idx')
expected2 = period_range('6/1/2000', '6/20/2000', freq='D',
name='idx')
# if target name is different, it will be reset
rng3 = period_range('5/15/2000', '6/20/2000', freq='D', name='other')
expected3 = period_range('6/1/2000', '6/20/2000', freq='D',
name=None)
rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx')
expected4 = PeriodIndex([], name='idx', freq='D')
for (rng, expected) in [(rng2, expected2), (rng3, expected3),
(rng4, expected4)]:
result = base.intersection(rng, sort=sort)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
# non-monotonic
base = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-02',
'2011-01-03'], freq='D', name='idx')
rng2 = PeriodIndex(['2011-01-04', '2011-01-02',
'2011-02-02', '2011-02-03'],
freq='D', name='idx')
expected2 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D',
name='idx')
rng3 = PeriodIndex(['2011-01-04', '2011-01-02', '2011-02-02',
'2011-02-03'],
freq='D', name='other')
expected3 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D',
name=None)
rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx')
expected4 = PeriodIndex([], freq='D', name='idx')
for (rng, expected) in [(rng2, expected2), (rng3, expected3),
(rng4, expected4)]:
result = base.intersection(rng, sort=sort)
if sort:
expected = expected.sort_values()
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == 'D'
# empty same freq
rng = date_range('6/1/2000', '6/15/2000', freq='T')
result = rng[0:0].intersection(rng)
assert len(result) == 0
result = rng.intersection(rng[0:0])
assert len(result) == 0
@pytest.mark.parametrize("sort", [True, False])
def test_difference(self, sort):
# diff
period_rng = ['1/3/2000', '1/2/2000', '1/1/2000', '1/5/2000',
'1/4/2000']
rng1 = pd.PeriodIndex(period_rng, freq='D')
other1 = pd.period_range('1/6/2000', freq='D', periods=5)
expected1 = rng1
rng2 = pd.PeriodIndex(period_rng, freq='D')
other2 = pd.period_range('1/4/2000', freq='D', periods=5)
expected2 = pd.PeriodIndex(['1/3/2000', '1/2/2000', '1/1/2000'],
freq='D')
rng3 = pd.PeriodIndex(period_rng, freq='D')
other3 = pd.PeriodIndex([], freq='D')
expected3 = rng3
period_rng = ['2000-01-01 10:00', '2000-01-01 09:00',
'2000-01-01 12:00', '2000-01-01 11:00',
'2000-01-01 13:00']
rng4 = pd.PeriodIndex(period_rng, freq='H')
other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5)
expected4 = rng4
rng5 = pd.PeriodIndex(['2000-01-01 09:03', '2000-01-01 09:01',
'2000-01-01 09:05'], freq='T')
other5 = pd.PeriodIndex(
['2000-01-01 09:01', '2000-01-01 09:05'], freq='T')
expected5 = pd.PeriodIndex(['2000-01-01 09:03'], freq='T')
period_rng = ['2000-02-01', '2000-01-01', '2000-06-01',
'2000-07-01', '2000-05-01', '2000-03-01',
'2000-04-01']
rng6 = pd.PeriodIndex(period_rng, freq='M')
other6 = pd.period_range('2000-04-01', freq='M', periods=7)
expected6 = pd.PeriodIndex(['2000-02-01', '2000-01-01', '2000-03-01'],
freq='M')
period_rng = ['2003', '2007', '2006', '2005', '2004']
rng7 = pd.PeriodIndex(period_rng, freq='A')
other7 = pd.period_range('1998-01-01', freq='A', periods=8)
expected7 = pd.PeriodIndex(['2007', '2006'], freq='A')
for rng, other, expected in [(rng1, other1, expected1),
(rng2, other2, expected2),
(rng3, other3, expected3),
(rng4, other4, expected4),
(rng5, other5, expected5),
(rng6, other6, expected6),
(rng7, other7, expected7), ]:
result_difference = rng.difference(other, sort=sort)
if sort:
expected = expected.sort_values()
tm.assert_index_equal(result_difference, expected)
@@ -1,345 +0,0 @@
from datetime import datetime, timedelta
import numpy as np
import pytest
from pandas._libs.tslibs.ccalendar import MONTHS
from pandas.compat import lrange
import pandas as pd
from pandas import (
DatetimeIndex, Period, PeriodIndex, Series, Timedelta, Timestamp,
date_range, period_range, to_datetime)
import pandas.core.indexes.period as period
import pandas.util.testing as tm
class TestPeriodRepresentation(object):
    """
    Wish to match NumPy units
    """

    def _check_freq(self, freq, base_date):
        """Ten periods starting at ``base_date`` must have ordinals 0..9."""
        ordinals = period_range(start=base_date, periods=10, freq=freq).asi8
        tm.assert_numpy_array_equal(ordinals, np.arange(10, dtype=np.int64))

    def test_annual(self):
        """Annual ordinals count from 1970."""
        self._check_freq('A', 1970)

    def test_monthly(self):
        """Monthly ordinals count from 1970-01."""
        self._check_freq('M', '1970-01')

    @pytest.mark.parametrize('freq', ['W-THU', 'D', 'B', 'H', 'T',
                                      'S', 'L', 'U', 'N'])
    def test_freq(self, freq):
        """Each listed frequency has ordinals counting from 1970-01-01."""
        self._check_freq(freq, '1970-01-01')

    def test_negone_ordinals(self):
        """Ordinal -1 is representable and falls in 1969."""
        freqs = ['A', 'M', 'Q', 'D', 'H', 'T', 'S']

        # Converting the day before the epoch must give a printable Period.
        day_before_epoch = Period(ordinal=-1, freq='D')
        for freq in freqs:
            repr(day_before_epoch.asfreq(freq))

        for freq in freqs:
            minus_one = Period(ordinal=-1, freq=freq)
            repr(minus_one)
            assert minus_one.year == 1969

        # Business-day and weekly periods only need to be printable.
        for freq in ('B', 'W'):
            repr(Period(ordinal=-1, freq=freq))
class TestPeriodIndex(object):
def test_to_timestamp(self):
index = period_range(freq='A', start='1/1/2001', end='12/1/2009')
series = Series(1, index=index, name='foo')
exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC')
result = series.to_timestamp(how='end')
exp_index = exp_index + Timedelta(1, 'D') - Timedelta(1, 'ns')
tm.assert_index_equal(result.index, exp_index)
assert result.name == 'foo'
exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN')
result = series.to_timestamp(how='start')
tm.assert_index_equal(result.index, exp_index)
def _get_with_delta(delta, freq='A-DEC'):
return date_range(to_datetime('1/1/2001') + delta,
to_datetime('12/31/2009') + delta, freq=freq)
delta = timedelta(hours=23)
result = series.to_timestamp('H', 'end')
exp_index = _get_with_delta(delta)
exp_index = exp_index + Timedelta(1, 'h') - Timedelta(1, 'ns')
tm.assert_index_equal(result.index, exp_index)
delta = timedelta(hours=23, minutes=59)
result = series.to_timestamp('T', 'end')
exp_index = _get_with_delta(delta)
exp_index = exp_index + Timedelta(1, 'm') - Timedelta(1, 'ns')
tm.assert_index_equal(result.index, exp_index)
result = series.to_timestamp('S', 'end')
delta = timedelta(hours=23, minutes=59, seconds=59)
exp_index = _get_with_delta(delta)
exp_index = exp_index + Timedelta(1, 's') - Timedelta(1, 'ns')
tm.assert_index_equal(result.index, exp_index)
index = period_range(freq='H', start='1/1/2001', end='1/2/2001')
series = Series(1, index=index, name='foo')
exp_index = date_range('1/1/2001 00:59:59', end='1/2/2001 00:59:59',
freq='H')
result = series.to_timestamp(how='end')
exp_index = exp_index + Timedelta(1, 's') - Timedelta(1, 'ns')
tm.assert_index_equal(result.index, exp_index)
assert result.name == 'foo'
def test_to_timestamp_freq(self):
idx = pd.period_range('2017', periods=12, freq="A-DEC")
result = idx.to_timestamp()
expected = pd.date_range("2017", periods=12, freq="AS-JAN")
tm.assert_index_equal(result, expected)
def test_to_timestamp_repr_is_code(self):
zs = [Timestamp('99-04-17 00:00:00', tz='UTC'),
Timestamp('2001-04-17 00:00:00', tz='UTC'),
Timestamp('2001-04-17 00:00:00', tz='America/Los_Angeles'),
Timestamp('2001-04-17 00:00:00', tz=None)]
for z in zs:
assert eval(repr(z)) == z
def test_to_timestamp_to_period_astype(self):
idx = DatetimeIndex([pd.NaT, '2011-01-01', '2011-02-01'], name='idx')
res = idx.astype('period[M]')
exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', name='idx')
tm.assert_index_equal(res, exp)
res = idx.astype('period[3M]')
exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx')
tm.assert_index_equal(res, exp)
def test_dti_to_period(self):
dti = pd.date_range(start='1/1/2005', end='12/1/2005', freq='M')
pi1 = dti.to_period()
pi2 = dti.to_period(freq='D')
pi3 = dti.to_period(freq='3D')
assert pi1[0] == Period('Jan 2005', freq='M')
assert pi2[0] == Period('1/31/2005', freq='D')
assert pi3[0] == Period('1/31/2005', freq='3D')
assert pi1[-1] == Period('Nov 2005', freq='M')
assert pi2[-1] == Period('11/30/2005', freq='D')
assert pi3[-1], Period('11/30/2005', freq='3D')
tm.assert_index_equal(pi1, period_range('1/1/2005', '11/1/2005',
freq='M'))
tm.assert_index_equal(pi2, period_range('1/1/2005', '11/1/2005',
freq='M').asfreq('D'))
tm.assert_index_equal(pi3, period_range('1/1/2005', '11/1/2005',
freq='M').asfreq('3D'))
@pytest.mark.parametrize('month', MONTHS)
def test_to_period_quarterly(self, month):
# make sure we can make the round trip
freq = 'Q-%s' % month
rng = period_range('1989Q3', '1991Q3', freq=freq)
stamps = rng.to_timestamp()
result = stamps.to_period(freq)
tm.assert_index_equal(rng, result)
@pytest.mark.parametrize('off', ['BQ', 'QS', 'BQS'])
def test_to_period_quarterlyish(self, off):
rng = date_range('01-Jan-2012', periods=8, freq=off)
prng = rng.to_period()
assert prng.freq == 'Q-DEC'
@pytest.mark.parametrize('off', ['BA', 'AS', 'BAS'])
def test_to_period_annualish(self, off):
rng = date_range('01-Jan-2012', periods=8, freq=off)
prng = rng.to_period()
assert prng.freq == 'A-DEC'
def test_to_period_monthish(self):
offsets = ['MS', 'BM']
for off in offsets:
rng = date_range('01-Jan-2012', periods=8, freq=off)
prng = rng.to_period()
assert prng.freq == 'M'
rng = date_range('01-Jan-2012', periods=8, freq='M')
prng = rng.to_period()
assert prng.freq == 'M'
msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG
with pytest.raises(ValueError, match=msg):
date_range('01-Jan-2012', periods=8, freq='EOM')
def test_period_dt64_round_trip(self):
dti = date_range('1/1/2000', '1/7/2002', freq='B')
pi = dti.to_period()
tm.assert_index_equal(pi.to_timestamp(), dti)
dti = date_range('1/1/2000', '1/7/2002', freq='B')
pi = dti.to_period(freq='H')
tm.assert_index_equal(pi.to_timestamp(), dti)
def test_combine_first(self):
    """Series.combine_first fills NaNs the same way on both index types."""
    # GH#3367
    datetime_idx = pd.date_range(start='1950-01-31', end='1950-07-31',
                                 freq='M')
    period_idx = pd.period_range(start=pd.Period('1950-1'),
                                 end=pd.Period('1950-7'), freq='M')

    # check to be consistent with DatetimeIndex
    for idx in [datetime_idx, period_idx]:
        with_gaps = pd.Series([1, np.nan, np.nan, 4, 5, np.nan, 7],
                              index=idx)
        filler = pd.Series([9, 9, 9, 9, 9, 9, 9], index=idx)

        expected = pd.Series([1, 9, 9, 4, 5, 9, 7], index=idx,
                             dtype=np.float64)
        tm.assert_series_equal(with_gaps.combine_first(filler), expected)
@pytest.mark.parametrize('freq', ['D', '2D'])
def test_searchsorted(self, freq):
    """searchsorted locates same-freq Periods and rejects other freqs."""
    pidx = pd.PeriodIndex(['2014-01-01', '2014-01-02', '2014-01-03',
                           '2014-01-04', '2014-01-05'], freq=freq)

    # periods with the index's own frequency are found at their position
    for date_str, position in (('2014-01-01', 0), ('2014-01-04', 3)):
        needle = pd.Period(date_str, freq=freq)
        assert pidx.searchsorted(needle) == position

    # a period with a different frequency is refused
    for other_freq in ('H', '5D'):
        msg = "Input has different freq=%s from PeriodIndex" % other_freq
        with pytest.raises(period.IncompatibleFrequency, match=msg):
            pidx.searchsorted(pd.Period('2014-01-01', freq=other_freq))
class TestPeriodIndexConversion(object):
def test_tolist(self):
index = period_range(freq='A', start='1/1/2001', end='12/1/2009')
rs = index.tolist()
for x in rs:
assert isinstance(x, Period)
recon = PeriodIndex(rs)
tm.assert_index_equal(index, recon)
def test_to_timestamp_pi_nat(self):
# GH#7228
index = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M',
name='idx')
result = index.to_timestamp('D')
expected = DatetimeIndex([pd.NaT, datetime(2011, 1, 1),
datetime(2011, 2, 1)], name='idx')
tm.assert_index_equal(result, expected)
assert result.name == 'idx'
result2 = result.to_period(freq='M')
tm.assert_index_equal(result2, index)
assert result2.name == 'idx'
result3 = result.to_period(freq='3M')
exp = PeriodIndex(['NaT', '2011-01', '2011-02'],
freq='3M', name='idx')
tm.assert_index_equal(result3, exp)
assert result3.freqstr == '3M'
msg = ('Frequency must be positive, because it'
' represents span: -2A')
with pytest.raises(ValueError, match=msg):
result.to_period(freq='-2A')
def test_to_timestamp_preserve_name(self):
index = period_range(freq='A', start='1/1/2001', end='12/1/2009',
name='foo')
assert index.name == 'foo'
conv = index.to_timestamp('D')
assert conv.name == 'foo'
def test_to_timestamp_quarterly_bug(self):
years = np.arange(1960, 2000).repeat(4)
quarters = np.tile(lrange(1, 5), 40)
pindex = PeriodIndex(year=years, quarter=quarters)
stamps = pindex.to_timestamp('D', 'end')
expected = DatetimeIndex([x.to_timestamp('D', 'end') for x in pindex])
tm.assert_index_equal(stamps, expected)
def test_to_timestamp_pi_mult(self):
idx = PeriodIndex(['2011-01', 'NaT', '2011-02'],
freq='2M', name='idx')
result = idx.to_timestamp()
expected = DatetimeIndex(['2011-01-01', 'NaT', '2011-02-01'],
name='idx')
tm.assert_index_equal(result, expected)
result = idx.to_timestamp(how='E')
expected = DatetimeIndex(['2011-02-28', 'NaT', '2011-03-31'],
name='idx')
expected = expected + Timedelta(1, 'D') - Timedelta(1, 'ns')
tm.assert_index_equal(result, expected)
def test_to_timestamp_pi_combined(self):
idx = period_range(start='2011', periods=2, freq='1D1H', name='idx')
result = idx.to_timestamp()
expected = DatetimeIndex(['2011-01-01 00:00', '2011-01-02 01:00'],
name='idx')
tm.assert_index_equal(result, expected)
result = idx.to_timestamp(how='E')
expected = DatetimeIndex(['2011-01-02 00:59:59',
'2011-01-03 01:59:59'],
name='idx')
expected = expected + Timedelta(1, 's') - Timedelta(1, 'ns')
tm.assert_index_equal(result, expected)
result = idx.to_timestamp(how='E', freq='H')
expected = DatetimeIndex(['2011-01-02 00:00', '2011-01-03 01:00'],
name='idx')
expected = expected + Timedelta(1, 'h') - Timedelta(1, 'ns')
tm.assert_index_equal(result, expected)
def test_period_astype_to_timestamp(self):
pi = pd.PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M')
exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'])
tm.assert_index_equal(pi.astype('datetime64[ns]'), exp)
exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31'])
exp = exp + Timedelta(1, 'D') - Timedelta(1, 'ns')
tm.assert_index_equal(pi.astype('datetime64[ns]', how='end'), exp)
exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'],
tz='US/Eastern')
res = pi.astype('datetime64[ns, US/Eastern]')
tm.assert_index_equal(pi.astype('datetime64[ns, US/Eastern]'), exp)
exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31'],
tz='US/Eastern')
exp = exp + Timedelta(1, 'D') - Timedelta(1, 'ns')
res = pi.astype('datetime64[ns, US/Eastern]', how='end')
tm.assert_index_equal(res, exp)
def test_to_timestamp_1703(self):
index = period_range('1/1/2012', periods=4, freq='D')
result = index.to_timestamp()
assert result[0] == Timestamp('1/1/2012')
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,343 +0,0 @@
"""
Collection of tests asserting things that should be true for
any index subclass. Makes use of the `indices` fixture defined
in pandas/tests/indexes/conftest.py.
"""
import numpy as np
import pytest
from pandas._libs.tslibs import iNaT
from pandas.core.dtypes.common import needs_i8_conversion
import pandas as pd
from pandas import CategoricalIndex, MultiIndex, RangeIndex, compat
import pandas.util.testing as tm
class TestCommon(object):
    """Checks run against every index supplied by the `indices` fixture."""

    def test_droplevel(self, indices):
        """droplevel([]) is a no-op; dropping the only level must raise."""
        # GH 21115
        if isinstance(indices, MultiIndex):
            # Tested separately in test_multi.py
            return

        assert indices.droplevel([]).equals(indices)

        for level in indices.name, [indices.name]:
            if isinstance(indices.name, tuple) and level is indices.name:
                # GH 21121 : droplevel with tuple name
                continue
            with pytest.raises(ValueError):
                indices.droplevel(level)

        for level in 'wrong', ['wrong']:
            with pytest.raises(KeyError):
                indices.droplevel(level)

    def test_constructor_non_hashable_name(self, indices):
        """rename() and set_names() reject a non-hashable name."""
        # GH 20527

        if isinstance(indices, MultiIndex):
            pytest.skip("multiindex handled in test_multi.py")

        message = "Index.name must be a hashable type"
        renamed = [['1']]

        # With .rename()
        with pytest.raises(TypeError, match=message):
            indices.rename(name=renamed)

        # With .set_names()
        with pytest.raises(TypeError, match=message):
            indices.set_names(names=renamed)

    def test_constructor_unwraps_index(self, indices):
        """Re-wrapping an index in its own type yields equal ``_data``."""
        if isinstance(indices, pd.MultiIndex):
            # pytest.skip raises on its own; no explicit `raise` is needed
            pytest.skip("MultiIndex has no ._data")
        a = indices
        b = type(a)(a)
        tm.assert_equal(a._data, b._data)

    @pytest.mark.parametrize("itm", [101, 'no_int'])
    # FutureWarning from non-tuple sequence of nd indexing
    @pytest.mark.filterwarnings("ignore::FutureWarning")
    def test_getitem_error(self, indices, itm):
        """Indexing with an out-of-range int or a string raises IndexError."""
        with pytest.raises(IndexError):
            indices[itm]

    @pytest.mark.parametrize(
        'fname, sname, expected_name',
        [
            ('A', 'A', 'A'),
            ('A', 'B', None),
            ('A', None, None),
            (None, 'B', None),
            (None, None, None),
        ])
    def test_corner_union(self, indices, fname, sname, expected_name):
        """Name of a union for each combination of operand names."""
        # GH 9943 9862
        # Test unions with various name combinations
        # Do not test MultiIndex or repeats

        if isinstance(indices, MultiIndex) or not indices.is_unique:
            pytest.skip("Not for MultiIndex or repeated indices")

        # Test copy.union(copy)
        first = indices.copy().set_names(fname)
        second = indices.copy().set_names(sname)
        union = first.union(second)
        expected = indices.copy().set_names(expected_name)
        tm.assert_index_equal(union, expected)

        # Test copy.union(empty)
        first = indices.copy().set_names(fname)
        second = indices.drop(indices).set_names(sname)
        union = first.union(second)
        expected = indices.copy().set_names(expected_name)
        tm.assert_index_equal(union, expected)

        # Test empty.union(copy)
        first = indices.drop(indices).set_names(fname)
        second = indices.copy().set_names(sname)
        union = first.union(second)
        expected = indices.copy().set_names(expected_name)
        tm.assert_index_equal(union, expected)

        # Test empty.union(empty)
        first = indices.drop(indices).set_names(fname)
        second = indices.drop(indices).set_names(sname)
        union = first.union(second)
        expected = indices.drop(indices).set_names(expected_name)
        tm.assert_index_equal(union, expected)

    def test_to_flat_index(self, indices):
        """to_flat_index() returns an equal index for non-MultiIndex."""
        # 22866
        if isinstance(indices, MultiIndex):
            pytest.skip("Separate expectation for MultiIndex")

        result = indices.to_flat_index()
        tm.assert_index_equal(result, indices)

    def test_wrong_number_names(self, indices):
        """Assigning three names to ``names`` raises a 'Length' error."""
        with pytest.raises(ValueError, match="^Length"):
            indices.names = ["apple", "banana", "carrot"]

    def test_set_name_methods(self, indices):
        """set_names returns a renamed copy; rename(inplace=True) mutates."""
        new_name = "This is the new name for this index"

        # don't test a MultiIndex here (as it's tested separately)
        if isinstance(indices, MultiIndex):
            pytest.skip('Skip check for MultiIndex')
        original_name = indices.name
        new_ind = indices.set_names([new_name])
        assert new_ind.name == new_name
        assert indices.name == original_name
        res = indices.rename(new_name, inplace=True)

        # should return None
        assert res is None
        assert indices.name == new_name
        assert indices.names == [new_name]
        # with pytest.raises(TypeError, match="list-like"):
        #    # should still fail even if it would be the right length
        #    ind.set_names("a")
        with pytest.raises(ValueError, match="Level must be None"):
            indices.set_names("a", level=0)

        # rename in place just leaves tuples and other containers alone
        name = ('A', 'B')
        indices.rename(name, inplace=True)
        assert indices.name == name
        assert indices.names == [name]

    def test_dtype_str(self, indices):
        """dtype_str is a string equal to str(dtype)."""
        dtype = indices.dtype_str
        assert isinstance(dtype, compat.string_types)
        assert dtype == str(indices.dtype)

    def test_hash_error(self, indices):
        """hash() of an index raises TypeError naming the index class."""
        with pytest.raises(TypeError, match=("unhashable type: %r" %
                                             type(indices).__name__)):
            hash(indices)

    def test_copy_and_deepcopy(self, indices):
        """copy/deepcopy give equal, distinct objects; copy() can rename."""
        from copy import copy, deepcopy

        if isinstance(indices, MultiIndex):
            pytest.skip('Skip check for MultiIndex')

        for func in (copy, deepcopy):
            idx_copy = func(indices)
            assert idx_copy is not indices
            assert idx_copy.equals(indices)

        new_copy = indices.copy(deep=True, name="banana")
        assert new_copy.name == "banana"

    def test_unique(self, indices):
        """unique(level=...) equals drop_duplicates for valid levels."""
        # don't test a MultiIndex here (as it's tested separately)
        # don't test a CategoricalIndex because categories change (GH 18291)
        if isinstance(indices, (MultiIndex, CategoricalIndex)):
            pytest.skip('Skip check for MultiIndex/CategoricalIndex')

        # GH 17896
        expected = indices.drop_duplicates()
        for level in 0, indices.name, None:
            result = indices.unique(level=level)
            tm.assert_index_equal(result, expected)

        for level in 3, 'wrong':
            pytest.raises((IndexError, KeyError), indices.unique, level=level)

    def test_get_unique_index(self, indices):
        """_get_unique_index with and without NaN-like values present."""
        # MultiIndex tested separately
        if not len(indices) or isinstance(indices, MultiIndex):
            pytest.skip('Skip check for empty Index and MultiIndex')

        idx = indices[[0] * 5]
        idx_unique = indices[[0]]

        # We test against `idx_unique`, so first we make sure it's unique
        # and doesn't contain nans.
        assert idx_unique.is_unique is True
        try:
            assert idx_unique.hasnans is False
        except NotImplementedError:
            pass

        for dropna in [False, True]:
            result = idx._get_unique_index(dropna=dropna)
            tm.assert_index_equal(result, idx_unique)

        # nans:
        if not indices._can_hold_na:
            pytest.skip('Skip na-check if index cannot hold na')

        if needs_i8_conversion(indices):
            vals = indices.asi8[[0] * 5]
            vals[0] = iNaT
        else:
            vals = indices.values[[0] * 5]
            vals[0] = np.nan

        vals_unique = vals[:2]
        idx_nan = indices._shallow_copy(vals)
        idx_unique_nan = indices._shallow_copy(vals_unique)
        assert idx_unique_nan.is_unique is True

        assert idx_nan.dtype == indices.dtype
        assert idx_unique_nan.dtype == indices.dtype

        for dropna, expected in zip([False, True],
                                    [idx_unique_nan,
                                     idx_unique]):
            for i in [idx_nan, idx_unique_nan]:
                result = i._get_unique_index(dropna=dropna)
                tm.assert_index_equal(result, expected)

    def test_sort(self, indices):
        """Calling sort() on an index raises TypeError."""
        pytest.raises(TypeError, indices.sort)

    def test_mutability(self, indices):
        """Item assignment on an index raises TypeError."""
        if not len(indices):
            pytest.skip('Skip check for empty Index')
        pytest.raises(TypeError, indices.__setitem__, 0, indices[0])

    def test_view(self, indices):
        """view() keeps the index name."""
        assert indices.view().name == indices.name

    def test_compat(self, indices):
        """tolist() agrees with list()."""
        assert indices.tolist() == list(indices)

    def test_searchsorted_monotonic(self, indices):
        """_searchsorted_monotonic / searchsorted for the first element."""
        # GH17271
        # not implemented for tuple searches in MultiIndex
        # or Intervals searches in IntervalIndex
        if isinstance(indices, (MultiIndex, pd.IntervalIndex)):
            pytest.skip('Skip check for MultiIndex/IntervalIndex')

        # nothing to test if the index is empty
        if indices.empty:
            pytest.skip('Skip check for empty Index')
        value = indices[0]

        # determine the expected results (handle dupes for 'right')
        expected_left, expected_right = 0, (indices == value).argmin()
        if expected_right == 0:
            # all values are the same, expected_right should be length
            expected_right = len(indices)

        # test _searchsorted_monotonic in all cases
        # test searchsorted only for increasing
        if indices.is_monotonic_increasing:
            ssm_left = indices._searchsorted_monotonic(value, side='left')
            assert expected_left == ssm_left

            ssm_right = indices._searchsorted_monotonic(value, side='right')
            assert expected_right == ssm_right

            ss_left = indices.searchsorted(value, side='left')
            assert expected_left == ss_left

            ss_right = indices.searchsorted(value, side='right')
            assert expected_right == ss_right

        elif indices.is_monotonic_decreasing:
            ssm_left = indices._searchsorted_monotonic(value, side='left')
            assert expected_left == ssm_left

            ssm_right = indices._searchsorted_monotonic(value, side='right')
            assert expected_right == ssm_right
        else:
            # non-monotonic should raise.
            with pytest.raises(ValueError):
                indices._searchsorted_monotonic(value, side='left')

    def test_pickle(self, indices):
        """An index with a temporary name equals its pickled round trip."""
        original_name, indices.name = indices.name, 'foo'
        try:
            unpickled = tm.round_trip_pickle(indices)
            assert indices.equals(unpickled)
        finally:
            # restore the name even when the assertion above fails, so the
            # temporary rename never outlives this test
            indices.name = original_name

    @pytest.mark.parametrize('keep', ['first', 'last', False])
    def test_duplicated(self, indices, keep):
        """Index.duplicated agrees with Series.duplicated on a resample."""
        if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)):
            # MultiIndex tested separately in:
            # tests/indexes/multi/test_unique_and_duplicates
            pytest.skip('Skip check for empty Index, MultiIndex, RangeIndex')

        holder = type(indices)

        idx = holder(indices)
        if idx.has_duplicates:
            # We are testing the duplicated-method here, so we need to know
            # exactly which indices are duplicate and how (for the result).
            # This is not possible if "idx" has duplicates already, which we
            # therefore remove. This is seemingly circular, as drop_duplicates
            # invokes duplicated, but in the end, it all works out because we
            # cross-check with Series.duplicated, which is tested separately.
            idx = idx.drop_duplicates()

        n, k = len(idx), 10
        duplicated_selection = np.random.choice(n, k * n)
        expected = pd.Series(duplicated_selection).duplicated(keep=keep).values
        idx = holder(idx.values[duplicated_selection])

        result = idx.duplicated(keep=keep)
        tm.assert_numpy_array_equal(result, expected)

    def test_has_duplicates(self, indices):
        """An index built from one repeated element reports duplicates."""
        holder = type(indices)
        if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)):
            # MultiIndex tested separately in:
            #   tests/indexes/multi/test_unique_and_duplicates.
            # RangeIndex is unique by definition.
            pytest.skip('Skip check for empty Index, MultiIndex, '
                        'and RangeIndex')

        idx = holder([indices[0]] * 5)
        assert idx.is_unique is False
        assert idx.has_duplicates is True
@@ -1,109 +0,0 @@
import warnings
import numpy as np
from pandas.compat import u
from pandas.core.indexes.frozen import FrozenList, FrozenNDArray
from pandas.tests.test_base import CheckImmutable, CheckStringMixin
from pandas.util import testing as tm
class TestFrozenList(CheckImmutable, CheckStringMixin):
    """FrozenList tests on top of the shared immutability/string mixins."""

    # class attributes used by the mixin base classes (defined outside this
    # file; their exact use is not visible here)
    mutable_methods = ('extend', 'pop', 'remove', 'insert')
    unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"])

    def setup_method(self, _):
        """Create the list/FrozenList pair shared by the tests."""
        self.klass = FrozenList
        self.lst = [1, 2, 3, 4, 5]
        self.container = FrozenList(self.lst)

    def test_add(self):
        """Concatenation with a tuple works from either side."""
        appended = self.container + (1, 2, 3)
        self.check_result(appended, FrozenList(self.lst + [1, 2, 3]))

        prepended = (1, 2, 3) + self.container
        self.check_result(prepended, FrozenList([1, 2, 3] + self.lst))

    def test_iadd(self):
        """In-place add rebinds the name and leaves the original intact."""
        extended = untouched = self.container

        extended += [5]
        self.check_result(extended, self.lst + [5])

        # Other shouldn't be mutated.
        self.check_result(untouched, self.lst)

    def test_union(self):
        """union() with a tuple appends its items."""
        combined = self.container.union((1, 2, 3))
        self.check_result(combined, FrozenList(self.lst + [1, 2, 3]))

    def test_difference(self):
        """difference() removes the given item."""
        remaining = self.container.difference([2])
        self.check_result(remaining, FrozenList([1, 3, 4, 5]))

    def test_difference_dupe(self):
        """difference() removes every occurrence of a repeated item."""
        remaining = FrozenList([1, 2, 3, 2]).difference([2])
        self.check_result(remaining, FrozenList([1, 3]))
class TestFrozenNDArray(CheckImmutable, CheckStringMixin):
    """FrozenNDArray tests on top of the shared immutability/string mixins."""

    mutable_methods = ('put', 'itemset', 'fill')

    def setup_method(self, _):
        """Build the containers, silencing the constructor FutureWarning."""
        self.lst = [3, 5, 7, -2]
        self.klass = FrozenNDArray

        with warnings.catch_warnings(record=True):
            warnings.simplefilter("ignore", FutureWarning)

            self.container = FrozenNDArray(self.lst)
            self.unicode_container = FrozenNDArray(
                [u("\u05d0"), u("\u05d1"), "c"])

    def test_constructor_warns(self):
        """Constructing a FrozenNDArray emits a FutureWarning."""
        # see gh-9031
        with tm.assert_produces_warning(FutureWarning):
            FrozenNDArray([1, 2, 3])

    def test_shallow_copying(self):
        """Views and shallow copies keep the type and stay immutable."""
        snapshot = self.container.copy()
        assert isinstance(self.container.view(), FrozenNDArray)
        assert not isinstance(self.container.view(np.ndarray), FrozenNDArray)
        assert self.container.view() is not self.container
        tm.assert_numpy_array_equal(self.container, snapshot)

        # Shallow copy should be the same too
        assert isinstance(self.container._shallow_copy(), FrozenNDArray)

        # setting should not be allowed
        def assign_first(container):
            container[0] = 16

        self.check_mutable_error(assign_first, self.container)

    def test_values(self):
        """values() hands out an array whose mutation leaves the container."""
        snapshot = self.container.view(np.ndarray).copy()
        replacement = snapshot[0] + 15

        extracted = self.container.values()
        tm.assert_numpy_array_equal(snapshot, extracted)
        assert snapshot is not extracted

        # writing to the extracted array must not touch the container
        extracted[0] = replacement
        assert isinstance(self.container, FrozenNDArray)
        tm.assert_numpy_array_equal(self.container.values(), snapshot)
        assert extracted[0] == replacement

    def test_searchsorted(self):
        """searchsorted works positionally; the `v=` keyword warns."""
        position = 2
        assert self.container.searchsorted(7) == position

        with tm.assert_produces_warning(FutureWarning):
            assert self.container.searchsorted(v=7) == position
File diff suppressed because it is too large Load Diff
@@ -1,887 +0,0 @@
# -*- coding: utf-8 -*-
from datetime import datetime
import numpy as np
import pytest
from pandas.compat import PY3, range, u
import pandas as pd
from pandas import Float64Index, Index, Int64Index, RangeIndex, Series
import pandas.util.testing as tm
from .test_numeric import Numeric
class TestRangeIndex(Numeric):
_holder = RangeIndex
_compat_props = ['shape', 'ndim', 'size']
def setup_method(self, method):
    """Build the RangeIndex fixtures used by this class.

    Stores an increasing range under 'index' and a decreasing one under
    'index_dec' in ``self.indices``, then calls ``self.setup_indices()``.
    """
    increasing = RangeIndex(0, 20, 2, name='foo')
    decreasing = RangeIndex(18, -1, -2, name='bar')
    self.indices = dict(index=increasing, index_dec=decreasing)
    self.setup_indices()
def create_index(self):
    """Return a fresh five-element RangeIndex (``RangeIndex(5)``)."""
    index = RangeIndex(5)
    return index
def test_can_hold_identifiers(self):
    """_can_hold_identifiers_and_holds_name is False for an element."""
    index = self.create_index()
    first = index[0]
    assert index._can_hold_identifiers_and_holds_name(first) is False
def test_too_many_names(self):
    """Assigning two names to ``names`` raises a 'Length' ValueError."""
    surplus_names = ["roger", "harold"]
    with pytest.raises(ValueError, match="^Length"):
        self.index.names = surplus_names
def test_constructor(self):
    """Check start/stop/step, name and values for the constructor forms."""
    # stop only
    rng = RangeIndex(5)
    values = np.arange(5, dtype=np.int64)
    assert isinstance(rng, RangeIndex)
    assert rng._start == 0
    assert rng._stop == 5
    assert rng._step == 1
    assert rng.name is None
    tm.assert_index_equal(Index(values), rng)

    # start and stop
    rng = RangeIndex(1, 5)
    values = np.arange(1, 5, dtype=np.int64)
    assert isinstance(rng, RangeIndex)
    assert rng._start == 1
    tm.assert_index_equal(Index(values), rng)

    # start, stop and step
    rng = RangeIndex(1, 5, 2)
    values = np.arange(1, 5, 2, dtype=np.int64)
    assert isinstance(rng, RangeIndex)
    assert rng._step == 2
    tm.assert_index_equal(Index(values), rng)

    # every spelling of an empty range
    empty_ranges = [RangeIndex(0), RangeIndex(start=0), RangeIndex(stop=0),
                    RangeIndex(0, 0)]
    for rng in empty_ranges:
        values = np.empty(0, dtype=np.int64)
        assert isinstance(rng, RangeIndex)
        assert rng._start == 0
        assert rng._stop == 0
        assert rng._step == 1
        tm.assert_index_equal(Index(values), rng)

    # the same spellings with a name
    named_ranges = [RangeIndex(0, name='Foo'),
                    RangeIndex(start=0, name='Foo'),
                    RangeIndex(stop=0, name='Foo'),
                    RangeIndex(0, 0, name='Foo')]
    for rng in named_ranges:
        assert isinstance(rng, RangeIndex)
        assert rng.name == 'Foo'

    # we don't allow on a bare Index
    with pytest.raises(TypeError):
        Index(0, 1000)
def test_constructor_invalid_args(self):
    """Missing or non-integer constructor arguments raise TypeError."""
    msg = "RangeIndex\\(\\.\\.\\.\\) must be called with integers"

    # no range arguments at all, with and without a name
    with pytest.raises(TypeError, match=msg):
        RangeIndex()
    with pytest.raises(TypeError, match=msg):
        RangeIndex(name='Foo')

    # invalid args
    bad_args = [Index(['a', 'b']), Series(['a', 'b']), np.array(['a', 'b']),
                [], 'foo', datetime(2000, 1, 1, 0, 0), np.arange(0, 10),
                np.array([1]), [1]]
    for bad in bad_args:
        with pytest.raises(TypeError):
            RangeIndex(bad)
def test_constructor_same(self):
    """Passing a RangeIndex to the constructor, with and without copy."""
    # pass thru w and w/o copy
    source = RangeIndex(1, 5, 2)
    assert RangeIndex(source, copy=False).identical(source)

    tm.assert_index_equal(RangeIndex(source, copy=True), source, exact=True)
    tm.assert_index_equal(RangeIndex(source), source, exact=True)

    # a float dtype is refused
    with pytest.raises(TypeError):
        RangeIndex(source, dtype='float64')
def test_constructor_range(self):
    """from_range and Index accept a range; RangeIndex(range) does not."""
    with pytest.raises(TypeError):
        RangeIndex(range(1, 5, 2))

    # (input range, expected RangeIndex); range(5, 1) is an invalid range
    cases = [
        (range(1, 5, 2), RangeIndex(1, 5, 2)),
        (range(5, 6), RangeIndex(5, 6, 1)),
        (range(5, 1), RangeIndex(0, 0, 1)),
        (range(5), RangeIndex(0, 5, 1)),
    ]
    for source, expected in cases:
        converted = RangeIndex.from_range(source)
        tm.assert_index_equal(converted, expected, exact=True)

    # the generic Index constructor also produces a RangeIndex
    converted = Index(range(1, 5, 2))
    tm.assert_index_equal(converted, RangeIndex(1, 5, 2), exact=True)

    with pytest.raises(TypeError):
        Index(range(1, 5, 2), dtype='float64')
def test_constructor_name(self):
# GH12288
orig = RangeIndex(10)
orig.name = 'original'
copy = RangeIndex(orig)
copy.name = 'copy'
assert orig.name == 'original'
assert copy.name == 'copy'
new = Index(copy)
assert new.name == 'copy'
new.name = 'new'
assert orig.name == 'original'
assert copy.name == 'copy'
assert new.name == 'new'
def test_constructor_corner(self):
    """Values dtype is int64; non-integer arguments or dtype raise."""
    object_values = np.array([1, 2, 3, 4], dtype=object)
    rng = RangeIndex(1, 5)
    assert rng.values.dtype == np.int64
    tm.assert_index_equal(rng, Index(object_values))

    # non-int raise Exception
    with pytest.raises(TypeError):
        RangeIndex('1', '10', '1')
    with pytest.raises(TypeError):
        RangeIndex(1.1, 10.2, 1.3)

    # invalid passed type
    with pytest.raises(TypeError):
        RangeIndex(1, 5, dtype='float64')
def test_copy(self):
    """copy() returns a distinct but identical RangeIndex."""
    source = RangeIndex(5, name='Foo')
    duplicate = source.copy()

    assert duplicate is not source
    assert duplicate.identical(source)

    # range parameters and name are carried over
    assert duplicate._start == 0
    assert duplicate._stop == 5
    assert duplicate._step == 1
    assert duplicate.name == 'Foo'
def test_repr(self):
    """repr() has the expected text and evaluates back to an equal index."""
    named = RangeIndex(5, name='Foo')
    text = repr(named)
    # Python 2 renders the name with a unicode prefix
    expected = ("RangeIndex(start=0, stop=5, step=1, name='Foo')" if PY3
                else "RangeIndex(start=0, stop=5, step=1, name=u'Foo')")
    assert text == expected

    rebuilt = eval(text)
    tm.assert_index_equal(rebuilt, named, exact=True)

    # decreasing range without a name
    decreasing = RangeIndex(5, 0, -1)
    text = repr(decreasing)
    expected = "RangeIndex(start=5, stop=0, step=-1)"
    assert text == expected

    rebuilt = eval(text)
    tm.assert_index_equal(rebuilt, decreasing, exact=True)
def test_insert(self):
    """insert() at position 0, and insertion of missing values."""
    full = RangeIndex(5, name='Foo')
    middle = full[1:4]

    # test 0th element
    tm.assert_index_equal(full[0:4], middle.insert(0, full[0]))

    # GH 18295 (test missing)
    with_gap = Float64Index([0, np.nan, 1, 2, 3, 4])
    for missing in (np.nan, pd.NaT, None):
        inserted = RangeIndex(5).insert(1, missing)
        tm.assert_index_equal(inserted, with_gap)
def test_delete(self):
    """delete() of the first/last element; out-of-bounds raises."""
    idx = RangeIndex(5, name='Foo')

    # (position to delete, slice that should remain)
    for position, remaining in ((0, idx[1:]), (-1, idx[:-1])):
        expected = remaining.astype(int)
        result = idx.delete(position)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name

    with pytest.raises((IndexError, ValueError)):
        # either depending on numpy version
        result = idx.delete(len(idx))
def test_view(self):
    """view() with no argument, with 'i8', and with RangeIndex."""
    source = RangeIndex(0, name='Foo')

    plain_view = source.view()
    assert plain_view.name == 'Foo'

    int_view = source.view('i8')
    tm.assert_numpy_array_equal(source.values, int_view)

    typed_view = source.view(RangeIndex)
    tm.assert_index_equal(source, typed_view)
def test_dtype(self):
assert self.index.dtype == np.int64
def test_is_monotonic(self):
    """Monotonicity flags for increasing, decreasing and short ranges."""
    # the increasing fixture
    assert self.index.is_monotonic is True
    assert self.index.is_monotonic_increasing is True
    assert self.index.is_monotonic_decreasing is False
    assert self.index._is_strictly_monotonic_increasing is True
    assert self.index._is_strictly_monotonic_decreasing is False

    # a decreasing range
    decreasing = RangeIndex(4, 0, -1)
    assert decreasing.is_monotonic is False
    assert decreasing._is_strictly_monotonic_increasing is False
    assert decreasing.is_monotonic_decreasing is True
    assert decreasing._is_strictly_monotonic_decreasing is True

    # RangeIndex(1, 2), RangeIndex(2, 1) and RangeIndex(1, 1) report True
    # for every flag
    for bounds in ((1, 2), (2, 1), (1, 1)):
        short = RangeIndex(*bounds)
        assert short.is_monotonic is True
        assert short.is_monotonic_increasing is True
        assert short.is_monotonic_decreasing is True
        assert short._is_strictly_monotonic_increasing is True
        assert short._is_strictly_monotonic_decreasing is True
def test_equals_range(self):
equiv_pairs = [(RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)),
(RangeIndex(0), RangeIndex(1, -1, 3)),
(RangeIndex(1, 2, 3), RangeIndex(1, 3, 4)),
(RangeIndex(0, -9, -2), RangeIndex(0, -10, -2))]
for left, right in equiv_pairs:
assert left.equals(right)
assert right.equals(left)
def test_logical_compat(self):
idx = self.create_index()
assert idx.all() == idx.values.all()
assert idx.any() == idx.values.any()
def test_identical(self):
    """identical() against copies with the same or another dtype/name."""
    wrapped = Index(self.index.copy())
    assert wrapped.identical(self.index)

    # we don't allow object dtype for RangeIndex
    if isinstance(self.index, RangeIndex):
        return

    # the remaining checks only run when self.index is not a RangeIndex
    as_object = Index(wrapped, dtype=object)
    assert not wrapped.identical(as_object)

    wrapped = self.index.copy(dtype=object)
    wrapped = wrapped.rename('foo')
    same_values = Index(wrapped, dtype=object)
    assert same_values.identical(self.index.copy(dtype=object))

    assert not wrapped.identical(self.index)
    assert Index(same_values, name='foo', dtype=object).identical(wrapped)

    assert not self.index.copy(dtype=object).identical(
        self.index.copy(dtype='int64'))
def test_get_indexer(self):
    """Exact get_indexer: targets missing from the index map to -1."""
    positions = self.index.get_indexer(RangeIndex(10))
    wanted = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp)
    tm.assert_numpy_array_equal(positions, wanted)
def test_get_indexer_pad(self):
    """get_indexer with method='pad' for targets 0..9."""
    positions = self.index.get_indexer(RangeIndex(10), method='pad')
    wanted = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp)
    tm.assert_numpy_array_equal(positions, wanted)
def test_get_indexer_backfill(self):
    """get_indexer with method='backfill' for targets 0..9."""
    positions = self.index.get_indexer(RangeIndex(10), method='backfill')
    wanted = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp)
    tm.assert_numpy_array_equal(positions, wanted)
def test_join_outer(self):
    """Outer join against a decreasing Int64Index and RangeIndex."""
    # expected values and indexers are shared by both stanzas
    expected_index = Int64Index([0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18,
                                 19, 20, 21, 22, 23, 24, 25])
    expected_left = np.array([0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9,
                              -1, -1, -1, -1, -1, -1, -1], dtype=np.intp)
    expected_right = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7,
                               6, 5, 4, 3, 2, 1, 0], dtype=np.intp)

    # join with Int64Index
    other = Int64Index(np.arange(25, 14, -1))

    joined, left_idx, right_idx = self.index.join(other, how='outer',
                                                  return_indexers=True)
    plain_joined = self.index.join(other, how='outer')
    tm.assert_index_equal(joined, plain_joined)

    assert isinstance(joined, Int64Index)
    assert not isinstance(joined, RangeIndex)
    tm.assert_index_equal(joined, expected_index)
    tm.assert_numpy_array_equal(left_idx, expected_left)
    tm.assert_numpy_array_equal(right_idx, expected_right)

    # join with RangeIndex
    other = RangeIndex(25, 14, -1)

    joined, left_idx, right_idx = self.index.join(other, how='outer',
                                                  return_indexers=True)
    plain_joined = self.index.join(other, how='outer')
    tm.assert_index_equal(joined, plain_joined)

    assert isinstance(joined, Int64Index)
    assert not isinstance(joined, RangeIndex)
    tm.assert_index_equal(joined, expected_index)
    tm.assert_numpy_array_equal(left_idx, expected_left)
    tm.assert_numpy_array_equal(right_idx, expected_right)
def test_join_inner(self):
    """Inner join against a decreasing Int64Index and RangeIndex."""
    # Join with non-RangeIndex
    other = Int64Index(np.arange(25, 14, -1))

    joined, left_idx, right_idx = self.index.join(other, how='inner',
                                                  return_indexers=True)

    # no guarantee of sortedness, so sort for comparison purposes
    order = joined.argsort()
    joined = joined.take(order)
    left_idx = left_idx.take(order)
    right_idx = right_idx.take(order)

    expected_index = Int64Index([16, 18])
    expected_left = np.array([8, 9], dtype=np.intp)
    expected_right = np.array([9, 7], dtype=np.intp)

    assert isinstance(joined, Int64Index)
    tm.assert_index_equal(joined, expected_index)
    tm.assert_numpy_array_equal(left_idx, expected_left)
    tm.assert_numpy_array_equal(right_idx, expected_right)

    # Join two RangeIndex
    other = RangeIndex(25, 14, -1)

    joined, left_idx, right_idx = self.index.join(other, how='inner',
                                                  return_indexers=True)

    assert isinstance(joined, RangeIndex)
    tm.assert_index_equal(joined, expected_index)
    tm.assert_numpy_array_equal(left_idx, expected_left)
    tm.assert_numpy_array_equal(right_idx, expected_right)
def test_join_left(self):
    """Left-join self.index with an Int64Index and with a RangeIndex.

    In both cases the result must be the calling RangeIndex itself, the
    left indexer must be None and the right indexer must locate 16 and 18
    in the decreasing ``other``.
    """
    # Join with Int64Index
    other = Int64Index(np.arange(25, 14, -1))

    res, lidx, ridx = self.index.join(other, how='left',
                                      return_indexers=True)
    eres = self.index
    eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], dtype=np.intp)

    assert isinstance(res, RangeIndex)
    tm.assert_index_equal(res, eres)
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, eridx)

    # Join with RangeIndex: same values as above, but an actual RangeIndex
    # so that this stanza does not merely repeat the Int64Index case
    other = RangeIndex(25, 14, -1)

    res, lidx, ridx = self.index.join(other, how='left',
                                      return_indexers=True)

    assert isinstance(res, RangeIndex)
    tm.assert_index_equal(res, eres)
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, eridx)
def test_join_right(self):
    """Right-join self.index with an Int64Index and with a RangeIndex.

    The result must equal ``other`` and have its type, the right indexer
    must be None, and the left indexer must locate 18 and 16 in
    ``self.index``.
    """
    # Join with Int64Index
    other = Int64Index(np.arange(25, 14, -1))

    res, lidx, ridx = self.index.join(other, how='right',
                                      return_indexers=True)
    eres = other
    elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1],
                     dtype=np.intp)

    # check the type of the join result, not of the input built above
    assert isinstance(res, Int64Index)
    tm.assert_index_equal(res, eres)
    tm.assert_numpy_array_equal(lidx, elidx)
    assert ridx is None

    # Join with RangeIndex
    other = RangeIndex(25, 14, -1)

    res, lidx, ridx = self.index.join(other, how='right',
                                      return_indexers=True)
    eres = other

    # check the type of the join result, not of the input built above
    assert isinstance(res, RangeIndex)
    tm.assert_index_equal(res, eres)
    tm.assert_numpy_array_equal(lidx, elidx)
    assert ridx is None
def test_join_non_int_index(self):
    """Joins between self.index and an object-dtype Index, both ways."""
    other = Index([3, 6, 7, 8, 10], dtype=object)

    # outer join gives the same result from either side
    outer_from_self = self.index.join(other, how='outer')
    outer_from_other = other.join(self.index, how='outer')
    expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18])
    tm.assert_index_equal(outer_from_self, outer_from_other)
    tm.assert_index_equal(outer_from_self, expected)

    # inner join likewise
    inner_from_self = self.index.join(other, how='inner')
    inner_from_other = other.join(self.index, how='inner')
    expected = Index([6, 8, 10])
    tm.assert_index_equal(inner_from_self, inner_from_other)
    tm.assert_index_equal(inner_from_self, expected)

    # left joins keep the caller's values
    tm.assert_index_equal(self.index.join(other, how='left'),
                          self.index.astype(object))
    tm.assert_index_equal(other.join(self.index, how='left'), other)

    # right joins keep the argument's values
    tm.assert_index_equal(self.index.join(other, how='right'), other)
    tm.assert_index_equal(other.join(self.index, how='right'),
                          self.index.astype(object))
def test_join_non_unique(self):
    """Default join against an Index that contains duplicates."""
    other = Index([4, 4, 3, 3])

    joined, left_idx, right_idx = self.index.join(other,
                                                  return_indexers=True)

    expected_index = Int64Index([0, 2, 4, 4, 6, 8, 10, 12, 14, 16, 18])
    expected_left = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9],
                             dtype=np.intp)
    expected_right = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1],
                              dtype=np.intp)

    tm.assert_index_equal(joined, expected_index)
    tm.assert_numpy_array_equal(left_idx, expected_left)
    tm.assert_numpy_array_equal(right_idx, expected_right)
def test_join_self(self):
    """Joining an index with itself returns the very same object."""
    for how in ('outer', 'inner', 'left', 'right'):
        same = self.index.join(self.index, how=how)
        # identity, not mere equality, is required here
        assert self.index is same
@pytest.mark.parametrize("sort", [True, False])
def test_intersection(self, sort):
    """Intersect range indexes with integer and range indexes.

    Covers an integer index, increasing and decreasing ranges, two
    decreasing ranges (GH 17296), non-overlapping ranges, empty ranges
    and ranges whose steps never line up.  ``sort`` is forwarded to every
    ``intersection`` call.
    """

    def sorted_common(rhs):
        # Reference answer: the sorted values shared by self.index and rhs.
        shared = np.intersect1d(self.index.values, rhs.values)
        return Index(np.sort(shared))

    # intersect with Int64Index
    other = Index(np.arange(1, 6))
    result = self.index.intersection(other, sort=sort)
    tm.assert_index_equal(result, sorted_common(other))

    result = other.intersection(self.index, sort=sort)
    tm.assert_index_equal(result, sorted_common(other))

    # intersect with increasing RangeIndex
    other = RangeIndex(1, 6)
    result = self.index.intersection(other, sort=sort)
    tm.assert_index_equal(result, sorted_common(other))

    # intersect with decreasing RangeIndex
    other = RangeIndex(5, 0, -1)
    result = self.index.intersection(other, sort=sort)
    expected = sorted_common(other)
    tm.assert_index_equal(result, expected)

    # reversed (GH 17296)
    result = other.intersection(self.index, sort=sort)
    tm.assert_index_equal(result, expected)

    # GH 17296: intersect two decreasing RangeIndexes
    first = RangeIndex(10, -2, -2)
    other = RangeIndex(5, -4, -1)
    expected = first.astype(int).intersection(other.astype(int), sort=sort)
    result = first.intersection(other, sort=sort).astype(int)
    tm.assert_index_equal(result, expected)

    # reversed
    result = other.intersection(first, sort=sort).astype(int)
    tm.assert_index_equal(result, expected)

    index = RangeIndex(5)

    # intersect of non-overlapping indices
    for other in (RangeIndex(5, 10, 1), RangeIndex(-1, -5, -1)):
        result = index.intersection(other, sort=sort)
        tm.assert_index_equal(result, RangeIndex(0, 0, 1))

    # intersection of empty indices
    other = RangeIndex(0, 0, 1)
    result = index.intersection(other, sort=sort)
    expected = RangeIndex(0, 0, 1)
    tm.assert_index_equal(result, expected)

    result = other.intersection(index, sort=sort)
    tm.assert_index_equal(result, expected)

    # intersection of non-overlapping values based on start value and gcd
    index = RangeIndex(1, 10, 2)
    other = RangeIndex(0, 10, 4)
    result = index.intersection(other, sort=sort)
    tm.assert_index_equal(result, RangeIndex(0, 0, 1))
def test_union_noncomparable(self):
    """Union with an object index of datetimes concatenates both sides.

    The expected result in each direction is the plain concatenation of
    the calling index followed by the argument.
    """
    from datetime import datetime, timedelta

    # corner case, non-Int64Index
    start = datetime.now()
    stamps = Index([start + timedelta(offset) for offset in range(4)],
                   dtype=object)

    forward = self.index.union(stamps)
    tm.assert_index_equal(forward,
                          Index(np.concatenate((self.index, stamps))))

    backward = stamps.union(self.index)
    tm.assert_index_equal(backward,
                          Index(np.concatenate((stamps, self.index))))
def test_union(self):
    """Table-driven check of ``union`` between range/integer indexes.

    Each case is ``(left, right, expected)``.  The union is required to
    be the same in both directions, with the exact index class of
    ``expected``; the union computed through ``left._int64index`` only
    has to be equal in value.
    """
    RI = RangeIndex
    I64 = Int64Index
    cases = [(RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1)),
             (RI(0, 10, 1), RI(5, 20, 1), RI(0, 20, 1)),
             (RI(0, 10, 1), RI(10, 20, 1), RI(0, 20, 1)),
             (RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1)),
             (RI(0, -10, -1), RI(-10, -20, -1), RI(-19, 1, 1)),
             (RI(0, 10, 2), RI(1, 10, 2), RI(0, 10, 1)),
             (RI(0, 11, 2), RI(1, 12, 2), RI(0, 12, 1)),
             (RI(0, 21, 4), RI(-2, 24, 4), RI(-2, 24, 2)),
             (RI(0, -20, -2), RI(-1, -21, -2), RI(-19, 1, 1)),
             (RI(0, 100, 5), RI(0, 100, 20), RI(0, 100, 5)),
             (RI(0, -100, -5), RI(5, -100, -20), RI(-95, 10, 5)),
             (RI(0, -11, -1), RI(1, -12, -4), RI(-11, 2, 1)),
             (RI(0), RI(0), RI(0)),
             (RI(0, -10, -2), RI(0), RI(0, -10, -2)),
             (RI(0, 100, 2), RI(100, 150, 200), RI(0, 102, 2)),
             (RI(0, -100, -2), RI(-100, 50, 102), RI(-100, 4, 2)),
             (RI(0, -100, -1), RI(0, -50, -3), RI(-99, 1, 1)),
             (RI(0, 1, 1), RI(5, 6, 10), RI(0, 6, 5)),
             (RI(0, 10, 5), RI(-5, -6, -20), RI(-5, 10, 5)),
             (RI(0, 3, 1), RI(4, 5, 1), I64([0, 1, 2, 4])),
             (RI(0, 10, 1), I64([]), RI(0, 10, 1)),
             (RI(0), I64([1, 5, 6]), I64([1, 5, 6]))]
    for left, right, expected in cases:
        left_first = left.union(right)
        right_first = right.union(left)
        via_int64 = left._int64index.union(right)
        tm.assert_index_equal(left_first, expected, exact=True)
        tm.assert_index_equal(right_first, expected, exact=True)
        tm.assert_index_equal(via_int64, expected)
def test_nbytes(self):
    """``nbytes`` of a range index is small and independent of length."""
    # memory savings vs int index
    large = RangeIndex(0, 1000)
    assert large.nbytes < large._int64index.nbytes / 10

    # constant memory usage
    small = RangeIndex(0, 10)
    assert large.nbytes == small.nbytes
def test_cant_or_shouldnt_cast(self):
    """String arguments to ``RangeIndex`` raise ``TypeError``."""
    rejected = (
        ('foo', 'bar', 'baz'),  # can't
        ('0', '1', '2'),        # shouldn't
    )
    for args in rejected:
        with pytest.raises(TypeError):
            RangeIndex(*args)
def test_view_Index(self):
self.index.view(Index)
def test_prevent_casting(self):
result = self.index.astype('O')
assert result.dtype == np.object_
def test_take_preserve_name(self):
index = RangeIndex(1, 5, name='foo')
taken = index.take([3, 0, 1])
assert index.name == taken.name
def test_take_fill_value(self):
    """``take`` on a range index: plain use, ``fill_value`` and bounds.

    Supplying ``fill_value`` (with filling allowed) must raise because
    the index cannot hold NA; with ``allow_fill=False`` negative
    positions are ordinary from-the-end positions.  An out-of-bounds
    position raises ``IndexError``.
    """
    # GH 12631
    idx = pd.RangeIndex(1, 4, name='xxx')
    msg = "Unable to fill values because RangeIndex cannot contain NA"

    result = idx.take(np.array([1, 0, -1]))
    expected = pd.Int64Index([2, 1, 3], name='xxx')
    tm.assert_index_equal(result, expected)

    # fill_value
    with pytest.raises(ValueError, match=msg):
        idx.take(np.array([1, 0, -1]), fill_value=True)

    # allow_fill=False
    result = idx.take(np.array([1, 0, -1]), allow_fill=False,
                      fill_value=True)
    expected = pd.Int64Index([2, 1, 3], name='xxx')
    tm.assert_index_equal(result, expected)

    # other negative positions are rejected the same way
    for last in (-2, -5):
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, last]), fill_value=True)

    with pytest.raises(IndexError):
        idx.take(np.array([1, -5]))
def test_print_unicode_columns(self):
    """``repr`` of columns with non-ASCII labels does not raise."""
    data = {u("\u05d0"): [1, 2, 3],
            "\u05d1": [4, 5, 6],
            "c": [7, 8, 9]}
    frame = pd.DataFrame(data)
    repr(frame.columns)  # should not raise UnicodeDecodeError
def test_repr_roundtrip(self):
    """Evaluating ``repr(self.index)`` rebuilds an equal index."""
    # eval is applied to the index's own repr, not to external input
    rebuilt = eval(repr(self.index))
    tm.assert_index_equal(rebuilt, self.index)
def test_slice_keep_name(self):
idx = RangeIndex(1, 2, name='asdf')
assert idx.name == idx[1:].name
def test_explicit_conversions(self):
    """Float arithmetic on a range index produces ``Float64Index``.

    Covers multiplication by a float scalar from either side and
    subtraction with a float ndarray from either side.
    """
    # GH 8608
    # add/sub are overridden explicitly for Float/Int Index
    idx = RangeIndex(5)

    # float conversions
    scaled = np.arange(5, dtype='int64') * 3.2
    expected = Float64Index(scaled)
    fidx = idx * 3.2
    tm.assert_index_equal(fidx, expected)
    fidx = 3.2 * idx
    tm.assert_index_equal(fidx, expected)

    # interops with numpy arrays
    expected = Float64Index(scaled)
    zeros = np.zeros(5, dtype='float64')
    tm.assert_index_equal(fidx - zeros, expected)

    expected = Float64Index(-scaled)
    zeros = np.zeros(5, dtype='float64')
    tm.assert_index_equal(zeros - fidx, expected)
def test_has_duplicates(self):
for ind in self.indices:
if not len(ind):
continue
idx = self.indices[ind]
assert idx.is_unique
assert not idx.has_duplicates
def test_extended_gcd(self):
result = self.index._extended_gcd(6, 10)
assert result[0] == result[1] * 6 + result[2] * 10
assert 2 == result[0]
result = self.index._extended_gcd(10, 6)
assert 2 == result[1] * 10 + result[2] * 6
assert 2 == result[0]
def test_min_fitting_element(self):
result = RangeIndex(0, 20, 2)._min_fitting_element(1)
assert 2 == result
result = RangeIndex(1, 6)._min_fitting_element(1)
assert 1 == result
result = RangeIndex(18, -2, -2)._min_fitting_element(1)
assert 2 == result
result = RangeIndex(5, 0, -1)._min_fitting_element(1)
assert 1 == result
big_num = 500000000000000000000000
result = RangeIndex(5, big_num * 2, 1)._min_fitting_element(big_num)
assert big_num == result
def test_max_fitting_element(self):
result = RangeIndex(0, 20, 2)._max_fitting_element(17)
assert 16 == result
result = RangeIndex(1, 6)._max_fitting_element(4)
assert 4 == result
result = RangeIndex(18, -2, -2)._max_fitting_element(17)
assert 16 == result
result = RangeIndex(5, 0, -1)._max_fitting_element(4)
assert 4 == result
big_num = 500000000000000000000000
result = RangeIndex(5, big_num * 2, 1)._max_fitting_element(big_num)
assert big_num == result
def test_pickle_compat_construction(self):
# RangeIndex() is a valid constructor
pass
def test_slice_specialised(self):
    """Scalar indexing and slicing of ``self.index``.

    Scalar lookups are compared with literal values; each slice is
    compared with an ``Index`` named ``'foo'`` built from the expected
    values.
    """
    # scalar indexing
    assert self.index[1] == 2
    assert self.index[-1] == 18

    # slicing
    # slice value completion
    tm.assert_index_equal(self.index[:], self.index)

    sliced_cases = [
        # positive slice values
        (slice(7, 10, 2), np.array([14, 18])),
        # negative slice values
        (slice(-1, -5, -2), np.array([18, 14])),
        # stop overshoot
        (slice(2, 100, 4), np.array([4, 12])),
        # reverse
        (slice(None, None, -1), self.index.values[::-1]),
        (slice(-8, None, -1), np.array([4, 2, 0])),
        (slice(-40, None, -1), np.array([], dtype=np.int64)),
        (slice(40, None, -1), self.index.values[40::-1]),
        (slice(10, None, -1), self.index.values[::-1]),
    ]
    for key, values in sliced_cases:
        tm.assert_index_equal(self.index[key], Index(values, name='foo'))
def test_len_specialised(self):
# make sure that our len is the same as
# np.arange calc
for step in np.arange(1, 6, 1):
arr = np.arange(0, 5, step)
i = RangeIndex(0, 5, step)
assert len(i) == len(arr)
i = RangeIndex(5, 0, step)
assert len(i) == 0
for step in np.arange(-6, -1, 1):
arr = np.arange(5, 0, step)
i = RangeIndex(5, 0, step)
assert len(i) == len(arr)
i = RangeIndex(0, 5, step)
assert len(i) == 0
def test_append(self):
    """Table-driven check of ``append`` starting from a range index.

    Each case is ``(indexes, expected)``: the first index has the rest
    appended as a list, and the result must match ``expected`` including
    its exact class.  Two-element cases are also run with the second
    index passed directly instead of inside a list.
    """
    # GH16212
    RI = RangeIndex
    I64 = Int64Index
    F64 = Float64Index
    OI = Index
    cases = [([RI(1, 12, 5)], RI(1, 12, 5)),
             ([RI(0, 6, 4)], RI(0, 6, 4)),
             ([RI(1, 3), RI(3, 7)], RI(1, 7)),
             ([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)),
             ([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)),
             ([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)),
             ([RI(-4, -8), RI(-8, -12)], RI(0, 0)),
             ([RI(-4, -8), RI(3, -4)], RI(0, 0)),
             ([RI(-4, -8), RI(3, 5)], RI(3, 5)),
             ([RI(-4, -2), RI(3, 5)], I64([-4, -3, 3, 4])),
             ([RI(-2,), RI(3, 5)], RI(3, 5)),
             ([RI(2,), RI(2)], I64([0, 1, 0, 1])),
             ([RI(2,), RI(2, 5), RI(5, 8, 4)], RI(0, 6)),
             ([RI(2,), RI(3, 5), RI(5, 8, 4)], I64([0, 1, 3, 4, 5])),
             ([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)),
             ([RI(3,), I64([-1, 3, 15])], I64([0, 1, 2, -1, 3, 15])),
             ([RI(3,), F64([-1, 3.1, 15.])], F64([0, 1, 2, -1, 3.1, 15.])),
             ([RI(3,), OI(['a', None, 14])], OI([0, 1, 2, 'a', None, 14])),
             ([RI(3, 1), OI(['a', None, 14])], OI(['a', None, 14]))
             ]

    for parts, expected in cases:
        head, rest = parts[0], parts[1:]
        from_list = head.append(rest)
        tm.assert_index_equal(from_list, expected, exact=True)

        if len(parts) == 2:
            # Append single item rather than list
            from_single = head.append(parts[1])
            tm.assert_index_equal(from_single, expected, exact=True)
@@ -1,265 +0,0 @@
# -*- coding: utf-8 -*-
from datetime import timedelta
import numpy as np
import pytest
from pandas.errors import NullFrequencyError
import pandas as pd
from pandas import Timedelta, TimedeltaIndex, timedelta_range
import pandas.util.testing as tm
@pytest.fixture(
    params=[pd.offsets.Hour(2), timedelta(hours=2),
            np.timedelta64(2, 'h'), Timedelta(hours=2)],
    ids=str)
def delta(request):
    """Parametrized fixture: several ways of representing two hours."""
    return request.param
@pytest.fixture(params=['B', 'D'])
def freq(request):
    """Parametrized fixture yielding the frequency strings 'B' and 'D'."""
    return request.param
class TestTimedeltaIndexArithmetic(object):
    """Arithmetic tests for ``TimedeltaIndex``.

    Covers ``shift``, addition/subtraction of integers and integer arrays,
    in-place operations with timedelta-like scalars, scalar ``Timedelta``
    versus ndarray operations, and how ``freq``/``name`` propagate through
    arithmetic.
    """
    # Addition and Subtraction Operations

    # -------------------------------------------------------------
    # TimedeltaIndex.shift is used by __add__/__sub__

    def test_tdi_shift_empty(self):
        """Shifting an empty index leaves it unchanged."""
        # GH#9903
        empty = pd.TimedeltaIndex([], name='xxx')
        for periods in (0, 3):
            tm.assert_index_equal(empty.shift(periods, freq='H'), empty)

    def test_tdi_shift_hours(self):
        """Shift by whole hours in both directions."""
        # GH#9903
        idx = pd.TimedeltaIndex(['5 hours', '6 hours', '9 hours'], name='xxx')
        tm.assert_index_equal(idx.shift(0, freq='H'), idx)

        later = pd.TimedeltaIndex(['8 hours', '9 hours', '12 hours'],
                                  name='xxx')
        tm.assert_index_equal(idx.shift(3, freq='H'), later)

        earlier = pd.TimedeltaIndex(['2 hours', '3 hours', '6 hours'],
                                    name='xxx')
        tm.assert_index_equal(idx.shift(-3, freq='H'), earlier)

    def test_tdi_shift_minutes(self):
        """Shift by whole minutes in both directions."""
        # GH#9903
        idx = pd.TimedeltaIndex(['5 hours', '6 hours', '9 hours'], name='xxx')
        tm.assert_index_equal(idx.shift(0, freq='T'), idx)

        later = pd.TimedeltaIndex(['05:03:00', '06:03:00', '9:03:00'],
                                  name='xxx')
        tm.assert_index_equal(idx.shift(3, freq='T'), later)

        earlier = pd.TimedeltaIndex(['04:57:00', '05:57:00', '8:57:00'],
                                    name='xxx')
        tm.assert_index_equal(idx.shift(-3, freq='T'), earlier)

    def test_tdi_shift_int(self):
        """Shift by an integer number of periods, without ``freq``."""
        # GH#8083
        trange = pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1)
        shifted = trange.shift(1)
        expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00',
                                   '3 days 01:00:00',
                                   '4 days 01:00:00', '5 days 01:00:00'],
                                  freq='D')
        tm.assert_index_equal(shifted, expected)

    def test_tdi_shift_nonstandard_freq(self):
        """Shift by a compound frequency string."""
        # GH#8083
        trange = pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1)
        shifted = trange.shift(3, freq='2D 1s')
        expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03',
                                   '8 days 01:00:03', '9 days 01:00:03',
                                   '10 days 01:00:03'], freq='D')
        tm.assert_index_equal(shifted, expected)

    def test_shift_no_freq(self):
        """Shifting an index that has no frequency raises."""
        # GH#19147
        tdi = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00'], freq=None)
        with pytest.raises(NullFrequencyError):
            tdi.shift(2)

    # -------------------------------------------------------------
    # Binary operations TimedeltaIndex and integer

    def test_tdi_add_int(self, one):
        """Adding an integer warns and moves the index by one period."""
        # Variants of `one` for #19012
        rng = timedelta_range('1 days 09:00:00', freq='H', periods=10)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            # GH#22535
            summed = rng + one
        expected = timedelta_range('1 days 10:00:00', freq='H', periods=10)
        tm.assert_index_equal(summed, expected)

    def test_tdi_iadd_int(self, one):
        """In-place integer addition warns and moves by one period."""
        rng = timedelta_range('1 days 09:00:00', freq='H', periods=10)
        expected = timedelta_range('1 days 10:00:00', freq='H', periods=10)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            # GH#22535
            rng += one
        tm.assert_index_equal(rng, expected)

    def test_tdi_sub_int(self, one):
        """Subtracting an integer warns and moves back by one period."""
        rng = timedelta_range('1 days 09:00:00', freq='H', periods=10)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            # GH#22535
            diff = rng - one
        expected = timedelta_range('1 days 08:00:00', freq='H', periods=10)
        tm.assert_index_equal(diff, expected)

    def test_tdi_isub_int(self, one):
        """In-place integer subtraction warns and moves back one period."""
        rng = timedelta_range('1 days 09:00:00', freq='H', periods=10)
        expected = timedelta_range('1 days 08:00:00', freq='H', periods=10)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            # GH#22535
            rng -= one
        tm.assert_index_equal(rng, expected)

    # -------------------------------------------------------------
    # __add__/__sub__ with integer arrays

    @pytest.mark.parametrize('box', [np.array, pd.Index])
    def test_tdi_add_integer_array(self, box):
        """Adding an integer array warns, from either operand order."""
        # GH#19959
        rng = timedelta_range('1 days 09:00:00', freq='H', periods=3)
        other = box([4, 3, 2])
        expected = TimedeltaIndex(['1 day 13:00:00'] * 3)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            # GH#22535
            left_sum = rng + other
        tm.assert_index_equal(left_sum, expected)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            # GH#22535
            right_sum = other + rng
        tm.assert_index_equal(right_sum, expected)

    @pytest.mark.parametrize('box', [np.array, pd.Index])
    def test_tdi_sub_integer_array(self, box):
        """Subtracting an integer array warns, from either operand order."""
        # GH#19959
        rng = timedelta_range('9H', freq='H', periods=3)
        other = box([4, 3, 2])
        expected = TimedeltaIndex(['5H', '7H', '9H'])

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            # GH#22535
            left_diff = rng - other
        tm.assert_index_equal(left_diff, expected)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            # GH#22535
            right_diff = other - rng
        tm.assert_index_equal(right_diff, -expected)

    @pytest.mark.parametrize('box', [np.array, pd.Index])
    def test_tdi_addsub_integer_array_no_freq(self, box):
        """Integer-array add/sub on a frequency-less index raises."""
        # GH#19959
        tdi = TimedeltaIndex(['1 Day', 'NaT', '3 Hours'])
        other = box([14, -1, 16])
        operations = (
            lambda: tdi + other,
            lambda: other + tdi,
            lambda: tdi - other,
            lambda: other - tdi,
        )
        for operation in operations:
            with pytest.raises(NullFrequencyError):
                operation()

    # -------------------------------------------------------------
    # Binary operations TimedeltaIndex and timedelta-like
    # Note: add and sub are tested in tests.test_arithmetic, in-place
    #  tests are kept here because their behavior is Index-specific

    def test_tdi_iadd_timedeltalike(self, delta):
        """In-place addition of a two-hour delta shifts every element."""
        # only test adding/sub offsets as + is now numeric
        rng = timedelta_range('1 days', '10 days')
        expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00',
                                   freq='D')
        rng += delta
        tm.assert_index_equal(rng, expected)

    def test_tdi_isub_timedeltalike(self, delta):
        """In-place subtraction of a two-hour delta shifts every element."""
        # only test adding/sub offsets as - is now numeric
        rng = timedelta_range('1 days', '10 days')
        expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00')
        rng -= delta
        tm.assert_index_equal(rng, expected)

    # -------------------------------------------------------------

    # TODO: after #24365 this probably belongs in scalar tests
    def test_ops_ndarray(self):
        """Scalar ``Timedelta`` combined with raw numpy arrays.

        Timedelta/datetime arrays give array results; combinations with a
        plain integer array, or multiplying two timedeltas, raise
        ``TypeError``.
        """
        td = Timedelta('1 day')

        # timedelta, timedelta
        other = pd.to_timedelta(['1 day']).values
        expected = pd.to_timedelta(['2 days']).values
        tm.assert_numpy_array_equal(td + other, expected)
        tm.assert_numpy_array_equal(other + td, expected)
        with pytest.raises(TypeError):
            td + np.array([1])
        with pytest.raises(TypeError):
            np.array([1]) + td

        expected = pd.to_timedelta(['0 days']).values
        tm.assert_numpy_array_equal(td - other, expected)
        tm.assert_numpy_array_equal(-other + td, expected)
        with pytest.raises(TypeError):
            td - np.array([1])
        with pytest.raises(TypeError):
            np.array([1]) - td

        expected = pd.to_timedelta(['2 days']).values
        tm.assert_numpy_array_equal(td * np.array([2]), expected)
        tm.assert_numpy_array_equal(np.array([2]) * td, expected)
        with pytest.raises(TypeError):
            td * other
        with pytest.raises(TypeError):
            other * td

        tm.assert_numpy_array_equal(td / other,
                                    np.array([1], dtype=np.float64))
        tm.assert_numpy_array_equal(other / td,
                                    np.array([1], dtype=np.float64))

        # timedelta, datetime
        other = pd.to_datetime(['2000-01-01']).values
        expected = pd.to_datetime(['2000-01-02']).values
        tm.assert_numpy_array_equal(td + other, expected)
        tm.assert_numpy_array_equal(other + td, expected)

        expected = pd.to_datetime(['1999-12-31']).values
        tm.assert_numpy_array_equal(-td + other, expected)
        tm.assert_numpy_array_equal(other - td, expected)

    def test_tdi_ops_attributes(self):
        """``freq`` and ``name`` follow the index through arithmetic."""

        def check(result, start, freq):
            # Expected index: five periods from ``start`` at ``freq``.
            exp = timedelta_range(start, periods=5, freq=freq, name='x')
            tm.assert_index_equal(result, exp)
            assert result.freq == freq

        rng = timedelta_range('2 days', periods=5, freq='2D', name='x')

        check(rng + 1 * rng.freq, '4 days', '2D')
        check(rng - 2 * rng.freq, '-2 days', '2D')
        check(rng * 2, '4 days', '4D')
        check(rng / 2, '1 days', 'D')
        check(-rng, '-2 days', '-2D')

        # abs() does not yield a regular spacing, so freq is dropped
        rng = pd.timedelta_range('-2 days', periods=5, freq='D', name='x')
        result = abs(rng)
        exp = TimedeltaIndex(['2 days', '1 days', '0 days', '1 days',
                              '2 days'], name='x')
        tm.assert_index_equal(result, exp)
        assert result.freq is None
@@ -1,110 +0,0 @@
from datetime import timedelta
import numpy as np
import pytest
import pandas as pd
from pandas import (
Float64Index, Index, Int64Index, NaT, Timedelta, TimedeltaIndex,
timedelta_range)
import pandas.util.testing as tm
class TestTimedeltaIndex(object):
    """Tests for ``TimedeltaIndex.astype`` across target dtypes."""

    def test_astype_object(self):
        """Object conversion yields ``Timedelta`` scalars, keeps the name."""
        tdi = timedelta_range(start='1 days', periods=4, freq='D', name='idx')
        scalars = [Timedelta('1 days'), Timedelta('2 days'),
                   Timedelta('3 days'), Timedelta('4 days')]
        converted = tdi.astype(object)
        wanted = Index(scalars, dtype=object, name='idx')
        tm.assert_index_equal(converted, wanted)
        assert tdi.tolist() == scalars

    def test_astype_object_with_nat(self):
        """Object conversion keeps ``NaT`` entries in place."""
        tdi = TimedeltaIndex([timedelta(days=1), timedelta(days=2), NaT,
                              timedelta(days=4)], name='idx')
        scalars = [Timedelta('1 days'), Timedelta('2 days'), NaT,
                   Timedelta('4 days')]
        converted = tdi.astype(object)
        wanted = Index(scalars, dtype=object, name='idx')
        tm.assert_index_equal(converted, wanted)
        assert tdi.tolist() == scalars

    def test_astype(self):
        """Conversions to object, int, str and ``'i8'``."""
        # GH 13149, GH 13209
        tdi = TimedeltaIndex([1e14, 'NaT', NaT, np.NaN])

        as_object = tdi.astype(object)
        wanted = Index([Timedelta('1 days 03:46:40')] + [NaT] * 3,
                       dtype=object)
        tm.assert_index_equal(as_object, wanted)

        as_int = tdi.astype(int)
        wanted = Int64Index([100000000000000] + [-9223372036854775808] * 3,
                            dtype=np.int64)
        tm.assert_index_equal(as_int, wanted)

        as_str = tdi.astype(str)
        wanted = Index(str(x) for x in tdi)
        tm.assert_index_equal(as_str, wanted)

        rng = timedelta_range('1 days', periods=10)
        as_i8 = rng.astype('i8')
        tm.assert_index_equal(as_i8, Index(rng.asi8))
        tm.assert_numpy_array_equal(rng.asi8, as_i8.values)

    def test_astype_uint(self):
        """Both unsigned targets produce the same ``UInt64Index``."""
        arr = timedelta_range('1H', periods=2)
        expected = pd.UInt64Index(
            np.array([3600000000000, 90000000000000], dtype="uint64")
        )

        for target in ("uint64", "uint32"):
            tm.assert_index_equal(arr.astype(target), expected)

    def test_astype_timedelta64(self):
        """Conversions to ``timedelta64`` with and without a unit.

        The unit-less target gives a float index; ``timedelta64[ns]``
        gives an equal index that is a new object unless ``copy=False``.
        """
        # GH 13149, GH 13209
        tdi = TimedeltaIndex([1e14, 'NaT', NaT, np.NaN])

        unitless = tdi.astype('timedelta64')
        wanted = Float64Index([1e+14] + [np.NaN] * 3, dtype='float64')
        tm.assert_index_equal(unitless, wanted)

        copied = tdi.astype('timedelta64[ns]')
        tm.assert_index_equal(copied, tdi)
        assert copied is not tdi

        shared = tdi.astype('timedelta64[ns]', copy=False)
        tm.assert_index_equal(shared, tdi)
        assert shared is tdi

    @pytest.mark.parametrize('dtype', [
        float, 'datetime64', 'datetime64[ns]'])
    def test_astype_raises(self, dtype):
        """Unsupported target dtypes raise ``TypeError``."""
        # GH 13149, GH 13209
        tdi = TimedeltaIndex([1e14, 'NaT', NaT, np.NaN])
        msg = 'Cannot cast TimedeltaArray to dtype'
        with pytest.raises(TypeError, match=msg):
            tdi.astype(dtype)

    def test_astype_category(self):
        """Category conversion of the index and of its backing array."""
        obj = pd.timedelta_range("1H", periods=2, freq='H')

        from_index = obj.astype('category')
        wanted = pd.CategoricalIndex([pd.Timedelta('1H'),
                                      pd.Timedelta('2H')])
        tm.assert_index_equal(from_index, wanted)

        from_array = obj._data.astype('category')
        tm.assert_categorical_equal(from_array, wanted.values)

    def test_astype_array_fallback(self):
        """Bool conversion of the index and of its backing array."""
        obj = pd.timedelta_range("1H", periods=2)

        from_index = obj.astype(bool)
        tm.assert_index_equal(from_index, pd.Index(np.array([True, True])))

        from_array = obj._data.astype(bool)
        tm.assert_numpy_array_equal(from_array, np.array([True, True]))
@@ -1,199 +0,0 @@
from datetime import timedelta
import numpy as np
import pytest
import pandas as pd
from pandas import Timedelta, TimedeltaIndex, timedelta_range, to_timedelta
from pandas.core.arrays import TimedeltaArray
import pandas.util.testing as tm
class TestTimedeltaIndex(object):
    """Tests for constructing ``TimedeltaIndex`` objects.

    Covers deprecated keywords, copy avoidance, frequency inference and
    validation, rejected/deprecated datetime input, float handling and the
    various accepted input containers.
    """

    def test_verify_integrity_deprecated(self):
        """Passing ``verify_integrity`` emits a ``FutureWarning``."""
        # GH#23919
        with tm.assert_produces_warning(FutureWarning):
            TimedeltaIndex(['1 Day'], verify_integrity=False)

    def test_range_kwargs_deprecated(self):
        """Passing ``start``/``end`` emits a ``FutureWarning``."""
        # GH#23919
        with tm.assert_produces_warning(FutureWarning):
            TimedeltaIndex(start='1 Day', end='3 Days', freq='D')

    def test_int64_nocopy(self):
        """int64 input with ``copy=False`` is not copied."""
        # GH#23539 check that a copy isn't made when we pass int64 data
        #  and copy=False
        arr = np.arange(10, dtype=np.int64)
        tdi = TimedeltaIndex(arr, copy=False)
        assert tdi._data._data.base is arr

    def test_infer_from_tdi(self):
        """``freq='infer'`` reuses the frequency of the passed index."""
        # GH#23539
        # fast-path for inferring a frequency if the passed data already
        #  has one
        tdi = pd.timedelta_range('1 second', periods=10**7, freq='1s')

        result = pd.TimedeltaIndex(tdi, freq='infer')
        assert result.freq == tdi.freq

        # check that inferred_freq was not called by checking that the
        #  value has not been cached
        assert "inferred_freq" not in getattr(result, "_cache", {})

    def test_infer_from_tdi_mismatch(self):
        """A ``freq`` conflicting with the passed index's freq raises."""
        # GH#23539
        # fast-path for invalidating a frequency if the passed data already
        #  has one and it does not match the `freq` input
        tdi = pd.timedelta_range('1 second', periods=100, freq='1s')

        msg = ("Inferred frequency .* from passed values does "
               "not conform to passed frequency")
        with pytest.raises(ValueError, match=msg):
            TimedeltaIndex(tdi, freq='D')

        with pytest.raises(ValueError, match=msg):
            # GH#23789
            TimedeltaArray(tdi, freq='D')

    def test_dt64_data_invalid(self):
        """tz-aware datetimes raise; naive datetime data only warns."""
        # GH#23539
        # passing tz-aware DatetimeIndex raises, naive or ndarray[datetime64]
        #  does not yet, but will in the future
        dti = pd.date_range('2016-01-01', periods=3)

        msg = "cannot be converted to timedelta64"
        with pytest.raises(TypeError, match=msg):
            TimedeltaIndex(dti.tz_localize('Europe/Brussels'))

        with tm.assert_produces_warning(FutureWarning):
            TimedeltaIndex(dti)

        with tm.assert_produces_warning(FutureWarning):
            TimedeltaIndex(np.asarray(dti))

    def test_float64_ns_rounded(self):
        """Unit-less floats are nanoseconds; fractions are truncated."""
        # GH#23539 without specifying a unit, floats are regarded as nanos,
        #  and fractional portions are truncated
        tdi = TimedeltaIndex([2.3, 9.7])
        expected = TimedeltaIndex([2, 9])
        tm.assert_index_equal(tdi, expected)

        # integral floats are non-lossy
        tdi = TimedeltaIndex([2.0, 9.0])
        expected = TimedeltaIndex([2, 9])
        tm.assert_index_equal(tdi, expected)

        # NaNs get converted to NaT
        tdi = TimedeltaIndex([2.0, np.nan])
        expected = TimedeltaIndex([pd.Timedelta(nanoseconds=2), pd.NaT])
        tm.assert_index_equal(tdi, expected)

    def test_float64_unit_conversion(self):
        """Floats with an explicit unit keep their fractional part."""
        # GH#23539
        tdi = TimedeltaIndex([1.5, 2.25], unit='D')
        expected = TimedeltaIndex([Timedelta(days=1.5), Timedelta(days=2.25)])
        tm.assert_index_equal(tdi, expected)

    def test_construction_base_constructor(self):
        """``pd.Index`` on timedelta data matches ``pd.TimedeltaIndex``."""
        arr = [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')]
        tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr))
        tm.assert_index_equal(pd.Index(np.array(arr)),
                              pd.TimedeltaIndex(np.array(arr)))

        arr = [np.nan, pd.NaT, pd.Timedelta('1 days')]
        tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr))
        tm.assert_index_equal(pd.Index(np.array(arr)),
                              pd.TimedeltaIndex(np.array(arr)))

    def test_constructor(self):
        """Mixed-type element lists and integer input with a unit."""
        expected = TimedeltaIndex(['1 days', '1 days 00:00:05', '2 days',
                                   '2 days 00:00:02', '0 days 00:00:03'])
        result = TimedeltaIndex(['1 days', '1 days, 00:00:05', np.timedelta64(
            2, 'D'), timedelta(days=2, seconds=2), pd.offsets.Second(3)])
        tm.assert_index_equal(result, expected)

        # unicode
        result = TimedeltaIndex([u'1 days', '1 days, 00:00:05', np.timedelta64(
            2, 'D'), timedelta(days=2, seconds=2), pd.offsets.Second(3)])
        # Compare before ``expected`` is rebound below; otherwise the
        # unicode-input construction is built but never verified.
        tm.assert_index_equal(result, expected)

        expected = TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01',
                                   '0 days 00:00:02'])
        tm.assert_index_equal(TimedeltaIndex(range(3), unit='s'), expected)
        expected = TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:05',
                                   '0 days 00:00:09'])
        tm.assert_index_equal(TimedeltaIndex([0, 5, 9], unit='s'), expected)
        expected = TimedeltaIndex(
            ['0 days 00:00:00.400', '0 days 00:00:00.450',
             '0 days 00:00:01.200'])
        tm.assert_index_equal(TimedeltaIndex([400, 450, 1200], unit='ms'),
                              expected)

    def test_constructor_iso(self):
        """ISO 8601 duration strings are parsed by ``to_timedelta``."""
        # GH #21877
        expected = timedelta_range('1s', periods=9, freq='s')
        durations = ['P0DT0H0M{}S'.format(i) for i in range(1, 10)]
        result = to_timedelta(durations)
        tm.assert_index_equal(result, expected)

    def test_constructor_coverage(self):
        """Assorted constructor inputs and the errors for invalid ones."""
        rng = timedelta_range('1 days', periods=10.5)
        exp = timedelta_range('1 days', periods=10)
        tm.assert_index_equal(rng, exp)

        msg = 'periods must be a number, got foo'
        with pytest.raises(TypeError, match=msg):
            timedelta_range(start='1 days', periods='foo', freq='D')

        # the deprecated range keywords warn first, then fail validation
        with pytest.raises(ValueError):
            with tm.assert_produces_warning(FutureWarning):
                TimedeltaIndex(start='1 days', end='10 days')

        with pytest.raises(TypeError):
            TimedeltaIndex('1 days')

        # generator expression
        gen = (timedelta(i) for i in range(10))
        result = TimedeltaIndex(gen)
        expected = TimedeltaIndex([timedelta(i) for i in range(10)])
        tm.assert_index_equal(result, expected)

        # NumPy string array
        strings = np.array(['1 days', '2 days', '3 days'])
        result = TimedeltaIndex(strings)
        expected = to_timedelta([1, 2, 3], unit='d')
        tm.assert_index_equal(result, expected)

        from_ints = TimedeltaIndex(expected.asi8)
        tm.assert_index_equal(from_ints, expected)

        # non-conforming freq
        with pytest.raises(ValueError):
            TimedeltaIndex(['1 days', '2 days', '4 days'], freq='D')

        with pytest.raises(ValueError):
            timedelta_range(periods=10, freq='D')

    def test_constructor_name(self):
        """``name`` is stored, and can be overridden when re-wrapping."""
        idx = timedelta_range(start='1 days', periods=1, freq='D', name='TEST')
        assert idx.name == 'TEST'

        # GH10025
        idx2 = TimedeltaIndex(idx, name='something else')
        assert idx2.name == 'something else'

    def test_constructor_no_precision_warns(self):
        """A unit-less ``'timedelta64'`` dtype warns but still works."""
        # GH-24753, GH-24739
        expected = pd.TimedeltaIndex(['2000'], dtype='timedelta64[ns]')

        # we set the stacklevel for DatetimeIndex
        with tm.assert_produces_warning(FutureWarning):
            result = pd.TimedeltaIndex(['2000'], dtype='timedelta64')
        tm.assert_index_equal(result, expected)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = pd.Index(['2000'], dtype='timedelta64')
        tm.assert_index_equal(result, expected)

    def test_constructor_wrong_precision_raises(self):
        """A ``timedelta64[us]`` dtype is rejected with ``ValueError``."""
        with pytest.raises(ValueError):
            pd.TimedeltaIndex(['2000'], dtype='timedelta64[us]')
@@ -1,96 +0,0 @@
# -*- coding: utf-8 -*-
import pytest
import pandas as pd
from pandas import TimedeltaIndex
class TestTimedeltaIndexRendering(object):
    """Tests for the textual renderings of ``TimedeltaIndex``.

    Every test builds the same five sample indexes (empty, one, two and
    three daily values, plus one without a frequency) and compares a
    rendering of each with a literal expected string.
    """

    @pytest.mark.parametrize('method', ['__repr__', '__unicode__', '__str__'])
    def test_representation(self, method):
        """Compare the output of ``method`` with the expected repr text."""
        samples = [
            TimedeltaIndex([], freq='D'),
            TimedeltaIndex(['1 days'], freq='D'),
            TimedeltaIndex(['1 days', '2 days'], freq='D'),
            TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D'),
            TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']),
        ]

        rendered = [
            """TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')""",
            ("TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', "
             "freq='D')"),
            ("TimedeltaIndex(['1 days', '2 days'], "
             "dtype='timedelta64[ns]', freq='D')"),
            ("TimedeltaIndex(['1 days', '2 days', '3 days'], "
             "dtype='timedelta64[ns]', freq='D')"),
            ("TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', "
             "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)"),
        ]

        with pd.option_context('display.width', 300):
            for idx, expected in zip(samples, rendered):
                result = getattr(idx, method)()
                assert result == expected

    def test_representation_to_series(self):
        """Compare ``repr`` of a ``Series`` built from each sample index."""
        samples = [
            TimedeltaIndex([], freq='D'),
            TimedeltaIndex(['1 days'], freq='D'),
            TimedeltaIndex(['1 days', '2 days'], freq='D'),
            TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D'),
            TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']),
        ]

        rendered = [
            """Series([], dtype: timedelta64[ns])""",
            ("0 1 days\n"
             "dtype: timedelta64[ns]"),
            ("0 1 days\n"
             "1 2 days\n"
             "dtype: timedelta64[ns]"),
            ("0 1 days\n"
             "1 2 days\n"
             "2 3 days\n"
             "dtype: timedelta64[ns]"),
            ("0 1 days 00:00:01\n"
             "1 2 days 00:00:00\n"
             "2 3 days 00:00:00\n"
             "dtype: timedelta64[ns]"),
        ]

        with pd.option_context('display.width', 300):
            for idx, expected in zip(samples, rendered):
                result = repr(pd.Series(idx))
                assert result == expected

    def test_summary(self):
        """Compare ``_summary()`` of each sample index."""
        # GH#9116
        samples = [
            TimedeltaIndex([], freq='D'),
            TimedeltaIndex(['1 days'], freq='D'),
            TimedeltaIndex(['1 days', '2 days'], freq='D'),
            TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D'),
            TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']),
        ]

        rendered = [
            ("TimedeltaIndex: 0 entries\n"
             "Freq: D"),
            ("TimedeltaIndex: 1 entries, 1 days to 1 days\n"
             "Freq: D"),
            ("TimedeltaIndex: 2 entries, 1 days to 2 days\n"
             "Freq: D"),
            ("TimedeltaIndex: 3 entries, 1 days to 3 days\n"
             "Freq: D"),
            ("TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days "
             "00:00:00"),
        ]

        for idx, expected in zip(samples, rendered):
            result = idx._summary()
            assert result == expected
@@ -1,338 +0,0 @@
from datetime import datetime, timedelta
import numpy as np
import pytest
import pandas as pd
from pandas import Index, Timedelta, TimedeltaIndex, compat, timedelta_range
import pandas.util.testing as tm
class TestGetItem(object):
def test_ellipsis(self):
# GH#21282
idx = timedelta_range('1 day', '31 day', freq='D', name='idx')
result = idx[...]
assert result.equals(idx)
assert result is not idx
def test_getitem(self):
idx1 = timedelta_range('1 day', '31 day', freq='D', name='idx')
for idx in [idx1]:
result = idx[0]
assert result == Timedelta('1 day')
result = idx[0:5]
expected = timedelta_range('1 day', '5 day', freq='D',
name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx[0:10:2]
expected = timedelta_range('1 day', '9 day', freq='2D',
name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx[-20:-5:3]
expected = timedelta_range('12 day', '24 day', freq='3D',
name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx[4::-1]
expected = TimedeltaIndex(['5 day', '4 day', '3 day',
'2 day', '1 day'],
freq='-1D', name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
@pytest.mark.parametrize('key', [pd.Timestamp('1970-01-01'),
pd.Timestamp('1970-01-02'),
datetime(1970, 1, 1)])
def test_timestamp_invalid_key(self, key):
# GH#20464
tdi = pd.timedelta_range(0, periods=10)
with pytest.raises(TypeError):
tdi.get_loc(key)
class TestWhere(object):
    """Placeholder for symmetry with the DatetimeIndex and PeriodIndex tests."""
class TestTake(object):
    """``TimedeltaIndex.take`` with plain, fancy and fill-value arguments."""

    def test_take(self):
        """``take`` keeps a freq for evenly stepped positions, else drops it."""
        # GH 10295
        idx = timedelta_range('1 day', '31 day', freq='D', name='idx')

        assert idx.take([0]) == Timedelta('1 day')
        assert idx.take([-1]) == Timedelta('31 day')

        # evenly stepped positions: result compares equal, freq included
        regular = [
            ([0, 1, 2], ('1 day', '3 day', 'D')),
            ([0, 2, 4], ('1 day', '5 day', '2D')),
            ([7, 4, 1], ('8 day', '2 day', '-3D')),
        ]
        for positions, (start, end, freq) in regular:
            taken = idx.take(positions)
            wanted = timedelta_range(start, end, freq=freq, name='idx')
            tm.assert_index_equal(taken, wanted)
            assert taken.freq == wanted.freq

        # irregular positions: no freq on the result
        irregular = [
            ([3, 2, 5], ['4 day', '3 day', '6 day']),
            ([-3, 2, 5], ['29 day', '3 day', '6 day']),
        ]
        for positions, values in irregular:
            taken = idx.take(positions)
            tm.assert_index_equal(taken, TimedeltaIndex(values, name='idx'))
            assert taken.freq is None

    def test_take_invalid_kwargs(self):
        """Unknown or unsupported keyword arguments to ``take`` raise."""
        idx = timedelta_range('1 day', '31 day', freq='D', name='idx')
        indices = [1, 6, 5, 9, 10, 13, 15, 3]

        rejected = [
            (TypeError,
             r"take\(\) got an unexpected keyword argument 'foo'",
             {'foo': 2}),
            (ValueError,
             "the 'out' parameter is not supported",
             {'out': indices}),
            (ValueError,
             "the 'mode' parameter is not supported",
             {'mode': 'clip'}),
        ]
        for exc_type, msg, kwargs in rejected:
            with pytest.raises(exc_type, match=msg):
                idx.take(indices, **kwargs)

    # TODO: This method came from test_timedelta; de-dup with version above
    def test_take2(self):
        """``take`` and list-indexing agree and both drop the freq."""
        idx = timedelta_range(start='1d', end='2d', freq='H', name='idx')
        expected = TimedeltaIndex(
            ['1day 02:00:00', '1 day 04:00:00', '1 day 10:00:00'],
            freq=None, name='idx')

        positions = [2, 4, 10]
        for taken in (idx.take(positions), idx[positions]):
            tm.assert_index_equal(taken, expected)
            assert isinstance(taken, TimedeltaIndex)
            assert taken.freq is None
            assert taken.name == expected.name

    def test_take_fill_value(self):
        """``fill_value`` turns -1 into NaT unless ``allow_fill`` is False."""
        # GH 12631
        idx = TimedeltaIndex(['1 days', '2 days', '3 days'], name='xxx')
        positions = np.array([1, 0, -1])
        wrapped = TimedeltaIndex(['2 days', '1 days', '3 days'], name='xxx')

        # without fill_value, -1 is an ordinary negative position
        tm.assert_index_equal(idx.take(positions), wrapped)

        # fill_value
        tm.assert_index_equal(
            idx.take(positions, fill_value=True),
            TimedeltaIndex(['2 days', '1 days', 'NaT'], name='xxx'))

        # allow_fill=False
        tm.assert_index_equal(
            idx.take(positions, allow_fill=False, fill_value=True),
            wrapped)

        msg = ('When allow_fill=True and fill_value is not None, '
               'all indices must be >= -1')
        for lowest in (-2, -5):
            with pytest.raises(ValueError, match=msg):
                idx.take(np.array([1, 0, lowest]), fill_value=True)

        with pytest.raises(IndexError):
            idx.take(np.array([1, -5]))
class TestTimedeltaIndex(object):
def test_insert(self):
idx = TimedeltaIndex(['4day', '1day', '2day'], name='idx')
result = idx.insert(2, timedelta(days=5))
exp = TimedeltaIndex(['4day', '1day', '5day', '2day'], name='idx')
tm.assert_index_equal(result, exp)
# insertion of non-datetime should coerce to object index
result = idx.insert(1, 'inserted')
expected = Index([Timedelta('4day'), 'inserted', Timedelta('1day'),
Timedelta('2day')], name='idx')
assert not isinstance(result, TimedeltaIndex)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
idx = timedelta_range('1day 00:00:01', periods=3, freq='s', name='idx')
# preserve freq
expected_0 = TimedeltaIndex(['1day', '1day 00:00:01', '1day 00:00:02',
'1day 00:00:03'],
name='idx', freq='s')
expected_3 = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02',
'1day 00:00:03', '1day 00:00:04'],
name='idx', freq='s')
# reset freq to None
expected_1_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:01',
'1day 00:00:02', '1day 00:00:03'],
name='idx', freq=None)
expected_3_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02',
'1day 00:00:03', '1day 00:00:05'],
name='idx', freq=None)
cases = [(0, Timedelta('1day'), expected_0),
(-3, Timedelta('1day'), expected_0),
(3, Timedelta('1day 00:00:04'), expected_3),
(1, Timedelta('1day 00:00:01'), expected_1_nofreq),
(3, Timedelta('1day 00:00:05'), expected_3_nofreq)]
for n, d, expected in cases:
result = idx.insert(n, d)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
# GH 18295 (test missing)
expected = TimedeltaIndex(['1day', pd.NaT, '2day', '3day'])
for na in (np.nan, pd.NaT, None):
result = timedelta_range('1day', '3day').insert(1, na)
tm.assert_index_equal(result, expected)
def test_delete(self):
idx = timedelta_range(start='1 Days', periods=5, freq='D', name='idx')
# prserve freq
expected_0 = timedelta_range(start='2 Days', periods=4, freq='D',
name='idx')
expected_4 = timedelta_range(start='1 Days', periods=4, freq='D',
name='idx')
# reset freq to None
expected_1 = TimedeltaIndex(
['1 day', '3 day', '4 day', '5 day'], freq=None, name='idx')
cases = {0: expected_0,
-5: expected_0,
-1: expected_4,
4: expected_4,
1: expected_1}
for n, expected in compat.iteritems(cases):
result = idx.delete(n)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
with pytest.raises((IndexError, ValueError)):
# either depending on numpy version
idx.delete(5)
def test_delete_slice(self):
idx = timedelta_range(start='1 days', periods=10, freq='D', name='idx')
# prserve freq
expected_0_2 = timedelta_range(start='4 days', periods=7, freq='D',
name='idx')
expected_7_9 = timedelta_range(start='1 days', periods=7, freq='D',
name='idx')
# reset freq to None
expected_3_5 = TimedeltaIndex(['1 d', '2 d', '3 d',
'7 d', '8 d', '9 d', '10d'],
freq=None, name='idx')
cases = {(0, 1, 2): expected_0_2,
(7, 8, 9): expected_7_9,
(3, 4, 5): expected_3_5}
for n, expected in compat.iteritems(cases):
result = idx.delete(n)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
result = idx.delete(slice(n[0], n[-1] + 1))
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
def test_get_loc(self):
idx = pd.to_timedelta(['0 days', '1 days', '2 days'])
for method in [None, 'pad', 'backfill', 'nearest']:
assert idx.get_loc(idx[1], method) == 1
assert idx.get_loc(idx[1].to_pytimedelta(), method) == 1
assert idx.get_loc(str(idx[1]), method) == 1
assert idx.get_loc(idx[1], 'pad',
tolerance=Timedelta(0)) == 1
assert idx.get_loc(idx[1], 'pad',
tolerance=np.timedelta64(0, 's')) == 1
assert idx.get_loc(idx[1], 'pad',
tolerance=timedelta(0)) == 1
with pytest.raises(ValueError, match='unit abbreviation w/o a number'):
idx.get_loc(idx[1], method='nearest', tolerance='foo')
with pytest.raises(
ValueError,
match='tolerance size must match'):
idx.get_loc(idx[1], method='nearest',
tolerance=[Timedelta(0).to_timedelta64(),
Timedelta(0).to_timedelta64()])
for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]:
assert idx.get_loc('1 day 1 hour', method) == loc
# GH 16909
assert idx.get_loc(idx[1].to_timedelta64()) == 1
# GH 16896
assert idx.get_loc('0 days') == 0
def test_get_loc_nat(self):
tidx = TimedeltaIndex(['1 days 01:00:00', 'NaT', '2 days 01:00:00'])
assert tidx.get_loc(pd.NaT) == 1
assert tidx.get_loc(None) == 1
assert tidx.get_loc(float('nan')) == 1
assert tidx.get_loc(np.nan) == 1
def test_get_indexer(self):
idx = pd.to_timedelta(['0 days', '1 days', '2 days'])
tm.assert_numpy_array_equal(idx.get_indexer(idx),
np.array([0, 1, 2], dtype=np.intp))
target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour'])
tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'),
np.array([-1, 0, 1], dtype=np.intp))
tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'),
np.array([0, 1, 2], dtype=np.intp))
tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'),
np.array([0, 1, 1], dtype=np.intp))
res = idx.get_indexer(target, 'nearest',
tolerance=Timedelta('1 hour'))
tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp))
@@ -1,281 +0,0 @@
import numpy as np
import pytest
from pandas.core.dtypes.generic import ABCDateOffset
import pandas as pd
from pandas import Series, TimedeltaIndex, timedelta_range
from pandas.tests.test_base import Ops
import pandas.util.testing as tm
from pandas.tseries.offsets import Day, Hour
class TestTimedeltaIndexOps(Ops):
    """Generic index operations exercised on ``TimedeltaIndex``."""

    def setup_method(self, method):
        """Run the parent setup, then keep only the TimedeltaIndex fixtures."""
        super(TestTimedeltaIndexOps, self).setup_method(method)
        self.is_valid_objs = [o for o in self.objs
                              if isinstance(o, TimedeltaIndex)]
        self.not_valid_objs = []

    def test_ops_properties(self):
        """Field and object ops are checked through the inherited helper."""
        def is_tdi(obj):
            return isinstance(obj, TimedeltaIndex)

        self.check_ops_properties(TimedeltaIndex._field_ops, is_tdi)
        self.check_ops_properties(TimedeltaIndex._object_ops, is_tdi)

    def test_value_counts_unique(self):
        """``value_counts`` and ``unique`` on repeated and NaT values."""
        # GH 7735
        idx = timedelta_range('1 days 09:00:00', freq='H', periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1)))

        exp_idx = timedelta_range('1 days 18:00:00', freq='-1H', periods=10)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        expected = timedelta_range('1 days 09:00:00', freq='H', periods=10)
        tm.assert_index_equal(idx.unique(), expected)

        idx = TimedeltaIndex(['1 days 09:00:00', '1 days 09:00:00',
                              '1 days 09:00:00', '1 days 08:00:00',
                              '1 days 08:00:00', pd.NaT])

        # NaT is left out of the counts by default
        exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00'])
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        # ... and counted when dropna=False
        exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00',
                                  pd.NaT])
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    def test_nonunique_contains(self):
        """``in`` works on indexes that contain duplicate values."""
        # GH 9512
        for idx in map(TimedeltaIndex, ([0, 1, 0], [0, 0, -1], [0, -1, -1],
                                        ['00:01:00', '00:01:00', '00:02:00'],
                                        ['00:01:00', '00:01:00', '00:00:01'])):
            assert idx[0] in idx

    def test_unknown_attribute(self):
        """Accessing a missing attribute on a Series raises AttributeError."""
        # see gh-9680
        tdi = pd.timedelta_range(start=0, periods=10, freq='1s')
        ts = pd.Series(np.random.normal(size=10), index=tdi)
        assert 'foo' not in ts.__dict__
        with pytest.raises(AttributeError):
            ts.foo

    def test_order(self):
        """``sort_values`` results, indexers and freq handling."""
        # GH 10295
        idx1 = TimedeltaIndex(['1 day', '2 day', '3 day'], freq='D',
                              name='idx')
        idx2 = TimedeltaIndex(
            ['1 hour', '2 hour', '3 hour'], freq='H', name='idx')

        # already-sorted input that carries a freq
        for idx in [idx1, idx2]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, idx)
            assert ordered.freq == idx.freq

            ordered = idx.sort_values(ascending=False)
            expected = idx[::-1]
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, idx)
            tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]),
                                        check_dtype=False)
            assert ordered.freq == idx.freq

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            tm.assert_index_equal(ordered, idx[::-1])
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

        # unsorted input without a freq
        idx1 = TimedeltaIndex(['1 hour', '3 hour', '5 hour',
                               '2 hour ', '1 hour'], name='idx1')
        exp1 = TimedeltaIndex(['1 hour', '1 hour', '2 hour',
                               '3 hour', '5 hour'], name='idx1')

        idx2 = TimedeltaIndex(['1 day', '3 day', '5 day',
                               '2 day', '1 day'], name='idx2')
        exp2 = TimedeltaIndex(['1 day', '1 day', '2 day',
                               '3 day', '5 day'], name='idx2')

        # NOTE(review): a NaT-containing case was commented out here before;
        # confirm where NaT sorts on the supported numpy versions before
        # re-enabling it.
        # idx3 = TimedeltaIndex([pd.NaT, '3 minute', '5 minute',
        #                        '2 minute', pd.NaT], name='idx3')
        # exp3 = TimedeltaIndex([pd.NaT, pd.NaT, '2 minute', '3 minute',
        #                        '5 minute'], name='idx3')

        # idx1 and idx2 share the same relative ordering, so the same
        # expected indexers apply to both
        for idx, expected in [(idx1, exp1), (idx2, exp2)]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq is None

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])

            exp = np.array([2, 1, 3, 4, 0])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

    def test_drop_duplicates_metadata(self):
        """``drop_duplicates`` keeps the freq only when the input had one."""
        # GH 10115
        idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx')
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert result.freq is None

    def test_drop_duplicates(self):
        """Index and Series agree for every ``keep`` option."""
        # to check Index/Series compat
        base = pd.timedelta_range('1 day', '31 day', freq='D', name='idx')
        idx = base.append(base[:5])

        res = idx.drop_duplicates()
        tm.assert_index_equal(res, base)
        res = Series(idx).drop_duplicates()
        tm.assert_series_equal(res, Series(base))

        res = idx.drop_duplicates(keep='last')
        exp = base[5:].append(base[:5])
        tm.assert_index_equal(res, exp)
        res = Series(idx).drop_duplicates(keep='last')
        tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))

        res = idx.drop_duplicates(keep=False)
        tm.assert_index_equal(res, base[5:])
        res = Series(idx).drop_duplicates(keep=False)
        tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))

    @pytest.mark.parametrize('freq', ['D', '3D', '-3D',
                                      'H', '2H', '-2H',
                                      'T', '2T', 'S', '-3S'])
    def test_infer_freq(self, freq):
        """``freq='infer'`` recovers the freq from the integer values."""
        # GH#11018
        idx = pd.timedelta_range('1', freq=freq, periods=10)
        result = pd.TimedeltaIndex(idx.asi8, freq='infer')
        tm.assert_index_equal(idx, result)
        assert result.freq == freq

    def test_shift(self):
        pass  # handled in test_arithmetic.py

    def test_repeat(self):
        """``repeat`` and ``np.repeat`` agree and leave the freq unset."""
        index = pd.timedelta_range('1 days', periods=2, freq='D')
        exp = pd.TimedeltaIndex(['1 days', '1 days', '2 days', '2 days'])
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = TimedeltaIndex(['1 days', 'NaT', '3 days'])
        exp = TimedeltaIndex(['1 days', '1 days', '1 days',
                              'NaT', 'NaT', 'NaT',
                              '3 days', '3 days', '3 days'])
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_nat(self):
        """NA-related attributes with and without a NaT entry."""
        assert pd.TimedeltaIndex._na_value is pd.NaT
        assert pd.TimedeltaIndex([])._na_value is pd.NaT

        idx = pd.TimedeltaIndex(['1 days', '2 days'])
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs,
                                    np.array([], dtype=np.intp))

        idx = pd.TimedeltaIndex(['1 days', 'NaT'])
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs,
                                    np.array([1], dtype=np.intp))

    def test_equals(self):
        """``equals`` across dtypes, copies, lists and Series."""
        # GH 13107
        idx = pd.TimedeltaIndex(['1 days', '2 days', 'NaT'])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # same values in a different order are not equal
        idx2 = pd.TimedeltaIndex(['2 days', '1 days', 'NaT'])
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.astype(object).equals(idx2.astype(object))
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

    @pytest.mark.parametrize('values', [['0 days', '2 days', '4 days'], []])
    @pytest.mark.parametrize('freq', ['2D', Day(2), '48H', Hour(48)])
    def test_freq_setter(self, values, freq):
        """``freq`` can be assigned a string or offset, and reset to None."""
        # GH 20678
        idx = TimedeltaIndex(values)

        # can set to an offset, converting from string if necessary
        idx.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, ABCDateOffset)

        # can reset to None
        idx.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        """Invalid ``freq`` assignments raise ValueError with a clear message."""
        # GH 20678
        idx = TimedeltaIndex(['0 days', '2 days', '4 days'])

        # setting with an incompatible freq
        msg = ('Inferred frequency 2D from passed values does not conform to '
               'passed frequency 5D')
        with pytest.raises(ValueError, match=msg):
            idx.freq = '5D'

        # setting with a non-fixed frequency
        msg = r'<2 \* BusinessDays> is a non-fixed frequency'
        with pytest.raises(ValueError, match=msg):
            idx.freq = '2B'

        # setting with non-freq string
        with pytest.raises(ValueError, match='Invalid frequency'):
            idx.freq = 'foo'
@@ -1,85 +0,0 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Series, Timedelta, timedelta_range
from pandas.util.testing import assert_series_equal
class TestSlicing(object):
def test_slice_keeps_name(self):
# GH4226
dr = pd.timedelta_range('1d', '5d', freq='H', name='timebucket')
assert dr[1:].name == dr.name
def test_partial_slice(self):
rng = timedelta_range('1 day 10:11:12', freq='h', periods=500)
s = Series(np.arange(len(rng)), index=rng)
result = s['5 day':'6 day']
expected = s.iloc[86:134]
assert_series_equal(result, expected)
result = s['5 day':]
expected = s.iloc[86:]
assert_series_equal(result, expected)
result = s[:'6 day']
expected = s.iloc[:134]
assert_series_equal(result, expected)
result = s['6 days, 23:11:12']
assert result == s.iloc[133]
pytest.raises(KeyError, s.__getitem__, '50 days')
def test_partial_slice_high_reso(self):
# higher reso
rng = timedelta_range('1 day 10:11:12', freq='us', periods=2000)
s = Series(np.arange(len(rng)), index=rng)
result = s['1 day 10:11:12':]
expected = s.iloc[0:]
assert_series_equal(result, expected)
result = s['1 day 10:11:12.001':]
expected = s.iloc[1000:]
assert_series_equal(result, expected)
result = s['1 days, 10:11:12.001001']
assert result == s.iloc[1001]
def test_slice_with_negative_step(self):
ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H'))
SLC = pd.IndexSlice
def assert_slices_equivalent(l_slc, i_slc):
assert_series_equal(ts[l_slc], ts.iloc[i_slc])
assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc])
assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc])
assert_slices_equivalent(SLC[Timedelta(hours=7)::-1], SLC[7::-1])
assert_slices_equivalent(SLC['7 hours'::-1], SLC[7::-1])
assert_slices_equivalent(SLC[:Timedelta(hours=7):-1], SLC[:6:-1])
assert_slices_equivalent(SLC[:'7 hours':-1], SLC[:6:-1])
assert_slices_equivalent(SLC['15 hours':'7 hours':-1], SLC[15:6:-1])
assert_slices_equivalent(SLC[Timedelta(hours=15):Timedelta(hours=7):-
1], SLC[15:6:-1])
assert_slices_equivalent(SLC['15 hours':Timedelta(hours=7):-1],
SLC[15:6:-1])
assert_slices_equivalent(SLC[Timedelta(hours=15):'7 hours':-1],
SLC[15:6:-1])
assert_slices_equivalent(SLC['7 hours':'15 hours':-1], SLC[:0])
def test_slice_with_zero_step_raises(self):
ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H'))
with pytest.raises(ValueError, match='slice step cannot be zero'):
ts[::0]
with pytest.raises(ValueError, match='slice step cannot be zero'):
ts.loc[::0]
with pytest.raises(ValueError, match='slice step cannot be zero'):
ts.loc[::0]
@@ -1,64 +0,0 @@
# -*- coding: utf-8 -*-
"""
Tests for TimedeltaIndex methods behaving like their Timedelta counterparts
"""
import numpy as np
import pytest
import pandas as pd
from pandas import Index, Series, Timedelta, TimedeltaIndex, timedelta_range
import pandas.util.testing as tm
class TestVectorizedTimedelta(object):
def test_tdi_total_seconds(self):
# GH#10939
# test index
rng = timedelta_range('1 days, 10:11:12.100123456', periods=2,
freq='s')
expt = [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. / 1e9,
1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456. / 1e9]
tm.assert_almost_equal(rng.total_seconds(), Index(expt))
# test Series
ser = Series(rng)
s_expt = Series(expt, index=[0, 1])
tm.assert_series_equal(ser.dt.total_seconds(), s_expt)
# with nat
ser[1] = np.nan
s_expt = Series([1 * 86400 + 10 * 3600 + 11 * 60 +
12 + 100123456. / 1e9, np.nan], index=[0, 1])
tm.assert_series_equal(ser.dt.total_seconds(), s_expt)
# with both nat
ser = Series([np.nan, np.nan], dtype='timedelta64[ns]')
tm.assert_series_equal(ser.dt.total_seconds(),
Series([np.nan, np.nan], index=[0, 1]))
def test_tdi_round(self):
td = pd.timedelta_range(start='16801 days', periods=5, freq='30Min')
elt = td[1]
expected_rng = TimedeltaIndex([Timedelta('16801 days 00:00:00'),
Timedelta('16801 days 00:00:00'),
Timedelta('16801 days 01:00:00'),
Timedelta('16801 days 02:00:00'),
Timedelta('16801 days 02:00:00')])
expected_elt = expected_rng[1]
tm.assert_index_equal(td.round(freq='H'), expected_rng)
assert elt.round(freq='H') == expected_elt
msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG
with pytest.raises(ValueError, match=msg):
td.round(freq='foo')
with pytest.raises(ValueError, match=msg):
elt.round(freq='foo')
msg = "<MonthEnd> is a non-fixed frequency"
with pytest.raises(ValueError, match=msg):
td.round(freq='M')
with pytest.raises(ValueError, match=msg):
elt.round(freq='M')
@@ -1,75 +0,0 @@
import numpy as np
import pandas as pd
from pandas import Int64Index, TimedeltaIndex, timedelta_range
import pandas.util.testing as tm
class TestTimedeltaIndex(object):
def test_union(self):
i1 = timedelta_range('1day', periods=5)
i2 = timedelta_range('3day', periods=5)
result = i1.union(i2)
expected = timedelta_range('1day', periods=7)
tm.assert_index_equal(result, expected)
i1 = Int64Index(np.arange(0, 20, 2))
i2 = timedelta_range(start='1 day', periods=10, freq='D')
i1.union(i2) # Works
i2.union(i1) # Fails with "AttributeError: can't set attribute"
def test_union_coverage(self):
idx = TimedeltaIndex(['3d', '1d', '2d'])
ordered = TimedeltaIndex(idx.sort_values(), freq='infer')
result = ordered.union(idx)
tm.assert_index_equal(result, ordered)
result = ordered[:0].union(ordered)
tm.assert_index_equal(result, ordered)
assert result.freq == ordered.freq
def test_union_bug_1730(self):
rng_a = timedelta_range('1 day', periods=4, freq='3H')
rng_b = timedelta_range('1 day', periods=4, freq='4H')
result = rng_a.union(rng_b)
exp = TimedeltaIndex(sorted(set(list(rng_a)) | set(list(rng_b))))
tm.assert_index_equal(result, exp)
def test_union_bug_1745(self):
left = TimedeltaIndex(['1 day 15:19:49.695000'])
right = TimedeltaIndex(['2 day 13:04:21.322000',
'1 day 15:27:24.873000',
'1 day 15:31:05.350000'])
result = left.union(right)
exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right))))
tm.assert_index_equal(result, exp)
def test_union_bug_4564(self):
left = timedelta_range("1 day", "30d")
right = left + pd.offsets.Minute(15)
result = left.union(right)
exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right))))
tm.assert_index_equal(result, exp)
def test_intersection_bug_1708(self):
index_1 = timedelta_range('1 day', periods=4, freq='h')
index_2 = index_1 + pd.offsets.Hour(5)
result = index_1 & index_2
assert len(result) == 0
index_1 = timedelta_range('1 day', periods=4, freq='h')
index_2 = index_1 + pd.offsets.Hour(1)
result = index_1 & index_2
expected = timedelta_range('1 day 01:00:00', periods=3, freq='h')
tm.assert_index_equal(result, expected)
@@ -1,335 +0,0 @@
from datetime import timedelta
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame, Index, Int64Index, Series, Timedelta, TimedeltaIndex,
date_range, timedelta_range)
import pandas.util.testing as tm
from pandas.util.testing import (
assert_almost_equal, assert_index_equal, assert_series_equal)
from ..datetimelike import DatetimeLike
randn = np.random.randn
class TestTimedeltaIndex(DatetimeLike):
_holder = TimedeltaIndex
def setup_method(self, method):
    """Register a 10-element timedelta index fixture under the name ``index``."""
    fixtures = {'index': tm.makeTimedeltaIndex(10)}
    self.indices = fixtures
    self.setup_indices()
def create_index(self):
    """Return a five-element index: 0-4 days, each shifted by one hour."""
    whole_days = pd.to_timedelta(range(5), unit='d')
    return whole_days + pd.offsets.Hour(1)
def test_numeric_compat(self):
# Dummy method to override super's version; this test is now done
# in test_arithmetic.py
pass
def test_shift(self):
pass # this is handled in test_arithmetic.py
def test_pickle_compat_construction(self):
pass
def test_fillna_timedelta(self):
# GH 11343
idx = pd.TimedeltaIndex(['1 day', pd.NaT, '3 day'])
exp = pd.TimedeltaIndex(['1 day', '2 day', '3 day'])
tm.assert_index_equal(idx.fillna(pd.Timedelta('2 day')), exp)
exp = pd.TimedeltaIndex(['1 day', '3 hour', '3 day'])
idx.fillna(pd.Timedelta('3 hour'))
exp = pd.Index(
[pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object)
tm.assert_index_equal(idx.fillna('x'), exp)
@pytest.mark.parametrize("sort", [True, False])
def test_difference_freq(self, sort):
# GH14323: Difference of TimedeltaIndex should not preserve frequency
index = timedelta_range("0 days", "5 days", freq="D")
other = timedelta_range("1 days", "4 days", freq="D")
expected = TimedeltaIndex(["0 days", "5 days"], freq=None)
idx_diff = index.difference(other, sort)
tm.assert_index_equal(idx_diff, expected)
tm.assert_attr_equal('freq', idx_diff, expected)
other = timedelta_range("2 days", "5 days", freq="D")
idx_diff = index.difference(other, sort)
expected = TimedeltaIndex(["0 days", "1 days"], freq=None)
tm.assert_index_equal(idx_diff, expected)
tm.assert_attr_equal('freq', idx_diff, expected)
@pytest.mark.parametrize("sort", [True, False])
def test_difference_sort(self, sort):
index = pd.TimedeltaIndex(["5 days", "3 days", "2 days", "4 days",
"1 days", "0 days"])
other = timedelta_range("1 days", "4 days", freq="D")
idx_diff = index.difference(other, sort)
expected = TimedeltaIndex(["5 days", "0 days"], freq=None)
if sort:
expected = expected.sort_values()
tm.assert_index_equal(idx_diff, expected)
tm.assert_attr_equal('freq', idx_diff, expected)
other = timedelta_range("2 days", "5 days", freq="D")
idx_diff = index.difference(other, sort)
expected = TimedeltaIndex(["1 days", "0 days"], freq=None)
if sort:
expected = expected.sort_values()
tm.assert_index_equal(idx_diff, expected)
tm.assert_attr_equal('freq', idx_diff, expected)
def test_isin(self):
index = tm.makeTimedeltaIndex(4)
result = index.isin(index)
assert result.all()
result = index.isin(list(index))
assert result.all()
assert_almost_equal(index.isin([index[2], 5]),
np.array([False, False, True, False]))
def test_factorize(self):
idx1 = TimedeltaIndex(['1 day', '1 day', '2 day', '2 day', '3 day',
'3 day'])
exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
exp_idx = TimedeltaIndex(['1 day', '2 day', '3 day'])
arr, idx = idx1.factorize()
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
arr, idx = idx1.factorize(sort=True)
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
# freq must be preserved
idx3 = timedelta_range('1 day', periods=4, freq='s')
exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
arr, idx = idx3.factorize()
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, idx3)
def test_join_self(self, join_type):
index = timedelta_range('1 day', periods=10)
joined = index.join(index, how=join_type)
tm.assert_index_equal(index, joined)
def test_does_not_convert_mixed_integer(self):
df = tm.makeCustomDataframe(10, 10,
data_gen_f=lambda *args, **kwargs: randn(),
r_idx_type='i', c_idx_type='td')
str(df)
cols = df.columns.join(df.index, how='outer')
joined = cols.join(df.columns)
assert cols.dtype == np.dtype('O')
assert cols.dtype == joined.dtype
tm.assert_index_equal(cols, joined)
def test_sort_values(self):
idx = TimedeltaIndex(['4d', '1d', '2d'])
ordered = idx.sort_values()
assert ordered.is_monotonic
ordered = idx.sort_values(ascending=False)
assert ordered[::-1].is_monotonic
ordered, dexer = idx.sort_values(return_indexer=True)
assert ordered.is_monotonic
tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0]),
check_dtype=False)
ordered, dexer = idx.sort_values(return_indexer=True, ascending=False)
assert ordered[::-1].is_monotonic
tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1]),
check_dtype=False)
def test_get_duplicates(self):
idx = TimedeltaIndex(['1 day', '2 day', '2 day', '3 day', '3day',
'4day'])
with tm.assert_produces_warning(FutureWarning):
# Deprecated - see GH20239
result = idx.get_duplicates()
ex = TimedeltaIndex(['2 day', '3day'])
tm.assert_index_equal(result, ex)
def test_argmin_argmax(self):
idx = TimedeltaIndex(['1 day 00:00:05', '1 day 00:00:01',
'1 day 00:00:02'])
assert idx.argmin() == 1
assert idx.argmax() == 0
def test_misc_coverage(self):
rng = timedelta_range('1 day', periods=5)
result = rng.groupby(rng.days)
assert isinstance(list(result.values())[0][0], Timedelta)
idx = TimedeltaIndex(['3d', '1d', '2d'])
assert not idx.equals(list(idx))
non_td = Index(list('abc'))
assert not idx.equals(list(non_td))
def test_map(self):
# test_map_dictlike generally tests
rng = timedelta_range('1 day', periods=10)
f = lambda x: x.days
result = rng.map(f)
exp = Int64Index([f(x) for x in rng])
tm.assert_index_equal(result, exp)
def test_pass_TimedeltaIndex_to_index(self):
rng = timedelta_range('1 days', '10 days')
idx = Index(rng, dtype=object)
expected = Index(rng.to_pytimedelta(), dtype=object)
tm.assert_numpy_array_equal(idx.values, expected.values)
def test_pickle(self):
rng = timedelta_range('1 days', periods=10)
rng_p = tm.round_trip_pickle(rng)
tm.assert_index_equal(rng, rng_p)
def test_hash_error(self):
    """Hashing the index raises ``TypeError`` whose message names the
    index class."""
    tdi = timedelta_range('1 days', periods=10)
    class_name = type(tdi).__name__
    expected_msg = "unhashable type: %r" % class_name
    with pytest.raises(TypeError, match=expected_msg):
        hash(tdi)
def test_append_join_nondatetimeindex(self):
rng = timedelta_range('1 days', periods=10)
idx = Index(['a', 'b', 'c', 'd'])
result = rng.append(idx)
assert isinstance(result[0], Timedelta)
# it works
rng.join(idx, how='outer')
def test_append_numpy_bug_1681(self):
td = timedelta_range('1 days', '10 days', freq='2D')
a = DataFrame()
c = DataFrame({'A': 'foo', 'B': td}, index=td)
str(c)
result = a.append(c)
assert (result['B'] == td).all()
def test_fields(self):
    """Check the component accessors of a ``TimedeltaIndex``.

    ``days``, ``seconds``, ``microseconds`` and ``nanoseconds`` are
    compared against int64 indexes; ``hours``, ``minutes`` and
    ``milliseconds`` must raise ``AttributeError``; a missing value is
    carried through ``Series.dt``; and the index name is kept.
    """
    tdi = timedelta_range('1 days, 10:11:12.100123456', periods=2,
                          freq='s')
    first_seconds = 10 * 3600 + 11 * 60 + 12
    second_seconds = 10 * 3600 + 11 * 60 + 13
    micros = 100 * 1000 + 123

    expected_days = Index([1, 1], dtype='int64')
    tm.assert_index_equal(tdi.days, expected_days)

    expected_seconds = Index([first_seconds, second_seconds],
                             dtype='int64')
    tm.assert_index_equal(tdi.seconds, expected_seconds)

    expected_micros = Index([micros, micros], dtype='int64')
    tm.assert_index_equal(tdi.microseconds, expected_micros)

    expected_nanos = Index([456, 456], dtype='int64')
    tm.assert_index_equal(tdi.nanoseconds, expected_nanos)

    # Accessors that are not provided on the index.
    with pytest.raises(AttributeError):
        tdi.hours
    with pytest.raises(AttributeError):
        tdi.minutes
    with pytest.raises(AttributeError):
        tdi.milliseconds

    # with nat
    ser = Series(tdi)
    ser[1] = np.nan
    tm.assert_series_equal(ser.dt.days,
                           Series([1, np.nan], index=[0, 1]))
    tm.assert_series_equal(ser.dt.seconds,
                           Series([first_seconds, np.nan], index=[0, 1]))

    # preserve name (GH15589)
    tdi.name = 'name'
    assert tdi.days.name == 'name'
def test_freq_conversion(self):
# doc example
# series
td = Series(date_range('20130101', periods=4)) - \
Series(date_range('20121201', periods=4))
td[2] += timedelta(minutes=5, seconds=3)
td[3] = np.nan
result = td / np.timedelta64(1, 'D')
expected = Series([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan
])
assert_series_equal(result, expected)
result = td.astype('timedelta64[D]')
expected = Series([31, 31, 31, np.nan])
assert_series_equal(result, expected)
result = td / np.timedelta64(1, 's')
expected = Series([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3,
np.nan])
assert_series_equal(result, expected)
result = td.astype('timedelta64[s]')
assert_series_equal(result, expected)
# tdi
td = TimedeltaIndex(td)
result = td / np.timedelta64(1, 'D')
expected = Index([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan])
assert_index_equal(result, expected)
result = td.astype('timedelta64[D]')
expected = Index([31, 31, 31, np.nan])
assert_index_equal(result, expected)
result = td / np.timedelta64(1, 's')
expected = Index([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3,
np.nan])
assert_index_equal(result, expected)
result = td.astype('timedelta64[s]')
assert_index_equal(result, expected)
class TestTimeSeries(object):
    """Scalar access on a timedelta-valued ``Series``."""

    def test_series_box_timedelta(self):
        """Both ``[]`` and ``.iat`` return ``Timedelta`` scalars."""
        values = timedelta_range('1 day 1 s', periods=5, freq='h')
        ser = Series(values)

        via_getitem = ser[1]
        assert isinstance(via_getitem, Timedelta)

        via_iat = ser.iat[2]
        assert isinstance(via_iat, Timedelta)
@@ -1,79 +0,0 @@
import numpy as np
import pytest
import pandas as pd
from pandas import timedelta_range, to_timedelta
import pandas.util.testing as tm
from pandas.tseries.offsets import Day, Second
class TestTimedeltas(object):
    """Tests for building indexes with ``timedelta_range``."""

    def test_timedelta_range(self):
        """Compare ``timedelta_range`` output with equivalent
        ``to_timedelta`` results, then cover 2-D input rejection
        (GH 11776) and string slicing of a timedelta-indexed frame
        (issue10583)."""
        five_days = to_timedelta(np.arange(5), unit='D')
        tm.assert_index_equal(
            timedelta_range('0 days', periods=5, freq='D'), five_days)

        eleven_days = to_timedelta(np.arange(11), unit='D')
        tm.assert_index_equal(
            timedelta_range('0 days', '10 days', freq='D'), eleven_days)

        shifted = to_timedelta(np.arange(5), unit='D') + Second(2) + Day()
        tm.assert_index_equal(
            timedelta_range('1 days, 00:00:02', '5 days, 00:00:02',
                            freq='D'),
            shifted)

        odd_days = to_timedelta([1, 3, 5, 7, 9], unit='D') + Second(2)
        tm.assert_index_equal(
            timedelta_range('1 days, 00:00:02', periods=5, freq='2D'),
            odd_days)

        half_hours = to_timedelta(np.arange(50), unit='T') * 30
        tm.assert_index_equal(
            timedelta_range('0 days', freq='30T', periods=50), half_hours)

        # GH 11776
        pattern = "1-d array"
        two_dim = np.arange(10).reshape(2, 5)
        frame = pd.DataFrame(np.arange(10).reshape(2, 5))
        for bad_input in (two_dim, frame):
            with pytest.raises(TypeError, match=pattern):
                to_timedelta(bad_input)
            for errors in ['ignore', 'raise', 'coerce']:
                with pytest.raises(TypeError, match=pattern):
                    to_timedelta(bad_input, errors=errors)

        # issue10583
        frame = pd.DataFrame(np.random.normal(size=(10, 4)))
        frame.index = pd.timedelta_range(start='0s', periods=10, freq='s')
        by_timedelta = frame.loc[pd.Timedelta('0s'):, :]
        by_string = frame.loc['0s':, :]
        tm.assert_frame_equal(by_timedelta, by_string)

    @pytest.mark.parametrize('periods, freq', [
        (3, '2D'), (5, 'D'), (6, '19H12T'), (7, '16H'), (9, '12H')])
    def test_linspace_behavior(self, periods, freq):
        """Giving ``periods`` between fixed endpoints matches giving the
        corresponding ``freq``."""
        # GH 20976
        by_periods = timedelta_range(start='0 days', end='4 days',
                                     periods=periods)
        by_freq = timedelta_range(start='0 days', end='4 days', freq=freq)
        tm.assert_index_equal(by_periods, by_freq)

    def test_errors(self):
        """Any argument count other than three of start/end/periods/freq
        raises ``ValueError`` with the documented message."""
        msg = ('Of the four parameters: start, end, periods, and freq, '
               'exactly three must be specified')

        # not enough params
        too_few = (dict(start='0 days'), dict(end='5 days'),
                   dict(periods=2), dict())
        for kwargs in too_few:
            with pytest.raises(ValueError, match=msg):
                timedelta_range(**kwargs)

        # too many params
        too_many = dict(start='0 days', end='5 days', periods=10, freq='H')
        with pytest.raises(ValueError, match=msg):
            timedelta_range(**too_many)
@@ -1,175 +0,0 @@
from datetime import time, timedelta
import numpy as np
import pytest
from pandas._libs.tslib import iNaT
import pandas as pd
from pandas import Series, TimedeltaIndex, isna, to_timedelta
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal
class TestTimedeltas(object):
def test_to_timedelta(self):
def conv(v):
return v.astype('m8[ns]')
d1 = np.timedelta64(1, 'D')
assert (to_timedelta('1 days 06:05:01.00003', box=False) ==
conv(d1 + np.timedelta64(6 * 3600 + 5 * 60 + 1, 's') +
np.timedelta64(30, 'us')))
assert (to_timedelta('15.5us', box=False) ==
conv(np.timedelta64(15500, 'ns')))
# empty string
result = to_timedelta('', box=False)
assert result.astype('int64') == iNaT
result = to_timedelta(['', ''])
assert isna(result).all()
# pass thru
result = to_timedelta(np.array([np.timedelta64(1, 's')]))
expected = pd.Index(np.array([np.timedelta64(1, 's')]))
tm.assert_index_equal(result, expected)
# ints
result = np.timedelta64(0, 'ns')
expected = to_timedelta(0, box=False)
assert result == expected
# Series
expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)])
result = to_timedelta(Series(['1d', '1days 00:00:01']))
tm.assert_series_equal(result, expected)
# with units
result = TimedeltaIndex([np.timedelta64(0, 'ns'), np.timedelta64(
10, 's').astype('m8[ns]')])
expected = to_timedelta([0, 10], unit='s')
tm.assert_index_equal(result, expected)
# single element conversion
v = timedelta(seconds=1)
result = to_timedelta(v, box=False)
expected = np.timedelta64(timedelta(seconds=1))
assert result == expected
v = np.timedelta64(timedelta(seconds=1))
result = to_timedelta(v, box=False)
expected = np.timedelta64(timedelta(seconds=1))
assert result == expected
# arrays of various dtypes
arr = np.array([1] * 5, dtype='int64')
result = to_timedelta(arr, unit='s')
expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5)
tm.assert_index_equal(result, expected)
arr = np.array([1] * 5, dtype='int64')
result = to_timedelta(arr, unit='m')
expected = TimedeltaIndex([np.timedelta64(1, 'm')] * 5)
tm.assert_index_equal(result, expected)
arr = np.array([1] * 5, dtype='int64')
result = to_timedelta(arr, unit='h')
expected = TimedeltaIndex([np.timedelta64(1, 'h')] * 5)
tm.assert_index_equal(result, expected)
arr = np.array([1] * 5, dtype='timedelta64[s]')
result = to_timedelta(arr)
expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5)
tm.assert_index_equal(result, expected)
arr = np.array([1] * 5, dtype='timedelta64[D]')
result = to_timedelta(arr)
expected = TimedeltaIndex([np.timedelta64(1, 'D')] * 5)
tm.assert_index_equal(result, expected)
# Test with lists as input when box=false
expected = np.array(np.arange(3) * 1000000000, dtype='timedelta64[ns]')
result = to_timedelta(range(3), unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)
result = to_timedelta(np.arange(3), unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)
result = to_timedelta([0, 1, 2], unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)
# Tests with fractional seconds as input:
expected = np.array(
[0, 500000000, 800000000, 1200000000], dtype='timedelta64[ns]')
result = to_timedelta([0., 0.5, 0.8, 1.2], unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)
def test_to_timedelta_invalid(self):
    """Check ``to_timedelta`` on invalid input under each ``errors``
    mode: raising, coercing to ``NaT``, or returning the input."""
    # bad value for errors parameter
    with pytest.raises(ValueError, match="errors must be one of"):
        to_timedelta(['foo'], errors='never')

    # these will error
    with pytest.raises(ValueError):
        to_timedelta([1, 2], unit='foo')
    with pytest.raises(ValueError):
        to_timedelta(1, unit='foo')

    # time not supported ATM
    with pytest.raises(ValueError):
        to_timedelta(time(second=1))
    coerced_scalar = to_timedelta(time(second=1), errors='coerce')
    assert coerced_scalar is pd.NaT

    with pytest.raises(ValueError):
        to_timedelta(['foo', 'bar'])

    all_nat = TimedeltaIndex([pd.NaT, pd.NaT])
    tm.assert_index_equal(all_nat,
                          to_timedelta(['foo', 'bar'], errors='coerce'))

    partly_nat = TimedeltaIndex(['1 day', pd.NaT, '1 min'])
    tm.assert_index_equal(partly_nat,
                          to_timedelta(['1 day', 'bar', '1 min'],
                                       errors='coerce'))

    # gh-13613: these should not error because errors='ignore'
    bad_scalar = 'apple'
    assert bad_scalar == to_timedelta(bad_scalar, errors='ignore')

    bad_list = ['apple', '1 days']
    tm.assert_numpy_array_equal(
        np.array(bad_list, dtype=object),
        to_timedelta(bad_list, errors='ignore'))

    bad_index = pd.Index(['apple', '1 days'])
    tm.assert_index_equal(bad_index,
                          to_timedelta(bad_index, errors='ignore'))

    bad_series = Series(['apple', '1 days'])
    tm.assert_series_equal(bad_series,
                           to_timedelta(bad_series, errors='ignore'))
def test_to_timedelta_via_apply(self):
# GH 5458
expected = Series([np.timedelta64(1, 's')])
result = Series(['00:00:01']).apply(to_timedelta)
tm.assert_series_equal(result, expected)
result = Series([to_timedelta('00:00:01')])
tm.assert_series_equal(result, expected)
def test_to_timedelta_on_missing_values(self):
# GH5438
timedelta_NaT = np.timedelta64('NaT')
actual = pd.to_timedelta(Series(['00:00:01', np.nan]))
expected = Series([np.timedelta64(1000000000, 'ns'),
timedelta_NaT], dtype='<m8[ns]')
assert_series_equal(actual, expected)
actual = pd.to_timedelta(Series(['00:00:01', pd.NaT]))
assert_series_equal(actual, expected)
actual = pd.to_timedelta(np.nan)
assert actual.value == timedelta_NaT.astype('int64')
actual = pd.to_timedelta(pd.NaT)
assert actual.value == timedelta_NaT.astype('int64')