demo + utils venv
This commit is contained in:
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,306 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import pickle
|
||||
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
from scipy._lib._numpy_compat import suppress_warnings
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
import numpy.ma.testutils as ma_npt
|
||||
|
||||
from scipy._lib._util import getargspec_no_self as _getargspec
|
||||
from scipy import stats
|
||||
|
||||
|
||||
def check_named_results(res, attributes, ma=False):
    """Assert that positional and named access to `res` agree.

    For every name in `attributes`, the item at the same position in `res`
    must equal the attribute of that name.  When `ma` is true the comparison
    is done with the masked-array aware ``assert_equal``.
    """
    compare = ma_npt.assert_equal if ma else npt.assert_equal
    for position, field in enumerate(attributes):
        compare(res[position], getattr(res, field))
|
||||
|
||||
|
||||
def check_normalization(distfn, args, distname):
    """Assert that `distfn` is normalized to one for the shape `args`.

    Three routes are checked in turn: the zeroth moment, the expectation of
    the constant function 1, and the cdf evaluated at ``distfn.b``.
    `distname` is used as the failure message of the ``expect`` check and
    selects a looser tolerance when it is "ncf".
    """
    npt.assert_allclose(distfn.moment(0, *args), 1.0)

    # this is a temporary plug: either ncf or expect is problematic;
    # best be marked as a knownfail, but I've no clue how to do it.
    atol, rtol = (1e-5, 0) if distname == "ncf" else (1e-7, 1e-7)

    total_mass = distfn.expect(lambda x: 1, args=args)
    npt.assert_allclose(total_mass, 1.0, atol=atol, rtol=rtol,
                        err_msg=distname, verbose=True)

    cdf_at_upper_end = distfn.cdf(distfn.b, *args)
    npt.assert_allclose(cdf_at_upper_end, 1.0)
|
||||
|
||||
|
||||
def check_moment(distfn, arg, m, v, msg):
    """Compare the first two moments of `distfn` with a reference mean/variance.

    Parameters
    ----------
    distfn : distribution object providing ``moment(n, *arg)``
    arg : sequence of shape parameters forwarded to ``moment``
    m, v : expected mean and variance
    msg : prefix for the assertion messages

    A finite `m` (resp. `v`) is compared to 10 decimals against the first
    moment (resp. ``m2 - m1**2``); an infinite one only requires the
    corresponding raw moment to be infinite.
    """
    m1 = distfn.moment(1, *arg)
    m2 = distfn.moment(2, *arg)
    if not np.isinf(m):
        npt.assert_almost_equal(m1, m, decimal=10,
                                err_msg=msg + ' - 1st moment')
    else:                     # or np.isnan(m1),
        npt.assert_(np.isinf(m1),
                    msg + ' - 1st moment -infinite, m1=%s' % str(m1))

    if not np.isinf(v):
        # message used to read "2ndt moment"
        npt.assert_almost_equal(m2 - m1 * m1, v, decimal=10,
                                err_msg=msg + ' - 2nd moment')
    else:                     # or np.isnan(m2),
        npt.assert_(np.isinf(m2),
                    msg + ' - 2nd moment -infinite, m2=%s' % str(m2))
|
||||
|
||||
|
||||
def check_mean_expect(distfn, arg, m, msg):
    """Check the mean obtained through ``distfn.expect`` against `m`.

    Nothing is checked when `m` is not finite.  Otherwise the expectation of
    the identity function must match `m` to 5 decimals; `msg` prefixes the
    failure message.
    """
    if not np.isfinite(m):
        return

    def identity(x):
        return x

    mean_from_expect = distfn.expect(identity, arg)
    npt.assert_almost_equal(mean_from_expect, m, decimal=5,
                            err_msg=msg + ' - 1st moment (expect)')
|
||||
|
||||
|
||||
def check_var_expect(distfn, arg, m, v, msg):
    """Check the second raw moment from ``distfn.expect`` against ``v + m*m``.

    Parameters
    ----------
    distfn : distribution object providing ``expect(func, args)``
    arg : shape parameters forwarded to ``expect``
    m, v : reference mean and variance
    msg : prefix for the assertion message

    Nothing is checked when `v` is not finite.
    """
    if np.isfinite(v):
        m2 = distfn.expect(lambda x: x*x, arg)
        # message used to read "2st moment"
        npt.assert_almost_equal(m2, v + m*m, decimal=5,
                                err_msg=msg + ' - 2nd moment (expect)')
|
||||
|
||||
|
||||
def check_skew_expect(distfn, arg, m, v, s, msg):
    """Check the skewness `s` against the third central moment from ``expect``.

    For a finite `s`, the expectation of ``(x - m)**3`` must equal
    ``s * v**1.5`` to 5 decimals.  A non-finite `s` is only accepted if it
    is NaN.
    """
    if not np.isfinite(s):
        npt.assert_(np.isnan(s))
        return

    def cubed_deviation(x):
        return np.power(x-m, 3)

    third_central = distfn.expect(cubed_deviation, arg)
    target = s * np.power(v, 1.5)
    npt.assert_almost_equal(third_central, target,
                            decimal=5, err_msg=msg + ' - skew')
|
||||
|
||||
|
||||
def check_kurt_expect(distfn, arg, m, v, k, msg):
    """Check the excess kurtosis `k` against the fourth central moment.

    For a finite `k`, the expectation of ``(x - m)**4`` must be close to
    ``(k + 3) * v**2``.  Positive infinity is accepted without any check;
    any other non-finite `k` must be NaN.
    """
    if not np.isfinite(k):
        if not np.isposinf(k):
            npt.assert_(np.isnan(k))
        return

    def fourth_power_deviation(x):
        return np.power(x-m, 4)

    fourth_central = distfn.expect(fourth_power_deviation, arg)
    target = (k + 3.) * np.power(v, 2)
    npt.assert_allclose(fourth_central, target, atol=1e-5, rtol=1e-5,
                        err_msg=msg + ' - kurtosis')
|
||||
|
||||
|
||||
def check_entropy(distfn, arg, msg):
    """Assert that ``distfn.entropy(*arg)`` is not NaN; `msg` prefixes the error."""
    entropy_value = distfn.entropy(*arg)
    is_defined = not np.isnan(entropy_value)
    npt.assert_(is_defined, msg + 'test Entropy is nan')
|
||||
|
||||
|
||||
def check_private_entropy(distfn, args, superclass):
    """Compare the distribution-specific ``_entropy`` with the generic one.

    ``distfn._entropy(*args)`` must be close to the `superclass`
    implementation invoked on the same instance and arguments.
    """
    # compare a generic _entropy with the distribution-specific implementation
    specific = distfn._entropy(*args)
    generic = superclass._entropy(distfn, *args)
    npt.assert_allclose(specific, generic)
|
||||
|
||||
|
||||
def check_entropy_vect_scale(distfn, arg):
    """Check that ``entropy`` broadcasts over an array-like ``scale``.

    The vectorized result must agree (``atol=1e-14``) with the values
    obtained by calling ``entropy`` once per individual scale.  Two inputs
    are used: a 2-d integer array, and a plain list containing a negative
    (invalid) scale.
    """
    grid = np.asarray([[1, 2], [3, 4]])   # check 2-d
    with_invalid = [1, 2, -3]             # check invalid value, check cast

    for scales, singles in ((grid, grid.ravel()),
                            (with_invalid, with_invalid)):
        vectorized = distfn.entropy(*arg, scale=scales)
        one_by_one = np.asarray([distfn.entropy(*arg, scale=single)
                                 for single in singles])
        assert_allclose(vectorized, one_by_one.reshape(vectorized.shape),
                        atol=1e-14)
|
||||
|
||||
|
||||
def check_edge_support(distfn, args):
    """Check distribution methods at the edges ``distfn.a`` / ``distfn.b``.

    cdf/sf (and their logs, except for 'skellam' and 'dlaplace') must hit
    exactly 0 and 1 (resp. -inf and 0) at the edges, ppf/isf must map 0 and
    1 back onto the edges, and ppf/isf must return NaN for probabilities
    outside [0, 1].  For ``rv_discrete`` instances the lower edge used is
    ``distfn.a - 1``.
    """
    # Make sure that x=self.a and self.b are handled correctly.
    lower, upper = distfn.a, distfn.b
    if isinstance(distfn, stats.rv_discrete):
        lower = distfn.a - 1
    x = [lower, upper]

    npt.assert_equal(distfn.cdf(x, *args), [0.0, 1.0])
    npt.assert_equal(distfn.sf(x, *args), [1.0, 0.0])

    has_log_checks = distfn.name not in ('skellam', 'dlaplace')
    if has_log_checks:
        # with a = -inf, log(0) generates warnings
        npt.assert_equal(distfn.logcdf(x, *args), [-np.inf, 0.0])
        npt.assert_equal(distfn.logsf(x, *args), [0.0, -np.inf])

    unit_ends = [0.0, 1.0]
    npt.assert_equal(distfn.ppf(unit_ends, *args), x)
    npt.assert_equal(distfn.isf(unit_ends, *args), x[::-1])

    # out-of-bounds for isf & ppf
    outside = [-1, 2]
    npt.assert_(np.isnan(distfn.isf(outside, *args)).all())
    npt.assert_(np.isnan(distfn.ppf(outside, *args)).all())
|
||||
|
||||
|
||||
def check_named_args(distfn, x, shape_args, defaults, meths):
    """Check calling distribution methods with shape parameters by keyword.

    First the signature of ``distfn._parse_args`` is checked for consistency
    with `defaults`, ``distfn.shapes`` and ``distfn.numargs``.  Then every
    method in `meths` is evaluated at `x` with the shape arguments moved,
    one at a time from the last to the first, from positional to keyword
    form; results must equal the all-positional call.  Finally an unknown
    keyword must make ``distfn.cdf`` raise ``TypeError``.
    """
    ## Check calling w/ named arguments.

    # check consistency of shapes, numargs and _parse signature
    spec = _getargspec(distfn._parse_args)
    npt.assert_(spec.varargs is None)
    npt.assert_(spec.keywords is None)
    npt.assert_(list(spec.defaults) == list(defaults))

    shape_argnames = spec.args[:-len(defaults)]  # a, b, loc=0, scale=1
    declared_shapes = (distfn.shapes.replace(',', ' ').split()
                       if distfn.shapes else '')
    npt.assert_(len(declared_shapes) == distfn.numargs)
    npt.assert_(len(declared_shapes) == len(shape_argnames))

    # check calling w/ named arguments
    shape_args = list(shape_args)

    reference = [meth(x, *shape_args) for meth in meths]
    npt.assert_(np.all(np.isfinite(reference)))

    pending_names = shape_argnames[:]
    positional = shape_args[:]
    by_keyword = {}
    while pending_names:
        name = pending_names.pop()
        value = positional.pop()
        by_keyword[name] = value
        mixed = [meth(x, *positional, **by_keyword) for meth in meths]
        npt.assert_array_equal(reference, mixed)
        if 'n' not in by_keyword:
            # `n` is first parameter of moment(), so can't be used as named arg
            npt.assert_equal(distfn.moment(1, *positional, **by_keyword),
                             distfn.moment(1, *shape_args))

    # unknown arguments should not go through:
    by_keyword['kaboom'] = 42
    assert_raises(TypeError, distfn.cdf, x, **by_keyword)
|
||||
|
||||
|
||||
def check_random_state_property(distfn, args):
    """Check the ``random_state`` attribute of a distribution *instance*.

    Parameters
    ----------
    distfn : distribution object with a settable ``random_state`` and ``rvs``
    args : shape parameters forwarded to ``rvs``

    Draws made through the global NumPy state seeded with 1234, through an
    integer seed, through a ``RandomState`` instance and through a per-call
    ``random_state`` override must all coincide, and the per-call override
    must leave the instance-level state untouched.

    This test fiddles with ``distfn.random_state``, which breaks other tests
    if left modified.  The original value is therefore restored in a
    ``finally`` clause, so a failing assertion cannot leak the altered state.
    """
    saved_state = distfn.random_state
    try:
        # baseline: this relies on the global state
        np.random.seed(1234)
        distfn.random_state = None
        r0 = distfn.rvs(*args, size=8)

        # use an explicit instance-level random_state
        distfn.random_state = 1234
        r1 = distfn.rvs(*args, size=8)
        npt.assert_equal(r0, r1)

        distfn.random_state = np.random.RandomState(1234)
        r2 = distfn.rvs(*args, size=8)
        npt.assert_equal(r0, r2)

        # can override the instance-level random_state for an individual
        # .rvs call
        distfn.random_state = 2
        orig_state = distfn.random_state.get_state()

        r3 = distfn.rvs(*args, size=8,
                        random_state=np.random.RandomState(1234))
        npt.assert_equal(r0, r3)

        # ... and that does not alter the instance-level random_state!
        npt.assert_equal(distfn.random_state.get_state(), orig_state)
    finally:
        # restore the random_state even when a check above failed
        distfn.random_state = saved_state
|
||||
|
||||
|
||||
def check_meth_dtype(distfn, arg, meths):
    """Check that each method in `meths` returns float64 for any input dtype.

    Parameters
    ----------
    distfn : distribution object providing ``ppf``, ``_argcheck``, ``a``, ``b``
    arg : shape parameters forwarded to every call
    meths : callables invoked as ``meth(x, *arg)``

    The quartiles of the distribution are cast to ``np.int_``, ``float16``,
    ``float32`` and ``float64``; every method must return an array whose
    dtype is ``np.float64``.
    """
    q0 = [0.25, 0.5, 0.75]
    x0 = distfn.ppf(q0, *arg)
    x_cast = [x0.astype(tp) for tp in
              (np.int_, np.float16, np.float32, np.float64)]

    for x in x_cast:
        # casting may have clipped the values, exclude those
        # NOTE(review): _argcheck is called only for its side effects here,
        # presumably to refresh distfn.a / distfn.b for `arg` -- confirm.
        distfn._argcheck(*arg)
        x = x[(distfn.a < x) & (x < distfn.b)]
        for meth in meths:
            val = meth(x, *arg)
            # np.float_ was an alias of np.float64 and is gone in NumPy 2.0
            npt.assert_(val.dtype == np.float64)
|
||||
|
||||
|
||||
def check_ppf_dtype(distfn, arg):
    """Check that ``ppf`` and ``isf`` return float64 for any float input dtype.

    Parameters
    ----------
    distfn : distribution object providing ``ppf`` and ``isf``
    arg : shape parameters forwarded to every call

    The probabilities 0.25, 0.5 and 0.75 are cast to ``float16``,
    ``float32`` and ``float64`` before being passed in.
    """
    q0 = np.asarray([0.25, 0.5, 0.75])
    q_cast = [q0.astype(tp) for tp in (np.float16, np.float32, np.float64)]
    for q in q_cast:
        for meth in [distfn.ppf, distfn.isf]:
            val = meth(q, *arg)
            # np.float_ was an alias of np.float64 and is gone in NumPy 2.0
            npt.assert_(val.dtype == np.float64)
|
||||
|
||||
|
||||
def check_cmplx_deriv(distfn, arg):
    """Check distribution methods against complex-step derivatives.

    Points are taken from ``distfn.ppf`` at 0.25, 0.51 and 0.75 and cast to
    several dtypes.  At those points the imaginary part of
    ``f(x + 1e-10j) / 1e-10`` is compared (``rtol=1e-5``) with:
    pdf for cdf, pdf/cdf for logcdf, -pdf for sf, -pdf/sf for logsf, and
    the same quantity of pdf divided by pdf for logpdf.
    """
    # Distributions allow complex arguments.
    step = 1e-10

    def complex_step(func, points, *shape):
        points = np.asarray(points)
        return (func(points + step*1j, *shape)/step).imag

    base_points = distfn.ppf([0.25, 0.51, 0.75], *arg)
    dtypes = (np.int_, np.float16, np.float32, np.float64)
    candidates = [base_points.astype(tp) for tp in dtypes]

    for x in candidates:
        # casting may have clipped the values, exclude those
        distfn._argcheck(*arg)
        inside = (distfn.a < x) & (x < distfn.b)
        x = x[inside]

        pdf = distfn.pdf(x, *arg)
        cdf = distfn.cdf(x, *arg)
        sf = distfn.sf(x, *arg)

        assert_allclose(complex_step(distfn.cdf, x, *arg), pdf, rtol=1e-5)
        assert_allclose(complex_step(distfn.logcdf, x, *arg), pdf/cdf,
                        rtol=1e-5)

        assert_allclose(complex_step(distfn.sf, x, *arg), -pdf, rtol=1e-5)
        assert_allclose(complex_step(distfn.logsf, x, *arg), -pdf/sf,
                        rtol=1e-5)

        logpdf_slope = complex_step(distfn.logpdf, x, *arg)
        relative_pdf_slope = (complex_step(distfn.pdf, x, *arg)
                              / distfn.pdf(x, *arg))
        assert_allclose(logpdf_slope, relative_pdf_slope, rtol=1e-5)
|
||||
|
||||
|
||||
def check_pickling(distfn, args):
    """Check that a distribution instance pickles and unpickles.

    Parameters
    ----------
    distfn : distribution object with a settable ``random_state``
    args : shape parameters forwarded to ``rvs``, ``ppf`` and ``cdf``

    Special attention is paid to the ``random_state`` property: random
    variates drawn after the round trip must equal those drawn from the
    original instance.  ``ppf`` and ``cdf`` are smoke-tested as well.

    The caller's ``random_state`` is saved up front and restored in a
    ``finally`` clause, so a failure in pickling or in any assertion cannot
    leave the instance with the seed set here.
    """
    # save the random_state (restore later)
    saved_state = distfn.random_state
    try:
        distfn.random_state = 1234
        distfn.rvs(*args, size=8)
        s = pickle.dumps(distfn)
        r0 = distfn.rvs(*args, size=8)

        # the payload was produced a few lines above, not external input
        unpickled = pickle.loads(s)
        r1 = unpickled.rvs(*args, size=8)
        npt.assert_equal(r0, r1)

        # also smoke test some methods
        medians = [distfn.ppf(0.5, *args), unpickled.ppf(0.5, *args)]
        npt.assert_equal(medians[0], medians[1])
        npt.assert_equal(distfn.cdf(medians[0], *args),
                         unpickled.cdf(medians[1], *args))
    finally:
        # restore the random_state even when a check above failed
        distfn.random_state = saved_state
|
||||
|
||||
|
||||
def check_rvs_broadcast(distfunc, distname, allargs, shape, shape_only, otype):
    """Check that ``distfunc.rvs`` broadcasts its arguments.

    With the global NumPy seed set to 123, ``rvs(*allargs)`` must return a
    sample of the given `shape`.  Unless `shape_only` is true, the sample
    must also match (``rtol=1e-15``) what ``np.vectorize`` yields when it
    calls ``rvs`` through the same seed, using `otype` as ``otypes``.
    `distname` only appears in the failure message.
    """
    np.random.seed(123)
    with suppress_warnings() as sup:
        # frechet_l and frechet_r are deprecated, so all their
        # methods generate DeprecationWarnings.
        sup.filter(category=DeprecationWarning, message=".*frechet_")
        sample = distfunc.rvs(*allargs)
        assert_equal(sample.shape, shape,
                     "%s: rvs failed to broadcast" % distname)
        if shape_only:
            return

        def draw(*scalar_args):
            return distfunc.rvs(*scalar_args)

        elementwise_rvs = np.vectorize(draw, otypes=otype)
        np.random.seed(123)
        expected = elementwise_rvs(*allargs)
        assert_allclose(sample, expected, rtol=1e-15)
|
||||
@@ -0,0 +1,108 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: AtmWtAg (AtmWtAg.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 108)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Powell, L.J., Murphy, T.J. and Gramlich, J.W. (1982).
|
||||
"The Absolute Isotopic Abundance & Atomic Weight
|
||||
of a Reference Sample of Silver".
|
||||
NBS Journal of Research, 87, pp. 9-19.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
2 Treatments
|
||||
24 Replicates/Cell
|
||||
48 Observations
|
||||
7 Constant Leading Digits
|
||||
Average Level of Difficulty
|
||||
Observed Data
|
||||
|
||||
|
||||
Model: 3 Parameters (mu, tau_1, tau_2)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
|
||||
Between Instrument 1 3.63834187500000E-09 3.63834187500000E-09 1.59467335677930E+01
|
||||
Within Instrument 46 1.04951729166667E-08 2.28155932971014E-10
|
||||
|
||||
Certified R-Squared 2.57426544538321E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.51048314446410E-05
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Instrument AgWt
|
||||
1 107.8681568
|
||||
1 107.8681465
|
||||
1 107.8681572
|
||||
1 107.8681785
|
||||
1 107.8681446
|
||||
1 107.8681903
|
||||
1 107.8681526
|
||||
1 107.8681494
|
||||
1 107.8681616
|
||||
1 107.8681587
|
||||
1 107.8681519
|
||||
1 107.8681486
|
||||
1 107.8681419
|
||||
1 107.8681569
|
||||
1 107.8681508
|
||||
1 107.8681672
|
||||
1 107.8681385
|
||||
1 107.8681518
|
||||
1 107.8681662
|
||||
1 107.8681424
|
||||
1 107.8681360
|
||||
1 107.8681333
|
||||
1 107.8681610
|
||||
1 107.8681477
|
||||
2 107.8681079
|
||||
2 107.8681344
|
||||
2 107.8681513
|
||||
2 107.8681197
|
||||
2 107.8681604
|
||||
2 107.8681385
|
||||
2 107.8681642
|
||||
2 107.8681365
|
||||
2 107.8681151
|
||||
2 107.8681082
|
||||
2 107.8681517
|
||||
2 107.8681448
|
||||
2 107.8681198
|
||||
2 107.8681482
|
||||
2 107.8681334
|
||||
2 107.8681609
|
||||
2 107.8681101
|
||||
2 107.8681512
|
||||
2 107.8681469
|
||||
2 107.8681360
|
||||
2 107.8681254
|
||||
2 107.8681261
|
||||
2 107.8681450
|
||||
2 107.8681368
|
||||
@@ -0,0 +1,85 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: SiRstv (SiRstv.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 85)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Ehrstein, James and Croarkin, M. Carroll.
|
||||
Unpublished NIST dataset.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
5 Treatments
|
||||
5 Replicates/Cell
|
||||
25 Observations
|
||||
3 Constant Leading Digits
|
||||
Lower Level of Difficulty
|
||||
Observed Data
|
||||
|
||||
|
||||
Model: 6 Parameters (mu,tau_1, ... , tau_5)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Instrument 4 5.11462616000000E-02 1.27865654000000E-02 1.18046237440255E+00
|
||||
Within Instrument 20 2.16636560000000E-01 1.08318280000000E-02
|
||||
|
||||
Certified R-Squared 1.90999039051129E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.04076068334656E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Instrument Resistance
|
||||
1 196.3052
|
||||
1 196.1240
|
||||
1 196.1890
|
||||
1 196.2569
|
||||
1 196.3403
|
||||
2 196.3042
|
||||
2 196.3825
|
||||
2 196.1669
|
||||
2 196.3257
|
||||
2 196.0422
|
||||
3 196.1303
|
||||
3 196.2005
|
||||
3 196.2889
|
||||
3 196.0343
|
||||
3 196.1811
|
||||
4 196.2795
|
||||
4 196.1748
|
||||
4 196.1494
|
||||
4 196.1485
|
||||
4 195.9885
|
||||
5 196.2119
|
||||
5 196.1051
|
||||
5 196.1850
|
||||
5 196.0052
|
||||
5 196.2090
|
||||
@@ -0,0 +1,249 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: SmLs01 (SmLs01.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 249)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Simon, Stephen D. and Lesage, James P. (1989).
|
||||
"Assessing the Accuracy of ANOVA Calculations in
|
||||
Statistical Software".
|
||||
Computational Statistics & Data Analysis, 8, pp. 325-332.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
9 Treatments
|
||||
21 Replicates/Cell
|
||||
189 Observations
|
||||
1 Constant Leading Digit
|
||||
Lower Level of Difficulty
|
||||
Generated Data
|
||||
|
||||
|
||||
Model: 10 Parameters (mu,tau_1, ... , tau_9)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Treatment 8 1.68000000000000E+00 2.10000000000000E-01 2.10000000000000E+01
|
||||
Within Treatment 180 1.80000000000000E+00 1.00000000000000E-02
|
||||
|
||||
Certified R-Squared 4.82758620689655E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.00000000000000E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Treatment Response
|
||||
1 1.4
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
1 1.3
|
||||
1 1.5
|
||||
2 1.3
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
2 1.2
|
||||
2 1.4
|
||||
3 1.5
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
3 1.4
|
||||
3 1.6
|
||||
4 1.3
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
4 1.2
|
||||
4 1.4
|
||||
5 1.5
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
5 1.4
|
||||
5 1.6
|
||||
6 1.3
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
6 1.2
|
||||
6 1.4
|
||||
7 1.5
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
7 1.4
|
||||
7 1.6
|
||||
8 1.3
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
8 1.2
|
||||
8 1.4
|
||||
9 1.5
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
9 1.4
|
||||
9 1.6
|
||||
File diff suppressed because it is too large
Load Diff
+18069
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,249 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: SmLs04 (SmLs04.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 249)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Simon, Stephen D. and Lesage, James P. (1989).
|
||||
"Assessing the Accuracy of ANOVA Calculations in
|
||||
Statistical Software".
|
||||
Computational Statistics & Data Analysis, 8, pp. 325-332.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
9 Treatments
|
||||
21 Replicates/Cell
|
||||
189 Observations
|
||||
7 Constant Leading Digits
|
||||
Average Level of Difficulty
|
||||
Generated Data
|
||||
|
||||
|
||||
Model: 10 Parameters (mu,tau_1, ... , tau_9)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Treatment 8 1.68000000000000E+00 2.10000000000000E-01 2.10000000000000E+01
|
||||
Within Treatment 180 1.80000000000000E+00 1.00000000000000E-02
|
||||
|
||||
Certified R-Squared 4.82758620689655E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.00000000000000E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Treatment Response
|
||||
1 1000000.4
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
1 1000000.3
|
||||
1 1000000.5
|
||||
2 1000000.3
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
2 1000000.2
|
||||
2 1000000.4
|
||||
3 1000000.5
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
3 1000000.4
|
||||
3 1000000.6
|
||||
4 1000000.3
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
4 1000000.2
|
||||
4 1000000.4
|
||||
5 1000000.5
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
5 1000000.4
|
||||
5 1000000.6
|
||||
6 1000000.3
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
6 1000000.2
|
||||
6 1000000.4
|
||||
7 1000000.5
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
7 1000000.4
|
||||
7 1000000.6
|
||||
8 1000000.3
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
8 1000000.2
|
||||
8 1000000.4
|
||||
9 1000000.5
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
9 1000000.4
|
||||
9 1000000.6
|
||||
File diff suppressed because it is too large
Load Diff
+18069
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,249 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: SmLs07 (SmLs07.dat)
|
||||
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 41 to 47)
|
||||
Data (lines 61 to 249)
|
||||
|
||||
|
||||
Procedure: Analysis of Variance
|
||||
|
||||
|
||||
Reference: Simon, Stephen D. and Lesage, James P. (1989).
|
||||
"Assessing the Accuracy of ANOVA Calculations in
|
||||
Statistical Software".
|
||||
Computational Statistics & Data Analysis, 8, pp. 325-332.
|
||||
|
||||
|
||||
Data: 1 Factor
|
||||
9 Treatments
|
||||
21 Replicates/Cell
|
||||
189 Observations
|
||||
13 Constant Leading Digits
|
||||
Higher Level of Difficulty
|
||||
Generated Data
|
||||
|
||||
|
||||
Model: 10 Parameters (mu,tau_1, ... , tau_9)
|
||||
y_{ij} = mu + tau_i + epsilon_{ij}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Certified Values:
|
||||
|
||||
Source of Sums of Mean
|
||||
Variation df Squares Squares F Statistic
|
||||
|
||||
Between Treatment 8 1.68000000000000E+00 2.10000000000000E-01 2.10000000000000E+01
|
||||
Within Treatment 180 1.80000000000000E+00 1.00000000000000E-02
|
||||
|
||||
Certified R-Squared 4.82758620689655E-01
|
||||
|
||||
Certified Residual
|
||||
Standard Deviation 1.00000000000000E-01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: Treatment Response
|
||||
1 1000000000000.4
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
1 1000000000000.3
|
||||
1 1000000000000.5
|
||||
2 1000000000000.3
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
2 1000000000000.2
|
||||
2 1000000000000.4
|
||||
3 1000000000000.5
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
3 1000000000000.4
|
||||
3 1000000000000.6
|
||||
4 1000000000000.3
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
4 1000000000000.2
|
||||
4 1000000000000.4
|
||||
5 1000000000000.5
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
5 1000000000000.4
|
||||
5 1000000000000.6
|
||||
6 1000000000000.3
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
6 1000000000000.2
|
||||
6 1000000000000.4
|
||||
7 1000000000000.5
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
7 1000000000000.4
|
||||
7 1000000000000.6
|
||||
8 1000000000000.3
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
8 1000000000000.2
|
||||
8 1000000000000.4
|
||||
9 1000000000000.5
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
9 1000000000000.4
|
||||
9 1000000000000.6
|
||||
File diff suppressed because it is too large
Load Diff
+18069
File diff suppressed because it is too large
Load Diff
+97
@@ -0,0 +1,97 @@
|
||||
NIST/ITL StRD
|
||||
Dataset Name: Norris (Norris.dat)
|
||||
|
||||
File Format: ASCII
|
||||
Certified Values (lines 31 to 46)
|
||||
Data (lines 61 to 96)
|
||||
|
||||
Procedure: Linear Least Squares Regression
|
||||
|
||||
Reference: Norris, J., NIST.
|
||||
Calibration of Ozone Monitors.
|
||||
|
||||
Data: 1 Response Variable (y)
|
||||
1 Predictor Variable (x)
|
||||
36 Observations
|
||||
Lower Level of Difficulty
|
||||
Observed Data
|
||||
|
||||
Model: Linear Class
|
||||
2 Parameters (B0,B1)
|
||||
|
||||
y = B0 + B1*x + e
|
||||
|
||||
|
||||
|
||||
Certified Regression Statistics
|
||||
|
||||
Standard Deviation
|
||||
Parameter Estimate of Estimate
|
||||
|
||||
B0 -0.262323073774029 0.232818234301152
|
||||
B1 1.00211681802045 0.429796848199937E-03
|
||||
|
||||
Residual
|
||||
Standard Deviation 0.884796396144373
|
||||
|
||||
R-Squared 0.999993745883712
|
||||
|
||||
|
||||
Certified Analysis of Variance Table
|
||||
|
||||
Source of Degrees of Sums of Mean
|
||||
Variation Freedom Squares Squares F Statistic
|
||||
|
||||
Regression 1 4255954.13232369 4255954.13232369 5436385.54079785
|
||||
Residual 34 26.6173985294224 0.782864662630069
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Data: y x
|
||||
0.1 0.2
|
||||
338.8 337.4
|
||||
118.1 118.2
|
||||
888.0 884.6
|
||||
9.2 10.1
|
||||
228.1 226.5
|
||||
668.5 666.3
|
||||
998.5 996.3
|
||||
449.1 448.6
|
||||
778.9 777.0
|
||||
559.2 558.2
|
||||
0.3 0.4
|
||||
0.1 0.6
|
||||
778.1 775.5
|
||||
668.8 666.9
|
||||
339.3 338.0
|
||||
448.9 447.5
|
||||
10.8 11.6
|
||||
557.7 556.0
|
||||
228.3 228.1
|
||||
998.0 995.8
|
||||
888.8 887.6
|
||||
119.6 120.2
|
||||
0.3 0.3
|
||||
0.6 0.3
|
||||
557.6 556.8
|
||||
339.3 339.1
|
||||
888.0 887.2
|
||||
998.5 999.0
|
||||
778.9 779.0
|
||||
10.2 11.1
|
||||
117.6 118.3
|
||||
228.9 229.2
|
||||
668.4 669.1
|
||||
449.2 448.9
|
||||
0.2 0.5
|
||||
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,437 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
from scipy.stats import (binned_statistic, binned_statistic_2d,
|
||||
binned_statistic_dd)
|
||||
|
||||
from scipy._lib.six import u
|
||||
from .common_tests import check_named_results
|
||||
|
||||
|
||||
class TestBinnedStatistic(object):
    """Tests for binned_statistic, binned_statistic_2d and binned_statistic_dd.

    Named statistics are checked either against the matching numpy
    histogram function ('count', 'sum') or against the same statistic
    passed as a numpy callable.
    """

    @classmethod
    def setup_class(cls):
        # Fixed seed and fixed draw order: the *_bincode tests compare
        # against hard-coded bin numbers derived from these samples.
        np.random.seed(9865)
        cls.x = np.random.random(100)
        cls.y = np.random.random(100)
        cls.v = np.random.random(100)
        cls.X = np.random.random((100, 3))
        cls.w = np.random.random(100)

    # -- shared helpers (underscore-prefixed, so not collected as tests) --

    def _check_1d_vs_callable(self, name, func):
        """1-D: statistic given by name must match the callable `func`."""
        stat1, edges1, _ = binned_statistic(self.x, self.v, name, bins=10)
        stat2, edges2, _ = binned_statistic(self.x, self.v, func, bins=10)

        assert_allclose(stat1, stat2)
        assert_allclose(edges1, edges2)

    def _check_2d_vs_callable(self, name, func):
        """2-D: statistic given by name must match the callable `func`."""
        stat1, binx1, biny1, _ = binned_statistic_2d(
            self.x, self.y, self.v, name, bins=5)
        stat2, binx2, biny2, _ = binned_statistic_2d(
            self.x, self.y, self.v, func, bins=5)

        assert_allclose(stat1, stat2)
        assert_allclose(binx1, binx2)
        assert_allclose(biny1, biny2)

    def _check_dd_vs_callable(self, name, func):
        """N-D: statistic given by name must match the callable `func`."""
        stat1, edges1, _ = binned_statistic_dd(self.X, self.v, name, bins=3)
        stat2, edges2, _ = binned_statistic_dd(self.X, self.v, func, bins=3)

        assert_allclose(stat1, stat2)
        assert_allclose(edges1, edges2)

    # -- 1-D -------------------------------------------------------------

    def test_1d_count(self):
        x = self.x
        v = self.v

        count1, edges1, bc = binned_statistic(x, v, 'count', bins=10)
        count2, edges2 = np.histogram(x, bins=10)

        assert_allclose(count1, count2)
        assert_allclose(edges1, edges2)

    def test_gh5927(self):
        # smoke test for gh5927 - binned_statistic was using `is` for string
        # comparison
        x = self.x
        v = self.v
        statistics = [u'mean', u'median', u'count', u'sum']
        for statistic in statistics:
            binned_statistic(x, v, statistic, bins=10)

    def test_1d_result_attributes(self):
        x = self.x
        v = self.v

        res = binned_statistic(x, v, 'count', bins=10)
        attributes = ('statistic', 'bin_edges', 'binnumber')
        check_named_results(res, attributes)

    def test_1d_sum(self):
        x = self.x
        v = self.v

        sum1, edges1, bc = binned_statistic(x, v, 'sum', bins=10)
        sum2, edges2 = np.histogram(x, bins=10, weights=v)

        assert_allclose(sum1, sum2)
        assert_allclose(edges1, edges2)

    def test_1d_mean(self):
        self._check_1d_vs_callable('mean', np.mean)

    def test_1d_std(self):
        self._check_1d_vs_callable('std', np.std)

    def test_1d_min(self):
        self._check_1d_vs_callable('min', np.min)

    def test_1d_max(self):
        self._check_1d_vs_callable('max', np.max)

    def test_1d_median(self):
        self._check_1d_vs_callable('median', np.median)

    def test_1d_bincode(self):
        x = self.x[:20]
        v = self.v[:20]

        count1, edges1, bc = binned_statistic(x, v, 'count', bins=3)
        bc2 = np.array([3, 2, 1, 3, 2, 3, 3, 3, 3, 1, 1, 3, 3, 1, 2, 3, 1,
                        1, 2, 1])

        bcount = [(bc == i).sum() for i in np.unique(bc)]

        assert_allclose(bc, bc2)
        assert_allclose(bcount, count1)

    def test_1d_range_keyword(self):
        # Regression test for gh-3063, range can be (min, max) or [(min, max)]
        np.random.seed(9865)
        x = np.arange(30)
        data = np.random.random(30)

        mean, bins, _ = binned_statistic(x[:15], data[:15])
        mean_range, bins_range, _ = binned_statistic(x, data, range=[(0, 14)])
        mean_range2, bins_range2, _ = binned_statistic(x, data, range=(0, 14))

        assert_allclose(mean, mean_range)
        assert_allclose(bins, bins_range)
        assert_allclose(mean, mean_range2)
        assert_allclose(bins, bins_range2)

    def test_1d_multi_values(self):
        x = self.x
        v = self.v
        w = self.w

        stat1v, edges1v, bc1v = binned_statistic(x, v, 'mean', bins=10)
        stat1w, edges1w, bc1w = binned_statistic(x, w, 'mean', bins=10)
        stat2, edges2, bc2 = binned_statistic(x, [v, w], 'mean', bins=10)

        assert_allclose(stat2[0], stat1v)
        assert_allclose(stat2[1], stat1w)
        # Binning depends only on x, so both single-value runs must agree
        # with the multi-value run on edges and bin numbers.
        assert_allclose(edges1v, edges2)
        assert_allclose(edges1w, edges2)
        assert_allclose(bc1v, bc2)
        assert_allclose(bc1w, bc2)

    # -- 2-D -------------------------------------------------------------

    def test_2d_count(self):
        x = self.x
        y = self.y
        v = self.v

        count1, binx1, biny1, bc = binned_statistic_2d(
            x, y, v, 'count', bins=5)
        count2, binx2, biny2 = np.histogram2d(x, y, bins=5)

        assert_allclose(count1, count2)
        assert_allclose(binx1, binx2)
        assert_allclose(biny1, biny2)

    def test_2d_result_attributes(self):
        x = self.x
        y = self.y
        v = self.v

        res = binned_statistic_2d(x, y, v, 'count', bins=5)
        attributes = ('statistic', 'x_edge', 'y_edge', 'binnumber')
        check_named_results(res, attributes)

    def test_2d_sum(self):
        x = self.x
        y = self.y
        v = self.v

        sum1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'sum', bins=5)
        sum2, binx2, biny2 = np.histogram2d(x, y, bins=5, weights=v)

        assert_allclose(sum1, sum2)
        assert_allclose(binx1, binx2)
        assert_allclose(biny1, biny2)

    def test_2d_mean(self):
        self._check_2d_vs_callable('mean', np.mean)

    def test_2d_mean_unicode(self):
        # Unicode literal, as in test_gh5927, rather than the six `u()` shim.
        self._check_2d_vs_callable(u'mean', np.mean)

    def test_2d_std(self):
        self._check_2d_vs_callable('std', np.std)

    def test_2d_min(self):
        self._check_2d_vs_callable('min', np.min)

    def test_2d_max(self):
        self._check_2d_vs_callable('max', np.max)

    def test_2d_median(self):
        self._check_2d_vs_callable('median', np.median)

    def test_2d_bincode(self):
        x = self.x[:20]
        y = self.y[:20]
        v = self.v[:20]

        count1, binx1, biny1, bc = binned_statistic_2d(
            x, y, v, 'count', bins=3)
        bc2 = np.array([17, 11, 6, 16, 11, 17, 18, 17, 17, 7, 6, 18, 16,
                        6, 11, 16, 6, 6, 11, 8])

        bcount = [(bc == i).sum() for i in np.unique(bc)]

        assert_allclose(bc, bc2)
        # Only occupied bins appear in `bc`, so drop the empty ones.
        count1adj = count1[count1.nonzero()]
        assert_allclose(bcount, count1adj)

    def test_2d_multi_values(self):
        x = self.x
        y = self.y
        v = self.v
        w = self.w

        stat1v, binx1v, biny1v, bc1v = binned_statistic_2d(
            x, y, v, 'mean', bins=8)
        stat1w, binx1w, biny1w, bc1w = binned_statistic_2d(
            x, y, w, 'mean', bins=8)
        stat2, binx2, biny2, bc2 = binned_statistic_2d(
            x, y, [v, w], 'mean', bins=8)

        assert_allclose(stat2[0], stat1v)
        assert_allclose(stat2[1], stat1w)
        # Binning depends only on (x, y): check both runs on both axes.
        assert_allclose(binx1v, binx2)
        assert_allclose(binx1w, binx2)
        assert_allclose(biny1v, biny2)
        assert_allclose(biny1w, biny2)
        assert_allclose(bc1v, bc2)
        assert_allclose(bc1w, bc2)

    def test_2d_binnumbers_unraveled(self):
        x = self.x
        y = self.y
        v = self.v

        stat, edgesx, bcx = binned_statistic(x, v, 'mean', bins=20)
        stat, edgesy, bcy = binned_statistic(y, v, 'mean', bins=10)

        stat2, edgesx2, edgesy2, bc2 = binned_statistic_2d(
            x, y, v, 'mean', bins=(20, 10), expand_binnumbers=True)

        bcx3 = np.searchsorted(edgesx, x, side='right')
        bcy3 = np.searchsorted(edgesy, y, side='right')

        # `numpy.searchsorted` is non-inclusive on right-edge, compensate
        bcx3[x == x.max()] -= 1
        bcy3[y == y.max()] -= 1

        assert_allclose(bcx, bc2[0])
        assert_allclose(bcy, bc2[1])
        assert_allclose(bcx3, bc2[0])
        assert_allclose(bcy3, bc2[1])

    # -- N-D -------------------------------------------------------------

    def test_dd_count(self):
        X = self.X
        v = self.v

        count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
        count2, edges2 = np.histogramdd(X, bins=3)

        assert_allclose(count1, count2)
        assert_allclose(edges1, edges2)

    def test_dd_result_attributes(self):
        X = self.X
        v = self.v

        res = binned_statistic_dd(X, v, 'count', bins=3)
        attributes = ('statistic', 'bin_edges', 'binnumber')
        check_named_results(res, attributes)

    def test_dd_sum(self):
        X = self.X
        v = self.v

        sum1, edges1, bc = binned_statistic_dd(X, v, 'sum', bins=3)
        sum2, edges2 = np.histogramdd(X, bins=3, weights=v)

        assert_allclose(sum1, sum2)
        assert_allclose(edges1, edges2)

    def test_dd_mean(self):
        self._check_dd_vs_callable('mean', np.mean)

    def test_dd_std(self):
        self._check_dd_vs_callable('std', np.std)

    def test_dd_min(self):
        self._check_dd_vs_callable('min', np.min)

    def test_dd_max(self):
        self._check_dd_vs_callable('max', np.max)

    def test_dd_median(self):
        self._check_dd_vs_callable('median', np.median)

    def test_dd_bincode(self):
        X = self.X[:20]
        v = self.v[:20]

        count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
        bc2 = np.array([63, 33, 86, 83, 88, 67, 57, 33, 42, 41, 82, 83, 92,
                        32, 36, 91, 43, 87, 81, 81])

        bcount = [(bc == i).sum() for i in np.unique(bc)]

        assert_allclose(bc, bc2)
        # Only occupied bins appear in `bc`, so drop the empty ones.
        count1adj = count1[count1.nonzero()]
        assert_allclose(bcount, count1adj)

    def test_dd_multi_values(self):
        X = self.X
        v = self.v
        w = self.w

        stat1v, edges1v, bc1v = binned_statistic_dd(X, v, np.std, bins=8)
        stat1w, edges1w, bc1w = binned_statistic_dd(X, w, np.std, bins=8)
        stat2, edges2, bc2 = binned_statistic_dd(X, [v, w], np.std, bins=8)

        assert_allclose(stat2[0], stat1v)
        assert_allclose(stat2[1], stat1w)
        assert_allclose(edges1v, edges2)
        assert_allclose(edges1w, edges2)
        assert_allclose(bc1v, bc2)
        assert_allclose(bc1w, bc2)

    def test_dd_binnumbers_unraveled(self):
        X = self.X
        v = self.v

        stat, edgesx, bcx = binned_statistic(X[:, 0], v, 'mean', bins=15)
        stat, edgesy, bcy = binned_statistic(X[:, 1], v, 'mean', bins=20)
        stat, edgesz, bcz = binned_statistic(X[:, 2], v, 'mean', bins=10)

        stat2, edges2, bc2 = binned_statistic_dd(
            X, v, 'mean', bins=(15, 20, 10), expand_binnumbers=True)

        assert_allclose(bcx, bc2[0])
        assert_allclose(bcy, bc2[1])
        assert_allclose(bcz, bc2[2])
|
||||
@@ -0,0 +1,200 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import (assert_equal, assert_array_equal,
|
||||
assert_array_almost_equal, assert_approx_equal, assert_allclose)
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
from scipy.special import xlogy
|
||||
from scipy.stats.contingency import margins, expected_freq, chi2_contingency
|
||||
|
||||
|
||||
def test_margins():
    """Check `margins` on 1-D, 1x1, 2-D and 3-D inputs.

    Each returned marginal keeps the input's number of dimensions, with
    every axis except its own collapsed to length 1.
    """
    # 1-D: a single marginal equal to the input.
    a = np.array([1])
    m = margins(a)
    assert_equal(len(m), 1)
    m0 = m[0]
    assert_array_equal(m0, np.array([1]))

    # Degenerate 1x1 table.
    a = np.array([[1]])
    m0, m1 = margins(a)
    expected0 = np.array([[1]])
    expected1 = np.array([[1]])
    assert_array_equal(m0, expected0)
    assert_array_equal(m1, expected1)

    # 2x6 table: row sums as a column, column sums as a row.
    a = np.arange(12).reshape(2, 6)
    m0, m1 = margins(a)
    expected0 = np.array([[15], [51]])
    expected1 = np.array([[6, 8, 10, 12, 14, 16]])
    assert_array_equal(m0, expected0)
    assert_array_equal(m1, expected1)

    # 2x3x4 table: one marginal per axis.
    a = np.arange(24).reshape(2, 3, 4)
    m0, m1, m2 = margins(a)
    expected0 = np.array([[[66]], [[210]]])
    expected1 = np.array([[[60], [92], [124]]])
    expected2 = np.array([[[60, 66, 72, 78]]])
    assert_array_equal(m0, expected0)
    assert_array_equal(m1, expected1)
    assert_array_equal(m2, expected2)
|
||||
|
||||
|
||||
def test_expected_freq():
    """Check `expected_freq` on a 1-D input, a 3-D table and a 2x3 table."""
    assert_array_equal(expected_freq([1]), np.array([1.0]))

    # 3x2x2 table for which every expected frequency is 1.
    observed = np.array([[[2, 0], [0, 2]], [[0, 2], [2, 0]], [[1, 1], [1, 1]]])
    e = expected_freq(observed)
    assert_array_equal(e, np.ones_like(observed))

    observed = np.array([[10, 10, 20], [20, 20, 20]])
    e = expected_freq(observed)
    correct = np.array([[12., 12., 16.], [18., 18., 24.]])
    assert_array_almost_equal(e, correct)
|
||||
|
||||
|
||||
def test_chi2_contingency_trivial():
    """Very simple checks of chi2_contingency on tables with statistic 0."""
    # A trivial case: identical rows, so observed == expected.
    obs = np.array([[1, 2], [1, 2]])
    chi2, p, dof, expected = chi2_contingency(obs, correction=False)
    assert_equal(chi2, 0.0)
    assert_equal(p, 1.0)
    assert_equal(dof, 1)
    assert_array_equal(obs, expected)

    # A *really* trivial case: 1-D data.
    obs = np.array([1, 2, 3])
    chi2, p, dof, expected = chi2_contingency(obs, correction=False)
    assert_equal(chi2, 0.0)
    assert_equal(p, 1.0)
    assert_equal(dof, 0)
    assert_array_equal(obs, expected)
|
||||
|
||||
|
||||
def test_chi2_contingency_R():
    """Compare chi2_contingency with values computed independently in R.

    The R scripts and their printed output are kept as comments next to
    each case so the reference numbers can be regenerated.
    """
    # ---- Case 1: 3-way table -------------------------------------------
    # R code:
    #
    #   # Data vector.
    #   data <- c(
    #     12, 34, 23,     4,  47,  11,
    #     35, 31, 11,    34,  10,  18,
    #     12, 32,  9,    18,  13,  19,
    #     12, 12, 14,     9,  33,  25
    #     )
    #
    #   # Create factor tags:r=rows, c=columns, t=tiers
    #   r <- factor(gl(4, 2*3, 2*3*4, labels=c("r1", "r2", "r3", "r4")))
    #   c <- factor(gl(3, 1,   2*3*4, labels=c("c1", "c2", "c3")))
    #   t <- factor(gl(2, 3,   2*3*4, labels=c("t1", "t2")))
    #
    #   # 3-way Chi squared test of independence
    #   s = summary(xtabs(data~r+c+t))
    #   print(s)
    #
    # R output:
    #
    #   Call: xtabs(formula = data ~ r + c + t)
    #   Number of cases in table: 478
    #   Number of factors: 3
    #   Test for independence of all factors:
    #           Chisq = 102.17, df = 17, p-value = 3.514e-14
    obs = np.array(
        [[[12, 34, 23],
          [35, 31, 11],
          [12, 32, 9],
          [12, 12, 14]],
         [[4, 47, 11],
          [34, 10, 18],
          [18, 13, 19],
          [9, 33, 25]]])
    chi2, p, dof, expected = chi2_contingency(obs)
    assert_approx_equal(chi2, 102.17, significant=5)
    assert_approx_equal(p, 3.514e-14, significant=4)
    assert_equal(dof, 17)

    # ---- Case 2: 4-way table -------------------------------------------
    # R code:
    #
    #   # Data vector.
    #   data <- c(
    #       #
    #       12, 17,
    #       11, 16,
    #       #
    #       11, 12,
    #       15, 16,
    #       #
    #       23, 15,
    #       30, 22,
    #       #
    #       14, 17,
    #       15, 16
    #       )
    #
    #   # Create factor tags:r=rows, c=columns, d=depths(?), t=tiers
    #   r <- factor(gl(2, 2,  2*2*2*2, labels=c("r1", "r2")))
    #   c <- factor(gl(2, 1,  2*2*2*2, labels=c("c1", "c2")))
    #   d <- factor(gl(2, 4,  2*2*2*2, labels=c("d1", "d2")))
    #   t <- factor(gl(2, 8,  2*2*2*2, labels=c("t1", "t2")))
    #
    #   # 4-way Chi squared test of independence
    #   s = summary(xtabs(data~r+c+d+t))
    #   print(s)
    #
    # R output:
    #
    #   Call: xtabs(formula = data ~ r + c + d + t)
    #   Number of cases in table: 262
    #   Number of factors: 4
    #   Test for independence of all factors:
    #           Chisq = 8.758, df = 11, p-value = 0.6442
    obs = np.array(
        [[[[12, 17],
           [11, 16]],
          [[11, 12],
           [15, 16]]],
         [[[23, 15],
           [30, 22]],
          [[14, 17],
           [15, 16]]]])
    chi2, p, dof, expected = chi2_contingency(obs)
    assert_approx_equal(chi2, 8.758, significant=4)
    assert_approx_equal(p, 0.6442, significant=4)
    assert_equal(dof, 11)
|
||||
|
||||
|
||||
def test_chi2_contingency_g():
    """Check the log-likelihood (G-test) statistic of chi2_contingency.

    The expected value is recomputed here as 2 * sum(obs * log(obs / e)),
    using the (possibly continuity-corrected) observed frequencies.
    """
    c = np.array([[15, 60], [15, 90]])
    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood', correction=False)
    assert_allclose(g, 2*xlogy(c, c/e).sum())

    # With the continuity correction, each observed count in this table
    # is moved 0.5 towards its expected value before the statistic is
    # computed.
    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood', correction=True)
    c_corr = c + np.array([[-0.5, 0.5], [0.5, -0.5]])
    assert_allclose(g, 2*xlogy(c_corr, c_corr/e).sum())

    c = np.array([[10, 12, 10], [12, 10, 10]])
    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood')
    assert_allclose(g, 2*xlogy(c, c/e).sum())
|
||||
|
||||
|
||||
def test_chi2_contingency_bad_args():
    """Test that "bad" inputs to chi2_contingency raise a ValueError."""
    # Negative value in the array of observed frequencies.
    obs = np.array([[-1, 10], [1, 2]])
    assert_raises(ValueError, chi2_contingency, obs)

    # The zeros in this will result in zeros in the array
    # of expected frequencies.
    obs = np.array([[0, 1], [0, 1]])
    assert_raises(ValueError, chi2_contingency, obs)

    # A degenerate case: `observed` has size 0.
    obs = np.empty((0, 8))
    assert_raises(ValueError, chi2_contingency, obs)
|
||||
|
||||
@@ -0,0 +1,420 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
from scipy._lib._numpy_compat import suppress_warnings
|
||||
from scipy.integrate import IntegrationWarning
|
||||
|
||||
from scipy import stats
|
||||
from scipy.special import betainc
|
||||
from. common_tests import (check_normalization, check_moment, check_mean_expect,
|
||||
check_var_expect, check_skew_expect,
|
||||
check_kurt_expect, check_entropy,
|
||||
check_private_entropy, check_entropy_vect_scale,
|
||||
check_edge_support, check_named_args,
|
||||
check_random_state_property,
|
||||
check_meth_dtype, check_ppf_dtype, check_cmplx_deriv,
|
||||
check_pickling, check_rvs_broadcast)
|
||||
from scipy.stats._distr_params import distcont
|
||||
|
||||
"""
|
||||
Test all continuous distributions.
|
||||
|
||||
Parameters were chosen for those distributions that pass the
|
||||
Kolmogorov-Smirnov test. This provides safe parameters for each
|
||||
distribution so that we can perform further testing of class methods.
|
||||
|
||||
These tests currently check only/mostly for serious errors and exceptions,
|
||||
not for numerically exact results.
|
||||
"""
|
||||
|
||||
# Note that you need to add new distributions you want tested
# to _distr_params

DECIMAL = 5  # specify the precision of the tests  # increased from 0 to 5

# Extra (name, shape-args) cases.
# Last four of these fail all around. Need to be checked
distcont_extra = [
    ['betaprime', (100, 86)],
    ['fatiguelife', (5,)],
    ['mielke', (4.6420495492121487, 0.59707419545516938)],
    ['invweibull', (0.58847112119264788,)],
    # burr: sample mean test fails still for c<1
    ['burr', (0.94839838075366045, 4.3820284068855795)],
    # genextreme: sample mean test, sf-logsf test fail
    ['genextreme', (3.3184017469423535,)],
]


# Distributions whose basic tests are marked slow.
distslow = ['kappa4', 'rdist', 'gausshyper',
            'recipinvgauss', 'ksone', 'genexpon',
            'vonmises', 'vonmises_line', 'mielke', 'semicircular',
            'cosine', 'invweibull', 'powerlognorm', 'johnsonsu', 'kstwobign']
# distslow are sorted by speed (very slow to slow)


# These distributions fail the complex derivative test below.
# Here 'fail' mean produce wrong results and/or raise exceptions, depending
# on the implementation details of corresponding special functions.
# cf https://github.com/scipy/scipy/pull/4979 for a discussion.
fails_cmplx = {'beta', 'betaprime', 'chi', 'chi2', 'dgamma', 'dweibull',
               'erlang', 'f', 'gamma', 'gausshyper', 'gengamma',
               'gennorm', 'genpareto', 'halfgennorm', 'invgamma',
               'ksone', 'kstwobign', 'levy_l', 'loggamma', 'logistic',
               'maxwell', 'nakagami', 'ncf', 'nct', 'ncx2', 'norminvgauss',
               'pearson3', 'rice', 't', 'skewnorm', 'tukeylambda',
               'vonmises', 'vonmises_line', 'rv_histogram_instance'}

# An rv_histogram instance built from a small fixed sample; it is run
# through the same generic checks as the named distributions.
_h = np.histogram([1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6,
                   6, 6, 6, 7, 7, 7, 8, 8, 9], bins=8)
histogram_test_instance = stats.rv_histogram(_h)
|
||||
|
||||
|
||||
def cases_test_cont_basic():
    """Yield the (distname, arg) parameter sets for test_cont_basic.

    Covers every entry of `distcont` plus the module's rv_histogram
    instance (yielded as the object itself, with no shape arguments).
    'levy_stable' is skipped, and distributions listed in `distslow`
    are wrapped in a pytest.param carrying the `slow` mark.
    """
    for distname, arg in distcont[:] + [(histogram_test_instance, tuple())]:
        if distname == 'levy_stable':
            continue
        elif distname in distslow:
            yield pytest.param(distname, arg, marks=pytest.mark.slow)
        else:
            yield distname, arg
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname,arg', cases_test_cont_basic())
def test_cont_basic(distname, arg):
    """Run the generic consistency checks on one continuous distribution.

    `distname` is either the name of a distribution in `scipy.stats` or
    an rv_histogram instance; `arg` holds its shape parameters.
    """
    # this test skips slow distributions

    if distname == 'truncnorm':
        pytest.xfail(reason=distname)

    try:
        distfn = getattr(stats, distname)
    except TypeError:
        # getattr() rejects a non-string name: `distname` is the
        # rv_histogram instance itself rather than a name.
        distfn = distname
        distname = 'rv_histogram_instance'
    np.random.seed(765456)
    sn = 500
    with suppress_warnings() as sup:
        # frechet_l and frechet_r are deprecated, so all their
        # methods generate DeprecationWarnings.
        sup.filter(category=DeprecationWarning, message=".*frechet_")
        rvs = distfn.rvs(*arg, size=sn)
        sm = rvs.mean()
        sv = rvs.var()
        m, v = distfn.stats(*arg)

        check_sample_meanvar_(distfn, arg, m, v, sm, sv, sn,
                              distname + 'sample mean test')
        check_cdf_ppf(distfn, arg, distname)
        check_sf_isf(distfn, arg, distname)
        check_pdf(distfn, arg, distname)
        check_pdf_logpdf(distfn, arg, distname)
        check_cdf_logcdf(distfn, arg, distname)
        check_sf_logsf(distfn, arg, distname)

        alpha = 0.01
        if distname == 'rv_histogram_instance':
            # No registered name to pass along; hand over the cdf itself.
            check_distribution_rvs(distfn.cdf, arg, alpha, rvs)
        else:
            check_distribution_rvs(distname, arg, alpha, rvs)

        locscale_defaults = (0, 1)
        meths = [distfn.pdf, distfn.logpdf, distfn.cdf, distfn.logcdf,
                 distfn.logsf]
        # make sure arguments are within support
        spec_x = {'frechet_l': -0.5, 'weibull_max': -0.5, 'levy_l': -0.5,
                  'pareto': 1.5, 'tukeylambda': 0.3,
                  'rv_histogram_instance': 5.0}
        x = spec_x.get(distname, 0.5)
        # From here on these two distributions are checked with simpler
        # shape arguments.
        if distname == 'invweibull':
            arg = (1,)
        elif distname == 'ksone':
            arg = (3,)
        check_named_args(distfn, x, arg, locscale_defaults, meths)
        check_random_state_property(distfn, arg)
        check_pickling(distfn, arg)

        # Entropy
        if distname not in ['ksone', 'kstwobign']:
            check_entropy(distfn, arg, distname)

        if distfn.numargs == 0:
            check_vecentropy(distfn, arg)

        # Only distributions that override _entropy get the private check.
        if (distfn.__class__._entropy != stats.rv_continuous._entropy
                and distname != 'vonmises'):
            check_private_entropy(distfn, arg, stats.rv_continuous)

        with suppress_warnings() as sup:
            sup.filter(IntegrationWarning, "The occurrence of roundoff error")
            sup.filter(IntegrationWarning, "Extremely bad integrand")
            sup.filter(RuntimeWarning, "invalid value")
            check_entropy_vect_scale(distfn, arg)

        check_edge_support(distfn, arg)

        check_meth_dtype(distfn, arg, meths)
        check_ppf_dtype(distfn, arg)

        if distname not in fails_cmplx:
            check_cmplx_deriv(distfn, arg)

        if distname != 'truncnorm':
            check_ppf_private(distfn, arg, distname)
|
||||
|
||||
|
||||
def test_levy_stable_random_state_property():
    """Apply the random_state property check to levy_stable."""
    # levy_stable only implements rvs(), so it is skipped in the
    # main loop in test_cont_basic().  Here we apply just the test
    # check_random_state_property to levy_stable.
    check_random_state_property(stats.levy_stable, (0.5, 0.1))
|
||||
|
||||
|
||||
def cases_test_moments():
    """Yield the parameter sets for test_moments.

    Each item is (distname, arg, normalization_ok, higher_ok,
    is_xfailing).  Every distribution is yielded once with the checks it
    is known to fail switched off; distributions with a known failure are
    yielded a second time with all checks on, marked xfail.
    """
    fail_normalization = {'vonmises', 'ksone'}
    fail_higher = {'vonmises', 'ksone', 'ncf'}

    for distname, arg in distcont[:] + [(histogram_test_instance, tuple())]:
        if distname == 'levy_stable':
            continue

        cond1 = distname not in fail_normalization
        cond2 = distname not in fail_higher

        yield distname, arg, cond1, cond2, False

        if not cond1 or not cond2:
            # Run the distributions that have issues twice, once skipping the
            # not_ok parts, once with the not_ok parts but marked as knownfail
            yield pytest.param(distname, arg, True, True, True,
                               marks=pytest.mark.xfail)
|
||||
|
||||
|
||||
@pytest.mark.slow
@pytest.mark.parametrize('distname,arg,normalization_ok,higher_ok,is_xfailing',
                         cases_test_moments())
def test_moments(distname, arg, normalization_ok, higher_ok, is_xfailing):
    """Check the moments reported by stats() against the generic helpers.

    `normalization_ok` and `higher_ok` switch the normalization and the
    mean/variance/skew/kurtosis expectation checks on or off;
    `is_xfailing` additionally silences all IntegrationWarnings.
    """
    try:
        distfn = getattr(stats, distname)
    except TypeError:
        # getattr() rejects a non-string name: `distname` is the
        # rv_histogram instance itself rather than a name.
        distfn = distname
        distname = 'rv_histogram_instance'

    with suppress_warnings() as sup:
        sup.filter(IntegrationWarning,
                   "The integral is probably divergent, or slowly convergent.")
        sup.filter(category=DeprecationWarning, message=".*frechet_")
        if is_xfailing:
            sup.filter(IntegrationWarning)

        m, v, s, k = distfn.stats(*arg, moments='mvsk')

        if normalization_ok:
            check_normalization(distfn, arg, distname)

        if higher_ok:
            check_mean_expect(distfn, arg, m, distname)
            check_skew_expect(distfn, arg, m, v, s, distname)
            check_var_expect(distfn, arg, m, v, distname)
            check_kurt_expect(distfn, arg, m, v, k, distname)

        check_loc_scale(distfn, arg, m, v, distname)
        check_moment(distfn, arg, m, v, distname)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dist,shape_args', distcont)
def test_rvs_broadcast(dist, shape_args):
    """Check that rvs() broadcasts shape parameters, loc and scale."""
    if dist in ['gausshyper', 'genexpon']:
        pytest.skip("too slow")

    # If shape_only is True, it means the _rvs method of the
    # distribution uses more than one random number to generate a random
    # variate.  That means the result of using rvs with broadcasting or
    # with a nontrivial size will not necessarily be the same as using the
    # numpy.vectorize'd version of rvs(), so we can only compare the shapes
    # of the results, not the values.
    # Whether or not a distribution is in the following list is an
    # implementation detail of the distribution, not a requirement.  If
    # the implementation the rvs() method of a distribution changes, this
    # test might also have to be changed.
    shape_only = dist in ['betaprime', 'dgamma', 'exponnorm', 'norminvgauss',
                          'nct', 'dweibull', 'rice', 'levy_stable', 'skewnorm']

    distfunc = getattr(stats, dist)
    loc = np.zeros(2)
    scale = np.ones((3, 1))
    nargs = distfunc.numargs
    allargs = []
    bshape = [3, 2]
    # Generate shape parameter arguments...
    for k in range(nargs):
        # The k-th shape parameter gets its own leading axis of length
        # k + 4, followed by k + 2 singleton axes.
        shp = (k + 4,) + (1,)*(k + 2)
        allargs.append(shape_args[k]*np.ones(shp))
        bshape.insert(0, k + 4)
    allargs.extend([loc, scale])
    # bshape holds the expected shape when loc, scale, and the shape
    # parameters are all broadcast together.

    check_rvs_broadcast(distfunc, dist, allargs, bshape, shape_only, 'd')
|
||||
|
||||
|
||||
def test_rvs_gh2069_regression():
    """Regression test for gh-2069 (failed in scipy 0.17 and earlier).

    A typical example of the broken behavior:
    >>> norm.rvs(loc=np.zeros(5), scale=np.ones(5))
    array([-2.49613705, -2.49613705, -2.49613705, -2.49613705, -2.49613705])
    """
    np.random.seed(123)
    fail_msg = "All the values are equal, but they shouldn't be!"

    param_sets = [
        dict(loc=np.zeros(5), scale=1),
        dict(loc=0, scale=np.ones(5)),
        dict(loc=np.zeros(5), scale=np.ones(5)),
        dict(loc=np.array([[0], [0]]), scale=np.ones(5)),
    ]
    for params in param_sets:
        sample = stats.norm.rvs(**params)
        # ravel() is a no-op for the 1-D cases and flattens the 2-D one.
        steps = np.diff(sample.ravel())
        npt.assert_(np.all(steps != 0), fail_msg)

    # A size that is incompatible with the broadcast parameters must raise.
    assert_raises(ValueError, stats.norm.rvs,
                  [[0, 0], [0, 0]], [[1, 1], [1, 1]], 1)
    assert_raises(ValueError, stats.gamma.rvs, [2, 3, 4, 5], 0, 1, (2, 2))
    assert_raises(ValueError, stats.gamma.rvs,
                  [1, 1, 1, 1], [0, 0, 0, 0], [[1], [2]], (4,))
|
||||
|
||||
|
||||
def check_sample_meanvar_(distfn, arg, m, v, sm, sv, sn, msg):
    """Test sample mean/variance against the population values `m` and `v`.

    Each test runs only when the corresponding population value is finite.
    `distfn`, `arg` and `msg` are accepted but not used here.
    """
    # this did not work, skipped silently by nose
    mean_is_finite = np.isfinite(m)
    if mean_is_finite:
        check_sample_mean(sm, sv, sn, m)

    var_is_finite = np.isfinite(v)
    if var_is_finite:
        check_sample_var(sv, sn, v)
|
||||
|
||||
|
||||
def check_sample_mean(sm, v, n, popmean):
    """One-sample t-test of sample mean `sm` against `popmean`.

    Adapted from stats.stats.ttest_1samp(a, popmean): computes the t value
    and its two-tailed probability for a sample of size `n` with sample
    variance `v`, and asserts that the probability exceeds 0.01.
    """
    dof = n - 1
    sample_var = ((n - 1) * v) / float(dof)  # looks redundant
    std_err = np.sqrt(sample_var * (1.0 / n))
    t_stat = (sm - popmean) / std_err
    two_tailed = betainc(0.5 * dof, 0.5, dof / (dof + t_stat * t_stat))

    failure = ('mean fail, t,prob = %f, %f, m, sm=%f,%f' %
               (t_stat, two_tailed, popmean, sm))
    npt.assert_(two_tailed > 0.01, failure)
|
||||
|
||||
|
||||
def check_sample_var(sv, n, popvar):
    """Two-sided chi-square test that sample variance `sv` matches `popvar`.

    Parameters
    ----------
    sv : float
        Sample variance.
    n : int
        Sample size.
    popvar : float
        Hypothesized population variance.

    Raises
    ------
    AssertionError
        If the two-sided p-value is not greater than 0.01.
    """
    df = n - 1
    # The statistic must use the *sample* variance; scaling popvar by itself
    # always gives n - 1 and makes the test unable to fail.
    chi2 = df * sv / float(popvar)
    chi2_dist = stats.distributions.chi2
    # Two-sided p-value: twice the smaller tail, so that both too-large and
    # too-small sample variances are detected.
    pval = 2 * min(chi2_dist.cdf(chi2, df), chi2_dist.sf(chi2, df))
    npt.assert_(pval > 0.01, 'var fail, t, pval = %f, %f, v, sv=%f, %f' %
                (chi2, pval, popvar, sv))
|
||||
|
||||
|
||||
def check_cdf_ppf(distfn, arg, msg):
    """Check that ``cdf(ppf(q))`` reproduces q at three probabilities."""
    probs = [0.001, 0.5, 0.999]
    roundtrip = distfn.cdf(distfn.ppf(probs, *arg), *arg)
    npt.assert_almost_equal(roundtrip, probs, decimal=DECIMAL,
                            err_msg=msg + ' - cdf-ppf roundtrip')
|
||||
|
||||
|
||||
def check_sf_isf(distfn, arg, msg):
    """Check the sf/isf roundtrip and that ``cdf == 1 - sf``."""
    probs = [0.1, 0.5, 0.9]
    sf_roundtrip = distfn.sf(distfn.isf(probs, *arg), *arg)
    npt.assert_almost_equal(sf_roundtrip, probs, decimal=DECIMAL,
                            err_msg=msg + ' - sf-isf roundtrip')

    points = [0.1, 0.9]
    cdf_vals = distfn.cdf(points, *arg)
    complement = 1.0 - distfn.sf(points, *arg)
    npt.assert_almost_equal(cdf_vals, complement, decimal=DECIMAL,
                            err_msg=msg + ' - cdf-sf relationship')
|
||||
|
||||
|
||||
def check_pdf(distfn, arg, msg):
    """Compare the pdf near the median with a central difference of the cdf."""
    step = 1e-6
    x0 = distfn.ppf(0.5, *arg)
    density = distfn.pdf(x0, *arg)
    if (density < 1e-4) or (density > 1e4):
        # avoid checking a case where pdf is close to zero or
        # huge (singularity): move slightly off the median
        x0 = x0 + 0.1
        density = distfn.pdf(x0, *arg)

    upper = distfn.cdf(x0 + step, *arg)
    lower = distfn.cdf(x0 - step, *arg)
    numeric_density = (upper - lower)/step/2.0
    # replace with better diff and better test (more points),
    # actually, this works pretty well
    msg += ' - cdf-pdf relationship'
    npt.assert_almost_equal(density, numeric_density, decimal=DECIMAL,
                            err_msg=msg)
|
||||
|
||||
|
||||
def check_pdf_logpdf(distfn, args, msg):
    """Check that ``logpdf`` equals ``log(pdf)`` at several quantiles.

    Points where the pdf is zero, and points where the logpdf is not
    finite, are dropped before the comparison.
    """
    quantiles = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
    xs = distfn.ppf(quantiles, *args)
    density = distfn.pdf(xs, *args)
    log_density = distfn.logpdf(xs, *args)

    nonzero_density = density[density != 0]
    finite_log_density = log_density[np.isfinite(log_density)]
    msg += " - logpdf-log(pdf) relationship"
    npt.assert_almost_equal(np.log(nonzero_density), finite_log_density,
                            decimal=7, err_msg=msg)
|
||||
|
||||
|
||||
def check_sf_logsf(distfn, args, msg):
    """Check that ``logsf`` equals ``log(sf)`` at several quantiles.

    Points where the sf is zero, and points where the logsf is not finite,
    are dropped before the comparison.
    """
    quantiles = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
    xs = distfn.ppf(quantiles, *args)
    survival = distfn.sf(xs, *args)
    log_survival = distfn.logsf(xs, *args)

    nonzero_survival = survival[survival != 0]
    finite_log_survival = log_survival[np.isfinite(log_survival)]
    msg += " - logsf-log(sf) relationship"
    npt.assert_almost_equal(np.log(nonzero_survival), finite_log_survival,
                            decimal=7, err_msg=msg)
|
||||
|
||||
|
||||
def check_cdf_logcdf(distfn, args, msg):
    """Check that ``logcdf`` equals ``log(cdf)`` at several quantiles.

    Points where the cdf is zero, and points where the logcdf is not
    finite, are dropped before the comparison.
    """
    quantiles = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
    xs = distfn.ppf(quantiles, *args)
    cumulative = distfn.cdf(xs, *args)
    log_cumulative = distfn.logcdf(xs, *args)

    nonzero_cumulative = cumulative[cumulative != 0]
    finite_log_cumulative = log_cumulative[np.isfinite(log_cumulative)]
    msg += " - logcdf-log(cdf) relationship"
    npt.assert_almost_equal(np.log(nonzero_cumulative), finite_log_cumulative,
                            decimal=7, err_msg=msg)
|
||||
|
||||
|
||||
def check_distribution_rvs(dist, args, alpha, rvs):
    """Kolmogorov-Smirnov test of the supplied sample against `dist`.

    Variant of the test from scipy.stats.tests that reuses existing random
    variables.  If the supplied sample gives a p-value below `alpha`, the
    test is repeated once with a sample generated by ``kstest`` itself
    before asserting.
    """
    ks_stat, p_value = stats.kstest(rvs, dist, args=args, N=1000)
    if p_value < alpha:
        # Retry: passing the distribution name as `rvs` makes kstest draw
        # its own N variates.
        ks_stat, p_value = stats.kstest(dist, '', args=args, N=1000)
        report = ("D = " + str(ks_stat) + "; pval = " + str(p_value) +
                  "; alpha = " + str(alpha) + "\nargs = " + str(args))
        npt.assert_(p_value > alpha, report)
|
||||
|
||||
|
||||
def check_vecentropy(distfn, args):
    """Check that ``vecentropy`` and the private ``_entropy`` agree."""
    vectorized = distfn.vecentropy(*args)
    direct = distfn._entropy(*args)
    npt.assert_equal(vectorized, direct)
|
||||
|
||||
|
||||
def check_loc_scale(distfn, arg, m, v, msg):
    """Check how ``stats`` mean and variance transform under loc and scale.

    With loc = scale = 10 the mean must become ``m*scale + loc`` and the
    variance ``v*scale**2``.  `msg` is accepted but not used.
    """
    shift, stretch = 10.0, 10.0
    shifted_mean, shifted_var = distfn.stats(*arg, loc=shift, scale=stretch)
    npt.assert_allclose(m*stretch + shift, shifted_mean)
    npt.assert_allclose(v*stretch*stretch, shifted_var)
|
||||
|
||||
|
||||
def check_ppf_private(distfn, arg, msg):
    """Check that the private ``_ppf`` returns no nan at 0.1, 0.5 and 0.9."""
    # fails by design for truncnorm self.nb not defined
    quantiles = np.array([0.1, 0.5, 0.9])
    private_ppf = distfn._ppf(quantiles, *arg)
    has_nan = np.any(np.isnan(private_ppf))
    npt.assert_(not has_nan, msg + 'ppf private is nan')
|
||||
|
||||
@@ -0,0 +1,234 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import numpy.testing as npt
|
||||
import numpy as np
|
||||
from scipy._lib.six import xrange
|
||||
import pytest
|
||||
|
||||
from scipy import stats
|
||||
from .common_tests import (check_normalization, check_moment, check_mean_expect,
|
||||
check_var_expect, check_skew_expect,
|
||||
check_kurt_expect, check_entropy,
|
||||
check_private_entropy, check_edge_support,
|
||||
check_named_args, check_random_state_property,
|
||||
check_pickling, check_rvs_broadcast)
|
||||
from scipy.stats._distr_params import distdiscrete
|
||||
|
||||
# Support points and probabilities of an ad-hoc discrete distribution that
# is tested alongside the named ones.
vals = ([1, 2, 3, 4], [0.1, 0.2, 0.3, 0.4])
# NOTE: this extends the imported list in place.
distdiscrete.append([stats.rv_discrete(values=vals), ()])
|
||||
|
||||
|
||||
def cases_test_discrete_basic():
    """Yield ``(distname, arg, first_case)`` for each discrete distribution.

    `first_case` is True only for the first occurrence of a distribution in
    ``distdiscrete``.
    """
    already_yielded = set()
    for distname, arg in distdiscrete:
        is_first = distname not in already_yielded
        yield distname, arg, is_first
        already_yielded.add(distname)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname,arg,first_case', cases_test_discrete_basic())
def test_discrete_basic(distname, arg, first_case):
    """Run the basic consistency checks for one discrete distribution.

    The checks in the second half run only for the first parametrization of
    each distribution (`first_case`).
    """
    try:
        distfn = getattr(stats, distname)
    except TypeError:
        # Not a name: the case carries a distribution instance directly.
        distfn, distname = distname, 'sample distribution'

    np.random.seed(9765456)
    rvs = distfn.rvs(size=2000, *arg)
    supp = np.unique(rvs)
    m, v = distfn.stats(*arg)

    check_cdf_ppf(distfn, arg, supp, distname + ' cdf_ppf')
    check_pmf_cdf(distfn, arg, distname)
    check_oth(distfn, arg, supp, distname + ' oth')
    check_edge_support(distfn, arg)

    alpha = 0.01
    check_discrete_chisquare(distfn, arg, rvs, alpha,
                             distname + ' chisquare')

    if not first_case:
        return

    locscale_defaults = (0,)
    meths = [distfn.pmf, distfn.logpmf, distfn.cdf, distfn.logcdf,
             distfn.logsf]
    # make sure arguments are within support
    spec_k = {'randint': 11, 'hypergeom': 4, 'bernoulli': 0}
    k = spec_k.get(distname, 1)
    check_named_args(distfn, k, arg, locscale_defaults, meths)
    if distname != 'sample distribution':
        check_scale_docstring(distfn)
    check_random_state_property(distfn, arg)
    check_pickling(distfn, arg)

    # Entropy
    check_entropy(distfn, arg, distname)
    if distfn.__class__._entropy != stats.rv_discrete._entropy:
        check_private_entropy(distfn, arg, stats.rv_discrete)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('distname,arg', distdiscrete)
def test_moments(distname, arg):
    """Compare ``stats`` moments with ``moment``/``expect`` based values."""
    try:
        distfn = getattr(stats, distname)
    except TypeError:
        # Not a name: the case carries a distribution instance directly.
        distfn, distname = distname, 'sample distribution'

    mean, var, skew, kurt = distfn.stats(*arg, moments='mvsk')
    check_normalization(distfn, arg, distname)

    # compare `stats` and `moment` methods
    check_moment(distfn, arg, mean, var, distname)
    check_mean_expect(distfn, arg, mean, distname)
    check_var_expect(distfn, arg, mean, var, distname)
    check_skew_expect(distfn, arg, mean, var, skew, distname)
    # The kurtosis check is skipped for these two distributions.
    if distname not in ('zipf', 'yulesimon'):
        check_kurt_expect(distfn, arg, mean, var, kurt, distname)

    # frozen distr moments
    check_moment_frozen(distfn, arg, mean, 1)
    check_moment_frozen(distfn, arg, var + mean*mean, 2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dist,shape_args', distdiscrete)
def test_rvs_broadcast(dist, shape_args):
    """Check that ``rvs`` broadcasts the shape parameters and loc."""
    # When shape_only is True, the _rvs method of the distribution consumes
    # more than one random number per variate.  Broadcasting (or a
    # nontrivial size) then need not reproduce the numpy.vectorize'd rvs()
    # values, so only the result shapes can be compared.
    # Membership in this list is an implementation detail of each
    # distribution, not a requirement.  If the implementation of the rvs()
    # method of a distribution changes, this test might have to change too.
    shape_only = dist in ('skellam', 'yulesimon')

    try:
        distfunc = getattr(stats, dist)
    except TypeError:
        # Not a name: build a readable label for the instance.
        distfunc = dist
        dist = 'rv_discrete(values=(%r, %r))' % (dist.xk, dist.pk)

    loc = np.zeros(2)
    nargs = distfunc.numargs

    # Shape parameter k gets leading length k + 3 followed by k + 1
    # singleton axes, keeping the dtype of the parameter value.
    shape_params = []
    leading_dims = []
    for k in range(nargs):
        shp = (k + 3,) + (1,)*(k + 1)
        value = shape_args[k]
        shape_params.append(
            value*np.ones(shp, dtype=np.array(value).dtype))
        leading_dims.append(shp[0])
    allargs = shape_params + [loc]

    # Expected shape when loc and the shape parameters are all broadcast
    # together: the last parameter contributes the outermost axis.
    bshape = leading_dims[::-1] + [loc.size]
    check_rvs_broadcast(distfunc, dist, allargs, bshape, shape_only, [np.int_])
|
||||
|
||||
|
||||
def check_cdf_ppf(distfn, arg, supp, msg):
    """Check the ppf/cdf roundtrip on the support points `supp`.

    The cdf is a step function and ppf(q) = min{k : cdf(k) >= q, k integer},
    so ppf(cdf(k)) and ppf(cdf(k) - 1e-8) must give k, while
    ppf(cdf(k) + 1e-8) must give the next support point.
    """
    cdf_at_supp = distfn.cdf(supp, *arg)
    npt.assert_array_equal(distfn.ppf(cdf_at_supp, *arg),
                           supp, msg + '-roundtrip')
    # -1e-8 could cause an error if pmf < 1e-8
    npt.assert_array_equal(distfn.ppf(cdf_at_supp - 1e-8, *arg),
                           supp, msg + '-roundtrip')

    if hasattr(distfn, 'xk'):
        # Distributions built from explicit values skip the "next" check.
        return

    interior = supp[supp < distfn.b]
    just_above = distfn.cdf(interior, *arg) + 1e-8
    npt.assert_array_equal(distfn.ppf(just_above, *arg),
                           interior + distfn.inc, msg + ' ppf-cdf-next')
|
||||
|
||||
|
||||
def check_pmf_cdf(distfn, arg, distname):
    """Check that cdf increments equal the cumulative sum of the pmf.

    Uses the explicit support ``xk`` when the distribution has one,
    otherwise ten consecutive integers starting just below the 1% quantile.
    """
    if hasattr(distfn, 'xk'):
        points = distfn.xk
    else:
        first = int(distfn.ppf(0.01, *arg) - 1)
        points = list(range(first, first + 10))

    cdf_vals = distfn.cdf(points, *arg)
    pmf_running = distfn.pmf(points, *arg).cumsum()

    # skellam needs a looser tolerance (ncx2 accuracy)
    tol = 1e-5 if distname == 'skellam' else 1e-10
    npt.assert_allclose(cdf_vals - cdf_vals[0], pmf_running - pmf_running[0],
                        atol=tol, rtol=tol)
|
||||
|
||||
|
||||
def check_moment_frozen(distfn, arg, m, k):
    """Check that the k-th moment of the frozen distribution equals `m`."""
    frozen = distfn(*arg)
    npt.assert_allclose(frozen.moment(k), m, atol=1e-10, rtol=1e-10)
|
||||
|
||||
|
||||
def check_oth(distfn, arg, supp, msg):
    """Check sf, isf and the median reported by isf against cdf and ppf.

    `msg` is accepted but not used.
    """
    tol = dict(atol=1e-10, rtol=1e-10)

    # sf must be the complement of cdf on the support
    npt.assert_allclose(distfn.sf(supp, *arg), 1. - distfn.cdf(supp, *arg),
                        **tol)

    # isf(q) must agree with ppf(1 - q)
    q = np.linspace(0.01, 0.99, 20)
    npt.assert_allclose(distfn.isf(q, *arg), distfn.ppf(1. - q, *arg),
                        **tol)

    # more than half of the mass lies on either side of the isf median
    median_sf = distfn.isf(0.5, *arg)
    npt.assert_(distfn.sf(median_sf - 1, *arg) > 0.5)
    npt.assert_(distfn.cdf(median_sf + 1, *arg) > 0.5)
|
||||
|
||||
|
||||
def check_discrete_chisquare(distfn, arg, rvs, alpha, msg):
    """Perform chisquare test for random sample of a discrete distribution

    Parameters
    ----------
    distfn : distribution object
        distribution whose cdf defines the expected frequencies
    arg : sequence
        parameters of distribution
    rvs : array_like
        random sample to test
    alpha : float
        significance level, threshold for p-value
    msg : string
        label included in the failure message

    Raises
    ------
    AssertionError
        if the p-value of the chisquare test is not greater than `alpha`
    """
    wsupp = 0.05

    # construct intervals with minimum mass `wsupp`.
    # intervals are left-half-open as in a cdf difference
    lo = int(max(distfn.a, -1000))
    hi = int(min(distfn.b, 1000))
    edges = [lo]
    masses = []
    prev_cdf = 0
    for k in range(lo, hi + 1):
        cdf_k = distfn.cdf(k, *arg)
        gained = cdf_k - prev_cdf
        if not (gained >= wsupp - 1e-14):
            # not enough mass accumulated yet; keep extending the interval
            continue
        edges.append(k)
        masses.append(gained)
        prev_cdf = cdf_k
        if cdf_k > (1 - wsupp):
            break
    if edges[-1] < distfn.b:
        # the remaining upper tail forms the final interval
        edges.append(distfn.b)
        masses.append(1 - prev_cdf)
    distsupp = np.array(edges)
    distmass = np.array(masses)

    # convert intervals to right-half-open as required by histogram
    histsupp = distsupp + 1e-8
    histsupp[0] = distfn.a

    # find sample frequencies and perform chisquare test
    freq, hsupp = np.histogram(rvs, histsupp)
    chis, pval = stats.chisquare(np.array(freq), len(rvs)*distmass)

    npt.assert_(pval > alpha,
                'chisquare - test for %s at arg = %s with pval = %s' %
                (msg, str(arg), str(pval)))
|
||||
|
||||
|
||||
def check_scale_docstring(distfn):
    """Check that the distribution's docstring does not mention 'scale'."""
    doc = distfn.__doc__
    if doc is None:
        # Docstrings can be stripped if interpreter is run with -OO
        return
    npt.assert_('scale' not in doc)
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
from scipy.stats import hypergeom, bernoulli, boltzmann
|
||||
import numpy as np
|
||||
from numpy.testing import assert_almost_equal, assert_equal, assert_allclose
|
||||
|
||||
|
||||
def test_hypergeom_logpmf():
    """Check hypergeom.logpmf symmetries and its Bernoulli special case."""
    # symmetries test
    # f(k,N,K,n) = f(n-k,N,N-K,n) = f(K-k,N,K,N-n) = f(k,N,n,K)
    k, N, K, n = 5, 50, 10, 5
    reference = hypergeom.logpmf(k, N, K, n)
    equivalent_args = [
        (n - k, N, N - K, n),
        (K - k, N, K, N - n),
        (k, N, n, K),
    ]
    for args in equivalent_args:
        assert_almost_equal(reference, hypergeom.logpmf(*args), decimal=12)

    # test related distribution
    # Bernoulli distribution if n = 1
    k, N, K, n = 1, 10, 7, 1
    hypergeom_logpmf = hypergeom.logpmf(k, N, K, n)
    bernoulli_logpmf = bernoulli.logpmf(k, K/N)
    assert_almost_equal(hypergeom_logpmf, bernoulli_logpmf, decimal=12)
|
||||
|
||||
|
||||
def test_boltzmann_upper_bound():
    """Check boltzmann pmf/cdf on a grid that extends past the support."""
    grid = np.arange(-3, 5)

    # N = 1: all mass sits at k == 0
    single_point_pmf = boltzmann.pmf(grid, 0.123, 1)
    assert_equal(single_point_pmf, grid == 0)

    # N = 3 with lambda = log(2)
    lam = np.log(2)
    N = 3
    pmf = boltzmann.pmf(grid, lam, N)
    assert_allclose(pmf, [0, 0, 0, 4/7, 2/7, 1/7, 0, 0], rtol=1e-13)

    cdf = boltzmann.cdf(grid, lam, N)
    assert_allclose(cdf, [0, 0, 0, 4/7, 6/7, 1, 1, 1], rtol=1e-13)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,122 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
from scipy._lib._numpy_compat import suppress_warnings
|
||||
import pytest
|
||||
from scipy import stats
|
||||
|
||||
from .test_continuous_basic import distcont
|
||||
|
||||
# this is not a proper statistical test for convergence, but only
# verifies that the estimate and true values don't differ by too much

# sample sizes to try
fit_sizes = [1000, 5000]

# fraction of the true parameter value used as the fail cut-off
thresh_percent = 0.25
# minimum difference estimate - true to fail test
thresh_min = 0.75

# Distributions whose fit is xfailed in test_cont_fit unless the
# SCIPY_XFAIL environment variable is set.
failing_fits = [
    'burr', 'chi2', 'gausshyper', 'genexpon', 'gengamma', 'kappa4',
    'ksone', 'mielke', 'ncf', 'ncx2', 'pearson3', 'powerlognorm',
    'truncexpon', 'tukeylambda', 'vonmises', 'wrapcauchy', 'levy_stable',
    'trapz',
]

# Don't run the fit test on these:
skip_fit = [
    'erlang',  # Subclass of gamma, generates a warning.
]
|
||||
|
||||
|
||||
def cases_test_cont_fit():
    """Yield ``(distname, arg)`` for every distribution not in ``skip_fit``.

    The cases test the closeness of the estimated parameters to the true
    parameters with the fit method of continuous distributions.
    Note: is slow, some distributions don't converge with sample size <= 10000
    """
    for distname, arg in distcont:
        if distname in skip_fit:
            continue
        yield distname, arg
|
||||
|
||||
|
||||
@pytest.mark.slow
@pytest.mark.parametrize('distname,arg', cases_test_cont_fit())
def test_cont_fit(distname, arg):
    """Fit a random sample and compare the estimate with the true parameters.

    The sample sizes in ``fit_sizes`` are tried in order; the test passes as
    soon as one of them gives an estimate within the thresholds.
    """
    if distname in failing_fits:
        # Skip failing fits unless overridden
        try:
            xfail = not int(os.environ['SCIPY_XFAIL'])
        except Exception:
            xfail = True
        if xfail:
            msg = "Fitting %s doesn't work reliably yet" % distname
            msg += " [Set environment variable SCIPY_XFAIL=1 to run this test nevertheless.]"
            pytest.xfail(msg)

    distfn = getattr(stats, distname)

    # true parameters are the shape arguments followed by loc=0, scale=1
    truearg = np.hstack([arg, [0.0, 1.0]])
    relative_limit = truearg*thresh_percent
    absolute_limit = np.ones(distfn.numargs+2)*thresh_min
    diffthreshold = np.max(np.vstack([relative_limit, absolute_limit]), 0)

    for fit_size in fit_sizes:
        # Note that if a fit succeeds, the other fit_sizes are skipped
        np.random.seed(1234)

        with np.errstate(all='ignore'), suppress_warnings() as sup:
            sup.filter(category=DeprecationWarning, message=".*frechet_")
            rvs = distfn.rvs(size=fit_size, *arg)
            est = distfn.fit(rvs)  # start with default values

        diff = est - truearg

        # threshold for location
        diffthreshold[-2] = np.max([np.abs(rvs.mean())*thresh_percent,
                                    thresh_min])

        if np.any(np.isnan(est)):
            raise AssertionError('nan returned in fit')
        if np.all(np.abs(diff) <= diffthreshold):
            break
    else:
        # no sample size produced an acceptable fit
        txt = 'parameter: %s\n' % str(truearg)
        txt += 'estimated: %s\n' % str(est)
        txt += 'diff : %s\n' % str(diff)
        raise AssertionError('fit not very good in %s\n' % distfn.name + txt)
|
||||
|
||||
|
||||
def _check_loc_scale_mle_fit(name, data, desired, atol=None):
    """Check the (loc, scale) part of the fit of distribution `name`.

    Parameters
    ----------
    name : str
        Name of a distribution in ``scipy.stats``.
    data : array_like
        Sample passed to ``fit``.
    desired : sequence
        Expected ``(loc, scale)``, compared with the last two fitted values.
    atol : float, optional
        Absolute tolerance for the comparison.  ``None`` (the default) means
        no absolute tolerance, i.e. only the default relative tolerance of
        ``assert_allclose`` applies.
    """
    d = getattr(stats, name)
    actual = d.fit(data)[-2:]
    # assert_allclose cannot handle atol=None, so map it to its own default.
    assert_allclose(actual, desired, atol=0 if atol is None else atol,
                    err_msg='poor mle fit of (loc, scale) in %s' % name)
|
||||
|
||||
|
||||
def test_non_default_loc_scale_mle_fit():
    """Check the fitted (loc, scale) of uniform and expon on a small sample."""
    data = np.array([1.01, 1.78, 1.78, 1.78, 1.88, 1.88, 1.88, 2.00])
    expected_by_dist = [
        ('uniform', [1.01, 0.99]),
        ('expon', [1.01, 0.73875]),
    ]
    for name, expected in expected_by_dist:
        _check_loc_scale_mle_fit(name, data, expected, 1e-3)
|
||||
|
||||
|
||||
def test_expon_fit():
    """gh-6167"""
    sample = [0, 0, 0, 0, 2, 2, 2, 2]
    # loc is held fixed at 0; the fitted scale should be 1.0
    fitted = stats.expon.fit(sample, floc=0)
    assert_allclose(fitted, [0, 1.0], atol=1e-3)
|
||||
|
||||
@@ -0,0 +1,368 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
from scipy import stats
|
||||
import numpy as np
|
||||
from numpy.testing import (assert_almost_equal, assert_,
|
||||
assert_array_almost_equal, assert_array_almost_equal_nulp)
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
|
||||
def test_kde_1d():
    """Basic 1-D gaussian_kde checks against a fitted normal distribution."""
    np.random.seed(8765678)
    sample_size = 500
    sample = np.random.randn(sample_size)
    sample_mean = sample.mean()
    sample_std = sample.std(ddof=1)

    # get kde for original sample
    kde = stats.gaussian_kde(sample)

    # evaluate the density function for the kde for some points
    grid = np.linspace(-7, 7, 501)
    kde_density = kde.evaluate(grid)
    normal_density = stats.norm.pdf(grid, loc=sample_mean, scale=sample_std)
    step = grid[1] - grid[0]

    # integrated squared difference between kde and normal pdf is small
    assert_(np.sum((kde_density - normal_density)**2)*step < 0.01)

    # roughly half of the mass lies on either side of the sample mean
    upper_mass = kde.integrate_box_1d(sample_mean, np.inf)
    lower_mass = kde.integrate_box_1d(-np.inf, sample_mean)
    assert_almost_equal(upper_mass, 0.5, decimal=1)
    assert_almost_equal(lower_mass, 0.5, decimal=1)
    # the general box integral agrees with the 1-D specialization
    assert_almost_equal(kde.integrate_box(sample_mean, np.inf),
                        upper_mass, decimal=13)
    assert_almost_equal(kde.integrate_box(-np.inf, sample_mean),
                        lower_mass, decimal=13)

    # closed-form integrals agree with Riemann sums on the grid
    assert_almost_equal(kde.integrate_kde(kde),
                        (kde_density**2).sum()*step, decimal=2)
    assert_almost_equal(kde.integrate_gaussian(sample_mean, sample_std**2),
                        (kde_density*normal_density).sum()*step, decimal=2)
|
||||
|
||||
|
||||
def test_kde_1d_weighted():
    """Basic weighted 1-D gaussian_kde checks against a fitted normal."""
    np.random.seed(8765678)
    sample_size = 500
    sample = np.random.randn(sample_size)
    weights = np.random.rand(sample_size)
    # weighted mean and (biased) weighted standard deviation
    sample_mean = np.average(sample, weights=weights)
    sample_std = np.sqrt(np.average((sample - sample_mean)**2,
                                    weights=weights))

    # get kde for original sample
    kde = stats.gaussian_kde(sample, weights=weights)

    # evaluate the density function for the kde for some points
    grid = np.linspace(-7, 7, 501)
    kde_density = kde.evaluate(grid)
    normal_density = stats.norm.pdf(grid, loc=sample_mean, scale=sample_std)
    step = grid[1] - grid[0]

    # integrated squared difference between kde and normal pdf is small
    assert_(np.sum((kde_density - normal_density)**2)*step < 0.01)

    # roughly half of the mass lies on either side of the weighted mean
    upper_mass = kde.integrate_box_1d(sample_mean, np.inf)
    lower_mass = kde.integrate_box_1d(-np.inf, sample_mean)
    assert_almost_equal(upper_mass, 0.5, decimal=1)
    assert_almost_equal(lower_mass, 0.5, decimal=1)
    # the general box integral agrees with the 1-D specialization
    assert_almost_equal(kde.integrate_box(sample_mean, np.inf),
                        upper_mass, decimal=13)
    assert_almost_equal(kde.integrate_box(-np.inf, sample_mean),
                        lower_mass, decimal=13)

    # closed-form integrals agree with Riemann sums on the grid
    assert_almost_equal(kde.integrate_kde(kde),
                        (kde_density**2).sum()*step, decimal=2)
    assert_almost_equal(kde.integrate_gaussian(sample_mean, sample_std**2),
                        (kde_density*normal_density).sum()*step, decimal=2)
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_kde_2d():
    """Basic 2-D gaussian_kde checks against the generating normal."""
    np.random.seed(8765678)
    sample_size = 500

    mean = np.array([1.0, 3.0])
    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])

    # Need transpose (shape (2, 500)) for kde
    sample = np.random.multivariate_normal(mean, covariance,
                                           size=sample_size).T

    # get kde for original sample
    kde = stats.gaussian_kde(sample)

    # evaluate the density function for the kde for some points
    x, y = np.mgrid[-7:7:500j, -7:7:500j]
    grid_coords = np.vstack([x.ravel(), y.ravel()])
    kde_density = kde.evaluate(grid_coords).reshape(500, 500)

    normal_density = stats.multivariate_normal.pdf(np.dstack([x, y]),
                                                   mean=mean, cov=covariance)
    flat_y = y.ravel()
    step = flat_y[1] - flat_y[0]
    cell_area = step**2

    # integrated squared difference between kde and normal pdf is small
    assert_(np.sum((kde_density - normal_density)**2) * cell_area < 0.01)

    # roughly half of the mass lies on either side of the second mean
    small = -1e100
    large = 1e100
    upper_mass = kde.integrate_box([small, mean[1]], [large, large])
    lower_mass = kde.integrate_box([small, small], [large, mean[1]])

    assert_almost_equal(upper_mass, 0.5, decimal=1)
    assert_almost_equal(lower_mass, 0.5, decimal=1)
    # closed-form integrals agree with Riemann sums on the grid
    assert_almost_equal(kde.integrate_kde(kde),
                        (kde_density**2).sum()*cell_area, decimal=2)
    assert_almost_equal(kde.integrate_gaussian(mean, covariance),
                        (kde_density*normal_density).sum()*cell_area,
                        decimal=2)
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_kde_2d_weighted():
    """Basic weighted 2-D gaussian_kde checks against the generating normal."""
    np.random.seed(8765678)
    sample_size = 500

    mean = np.array([1.0, 3.0])
    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])

    # Need transpose (shape (2, 500)) for kde
    sample = np.random.multivariate_normal(mean, covariance,
                                           size=sample_size).T
    weights = np.random.rand(sample_size)

    # get kde for original sample
    kde = stats.gaussian_kde(sample, weights=weights)

    # evaluate the density function for the kde for some points
    x, y = np.mgrid[-7:7:500j, -7:7:500j]
    grid_coords = np.vstack([x.ravel(), y.ravel()])
    kde_density = kde.evaluate(grid_coords).reshape(500, 500)

    normal_density = stats.multivariate_normal.pdf(np.dstack([x, y]),
                                                   mean=mean, cov=covariance)
    flat_y = y.ravel()
    step = flat_y[1] - flat_y[0]
    cell_area = step**2

    # integrated squared difference between kde and normal pdf is small
    assert_(np.sum((kde_density - normal_density)**2) * cell_area < 0.01)

    # roughly half of the mass lies on either side of the second mean
    small = -1e100
    large = 1e100
    upper_mass = kde.integrate_box([small, mean[1]], [large, large])
    lower_mass = kde.integrate_box([small, small], [large, mean[1]])

    assert_almost_equal(upper_mass, 0.5, decimal=1)
    assert_almost_equal(lower_mass, 0.5, decimal=1)
    # closed-form integrals agree with Riemann sums on the grid
    assert_almost_equal(kde.integrate_kde(kde),
                        (kde_density**2).sum()*cell_area, decimal=2)
    assert_almost_equal(kde.integrate_gaussian(mean, covariance),
                        (kde_density*normal_density).sum()*cell_area,
                        decimal=2)
|
||||
|
||||
|
||||
def test_kde_bandwidth_method():
    """Check that callable and scalar ``bw_method`` reproduce the default."""
    def scotts_factor(kde_obj):
        """Same as default, just check that it works."""
        return np.power(kde_obj.n, -1./(kde_obj.d+4))

    np.random.seed(8765678)
    sample = np.random.randn(50)

    default_kde = stats.gaussian_kde(sample)
    # the same bandwidth supplied as a callable and as a scalar
    callable_kde = stats.gaussian_kde(sample, bw_method=scotts_factor)
    scalar_kde = stats.gaussian_kde(sample, bw_method=default_kde.factor)

    grid = np.linspace(-7, 7, 51)
    reference = default_kde.evaluate(grid)
    from_callable = callable_kde.evaluate(grid)
    assert_almost_equal(reference, from_callable)
    from_scalar = scalar_kde.evaluate(grid)
    assert_almost_equal(reference, from_scalar)

    # an unknown method name is rejected
    assert_raises(ValueError, stats.gaussian_kde, sample,
                  bw_method='wrongstring')
|
||||
|
||||
|
||||
def test_kde_bandwidth_method_weighted():
    """Check that callable and scalar ``bw_method`` reproduce the default
    bandwidth for a *weighted* sample.

    The weights are passed to every estimator so that the effective sample
    size ``neff`` used by ``scotts_factor`` differs from ``n``; without
    weights this test would only repeat the unweighted one.
    """
    def scotts_factor(kde_obj):
        """Same as default, just check that it works."""
        return np.power(kde_obj.neff, -1./(kde_obj.d+4))

    np.random.seed(8765678)
    n_basesample = 50
    xn = np.random.randn(n_basesample)
    wn = np.random.rand(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn, weights=wn)
    # Supply a callable
    gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor, weights=wn)
    # Supply a scalar
    gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor, weights=wn)

    xs = np.linspace(-7, 7, 51)
    kdepdf = gkde.evaluate(xs)
    kdepdf2 = gkde2.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf2)
    kdepdf3 = gkde3.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf3)

    # an unknown method name is rejected
    assert_raises(ValueError, stats.gaussian_kde, xn, bw_method='wrongstring')
|
||||
|
||||
|
||||
# Subclasses that should stay working (extracted from various sources).
|
||||
# Unfortunately the earlier design of gaussian_kde made it necessary for users
|
||||
# to create these kinds of subclasses, or call _compute_covariance() directly.
|
||||
|
||||
class _kde_subclass1(stats.gaussian_kde):
    """Subclass that bypasses ``gaussian_kde.__init__`` entirely.

    It sets the dataset attributes itself and calls
    ``_compute_covariance()`` directly.
    """

    def __init__(self, dataset):
        data = np.atleast_2d(dataset)
        self.dataset = data
        self.d, self.n = data.shape
        self.covariance_factor = self.scotts_factor
        self._compute_covariance()
|
||||
|
||||
|
||||
class _kde_subclass2(stats.gaussian_kde):
    """Subclass that presets ``covariance_factor`` before the base init."""

    def __init__(self, dataset):
        # Assigned before delegating to the base-class initializer.
        self.covariance_factor = self.scotts_factor
        super(_kde_subclass2, self).__init__(dataset)
|
||||
|
||||
|
||||
class _kde_subclass3(stats.gaussian_kde):
    """Subclass that uses a caller-supplied covariance matrix."""

    def __init__(self, dataset, covariance):
        # Must be set first: _compute_covariance below reads it.
        self.covariance = covariance
        stats.gaussian_kde.__init__(self, dataset)

    def _compute_covariance(self):
        """Derive ``inv_cov`` and ``_norm_factor`` from ``self.covariance``."""
        self.inv_cov = np.linalg.inv(self.covariance)
        determinant = np.linalg.det(2*np.pi * self.covariance)
        self._norm_factor = np.sqrt(determinant) * self.n
|
||||
|
||||
|
||||
class _kde_subclass4(stats.gaussian_kde):
    """Subclass whose bandwidth factor is half of Silverman's factor."""

    def covariance_factor(self):
        silverman = self.silverman_factor()
        return 0.5 * silverman
|
||||
|
||||
|
||||
def test_gaussian_kde_subclassing():
    """Check that the legacy subclassing patterns still evaluate correctly."""
    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
    xs = np.linspace(-10, 10, num=50)

    # gaussian_kde itself provides the reference curve
    kde = stats.gaussian_kde(x1)
    ys = kde(xs)

    # subclasses 1 and 2 must reproduce the reference
    for subclass in (_kde_subclass1, _kde_subclass2):
        estimate = subclass(x1)(xs)
        assert_array_almost_equal_nulp(ys, estimate, nulp=10)

    # subclass 3 is seeded with the reference covariance
    kde3 = _kde_subclass3(x1, kde.covariance)
    assert_array_almost_equal_nulp(ys, kde3(xs), nulp=10)

    # subclass 4 uses a different bandwidth, so compare against stored values
    y_expected = [0.06292987, 0.06346938, 0.05860291, 0.08657652, 0.07904017]
    y4 = _kde_subclass4(x1)(x1)
    assert_array_almost_equal(y_expected, y4, decimal=6)

    # Not a subclass, but check for use of _compute_covariance()
    kde5 = kde
    kde5.covariance_factor = lambda: kde.factor
    kde5._compute_covariance()
    assert_array_almost_equal_nulp(ys, kde5(xs), nulp=10)
|
||||
|
||||
|
||||
def test_gaussian_kde_covariance_caching():
    """Changing the bandwidth and then resetting it must restore the default estimate."""
    sample = np.array([-7, -5, 1, 4, 5], dtype=float)
    grid = np.linspace(-10, 10, num=5)

    # These expected values are from scipy 0.10, before some changes to
    # gaussian_kde. They were not compared with any external reference.
    y_expected = [0.02463386, 0.04689208, 0.05395444, 0.05337754, 0.01664475]

    # Set the bandwidth, then reset it to the default.
    kde = stats.gaussian_kde(sample)
    for bw_method in (0.5, 'scott'):
        kde.set_bandwidth(bw_method=bw_method)

    assert_array_almost_equal(y_expected, kde(grid), decimal=7)
|
||||
|
||||
|
||||
def test_gaussian_kde_monkeypatch():
    """Monkeypatching ``covariance_factor`` must match ``bw_method='silverman'``.

    Ugly, but people may rely on this. See scipy pull request 123,
    specifically the linked ML thread "Width of the Gaussian in stats.kde".
    If it is necessary to break this later on, that is to be discussed on ML.
    """
    sample = np.array([-7, -5, 1, 4, 5], dtype=float)
    grid = np.linspace(-10, 10, num=50)

    # The old monkeypatched version to get at Silverman's Rule.
    patched = stats.gaussian_kde(sample)
    patched.covariance_factor = patched.silverman_factor
    patched._compute_covariance()
    y_patched = patched(grid)

    # The new saner version.
    y_builtin = stats.gaussian_kde(sample, bw_method='silverman')(grid)

    assert_array_almost_equal_nulp(y_patched, y_builtin, nulp=10)
|
||||
|
||||
|
||||
def test_kde_integer_input():
    """Regression test for #1181: an integer dataset must be accepted."""
    samples = np.arange(5)
    y_expected = [0.13480721, 0.18222869, 0.19514935, 0.18222869, 0.13480721]
    density = stats.gaussian_kde(samples)(samples)
    assert_array_almost_equal(density, y_expected, decimal=6)
|
||||
|
||||
|
||||
def test_pdf_logpdf():
    """``pdf`` must equal ``evaluate`` and ``logpdf`` must equal its logarithm."""
    np.random.seed(1)
    n_basesample = 50
    sample = np.random.randn(n_basesample)
    grid = np.linspace(-15, 12, 25)

    # Default bandwidth; fewer evaluation points than data
    gkde = stats.gaussian_kde(sample)
    density = gkde.evaluate(grid)
    assert_almost_equal(density, gkde.pdf(grid), decimal=12)
    assert_almost_equal(np.log(density), gkde.logpdf(grid), decimal=12)

    # There are more points than data
    gkde = stats.gaussian_kde(grid)
    log_density = np.log(gkde.evaluate(sample))
    assert_almost_equal(log_density, gkde.logpdf(sample), decimal=12)
|
||||
|
||||
|
||||
def test_pdf_logpdf_weighted():
    """Weighted variant of the ``pdf`` / ``logpdf`` consistency check.

    Both halves build a *weighted* estimator.  Previously the second half
    constructed ``gaussian_kde(xs)`` without weights, which only repeated
    the unweighted test and left the weighted "more points than data" case
    unchecked.
    """
    np.random.seed(1)
    n_basesample = 50
    xn = np.random.randn(n_basesample)
    wn = np.random.rand(n_basesample)

    # Default bandwidth; fewer evaluation points than data
    gkde = stats.gaussian_kde(xn, weights=wn)

    xs = np.linspace(-15, 12, 25)
    pdf = gkde.evaluate(xs)
    pdf2 = gkde.pdf(xs)
    assert_almost_equal(pdf, pdf2, decimal=12)

    logpdf = np.log(pdf)
    logpdf2 = gkde.logpdf(xs)
    assert_almost_equal(logpdf, logpdf2, decimal=12)

    # There are more points than data; fresh random weights are drawn for
    # the 25 grid points that now act as the dataset.
    gkde = stats.gaussian_kde(xs, weights=np.random.rand(len(xs)))
    pdf = np.log(gkde.evaluate(xn))
    pdf2 = gkde.logpdf(xn)
    assert_almost_equal(pdf, pdf2, decimal=12)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,136 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import numpy as np
|
||||
import numpy.ma as ma
|
||||
import scipy.stats.mstats as ms
|
||||
|
||||
from numpy.testing import (assert_equal, assert_almost_equal, assert_,
|
||||
assert_allclose)
|
||||
|
||||
|
||||
def test_compare_medians_ms():
    """Check ``compare_medians_ms`` against two stored reference values."""
    base = np.arange(7)

    shifted = base + 10
    assert_almost_equal(ms.compare_medians_ms(base, shifted), 0)

    unit_interval = np.linspace(0, 1, num=10)
    result = ms.compare_medians_ms(base, unit_interval)
    assert_almost_equal(result, 0.017116406778)
|
||||
|
||||
|
||||
def test_hdmedian():
    """Check ``hdmedian`` on 1-D and 2-D masked arrays."""
    # 1-D array
    values = ma.arange(11)
    assert_allclose(ms.hdmedian(values), 5, rtol=1e-14)
    # Mask everything, then unmask the first seven entries.
    values.mask = ma.make_mask(values)
    values.mask[:7] = False
    assert_allclose(ms.hdmedian(values), 3, rtol=1e-14)

    # Check that `var` keyword returns a value.  TODO: check whether returned
    # value is actually correct.
    with_var = ms.hdmedian(values, var=True)
    assert_(with_var.size == 2)

    # 2-D array
    table = ma.arange(22).reshape((11, 2))
    assert_allclose(ms.hdmedian(table, axis=0), [10, 11])
    table.mask = ma.make_mask(table)
    table.mask[:7, :] = False
    assert_allclose(ms.hdmedian(table, axis=0), [6, 7])
|
||||
|
||||
|
||||
def test_rsh():
    """Smoke test for ``rsh``: only output sizes are verified."""
    np.random.seed(132345)
    sample = np.random.randn(100)

    # Just a sanity check that the code runs and output shape is correct.
    # TODO: check that implementation is correct.
    smoothed = ms.rsh(sample)
    assert_(smoothed.shape == sample.shape)

    # Check points keyword
    at_points = ms.rsh(sample, points=[0, 1.])
    assert_(at_points.size == 2)
|
||||
|
||||
|
||||
def test_mjci():
    """Check ``mjci`` (Maritz-Jarrett estimator) against stored values."""
    data = ma.array([77, 87, 88, 114, 151, 210, 219, 246, 253, 262,
                     296, 299, 306, 376, 428, 515, 666, 1310, 2611])
    expected = [55.76819, 45.84028, 198.87875]
    assert_almost_equal(ms.mjci(data), expected, 5)
|
||||
|
||||
|
||||
def test_trimmed_mean_ci():
    """Check the trimmed mean and its confidence interval against stored values."""
    data = ma.array([545, 555, 558, 572, 575, 576, 578, 580,
                     594, 605, 635, 651, 653, 661, 666])
    assert_almost_equal(ms.trimmed_mean(data, 0.2), 596.2, 1)

    interval = ms.trimmed_mean_ci(data, (0.2, 0.2))
    assert_equal(np.round(interval, 1), [561.8, 630.6])
|
||||
|
||||
|
||||
def test_idealfourths():
    """Check ``idealfourths`` on 1-D input, both axes of 2-D input, and a tiny input."""
    seq = np.arange(100)
    fourths = np.asarray(ms.idealfourths(seq))
    assert_almost_equal(fourths, [24.416667, 74.583333], 6)

    # Three identical columns: every column yields the 1-D answer.
    seq_2d = seq.repeat(3).reshape(-1, 3)
    assert_almost_equal(ms.idealfourths(seq_2d, axis=0),
                        [[24.416667, 24.416667, 24.416667],
                         [74.583333, 74.583333, 74.583333]], 6)
    # Along axis=1 each row is a constant triple.
    assert_almost_equal(ms.idealfourths(seq_2d, axis=1),
                        seq.repeat(2).reshape(-1, 2))

    # Two-element input: the result is expected to be all NaN.
    degenerate = ms.idealfourths([0, 0])
    assert_(np.isnan(degenerate).all())
|
||||
|
||||
|
||||
class TestQuantiles(object):
    """Tests for the Harrell-Davis quantile helpers, run on a fixed 100-value sample."""

    # Shared fixture: 100 values, reshaped to 10x10 by the axis tests.
    data = [
        0.706560797, 0.727229578, 0.990399276, 0.927065621, 0.158953014,
        0.887764025, 0.239407086, 0.349638551, 0.972791145, 0.149789972,
        0.936947700, 0.132359948, 0.046041972, 0.641675031, 0.945530547,
        0.224218684, 0.771450991, 0.820257774, 0.336458052, 0.589113496,
        0.509736129, 0.696838829, 0.491323573, 0.622767425, 0.775189248,
        0.641461450, 0.118455200, 0.773029450, 0.319280007, 0.752229111,
        0.047841438, 0.466295911, 0.583850781, 0.840581845, 0.550086491,
        0.466470062, 0.504765074, 0.226855960, 0.362641207, 0.891620942,
        0.127898691, 0.490094097, 0.044882048, 0.041441695, 0.317976349,
        0.504135618, 0.567353033, 0.434617473, 0.636243375, 0.231803616,
        0.230154113, 0.160011327, 0.819464108, 0.854706985, 0.438809221,
        0.487427267, 0.786907310, 0.408367937, 0.405534192, 0.250444460,
        0.995309248, 0.144389588, 0.739947527, 0.953543606, 0.680051621,
        0.388382017, 0.863530727, 0.006514031, 0.118007779, 0.924024803,
        0.384236354, 0.893687694, 0.626534881, 0.473051932, 0.750134705,
        0.241843555, 0.432947602, 0.689538104, 0.136934797, 0.150206859,
        0.474335206, 0.907775349, 0.525869295, 0.189184225, 0.854284286,
        0.831089744, 0.251637345, 0.587038213, 0.254475554, 0.237781276,
        0.827928620, 0.480283781, 0.594514455, 0.213641488, 0.024194386,
        0.536668589, 0.699497811, 0.892804071, 0.093835427, 0.731107772]

    def test_hdquantiles(self):
        """Check ``hdquantiles`` / ``hdquantiles_sd`` values and axis handling."""
        data = self.data
        quartiles = [0.25, 0.5, 0.75]

        assert_almost_equal(ms.hdquantiles(data, [0., 1.]),
                            [0.006514031, 0.995309248])
        assert_almost_equal(ms.hdquantiles(data, quartiles),
                            [0.253210762, 0.512847491, 0.762232442])
        assert_almost_equal(ms.hdquantiles_sd(data, quartiles),
                            [0.03786954, 0.03805389, 0.03800152], 4)

        # Column-wise results must agree with the per-column 1-D calls,
        # checked on the first and last column.
        grid = np.array(data).reshape(10, 10)
        by_column = ms.hdquantiles(grid, quartiles, axis=0)
        for col in (0, -1):
            assert_almost_equal(by_column[:, col],
                                ms.hdquantiles(grid[:, col], quartiles))

        by_column_var = ms.hdquantiles(grid, quartiles, axis=0, var=True)
        for col in (0, -1):
            assert_almost_equal(
                by_column_var[..., col],
                ms.hdquantiles(grid[:, col], quartiles, var=True))

    def test_hdquantiles_sd(self):
        """Smoke test with default probabilities."""
        # Only test that code runs, implementation not checked for correctness
        sd = ms.hdquantiles_sd(self.data)
        assert_(sd.size == 3)

    def test_mquantiles_cimj(self):
        """Smoke test with default probabilities."""
        # Only test that code runs, implementation not checked for correctness
        lower, upper = ms.mquantiles_cimj(self.data)
        assert_(lower.size == upper.size == 3)
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,218 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_array_equal
|
||||
|
||||
from scipy.stats import rankdata, tiecorrect
|
||||
|
||||
|
||||
class TestTieCorrect(object):
    """Tests for ``tiecorrect``, the tie correction factor computed from ranks."""

    def test_empty(self):
        """An empty array requires no correction, should return 1.0."""
        correction = tiecorrect(np.array([], dtype=np.float64))
        assert_equal(correction, 1.0)

    def test_one(self):
        """A single element requires no correction, should return 1.0."""
        correction = tiecorrect(np.array([1.0], dtype=np.float64))
        assert_equal(correction, 1.0)

    def test_no_correction(self):
        """Arrays with no ties require no correction."""
        for stop in (2.0, 3.0):
            assert_equal(tiecorrect(np.arange(stop)), 1.0)

    def test_basic(self):
        """Check a few basic examples of the tie correction factor."""
        # Each entry: (ranks, sizes of the tied groups in those ranks)
        examples = [
            # One tie of two elements
            ([1.0, 2.5, 2.5], [2.0]),
            # One tie of two elements (same as above, but tie is not at the end)
            ([1.5, 1.5, 3.0], [2.0]),
            # One tie of three elements
            ([1.0, 3.0, 3.0, 3.0], [3.0]),
            # Two ties, lengths 2 and 3.
            ([1.5, 1.5, 4.0, 4.0, 4.0], [2.0, 3.0]),
        ]
        for rank_values, tie_sizes in examples:
            ranks = np.array(rank_values)
            size = ranks.size
            tie_term = sum(t**3 - t for t in tie_sizes)
            expected = 1.0 - tie_term / (size**3 - size)
            assert_equal(tiecorrect(ranks), expected)

    def test_overflow(self):
        """Large tied groups: ``k`` groups of ``ntie`` equal values each."""
        ntie, k = 2000, 5
        values = np.repeat(np.arange(k), ntie)
        n = values.size  # ntie * k
        expected = 1.0 - k * (ntie**3 - ntie) / float(n**3 - n)
        assert_equal(tiecorrect(rankdata(values)), expected)
|
||||
|
||||
|
||||
class TestRankData(object):
    """Tests for ``rankdata`` across input types, sizes and ranking methods."""

    def test_empty(self):
        """stats.rankdata([]) should return an empty array."""
        empty = np.array([], dtype=np.float64)
        assert_array_equal(rankdata(np.array([], dtype=int)), empty)
        assert_array_equal(rankdata([]), empty)

    def test_one(self):
        """Check stats.rankdata with an array of length 1."""
        data = [100]
        expected = np.array([1.0], dtype=np.float64)
        assert_array_equal(rankdata(np.array(data, dtype=int)), expected)
        assert_array_equal(rankdata(data), expected)

    def test_basic(self):
        """Basic tests of stats.rankdata."""
        examples = [
            ([100, 10, 50], [3.0, 1.0, 2.0]),
            ([40, 10, 30, 10, 50], [4.0, 1.5, 3.0, 1.5, 5.0]),
            ([20, 20, 20, 10, 10, 10], [5.0, 5.0, 5.0, 2.0, 2.0, 2.0]),
        ]
        # Every example is checked both as an integer array and as a list.
        for data, ranks in examples:
            expected = np.array(ranks, dtype=np.float64)
            assert_array_equal(rankdata(np.array(data, dtype=int)), expected)
            assert_array_equal(rankdata(data), expected)

        # The docstring states explicitly that the argument is flattened.
        data, ranks = examples[-1]
        a2d = np.array(data, dtype=int).reshape(2, 3)
        assert_array_equal(rankdata(a2d), np.array(ranks, dtype=np.float64))

    def test_rankdata_object_string(self):
        """Compare each ranking method with a naive reference on str/object data.

        The inputs are drawn from a private, seeded ``RandomState`` so the
        test is reproducible and leaves the global NumPy RNG untouched.
        """
        def min_rank(a):
            return [1 + sum(i < j for i in a) for j in a]

        def max_rank(a):
            return [sum(i <= j for i in a) for j in a]

        def ordinal_rank(a):
            # Pairing each value with its position breaks ties by order.
            return min_rank([(x, i) for i, x in enumerate(a)])

        def average_rank(a):
            return [(i + j) / 2.0 for i, j in zip(min_rank(a), max_rank(a))]

        def dense_rank(a):
            b = np.unique(a)
            return [1 + sum(i < j for i in b) for j in a]

        rankf = dict(min=min_rank, max=max_rank, ordinal=ordinal_rank,
                     average=average_rank, dense=dense_rank)

        def check_ranks(a):
            for method in 'min', 'max', 'dense', 'ordinal', 'average':
                out = rankdata(a, method=method)
                assert_array_equal(out, rankf[method](a))

        rng = np.random.RandomState(1234)

        val = ['foo', 'bar', 'qux', 'xyz', 'abc', 'efg', 'ace', 'qwe', 'qaz']
        check_ranks(rng.choice(val, 200))
        check_ranks(rng.choice(val, 200).astype('object'))

        val = np.array([0, 1, 2, 2.718, 3, 3.141], dtype='object')
        check_ranks(rng.choice(val, 200).astype('object'))

    def test_large_int(self):
        """Integers around 2**60 that differ by one must rank as distinct."""
        data = np.array([2**60, 2**60+1], dtype=np.uint64)
        assert_array_equal(rankdata(data), [1.0, 2.0])

        data = np.array([2**60, 2**60+1], dtype=np.int64)
        assert_array_equal(rankdata(data), [1.0, 2.0])

        data = np.array([2**60, -2**60+1], dtype=np.int64)
        assert_array_equal(rankdata(data), [2.0, 1.0])

    def test_big_tie(self):
        """All-equal input of size n: every rank equals ``0.5 * (n + 1)``."""
        for n in [10000, 100000, 1000000]:
            data = np.ones(n, dtype=int)
            r = rankdata(data)
            expected_rank = 0.5 * (n + 1)
            assert_array_equal(r, expected_rank * data,
                               "test failed with n=%d" % n)
|
||||
|
||||
|
||||
_cases = (
|
||||
# values, method, expected
|
||||
([], 'average', []),
|
||||
([], 'min', []),
|
||||
([], 'max', []),
|
||||
([], 'dense', []),
|
||||
([], 'ordinal', []),
|
||||
#
|
||||
([100], 'average', [1.0]),
|
||||
([100], 'min', [1.0]),
|
||||
([100], 'max', [1.0]),
|
||||
([100], 'dense', [1.0]),
|
||||
([100], 'ordinal', [1.0]),
|
||||
#
|
||||
([100, 100, 100], 'average', [2.0, 2.0, 2.0]),
|
||||
([100, 100, 100], 'min', [1.0, 1.0, 1.0]),
|
||||
([100, 100, 100], 'max', [3.0, 3.0, 3.0]),
|
||||
([100, 100, 100], 'dense', [1.0, 1.0, 1.0]),
|
||||
([100, 100, 100], 'ordinal', [1.0, 2.0, 3.0]),
|
||||
#
|
||||
([100, 300, 200], 'average', [1.0, 3.0, 2.0]),
|
||||
([100, 300, 200], 'min', [1.0, 3.0, 2.0]),
|
||||
([100, 300, 200], 'max', [1.0, 3.0, 2.0]),
|
||||
([100, 300, 200], 'dense', [1.0, 3.0, 2.0]),
|
||||
([100, 300, 200], 'ordinal', [1.0, 3.0, 2.0]),
|
||||
#
|
||||
([100, 200, 300, 200], 'average', [1.0, 2.5, 4.0, 2.5]),
|
||||
([100, 200, 300, 200], 'min', [1.0, 2.0, 4.0, 2.0]),
|
||||
([100, 200, 300, 200], 'max', [1.0, 3.0, 4.0, 3.0]),
|
||||
([100, 200, 300, 200], 'dense', [1.0, 2.0, 3.0, 2.0]),
|
||||
([100, 200, 300, 200], 'ordinal', [1.0, 2.0, 4.0, 3.0]),
|
||||
#
|
||||
([100, 200, 300, 200, 100], 'average', [1.5, 3.5, 5.0, 3.5, 1.5]),
|
||||
([100, 200, 300, 200, 100], 'min', [1.0, 3.0, 5.0, 3.0, 1.0]),
|
||||
([100, 200, 300, 200, 100], 'max', [2.0, 4.0, 5.0, 4.0, 2.0]),
|
||||
([100, 200, 300, 200, 100], 'dense', [1.0, 2.0, 3.0, 2.0, 1.0]),
|
||||
([100, 200, 300, 200, 100], 'ordinal', [1.0, 3.0, 5.0, 4.0, 2.0]),
|
||||
#
|
||||
([10] * 30, 'ordinal', np.arange(1.0, 31.0)),
|
||||
)
|
||||
|
||||
|
||||
def test_cases():
    """Run ``rankdata`` over every ``(values, method, expected)`` row of ``_cases``.

    The failure message names the offending row, since all rows share a
    single test function and would otherwise be indistinguishable.
    """
    for values, method, expected in _cases:
        r = rankdata(values, method=method)
        assert_array_equal(r, expected,
                           "test failed with values=%r, method=%r"
                           % (values, method))
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,88 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
|
||||
from scipy.stats._tukeylambda_stats import (tukeylambda_variance,
|
||||
tukeylambda_kurtosis)
|
||||
|
||||
|
||||
def test_tukeylambda_stats_known_exact():
    """Compare results with some known exact formulas."""
    # Some exact values of the Tukey Lambda variance and kurtosis,
    # one row per lambda: (lambda, variance, kurtosis)
    exact = [
        # logistic distribution
        (0, np.pi**2 / 3, 1.2),
        (0.5, 4 - np.pi, (5./3 - np.pi/2) / (np.pi/4 - 1)**2 - 3),
        # uniform distribution on (-1,1)
        (1, 1.0 / 3, -1.2),
        # uniform distribution on (-1/2, 1/2)
        (2, 1.0 / 12, -1.2),
    ]

    for lam, var_exact, kurt_exact in exact:
        assert_allclose(tukeylambda_variance(lam), var_exact, atol=1e-12)
        assert_allclose(tukeylambda_kurtosis(lam), kurt_exact, atol=1e-10)
|
||||
|
||||
|
||||
def test_tukeylambda_stats_mpmath():
    """Compare results with some values that were computed using mpmath."""
    var_tol = dict(atol=1e-12, rtol=0)
    kurt_tol = dict(atol=1e-10, rtol=0)
    data = [
        # lambda        variance              kurtosis
        [-0.1, 4.78050217874253547, 3.78559520346454510],
        [-0.0649, 4.16428023599895777, 2.52019675947435718],
        [-0.05, 3.93672267890775277, 2.13129793057777277],
        [-0.001, 3.30128380390964882, 1.21452460083542988],
        [0.001, 3.27850775649572176, 1.18560634779287585],
        [0.03125, 2.95927803254615800, 0.804487555161819980],
        [0.05, 2.78281053405464501, 0.611604043886644327],
        [0.0649, 2.65282386754100551, 0.476834119532774540],
        [1.2, 0.242153920578588346, -1.23428047169049726],
        [10.0, 0.00095237579757703597, 2.37810697355144933],
        [20.0, 0.00012195121951131043, 7.37654321002709531],
    ]

    # Scalar arguments, one row at a time.
    for row in data:
        lam, var_expected, kurt_expected = row
        assert_allclose(tukeylambda_variance(lam), var_expected, **var_tol)
        assert_allclose(tukeylambda_kurtosis(lam), kurt_expected, **kurt_tol)

    # Test with vector arguments (most of the other tests are for single
    # values).
    lams, var_column, kurt_column = zip(*data)
    assert_allclose(tukeylambda_variance(lams), var_column, **var_tol)
    assert_allclose(tukeylambda_kurtosis(lams), kurt_column, **kurt_tol)
|
||||
|
||||
|
||||
def test_tukeylambda_stats_invalid():
    """Test values of lambda outside the domains of the functions."""
    expected = np.array([np.nan, np.inf])

    # The expected output is [nan, inf] for both functions.
    variance = tukeylambda_variance([-1.0, -0.5])
    assert_equal(variance, expected)

    kurtosis = tukeylambda_kurtosis([-1.0, -0.25])
    assert_equal(kurtosis, expected)
|
||||
|
||||
Reference in New Issue
Block a user