pruned venvs

2019-03-12 21:57:16 +01:00
parent 33f0511081
commit e441f4f7f7
5988 changed files with 0 additions and 1353666 deletions
@@ -1,26 +0,0 @@
-"""
-Module to read ARFF files, which are the standard data format for WEKA.
-
-ARFF is a text file format which support numerical, string and data values.
-The format can also represent missing data and sparse data.
-
-Notes
-----
-The ARFF support in ``scipy.io`` provides file reading functionality only.
-For more extensive ARFF functionality, see `liac-arff
-<https://github.com/renatopp/liac-arff>`_.
-
-See the `WEKA website <http://weka.wikispaces.com/ARFF>`_
-for more details about the ARFF format and available datasets.
-
-"""
-from __future__ import division, print_function, absolute_import
-
-from .arffread import *
-from . import arffread
-
-__all__ = arffread.__all__
-
-from scipy._lib._testutils import PytestTester
-test = PytestTester(__name__)
-del PytestTester
@@ -1,670 +0,0 @@
-# Last Change: Mon Aug 20 08:00 PM 2007 J
-from __future__ import division, print_function, absolute_import
-
-import re
-import itertools
-import datetime
-from functools import partial
-
-import numpy as np
-
-from scipy._lib.six import next
-
-"""A module to read arff files."""
-
-__all__ = ['MetaData', 'loadarff', 'ArffError', 'ParseArffError']
-
-# An Arff file is basically two parts:
-#   - header
-#   - data
-#
-# A header has each of its components starting by @META where META is one of
-# the keyword (attribute of relation, for now).
-
-# TODO:
-#   - both integer and reals are treated as numeric -> the integer info
-#    is lost!
-#   - Replace ValueError by ParseError or something
-
-# We know can handle the following:
-#   - numeric and nominal attributes
-#   - missing values for numeric attributes
-
-r_meta = re.compile(r'^\s*@')
-# Match a comment
-r_comment = re.compile(r'^%')
-# Match an empty line
-r_empty = re.compile(r'^\s+$')
-# Match a header line, that is a line which starts by @ + a word
-r_headerline = re.compile(r'^@\S*')
-r_datameta = re.compile(r'^@[Dd][Aa][Tt][Aa]')
-r_relation = re.compile(r'^@[Rr][Ee][Ll][Aa][Tt][Ii][Oo][Nn]\s*(\S*)')
-r_attribute = re.compile(r'^@[Aa][Tt][Tt][Rr][Ii][Bb][Uu][Tt][Ee]\s*(..*$)')
-
-# To get attributes name enclosed with ''
-r_comattrval = re.compile(r"'(..+)'\s+(..+$)")
-# To get normal attributes
-r_wcomattrval = re.compile(r"(\S+)\s+(..+$)")
-
-#-------------------------
-# Module defined exception
-#-------------------------
-
-
-class ArffError(IOError):
-    pass
-
-
-class ParseArffError(ArffError):
-    pass
-
-#------------------
-# Various utilities
-#------------------
-
-# An attribute  is defined as @attribute name value
-
-
-def parse_type(attrtype):
-    """Given an arff attribute value (meta data), returns its type.
-
-    Expect the value to be a name."""
-    uattribute = attrtype.lower().strip()
-    if uattribute[0] == '{':
-        return 'nominal'
-    elif uattribute[:len('real')] == 'real':
-        return 'numeric'
-    elif uattribute[:len('integer')] == 'integer':
-        return 'numeric'
-    elif uattribute[:len('numeric')] == 'numeric':
-        return 'numeric'
-    elif uattribute[:len('string')] == 'string':
-        return 'string'
-    elif uattribute[:len('relational')] == 'relational':
-        return 'relational'
-    elif uattribute[:len('date')] == 'date':
-        return 'date'
-    else:
-        raise ParseArffError("unknown attribute %s" % uattribute)
-
-
-def get_nominal(attribute):
-    """If attribute is nominal, returns a list of the values"""
-    return attribute.split(',')
-
-
-def read_data_list(ofile):
-    """Read each line of the iterable and put it in a list."""
-    data = [next(ofile)]
-    if data[0].strip()[0] == '{':
-        raise ValueError("This looks like a sparse ARFF: not supported yet")
-    data.extend([i for i in ofile])
-    return data
-
-
-def get_ndata(ofile):
-    """Read the whole file to get number of data attributes."""
-    data = [next(ofile)]
-    loc = 1
-    if data[0].strip()[0] == '{':
-        raise ValueError("This looks like a sparse ARFF: not supported yet")
-    for i in ofile:
-        loc += 1
-    return loc
-
-
-def maxnomlen(atrv):
-    """Given a string containing a nominal type definition, returns the
-    string len of the biggest component.
-
-    A nominal type is defined as seomthing framed between brace ({}).
-
-    Parameters
-    ----------
-    atrv : str
-       Nominal type definition
-
-    Returns
-    -------
-    slen : int
-       length of longest component
-
-    Examples
-    --------
-    maxnomlen("{floup, bouga, fl, ratata}") returns 6 (the size of
-    ratata, the longest nominal value).
-
-    >>> maxnomlen("{floup, bouga, fl, ratata}")
-    6
-    """
-    nomtp = get_nom_val(atrv)
-    return max(len(i) for i in nomtp)
-
-
-def get_nom_val(atrv):
-    """Given a string containing a nominal type, returns a tuple of the
-    possible values.
-
-    A nominal type is defined as something framed between braces ({}).
-
-    Parameters
-    ----------
-    atrv : str
-       Nominal type definition
-
-    Returns
-    -------
-    poss_vals : tuple
-       possible values
-
-    Examples
-    --------
-    >>> get_nom_val("{floup, bouga, fl, ratata}")
-    ('floup', 'bouga', 'fl', 'ratata')
-    """
-    r_nominal = re.compile('{(.+)}')
-    m = r_nominal.match(atrv)
-    if m:
-        return tuple(i.strip() for i in m.group(1).split(','))
-    else:
-        raise ValueError("This does not look like a nominal string")
-
-
-def get_date_format(atrv):
-    r_date = re.compile(r"[Dd][Aa][Tt][Ee]\s+[\"']?(.+?)[\"']?$")
-    m = r_date.match(atrv)
-    if m:
-        pattern = m.group(1).strip()
-        # convert time pattern from Java's SimpleDateFormat to C's format
-        datetime_unit = None
-        if "yyyy" in pattern:
-            pattern = pattern.replace("yyyy", "%Y")
-            datetime_unit = "Y"
-        elif "yy":
-            pattern = pattern.replace("yy", "%y")
-            datetime_unit = "Y"
-        if "MM" in pattern:
-            pattern = pattern.replace("MM", "%m")
-            datetime_unit = "M"
-        if "dd" in pattern:
-            pattern = pattern.replace("dd", "%d")
-            datetime_unit = "D"
-        if "HH" in pattern:
-            pattern = pattern.replace("HH", "%H")
-            datetime_unit = "h"
-        if "mm" in pattern:
-            pattern = pattern.replace("mm", "%M")
-            datetime_unit = "m"
-        if "ss" in pattern:
-            pattern = pattern.replace("ss", "%S")
-            datetime_unit = "s"
-        if "z" in pattern or "Z" in pattern:
-            raise ValueError("Date type attributes with time zone not "
-                             "supported, yet")
-
-        if datetime_unit is None:
-            raise ValueError("Invalid or unsupported date format")
-
-        return pattern, datetime_unit
-    else:
-        raise ValueError("Invalid or no date format")
-
-
-def go_data(ofile):
-    """Skip header.
-
-    the first next() call of the returned iterator will be the @data line"""
-    return itertools.dropwhile(lambda x: not r_datameta.match(x), ofile)
-
-
-#----------------
-# Parsing header
-#----------------
-def tokenize_attribute(iterable, attribute):
-    """Parse a raw string in header (eg starts by @attribute).
-
-    Given a raw string attribute, try to get the name and type of the
-    attribute. Constraints:
-
-    * The first line must start with @attribute (case insensitive, and
-      space like characters before @attribute are allowed)
-    * Works also if the attribute is spread on multilines.
-    * Works if empty lines or comments are in between
-
-    Parameters
-    ----------
-    attribute : str
-       the attribute string.
-
-    Returns
-    -------
-    name : str
-       name of the attribute
-    value : str
-       value of the attribute
-    next : str
-       next line to be parsed
-
-    Examples
-    --------
-    If attribute is a string defined in python as r"floupi real", will
-    return floupi as name, and real as value.
-
-    >>> iterable = iter([0] * 10) # dummy iterator
-    >>> tokenize_attribute(iterable, r"@attribute floupi real")
-    ('floupi', 'real', 0)
-
-    If attribute is r"'floupi 2' real", will return 'floupi 2' as name,
-    and real as value.
-
-    >>> tokenize_attribute(iterable, r"  @attribute 'floupi 2' real   ")
-    ('floupi 2', 'real', 0)
-
-    """
-    sattr = attribute.strip()
-    mattr = r_attribute.match(sattr)
-    if mattr:
-        # atrv is everything after @attribute
-        atrv = mattr.group(1)
-        if r_comattrval.match(atrv):
-            name, type = tokenize_single_comma(atrv)
-            next_item = next(iterable)
-        elif r_wcomattrval.match(atrv):
-            name, type = tokenize_single_wcomma(atrv)
-            next_item = next(iterable)
-        else:
-            # Not sure we should support this, as it does not seem supported by
-            # weka.
-            raise ValueError("multi line not supported yet")
-            #name, type, next_item = tokenize_multilines(iterable, atrv)
-    else:
-        raise ValueError("First line unparsable: %s" % sattr)
-
-    if type == 'relational':
-        raise ValueError("relational attributes not supported yet")
-    return name, type, next_item
-
-
-def tokenize_single_comma(val):
-    # XXX we match twice the same string (here and at the caller level). It is
-    # stupid, but it is easier for now...
-    m = r_comattrval.match(val)
-    if m:
-        try:
-            name = m.group(1).strip()
-            type = m.group(2).strip()
-        except IndexError:
-            raise ValueError("Error while tokenizing attribute")
-    else:
-        raise ValueError("Error while tokenizing single %s" % val)
-    return name, type
-
-
-def tokenize_single_wcomma(val):
-    # XXX we match twice the same string (here and at the caller level). It is
-    # stupid, but it is easier for now...
-    m = r_wcomattrval.match(val)
-    if m:
-        try:
-            name = m.group(1).strip()
-            type = m.group(2).strip()
-        except IndexError:
-            raise ValueError("Error while tokenizing attribute")
-    else:
-        raise ValueError("Error while tokenizing single %s" % val)
-    return name, type
-
-
-def read_header(ofile):
-    """Read the header of the iterable ofile."""
-    i = next(ofile)
-
-    # Pass first comments
-    while r_comment.match(i):
-        i = next(ofile)
-
-    # Header is everything up to DATA attribute ?
-    relation = None
-    attributes = []
-    while not r_datameta.match(i):
-        m = r_headerline.match(i)
-        if m:
-            isattr = r_attribute.match(i)
-            if isattr:
-                name, type, i = tokenize_attribute(ofile, i)
-                attributes.append((name, type))
-            else:
-                isrel = r_relation.match(i)
-                if isrel:
-                    relation = isrel.group(1)
-                else:
-                    raise ValueError("Error parsing line %s" % i)
-                i = next(ofile)
-        else:
-            i = next(ofile)
-
-    return relation, attributes
-
-
-#--------------------
-# Parsing actual data
-#--------------------
-def safe_float(x):
-    """given a string x, convert it to a float. If the stripped string is a ?,
-    return a Nan (missing value).
-
-    Parameters
-    ----------
-    x : str
-       string to convert
-
-    Returns
-    -------
-    f : float
-       where float can be nan
-
-    Examples
-    --------
-    >>> safe_float('1')
-    1.0
-    >>> safe_float('1\\n')
-    1.0
-    >>> safe_float('?\\n')
-    nan
-    """
-    if '?' in x:
-        return np.nan
-    else:
-        return float(x)
-
-
-def safe_nominal(value, pvalue):
-    svalue = value.strip()
-    if svalue in pvalue:
-        return svalue
-    elif svalue == '?':
-        return svalue
-    else:
-        raise ValueError("%s value not in %s" % (str(svalue), str(pvalue)))
-
-
-def safe_date(value, date_format, datetime_unit):
-    date_str = value.strip().strip("'").strip('"')
-    if date_str == '?':
-        return np.datetime64('NaT', datetime_unit)
-    else:
-        dt = datetime.datetime.strptime(date_str, date_format)
-        return np.datetime64(dt).astype("datetime64[%s]" % datetime_unit)
-
-
-class MetaData(object):
-    """Small container to keep useful information on a ARFF dataset.
-
-    Knows about attributes names and types.
-
-    Examples
-    --------
-    ::
-
-        data, meta = loadarff('iris.arff')
-        # This will print the attributes names of the iris.arff dataset
-        for i in meta:
-            print(i)
-        # This works too
-        meta.names()
-        # Getting attribute type
-        types = meta.types()
-
-    Notes
-    -----
-    Also maintains the list of attributes in order, i.e. doing for i in
-    meta, where meta is an instance of MetaData, will return the
-    different attribute names in the order they were defined.
-    """
-    def __init__(self, rel, attr):
-        self.name = rel
-        # We need the dictionary to be ordered
-        # XXX: may be better to implement an ordered dictionary
-        self._attributes = {}
-        self._attrnames = []
-        for name, value in attr:
-            tp = parse_type(value)
-            self._attrnames.append(name)
-            if tp == 'nominal':
-                self._attributes[name] = (tp, get_nom_val(value))
-            elif tp == 'date':
-                self._attributes[name] = (tp, get_date_format(value)[0])
-            else:
-                self._attributes[name] = (tp, None)
-
-    def __repr__(self):
-        msg = ""
-        msg += "Dataset: %s\n" % self.name
-        for i in self._attrnames:
-            msg += "\t%s's type is %s" % (i, self._attributes[i][0])
-            if self._attributes[i][1]:
-                msg += ", range is %s" % str(self._attributes[i][1])
-            msg += '\n'
-        return msg
-
-    def __iter__(self):
-        return iter(self._attrnames)
-
-    def __getitem__(self, key):
-        return self._attributes[key]
-
-    def names(self):
-        """Return the list of attribute names."""
-        return self._attrnames
-
-    def types(self):
-        """Return the list of attribute types."""
-        attr_types = [self._attributes[name][0] for name in self._attrnames]
-        return attr_types
-
-
-def loadarff(f):
-    """
-    Read an arff file.
-
-    The data is returned as a record array, which can be accessed much like
-    a dictionary of numpy arrays.  For example, if one of the attributes is
-    called 'pressure', then its first 10 data points can be accessed from the
-    ``data`` record array like so: ``data['pressure'][0:10]``
-
-
-    Parameters
-    ----------
-    f : file-like or str
-       File-like object to read from, or filename to open.
-
-    Returns
-    -------
-    data : record array
-       The data of the arff file, accessible by attribute names.
-    meta : `MetaData`
-       Contains information about the arff file such as name and
-       type of attributes, the relation (name of the dataset), etc...
-
-    Raises
-    ------
-    ParseArffError
-        This is raised if the given file is not ARFF-formatted.
-    NotImplementedError
-        The ARFF file has an attribute which is not supported yet.
-
-    Notes
-    -----
-
-    This function should be able to read most arff files. Not
-    implemented functionality include:
-
-    * date type attributes
-    * string type attributes
-
-    It can read files with numeric and nominal attributes.  It cannot read
-    files with sparse data ({} in the file).  However, this function can
-    read files with missing data (? in the file), representing the data
-    points as NaNs.
-
-    Examples
-    --------
-    >>> from scipy.io import arff
-    >>> from io import StringIO
-    >>> content = \"\"\"
-    ... @relation foo
-    ... @attribute width  numeric
-    ... @attribute height numeric
-    ... @attribute color  {red,green,blue,yellow,black}
-    ... @data
-    ... 5.0,3.25,blue
-    ... 4.5,3.75,green
-    ... 3.0,4.00,red
-    ... \"\"\"
-    >>> f = StringIO(content)
-    >>> data, meta = arff.loadarff(f)
-    >>> data
-    array([(5.0, 3.25, 'blue'), (4.5, 3.75, 'green'), (3.0, 4.0, 'red')],
-          dtype=[('width', '<f8'), ('height', '<f8'), ('color', '|S6')])
-    >>> meta
-    Dataset: foo
-    \twidth's type is numeric
-    \theight's type is numeric
-    \tcolor's type is nominal, range is ('red', 'green', 'blue', 'yellow', 'black')
-
-    """
-    if hasattr(f, 'read'):
-        ofile = f
-    else:
-        ofile = open(f, 'rt')
-    try:
-        return _loadarff(ofile)
-    finally:
-        if ofile is not f:  # only close what we opened
-            ofile.close()
-
-
-def _loadarff(ofile):
-    # Parse the header file
-    try:
-        rel, attr = read_header(ofile)
-    except ValueError as e:
-        msg = "Error while parsing header, error was: " + str(e)
-        raise ParseArffError(msg)
-
-    # Check whether we have a string attribute (not supported yet)
-    hasstr = False
-    for name, value in attr:
-        type = parse_type(value)
-        if type == 'string':
-            hasstr = True
-
-    meta = MetaData(rel, attr)
-
-    # XXX The following code is not great
-    # Build the type descriptor descr and the list of convertors to convert
-    # each attribute to the suitable type (which should match the one in
-    # descr).
-
-    # This can be used once we want to support integer as integer values and
-    # not as numeric anymore (using masked arrays ?).
-    acls2dtype = {'real': float, 'integer': float, 'numeric': float}
-    acls2conv = {'real': safe_float,
-                 'integer': safe_float,
-                 'numeric': safe_float}
-    descr = []
-    convertors = []
-    if not hasstr:
-        for name, value in attr:
-            type = parse_type(value)
-            if type == 'date':
-                date_format, datetime_unit = get_date_format(value)
-                descr.append((name, "datetime64[%s]" % datetime_unit))
-                convertors.append(partial(safe_date, date_format=date_format,
-                                          datetime_unit=datetime_unit))
-            elif type == 'nominal':
-                n = maxnomlen(value)
-                descr.append((name, 'S%d' % n))
-                pvalue = get_nom_val(value)
-                convertors.append(partial(safe_nominal, pvalue=pvalue))
-            else:
-                descr.append((name, acls2dtype[type]))
-                convertors.append(safe_float)
-                #dc.append(acls2conv[type])
-                #sdescr.append((name, acls2sdtype[type]))
-    else:
-        # How to support string efficiently ? Ideally, we should know the max
-        # size of the string before allocating the numpy array.
-        raise NotImplementedError("String attributes not supported yet, sorry")
-
-    ni = len(convertors)
-
-    def generator(row_iter, delim=','):
-        # TODO: this is where we are spending times (~80%). I think things
-        # could be made more efficiently:
-        #   - We could for example "compile" the function, because some values
-        #   do not change here.
-        #   - The function to convert a line to dtyped values could also be
-        #   generated on the fly from a string and be executed instead of
-        #   looping.
-        #   - The regex are overkill: for comments, checking that a line starts
-        #   by % should be enough and faster, and for empty lines, same thing
-        #   --> this does not seem to change anything.
-
-        # 'compiling' the range since it does not change
-        # Note, I have already tried zipping the converters and
-        # row elements and got slightly worse performance.
-        elems = list(range(ni))
-
-        for raw in row_iter:
-            # We do not abstract skipping comments and empty lines for
-            # performance reasons.
-            if r_comment.match(raw) or r_empty.match(raw):
-                continue
-            row = raw.split(delim)
-            yield tuple([convertors[i](row[i]) for i in elems])
-
-    a = generator(ofile)
-    # No error should happen here: it is a bug otherwise
-    data = np.fromiter(a, descr)
-    return data, meta
-
-
-#-----
-# Misc
-#-----
-def basic_stats(data):
-    nbfac = data.size * 1. / (data.size - 1)
-    return np.nanmin(data), np.nanmax(data), np.mean(data), np.std(data) * nbfac
-
-
-def print_attribute(name, tp, data):
-    type = tp[0]
-    if type == 'numeric' or type == 'real' or type == 'integer':
-        min, max, mean, std = basic_stats(data)
-        print("%s,%s,%f,%f,%f,%f" % (name, type, min, max, mean, std))
-    else:
-        msg = name + ",{"
-        for i in range(len(tp[1])-1):
-            msg += tp[1][i] + ","
-        msg += tp[1][-1]
-        msg += "}"
-        print(msg)
-
-
-def test_weka(filename):
-    data, meta = loadarff(filename)
-    print(len(data.dtype))
-    print(data.size)
-    for i in meta:
-        print_attribute(i, meta[i], data[i])
-
-
-# make sure nose does not find this as a test
-test_weka.__test__ = False
-
-
-if __name__ == '__main__':
-    import sys
-    filename = sys.argv[1]
-    test_weka(filename)
@@ -1,13 +0,0 @@
-from __future__ import division, print_function, absolute_import
-
-
-def configuration(parent_package='io',top_path=None):
-    from numpy.distutils.misc_util import Configuration
-    config = Configuration('arff', parent_package, top_path)
-    config.add_data_dir('tests')
-    return config
-
-
-if __name__ == '__main__':
-    from numpy.distutils.core import setup
-    setup(**configuration(top_path='').todict())
@@ -1,225 +0,0 @@
-% 1. Title: Iris Plants Database
-% 
-% 2. Sources:
-%      (a) Creator: R.A. Fisher
-%      (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
-%      (c) Date: July, 1988
-% 
-% 3. Past Usage:
-%    - Publications: too many to mention!!!  Here are a few.
-%    1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"
-%       Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions
-%       to Mathematical Statistics" (John Wiley, NY, 1950).
-%    2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
-%       (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.
-%    3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
-%       Structure and Classification Rule for Recognition in Partially Exposed
-%       Environments".  IEEE Transactions on Pattern Analysis and Machine
-%       Intelligence, Vol. PAMI-2, No. 1, 67-71.
-%       -- Results:
-%          -- very low misclassification rates (0% for the setosa class)
-%    4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule".  IEEE 
-%       Transactions on Information Theory, May 1972, 431-433.
-%       -- Results:
-%          -- very low misclassification rates again
-%    5. See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al's AUTOCLASS II
-%       conceptual clustering system finds 3 classes in the data.
-% 
-% 4. Relevant Information:
-%    --- This is perhaps the best known database to be found in the pattern
-%        recognition literature.  Fisher's paper is a classic in the field
-%        and is referenced frequently to this day.  (See Duda & Hart, for
-%        example.)  The data set contains 3 classes of 50 instances each,
-%        where each class refers to a type of iris plant.  One class is
-%        linearly separable from the other 2; the latter are NOT linearly
-%        separable from each other.
-%    --- Predicted attribute: class of iris plant.
-%    --- This is an exceedingly simple domain.
-% 
-% 5. Number of Instances: 150 (50 in each of three classes)
-% 
-% 6. Number of Attributes: 4 numeric, predictive attributes and the class
-% 
-% 7. Attribute Information:
-%    1. sepal length in cm
-%    2. sepal width in cm
-%    3. petal length in cm
-%    4. petal width in cm
-%    5. class: 
-%       -- Iris Setosa
-%       -- Iris Versicolour
-%       -- Iris Virginica
-% 
-% 8. Missing Attribute Values: None
-% 
-% Summary Statistics:
-%  	           Min  Max   Mean    SD   Class Correlation
-%    sepal length: 4.3  7.9   5.84  0.83    0.7826   
-%     sepal width: 2.0  4.4   3.05  0.43   -0.4194
-%    petal length: 1.0  6.9   3.76  1.76    0.9490  (high!)
-%     petal width: 0.1  2.5   1.20  0.76    0.9565  (high!)
-% 
-% 9. Class Distribution: 33.3% for each of 3 classes.
-
-@RELATION iris
-
-@ATTRIBUTE sepallength	REAL
-@ATTRIBUTE sepalwidth 	REAL
-@ATTRIBUTE petallength 	REAL
-@ATTRIBUTE petalwidth	REAL
-@ATTRIBUTE class 	{Iris-setosa,Iris-versicolor,Iris-virginica}
-
-@DATA
-5.1,3.5,1.4,0.2,Iris-setosa
-4.9,3.0,1.4,0.2,Iris-setosa
-4.7,3.2,1.3,0.2,Iris-setosa
-4.6,3.1,1.5,0.2,Iris-setosa
-5.0,3.6,1.4,0.2,Iris-setosa
-5.4,3.9,1.7,0.4,Iris-setosa
-4.6,3.4,1.4,0.3,Iris-setosa
-5.0,3.4,1.5,0.2,Iris-setosa
-4.4,2.9,1.4,0.2,Iris-setosa
-4.9,3.1,1.5,0.1,Iris-setosa
-5.4,3.7,1.5,0.2,Iris-setosa
-4.8,3.4,1.6,0.2,Iris-setosa
-4.8,3.0,1.4,0.1,Iris-setosa
-4.3,3.0,1.1,0.1,Iris-setosa
-5.8,4.0,1.2,0.2,Iris-setosa
-5.7,4.4,1.5,0.4,Iris-setosa
-5.4,3.9,1.3,0.4,Iris-setosa
-5.1,3.5,1.4,0.3,Iris-setosa
-5.7,3.8,1.7,0.3,Iris-setosa
-5.1,3.8,1.5,0.3,Iris-setosa
-5.4,3.4,1.7,0.2,Iris-setosa
-5.1,3.7,1.5,0.4,Iris-setosa
-4.6,3.6,1.0,0.2,Iris-setosa
-5.1,3.3,1.7,0.5,Iris-setosa
-4.8,3.4,1.9,0.2,Iris-setosa
-5.0,3.0,1.6,0.2,Iris-setosa
-5.0,3.4,1.6,0.4,Iris-setosa
-5.2,3.5,1.5,0.2,Iris-setosa
-5.2,3.4,1.4,0.2,Iris-setosa
-4.7,3.2,1.6,0.2,Iris-setosa
-4.8,3.1,1.6,0.2,Iris-setosa
-5.4,3.4,1.5,0.4,Iris-setosa
-5.2,4.1,1.5,0.1,Iris-setosa
-5.5,4.2,1.4,0.2,Iris-setosa
-4.9,3.1,1.5,0.1,Iris-setosa
-5.0,3.2,1.2,0.2,Iris-setosa
-5.5,3.5,1.3,0.2,Iris-setosa
-4.9,3.1,1.5,0.1,Iris-setosa
-4.4,3.0,1.3,0.2,Iris-setosa
-5.1,3.4,1.5,0.2,Iris-setosa
-5.0,3.5,1.3,0.3,Iris-setosa
-4.5,2.3,1.3,0.3,Iris-setosa
-4.4,3.2,1.3,0.2,Iris-setosa
-5.0,3.5,1.6,0.6,Iris-setosa
-5.1,3.8,1.9,0.4,Iris-setosa
-4.8,3.0,1.4,0.3,Iris-setosa
-5.1,3.8,1.6,0.2,Iris-setosa
-4.6,3.2,1.4,0.2,Iris-setosa
-5.3,3.7,1.5,0.2,Iris-setosa
-5.0,3.3,1.4,0.2,Iris-setosa
-7.0,3.2,4.7,1.4,Iris-versicolor
-6.4,3.2,4.5,1.5,Iris-versicolor
-6.9,3.1,4.9,1.5,Iris-versicolor
-5.5,2.3,4.0,1.3,Iris-versicolor
-6.5,2.8,4.6,1.5,Iris-versicolor
-5.7,2.8,4.5,1.3,Iris-versicolor
-6.3,3.3,4.7,1.6,Iris-versicolor
-4.9,2.4,3.3,1.0,Iris-versicolor
-6.6,2.9,4.6,1.3,Iris-versicolor
-5.2,2.7,3.9,1.4,Iris-versicolor
-5.0,2.0,3.5,1.0,Iris-versicolor
-5.9,3.0,4.2,1.5,Iris-versicolor
-6.0,2.2,4.0,1.0,Iris-versicolor
-6.1,2.9,4.7,1.4,Iris-versicolor
-5.6,2.9,3.6,1.3,Iris-versicolor
-6.7,3.1,4.4,1.4,Iris-versicolor
-5.6,3.0,4.5,1.5,Iris-versicolor
-5.8,2.7,4.1,1.0,Iris-versicolor
-6.2,2.2,4.5,1.5,Iris-versicolor
-5.6,2.5,3.9,1.1,Iris-versicolor
-5.9,3.2,4.8,1.8,Iris-versicolor
-6.1,2.8,4.0,1.3,Iris-versicolor
-6.3,2.5,4.9,1.5,Iris-versicolor
-6.1,2.8,4.7,1.2,Iris-versicolor
-6.4,2.9,4.3,1.3,Iris-versicolor
-6.6,3.0,4.4,1.4,Iris-versicolor
-6.8,2.8,4.8,1.4,Iris-versicolor
-6.7,3.0,5.0,1.7,Iris-versicolor
-6.0,2.9,4.5,1.5,Iris-versicolor
-5.7,2.6,3.5,1.0,Iris-versicolor
-5.5,2.4,3.8,1.1,Iris-versicolor
-5.5,2.4,3.7,1.0,Iris-versicolor
-5.8,2.7,3.9,1.2,Iris-versicolor
-6.0,2.7,5.1,1.6,Iris-versicolor
-5.4,3.0,4.5,1.5,Iris-versicolor
-6.0,3.4,4.5,1.6,Iris-versicolor
-6.7,3.1,4.7,1.5,Iris-versicolor
-6.3,2.3,4.4,1.3,Iris-versicolor
-5.6,3.0,4.1,1.3,Iris-versicolor
-5.5,2.5,4.0,1.3,Iris-versicolor
-5.5,2.6,4.4,1.2,Iris-versicolor
-6.1,3.0,4.6,1.4,Iris-versicolor
-5.8,2.6,4.0,1.2,Iris-versicolor
-5.0,2.3,3.3,1.0,Iris-versicolor
-5.6,2.7,4.2,1.3,Iris-versicolor
-5.7,3.0,4.2,1.2,Iris-versicolor
-5.7,2.9,4.2,1.3,Iris-versicolor
-6.2,2.9,4.3,1.3,Iris-versicolor
-5.1,2.5,3.0,1.1,Iris-versicolor
-5.7,2.8,4.1,1.3,Iris-versicolor
-6.3,3.3,6.0,2.5,Iris-virginica
-5.8,2.7,5.1,1.9,Iris-virginica
-7.1,3.0,5.9,2.1,Iris-virginica
-6.3,2.9,5.6,1.8,Iris-virginica
-6.5,3.0,5.8,2.2,Iris-virginica
-7.6,3.0,6.6,2.1,Iris-virginica
-4.9,2.5,4.5,1.7,Iris-virginica
-7.3,2.9,6.3,1.8,Iris-virginica
-6.7,2.5,5.8,1.8,Iris-virginica
-7.2,3.6,6.1,2.5,Iris-virginica
-6.5,3.2,5.1,2.0,Iris-virginica
-6.4,2.7,5.3,1.9,Iris-virginica
-6.8,3.0,5.5,2.1,Iris-virginica
-5.7,2.5,5.0,2.0,Iris-virginica
-5.8,2.8,5.1,2.4,Iris-virginica
-6.4,3.2,5.3,2.3,Iris-virginica
-6.5,3.0,5.5,1.8,Iris-virginica
-7.7,3.8,6.7,2.2,Iris-virginica
-7.7,2.6,6.9,2.3,Iris-virginica
-6.0,2.2,5.0,1.5,Iris-virginica
-6.9,3.2,5.7,2.3,Iris-virginica
-5.6,2.8,4.9,2.0,Iris-virginica
-7.7,2.8,6.7,2.0,Iris-virginica
-6.3,2.7,4.9,1.8,Iris-virginica
-6.7,3.3,5.7,2.1,Iris-virginica
-7.2,3.2,6.0,1.8,Iris-virginica
-6.2,2.8,4.8,1.8,Iris-virginica
-6.1,3.0,4.9,1.8,Iris-virginica
-6.4,2.8,5.6,2.1,Iris-virginica
-7.2,3.0,5.8,1.6,Iris-virginica
-7.4,2.8,6.1,1.9,Iris-virginica
-7.9,3.8,6.4,2.0,Iris-virginica
-6.4,2.8,5.6,2.2,Iris-virginica
-6.3,2.8,5.1,1.5,Iris-virginica
-6.1,2.6,5.6,1.4,Iris-virginica
-7.7,3.0,6.1,2.3,Iris-virginica
-6.3,3.4,5.6,2.4,Iris-virginica
-6.4,3.1,5.5,1.8,Iris-virginica
-6.0,3.0,4.8,1.8,Iris-virginica
-6.9,3.1,5.4,2.1,Iris-virginica
-6.7,3.1,5.6,2.4,Iris-virginica
-6.9,3.1,5.1,2.3,Iris-virginica
-5.8,2.7,5.1,1.9,Iris-virginica
-6.8,3.2,5.9,2.3,Iris-virginica
-6.7,3.3,5.7,2.5,Iris-virginica
-6.7,3.0,5.2,2.3,Iris-virginica
-6.3,2.5,5.0,1.9,Iris-virginica
-6.5,3.0,5.2,2.0,Iris-virginica
-6.2,3.4,5.4,2.3,Iris-virginica
-5.9,3.0,5.1,1.8,Iris-virginica
-%
-%
-%
@@ -1,8 +0,0 @@
-% This arff file contains some missing data
-@relation missing
-@attribute yop real
-@attribute yap real
-@data
-1,5
-2,4
-?,?
@@ -1,11 +0,0 @@
-@RELATION iris
-
-@ATTRIBUTE sepallength  REAL
-@ATTRIBUTE sepalwidth   REAL
-@ATTRIBUTE petallength  REAL
-@ATTRIBUTE petalwidth   REAL
-@ATTRIBUTE class    {Iris-setosa,Iris-versicolor,Iris-virginica}
-
-@DATA
-
-% This file has no data
@@ -1,10 +0,0 @@
-@RELATION test1
-
-@ATTRIBUTE attr0	REAL
-@ATTRIBUTE attr1 	REAL
-@ATTRIBUTE attr2 	REAL
-@ATTRIBUTE attr3	REAL
-@ATTRIBUTE class 	{class0, class1, class2, class3}
-
-@DATA
-0.1, 0.2, 0.3, 0.4,class1
@@ -1,15 +0,0 @@
-@RELATION test2
-
-@ATTRIBUTE attr0	REAL
-@ATTRIBUTE attr1 	real
-@ATTRIBUTE attr2 	integer
-@ATTRIBUTE attr3	Integer
-@ATTRIBUTE attr4 	Numeric
-@ATTRIBUTE attr5	numeric
-@ATTRIBUTE attr6 	string
-@ATTRIBUTE attr7 	STRING
-@ATTRIBUTE attr8 	{bla}
-@ATTRIBUTE attr9 	{bla, bla}
-
-@DATA
-0.1, 0.2, 0.3, 0.4,class1
@@ -1,6 +0,0 @@
-@RELATION test3
-
-@ATTRIBUTE attr0	crap
-
-@DATA
-0.1, 0.2, 0.3, 0.4,class1
@@ -1,11 +0,0 @@
-@RELATION test5
-
-@ATTRIBUTE attr0	REAL
-@ATTRIBUTE attr1 	REAL
-@ATTRIBUTE attr2 	REAL
-@ATTRIBUTE attr3	REAL
-@ATTRIBUTE class 	{class0, class1, class2, class3}
-@DATA
-0.1, 0.2, 0.3, 0.4,class1
-0.1, -0.2, -0.3, -0.4,class2
-1, 2, 3, 4,class3
@@ -1,26 +0,0 @@
-@RELATION test4
-
-@ATTRIBUTE attr0	REAL
-@ATTRIBUTE attr1 	REAL
-@ATTRIBUTE attr2 	REAL
-@ATTRIBUTE attr3	REAL
-@ATTRIBUTE class 	{class0, class1, class2, class3}
-
-@DATA
-
-% lsdflkjhaksjdhf
-
-% lsdflkjhaksjdhf
-
-0.1, 0.2, 0.3, 0.4,class1
-% laksjdhf
-
-% lsdflkjhaksjdhf
-0.1, -0.2, -0.3, -0.4,class2
-
-% lsdflkjhaksjdhf
-% lsdflkjhaksjdhf
-
-% lsdflkjhaksjdhf
-
-1, 2, 3, 4,class3
@@ -1,12 +0,0 @@
-@RELATION test6
-
-@ATTRIBUTE attr0	REAL
-@ATTRIBUTE attr1 	REAL
-@ATTRIBUTE attr2 	REAL
-@ATTRIBUTE attr3	REAL
-@ATTRIBUTE class 	{C}
-
-@DATA
-0.1, 0.2, 0.3, 0.4,C
-0.1, -0.2, -0.3, -0.4,C
-1, 2, 3, 4,C
@@ -1,15 +0,0 @@
-@RELATION test7
-
-@ATTRIBUTE attr_year	DATE yyyy
-@ATTRIBUTE attr_month	DATE yyyy-MM
-@ATTRIBUTE attr_date	DATE yyyy-MM-dd
-@ATTRIBUTE attr_datetime_local	DATE "yyyy-MM-dd HH:mm"
-@ATTRIBUTE attr_datetime_missing	DATE "yyyy-MM-dd HH:mm"
-
-@DATA
-1999,1999-01,1999-01-31,"1999-01-31 00:01",?
-2004,2004-12,2004-12-01,"2004-12-01 23:59","2004-12-01 23:59"
-1817,1817-04,1817-04-28,"1817-04-28 13:00",?
-2100,2100-09,2100-09-10,"2100-09-10 12:00",?
-2013,2013-11,2013-11-30,"2013-11-30 04:55","2013-11-30 04:55"
-1631,1631-10,1631-10-15,"1631-10-15 20:04","1631-10-15 20:04"
@@ -1,12 +0,0 @@
-@RELATION test8
-
-@ATTRIBUTE attr_datetime_utc	DATE "yyyy-MM-dd HH:mm Z"
-@ATTRIBUTE attr_datetime_full	DATE "yy-MM-dd HH:mm:ss z"
-
-@DATA
-"1999-01-31 00:01 UTC","99-01-31 00:01:08 +0430"
-"2004-12-01 23:59 UTC","04-12-01 23:59:59 -0800"
-"1817-04-28 13:00 UTC","17-04-28 13:00:33 +1000"
-"2100-09-10 12:00 UTC","21-09-10 12:00:21 -0300"
-"2013-11-30 04:55 UTC","13-11-30 04:55:48 -1100"
-"1631-10-15 20:04 UTC","31-10-15 20:04:10 +0000"
@@ -1,259 +0,0 @@
-from __future__ import division, print_function, absolute_import
-
-import datetime
-import os
-import sys
-from os.path import join as pjoin
-
-if sys.version_info[0] >= 3:
-    from io import StringIO
-else:
-    from cStringIO import StringIO
-
-import numpy as np
-
-from numpy.testing import (assert_array_almost_equal,
-                           assert_array_equal, assert_equal, assert_)
-import pytest
-from pytest import raises as assert_raises
-
-from scipy.io.arff.arffread import loadarff
-from scipy.io.arff.arffread import read_header, parse_type, ParseArffError
-
-
-data_path = pjoin(os.path.dirname(__file__), 'data')
-
-test1 = pjoin(data_path, 'test1.arff')
-test2 = pjoin(data_path, 'test2.arff')
-test3 = pjoin(data_path, 'test3.arff')
-
-test4 = pjoin(data_path, 'test4.arff')
-test5 = pjoin(data_path, 'test5.arff')
-test6 = pjoin(data_path, 'test6.arff')
-test7 = pjoin(data_path, 'test7.arff')
-test8 = pjoin(data_path, 'test8.arff')
-expect4_data = [(0.1, 0.2, 0.3, 0.4, 'class1'),
-                (-0.1, -0.2, -0.3, -0.4, 'class2'),
-                (1, 2, 3, 4, 'class3')]
-expected_types = ['numeric', 'numeric', 'numeric', 'numeric', 'nominal']
-
-missing = pjoin(data_path, 'missing.arff')
-expect_missing_raw = np.array([[1, 5], [2, 4], [np.nan, np.nan]])
-expect_missing = np.empty(3, [('yop', float), ('yap', float)])
-expect_missing['yop'] = expect_missing_raw[:, 0]
-expect_missing['yap'] = expect_missing_raw[:, 1]
-
-
-class TestData(object):
-    def test1(self):
-        # Parsing trivial file with nothing.
-        self._test(test4)
-
-    def test2(self):
-        # Parsing trivial file with some comments in the data section.
-        self._test(test5)
-
-    def test3(self):
-        # Parsing trivial file with nominal attribute of 1 character.
-        self._test(test6)
-
-    def _test(self, test_file):
-        data, meta = loadarff(test_file)
-        for i in range(len(data)):
-            for j in range(4):
-                assert_array_almost_equal(expect4_data[i][j], data[i][j])
-        assert_equal(meta.types(), expected_types)
-
-    def test_filelike(self):
-        # Test reading from file-like object (StringIO)
-        f1 = open(test1)
-        data1, meta1 = loadarff(f1)
-        f1.close()
-        f2 = open(test1)
-        data2, meta2 = loadarff(StringIO(f2.read()))
-        f2.close()
-        assert_(data1 == data2)
-        assert_(repr(meta1) == repr(meta2))
-
-    @pytest.mark.skipif(sys.version_info < (3, 6),
-                        reason='Passing path-like objects to IO functions requires Python >= 3.6')
-    def test_path(self):
-        # Test reading from `pathlib.Path` object
-        from pathlib import Path
-
-        with open(test1) as f1:
-            data1, meta1 = loadarff(f1)
-
-        data2, meta2 = loadarff(Path(test1))
-
-        assert_(data1 == data2)
-        assert_(repr(meta1) == repr(meta2))
-
-class TestMissingData(object):
-    def test_missing(self):
-        data, meta = loadarff(missing)
-        for i in ['yop', 'yap']:
-            assert_array_almost_equal(data[i], expect_missing[i])
-
-
-class TestNoData(object):
-    def test_nodata(self):
-        # The file nodata.arff has no data in the @DATA section.
-        # Reading it should result in an array with length 0.
-        nodata_filename = os.path.join(data_path, 'nodata.arff')
-        data, meta = loadarff(nodata_filename)
-        expected_dtype = np.dtype([('sepallength', '<f8'),
-                                   ('sepalwidth', '<f8'),
-                                   ('petallength', '<f8'),
-                                   ('petalwidth', '<f8'),
-                                   ('class', 'S15')])
-        assert_equal(data.dtype, expected_dtype)
-        assert_equal(data.size, 0)
-
-
-class TestHeader(object):
-    def test_type_parsing(self):
-        # Test parsing type of attribute from their value.
-        ofile = open(test2)
-        rel, attrs = read_header(ofile)
-        ofile.close()
-
-        expected = ['numeric', 'numeric', 'numeric', 'numeric', 'numeric',
-                    'numeric', 'string', 'string', 'nominal', 'nominal']
-
-        for i in range(len(attrs)):
-            assert_(parse_type(attrs[i][1]) == expected[i])
-
-    def test_badtype_parsing(self):
-        # Test parsing wrong type of attribute from their value.
-        ofile = open(test3)
-        rel, attrs = read_header(ofile)
-        ofile.close()
-
-        for name, value in attrs:
-            assert_raises(ParseArffError, parse_type, value)
-
-    def test_fullheader1(self):
-        # Parsing trivial header with nothing.
-        ofile = open(test1)
-        rel, attrs = read_header(ofile)
-        ofile.close()
-
-        # Test relation
-        assert_(rel == 'test1')
-
-        # Test numerical attributes
-        assert_(len(attrs) == 5)
-        for i in range(4):
-            assert_(attrs[i][0] == 'attr%d' % i)
-            assert_(attrs[i][1] == 'REAL')
-
-        # Test nominal attribute
-        assert_(attrs[4][0] == 'class')
-        assert_(attrs[4][1] == '{class0, class1, class2, class3}')
-
-    def test_dateheader(self):
-        ofile = open(test7)
-        rel, attrs = read_header(ofile)
-        ofile.close()
-
-        assert_(rel == 'test7')
-
-        assert_(len(attrs) == 5)
-
-        assert_(attrs[0][0] == 'attr_year')
-        assert_(attrs[0][1] == 'DATE yyyy')
-
-        assert_(attrs[1][0] == 'attr_month')
-        assert_(attrs[1][1] == 'DATE yyyy-MM')
-
-        assert_(attrs[2][0] == 'attr_date')
-        assert_(attrs[2][1] == 'DATE yyyy-MM-dd')
-
-        assert_(attrs[3][0] == 'attr_datetime_local')
-        assert_(attrs[3][1] == 'DATE "yyyy-MM-dd HH:mm"')
-
-        assert_(attrs[4][0] == 'attr_datetime_missing')
-        assert_(attrs[4][1] == 'DATE "yyyy-MM-dd HH:mm"')
-
-    def test_dateheader_unsupported(self):
-        ofile = open(test8)
-        rel, attrs = read_header(ofile)
-        ofile.close()
-
-        assert_(rel == 'test8')
-
-        assert_(len(attrs) == 2)
-        assert_(attrs[0][0] == 'attr_datetime_utc')
-        assert_(attrs[0][1] == 'DATE "yyyy-MM-dd HH:mm Z"')
-
-        assert_(attrs[1][0] == 'attr_datetime_full')
-        assert_(attrs[1][1] == 'DATE "yy-MM-dd HH:mm:ss z"')
-
-
-class TestDateAttribute(object):
-    def setup_method(self):
-        self.data, self.meta = loadarff(test7)
-
-    def test_year_attribute(self):
-        expected = np.array([
-            '1999',
-            '2004',
-            '1817',
-            '2100',
-            '2013',
-            '1631'
-        ], dtype='datetime64[Y]')
-
-        assert_array_equal(self.data["attr_year"], expected)
-
-    def test_month_attribute(self):
-        expected = np.array([
-            '1999-01',
-            '2004-12',
-            '1817-04',
-            '2100-09',
-            '2013-11',
-            '1631-10'
-        ], dtype='datetime64[M]')
-
-        assert_array_equal(self.data["attr_month"], expected)
-
-    def test_date_attribute(self):
-        expected = np.array([
-            '1999-01-31',
-            '2004-12-01',
-            '1817-04-28',
-            '2100-09-10',
-            '2013-11-30',
-            '1631-10-15'
-        ], dtype='datetime64[D]')
-
-        assert_array_equal(self.data["attr_date"], expected)
-
-    def test_datetime_local_attribute(self):
-        expected = np.array([
-            datetime.datetime(year=1999, month=1, day=31, hour=0, minute=1),
-            datetime.datetime(year=2004, month=12, day=1, hour=23, minute=59),
-            datetime.datetime(year=1817, month=4, day=28, hour=13, minute=0),
-            datetime.datetime(year=2100, month=9, day=10, hour=12, minute=0),
-            datetime.datetime(year=2013, month=11, day=30, hour=4, minute=55),
-            datetime.datetime(year=1631, month=10, day=15, hour=20, minute=4)
-        ], dtype='datetime64[m]')
-
-        assert_array_equal(self.data["attr_datetime_local"], expected)
-
-    def test_datetime_missing(self):
-        expected = np.array([
-            'nat',
-            '2004-12-01T23:59',
-            'nat',
-            'nat',
-            '2013-11-30T04:55',
-            '1631-10-15T20:04'
-        ], dtype='datetime64[m]')
-
-        assert_array_equal(self.data["attr_datetime_missing"], expected)
-
-    def test_datetime_timezone(self):
-        assert_raises(ValueError, loadarff, test8)