Static code analysis and corrections

2019-07-17 16:06:09 +02:00
parent 674692c2fc
commit 21bfae9fbc
10086 changed files with 2102103 additions and 51 deletions
@@ -0,0 +1,113 @@
+# -*- encoding:utf-8 -*-
+"""
+==================================
+Input and output (:mod:`scipy.io`)
+==================================
+
+.. currentmodule:: scipy.io
+
+SciPy has many modules, classes, and functions available to read data
+from and write data to a variety of file formats.
+
+.. seealso:: :ref:`numpy-reference.routines.io` (in Numpy)
+
+MATLAB® files
+=============
+
+.. autosummary::
+   :toctree: generated/
+
+   loadmat - Read a MATLAB style mat file (version 4 through 7.1)
+   savemat - Write a MATLAB style mat file (version 4 through 7.1)
+   whosmat - List contents of a MATLAB style mat file (version 4 through 7.1)
+
+IDL® files
+==========
+
+.. autosummary::
+   :toctree: generated/
+
+   readsav - Read an IDL 'save' file
+
+Matrix Market files
+===================
+
+.. autosummary::
+   :toctree: generated/
+
+   mminfo - Query matrix info from Matrix Market formatted file
+   mmread - Read matrix from Matrix Market formatted file
+   mmwrite - Write matrix to Matrix Market formatted file
+
+Unformatted Fortran files
+===============================
+
+.. autosummary::
+   :toctree: generated/
+
+   FortranFile - A file object for unformatted sequential Fortran files
+
+Netcdf
+======
+
+.. autosummary::
+   :toctree: generated/
+
+   netcdf_file - A file object for NetCDF data
+   netcdf_variable - A data object for the netcdf module
+
+Harwell-Boeing files
+====================
+
+.. autosummary::
+   :toctree: generated/
+
+   hb_read - Read a Harwell-Boeing formatted file
+   hb_write - Write a Harwell-Boeing formatted file
+
+Wav sound files (:mod:`scipy.io.wavfile`)
+=========================================
+
+.. module:: scipy.io.wavfile
+
+.. autosummary::
+   :toctree: generated/
+
+   read
+   write
+   WavFileWarning
+
+Arff files (:mod:`scipy.io.arff`)
+=================================
+
+.. module:: scipy.io.arff
+
+.. autosummary::
+   :toctree: generated/
+
+   loadarff
+   MetaData
+   ArffError
+   ParseArffError
+
+"""
+from __future__ import division, print_function, absolute_import
+
+# matfile read and write
+from .matlab import loadmat, savemat, whosmat, byteordercodes
+
+# netCDF file support
+from .netcdf import netcdf_file, netcdf_variable
+
+# Fortran file support
+from ._fortran import FortranFile
+
+from .mmio import mminfo, mmread, mmwrite
+from .idl import readsav
+from .harwell_boeing import hb_read, hb_write
+
+# Export every name bound in this module so far that does not start with an
+# underscore.
+__all__ = [s for s in dir() if not s.startswith('_')]
+
+# ``test`` is bound after ``__all__`` has been computed, so it is not listed
+# in ``__all__``; the helper class is removed from the namespace afterwards.
+from scipy._lib._testutils import PytestTester
+test = PytestTester(__name__)
+del PytestTester
@@ -0,0 +1,317 @@
+"""
+Module to read / write Fortran unformatted sequential files.
+
+This is in the spirit of code written by Neil Martinsen-Burrell and Joe Zuntz.
+
+"""
+from __future__ import division, print_function, absolute_import
+
+import warnings
+import numpy as np
+
+__all__ = ['FortranFile']
+
+
+class FortranFile(object):
+    """
+    A file object for unformatted sequential files from Fortran code.
+
+    Parameters
+    ----------
+    filename : file or str
+        Open file object or filename.
+    mode : {'r', 'w'}, optional
+        Read-write mode, default is 'r'.
+    header_dtype : dtype, optional
+        Data type of the header. Size and endiness must match the input/output file.
+
+    Notes
+    -----
+    These files are broken up into records of unspecified types. The size of
+    each record is given at the start (although the size of this header is not
+    standard) and the data is written onto disk without any formatting. Fortran
+    compilers supporting the BACKSPACE statement will write a second copy of
+    the size to facilitate backwards seeking.
+
+    This class only supports files written with both sizes for the record.
+    It also does not support the subrecords used in Intel and gfortran compilers
+    for records which are greater than 2GB with a 4-byte header.
+
+    An example of an unformatted sequential file in Fortran would be written as::
+
+        OPEN(1, FILE=myfilename, FORM='unformatted')
+
+        WRITE(1) myvariable
+
+    Since this is a non-standard file format, whose contents depend on the
+    compiler and the endianness of the machine, caution is advised. Files from
+    gfortran 4.8.0 and gfortran 4.1.2 on x86_64 are known to work.
+
+    Consider using Fortran direct-access files or files from the newer Stream
+    I/O, which can be easily read by `numpy.fromfile`.
+
+    Examples
+    --------
+    To create an unformatted sequential Fortran file:
+
+    >>> from scipy.io import FortranFile
+    >>> f = FortranFile('test.unf', 'w')
+    >>> f.write_record(np.array([1,2,3,4,5], dtype=np.int32))
+    >>> f.write_record(np.linspace(0,1,20).reshape((5,4)).T)
+    >>> f.close()
+
+    To read this file:
+
+    >>> f = FortranFile('test.unf', 'r')
+    >>> print(f.read_ints(np.int32))
+    [1 2 3 4 5]
+    >>> print(f.read_reals(float).reshape((5,4), order="F"))
+    [[0.         0.05263158 0.10526316 0.15789474]
+     [0.21052632 0.26315789 0.31578947 0.36842105]
+     [0.42105263 0.47368421 0.52631579 0.57894737]
+     [0.63157895 0.68421053 0.73684211 0.78947368]
+     [0.84210526 0.89473684 0.94736842 1.        ]]
+    >>> f.close()
+
+    Or, in Fortran::
+
+        integer :: a(5), i
+        double precision :: b(5,4)
+        open(1, file='test.unf', form='unformatted')
+        read(1) a
+        read(1) b
+        close(1)
+        write(*,*) a
+        do i = 1, 5
+            write(*,*) b(i,:)
+        end do
+
+    """
+    def __init__(self, filename, mode='r', header_dtype=np.uint32):
+        if header_dtype is None:
+            raise ValueError('Must specify dtype')
+
+        header_dtype = np.dtype(header_dtype)
+        if header_dtype.kind != 'u':
+            warnings.warn("Given a dtype which is not unsigned.")
+
+        if mode not in 'rw' or len(mode) != 1:
+            raise ValueError('mode must be either r or w')
+
+        if hasattr(filename, 'seek'):
+            self._fp = filename
+        else:
+            self._fp = open(filename, '%sb' % mode)
+
+        self._header_dtype = header_dtype
+
+    def _read_size(self):
+        return int(np.fromfile(self._fp, dtype=self._header_dtype, count=1))
+
+    def write_record(self, *items):
+        """
+        Write a record (including sizes) to the file.
+
+        Parameters
+        ----------
+        *items : array_like
+            The data arrays to write.
+
+        Notes
+        -----
+        Writes data items to a file::
+
+            write_record(a.T, b.T, c.T, ...)
+
+            write(1) a, b, c, ...
+
+        Note that data in multidimensional arrays is written in
+        row-major order --- to make them read correctly by Fortran
+        programs, you need to transpose the arrays yourself when
+        writing them.
+
+        """
+        items = tuple(np.asarray(item) for item in items)
+        total_size = sum(item.nbytes for item in items)
+
+        nb = np.array([total_size], dtype=self._header_dtype)
+
+        nb.tofile(self._fp)
+        for item in items:
+            item.tofile(self._fp)
+        nb.tofile(self._fp)
+
+    def read_record(self, *dtypes, **kwargs):
+        """
+        Reads a record of a given type from the file.
+
+        Parameters
+        ----------
+        *dtypes : dtypes, optional
+            Data type(s) specifying the size and endiness of the data.
+
+        Returns
+        -------
+        data : ndarray
+            A one-dimensional array object.
+
+        Notes
+        -----
+        If the record contains a multi-dimensional array, you can specify
+        the size in the dtype. For example::
+
+            INTEGER var(5,4)
+
+        can be read with::
+
+            read_record('(4,5)i4').T
+
+        Note that this function does **not** assume the file data is in Fortran
+        column major order, so you need to (i) swap the order of dimensions
+        when reading and (ii) transpose the resulting array.
+
+        Alternatively, you can read the data as a 1D array and handle the
+        ordering yourself. For example::
+
+            read_record('i4').reshape(5, 4, order='F')
+
+        For records that contain several variables or mixed types (as opposed
+        to single scalar or array types), give them as separate arguments::
+
+            double precision :: a
+            integer :: b
+            write(1) a, b
+
+            record = f.read_record('<f4', '<i4')
+            a = record[0]  # first number
+            b = record[1]  # second number
+
+        and if any of the variables are arrays, the shape can be specified as
+        the third item in the relevant dtype::
+
+            double precision :: a
+            integer :: b(3,4)
+            write(1) a, b
+
+            record = f.read_record('<f4', np.dtype(('<i4', (4, 3))))
+            a = record[0]
+            b = record[1].T
+
+        Numpy also supports a short syntax for this kind of type::
+
+            record = f.read_record('<f4', '(3,3)<i4')
+
+        See Also
+        --------
+        read_reals
+        read_ints
+
+        """
+        dtype = kwargs.pop('dtype', None)
+        if kwargs:
+            raise ValueError("Unknown keyword arguments {}".format(tuple(kwargs.keys())))
+
+        if dtype is not None:
+            dtypes = dtypes + (dtype,)
+        elif not dtypes:
+            raise ValueError('Must specify at least one dtype')
+
+        first_size = self._read_size()
+
+        dtypes = tuple(np.dtype(dtype) for dtype in dtypes)
+        block_size = sum(dtype.itemsize for dtype in dtypes)
+
+        num_blocks, remainder = divmod(first_size, block_size)
+        if remainder != 0:
+            raise ValueError('Size obtained ({0}) is not a multiple of the '
+                             'dtypes given ({1}).'.format(first_size, block_size))
+
+        if len(dtypes) != 1 and first_size != block_size:
+            # Fortran does not write mixed type array items in interleaved order,
+            # and it's not possible to guess the sizes of the arrays that were written.
+            # The user must specify the exact sizes of each of the arrays.
+            raise ValueError('Size obtained ({0}) does not match with the expected '
+                             'size ({1}) of multi-item record'.format(first_size, block_size))
+
+        data = []
+        for dtype in dtypes:
+            r = np.fromfile(self._fp, dtype=dtype, count=num_blocks)
+            if dtype.shape != ():
+                # Squeeze outmost block dimension for array items
+                if num_blocks == 1:
+                    assert r.shape == (1,) + dtype.shape
+                    r = r[0]
+
+            data.append(r)
+
+        second_size = self._read_size()
+        if first_size != second_size:
+            raise IOError('Sizes do not agree in the header and footer for '
+                          'this record - check header dtype')
+
+        # Unpack result
+        if len(dtypes) == 1:
+            return data[0]
+        else:
+            return tuple(data)
+
+    def read_ints(self, dtype='i4'):
+        """
+        Reads a record of a given type from the file, defaulting to an integer
+        type (``INTEGER*4`` in Fortran).
+
+        Parameters
+        ----------
+        dtype : dtype, optional
+            Data type specifying the size and endiness of the data.
+
+        Returns
+        -------
+        data : ndarray
+            A one-dimensional array object.
+
+        See Also
+        --------
+        read_reals
+        read_record
+
+        """
+        return self.read_record(dtype)
+
+    def read_reals(self, dtype='f8'):
+        """
+        Reads a record of a given type from the file, defaulting to a floating
+        point number (``real*8`` in Fortran).
+
+        Parameters
+        ----------
+        dtype : dtype, optional
+            Data type specifying the size and endiness of the data.
+
+        Returns
+        -------
+        data : ndarray
+            A one-dimensional array object.
+
+        See Also
+        --------
+        read_ints
+        read_record
+
+        """
+        return self.read_record(dtype)
+
+    def close(self):
+        """
+        Closes the file. It is unsupported to call any other methods off this
+        object after closing it. Note that this class supports the 'with'
+        statement in modern versions of Python, to call this automatically
+
+        """
+        self._fp.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, tb):
+        self.close()
@@ -0,0 +1,26 @@
+"""
+Module to read ARFF files, which are the standard data format for WEKA.
+
+ARFF is a text file format which supports numerical, string and date values.
+The format can also represent missing data and sparse data.
+
+Notes
+-----
+The ARFF support in ``scipy.io`` provides file reading functionality only.
+For more extensive ARFF functionality, see `liac-arff
+<https://github.com/renatopp/liac-arff>`_.
+
+See the `WEKA website <http://weka.wikispaces.com/ARFF>`_
+for more details about the ARFF format and available datasets.
+
+"""
+from __future__ import division, print_function, absolute_import
+
+from .arffread import *
+from . import arffread
+
+# Re-export exactly the names that ``arffread`` declares in its ``__all__``.
+__all__ = arffread.__all__
+
+# Attach the test runner, then drop the helper class from the namespace.
+from scipy._lib._testutils import PytestTester
+test = PytestTester(__name__)
+del PytestTester
@@ -0,0 +1,670 @@
+# Last Change: Mon Aug 20 08:00 PM 2007 J
+from __future__ import division, print_function, absolute_import
+
+import re
+import itertools
+import datetime
+from functools import partial
+
+import numpy as np
+
+from scipy._lib.six import next
+
+"""A module to read arff files."""
+
+__all__ = ['MetaData', 'loadarff', 'ArffError', 'ParseArffError']
+
+# An Arff file is basically two parts:
+#   - header
+#   - data
+#
+# Each component of the header starts with @META, where META is one of the
+# keywords (attribute or relation, for now).
+
+# TODO:
+#   - both integer and reals are treated as numeric -> the integer info
+#    is lost!
+#   - Replace ValueError by ParseError or something
+
+# We can now handle the following:
+#   - numeric and nominal attributes
+#   - missing values for numeric attributes
+
+# Match a line whose first non-blank character is @
+r_meta = re.compile(r'^\s*@')
+# Match a comment (a line starting with %)
+r_comment = re.compile(r'^%')
+# Match an empty line (one or more whitespace characters only; a zero-length
+# string does not match)
+r_empty = re.compile(r'^\s+$')
+# Match a header line, that is a line which starts by @ + a word
+r_headerline = re.compile(r'^@\S*')
+# Match the @data marker, in any letter case
+r_datameta = re.compile(r'^@[Dd][Aa][Tt][Aa]')
+# Match an @relation line (any letter case); group 1 is the first run of
+# non-blank characters after the keyword
+r_relation = re.compile(r'^@[Rr][Ee][Ll][Aa][Tt][Ii][Oo][Nn]\s*(\S*)')
+# Match an @attribute line (any letter case); group 1 is everything after the
+# keyword
+r_attribute = re.compile(r'^@[Aa][Tt][Tt][Rr][Ii][Bb][Uu][Tt][Ee]\s*(..*$)')
+
+# To get attributes name enclosed with '' (group 1: name, group 2: the rest)
+r_comattrval = re.compile(r"'(..+)'\s+(..+$)")
+# To get normal attributes (group 1: name without blanks, group 2: the rest)
+r_wcomattrval = re.compile(r"(\S+)\s+(..+$)")
+
+#-------------------------
+# Module defined exception
+#-------------------------
+
+
+class ArffError(IOError):
+    pass
+
+
+class ParseArffError(ArffError):
+    pass
+
+#------------------
+# Various utilities
+#------------------
+
+# An attribute  is defined as @attribute name value
+
+
+def parse_type(attrtype):
+    """Given an arff attribute value (meta data), returns its type.
+
+    Expect the value to be a name."""
+    uattribute = attrtype.lower().strip()
+    if uattribute[0] == '{':
+        return 'nominal'
+    elif uattribute[:len('real')] == 'real':
+        return 'numeric'
+    elif uattribute[:len('integer')] == 'integer':
+        return 'numeric'
+    elif uattribute[:len('numeric')] == 'numeric':
+        return 'numeric'
+    elif uattribute[:len('string')] == 'string':
+        return 'string'
+    elif uattribute[:len('relational')] == 'relational':
+        return 'relational'
+    elif uattribute[:len('date')] == 'date':
+        return 'date'
+    else:
+        raise ParseArffError("unknown attribute %s" % uattribute)
+
+
+def get_nominal(attribute):
+    """If attribute is nominal, returns a list of the values"""
+    return attribute.split(',')
+
+
+def read_data_list(ofile):
+    """Read each line of the iterable and put it in a list."""
+    data = [next(ofile)]
+    if data[0].strip()[0] == '{':
+        raise ValueError("This looks like a sparse ARFF: not supported yet")
+    data.extend([i for i in ofile])
+    return data
+
+
+def get_ndata(ofile):
+    """Read the whole file to get number of data attributes."""
+    data = [next(ofile)]
+    loc = 1
+    if data[0].strip()[0] == '{':
+        raise ValueError("This looks like a sparse ARFF: not supported yet")
+    for i in ofile:
+        loc += 1
+    return loc
+
+
+def maxnomlen(atrv):
+    """Given a string containing a nominal type definition, returns the
+    string len of the biggest component.
+
+    A nominal type is defined as seomthing framed between brace ({}).
+
+    Parameters
+    ----------
+    atrv : str
+       Nominal type definition
+
+    Returns
+    -------
+    slen : int
+       length of longest component
+
+    Examples
+    --------
+    maxnomlen("{floup, bouga, fl, ratata}") returns 6 (the size of
+    ratata, the longest nominal value).
+
+    >>> maxnomlen("{floup, bouga, fl, ratata}")
+    6
+    """
+    nomtp = get_nom_val(atrv)
+    return max(len(i) for i in nomtp)
+
+
+def get_nom_val(atrv):
+    """Given a string containing a nominal type, returns a tuple of the
+    possible values.
+
+    A nominal type is defined as something framed between braces ({}).
+
+    Parameters
+    ----------
+    atrv : str
+       Nominal type definition
+
+    Returns
+    -------
+    poss_vals : tuple
+       possible values
+
+    Examples
+    --------
+    >>> get_nom_val("{floup, bouga, fl, ratata}")
+    ('floup', 'bouga', 'fl', 'ratata')
+    """
+    r_nominal = re.compile('{(.+)}')
+    m = r_nominal.match(atrv)
+    if m:
+        return tuple(i.strip() for i in m.group(1).split(','))
+    else:
+        raise ValueError("This does not look like a nominal string")
+
+
+def get_date_format(atrv):
+    r_date = re.compile(r"[Dd][Aa][Tt][Ee]\s+[\"']?(.+?)[\"']?$")
+    m = r_date.match(atrv)
+    if m:
+        pattern = m.group(1).strip()
+        # convert time pattern from Java's SimpleDateFormat to C's format
+        datetime_unit = None
+        if "yyyy" in pattern:
+            pattern = pattern.replace("yyyy", "%Y")
+            datetime_unit = "Y"
+        elif "yy":
+            pattern = pattern.replace("yy", "%y")
+            datetime_unit = "Y"
+        if "MM" in pattern:
+            pattern = pattern.replace("MM", "%m")
+            datetime_unit = "M"
+        if "dd" in pattern:
+            pattern = pattern.replace("dd", "%d")
+            datetime_unit = "D"
+        if "HH" in pattern:
+            pattern = pattern.replace("HH", "%H")
+            datetime_unit = "h"
+        if "mm" in pattern:
+            pattern = pattern.replace("mm", "%M")
+            datetime_unit = "m"
+        if "ss" in pattern:
+            pattern = pattern.replace("ss", "%S")
+            datetime_unit = "s"
+        if "z" in pattern or "Z" in pattern:
+            raise ValueError("Date type attributes with time zone not "
+                             "supported, yet")
+
+        if datetime_unit is None:
+            raise ValueError("Invalid or unsupported date format")
+
+        return pattern, datetime_unit
+    else:
+        raise ValueError("Invalid or no date format")
+
+
+def go_data(ofile):
+    """Skip header.
+
+    the first next() call of the returned iterator will be the @data line"""
+    return itertools.dropwhile(lambda x: not r_datameta.match(x), ofile)
+
+
+#----------------
+# Parsing header
+#----------------
+def tokenize_attribute(iterable, attribute):
+    """Parse a raw string in header (eg starts by @attribute).
+
+    Given a raw string attribute, try to get the name and type of the
+    attribute. Constraints:
+
+    * The first line must start with @attribute (case insensitive, and
+      space like characters before @attribute are allowed)
+    * Works also if the attribute is spread on multilines.
+    * Works if empty lines or comments are in between
+
+    Parameters
+    ----------
+    attribute : str
+       the attribute string.
+
+    Returns
+    -------
+    name : str
+       name of the attribute
+    value : str
+       value of the attribute
+    next : str
+       next line to be parsed
+
+    Examples
+    --------
+    If attribute is a string defined in python as r"floupi real", will
+    return floupi as name, and real as value.
+
+    >>> iterable = iter([0] * 10) # dummy iterator
+    >>> tokenize_attribute(iterable, r"@attribute floupi real")
+    ('floupi', 'real', 0)
+
+    If attribute is r"'floupi 2' real", will return 'floupi 2' as name,
+    and real as value.
+
+    >>> tokenize_attribute(iterable, r"  @attribute 'floupi 2' real   ")
+    ('floupi 2', 'real', 0)
+
+    """
+    sattr = attribute.strip()
+    mattr = r_attribute.match(sattr)
+    if mattr:
+        # atrv is everything after @attribute
+        atrv = mattr.group(1)
+        if r_comattrval.match(atrv):
+            name, type = tokenize_single_comma(atrv)
+            next_item = next(iterable)
+        elif r_wcomattrval.match(atrv):
+            name, type = tokenize_single_wcomma(atrv)
+            next_item = next(iterable)
+        else:
+            # Not sure we should support this, as it does not seem supported by
+            # weka.
+            raise ValueError("multi line not supported yet")
+            #name, type, next_item = tokenize_multilines(iterable, atrv)
+    else:
+        raise ValueError("First line unparsable: %s" % sattr)
+
+    if type == 'relational':
+        raise ValueError("relational attributes not supported yet")
+    return name, type, next_item
+
+
+def tokenize_single_comma(val):
+    # XXX we match twice the same string (here and at the caller level). It is
+    # stupid, but it is easier for now...
+    m = r_comattrval.match(val)
+    if m:
+        try:
+            name = m.group(1).strip()
+            type = m.group(2).strip()
+        except IndexError:
+            raise ValueError("Error while tokenizing attribute")
+    else:
+        raise ValueError("Error while tokenizing single %s" % val)
+    return name, type
+
+
+def tokenize_single_wcomma(val):
+    # XXX we match twice the same string (here and at the caller level). It is
+    # stupid, but it is easier for now...
+    m = r_wcomattrval.match(val)
+    if m:
+        try:
+            name = m.group(1).strip()
+            type = m.group(2).strip()
+        except IndexError:
+            raise ValueError("Error while tokenizing attribute")
+    else:
+        raise ValueError("Error while tokenizing single %s" % val)
+    return name, type
+
+
+def read_header(ofile):
+    """Read the header of the iterable ofile."""
+    i = next(ofile)
+
+    # Pass first comments
+    while r_comment.match(i):
+        i = next(ofile)
+
+    # Header is everything up to DATA attribute ?
+    relation = None
+    attributes = []
+    while not r_datameta.match(i):
+        m = r_headerline.match(i)
+        if m:
+            isattr = r_attribute.match(i)
+            if isattr:
+                name, type, i = tokenize_attribute(ofile, i)
+                attributes.append((name, type))
+            else:
+                isrel = r_relation.match(i)
+                if isrel:
+                    relation = isrel.group(1)
+                else:
+                    raise ValueError("Error parsing line %s" % i)
+                i = next(ofile)
+        else:
+            i = next(ofile)
+
+    return relation, attributes
+
+
+#--------------------
+# Parsing actual data
+#--------------------
+def safe_float(x):
+    """given a string x, convert it to a float. If the stripped string is a ?,
+    return a Nan (missing value).
+
+    Parameters
+    ----------
+    x : str
+       string to convert
+
+    Returns
+    -------
+    f : float
+       where float can be nan
+
+    Examples
+    --------
+    >>> safe_float('1')
+    1.0
+    >>> safe_float('1\\n')
+    1.0
+    >>> safe_float('?\\n')
+    nan
+    """
+    if '?' in x:
+        return np.nan
+    else:
+        return float(x)
+
+
+def safe_nominal(value, pvalue):
+    svalue = value.strip()
+    if svalue in pvalue:
+        return svalue
+    elif svalue == '?':
+        return svalue
+    else:
+        raise ValueError("%s value not in %s" % (str(svalue), str(pvalue)))
+
+
+def safe_date(value, date_format, datetime_unit):
+    date_str = value.strip().strip("'").strip('"')
+    if date_str == '?':
+        return np.datetime64('NaT', datetime_unit)
+    else:
+        dt = datetime.datetime.strptime(date_str, date_format)
+        return np.datetime64(dt).astype("datetime64[%s]" % datetime_unit)
+
+
+class MetaData(object):
+    """Small container to keep useful information on a ARFF dataset.
+
+    Knows about attributes names and types.
+
+    Examples
+    --------
+    ::
+
+        data, meta = loadarff('iris.arff')
+        # This will print the attributes names of the iris.arff dataset
+        for i in meta:
+            print(i)
+        # This works too
+        meta.names()
+        # Getting attribute type
+        types = meta.types()
+
+    Notes
+    -----
+    Also maintains the list of attributes in order, i.e. doing for i in
+    meta, where meta is an instance of MetaData, will return the
+    different attribute names in the order they were defined.
+    """
+    def __init__(self, rel, attr):
+        self.name = rel
+        # We need the dictionary to be ordered
+        # XXX: may be better to implement an ordered dictionary
+        self._attributes = {}
+        self._attrnames = []
+        for name, value in attr:
+            tp = parse_type(value)
+            self._attrnames.append(name)
+            if tp == 'nominal':
+                self._attributes[name] = (tp, get_nom_val(value))
+            elif tp == 'date':
+                self._attributes[name] = (tp, get_date_format(value)[0])
+            else:
+                self._attributes[name] = (tp, None)
+
+    def __repr__(self):
+        msg = ""
+        msg += "Dataset: %s\n" % self.name
+        for i in self._attrnames:
+            msg += "\t%s's type is %s" % (i, self._attributes[i][0])
+            if self._attributes[i][1]:
+                msg += ", range is %s" % str(self._attributes[i][1])
+            msg += '\n'
+        return msg
+
+    def __iter__(self):
+        return iter(self._attrnames)
+
+    def __getitem__(self, key):
+        return self._attributes[key]
+
+    def names(self):
+        """Return the list of attribute names."""
+        return self._attrnames
+
+    def types(self):
+        """Return the list of attribute types."""
+        attr_types = [self._attributes[name][0] for name in self._attrnames]
+        return attr_types
+
+
+def loadarff(f):
+    """
+    Read an arff file.
+
+    The data is returned as a record array, which can be accessed much like
+    a dictionary of numpy arrays.  For example, if one of the attributes is
+    called 'pressure', then its first 10 data points can be accessed from the
+    ``data`` record array like so: ``data['pressure'][0:10]``
+
+
+    Parameters
+    ----------
+    f : file-like or str
+       File-like object to read from, or filename to open.
+
+    Returns
+    -------
+    data : record array
+       The data of the arff file, accessible by attribute names.
+    meta : `MetaData`
+       Contains information about the arff file such as name and
+       type of attributes, the relation (name of the dataset), etc...
+
+    Raises
+    ------
+    ParseArffError
+        This is raised if the given file is not ARFF-formatted.
+    NotImplementedError
+        The ARFF file has an attribute which is not supported yet.
+
+    Notes
+    -----
+
+    This function should be able to read most arff files. Not
+    implemented functionality include:
+
+    * date type attributes
+    * string type attributes
+
+    It can read files with numeric and nominal attributes.  It cannot read
+    files with sparse data ({} in the file).  However, this function can
+    read files with missing data (? in the file), representing the data
+    points as NaNs.
+
+    Examples
+    --------
+    >>> from scipy.io import arff
+    >>> from io import StringIO
+    >>> content = \"\"\"
+    ... @relation foo
+    ... @attribute width  numeric
+    ... @attribute height numeric
+    ... @attribute color  {red,green,blue,yellow,black}
+    ... @data
+    ... 5.0,3.25,blue
+    ... 4.5,3.75,green
+    ... 3.0,4.00,red
+    ... \"\"\"
+    >>> f = StringIO(content)
+    >>> data, meta = arff.loadarff(f)
+    >>> data
+    array([(5.0, 3.25, 'blue'), (4.5, 3.75, 'green'), (3.0, 4.0, 'red')],
+          dtype=[('width', '<f8'), ('height', '<f8'), ('color', '|S6')])
+    >>> meta
+    Dataset: foo
+    \twidth's type is numeric
+    \theight's type is numeric
+    \tcolor's type is nominal, range is ('red', 'green', 'blue', 'yellow', 'black')
+
+    """
+    if hasattr(f, 'read'):
+        ofile = f
+    else:
+        ofile = open(f, 'rt')
+    try:
+        return _loadarff(ofile)
+    finally:
+        if ofile is not f:  # only close what we opened
+            ofile.close()
+
+
+def _loadarff(ofile):
+    # Parse the header file
+    try:
+        rel, attr = read_header(ofile)
+    except ValueError as e:
+        msg = "Error while parsing header, error was: " + str(e)
+        raise ParseArffError(msg)
+
+    # Check whether we have a string attribute (not supported yet)
+    hasstr = False
+    for name, value in attr:
+        type = parse_type(value)
+        if type == 'string':
+            hasstr = True
+
+    meta = MetaData(rel, attr)
+
+    # XXX The following code is not great
+    # Build the type descriptor descr and the list of convertors to convert
+    # each attribute to the suitable type (which should match the one in
+    # descr).
+
+    # This can be used once we want to support integer as integer values and
+    # not as numeric anymore (using masked arrays ?).
+    acls2dtype = {'real': float, 'integer': float, 'numeric': float}
+    acls2conv = {'real': safe_float,
+                 'integer': safe_float,
+                 'numeric': safe_float}
+    descr = []
+    convertors = []
+    if not hasstr:
+        for name, value in attr:
+            type = parse_type(value)
+            if type == 'date':
+                date_format, datetime_unit = get_date_format(value)
+                descr.append((name, "datetime64[%s]" % datetime_unit))
+                convertors.append(partial(safe_date, date_format=date_format,
+                                          datetime_unit=datetime_unit))
+            elif type == 'nominal':
+                n = maxnomlen(value)
+                descr.append((name, 'S%d' % n))
+                pvalue = get_nom_val(value)
+                convertors.append(partial(safe_nominal, pvalue=pvalue))
+            else:
+                descr.append((name, acls2dtype[type]))
+                convertors.append(safe_float)
+                #dc.append(acls2conv[type])
+                #sdescr.append((name, acls2sdtype[type]))
+    else:
+        # How to support string efficiently ? Ideally, we should know the max
+        # size of the string before allocating the numpy array.
+        raise NotImplementedError("String attributes not supported yet, sorry")
+
+    ni = len(convertors)
+
+    def generator(row_iter, delim=','):
+        # TODO: this is where we are spending times (~80%). I think things
+        # could be made more efficiently:
+        #   - We could for example "compile" the function, because some values
+        #   do not change here.
+        #   - The function to convert a line to dtyped values could also be
+        #   generated on the fly from a string and be executed instead of
+        #   looping.
+        #   - The regex are overkill: for comments, checking that a line starts
+        #   by % should be enough and faster, and for empty lines, same thing
+        #   --> this does not seem to change anything.
+
+        # 'compiling' the range since it does not change
+        # Note, I have already tried zipping the converters and
+        # row elements and got slightly worse performance.
+        elems = list(range(ni))
+
+        for raw in row_iter:
+            # We do not abstract skipping comments and empty lines for
+            # performance reasons.
+            if r_comment.match(raw) or r_empty.match(raw):
+                continue
+            row = raw.split(delim)
+            yield tuple([convertors[i](row[i]) for i in elems])
+
+    a = generator(ofile)
+    # No error should happen here: it is a bug otherwise
+    data = np.fromiter(a, descr)
+    return data, meta
+
+
+#-----
+# Misc
+#-----
+def basic_stats(data):
+    nbfac = data.size * 1. / (data.size - 1)
+    return np.nanmin(data), np.nanmax(data), np.mean(data), np.std(data) * nbfac
+
+
+def print_attribute(name, tp, data):
+    type = tp[0]
+    if type == 'numeric' or type == 'real' or type == 'integer':
+        min, max, mean, std = basic_stats(data)
+        print("%s,%s,%f,%f,%f,%f" % (name, type, min, max, mean, std))
+    else:
+        msg = name + ",{"
+        for i in range(len(tp[1])-1):
+            msg += tp[1][i] + ","
+        msg += tp[1][-1]
+        msg += "}"
+        print(msg)
+
+
+def test_weka(filename):
+    data, meta = loadarff(filename)
+    print(len(data.dtype))
+    print(data.size)
+    for i in meta:
+        print_attribute(i, meta[i], data[i])
+
+
+# make sure nose does not find this as a test
+test_weka.__test__ = False
+
+
+# Script usage: the first command line argument is the arff file to
+# summarize.
+if __name__ == '__main__':
+    import sys
+    filename = sys.argv[1]
+    test_weka(filename)
@@ -0,0 +1,13 @@
+from __future__ import division, print_function, absolute_import
+
+
+def configuration(parent_package='io', top_path=None):
+    """Build the numpy.distutils configuration of the ``arff`` subpackage.
+
+    The returned ``Configuration`` registers the ``tests`` directory as a
+    data directory.
+    """
+    from numpy.distutils.misc_util import Configuration
+
+    arff_config = Configuration('arff', parent_package, top_path)
+    arff_config.add_data_dir('tests')
+    return arff_config
+
+
+if __name__ == '__main__':
+    # Allow this file to be run directly as a setup script for the
+    # subpackage.
+    from numpy.distutils.core import setup
+    setup(**configuration(top_path='').todict())
@@ -0,0 +1,225 @@
+% 1. Title: Iris Plants Database
+% 
+% 2. Sources:
+%      (a) Creator: R.A. Fisher
+%      (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
+%      (c) Date: July, 1988
+% 
+% 3. Past Usage:
+%    - Publications: too many to mention!!!  Here are a few.
+%    1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"
+%       Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions
+%       to Mathematical Statistics" (John Wiley, NY, 1950).
+%    2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
+%       (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.
+%    3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
+%       Structure and Classification Rule for Recognition in Partially Exposed
+%       Environments".  IEEE Transactions on Pattern Analysis and Machine
+%       Intelligence, Vol. PAMI-2, No. 1, 67-71.
+%       -- Results:
+%          -- very low misclassification rates (0% for the setosa class)
+%    4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule".  IEEE 
+%       Transactions on Information Theory, May 1972, 431-433.
+%       -- Results:
+%          -- very low misclassification rates again
+%    5. See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al's AUTOCLASS II
+%       conceptual clustering system finds 3 classes in the data.
+% 
+% 4. Relevant Information:
+%    --- This is perhaps the best known database to be found in the pattern
+%        recognition literature.  Fisher's paper is a classic in the field
+%        and is referenced frequently to this day.  (See Duda & Hart, for
+%        example.)  The data set contains 3 classes of 50 instances each,
+%        where each class refers to a type of iris plant.  One class is
+%        linearly separable from the other 2; the latter are NOT linearly
+%        separable from each other.
+%    --- Predicted attribute: class of iris plant.
+%    --- This is an exceedingly simple domain.
+% 
+% 5. Number of Instances: 150 (50 in each of three classes)
+% 
+% 6. Number of Attributes: 4 numeric, predictive attributes and the class
+% 
+% 7. Attribute Information:
+%    1. sepal length in cm
+%    2. sepal width in cm
+%    3. petal length in cm
+%    4. petal width in cm
+%    5. class: 
+%       -- Iris Setosa
+%       -- Iris Versicolour
+%       -- Iris Virginica
+% 
+% 8. Missing Attribute Values: None
+% 
+% Summary Statistics:
+%  	           Min  Max   Mean    SD   Class Correlation
+%    sepal length: 4.3  7.9   5.84  0.83    0.7826   
+%     sepal width: 2.0  4.4   3.05  0.43   -0.4194
+%    petal length: 1.0  6.9   3.76  1.76    0.9490  (high!)
+%     petal width: 0.1  2.5   1.20  0.76    0.9565  (high!)
+% 
+% 9. Class Distribution: 33.3% for each of 3 classes.
+
+@RELATION iris
+
+@ATTRIBUTE sepallength	REAL
+@ATTRIBUTE sepalwidth 	REAL
+@ATTRIBUTE petallength 	REAL
+@ATTRIBUTE petalwidth	REAL
+@ATTRIBUTE class 	{Iris-setosa,Iris-versicolor,Iris-virginica}
+
+@DATA
+5.1,3.5,1.4,0.2,Iris-setosa
+4.9,3.0,1.4,0.2,Iris-setosa
+4.7,3.2,1.3,0.2,Iris-setosa
+4.6,3.1,1.5,0.2,Iris-setosa
+5.0,3.6,1.4,0.2,Iris-setosa
+5.4,3.9,1.7,0.4,Iris-setosa
+4.6,3.4,1.4,0.3,Iris-setosa
+5.0,3.4,1.5,0.2,Iris-setosa
+4.4,2.9,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+5.4,3.7,1.5,0.2,Iris-setosa
+4.8,3.4,1.6,0.2,Iris-setosa
+4.8,3.0,1.4,0.1,Iris-setosa
+4.3,3.0,1.1,0.1,Iris-setosa
+5.8,4.0,1.2,0.2,Iris-setosa
+5.7,4.4,1.5,0.4,Iris-setosa
+5.4,3.9,1.3,0.4,Iris-setosa
+5.1,3.5,1.4,0.3,Iris-setosa
+5.7,3.8,1.7,0.3,Iris-setosa
+5.1,3.8,1.5,0.3,Iris-setosa
+5.4,3.4,1.7,0.2,Iris-setosa
+5.1,3.7,1.5,0.4,Iris-setosa
+4.6,3.6,1.0,0.2,Iris-setosa
+5.1,3.3,1.7,0.5,Iris-setosa
+4.8,3.4,1.9,0.2,Iris-setosa
+5.0,3.0,1.6,0.2,Iris-setosa
+5.0,3.4,1.6,0.4,Iris-setosa
+5.2,3.5,1.5,0.2,Iris-setosa
+5.2,3.4,1.4,0.2,Iris-setosa
+4.7,3.2,1.6,0.2,Iris-setosa
+4.8,3.1,1.6,0.2,Iris-setosa
+5.4,3.4,1.5,0.4,Iris-setosa
+5.2,4.1,1.5,0.1,Iris-setosa
+5.5,4.2,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+5.0,3.2,1.2,0.2,Iris-setosa
+5.5,3.5,1.3,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+4.4,3.0,1.3,0.2,Iris-setosa
+5.1,3.4,1.5,0.2,Iris-setosa
+5.0,3.5,1.3,0.3,Iris-setosa
+4.5,2.3,1.3,0.3,Iris-setosa
+4.4,3.2,1.3,0.2,Iris-setosa
+5.0,3.5,1.6,0.6,Iris-setosa
+5.1,3.8,1.9,0.4,Iris-setosa
+4.8,3.0,1.4,0.3,Iris-setosa
+5.1,3.8,1.6,0.2,Iris-setosa
+4.6,3.2,1.4,0.2,Iris-setosa
+5.3,3.7,1.5,0.2,Iris-setosa
+5.0,3.3,1.4,0.2,Iris-setosa
+7.0,3.2,4.7,1.4,Iris-versicolor
+6.4,3.2,4.5,1.5,Iris-versicolor
+6.9,3.1,4.9,1.5,Iris-versicolor
+5.5,2.3,4.0,1.3,Iris-versicolor
+6.5,2.8,4.6,1.5,Iris-versicolor
+5.7,2.8,4.5,1.3,Iris-versicolor
+6.3,3.3,4.7,1.6,Iris-versicolor
+4.9,2.4,3.3,1.0,Iris-versicolor
+6.6,2.9,4.6,1.3,Iris-versicolor
+5.2,2.7,3.9,1.4,Iris-versicolor
+5.0,2.0,3.5,1.0,Iris-versicolor
+5.9,3.0,4.2,1.5,Iris-versicolor
+6.0,2.2,4.0,1.0,Iris-versicolor
+6.1,2.9,4.7,1.4,Iris-versicolor
+5.6,2.9,3.6,1.3,Iris-versicolor
+6.7,3.1,4.4,1.4,Iris-versicolor
+5.6,3.0,4.5,1.5,Iris-versicolor
+5.8,2.7,4.1,1.0,Iris-versicolor
+6.2,2.2,4.5,1.5,Iris-versicolor
+5.6,2.5,3.9,1.1,Iris-versicolor
+5.9,3.2,4.8,1.8,Iris-versicolor
+6.1,2.8,4.0,1.3,Iris-versicolor
+6.3,2.5,4.9,1.5,Iris-versicolor
+6.1,2.8,4.7,1.2,Iris-versicolor
+6.4,2.9,4.3,1.3,Iris-versicolor
+6.6,3.0,4.4,1.4,Iris-versicolor
+6.8,2.8,4.8,1.4,Iris-versicolor
+6.7,3.0,5.0,1.7,Iris-versicolor
+6.0,2.9,4.5,1.5,Iris-versicolor
+5.7,2.6,3.5,1.0,Iris-versicolor
+5.5,2.4,3.8,1.1,Iris-versicolor
+5.5,2.4,3.7,1.0,Iris-versicolor
+5.8,2.7,3.9,1.2,Iris-versicolor
+6.0,2.7,5.1,1.6,Iris-versicolor
+5.4,3.0,4.5,1.5,Iris-versicolor
+6.0,3.4,4.5,1.6,Iris-versicolor
+6.7,3.1,4.7,1.5,Iris-versicolor
+6.3,2.3,4.4,1.3,Iris-versicolor
+5.6,3.0,4.1,1.3,Iris-versicolor
+5.5,2.5,4.0,1.3,Iris-versicolor
+5.5,2.6,4.4,1.2,Iris-versicolor
+6.1,3.0,4.6,1.4,Iris-versicolor
+5.8,2.6,4.0,1.2,Iris-versicolor
+5.0,2.3,3.3,1.0,Iris-versicolor
+5.6,2.7,4.2,1.3,Iris-versicolor
+5.7,3.0,4.2,1.2,Iris-versicolor
+5.7,2.9,4.2,1.3,Iris-versicolor
+6.2,2.9,4.3,1.3,Iris-versicolor
+5.1,2.5,3.0,1.1,Iris-versicolor
+5.7,2.8,4.1,1.3,Iris-versicolor
+6.3,3.3,6.0,2.5,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+7.1,3.0,5.9,2.1,Iris-virginica
+6.3,2.9,5.6,1.8,Iris-virginica
+6.5,3.0,5.8,2.2,Iris-virginica
+7.6,3.0,6.6,2.1,Iris-virginica
+4.9,2.5,4.5,1.7,Iris-virginica
+7.3,2.9,6.3,1.8,Iris-virginica
+6.7,2.5,5.8,1.8,Iris-virginica
+7.2,3.6,6.1,2.5,Iris-virginica
+6.5,3.2,5.1,2.0,Iris-virginica
+6.4,2.7,5.3,1.9,Iris-virginica
+6.8,3.0,5.5,2.1,Iris-virginica
+5.7,2.5,5.0,2.0,Iris-virginica
+5.8,2.8,5.1,2.4,Iris-virginica
+6.4,3.2,5.3,2.3,Iris-virginica
+6.5,3.0,5.5,1.8,Iris-virginica
+7.7,3.8,6.7,2.2,Iris-virginica
+7.7,2.6,6.9,2.3,Iris-virginica
+6.0,2.2,5.0,1.5,Iris-virginica
+6.9,3.2,5.7,2.3,Iris-virginica
+5.6,2.8,4.9,2.0,Iris-virginica
+7.7,2.8,6.7,2.0,Iris-virginica
+6.3,2.7,4.9,1.8,Iris-virginica
+6.7,3.3,5.7,2.1,Iris-virginica
+7.2,3.2,6.0,1.8,Iris-virginica
+6.2,2.8,4.8,1.8,Iris-virginica
+6.1,3.0,4.9,1.8,Iris-virginica
+6.4,2.8,5.6,2.1,Iris-virginica
+7.2,3.0,5.8,1.6,Iris-virginica
+7.4,2.8,6.1,1.9,Iris-virginica
+7.9,3.8,6.4,2.0,Iris-virginica
+6.4,2.8,5.6,2.2,Iris-virginica
+6.3,2.8,5.1,1.5,Iris-virginica
+6.1,2.6,5.6,1.4,Iris-virginica
+7.7,3.0,6.1,2.3,Iris-virginica
+6.3,3.4,5.6,2.4,Iris-virginica
+6.4,3.1,5.5,1.8,Iris-virginica
+6.0,3.0,4.8,1.8,Iris-virginica
+6.9,3.1,5.4,2.1,Iris-virginica
+6.7,3.1,5.6,2.4,Iris-virginica
+6.9,3.1,5.1,2.3,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+6.8,3.2,5.9,2.3,Iris-virginica
+6.7,3.3,5.7,2.5,Iris-virginica
+6.7,3.0,5.2,2.3,Iris-virginica
+6.3,2.5,5.0,1.9,Iris-virginica
+6.5,3.0,5.2,2.0,Iris-virginica
+6.2,3.4,5.4,2.3,Iris-virginica
+5.9,3.0,5.1,1.8,Iris-virginica
+%
+%
+%
@@ -0,0 +1,8 @@
+% This arff file contains some missing data
+@relation missing
+@attribute yop real
+@attribute yap real
+@data
+1,5
+2,4
+?,?
@@ -0,0 +1,11 @@
+@RELATION iris
+
+@ATTRIBUTE sepallength  REAL
+@ATTRIBUTE sepalwidth   REAL
+@ATTRIBUTE petallength  REAL
+@ATTRIBUTE petalwidth   REAL
+@ATTRIBUTE class    {Iris-setosa,Iris-versicolor,Iris-virginica}
+
+@DATA
+
+% This file has no data
@@ -0,0 +1,10 @@
+@RELATION test1
+
+@ATTRIBUTE attr0	REAL
+@ATTRIBUTE attr1 	REAL
+@ATTRIBUTE attr2 	REAL
+@ATTRIBUTE attr3	REAL
+@ATTRIBUTE class 	{class0, class1, class2, class3}
+
+@DATA
+0.1, 0.2, 0.3, 0.4,class1
@@ -0,0 +1,15 @@
+@RELATION test2
+
+@ATTRIBUTE attr0	REAL
+@ATTRIBUTE attr1 	real
+@ATTRIBUTE attr2 	integer
+@ATTRIBUTE attr3	Integer
+@ATTRIBUTE attr4 	Numeric
+@ATTRIBUTE attr5	numeric
+@ATTRIBUTE attr6 	string
+@ATTRIBUTE attr7 	STRING
+@ATTRIBUTE attr8 	{bla}
+@ATTRIBUTE attr9 	{bla, bla}
+
+@DATA
+0.1, 0.2, 0.3, 0.4,class1
@@ -0,0 +1,6 @@
+@RELATION test3
+
+@ATTRIBUTE attr0	crap
+
+@DATA
+0.1, 0.2, 0.3, 0.4,class1
@@ -0,0 +1,11 @@
+@RELATION test5
+
+@ATTRIBUTE attr0	REAL
+@ATTRIBUTE attr1 	REAL
+@ATTRIBUTE attr2 	REAL
+@ATTRIBUTE attr3	REAL
+@ATTRIBUTE class 	{class0, class1, class2, class3}
+@DATA
+0.1, 0.2, 0.3, 0.4,class1
+-0.1, -0.2, -0.3, -0.4,class2
+1, 2, 3, 4,class3
@@ -0,0 +1,26 @@
+@RELATION test4
+
+@ATTRIBUTE attr0	REAL
+@ATTRIBUTE attr1 	REAL
+@ATTRIBUTE attr2 	REAL
+@ATTRIBUTE attr3	REAL
+@ATTRIBUTE class 	{class0, class1, class2, class3}
+
+@DATA
+
+% lsdflkjhaksjdhf
+
+% lsdflkjhaksjdhf
+
+0.1, 0.2, 0.3, 0.4,class1
+% laksjdhf
+
+% lsdflkjhaksjdhf
+-0.1, -0.2, -0.3, -0.4,class2
+
+% lsdflkjhaksjdhf
+% lsdflkjhaksjdhf
+
+% lsdflkjhaksjdhf
+
+1, 2, 3, 4,class3
@@ -0,0 +1,12 @@
+@RELATION test6
+
+@ATTRIBUTE attr0	REAL
+@ATTRIBUTE attr1 	REAL
+@ATTRIBUTE attr2 	REAL
+@ATTRIBUTE attr3	REAL
+@ATTRIBUTE class 	{C}
+
+@DATA
+0.1, 0.2, 0.3, 0.4,C
+-0.1, -0.2, -0.3, -0.4,C
+1, 2, 3, 4,C
@@ -0,0 +1,15 @@
+@RELATION test7
+
+@ATTRIBUTE attr_year	DATE yyyy
+@ATTRIBUTE attr_month	DATE yyyy-MM
+@ATTRIBUTE attr_date	DATE yyyy-MM-dd
+@ATTRIBUTE attr_datetime_local	DATE "yyyy-MM-dd HH:mm"
+@ATTRIBUTE attr_datetime_missing	DATE "yyyy-MM-dd HH:mm"
+
+@DATA
+1999,1999-01,1999-01-31,"1999-01-31 00:01",?
+2004,2004-12,2004-12-01,"2004-12-01 23:59","2004-12-01 23:59"
+1817,1817-04,1817-04-28,"1817-04-28 13:00",?
+2100,2100-09,2100-09-10,"2100-09-10 12:00",?
+2013,2013-11,2013-11-30,"2013-11-30 04:55","2013-11-30 04:55"
+1631,1631-10,1631-10-15,"1631-10-15 20:04","1631-10-15 20:04"
@@ -0,0 +1,12 @@
+@RELATION test8
+
+@ATTRIBUTE attr_datetime_utc	DATE "yyyy-MM-dd HH:mm Z"
+@ATTRIBUTE attr_datetime_full	DATE "yy-MM-dd HH:mm:ss z"
+
+@DATA
+"1999-01-31 00:01 UTC","99-01-31 00:01:08 +0430"
+"2004-12-01 23:59 UTC","04-12-01 23:59:59 -0800"
+"1817-04-28 13:00 UTC","17-04-28 13:00:33 +1000"
+"2100-09-10 12:00 UTC","21-09-10 12:00:21 -0300"
+"2013-11-30 04:55 UTC","13-11-30 04:55:48 -1100"
+"1631-10-15 20:04 UTC","31-10-15 20:04:10 +0000"
@@ -0,0 +1,259 @@
+from __future__ import division, print_function, absolute_import
+
+import datetime
+import os
+import sys
+from os.path import join as pjoin
+
+if sys.version_info[0] >= 3:
+    from io import StringIO
+else:
+    from cStringIO import StringIO
+
+import numpy as np
+
+from numpy.testing import (assert_array_almost_equal,
+                           assert_array_equal, assert_equal, assert_)
+import pytest
+from pytest import raises as assert_raises
+
+from scipy.io.arff.arffread import loadarff
+from scipy.io.arff.arffread import read_header, parse_type, ParseArffError
+
+
+# Directory holding the ARFF fixture files, next to this test module.
+data_path = pjoin(os.path.dirname(__file__), 'data')
+
+# Fixtures used by the header-parsing tests.
+test1 = pjoin(data_path, 'test1.arff')
+test2 = pjoin(data_path, 'test2.arff')
+test3 = pjoin(data_path, 'test3.arff')
+
+# Fixtures used by the data-loading and date tests.
+test4 = pjoin(data_path, 'test4.arff')
+test5 = pjoin(data_path, 'test5.arff')
+test6 = pjoin(data_path, 'test6.arff')
+test7 = pjoin(data_path, 'test7.arff')
+test8 = pjoin(data_path, 'test8.arff')
+# Rows and attribute types that TestData._test compares loaded data with.
+expect4_data = [(0.1, 0.2, 0.3, 0.4, 'class1'),
+                (-0.1, -0.2, -0.3, -0.4, 'class2'),
+                (1, 2, 3, 4, 'class3')]
+expected_types = ['numeric', 'numeric', 'numeric', 'numeric', 'nominal']
+
+# Expected contents of missing.arff as a structured array; the last row is
+# all-NaN.
+missing = pjoin(data_path, 'missing.arff')
+expect_missing_raw = np.array([[1, 5], [2, 4], [np.nan, np.nan]])
+expect_missing = np.empty(3, [('yop', float), ('yap', float)])
+expect_missing['yop'] = expect_missing_raw[:, 0]
+expect_missing['yap'] = expect_missing_raw[:, 1]
+
+
+class TestData(object):
+    """Tests of `loadarff` on small, well-formed ARFF files."""
+
+    def test1(self):
+        # Parsing trivial file with some comments in the data section.
+        self._test(test4)
+
+    def test2(self):
+        # Parsing trivial file with nothing.
+        self._test(test5)
+
+    def test3(self):
+        # Parsing trivial file with nominal attribute of 1 character.
+        self._test(test6)
+
+    def _test(self, test_file):
+        # Load `test_file` and compare its four numeric columns and its
+        # attribute types with the module-level expectations.
+        data, meta = loadarff(test_file)
+        # Without this check the loop below would pass vacuously if no row
+        # (or too few rows) were loaded.
+        assert_equal(len(data), len(expect4_data))
+        for i in range(len(data)):
+            for j in range(4):
+                assert_array_almost_equal(expect4_data[i][j], data[i][j])
+        assert_equal(meta.types(), expected_types)
+
+    def test_filelike(self):
+        # Test reading from file-like object (StringIO)
+        # Context managers close the files even if loading raises.
+        with open(test1) as f1:
+            data1, meta1 = loadarff(f1)
+        with open(test1) as f2:
+            data2, meta2 = loadarff(StringIO(f2.read()))
+        assert_(data1 == data2)
+        assert_(repr(meta1) == repr(meta2))
+
+    @pytest.mark.skipif(sys.version_info < (3, 6),
+                        reason='Passing path-like objects to IO functions requires Python >= 3.6')
+    def test_path(self):
+        # Test reading from `pathlib.Path` object
+        from pathlib import Path
+
+        with open(test1) as f1:
+            data1, meta1 = loadarff(f1)
+
+        data2, meta2 = loadarff(Path(test1))
+
+        assert_(data1 == data2)
+        assert_(repr(meta1) == repr(meta2))
+
+class TestMissingData(object):
+    """Loading of a file whose data section contains ``?`` entries."""
+
+    def test_missing(self):
+        # Both columns of missing.arff must match the expected structured
+        # array field by field.
+        loaded, _meta = loadarff(missing)
+        for column in ('yop', 'yap'):
+            assert_array_almost_equal(loaded[column], expect_missing[column])
+
+
+class TestNoData(object):
+    """Loading of a file whose @DATA section holds no rows."""
+
+    def test_nodata(self):
+        # nodata.arff declares its attributes but has an empty @DATA
+        # section: the result must be a zero-length array that still
+        # carries the full record dtype.
+        path = os.path.join(data_path, 'nodata.arff')
+        data, meta = loadarff(path)
+        float_fields = ['sepallength', 'sepalwidth',
+                        'petallength', 'petalwidth']
+        fields = [(field, '<f8') for field in float_fields]
+        fields.append(('class', 'S15'))
+        assert_equal(data.dtype, np.dtype(fields))
+        assert_equal(data.size, 0)
+
+
+class TestHeader(object):
+    """Tests of `read_header` and `parse_type` on the header fixtures.
+
+    Every file is opened with a context manager so that it is closed even
+    when `read_header` raises.
+    """
+
+    def test_type_parsing(self):
+        # Test parsing type of attribute from their value.
+        with open(test2) as ofile:
+            rel, attrs = read_header(ofile)
+
+        expected = ['numeric', 'numeric', 'numeric', 'numeric', 'numeric',
+                    'numeric', 'string', 'string', 'nominal', 'nominal']
+
+        # Make sure no attribute was dropped: a shorter `attrs` would
+        # otherwise pass the loop below unnoticed.
+        assert_equal(len(attrs), len(expected))
+        for (name, value), expected_type in zip(attrs, expected):
+            assert_(parse_type(value) == expected_type)
+
+    def test_badtype_parsing(self):
+        # Test parsing wrong type of attribute from their value.
+        with open(test3) as ofile:
+            rel, attrs = read_header(ofile)
+
+        for name, value in attrs:
+            assert_raises(ParseArffError, parse_type, value)
+
+    def test_fullheader1(self):
+        # Parsing trivial header with nothing.
+        with open(test1) as ofile:
+            rel, attrs = read_header(ofile)
+
+        # Test relation
+        assert_(rel == 'test1')
+
+        # Test numerical attributes
+        assert_(len(attrs) == 5)
+        for i in range(4):
+            assert_(attrs[i][0] == 'attr%d' % i)
+            assert_(attrs[i][1] == 'REAL')
+
+        # Test nominal attribute
+        assert_(attrs[4][0] == 'class')
+        assert_(attrs[4][1] == '{class0, class1, class2, class3}')
+
+    def test_dateheader(self):
+        # Date attributes keep their format string in the raw header value.
+        with open(test7) as ofile:
+            rel, attrs = read_header(ofile)
+
+        assert_(rel == 'test7')
+
+        assert_(len(attrs) == 5)
+
+        assert_(attrs[0][0] == 'attr_year')
+        assert_(attrs[0][1] == 'DATE yyyy')
+
+        assert_(attrs[1][0] == 'attr_month')
+        assert_(attrs[1][1] == 'DATE yyyy-MM')
+
+        assert_(attrs[2][0] == 'attr_date')
+        assert_(attrs[2][1] == 'DATE yyyy-MM-dd')
+
+        assert_(attrs[3][0] == 'attr_datetime_local')
+        assert_(attrs[3][1] == 'DATE "yyyy-MM-dd HH:mm"')
+
+        assert_(attrs[4][0] == 'attr_datetime_missing')
+        assert_(attrs[4][1] == 'DATE "yyyy-MM-dd HH:mm"')
+
+    def test_dateheader_unsupported(self):
+        # The header of test8.arff (time-zone date formats) is still read
+        # verbatim by read_header.
+        with open(test8) as ofile:
+            rel, attrs = read_header(ofile)
+
+        assert_(rel == 'test8')
+
+        assert_(len(attrs) == 2)
+        assert_(attrs[0][0] == 'attr_datetime_utc')
+        assert_(attrs[0][1] == 'DATE "yyyy-MM-dd HH:mm Z"')
+
+        assert_(attrs[1][0] == 'attr_datetime_full')
+        assert_(attrs[1][1] == 'DATE "yy-MM-dd HH:mm:ss z"')
+
+
+class TestDateAttribute(object):
+    """Checks of the DATE columns loaded from test7.arff."""
+
+    def setup_method(self):
+        # Reload the fixture before each test.
+        self.data, self.meta = loadarff(test7)
+
+    def test_year_attribute(self):
+        years = ['1999', '2004', '1817', '2100', '2013', '1631']
+        expected = np.array(years, dtype='datetime64[Y]')
+
+        assert_array_equal(self.data["attr_year"], expected)
+
+    def test_month_attribute(self):
+        months = ['1999-01', '2004-12', '1817-04',
+                  '2100-09', '2013-11', '1631-10']
+        expected = np.array(months, dtype='datetime64[M]')
+
+        assert_array_equal(self.data["attr_month"], expected)
+
+    def test_date_attribute(self):
+        days = ['1999-01-31', '2004-12-01', '1817-04-28',
+                '2100-09-10', '2013-11-30', '1631-10-15']
+        expected = np.array(days, dtype='datetime64[D]')
+
+        assert_array_equal(self.data["attr_date"], expected)
+
+    def test_datetime_local_attribute(self):
+        # (year, month, day, hour, minute) of each expected timestamp.
+        stamps = [
+            (1999, 1, 31, 0, 1),
+            (2004, 12, 1, 23, 59),
+            (1817, 4, 28, 13, 0),
+            (2100, 9, 10, 12, 0),
+            (2013, 11, 30, 4, 55),
+            (1631, 10, 15, 20, 4),
+        ]
+        expected = np.array([datetime.datetime(*stamp) for stamp in stamps],
+                            dtype='datetime64[m]')
+
+        assert_array_equal(self.data["attr_datetime_local"], expected)
+
+    def test_datetime_missing(self):
+        # Rows whose value is '?' in the file are expected to load as NaT.
+        values = ['nat', '2004-12-01T23:59', 'nat',
+                  'nat', '2013-11-30T04:55', '1631-10-15T20:04']
+        expected = np.array(values, dtype='datetime64[m]')
+
+        assert_array_equal(self.data["attr_datetime_missing"], expected)
+
+    def test_datetime_timezone(self):
+        # Loading test8.arff (time-zone date formats) must raise.
+        with assert_raises(ValueError):
+            loadarff(test8)
@@ -0,0 +1,4 @@
+from __future__ import division, print_function, absolute_import
+
+from scipy.io.harwell_boeing.hb import MalformedHeader, HBInfo, HBFile, \
+    HBMatrixType, hb_read, hb_write
@@ -0,0 +1,312 @@
+"""
+Preliminary module to handle fortran formats for IO. Do not use this outside
+scipy.sparse io for now, until the API is deemed reasonable.
+
+The *Format classes handle conversion between fortran and python format, and
+FortranFormatParser can create *Format instances from raw fortran format
+strings (e.g. '(3I4)', '(10I3)', etc...)
+"""
+from __future__ import division, print_function, absolute_import
+
+import re
+
+import numpy as np
+
+
+__all__ = ["BadFortranFormat", "FortranFormatParser", "IntFormat", "ExpFormat"]
+
+
+# Regular expressions of the lexical tokens recognized in fortran format
+# strings, keyed by token type name. Tokenizer compiles one pattern per
+# entry.
+TOKENS = {
+    "LPAR": r"\(",
+    "RPAR": r"\)",
+    "INT_ID": r"I",
+    "EXP_ID": r"E",
+    "INT": r"\d+",
+    "DOT": r"\.",
+}
+
+
+class BadFortranFormat(SyntaxError):
+    """Raised by FortranFormatParser.parse for an invalid format string."""
+    pass
+
+
+def number_digits(n):
+    """Return the number of decimal digits of the integer part of ``abs(n)``.
+
+    Computed as ``floor(log10(abs(n))) + 1``; zero is reported as one digit.
+    """
+    if n == 0:
+        # log10(0) is -inf, which cannot be converted to an int.
+        return 1
+    return int(np.floor(np.log10(np.abs(n))) + 1)
+
+
+class IntFormat(object):
+    """Fortran integer format, written ``[repeat]I<width>[.<min>]``."""
+
+    @classmethod
+    def from_number(cls, n, min=None):
+        """Build a "reasonable" IntFormat able to represent any number
+        between 0 and n if n > 0, or between -n and n if n < 0.
+
+        Parameters
+        ----------
+        n : int
+            max number one wants to be able to represent
+        min : int
+            minimum number of characters to use for the format
+
+        Returns
+        -------
+        res : IntFormat
+            IntFormat instance with reasonable (see Notes) computed width
+
+        Notes
+        -----
+        Reasonable means the shortest string length that loses no
+        precision. For example, IntFormat.from_number(1) has width 2, so
+        that 0 and 1 are each written with a single digit.
+        """
+        digits = number_digits(n)
+        # One extra character on top of the digits, two for negative n.
+        width = digits + (2 if n < 0 else 1)
+        # As many fields as fit in 80 characters.
+        return cls(width, min, repeat=80 // width)
+
+    def __init__(self, width, min=None, repeat=None):
+        self.width = width
+        self.repeat = repeat
+        self.min = min
+
+    def _descriptor(self):
+        # Text shared by __repr__ and fortran_format; a falsy repeat or min
+        # is left out.
+        pieces = []
+        if self.repeat:
+            pieces.append("%d" % self.repeat)
+        pieces.append("I%d" % self.width)
+        if self.min:
+            pieces.append(".%d" % self.min)
+        return "".join(pieces)
+
+    def __repr__(self):
+        return "IntFormat(" + self._descriptor() + ")"
+
+    @property
+    def fortran_format(self):
+        """The format as a parenthesized fortran format string."""
+        return "(" + self._descriptor() + ")"
+
+    @property
+    def python_format(self):
+        """The equivalent python %-format for a single value."""
+        return "".join(["%", str(self.width), "d"])
+
+
+class ExpFormat(object):
+    """Fortran exponential format, written
+    ``[repeat]E<width>.<significand>[E<min>]``."""
+
+    @classmethod
+    def from_number(cls, n, min=None):
+        """Given a float number, returns a "reasonable" ExpFormat instance to
+        represent any number between -n and n.
+
+        Parameters
+        ----------
+        n : float
+            max number one wants to be able to represent. Both numpy
+            floating scalars and plain python floats are accepted; the
+            floating-point type of `n` determines the precision used.
+        min : int
+            stored as the ``min`` attribute; when truthy it is emitted as
+            the trailing ``E<min>`` part of the format
+
+        Returns
+        -------
+        res : ExpFormat
+            ExpFormat instance with reasonable (see Notes) computed width
+
+        Notes
+        -----
+        Reasonable should be understood as the minimal string length necessary
+        to avoid losing precision.
+        """
+        # len of one number in exp format: sign + 1|0 + "." +
+        # number of digit for fractional part + 'E' + sign of exponent +
+        # len of exponent
+        # np.asarray gives python floats a dtype too (float64), so `n` does
+        # not have to be a numpy scalar.
+        finfo = np.finfo(np.asarray(n).dtype)
+        # Number of digits for fractional part
+        n_prec = finfo.precision + 1
+        # Number of digits for exponential part
+        n_exp = number_digits(np.max(np.abs([finfo.maxexp, finfo.minexp])))
+        width = 1 + 1 + n_prec + 1 + n_exp + 1
+        if n < 0:
+            width += 1
+        # As many fields as fit in 80 characters (same rule as IntFormat).
+        repeat = 80 // width
+        return cls(width, n_prec, min, repeat=repeat)
+
+    def __init__(self, width, significand, min=None, repeat=None):
+        """\
+        Parameters
+        ----------
+        width : int
+            number of characters taken by the string (includes space).
+        significand : int
+            number of digits written after the decimal point.
+        min : int, optional
+            when truthy, emitted as the trailing ``E<min>`` of the format.
+        repeat : int, optional
+            repeat count written in front of the format.
+        """
+        self.width = width
+        self.significand = significand
+        self.repeat = repeat
+        self.min = min
+
+    def _descriptor(self):
+        # Text shared by __repr__ and fortran_format; a falsy repeat or min
+        # is left out.
+        r = ""
+        if self.repeat:
+            r += "%d" % self.repeat
+        r += "E%d.%d" % (self.width, self.significand)
+        if self.min:
+            r += "E%d" % self.min
+        return r
+
+    def __repr__(self):
+        return "ExpFormat(" + self._descriptor() + ")"
+
+    @property
+    def fortran_format(self):
+        """The format as a parenthesized fortran format string."""
+        return "(" + self._descriptor() + ")"
+
+    @property
+    def python_format(self):
+        """The equivalent python %-format for a single value.
+
+        The python field is one character narrower than `width`.
+        """
+        return "%" + str(self.width-1) + "." + str(self.significand) + "E"
+
+
+class Token(object):
+    """A lexical token: a type name, the matched text and a position."""
+
+    def __init__(self, type, value, pos):
+        self.type, self.value, self.pos = type, value, pos
+
+    def __str__(self):
+        # The position is deliberately not part of the textual form.
+        shown = (self.type, self.value)
+        return """Token('%s', "%s")""" % shown
+
+    def __repr__(self):
+        return str(self)
+
+
+class Tokenizer(object):
+    """Splits a string into Token instances using the TOKENS patterns."""
+
+    def __init__(self):
+        # Token type names and their compiled patterns, kept in step.
+        self.tokens = list(TOKENS)
+        self.res = [re.compile(TOKENS[name]) for name in self.tokens]
+
+    def input(self, s):
+        """Set `s` as the string to tokenize and rewind to its start."""
+        self.data = s
+        self.curpos = 0
+        self.len = len(s)
+
+    def next_token(self):
+        """Return the next Token, or None once the input is exhausted.
+
+        Raises SyntaxError when no pattern matches at the current position.
+        The returned token's ``pos`` is the offset just past the match.
+        """
+        start = self.curpos
+        if start >= self.len:
+            return None
+
+        for name, pattern in zip(self.tokens, self.res):
+            found = pattern.match(self.data, start)
+            if found is not None:
+                self.curpos = found.end()
+                return Token(name, found.group(), self.curpos)
+        raise SyntaxError("Unknown character at position %d (%s)"
+                          % (self.curpos, self.data[start]))
+
+
+# Grammar for fortran format:
+# format            : LPAR format_string RPAR
+# format_string     : repeated | simple
+# repeated          : repeat simple
+# simple            : int_fmt | exp_fmt
+# int_fmt           : INT_ID width | INT_ID width DOT min
+# exp_fmt           : simple_exp_fmt | extended_exp_fmt
+# simple_exp_fmt    : EXP_ID width DOT significand
+# extended_exp_fmt  : EXP_ID width DOT significand EXP_ID ndigits
+# repeat            : INT
+# width             : INT
+# significand       : INT
+# ndigits           : INT
+
+# Naive fortran formatter - parser is hand-made
+class FortranFormatParser(object):
+    """Parser for fortran format strings. The parse method returns a *Format
+    instance.
+
+    Notes
+    -----
+    Only ExpFormat (exponential format for floating values) and IntFormat
+    (integer format) for now.
+    """
+    def __init__(self):
+        self.tokenizer = Tokenizer()
+
+    def parse(self, s):
+        """Parse the fortran format string `s` (e.g. '(3I4)').
+
+        Returns
+        -------
+        fmt : IntFormat or ExpFormat
+
+        Raises
+        ------
+        BadFortranFormat
+            If `s` cannot be tokenized or does not follow the supported
+            grammar, including empty or truncated format strings.
+        """
+        self.tokenizer.input(s)
+
+        tokens = []
+
+        try:
+            while True:
+                t = self.tokenizer.next_token()
+                if t is None:
+                    break
+                tokens.append(t)
+            return self._parse_format(tokens)
+        except SyntaxError as e:
+            raise BadFortranFormat(str(e))
+
+    def _get_min(self, tokens):
+        # Consume ".<INT>" and return the integer's text. Going through
+        # _next reports a missing or non-integer token as SyntaxError
+        # rather than IndexError/ValueError.
+        self._next(tokens, "DOT")
+        return self._next(tokens, "INT").value
+
+    def _expect(self, token, tp):
+        # Raise SyntaxError unless `token` has type `tp`.
+        if not token.type == tp:
+            raise SyntaxError("Expected token of type %s, got %s ('%s')"
+                              % (tp, token.type, token.value))
+
+    def _parse_format(self, tokens):
+        # Build an IntFormat or ExpFormat from the full token list; every
+        # malformed input is reported as SyntaxError, which parse converts
+        # to BadFortranFormat.
+        if not tokens:
+            raise SyntaxError("Empty format string")
+        if not tokens[0].type == "LPAR":
+            raise SyntaxError("Expected left parenthesis at position "
+                              "%d (got '%s')" % (0, tokens[0].value))
+        elif not tokens[-1].type == "RPAR":
+            raise SyntaxError("Expected right parenthesis at position "
+                              "%d (got '%s')" % (len(tokens), tokens[-1].value))
+
+        tokens = tokens[1:-1]
+        # Optional leading repeat count.
+        if tokens and tokens[0].type == "INT":
+            repeat = int(tokens.pop(0).value)
+        else:
+            repeat = None
+
+        if not tokens:
+            raise SyntaxError("Missing format type (expected I or E)")
+        tok = tokens.pop(0)
+        if tok.type == "INT_ID":
+            width = int(self._next(tokens, "INT").value)
+            if tokens:
+                min = int(self._get_min(tokens))
+            else:
+                min = None
+            return IntFormat(width, min, repeat)
+        elif tok.type == "EXP_ID":
+            width = int(self._next(tokens, "INT").value)
+
+            self._next(tokens, "DOT")
+
+            significand = int(self._next(tokens, "INT").value)
+
+            if tokens:
+                # Optional exponent width: E<INT>.
+                self._next(tokens, "EXP_ID")
+
+                min = int(self._next(tokens, "INT").value)
+            else:
+                min = None
+            return ExpFormat(width, significand, min, repeat)
+        else:
+            raise SyntaxError("Invalid formater type %s" % tok.value)
+
+    def _next(self, tokens, tp):
+        # Pop and return the next token, which must have type `tp`.
+        if not len(tokens) > 0:
+            raise SyntaxError("Unexpected end of format (expected token of "
+                              "type %s)" % tp)
+        tok = tokens.pop(0)
+        self._expect(tok, tp)
+        return tok
@@ -0,0 +1,547 @@
+"""
+Implementation of Harwell-Boeing read/write.
+
+At the moment not the full Harwell-Boeing format is supported. Supported
+features are:
+
+    - assembled, non-symmetric, real matrices
+    - integer for pointer/indices
+    - exponential format for float values, and int format
+
+"""
+from __future__ import division, print_function, absolute_import
+
+# TODO:
+#   - Add more support (symmetric/complex matrices, non-assembled matrices ?)
+
+# XXX: reading is reasonably efficient (>= 85 % is in numpy.fromstring), but
+# takes a lot of memory. Being faster would require compiled code.
+# write is not efficient. Although not a terribly exciting task,
+# having reusable facilities to efficiently read/write fortran-formatted files
+# would be useful outside this module.
+
+import warnings
+
+import numpy as np
+from scipy.sparse import csc_matrix
+from scipy.io.harwell_boeing._fortran_format_parser import \
+        FortranFormatParser, IntFormat, ExpFormat
+
+__all__ = ["MalformedHeader", "hb_read", "hb_write", "HBInfo", "HBFile",
+           "HBMatrixType"]
+
+
+class MalformedHeader(Exception):
+    """Exception type exported by this module.
+
+    It is listed in ``__all__`` but is not raised by the code of this module.
+    """
+
+
+class LineOverflow(Warning):
+    """Warning category used when a header key is longer than 8 characters."""
+
+
+def _nbytes_full(fmt, nlines):
+    """Return the number of bytes to read to get every full line.
+
+    Parameters
+    ----------
+    fmt : parsed fortran format
+        Only its ``repeat`` and ``width`` attributes are used.
+    nlines : int
+        Total number of lines of the section; the last line is not counted
+        as a full line.
+    """
+    # `repeat` fields of `width` characters, plus one more character per line
+    # (presumably the newline - the callers read the last line separately
+    # with readline()).
+    line_length = fmt.repeat * fmt.width + 1
+    full_lines = nlines - 1
+    return line_length * full_lines
+
+
+class HBInfo(object):
+    """Header information of a Harwell-Boeing (HB) file.
+
+    An instance stores the title and key, the number of lines taken by each
+    data section, the matrix type and shape, and the parsed Fortran formats of
+    the pointer, indices and values sections.
+
+    Do not instantiate this class directly; use the `from_data` or `from_file`
+    class constructors.
+    """
+    @classmethod
+    def from_data(cls, m, title="Default title", key="0", mxtype=None, fmt=None):
+        """Create a HBInfo instance from an existing sparse matrix.
+
+        Parameters
+        ----------
+        m : sparse matrix
+            the HBInfo instance will derive its parameters from m
+        title : str
+            Title to put in the HB header
+        key : str
+            Key
+        mxtype : HBMatrixType
+            type of the input matrix; only None is handled for now, in which
+            case the type is derived from the dtype of the values
+        fmt : dict
+            not implemented; must be None
+
+        Returns
+        -------
+        hb_info : HBInfo instance
+
+        Raises
+        ------
+        NotImplementedError
+            If `fmt` is not None, or if the dtype kind of the values is
+            neither a float kind nor an integer kind.
+        ValueError
+            If `mxtype` is not None.
+        """
+        m = m.tocsc(copy=False)
+
+        pointer = m.indptr
+        indices = m.indices
+        values = m.data
+
+        nrows, ncols = m.shape
+        nnon_zeros = m.nnz
+
+        if fmt is None:
+            # +1 because HB use one-based indexing (Fortran), and we will write
+            # the indices /pointer as such
+            pointer_fmt = IntFormat.from_number(np.max(pointer+1))
+            indices_fmt = IntFormat.from_number(np.max(indices+1))
+
+            # The formats for the values are derived from minus the largest
+            # absolute value.
+            if values.dtype.kind in np.typecodes["AllFloat"]:
+                values_fmt = ExpFormat.from_number(-np.max(np.abs(values)))
+            elif values.dtype.kind in np.typecodes["AllInteger"]:
+                values_fmt = IntFormat.from_number(-np.max(np.abs(values)))
+            else:
+                raise NotImplementedError("type %s not implemented yet" % values.dtype.kind)
+        else:
+            raise NotImplementedError("fmt argument not supported yet.")
+
+        if mxtype is None:
+            if not np.isrealobj(values):
+                raise ValueError("Complex values not supported yet")
+            if values.dtype.kind in np.typecodes["AllInteger"]:
+                tp = "integer"
+            elif values.dtype.kind in np.typecodes["AllFloat"]:
+                tp = "real"
+            else:
+                raise NotImplementedError("type %s for values not implemented"
+                                          % values.dtype)
+            mxtype = HBMatrixType(tp, "unsymmetric", "assembled")
+        else:
+            raise ValueError("mxtype argument not handled yet.")
+
+        def _nlines(fmt, size):
+            # number of lines needed for `size` items at fmt.repeat items per
+            # line (ceiling division)
+            nlines = size // fmt.repeat
+            if nlines * fmt.repeat != size:
+                nlines += 1
+            return nlines
+
+        pointer_nlines = _nlines(pointer_fmt, pointer.size)
+        indices_nlines = _nlines(indices_fmt, indices.size)
+        values_nlines = _nlines(values_fmt, values.size)
+
+        total_nlines = pointer_nlines + indices_nlines + values_nlines
+
+        return cls(title, key,
+            total_nlines, pointer_nlines, indices_nlines, values_nlines,
+            mxtype, nrows, ncols, nnon_zeros,
+            pointer_fmt.fortran_format, indices_fmt.fortran_format,
+            values_fmt.fortran_format)
+
+    @classmethod
+    def from_file(cls, fid):
+        """Create a HBInfo instance from a file object containing a matrix in the
+        HB format.
+
+        The four header lines are read from `fid`, which is left positioned
+        right after them.
+
+        Parameters
+        ----------
+        fid : file-like matrix
+            File or file-like object containing a matrix in the HB format.
+
+        Returns
+        -------
+        hb_info : HBInfo instance
+
+        Raises
+        ------
+        ValueError
+            If a header line is too short or malformed, or if the file uses
+            a feature that is not supported (right hand sides, matrices that
+            are not real/integer, unsymmetric and assembled).
+        """
+        # First line
+        line = fid.readline().strip("\n")
+        # strictly more than 72 characters are required, so that the key
+        # (everything after the 72-character title) is not empty
+        if not len(line) > 72:
+            raise ValueError("Expected at least 72 characters for first line, "
+                             "got: \n%s" % line)
+        title = line[:72]
+        key = line[72:]
+
+        # Second line
+        line = fid.readline().strip("\n")
+        if not len(line.rstrip()) >= 56:
+            raise ValueError("Expected at least 56 characters for second line, "
+                             "got: \n%s" % line)
+        total_nlines = _expect_int(line[:14])
+        pointer_nlines = _expect_int(line[14:28])
+        indices_nlines = _expect_int(line[28:42])
+        values_nlines = _expect_int(line[42:56])
+
+        # the right hand side field is optional; blank means 0
+        rhs_nlines = line[56:72].strip()
+        if rhs_nlines == '':
+            rhs_nlines = 0
+        else:
+            rhs_nlines = _expect_int(rhs_nlines)
+        if not rhs_nlines == 0:
+            raise ValueError("Only files without right hand side supported for "
+                             "now.")
+
+        # Third line
+        line = fid.readline().strip("\n")
+        if not len(line) >= 70:
+            # the message reports the length that is actually checked
+            raise ValueError("Expected at least 70 characters for third line, got:\n"
+                             "%s" % line)
+
+        mxtype_s = line[:3].upper()
+        if not len(mxtype_s) == 3:
+            raise ValueError("mxtype expected to be 3 characters long")
+
+        mxtype = HBMatrixType.from_fortran(mxtype_s)
+        if mxtype.value_type not in ["real", "integer"]:
+            raise ValueError("Only real or integer matrices supported for "
+                             "now (detected %s)" % mxtype)
+        if not mxtype.structure == "unsymmetric":
+            raise ValueError("Only unsymmetric matrices supported for "
+                             "now (detected %s)" % mxtype)
+        if not mxtype.storage == "assembled":
+            raise ValueError("Only assembled matrices supported for now")
+
+        if not line[3:14] == " " * 11:
+            raise ValueError("Malformed data for third line: %s" % line)
+
+        nrows = _expect_int(line[14:28])
+        ncols = _expect_int(line[28:42])
+        nnon_zeros = _expect_int(line[42:56])
+        nelementals = _expect_int(line[56:70])
+        if not nelementals == 0:
+            raise ValueError("Unexpected value %d for nltvl (last entry of line 3)"
+                             % nelementals)
+
+        # Fourth line: the three whitespace-separated fortran formats
+        line = fid.readline().strip("\n")
+
+        ct = line.split()
+        if not len(ct) == 3:
+            raise ValueError("Expected 3 formats, got %s" % ct)
+
+        return cls(title, key,
+                   total_nlines, pointer_nlines, indices_nlines, values_nlines,
+                   mxtype, nrows, ncols, nnon_zeros,
+                   ct[0], ct[1], ct[2],
+                   rhs_nlines, nelementals)
+
+    def __init__(self, title, key,
+            total_nlines, pointer_nlines, indices_nlines, values_nlines,
+            mxtype, nrows, ncols, nnon_zeros,
+            pointer_format_str, indices_format_str, values_format_str,
+            right_hand_sides_nlines=0, nelementals=0):
+        """Do not use this directly, but the class ctrs (from_* functions).
+
+        A `title` of None is replaced by "No Title" and a `key` of None by
+        "|No Key". The three format strings are parsed with
+        FortranFormatParser. `right_hand_sides_nlines` is accepted but not
+        stored.
+
+        Raises
+        ------
+        ValueError
+            If the title is longer than 72 characters, if the pointer or
+            indices format is not an integer format, or if the values format
+            is inconsistent with `mxtype`.
+        """
+        if title is None:
+            title = "No Title"
+        if len(title) > 72:
+            raise ValueError("title cannot be > 72 characters")
+
+        if key is None:
+            key = "|No Key"
+        if len(key) > 8:
+            warnings.warn("key is > 8 characters (key is %s)" % key, LineOverflow)
+
+        # Stored only once the defaults are applied, so that `dump` never has
+        # to format None.
+        self.title = title
+        self.key = key
+
+        self.total_nlines = total_nlines
+
+        parser = FortranFormatParser()
+        pointer_format = parser.parse(pointer_format_str)
+        if not isinstance(pointer_format, IntFormat):
+            raise ValueError("Expected int format for pointer format, got %s"
+                             % pointer_format)
+
+        indices_format = parser.parse(indices_format_str)
+        if not isinstance(indices_format, IntFormat):
+            raise ValueError("Expected int format for indices format, got %s" %
+                             indices_format)
+
+        values_format = parser.parse(values_format_str)
+        if isinstance(values_format, ExpFormat):
+            if mxtype.value_type not in ["real", "complex"]:
+                raise ValueError("Inconsistency between matrix type %s and "
+                                 "value type %s" % (mxtype, values_format))
+            values_dtype = np.float64
+        elif isinstance(values_format, IntFormat):
+            if mxtype.value_type not in ["integer"]:
+                raise ValueError("Inconsistency between matrix type %s and "
+                                 "value type %s" % (mxtype, values_format))
+            # XXX: fortran int -> dtype association ?
+            values_dtype = int
+        else:
+            raise ValueError("Unsupported format for values %r" % (values_format,))
+
+        self.pointer_format = pointer_format
+        self.indices_format = indices_format
+        self.values_format = values_format
+
+        self.pointer_dtype = np.int32
+        self.indices_dtype = np.int32
+        self.values_dtype = values_dtype
+
+        # For each section: its number of lines, and the number of bytes
+        # taken by all its lines but the last one.
+        self.pointer_nlines = pointer_nlines
+        self.pointer_nbytes_full = _nbytes_full(pointer_format, pointer_nlines)
+
+        self.indices_nlines = indices_nlines
+        self.indices_nbytes_full = _nbytes_full(indices_format, indices_nlines)
+
+        self.values_nlines = values_nlines
+        self.values_nbytes_full = _nbytes_full(values_format, values_nlines)
+
+        self.nrows = nrows
+        self.ncols = ncols
+        self.nnon_zeros = nnon_zeros
+        self.nelementals = nelementals
+        self.mxtype = mxtype
+
+    def dump(self):
+        """Gives the header corresponding to this instance as a string.
+
+        The four header lines are joined with newlines; there is no trailing
+        newline.
+        """
+        header = [self.title.ljust(72) + self.key.ljust(8)]
+
+        header.append("%14d%14d%14d%14d" %
+                      (self.total_nlines, self.pointer_nlines,
+                       self.indices_nlines, self.values_nlines))
+        # the last field of the third line is always written as 0
+        header.append("%14s%14d%14d%14d%14d" %
+                      (self.mxtype.fortran_format.ljust(14), self.nrows,
+                       self.ncols, self.nnon_zeros, 0))
+
+        pffmt = self.pointer_format.fortran_format
+        iffmt = self.indices_format.fortran_format
+        vffmt = self.values_format.fortran_format
+        header.append("%16s%16s%20s" %
+                      (pffmt.ljust(16), iffmt.ljust(16), vffmt.ljust(20)))
+        return "\n".join(header)
+
+
+def _expect_int(value, msg=None):
+    """Convert `value` to an int, with a descriptive error on failure.
+
+    Parameters
+    ----------
+    value : object
+        Value handed to ``int()``.
+    msg : str, optional
+        Template of the error message, formatted with ``msg % value``.
+        Defaults to "Expected an int, got %s".
+
+    Raises
+    ------
+    ValueError
+        If ``int(value)`` raises ValueError.
+    """
+    try:
+        result = int(value)
+    except ValueError:
+        template = "Expected an int, got %s" if msg is None else msg
+        raise ValueError(template % value)
+    return result
+
+
+def _read_hb_data(content, header):
+    """Read the pointer, indices and values sections and build the matrix.
+
+    Parameters
+    ----------
+    content : file-like
+        Open text file positioned at the start of the pointer section.
+    header : HBInfo
+        Header of the file; its ``*_nbytes_full`` attributes, ``values_dtype``,
+        ``nrows`` and ``ncols`` are used.
+
+    Returns
+    -------
+    scipy.sparse.csc_matrix
+
+    Raises
+    ------
+    ValueError
+        Propagated unchanged from ``csc_matrix`` when the arrays read from
+        the file are rejected.
+    """
+    def read_section(nbytes_full, dtype):
+        # All the full lines are read in one go, then the last (possibly
+        # partial) line with readline().
+        # XXX: look at a way to reduce memory here (big string creation)
+        text = "".join([content.read(nbytes_full), content.readline()])
+        return np.fromstring(text, dtype=dtype, sep=' ')
+
+    ptr = read_section(header.pointer_nbytes_full, int)
+    ind = read_section(header.indices_nbytes_full, int)
+    val = read_section(header.values_nbytes_full, header.values_dtype)
+
+    # -1: the file stores one-based (Fortran) pointers and indices
+    return csc_matrix((val, ind-1, ptr-1),
+                      shape=(header.nrows, header.ncols))
+
+
+def _write_data(m, fid, header):
+    """Write the header, then the pointer, indices and values of `m`.
+
+    Parameters
+    ----------
+    m : sparse matrix
+        Matrix to write; it is converted to CSC first.
+    fid : file-like
+        Writable text file.
+    header : HBInfo
+        Provides the header text (``dump()``) and, for each section, its
+        number of lines and its format.
+    """
+    m = m.tocsc(copy=False)
+
+    def write_array(f, ar, nlines, fmt):
+        # item_fmt is the python format of one item, per_line the number of
+        # items on a full line
+        item_fmt = fmt.python_format
+        per_line = fmt.repeat
+        full_line_fmt = item_fmt * per_line + "\n"
+
+        # every line but the last one is full; the last one is written
+        # separately with as many items as remain
+        n_full_lines = nlines - 1
+        full = ar[:n_full_lines * per_line]
+        for row in full.reshape((n_full_lines, per_line)):
+            f.write(full_line_fmt % tuple(row))
+
+        nremain = ar.size - full.size
+        if nremain > 0:
+            last_line_fmt = item_fmt * nremain
+            f.write(last_line_fmt % tuple(ar[ar.size - nremain:]) + "\n")
+
+    fid.write(header.dump())
+    fid.write("\n")
+
+    # +1 is for fortran one-based indexing
+    sections = (
+        (m.indptr+1, header.pointer_nlines, header.pointer_format),
+        (m.indices+1, header.indices_nlines, header.indices_format),
+        (m.data, header.values_nlines, header.values_format),
+    )
+    for array, nlines, fmt in sections:
+        write_array(fid, array, nlines, fmt)
+
+
+class HBMatrixType(object):
+    """Class to hold the matrix type.
+
+    A type is made of a value type, a structure and a storage, each known
+    both by a qualified name (e.g. "real") and by a one-character fortran
+    code (e.g. "R").
+    """
+    # q2f* translates qualified names to fortran character
+    _q2f_type = {
+        "real": "R",
+        "complex": "C",
+        "pattern": "P",
+        "integer": "I",
+    }
+    _q2f_structure = {
+        "symmetric": "S",
+        "unsymmetric": "U",
+        "hermitian": "H",
+        "skewsymmetric": "Z",
+        "rectangular": "R",
+    }
+    _q2f_storage = {
+        "assembled": "A",
+        "elemental": "E",
+    }
+
+    # f2q* are the reverse mappings, from fortran character to qualified name
+    _f2q_type = {code: name for name, code in _q2f_type.items()}
+    _f2q_structure = {code: name for name, code in _q2f_structure.items()}
+    _f2q_storage = {code: name for name, code in _q2f_storage.items()}
+
+    @classmethod
+    def from_fortran(cls, fmt):
+        """Create an instance from a 3-character fortran code such as "RUA".
+
+        Raises
+        ------
+        ValueError
+            If `fmt` is not 3 characters long or contains an unknown code.
+        """
+        if len(fmt) != 3:
+            raise ValueError("Fortran format for matrix type should be 3 "
+                             "characters long")
+        try:
+            qualified = (cls._f2q_type[fmt[0]],
+                         cls._f2q_structure[fmt[1]],
+                         cls._f2q_storage[fmt[2]])
+            return cls(*qualified)
+        except KeyError:
+            raise ValueError("Unrecognized format %s" % fmt)
+
+    def __init__(self, value_type, structure, storage="assembled"):
+        """Store the three qualified names, then validate them.
+
+        Raises
+        ------
+        ValueError
+            If one of the names is not a known qualified name.
+        """
+        self.value_type = value_type
+        self.structure = structure
+        self.storage = storage
+
+        if value_type not in self._q2f_type:
+            raise ValueError("Unrecognized type %s" % value_type)
+        if structure not in self._q2f_structure:
+            raise ValueError("Unrecognized structure %s" % structure)
+        if storage not in self._q2f_storage:
+            raise ValueError("Unrecognized storage %s" % storage)
+
+    @property
+    def fortran_format(self):
+        """The 3-character fortran code of this matrix type."""
+        codes = [self._q2f_type[self.value_type],
+                 self._q2f_structure[self.structure],
+                 self._q2f_storage[self.storage]]
+        return "".join(codes)
+
+    def __repr__(self):
+        parts = (self.value_type, self.structure, self.storage)
+        return "HBMatrixType(%s, %s, %s)" % parts
+
+
+class HBFile(object):
+    """A file object paired with the HB header used to read or write it."""
+
+    def __init__(self, file, hb_info=None):
+        """Create a HBFile instance.
+
+        Parameters
+        ----------
+        file : file-object
+            StringIO work as well
+        hb_info : HBInfo, optional
+            Should be given as an argument for writing, in which case the file
+            should be writable. When omitted, the header is read from `file`
+            right away.
+        """
+        self._fid = file
+        # NOTE: no check is made that the file is writable when hb_info is
+        # given.
+        self._hb_info = HBInfo.from_file(file) if hb_info is None else hb_info
+
+    @property
+    def title(self):
+        """Title stored in the header."""
+        return self._hb_info.title
+
+    @property
+    def key(self):
+        """Key stored in the header."""
+        return self._hb_info.key
+
+    @property
+    def type(self):
+        """Value type of the matrix type stored in the header."""
+        return self._hb_info.mxtype.value_type
+
+    @property
+    def structure(self):
+        """Structure of the matrix type stored in the header."""
+        return self._hb_info.mxtype.structure
+
+    @property
+    def storage(self):
+        """Storage of the matrix type stored in the header."""
+        return self._hb_info.mxtype.storage
+
+    def read_matrix(self):
+        """Read the matrix data from the file, using the stored header."""
+        return _read_hb_data(self._fid, self._hb_info)
+
+    def write_matrix(self, m):
+        """Write the header and the data of `m` to the file."""
+        return _write_data(m, self._fid, self._hb_info)
+
+
+def hb_read(path_or_open_file):
+    """Read HB-format file.
+
+    Parameters
+    ----------
+    path_or_open_file : path-like or file-like
+        An object with a ``read`` attribute is used as-is. Anything else is
+        opened with ``open`` before reading and closed afterwards.
+
+    Returns
+    -------
+    data : scipy.sparse.csc_matrix instance
+        The data read from the HB file as a sparse matrix.
+
+    Notes
+    -----
+    At the moment not the full Harwell-Boeing format is supported. Supported
+    features are:
+
+        - assembled, non-symmetric, real matrices
+        - integer for pointer/indices
+        - exponential format for float values, and int format
+
+    """
+    if not hasattr(path_or_open_file, 'read'):
+        # not a file object: treat it as a path
+        with open(path_or_open_file) as fid:
+            return HBFile(fid).read_matrix()
+    return HBFile(path_or_open_file).read_matrix()
+
+
+def hb_write(path_or_open_file, m, hb_info=None):
+    """Write HB-format file.
+
+    Parameters
+    ----------
+    path_or_open_file : path-like or file-like
+        An object with a ``write`` attribute is used as-is. Anything else is
+        opened with ``open(..., 'w')`` before writing and closed afterwards.
+    m : sparse-matrix
+        the sparse matrix to write
+    hb_info : HBInfo
+        contains the meta-data for write; derived from `m` when omitted
+
+    Returns
+    -------
+    None
+
+    Notes
+    -----
+    At the moment not the full Harwell-Boeing format is supported. Supported
+    features are:
+
+        - assembled, non-symmetric, real matrices
+        - integer for pointer/indices
+        - exponential format for float values, and int format
+
+    """
+    m = m.tocsc(copy=False)
+
+    if hb_info is None:
+        hb_info = HBInfo.from_data(m)
+
+    if hasattr(path_or_open_file, 'write'):
+        return HBFile(path_or_open_file, hb_info).write_matrix(m)
+
+    # not a file object: treat it as a path
+    with open(path_or_open_file, 'w') as fid:
+        return HBFile(fid, hb_info).write_matrix(m)
@@ -0,0 +1,14 @@
+from __future__ import division, print_function, absolute_import
+
+
+def configuration(parent_package='', top_path=None):
+    """Return the numpy.distutils Configuration of the harwell_boeing package.
+
+    The ``tests`` directory is registered as a data directory.
+    """
+    from numpy.distutils.misc_util import Configuration
+
+    package_config = Configuration('harwell_boeing', parent_package, top_path)
+    package_config.add_data_dir('tests')
+    return package_config
+
+
+# When this file is run as a script, hand the configuration of this package to
+# the setup() function of numpy.distutils.
+if __name__ == '__main__':
+    from numpy.distutils.core import setup
+    setup(**configuration(top_path='').todict())
@@ -0,0 +1,77 @@
+from __future__ import division, print_function, absolute_import
+
+import numpy as np
+
+from numpy.testing import assert_equal
+from pytest import raises as assert_raises
+
+from scipy.io.harwell_boeing._fortran_format_parser import (
+        FortranFormatParser, IntFormat, ExpFormat, BadFortranFormat,
+        number_digits)
+
+
+class TestFortranFormatParser(object):
+    """Parsing of fortran format strings with FortranFormatParser."""
+
+    def setup_method(self):
+        self.parser = FortranFormatParser()
+
+    def _test_equal(self, format, ref):
+        # Formats are compared through their attributes.
+        parsed = self.parser.parse(format)
+        assert_equal(parsed.__dict__, ref.__dict__)
+
+    def test_simple_int(self):
+        expected = IntFormat(4)
+        self._test_equal("(I4)", expected)
+
+    def test_simple_repeated_int(self):
+        expected = IntFormat(4, repeat=3)
+        self._test_equal("(3I4)", expected)
+
+    def test_simple_exp(self):
+        expected = ExpFormat(4, 3)
+        self._test_equal("(E4.3)", expected)
+
+    def test_exp_exp(self):
+        expected = ExpFormat(8, 3, 3)
+        self._test_equal("(E8.3E3)", expected)
+
+    def test_repeat_exp(self):
+        expected = ExpFormat(4, 3, repeat=2)
+        self._test_equal("(2E4.3)", expected)
+
+    def test_repeat_exp_exp(self):
+        expected = ExpFormat(8, 3, 3, repeat=2)
+        self._test_equal("(2E8.3E3)", expected)
+
+    def test_wrong_formats(self):
+        # Each of these must be rejected with BadFortranFormat.
+        for bad_format in ("I4", "(E4)", "(E4.)", "(E4.E3)"):
+            assert_raises(BadFortranFormat, self.parser.parse, bad_format)
+
+
+class TestIntFormat(object):
+    """Conversions of IntFormat to a fortran format and from a number."""
+
+    def test_to_fortran(self):
+        cases = [
+            (IntFormat(10), "(I10)"),
+            (IntFormat(12, 10), "(I12.10)"),
+            (IntFormat(12, 10, 3), "(3I12.10)"),
+        ]
+        for fmt, expected in cases:
+            assert_equal(fmt.fortran_format, expected)
+
+    def test_from_number(self):
+        cases = [
+            (10, IntFormat(3, repeat=26)),
+            (-12, IntFormat(4, repeat=20)),
+            (123456789, IntFormat(10, repeat=8)),
+        ]
+        for number, expected in cases:
+            assert_equal(IntFormat.from_number(number).__dict__,
+                         expected.__dict__)
+
+
+class TestExpFormat(object):
+    """Conversions of ExpFormat to a fortran format and from a number."""
+
+    def test_to_fortran(self):
+        cases = [
+            (ExpFormat(10, 5), "(E10.5)"),
+            (ExpFormat(12, 10), "(E12.10)"),
+            (ExpFormat(12, 10, min=3), "(E12.10E3)"),
+            (ExpFormat(10, 5, repeat=3), "(3E10.5)"),
+        ]
+        for fmt, expected in cases:
+            assert_equal(fmt.fortran_format, expected)
+
+    def test_from_number(self):
+        numbers = np.array([1.0, -1.2])
+        expected_formats = [ExpFormat(24, 16, repeat=3),
+                            ExpFormat(25, 16, repeat=3)]
+        for number, expected in zip(numbers, expected_formats):
+            assert_equal(ExpFormat.from_number(number).__dict__,
+                         expected.__dict__)
@@ -0,0 +1,71 @@
+from __future__ import division, print_function, absolute_import
+
+import sys
+if sys.version_info[0] >= 3:
+    from io import StringIO
+else:
+    from StringIO import StringIO
+import tempfile
+
+import numpy as np
+
+from numpy.testing import assert_equal, \
+    assert_array_almost_equal_nulp
+
+from scipy.sparse import coo_matrix, csc_matrix, rand
+
+from scipy.io import hb_read, hb_write
+
+
+# Text of a small HB file: four header lines (title/key, line counts, matrix
+# type and sizes, fortran formats) followed by the pointer, indices and values
+# sections. The header declares a 100 x 100 "RUA" matrix with 10 non-zeros.
+SIMPLE = """\
+No Title                                                                |No Key
+             9             4             1             4
+RUA                      100           100            10             0
+(26I3)          (26I3)          (3E23.15)
+1  2  2  2  2  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3
+3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3
+3  3  3  3  3  3  3  4  4  4  6  6  6  6  6  6  6  6  6  6  6  8  9  9  9  9
+9  9  9  9  9  9  9  9  9  9  9  9  9  9  9  9  9  9  9  9  9  9 11
+37 71 89 18 30 45 70 19 25 52
+2.971243799687726e-01  3.662366682877375e-01  4.786962174699534e-01
+6.490068647991184e-01  6.617490424831662e-02  8.870370343191623e-01
+4.196478590163001e-01  5.649603072111251e-01  9.934423887087086e-01
+6.912334991524289e-01
+"""
+
+# Matrix expected when reading SIMPLE, given as (values, (rows, columns)).
+# The row indices are the one-based indices listed in SIMPLE minus one.
+SIMPLE_MATRIX = coo_matrix(
+    ((0.297124379969, 0.366236668288, 0.47869621747, 0.649006864799,
+      0.0661749042483, 0.887037034319, 0.419647859016,
+      0.564960307211, 0.993442388709, 0.691233499152,),
+     (np.array([[36, 70, 88, 17, 29, 44, 69, 18, 24, 51],
+                [0, 4, 58, 61, 61, 72, 72, 73, 99, 99]]))))
+
+
+def assert_csc_almost_equal(r, l):
+    """Assert that two matrices are almost equal once converted to CSC.
+
+    The index pointers and the indices must be equal; the data must agree
+    within 10000 units in the last place.
+    """
+    first, second = csc_matrix(r), csc_matrix(l)
+    assert_equal(first.indptr, second.indptr)
+    assert_equal(first.indices, second.indices)
+    assert_array_almost_equal_nulp(first.data, second.data, 10000)
+
+
+class TestHBReader(object):
+    """Reading of a HB file held in memory."""
+
+    def test_simple(self):
+        # parse the reference text and compare with the expected matrix
+        loaded = hb_read(StringIO(SIMPLE))
+        assert_csc_almost_equal(loaded, SIMPLE_MATRIX)
+
+
+class TestHBReadWrite(object):
+    """Round trip of matrices through hb_write and hb_read."""
+
+    def check_save_load(self, value):
+        # write to a temporary text file, rewind it, and read it back
+        with tempfile.NamedTemporaryFile(mode='w+t') as tmp:
+            hb_write(tmp, value)
+            tmp.file.seek(0)
+            value_loaded = hb_read(tmp)
+        assert_csc_almost_equal(value, value_loaded)
+
+    def test_simple(self):
+        random_matrix = rand(10, 100, 0.1)
+        formats = ('coo', 'csc', 'csr', 'bsr', 'dia', 'dok', 'lil')
+        for matrix_format in formats:
+            self.check_save_load(
+                random_matrix.asformat(matrix_format, copy=False))
@@ -0,0 +1,884 @@
+# IDLSave - a python module to read IDL 'save' files
+# Copyright (c) 2010 Thomas P. Robitaille
+
+# Many thanks to Craig Markwardt for publishing the Unofficial Format
+# Specification for IDL .sav files, without which this Python module would not
+# exist (http://cow.physics.wisc.edu/~craigm/idl/savefmt).
+
+# This code was developed with permission from ITT Visual Information
+# Systems. IDL(r) is a registered trademark of ITT Visual Information Systems,
+# Inc. for their Interactive Data Language software.
+
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+from __future__ import division, print_function, absolute_import
+
+__all__ = ['readsav']
+
+import struct
+import numpy as np
+from numpy.compat import asstr
+import tempfile
+import zlib
+import warnings
+
+# Define the different data types that can be found in an IDL save file
+# (IDL typecode -> numpy dtype string; '>' is big-endian, '|O' is a python
+# object)
+DTYPE_DICT = {1: '>u1',
+              2: '>i2',
+              3: '>i4',
+              4: '>f4',
+              5: '>f8',
+              6: '>c8',
+              7: '|O',
+              8: '|O',
+              9: '>c16',
+              10: '|O',
+              11: '|O',
+              12: '>u2',
+              13: '>u4',
+              14: '>i8',
+              15: '>u8'}
+
+# Define the different record types that can be found in an IDL save file
+# (record type code -> name)
+RECTYPE_DICT = {0: "START_MARKER",
+                1: "COMMON_VARIABLE",
+                2: "VARIABLE",
+                3: "SYSTEM_VARIABLE",
+                6: "END_MARKER",
+                10: "TIMESTAMP",
+                12: "COMPILED",
+                13: "IDENTIFICATION",
+                14: "VERSION",
+                15: "HEAP_HEADER",
+                16: "HEAP_DATA",
+                17: "PROMOTE64",
+                19: "NOTICE",
+                20: "DESCRIPTION"}
+
+# Define a dictionary to contain structure definitions
+STRUCT_DICT = {}
+
+
+def _align_32(f):
+    '''Move the file position forward to the next multiple of 4 bytes.
+
+    Nothing is done when the position is already a multiple of 4.
+    '''
+    pos = f.tell()
+    overshoot = pos % 4
+    if overshoot != 0:
+        f.seek(pos + 4 - overshoot)
+
+
+def _skip_bytes(f, n):
+    '''Skip `n` bytes by reading them and discarding the result'''
+    f.read(n)
+
+
+def _read_bytes(f, n):
+    '''Read the next `n` bytes and return them as given by ``f.read``'''
+    data = f.read(n)
+    return data
+
+
+def _read_byte(f):
+    '''Read a single byte
+
+    Four bytes are consumed from the file; the value is the first of them.
+    '''
+    word = f.read(4)
+    (value,) = struct.unpack('>B', word[:1])
+    return np.uint8(value)
+
+
+def _read_long(f):
+    '''Read a signed 32-bit integer (4 bytes, big-endian)'''
+    (value,) = struct.unpack('>l', f.read(4))
+    return np.int32(value)
+
+
+def _read_int16(f):
+    '''Read a signed 16-bit integer
+
+    Four bytes are consumed from the file; the value is taken from the last
+    two of them (big-endian).
+    '''
+    word = f.read(4)
+    (value,) = struct.unpack('>h', word[2:4])
+    return np.int16(value)
+
+
+def _read_int32(f):
+    '''Read a signed 32-bit integer (4 bytes, big-endian)'''
+    (value,) = struct.unpack('>i', f.read(4))
+    return np.int32(value)
+
+
+def _read_int64(f):
+    '''Read a signed 64-bit integer (8 bytes, big-endian)'''
+    (value,) = struct.unpack('>q', f.read(8))
+    return np.int64(value)
+
+
+def _read_uint16(f):
+    '''Read an unsigned 16-bit integer
+
+    Four bytes are consumed from the file; the value is taken from the last
+    two of them (big-endian).
+    '''
+    word = f.read(4)
+    (value,) = struct.unpack('>H', word[2:4])
+    return np.uint16(value)
+
+
+def _read_uint32(f):
+    '''Read an unsigned 32-bit integer (4 bytes, big-endian)'''
+    (value,) = struct.unpack('>I', f.read(4))
+    return np.uint32(value)
+
+
+def _read_uint64(f):
+    '''Read an unsigned 64-bit integer (8 bytes, big-endian)'''
+    (value,) = struct.unpack('>Q', f.read(8))
+    return np.uint64(value)
+
+
+def _read_float32(f):
+    '''Read a 32-bit float (4 bytes, big-endian)'''
+    (value,) = struct.unpack('>f', f.read(4))
+    return np.float32(value)
+
+
+def _read_float64(f):
+    '''Read a 64-bit float (8 bytes, big-endian)'''
+    (value,) = struct.unpack('>d', f.read(8))
+    return np.float64(value)
+
+
+class Pointer(object):
+    '''Class used to define pointers
+
+    The only state is `index`, the value given at construction.
+    '''
+
+    def __init__(self, index):
+        self.index = index
+
+
+class ObjectPointer(Pointer):
+    '''Class used to define object pointers
+
+    Behaves exactly like `Pointer`; only the type differs.
+    '''
+
+
+def _read_string(f):
+    '''Read a string
+
+    The length is read first. When it is positive, that many bytes are read,
+    the file is aligned to the next 32-bit position and the bytes are
+    converted with `asstr`. Otherwise an empty string is returned.
+    '''
+    length = _read_long(f)
+    if length <= 0:
+        return ''
+    raw = _read_bytes(f, length)
+    _align_32(f)
+    return asstr(raw)
+
+
+def _read_string_data(f):
+    '''Read a data string (length is specified twice)
+
+    When the first length is positive, the length is read a second time, that
+    many bytes are read and returned as read, and the file is aligned to the
+    next 32-bit position. Otherwise '' is returned.
+    '''
+    if _read_long(f) <= 0:
+        return ''
+    length = _read_long(f)
+    string_data = _read_bytes(f, length)
+    _align_32(f)
+    return string_data
+
+
+def _read_data(f, dtype):
+    '''Read a variable with a specified data type
+
+    `dtype` is the integer typecode of the variable. An Exception is raised
+    for typecode 8, for an unknown typecode, and when the marker read before
+    a byte variable is not 1.
+    '''
+    # Typecode 8 is not expected to reach this function.
+    if dtype == 8:
+        raise Exception("Should not be here - please report this")
+
+    # A byte variable is preceded by a 32-bit integer that must be 1.
+    if dtype == 1:
+        if _read_int32(f) != 1:
+            raise Exception("Error occurred while reading byte variable")
+        return _read_byte(f)
+
+    # Complex values are stored as the real part followed by the imaginary
+    # part.
+    if dtype == 6:
+        real = _read_float32(f)
+        imag = _read_float32(f)
+        return np.complex64(real + imag * 1j)
+    if dtype == 9:
+        real = _read_float64(f)
+        imag = _read_float64(f)
+        return np.complex128(real + imag * 1j)
+
+    # Pointers hold a 32-bit integer index.
+    if dtype == 10:
+        return Pointer(_read_int32(f))
+    if dtype == 11:
+        return ObjectPointer(_read_int32(f))
+
+    # Remaining typecodes are read by one dedicated function each.
+    readers = (
+        (2, _read_int16),
+        (3, _read_int32),
+        (4, _read_float32),
+        (5, _read_float64),
+        (7, _read_string_data),
+        (12, _read_uint16),
+        (13, _read_uint32),
+        (14, _read_int64),
+        (15, _read_uint64),
+    )
+    for code, reader in readers:
+        if dtype == code:
+            return reader(f)
+
+    raise Exception("Unknown IDL type: %i - please report this" % dtype)
+
+
+def _read_structure(f, array_desc, struct_desc):
+    '''
+    Read a structure, with the array and structure descriptors given as
+    `array_desc` and `structure_desc` respectively.
+
+    The result is a record array with one field per entry of the tag table;
+    each field can be accessed by the tag name or by its lower-case form.
+    '''
+
+    nrows = array_desc['nelements']
+    columns = struct_desc['tagtable']
+
+    # Build the dtype: nested structures and arrays are stored as objects,
+    # other columns use the dtype associated with their typecode.
+    fields = []
+    for col in columns:
+        if col['structure'] or col['array']:
+            field_type = np.object_
+        elif col['typecode'] in DTYPE_DICT:
+            field_type = DTYPE_DICT[col['typecode']]
+        else:
+            raise Exception("Variable type %i not implemented" %
+                                                            col['typecode'])
+        fields.append(((col['name'].lower(), col['name']), field_type))
+
+    structure = np.recarray((nrows, ), dtype=fields)
+
+    # The values are stored row by row, in the order of the columns.
+    for i in range(nrows):
+        for col in columns:
+            typecode = col['typecode']
+            if col['structure']:
+                value = _read_structure(f,
+                                      struct_desc['arrtable'][col['name']],
+                                      struct_desc['structtable'][col['name']])
+            elif col['array']:
+                value = _read_array(f, typecode,
+                                      struct_desc['arrtable'][col['name']])
+            else:
+                value = _read_data(f, typecode)
+            structure[col['name']][i] = value
+
+    # Reshape structure if needed
+    if array_desc['ndims'] > 1:
+        ndims = int(array_desc['ndims'])
+        dims = array_desc['dims'][:ndims]
+        dims.reverse()
+        structure = structure.reshape(dims)
+
+    return structure
+
+
+def _read_array(f, typecode, array_desc):
+    '''
+    Read an array of type `typecode`, with the array descriptor given as
+    `array_desc`.
+    '''
+
+    if typecode in [1, 3, 4, 5, 6, 9, 13, 14, 15]:
+
+        if typecode == 1:
+            nbytes = _read_int32(f)
+            if nbytes != array_desc['nbytes']:
+                warnings.warn("Not able to verify number of bytes from header")
+
+        # Read bytes as numpy array
+        array = np.frombuffer(f.read(array_desc['nbytes']),
+                              dtype=DTYPE_DICT[typecode])
+
+    elif typecode in [2, 12]:
+
+        # These are 2 byte types, need to skip every two as they are not packed
+
+        array = np.frombuffer(f.read(array_desc['nbytes']*2),
+                              dtype=DTYPE_DICT[typecode])[1::2]
+
+    else:
+
+        # Read bytes into list
+        array = []
+        for i in range(array_desc['nelements']):
+            dtype = typecode
+            data = _read_data(f, dtype)
+            array.append(data)
+
+        array = np.array(array, dtype=np.object_)
+
+    # Reshape array if needed
+    if array_desc['ndims'] > 1:
+        dims = array_desc['dims'][:int(array_desc['ndims'])]
+        dims.reverse()
+        array = array.reshape(dims)
+
+    # Go to next alignment position
+    _align_32(f)
+
+    return array
+
+
+def _read_record(f):
+    '''Function to read in a full record'''
+
+    record = {'rectype': _read_long(f)}
+
+    nextrec = _read_uint32(f)
+    nextrec += _read_uint32(f) * 2**32
+
+    _skip_bytes(f, 4)
+
+    if not record['rectype'] in RECTYPE_DICT:
+        raise Exception("Unknown RECTYPE: %i" % record['rectype'])
+
+    record['rectype'] = RECTYPE_DICT[record['rectype']]
+
+    if record['rectype'] in ["VARIABLE", "HEAP_DATA"]:
+
+        if record['rectype'] == "VARIABLE":
+            record['varname'] = _read_string(f)
+        else:
+            record['heap_index'] = _read_long(f)
+            _skip_bytes(f, 4)
+
+        rectypedesc = _read_typedesc(f)
+
+        if rectypedesc['typecode'] == 0:
+
+            if nextrec == f.tell():
+                record['data'] = None  # Indicates NULL value
+            else:
+                raise ValueError("Unexpected type code: 0")
+
+        else:
+
+            varstart = _read_long(f)
+            if varstart != 7:
+                raise Exception("VARSTART is not 7")
+
+            if rectypedesc['structure']:
+                record['data'] = _read_structure(f, rectypedesc['array_desc'],
+                                                    rectypedesc['struct_desc'])
+            elif rectypedesc['array']:
+                record['data'] = _read_array(f, rectypedesc['typecode'],
+                                                rectypedesc['array_desc'])
+            else:
+                dtype = rectypedesc['typecode']
+                record['data'] = _read_data(f, dtype)
+
+    elif record['rectype'] == "TIMESTAMP":
+
+        _skip_bytes(f, 4*256)
+        record['date'] = _read_string(f)
+        record['user'] = _read_string(f)
+        record['host'] = _read_string(f)
+
+    elif record['rectype'] == "VERSION":
+
+        record['format'] = _read_long(f)
+        record['arch'] = _read_string(f)
+        record['os'] = _read_string(f)
+        record['release'] = _read_string(f)
+
+    elif record['rectype'] == "IDENTIFICATON":
+
+        record['author'] = _read_string(f)
+        record['title'] = _read_string(f)
+        record['idcode'] = _read_string(f)
+
+    elif record['rectype'] == "NOTICE":
+
+        record['notice'] = _read_string(f)
+
+    elif record['rectype'] == "DESCRIPTION":
+
+        record['description'] = _read_string_data(f)
+
+    elif record['rectype'] == "HEAP_HEADER":
+
+        record['nvalues'] = _read_long(f)
+        record['indices'] = []
+        for i in range(record['nvalues']):
+            record['indices'].append(_read_long(f))
+
+    elif record['rectype'] == "COMMONBLOCK":
+
+        record['nvars'] = _read_long(f)
+        record['name'] = _read_string(f)
+        record['varnames'] = []
+        for i in range(record['nvars']):
+            record['varnames'].append(_read_string(f))
+
+    elif record['rectype'] == "END_MARKER":
+
+        record['end'] = True
+
+    elif record['rectype'] == "UNKNOWN":
+
+        warnings.warn("Skipping UNKNOWN record")
+
+    elif record['rectype'] == "SYSTEM_VARIABLE":
+
+        warnings.warn("Skipping SYSTEM_VARIABLE record")
+
+    else:
+
+        raise Exception("record['rectype']=%s not implemented" %
+                                                            record['rectype'])
+
+    f.seek(nextrec)
+
+    return record
+
+
+def _read_typedesc(f):
+    '''Function to read in a type descriptor'''
+
+    typedesc = {'typecode': _read_long(f), 'varflags': _read_long(f)}
+
+    if typedesc['varflags'] & 2 == 2:
+        raise Exception("System variables not implemented")
+
+    typedesc['array'] = typedesc['varflags'] & 4 == 4
+    typedesc['structure'] = typedesc['varflags'] & 32 == 32
+
+    if typedesc['structure']:
+        typedesc['array_desc'] = _read_arraydesc(f)
+        typedesc['struct_desc'] = _read_structdesc(f)
+    elif typedesc['array']:
+        typedesc['array_desc'] = _read_arraydesc(f)
+
+    return typedesc
+
+
+def _read_arraydesc(f):
+    '''Function to read in an array descriptor'''
+
+    arraydesc = {'arrstart': _read_long(f)}
+
+    if arraydesc['arrstart'] == 8:
+
+        _skip_bytes(f, 4)
+
+        arraydesc['nbytes'] = _read_long(f)
+        arraydesc['nelements'] = _read_long(f)
+        arraydesc['ndims'] = _read_long(f)
+
+        _skip_bytes(f, 8)
+
+        arraydesc['nmax'] = _read_long(f)
+
+        arraydesc['dims'] = []
+        for d in range(arraydesc['nmax']):
+            arraydesc['dims'].append(_read_long(f))
+
+    elif arraydesc['arrstart'] == 18:
+
+        warnings.warn("Using experimental 64-bit array read")
+
+        _skip_bytes(f, 8)
+
+        arraydesc['nbytes'] = _read_uint64(f)
+        arraydesc['nelements'] = _read_uint64(f)
+        arraydesc['ndims'] = _read_long(f)
+
+        _skip_bytes(f, 8)
+
+        arraydesc['nmax'] = 8
+
+        arraydesc['dims'] = []
+        for d in range(arraydesc['nmax']):
+            v = _read_long(f)
+            if v != 0:
+                raise Exception("Expected a zero in ARRAY_DESC")
+            arraydesc['dims'].append(_read_long(f))
+
+    else:
+
+        raise Exception("Unknown ARRSTART: %i" % arraydesc['arrstart'])
+
+    return arraydesc
+
+
+def _read_structdesc(f):
+    '''Function to read in a structure descriptor'''
+
+    structdesc = {}
+
+    structstart = _read_long(f)
+    if structstart != 9:
+        raise Exception("STRUCTSTART should be 9")
+
+    structdesc['name'] = _read_string(f)
+    predef = _read_long(f)
+    structdesc['ntags'] = _read_long(f)
+    structdesc['nbytes'] = _read_long(f)
+
+    structdesc['predef'] = predef & 1
+    structdesc['inherits'] = predef & 2
+    structdesc['is_super'] = predef & 4
+
+    if not structdesc['predef']:
+
+        structdesc['tagtable'] = []
+        for t in range(structdesc['ntags']):
+            structdesc['tagtable'].append(_read_tagdesc(f))
+
+        for tag in structdesc['tagtable']:
+            tag['name'] = _read_string(f)
+
+        structdesc['arrtable'] = {}
+        for tag in structdesc['tagtable']:
+            if tag['array']:
+                structdesc['arrtable'][tag['name']] = _read_arraydesc(f)
+
+        structdesc['structtable'] = {}
+        for tag in structdesc['tagtable']:
+            if tag['structure']:
+                structdesc['structtable'][tag['name']] = _read_structdesc(f)
+
+        if structdesc['inherits'] or structdesc['is_super']:
+            structdesc['classname'] = _read_string(f)
+            structdesc['nsupclasses'] = _read_long(f)
+            structdesc['supclassnames'] = []
+            for s in range(structdesc['nsupclasses']):
+                structdesc['supclassnames'].append(_read_string(f))
+            structdesc['supclasstable'] = []
+            for s in range(structdesc['nsupclasses']):
+                structdesc['supclasstable'].append(_read_structdesc(f))
+
+        STRUCT_DICT[structdesc['name']] = structdesc
+
+    else:
+
+        if not structdesc['name'] in STRUCT_DICT:
+            raise Exception("PREDEF=1 but can't find definition")
+
+        structdesc = STRUCT_DICT[structdesc['name']]
+
+    return structdesc
+
+
+def _read_tagdesc(f):
+    '''Function to read in a tag descriptor'''
+
+    tagdesc = {'offset': _read_long(f)}
+
+    if tagdesc['offset'] == -1:
+        tagdesc['offset'] = _read_uint64(f)
+
+    tagdesc['typecode'] = _read_long(f)
+    tagflags = _read_long(f)
+
+    tagdesc['array'] = tagflags & 4 == 4
+    tagdesc['structure'] = tagflags & 32 == 32
+    tagdesc['scalar'] = tagdesc['typecode'] in DTYPE_DICT
+    # Assume '10'x is scalar
+
+    return tagdesc
+
+
+def _replace_heap(variable, heap):
+
+    if isinstance(variable, Pointer):
+
+        while isinstance(variable, Pointer):
+
+            if variable.index == 0:
+                variable = None
+            else:
+                if variable.index in heap:
+                    variable = heap[variable.index]
+                else:
+                    warnings.warn("Variable referenced by pointer not found "
+                                  "in heap: variable will be set to None")
+                    variable = None
+
+        replace, new = _replace_heap(variable, heap)
+
+        if replace:
+            variable = new
+
+        return True, variable
+
+    elif isinstance(variable, np.core.records.recarray):
+
+        # Loop over records
+        for ir, record in enumerate(variable):
+
+            replace, new = _replace_heap(record, heap)
+
+            if replace:
+                variable[ir] = new
+
+        return False, variable
+
+    elif isinstance(variable, np.core.records.record):
+
+        # Loop over values
+        for iv, value in enumerate(variable):
+
+            replace, new = _replace_heap(value, heap)
+
+            if replace:
+                variable[iv] = new
+
+        return False, variable
+
+    elif isinstance(variable, np.ndarray):
+
+        # Loop over values if type is np.object_
+        if variable.dtype.type is np.object_:
+
+            for iv in range(variable.size):
+
+                replace, new = _replace_heap(variable.item(iv), heap)
+
+                if replace:
+                    variable.itemset(iv, new)
+
+        return False, variable
+
+    else:
+
+        return False, variable
+
+
+class AttrDict(dict):
+    '''
+    A case-insensitive dictionary with access via item, attribute, and call
+    notations:
+
+        >>> d = AttrDict()
+        >>> d['Variable'] = 123
+        >>> d['Variable']
+        123
+        >>> d.Variable
+        123
+        >>> d.variable
+        123
+        >>> d('VARIABLE')
+        123
+    '''
+
+    def __init__(self, init={}):
+        dict.__init__(self, init)
+
+    def __getitem__(self, name):
+        return super(AttrDict, self).__getitem__(name.lower())
+
+    def __setitem__(self, key, value):
+        return super(AttrDict, self).__setitem__(key.lower(), value)
+
+    __getattr__ = __getitem__
+    __setattr__ = __setitem__
+    __call__ = __getitem__
+
+
+def readsav(file_name, idict=None, python_dict=False,
+            uncompressed_file_name=None, verbose=False):
+    """
+    Read an IDL .sav file.
+
+    Parameters
+    ----------
+    file_name : str
+        Name of the IDL save file.
+    idict : dict, optional
+        Dictionary in which to insert .sav file variables.
+    python_dict : bool, optional
+        By default, the object return is not a Python dictionary, but a
+        case-insensitive dictionary with item, attribute, and call access
+        to variables. To get a standard Python dictionary, set this option
+        to True.
+    uncompressed_file_name : str, optional
+        This option only has an effect for .sav files written with the
+        /compress option. If a file name is specified, compressed .sav
+        files are uncompressed to this file. Otherwise, readsav will use
+        the `tempfile` module to determine a temporary filename
+        automatically, and will remove the temporary file upon successfully
+        reading it in.
+    verbose : bool, optional
+        Whether to print out information about the save file, including
+        the records read, and available variables.
+
+    Returns
+    -------
+    idl_dict : AttrDict or dict
+        If `python_dict` is set to False (default), this function returns a
+        case-insensitive dictionary with item, attribute, and call access
+        to variables. If `python_dict` is set to True, this function
+        returns a Python dictionary with all variable names in lowercase.
+        If `idict` was specified, then variables are written to the
+        dictionary specified, and the updated dictionary is returned.
+
+    """
+
+    # Initialize record and variable holders
+    records = []
+    if python_dict or idict:
+        variables = {}
+    else:
+        variables = AttrDict()
+
+    # Open the IDL file
+    f = open(file_name, 'rb')
+
+    # Read the signature, which should be 'SR'
+    signature = _read_bytes(f, 2)
+    if signature != b'SR':
+        raise Exception("Invalid SIGNATURE: %s" % signature)
+
+    # Next, the record format, which is '\x00\x04' for normal .sav
+    # files, and '\x00\x06' for compressed .sav files.
+    recfmt = _read_bytes(f, 2)
+
+    if recfmt == b'\x00\x04':
+        pass
+
+    elif recfmt == b'\x00\x06':
+
+        if verbose:
+            print("IDL Save file is compressed")
+
+        if uncompressed_file_name:
+            fout = open(uncompressed_file_name, 'w+b')
+        else:
+            fout = tempfile.NamedTemporaryFile(suffix='.sav')
+
+        if verbose:
+            print(" -> expanding to %s" % fout.name)
+
+        # Write header
+        fout.write(b'SR\x00\x04')
+
+        # Cycle through records
+        while True:
+
+            # Read record type
+            rectype = _read_long(f)
+            fout.write(struct.pack('>l', int(rectype)))
+
+            # Read position of next record and return as int
+            nextrec = _read_uint32(f)
+            nextrec += _read_uint32(f) * 2**32
+
+            # Read the unknown 4 bytes
+            unknown = f.read(4)
+
+            # Check if the end of the file has been reached
+            if RECTYPE_DICT[rectype] == 'END_MARKER':
+                fout.write(struct.pack('>I', int(nextrec) % 2**32))
+                fout.write(struct.pack('>I', int((nextrec - (nextrec % 2**32)) / 2**32)))
+                fout.write(unknown)
+                break
+
+            # Find current position
+            pos = f.tell()
+
+            # Decompress record
+            rec_string = zlib.decompress(f.read(nextrec-pos))
+
+            # Find new position of next record
+            nextrec = fout.tell() + len(rec_string) + 12
+
+            # Write out record
+            fout.write(struct.pack('>I', int(nextrec % 2**32)))
+            fout.write(struct.pack('>I', int((nextrec - (nextrec % 2**32)) / 2**32)))
+            fout.write(unknown)
+            fout.write(rec_string)
+
+        # Close the original compressed file
+        f.close()
+
+        # Set f to be the decompressed file, and skip the first four bytes
+        f = fout
+        f.seek(4)
+
+    else:
+        raise Exception("Invalid RECFMT: %s" % recfmt)
+
+    # Loop through records, and add them to the list
+    while True:
+        r = _read_record(f)
+        records.append(r)
+        if 'end' in r:
+            if r['end']:
+                break
+
+    # Close the file
+    f.close()
+
+    # Find heap data variables
+    heap = {}
+    for r in records:
+        if r['rectype'] == "HEAP_DATA":
+            heap[r['heap_index']] = r['data']
+
+    # Find all variables
+    for r in records:
+        if r['rectype'] == "VARIABLE":
+            replace, new = _replace_heap(r['data'], heap)
+            if replace:
+                r['data'] = new
+            variables[r['varname'].lower()] = r['data']
+
+    if verbose:
+
+        # Print out timestamp info about the file
+        for record in records:
+            if record['rectype'] == "TIMESTAMP":
+                print("-"*50)
+                print("Date: %s" % record['date'])
+                print("User: %s" % record['user'])
+                print("Host: %s" % record['host'])
+                break
+
+        # Print out version info about the file
+        for record in records:
+            if record['rectype'] == "VERSION":
+                print("-"*50)
+                print("Format: %s" % record['format'])
+                print("Architecture: %s" % record['arch'])
+                print("Operating System: %s" % record['os'])
+                print("IDL Version: %s" % record['release'])
+                break
+
+        # Print out identification info about the file
+        for record in records:
+            if record['rectype'] == "IDENTIFICATON":
+                print("-"*50)
+                print("Author: %s" % record['author'])
+                print("Title: %s" % record['title'])
+                print("ID Code: %s" % record['idcode'])
+                break
+
+        # Print out descriptions saved with the file
+        for record in records:
+            if record['rectype'] == "DESCRIPTION":
+                print("-"*50)
+                print("Description: %s" % record['description'])
+                break
+
+        print("-"*50)
+        print("Successfully read %i records of which:" %
+                                            (len(records)))
+
+        # Create convenience list of record types
+        rectypes = [r['rectype'] for r in records]
+
+        for rt in set(rectypes):
+            if rt != 'END_MARKER':
+                print(" - %i are of type %s" % (rectypes.count(rt), rt))
+        print("-"*50)
+
+        if 'VARIABLE' in rectypes:
+            print("Available variables:")
+            for var in variables:
+                print(" - %s [%s]" % (var, type(variables[var])))
+            print("-"*50)
+
+    if idict:
+        for var in variables:
+            idict[var] = variables[var]
+        return idict
+    else:
+        return variables
@@ -0,0 +1,20 @@
+"""
+Utilities for dealing with MATLAB(R) files
+
+Notes
+-----
+MATLAB(R) is a registered trademark of The MathWorks, Inc., 3 Apple Hill
+Drive, Natick, MA 01760-2098, USA.
+
+"""
+from __future__ import division, print_function, absolute_import
+
+# Matlab file read and write utilities
+from .mio import loadmat, savemat, whosmat
+from . import byteordercodes
+
+# Names exported by this package
+__all__ = ['loadmat', 'savemat', 'whosmat', 'byteordercodes']
+
+# Provide `test`, a PytestTester instance created for this package, and
+# remove the PytestTester name itself from the package namespace again.
+from scipy._lib._testutils import PytestTester
+test = PytestTester(__name__)
+del PytestTester
@@ -0,0 +1,70 @@
+''' Byteorder utilities for system - numpy byteorder encoding
+
+Converts a variety of string codes for little endian, big endian,
+native byte order and swapped byte order to explicit numpy endian
+codes - one of '<' (little endian) or '>' (big endian)
+
+'''
+from __future__ import division, print_function, absolute_import
+
+import sys
+
+sys_is_le = sys.byteorder == 'little'
+native_code = sys_is_le and '<' or '>'
+swapped_code = sys_is_le and '>' or '<'
+
+aliases = {'little': ('little', '<', 'l', 'le'),
+           'big': ('big', '>', 'b', 'be'),
+           'native': ('native', '='),
+           'swapped': ('swapped', 'S')}
+
+
+def to_numpy_code(code):
+    """
+    Convert various order codings to numpy format.
+
+    Parameters
+    ----------
+    code : str
+        The code to convert. It is converted to lower case before parsing.
+        Legal values are:
+        'little', 'big', 'l', 'b', 'le', 'be', '<', '>', 'native', '=',
+        'swapped', 's'.
+
+    Returns
+    -------
+    out_code : {'<', '>'}
+        Here '<' is the numpy dtype code for little endian,
+        and '>' is the code for big endian.
+
+    Examples
+    --------
+    >>> import sys
+    >>> sys_is_le == (sys.byteorder == 'little')
+    True
+    >>> to_numpy_code('big')
+    '>'
+    >>> to_numpy_code('little')
+    '<'
+    >>> nc = to_numpy_code('native')
+    >>> nc == '<' if sys_is_le else nc == '>'
+    True
+    >>> sc = to_numpy_code('swapped')
+    >>> sc == '>' if sys_is_le else sc == '<'
+    True
+
+    """
+    code = code.lower()
+    if code is None:
+        return native_code
+    if code in aliases['little']:
+        return '<'
+    elif code in aliases['big']:
+        return '>'
+    elif code in aliases['native']:
+        return native_code
+    elif code in aliases['swapped']:
+        return swapped_code
+    else:
+        raise ValueError(
+            'We cannot handle byte order %s' % code)
@@ -0,0 +1,326 @@
+"""
+Module for reading and writing matlab (TM) .mat files
+"""
+# Authors: Travis Oliphant, Matthew Brett
+
+from __future__ import division, print_function, absolute_import
+
+from scipy._lib.six import string_types
+
+from .miobase import get_matfile_version, docfiller
+from .mio4 import MatFile4Reader, MatFile4Writer
+from .mio5 import MatFile5Reader, MatFile5Writer
+
+__all__ = ['mat_reader_factory', 'loadmat', 'savemat', 'whosmat']
+
+
+def _open_file(file_like, appendmat):
+    """
+    Open `file_like` and return as file-like object. First, check if object is
+    already file-like; if so, return it as-is. Otherwise, try to pass it
+    to open(). If that fails, and `file_like` is a string, and `appendmat` is true,
+    append '.mat' and try again.
+    """
+    try:
+        file_like.read(0)
+        return file_like, False
+    except AttributeError:
+        pass
+
+    try:
+        return open(file_like, 'rb'), True
+    except IOError:
+        # Probably "not found"
+        if isinstance(file_like, string_types):
+            if appendmat and not file_like.endswith('.mat'):
+                file_like += '.mat'
+            return open(file_like, 'rb'), True
+        else:
+            raise IOError('Reader needs file name or open file-like object')
+
+@docfiller
+def mat_reader_factory(file_name, appendmat=True, **kwargs):
+    """
+    Create reader for matlab .mat format files.
+
+    Parameters
+    ----------
+    %(file_arg)s
+    %(append_arg)s
+    %(load_args)s
+    %(struct_arg)s
+
+    Returns
+    -------
+    matreader : MatFileReader object
+       Initialized instance of MatFileReader class matching the mat file
+       type detected in `filename`.
+    file_opened : bool
+       Whether the file was opened by this routine.
+
+    """
+    byte_stream, file_opened = _open_file(file_name, appendmat)
+    mjv, mnv = get_matfile_version(byte_stream)
+    if mjv == 0:
+        return MatFile4Reader(byte_stream, **kwargs), file_opened
+    elif mjv == 1:
+        return MatFile5Reader(byte_stream, **kwargs), file_opened
+    elif mjv == 2:
+        raise NotImplementedError('Please use HDF reader for matlab v7.3 files')
+    else:
+        raise TypeError('Did not recognize version %s' % mjv)
+
+
+@docfiller
+def loadmat(file_name, mdict=None, appendmat=True, **kwargs):
+    """
+    Load MATLAB file.
+
+    Parameters
+    ----------
+    file_name : str
+       Name of the mat file (do not need .mat extension if
+       appendmat==True). Can also pass open file-like object.
+    mdict : dict, optional
+        Dictionary in which to insert matfile variables.
+    appendmat : bool, optional
+       True to append the .mat extension to the end of the given
+       filename, if not already present.
+    byte_order : str or None, optional
+       None by default, implying byte order guessed from mat
+       file. Otherwise can be one of ('native', '=', 'little', '<',
+       'BIG', '>').
+    mat_dtype : bool, optional
+       If True, return arrays in same dtype as would be loaded into
+       MATLAB (instead of the dtype with which they are saved).
+    squeeze_me : bool, optional
+       Whether to squeeze unit matrix dimensions or not.
+    chars_as_strings : bool, optional
+       Whether to convert char arrays to string arrays.
+    matlab_compatible : bool, optional
+       Returns matrices as would be loaded by MATLAB (implies
+       squeeze_me=False, chars_as_strings=False, mat_dtype=True,
+       struct_as_record=True).
+    struct_as_record : bool, optional
+       Whether to load MATLAB structs as numpy record arrays, or as
+       old-style numpy arrays with dtype=object.  Setting this flag to
+       False replicates the behavior of scipy version 0.7.x (returning
+       numpy object arrays).  The default setting is True, because it
+       allows easier round-trip load and save of MATLAB files.
+    verify_compressed_data_integrity : bool, optional
+        Whether the length of compressed sequences in the MATLAB file
+        should be checked, to ensure that they are not longer than we expect.
+        It is advisable to enable this (the default) because overlong
+        compressed sequences in MATLAB files generally indicate that the
+        files have experienced some sort of corruption.
+    variable_names : None or sequence
+        If None (the default) - read all variables in file. Otherwise
+        `variable_names` should be a sequence of strings, giving names of the
+        MATLAB variables to read from the file.  The reader will skip any
+        variable with a name not in this sequence, possibly saving some read
+        processing.
+
+    Returns
+    -------
+    mat_dict : dict
+       dictionary with variable names as keys, and loaded matrices as
+       values.
+
+    Notes
+    -----
+    v4 (Level 1.0), v6 and v7 to 7.2 matfiles are supported.
+
+    You will need an HDF5 python library to read MATLAB 7.3 format mat
+    files.  Because scipy does not supply one, we do not implement the
+    HDF5 / 7.3 interface here.
+
+    Examples
+    --------
+    >>> from os.path import dirname, join as pjoin
+    >>> import scipy.io as sio
+
+    Get the filename for an example .mat file from the tests/data directory.
+
+    >>> data_dir = pjoin(dirname(sio.__file__), 'matlab', 'tests', 'data')
+    >>> mat_fname = pjoin(data_dir, 'testdouble_7.4_GLNX86.mat')
+
+    Load the .mat file contents.
+
+    >>> mat_contents = sio.loadmat(mat_fname)
+
+    The result is a dictionary, one key/value pair for each variable:
+
+    >>> sorted(mat_contents.keys())
+    ['__globals__', '__header__', '__version__', 'testdouble']
+    >>> mat_contents['testdouble']
+    array([[0.        , 0.78539816, 1.57079633, 2.35619449, 3.14159265,
+            3.92699082, 4.71238898, 5.49778714, 6.28318531]])
+
+    By default SciPy reads MATLAB structs as structured NumPy arrays where the
+    dtype fields are of type `object` and the names correspond to the MATLAB
+    struct field names. This can be disabled by setting the optional argument
+    `struct_as_record=False`.
+
+    Get the filename for an example .mat file that contains a MATLAB struct
+    called `teststruct` and load the contents.
+
+    >>> matstruct_fname = pjoin(data_dir, 'teststruct_7.4_GLNX86.mat')
+    >>> matstruct_contents = sio.loadmat(matstruct_fname)
+    >>> teststruct = matstruct_contents['teststruct']
+    >>> teststruct.dtype
+    dtype([('stringfield', 'O'), ('doublefield', 'O'), ('complexfield', 'O')])
+
+    The size of the structured array is the size of the MATLAB struct, not the
+    number of elements in any particular field. The shape defaults to 2-D
+    unless the optional argument `squeeze_me=True`, in which case all length 1
+    dimensions are removed.
+
+    >>> teststruct.size
+    1
+    >>> teststruct.shape
+    (1, 1)
+
+    Get the 'stringfield' of the first element in the MATLAB struct.
+
+    >>> teststruct[0, 0]['stringfield']
+    array(['Rats live on no evil star.'],
+      dtype='<U26')
+
+    Get the first element of the 'doublefield'.
+
+    >>> teststruct['doublefield'][0, 0]
+    array([[ 1.41421356,  2.71828183,  3.14159265]])
+
+    Load the MATLAB struct, squeezing out length 1 dimensions, and get the item
+    from the 'complexfield'.
+
+    >>> matstruct_squeezed = sio.loadmat(matstruct_fname, squeeze_me=True)
+    >>> matstruct_squeezed['teststruct'].shape
+    ()
+    >>> matstruct_squeezed['teststruct']['complexfield'].shape
+    ()
+    >>> matstruct_squeezed['teststruct']['complexfield'].item()
+    array([ 1.41421356+1.41421356j,  2.71828183+2.71828183j,
+        3.14159265+3.14159265j])
+    """
+    variable_names = kwargs.pop('variable_names', None)
+    MR, file_opened = mat_reader_factory(file_name, appendmat, **kwargs)
+    matfile_dict = MR.get_variables(variable_names)
+    if mdict is not None:
+        mdict.update(matfile_dict)
+    else:
+        mdict = matfile_dict
+    if file_opened:
+        MR.mat_stream.close()
+    return mdict
+
+
+@docfiller
+def savemat(file_name, mdict,
+            appendmat=True,
+            format='5',
+            long_field_names=False,
+            do_compression=False,
+            oned_as='row'):
+    """
+    Save a dictionary of names and arrays into a MATLAB-style .mat file.
+
+    This saves the array objects in the given dictionary to a MATLAB-
+    style .mat file.
+
+    Parameters
+    ----------
+    file_name : str or file-like object
+        Name of the .mat file (.mat extension not needed if ``appendmat ==
+        True``).
+        Can also pass open file_like object.
+    mdict : dict
+        Dictionary from which to save matfile variables.
+    appendmat : bool, optional
+        True (the default) to append the .mat extension to the end of the
+        given filename, if not already present.
+    format : {'5', '4'}, string, optional
+        '5' (the default) for MATLAB 5 and up (to 7.2),
+        '4' for MATLAB 4 .mat files.
+    long_field_names : bool, optional
+        False (the default) - maximum field name length in a structure is
+        31 characters which is the documented maximum length.
+        True - maximum field name length in a structure is 63 characters
+        which works for MATLAB 7.6+.
+    do_compression : bool, optional
+        Whether or not to compress matrices on write.  Default is False.
+    oned_as : {'row', 'column'}, optional
+        If 'column', write 1-D numpy arrays as column vectors.
+        If 'row', write 1-D numpy arrays as row vectors.
+
+    See also
+    --------
+    mio4.MatFile4Writer
+    mio5.MatFile5Writer
+    """
+    file_opened = False
+    if hasattr(file_name, 'write'):
+        # File-like object already; use as-is
+        file_stream = file_name
+    else:
+        if isinstance(file_name, string_types):
+            if appendmat and not file_name.endswith('.mat'):
+                file_name = file_name + ".mat"
+
+        file_stream = open(file_name, 'wb')
+        file_opened = True
+
+    if format == '4':
+        if long_field_names:
+            raise ValueError("Long field names are not available for version 4 files")
+        MW = MatFile4Writer(file_stream, oned_as)
+    elif format == '5':
+        MW = MatFile5Writer(file_stream,
+                            do_compression=do_compression,
+                            unicode_strings=True,
+                            long_field_names=long_field_names,
+                            oned_as=oned_as)
+    else:
+        raise ValueError("Format should be '4' or '5'")
+    MW.put_variables(mdict)
+    if file_opened:
+        file_stream.close()
+
+
+@docfiller
+def whosmat(file_name, appendmat=True, **kwargs):
+    """
+    List variables inside a MATLAB file.
+
+    Parameters
+    ----------
+    %(file_arg)s
+    %(append_arg)s
+    %(load_args)s
+    %(struct_arg)s
+
+    Returns
+    -------
+    variables : list of tuples
+        A list of tuples, where each tuple holds the matrix name (a string),
+        its shape (tuple of ints), and its data class (a string).
+        Possible data classes are: int8, uint8, int16, uint16, int32, uint32,
+        int64, uint64, single, double, cell, struct, object, char, sparse,
+        function, opaque, logical, unknown.
+
+    Notes
+    -----
+    v4 (Level 1.0), v6 and v7 to 7.2 matfiles are supported.
+
+    You will need an HDF5 python library to read matlab 7.3 format mat
+    files.  Because scipy does not supply one, we do not implement the
+    HDF5 / 7.3 interface here.
+
+    .. versionadded:: 0.12.0
+
+    """
+    ML, file_opened = mat_reader_factory(file_name, **kwargs)
+    variables = ML.list_variables()
+    if file_opened:
+        ML.mat_stream.close()
+    return variables
@@ -0,0 +1,618 @@
+''' Classes for read / write of matlab (TM) 4 files
+'''
+from __future__ import division, print_function, absolute_import
+
+import sys
+import warnings
+
+import numpy as np
+from numpy.compat import asbytes, asstr
+
+import scipy.sparse
+
+from scipy._lib.six import string_types
+
+from .miobase import (MatFileReader, docfiller, matdims, read_dtype,
+                      convert_dtypes, arr_to_chars, arr_dtype_number)
+
+from .mio_utils import squeeze_element, chars_to_strings
+from functools import reduce
+
+
+# True when this interpreter runs on a little-endian machine
+SYS_LITTLE_ENDIAN = sys.byteorder == 'little'
+
+# Mat4 data type codes (the ``P`` digit decoded from the header ``mopt``
+# field by ``VarReader4.read_header``)
+miDOUBLE = 0
+miSINGLE = 1
+miINT32 = 2
+miINT16 = 3
+miUINT16 = 4
+miUINT8 = 5
+
+# Numpy dtype specifications, without byte order, keyed by data type code,
+# plus the layout of the fixed-size variable header ('header') and a single
+# unicode character ('U1').  ``MatFile4Reader.initialize_read`` passes this
+# template and the byte order of the file to ``convert_dtypes``.
+mdtypes_template = {
+    miDOUBLE: 'f8',
+    miSINGLE: 'f4',
+    miINT32: 'i4',
+    miINT16: 'i2',
+    miUINT16: 'u2',
+    miUINT8: 'u1',
+    # five int32 fields that precede the name of every variable
+    'header': [('mopt', 'i4'),
+               ('mrows', 'i4'),
+               ('ncols', 'i4'),
+               ('imagf', 'i4'),
+               ('namlen', 'i4')],
+    'U1': 'U1',
+    }
+
+# Data type code to write for a numpy array, keyed by the dtype string with
+# its byte order character removed (``arr.dtype.str[1:]``); see
+# ``VarWriter4.write_numeric``.  Complex types map to the code of their
+# real / imaginary parts.
+np_to_mtypes = {
+    'f8': miDOUBLE,
+    'c32': miDOUBLE,
+    'c24': miDOUBLE,
+    'c16': miDOUBLE,
+    'f4': miSINGLE,
+    'c8': miSINGLE,
+    'i4': miINT32,
+    'i2': miINT16,
+    'u2': miUINT16,
+    'u1': miUINT8,
+    'S1': miUINT8,
+    }
+
+# matrix classes (the ``T`` digit decoded from the header ``mopt`` field)
+mxFULL_CLASS = 0
+mxCHAR_CLASS = 1
+mxSPARSE_CLASS = 2
+
+# Names for the order code (the thousands digit ``M`` of ``mopt``); only
+# codes 0 and 1 are read without a warning by ``VarReader4.read_header``
+order_codes = {
+    0: '<',
+    1: '>',
+    2: 'VAX D-float',  # !
+    3: 'VAX G-float',
+    4: 'Cray',  # !!
+    }
+
+# Data class name reported by ``MatFile4Reader.list_variables`` for each
+# matrix class
+mclass_info = {
+    mxFULL_CLASS: 'double',
+    mxCHAR_CLASS: 'char',
+    mxSPARSE_CLASS: 'sparse',
+    }
+
+
+class VarHeader4(object):
+    ''' Plain container for the header fields of one matlab 4 variable
+
+    Instances carry ``name``, ``dtype``, ``mclass``, ``dims`` and
+    ``is_complex`` exactly as they were passed to the constructor.
+    '''
+    # Mat4 variables never logical or global
+    is_global = False
+    is_logical = False
+
+    def __init__(self, name, dtype, mclass, dims, is_complex):
+        (self.name,
+         self.dtype,
+         self.mclass,
+         self.dims,
+         self.is_complex) = (name, dtype, mclass, dims, is_complex)
+
+
+class VarReader4(object):
+    ''' Class to read matlab 4 variables '''
+
+    def __init__(self, file_reader):
+        # Share the stream, dtypes and read options of the owning reader
+        self.file_reader = file_reader
+        self.mat_stream = file_reader.mat_stream
+        self.dtypes = file_reader.dtypes
+        self.chars_as_strings = file_reader.chars_as_strings
+        self.squeeze_me = file_reader.squeeze_me
+
+    def read_header(self):
+        ''' Read and return header for variable
+
+        Returns
+        -------
+        header : ``VarHeader4`` instance
+            with ``dims`` stored as a tuple of Python ints
+
+        Raises
+        ------
+        ValueError
+            If ``mopt`` is outside 0..5000, or its ``O`` digit is not 0
+        '''
+        data = read_dtype(self.mat_stream, self.dtypes['header'])
+        name = self.mat_stream.read(int(data['namlen'])).strip(b'\x00')
+        if data['mopt'] < 0 or data['mopt'] > 5000:
+            raise ValueError('Mat 4 mopt wrong format, byteswapping problem?')
+        M, rest = divmod(data['mopt'], 1000)  # order code
+        if M not in (0, 1):
+            # ``mopt == 5000`` passes the range check above but gives an
+            # order code that has no entry in ``order_codes``; report the
+            # bare number instead of failing with a KeyError
+            warnings.warn("We do not support byte ordering '%s'; returned "
+                          "data may be corrupt" % order_codes.get(int(M), M),
+                          UserWarning)
+        O, rest = divmod(rest, 100)  # unused, should be 0
+        if O != 0:
+            raise ValueError('O in MOPT integer should be 0, wrong format?')
+        P, rest = divmod(rest, 10)  # data type code e.g miDOUBLE (see above)
+        T = rest  # matrix type code e.g. mxFULL_CLASS (see above)
+        # Python ints, so that later size arithmetic on the dimensions is
+        # not done in the fixed-width integer type of the header fields
+        dims = (int(data['mrows']), int(data['ncols']))
+        is_complex = data['imagf'] == 1
+        dtype = self.dtypes[P]
+        return VarHeader4(
+            name,
+            dtype,
+            T,
+            dims,
+            is_complex)
+
+    def array_from_header(self, hdr, process=True):
+        ''' Read the array described by `hdr` from the stream
+
+        Parameters
+        ----------
+        hdr : ``VarHeader4`` instance
+        process : bool, optional
+           If True, convert char arrays to strings when
+           ``chars_as_strings`` is set, and squeeze the result when
+           ``squeeze_me`` is set.  Sparse matrices are always returned
+           unprocessed.
+
+        Returns
+        -------
+        arr : ndarray or ``scipy.sparse.coo_matrix``
+
+        Raises
+        ------
+        TypeError
+            If the matrix class in `hdr` is not full, char or sparse
+        '''
+        mclass = hdr.mclass
+        if mclass == mxFULL_CLASS:
+            arr = self.read_full_array(hdr)
+        elif mclass == mxCHAR_CLASS:
+            arr = self.read_char_array(hdr)
+            if process and self.chars_as_strings:
+                arr = chars_to_strings(arr)
+        elif mclass == mxSPARSE_CLASS:
+            # no current processing (below) makes sense for sparse
+            return self.read_sparse_array(hdr)
+        else:
+            raise TypeError('No reader for class code %s' % mclass)
+        if process and self.squeeze_me:
+            return squeeze_element(arr)
+        return arr
+
+    def read_sub_array(self, hdr, copy=True):
+        ''' Mat4 read using header `hdr` dtype and dims
+
+        Parameters
+        ----------
+        hdr : object
+           object with attributes ``dtype``, ``dims``.  dtype is assumed to be
+           the correct endianness
+        copy : bool, optional
+           copies array before return if True (default True)
+           (buffer is usually read only)
+
+        Returns
+        -------
+        arr : ndarray
+            of dtype given by `hdr` ``dtype`` and shape given by `hdr` ``dims``
+
+        Raises
+        ------
+        ValueError
+            If the stream holds fewer bytes than the header asks for
+        '''
+        dt = hdr.dtype
+        dims = hdr.dims
+        # Accumulate in a Python int so the byte count cannot wrap
+        num_bytes = dt.itemsize
+        for d in dims:
+            num_bytes *= int(d)
+        buffer = self.mat_stream.read(num_bytes)
+        if len(buffer) != num_bytes:
+            raise ValueError("Not enough bytes to read matrix '%s'; is this "
+                             "a badly-formed file? Consider listing matrices "
+                             "with `whosmat` and loading named matrices with "
+                             "`variable_names` kwarg to `loadmat`" % hdr.name)
+        # Matrix data is stored column by column, hence Fortran order
+        arr = np.ndarray(shape=dims,
+                         dtype=dt,
+                         buffer=buffer,
+                         order='F')
+        if copy:
+            arr = arr.copy()
+        return arr
+
+    def read_full_array(self, hdr):
+        ''' Full (rather than sparse) matrix getter
+
+        Read matrix (array) can be real or complex
+
+        Parameters
+        ----------
+        hdr : ``VarHeader4`` instance
+
+        Returns
+        -------
+        arr : ndarray
+            complex array if ``hdr.is_complex`` is True, otherwise a real
+            numeric array
+        '''
+        if hdr.is_complex:
+            # The real part is stored first, then the imaginary part.
+            # Avoid array copy to save memory; the sum is a new array.
+            res = self.read_sub_array(hdr, copy=False)
+            res_j = self.read_sub_array(hdr, copy=False)
+            return res + (res_j * 1j)
+        return self.read_sub_array(hdr)
+
+    def read_char_array(self, hdr):
+        ''' latin-1 text matrix (char matrix) reader
+
+        Parameters
+        ----------
+        hdr : ``VarHeader4`` instance
+
+        Returns
+        -------
+        arr : ndarray
+            with dtype 'U1', shape given by `hdr` ``dims``
+        '''
+        arr = self.read_sub_array(hdr).astype(np.uint8)
+        # ``tobytes`` replaces the deprecated ``tostring`` alias
+        S = arr.tobytes().decode('latin-1')
+        return np.ndarray(shape=hdr.dims,
+                          dtype=np.dtype('U1'),
+                          buffer=np.array(S)).copy()
+
+    def read_sparse_array(self, hdr):
+        ''' Read and return sparse matrix type
+
+        Parameters
+        ----------
+        hdr : ``VarHeader4`` instance
+
+        Returns
+        -------
+        arr : ``scipy.sparse.coo_matrix``
+            with dtype ``float`` (``complex`` when the stored data has four
+            columns) and shape read from the sparse matrix data
+
+        Notes
+        -----
+        MATLAB 4 real sparse arrays are saved in a N+1 by 3 array format, where
+        N is the number of non-zero values.  Column 1 values [0:N] are the
+        (1-based) row indices of each non-zero value, column 2 [0:N] are the
+        column indices, column 3 [0:N] are the (real) values.  The last values
+        [-1,0:2] of the rows, column indices are shape[0] and shape[1]
+        respectively of the output matrix. The last value for the values column
+        is a padding 0. mrows and ncols values from the header give the shape of
+        the stored matrix, here [N+1, 3].  Complex data is saved as a 4 column
+        matrix, where the fourth column contains the imaginary component; the
+        last value is again 0.  Complex sparse data do *not* have the header
+        ``imagf`` field set to True; the fact that the data are complex is only
+        detectable because there are 4 storage columns
+        '''
+        res = self.read_sub_array(hdr)
+        tmp = res[:-1, :]
+        # All numbers are float64 in Matlab, but Scipy sparse expects int shape
+        dims = (int(res[-1, 0]), int(res[-1, 1]))
+        I = np.ascontiguousarray(tmp[:, 0], dtype='intc')  # fixes byte order also
+        J = np.ascontiguousarray(tmp[:, 1], dtype='intc')
+        I -= 1  # for 1-based indexing
+        J -= 1
+        if res.shape[1] == 3:
+            V = np.ascontiguousarray(tmp[:, 2], dtype='float')
+        else:
+            V = np.ascontiguousarray(tmp[:, 2], dtype='complex')
+            V.imag = tmp[:, 3]
+        return scipy.sparse.coo_matrix((V, (I, J)), dims)
+
+    def shape_from_header(self, hdr):
+        '''Read the shape of the array described by the header.
+
+        The file position after this call is unspecified.
+
+        Parameters
+        ----------
+        hdr : ``VarHeader4`` instance
+
+        Returns
+        -------
+        shape : tuple of int
+            For char matrices the last dimension is dropped when
+            ``chars_as_strings`` is set.  For sparse matrices the shape is
+            read from the stored data, or is ``()`` when the stored
+            dimensions cannot hold a shape.  Dimensions of length 1 are
+            removed when ``squeeze_me`` is set.
+
+        Raises
+        ------
+        TypeError
+            If the matrix class in `hdr` is not full, char or sparse
+        '''
+        mclass = hdr.mclass
+        if mclass == mxFULL_CLASS:
+            shape = tuple(map(int, hdr.dims))
+        elif mclass == mxCHAR_CLASS:
+            shape = tuple(map(int, hdr.dims))
+            if self.chars_as_strings:
+                shape = shape[:-1]
+        elif mclass == mxSPARSE_CLASS:
+            dt = hdr.dtype
+            dims = hdr.dims
+
+            if not (len(dims) == 2 and dims[0] >= 1 and dims[1] >= 1):
+                return ()
+
+            # Read only the row and column counts: they are the last entry
+            # of the first and of the second stored column
+            skip = dt.itemsize * (int(dims[0]) - 1)
+            self.mat_stream.seek(skip, 1)
+            rows = np.ndarray(shape=(1,), dtype=dt,
+                              buffer=self.mat_stream.read(dt.itemsize))
+            self.mat_stream.seek(skip, 1)
+            cols = np.ndarray(shape=(1,), dtype=dt,
+                              buffer=self.mat_stream.read(dt.itemsize))
+
+            # Index the element: int() of a 1-element array is deprecated
+            shape = (int(rows[0]), int(cols[0]))
+        else:
+            raise TypeError('No reader for class code %s' % mclass)
+
+        if self.squeeze_me:
+            shape = tuple([x for x in shape if x != 1])
+        return shape
+
+
+class MatFile4Reader(MatFileReader):
+    ''' Reader for Mat4 files '''
+    @docfiller
+    def __init__(self, mat_stream, *args, **kwargs):
+        ''' Initialize matlab 4 file reader
+
+    %(matstream_arg)s
+    %(load_args)s
+        '''
+        super(MatFile4Reader, self).__init__(mat_stream, *args, **kwargs)
+        # created by ``initialize_read``
+        self._matrix_reader = None
+
+    def guess_byte_order(self):
+        ''' Guess byte order from the first ``mopt`` value in the stream
+
+        The first 4 bytes are read as a native-order int32 and the stream
+        is put back at position 0.
+
+        Returns
+        -------
+        byte_order : {'<', '>'}
+        '''
+        self.mat_stream.seek(0)
+        mopt = read_dtype(self.mat_stream, np.dtype('i4'))
+        self.mat_stream.seek(0)
+        if mopt == 0:
+            return '<'
+        if mopt < 0 or mopt > 5000:
+            # Number must have been byteswapped
+            return '>' if SYS_LITTLE_ENDIAN else '<'
+        # Not byteswapped
+        return '<' if SYS_LITTLE_ENDIAN else '>'
+
+    def initialize_read(self):
+        ''' Run when beginning read of variables
+
+        Sets up readers from parameters in `self`
+        '''
+        self.dtypes = convert_dtypes(mdtypes_template, self.byte_order)
+        self._matrix_reader = VarReader4(self)
+
+    def read_var_header(self):
+        ''' Read and return header, next position
+
+        Parameters
+        ----------
+        None
+
+        Returns
+        -------
+        header : object
+           object that can be passed to self.read_var_array, and that
+           has attributes ``name`` and ``is_global``
+        next_position : int
+           position in stream of next variable
+        '''
+        hdr = self._matrix_reader.read_header()
+        # Multiply as Python ints: the dimensions come from 32-bit header
+        # fields, and a fixed-width product can wrap for large matrices
+        n = reduce(lambda x, y: x * int(y), hdr.dims, 1)
+        remaining_bytes = hdr.dtype.itemsize * n
+        # Full complex matrices store a second block of the same size for
+        # the imaginary part
+        if hdr.is_complex and not hdr.mclass == mxSPARSE_CLASS:
+            remaining_bytes *= 2
+        next_position = self.mat_stream.tell() + remaining_bytes
+        return hdr, next_position
+
+    def read_var_array(self, header, process=True):
+        ''' Read array, given `header`
+
+        Parameters
+        ----------
+        header : header object
+           object with fields defining variable header
+        process : {True, False}, optional
+           If True, apply recursive post-processing during loading of array.
+
+        Returns
+        -------
+        arr : array
+           array with post-processing applied or not according to
+           `process`.
+        '''
+        return self._matrix_reader.array_from_header(header, process)
+
+    def get_variables(self, variable_names=None):
+        ''' get variables from stream as dictionary
+
+        Parameters
+        ----------
+        variable_names : None or str or sequence of str, optional
+            variable name, or sequence of variable names to get from Mat file /
+            file stream.  If None, then get all variables in file
+
+        Returns
+        -------
+        mdict : dict
+            maps variable name to the array read for it
+        '''
+        if isinstance(variable_names, string_types):
+            variable_names = [variable_names]
+        elif variable_names is not None:
+            # private copy: names are removed from it as they are found
+            variable_names = list(variable_names)
+        self.mat_stream.seek(0)
+        # set up variable reader
+        self.initialize_read()
+        mdict = {}
+        while not self.end_of_stream():
+            hdr, next_position = self.read_var_header()
+            name = asstr(hdr.name)
+            if variable_names is not None and name not in variable_names:
+                self.mat_stream.seek(next_position)
+                continue
+            mdict[name] = self.read_var_array(hdr)
+            self.mat_stream.seek(next_position)
+            if variable_names is not None:
+                variable_names.remove(name)
+                # stop early once every requested name has been read
+                if not variable_names:
+                    break
+        return mdict
+
+    def list_variables(self):
+        ''' list variables from stream
+
+        Returns
+        -------
+        variables : list of tuples
+            one ``(name, shape, data class name)`` tuple per variable
+        '''
+        self.mat_stream.seek(0)
+        # set up variable reader
+        self.initialize_read()
+        variables = []
+        while not self.end_of_stream():
+            hdr, next_position = self.read_var_header()
+            name = asstr(hdr.name)
+            shape = self._matrix_reader.shape_from_header(hdr)
+            info = mclass_info.get(hdr.mclass, 'unknown')
+            variables.append((name, shape, info))
+
+            # shape_from_header leaves the position unspecified
+            self.mat_stream.seek(next_position)
+        return variables
+
+
+def arr_to_2d(arr, oned_as='row'):
+    ''' Return ``arr`` reshaped to the dimensions given by ``matdims``
+
+    A ValueError is raised when those dimensions number more than 2.
+
+    Parameters
+    ----------
+    arr : array
+    oned_as : {'row', 'column'}, optional
+       Whether to reshape 1D vectors as row vectors or column vectors.
+       See documentation for ``matdims`` for more detail
+
+    Returns
+    -------
+    arr2d : array
+       2D version of the array
+    '''
+    shape_2d = matdims(arr, oned_as)
+    if len(shape_2d) <= 2:
+        return arr.reshape(shape_2d)
+    raise ValueError('Matlab 4 files cannot save arrays with more than '
+                     '2 dimensions')
+
+
+class VarWriter4(object):
+    ''' Class to write matlab 4 variables '''
+
+    def __init__(self, file_writer):
+        # Share the stream and the 1D handling of the owning file writer
+        self.file_stream = file_writer.file_stream
+        self.oned_as = file_writer.oned_as
+
+    def write_bytes(self, arr):
+        ''' Write the data of `arr` to the stream in Fortran order '''
+        # ``tobytes`` replaces the deprecated ``tostring`` alias
+        self.file_stream.write(arr.tobytes(order='F'))
+
+    def write_string(self, s):
+        ''' Write `s` to the stream unchanged '''
+        self.file_stream.write(s)
+
+    def write_header(self, name, shape, P=miDOUBLE, T=mxFULL_CLASS, imagf=0):
+        ''' Write header for given data options
+
+        Parameters
+        ----------
+        name : str
+            name of variable
+        shape : sequence
+           Shape of array as it will be read in matlab
+        P : int, optional
+            code for mat4 data type, one of ``miDOUBLE, miSINGLE, miINT32,
+            miINT16, miUINT16, miUINT8``
+        T : int, optional
+            code for mat4 matrix class, one of ``mxFULL_CLASS, mxCHAR_CLASS,
+            mxSPARSE_CLASS``
+        imagf : int, optional
+            flag indicating complex
+        '''
+        header = np.empty((), mdtypes_template['header'])
+        # order code: 0 on little-endian machines, 1 on big-endian ones
+        M = not SYS_LITTLE_ENDIAN
+        O = 0
+        header['mopt'] = (M * 1000 +
+                          O * 100 +
+                          P * 10 +
+                          T)
+        header['mrows'] = shape[0]
+        header['ncols'] = shape[1]
+        header['imagf'] = imagf
+        # the stored name is followed by one null byte
+        header['namlen'] = len(name) + 1
+        self.write_bytes(header)
+        self.write_string(asbytes(name + '\0'))
+
+    def write(self, arr, name):
+        ''' Write matrix `arr`, with name `name`
+
+        Parameters
+        ----------
+        arr : array_like
+           array to write
+        name : str
+           name in matlab workspace
+
+        Raises
+        ------
+        TypeError
+            If `arr` converts to an object or void type array
+        '''
+        # we need to catch sparse first, because np.asarray returns an
+        # an object array for scipy.sparse
+        if scipy.sparse.issparse(arr):
+            self.write_sparse(arr, name)
+            return
+        arr = np.asarray(arr)
+        dt = arr.dtype
+        if not dt.isnative:
+            arr = arr.astype(dt.newbyteorder('='))
+        # Dispatch on the dtype kind character; the scalar type aliases
+        # ``np.unicode_`` and ``np.string_`` are not available in all
+        # numpy versions
+        kind = dt.kind
+        if kind == 'O':
+            raise TypeError('Cannot save object arrays in Mat4')
+        elif kind == 'V':
+            raise TypeError('Cannot save void type arrays')
+        elif kind in ('U', 'S'):
+            self.write_char(arr, name)
+            return
+        self.write_numeric(arr, name)
+
+    def write_numeric(self, arr, name):
+        ''' Write numeric array `arr` as a full matrix called `name`
+
+        Types without an entry in ``np_to_mtypes`` are converted to
+        double precision.  Complex arrays are written as the real part
+        followed by the imaginary part.
+        '''
+        arr = arr_to_2d(arr, self.oned_as)
+        imagf = arr.dtype.kind == 'c'
+        if imagf and arr.dtype.itemsize > 16:
+            # Extended precision complex is labelled miDOUBLE in
+            # ``np_to_mtypes``, so its parts have to be written as 8 byte
+            # doubles to agree with the header
+            arr = arr.astype('c16')
+        try:
+            P = np_to_mtypes[arr.dtype.str[1:]]
+        except KeyError:
+            # 'c16' is complex with 8 byte parts ('c128' is not a dtype)
+            arr = arr.astype('c16' if imagf else 'f8')
+            P = miDOUBLE
+        self.write_header(name,
+                          arr.shape,
+                          P=P,
+                          T=mxFULL_CLASS,
+                          imagf=imagf)
+        if imagf:
+            self.write_bytes(arr.real)
+            self.write_bytes(arr.imag)
+        else:
+            self.write_bytes(arr)
+
+    def write_char(self, arr, name):
+        ''' Write string array `arr` as a char matrix called `name`
+
+        Unicode data is encoded as latin-1 before writing.
+        '''
+        arr = arr_to_chars(arr)
+        arr = arr_to_2d(arr, self.oned_as)
+        dims = arr.shape
+        self.write_header(
+            name,
+            dims,
+            P=miUINT8,
+            T=mxCHAR_CLASS)
+        if arr.dtype.kind == 'U':
+            # Recode unicode to latin1: view all the characters as one
+            # string, encode it, and view the bytes with the same shape
+            n_chars = np.prod(dims)
+            st_arr = np.ndarray(shape=(),
+                                dtype=arr_dtype_number(arr, n_chars),
+                                buffer=arr)
+            st = st_arr.item().encode('latin-1')
+            arr = np.ndarray(shape=dims, dtype='S1', buffer=st)
+        self.write_bytes(arr)
+
+    def write_sparse(self, arr, name):
+        ''' Sparse matrices are 2D
+
+        See docstring for VarReader4.read_sparse_array
+        '''
+        A = arr.tocoo()  # convert to sparse COO format (ijv)
+        imagf = A.dtype.kind == 'c'
+        # one row per non-zero value plus a final row holding the shape;
+        # complex data takes a fourth column for the imaginary part
+        n_cols = 4 if imagf else 3
+        ijv = np.zeros((A.nnz + 1, n_cols), dtype='f8')
+        ijv[:-1, 0] = A.row
+        ijv[:-1, 1] = A.col
+        ijv[:-1, 0:2] += 1  # 1 based indexing
+        if imagf:
+            ijv[:-1, 2] = A.data.real
+            ijv[:-1, 3] = A.data.imag
+        else:
+            ijv[:-1, 2] = A.data
+        ijv[-1, 0:2] = A.shape
+        # ``imagf`` is left at 0 in the header, also for complex data
+        self.write_header(
+            name,
+            ijv.shape,
+            P=miDOUBLE,
+            T=mxSPARSE_CLASS)
+        self.write_bytes(ijv)
+
+
+class MatFile4Writer(object):
+    ''' Class for writing matlab 4 format files '''
+    def __init__(self, file_stream, oned_as=None):
+        self.file_stream = file_stream
+        # 1D arrays are written as rows unless the caller says otherwise
+        self.oned_as = 'row' if oned_as is None else oned_as
+        self._matrix_writer = None
+
+    def put_variables(self, mdict, write_header=None):
+        ''' Write variables in `mdict` to stream
+
+        Parameters
+        ----------
+        mdict : mapping
+           mapping whose ``items`` method returns (name, contents) pairs,
+           where ``name`` is the name that will appear in the matlab
+           workspace on file load, and ``contents`` is something
+           writeable to a matlab file, such as a numpy array.
+        write_header : {None, True, False}
+           Accepted for compatibility with the matlab 5 version of this
+           method and otherwise ignored: a matlab 4 mat file has no file
+           header.
+        '''
+        writer = VarWriter4(self)
+        self._matrix_writer = writer
+        for name, var in mdict.items():
+            writer.write(var, name)
@@ -0,0 +1,849 @@
+''' Classes for read / write of matlab (TM) 5 files
+
+The matfile specification last found here:
+
+https://www.mathworks.com/access/helpdesk/help/pdf_doc/matlab/matfile_format.pdf
+
+(as of December 5 2008)
+'''
+from __future__ import division, print_function, absolute_import
+
+'''
+=================================
+ Note on functions and mat files
+=================================
+
+The document above does not give any hints as to the storage of matlab
+function handles, or anonymous function handles.  I had therefore to
+guess the format of matlab arrays of ``mxFUNCTION_CLASS`` and
+``mxOPAQUE_CLASS`` by looking at example mat files.
+
+``mxFUNCTION_CLASS`` stores all types of matlab functions.  It seems to
+contain a struct matrix with a set pattern of fields.  For anonymous
+functions, a sub-field of one of these fields seems to contain the
+well-named ``mxOPAQUE_CLASS``. This seems to contain:
+
+* array flags as for any matlab matrix
+* 3 int8 strings
+* a matrix
+
+It seems that, whenever the mat file contains a ``mxOPAQUE_CLASS``
+instance, there is also an un-named matrix (name == '') at the end of
+the mat file.  I'll call this the ``__function_workspace__`` matrix.
+
+When I saved two anonymous functions in a mat file, or appended another
+anonymous function to the mat file, there was still only one
+``__function_workspace__`` un-named matrix at the end, but larger than
+that for a mat file with a single anonymous function, suggesting that
+the workspaces for the two functions had been merged.
+
+The ``__function_workspace__`` matrix appears to be of double class
+(``mxDOUBLE_CLASS``), but stored as uint8, the memory for which is in
+the format of a mini .mat file, without the first 124 bytes of the file
+header (the description and the subsystem_offset), but with the version
+U2 bytes, and the S2 endian test bytes.  There follow 4 zero bytes,
+presumably for 8 byte padding, and then a series of ``miMATRIX``
+entries, as in a standard mat file. The ``miMATRIX`` entries appear to
+be series of un-named (name == '') matrices, and may also contain arrays
+of this same mini-mat format.
+
+I guess that:
+
+* saving an anonymous function back to a mat file will need the
+  associated ``__function_workspace__`` matrix saved as well for the
+  anonymous function to work correctly.
+* appending to a mat file that has a ``__function_workspace__`` would
+  involve first pulling off this workspace, appending, checking whether
+  there were any more anonymous functions appended, and then somehow
+  merging the relevant workspaces, and saving at the end of the mat
+  file.
+
+The mat files I was playing with are in ``tests/data``:
+
+* sqr.mat
+* parabola.mat
+* some_functions.mat
+
+See ``tests/test_mio.py:test_mio_funcs.py`` for a debugging
+script I was working with.
+
+'''
+
+# Small fragments of current code adapted from matfile.py by Heiko
+# Henkelmann
+
+import os
+import time
+import sys
+import zlib
+
+from io import BytesIO
+
+import warnings
+
+import numpy as np
+from numpy.compat import asbytes, asstr
+
+import scipy.sparse
+
+from scipy._lib.six import string_types
+
+from .byteordercodes import native_code, swapped_code
+
+from .miobase import (MatFileReader, docfiller, matdims, read_dtype,
+                      arr_to_chars, arr_dtype_number, MatWriteError,
+                      MatReadError, MatReadWarning)
+
+# Reader object for matlab 5 format variables
+from .mio5_utils import VarReader5
+
+# Constants and helper objects
+from .mio5_params import (MatlabObject, MatlabFunction, MDTYPES, NP_TO_MTYPES,
+                          NP_TO_MXTYPES, miCOMPRESSED, miMATRIX, miINT8,
+                          miUTF8, miUINT32, mxCELL_CLASS, mxSTRUCT_CLASS,
+                          mxOBJECT_CLASS, mxCHAR_CLASS, mxSPARSE_CLASS,
+                          mxDOUBLE_CLASS, mclass_info)
+
+from .streams import ZlibInputStream
+
+
+class MatFile5Reader(MatFileReader):
+    ''' Reader for Mat 5 mat files
+
+    Adds the following attribute to base class
+
+    uint16_codec - char codec to use for uint16 char arrays
+        (defaults to system default codec)
+
+    Uses variable reader that has the following standard interface (see
+    abstract class in ``miobase``)::
+
+       __init__(self, file_reader)
+       read_header(self)
+       array_from_header(self)
+
+    and added interface::
+
+       set_stream(self, stream)
+       read_full_tag(self)
+
+    '''
+    @docfiller
+    def __init__(self,
+                 mat_stream,
+                 byte_order=None,
+                 mat_dtype=False,
+                 squeeze_me=False,
+                 chars_as_strings=True,
+                 matlab_compatible=False,
+                 struct_as_record=True,
+                 verify_compressed_data_integrity=True,
+                 uint16_codec=None
+                 ):
+        '''Initializer for matlab 5 file format reader
+
+    %(matstream_arg)s
+    %(load_args)s
+    %(struct_arg)s
+    uint16_codec : {None, string}
+        Set codec to use for uint16 char arrays (e.g. 'utf-8').
+        Use system default codec if None
+        '''
+        super(MatFile5Reader, self).__init__(
+            mat_stream, byte_order, mat_dtype, squeeze_me, chars_as_strings,
+            matlab_compatible, struct_as_record,
+            verify_compressed_data_integrity)
+        # A missing (or empty) codec falls back to the interpreter default
+        self.uint16_codec = uint16_codec or sys.getdefaultencoding()
+        # placeholders for readers - see initialize_read method
+        self._file_reader = None
+        self._matrix_reader = None
+
+    def guess_byte_order(self):
+        ''' Guess byte order from the two bytes at offset 126.
+
+        Returns '<' when they read ``IM`` and '>' otherwise.
+        Sets stream pointer to 0 '''
+        stream = self.mat_stream
+        stream.seek(126)
+        endian_test = stream.read(2)
+        stream.seek(0)
+        return '<' if endian_test == b'IM' else '>'
+
+    def read_file_header(self):
+        ''' Read in mat 5 file header
+
+        Returns
+        -------
+        hdict : dict
+            with key ``__header__`` (the stripped description bytes) and
+            key ``__version__`` (a 'major.minor' string)
+        '''
+        hdr_dtype = MDTYPES[self.byte_order]['dtypes']['file_header']
+        hdr = read_dtype(self.mat_stream, hdr_dtype)
+        hdict = {
+            '__header__': hdr['description'].item().strip(b' \t\n\000'),
+        }
+        # high byte of the version field is the major number, low byte
+        # the minor number
+        version = hdr['version']
+        hdict['__version__'] = '%d.%d' % (version >> 8, version & 0xFF)
+        return hdict
+
+    def initialize_read(self):
+        ''' Run when beginning read of variables
+
+        Sets up readers from parameters in `self`
+        '''
+        # Two readers are needed: one stays on the top level stream, the
+        # other is the matrix reader, which is pointed at its own stream
+        # when a matrix is compressed
+        self._file_reader = VarReader5(self)
+        self._matrix_reader = VarReader5(self)
+
+    def read_var_header(self):
+        ''' Read header, return header, next position
+
+        Header has to define at least .name and .is_global
+
+        Parameters
+        ----------
+        None
+
+        Returns
+        -------
+        header : object
+           object that can be passed to self.read_var_array, and that
+           has attributes .name and .is_global
+        next_position : int
+           position in stream of next variable
+
+        Raises
+        ------
+        ValueError
+            If the tag read from the stream has no positive byte count
+        TypeError
+            If the element found is not of type miMATRIX
+        '''
+        mdtype, byte_count = self._file_reader.read_full_tag()
+        if not byte_count > 0:
+            raise ValueError("Did not read any bytes")
+        next_pos = self.mat_stream.tell() + byte_count
+        matrix_reader = self._matrix_reader
+        if mdtype == miCOMPRESSED:
+            # Make new stream from compressed data, and read the tag of
+            # the element inside it
+            matrix_reader.set_stream(
+                ZlibInputStream(self.mat_stream, byte_count))
+            check_stream_limit = self.verify_compressed_data_integrity
+            mdtype, byte_count = matrix_reader.read_full_tag()
+        else:
+            check_stream_limit = False
+            matrix_reader.set_stream(self.mat_stream)
+        if mdtype != miMATRIX:
+            raise TypeError('Expecting miMATRIX type here, got %d' % mdtype)
+        return matrix_reader.read_header(check_stream_limit), next_pos
+
+    def read_var_array(self, header, process=True):
+        ''' Read array, given `header`
+
+        Parameters
+        ----------
+        header : header object
+           object with fields defining variable header
+        process : {True, False} bool, optional
+           If True, apply recursive post-processing during loading of
+           array.
+
+        Returns
+        -------
+        arr : array
+           array with post-processing applied or not according to
+           `process`.
+        '''
+        return self._matrix_reader.array_from_header(header, process)
+
+    def get_variables(self, variable_names=None):
+        ''' get variables from stream as dictionary
+
+        Parameters
+        ----------
+        variable_names : None or str or sequence of str, optional
+            name, or names, of the variables to get.  If None, then get
+            all variables in file
+
+        Returns
+        -------
+        mdict : dict
+            the file header entries, a ``__globals__`` list naming the
+            global variables that were read, and one entry per variable.
+            A variable that raises ``MatReadError`` is stored as a
+            "Read error: ..." string after a warning.
+        '''
+        if isinstance(variable_names, string_types):
+            wanted = [variable_names]
+        elif variable_names is not None:
+            # private copy: names are removed from it as they are found
+            wanted = list(variable_names)
+        else:
+            wanted = None
+
+        self.mat_stream.seek(0)
+        # Here we pass all the parameters in self to the reading objects
+        self.initialize_read()
+        mdict = self.read_file_header()
+        mdict['__globals__'] = []
+        while not self.end_of_stream():
+            hdr, next_position = self.read_var_header()
+            name = asstr(hdr.name)
+            if name in mdict:
+                warnings.warn('Duplicate variable name "%s" in stream'
+                              ' - replacing previous with new\n'
+                              'Consider mio5.varmats_from_mat to split '
+                              'file into single variable files' % name,
+                              MatReadWarning, stacklevel=2)
+            # An empty name can only be a matlab 7 function workspace.
+            # We want to keep this raw because mat_dtype processing
+            # will break the format (uint8 as mxDOUBLE_CLASS)
+            process = name != ''
+            if not process:
+                name = '__function_workspace__'
+            if wanted is not None and name not in wanted:
+                self.mat_stream.seek(next_position)
+                continue
+            try:
+                res = self.read_var_array(hdr, process)
+            except MatReadError as err:
+                warnings.warn(
+                    'Unreadable variable "%s", because "%s"' %
+                    (name, err),
+                    Warning, stacklevel=2)
+                res = "Read error: %s" % err
+            self.mat_stream.seek(next_position)
+            mdict[name] = res
+            if hdr.is_global:
+                mdict['__globals__'].append(name)
+            if wanted is not None:
+                wanted.remove(name)
+                # stop early once every requested name has been read
+                if not wanted:
+                    break
+        return mdict
+
+    def list_variables(self):
+        ''' list variables from stream
+
+        Returns
+        -------
+        variables : list of tuples
+            one ``(name, shape, data class name)`` tuple per variable
+        '''
+        self.mat_stream.seek(0)
+        # Here we pass all the parameters in self to the reading objects
+        self.initialize_read()
+        self.read_file_header()
+        found = []
+        while not self.end_of_stream():
+            hdr, next_position = self.read_var_header()
+            # an empty name can only be a matlab 7 function workspace
+            name = asstr(hdr.name) or '__function_workspace__'
+
+            shape = self._matrix_reader.shape_from_header(hdr)
+            info = ('logical' if hdr.is_logical
+                    else mclass_info.get(hdr.mclass, 'unknown'))
+            found.append((name, shape, info))
+
+            self.mat_stream.seek(next_position)
+        return found
+
+
+def varmats_from_mat(file_obj):
+    """ Pull variables out of mat 5 file as a sequence of mat file objects
+
+    This can be useful with a difficult mat file, containing unreadable
+    variables.  This routine pulls the variables out in raw form and puts them,
+    unread, back into a file stream for saving or reading.  Another use is the
+    pathological case where there is more than one variable of the same name in
+    the file; this routine returns the duplicates, whereas the standard reader
+    will overwrite duplicates in the returned dictionary.
+
+    The file pointer in `file_obj` will be undefined.  File pointers for the
+    returned file-like objects are set at 0.
+
+    Parameters
+    ----------
+    file_obj : file-like
+        file object containing mat file
+
+    Returns
+    -------
+    named_mats : list
+        list contains tuples of (name, BytesIO) where BytesIO is a file-like
+        object containing mat file contents as for a single variable.  The
+        BytesIO contains a string with the original header and a single var. If
+        ``var_file_obj`` is an individual BytesIO instance, then save as a mat
+        file with something like ``open('test.mat',
+        'wb').write(var_file_obj.read())``
+
+    Examples
+    --------
+    >>> import scipy.io
+
+    BytesIO is from the ``io`` module in python 3, and is ``cStringIO`` for
+    python < 3.
+
+    >>> mat_fileobj = BytesIO()
+    >>> scipy.io.savemat(mat_fileobj, {'b': np.arange(10), 'a': 'a string'})
+    >>> varmats = varmats_from_mat(mat_fileobj)
+    >>> sorted([name for name, str_obj in varmats])
+    ['a', 'b']
+    """
+    rdr = MatFile5Reader(file_obj)
+    file_obj.seek(0)
+    # Raw read of top-level file header
+    hdr_len = MDTYPES[native_code]['dtypes']['file_header'].itemsize
+    raw_hdr = file_obj.read(hdr_len)
+    # Initialize variable reading
+    file_obj.seek(0)
+    rdr.initialize_read()
+    mdict = rdr.read_file_header()
+    next_position = file_obj.tell()
+    named_mats = []
+    while not rdr.end_of_stream():
+        start_position = next_position
+        hdr, next_position = rdr.read_var_header()
+        name = asstr(hdr.name)
+        # Read raw variable string
+        file_obj.seek(start_position)
+        byte_count = next_position - start_position
+        var_str = file_obj.read(byte_count)
+        # write to stringio object
+        out_obj = BytesIO()
+        out_obj.write(raw_hdr)
+        out_obj.write(var_str)
+        out_obj.seek(0)
+        named_mats.append((name, out_obj))
+    return named_mats
+
+
+class EmptyStructMarker(object):
+    """ Class to indicate presence of empty matlab struct on output
+
+    The class object itself is used as the marker: it is returned as-is and
+    recognized with an identity (``is``) check, so it is never instantiated
+    for that purpose.
+    """
+
+
+def to_writeable(source):
+    ''' Convert input object ``source`` to something we can write
+
+    Parameters
+    ----------
+    source : object
+
+    Returns
+    -------
+    arr : None or ndarray or EmptyStructMarker
+        If `source` cannot be converted to something we can write to a matfile,
+        return None.  If `source` is equivalent to an empty dictionary, return
+        ``EmptyStructMarker``.  Otherwise return `source` converted to an
+        ndarray with contents for writing to matfile.
+    '''
+    if isinstance(source, np.ndarray):
+        return source
+    if source is None:
+        return None
+    # Objects that implement mappings
+    is_mapping = (hasattr(source, 'keys') and hasattr(source, 'values') and
+                  hasattr(source, 'items'))
+    # Objects that don't implement mappings, but do have dicts
+    if isinstance(source, np.generic):
+        # Numpy scalars are never mappings (pypy issue workaround)
+        pass
+    elif not is_mapping and hasattr(source, '__dict__'):
+        source = dict((key, value) for key, value in source.__dict__.items()
+                      if not key.startswith('_'))
+        is_mapping = True
+    if is_mapping:
+        dtype = []
+        values = []
+        for field, value in source.items():
+            if (isinstance(field, string_types) and
+                    field[0] not in '_0123456789'):
+                dtype.append((str(field), object))
+                values.append(value)
+        if dtype:
+            return np.array([tuple(values)], dtype)
+        else:
+            return EmptyStructMarker
+    # Next try and convert to an array
+    narr = np.asanyarray(source)
+    if narr.dtype.type in (object, np.object_) and \
+       narr.shape == () and narr == source:
+        # No interesting conversion possible
+        return None
+    return narr
+
+
+# Native byte ordered dtypes for convenience for writers
+# Each one is the ``MDTYPES[native_code]['dtypes']`` entry of the same key,
+# looked up once here so the writer code can refer to it by a short name.
+NDT_FILE_HDR = MDTYPES[native_code]['dtypes']['file_header']
+NDT_TAG_FULL = MDTYPES[native_code]['dtypes']['tag_full']
+NDT_TAG_SMALL = MDTYPES[native_code]['dtypes']['tag_smalldata']
+NDT_ARRAY_FLAGS = MDTYPES[native_code]['dtypes']['array_flags']
+
+
+class VarWriter5(object):
+    ''' Generic matlab matrix writing class '''
+    mat_tag = np.zeros((), NDT_TAG_FULL)
+    mat_tag['mdtype'] = miMATRIX
+
+    def __init__(self, file_writer):
+        self.file_stream = file_writer.file_stream
+        self.unicode_strings = file_writer.unicode_strings
+        self.long_field_names = file_writer.long_field_names
+        self.oned_as = file_writer.oned_as
+        # These are used for top level writes, and unset after
+        self._var_name = None
+        self._var_is_global = False
+
+    def write_bytes(self, arr):
+        self.file_stream.write(arr.tostring(order='F'))
+
+    def write_string(self, s):
+        self.file_stream.write(s)
+
+    def write_element(self, arr, mdtype=None):
+        ''' write tag and data '''
+        if mdtype is None:
+            mdtype = NP_TO_MTYPES[arr.dtype.str[1:]]
+        # Array needs to be in native byte order
+        if arr.dtype.byteorder == swapped_code:
+            arr = arr.byteswap().newbyteorder()
+        byte_count = arr.size*arr.itemsize
+        if byte_count <= 4:
+            self.write_smalldata_element(arr, mdtype, byte_count)
+        else:
+            self.write_regular_element(arr, mdtype, byte_count)
+
+    def write_smalldata_element(self, arr, mdtype, byte_count):
+        # write tag with embedded data
+        tag = np.zeros((), NDT_TAG_SMALL)
+        tag['byte_count_mdtype'] = (byte_count << 16) + mdtype
+        # if arr.tostring is < 4, the element will be zero-padded as needed.
+        tag['data'] = arr.tostring(order='F')
+        self.write_bytes(tag)
+
+    def write_regular_element(self, arr, mdtype, byte_count):
+        # write tag, data
+        tag = np.zeros((), NDT_TAG_FULL)
+        tag['mdtype'] = mdtype
+        tag['byte_count'] = byte_count
+        self.write_bytes(tag)
+        self.write_bytes(arr)
+        # pad to next 64-bit boundary
+        bc_mod_8 = byte_count % 8
+        if bc_mod_8:
+            self.file_stream.write(b'\x00' * (8-bc_mod_8))
+
+    def write_header(self,
+                     shape,
+                     mclass,
+                     is_complex=False,
+                     is_logical=False,
+                     nzmax=0):
+        ''' Write header for given data options
+        shape : sequence
+           array shape
+        mclass      - mat5 matrix class
+        is_complex  - True if matrix is complex
+        is_logical  - True if matrix is logical
+        nzmax        - max non zero elements for sparse arrays
+
+        We get the name and the global flag from the object, and reset
+        them to defaults after we've used them
+        '''
+        # get name and is_global from one-shot object store
+        name = self._var_name
+        is_global = self._var_is_global
+        # initialize the top-level matrix tag, store position
+        self._mat_tag_pos = self.file_stream.tell()
+        self.write_bytes(self.mat_tag)
+        # write array flags (complex, global, logical, class, nzmax)
+        af = np.zeros((), NDT_ARRAY_FLAGS)
+        af['data_type'] = miUINT32
+        af['byte_count'] = 8
+        flags = is_complex << 3 | is_global << 2 | is_logical << 1
+        af['flags_class'] = mclass | flags << 8
+        af['nzmax'] = nzmax
+        self.write_bytes(af)
+        # shape
+        self.write_element(np.array(shape, dtype='i4'))
+        # write name
+        name = np.asarray(name)
+        if name == '':  # empty string zero-terminated
+            self.write_smalldata_element(name, miINT8, 0)
+        else:
+            self.write_element(name, miINT8)
+        # reset the one-shot store to defaults
+        self._var_name = ''
+        self._var_is_global = False
+
+    def update_matrix_tag(self, start_pos):
+        curr_pos = self.file_stream.tell()
+        self.file_stream.seek(start_pos)
+        byte_count = curr_pos - start_pos - 8
+        if byte_count >= 2**32:
+            raise MatWriteError("Matrix too large to save with Matlab "
+                                "5 format")
+        self.mat_tag['byte_count'] = byte_count
+        self.write_bytes(self.mat_tag)
+        self.file_stream.seek(curr_pos)
+
+    def write_top(self, arr, name, is_global):
+        """ Write variable at top level of mat file
+
+        Parameters
+        ----------
+        arr : array_like
+            array-like object to create writer for
+        name : str, optional
+            name as it will appear in matlab workspace
+            default is empty string
+        is_global : {False, True}, optional
+            whether variable will be global on load into matlab
+        """
+        # these are set before the top-level header write, and unset at
+        # the end of the same write, because they do not apply for lower levels
+        self._var_is_global = is_global
+        self._var_name = name
+        # write the header and data
+        self.write(arr)
+
+    def write(self, arr):
+        ''' Write `arr` to stream at top and sub levels
+
+        Parameters
+        ----------
+        arr : array_like
+            array-like object to create writer for
+        '''
+        # store position, so we can update the matrix tag
+        mat_tag_pos = self.file_stream.tell()
+        # First check if these are sparse
+        if scipy.sparse.issparse(arr):
+            self.write_sparse(arr)
+            self.update_matrix_tag(mat_tag_pos)
+            return
+        # Try to convert things that aren't arrays
+        narr = to_writeable(arr)
+        if narr is None:
+            raise TypeError('Could not convert %s (type %s) to array'
+                            % (arr, type(arr)))
+        if isinstance(narr, MatlabObject):
+            self.write_object(narr)
+        elif isinstance(narr, MatlabFunction):
+            raise MatWriteError('Cannot write matlab functions')
+        elif narr is EmptyStructMarker:  # empty struct array
+            self.write_empty_struct()
+        elif narr.dtype.fields:  # struct array
+            self.write_struct(narr)
+        elif narr.dtype.hasobject:  # cell array
+            self.write_cells(narr)
+        elif narr.dtype.kind in ('U', 'S'):
+            if self.unicode_strings:
+                codec = 'UTF8'
+            else:
+                codec = 'ascii'
+            self.write_char(narr, codec)
+        else:
+            self.write_numeric(narr)
+        self.update_matrix_tag(mat_tag_pos)
+
+    def write_numeric(self, arr):
+        imagf = arr.dtype.kind == 'c'
+        logif = arr.dtype.kind == 'b'
+        try:
+            mclass = NP_TO_MXTYPES[arr.dtype.str[1:]]
+        except KeyError:
+            # No matching matlab type, probably complex256 / float128 / float96
+            # Cast data to complex128 / float64.
+            if imagf:
+                arr = arr.astype('c128')
+            elif logif:
+                arr = arr.astype('i1')  # Should only contain 0/1
+            else:
+                arr = arr.astype('f8')
+            mclass = mxDOUBLE_CLASS
+        self.write_header(matdims(arr, self.oned_as),
+                          mclass,
+                          is_complex=imagf,
+                          is_logical=logif)
+        if imagf:
+            self.write_element(arr.real)
+            self.write_element(arr.imag)
+        else:
+            self.write_element(arr)
+
+    def write_char(self, arr, codec='ascii'):
+        ''' Write string array `arr` with given `codec`
+        '''
+        if arr.size == 0 or np.all(arr == ''):
+            # This an empty string array or a string array containing
+            # only empty strings.  Matlab cannot distinguish between a
+            # string array that is empty, and a string array containing
+            # only empty strings, because it stores strings as arrays of
+            # char.  There is no way of having an array of char that is
+            # not empty, but contains an empty string. We have to
+            # special-case the array-with-empty-strings because even
+            # empty strings have zero padding, which would otherwise
+            # appear in matlab as a string with a space.
+            shape = (0,) * np.max([arr.ndim, 2])
+            self.write_header(shape, mxCHAR_CLASS)
+            self.write_smalldata_element(arr, miUTF8, 0)
+            return
+        # non-empty string.
+        #
+        # Convert to char array
+        arr = arr_to_chars(arr)
+        # We have to write the shape directly, because we are going
+        # recode the characters, and the resulting stream of chars
+        # may have a different length
+        shape = arr.shape
+        self.write_header(shape, mxCHAR_CLASS)
+        if arr.dtype.kind == 'U' and arr.size:
+            # Make one long string from all the characters.  We need to
+            # transpose here, because we're flattening the array, before
+            # we write the bytes.  The bytes have to be written in
+            # Fortran order.
+            n_chars = np.product(shape)
+            st_arr = np.ndarray(shape=(),
+                                dtype=arr_dtype_number(arr, n_chars),
+                                buffer=arr.T.copy())  # Fortran order
+            # Recode with codec to give byte string
+            st = st_arr.item().encode(codec)
+            # Reconstruct as one-dimensional byte array
+            arr = np.ndarray(shape=(len(st),),
+                             dtype='S1',
+                             buffer=st)
+        self.write_element(arr, mdtype=miUTF8)
+
+    def write_sparse(self, arr):
+        ''' Sparse matrices are 2D
+        '''
+        A = arr.tocsc()  # convert to sparse CSC format
+        A.sort_indices()     # MATLAB expects sorted row indices
+        is_complex = (A.dtype.kind == 'c')
+        is_logical = (A.dtype.kind == 'b')
+        nz = A.nnz
+        self.write_header(matdims(arr, self.oned_as),
+                          mxSPARSE_CLASS,
+                          is_complex=is_complex,
+                          is_logical=is_logical,
+                          # matlab won't load file with 0 nzmax
+                          nzmax=1 if nz == 0 else nz)
+        self.write_element(A.indices.astype('i4'))
+        self.write_element(A.indptr.astype('i4'))
+        self.write_element(A.data.real)
+        if is_complex:
+            self.write_element(A.data.imag)
+
+    def write_cells(self, arr):
+        self.write_header(matdims(arr, self.oned_as),
+                          mxCELL_CLASS)
+        # loop over data, column major
+        A = np.atleast_2d(arr).flatten('F')
+        for el in A:
+            self.write(el)
+
+    def write_empty_struct(self):
+        self.write_header((1, 1), mxSTRUCT_CLASS)
+        # max field name length set to 1 in an example matlab struct
+        self.write_element(np.array(1, dtype=np.int32))
+        # Field names element is empty
+        self.write_element(np.array([], dtype=np.int8))
+
+    def write_struct(self, arr):
+        self.write_header(matdims(arr, self.oned_as),
+                          mxSTRUCT_CLASS)
+        self._write_items(arr)
+
+    def _write_items(self, arr):
+        # write fieldnames
+        fieldnames = [f[0] for f in arr.dtype.descr]
+        length = max([len(fieldname) for fieldname in fieldnames])+1
+        max_length = (self.long_field_names and 64) or 32
+        if length > max_length:
+            raise ValueError("Field names are restricted to %d characters" %
+                             (max_length-1))
+        self.write_element(np.array([length], dtype='i4'))
+        self.write_element(
+            np.array(fieldnames, dtype='S%d' % (length)),
+            mdtype=miINT8)
+        A = np.atleast_2d(arr).flatten('F')
+        for el in A:
+            for f in fieldnames:
+                self.write(el[f])
+
+    def write_object(self, arr):
+        '''Same as writing structs, except different mx class, and extra
+        classname element after header
+        '''
+        self.write_header(matdims(arr, self.oned_as),
+                          mxOBJECT_CLASS)
+        self.write_element(np.array(arr.classname, dtype='S'),
+                           mdtype=miINT8)
+        self._write_items(arr)
+
+
+class MatFile5Writer(object):
+    ''' Class for writing mat5 files '''
+
+    @docfiller
+    def __init__(self, file_stream,
+                 do_compression=False,
+                 unicode_strings=False,
+                 global_vars=None,
+                 long_field_names=False,
+                 oned_as='row'):
+        ''' Initialize writer for matlab 5 format files
+
+        Parameters
+        ----------
+        %(do_compression)s
+        %(unicode_strings)s
+        global_vars : None or sequence of strings, optional
+            Names of variables to be marked as global for matlab
+        %(long_fields)s
+        %(oned_as)s
+        '''
+        self.file_stream = file_stream
+        self.do_compression = do_compression
+        self.unicode_strings = unicode_strings
+        if global_vars:
+            self.global_vars = global_vars
+        else:
+            self.global_vars = []
+        self.long_field_names = long_field_names
+        self.oned_as = oned_as
+        self._matrix_writer = None
+
+    def write_file_header(self):
+        # write header
+        hdr = np.zeros((), NDT_FILE_HDR)
+        hdr['description'] = 'MATLAB 5.0 MAT-file Platform: %s, Created on: %s' \
+            % (os.name,time.asctime())
+        hdr['version'] = 0x0100
+        hdr['endian_test'] = np.ndarray(shape=(),
+                                      dtype='S2',
+                                      buffer=np.uint16(0x4d49))
+        self.file_stream.write(hdr.tostring())
+
+    def put_variables(self, mdict, write_header=None):
+        ''' Write variables in `mdict` to stream
+
+        Parameters
+        ----------
+        mdict : mapping
+           mapping with method ``items`` returns name, contents pairs where
+           ``name`` which will appear in the matlab workspace in file load, and
+           ``contents`` is something writeable to a matlab file, such as a numpy
+           array.
+        write_header : {None, True, False}, optional
+           If True, then write the matlab file header before writing the
+           variables.  If None (the default) then write the file header
+           if we are at position 0 in the stream.  By setting False
+           here, and setting the stream position to the end of the file,
+           you can append variables to a matlab file
+        '''
+        # write header if requested, or None and start of file
+        if write_header is None:
+            write_header = self.file_stream.tell() == 0
+        if write_header:
+            self.write_file_header()
+        self._matrix_writer = VarWriter5(self)
+        for name, var in mdict.items():
+            if name[0] == '_':
+                continue
+            is_global = name in self.global_vars
+            if self.do_compression:
+                stream = BytesIO()
+                self._matrix_writer.file_stream = stream
+                self._matrix_writer.write_top(var, asbytes(name), is_global)
+                out_str = zlib.compress(stream.getvalue())
+                tag = np.empty((), NDT_TAG_FULL)
+                tag['mdtype'] = miCOMPRESSED
+                tag['byte_count'] = len(out_str)
+                self.file_stream.write(tag.tostring())
+                self.file_stream.write(out_str)
+            else:  # not compressing
+                self._matrix_writer.write_top(var, asbytes(name), is_global)
@@ -0,0 +1,254 @@
+''' Constants and classes for matlab 5 read and write
+
+See also mio5_utils.pyx where these same constants arise as c enums.
+
+If you make changes in this file, don't forget to change mio5_utils.pyx
+'''
+from __future__ import division, print_function, absolute_import
+
+import numpy as np
+
+from .miobase import convert_dtypes
+
+# Data type codes (mi*).  These are the values stored in the ``mdtype``
+# field of element tags.
+miINT8 = 1
+miUINT8 = 2
+miINT16 = 3
+miUINT16 = 4
+miINT32 = 5
+miUINT32 = 6
+miSINGLE = 7
+miDOUBLE = 9
+miINT64 = 12
+miUINT64 = 13
+miMATRIX = 14
+miCOMPRESSED = 15
+miUTF8 = 16
+miUTF16 = 17
+miUTF32 = 18
+
+# Array class codes (mx*).
+mxCELL_CLASS = 1
+mxSTRUCT_CLASS = 2
+# The March 2008 edition of "Matlab 7 MAT-File Format" says that
+# mxOBJECT_CLASS = 3, whereas matrix.h says that mxLOGICAL = 3.
+# Matlab 2008a appears to save logicals as type 9, so we assume that
+# the document is correct.  See type 18, below.
+mxOBJECT_CLASS = 3
+mxCHAR_CLASS = 4
+mxSPARSE_CLASS = 5
+mxDOUBLE_CLASS = 6
+mxSINGLE_CLASS = 7
+mxINT8_CLASS = 8
+mxUINT8_CLASS = 9
+mxINT16_CLASS = 10
+mxUINT16_CLASS = 11
+mxINT32_CLASS = 12
+mxUINT32_CLASS = 13
+# The following are not in the March 2008 edition of "Matlab 7
+# MAT-File Format," but were guessed from matrix.h.
+mxINT64_CLASS = 14
+mxUINT64_CLASS = 15
+mxFUNCTION_CLASS = 16
+# Not doing anything with these at the moment.
+mxOPAQUE_CLASS = 17  # This appears to be a function workspace
+# Thread 'saveing/loading symbol table of annymous functions', octave-maintainers, April-May 2007
+# https://lists.gnu.org/archive/html/octave-maintainers/2007-04/msg00031.html
+# https://lists.gnu.org/archive/html/octave-maintainers/2007-05/msg00032.html
+# (Was/Deprecated: https://www-old.cae.wisc.edu/pipermail/octave-maintainers/2007-May/002824.html)
+mxOBJECT_CLASS_FROM_MATRIX_H = 18
+
+# Dtype specifications without a byte order prefix, keyed by mi* data type
+# code, plus a few record layouts keyed by name.  Passed to
+# ``convert_dtypes`` once per byte order when ``MDTYPES`` is built.
+mdtypes_template = {
+    miINT8: 'i1',
+    miUINT8: 'u1',
+    miINT16: 'i2',
+    miUINT16: 'u2',
+    miINT32: 'i4',
+    miUINT32: 'u4',
+    miSINGLE: 'f4',
+    miDOUBLE: 'f8',
+    miINT64: 'i8',
+    miUINT64: 'u8',
+    miUTF8: 'u1',
+    miUTF16: 'u2',
+    miUTF32: 'u4',
+    'file_header': [('description', 'S116'),
+                    ('subsystem_offset', 'i8'),
+                    ('version', 'u2'),
+                    ('endian_test', 'S2')],
+    'tag_full': [('mdtype', 'u4'), ('byte_count', 'u4')],
+    'tag_smalldata':[('byte_count_mdtype', 'u4'), ('data', 'S4')],
+    'array_flags': [('data_type', 'u4'),
+                    ('byte_count', 'u4'),
+                    ('flags_class','u4'),
+                    ('nzmax', 'u4')],
+    'U1': 'U1',
+    }
+
+# Dtype specification (no byte order prefix) for each numeric mx* class.
+# Passed to ``convert_dtypes`` once per byte order when ``MDTYPES`` is built.
+mclass_dtypes_template = {
+    mxINT8_CLASS: 'i1',
+    mxUINT8_CLASS: 'u1',
+    mxINT16_CLASS: 'i2',
+    mxUINT16_CLASS: 'u2',
+    mxINT32_CLASS: 'i4',
+    mxUINT32_CLASS: 'u4',
+    mxINT64_CLASS: 'i8',
+    mxUINT64_CLASS: 'u8',
+    mxSINGLE_CLASS: 'f4',
+    mxDOUBLE_CLASS: 'f8',
+    }
+
+# Descriptive name for each mx* class code.  Note that
+# mxOBJECT_CLASS_FROM_MATRIX_H has no entry here.
+mclass_info = {
+    mxINT8_CLASS: 'int8',
+    mxUINT8_CLASS: 'uint8',
+    mxINT16_CLASS: 'int16',
+    mxUINT16_CLASS: 'uint16',
+    mxINT32_CLASS: 'int32',
+    mxUINT32_CLASS: 'uint32',
+    mxINT64_CLASS: 'int64',
+    mxUINT64_CLASS: 'uint64',
+    mxSINGLE_CLASS: 'single',
+    mxDOUBLE_CLASS: 'double',
+    mxCELL_CLASS: 'cell',
+    mxSTRUCT_CLASS: 'struct',
+    mxOBJECT_CLASS: 'object',
+    mxCHAR_CLASS: 'char',
+    mxSPARSE_CLASS: 'sparse',
+    mxFUNCTION_CLASS: 'function',
+    mxOPAQUE_CLASS: 'opaque',
+    }
+
+# mi* data type code for a numpy dtype, keyed by the dtype string with its
+# byte order character removed (kind letter followed by item size).
+# Complex dtypes share the code of a floating point type.
+NP_TO_MTYPES = {
+    'f8': miDOUBLE,
+    'c32': miDOUBLE,
+    'c24': miDOUBLE,
+    'c16': miDOUBLE,
+    'f4': miSINGLE,
+    'c8': miSINGLE,
+    'i8': miINT64,
+    'i4': miINT32,
+    'i2': miINT16,
+    'i1': miINT8,
+    'u8': miUINT64,
+    'u4': miUINT32,
+    'u2': miUINT16,
+    'u1': miUINT8,
+    'S1': miUINT8,
+    'U1': miUTF16,
+    'b1': miUINT8,  # not standard but seems MATLAB uses this (gh-4022)
+    }
+
+
+# mx* array class for a numpy dtype, keyed the same way as NP_TO_MTYPES
+# (dtype string without the byte order character).  There is no 'U1' entry.
+NP_TO_MXTYPES = {
+    'f8': mxDOUBLE_CLASS,
+    'c32': mxDOUBLE_CLASS,
+    'c24': mxDOUBLE_CLASS,
+    'c16': mxDOUBLE_CLASS,
+    'f4': mxSINGLE_CLASS,
+    'c8': mxSINGLE_CLASS,
+    'i8': mxINT64_CLASS,
+    'i4': mxINT32_CLASS,
+    'i2': mxINT16_CLASS,
+    'i1': mxINT8_CLASS,
+    'u8': mxUINT64_CLASS,
+    'u4': mxUINT32_CLASS,
+    'u2': mxUINT16_CLASS,
+    'u1': mxUINT8_CLASS,
+    'S1': mxUINT8_CLASS,
+    'b1': mxUINT8_CLASS,  # not standard but seems MATLAB uses this
+    }
+
+''' Before release v7.1 (release 14) matlab (TM) used the system
+default character encoding scheme padded out to 16-bits. Release 14
+and later use Unicode. When saving character data, R14 checks if it
+can be encoded in 7-bit ascii, and saves in that format if so.'''
+
+# For each mi* text type: the root codec name (no byte order suffix) and a
+# width.  ``_convert_codecs`` appends the byte order suffix only to codecs
+# whose width is greater than 1.
+codecs_template = {
+    miUTF8: {'codec': 'utf_8', 'width': 1},
+    miUTF16: {'codec': 'utf_16', 'width': 2},
+    miUTF32: {'codec': 'utf_32','width': 4},
+    }
+
+
+def _convert_codecs(template, byte_order):
+    ''' Convert codec template mapping to byte order
+
+    Set codecs not on this system to None
+
+    Parameters
+    ----------
+    template : mapping
+       key, value are respectively codec name, and root name for codec
+       (without byte order suffix)
+    byte_order : {'<', '>'}
+       code for little or big endian
+
+    Returns
+    -------
+    codecs : dict
+       key, value are name, codec (as in .encode(codec))
+    '''
+    codecs = {}
+    postfix = byte_order == '<' and '_le' or '_be'
+    for k, v in template.items():
+        codec = v['codec']
+        try:
+            " ".encode(codec)
+        except LookupError:
+            codecs[k] = None
+            continue
+        if v['width'] > 1:
+            codec += postfix
+        codecs[k] = codec
+    return codecs.copy()
+
+
+# Per byte order lookup: ``MDTYPES['<']`` and ``MDTYPES['>']`` each hold the
+# 'dtypes', 'classes' and 'codecs' mappings built from the templates above
+# for that byte order.
+MDTYPES = {}
+for _bytecode in '<>':
+    _def = {'dtypes': convert_dtypes(mdtypes_template, _bytecode),
+            'classes': convert_dtypes(mclass_dtypes_template, _bytecode),
+            'codecs': _convert_codecs(codecs_template, _bytecode)}
+    MDTYPES[_bytecode] = _def
+
+
+class mat_struct(object):
+    ''' Placeholder for holding read data from structs
+
+    We use instances of this class when the user passes False as a value to the
+    ``struct_as_record`` parameter of the :func:`scipy.io.matlab.loadmat`
+    function.
+    '''
+    pass
+
+
+class MatlabObject(np.ndarray):
+    ''' ndarray Subclass to contain matlab object '''
+    def __new__(cls, input_array, classname=None):
+        # Input array is an already formed ndarray instance
+        # We first cast to be our class type
+        obj = np.asarray(input_array).view(cls)
+        # add the new attribute to the created instance
+        obj.classname = classname
+        # Finally, we must return the newly created object:
+        return obj
+
+    def __array_finalize__(self,obj):
+        # reset the attribute from passed original object
+        self.classname = getattr(obj, 'classname', None)
+        # We do not need to return anything
+
+
+class MatlabFunction(np.ndarray):
+    ''' Subclass to signal this is a matlab function '''
+    def __new__(cls, input_array):
+        obj = np.asarray(input_array).view(cls)
+        return obj
+
+
+class MatlabOpaque(np.ndarray):
+    ''' Subclass to signal this is a matlab opaque matrix '''
+    def __new__(cls, input_array):
+        obj = np.asarray(input_array).view(cls)
+        return obj
+
+
+# Record dtype with four object fields: 's0', 's1', 's2' and 'arr'
+OPAQUE_DTYPE = np.dtype(
+    [('s0', 'O'), ('s1', 'O'), ('s2', 'O'), ('arr', 'O')])
@@ -0,0 +1,415 @@
+# Authors: Travis Oliphant, Matthew Brett
+
+"""
+Base classes for MATLAB file stream reading.
+
+MATLAB is a registered trademark of the Mathworks inc.
+"""
+from __future__ import division, print_function, absolute_import
+
+import sys
+import operator
+
+from scipy._lib.six import reduce
+
+import numpy as np
+
+# byteord turns one element of an indexed byte string into an int.
+# Indexing bytes already gives an int on Python 3, so int() suffices there;
+# on Python 2 it gives a 1-character str, which needs ord().
+if sys.version_info[0] >= 3:
+    byteord = int
+else:
+    byteord = ord
+
+from scipy.misc import doccer
+
+from . import byteordercodes as boc
+
+
+class MatReadError(Exception):
+    ''' Exception raised when a mat file cannot be read '''
+
+
+class MatWriteError(Exception):
+    ''' Exception type intended for failures while writing a mat file '''
+
+
+class MatReadWarning(UserWarning):
+    ''' Warning category intended for problems met while reading mat files '''
+
+
+# Shared parameter descriptions.  Each value is a docstring fragment that
+# ``docfiller`` (built below with ``doccer.filldoc``) substitutes for the
+# matching ``%(key)s`` placeholder in a decorated function's docstring.
+# The fragments are runtime text: edit them only to change the rendered
+# documentation.
+doc_dict = \
+    {'file_arg':
+         '''file_name : str
+   Name of the mat file (do not need .mat extension if
+   appendmat==True) Can also pass open file-like object.''',
+     'append_arg':
+         '''appendmat : bool, optional
+   True to append the .mat extension to the end of the given
+   filename, if not already present.''',
+     'load_args':
+         '''byte_order : str or None, optional
+   None by default, implying byte order guessed from mat
+   file. Otherwise can be one of ('native', '=', 'little', '<',
+   'BIG', '>').
+mat_dtype : bool, optional
+   If True, return arrays in same dtype as would be loaded into
+   MATLAB (instead of the dtype with which they are saved).
+squeeze_me : bool, optional
+   Whether to squeeze unit matrix dimensions or not.
+chars_as_strings : bool, optional
+   Whether to convert char arrays to string arrays.
+matlab_compatible : bool, optional
+   Returns matrices as would be loaded by MATLAB (implies
+   squeeze_me=False, chars_as_strings=False, mat_dtype=True,
+   struct_as_record=True).''',
+     'struct_arg':
+         '''struct_as_record : bool, optional
+   Whether to load MATLAB structs as numpy record arrays, or as
+   old-style numpy arrays with dtype=object.  Setting this flag to
+   False replicates the behavior of scipy version 0.7.x (returning
+   numpy object arrays).  The default setting is True, because it
+   allows easier round-trip load and save of MATLAB files.''',
+     'matstream_arg':
+         '''mat_stream : file-like
+   Object with file API, open for reading.''',
+     'long_fields':
+         '''long_field_names : bool, optional
+   * False - maximum field name length in a structure is 31 characters
+     which is the documented maximum length. This is the default.
+   * True - maximum field name length in a structure is 63 characters
+     which works for MATLAB 7.6''',
+     'do_compression':
+         '''do_compression : bool, optional
+   Whether to compress matrices on write. Default is False.''',
+     'oned_as':
+         '''oned_as : {'row', 'column'}, optional
+   If 'column', write 1-D numpy arrays as column vectors.
+   If 'row', write 1D numpy arrays as row vectors.''',
+     'unicode_strings':
+         '''unicode_strings : bool, optional
+   If True, write strings as Unicode, else MATLAB usual encoding.'''}
+
+# Decorator that fills the placeholders above into a function's docstring
+docfiller = doccer.filldoc(doc_dict)
+
+'''
+
+Note on architecture
+====================
+
+There are three sets of parameters relevant for reading files.  The
+first are *file read parameters* - containing options that are common
+for reading the whole file, and therefore every variable within that
+file. At the moment these are:
+
+* mat_stream
+* dtypes (derived from byte code)
+* byte_order
+* chars_as_strings
+* squeeze_me
+* struct_as_record (MATLAB 5 files)
+* class_dtypes (derived from order code, MATLAB 5 files)
+* codecs (MATLAB 5 files)
+* uint16_codec (MATLAB 5 files)
+
+Another set of parameters are those that apply only to the current
+variable being read - the *header*:
+
+* header related variables (different for v4 and v5 mat files)
+* is_complex
+* mclass
+* var_stream
+
+With the header, we need ``next_position`` to tell us where the next
+variable in the stream is.
+
+Then, for each element in a matrix, there can be *element read
+parameters*.  An element is, for example, one element in a MATLAB cell
+array.  At the moment these are:
+
+* mat_dtype
+
+The file-reading object contains the *file read parameters*.  The
+*header* is passed around as a data object, or may be read and discarded
+in a single function.  The *element read parameters* - only mat_dtype at
+the moment - are passed into a general post-processing function - see
+``mio_utils`` for details.
+'''
+
+
+def convert_dtypes(dtype_template, order_code):
+    ''' Return copy of dtype mapping with all dtypes in given byte order
+
+    Parameters
+    ----------
+    dtype_template : mapping
+       mapping whose values can be passed to ``np.dtype``.  It must have a
+       ``copy()`` method; the template itself is not modified
+    order_code : str
+       byte order code accepted by ``dtype.newbyteorder()``
+
+    Returns
+    -------
+    dtypes : mapping
+       copy of `dtype_template` where each value ``val`` has been replaced
+       by ``np.dtype(val).newbyteorder(order_code)``
+
+    '''
+    converted = dtype_template.copy()
+    for key in converted:
+        spec = converted[key]
+        converted[key] = np.dtype(spec).newbyteorder(order_code)
+    return converted
+
+
+def read_dtype(mat_stream, a_dtype):
+    """
+    Read one item of known dtype from a byte stream
+
+    Parameters
+    ----------
+    mat_stream : file_like object
+        MATLAB (tm) mat file stream; ``a_dtype.itemsize`` bytes are read
+        from its current position.
+    a_dtype : dtype
+        dtype of array to read.  `a_dtype` is assumed to be correct
+        endianness.
+
+    Returns
+    -------
+    arr : ndarray
+        0-d array of dtype `a_dtype`, using the bytes read from the stream
+        as its buffer.
+
+    """
+    raw = mat_stream.read(a_dtype.itemsize)
+    return np.ndarray(shape=(), dtype=a_dtype, buffer=raw, order='F')
+
+
+def get_matfile_version(fileobj):
+    """
+    Return major, minor tuple depending on apparent mat file type
+
+    Where:
+
+     #. 0,x -> version 4 format mat files
+     #. 1,x -> version 5 format mat files
+     #. 2,x -> version 7.3 format mat files (HDF format)
+
+    Parameters
+    ----------
+    fileobj : file_like
+        object implementing seek() and read()
+
+    Returns
+    -------
+    major_version : {0, 1, 2}
+        major MATLAB File format version
+    minor_version : int
+        minor MATLAB file format version
+
+    Raises
+    ------
+    MatReadError
+        If the file is empty, or too short to contain the bytes that
+        identify its version.
+    ValueError
+        The matfile version is unknown.
+
+    Notes
+    -----
+    Has the side effect of setting the file read pointer to 0
+    """
+    # Mat4 files have a zero somewhere in first 4 bytes
+    fileobj.seek(0)
+    mopt_bytes = fileobj.read(4)
+    if len(mopt_bytes) == 0:
+        raise MatReadError("Mat file appears to be empty")
+    if len(mopt_bytes) < 4:
+        # Fewer than 4 bytes cannot fill the uint8 view built below
+        fileobj.seek(0)
+        raise MatReadError("Mat file appears to be truncated")
+    mopt_ints = np.ndarray(shape=(4,), dtype=np.uint8, buffer=mopt_bytes)
+    if 0 in mopt_ints:
+        fileobj.seek(0)
+        return (0,0)
+    # For 5 format or 7.3 format we need to read an integer in the
+    # header. Bytes 124 through 128 contain a version integer and an
+    # endian test string
+    fileobj.seek(124)
+    tst_str = fileobj.read(4)
+    fileobj.seek(0)
+    if len(tst_str) < 4:
+        # Header ends before the version / endian test bytes
+        raise MatReadError("Mat file appears to be truncated")
+    # The third byte is b'I' for one of the two byte orders; that decides
+    # which of the first two bytes holds the major version
+    maj_ind = int(tst_str[2] == b'I'[0])
+    maj_val = byteord(tst_str[maj_ind])
+    min_val = byteord(tst_str[1-maj_ind])
+    ret = (maj_val, min_val)
+    if maj_val in (1, 2):
+        return ret
+    raise ValueError('Unknown mat file type, version %s, %s' % ret)
+
+
+def matdims(arr, oned_as='column'):
+    """
+    Work out the shape MATLAB would use for a given array
+
+    Parameters
+    ----------
+    arr : ndarray
+        Input array
+    oned_as : {'column', 'row'}, optional
+        Whether a 1-D array becomes a MATLAB column or row matrix.
+        Default is 'column'.
+
+    Returns
+    -------
+    dims : tuple
+        Shape tuple, in the form MATLAB expects it.
+
+    Raises
+    ------
+    ValueError
+        If `arr` is 1-D and non-empty, and `oned_as` is neither 'column'
+        nor 'row'.
+
+    Notes
+    -----
+    A default shape had to be chosen for 1 dimensional arrays.
+    ``np.atleast_2d`` treats them as row vectors, and the default for a
+    vector in MATLAB (e.g. ``>> 1:12``) is a row vector as well.
+
+    However, versions of scipy up to and including 0.11 (accidentally)
+    read 1-D arrays as column vectors, and for the moment the same
+    tradition is maintained here.
+
+    Examples
+    --------
+    >>> matdims(np.array(1)) # numpy scalar
+    (1, 1)
+    >>> matdims(np.array([1])) # 1d array, 1 element
+    (1, 1)
+    >>> matdims(np.array([1,2])) # 1d array, 2 elements
+    (2, 1)
+    >>> matdims(np.array([[2],[3]])) # 2d array, column vector
+    (2, 1)
+    >>> matdims(np.array([[2,3]])) # 2d array, row vector
+    (1, 2)
+    >>> matdims(np.array([[[2,3]]])) # 3d array, rowish vector
+    (1, 1, 2)
+    >>> matdims(np.array([])) # empty 1d array
+    (0, 0)
+    >>> matdims(np.array([[]])) # empty 2d
+    (0, 0)
+    >>> matdims(np.array([[[]]])) # empty 3d
+    (0, 0, 0)
+
+    Optional argument flips 1-D shape behavior.
+
+    >>> matdims(np.array([1,2]), 'row') # 1d array, 2 elements
+    (1, 2)
+
+    The argument has to make sense though
+
+    >>> matdims(np.array([1,2]), 'bizarre')
+    Traceback (most recent call last):
+       ...
+    ValueError: 1D option "bizarre" is strange
+
+    """
+    dims = arr.shape
+    if dims == ():  # scalar
+        return (1,1)
+    if 0 in dims:  # zero elements: all-zero shape with at least 2 dims
+        return (0,) * max(arr.ndim, 2)
+    if len(dims) != 1:  # 2D and above keep their own shape
+        return dims
+    # 1D: add the missing unit dimension on the requested side
+    if oned_as == 'column':
+        return dims + (1,)
+    if oned_as == 'row':
+        return (1,) + dims
+    raise ValueError('1D option "%s" is strange'
+                     % oned_as)
+
+
+class MatVarReader(object):
+    ''' Abstract class defining required interface for var readers
+
+    The methods here are placeholders that do nothing and return None.
+    '''
+    def __init__(self, file_reader):
+        ''' Accepts the file reader; this base class does not store it '''
+
+    def read_header(self):
+        ''' Returns header '''
+
+    def array_from_header(self, header):
+        ''' Reads array given header '''
+
+
+class MatFileReader(object):
+    """ Base object for reading mat files
+
+    To make this class functional, you will need to override the
+    following methods:
+
+    matrix_getter_factory   - gives object to fetch next matrix from stream
+    guess_byte_order        - guesses file byte order from file
+    """
+
+    @docfiller
+    def __init__(self, mat_stream,
+                 byte_order=None,
+                 mat_dtype=False,
+                 squeeze_me=False,
+                 chars_as_strings=True,
+                 matlab_compatible=False,
+                 struct_as_record=True,
+                 verify_compressed_data_integrity=True
+                 ):
+        '''
+        Initializer for mat file reader
+
+        mat_stream : file-like
+            object with file API, open for reading
+    %(load_args)s
+        '''
+        # Initialize stream
+        self.mat_stream = mat_stream
+        self.dtypes = {}
+        # A falsy byte_order means "guess"; anything else is normalized to
+        # a numpy byte order code
+        if not byte_order:
+            byte_order = self.guess_byte_order()
+        else:
+            byte_order = boc.to_numpy_code(byte_order)
+        self.byte_order = byte_order
+        self.struct_as_record = struct_as_record
+        # matlab_compatible overrides the three individual options below
+        if matlab_compatible:
+            self.set_matlab_compatible()
+        else:
+            self.squeeze_me = squeeze_me
+            self.chars_as_strings = chars_as_strings
+            self.mat_dtype = mat_dtype
+        self.verify_compressed_data_integrity = verify_compressed_data_integrity
+
+    def set_matlab_compatible(self):
+        ''' Sets options to return arrays as MATLAB loads them
+
+        Sets ``mat_dtype``, ``squeeze_me`` and ``chars_as_strings``;
+        ``struct_as_record`` is left as it is.
+        '''
+        self.mat_dtype = True
+        self.squeeze_me = False
+        self.chars_as_strings = False
+
+    def guess_byte_order(self):
+        ''' As we do not know what file type we have, assume native '''
+        return boc.native_code
+
+    def end_of_stream(self):
+        ''' Return True if no more bytes can be read from the stream
+
+        Reads one byte to find out, and puts the stream back where it was
+        before the call.
+        '''
+        b = self.mat_stream.read(1)
+        if len(b) == 0:
+            # Nothing was consumed, so the position is already correct.
+            # Seeking back a byte here would move before the current
+            # position, or before the start of an empty stream.
+            return True
+        # Un-read the byte that was consumed by the probe
+        self.mat_stream.seek(self.mat_stream.tell() - 1)
+        return False
+
+
+def arr_dtype_number(arr, num):
+    ''' Return dtype for given number of items per element
+
+    The result keeps the first two characters of ``arr.dtype.str`` (byte
+    order and type code) and uses `num` as the size.
+    '''
+    order_and_kind = arr.dtype.str[:2]
+    return np.dtype(order_and_kind + str(num))
+
+
+def arr_to_chars(arr):
+    ''' Convert string array to char array
+
+    The result has one extra trailing dimension whose length is the item
+    size given in ``arr.dtype.str``; empty characters are replaced with
+    spaces (in a copy, so the input buffer is not written to).
+    '''
+    # A 0-d input is treated as having shape (1,)
+    shape = list(arr.shape) or [1]
+    shape.append(int(arr.dtype.str[2:]))
+    # Re-interpret the same buffer as single characters
+    chars = np.ndarray(shape=shape,
+                       dtype=arr_dtype_number(arr, 1),
+                       buffer=arr)
+    blank = chars == ''
+    if not np.any(blank):
+        return chars
+    chars = chars.copy()
+    chars[blank] = ' '
+    return chars
@@ -0,0 +1,16 @@
+from __future__ import division, print_function, absolute_import
+
+
+def configuration(parent_package='io',top_path=None):
+    # numpy.distutils configuration for the 'matlab' subpackage: three C
+    # extensions, each built from the .c file of the same name, plus the
+    # 'tests' data directory
+    from numpy.distutils.misc_util import Configuration
+    config = Configuration('matlab', parent_package, top_path)
+    for ext_name in ('streams', 'mio_utils', 'mio5_utils'):
+        config.add_extension(ext_name, sources=[ext_name + '.c'])
+    config.add_data_dir('tests')
+    return config
+
+
+# When run as a script, pass this subpackage's configuration to setup()
+if __name__ == '__main__':
+    from numpy.distutils.core import setup
+    setup(**configuration(top_path='').todict())
@@ -0,0 +1,4 @@
+function [a, b] = afunc(c, d)
+% AFUNC Return a = c + 1 and b = d + 10
+a = c + 1;
+b = d + 10;
@@ -0,0 +1,5 @@
+Japanese: 
+すべての人間は、生まれながらにして自由であり、
+かつ、尊厳と権利と について平等である。
+人間は、理性と良心とを授けられており、
+互いに同胞の精神をもって行動しなければならない。
@@ -0,0 +1,50 @@
+% Generates mat files for loadmat unit tests
+% Uses save_matfile.m function
+% This is the version for matlab 4
+
+% Set the globals that save_matfile.m uses to build file names:
+% FILEPREFIX is the 'data' directory below the current directory,
+% FILESUFFIX encodes the matlab version and the computer type
+global FILEPREFIX FILESUFFIX
+sepchar = '/';
+if strcmp(computer, 'PCWIN'), sepchar = '\'; end  % backslash only for 'PCWIN'
+FILEPREFIX = [pwd sepchar 'data' sepchar];
+mlv = version;
+FILESUFFIX = ['_' mlv '_' computer '.mat'];
+
+% basic double array
+theta = 0:pi/4:2*pi;
+save_matfile('testdouble', theta);
+
+% string
+save_matfile('teststring', '"Do nine men interpret?" "Nine men," I nod.')
+
+% complex
+save_matfile('testcomplex', cos(theta) + 1j*sin(theta));
+
+% asymmetric array to check indexing
+a = zeros(3, 5);
+a(:,1) = [1:3]';
+a(1,:) = 1:5;
+
+% 2D matrix
+save_matfile('testmatrix', a);
+
+% negative number - tests signed int
+save_matfile('testminus', -1);
+
+% single character
+save_matfile('testonechar', 'r');
+
+% string array (rows padded with spaces to equal length)
+save_matfile('teststringarray', ['one  '; 'two  '; 'three']);
+
+% sparse array
+save_matfile('testsparse', sparse(a));
+
+% sparse complex array
+b = sparse(a);
+b(1,1) = b(1,1) + j;
+save_matfile('testsparsecomplex', b);
+
+% Two variables in same file (saved directly, not through save_matfile)
+save([FILEPREFIX 'testmulti' FILESUFFIX], 'a', 'theta')
+
+
@@ -0,0 +1,100 @@
+% Generates mat files for loadmat unit tests
+% This is the version for matlab 5 and higher
+% Uses save_matfile.m function
+
+% Set the globals that save_matfile.m uses to build file names:
+% FILEPREFIX is the 'data' directory below the current directory,
+% FILESUFFIX encodes the matlab version and the computer type
+global FILEPREFIX FILESUFFIX
+FILEPREFIX = [fullfile(pwd, 'data') filesep];
+temp = ver('MATLAB');
+mlv = temp.Version;
+FILESUFFIX = ['_' mlv '_' computer '.mat'];
+
+% basic double array
+theta = 0:pi/4:2*pi;
+save_matfile('testdouble', theta);
+
+% string
+save_matfile('teststring', '"Do nine men interpret?" "Nine men," I nod.')
+
+% complex
+save_matfile('testcomplex', cos(theta) + 1j*sin(theta));
+
+% asymmetric array to check indexing
+a = zeros(3, 5);
+a(:,1) = [1:3]';
+a(1,:) = 1:5;
+
+% 2D matrix
+save_matfile('testmatrix', a);
+
+% negative number - tests signed int
+save_matfile('testminus', -1);
+
+% single character
+save_matfile('testonechar', 'r');
+
+% string array (rows padded with spaces to equal length)
+save_matfile('teststringarray', ['one  '; 'two  '; 'three']);
+
+% sparse array
+save_matfile('testsparse', sparse(a));
+
+% sparse complex array
+b = sparse(a);
+b(1,1) = b(1,1) + j;
+save_matfile('testsparsecomplex', b);
+
+% Two variables in same file (saved directly, not through save_matfile)
+save([FILEPREFIX 'testmulti' FILESUFFIX], 'a', 'theta')
+
+
+% struct with string, double and complex fields
+save_matfile('teststruct', ...
+	  struct('stringfield','Rats live on no evil star.',...
+		 'doublefield',[sqrt(2) exp(1) pi],...
+		 'complexfield',(1+1j)*[sqrt(2) exp(1) pi]));
+
+% cell array holding a string and three arrays of increasing length
+save_matfile('testcell', ...
+	  {['This cell contains this string and 3 arrays of increasing' ...
+	    ' length'], 1., 1.:2., 1.:3.});
+
+% scalar cell
+save_matfile('testscalarcell', {1})
+
+% cell array in which two of the cells are empty
+save_matfile('testemptycell', {1, 2, [], [], 3});
+
+% 3D matrix
+save_matfile('test3dmatrix', reshape(1:24,[2 3 4]))
+
+% nested cell array
+save_matfile('testcellnest', {1, {2, 3, {4, 5}}});
+
+% nested struct
+save_matfile('teststructnest', struct('one', 1, 'two', ...
+				   struct('three', 'number 3')));
+
+% array of struct
+save_matfile('teststructarr', [struct('one', 1, 'two', 2) ...
+		    struct('one', 'number 1', 'two', 'number 2')]);
+
+% matlab object
+save_matfile('testobject', inline('x'))
+
+% array of matlab objects (disabled)
+%save_matfile('testobjarr', [inline('x') inline('x')])
+
+% unicode test: read the UTF-8 bytes of japanese_utf8.txt and save them
+% as a unicode string
+% NOTE(review): str2num(mlv) presumably yields a number only for versions
+% of the form 'N.M'; confirm the behavior for three-part versions such as
+% '7.0.1'
+if str2num(mlv) > 7  % function added 7.0.1
+  fid = fopen([FILEPREFIX 'japanese_utf8.txt']);
+  from_japan = fread(fid, 'uint8')';
+  fclose(fid);
+  save_matfile('testunicode', native2unicode(from_japan, 'utf-8'));
+end
+
+% function handle (only generated when the version number is above 7)
+if str2num(mlv) > 7  % function pointers added recently
+  func = @afunc;
+  save_matfile('testfunc', func);
+end
@@ -0,0 +1,6 @@
+function save_matfile(test_name, v)
+% Saves the value v as a variable named by test_name, in the file
+% [FILEPREFIX test_name FILESUFFIX]
+
+global FILEPREFIX FILESUFFIX
+% eval creates a variable whose name is the text in test_name, so that
+% save can store the value under that name
+eval([test_name ' = v;']);
+save([FILEPREFIX test_name FILESUFFIX], test_name)
@@ -0,0 +1,31 @@
+''' Tests for byteorder module '''
+
+from __future__ import division, print_function, absolute_import
+
+import sys
+
+from numpy.testing import assert_
+from pytest import raises as assert_raises
+
+import scipy.io.matlab.byteordercodes as sibc
+
+
+def test_native():
+    # sys_is_le must agree with the interpreter's own idea of byte order
+    assert_(sibc.sys_is_le == (sys.byteorder == 'little'))
+
+
+def test_to_numpy():
+    # 'native' / 'swapped' depend on the machine running the test
+    if sys.byteorder == 'little':
+        native, swapped = '<', '>'
+    else:
+        native, swapped = '>', '<'
+    assert_(sibc.to_numpy_code('native') == native)
+    assert_(sibc.to_numpy_code('swapped') == swapped)
+    assert_(sibc.to_numpy_code('native') == sibc.to_numpy_code('='))
+    assert_(sibc.to_numpy_code('big') == '>')
+    # Every accepted alias maps to the matching numpy code
+    alias_groups = ((('little', '<', 'l', 'L', 'le'), '<'),
+                    (('big', '>', 'b', 'B', 'be'), '>'))
+    for aliases, expected in alias_groups:
+        for code in aliases:
+            assert_(sibc.to_numpy_code(code) == expected)
+    # Anything else is rejected
+    assert_raises(ValueError, sibc.to_numpy_code, 'silly string')
@@ -0,0 +1,185 @@
+""" Testing mio5_utils Cython module
+
+"""
+from __future__ import division, print_function, absolute_import
+
+import sys
+
+from io import BytesIO
+cStringIO = BytesIO
+
+import numpy as np
+
+from numpy.testing import assert_array_equal, assert_equal, assert_
+from pytest import raises as assert_raises
+
+from scipy._lib.six import u
+
+import scipy.io.matlab.byteordercodes as boc
+import scipy.io.matlab.streams as streams
+import scipy.io.matlab.mio5_params as mio5p
+import scipy.io.matlab.mio5_utils as m5u
+
+
+def test_byteswap():
+    # byteswap_u4 must agree with ndarray.byteswap, and applying it twice
+    # must give back the starting value
+    for val in (1, 0x100, 0x10000):
+        start = np.array(val, dtype=np.uint32)
+        expected = start.byteswap()
+        swapped = m5u.byteswap_u4(start)
+        assert_equal(expected.item(), swapped)
+        restored = m5u.byteswap_u4(swapped)
+        assert_equal(start.item(), restored)
+
+
+def _make_tag(base_dt, val, mdtype, sde=False):
+    ''' Makes a simple matlab tag, full or sde
+
+    Parameters
+    ----------
+    base_dt : dtype specifier
+        dtype of the single value stored after the tag; its byte order is
+        used for the tag fields as well
+    val : scalar
+        value to store
+    mdtype : int
+        matlab data type code written to the ``mdtype`` field
+    sde : bool, optional
+        False (default) for a full tag (two u4 fields, data padded to a
+        multiple of 8 bytes); True for the small data element layout (two
+        u2 fields, data padded to 4 bytes)
+
+    Returns
+    -------
+    tag : ndarray
+        shape (1,) structured array with fields ``mdtype``, ``byte_count``,
+        ``val`` and, when padding is needed, ``padding``
+    '''
+    base_dt = np.dtype(base_dt)
+    bo = boc.to_numpy_code(base_dt.byteorder)
+    byte_count = base_dt.itemsize
+    if not sde:
+        udt = bo + 'u4'
+        # Pad the data up to the next multiple of 8 bytes; no padding at
+        # all when it already is a multiple of 8
+        padding = -byte_count % 8
+        all_dt = [('mdtype', udt),
+                  ('byte_count', udt),
+                  ('val', base_dt)]
+    else:  # is sde
+        udt = bo + 'u2'
+        padding = 4-byte_count
+        if bo == '<':  # little endian
+            all_dt = [('mdtype', udt),
+                      ('byte_count', udt),
+                      ('val', base_dt)]
+        else:  # big endian
+            all_dt = [('byte_count', udt),
+                      ('mdtype', udt),
+                      ('val', base_dt)]
+    if padding:
+        all_dt.append(('padding', 'u1', padding))
+    tag = np.zeros((1,), dtype=all_dt)
+    tag['mdtype'] = mdtype
+    tag['byte_count'] = byte_count
+    tag['val'] = val
+    return tag
+
+
+def _write_stream(stream, *strings):
+    # Replace the stream contents with the given pieces, written in order,
+    # and leave the stream positioned at its start
+    stream.truncate(0)
+    stream.seek(0)
+    for piece in strings:
+        stream.write(piece)
+    stream.seek(0)
+
+
+def _make_readerlike(stream, byte_order=boc.native_code):
+    # Build a bare object carrying the reader attributes set below, for
+    # handing to VarReader5 in the tests
+    class R(object):
+        pass
+    reader = R()
+    settings = (
+        ('mat_stream', stream),
+        ('byte_order', byte_order),
+        ('struct_as_record', True),
+        ('uint16_codec', sys.getdefaultencoding()),
+        ('chars_as_strings', False),
+        ('mat_dtype', False),
+        ('squeeze_me', False),
+    )
+    for attr, value in settings:
+        setattr(reader, attr, value)
+    return reader
+
+
+def test_read_tag():
+    # mainly to test errors
+    # make reader-like thing
+    str_io = BytesIO()
+    r = _make_readerlike(str_io)
+    c_reader = m5u.VarReader5(r)
+    # Reading a tag from an empty stream must fail with IOError
+    assert_raises(IOError, c_reader.read_tag)
+    # bad SDE: the tag claims 5 bytes, more than its 4 byte data area
+    tag = _make_tag('i4', 1, mio5p.miINT32, sde=True)
+    tag['byte_count'] = 5
+    # tobytes() replaces tostring(), which was removed in NumPy 2.0
+    _write_stream(str_io, tag.tobytes())
+    assert_raises(ValueError, c_reader.read_tag)
+
+
+def test_read_stream():
+    # Bytes read back through the stream wrapper must equal the tag bytes
+    tag = _make_tag('i4', 1, mio5p.miINT32, sde=True)
+    # tobytes() replaces tostring(), which was removed in NumPy 2.0
+    tag_str = tag.tobytes()
+    str_io = cStringIO(tag_str)
+    st = streams.make_stream(str_io)
+    s = streams._read_into(st, tag.itemsize)
+    assert_equal(s, tag_str)
+
+
+def test_read_numeric():
+    # make reader-like thing
+    str_io = cStringIO()
+    r = _make_readerlike(str_io)
+    # check simplest of tags
+    for base_dt, val, mdtype in (('u2', 30, mio5p.miUINT16),
+                                 ('i4', 1, mio5p.miINT32),
+                                 ('i2', -1, mio5p.miINT16)):
+        for byte_code in ('<', '>'):
+            r.byte_order = byte_code
+            c_reader = m5u.VarReader5(r)
+            assert_equal(c_reader.little_endian, byte_code == '<')
+            assert_equal(c_reader.is_swapped, byte_code != boc.native_code)
+            # both full tags and small data elements
+            for sde_f in (False, True):
+                dt = np.dtype(base_dt).newbyteorder(byte_code)
+                a = _make_tag(dt, val, mdtype, sde_f)
+                # tobytes() replaces tostring(), removed in NumPy 2.0
+                a_str = a.tobytes()
+                _write_stream(str_io, a_str)
+                el = c_reader.read_numeric()
+                assert_equal(el, val)
+                # two sequential reads
+                _write_stream(str_io, a_str, a_str)
+                el = c_reader.read_numeric()
+                assert_equal(el, val)
+                el = c_reader.read_numeric()
+                assert_equal(el, val)
+
+
+def test_read_numeric_writeable():
+    # Arrays returned by read_numeric must be writeable
+    # make reader-like thing
+    str_io = cStringIO()
+    r = _make_readerlike(str_io, '<')
+    c_reader = m5u.VarReader5(r)
+    dt = np.dtype('<u2')
+    a = _make_tag(dt, 30, mio5p.miUINT16, 0)
+    # tobytes() replaces tostring(), which was removed in NumPy 2.0
+    a_str = a.tobytes()
+    _write_stream(str_io, a_str)
+    el = c_reader.read_numeric()
+    assert_(el.flags.writeable is True)
+
+
+def test_zero_byte_string():
+    # Tests hack to allow chars of non-zero length, but 0 bytes
+    # make reader-like thing
+    str_io = cStringIO()
+    r = _make_readerlike(str_io, boc.native_code)
+    c_reader = m5u.VarReader5(r)
+    tag_dt = np.dtype([('mdtype', 'u4'), ('byte_count', 'u4')])
+    tag = np.zeros((1,), dtype=tag_dt)
+    tag['mdtype'] = mio5p.miINT8
+    tag['byte_count'] = 1
+    hdr = m5u.VarHeader5()
+    # Try when string is 1 length
+    hdr.set_dims([1,])
+    # tobytes() replaces tostring(), which was removed in NumPy 2.0
+    _write_stream(str_io, tag.tobytes() + b'        ')
+    str_io.seek(0)
+    val = c_reader.read_char(hdr)
+    assert_equal(val, u(' '))
+    # Now when string has 0 bytes 1 length
+    tag['byte_count'] = 0
+    _write_stream(str_io, tag.tobytes())
+    str_io.seek(0)
+    val = c_reader.read_char(hdr)
+    assert_equal(val, u(' '))
+    # Now when string has 0 bytes 4 length
+    str_io.seek(0)
+    hdr.set_dims([4,])
+    val = c_reader.read_char(hdr)
+    assert_array_equal(val, [u(' ')] * 4)
+
+
@@ -0,0 +1,57 @@
+''' Jottings to work out format for __function_workspace__ matrix at end
+of mat file.
+
+'''
+from __future__ import division, print_function, absolute_import
+
+import os.path
+import sys
+import io
+
+from numpy.compat import asstr
+
+from scipy.io.matlab.mio5 import (MatlabObject, MatFile5Writer,
+                                  MatFile5Reader, MatlabFunction)
+
+test_data_path = os.path.join(os.path.dirname(__file__), 'data')
+
+
+def read_minimat_vars(rdr):
+    # Read every variable left in the reader's stream without processing.
+    # Returns a dict from variable name to raw array; unnamed variables
+    # get the names 'var_0', 'var_1', ... and the names of global
+    # variables are also listed under '__globals__'.
+    rdr.initialize_read()
+    global_names = []
+    found = {'__globals__': global_names}
+    n_unnamed = 0
+    while not rdr.end_of_stream():
+        hdr, next_position = rdr.read_var_header()
+        name = asstr(hdr.name)
+        if name == '':
+            name = 'var_%d' % n_unnamed
+            n_unnamed += 1
+        value = rdr.read_var_array(hdr, process=False)
+        # Move to the next variable whatever the array read consumed
+        rdr.mat_stream.seek(next_position)
+        found[name] = value
+        if hdr.is_global:
+            global_names.append(name)
+    return found
+
+
+def read_workspace_vars(fname):
+    # Read the variables stored inside the '__function_workspace__' matrix
+    # of mat file `fname`; returns the dict built by read_minimat_vars.
+    # The with block closes the file even if reading fails part way.
+    with open(fname, 'rb') as fp:
+        rdr = MatFile5Reader(fp, struct_as_record=True)
+        mat_vars = rdr.get_variables()
+        fws = mat_vars['__function_workspace__']
+        # tobytes() replaces tostring(), which was removed in NumPy 2.0
+        ws_bs = io.BytesIO(fws.tobytes())
+        ws_bs.seek(2)
+        # From here on the reader works on the workspace bytes
+        rdr.mat_stream = ws_bs
+        # Guess byte order.
+        mi = rdr.mat_stream.read(2)
+        rdr.byte_order = '<' if mi == b'IM' else '>'
+        rdr.mat_stream.read(4)  # presumably byte padding
+        mdict = read_minimat_vars(rdr)
+    return mdict
+
+
+def test_jottings():
+    # example: the function workspace of parabola.mat must be readable
+    fname = os.path.join(test_data_path, 'parabola.mat')
+    ws_vars = read_workspace_vars(fname)
+    # read_minimat_vars always adds this bookkeeping entry
+    assert '__globals__' in ws_vars
@@ -0,0 +1,46 @@
+""" Testing
+
+"""
+
+from __future__ import division, print_function, absolute_import
+
+import numpy as np
+
+from numpy.testing import assert_array_equal, assert_array_almost_equal, \
+     assert_
+
+from scipy.io.matlab.mio_utils import squeeze_element, chars_to_strings
+
+
+def test_squeeze_element():
+    # For an array that stays at least 1d, same result as np.squeeze
+    row = np.zeros((1,3))
+    assert_array_equal(np.squeeze(row), squeeze_element(row))
+    # 0d output from squeeze gives scalar
+    as_scalar = squeeze_element(np.zeros((1,1), dtype=float))
+    assert_(isinstance(as_scalar, float))
+    # Unless it's a structured array
+    as_struct = squeeze_element(np.zeros((1,1),dtype=[('f1', 'f')]))
+    assert_(isinstance(as_struct, np.ndarray))
+
+
+def test_chars_strings():
+    # chars as strings
+    words = ['learn ', 'python', 'fast  ', 'here  ']
+    str_arr = np.array(words, dtype='U6')  # shape (4,)
+    letters = [list(word) for word in words]
+    char_arr = np.array(letters, dtype='U1')  # shape (4,6)
+    assert_array_equal(chars_to_strings(char_arr), str_arr)
+    # The last axis is joined whatever the leading dimensions are
+    for lead_shape in ((2,2), (1,2,2)):
+        chars_nd = char_arr.reshape(lead_shape + (6,))
+        strs_nd = str_arr.reshape(lead_shape)
+        assert_array_equal(chars_to_strings(chars_nd), strs_nd)
+    # Fortran ordered arrays
+    char_arrf = np.array(letters, dtype='U1', order='F')  # shape (4,6)
+    assert_array_equal(chars_to_strings(char_arrf), str_arr)
+    # empty array
+    empty_chars = np.array([['']], dtype='U1')
+    empty_strs = np.array([''], dtype='U1')
+    assert_array_equal(chars_to_strings(empty_chars), empty_strs)
+
+
@@ -0,0 +1,31 @@
+""" Testing miobase module
+"""
+
+import numpy as np
+
+from numpy.testing import assert_equal
+from pytest import raises as assert_raises
+
+from scipy.io.matlab.miobase import matdims
+
+
+def test_matdims():
+    # Test matdims dimension finder
+    default_cases = (
+        (np.array(1), (1, 1)),  # numpy scalar
+        (np.array([1]), (1, 1)),  # 1d array, 1 element
+        (np.array([1,2]), (2, 1)),  # 1d array, 2 elements
+        (np.array([[2],[3]]), (2, 1)),  # 2d array, column vector
+        (np.array([[2,3]]), (1, 2)),  # 2d array, row vector
+        (np.array([[[2,3]]]), (1, 1, 2)),  # 3d array, rowish vector
+        (np.array([]), (0, 0)),  # empty 1d array
+        (np.array([[]]), (0, 0)),  # empty 2d
+        (np.array([[[]]]), (0, 0, 0)),  # empty 3d
+    )
+    for arr, expected in default_cases:
+        assert_equal(matdims(arr), expected)
+    # Optional argument flips 1-D shape behavior.
+    assert_equal(matdims(np.array([1,2]), 'row'), (1, 2))  # 1d array, 2 elements
+    # The argument has to make sense though
+    assert_raises(ValueError, matdims, np.array([1,2]), 'bizarre')
+    # Check empty sparse matrices get their own shape
+    from scipy.sparse import csr_matrix, csc_matrix
+    assert_equal(matdims(csr_matrix(np.zeros((3, 3)))), (3, 3))
+    assert_equal(matdims(csc_matrix(np.zeros((2, 2)))), (2, 2))
@@ -0,0 +1,35 @@
+""" Test reading of files not conforming to matlab specification
+
+We try and read any file that matlab reads, these files included
+"""
+from __future__ import division, print_function, absolute_import
+
+from os.path import dirname, join as pjoin
+
+from numpy.testing import assert_
+from pytest import raises as assert_raises
+
+from scipy.io.matlab.mio import loadmat
+
+TEST_DATA_PATH = pjoin(dirname(__file__), 'data')
+
+
+def test_multiple_fieldnames():
+    # Example provided by Dharhas Pothina
+    # Extracted using mio5.varmats_from_mat
+    # The 'Summary' struct must expose all three field names below
+    mat_path = pjoin(TEST_DATA_PATH, 'nasty_duplicate_fieldnames.mat')
+    contents = loadmat(mat_path)
+    field_names = contents['Summary'].dtype.names
+    expected = set(['_1_Station_Q', '_2_Station_Q', '_3_Station_Q'])
+    assert_(expected.issubset(field_names))
+
+
+def test_malformed1():
+    # Example from gh-6072
+    # The header data in this file is malformed, and reading it used to
+    # cause a buffer overflow.
+    #
+    # Loading must raise an exception rather than segfault
+    with open(pjoin(TEST_DATA_PATH, 'malformed1.mat'), 'rb') as mat_file:
+        assert_raises(ValueError, loadmat, mat_file)
@@ -0,0 +1,184 @@
+""" Testing
+
+"""
+
+from __future__ import division, print_function, absolute_import
+
+import os
+import sys
+import zlib
+
+from io import BytesIO
+
+# Bind the name ``cStringIO`` to a bytes-capable in-memory stream class:
+# BytesIO on Python 3, the cStringIO.StringIO type on Python 2.
+if sys.version_info[0] >= 3:
+    cStringIO = BytesIO
+else:
+    from cStringIO import StringIO as cStringIO
+
+from tempfile import mkstemp
+from contextlib import contextmanager
+
+import numpy as np
+
+from numpy.testing import assert_, assert_equal
+from pytest import raises as assert_raises
+
+from scipy.io.matlab.streams import (make_stream,
+    GenericStream, cStringStream, FileStream, ZlibInputStream,
+    _read_into, _read_string)
+
+# True when a module named '__pypy__' is loaded (used as the PyPy indicator).
+IS_PYPY = ('__pypy__' in sys.modules)
+
+
+@contextmanager
+def setup_test_file():
+    val = b'a\x00string'
+    fd, fname = mkstemp()
+
+    with os.fdopen(fd, 'wb') as fs:
+        fs.write(val)
+    with open(fname, 'rb') as fs:
+        gs = BytesIO(val)
+        cs = cStringIO(val)
+        yield fs, gs, cs
+    os.unlink(fname)
+
+
+def test_make_stream():
+    with setup_test_file() as (fs, gs, cs):
+        # test stream initialization
+        assert_(isinstance(make_stream(gs), GenericStream))
+        if sys.version_info[0] < 3 and not IS_PYPY:
+            assert_(isinstance(make_stream(cs), cStringStream))
+            assert_(isinstance(make_stream(fs), FileStream))
+
+
+def test_tell_seek():
+    with setup_test_file() as (fs, gs, cs):
+        for s in (fs, gs, cs):
+            st = make_stream(s)
+            res = st.seek(0)
+            assert_equal(res, 0)
+            assert_equal(st.tell(), 0)
+            res = st.seek(5)
+            assert_equal(res, 0)
+            assert_equal(st.tell(), 5)
+            res = st.seek(2, 1)
+            assert_equal(res, 0)
+            assert_equal(st.tell(), 7)
+            res = st.seek(-2, 2)
+            assert_equal(res, 0)
+            assert_equal(st.tell(), 6)
+
+
+def test_read():
+    with setup_test_file() as (fs, gs, cs):
+        for s in (fs, gs, cs):
+            st = make_stream(s)
+            st.seek(0)
+            res = st.read(-1)
+            assert_equal(res, b'a\x00string')
+            st.seek(0)
+            res = st.read(4)
+            assert_equal(res, b'a\x00st')
+            # read into
+            st.seek(0)
+            res = _read_into(st, 4)
+            assert_equal(res, b'a\x00st')
+            res = _read_into(st, 4)
+            assert_equal(res, b'ring')
+            assert_raises(IOError, _read_into, st, 2)
+            # read alloc
+            st.seek(0)
+            res = _read_string(st, 4)
+            assert_equal(res, b'a\x00st')
+            res = _read_string(st, 4)
+            assert_equal(res, b'ring')
+            assert_raises(IOError, _read_string, st, 2)
+
+
+class TestZlibInputStream(object):
+    def _get_data(self, size):
+        data = np.random.randint(0, 256, size).astype(np.uint8).tostring()
+        compressed_data = zlib.compress(data)
+        stream = BytesIO(compressed_data)
+        return stream, len(compressed_data), data
+
+    def test_read(self):
+        block_size = 131072
+
+        SIZES = [0, 1, 10, block_size//2, block_size-1,
+                 block_size, block_size+1, 2*block_size-1]
+
+        READ_SIZES = [block_size//2, block_size-1,
+                      block_size, block_size+1]
+
+        def check(size, read_size):
+            compressed_stream, compressed_data_len, data = self._get_data(size)
+            stream = ZlibInputStream(compressed_stream, compressed_data_len)
+            data2 = b''
+            so_far = 0
+            while True:
+                block = stream.read(min(read_size,
+                                        size - so_far))
+                if not block:
+                    break
+                so_far += len(block)
+                data2 += block
+            assert_equal(data, data2)
+
+        for size in SIZES:
+            for read_size in READ_SIZES:
+                check(size, read_size)
+
+    def test_read_max_length(self):
+        size = 1234
+        data = np.random.randint(0, 256, size).astype(np.uint8).tostring()
+        compressed_data = zlib.compress(data)
+        compressed_stream = BytesIO(compressed_data + b"abbacaca")
+        stream = ZlibInputStream(compressed_stream, len(compressed_data))
+
+        stream.read(len(data))
+        assert_equal(compressed_stream.tell(), len(compressed_data))
+
+        assert_raises(IOError, stream.read, 1)
+
+    def test_seek(self):
+        compressed_stream, compressed_data_len, data = self._get_data(1024)
+
+        stream = ZlibInputStream(compressed_stream, compressed_data_len)
+
+        stream.seek(123)
+        p = 123
+        assert_equal(stream.tell(), p)
+        d1 = stream.read(11)
+        assert_equal(d1, data[p:p+11])
+
+        stream.seek(321, 1)
+        p = 123+11+321
+        assert_equal(stream.tell(), p)
+        d2 = stream.read(21)
+        assert_equal(d2, data[p:p+21])
+
+        stream.seek(641, 0)
+        p = 641
+        assert_equal(stream.tell(), p)
+        d3 = stream.read(11)
+        assert_equal(d3, data[p:p+11])
+
+        assert_raises(IOError, stream.seek, 10, 2)
+        assert_raises(IOError, stream.seek, -1, 1)
+        assert_raises(ValueError, stream.seek, 1, 123)
+
+        stream.seek(10000, 1)
+        assert_raises(IOError, stream.read, 12)
+
+    def test_all_data_read(self):
+        compressed_stream, compressed_data_len, data = self._get_data(1024)
+        stream = ZlibInputStream(compressed_stream, compressed_data_len)
+        assert_(not stream.all_data_read())
+        stream.seek(512)
+        assert_(not stream.all_data_read())
+        stream.seek(1024)
+        assert_(stream.all_data_read())
+
@@ -0,0 +1,835 @@
+"""
+  Matrix Market I/O in Python.
+  See http://math.nist.gov/MatrixMarket/formats.html
+  for information about the Matrix Market format.
+"""
+#
+# Author: Pearu Peterson <pearu@cens.ioc.ee>
+# Created: October, 2004
+#
+# References:
+#  http://math.nist.gov/MatrixMarket/
+#
+from __future__ import division, print_function, absolute_import
+
+import os
+import sys
+
+from numpy import (asarray, real, imag, conj, zeros, ndarray, concatenate,
+                   ones, can_cast)
+from numpy.compat import asbytes, asstr
+
+from scipy._lib.six import string_types
+from scipy.sparse import coo_matrix, isspmatrix
+
+# Names exported by ``from <this module> import *``.
+__all__ = ['mminfo', 'mmread', 'mmwrite', 'MMFile']
+
+
+# -----------------------------------------------------------------------------
+def mminfo(source):
+    """
+    Return size and storage parameters from Matrix Market file-like 'source'.
+
+    Parameters
+    ----------
+    source : str or file-like
+        Matrix Market filename (extension .mtx) or open file-like object
+
+    Returns
+    -------
+    rows : int
+        Number of matrix rows.
+    cols : int
+        Number of matrix columns.
+    entries : int
+        Number of non-zero entries of a sparse matrix
+        or rows*cols for a dense matrix.
+    format : str
+        Either 'coordinate' or 'array'.
+    field : str
+        Either 'real', 'complex', 'pattern', or 'integer'.
+    symmetry : str
+        Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.
+    """
+    return MMFile.info(source)
+
+# -----------------------------------------------------------------------------
+
+
+def mmread(source):
+    """
+    Reads the contents of a Matrix Market file-like 'source' into a matrix.
+
+    Parameters
+    ----------
+    source : str or file-like
+        Matrix Market filename (extensions .mtx, .mtz.gz)
+        or open file-like object.
+
+    Returns
+    -------
+    a : ndarray or coo_matrix
+        Dense or sparse matrix depending on the matrix format in the
+        Matrix Market file.
+    """
+    return MMFile().read(source)
+
+# -----------------------------------------------------------------------------
+
+
+def mmwrite(target, a, comment='', field=None, precision=None, symmetry=None):
+    """
+    Writes the sparse or dense array `a` to Matrix Market file-like `target`.
+
+    Parameters
+    ----------
+    target : str or file-like
+        Matrix Market filename (extension .mtx) or open file-like object.
+    a : array like
+        Sparse or dense 2D array.
+    comment : str, optional
+        Comments to be prepended to the Matrix Market file.
+    field : None or str, optional
+        Either 'real', 'complex', 'pattern', or 'integer'.
+    precision : None or int, optional
+        Number of digits to display for real or complex values.
+    symmetry : None or str, optional
+        Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.
+        If symmetry is None the symmetry type of 'a' is determined by its
+        values.
+    """
+    MMFile().write(target, a, comment, field, precision, symmetry)
+
+
+###############################################################################
+class MMFile (object):
+    __slots__ = ('_rows',
+                 '_cols',
+                 '_entries',
+                 '_format',
+                 '_field',
+                 '_symmetry')
+
+    @property
+    def rows(self):
+        return self._rows
+
+    @property
+    def cols(self):
+        return self._cols
+
+    @property
+    def entries(self):
+        return self._entries
+
+    @property
+    def format(self):
+        return self._format
+
+    @property
+    def field(self):
+        return self._field
+
+    @property
+    def symmetry(self):
+        return self._symmetry
+
+    @property
+    def has_symmetry(self):
+        return self._symmetry in (self.SYMMETRY_SYMMETRIC,
+                                  self.SYMMETRY_SKEW_SYMMETRIC,
+                                  self.SYMMETRY_HERMITIAN)
+
+    # format values
+    FORMAT_COORDINATE = 'coordinate'
+    FORMAT_ARRAY = 'array'
+    FORMAT_VALUES = (FORMAT_COORDINATE, FORMAT_ARRAY)
+
+    @classmethod
+    def _validate_format(self, format):
+        if format not in self.FORMAT_VALUES:
+            raise ValueError('unknown format type %s, must be one of %s' %
+                             (format, self.FORMAT_VALUES))
+
+    # field values
+    FIELD_INTEGER = 'integer'
+    FIELD_UNSIGNED = 'unsigned-integer'
+    FIELD_REAL = 'real'
+    FIELD_COMPLEX = 'complex'
+    FIELD_PATTERN = 'pattern'
+    FIELD_VALUES = (FIELD_INTEGER, FIELD_UNSIGNED, FIELD_REAL, FIELD_COMPLEX, FIELD_PATTERN)
+
+    @classmethod
+    def _validate_field(self, field):
+        if field not in self.FIELD_VALUES:
+            raise ValueError('unknown field type %s, must be one of %s' %
+                             (field, self.FIELD_VALUES))
+
+    # symmetry values
+    SYMMETRY_GENERAL = 'general'
+    SYMMETRY_SYMMETRIC = 'symmetric'
+    SYMMETRY_SKEW_SYMMETRIC = 'skew-symmetric'
+    SYMMETRY_HERMITIAN = 'hermitian'
+    SYMMETRY_VALUES = (SYMMETRY_GENERAL, SYMMETRY_SYMMETRIC,
+                       SYMMETRY_SKEW_SYMMETRIC, SYMMETRY_HERMITIAN)
+
+    @classmethod
+    def _validate_symmetry(self, symmetry):
+        if symmetry not in self.SYMMETRY_VALUES:
+            raise ValueError('unknown symmetry type %s, must be one of %s' %
+                             (symmetry, self.SYMMETRY_VALUES))
+
+    DTYPES_BY_FIELD = {FIELD_INTEGER: 'intp',
+                       FIELD_UNSIGNED: 'uint64',
+                       FIELD_REAL: 'd',
+                       FIELD_COMPLEX: 'D',
+                       FIELD_PATTERN: 'd'}
+
+    # -------------------------------------------------------------------------
+    @staticmethod
+    def reader():
+        pass
+
+    # -------------------------------------------------------------------------
+    @staticmethod
+    def writer():
+        pass
+
+    # -------------------------------------------------------------------------
+    @classmethod
+    def info(self, source):
+        """
+        Return size, storage parameters from Matrix Market file-like 'source'.
+
+        Parameters
+        ----------
+        source : str or file-like
+            Matrix Market filename (extension .mtx) or open file-like object
+
+        Returns
+        -------
+        rows : int
+            Number of matrix rows.
+        cols : int
+            Number of matrix columns.
+        entries : int
+            Number of non-zero entries of a sparse matrix
+            or rows*cols for a dense matrix.
+        format : str
+            Either 'coordinate' or 'array'.
+        field : str
+            Either 'real', 'complex', 'pattern', or 'integer'.
+        symmetry : str
+            Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.
+        """
+
+        stream, close_it = self._open(source)
+
+        try:
+
+            # read and validate header line
+            line = stream.readline()
+            mmid, matrix, format, field, symmetry = \
+                [asstr(part.strip()) for part in line.split()]
+            if not mmid.startswith('%%MatrixMarket'):
+                raise ValueError('source is not in Matrix Market format')
+            if not matrix.lower() == 'matrix':
+                raise ValueError("Problem reading file header: " + line)
+
+            # http://math.nist.gov/MatrixMarket/formats.html
+            if format.lower() == 'array':
+                format = self.FORMAT_ARRAY
+            elif format.lower() == 'coordinate':
+                format = self.FORMAT_COORDINATE
+
+            # skip comments
+            while line.startswith(b'%'):
+                line = stream.readline()
+
+            line = line.split()
+            if format == self.FORMAT_ARRAY:
+                if not len(line) == 2:
+                    raise ValueError("Header line not of length 2: " + line)
+                rows, cols = map(int, line)
+                entries = rows * cols
+            else:
+                if not len(line) == 3:
+                    raise ValueError("Header line not of length 3: " + line)
+                rows, cols, entries = map(int, line)
+
+            return (rows, cols, entries, format, field.lower(),
+                    symmetry.lower())
+
+        finally:
+            if close_it:
+                stream.close()
+
+    # -------------------------------------------------------------------------
+    @staticmethod
+    def _open(filespec, mode='rb'):
+        """ Return an open file stream for reading based on source.
+
+        If source is a file name, open it (after trying to find it with mtx and
+        gzipped mtx extensions).  Otherwise, just return source.
+
+        Parameters
+        ----------
+        filespec : str or file-like
+            String giving file name or file-like object
+        mode : str, optional
+            Mode with which to open file, if `filespec` is a file name.
+
+        Returns
+        -------
+        fobj : file-like
+            Open file-like object.
+        close_it : bool
+            True if the calling function should close this file when done,
+            false otherwise.
+        """
+        close_it = False
+        if isinstance(filespec, string_types):
+            close_it = True
+
+            # open for reading
+            if mode[0] == 'r':
+
+                # determine filename plus extension
+                if not os.path.isfile(filespec):
+                    if os.path.isfile(filespec+'.mtx'):
+                        filespec = filespec + '.mtx'
+                    elif os.path.isfile(filespec+'.mtx.gz'):
+                        filespec = filespec + '.mtx.gz'
+                    elif os.path.isfile(filespec+'.mtx.bz2'):
+                        filespec = filespec + '.mtx.bz2'
+                # open filename
+                if filespec.endswith('.gz'):
+                    import gzip
+                    stream = gzip.open(filespec, mode)
+                elif filespec.endswith('.bz2'):
+                    import bz2
+                    stream = bz2.BZ2File(filespec, 'rb')
+                else:
+                    stream = open(filespec, mode)
+
+            # open for writing
+            else:
+                if filespec[-4:] != '.mtx':
+                    filespec = filespec + '.mtx'
+                stream = open(filespec, mode)
+        else:
+            stream = filespec
+
+        return stream, close_it
+
+    # -------------------------------------------------------------------------
+    @staticmethod
+    def _get_symmetry(a):
+        m, n = a.shape
+        if m != n:
+            return MMFile.SYMMETRY_GENERAL
+        issymm = True
+        isskew = True
+        isherm = a.dtype.char in 'FD'
+
+        # sparse input
+        if isspmatrix(a):
+            # check if number of nonzero entries of lower and upper triangle
+            # matrix are equal
+            a = a.tocoo()
+            (row, col) = a.nonzero()
+            if (row < col).sum() != (row > col).sum():
+                return MMFile.SYMMETRY_GENERAL
+
+            # define iterator over symmetric pair entries
+            a = a.todok()
+
+            def symm_iterator():
+                for ((i, j), aij) in a.items():
+                    if i > j:
+                        aji = a[j, i]
+                        yield (aij, aji)
+
+        # non-sparse input
+        else:
+            # define iterator over symmetric pair entries
+            def symm_iterator():
+                for j in range(n):
+                    for i in range(j+1, n):
+                        aij, aji = a[i][j], a[j][i]
+                        yield (aij, aji)
+
+        # check for symmetry
+        for (aij, aji) in symm_iterator():
+            if issymm and aij != aji:
+                issymm = False
+            if isskew and aij != -aji:
+                isskew = False
+            if isherm and aij != conj(aji):
+                isherm = False
+            if not (issymm or isskew or isherm):
+                break
+
+        # return symmetry value
+        if issymm:
+            return MMFile.SYMMETRY_SYMMETRIC
+        if isskew:
+            return MMFile.SYMMETRY_SKEW_SYMMETRIC
+        if isherm:
+            return MMFile.SYMMETRY_HERMITIAN
+        return MMFile.SYMMETRY_GENERAL
+
+    # -------------------------------------------------------------------------
+    @staticmethod
+    def _field_template(field, precision):
+        return {MMFile.FIELD_REAL: '%%.%ie\n' % precision,
+                MMFile.FIELD_INTEGER: '%i\n',
+                MMFile.FIELD_UNSIGNED: '%u\n',
+                MMFile.FIELD_COMPLEX: '%%.%ie %%.%ie\n' %
+                    (precision, precision)
+                }.get(field, None)
+
+    # -------------------------------------------------------------------------
+    def __init__(self, **kwargs):
+        self._init_attrs(**kwargs)
+
+    # -------------------------------------------------------------------------
+    def read(self, source):
+        """
+        Reads the contents of a Matrix Market file-like 'source' into a matrix.
+
+        Parameters
+        ----------
+        source : str or file-like
+            Matrix Market filename (extensions .mtx, .mtz.gz)
+            or open file object.
+
+        Returns
+        -------
+        a : ndarray or coo_matrix
+            Dense or sparse matrix depending on the matrix format in the
+            Matrix Market file.
+        """
+        stream, close_it = self._open(source)
+
+        try:
+            self._parse_header(stream)
+            return self._parse_body(stream)
+
+        finally:
+            if close_it:
+                stream.close()
+
+    # -------------------------------------------------------------------------
+    def write(self, target, a, comment='', field=None, precision=None,
+              symmetry=None):
+        """
+        Writes sparse or dense array `a` to Matrix Market file-like `target`.
+
+        Parameters
+        ----------
+        target : str or file-like
+            Matrix Market filename (extension .mtx) or open file-like object.
+        a : array like
+            Sparse or dense 2D array.
+        comment : str, optional
+            Comments to be prepended to the Matrix Market file.
+        field : None or str, optional
+            Either 'real', 'complex', 'pattern', or 'integer'.
+        precision : None or int, optional
+            Number of digits to display for real or complex values.
+        symmetry : None or str, optional
+            Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.
+            If symmetry is None the symmetry type of 'a' is determined by its
+            values.
+        """
+
+        stream, close_it = self._open(target, 'wb')
+
+        try:
+            self._write(stream, a, comment, field, precision, symmetry)
+
+        finally:
+            if close_it:
+                stream.close()
+            else:
+                stream.flush()
+
+    # -------------------------------------------------------------------------
+    def _init_attrs(self, **kwargs):
+        """
+        Initialize each attributes with the corresponding keyword arg value
+        or a default of None
+        """
+
+        attrs = self.__class__.__slots__
+        public_attrs = [attr[1:] for attr in attrs]
+        invalid_keys = set(kwargs.keys()) - set(public_attrs)
+
+        if invalid_keys:
+            raise ValueError('''found %s invalid keyword arguments, please only
+                                use %s''' % (tuple(invalid_keys),
+                                             public_attrs))
+
+        for attr in attrs:
+            setattr(self, attr, kwargs.get(attr[1:], None))
+
+    # -------------------------------------------------------------------------
+    def _parse_header(self, stream):
+        rows, cols, entries, format, field, symmetry = \
+            self.__class__.info(stream)
+        self._init_attrs(rows=rows, cols=cols, entries=entries, format=format,
+                         field=field, symmetry=symmetry)
+
+    # -------------------------------------------------------------------------
+    def _parse_body(self, stream):
+        rows, cols, entries, format, field, symm = (self.rows, self.cols,
+                                                    self.entries, self.format,
+                                                    self.field, self.symmetry)
+
+        try:
+            from scipy.sparse import coo_matrix
+        except ImportError:
+            coo_matrix = None
+
+        dtype = self.DTYPES_BY_FIELD.get(field, None)
+
+        has_symmetry = self.has_symmetry
+        is_integer = field == self.FIELD_INTEGER 
+        is_unsigned_integer = field == self.FIELD_UNSIGNED
+        is_complex = field == self.FIELD_COMPLEX
+        is_skew = symm == self.SYMMETRY_SKEW_SYMMETRIC
+        is_herm = symm == self.SYMMETRY_HERMITIAN
+        is_pattern = field == self.FIELD_PATTERN
+
+        if format == self.FORMAT_ARRAY:
+            a = zeros((rows, cols), dtype=dtype)
+            line = 1
+            i, j = 0, 0
+            if is_skew:
+                a[i, j] = 0
+                if i < rows - 1:
+                    i += 1
+            while line:
+                line = stream.readline()
+                if not line or line.startswith(b'%'):
+                    continue
+                if is_integer:
+                    aij = int(line)
+                elif is_unsigned_integer:
+                    aij = int(line)
+                elif is_complex:
+                    aij = complex(*map(float, line.split()))
+                else:
+                    aij = float(line)
+                a[i, j] = aij
+                if has_symmetry and i != j:
+                    if is_skew:
+                        a[j, i] = -aij
+                    elif is_herm:
+                        a[j, i] = conj(aij)
+                    else:
+                        a[j, i] = aij
+                if i < rows-1:
+                    i = i + 1
+                else:
+                    j = j + 1
+                    if not has_symmetry:
+                        i = 0
+                    else:
+                        i = j
+                        if is_skew:
+                            a[i, j] = 0
+                            if i < rows-1:
+                                i += 1     
+                                
+            if is_skew:
+                if not (i in [0, j] and j == cols - 1):
+                    raise ValueError("Parse error, did not read all lines.")
+            else:
+                if not (i in [0, j] and j == cols):
+                    raise ValueError("Parse error, did not read all lines.")
+
+        elif format == self.FORMAT_COORDINATE and coo_matrix is None:
+            # Read sparse matrix to dense when coo_matrix is not available.
+            a = zeros((rows, cols), dtype=dtype)
+            line = 1
+            k = 0
+            while line:
+                line = stream.readline()
+                if not line or line.startswith(b'%'):
+                    continue
+                l = line.split()
+                i, j = map(int, l[:2])
+                i, j = i-1, j-1
+                if is_integer:
+                    aij = int(l[2])
+                elif is_unsigned_integer:
+                    aij = int(l[2])
+                elif is_complex:
+                    aij = complex(*map(float, l[2:]))
+                else:
+                    aij = float(l[2])
+                a[i, j] = aij
+                if has_symmetry and i != j:
+                    if is_skew:
+                        a[j, i] = -aij
+                    elif is_herm:
+                        a[j, i] = conj(aij)
+                    else:
+                        a[j, i] = aij
+                k = k + 1
+            if not k == entries:
+                ValueError("Did not read all entries")
+
+        elif format == self.FORMAT_COORDINATE:
+            # Read sparse COOrdinate format
+
+            if entries == 0:
+                # empty matrix
+                return coo_matrix((rows, cols), dtype=dtype)
+
+            I = zeros(entries, dtype='intc')
+            J = zeros(entries, dtype='intc')
+            if is_pattern:
+                V = ones(entries, dtype='int8')
+            elif is_integer:
+                V = zeros(entries, dtype='intp')
+            elif is_unsigned_integer:
+                V = zeros(entries, dtype='uint64')
+            elif is_complex:
+                V = zeros(entries, dtype='complex')
+            else:
+                V = zeros(entries, dtype='float')
+
+            entry_number = 0
+            for line in stream:
+                if not line or line.startswith(b'%'):
+                    continue
+
+                if entry_number+1 > entries:
+                    raise ValueError("'entries' in header is smaller than "
+                                     "number of entries")
+                l = line.split()
+                I[entry_number], J[entry_number] = map(int, l[:2])
+
+                if not is_pattern:
+                    if is_integer:
+                        V[entry_number] = int(l[2])
+                    elif is_unsigned_integer:
+                        V[entry_number] = int(l[2])
+                    elif is_complex:
+                        V[entry_number] = complex(*map(float, l[2:]))
+                    else:
+                        V[entry_number] = float(l[2])
+                entry_number += 1
+            if entry_number < entries:
+                raise ValueError("'entries' in header is larger than "
+                                 "number of entries")
+
+            I -= 1  # adjust indices (base 1 -> base 0)
+            J -= 1
+
+            if has_symmetry:
+                mask = (I != J)       # off diagonal mask
+                od_I = I[mask]
+                od_J = J[mask]
+                od_V = V[mask]
+
+                I = concatenate((I, od_J))
+                J = concatenate((J, od_I))
+
+                if is_skew:
+                    od_V *= -1
+                elif is_herm:
+                    od_V = od_V.conjugate()
+
+                V = concatenate((V, od_V))
+
+            a = coo_matrix((V, (I, J)), shape=(rows, cols), dtype=dtype)
+        else:
+            raise NotImplementedError(format)
+
+        return a
+
+    #  ------------------------------------------------------------------------
+    def _write(self, stream, a, comment='', field=None, precision=None,
+               symmetry=None):
+        if isinstance(a, list) or isinstance(a, ndarray) or \
+           isinstance(a, tuple) or hasattr(a, '__array__'):
+            rep = self.FORMAT_ARRAY
+            a = asarray(a)
+            if len(a.shape) != 2:
+                raise ValueError('Expected 2 dimensional array')
+            rows, cols = a.shape
+
+            if field is not None:
+
+                if field == self.FIELD_INTEGER:
+                    if not can_cast(a.dtype, 'intp'):
+                        raise OverflowError("mmwrite does not support integer "
+                                            "dtypes larger than native 'intp'.")
+                    a = a.astype('intp')
+                elif field == self.FIELD_REAL:
+                    if a.dtype.char not in 'fd':
+                        a = a.astype('d')
+                elif field == self.FIELD_COMPLEX:
+                    if a.dtype.char not in 'FD':
+                        a = a.astype('D')
+
+        else:
+            if not isspmatrix(a):
+                raise ValueError('unknown matrix type: %s' % type(a))
+            
+            rep = 'coordinate'
+            rows, cols = a.shape
+
+        typecode = a.dtype.char
+
+        if precision is None:
+            if typecode in 'fF':
+                precision = 8
+            else:
+                precision = 16
+        if field is None:
+            kind = a.dtype.kind
+            if kind == 'i':
+                if not can_cast(a.dtype, 'intp'):
+                    raise OverflowError("mmwrite does not support integer "
+                                        "dtypes larger than native 'intp'.")
+                field = 'integer'
+            elif kind == 'f':
+                field = 'real'
+            elif kind == 'c':
+                field = 'complex'
+            elif kind == 'u':
+                field = 'unsigned-integer'
+            else:
+                raise TypeError('unexpected dtype kind ' + kind)
+
+        if symmetry is None:
+            symmetry = self._get_symmetry(a)
+
+        # validate rep, field, and symmetry
+        self.__class__._validate_format(rep)
+        self.__class__._validate_field(field)
+        self.__class__._validate_symmetry(symmetry)
+
+        # write initial header line
+        stream.write(asbytes('%%MatrixMarket matrix {0} {1} {2}\n'.format(rep,
+            field, symmetry)))
+
+        # write comments
+        for line in comment.split('\n'):
+            stream.write(asbytes('%%%s\n' % (line)))
+
+        template = self._field_template(field, precision)
+        # write dense format
+        if rep == self.FORMAT_ARRAY:
+            # write shape spec
+            stream.write(asbytes('%i %i\n' % (rows, cols)))
+
+            if field in (self.FIELD_INTEGER, self.FIELD_REAL, self.FIELD_UNSIGNED):
+                if symmetry == self.SYMMETRY_GENERAL:
+                    for j in range(cols):
+                        for i in range(rows):
+                            stream.write(asbytes(template % a[i, j]))
+                            
+                elif symmetry == self.SYMMETRY_SKEW_SYMMETRIC:
+                    for j in range(cols):
+                        for i in range(j + 1, rows):
+                            stream.write(asbytes(template % a[i, j]))
+                            
+                else:
+                    for j in range(cols):
+                        for i in range(j, rows):
+                            stream.write(asbytes(template % a[i, j]))
+
+            elif field == self.FIELD_COMPLEX:
+
+                if symmetry == self.SYMMETRY_GENERAL:
+                    for j in range(cols):
+                        for i in range(rows):
+                            aij = a[i, j]
+                            stream.write(asbytes(template % (real(aij),
+                                                             imag(aij))))
+                else:
+                    for j in range(cols):
+                        for i in range(j, rows):
+                            aij = a[i, j]
+                            stream.write(asbytes(template % (real(aij),
+                                                             imag(aij))))
+
+            elif field == self.FIELD_PATTERN:
+                raise ValueError('pattern type inconsisted with dense format')
+
+            else:
+                raise TypeError('Unknown field type %s' % field)
+
+        # write sparse format
+        else:
+            coo = a.tocoo()  # convert to COOrdinate format
+
+            # if symmetry format used, remove values above main diagonal
+            if symmetry != self.SYMMETRY_GENERAL:
+                lower_triangle_mask = coo.row >= coo.col
+                coo = coo_matrix((coo.data[lower_triangle_mask],
+                                 (coo.row[lower_triangle_mask],
+                                  coo.col[lower_triangle_mask])),
+                                 shape=coo.shape)
+
+            # write shape spec
+            stream.write(asbytes('%i %i %i\n' % (rows, cols, coo.nnz)))
+
+            template = self._field_template(field, precision-1)
+
+            if field == self.FIELD_PATTERN:
+                for r, c in zip(coo.row+1, coo.col+1):
+                    stream.write(asbytes("%i %i\n" % (r, c)))
+            elif field in (self.FIELD_INTEGER, self.FIELD_REAL, self.FIELD_UNSIGNED):
+                for r, c, d in zip(coo.row+1, coo.col+1, coo.data):
+                    stream.write(asbytes(("%i %i " % (r, c)) +
+                                         (template % d)))
+            elif field == self.FIELD_COMPLEX:
+                for r, c, d in zip(coo.row+1, coo.col+1, coo.data):
+                    stream.write(asbytes(("%i %i " % (r, c)) +
+                                         (template % (d.real, d.imag))))
+            else:
+                raise TypeError('Unknown field type %s' % field)
+
+
+def _is_fromfile_compatible(stream):
+    """
+    Check whether `stream` is compatible with numpy.fromfile.
+
+    Passing a gzipped file object to ``fromfile/fromstring`` doesn't work with
+    Python3.
+    """
+    if sys.version_info[0] < 3:
+        return True
+
+    bad_cls = []
+    try:
+        import gzip
+        bad_cls.append(gzip.GzipFile)
+    except ImportError:
+        pass
+    try:
+        import bz2
+        bad_cls.append(bz2.BZ2File)
+    except ImportError:
+        pass
+
+    bad_cls = tuple(bad_cls)
+    return not isinstance(stream, bad_cls)
+
+
+# -----------------------------------------------------------------------------
+if __name__ == '__main__':
+    import time
+    for filename in sys.argv[1:]:
+        print('Reading', filename, '...', end=' ')
+        sys.stdout.flush()
+        t = time.time()
+        mmread(filename)
+        print('took %s seconds' % (time.time() - t))
@@ -0,0 +1,20 @@
+from __future__ import division, print_function, absolute_import
+
+
+def configuration(parent_package='', top_path=None):
+    """Return the numpy.distutils ``Configuration`` for the ``io`` package.
+
+    It registers the ``_test_fortran`` extension, the ``tests`` data
+    directory and the ``matlab``, ``arff`` and ``harwell_boeing``
+    subpackages.
+    """
+    from numpy.distutils.misc_util import Configuration
+
+    cfg = Configuration('io', parent_package, top_path)
+    cfg.add_extension(
+        '_test_fortran',
+        sources=['_test_fortran.pyf', '_test_fortran.f'],
+    )
+    cfg.add_data_dir('tests')
+    for subpackage in ('matlab', 'arff', 'harwell_boeing'):
+        cfg.add_subpackage(subpackage)
+    return cfg
+
+
+if __name__ == '__main__':
+    # When run directly, pass the configuration built above to the
+    # numpy.distutils setup() function.
+    from numpy.distutils.core import setup
+    setup(**configuration(top_path='').todict())
@@ -0,0 +1,159 @@
+''' Tests for fortran sequential files '''
+
+import tempfile
+import shutil
+from os import path, unlink
+from glob import iglob
+import re
+
+from numpy.testing import assert_equal, assert_allclose
+import numpy as np
+
+from scipy.io import FortranFile, _test_fortran
+
+
+DATA_PATH = path.join(path.dirname(__file__), 'data')
+
+
+def test_fortranfiles_read():
+    """Read every ``fortran-<dtype>-<a>x<b>x<c>.dat`` data file and compare
+    its single record with the ``arange`` values the test expects.
+
+    The dtype and the three dimensions are parsed from the file name.
+    """
+    for filename in iglob(path.join(DATA_PATH, "fortran-*-*x*x*.dat")):
+        # The dot before "dat" is escaped so it only matches a literal dot.
+        m = re.search(r'fortran-([^-]+)-(\d+)x(\d+)x(\d+)\.dat', filename, re.I)
+        if not m:
+            raise RuntimeError("Couldn't match %s filename to regex" % filename)
+
+        dims = (int(m.group(2)), int(m.group(3)), int(m.group(4)))
+
+        # An 's' in the dtype part of the file name stands in for '<'.
+        dtype = m.group(1).replace('s', '<')
+
+        # The context manager closes the file even if reading or reshaping
+        # raises, so a failing case does not leak the handle.
+        with FortranFile(filename, 'r', '<u4') as f:
+            data = f.read_record(dtype=dtype).reshape(dims, order='F')
+
+        expected = np.arange(np.prod(dims)).reshape(dims).astype(dtype)
+        assert_equal(data, expected)
+
+
+def test_fortranfiles_mixed_record():
+    """A record read with a comma-separated mixed dtype string exposes its
+    parts as the fields ``f0`` .. ``f3``."""
+    mixed_file = path.join(DATA_PATH, "fortran-mixed.dat")
+    with FortranFile(mixed_file, 'r', '<u4') as fobj:
+        rec = fobj.read_record('<i4,<f4,<i8,(2)<f8')
+
+    # Integer fields are compared exactly, floating point ones approximately.
+    first_int = rec['f0'][0]
+    assert_equal(first_int, 1)
+    first_float = rec['f1'][0]
+    assert_allclose(first_float, 2.3)
+    second_int = rec['f2'][0]
+    assert_equal(second_int, 4)
+    float_pair = rec['f3'][0]
+    assert_allclose(float_pair, [5.6, 7.8])
+
+
+def test_fortranfiles_write():
+    """Write ``arange`` data with FortranFile and require the bytes on disk
+    to equal those of the matching ``fortran-<dtype>-<a>x<b>x<c>.dat`` file.
+
+    The dtype and dimensions are parsed from each reference file name.
+    """
+    for filename in iglob(path.join(DATA_PATH, "fortran-*-*x*x*.dat")):
+        # The dot before "dat" is escaped so it only matches a literal dot.
+        m = re.search(r'fortran-([^-]+)-(\d+)x(\d+)x(\d+)\.dat', filename, re.I)
+        if not m:
+            raise RuntimeError("Couldn't match %s filename to regex" % filename)
+        dims = (int(m.group(2)), int(m.group(3)), int(m.group(4)))
+
+        # An 's' in the dtype part of the file name stands in for '<'.
+        dtype = m.group(1).replace('s', '<')
+        data = np.arange(np.prod(dims)).reshape(dims).astype(dtype)
+
+        tmpdir = tempfile.mkdtemp()
+        try:
+            testFile = path.join(tmpdir, path.basename(filename))
+            # Every handle is closed by its context manager before the
+            # comparison and before the temporary directory is removed,
+            # even when writing or the assertion fails.
+            with FortranFile(testFile, 'w', '<u4') as f:
+                f.write_record(data.T)
+            with open(filename, 'rb') as originalfile:
+                original_bytes = originalfile.read()
+            with open(testFile, 'rb') as newfile:
+                new_bytes = newfile.read()
+            assert_equal(original_bytes, new_bytes, err_msg=filename)
+        finally:
+            shutil.rmtree(tmpdir)
+
+
+def test_fortranfile_read_mixed_record():
+    # The data file fortran-3x3d-2i.dat contains the program that
+    # produced it at the end.
+    #
+    # double precision :: a(3,3)
+    # integer :: b(2)
+    # ...
+    # open(1, file='fortran-3x3d-2i.dat', form='unformatted')
+    # write(1) a, b
+    # close(1)
+    #
+
+    data_file = path.join(DATA_PATH, "fortran-3x3d-2i.dat")
+    with FortranFile(data_file, 'r', '<u4') as fobj:
+        parts = fobj.read_record('(3,3)f8', '2i4')
+
+    # Expected values; the comparison is made against their transposes.
+    expected_a = np.arange(3*3).reshape(3, 3).astype(np.double)
+    expected_b = np.array([-1, -2], dtype=np.int32)
+
+    assert_equal(parts[0], expected_a.T)
+    assert_equal(parts[1], expected_b.T)
+
+
+def test_fortranfile_write_mixed_record(tmpdir):
+    """Several values written as one record are read back unchanged when
+    the matching dtypes are given to ``read_record``."""
+    tf = path.join(str(tmpdir), 'test.dat')
+
+    scalar_values = (np.float32(2), np.float32(3), np.int32(100))
+    # Drawn in the same order as the dtype specs they are paired with below.
+    array_values = (
+        np.random.randint(255, size=[4]).astype(np.float32),
+        np.random.randint(255, size=[3, 3]).astype(np.float32),
+        np.random.randint(255, size=[8]).astype(np.int32),
+    )
+    records = [
+        (('f4', 'f4', 'i4'), scalar_values),
+        (('4f4', '(3,3)f4', '8i4'), array_values),
+    ]
+
+    for dtypes, written in records:
+        with FortranFile(tf, 'w') as f:
+            f.write_record(*written)
+
+        with FortranFile(tf, 'r') as f:
+            read_back = f.read_record(*dtypes)
+
+        assert_equal(len(written), len(read_back))
+
+        for expected, actual in zip(written, read_back):
+            assert_equal(actual, expected)
+
+
+def test_fortran_roundtrip(tmpdir):
+    # Write records with FortranFile, then read them back twice: once with
+    # the read_unformatted_* routines of the _test_fortran module and once
+    # with FortranFile itself.  Both results must equal the original data.
+    filename = path.join(str(tmpdir), 'test.dat')
+
+    # Seed the global numpy random state before drawing the test data.
+    np.random.seed(1)
+
+    # double precision
+    m, n, k = 5, 3, 2
+    a = np.random.randn(m, n, k)
+    with FortranFile(filename, 'w') as f:
+        # The transpose is written; the FortranFile read below is
+        # transposed back before comparing.
+        f.write_record(a.T)
+    a2 = _test_fortran.read_unformatted_double(m, n, k, filename)
+    with FortranFile(filename, 'r') as f:
+        a3 = f.read_record('(2,3,5)f8').T
+    assert_equal(a2, a)
+    assert_equal(a3, a)
+
+    # integer
+    m, n, k = 5, 3, 2
+    # The random normal samples are cast to int32 to obtain integer data.
+    a = np.random.randn(m, n, k).astype(np.int32)
+    with FortranFile(filename, 'w') as f:
+        f.write_record(a.T)
+    a2 = _test_fortran.read_unformatted_int(m, n, k, filename)
+    with FortranFile(filename, 'r') as f:
+        a3 = f.read_record('(2,3,5)i4').T
+    assert_equal(a2, a)
+    assert_equal(a3, a)
+
+    # mixed
+    m, n, k = 5, 3, 2
+    a = np.random.randn(m, n)
+    b = np.random.randn(k).astype(np.intc)
+    with FortranFile(filename, 'w') as f:
+        # Two arrays go into a single record.
+        f.write_record(a.T, b.T)
+    a2, b2 = _test_fortran.read_unformatted_mixed(m, n, k, filename)
+    with FortranFile(filename, 'r') as f:
+        # One dtype spec per array that was written into the record.
+        a3, b3 = f.read_record('(3,5)f8', '2i4')
+        a3 = a3.T
+    assert_equal(a2, a)
+    assert_equal(a3, a)
+    assert_equal(b2, b)
+    assert_equal(b3, b)
@@ -0,0 +1,442 @@
+from __future__ import division, print_function, absolute_import
+
+from os import path
+import warnings
+
+DATA_PATH = path.join(path.dirname(__file__), 'data')
+
+import numpy as np
+from numpy.testing import (assert_equal, assert_array_equal,
+    assert_)
+from scipy._lib._numpy_compat import suppress_warnings
+
+from scipy.io.idl import readsav
+
+
+def object_array(*args):
+    """Constructs a 1-D numpy array of objects, one element per argument.
+
+    Elements are assigned one at a time into a pre-allocated object array,
+    so an argument that is itself a sequence is stored as a single element.
+    """
+    array = np.empty(len(args), dtype=object)
+    for i, item in enumerate(args):
+        array[i] = item
+    return array
+
+
+def assert_identical(a, b):
+    """Assert that `a` and `b` agree in value AND in type.
+
+    When `b` is a ``str`` the Python types are compared; otherwise the
+    numpy scalar types obtained through ``np.asarray`` are compared.
+    """
+    assert_equal(a, b)
+    if type(b) is not str:
+        type_a = np.asarray(a).dtype.type
+        type_b = np.asarray(b).dtype.type
+        assert_equal(type_a, type_b)
+        return
+    assert_equal(type(a), type(b))
+
+
+def assert_array_identical(a, b):
+    """Assert that two arrays agree in their values AND in their dtype type"""
+    assert_array_equal(a, b)
+    type_a, type_b = a.dtype.type, b.dtype.type
+    assert_equal(type_a, type_b)
+
+
+# Define vectorized ID function for pointer arrays: it applies the builtin
+# id() to every element, so object identities can be compared element-wise.
+vect_id = np.vectorize(id)
+
+
+class TestIdict:
+    # readsav must fill and return the dictionary passed through ``idict``
+
+    def test_idict(self):
+        custom_dict = {'a': np.int16(999)}
+        original_id = id(custom_dict)
+        sav_file = path.join(DATA_PATH, 'scalar_byte.sav')
+        result = readsav(sav_file, idict=custom_dict, verbose=False)
+
+        # The very same dictionary object comes back ...
+        assert_equal(original_id, id(result))
+        # ... still holding the entry it had before the call ...
+        assert_('a' in result)
+        assert_identical(result['a'], np.int16(999))
+        # ... plus the variable read from the file.
+        assert_identical(result['i8u'], np.uint8(234))
+
+
+class TestScalars:
+    # Scalar values must be read in with the correct value and type
+
+    @staticmethod
+    def _load(name):
+        # Read the named file from the test data directory.
+        return readsav(path.join(DATA_PATH, name), verbose=False)
+
+    def test_byte(self):
+        sav = self._load('scalar_byte.sav')
+        assert_identical(sav.i8u, np.uint8(234))
+
+    def test_int16(self):
+        sav = self._load('scalar_int16.sav')
+        assert_identical(sav.i16s, np.int16(-23456))
+
+    def test_int32(self):
+        sav = self._load('scalar_int32.sav')
+        assert_identical(sav.i32s, np.int32(-1234567890))
+
+    def test_float32(self):
+        sav = self._load('scalar_float32.sav')
+        assert_identical(sav.f32, np.float32(-3.1234567e+37))
+
+    def test_float64(self):
+        sav = self._load('scalar_float64.sav')
+        assert_identical(sav.f64, np.float64(-1.1976931348623157e+307))
+
+    def test_complex32(self):
+        sav = self._load('scalar_complex32.sav')
+        assert_identical(sav.c32, np.complex64(3.124442e13-2.312442e31j))
+
+    def test_bytes(self):
+        sav = self._load('scalar_string.sav')
+        expected = np.bytes_("The quick brown fox jumps over the lazy python")
+        assert_identical(sav.s, expected)
+
+    def test_structure(self):
+        # Placeholder: no check is performed here.
+        pass
+
+    def test_complex64(self):
+        sav = self._load('scalar_complex64.sav')
+        expected = np.complex128(1.1987253647623157e+112-5.1987258887729157e+307j)
+        assert_identical(sav.c64, expected)
+
+    def test_heap_pointer(self):
+        # Placeholder: no check is performed here.
+        pass
+
+    def test_object_reference(self):
+        # Placeholder: no check is performed here.
+        pass
+
+    def test_uint16(self):
+        sav = self._load('scalar_uint16.sav')
+        assert_identical(sav.i16u, np.uint16(65511))
+
+    def test_uint32(self):
+        sav = self._load('scalar_uint32.sav')
+        assert_identical(sav.i32u, np.uint32(4294967233))
+
+    def test_int64(self):
+        sav = self._load('scalar_int64.sav')
+        assert_identical(sav.i64s, np.int64(-9223372036854774567))
+
+    def test_uint64(self):
+        sav = self._load('scalar_uint64.sav')
+        assert_identical(sav.i64u, np.uint64(18446744073709529285))
+
+
+class TestCompressed(TestScalars):
+    # Test that compressed .sav files can be read in
+    # NOTE(review): because this class subclasses TestScalars, the inherited
+    # scalar tests are presumably collected and run a second time under this
+    # class -- confirm that this is intended.
+
+    def test_compressed(self):
+        s = readsav(path.join(DATA_PATH, 'various_compressed.sav'), verbose=False)
+
+        # A selection of scalars, an array shape and structure fields is
+        # checked from the single compressed file.
+        assert_identical(s.i8u, np.uint8(234))
+        assert_identical(s.f32, np.float32(-3.1234567e+37))
+        assert_identical(s.c64, np.complex128(1.1987253647623157e+112-5.1987258887729157e+307j))
+        assert_equal(s.array5d.shape, (4, 3, 4, 6, 5))
+        assert_identical(s.arrays.a[0], np.array([1, 2, 3], dtype=np.int16))
+        assert_identical(s.arrays.b[0], np.array([4., 5., 6., 7.], dtype=np.float32))
+        assert_identical(s.arrays.c[0], np.array([np.complex64(1+2j), np.complex64(7+8j)]))
+        assert_identical(s.arrays.d[0], np.array([b"cheese", b"bacon", b"spam"], dtype=object))
+
+
+class TestArrayDimensions:
+    # Multi-dimensional arrays must be read in with the correct dimensions
+
+    @staticmethod
+    def _load(name):
+        # Read the named file from the test data directory.
+        return readsav(path.join(DATA_PATH, name), verbose=False)
+
+    def test_1d(self):
+        shape = self._load('array_float32_1d.sav').array1d.shape
+        assert_equal(shape, (123, ))
+
+    def test_2d(self):
+        shape = self._load('array_float32_2d.sav').array2d.shape
+        assert_equal(shape, (22, 12))
+
+    def test_3d(self):
+        shape = self._load('array_float32_3d.sav').array3d.shape
+        assert_equal(shape, (11, 22, 12))
+
+    def test_4d(self):
+        shape = self._load('array_float32_4d.sav').array4d.shape
+        assert_equal(shape, (4, 5, 8, 7))
+
+    def test_5d(self):
+        shape = self._load('array_float32_5d.sav').array5d.shape
+        assert_equal(shape, (4, 3, 4, 6, 5))
+
+    def test_6d(self):
+        shape = self._load('array_float32_6d.sav').array6d.shape
+        assert_equal(shape, (3, 6, 4, 5, 3, 4))
+
+    def test_7d(self):
+        shape = self._load('array_float32_7d.sav').array7d.shape
+        assert_equal(shape, (2, 1, 2, 3, 4, 3, 2))
+
+    def test_8d(self):
+        shape = self._load('array_float32_8d.sav').array8d.shape
+        assert_equal(shape, (4, 3, 2, 1, 2, 3, 5, 4))
+
+
+class TestStructures:
+    # Test that structure variables are read in with the expected field
+    # values, types and shapes
+
+    def test_scalars(self):
+        # Each field is compared, value and type, with an array built from
+        # the expected scalar.
+        s = readsav(path.join(DATA_PATH, 'struct_scalars.sav'), verbose=False)
+        assert_identical(s.scalars.a, np.array(np.int16(1)))
+        assert_identical(s.scalars.b, np.array(np.int32(2)))
+        assert_identical(s.scalars.c, np.array(np.float32(3.)))
+        assert_identical(s.scalars.d, np.array(np.float64(4.)))
+        assert_identical(s.scalars.e, np.array([b"spam"], dtype=object))
+        assert_identical(s.scalars.f, np.array(np.complex64(-1.+3j)))
+
+    def test_scalars_replicated(self):
+        # Every field is expected to hold the same scalar five times.
+        s = readsav(path.join(DATA_PATH, 'struct_scalars_replicated.sav'), verbose=False)
+        assert_identical(s.scalars_rep.a, np.repeat(np.int16(1), 5))
+        assert_identical(s.scalars_rep.b, np.repeat(np.int32(2), 5))
+        assert_identical(s.scalars_rep.c, np.repeat(np.float32(3.), 5))
+        assert_identical(s.scalars_rep.d, np.repeat(np.float64(4.), 5))
+        assert_identical(s.scalars_rep.e, np.repeat(b"spam", 5).astype(object))
+        assert_identical(s.scalars_rep.f, np.repeat(np.complex64(-1.+3j), 5))
+
+    def test_scalars_replicated_3d(self):
+        # Same as above, with the 24 repeated values arranged as (4, 3, 2).
+        s = readsav(path.join(DATA_PATH, 'struct_scalars_replicated_3d.sav'), verbose=False)
+        assert_identical(s.scalars_rep.a, np.repeat(np.int16(1), 24).reshape(4, 3, 2))
+        assert_identical(s.scalars_rep.b, np.repeat(np.int32(2), 24).reshape(4, 3, 2))
+        assert_identical(s.scalars_rep.c, np.repeat(np.float32(3.), 24).reshape(4, 3, 2))
+        assert_identical(s.scalars_rep.d, np.repeat(np.float64(4.), 24).reshape(4, 3, 2))
+        assert_identical(s.scalars_rep.e, np.repeat(b"spam", 24).reshape(4, 3, 2).astype(object))
+        assert_identical(s.scalars_rep.f, np.repeat(np.complex64(-1.+3j), 24).reshape(4, 3, 2))
+
+    def test_arrays(self):
+        # The first element of each field is itself an array.
+        s = readsav(path.join(DATA_PATH, 'struct_arrays.sav'), verbose=False)
+        assert_array_identical(s.arrays.a[0], np.array([1, 2, 3], dtype=np.int16))
+        assert_array_identical(s.arrays.b[0], np.array([4., 5., 6., 7.], dtype=np.float32))
+        assert_array_identical(s.arrays.c[0], np.array([np.complex64(1+2j), np.complex64(7+8j)]))
+        assert_array_identical(s.arrays.d[0], np.array([b"cheese", b"bacon", b"spam"], dtype=object))
+
+    def test_arrays_replicated(self):
+        s = readsav(path.join(DATA_PATH, 'struct_arrays_replicated.sav'), verbose=False)
+
+        # Check column types
+        assert_(s.arrays_rep.a.dtype.type is np.object_)
+        assert_(s.arrays_rep.b.dtype.type is np.object_)
+        assert_(s.arrays_rep.c.dtype.type is np.object_)
+        assert_(s.arrays_rep.d.dtype.type is np.object_)
+
+        # Check column shapes
+        assert_equal(s.arrays_rep.a.shape, (5, ))
+        assert_equal(s.arrays_rep.b.shape, (5, ))
+        assert_equal(s.arrays_rep.c.shape, (5, ))
+        assert_equal(s.arrays_rep.d.shape, (5, ))
+
+        # Check values
+        for i in range(5):
+            assert_array_identical(s.arrays_rep.a[i],
+                                   np.array([1, 2, 3], dtype=np.int16))
+            assert_array_identical(s.arrays_rep.b[i],
+                                   np.array([4., 5., 6., 7.], dtype=np.float32))
+            assert_array_identical(s.arrays_rep.c[i],
+                                   np.array([np.complex64(1+2j),
+                                             np.complex64(7+8j)]))
+            assert_array_identical(s.arrays_rep.d[i],
+                                   np.array([b"cheese", b"bacon", b"spam"],
+                                            dtype=object))
+
+    def test_arrays_replicated_3d(self):
+        s = readsav(path.join(DATA_PATH, 'struct_arrays_replicated_3d.sav'), verbose=False)
+
+        # Check column types
+        assert_(s.arrays_rep.a.dtype.type is np.object_)
+        assert_(s.arrays_rep.b.dtype.type is np.object_)
+        assert_(s.arrays_rep.c.dtype.type is np.object_)
+        assert_(s.arrays_rep.d.dtype.type is np.object_)
+
+        # Check column shapes
+        assert_equal(s.arrays_rep.a.shape, (4, 3, 2))
+        assert_equal(s.arrays_rep.b.shape, (4, 3, 2))
+        assert_equal(s.arrays_rep.c.shape, (4, 3, 2))
+        assert_equal(s.arrays_rep.d.shape, (4, 3, 2))
+
+        # Check values
+        for i in range(4):
+            for j in range(3):
+                for k in range(2):
+                    assert_array_identical(s.arrays_rep.a[i, j, k],
+                                           np.array([1, 2, 3], dtype=np.int16))
+                    assert_array_identical(s.arrays_rep.b[i, j, k],
+                                           np.array([4., 5., 6., 7.],
+                                                    dtype=np.float32))
+                    assert_array_identical(s.arrays_rep.c[i, j, k],
+                                           np.array([np.complex64(1+2j),
+                                                     np.complex64(7+8j)]))
+                    assert_array_identical(s.arrays_rep.d[i, j, k],
+                                           np.array([b"cheese", b"bacon", b"spam"],
+                                                    dtype=object))
+
+    def test_inheritance(self):
+        # All four fields checked here are expected to be int16 arrays.
+        s = readsav(path.join(DATA_PATH, 'struct_inherit.sav'), verbose=False)
+        assert_identical(s.fc.x, np.array([0], dtype=np.int16))
+        assert_identical(s.fc.y, np.array([0], dtype=np.int16))
+        assert_identical(s.fc.r, np.array([0], dtype=np.int16))
+        assert_identical(s.fc.c, np.array([4], dtype=np.int16))
+
+    def test_arrays_corrupt_idl80(self):
+        # test byte arrays with missing nbyte information from IDL 8.0 .sav file
+        with suppress_warnings() as sup:
+            # The UserWarning with this message is filtered while reading.
+            sup.filter(UserWarning, "Not able to verify number of bytes from header")
+            s = readsav(path.join(DATA_PATH,'struct_arrays_byte_idl80.sav'),
+                        verbose=False)
+
+        assert_identical(s.y.x[0], np.array([55,66], dtype=np.uint8))
+
+
+class TestPointers:
+    # Pointers in .sav files must produce references to the same object in Python
+
+    def test_pointers(self):
+        sav = readsav(path.join(DATA_PATH, 'scalar_heap_pointer.sav'), verbose=False)
+        first, second = sav.c64_pointer1, sav.c64_pointer2
+
+        # Both variables hold the expected value and type ...
+        assert_identical(first, np.complex128(1.1987253647623157e+112-5.1987258887729157e+307j))
+        assert_identical(second, np.complex128(1.1987253647623157e+112-5.1987258887729157e+307j))
+        # ... and they are one and the same Python object.
+        assert_(sav.c64_pointer1 is sav.c64_pointer2)
+
+
+class TestPointerArray:
+    # Pointers in arrays must be read in correctly: expected shape, every
+    # element equal to float32(4.) and every element the same object
+
+    @staticmethod
+    def _load(name):
+        # Read the named file from the test data directory.
+        return readsav(path.join(DATA_PATH, name), verbose=False)
+
+    def test_1d(self):
+        sav = self._load('array_float32_pointer_1d.sav')
+        assert_equal(sav.array1d.shape, (123, ))
+        all_equal = np.all(sav.array1d == np.float32(4.))
+        assert_(all_equal)
+        same_object = np.all(vect_id(sav.array1d) == id(sav.array1d[0]))
+        assert_(same_object)
+
+    def test_2d(self):
+        sav = self._load('array_float32_pointer_2d.sav')
+        assert_equal(sav.array2d.shape, (22, 12))
+        all_equal = np.all(sav.array2d == np.float32(4.))
+        assert_(all_equal)
+        same_object = np.all(vect_id(sav.array2d) == id(sav.array2d[0,0]))
+        assert_(same_object)
+
+    def test_3d(self):
+        sav = self._load('array_float32_pointer_3d.sav')
+        assert_equal(sav.array3d.shape, (11, 22, 12))
+        all_equal = np.all(sav.array3d == np.float32(4.))
+        assert_(all_equal)
+        same_object = np.all(vect_id(sav.array3d) == id(sav.array3d[0,0,0]))
+        assert_(same_object)
+
+    def test_4d(self):
+        sav = self._load('array_float32_pointer_4d.sav')
+        assert_equal(sav.array4d.shape, (4, 5, 8, 7))
+        all_equal = np.all(sav.array4d == np.float32(4.))
+        assert_(all_equal)
+        same_object = np.all(vect_id(sav.array4d) == id(sav.array4d[0,0,0,0]))
+        assert_(same_object)
+
+    def test_5d(self):
+        sav = self._load('array_float32_pointer_5d.sav')
+        assert_equal(sav.array5d.shape, (4, 3, 4, 6, 5))
+        all_equal = np.all(sav.array5d == np.float32(4.))
+        assert_(all_equal)
+        same_object = np.all(vect_id(sav.array5d) == id(sav.array5d[0,0,0,0,0]))
+        assert_(same_object)
+
+    def test_6d(self):
+        sav = self._load('array_float32_pointer_6d.sav')
+        assert_equal(sav.array6d.shape, (3, 6, 4, 5, 3, 4))
+        all_equal = np.all(sav.array6d == np.float32(4.))
+        assert_(all_equal)
+        same_object = np.all(vect_id(sav.array6d) == id(sav.array6d[0,0,0,0,0,0]))
+        assert_(same_object)
+
+    def test_7d(self):
+        sav = self._load('array_float32_pointer_7d.sav')
+        assert_equal(sav.array7d.shape, (2, 1, 2, 3, 4, 3, 2))
+        all_equal = np.all(sav.array7d == np.float32(4.))
+        assert_(all_equal)
+        same_object = np.all(vect_id(sav.array7d) == id(sav.array7d[0,0,0,0,0,0,0]))
+        assert_(same_object)
+
+    def test_8d(self):
+        sav = self._load('array_float32_pointer_8d.sav')
+        assert_equal(sav.array8d.shape, (4, 3, 2, 1, 2, 3, 5, 4))
+        all_equal = np.all(sav.array8d == np.float32(4.))
+        assert_(all_equal)
+        same_object = np.all(vect_id(sav.array8d) == id(sav.array8d[0,0,0,0,0,0,0,0]))
+        assert_(same_object)
+
+
+class TestPointerStructures:
+    # Test that structures are correctly read in
+    # In addition to values and types, the tests below compare id() of the
+    # elements to check that fields refer to the same Python object.
+
+    def test_scalars(self):
+        s = readsav(path.join(DATA_PATH, 'struct_pointers.sav'), verbose=False)
+        assert_identical(s.pointers.g, np.array(np.float32(4.), dtype=np.object_))
+        assert_identical(s.pointers.h, np.array(np.float32(4.), dtype=np.object_))
+        # g and h must hold the very same object.
+        assert_(id(s.pointers.g[0]) == id(s.pointers.h[0]))
+
+    def test_pointers_replicated(self):
+        s = readsav(path.join(DATA_PATH, 'struct_pointers_replicated.sav'), verbose=False)
+        assert_identical(s.pointers_rep.g, np.repeat(np.float32(4.), 5).astype(np.object_))
+        assert_identical(s.pointers_rep.h, np.repeat(np.float32(4.), 5).astype(np.object_))
+        # Element by element, g and h must hold the same objects.
+        assert_(np.all(vect_id(s.pointers_rep.g) == vect_id(s.pointers_rep.h)))
+
+    def test_pointers_replicated_3d(self):
+        s = readsav(path.join(DATA_PATH, 'struct_pointers_replicated_3d.sav'), verbose=False)
+        s_expect = np.repeat(np.float32(4.), 24).reshape(4, 3, 2).astype(np.object_)
+        assert_identical(s.pointers_rep.g, s_expect)
+        assert_identical(s.pointers_rep.h, s_expect)
+        # Element by element, g and h must hold the same objects.
+        assert_(np.all(vect_id(s.pointers_rep.g) == vect_id(s.pointers_rep.h)))
+
+    def test_arrays(self):
+        s = readsav(path.join(DATA_PATH, 'struct_pointer_arrays.sav'), verbose=False)
+        assert_array_identical(s.arrays.g[0], np.repeat(np.float32(4.), 2).astype(np.object_))
+        assert_array_identical(s.arrays.h[0], np.repeat(np.float32(4.), 3).astype(np.object_))
+        # Within each array every element is the same object, and that
+        # object is shared between g and h.
+        assert_(np.all(vect_id(s.arrays.g[0]) == id(s.arrays.g[0][0])))
+        assert_(np.all(vect_id(s.arrays.h[0]) == id(s.arrays.h[0][0])))
+        assert_(id(s.arrays.g[0][0]) == id(s.arrays.h[0][0]))
+
+    def test_arrays_replicated(self):
+        s = readsav(path.join(DATA_PATH, 'struct_pointer_arrays_replicated.sav'), verbose=False)
+
+        # Check column types
+        assert_(s.arrays_rep.g.dtype.type is np.object_)
+        assert_(s.arrays_rep.h.dtype.type is np.object_)
+
+        # Check column shapes
+        assert_equal(s.arrays_rep.g.shape, (5, ))
+        assert_equal(s.arrays_rep.h.shape, (5, ))
+
+        # Check values
+        for i in range(5):
+            assert_array_identical(s.arrays_rep.g[i], np.repeat(np.float32(4.), 2).astype(np.object_))
+            assert_array_identical(s.arrays_rep.h[i], np.repeat(np.float32(4.), 3).astype(np.object_))
+            # Identity is compared against the first element of entry 0.
+            assert_(np.all(vect_id(s.arrays_rep.g[i]) == id(s.arrays_rep.g[0][0])))
+            assert_(np.all(vect_id(s.arrays_rep.h[i]) == id(s.arrays_rep.h[0][0])))
+
+    def test_arrays_replicated_3d(self):
+        pth = path.join(DATA_PATH, 'struct_pointer_arrays_replicated_3d.sav')
+        s = readsav(pth, verbose=False)
+
+        # Check column types
+        assert_(s.arrays_rep.g.dtype.type is np.object_)
+        assert_(s.arrays_rep.h.dtype.type is np.object_)
+
+        # Check column shapes
+        assert_equal(s.arrays_rep.g.shape, (4, 3, 2))
+        assert_equal(s.arrays_rep.h.shape, (4, 3, 2))
+
+        # Check values
+        for i in range(4):
+            for j in range(3):
+                for k in range(2):
+                    assert_array_identical(s.arrays_rep.g[i, j, k],
+                            np.repeat(np.float32(4.), 2).astype(np.object_))
+                    assert_array_identical(s.arrays_rep.h[i, j, k],
+                            np.repeat(np.float32(4.), 3).astype(np.object_))
+                    # Identity is compared against the first element of
+                    # entry [0, 0, 0].
+                    assert_(np.all(vect_id(s.arrays_rep.g[i, j, k]) == id(s.arrays_rep.g[0, 0, 0][0])))
+                    assert_(np.all(vect_id(s.arrays_rep.h[i, j, k]) == id(s.arrays_rep.h[0, 0, 0][0])))
+class TestTags:
+    '''Check that a sav file carrying a description tag can be read at all'''
+
+    def test_description(self):
+        sav_file = path.join(DATA_PATH, 'scalar_byte_descr.sav')
+        sav = readsav(sav_file, verbose=False)
+        assert_identical(sav.i8u, np.uint8(234))
+
+
+def test_null_pointer():
+    # Regression test for null pointers: the pointer variable reads back as
+    # None while the other variable in the file keeps its value and type.
+    sav_file = path.join(DATA_PATH, 'null_pointer.sav')
+    sav = readsav(sav_file, verbose=False)
+    assert_identical(sav.point, None)
+    assert_identical(sav.check, np.int16(5))
+
+
+def test_invalid_pointer():
+    # Regression test for invalid pointers (gh-4613).
+
+    # Files found in the wild sometimes contain pointers to a heap variable
+    # that does not exist. Reading such a file is expected to replace the
+    # variable with None and to emit a warning instead of failing. Because
+    # such files are hard to produce artificially, the file used here was
+    # edited to force the pointer reference to be invalid.
+    expected_message = ("Variable referenced by pointer not found in "
+                        "heap: variable will be set to None")
+    sav_file = path.join(DATA_PATH, 'invalid_pointer.sav')
+
+    with warnings.catch_warnings(record=True) as caught:
+        warnings.simplefilter("always")
+        sav = readsav(sav_file, verbose=False)
+
+    # Exactly one warning, carrying the expected text.
+    assert_(len(caught) == 1)
+    assert_(str(caught[0].message) == expected_message)
+    assert_identical(sav['a'], np.array([None, None]))
+
@@ -0,0 +1,673 @@
+from __future__ import division, print_function, absolute_import
+
+from tempfile import mkdtemp, mktemp
+import os
+import shutil
+
+import numpy as np
+from numpy import array, transpose, pi
+from numpy.testing import (assert_equal,
+                           assert_array_equal, assert_array_almost_equal)
+import pytest
+from pytest import raises as assert_raises
+
+import scipy.sparse
+from scipy.io.mmio import mminfo, mmread, mmwrite
+
+parametrize_args = [('integer', 'int'),
+                    ('unsigned-integer', 'uint')]
+
+
+class TestMMIOArray(object):
+    def setup_method(self):
+        self.tmpdir = mkdtemp()
+        self.fn = os.path.join(self.tmpdir, 'testfile.mtx')
+
+    def teardown_method(self):
+        shutil.rmtree(self.tmpdir)
+
+    def check(self, a, info):
+        '''Write `a`, verify the file header against `info`, then compare
+        the data read back with `a` approximately.'''
+        mmwrite(self.fn, a)
+        header = mminfo(self.fn)
+        assert_equal(header, info)
+        round_tripped = mmread(self.fn)
+        assert_array_almost_equal(a, round_tripped)
+
+    def check_exact(self, a, info):
+        '''Write `a`, verify the file header against `info`, then require
+        the data read back to equal `a` exactly.'''
+        mmwrite(self.fn, a)
+        header = mminfo(self.fn)
+        assert_equal(header, info)
+        round_tripped = mmread(self.fn)
+        assert_equal(a, round_tripped)
+
+    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
+    def test_simple_integer(self, typeval, dtype):
+        self.check_exact(array([[1, 2], [3, 4]], dtype=dtype),
+                         (2, 2, 4, 'array', typeval, 'general'))
+
+    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
+    def test_32bit_integer(self, typeval, dtype):
+        a = array([[2**31-1, 2**31-2], [2**31-3, 2**31-4]], dtype=dtype)
+        self.check_exact(a, (2, 2, 4, 'array', typeval, 'general'))
+
+    def test_64bit_integer(self):
+        a = array([[2**31, 2**32], [2**63-2, 2**63-1]], dtype=np.int64)
+        if (np.intp(0).itemsize < 8):
+            assert_raises(OverflowError, mmwrite, self.fn, a)
+        else:
+            self.check_exact(a, (2, 2, 4, 'array', 'integer', 'general'))
+
+    def test_64bit_unsigned_integer(self):
+        a = array([[2**31, 2**32], [2**64-2, 2**64-1]], dtype=np.uint64)
+        self.check_exact(a, (2, 2, 4, 'array', 'unsigned-integer', 'general'))
+
+    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
+    def test_simple_upper_triangle_integer(self, typeval, dtype):
+        self.check_exact(array([[0, 1], [0, 0]], dtype=dtype),
+                         (2, 2, 4, 'array', typeval, 'general'))
+
+    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
+    def test_simple_lower_triangle_integer(self, typeval, dtype):
+        self.check_exact(array([[0, 0], [1, 0]], dtype=dtype),
+                         (2, 2, 4, 'array', typeval, 'general'))
+
+    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
+    def test_simple_rectangular_integer(self, typeval, dtype):
+        self.check_exact(array([[1, 2, 3], [4, 5, 6]], dtype=dtype),
+                         (2, 3, 6, 'array', typeval, 'general'))
+
+    def test_simple_rectangular_float(self):
+        self.check([[1, 2], [3.5, 4], [5, 6]],
+                   (3, 2, 6, 'array', 'real', 'general'))
+
+    def test_simple_float(self):
+        self.check([[1, 2], [3, 4.0]],
+                   (2, 2, 4, 'array', 'real', 'general'))
+
+    def test_simple_complex(self):
+        self.check([[1, 2], [3, 4j]],
+                   (2, 2, 4, 'array', 'complex', 'general'))
+
+    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
+    def test_simple_symmetric_integer(self, typeval, dtype):
+        self.check_exact(array([[1, 2], [2, 4]], dtype=dtype),
+                         (2, 2, 4, 'array', typeval, 'symmetric'))
+
+    def test_simple_skew_symmetric_integer(self):
+        self.check_exact([[0, 2], [-2, 0]],
+                         (2, 2, 4, 'array', 'integer', 'skew-symmetric'))
+
+    def test_simple_skew_symmetric_float(self):
+        self.check(array([[0, 2], [-2.0, 0.0]], 'f'),
+                   (2, 2, 4, 'array', 'real', 'skew-symmetric'))
+
+    def test_simple_hermitian_complex(self):
+        self.check([[1, 2+3j], [2-3j, 4]],
+                   (2, 2, 4, 'array', 'complex', 'hermitian'))
+
+    def test_random_symmetric_float(self):
+        sz = (20, 20)
+        a = np.random.random(sz)
+        a = a + transpose(a)
+        self.check(a, (20, 20, 400, 'array', 'real', 'symmetric'))
+
+    def test_random_rectangular_float(self):
+        sz = (20, 15)
+        a = np.random.random(sz)
+        self.check(a, (20, 15, 300, 'array', 'real', 'general'))
+
+
+class TestMMIOSparseCSR(TestMMIOArray):
+    def setup_method(self):
+        self.tmpdir = mkdtemp()
+        self.fn = os.path.join(self.tmpdir, 'testfile.mtx')
+
+    def teardown_method(self):
+        shutil.rmtree(self.tmpdir)
+
+    def check(self, a, info):
+        mmwrite(self.fn, a)
+        assert_equal(mminfo(self.fn), info)
+        b = mmread(self.fn)
+        assert_array_almost_equal(a.todense(), b.todense())
+
+    def check_exact(self, a, info):
+        mmwrite(self.fn, a)
+        assert_equal(mminfo(self.fn), info)
+        b = mmread(self.fn)
+        assert_equal(a.todense(), b.todense())
+
+    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
+    def test_simple_integer(self, typeval, dtype):
+        self.check_exact(scipy.sparse.csr_matrix([[1, 2], [3, 4]], dtype=dtype),
+                         (2, 2, 4, 'coordinate', typeval, 'general'))
+
+    def test_32bit_integer(self):
+        a = scipy.sparse.csr_matrix(array([[2**31-1, -2**31+2],
+                                           [2**31-3, 2**31-4]],
+                                          dtype=np.int32))
+        self.check_exact(a, (2, 2, 4, 'coordinate', 'integer', 'general'))
+
+    def test_64bit_integer(self):
+        a = scipy.sparse.csr_matrix(array([[2**32+1, 2**32+1],
+                                           [-2**63+2, 2**63-2]],
+                                          dtype=np.int64))
+        if (np.intp(0).itemsize < 8):
+            assert_raises(OverflowError, mmwrite, self.fn, a)
+        else:
+            self.check_exact(a, (2, 2, 4, 'coordinate', 'integer', 'general'))
+
+    def test_32bit_unsigned_integer(self):
+        a = scipy.sparse.csr_matrix(array([[2**31-1, 2**31-2],
+                                           [2**31-3, 2**31-4]],
+                                          dtype=np.uint32))
+        self.check_exact(a, (2, 2, 4, 'coordinate', 'unsigned-integer', 'general'))
+
+    def test_64bit_unsigned_integer(self):
+        a = scipy.sparse.csr_matrix(array([[2**32+1, 2**32+1],
+                                           [2**64-2, 2**64-1]],
+                                          dtype=np.uint64))
+        self.check_exact(a, (2, 2, 4, 'coordinate', 'unsigned-integer', 'general'))
+
+    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
+    def test_simple_upper_triangle_integer(self, typeval, dtype):
+        self.check_exact(scipy.sparse.csr_matrix([[0, 1], [0, 0]], dtype=dtype),
+                         (2, 2, 1, 'coordinate', typeval, 'general'))
+
+    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
+    def test_simple_lower_triangle_integer(self, typeval, dtype):
+        self.check_exact(scipy.sparse.csr_matrix([[0, 0], [1, 0]], dtype=dtype),
+                         (2, 2, 1, 'coordinate', typeval, 'general'))
+
+    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
+    def test_simple_rectangular_integer(self, typeval, dtype):
+        self.check_exact(scipy.sparse.csr_matrix([[1, 2, 3], [4, 5, 6]], dtype=dtype),
+                         (2, 3, 6, 'coordinate', typeval, 'general'))
+
+    def test_simple_rectangular_float(self):
+        self.check(scipy.sparse.csr_matrix([[1, 2], [3.5, 4], [5, 6]]),
+                   (3, 2, 6, 'coordinate', 'real', 'general'))
+
+    def test_simple_float(self):
+        self.check(scipy.sparse.csr_matrix([[1, 2], [3, 4.0]]),
+                   (2, 2, 4, 'coordinate', 'real', 'general'))
+
+    def test_simple_complex(self):
+        self.check(scipy.sparse.csr_matrix([[1, 2], [3, 4j]]),
+                   (2, 2, 4, 'coordinate', 'complex', 'general'))
+
+    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
+    def test_simple_symmetric_integer(self, typeval, dtype):
+        self.check_exact(scipy.sparse.csr_matrix([[1, 2], [2, 4]], dtype=dtype),
+                         (2, 2, 3, 'coordinate', typeval, 'symmetric'))
+
+    def test_simple_skew_symmetric_integer(self):
+        self.check_exact(scipy.sparse.csr_matrix([[1, 2], [-2, 4]]),
+                         (2, 2, 3, 'coordinate', 'integer', 'skew-symmetric'))
+
+    def test_simple_skew_symmetric_float(self):
+        self.check(scipy.sparse.csr_matrix(array([[1, 2], [-2.0, 4]], 'f')),
+                   (2, 2, 3, 'coordinate', 'real', 'skew-symmetric'))
+
+    def test_simple_hermitian_complex(self):
+        self.check(scipy.sparse.csr_matrix([[1, 2+3j], [2-3j, 4]]),
+                   (2, 2, 3, 'coordinate', 'complex', 'hermitian'))
+
+    def test_random_symmetric_float(self):
+        sz = (20, 20)
+        a = np.random.random(sz)
+        a = a + transpose(a)
+        a = scipy.sparse.csr_matrix(a)
+        self.check(a, (20, 20, 210, 'coordinate', 'real', 'symmetric'))
+
+    def test_random_rectangular_float(self):
+        sz = (20, 15)
+        a = np.random.random(sz)
+        a = scipy.sparse.csr_matrix(a)
+        self.check(a, (20, 15, 300, 'coordinate', 'real', 'general'))
+
+    def test_simple_pattern(self):
+        # Write with field='pattern' and compare the result with a 0/1 mask
+        # marking the positive entries of the original matrix.
+        matrix = scipy.sparse.csr_matrix([[0, 1.5], [3.0, 2.5]])
+        dense = matrix.todense()
+        pattern = np.zeros_like(dense)
+        pattern[dense > 0] = 1
+
+        mmwrite(self.fn, matrix, field='pattern')
+        assert_equal(mminfo(self.fn),
+                     (2, 2, 3, 'coordinate', 'pattern', 'general'))
+        read_back = mmread(self.fn)
+        assert_array_almost_equal(pattern, read_back.todense())
+
+
+_32bit_integer_dense_example = '''\
+%%MatrixMarket matrix array integer general
+2  2
+2147483647
+2147483646
+2147483647
+2147483646
+'''
+
+_32bit_integer_sparse_example = '''\
+%%MatrixMarket matrix coordinate integer symmetric
+2  2  2
+1  1  2147483647
+2  2  2147483646
+'''
+
+_64bit_integer_dense_example = '''\
+%%MatrixMarket matrix array integer general
+2  2
+          2147483648
+-9223372036854775806
+         -2147483648
+ 9223372036854775807
+'''
+
+_64bit_integer_sparse_general_example = '''\
+%%MatrixMarket matrix coordinate integer general
+2  2  3
+1  1           2147483648
+1  2  9223372036854775807
+2  2  9223372036854775807
+'''
+
+_64bit_integer_sparse_symmetric_example = '''\
+%%MatrixMarket matrix coordinate integer symmetric
+2  2  3
+1  1            2147483648
+1  2  -9223372036854775807
+2  2   9223372036854775807
+'''
+
+_64bit_integer_sparse_skew_example = '''\
+%%MatrixMarket matrix coordinate integer skew-symmetric
+2  2  3
+1  1            2147483648
+1  2  -9223372036854775807
+2  2   9223372036854775807
+'''
+
+_over64bit_integer_dense_example = '''\
+%%MatrixMarket matrix array integer general
+2  2
+         2147483648
+9223372036854775807
+         2147483648
+9223372036854775808
+'''
+
+_over64bit_integer_sparse_example = '''\
+%%MatrixMarket matrix coordinate integer symmetric
+2  2  2
+1  1            2147483648
+2  2  19223372036854775808
+'''
+
+class TestMMIOReadLargeIntegers(object):
+    def setup_method(self):
+        self.tmpdir = mkdtemp()
+        self.fn = os.path.join(self.tmpdir, 'testfile.mtx')
+
+    def teardown_method(self):
+        shutil.rmtree(self.tmpdir)
+
+    def check_read(self, example, a, info, dense, over32, over64):
+        '''Write `example` to the test file and verify how it is read back.
+
+        The header must match `info`.  An OverflowError is expected when
+        `over64` is set, or when `over32` is set and ``np.intp`` is narrower
+        than 8 bytes; otherwise the data read (densified unless `dense`)
+        must equal `a`.
+        '''
+        with open(self.fn, 'w') as handle:
+            handle.write(example)
+        assert_equal(mminfo(self.fn), info)
+
+        narrow_intp = np.intp(0).itemsize < 8
+        expect_overflow = over64 or (over32 and narrow_intp)
+        if expect_overflow:
+            assert_raises(OverflowError, mmread, self.fn)
+            return
+
+        loaded = mmread(self.fn)
+        if not dense:
+            loaded = loaded.todense()
+        assert_equal(a, loaded)
+
+    def test_read_32bit_integer_dense(self):
+        a = array([[2**31-1, 2**31-1],
+                   [2**31-2, 2**31-2]], dtype=np.int64)
+        self.check_read(_32bit_integer_dense_example,
+                        a,
+                        (2, 2, 4, 'array', 'integer', 'general'),
+                        dense=True,
+                        over32=False,
+                        over64=False)
+
+    def test_read_32bit_integer_sparse(self):
+        a = array([[2**31-1, 0],
+                   [0, 2**31-2]], dtype=np.int64)
+        self.check_read(_32bit_integer_sparse_example,
+                        a,
+                        (2, 2, 2, 'coordinate', 'integer', 'symmetric'),
+                        dense=False,
+                        over32=False,
+                        over64=False)
+
+    def test_read_64bit_integer_dense(self):
+        a = array([[2**31, -2**31],
+                   [-2**63+2, 2**63-1]], dtype=np.int64)
+        self.check_read(_64bit_integer_dense_example,
+                        a,
+                        (2, 2, 4, 'array', 'integer', 'general'),
+                        dense=True,
+                        over32=True,
+                        over64=False)
+
+    def test_read_64bit_integer_sparse_general(self):
+        a = array([[2**31, 2**63-1],
+                   [0, 2**63-1]], dtype=np.int64)
+        self.check_read(_64bit_integer_sparse_general_example,
+                        a,
+                        (2, 2, 3, 'coordinate', 'integer', 'general'),
+                        dense=False,
+                        over32=True,
+                        over64=False)
+
+    def test_read_64bit_integer_sparse_symmetric(self):
+        a = array([[2**31, -2**63+1],
+                   [-2**63+1, 2**63-1]], dtype=np.int64)
+        self.check_read(_64bit_integer_sparse_symmetric_example,
+                        a,
+                        (2, 2, 3, 'coordinate', 'integer', 'symmetric'),
+                        dense=False,
+                        over32=True,
+                        over64=False)
+
+    def test_read_64bit_integer_sparse_skew(self):
+        a = array([[2**31, -2**63+1],
+                   [2**63-1, 2**63-1]], dtype=np.int64)
+        self.check_read(_64bit_integer_sparse_skew_example,
+                        a,
+                        (2, 2, 3, 'coordinate', 'integer', 'skew-symmetric'),
+                        dense=False,
+                        over32=True,
+                        over64=False)
+
+    def test_read_over64bit_integer_dense(self):
+        self.check_read(_over64bit_integer_dense_example,
+                        None,
+                        (2, 2, 4, 'array', 'integer', 'general'),
+                        dense=True,
+                        over32=True,
+                        over64=True)
+
+    def test_read_over64bit_integer_sparse(self):
+        self.check_read(_over64bit_integer_sparse_example,
+                        None,
+                        (2, 2, 2, 'coordinate', 'integer', 'symmetric'),
+                        dense=False,
+                        over32=True,
+                        over64=True)
+
+
+_general_example = '''\
+%%MatrixMarket matrix coordinate real general
+%=================================================================================
+%
+% This ASCII file represents a sparse MxN matrix with L
+% nonzeros in the following Matrix Market format:
+%
+% +----------------------------------------------+
+% |%%MatrixMarket matrix coordinate real general | <--- header line
+% |%                                             | <--+
+% |% comments                                    |    |-- 0 or more comment lines
+% |%                                             | <--+
+% |    M  N  L                                   | <--- rows, columns, entries
+% |    I1  J1  A(I1, J1)                         | <--+
+% |    I2  J2  A(I2, J2)                         |    |
+% |    I3  J3  A(I3, J3)                         |    |-- L lines
+% |        . . .                                 |    |
+% |    IL JL  A(IL, JL)                          | <--+
+% +----------------------------------------------+
+%
+% Indices are 1-based, i.e. A(1,1) is the first element.
+%
+%=================================================================================
+  5  5  8
+    1     1   1.000e+00
+    2     2   1.050e+01
+    3     3   1.500e-02
+    1     4   6.000e+00
+    4     2   2.505e+02
+    4     4  -2.800e+02
+    4     5   3.332e+01
+    5     5   1.200e+01
+'''
+
+_hermitian_example = '''\
+%%MatrixMarket matrix coordinate complex hermitian
+  5  5  7
+    1     1     1.0      0
+    2     2    10.5      0
+    4     2   250.5     22.22
+    3     3     1.5e-2   0
+    4     4    -2.8e2    0
+    5     5    12.       0
+    5     4     0       33.32
+'''
+
+_skew_example = '''\
+%%MatrixMarket matrix coordinate real skew-symmetric
+  5  5  7
+    1     1     1.0
+    2     2    10.5
+    4     2   250.5
+    3     3     1.5e-2
+    4     4    -2.8e2
+    5     5    12.
+    5     4     0
+'''
+
+_symmetric_example = '''\
+%%MatrixMarket matrix coordinate real symmetric
+  5  5  7
+    1     1     1.0
+    2     2    10.5
+    4     2   250.5
+    3     3     1.5e-2
+    4     4    -2.8e2
+    5     5    12.
+    5     4     8
+'''
+
+_symmetric_pattern_example = '''\
+%%MatrixMarket matrix coordinate pattern symmetric
+  5  5  7
+    1     1
+    2     2
+    4     2
+    3     3
+    4     4
+    5     5
+    5     4
+'''
+
+
+class TestMMIOCoordinate(object):
+    def setup_method(self):
+        self.tmpdir = mkdtemp()
+        self.fn = os.path.join(self.tmpdir, 'testfile.mtx')
+
+    def teardown_method(self):
+        shutil.rmtree(self.tmpdir)
+
+    def check_read(self, example, a, info):
+        '''Write `example` to the test file and verify how it is read back.
+
+        The header reported by `mminfo` must equal `info`, and the matrix
+        returned by `mmread`, once densified, must approximately equal `a`.
+        '''
+        # Use a context manager so the handle is closed even if the write
+        # fails.
+        with open(self.fn, 'w') as f:
+            f.write(example)
+        assert_equal(mminfo(self.fn), info)
+        b = mmread(self.fn).todense()
+        assert_array_almost_equal(a, b)
+
+    def test_read_general(self):
+        a = [[1, 0, 0, 6, 0],
+             [0, 10.5, 0, 0, 0],
+             [0, 0, .015, 0, 0],
+             [0, 250.5, 0, -280, 33.32],
+             [0, 0, 0, 0, 12]]
+        self.check_read(_general_example, a,
+                        (5, 5, 8, 'coordinate', 'real', 'general'))
+
+    def test_read_hermitian(self):
+        a = [[1, 0, 0, 0, 0],
+             [0, 10.5, 0, 250.5 - 22.22j, 0],
+             [0, 0, .015, 0, 0],
+             [0, 250.5 + 22.22j, 0, -280, -33.32j],
+             [0, 0, 0, 33.32j, 12]]
+        self.check_read(_hermitian_example, a,
+                        (5, 5, 7, 'coordinate', 'complex', 'hermitian'))
+
+    def test_read_skew(self):
+        a = [[1, 0, 0, 0, 0],
+             [0, 10.5, 0, -250.5, 0],
+             [0, 0, .015, 0, 0],
+             [0, 250.5, 0, -280, 0],
+             [0, 0, 0, 0, 12]]
+        self.check_read(_skew_example, a,
+                        (5, 5, 7, 'coordinate', 'real', 'skew-symmetric'))
+
+    def test_read_symmetric(self):
+        a = [[1, 0, 0, 0, 0],
+             [0, 10.5, 0, 250.5, 0],
+             [0, 0, .015, 0, 0],
+             [0, 250.5, 0, -280, 8],
+             [0, 0, 0, 8, 12]]
+        self.check_read(_symmetric_example, a,
+                        (5, 5, 7, 'coordinate', 'real', 'symmetric'))
+
+    def test_read_symmetric_pattern(self):
+        a = [[1, 0, 0, 0, 0],
+             [0, 1, 0, 1, 0],
+             [0, 0, 1, 0, 0],
+             [0, 1, 0, 1, 1],
+             [0, 0, 0, 1, 1]]
+        self.check_read(_symmetric_pattern_example, a,
+                        (5, 5, 7, 'coordinate', 'pattern', 'symmetric'))
+
+    def test_empty_write_read(self):
+        # https://github.com/scipy/scipy/issues/1410 (Trac #883)
+
+        b = scipy.sparse.coo_matrix((10, 10))
+        mmwrite(self.fn, b)
+
+        assert_equal(mminfo(self.fn),
+                     (10, 10, 0, 'coordinate', 'real', 'symmetric'))
+        a = b.todense()
+        b = mmread(self.fn).todense()
+        assert_array_almost_equal(a, b)
+
+    def test_bzip2_py3(self):
+        # test if fix for #2152 works: a bzip2-compressed Matrix Market file
+        # must be readable by mmread.
+        try:
+            # bz2 module isn't always built when building Python.
+            import bz2
+        except ImportError:
+            # Report the test as skipped instead of letting it pass silently.
+            pytest.skip("bz2 module is not available")
+        I = array([0, 0, 1, 2, 3, 3, 3, 4])
+        J = array([0, 3, 1, 2, 1, 3, 4, 4])
+        V = array([1.0, 6.0, 10.5, 0.015, 250.5, -280.0, 33.32, 12.0])
+
+        b = scipy.sparse.coo_matrix((V, (I, J)), shape=(5, 5))
+
+        mmwrite(self.fn, b)
+
+        fn_bzip2 = "%s.bz2" % self.fn
+        # Both files are managed by `with` so they are closed even if the
+        # copy fails part-way.
+        with open(self.fn, 'rb') as f_in:
+            with bz2.BZ2File(fn_bzip2, 'wb') as f_out:
+                f_out.write(f_in.read())
+
+        a = mmread(fn_bzip2).todense()
+        assert_array_almost_equal(a, b.todense())
+
+    def test_gzip_py3(self):
+        # test if fix for #2152 works: a gzip-compressed Matrix Market file
+        # must be readable by mmread.
+        try:
+            # gzip module can be missing from Python installation
+            import gzip
+        except ImportError:
+            # Report the test as skipped instead of letting it pass silently.
+            pytest.skip("gzip module is not available")
+        I = array([0, 0, 1, 2, 3, 3, 3, 4])
+        J = array([0, 3, 1, 2, 1, 3, 4, 4])
+        V = array([1.0, 6.0, 10.5, 0.015, 250.5, -280.0, 33.32, 12.0])
+
+        b = scipy.sparse.coo_matrix((V, (I, J)), shape=(5, 5))
+
+        mmwrite(self.fn, b)
+
+        fn_gzip = "%s.gz" % self.fn
+        # Both files are managed by `with` so they are closed even if the
+        # copy fails part-way.
+        with open(self.fn, 'rb') as f_in:
+            with gzip.open(fn_gzip, 'wb') as f_out:
+                f_out.write(f_in.read())
+
+        a = mmread(fn_gzip).todense()
+        assert_array_almost_equal(a, b.todense())
+
+    def test_real_write_read(self):
+        I = array([0, 0, 1, 2, 3, 3, 3, 4])
+        J = array([0, 3, 1, 2, 1, 3, 4, 4])
+        V = array([1.0, 6.0, 10.5, 0.015, 250.5, -280.0, 33.32, 12.0])
+
+        b = scipy.sparse.coo_matrix((V, (I, J)), shape=(5, 5))
+
+        mmwrite(self.fn, b)
+
+        assert_equal(mminfo(self.fn),
+                     (5, 5, 8, 'coordinate', 'real', 'general'))
+        a = b.todense()
+        b = mmread(self.fn).todense()
+        assert_array_almost_equal(a, b)
+
+    def test_complex_write_read(self):
+        I = array([0, 0, 1, 2, 3, 3, 3, 4])
+        J = array([0, 3, 1, 2, 1, 3, 4, 4])
+        V = array([1.0 + 3j, 6.0 + 2j, 10.50 + 0.9j, 0.015 + -4.4j,
+                   250.5 + 0j, -280.0 + 5j, 33.32 + 6.4j, 12.00 + 0.8j])
+
+        b = scipy.sparse.coo_matrix((V, (I, J)), shape=(5, 5))
+
+        mmwrite(self.fn, b)
+
+        assert_equal(mminfo(self.fn),
+                     (5, 5, 8, 'coordinate', 'complex', 'general'))
+        a = b.todense()
+        b = mmread(self.fn).todense()
+        assert_array_almost_equal(a, b)
+
+    def test_sparse_formats(self):
+        mats = []
+
+        I = array([0, 0, 1, 2, 3, 3, 3, 4])
+        J = array([0, 3, 1, 2, 1, 3, 4, 4])
+
+        V = array([1.0, 6.0, 10.5, 0.015, 250.5, -280.0, 33.32, 12.0])
+        mats.append(scipy.sparse.coo_matrix((V, (I, J)), shape=(5, 5)))
+
+        V = array([1.0 + 3j, 6.0 + 2j, 10.50 + 0.9j, 0.015 + -4.4j,
+                   250.5 + 0j, -280.0 + 5j, 33.32 + 6.4j, 12.00 + 0.8j])
+        mats.append(scipy.sparse.coo_matrix((V, (I, J)), shape=(5, 5)))
+
+        for mat in mats:
+            expected = mat.todense()
+            for fmt in ['csr', 'csc', 'coo']:
+                fn = mktemp(dir=self.tmpdir)  # safe, we own tmpdir
+                mmwrite(fn, mat.asformat(fmt))
+
+                result = mmread(fn).todense()
+                assert_array_almost_equal(result, expected)
+
+    def test_precision(self):
+        # A value written with a given `precision` must read back equal to
+        # that value formatted with '%.<precision>g'.
+        test_values = [pi] + [10**(i) for i in range(0, -10, -1)]
+        test_precisions = range(1, 10)
+        for value in test_values:
+            for precision in test_precisions:
+                # construct sparse matrix with test value at last main diagonal
+                n = 10**precision + 1
+                A = scipy.sparse.dok_matrix((n, n))
+                A[n-1, n-1] = value
+                # write matrix with test precision and read again; use the
+                # `mmread` imported at the top of the file, like every other
+                # test here, rather than reaching through `scipy.io`.
+                mmwrite(self.fn, A, precision=precision)
+                A = mmread(self.fn)
+                # check for right entries in matrix
+                assert_array_equal(A.row, [n-1])
+                assert_array_equal(A.col, [n-1])
+                assert_array_almost_equal(A.data,
+                    [float('%%.%dg' % precision % value)])
@@ -0,0 +1,544 @@
+''' Tests for netcdf '''
+from __future__ import division, print_function, absolute_import
+
+import os
+from os.path import join as pjoin, dirname
+import shutil
+import tempfile
+import warnings
+from io import BytesIO
+from glob import glob
+from contextlib import contextmanager
+
+import numpy as np
+from numpy.testing import assert_, assert_allclose, assert_equal
+from pytest import raises as assert_raises
+
+from scipy.io.netcdf import netcdf_file, IS_PYPY
+
+from scipy._lib._numpy_compat import suppress_warnings
+from scipy._lib._tmpdirs import in_tempdir
+
+TEST_DATA_PATH = pjoin(dirname(__file__), 'data')
+
+N_EG_ELS = 11  # number of elements for example variable
+VARTYPE_EG = 'b'  # var type for example variable
+
+
+@contextmanager
+def make_simple(*args, **kwargs):
+    '''Context manager yielding a small example netcdf file.
+
+    All arguments are forwarded to `netcdf_file`.  The file is given a
+    ``history`` attribute and a ``time`` dimension/variable with
+    `N_EG_ELS` elements, flushed, and then yielded.  It is closed on exit,
+    including when the body of the ``with`` statement raises.
+    '''
+    f = netcdf_file(*args, **kwargs)
+    f.history = 'Created for a test'
+    f.createDimension('time', N_EG_ELS)
+    time = f.createVariable('time', VARTYPE_EG, ('time',))
+    time[:] = np.arange(N_EG_ELS)
+    time.units = 'days since 2008-01-01'
+    f.flush()
+    try:
+        yield f
+    finally:
+        # Without try/finally a failing assertion inside the ``with`` body
+        # would leave the file open.
+        f.close()
+
+
+def check_simple(ncfileobj):
+    '''Check `ncfileobj` for the attributes and ``time`` variable that
+    `make_simple` writes.'''
+    assert_equal(ncfileobj.history, b'Created for a test')
+
+    time_var = ncfileobj.variables['time']
+    assert_equal(time_var.units, b'days since 2008-01-01')
+    assert_equal(time_var.shape, (N_EG_ELS,))
+
+    # The variable holds arange(N_EG_ELS), so the last entry is N_EG_ELS-1.
+    last_value = time_var[-1]
+    assert_equal(last_value, N_EG_ELS-1)
+
+def assert_mask_matches(arr, expected_mask):
+    '''
+    Assert that the mask of `arr` equals `expected_mask`.
+
+    Unlike numpy.ma.testutils.assert_mask_equal, a standard numpy array is
+    accepted as well; its mask is treated as all False.
+
+    Parameters
+    ----------
+    arr : ndarray or MaskedArray
+        Array whose mask is checked.
+    expected_mask : array_like of booleans
+        The mask `arr` is expected to have.
+    '''
+    # getmaskarray always returns a full boolean array, also for unmasked
+    # input.
+    assert_equal(np.ma.getmaskarray(arr), expected_mask)
+
+
+def test_read_write_files():
+    # test round trip for example file
+    #
+    # The test runs inside a fresh temporary directory.  The directory is
+    # created before entering the ``try`` so the cleanup in ``finally`` never
+    # refers to an unbound name, and ``finally`` restores the working
+    # directory and removes the temporary one on success and failure alike.
+    cwd = os.getcwd()
+    tmpdir = tempfile.mkdtemp()
+    try:
+        os.chdir(tmpdir)
+        with make_simple('simple.nc', 'w') as f:
+            pass
+        # read the file we just created in 'a' mode
+        with netcdf_file('simple.nc', 'a') as f:
+            check_simple(f)
+            # add something
+            f._attributes['appendRan'] = 1
+
+        # To read the NetCDF file we just created::
+        with netcdf_file('simple.nc') as f:
+            # Using mmap is the default (but not on pypy)
+            assert_equal(f.use_mmap, not IS_PYPY)
+            check_simple(f)
+            assert_equal(f._attributes['appendRan'], 1)
+
+        # Read it in append (and check mmap is off)
+        with netcdf_file('simple.nc', 'a') as f:
+            assert_(not f.use_mmap)
+            check_simple(f)
+            assert_equal(f._attributes['appendRan'], 1)
+
+        # Now without mmap
+        with netcdf_file('simple.nc', mmap=False) as f:
+            # mmap was explicitly disabled
+            assert_(not f.use_mmap)
+            check_simple(f)
+
+        # To read the NetCDF file we just created, as file object, no
+        # mmap.  When n * n_bytes(var_type) is not divisible by 4, this
+        # raised an error in pupynere 1.0.12 and scipy rev 5893, because
+        # calculated vsize was rounding up in units of 4 - see
+        # https://www.unidata.ucar.edu/software/netcdf/docs/user_guide.html
+        with open('simple.nc', 'rb') as fobj:
+            with netcdf_file(fobj) as f:
+                # by default, don't use mmap for file-like
+                assert_(not f.use_mmap)
+                check_simple(f)
+
+        # Read file from fileobj, with mmap
+        with suppress_warnings() as sup:
+            if IS_PYPY:
+                sup.filter(RuntimeWarning,
+                           "Cannot close a netcdf_file opened with mmap=True.*")
+            with open('simple.nc', 'rb') as fobj:
+                with netcdf_file(fobj, mmap=True) as f:
+                    assert_(f.use_mmap)
+                    check_simple(f)
+
+        # Again read it in append mode (adding another att)
+        with open('simple.nc', 'r+b') as fobj:
+            with netcdf_file(fobj, 'a') as f:
+                assert_(not f.use_mmap)
+                check_simple(f)
+                f.createDimension('app_dim', 1)
+                var = f.createVariable('app_var', 'i', ('app_dim',))
+                var[:] = 42
+
+        # And... check that app_var made it in...
+        with netcdf_file('simple.nc') as f:
+            check_simple(f)
+            assert_equal(f.variables['app_var'][:], 42)
+    finally:
+        os.chdir(cwd)
+        shutil.rmtree(tmpdir)
+
+
+def test_read_write_sio():
+    # Round trip through in-memory streams, first with the default format
+    # and then with the 64-bit offset format (version 2).
+    stream_out = BytesIO()
+    with make_simple(stream_out, 'w'):
+        # The serialized bytes are captured inside the ``with`` block.
+        default_bytes = stream_out.getvalue()
+
+    with netcdf_file(BytesIO(default_bytes)) as reader:
+        check_simple(reader)
+
+    # Test that error is raised if attempting mmap for sio
+    assert_raises(ValueError, netcdf_file, BytesIO(default_bytes), 'r', True)
+
+    # Test 64-bit offset write / read
+    stream_out_64 = BytesIO()
+    with make_simple(stream_out_64, 'w', version=2):
+        offset64_bytes = stream_out_64.getvalue()
+
+    # Read once without a version argument, and once with version 2
+    # explicitly specified; both must report version byte 2.
+    for extra_kwargs in ({}, {'version': 2}):
+        with netcdf_file(BytesIO(offset64_bytes), **extra_kwargs) as reader:
+            check_simple(reader)
+            assert_equal(reader.version_byte, 2)
+
+
+def test_bytes():
+    # Write a minimal dataset to an in-memory stream and compare the raw
+    # bytes produced with a hand-written expected byte string.
+    raw_file = BytesIO()
+    f = netcdf_file(raw_file, mode='w')
+    # Dataset only has a single variable, dimension and attribute to avoid
+    # any ambiguity related to order.
+    f.a = 'b'
+    f.createDimension('dim', 1)
+    var = f.createVariable('var', np.int16, ('dim',))
+    var[0] = -9999
+    var.c = 'd'
+    f.sync()
+
+    actual = raw_file.getvalue()
+
+    # The expected stream is written as 4-byte groups.  The names and values
+    # set above ('dim', 'a'/'b', 'var', 'c'/'d') appear NUL-padded to four
+    # bytes.
+    # NOTE(review): the group order looks like the NetCDF classic header
+    # (magic + version, record count, dimension list, global attributes,
+    # variable list) followed by the data - confirm against the format spec.
+    expected = (b'CDF\x01'
+                b'\x00\x00\x00\x00'
+                b'\x00\x00\x00\x0a'
+                b'\x00\x00\x00\x01'
+                b'\x00\x00\x00\x03'
+                b'dim\x00'
+                b'\x00\x00\x00\x01'
+                b'\x00\x00\x00\x0c'
+                b'\x00\x00\x00\x01'
+                b'\x00\x00\x00\x01'
+                b'a\x00\x00\x00'
+                b'\x00\x00\x00\x02'
+                b'\x00\x00\x00\x01'
+                b'b\x00\x00\x00'
+                b'\x00\x00\x00\x0b'
+                b'\x00\x00\x00\x01'
+                b'\x00\x00\x00\x03'
+                b'var\x00'
+                b'\x00\x00\x00\x01'
+                b'\x00\x00\x00\x00'
+                b'\x00\x00\x00\x0c'
+                b'\x00\x00\x00\x01'
+                b'\x00\x00\x00\x01'
+                b'c\x00\x00\x00'
+                b'\x00\x00\x00\x02'
+                b'\x00\x00\x00\x01'
+                b'd\x00\x00\x00'
+                b'\x00\x00\x00\x03'
+                b'\x00\x00\x00\x04'
+                # 0x78 == 120, the number of bytes preceding the final group
+                b'\x00\x00\x00\x78'
+                # \xd8\xf1 is -9999 as a big-endian int16; the trailing
+                # \x80\x01 is presumably padding - TODO confirm
+                b'\xd8\xf1\x80\x01')
+
+    assert_equal(actual, expected)
+
+
+def test_encoded_fill_value():
+    # For an 'S1' variable the encoded fill value starts as a NUL byte,
+    # follows a one-byte ``_FillValue`` attribute, and is a NUL byte again
+    # when the attribute has the wrong size.
+    with netcdf_file(BytesIO(), mode='w') as nc:
+        nc.createDimension('x', 1)
+        char_var = nc.createVariable('var', 'S1', ('x',))
+        assert_equal(char_var._get_encoded_fill_value(), b'\x00')
+
+        char_var._FillValue = b'\x01'
+        assert_equal(char_var._get_encoded_fill_value(), b'\x01')
+
+        # invalid, wrong size
+        char_var._FillValue = b'\x00\x00'
+        assert_equal(char_var._get_encoded_fill_value(), b'\x00')
+
+
+def test_read_example_data():
+    # read any example data files
+    for fname in glob(pjoin(TEST_DATA_PATH, '*.nc')):
+        with netcdf_file(fname, 'r') as f:
+            pass
+        with netcdf_file(fname, 'r', mmap=False) as f:
+            pass
+
+
+def test_itemset_no_segfault_on_readonly():
+    # Regression test for ticket #1202.
+    # Open the test file in read-only mode.
+
+    filename = pjoin(TEST_DATA_PATH, 'example_1.nc')
+    with suppress_warnings() as sup:
+        sup.filter(RuntimeWarning,
+                   "Cannot close a netcdf_file opened with mmap=True, when netcdf_variables or arrays referring to its data still exist")
+        with netcdf_file(filename, 'r', mmap=True) as f:
+            time_var = f.variables['time']
+
+    # time_var.assignValue(42) should raise a RuntimeError--not seg. fault!
+    assert_raises(RuntimeError, time_var.assignValue, 42)
+
+
+def test_appending_issue_gh_8625():
+    stream = BytesIO()
+
+    with make_simple(stream, mode='w') as f:
+        f.createDimension('x', 2)
+        f.createVariable('x', float, ('x',))
+        f.variables['x'][...] = 1
+        f.flush()
+        contents = stream.getvalue()
+
+    stream = BytesIO(contents)
+    with netcdf_file(stream, mode='a') as f:
+        f.variables['x'][...] = 2
+
+
+def test_write_invalid_dtype():
+    dtypes = ['int64', 'uint64']
+    if np.dtype('int').itemsize == 8:   # 64-bit machines
+        dtypes.append('int')
+    if np.dtype('uint').itemsize == 8:   # 64-bit machines
+        dtypes.append('uint')
+
+    with netcdf_file(BytesIO(), 'w') as f:
+        f.createDimension('time', N_EG_ELS)
+        for dt in dtypes:
+            assert_raises(ValueError, f.createVariable, 'time', dt, ('time',))
+
+
+def test_flush_rewind():
+    stream = BytesIO()
+    with make_simple(stream, mode='w') as f:
+        x = f.createDimension('x',4)
+        v = f.createVariable('v', 'i2', ['x'])
+        v[:] = 1
+        f.flush()
+        len_single = len(stream.getvalue())
+        f.flush()
+        len_double = len(stream.getvalue())
+
+    assert_(len_single == len_double)
+
+
+def test_dtype_specifiers():
+    # Numpy 1.7.0-dev had a bug where 'i2' wouldn't work.
+    # Specifying np.int16 or similar only works from the same commit as this
+    # comment was made.
+    with make_simple(BytesIO(), mode='w') as f:
+        f.createDimension('x',4)
+        f.createVariable('v1', 'i2', ['x'])
+        f.createVariable('v2', np.int16, ['x'])
+        f.createVariable('v3', np.dtype(np.int16), ['x'])
+
+
+def test_ticket_1720():
+    io = BytesIO()
+
+    items = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
+
+    with netcdf_file(io, 'w') as f:
+        f.history = 'Created for a test'
+        f.createDimension('float_var', 10)
+        float_var = f.createVariable('float_var', 'f', ('float_var',))
+        float_var[:] = items
+        float_var.units = 'metres'
+        f.flush()
+        contents = io.getvalue()
+
+    io = BytesIO(contents)
+    with netcdf_file(io, 'r') as f:
+        assert_equal(f.history, b'Created for a test')
+        float_var = f.variables['float_var']
+        assert_equal(float_var.units, b'metres')
+        assert_equal(float_var.shape, (10,))
+        assert_allclose(float_var[:], items)
+
+
+def test_mmaps_segfault():
+    filename = pjoin(TEST_DATA_PATH, 'example_1.nc')
+
+    if not IS_PYPY:
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+            with netcdf_file(filename, mmap=True) as f:
+                x = f.variables['lat'][:]
+                # should not raise warnings
+                del x
+
+    def doit():
+        with netcdf_file(filename, mmap=True) as f:
+            return f.variables['lat'][:]
+
+    # should not crash
+    with suppress_warnings() as sup:
+        sup.filter(RuntimeWarning,
+                   "Cannot close a netcdf_file opened with mmap=True, when netcdf_variables or arrays referring to its data still exist")
+        x = doit()
+    x.sum()
+
+
+def test_zero_dimensional_var():
+    io = BytesIO()
+    with make_simple(io, 'w') as f:
+        v = f.createVariable('zerodim', 'i2', [])
+        # This is checking that .isrec returns a boolean - don't simplify it
+        # to 'assert not ...'
+        assert v.isrec is False, v.isrec
+        f.flush()
+
+
+def test_byte_gatts():
+    # Check that global "string" atts work like they did before py3k
+    # unicode and general bytes confusion
+    with in_tempdir():
+        filename = 'g_byte_atts.nc'
+        f = netcdf_file(filename, 'w')
+        f._attributes['holy'] = b'grail'
+        f._attributes['witch'] = 'floats'
+        f.close()
+        f = netcdf_file(filename, 'r')
+        assert_equal(f._attributes['holy'], b'grail')
+        assert_equal(f._attributes['witch'], b'floats')
+        f.close()
+
+
+def test_open_append():
+    # open 'w' put one attr
+    with in_tempdir():
+        filename = 'append_dat.nc'
+        f = netcdf_file(filename, 'w')
+        f._attributes['Kilroy'] = 'was here'
+        f.close()
+
+        # open again in 'a', read the att and and a new one
+        f = netcdf_file(filename, 'a')
+        assert_equal(f._attributes['Kilroy'], b'was here')
+        f._attributes['naughty'] = b'Zoot'
+        f.close()
+
+        # open yet again in 'r' and check both atts
+        f = netcdf_file(filename, 'r')
+        assert_equal(f._attributes['Kilroy'], b'was here')
+        assert_equal(f._attributes['naughty'], b'Zoot')
+        f.close()
+
+
+def test_append_recordDimension():
+    dataSize = 100
+
+    with in_tempdir():
+        # Create file with record time dimension
+        with netcdf_file('withRecordDimension.nc', 'w') as f:
+            f.createDimension('time', None)
+            f.createVariable('time', 'd', ('time',))
+            f.createDimension('x', dataSize)
+            x = f.createVariable('x', 'd', ('x',))
+            x[:] = np.array(range(dataSize))
+            f.createDimension('y', dataSize)
+            y = f.createVariable('y', 'd', ('y',))
+            y[:] = np.array(range(dataSize))
+            f.createVariable('testData', 'i', ('time', 'x', 'y'))
+            f.flush()
+            f.close()
+
+        for i in range(2):
+            # Open the file in append mode and add data
+            with netcdf_file('withRecordDimension.nc', 'a') as f:
+                f.variables['time'].data = np.append(f.variables["time"].data, i)
+                f.variables['testData'][i, :, :] = np.ones((dataSize, dataSize))*i
+                f.flush()
+
+            # Read the file and check that append worked
+            with netcdf_file('withRecordDimension.nc') as f:
+                assert_equal(f.variables['time'][-1], i)
+                assert_equal(f.variables['testData'][-1, :, :].copy(), np.ones((dataSize, dataSize))*i)
+                assert_equal(f.variables['time'].data.shape[0], i+1)
+                assert_equal(f.variables['testData'].data.shape[0], i+1)
+
+        # Read the file and check that 'data' was not saved as user defined
+        # attribute of testData variable during append operation
+        with netcdf_file('withRecordDimension.nc') as f:
+            with assert_raises(KeyError) as ar:
+                f.variables['testData']._attributes['data']
+            ex = ar.value
+            assert_equal(ex.args[0], 'data')
+
+def test_maskandscale():
+    t = np.linspace(20, 30, 15)
+    t[3] = 100
+    tm = np.ma.masked_greater(t, 99)
+    fname = pjoin(TEST_DATA_PATH, 'example_2.nc')
+    with netcdf_file(fname, maskandscale=True) as f:
+        Temp = f.variables['Temperature']
+        assert_equal(Temp.missing_value, 9999)
+        assert_equal(Temp.add_offset, 20)
+        assert_equal(Temp.scale_factor, np.float32(0.01))
+        found = Temp[:].compressed()
+        del Temp  # Remove ref to mmap, so file can be closed.
+        expected = np.round(tm.compressed(), 2)
+        assert_allclose(found, expected)
+
+    with in_tempdir():
+        newfname = 'ms.nc'
+        f = netcdf_file(newfname, 'w', maskandscale=True)
+        f.createDimension('Temperature', len(tm))
+        temp = f.createVariable('Temperature', 'i', ('Temperature',))
+        temp.missing_value = 9999
+        temp.scale_factor = 0.01
+        temp.add_offset = 20
+        temp[:] = tm
+        f.close()
+
+        with netcdf_file(newfname, maskandscale=True) as f:
+            Temp = f.variables['Temperature']
+            assert_equal(Temp.missing_value, 9999)
+            assert_equal(Temp.add_offset, 20)
+            assert_equal(Temp.scale_factor, np.float32(0.01))
+            expected = np.round(tm.compressed(), 2)
+            found = Temp[:].compressed()
+            del Temp
+            assert_allclose(found, expected)
+
+
+# ------------------------------------------------------------------------
+# Test reading with masked values (_FillValue / missing_value)
+# ------------------------------------------------------------------------
+
+def test_read_withValuesNearFillValue():
+    # Regression test for ticket #5626
+    fname = pjoin(TEST_DATA_PATH, 'example_3_maskedvals.nc')
+    with netcdf_file(fname, maskandscale=True) as f:
+        vardata = f.variables['var1_fillval0'][:]
+        assert_mask_matches(vardata, [False, True, False])
+
+def test_read_withNoFillValue():
+    # For a variable with no fill value, reading data with maskandscale=True
+    # should return unmasked data
+    fname = pjoin(TEST_DATA_PATH, 'example_3_maskedvals.nc')
+    with netcdf_file(fname, maskandscale=True) as f:
+        vardata = f.variables['var2_noFillval'][:]
+        assert_mask_matches(vardata, [False, False, False])
+        assert_equal(vardata, [1,2,3])
+
+def test_read_withFillValueAndMissingValue():
+    # For a variable with both _FillValue and missing_value, the _FillValue
+    # should be used
+    IRRELEVANT_VALUE = 9999
+    fname = pjoin(TEST_DATA_PATH, 'example_3_maskedvals.nc')
+    with netcdf_file(fname, maskandscale=True) as f:
+        vardata = f.variables['var3_fillvalAndMissingValue'][:]
+        assert_mask_matches(vardata, [True, False, False])
+        assert_equal(vardata, [IRRELEVANT_VALUE, 2, 3])
+
+def test_read_withMissingValue():
+    # For a variable with missing_value but not _FillValue, the missing_value
+    # should be used
+    fname = pjoin(TEST_DATA_PATH, 'example_3_maskedvals.nc')
+    with netcdf_file(fname, maskandscale=True) as f:
+        vardata = f.variables['var4_missingValue'][:]
+        assert_mask_matches(vardata, [False, True, False])
+
+def test_read_withFillValNaN():
+    fname = pjoin(TEST_DATA_PATH, 'example_3_maskedvals.nc')
+    with netcdf_file(fname, maskandscale=True) as f:
+        vardata = f.variables['var5_fillvalNaN'][:]
+        assert_mask_matches(vardata, [False, True, False])
+
+def test_read_withChar():
+    fname = pjoin(TEST_DATA_PATH, 'example_3_maskedvals.nc')
+    with netcdf_file(fname, maskandscale=True) as f:
+        vardata = f.variables['var6_char'][:]
+        assert_mask_matches(vardata, [False, True, False])
+
+def test_read_with2dVar():
+    fname = pjoin(TEST_DATA_PATH, 'example_3_maskedvals.nc')
+    with netcdf_file(fname, maskandscale=True) as f:
+        vardata = f.variables['var7_2d'][:]
+        assert_mask_matches(vardata, [[True, False], [False, False], [False, True]])
+
+def test_read_withMaskAndScaleFalse():
+    # If a variable has a _FillValue (or missing_value) attribute, but is read
+    # with maskandscale set to False, the result should be unmasked
+    fname = pjoin(TEST_DATA_PATH, 'example_3_maskedvals.nc')
+    # Open file with mmap=False to avoid problems with closing a mmap'ed file
+    # when arrays referring to its data still exist:
+    with netcdf_file(fname, maskandscale=False, mmap=False) as f:
+        vardata = f.variables['var3_fillvalAndMissingValue'][:]
+        assert_mask_matches(vardata, [False, False, False])
+        assert_equal(vardata, [1, 2, 3])
@@ -0,0 +1,88 @@
+"""
+Ensure that we can use pathlib.Path objects in all relevant IO functions.
+"""
+import sys
+
+try:
+    from pathlib import Path
+except ImportError:
+    # Not available. No fallback import, since we'll skip the entire
+    # test suite for Python < 3.6.
+    pass
+
+import numpy as np
+from numpy.testing import assert_
+import pytest
+
+import scipy.io
+import scipy.io.wavfile
+from scipy._lib._tmpdirs import tempdir
+import scipy.sparse
+
+
+@pytest.mark.skipif(sys.version_info < (3, 6),
+                    reason='Passing path-like objects to IO functions requires Python >= 3.6')
+class TestPaths(object):
+    data = np.arange(5).astype(np.int64)
+
+    def test_savemat(self):
+        with tempdir() as temp_dir:
+            path = Path(temp_dir) / 'data.mat'
+            scipy.io.savemat(path, {'data': self.data})
+            assert_(path.is_file())
+
+    def test_loadmat(self):
+        # Save data with string path, load with pathlib.Path
+        with tempdir() as temp_dir:
+            path = Path(temp_dir) / 'data.mat'
+            scipy.io.savemat(str(path), {'data': self.data})
+
+            mat_contents = scipy.io.loadmat(path)
+            assert_((mat_contents['data'] == self.data).all())
+
+    def test_whosmat(self):
+        # Save data with string path, load with pathlib.Path
+        with tempdir() as temp_dir:
+            path = Path(temp_dir) / 'data.mat'
+            scipy.io.savemat(str(path), {'data': self.data})
+
+            contents = scipy.io.whosmat(path)
+            assert_(contents[0] == ('data', (1, 5), 'int64'))
+
+    def test_readsav(self):
+        path = Path(__file__).parent / 'data/scalar_string.sav'
+        scipy.io.readsav(path)
+
+    def test_hb_read(self):
+        # Save data with string path, load with pathlib.Path
+        with tempdir() as temp_dir:
+            data = scipy.sparse.csr_matrix(scipy.sparse.eye(3))
+            path = Path(temp_dir) / 'data.hb'
+            scipy.io.harwell_boeing.hb_write(str(path), data)
+
+            data_new = scipy.io.harwell_boeing.hb_read(path)
+            assert_((data_new != data).nnz == 0)
+
+    def test_hb_write(self):
+        with tempdir() as temp_dir:
+            data = scipy.sparse.csr_matrix(scipy.sparse.eye(3))
+            path = Path(temp_dir) / 'data.hb'
+            scipy.io.harwell_boeing.hb_write(path, data)
+            assert_(path.is_file())
+
+    def test_netcdf_file(self):
+        path = Path(__file__).parent / 'data/example_1.nc'
+        scipy.io.netcdf.netcdf_file(path)
+
+    def test_wavfile_read(self):
+        path = Path(__file__).parent / 'data/test-8000Hz-le-2ch-1byteu.wav'
+        scipy.io.wavfile.read(path)
+
+    def test_wavfile_write(self):
+        # Read from str path, write to Path
+        input_path = Path(__file__).parent / 'data/test-8000Hz-le-2ch-1byteu.wav'
+        rate, data = scipy.io.wavfile.read(str(input_path))
+
+        with tempdir() as temp_dir:
+            output_path = Path(temp_dir) / input_path.name
+            scipy.io.wavfile.write(output_path, rate, data)
@@ -0,0 +1,159 @@
+from __future__ import division, print_function, absolute_import
+
+import os
+import sys
+import tempfile
+from io import BytesIO
+
+import numpy as np
+from numpy.testing import assert_equal, assert_, assert_array_equal
+from pytest import raises as assert_raises
+from scipy._lib._numpy_compat import suppress_warnings
+
+from scipy.io import wavfile
+
+
+def datafile(fn):
+    return os.path.join(os.path.dirname(__file__), 'data', fn)
+
+
+def test_read_1():
+    for mmap in [False, True]:
+        rate, data = wavfile.read(datafile('test-44100Hz-le-1ch-4bytes.wav'),
+                                  mmap=mmap)
+
+        assert_equal(rate, 44100)
+        assert_(np.issubdtype(data.dtype, np.int32))
+        assert_equal(data.shape, (4410,))
+
+        del data
+
+
+def test_read_2():
+    for mmap in [False, True]:
+        rate, data = wavfile.read(datafile('test-8000Hz-le-2ch-1byteu.wav'),
+                                  mmap=mmap)
+        assert_equal(rate, 8000)
+        assert_(np.issubdtype(data.dtype, np.uint8))
+        assert_equal(data.shape, (800, 2))
+
+        del data
+
+def test_read_3():
+    for mmap in [False, True]:
+        rate, data = wavfile.read(datafile('test-44100Hz-2ch-32bit-float-le.wav'),
+                                  mmap=mmap)
+        assert_equal(rate, 44100)
+        assert_(np.issubdtype(data.dtype, np.float32))
+        assert_equal(data.shape, (441, 2))
+
+        del data
+
+def test_read_4():
+    for mmap in [False, True]:
+        with suppress_warnings() as sup:
+            sup.filter(wavfile.WavFileWarning,
+                       "Chunk .non-data. not understood, skipping it")
+            rate, data = wavfile.read(datafile('test-48000Hz-2ch-64bit-float-le-wavex.wav'),
+                                      mmap=mmap)
+
+        assert_equal(rate, 48000)
+        assert_(np.issubdtype(data.dtype, np.float64))
+        assert_equal(data.shape, (480, 2))
+
+        del data
+
+
+def test_read_5():
+    for mmap in [False, True]:
+        rate, data = wavfile.read(datafile('test-44100Hz-2ch-32bit-float-be.wav'),
+                                  mmap=mmap)
+        assert_equal(rate, 44100)
+        assert_(np.issubdtype(data.dtype, np.float32))
+        assert_(data.dtype.byteorder == '>' or (sys.byteorder == 'big' and
+                                                data.dtype.byteorder == '='))
+        assert_equal(data.shape, (441, 2))
+
+        del data
+
+
+def test_read_fail():
+    for mmap in [False, True]:
+        fp = open(datafile('example_1.nc'), 'rb')
+        assert_raises(ValueError, wavfile.read, fp, mmap=mmap)
+        fp.close()
+
+
+def test_read_early_eof():
+    for mmap in [False, True]:
+        fp = open(datafile('test-44100Hz-le-1ch-4bytes-early-eof.wav'), 'rb')
+        assert_raises(ValueError, wavfile.read, fp, mmap=mmap)
+        fp.close()
+
+
+def test_read_incomplete_chunk():
+    for mmap in [False, True]:
+        fp = open(datafile('test-44100Hz-le-1ch-4bytes-incomplete-chunk.wav'), 'rb')
+        assert_raises(ValueError, wavfile.read, fp, mmap=mmap)
+        fp.close()
+
+
+def _check_roundtrip(realfile, rate, dtype, channels):
+    if realfile:
+        fd, tmpfile = tempfile.mkstemp(suffix='.wav')
+        os.close(fd)
+    else:
+        tmpfile = BytesIO()
+    try:
+        data = np.random.rand(100, channels)
+        if channels == 1:
+            data = data[:,0]
+        if dtype.kind == 'f':
+            # The range of the float type should be in [-1, 1]
+            data = data.astype(dtype)
+        else:
+            data = (data*128).astype(dtype)
+
+        wavfile.write(tmpfile, rate, data)
+
+        for mmap in [False, True]:
+            rate2, data2 = wavfile.read(tmpfile, mmap=mmap)
+
+            assert_equal(rate, rate2)
+            assert_(data2.dtype.byteorder in ('<', '=', '|'), msg=data2.dtype)
+            assert_array_equal(data, data2)
+
+            del data2
+    finally:
+        if realfile:
+            os.unlink(tmpfile)
+
+
+def test_write_roundtrip():
+    for realfile in (False, True):
+        for dtypechar in ('i', 'u', 'f', 'g', 'q'):
+            for size in (1, 2, 4, 8):
+                if size == 1 and dtypechar == 'i':
+                    # signed 8-bit integer PCM is not allowed
+                    continue
+                if size > 1 and dtypechar == 'u':
+                    # unsigned > 8-bit integer PCM is not allowed
+                    continue
+                if (size == 1 or size == 2) and dtypechar == 'f':
+                    # 8- or 16-bit float PCM is not expected
+                    continue
+                if dtypechar in 'gq':
+                    # no size allowed for these types
+                    if size == 1:
+                        size = ''
+                    else:
+                        continue
+
+                for endianness in ('>', '<'):
+                    if size == 1 and endianness == '<':
+                        continue
+                    for rate in (8000, 32000):
+                        for channels in (1, 2, 5):
+                            dt = np.dtype('%s%s%s' % (endianness, dtypechar, size))
+                            _check_roundtrip(realfile, rate, dt, channels)
+
@@ -0,0 +1,405 @@
+"""
+Module to read / write wav files using numpy arrays
+
+Functions
+---------
+`read`: Return the sample rate (in samples/sec) and data from a WAV file.
+
+`write`: Write a numpy array as a WAV file.
+
+"""
+from __future__ import division, print_function, absolute_import
+
+import sys
+import numpy
+import struct
+import warnings
+
+
+__all__ = [
+    'WavFileWarning',
+    'read',
+    'write'
+]
+
+
+class WavFileWarning(UserWarning):
+    """Warning category this module passes to ``warnings.warn``."""
+    pass
+
+
+WAVE_FORMAT_PCM = 0x0001
+WAVE_FORMAT_IEEE_FLOAT = 0x0003
+WAVE_FORMAT_EXTENSIBLE = 0xfffe
+KNOWN_WAVE_FORMATS = (WAVE_FORMAT_PCM, WAVE_FORMAT_IEEE_FLOAT)
+
+# assumes file pointer is immediately
+#  after the 'fmt ' id
+
+
+def _read_fmt_chunk(fid, is_big_endian):
+    """
+    Returns
+    -------
+    size : int
+        size of format subchunk in bytes (minus 8 for "fmt " and itself)
+    format_tag : int
+        PCM, float, or compressed format
+    channels : int
+        number of channels
+    fs : int
+        sampling frequency in samples per second
+    bytes_per_second : int
+        overall byte rate for the file
+    block_align : int
+        bytes per sample, including all channels
+    bit_depth : int
+        bits per sample
+    """
+    if is_big_endian:
+        fmt = '>'
+    else:
+        fmt = '<'
+
+    size = res = struct.unpack(fmt+'I', fid.read(4))[0]
+    bytes_read = 0
+
+    if size < 16:
+        raise ValueError("Binary structure of wave file is not compliant")
+
+    res = struct.unpack(fmt+'HHIIHH', fid.read(16))
+    bytes_read += 16
+
+    format_tag, channels, fs, bytes_per_second, block_align, bit_depth = res
+
+    if format_tag == WAVE_FORMAT_EXTENSIBLE and size >= (16+2):
+        ext_chunk_size = struct.unpack(fmt+'H', fid.read(2))[0]
+        bytes_read += 2
+        if ext_chunk_size >= 22:
+            extensible_chunk_data = fid.read(22)
+            bytes_read += 22
+            raw_guid = extensible_chunk_data[2+4:2+4+16]
+            # GUID template {XXXXXXXX-0000-0010-8000-00AA00389B71} (RFC-2361)
+            # MS GUID byte order: first three groups are native byte order,
+            # rest is Big Endian
+            if is_big_endian:
+                tail = b'\x00\x00\x00\x10\x80\x00\x00\xAA\x00\x38\x9B\x71'
+            else:
+                tail = b'\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71'
+            if raw_guid.endswith(tail):
+                format_tag = struct.unpack(fmt+'I', raw_guid[:4])[0]
+        else:
+            raise ValueError("Binary structure of wave file is not compliant")
+
+    if format_tag not in KNOWN_WAVE_FORMATS:
+        raise ValueError("Unknown wave file format")
+
+    # move file pointer to next chunk
+    if size > (bytes_read):
+        fid.read(size - bytes_read)
+
+    return (size, format_tag, channels, fs, bytes_per_second, block_align,
+            bit_depth)
+
+
+# assumes file pointer is immediately after the 'data' id
+def _read_data_chunk(fid, format_tag, channels, bit_depth, is_big_endian,
+                     mmap=False):
+    if is_big_endian:
+        fmt = '>I'
+    else:
+        fmt = '<I'
+
+    # Size of the data subchunk in bytes
+    size = struct.unpack(fmt, fid.read(4))[0]
+
+    # Number of bytes per sample
+    bytes_per_sample = bit_depth//8
+    if bit_depth == 8:
+        dtype = 'u1'
+    else:
+        if is_big_endian:
+            dtype = '>'
+        else:
+            dtype = '<'
+        if format_tag == WAVE_FORMAT_PCM:
+            dtype += 'i%d' % bytes_per_sample
+        else:
+            dtype += 'f%d' % bytes_per_sample
+    if not mmap:
+        data = numpy.frombuffer(fid.read(size), dtype=dtype)
+    else:
+        start = fid.tell()
+        data = numpy.memmap(fid, dtype=dtype, mode='c', offset=start,
+                            shape=(size//bytes_per_sample,))
+        fid.seek(start + size)
+
+    if channels > 1:
+        data = data.reshape(-1, channels)
+    return data
+
+
+def _skip_unknown_chunk(fid, is_big_endian):
+    if is_big_endian:
+        fmt = '>I'
+    else:
+        fmt = '<I'
+
+    data = fid.read(4)
+    # call unpack() and seek() only if we have really read data from file
+    # otherwise empty read at the end of the file would trigger
+    # unnecessary exception at unpack() call
+    # in case data equals somehow to 0, there is no need for seek() anyway
+    if data:
+        size = struct.unpack(fmt, data)[0]
+        fid.seek(size, 1)
+
+
+def _read_riff_chunk(fid):
+    str1 = fid.read(4)  # File signature
+    if str1 == b'RIFF':
+        is_big_endian = False
+        fmt = '<I'
+    elif str1 == b'RIFX':
+        is_big_endian = True
+        fmt = '>I'
+    else:
+        # There are also .wav files with "FFIR" or "XFIR" signatures?
+        raise ValueError("File format {}... not "
+                         "understood.".format(repr(str1)))
+
+    # Size of entire file
+    file_size = struct.unpack(fmt, fid.read(4))[0] + 8
+
+    str2 = fid.read(4)
+    if str2 != b'WAVE':
+        raise ValueError("Not a WAV file.")
+
+    return file_size, is_big_endian
+
+
+def read(filename, mmap=False):
+    """
+    Open a WAV file
+
+    Return the sample rate (in samples/sec) and data from a WAV file.
+
+    Parameters
+    ----------
+    filename : string or open file handle
+        Input wav file.
+    mmap : bool, optional
+        Whether to read data as memory-mapped.
+        Only to be used on real files (Default: False).
+
+        .. versionadded:: 0.12.0
+
+    Returns
+    -------
+    rate : int
+        Sample rate of wav file.
+    data : numpy array
+        Data read from wav file.  Data-type is determined from the file;
+        see Notes.
+
+    Notes
+    -----
+    This function cannot read wav files with 24-bit data.
+
+    Common data types: [1]_
+
+    =====================  ===========  ===========  =============
+         WAV format            Min          Max       NumPy dtype
+    =====================  ===========  ===========  =============
+    32-bit floating-point  -1.0         +1.0         float32
+    32-bit PCM             -2147483648  +2147483647  int32
+    16-bit PCM             -32768       +32767       int16
+    8-bit PCM              0            255          uint8
+    =====================  ===========  ===========  =============
+
+    Note that 8-bit PCM is unsigned.
+
+    References
+    ----------
+    .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
+       Interface and Data Specifications 1.0", section "Data Format of the
+       Samples", August 1991
+       http://www.tactilemedia.com/info/MCI_Control_Info.html
+
+    """
+    if hasattr(filename, 'read'):
+        fid = filename
+        mmap = False
+    else:
+        fid = open(filename, 'rb')
+
+    try:
+        file_size, is_big_endian = _read_riff_chunk(fid)
+        fmt_chunk_received = False
+        channels = 1
+        bit_depth = 8
+        format_tag = WAVE_FORMAT_PCM
+        while fid.tell() < file_size:
+            # read the next chunk
+            chunk_id = fid.read(4)
+
+            if not chunk_id:
+                raise ValueError("Unexpected end of file.")
+            elif len(chunk_id) < 4:
+                raise ValueError("Incomplete wav chunk.")
+
+            if chunk_id == b'fmt ':
+                fmt_chunk_received = True
+                fmt_chunk = _read_fmt_chunk(fid, is_big_endian)
+                format_tag, channels, fs = fmt_chunk[1:4]
+                bit_depth = fmt_chunk[6]
+                if bit_depth not in (8, 16, 32, 64, 96, 128):
+                    raise ValueError("Unsupported bit depth: the wav file "
+                                     "has {}-bit data.".format(bit_depth))
+            elif chunk_id == b'fact':
+                _skip_unknown_chunk(fid, is_big_endian)
+            elif chunk_id == b'data':
+                if not fmt_chunk_received:
+                    raise ValueError("No fmt chunk before data")
+                data = _read_data_chunk(fid, format_tag, channels, bit_depth,
+                                        is_big_endian, mmap)
+            elif chunk_id == b'LIST':
+                # Someday this could be handled properly but for now skip it
+                _skip_unknown_chunk(fid, is_big_endian)
+            elif chunk_id in (b'JUNK', b'Fake'):
+                # Skip alignment chunks without warning
+                _skip_unknown_chunk(fid, is_big_endian)
+            else:
+                warnings.warn("Chunk (non-data) not understood, skipping it.",
+                              WavFileWarning)
+                _skip_unknown_chunk(fid, is_big_endian)
+    finally:
+        if not hasattr(filename, 'read'):
+            fid.close()
+        else:
+            fid.seek(0)
+
+    return fs, data
+
+
+def write(filename, rate, data):
+    """
+    Write a numpy array as a WAV file.
+
+    Parameters
+    ----------
+    filename : string or open file handle
+        Output wav file.
+    rate : int
+        The sample rate (in samples/sec).
+    data : ndarray
+        A 1-D or 2-D numpy array of either integer or float data-type.
+
+    Notes
+    -----
+    * Writes a simple uncompressed WAV file.
+    * To write multiple-channels, use a 2-D array of shape
+      (Nsamples, Nchannels).
+    * The bits-per-sample and PCM/float will be determined by the data-type.
+
+    Common data types: [1]_
+
+    =====================  ===========  ===========  =============
+         WAV format            Min          Max       NumPy dtype
+    =====================  ===========  ===========  =============
+    32-bit floating-point  -1.0         +1.0         float32
+    32-bit PCM             -2147483648  +2147483647  int32
+    16-bit PCM             -32768       +32767       int16
+    8-bit PCM              0            255          uint8
+    =====================  ===========  ===========  =============
+
+    Note that 8-bit PCM is unsigned.
+
+    References
+    ----------
+    .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
+       Interface and Data Specifications 1.0", section "Data Format of the
+       Samples", August 1991
+       http://www.tactilemedia.com/info/MCI_Control_Info.html
+
+    """
+    if hasattr(filename, 'write'):
+        fid = filename
+    else:
+        fid = open(filename, 'wb')
+
+    fs = rate
+
+    try:
+        dkind = data.dtype.kind
+        if not (dkind == 'i' or dkind == 'f' or (dkind == 'u' and
+                                                 data.dtype.itemsize == 1)):
+            raise ValueError("Unsupported data type '%s'" % data.dtype)
+
+        header_data = b''
+
+        header_data += b'RIFF'
+        header_data += b'\x00\x00\x00\x00'
+        header_data += b'WAVE'
+
+        # fmt chunk
+        header_data += b'fmt '
+        if dkind == 'f':
+            format_tag = WAVE_FORMAT_IEEE_FLOAT
+        else:
+            format_tag = WAVE_FORMAT_PCM
+        if data.ndim == 1:
+            channels = 1
+        else:
+            channels = data.shape[1]
+        bit_depth = data.dtype.itemsize * 8
+        bytes_per_second = fs*(bit_depth // 8)*channels
+        block_align = channels * (bit_depth // 8)
+
+        fmt_chunk_data = struct.pack('<HHIIHH', format_tag, channels, fs,
+                                     bytes_per_second, block_align, bit_depth)
+        if not (dkind == 'i' or dkind == 'u'):
+            # add cbSize field for non-PCM files
+            fmt_chunk_data += b'\x00\x00'
+
+        header_data += struct.pack('<I', len(fmt_chunk_data))
+        header_data += fmt_chunk_data
+
+        # fact chunk (non-PCM files)
+        if not (dkind == 'i' or dkind == 'u'):
+            header_data += b'fact'
+            header_data += struct.pack('<II', 4, data.shape[0])
+
+        # check data size (needs to be immediately before the data chunk)
+        if ((len(header_data)-4-4) + (4+4+data.nbytes)) > 0xFFFFFFFF:
+            raise ValueError("Data exceeds wave file size limit")
+
+        fid.write(header_data)
+
+        # data chunk
+        fid.write(b'data')
+        fid.write(struct.pack('<I', data.nbytes))
+        if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and
+                                           sys.byteorder == 'big'):
+            data = data.byteswap()
+        _array_tofile(fid, data)
+
+        # Determine file size and place it in correct
+        #  position at start of the file.
+        size = fid.tell()
+        fid.seek(4)
+        fid.write(struct.pack('<I', size-8))
+
+    finally:
+        if not hasattr(filename, 'write'):
+            fid.close()
+        else:
+            fid.seek(0)
+
+
+if sys.version_info[0] >= 3:
+    def _array_tofile(fid, data):
+        # ravel gives a c-contiguous buffer
+        fid.write(data.ravel().view('b').data)
+else:
+    def _array_tofile(fid, data):
+        fid.write(data.tostring())