demo + utils venv

This commit is contained in:
d3m1g0d
2019-02-03 13:40:10 +01:00
parent 5fa112490b
commit cfa9c8ea23
5994 changed files with 1353819 additions and 0 deletions
@@ -0,0 +1,20 @@
"""
Utilities for dealing with MATLAB(R) files
Notes
-----
MATLAB(R) is a registered trademark of The MathWorks, Inc., 3 Apple Hill
Drive, Natick, MA 01760-2098, USA.
"""
from __future__ import division, print_function, absolute_import
# Matlab file read and write utilities
from .mio import loadmat, savemat, whosmat
from . import byteordercodes
__all__ = ['loadmat', 'savemat', 'whosmat', 'byteordercodes']
from scipy._lib._testutils import PytestTester
test = PytestTester(__name__)
del PytestTester
@@ -0,0 +1,70 @@
''' Byteorder utilities for system - numpy byteorder encoding
Converts a variety of string codes for little endian, big endian,
native byte order and swapped byte order to explicit numpy endian
codes - one of '<' (little endian) or '>' (big endian)
'''
from __future__ import division, print_function, absolute_import
import sys
# True when the interpreter is running on a little-endian host.
sys_is_le = sys.byteorder == 'little'
# Explicit numpy codes for the host byte order and for its opposite.
native_code = '<' if sys_is_le else '>'
swapped_code = '>' if sys_is_le else '<'

# Accepted spellings for each byte order.  ``to_numpy_code`` lower-cases its
# input before matching, so the lower-case 's' has to be listed for the
# documented 'S' / 's' code to be recognised at all; the upper-case 'S' is
# kept so that other readers of this table see no entry disappear.
aliases = {'little': ('little', '<', 'l', 'le'),
           'big': ('big', '>', 'b', 'be'),
           'native': ('native', '='),
           'swapped': ('swapped', 'S', 's')}


def to_numpy_code(code):
    """
    Convert various order codings to numpy format.

    Parameters
    ----------
    code : str or None
        The code to convert. It is converted to lower case before parsing.
        Legal values are:
        'little', 'big', 'l', 'b', 'le', 'be', '<', '>', 'native', '=',
        'swapped', 's'.  ``None`` is treated as native byte order.

    Returns
    -------
    out_code : {'<', '>'}
        Here '<' is the numpy dtype code for little endian,
        and '>' is the code for big endian.

    Raises
    ------
    ValueError
        If `code` is not one of the recognised spellings.

    Examples
    --------
    >>> import sys
    >>> sys_is_le == (sys.byteorder == 'little')
    True
    >>> to_numpy_code('big')
    '>'
    >>> to_numpy_code('little')
    '<'
    >>> nc = to_numpy_code('native')
    >>> nc == '<' if sys_is_le else nc == '>'
    True
    >>> sc = to_numpy_code('swapped')
    >>> sc == '>' if sys_is_le else sc == '<'
    True
    """
    # The None test must come first: calling .lower() on None would raise
    # AttributeError before the native-order fallback could be reached.
    if code is None:
        return native_code
    code = code.lower()
    if code in aliases['little']:
        return '<'
    if code in aliases['big']:
        return '>'
    if code in aliases['native']:
        return native_code
    if code in aliases['swapped']:
        return swapped_code
    raise ValueError(
        'We cannot handle byte order %s' % code)
@@ -0,0 +1,326 @@
"""
Module for reading and writing matlab (TM) .mat files
"""
# Authors: Travis Oliphant, Matthew Brett
from __future__ import division, print_function, absolute_import
from scipy._lib.six import string_types
from .miobase import get_matfile_version, docfiller
from .mio4 import MatFile4Reader, MatFile4Writer
from .mio5 import MatFile5Reader, MatFile5Writer
__all__ = ['mat_reader_factory', 'loadmat', 'savemat', 'whosmat']
def _open_file(file_like, appendmat):
"""
Open `file_like` and return as file-like object. First, check if object is
already file-like; if so, return it as-is. Otherwise, try to pass it
to open(). If that fails, and `file_like` is a string, and `appendmat` is true,
append '.mat' and try again.
"""
try:
file_like.read(0)
return file_like, False
except AttributeError:
pass
try:
return open(file_like, 'rb'), True
except IOError:
# Probably "not found"
if isinstance(file_like, string_types):
if appendmat and not file_like.endswith('.mat'):
file_like += '.mat'
return open(file_like, 'rb'), True
else:
raise IOError('Reader needs file name or open file-like object')
@docfiller
def mat_reader_factory(file_name, appendmat=True, **kwargs):
    """
    Create reader for matlab .mat format files.

    Parameters
    ----------
    %(file_arg)s
    %(append_arg)s
    %(load_args)s
    %(struct_arg)s

    Returns
    -------
    matreader : MatFileReader object
        Initialized instance of MatFileReader class matching the mat file
        type detected in `file_name`.
    file_opened : bool
        Whether the file was opened by this routine.

    Raises
    ------
    NotImplementedError
        If the file reports major version 2 (MATLAB v7.3 / HDF5).
    TypeError
        If the detected major version is not 0, 1 or 2.
    """
    byte_stream, file_opened = _open_file(file_name, appendmat)
    try:
        # Only the major version selects the reader class
        mjv, _ = get_matfile_version(byte_stream)
        if mjv == 0:
            return MatFile4Reader(byte_stream, **kwargs), file_opened
        if mjv == 1:
            return MatFile5Reader(byte_stream, **kwargs), file_opened
        if mjv == 2:
            raise NotImplementedError(
                'Please use HDF reader for matlab v7.3 files')
        raise TypeError('Did not recognize version %s' % mjv)
    except Exception:
        # No reader is returned on failure, so nobody else can close a
        # stream that was opened here.  Streams supplied by the caller are
        # left alone.
        if file_opened:
            byte_stream.close()
        raise
@docfiller
def loadmat(file_name, mdict=None, appendmat=True, **kwargs):
    """
    Load MATLAB file.

    Parameters
    ----------
    file_name : str
        Name of the mat file (do not need .mat extension if
        appendmat==True). Can also pass open file-like object.
    mdict : dict, optional
        Dictionary in which to insert matfile variables.
    appendmat : bool, optional
        True to append the .mat extension to the end of the given
        filename, if not already present.
    byte_order : str or None, optional
        None by default, implying byte order guessed from mat
        file. Otherwise can be one of ('native', '=', 'little', '<',
        'BIG', '>').
    mat_dtype : bool, optional
        If True, return arrays in same dtype as would be loaded into
        MATLAB (instead of the dtype with which they are saved).
    squeeze_me : bool, optional
        Whether to squeeze unit matrix dimensions or not.
    chars_as_strings : bool, optional
        Whether to convert char arrays to string arrays.
    matlab_compatible : bool, optional
        Returns matrices as would be loaded by MATLAB (implies
        squeeze_me=False, chars_as_strings=False, mat_dtype=True,
        struct_as_record=True).
    struct_as_record : bool, optional
        Whether to load MATLAB structs as numpy record arrays, or as
        old-style numpy arrays with dtype=object.  Setting this flag to
        False replicates the behavior of scipy version 0.7.x (returning
        numpy object arrays).  The default setting is True, because it
        allows easier round-trip load and save of MATLAB files.
    verify_compressed_data_integrity : bool, optional
        Whether the length of compressed sequences in the MATLAB file
        should be checked, to ensure that they are not longer than we expect.
        It is advisable to enable this (the default) because overlong
        compressed sequences in MATLAB files generally indicate that the
        files have experienced some sort of corruption.
    variable_names : None or sequence
        If None (the default) - read all variables in file. Otherwise
        `variable_names` should be a sequence of strings, giving names of the
        MATLAB variables to read from the file.  The reader will skip any
        variable with a name not in this sequence, possibly saving some read
        processing.

    Returns
    -------
    mat_dict : dict
        dictionary with variable names as keys, and loaded matrices as
        values.  When `mdict` is given, it is updated in place and returned.

    Notes
    -----
    v4 (Level 1.0), v6 and v7 to 7.2 matfiles are supported.

    You will need an HDF5 python library to read MATLAB 7.3 format mat
    files.  Because scipy does not supply one, we do not implement the
    HDF5 / 7.3 interface here.

    Examples
    --------
    >>> from os.path import dirname, join as pjoin
    >>> import scipy.io as sio

    Get the filename for an example .mat file from the tests/data directory.

    >>> data_dir = pjoin(dirname(sio.__file__), 'matlab', 'tests', 'data')
    >>> mat_fname = pjoin(data_dir, 'testdouble_7.4_GLNX86.mat')

    Load the .mat file contents.

    >>> mat_contents = sio.loadmat(mat_fname)

    The result is a dictionary, one key/value pair for each variable:

    >>> sorted(mat_contents.keys())
    ['__globals__', '__header__', '__version__', 'testdouble']
    >>> mat_contents['testdouble']
    array([[0.        , 0.78539816, 1.57079633, 2.35619449, 3.14159265,
            3.92699082, 4.71238898, 5.49778714, 6.28318531]])

    By default SciPy reads MATLAB structs as structured NumPy arrays where the
    dtype fields are of type `object` and the names correspond to the MATLAB
    struct field names. This can be disabled by setting the optional argument
    `struct_as_record=False`.

    Get the filename for an example .mat file that contains a MATLAB struct
    called `teststruct` and load the contents.

    >>> matstruct_fname = pjoin(data_dir, 'teststruct_7.4_GLNX86.mat')
    >>> matstruct_contents = sio.loadmat(matstruct_fname)
    >>> teststruct = matstruct_contents['teststruct']
    >>> teststruct.dtype
    dtype([('stringfield', 'O'), ('doublefield', 'O'), ('complexfield', 'O')])

    The size of the structured array is the size of the MATLAB struct, not the
    number of elements in any particular field. The shape defaults to 2-D
    unless the optional argument `squeeze_me=True`, in which case all length 1
    dimensions are removed.

    >>> teststruct.size
    1
    >>> teststruct.shape
    (1, 1)

    Get the 'stringfield' of the first element in the MATLAB struct.

    >>> teststruct[0, 0]['stringfield']
    array(['Rats live on no evil star.'],
          dtype='<U26')

    Get the first element of the 'doublefield'.

    >>> teststruct['doublefield'][0, 0]
    array([[ 1.41421356,  2.71828183,  3.14159265]])

    Load the MATLAB struct, squeezing out length 1 dimensions, and get the item
    from the 'complexfield'.

    >>> matstruct_squeezed = sio.loadmat(matstruct_fname, squeeze_me=True)
    >>> matstruct_squeezed['teststruct'].shape
    ()
    >>> matstruct_squeezed['teststruct']['complexfield'].shape
    ()
    >>> matstruct_squeezed['teststruct']['complexfield'].item()
    array([ 1.41421356+1.41421356j,  2.71828183+2.71828183j,
            3.14159265+3.14159265j])
    """
    # `variable_names` is consumed here; every other keyword goes to the
    # reader constructor.
    variable_names = kwargs.pop('variable_names', None)
    MR, file_opened = mat_reader_factory(file_name, appendmat, **kwargs)
    try:
        matfile_dict = MR.get_variables(variable_names)
    finally:
        # Close a file we opened ourselves even when reading fails part-way
        # (for example on a truncated or corrupt file).
        if file_opened:
            MR.mat_stream.close()
    if mdict is not None:
        mdict.update(matfile_dict)
    else:
        mdict = matfile_dict
    return mdict
@docfiller
def savemat(file_name, mdict,
            appendmat=True,
            format='5',
            long_field_names=False,
            do_compression=False,
            oned_as='row'):
    """
    Save a dictionary of names and arrays into a MATLAB-style .mat file.

    This saves the array objects in the given dictionary to a MATLAB-
    style .mat file.

    Parameters
    ----------
    file_name : str or file-like object
        Name of the .mat file (.mat extension not needed if ``appendmat ==
        True``).
        Can also pass open file_like object.
    mdict : dict
        Dictionary from which to save matfile variables.
    appendmat : bool, optional
        True (the default) to append the .mat extension to the end of the
        given filename, if not already present.
    format : {'5', '4'}, string, optional
        '5' (the default) for MATLAB 5 and up (to 7.2),
        '4' for MATLAB 4 .mat files.
    long_field_names : bool, optional
        False (the default) - maximum field name length in a structure is
        31 characters which is the documented maximum length.
        True - maximum field name length in a structure is 63 characters
        which works for MATLAB 7.6+.
    do_compression : bool, optional
        Whether or not to compress matrices on write.  Default is False.
    oned_as : {'row', 'column'}, optional
        If 'column', write 1-D numpy arrays as column vectors.
        If 'row', write 1-D numpy arrays as row vectors.

    Raises
    ------
    ValueError
        If `format` is neither '4' nor '5', or if `long_field_names` is
        requested together with format '4'.

    See also
    --------
    mio4.MatFile4Writer
    mio5.MatFile5Writer
    """
    file_opened = False
    if hasattr(file_name, 'write'):
        # File-like object already; use as-is
        file_stream = file_name
    else:
        if isinstance(file_name, string_types):
            if appendmat and not file_name.endswith('.mat'):
                file_name = file_name + ".mat"
        file_stream = open(file_name, 'wb')
        file_opened = True

    try:
        if format == '4':
            if long_field_names:
                raise ValueError(
                    "Long field names are not available for version 4 files")
            MW = MatFile4Writer(file_stream, oned_as)
        elif format == '5':
            MW = MatFile5Writer(file_stream,
                                do_compression=do_compression,
                                unicode_strings=True,
                                long_field_names=long_field_names,
                                oned_as=oned_as)
        else:
            raise ValueError("Format should be '4' or '5'")
        MW.put_variables(mdict)
    finally:
        # The handle opened above must be released on every path, including
        # the argument errors raised in this block and failures while
        # writing.  A caller-supplied stream stays open.
        if file_opened:
            file_stream.close()
@docfiller
def whosmat(file_name, appendmat=True, **kwargs):
    """
    List variables inside a MATLAB file.

    Parameters
    ----------
    %(file_arg)s
    %(append_arg)s
    %(load_args)s
    %(struct_arg)s

    Returns
    -------
    variables : list of tuples
        A list of tuples, where each tuple holds the matrix name (a string),
        its shape (tuple of ints), and its data class (a string).
        Possible data classes are: int8, uint8, int16, uint16, int32, uint32,
        int64, uint64, single, double, cell, struct, object, char, sparse,
        function, opaque, logical, unknown.

    Notes
    -----
    v4 (Level 1.0), v6 and v7 to 7.2 matfiles are supported.

    You will need an HDF5 python library to read matlab 7.3 format mat
    files.  Because scipy does not supply one, we do not implement the
    HDF5 / 7.3 interface here.

    .. versionadded:: 0.12.0
    """
    # `appendmat` has to be forwarded explicitly; otherwise the factory
    # falls back to its own default and appendmat=False is silently ignored.
    ML, file_opened = mat_reader_factory(file_name, appendmat, **kwargs)
    try:
        variables = ML.list_variables()
    finally:
        # Release a file opened on our behalf even if listing fails
        if file_opened:
            ML.mat_stream.close()
    return variables
@@ -0,0 +1,618 @@
''' Classes for read / write of matlab (TM) 4 files
'''
from __future__ import division, print_function, absolute_import
import sys
import warnings
import numpy as np
from numpy.compat import asbytes, asstr
import scipy.sparse
from scipy._lib.six import string_types
from .miobase import (MatFileReader, docfiller, matdims, read_dtype,
convert_dtypes, arr_to_chars, arr_dtype_number)
from .mio_utils import squeeze_element, chars_to_strings
from functools import reduce
# True when the host running this code is little-endian.
SYS_LITTLE_ENDIAN = sys.byteorder == 'little'
# Mat4 data type codes: the tens digit ("P") of the header's ``mopt`` field,
# as decoded in VarReader4.read_header and encoded in VarWriter4.write_header.
miDOUBLE = 0
miSINGLE = 1
miINT32 = 2
miINT16 = 3
miUINT16 = 4
miUINT8 = 5
# Data type code -> numpy dtype string without byte order.  The byte order is
# applied when MatFile4Reader.initialize_read passes this table to
# ``convert_dtypes``.  'header' describes the five int32 fields that start
# every variable record.
mdtypes_template = {
miDOUBLE: 'f8',
miSINGLE: 'f4',
miINT32: 'i4',
miINT16: 'i2',
miUINT16: 'u2',
miUINT8: 'u1',
'header': [('mopt', 'i4'),
('mrows', 'i4'),
('ncols', 'i4'),
('imagf', 'i4'),
('namlen', 'i4')],
'U1': 'U1',
}
# numpy dtype string with the byte-order character removed -> Mat4 data type
# code.  VarWriter4.write_numeric looks arrays up here by
# ``arr.dtype.str[1:]``.
np_to_mtypes = {
'f8': miDOUBLE,
'c32': miDOUBLE,
'c24': miDOUBLE,
'c16': miDOUBLE,
'f4': miSINGLE,
'c8': miSINGLE,
'i4': miINT32,
'i2': miINT16,
'u2': miUINT16,
'u1': miUINT8,
'S1': miUINT8,
}
# matrix classes
# (the units digit, "T", of ``mopt``)
mxFULL_CLASS = 0
mxCHAR_CLASS = 1
mxSPARSE_CLASS = 2
# Thousands digit ("M") of ``mopt`` -> byte/float format.  read_header only
# accepts 0 and 1 silently and warns, using the name from this table, for
# any other value.
order_codes = {
0: '<',
1: '>',
2: 'VAX D-float', # !
3: 'VAX G-float',
4: 'Cray', # !!
}
# Matrix class code -> class name reported by MatFile4Reader.list_variables
# (codes missing from this table are reported as 'unknown').
mclass_info = {
mxFULL_CLASS: 'double',
mxCHAR_CLASS: 'char',
mxSPARSE_CLASS: 'sparse',
}
class VarHeader4(object):
    """Plain record holding the decoded header of one Mat4 variable.

    Attributes
    ----------
    name, dtype, mclass, dims, is_complex
        Stored exactly as passed to the constructor.
    """
    # Mat4 variables never logical or global
    is_global = False
    is_logical = False

    def __init__(self, name, dtype, mclass, dims, is_complex):
        (self.name,
         self.dtype,
         self.mclass,
         self.dims,
         self.is_complex) = name, dtype, mclass, dims, is_complex
class VarReader4(object):
    ''' Class to read matlab 4 variables

    Copies the stream, dtype table and the ``chars_as_strings`` /
    ``squeeze_me`` options from the owning file reader at construction time.
    '''

    def __init__(self, file_reader):
        self.file_reader = file_reader
        self.mat_stream = file_reader.mat_stream
        self.dtypes = file_reader.dtypes
        self.chars_as_strings = file_reader.chars_as_strings
        self.squeeze_me = file_reader.squeeze_me

    def read_header(self):
        ''' Read and return header for variable

        Returns
        -------
        hdr : ``VarHeader4`` instance

        Raises
        ------
        ValueError
            If ``mopt`` is outside 0..5000 or its hundreds digit is not 0.
        '''
        data = read_dtype(self.mat_stream, self.dtypes['header'])
        name = self.mat_stream.read(int(data['namlen'])).strip(b'\x00')
        if data['mopt'] < 0 or data['mopt'] > 5000:
            raise ValueError('Mat 4 mopt wrong format, byteswapping problem?')
        # mopt packs four decimal digits: M (order), O, P (type), T (class)
        M, rest = divmod(data['mopt'], 1000)  # order code
        if M not in (0, 1):
            # mopt == 5000 passes the range check above but has no entry in
            # order_codes; fall back to the raw number so that we still warn
            # instead of failing with KeyError.
            warnings.warn("We do not support byte ordering '%s'; returned "
                          "data may be corrupt" % order_codes.get(int(M), M),
                          UserWarning)
        O, rest = divmod(rest, 100)  # unused, should be 0
        if O != 0:
            raise ValueError('O in MOPT integer should be 0, wrong format?')
        P, rest = divmod(rest, 10)  # data type code e.g miDOUBLE (see above)
        T = rest  # matrix type code e.g. mxFULL_CLASS (see above)
        dims = (data['mrows'], data['ncols'])
        is_complex = data['imagf'] == 1
        dtype = self.dtypes[P]
        return VarHeader4(
            name,
            dtype,
            T,
            dims,
            is_complex)

    def array_from_header(self, hdr, process=True):
        ''' Read the array described by `hdr` from the stream

        Parameters
        ----------
        hdr : ``VarHeader4`` instance
        process : bool, optional
            If True, apply ``chars_as_strings`` and ``squeeze_me``
            post-processing.  Sparse matrices are always returned as read.

        Raises
        ------
        TypeError
            If ``hdr.mclass`` is not a known matrix class.
        '''
        mclass = hdr.mclass
        if mclass == mxFULL_CLASS:
            arr = self.read_full_array(hdr)
        elif mclass == mxCHAR_CLASS:
            arr = self.read_char_array(hdr)
            if process and self.chars_as_strings:
                arr = chars_to_strings(arr)
        elif mclass == mxSPARSE_CLASS:
            # no current processing (below) makes sense for sparse
            return self.read_sparse_array(hdr)
        else:
            raise TypeError('No reader for class code %s' % mclass)
        if process and self.squeeze_me:
            return squeeze_element(arr)
        return arr

    def read_sub_array(self, hdr, copy=True):
        ''' Mat4 read using header `hdr` dtype and dims

        Parameters
        ----------
        hdr : object
           object with attributes ``dtype``, ``dims``.  dtype is assumed to be
           the correct endianness
        copy : bool, optional
           copies array before return if True (default True)
           (buffer is usually read only)

        Returns
        -------
        arr : ndarray
            of dtype given by `hdr` ``dtype`` and shape given by `hdr` ``dims``

        Raises
        ------
        ValueError
            If the stream ends before the whole matrix has been read.
        '''
        dt = hdr.dtype
        dims = hdr.dims
        num_bytes = dt.itemsize
        for d in dims:
            num_bytes *= d
        raw = self.mat_stream.read(int(num_bytes))
        if len(raw) != num_bytes:
            raise ValueError("Not enough bytes to read matrix '%s'; is this "
                             "a badly-formed file? Consider listing matrices "
                             "with `whosmat` and loading named matrices with "
                             "`variable_names` kwarg to `loadmat`" % hdr.name)
        # The data is laid out column-major in the stream, hence order='F'
        arr = np.ndarray(shape=dims,
                         dtype=dt,
                         buffer=raw,
                         order='F')
        if copy:
            arr = arr.copy()
        return arr

    def read_full_array(self, hdr):
        ''' Full (rather than sparse) matrix getter

        Read matrix (array) can be real or complex

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ndarray
            complex array if ``hdr.is_complex`` is True, otherwise a real
            numeric array
        '''
        if hdr.is_complex:
            # avoid array copy to save memory
            res = self.read_sub_array(hdr, copy=False)
            res_j = self.read_sub_array(hdr, copy=False)
            return res + (res_j * 1j)
        return self.read_sub_array(hdr)

    def read_char_array(self, hdr):
        ''' latin-1 text matrix (char matrix) reader

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ndarray
            with dtype 'U1', shape given by `hdr` ``dims``
        '''
        arr = self.read_sub_array(hdr).astype(np.uint8)
        # ``tobytes`` replaces ``tostring``, which newer NumPy no longer has
        S = arr.tobytes().decode('latin-1')
        return np.ndarray(shape=hdr.dims,
                          dtype=np.dtype('U1'),
                          buffer=np.array(S)).copy()

    def read_sparse_array(self, hdr):
        ''' Read and return sparse matrix type

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ``scipy.sparse.coo_matrix``
            with dtype ``float`` and shape read from the sparse matrix data

        Notes
        -----
        MATLAB 4 real sparse arrays are saved in a N+1 by 3 array format, where
        N is the number of non-zero values.  Column 1 values [0:N] are the
        (1-based) row indices of the each non-zero value, column 2 [0:N] are the
        column indices, column 3 [0:N] are the (real) values.  The last values
        [-1,0:2] of the rows, column indices are shape[0] and shape[1]
        respectively of the output matrix. The last value for the values column
        is a padding 0. mrows and ncols values from the header give the shape of
        the stored matrix, here [N+1, 3].  Complex data is saved as a 4 column
        matrix, where the fourth column contains the imaginary component; the
        last value is again 0.  Complex sparse data do *not* have the header
        ``imagf`` field set to True; the fact that the data are complex is only
        detectable because there are 4 storage columns
        '''
        res = self.read_sub_array(hdr)
        tmp = res[:-1, :]
        # All numbers are float64 in Matlab, but Scipy sparse expects int shape
        dims = (int(res[-1, 0]), int(res[-1, 1]))
        I = np.ascontiguousarray(tmp[:, 0], dtype='intc')  # fixes byte order also
        J = np.ascontiguousarray(tmp[:, 1], dtype='intc')
        I -= 1  # for 1-based indexing
        J -= 1
        if res.shape[1] == 3:
            V = np.ascontiguousarray(tmp[:, 2], dtype='float')
        else:
            V = np.ascontiguousarray(tmp[:, 2], dtype='complex')
            V.imag = tmp[:, 3]
        return scipy.sparse.coo_matrix((V, (I, J)), dims)

    def shape_from_header(self, hdr):
        '''Read the shape of the array described by the header.

        The file position after this call is unspecified.

        Returns
        -------
        shape : tuple of int
            Empty tuple for a sparse record whose stored matrix is not 2-D
            with at least one row and one column.

        Raises
        ------
        TypeError
            If ``hdr.mclass`` is not a known matrix class.
        '''
        mclass = hdr.mclass
        if mclass == mxFULL_CLASS:
            shape = tuple(map(int, hdr.dims))
        elif mclass == mxCHAR_CLASS:
            shape = tuple(map(int, hdr.dims))
            if self.chars_as_strings:
                shape = shape[:-1]
        elif mclass == mxSPARSE_CLASS:
            dt = hdr.dtype
            dims = hdr.dims
            if not (len(dims) == 2 and dims[0] >= 1 and dims[1] >= 1):
                return ()
            # Read only the row and column counts: they are the last entry
            # of the first and of the second stored column respectively.
            column_skip = int(dt.itemsize * (dims[0] - 1))
            self.mat_stream.seek(column_skip, 1)
            rows = np.ndarray(shape=(1,), dtype=dt,
                              buffer=self.mat_stream.read(dt.itemsize))
            self.mat_stream.seek(column_skip, 1)
            cols = np.ndarray(shape=(1,), dtype=dt,
                              buffer=self.mat_stream.read(dt.itemsize))
            # Index the single element explicitly; int() on a 1-element
            # array is deprecated in recent NumPy.
            shape = (int(rows[0]), int(cols[0]))
        else:
            raise TypeError('No reader for class code %s' % mclass)
        if self.squeeze_me:
            shape = tuple([x for x in shape if x != 1])
        return shape
class MatFile4Reader(MatFileReader):
    ''' Reader for Mat4 files '''

    @docfiller
    def __init__(self, mat_stream, *args, **kwargs):
        ''' Initialize matlab 4 file reader

        %(matstream_arg)s
        %(load_args)s
        '''
        super(MatFile4Reader, self).__init__(mat_stream, *args, **kwargs)
        self._matrix_reader = None

    def guess_byte_order(self):
        ''' Guess the file byte order from the first ``mopt`` value

        The stream is rewound to position 0 before returning.
        '''
        stream = self.mat_stream
        stream.seek(0)
        mopt = read_dtype(stream, np.dtype('i4'))
        stream.seek(0)
        if mopt == 0:
            return '<'
        # An out-of-range value means the number must have been byteswapped
        byteswapped = mopt < 0 or mopt > 5000
        if SYS_LITTLE_ENDIAN:
            return '>' if byteswapped else '<'
        return '<' if byteswapped else '>'

    def initialize_read(self):
        ''' Prepare for reading variables

        Builds the byte-order-specific dtype table and a fresh variable
        reader from the current parameters of `self`.
        '''
        self.dtypes = convert_dtypes(mdtypes_template, self.byte_order)
        self._matrix_reader = VarReader4(self)

    def read_var_header(self):
        ''' Read the next variable header and compute where its data ends

        Parameters
        ----------
        None

        Returns
        -------
        header : object
           object that can be passed to self.read_var_array, and that
           has attributes ``name`` and ``is_global``
        next_position : int
           position in stream of next variable
        '''
        hdr = self._matrix_reader.read_header()
        n_elements = 1
        for extent in hdr.dims:
            n_elements = n_elements * extent
        data_bytes = hdr.dtype.itemsize * n_elements
        # Complex full matrices store a second block for the imaginary part
        if hdr.is_complex and hdr.mclass != mxSPARSE_CLASS:
            data_bytes *= 2
        return hdr, self.mat_stream.tell() + data_bytes

    def read_var_array(self, header, process=True):
        ''' Read the array that belongs to `header`

        Parameters
        ----------
        header : header object
           object with fields defining variable header
        process : {True, False}, optional
           If True, apply recursive post-processing during loading of array.

        Returns
        -------
        arr : array
           array with post-processing applied or not according to
           `process`.
        '''
        return self._matrix_reader.array_from_header(header, process)

    def get_variables(self, variable_names=None):
        ''' Return the variables in the stream as a dictionary

        Parameters
        ----------
        variable_names : None or str or sequence of str, optional
            variable name, or sequence of variable names to get from Mat file /
            file stream.  If None, then get all variables in file
        '''
        if isinstance(variable_names, string_types):
            wanted = [variable_names]
        elif variable_names is not None:
            wanted = list(variable_names)
        else:
            wanted = None
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        found = {}
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = asstr(hdr.name)
            if wanted is None or name in wanted:
                found[name] = self.read_var_array(hdr)
                self.mat_stream.seek(next_position)
                if wanted is not None:
                    wanted.remove(name)
                    # Stop early once every requested name has been read
                    if len(wanted) == 0:
                        break
            else:
                self.mat_stream.seek(next_position)
        return found

    def list_variables(self):
        ''' Return ``(name, shape, class name)`` for every variable '''
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        listing = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            listing.append((asstr(hdr.name),
                            self._matrix_reader.shape_from_header(hdr),
                            mclass_info.get(hdr.mclass, 'unknown')))
            self.mat_stream.seek(next_position)
        return listing
def arr_to_2d(arr, oned_as='row'):
    ''' Return `arr` reshaped to exactly two dimensions

    Parameters
    ----------
    arr : array
    oned_as : {'row', 'column'}, optional
       Whether to reshape 1D vectors as row vectors or column vectors.
       See documentation for ``matdims`` for more detail

    Returns
    -------
    arr2d : array
       2D version of the array

    Raises
    ------
    ValueError
        If ``matdims`` reports more than 2 dimensions for `arr`.
    '''
    target_shape = matdims(arr, oned_as)
    if len(target_shape) <= 2:
        return arr.reshape(target_shape)
    raise ValueError('Matlab 4 files cannot save arrays with more than '
                     '2 dimensions')
class VarWriter4(object):
    ''' Writes single variables in matlab 4 format to a file writer's stream

    Takes ``file_stream`` and ``oned_as`` from the owning file writer.
    '''

    def __init__(self, file_writer):
        self.file_stream = file_writer.file_stream
        self.oned_as = file_writer.oned_as

    def write_bytes(self, arr):
        ''' Write the raw bytes of `arr` in column-major order '''
        # ``tobytes`` replaces ``tostring``, which newer NumPy no longer has
        self.file_stream.write(arr.tobytes(order='F'))

    def write_string(self, s):
        ''' Write the byte string `s` unchanged '''
        self.file_stream.write(s)

    def write_header(self, name, shape, P=miDOUBLE, T=mxFULL_CLASS, imagf=0):
        ''' Write header for given data options

        Parameters
        ----------
        name : str
            name of variable
        shape : sequence
           Shape of array as it will be read in matlab
        P : int, optional
            code for mat4 data type, one of ``miDOUBLE, miSINGLE, miINT32,
            miINT16, miUINT16, miUINT8``
        T : int, optional
            code for mat4 matrix class, one of ``mxFULL_CLASS, mxCHAR_CLASS,
            mxSPARSE_CLASS``
        imagf : int, optional
            flag indicating complex
        '''
        header = np.empty((), mdtypes_template['header'])
        # Order code: 0 on a little-endian host, 1 otherwise
        M = not SYS_LITTLE_ENDIAN
        O = 0
        header['mopt'] = (M * 1000 +
                          O * 100 +
                          P * 10 +
                          T)
        header['mrows'] = shape[0]
        header['ncols'] = shape[1]
        header['imagf'] = imagf
        # The stored name carries a trailing NUL, counted in namlen
        header['namlen'] = len(name) + 1
        self.write_bytes(header)
        self.write_string(asbytes(name + '\0'))

    def write(self, arr, name):
        ''' Write matrix `arr`, with name `name`

        Parameters
        ----------
        arr : array_like
           array to write
        name : str
           name in matlab workspace

        Raises
        ------
        TypeError
            For object arrays and void type arrays.
        '''
        # we need to catch sparse first, because np.asarray returns an
        # an object array for scipy.sparse
        if scipy.sparse.issparse(arr):
            self.write_sparse(arr, name)
            return
        arr = np.asarray(arr)
        dt = arr.dtype
        if not dt.isnative:
            arr = arr.astype(dt.newbyteorder('='))
        dtt = dt.type
        if dtt is np.object_:
            raise TypeError('Cannot save object arrays in Mat4')
        elif dtt is np.void:
            raise TypeError('Cannot save void type arrays')
        elif dt.kind in ('U', 'S'):
            # Test the dtype kind rather than ``np.unicode_`` /
            # ``np.string_``: those aliases are gone from newer NumPy,
            # while the kind codes work on every version.
            self.write_char(arr, name)
            return
        self.write_numeric(arr, name)

    def write_numeric(self, arr, name):
        ''' Write a numeric array as a full matrix

        Types without a Mat4 equivalent are converted to double precision.
        Complex arrays are written as the real block followed by the
        imaginary block.
        '''
        arr = arr_to_2d(arr, self.oned_as)
        imagf = arr.dtype.kind == 'c'
        type_key = arr.dtype.str[1:]
        try:
            P = np_to_mtypes[type_key]
        except KeyError:
            # 'c16' is numpy's string for double-precision complex (sizes
            # are in bytes); 'c128' is not a valid dtype string.
            arr = arr.astype('c16' if imagf else 'f8')
            P = miDOUBLE
        else:
            if type_key in ('c24', 'c32'):
                # Extended-precision complex is declared as miDOUBLE in the
                # header, so the parts must really be 8-byte floats or the
                # data block would be larger than a reader expects.
                arr = arr.astype('c16')
        self.write_header(name,
                          arr.shape,
                          P=P,
                          T=mxFULL_CLASS,
                          imagf=imagf)
        if imagf:
            self.write_bytes(arr.real)
            self.write_bytes(arr.imag)
        else:
            self.write_bytes(arr)

    def write_char(self, arr, name):
        ''' Write a string array as a char matrix of latin-1 bytes '''
        arr = arr_to_chars(arr)
        arr = arr_to_2d(arr, self.oned_as)
        dims = arr.shape
        self.write_header(
            name,
            dims,
            P=miUINT8,
            T=mxCHAR_CLASS)
        if arr.dtype.kind == 'U':
            # Recode unicode to latin1
            n_chars = np.prod(dims)
            st_arr = np.ndarray(shape=(),
                                dtype=arr_dtype_number(arr, n_chars),
                                buffer=arr)
            st = st_arr.item().encode('latin-1')
            arr = np.ndarray(shape=dims, dtype='S1', buffer=st)
        self.write_bytes(arr)

    def write_sparse(self, arr, name):
        ''' Sparse matrices are 2D

        See docstring for VarReader4.read_sparse_array
        '''
        A = arr.tocoo()  # convert to sparse COO format (ijv)
        imagf = A.dtype.kind == 'c'
        # One row per non-zero plus a final row holding the matrix shape;
        # a fourth column carries the imaginary parts of complex data.
        ijv = np.zeros((A.nnz + 1, 3+imagf), dtype='f8')
        ijv[:-1, 0] = A.row
        ijv[:-1, 1] = A.col
        ijv[:-1, 0:2] += 1  # 1 based indexing
        if imagf:
            ijv[:-1, 2] = A.data.real
            ijv[:-1, 3] = A.data.imag
        else:
            ijv[:-1, 2] = A.data
        ijv[-1, 0:2] = A.shape
        self.write_header(
            name,
            ijv.shape,
            P=miDOUBLE,
            T=mxSPARSE_CLASS)
        self.write_bytes(ijv)
class MatFile4Writer(object):
    ''' Class for writing matlab 4 format files '''

    def __init__(self, file_stream, oned_as=None):
        self.file_stream = file_stream
        # None selects the default of writing 1-D arrays as rows
        self.oned_as = 'row' if oned_as is None else oned_as
        self._matrix_writer = None

    def put_variables(self, mdict, write_header=None):
        ''' Write variables in `mdict` to stream

        Parameters
        ----------
        mdict : mapping
           mapping with method ``items`` return name, contents pairs
           where ``name`` which will appear in the matlab workspace in
           file load, and ``contents`` is something writeable to a
           matlab file, such as a numpy array.
        write_header : {None, True, False}
           Accepted only for compatibility with the matlab 5 version of
           this method and ignored here, because a matlab 4 mat file has
           no file header.
        '''
        writer = self._matrix_writer = VarWriter4(self)
        for var_name, contents in mdict.items():
            writer.write(contents, var_name)
@@ -0,0 +1,849 @@
''' Classes for read / write of matlab (TM) 5 files
The matfile specification last found here:
https://www.mathworks.com/access/helpdesk/help/pdf_doc/matlab/matfile_format.pdf
(as of December 5 2008)
'''
from __future__ import division, print_function, absolute_import
'''
=================================
Note on functions and mat files
=================================
The document above does not give any hints as to the storage of matlab
function handles, or anonymous function handles. I had therefore to
guess the format of matlab arrays of ``mxFUNCTION_CLASS`` and
``mxOPAQUE_CLASS`` by looking at example mat files.
``mxFUNCTION_CLASS`` stores all types of matlab functions. It seems to
contain a struct matrix with a set pattern of fields. For anonymous
functions, a sub-field of one of these fields seems to contain the
well-named ``mxOPAQUE_CLASS``. This seems to contain:
* array flags as for any matlab matrix
* 3 int8 strings
* a matrix
It seems that, whenever the mat file contains a ``mxOPAQUE_CLASS``
instance, there is also an un-named matrix (name == '') at the end of
the mat file. I'll call this the ``__function_workspace__`` matrix.
When I saved two anonymous functions in a mat file, or appended another
anonymous function to the mat file, there was still only one
``__function_workspace__`` un-named matrix at the end, but larger than
that for a mat file with a single anonymous function, suggesting that
the workspaces for the two functions had been merged.
The ``__function_workspace__`` matrix appears to be of double class
(``mxDOUBLE_CLASS``), but stored as uint8, the memory for which is in
the format of a mini .mat file, without the first 124 bytes of the file
header (the description and the subsystem_offset), but with the version
U2 bytes, and the S2 endian test bytes. There follow 4 zero bytes,
presumably for 8 byte padding, and then a series of ``miMATRIX``
entries, as in a standard mat file. The ``miMATRIX`` entries appear to
be series of un-named (name == '') matrices, and may also contain arrays
of this same mini-mat format.
I guess that:
* saving an anonymous function back to a mat file will need the
associated ``__function_workspace__`` matrix saved as well for the
anonymous function to work correctly.
* appending to a mat file that has a ``__function_workspace__`` would
involve first pulling off this workspace, appending, checking whether
there were any more anonymous functions appended, and then somehow
merging the relevant workspaces, and saving at the end of the mat
file.
The mat files I was playing with are in ``tests/data``:
* sqr.mat
* parabola.mat
* some_functions.mat
See ``tests/test_mio.py:test_mio_funcs.py`` for a debugging
script I was working with.
'''
# Small fragments of current code adapted from matfile.py by Heiko
# Henkelmann
import os
import time
import sys
import zlib
from io import BytesIO
import warnings
import numpy as np
from numpy.compat import asbytes, asstr
import scipy.sparse
from scipy._lib.six import string_types
from .byteordercodes import native_code, swapped_code
from .miobase import (MatFileReader, docfiller, matdims, read_dtype,
arr_to_chars, arr_dtype_number, MatWriteError,
MatReadError, MatReadWarning)
# Reader object for matlab 5 format variables
from .mio5_utils import VarReader5
# Constants and helper objects
from .mio5_params import (MatlabObject, MatlabFunction, MDTYPES, NP_TO_MTYPES,
NP_TO_MXTYPES, miCOMPRESSED, miMATRIX, miINT8,
miUTF8, miUINT32, mxCELL_CLASS, mxSTRUCT_CLASS,
mxOBJECT_CLASS, mxCHAR_CLASS, mxSPARSE_CLASS,
mxDOUBLE_CLASS, mclass_info)
from .streams import ZlibInputStream
class MatFile5Reader(MatFileReader):
''' Reader for Mat 5 mat files
Adds the following attribute to base class
uint16_codec - char codec to use for uint16 char arrays
(defaults to system default codec)
Uses variable reader that has the following standard interface (see
abstract class in ``miobase``)::
__init__(self, file_reader)
read_header(self)
array_from_header(self)
and added interface::
set_stream(self, stream)
read_full_tag(self)
'''
@docfiller
def __init__(self,
             mat_stream,
             byte_order=None,
             mat_dtype=False,
             squeeze_me=False,
             chars_as_strings=True,
             matlab_compatible=False,
             struct_as_record=True,
             verify_compressed_data_integrity=True,
             uint16_codec=None
             ):
    '''Initializer for matlab 5 file format reader

    %(matstream_arg)s
    %(load_args)s
    %(struct_arg)s
    uint16_codec : {None, string}
        Set codec to use for uint16 char arrays (e.g. 'utf-8').
        Use system default codec if None
    '''
    base_args = (mat_stream,
                 byte_order,
                 mat_dtype,
                 squeeze_me,
                 chars_as_strings,
                 matlab_compatible,
                 struct_as_record,
                 verify_compressed_data_integrity)
    super(MatFile5Reader, self).__init__(*base_args)
    # Any falsy codec (None, '') selects the interpreter default encoding
    self.uint16_codec = uint16_codec or sys.getdefaultencoding()
    # placeholders for readers - see initialize_read method
    self._file_reader = None
    self._matrix_reader = None
def guess_byte_order(self):
''' Guess byte order.
Sets stream pointer to 0 '''
self.mat_stream.seek(126)
mi = self.mat_stream.read(2)
self.mat_stream.seek(0)
return mi == b'IM' and '<' or '>'
def read_file_header(self):
''' Read in mat 5 file header '''
hdict = {}
hdr_dtype = MDTYPES[self.byte_order]['dtypes']['file_header']
hdr = read_dtype(self.mat_stream, hdr_dtype)
hdict['__header__'] = hdr['description'].item().strip(b' \t\n\000')
v_major = hdr['version'] >> 8
v_minor = hdr['version'] & 0xFF
hdict['__version__'] = '%d.%d' % (v_major, v_minor)
return hdict
def initialize_read(self):
''' Run when beginning read of variables
Sets up readers from parameters in `self`
'''
# reader for top level stream. We need this extra top-level
# reader because we use the matrix_reader object to contain
# compressed matrices (so they have their own stream)
self._file_reader = VarReader5(self)
# reader for matrix streams
self._matrix_reader = VarReader5(self)
def read_var_header(self):
''' Read header, return header, next position
Header has to define at least .name and .is_global
Parameters
----------
None
Returns
-------
header : object
object that can be passed to self.read_var_array, and that
has attributes .name and .is_global
next_position : int
position in stream of next variable
'''
mdtype, byte_count = self._file_reader.read_full_tag()
if not byte_count > 0:
raise ValueError("Did not read any bytes")
next_pos = self.mat_stream.tell() + byte_count
if mdtype == miCOMPRESSED:
# Make new stream from compressed data
stream = ZlibInputStream(self.mat_stream, byte_count)
self._matrix_reader.set_stream(stream)
check_stream_limit = self.verify_compressed_data_integrity
mdtype, byte_count = self._matrix_reader.read_full_tag()
else:
check_stream_limit = False
self._matrix_reader.set_stream(self.mat_stream)
if not mdtype == miMATRIX:
raise TypeError('Expecting miMATRIX type here, got %d' % mdtype)
header = self._matrix_reader.read_header(check_stream_limit)
return header, next_pos
def read_var_array(self, header, process=True):
''' Read array, given `header`
Parameters
----------
header : header object
object with fields defining variable header
process : {True, False} bool, optional
If True, apply recursive post-processing during loading of
array.
Returns
-------
arr : array
array with post-processing applied or not according to
`process`.
'''
return self._matrix_reader.array_from_header(header, process)
def get_variables(self, variable_names=None):
''' get variables from stream as dictionary
variable_names - optional list of variable names to get
If variable_names is None, then get all variables in file
'''
if isinstance(variable_names, string_types):
variable_names = [variable_names]
elif variable_names is not None:
variable_names = list(variable_names)
self.mat_stream.seek(0)
# Here we pass all the parameters in self to the reading objects
self.initialize_read()
mdict = self.read_file_header()
mdict['__globals__'] = []
while not self.end_of_stream():
hdr, next_position = self.read_var_header()
name = asstr(hdr.name)
if name in mdict:
warnings.warn('Duplicate variable name "%s" in stream'
' - replacing previous with new\n'
'Consider mio5.varmats_from_mat to split '
'file into single variable files' % name,
MatReadWarning, stacklevel=2)
if name == '':
# can only be a matlab 7 function workspace
name = '__function_workspace__'
# We want to keep this raw because mat_dtype processing
# will break the format (uint8 as mxDOUBLE_CLASS)
process = False
else:
process = True
if variable_names is not None and name not in variable_names:
self.mat_stream.seek(next_position)
continue
try:
res = self.read_var_array(hdr, process)
except MatReadError as err:
warnings.warn(
'Unreadable variable "%s", because "%s"' %
(name, err),
Warning, stacklevel=2)
res = "Read error: %s" % err
self.mat_stream.seek(next_position)
mdict[name] = res
if hdr.is_global:
mdict['__globals__'].append(name)
if variable_names is not None:
variable_names.remove(name)
if len(variable_names) == 0:
break
return mdict
def list_variables(self):
''' list variables from stream '''
self.mat_stream.seek(0)
# Here we pass all the parameters in self to the reading objects
self.initialize_read()
self.read_file_header()
vars = []
while not self.end_of_stream():
hdr, next_position = self.read_var_header()
name = asstr(hdr.name)
if name == '':
# can only be a matlab 7 function workspace
name = '__function_workspace__'
shape = self._matrix_reader.shape_from_header(hdr)
if hdr.is_logical:
info = 'logical'
else:
info = mclass_info.get(hdr.mclass, 'unknown')
vars.append((name, shape, info))
self.mat_stream.seek(next_position)
return vars
def varmats_from_mat(file_obj):
    """ Pull variables out of mat 5 file as a sequence of mat file objects

    This can be useful with a difficult mat file, containing unreadable
    variables.  This routine pulls the variables out in raw form and puts them,
    unread, back into a file stream for saving or reading.  Another use is the
    pathological case where there is more than one variable of the same name in
    the file; this routine returns the duplicates, whereas the standard reader
    will overwrite duplicates in the returned dictionary.

    The file pointer in `file_obj` will be undefined.  File pointers for the
    returned file-like objects are set at 0.

    Parameters
    ----------
    file_obj : file-like
        file object containing mat file

    Returns
    -------
    named_mats : list
        list contains tuples of (name, BytesIO) where BytesIO is a file-like
        object containing mat file contents as for a single variable.  The
        BytesIO contains a string with the original header and a single var. If
        ``var_file_obj`` is an individual BytesIO instance, then save as a mat
        file with something like ``open('test.mat',
        'wb').write(var_file_obj.read())``

    Examples
    --------
    >>> import scipy.io

    BytesIO is from the ``io`` module in python 3, and is ``cStringIO`` for
    python < 3.

    >>> mat_fileobj = BytesIO()
    >>> scipy.io.savemat(mat_fileobj, {'b': np.arange(10), 'a': 'a string'})
    >>> varmats = varmats_from_mat(mat_fileobj)
    >>> sorted([name for name, str_obj in varmats])
    ['a', 'b']
    """
    reader = MatFile5Reader(file_obj)
    # Grab the raw bytes of the top-level file header so that they can be
    # prepended, untouched, to every single-variable stream
    file_obj.seek(0)
    header_size = MDTYPES[native_code]['dtypes']['file_header'].itemsize
    header_bytes = file_obj.read(header_size)
    # Rewind and let the reader consume the header itself; the parsed
    # header is not needed, only the resulting stream position
    file_obj.seek(0)
    reader.initialize_read()
    reader.read_file_header()
    var_end = file_obj.tell()
    results = []
    while not reader.end_of_stream():
        var_start = var_end
        var_hdr, var_end = reader.read_var_header()
        # Re-read the whole variable (tag included) as raw bytes
        file_obj.seek(var_start)
        var_bytes = file_obj.read(var_end - var_start)
        # A fresh BytesIO starts at position 0, ready for reading
        results.append((asstr(var_hdr.name),
                        BytesIO(header_bytes + var_bytes)))
    return results
class EmptyStructMarker(object):
    """ Sentinel class signalling an empty matlab struct on output

    The class object itself (not an instance) is used as the marker.
    """
def to_writeable(source):
    ''' Convert input object ``source`` to something we can write

    Parameters
    ----------
    source : object

    Returns
    -------
    arr : None or ndarray or EmptyStructMarker
        If `source` cannot be converted to something we can write to a matfile,
        return None.  If `source` is equivalent to an empty dictionary, return
        ``EmptyStructMarker``.  Otherwise return `source` converted to an
        ndarray with contents for writing to matfile.

    Notes
    -----
    For mappings, only string keys that are non-empty and do not start with
    an underscore or a digit become struct fields; other keys are dropped.
    '''
    if isinstance(source, np.ndarray):
        return source
    if source is None:
        return None
    # Objects that implement mappings
    is_mapping = (hasattr(source, 'keys') and hasattr(source, 'values') and
                  hasattr(source, 'items'))
    # Objects that don't implement mappings, but do have dicts
    if isinstance(source, np.generic):
        # Numpy scalars are never mappings (pypy issue workaround)
        pass
    elif not is_mapping and hasattr(source, '__dict__'):
        source = dict((key, value) for key, value in source.__dict__.items()
                      if not key.startswith('_'))
        is_mapping = True
    if is_mapping:
        dtype = []
        values = []
        for field, value in source.items():
            # The emptiness check guards the field[0] lookup: an empty key
            # used to raise IndexError; it is now skipped like other names
            # that cannot be struct field names.
            if (isinstance(field, string_types) and
                    field and field[0] not in '_0123456789'):
                dtype.append((str(field), object))
                values.append(value)
        if dtype:
            return np.array([tuple(values)], dtype)
        else:
            return EmptyStructMarker
    # Next try and convert to an array
    narr = np.asanyarray(source)
    if narr.dtype.type in (object, np.object_) and \
       narr.shape == () and narr == source:
        # No interesting conversion possible
        return None
    return narr
# Writers always emit native byte order, so look the native dtype table up
# once and pull the individual record dtypes from it.
_NATIVE_DTYPES = MDTYPES[native_code]['dtypes']
NDT_FILE_HDR = _NATIVE_DTYPES['file_header']
NDT_TAG_FULL = _NATIVE_DTYPES['tag_full']
NDT_TAG_SMALL = _NATIVE_DTYPES['tag_smalldata']
NDT_ARRAY_FLAGS = _NATIVE_DTYPES['array_flags']
class VarWriter5(object):
    ''' Generic matlab matrix writing class

    Writes one variable at a time to ``file_stream`` in native byte order.
    Each variable starts with a miMATRIX tag whose byte count is not known
    until the variable has been written, so the tag is written first as a
    placeholder and patched afterwards by ``update_matrix_tag``.
    '''
    # NOTE: class-level array, shared by every instance and updated in place
    # by update_matrix_tag before each write of the tag.
    mat_tag = np.zeros((), NDT_TAG_FULL)
    mat_tag['mdtype'] = miMATRIX

    def __init__(self, file_writer):
        ''' Copy stream and write options from `file_writer` '''
        self.file_stream = file_writer.file_stream
        self.unicode_strings = file_writer.unicode_strings
        self.long_field_names = file_writer.long_field_names
        self.oned_as = file_writer.oned_as
        # These are used for top level writes, and unset after
        self._var_name = None
        self._var_is_global = False

    def write_bytes(self, arr):
        ''' Write raw bytes of array `arr` in Fortran (column-major) order '''
        # tobytes is the supported spelling of the deprecated tostring
        self.file_stream.write(arr.tobytes(order='F'))

    def write_string(self, s):
        ''' Write byte string `s` to the stream unchanged '''
        self.file_stream.write(s)

    def write_element(self, arr, mdtype=None):
        ''' write tag and data

        Parameters
        ----------
        arr : ndarray
            data to write
        mdtype : None or int, optional
            mat data type code; if None, looked up from the dtype of `arr`
        '''
        if mdtype is None:
            mdtype = NP_TO_MTYPES[arr.dtype.str[1:]]
        # Array needs to be in native byte order
        if arr.dtype.byteorder == swapped_code:
            # swap the bytes and relabel the dtype so values are unchanged
            arr = arr.byteswap().view(arr.dtype.newbyteorder())
        byte_count = arr.size*arr.itemsize
        if byte_count <= 4:
            self.write_smalldata_element(arr, mdtype, byte_count)
        else:
            self.write_regular_element(arr, mdtype, byte_count)

    def write_smalldata_element(self, arr, mdtype, byte_count):
        ''' Write `arr` in small data element format (data inside the tag) '''
        # write tag with embedded data
        tag = np.zeros((), NDT_TAG_SMALL)
        # byte count in the upper 16 bits, data type in the lower 16
        tag['byte_count_mdtype'] = (byte_count << 16) + mdtype
        # if arr.tobytes is < 4, the element will be zero-padded as needed.
        tag['data'] = arr.tobytes(order='F')
        self.write_bytes(tag)

    def write_regular_element(self, arr, mdtype, byte_count):
        ''' Write full 8 byte tag, then data, then padding '''
        # write tag, data
        tag = np.zeros((), NDT_TAG_FULL)
        tag['mdtype'] = mdtype
        tag['byte_count'] = byte_count
        self.write_bytes(tag)
        self.write_bytes(arr)
        # pad to next 64-bit boundary
        bc_mod_8 = byte_count % 8
        if bc_mod_8:
            self.file_stream.write(b'\x00' * (8-bc_mod_8))

    def write_header(self,
                     shape,
                     mclass,
                     is_complex=False,
                     is_logical=False,
                     nzmax=0):
        ''' Write header for given data options

        shape : sequence
           array shape
        mclass      - mat5 matrix class
        is_complex  - True if matrix is complex
        is_logical  - True if matrix is logical
        nzmax        - max non zero elements for sparse arrays

        We get the name and the global flag from the object, and reset
        them to defaults after we've used them
        '''
        # get name and is_global from one-shot object store
        name = self._var_name
        is_global = self._var_is_global
        # initialize the top-level matrix tag, store position
        self._mat_tag_pos = self.file_stream.tell()
        self.write_bytes(self.mat_tag)
        # write array flags (complex, global, logical, class, nzmax)
        af = np.zeros((), NDT_ARRAY_FLAGS)
        af['data_type'] = miUINT32
        af['byte_count'] = 8
        flags = is_complex << 3 | is_global << 2 | is_logical << 1
        af['flags_class'] = mclass | flags << 8
        af['nzmax'] = nzmax
        self.write_bytes(af)
        # shape
        self.write_element(np.array(shape, dtype='i4'))
        # write name
        name = np.asarray(name)
        if name == '':  # empty string zero-terminated
            self.write_smalldata_element(name, miINT8, 0)
        else:
            self.write_element(name, miINT8)
        # reset the one-shot store to defaults
        self._var_name = ''
        self._var_is_global = False

    def update_matrix_tag(self, start_pos):
        ''' Patch byte count into the matrix tag written at `start_pos`

        Raises
        ------
        MatWriteError
            If the variable occupies 2**32 bytes or more.
        '''
        curr_pos = self.file_stream.tell()
        self.file_stream.seek(start_pos)
        # byte count excludes the 8 bytes of the tag itself
        byte_count = curr_pos - start_pos - 8
        if byte_count >= 2**32:
            raise MatWriteError("Matrix too large to save with Matlab "
                                "5 format")
        self.mat_tag['byte_count'] = byte_count
        self.write_bytes(self.mat_tag)
        self.file_stream.seek(curr_pos)

    def write_top(self, arr, name, is_global):
        """ Write variable at top level of mat file

        Parameters
        ----------
        arr : array_like
            array-like object to create writer for
        name : str, optional
            name as it will appear in matlab workspace
            default is empty string
        is_global : {False, True}, optional
            whether variable will be global on load into matlab
        """
        # these are set before the top-level header write, and unset at
        # the end of the same write, because they do not apply for lower levels
        self._var_is_global = is_global
        self._var_name = name
        # write the header and data
        self.write(arr)

    def write(self, arr):
        ''' Write `arr` to stream at top and sub levels

        Parameters
        ----------
        arr : array_like
            array-like object to create writer for

        Raises
        ------
        TypeError
            If `arr` cannot be converted to something writeable.
        MatWriteError
            If `arr` is a MatlabFunction.
        '''
        # store position, so we can update the matrix tag
        mat_tag_pos = self.file_stream.tell()
        # First check if these are sparse
        if scipy.sparse.issparse(arr):
            self.write_sparse(arr)
            self.update_matrix_tag(mat_tag_pos)
            return
        # Try to convert things that aren't arrays
        narr = to_writeable(arr)
        if narr is None:
            raise TypeError('Could not convert %s (type %s) to array'
                            % (arr, type(arr)))
        # Dispatch on the kind of data; order matters because the later
        # dtype-based tests would also match the earlier special types
        if isinstance(narr, MatlabObject):
            self.write_object(narr)
        elif isinstance(narr, MatlabFunction):
            raise MatWriteError('Cannot write matlab functions')
        elif narr is EmptyStructMarker:  # empty struct array
            self.write_empty_struct()
        elif narr.dtype.fields:  # struct array
            self.write_struct(narr)
        elif narr.dtype.hasobject:  # cell array
            self.write_cells(narr)
        elif narr.dtype.kind in ('U', 'S'):
            if self.unicode_strings:
                codec = 'UTF8'
            else:
                codec = 'ascii'
            self.write_char(narr, codec)
        else:
            self.write_numeric(narr)
        self.update_matrix_tag(mat_tag_pos)

    def write_numeric(self, arr):
        ''' Write numeric (real, complex or boolean) array `arr` '''
        imagf = arr.dtype.kind == 'c'
        logif = arr.dtype.kind == 'b'
        try:
            mclass = NP_TO_MXTYPES[arr.dtype.str[1:]]
        except KeyError:
            # No matching matlab type, probably complex256 / float128 / float96
            # Cast data to complex128 / float64.
            if imagf:
                # 'c128' is not a valid dtype string (complex128 is 16
                # bytes, i.e. 'c16'), so name the type explicitly
                arr = arr.astype(np.complex128)
            elif logif:
                arr = arr.astype('i1')  # Should only contain 0/1
            else:
                arr = arr.astype('f8')
            mclass = mxDOUBLE_CLASS
        self.write_header(matdims(arr, self.oned_as),
                          mclass,
                          is_complex=imagf,
                          is_logical=logif)
        if imagf:
            # real and imaginary parts are stored as separate elements
            self.write_element(arr.real)
            self.write_element(arr.imag)
        else:
            self.write_element(arr)

    def write_char(self, arr, codec='ascii'):
        ''' Write string array `arr` with given `codec`
        '''
        if arr.size == 0 or np.all(arr == ''):
            # This an empty string array or a string array containing
            # only empty strings.  Matlab cannot distinguish between a
            # string array that is empty, and a string array containing
            # only empty strings, because it stores strings as arrays of
            # char.  There is no way of having an array of char that is
            # not empty, but contains an empty string. We have to
            # special-case the array-with-empty-strings because even
            # empty strings have zero padding, which would otherwise
            # appear in matlab as a string with a space.
            shape = (0,) * max(arr.ndim, 2)
            self.write_header(shape, mxCHAR_CLASS)
            self.write_smalldata_element(arr, miUTF8, 0)
            return
        # non-empty string.
        #
        # Convert to char array
        arr = arr_to_chars(arr)
        # We have to write the shape directly, because we are going
        # recode the characters, and the resulting stream of chars
        # may have a different length
        shape = arr.shape
        self.write_header(shape, mxCHAR_CLASS)
        if arr.dtype.kind == 'U' and arr.size:
            # Make one long string from all the characters.  We need to
            # transpose here, because we're flattening the array, before
            # we write the bytes.  The bytes have to be written in
            # Fortran order.
            n_chars = np.prod(shape)
            st_arr = np.ndarray(shape=(),
                                dtype=arr_dtype_number(arr, n_chars),
                                buffer=arr.T.copy())  # Fortran order
            # Recode with codec to give byte string
            st = st_arr.item().encode(codec)
            # Reconstruct as one-dimensional byte array
            arr = np.ndarray(shape=(len(st),),
                             dtype='S1',
                             buffer=st)
        self.write_element(arr, mdtype=miUTF8)

    def write_sparse(self, arr):
        ''' Sparse matrices are 2D

        Written as row indices, column pointers, then data (real part
        followed by imaginary part for complex matrices).
        '''
        A = arr.tocsc()  # convert to sparse CSC format
        A.sort_indices()     # MATLAB expects sorted row indices
        is_complex = (A.dtype.kind == 'c')
        is_logical = (A.dtype.kind == 'b')
        nz = A.nnz
        self.write_header(matdims(arr, self.oned_as),
                          mxSPARSE_CLASS,
                          is_complex=is_complex,
                          is_logical=is_logical,
                          # matlab won't load file with 0 nzmax
                          nzmax=1 if nz == 0 else nz)
        self.write_element(A.indices.astype('i4'))
        self.write_element(A.indptr.astype('i4'))
        self.write_element(A.data.real)
        if is_complex:
            self.write_element(A.data.imag)

    def write_cells(self, arr):
        ''' Write object array `arr` as a cell array '''
        self.write_header(matdims(arr, self.oned_as),
                          mxCELL_CLASS)
        # loop over data, column major
        A = np.atleast_2d(arr).flatten('F')
        for el in A:
            self.write(el)

    def write_empty_struct(self):
        ''' Write a 1x1 struct with no fields '''
        self.write_header((1, 1), mxSTRUCT_CLASS)
        # max field name length set to 1 in an example matlab struct
        self.write_element(np.array(1, dtype=np.int32))
        # Field names element is empty
        self.write_element(np.array([], dtype=np.int8))

    def write_struct(self, arr):
        ''' Write structured array `arr` as a struct array '''
        self.write_header(matdims(arr, self.oned_as),
                          mxSTRUCT_CLASS)
        self._write_items(arr)

    def _write_items(self, arr):
        ''' Write field names of `arr`, then every field of every element

        Raises
        ------
        ValueError
            If a field name is longer than allowed (31 characters, or 63
            when ``long_field_names`` is set).
        '''
        # write fieldnames
        fieldnames = [f[0] for f in arr.dtype.descr]
        # +1 leaves room for the terminating NUL of the longest name
        length = max(len(fieldname) for fieldname in fieldnames) + 1
        max_length = 64 if self.long_field_names else 32
        if length > max_length:
            raise ValueError("Field names are restricted to %d characters" %
                             (max_length-1))
        self.write_element(np.array([length], dtype='i4'))
        self.write_element(
            np.array(fieldnames, dtype='S%d' % (length)),
            mdtype=miINT8)
        # elements in column major order, fields in declaration order
        A = np.atleast_2d(arr).flatten('F')
        for el in A:
            for f in fieldnames:
                self.write(el[f])

    def write_object(self, arr):
        '''Same as writing structs, except different mx class, and extra
        classname element after header
        '''
        self.write_header(matdims(arr, self.oned_as),
                          mxOBJECT_CLASS)
        self.write_element(np.array(arr.classname, dtype='S'),
                           mdtype=miINT8)
        self._write_items(arr)
class MatFile5Writer(object):
    ''' Class for writing mat5 files '''

    @docfiller
    def __init__(self, file_stream,
                 do_compression=False,
                 unicode_strings=False,
                 global_vars=None,
                 long_field_names=False,
                 oned_as='row'):
        ''' Initialize writer for matlab 5 format files

        Parameters
        ----------
        %(do_compression)s
        %(unicode_strings)s
        global_vars : None or sequence of strings, optional
            Names of variables to be marked as global for matlab
        %(long_fields)s
        %(oned_as)s
        '''
        self.file_stream = file_stream
        self.do_compression = do_compression
        self.unicode_strings = unicode_strings
        # any falsy value (None, empty sequence) means "no globals"
        if global_vars:
            self.global_vars = global_vars
        else:
            self.global_vars = []
        self.long_field_names = long_field_names
        self.oned_as = oned_as
        # created lazily by put_variables
        self._matrix_writer = None

    def write_file_header(self):
        ''' Write the mat 5 file header to the stream

        The header holds a text description, the version 0x0100 and an
        endian indicator stored as the native-order bytes of 0x4d49.
        '''
        # write header
        hdr = np.zeros((), NDT_FILE_HDR)
        hdr['description'] = 'MATLAB 5.0 MAT-file Platform: %s, Created on: %s' \
            % (os.name,time.asctime())
        hdr['version'] = 0x0100
        hdr['endian_test'] = np.ndarray(shape=(),
                                      dtype='S2',
                                      buffer=np.uint16(0x4d49))
        # tobytes is the supported spelling of the deprecated tostring
        self.file_stream.write(hdr.tobytes())

    def put_variables(self, mdict, write_header=None):
        ''' Write variables in `mdict` to stream

        Parameters
        ----------
        mdict : mapping
           mapping with method ``items`` returns name, contents pairs where
           ``name`` which will appear in the matlab workspace in file load, and
           ``contents`` is something writeable to a matlab file, such as a numpy
           array.
        write_header : {None, True, False}, optional
           If True, then write the matlab file header before writing the
           variables.  If None (the default) then write the file header
           if we are at position 0 in the stream.  By setting False
           here, and setting the stream position to the end of the file,
           you can append variables to a matlab file

        Notes
        -----
        Variables whose name starts with an underscore are skipped.
        '''
        # write header if requested, or None and start of file
        if write_header is None:
            write_header = self.file_stream.tell() == 0
        if write_header:
            self.write_file_header()
        self._matrix_writer = VarWriter5(self)
        for name, var in mdict.items():
            if name[0] == '_':
                continue
            is_global = name in self.global_vars
            if self.do_compression:
                # write the variable to a scratch buffer, compress the
                # buffer, and wrap the result in a miCOMPRESSED element
                stream = BytesIO()
                self._matrix_writer.file_stream = stream
                self._matrix_writer.write_top(var, asbytes(name), is_global)
                out_str = zlib.compress(stream.getvalue())
                tag = np.empty((), NDT_TAG_FULL)
                tag['mdtype'] = miCOMPRESSED
                tag['byte_count'] = len(out_str)
                self.file_stream.write(tag.tobytes())
                self.file_stream.write(out_str)
            else:  # not compressing
                self._matrix_writer.write_top(var, asbytes(name), is_global)
@@ -0,0 +1,254 @@
''' Constants and classes for matlab 5 read and write
See also mio5_utils.pyx where these same constants arise as c enums.
If you make changes in this file, don't forget to change mio5_utils.pyx
'''
from __future__ import division, print_function, absolute_import
import numpy as np
from .miobase import convert_dtypes
# --- mat 5 data element type codes (the "mi" types) -----------------------
# The codes are not contiguous: 8, 10 and 11 are not defined here.
miINT8 = 1
miUINT8 = 2
miINT16 = 3
miUINT16 = 4
miINT32 = 5
miUINT32 = 6
miSINGLE = 7
miDOUBLE = 9
miINT64 = 12
miUINT64 = 13
miMATRIX = 14
miCOMPRESSED = 15
miUTF8 = 16
miUTF16 = 17
miUTF32 = 18

# --- matlab array class codes (the "mx" classes) --------------------------
mxCELL_CLASS = 1
mxSTRUCT_CLASS = 2
# "Matlab 7 MAT-File Format" (March 2008 edition) gives mxOBJECT_CLASS = 3,
# but matrix.h gives mxLOGICAL = 3.  Matlab 2008a appears to save logicals
# as type 9, so the document is taken to be correct.  See type 18, below.
mxOBJECT_CLASS = 3
mxCHAR_CLASS = 4
mxSPARSE_CLASS = 5
mxDOUBLE_CLASS = 6
mxSINGLE_CLASS = 7
mxINT8_CLASS = 8
mxUINT8_CLASS = 9
mxINT16_CLASS = 10
mxUINT16_CLASS = 11
mxINT32_CLASS = 12
mxUINT32_CLASS = 13
# The remaining class codes are absent from the March 2008 edition of
# "Matlab 7 MAT-File Format"; they were guessed from matrix.h.
mxINT64_CLASS = 14
mxUINT64_CLASS = 15
mxFUNCTION_CLASS = 16
# Not doing anything with these at the moment.
mxOPAQUE_CLASS = 17  # This appears to be a function workspace
# Background: octave-maintainers thread 'saveing/loading symbol table of
# annymous functions' (sic), April-May 2007
# https://lists.gnu.org/archive/html/octave-maintainers/2007-04/msg00031.html
# https://lists.gnu.org/archive/html/octave-maintainers/2007-05/msg00032.html
# (Was/Deprecated: https://www-old.cae.wisc.edu/pipermail/octave-maintainers/2007-May/002824.html)
mxOBJECT_CLASS_FROM_MATRIX_H = 18

# numpy dtype specifications, without byte order, for each "mi" type code
# and for the fixed-layout records of the file format.
mdtypes_template = {
    miINT8: 'i1',
    miUINT8: 'u1',
    miINT16: 'i2',
    miUINT16: 'u2',
    miINT32: 'i4',
    miUINT32: 'u4',
    miSINGLE: 'f4',
    miDOUBLE: 'f8',
    miINT64: 'i8',
    miUINT64: 'u8',
    miUTF8: 'u1',
    miUTF16: 'u2',
    miUTF32: 'u4',
    'file_header': [
        ('description', 'S116'),
        ('subsystem_offset', 'i8'),
        ('version', 'u2'),
        ('endian_test', 'S2'),
    ],
    'tag_full': [
        ('mdtype', 'u4'),
        ('byte_count', 'u4'),
    ],
    'tag_smalldata': [
        ('byte_count_mdtype', 'u4'),
        ('data', 'S4'),
    ],
    'array_flags': [
        ('data_type', 'u4'),
        ('byte_count', 'u4'),
        ('flags_class', 'u4'),
        ('nzmax', 'u4'),
    ],
    'U1': 'U1',
}

# numpy dtype specification for each numeric "mx" class
mclass_dtypes_template = {
    mxINT8_CLASS: 'i1',
    mxUINT8_CLASS: 'u1',
    mxINT16_CLASS: 'i2',
    mxUINT16_CLASS: 'u2',
    mxINT32_CLASS: 'i4',
    mxUINT32_CLASS: 'u4',
    mxINT64_CLASS: 'i8',
    mxUINT64_CLASS: 'u8',
    mxSINGLE_CLASS: 'f4',
    mxDOUBLE_CLASS: 'f8',
}

# human-readable name for each "mx" class
mclass_info = {
    mxINT8_CLASS: 'int8',
    mxUINT8_CLASS: 'uint8',
    mxINT16_CLASS: 'int16',
    mxUINT16_CLASS: 'uint16',
    mxINT32_CLASS: 'int32',
    mxUINT32_CLASS: 'uint32',
    mxINT64_CLASS: 'int64',
    mxUINT64_CLASS: 'uint64',
    mxSINGLE_CLASS: 'single',
    mxDOUBLE_CLASS: 'double',
    mxCELL_CLASS: 'cell',
    mxSTRUCT_CLASS: 'struct',
    mxOBJECT_CLASS: 'object',
    mxCHAR_CLASS: 'char',
    mxSPARSE_CLASS: 'sparse',
    mxFUNCTION_CLASS: 'function',
    mxOPAQUE_CLASS: 'opaque',
}

# numpy dtype string (byte order character removed) -> "mi" storage type.
# Complex types map to the storage type of their real / imaginary parts.
NP_TO_MTYPES = {
    'f8': miDOUBLE,
    'c32': miDOUBLE,
    'c24': miDOUBLE,
    'c16': miDOUBLE,
    'f4': miSINGLE,
    'c8': miSINGLE,
    'i8': miINT64,
    'i4': miINT32,
    'i2': miINT16,
    'i1': miINT8,
    'u8': miUINT64,
    'u4': miUINT32,
    'u2': miUINT16,
    'u1': miUINT8,
    'S1': miUINT8,
    'U1': miUTF16,
    'b1': miUINT8,  # not standard but seems MATLAB uses this (gh-4022)
}

# numpy dtype string (byte order character removed) -> "mx" array class
NP_TO_MXTYPES = {
    'f8': mxDOUBLE_CLASS,
    'c32': mxDOUBLE_CLASS,
    'c24': mxDOUBLE_CLASS,
    'c16': mxDOUBLE_CLASS,
    'f4': mxSINGLE_CLASS,
    'c8': mxSINGLE_CLASS,
    'i8': mxINT64_CLASS,
    'i4': mxINT32_CLASS,
    'i2': mxINT16_CLASS,
    'i1': mxINT8_CLASS,
    'u8': mxUINT64_CLASS,
    'u4': mxUINT32_CLASS,
    'u2': mxUINT16_CLASS,
    'u1': mxUINT8_CLASS,
    'S1': mxUINT8_CLASS,
    'b1': mxUINT8_CLASS,  # not standard but seems MATLAB uses this
}

# Before release v7.1 (release 14) matlab (TM) used the system default
# character encoding scheme padded out to 16-bits.  Release 14 and later
# use Unicode.  When saving character data, R14 checks if it can be encoded
# in 7-bit ascii, and saves in that format if so.
#
# For each UTF storage type: root codec name (byte order suffix is added
# later for widths above one byte) and width in bytes of one code unit.
codecs_template = {
    miUTF8: {'codec': 'utf_8', 'width': 1},
    miUTF16: {'codec': 'utf_16', 'width': 2},
    miUTF32: {'codec': 'utf_32', 'width': 4},
}
def _convert_codecs(template, byte_order):
''' Convert codec template mapping to byte order
Set codecs not on this system to None
Parameters
----------
template : mapping
key, value are respectively codec name, and root name for codec
(without byte order suffix)
byte_order : {'<', '>'}
code for little or big endian
Returns
-------
codecs : dict
key, value are name, codec (as in .encode(codec))
'''
codecs = {}
postfix = byte_order == '<' and '_le' or '_be'
for k, v in template.items():
codec = v['codec']
try:
" ".encode(codec)
except LookupError:
codecs[k] = None
continue
if v['width'] > 1:
codec += postfix
codecs[k] = codec
return codecs.copy()
# Per byte order ('<' little endian, '>' big endian): the element dtypes,
# the class dtypes and the codec names, all converted to that byte order.
MDTYPES = {}
for _bytecode in ('<', '>'):
    _def = dict(
        dtypes=convert_dtypes(mdtypes_template, _bytecode),
        classes=convert_dtypes(mclass_dtypes_template, _bytecode),
        codecs=_convert_codecs(codecs_template, _bytecode),
    )
    MDTYPES[_bytecode] = _def
class mat_struct(object):
    ''' Placeholder for holding read data from structs

    Instances of this class are used when the user passes False as the
    value of the ``struct_as_record`` parameter of the
    :func:`scipy.io.matlab.loadmat` function.
    '''
class MatlabObject(np.ndarray):
    ''' ndarray subclass carrying a matlab object and its class name

    The extra ``classname`` attribute is propagated to views and slices.
    '''
    def __new__(cls, input_array, classname=None):
        # View the input (converted to an ndarray if needed) as this class,
        # then attach the class name to the new instance
        instance = np.asarray(input_array).view(cls)
        instance.classname = classname
        return instance

    def __array_finalize__(self, obj):
        # Inherit classname from the array this one was derived from;
        # None when the source has no such attribute
        self.classname = getattr(obj, 'classname', None)
class MatlabFunction(np.ndarray):
    ''' ndarray subclass marking its contents as a matlab function '''
    def __new__(cls, input_array):
        # no extra state: a plain view of the input as this class
        return np.asarray(input_array).view(cls)
class MatlabOpaque(np.ndarray):
    ''' ndarray subclass marking its contents as a matlab opaque matrix '''
    def __new__(cls, input_array):
        # no extra state: a plain view of the input as this class
        return np.asarray(input_array).view(cls)
# Record dtype with four object fields: 's0', 's1', 's2' and 'arr'
OPAQUE_DTYPE = np.dtype(
    list(zip(('s0', 's1', 's2', 'arr'), ('O',) * 4)))
@@ -0,0 +1,415 @@
# Authors: Travis Oliphant, Matthew Brett
"""
Base classes for MATLAB file stream reading.
MATLAB is a registered trademark of the Mathworks inc.
"""
from __future__ import division, print_function, absolute_import
import sys
import operator
from scipy._lib.six import reduce
import numpy as np
# Convert one indexed element of a byte string to its integer value:
# indexing bytes already yields an int on Python 3, a 1-char str on Python 2.
byteord = int if sys.version_info[0] >= 3 else ord
from scipy.misc import doccer
from . import byteordercodes as boc
class MatReadError(Exception):
    ''' Exception raised when a mat file cannot be read '''
class MatWriteError(Exception):
    ''' Exception raised when data cannot be written to a mat file '''
class MatReadWarning(UserWarning):
    ''' Warning category for recoverable problems while reading mat files '''
# Shared parameter descriptions.  Each value is the text substituted for a
# ``%(key)s`` placeholder in docstrings decorated with ``docfiller`` below,
# so the strings are part of the generated documentation and must not
# contain a bare percent sign.
doc_dict = \
{'file_arg':
'''file_name : str
Name of the mat file (do not need .mat extension if
appendmat==True) Can also pass open file-like object.''',
'append_arg':
'''appendmat : bool, optional
True to append the .mat extension to the end of the given
filename, if not already present.''',
'load_args':
'''byte_order : str or None, optional
None by default, implying byte order guessed from mat
file. Otherwise can be one of ('native', '=', 'little', '<',
'BIG', '>').
mat_dtype : bool, optional
If True, return arrays in same dtype as would be loaded into
MATLAB (instead of the dtype with which they are saved).
squeeze_me : bool, optional
Whether to squeeze unit matrix dimensions or not.
chars_as_strings : bool, optional
Whether to convert char arrays to string arrays.
matlab_compatible : bool, optional
Returns matrices as would be loaded by MATLAB (implies
squeeze_me=False, chars_as_strings=False, mat_dtype=True,
struct_as_record=True).''',
'struct_arg':
'''struct_as_record : bool, optional
Whether to load MATLAB structs as numpy record arrays, or as
old-style numpy arrays with dtype=object. Setting this flag to
False replicates the behavior of scipy version 0.7.x (returning
numpy object arrays). The default setting is True, because it
allows easier round-trip load and save of MATLAB files.''',
'matstream_arg':
'''mat_stream : file-like
Object with file API, open for reading.''',
'long_fields':
'''long_field_names : bool, optional
* False - maximum field name length in a structure is 31 characters
which is the documented maximum length. This is the default.
* True - maximum field name length in a structure is 63 characters
which works for MATLAB 7.6''',
'do_compression':
'''do_compression : bool, optional
Whether to compress matrices on write. Default is False.''',
'oned_as':
'''oned_as : {'row', 'column'}, optional
If 'column', write 1-D numpy arrays as column vectors.
If 'row', write 1D numpy arrays as row vectors.''',
'unicode_strings':
'''unicode_strings : bool, optional
If True, write strings as Unicode, else MATLAB usual encoding.'''}
# Decorator that fills the placeholders above into a function's docstring
docfiller = doccer.filldoc(doc_dict)
'''
Note on architecture
======================
There are three sets of parameters relevant for reading files. The
first are *file read parameters* - containing options that are common
for reading the whole file, and therefore every variable within that
file. At the moment these are:
* mat_stream
* dtypes (derived from byte code)
* byte_order
* chars_as_strings
* squeeze_me
* struct_as_record (MATLAB 5 files)
* class_dtypes (derived from order code, MATLAB 5 files)
* codecs (MATLAB 5 files)
* uint16_codec (MATLAB 5 files)
Another set of parameters are those that apply only to the current
variable being read - the *header*:
* header related variables (different for v4 and v5 mat files)
* is_complex
* mclass
* var_stream
With the header, we need ``next_position`` to tell us where the next
variable in the stream is.
Then, for each element in a matrix, there can be *element read
parameters*. An element is, for example, one element in a MATLAB cell
array. At the moment these are:
* mat_dtype
The file-reading object contains the *file read parameters*. The
*header* is passed around as a data object, or may be read and discarded
in a single function. The *element read parameters* - the mat_dtype in
this instance - are passed into a general post-processing function; see
``mio_utils`` for details.
'''
def convert_dtypes(dtype_template, order_code):
    ''' Convert dtypes in mapping to given order

    Parameters
    ----------
    dtype_template : mapping
       mapping with values returning numpy dtype from ``np.dtype(val)``
    order_code : str
       an order code suitable for using in ``dtype.newbyteorder()``

    Returns
    -------
    dtypes : mapping
       copy of `dtype_template` where values have been replaced by
       ``np.dtype(val).newbyteorder(order_code)``; the input mapping is
       left unchanged
    '''
    converted = dtype_template.copy()
    for key, spec in dtype_template.items():
        converted[key] = np.dtype(spec).newbyteorder(order_code)
    return converted
def read_dtype(mat_stream, a_dtype):
    """
    Read one item of a known dtype from a byte stream.

    Parameters
    ----------
    mat_stream : file_like object
        MATLAB (tm) mat file stream; ``a_dtype.itemsize`` bytes are read
        from its current position.
    a_dtype : dtype
        dtype of the item to read. `a_dtype` is assumed to already have the
        correct endianness.

    Returns
    -------
    arr : ndarray
        0-d array of dtype `a_dtype` built directly on the bytes read from
        the stream.
    """
    raw = mat_stream.read(a_dtype.itemsize)
    return np.ndarray(shape=(), dtype=a_dtype, buffer=raw, order='F')
def get_matfile_version(fileobj):
    """
    Return major, minor tuple depending on apparent mat file type

    Where:

     #. 0,x -> version 4 format mat files
     #. 1,x -> version 5 format mat files
     #. 2,x -> version 7.3 format mat files (HDF format)

    Parameters
    ----------
    fileobj : file_like
        object implementing seek() and read()

    Returns
    -------
    major_version : {0, 1, 2}
        major MATLAB File format version
    minor_version : int
        minor MATLAB file format version

    Raises
    ------
    MatReadError
        If the file is empty, or too short to contain the bytes that are
        inspected to determine the version.
    ValueError
        The matfile version is unknown.

    Notes
    -----
    Has the side effect of setting the file read pointer to 0
    """
    # Mat4 files have a zero somewhere in first 4 bytes
    fileobj.seek(0)
    mopt_bytes = fileobj.read(4)
    if len(mopt_bytes) == 0:
        raise MatReadError("Mat file appears to be empty")
    if len(mopt_bytes) < 4:
        # Building the 4-element view below on a shorter buffer would fail
        # with an unrelated TypeError; report the real problem instead.
        fileobj.seek(0)
        raise MatReadError("Mat file appears to be truncated")
    mopt_ints = np.ndarray(shape=(4,), dtype=np.uint8, buffer=mopt_bytes)
    if 0 in mopt_ints:
        fileobj.seek(0)
        return (0, 0)
    # For 5 format or 7.3 format we need to read an integer in the
    # header. Bytes 124 through 128 contain a version integer and an
    # endian test string
    fileobj.seek(124)
    tst_str = fileobj.read(4)
    fileobj.seek(0)
    if len(tst_str) < 4:
        # Header is shorter than 128 bytes: indexing tst_str below would
        # raise IndexError.
        raise MatReadError("Mat file appears to be truncated")
    # The endian test string tells us on which side the major version byte
    # sits: index 1 when byte 2 is b'I', index 0 otherwise.
    maj_ind = int(tst_str[2] == b'I'[0])
    maj_val = byteord(tst_str[maj_ind])
    min_val = byteord(tst_str[1-maj_ind])
    ret = (maj_val, min_val)
    if maj_val in (1, 2):
        return ret
    raise ValueError('Unknown mat file type, version %s, %s' % ret)
def matdims(arr, oned_as='column'):
    """
    Work out the shape MATLAB would use for a given array

    Parameters
    ----------
    arr : ndarray
        Input array; only its ``shape`` and ``ndim`` are consulted.
    oned_as : {'column', 'row'}, optional
        Whether 1-D arrays are returned as MATLAB row or column matrices.
        Default is 'column'.

    Returns
    -------
    dims : tuple
        Shape tuple, in the form MATLAB expects it.

    Raises
    ------
    ValueError
        If `arr` is 1-D and non-empty and `oned_as` is neither 'column'
        nor 'row'.

    Notes
    -----
    We had to decide what shape a 1 dimensional array would be by
    default.  ``np.atleast_2d`` thinks it is a row vector.  The
    default for a vector in MATLAB (e.g. ``>> 1:12``) is a row vector.

    Versions of scipy up to and including 0.11 resulted (accidentally)
    in 1-D arrays being read as column vectors.  For the moment, we
    maintain the same tradition here.

    Examples
    --------
    >>> matdims(np.array(1)) # numpy scalar
    (1, 1)
    >>> matdims(np.array([1])) # 1d array, 1 element
    (1, 1)
    >>> matdims(np.array([1,2])) # 1d array, 2 elements
    (2, 1)
    >>> matdims(np.array([[2],[3]])) # 2d array, column vector
    (2, 1)
    >>> matdims(np.array([[2,3]])) # 2d array, row vector
    (1, 2)
    >>> matdims(np.array([[[2,3]]])) # 3d array, rowish vector
    (1, 1, 2)
    >>> matdims(np.array([])) # empty 1d array
    (0, 0)
    >>> matdims(np.array([[]])) # empty 2d
    (0, 0)
    >>> matdims(np.array([[[]]])) # empty 3d
    (0, 0, 0)

    Optional argument flips 1-D shape behavior.

    >>> matdims(np.array([1,2]), 'row') # 1d array, 2 elements
    (1, 2)

    The argument has to make sense though

    >>> matdims(np.array([1,2]), 'bizarre')
    Traceback (most recent call last):
       ...
    ValueError: 1D option "bizarre" is strange

    """
    dims = arr.shape
    if dims == ():  # scalar
        return (1, 1)
    if 0 in dims:  # zero elements: any zero-length axis empties the array
        return (0,) * max(arr.ndim, 2)
    if len(dims) != 1:  # 2D and up keep their own shape
        return dims
    # 1D: pick the orientation requested by the caller
    if oned_as == 'column':
        return dims + (1,)
    if oned_as == 'row':
        return (1,) + dims
    raise ValueError('1D option "%s" is strange'
                     % oned_as)
class MatVarReader(object):
    """Abstract class spelling out the interface variable readers must offer.

    Every method here is a placeholder that does nothing and returns None.
    """

    def __init__(self, file_reader):
        # The base class ignores `file_reader`.
        return None

    def read_header(self):
        """Returns header (placeholder: returns None)."""
        return None

    def array_from_header(self, header):
        """Reads array given header (placeholder: returns None)."""
        return None
class MatFileReader(object):
    """ Base object for reading mat files

    To make this class functional, you will need to override the
    following methods:

    matrix_getter_factory   - gives object to fetch next matrix from stream
    guess_byte_order        - guesses file byte order from file
    """

    @docfiller
    def __init__(self, mat_stream,
                 byte_order=None,
                 mat_dtype=False,
                 squeeze_me=False,
                 chars_as_strings=True,
                 matlab_compatible=False,
                 struct_as_record=True,
                 verify_compressed_data_integrity=True
                 ):
        '''
        Initializer for mat file reader

        mat_stream : file-like
            object with file API, open for reading
    %(load_args)s
        '''
        # Initialize stream
        self.mat_stream = mat_stream
        self.dtypes = {}
        # A falsy byte_order means "work it out"; anything else is
        # normalized to a numpy byte-order code.
        if not byte_order:
            byte_order = self.guess_byte_order()
        else:
            byte_order = boc.to_numpy_code(byte_order)
        self.byte_order = byte_order
        self.struct_as_record = struct_as_record
        # matlab_compatible overrides the three individual options below.
        if matlab_compatible:
            self.set_matlab_compatible()
        else:
            self.squeeze_me = squeeze_me
            self.chars_as_strings = chars_as_strings
            self.mat_dtype = mat_dtype
        self.verify_compressed_data_integrity = verify_compressed_data_integrity

    def set_matlab_compatible(self):
        ''' Sets options to return arrays as MATLAB loads them '''
        self.mat_dtype = True
        self.squeeze_me = False
        self.chars_as_strings = False

    def guess_byte_order(self):
        ''' As we do not know what file type we have, assume native '''
        return boc.native_code

    def end_of_stream(self):
        ''' Return True if no more bytes can be read from the stream

        The check works by reading one byte.  The stream position is only
        moved back when a byte was actually consumed, so the position is the
        same before and after the call and repeated calls give the same
        answer.
        '''
        b = self.mat_stream.read(1)
        if len(b) == 0:
            # Nothing was read, so there is nothing to undo.  Seeking back
            # here would step to one byte before the end (or to -1 on an
            # empty stream).
            return True
        curpos = self.mat_stream.tell()
        self.mat_stream.seek(curpos-1)
        return False
def arr_dtype_number(arr, num):
    """Return `arr`'s dtype with the item count replaced by `num`.

    The first two characters of ``arr.dtype.str`` (byte order and kind, e.g.
    ``'<U'``) are kept and `num` is appended, so ``'<U5'`` with ``num=1``
    gives the dtype ``'<U1'``.
    """
    order_and_kind = arr.dtype.str[:2]
    return np.dtype(order_and_kind + str(num))
def arr_to_chars(arr):
    """Convert string array to char array

    The result has one extra trailing axis whose length is the item count
    taken from ``arr.dtype.str``; a 0-d input is treated as having shape
    ``(1,)``.  Empty characters are replaced by a single space.  When there
    is nothing to replace, the returned array is built on `arr`'s own buffer;
    otherwise a modified copy is returned.
    """
    char_shape = list(arr.shape) or [1]
    char_shape.append(int(arr.dtype.str[2:]))
    # Reinterpret the same memory as one-character items.
    chars = np.ndarray(shape=char_shape,
                       dtype=arr_dtype_number(arr, 1),
                       buffer=arr)
    blank = chars == ''
    if not np.any(blank):
        return chars
    chars = chars.copy()
    chars[blank] = ' '
    return chars
@@ -0,0 +1,16 @@
from __future__ import division, print_function, absolute_import
def configuration(parent_package='io',top_path=None):
    """Return the numpy.distutils ``Configuration`` for the ``matlab`` package.

    Registers three C extensions and the ``tests`` data directory.
    """
    from numpy.distutils.misc_util import Configuration

    config = Configuration('matlab', parent_package, top_path)
    # (extension name, C source file) pairs, registered in this order
    extensions = (
        ('streams', 'streams.c'),
        ('mio_utils', 'mio_utils.c'),
        ('mio5_utils', 'mio5_utils.c'),
    )
    for ext_name, c_source in extensions:
        config.add_extension(ext_name, sources=[c_source])
    config.add_data_dir('tests')
    return config
if __name__ == '__main__':
    from numpy.distutils.core import setup

    # Run as a script: hand this package's configuration to setup().
    setup_kwargs = configuration(top_path='').todict()
    setup(**setup_kwargs)
@@ -0,0 +1,4 @@
function [a, b] = afunc(c, d)
% AFUNC  A function
%   [a, b] = afunc(c, d) returns a = c + 1 and b = d + 10.
a = c + 1;
b = d + 10;
@@ -0,0 +1,5 @@
Japanese:
すべての人間は、生まれながらにして自由であり、
かつ、尊厳と権利と について平等である。
人間は、理性と良心とを授けられており、
互いに同胞の精神をもって行動しなければならない。
@@ -0,0 +1,50 @@
% Generates mat files for loadmat unit tests
% Uses save_matfile.m function
% This is the version for matlab 4
%
% Each file is written as [FILEPREFIX name FILESUFFIX]; both globals are
% set below.

% work out matlab version and file suffix for test files
global FILEPREFIX FILESUFFIX
% path separator: backslash when computer reports 'PCWIN', otherwise '/'
sepchar = '/';
if strcmp(computer, 'PCWIN'), sepchar = '\'; end
% files go into the 'data' directory under the current directory
FILEPREFIX = [pwd sepchar 'data' sepchar];
mlv = version;
% suffix records the matlab version and platform that produced the file
FILESUFFIX = ['_' mlv '_' computer '.mat'];
% basic double array
theta = 0:pi/4:2*pi;
save_matfile('testdouble', theta);
% string
save_matfile('teststring', '"Do nine men interpret?" "Nine men," I nod.')
% complex
save_matfile('testcomplex', cos(theta) + 1j*sin(theta));
% asymmetric array to check indexing
% (3 x 5, first column 1:3 and first row 1:5, zeros elsewhere)
a = zeros(3, 5);
a(:,1) = [1:3]';
a(1,:) = 1:5;
% 2D matrix
save_matfile('testmatrix', a);
% minus number - tests signed int
save_matfile('testminus', -1);
% single character
save_matfile('testonechar', 'r');
% string array (rows padded with spaces to equal length)
save_matfile('teststringarray', ['one  '; 'two  '; 'three']);
% sparse array
save_matfile('testsparse', sparse(a));
% sparse complex array
b = sparse(a);
b(1,1) = b(1,1) + j;
save_matfile('testsparsecomplex', b);
% Two variables in same file (saved directly, not through save_matfile)
save([FILEPREFIX 'testmulti' FILESUFFIX], 'a', 'theta')
@@ -0,0 +1,100 @@
% Generates mat files for loadmat unit tests
% This is the version for matlab 5 and higher
% Uses save_matfile.m function
%
% Each file is written as [FILEPREFIX name FILESUFFIX]; both globals are
% set below.

% work out matlab version and file suffix for test files
global FILEPREFIX FILESUFFIX
% files go into the 'data' directory under the current directory
FILEPREFIX = [fullfile(pwd, 'data') filesep];
temp = ver('MATLAB');
mlv = temp.Version;
% suffix records the matlab version and platform that produced the file
FILESUFFIX = ['_' mlv '_' computer '.mat'];
% basic double array
theta = 0:pi/4:2*pi;
save_matfile('testdouble', theta);
% string
save_matfile('teststring', '"Do nine men interpret?" "Nine men," I nod.')
% complex
save_matfile('testcomplex', cos(theta) + 1j*sin(theta));
% asymmetric array to check indexing
% (3 x 5, first column 1:3 and first row 1:5, zeros elsewhere)
a = zeros(3, 5);
a(:,1) = [1:3]';
a(1,:) = 1:5;
% 2D matrix
save_matfile('testmatrix', a);
% minus number - tests signed int
save_matfile('testminus', -1);
% single character
save_matfile('testonechar', 'r');
% string array (rows padded with spaces to equal length)
save_matfile('teststringarray', ['one  '; 'two  '; 'three']);
% sparse array
save_matfile('testsparse', sparse(a));
% sparse complex array
b = sparse(a);
b(1,1) = b(1,1) + j;
save_matfile('testsparsecomplex', b);
% Two variables in same file (saved directly, not through save_matfile)
save([FILEPREFIX 'testmulti' FILESUFFIX], 'a', 'theta')
% struct
save_matfile('teststruct', ...
struct('stringfield','Rats live on no evil star.',...
'doublefield',[sqrt(2) exp(1) pi],...
'complexfield',(1+1j)*[sqrt(2) exp(1) pi]));
% cell
save_matfile('testcell', ...
{['This cell contains this string and 3 arrays of increasing' ...
' length'], 1., 1.:2., 1.:3.});
% scalar cell
save_matfile('testscalarcell', {1})
% cell array in which two of the elements are empty
save_matfile('testemptycell', {1, 2, [], [], 3});
% 3D matrix
save_matfile('test3dmatrix', reshape(1:24,[2 3 4]))
% nested cell array
save_matfile('testcellnest', {1, {2, 3, {4, 5}}});
% nested struct
save_matfile('teststructnest', struct('one', 1, 'two', ...
struct('three', 'number 3')));
% array of struct
save_matfile('teststructarr', [struct('one', 1, 'two', 2) ...
struct('one', 'number 1', 'two', 'number 2')]);
% matlab object
save_matfile('testobject', inline('x'))
% array of matlab objects (disabled)
%save_matfile('testobjarr', [inline('x') inline('x')])
% unicode test
% NOTE(review): mlv is a version string; for a three-part string such as
% '7.0.1' str2num presumably returns empty, which would skip this block
% even though the comment below names 7.0.1 -- confirm.
if str2num(mlv) > 7 % function added 7.0.1
% read the raw UTF-8 bytes of the text file and decode them
fid = fopen([FILEPREFIX 'japanese_utf8.txt']);
from_japan = fread(fid, 'uint8')';
fclose(fid);
save_matfile('testunicode', native2unicode(from_japan, 'utf-8'));
end
% func
if str2num(mlv) > 7 % function pointers added recently
% handle to afunc.m
func = @afunc;
save_matfile('testfunc', func);
end
@@ -0,0 +1,6 @@
function save_matfile(test_name, v)
% SAVE_MATFILE  Save the value passed in v under the variable name test_name.
%   The output file name is [FILEPREFIX test_name FILESUFFIX], built from
%   the two globals declared below.
global FILEPREFIX FILESUFFIX
% create a variable whose name is the contents of test_name, holding v,
% so that save() stores it under that name
eval([test_name ' = v;']);
save([FILEPREFIX test_name FILESUFFIX], test_name)
@@ -0,0 +1,31 @@
''' Tests for byteorder module '''
from __future__ import division, print_function, absolute_import
import sys
from numpy.testing import assert_
from pytest import raises as assert_raises
import scipy.io.matlab.byteordercodes as sibc
def test_native():
    # sys_is_le must agree with what the interpreter reports
    expected_le = (sys.byteorder == 'little')
    assert_(sibc.sys_is_le == expected_le)
def test_to_numpy():
    # 'native' and 'swapped' depend on the byte order of this machine
    if sys.byteorder == 'little':
        native, swapped = '<', '>'
    else:
        native, swapped = '>', '<'
    assert_(sibc.to_numpy_code('native') == native)
    assert_(sibc.to_numpy_code('swapped') == swapped)
    assert_(sibc.to_numpy_code('native') == sibc.to_numpy_code('='))
    assert_(sibc.to_numpy_code('big') == '>')
    # every alias, in either case, maps to its explicit numpy code
    alias_table = (
        ('<', ('little', '<', 'l', 'L', 'le')),
        ('>', ('big', '>', 'b', 'B', 'be')),
    )
    for expected, codes in alias_table:
        for code in codes:
            assert_(sibc.to_numpy_code(code) == expected)
    # unknown codes are rejected
    assert_raises(ValueError, sibc.to_numpy_code, 'silly string')
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,185 @@
""" Testing mio5_utils Cython module
"""
from __future__ import division, print_function, absolute_import
import sys
from io import BytesIO
cStringIO = BytesIO
import numpy as np
from numpy.testing import assert_array_equal, assert_equal, assert_
from pytest import raises as assert_raises
from scipy._lib.six import u
import scipy.io.matlab.byteordercodes as boc
import scipy.io.matlab.streams as streams
import scipy.io.matlab.mio5_params as mio5p
import scipy.io.matlab.mio5_utils as m5u
def test_byteswap():
    # byteswap_u4 must match numpy's byteswap and undo itself when applied twice
    for val in (1, 0x100, 0x10000):
        original = np.array(val, dtype=np.uint32)
        expected = original.byteswap()
        swapped = m5u.byteswap_u4(original)
        assert_equal(expected.item(), swapped)
        restored = m5u.byteswap_u4(swapped)
        assert_equal(original.item(), restored)
def _make_tag(base_dt, val, mdtype, sde=False):
    ''' Makes a simple matlab tag, full or sde

    Parameters
    ----------
    base_dt : dtype or dtype specifier
        dtype of the value stored in the tag; its byte order is used for
        the tag fields too.
    val : scalar
        value to store in the ``val`` field.
    mdtype : int
        value to store in the ``mdtype`` field.
    sde : bool, optional
        If True, build the small data element layout (two 2-byte tag fields
        and the value padded to 4 bytes); otherwise the full layout (two
        4-byte tag fields and the value padded to a multiple of 8 bytes).

    Returns
    -------
    tag : ndarray
        Structured array of shape (1,) with fields ``mdtype``,
        ``byte_count``, ``val`` and, when needed, ``padding``.
    '''
    base_dt = np.dtype(base_dt)
    bo = boc.to_numpy_code(base_dt.byteorder)
    byte_count = base_dt.itemsize
    if not sde:
        udt = bo + 'u4'
        # Pad the value up to the next multiple of 8 bytes.  The outer
        # modulo makes the padding 0 (not 8) when the value already ends on
        # an 8-byte boundary, which is the case ``if padding`` guards.
        padding = (8 - (byte_count % 8)) % 8
        all_dt = [('mdtype', udt),
                  ('byte_count', udt),
                  ('val', base_dt)]
        if padding:
            all_dt.append(('padding', 'u1', padding))
    else:  # is sde
        udt = bo + 'u2'
        padding = 4-byte_count
        # The order of the two 2-byte tag fields depends on the byte order.
        if bo == '<':  # little endian
            all_dt = [('mdtype', udt),
                      ('byte_count', udt),
                      ('val', base_dt)]
        else:  # big endian
            all_dt = [('byte_count', udt),
                      ('mdtype', udt),
                      ('val', base_dt)]
        if padding:
            all_dt.append(('padding', 'u1', padding))
    tag = np.zeros((1,), dtype=all_dt)
    tag['mdtype'] = mdtype
    tag['byte_count'] = byte_count
    tag['val'] = val
    return tag
def _write_stream(stream, *strings):
stream.truncate(0)
stream.seek(0)
for s in strings:
stream.write(s)
stream.seek(0)
def _make_readerlike(stream, byte_order=boc.native_code):
    """Return a bare object carrying the reader attributes set below.

    `stream` becomes ``mat_stream`` and `byte_order` becomes ``byte_order``;
    the remaining attributes get fixed values.
    """
    class R(object):
        pass

    reader = R()
    settings = (
        ('mat_stream', stream),
        ('byte_order', byte_order),
        ('struct_as_record', True),
        ('uint16_codec', sys.getdefaultencoding()),
        ('chars_as_strings', False),
        ('mat_dtype', False),
        ('squeeze_me', False),
    )
    for attr_name, attr_value in settings:
        setattr(reader, attr_name, attr_value)
    return reader
def test_read_tag():
    # mainly to test errors
    # make reader-like thing
    str_io = BytesIO()
    r = _make_readerlike(str_io)
    c_reader = m5u.VarReader5(r)
    # Reading a tag from the still-empty stream must raise IOError
    assert_raises(IOError, c_reader.read_tag)
    # bad SDE: byte count of 5 on a small data element must be rejected
    tag = _make_tag('i4', 1, mio5p.miINT32, sde=True)
    tag['byte_count'] = 5
    # tobytes() rather than its deprecated alias tostring()
    _write_stream(str_io, tag.tobytes())
    assert_raises(ValueError, c_reader.read_tag)
def test_read_stream():
    # _read_into must hand back exactly the bytes that went into the stream
    tag = _make_tag('i4', 1, mio5p.miINT32, sde=True)
    # tobytes() rather than its deprecated alias tostring()
    tag_str = tag.tobytes()
    str_io = cStringIO(tag_str)
    st = streams.make_stream(str_io)
    s = streams._read_into(st, tag.itemsize)
    assert_equal(s, tag_str)
def test_read_numeric():
    # make reader-like thing
    str_io = cStringIO()
    r = _make_readerlike(str_io)
    # check simplest of tags
    for base_dt, val, mdtype in (('u2', 30, mio5p.miUINT16),
                                 ('i4', 1, mio5p.miINT32),
                                 ('i2', -1, mio5p.miINT16)):
        for byte_code in ('<', '>'):
            r.byte_order = byte_code
            c_reader = m5u.VarReader5(r)
            assert_equal(c_reader.little_endian, byte_code == '<')
            assert_equal(c_reader.is_swapped, byte_code != boc.native_code)
            # both the full and the small data element layouts
            for sde_f in (False, True):
                dt = np.dtype(base_dt).newbyteorder(byte_code)
                a = _make_tag(dt, val, mdtype, sde_f)
                # tobytes() rather than its deprecated alias tostring()
                a_str = a.tobytes()
                _write_stream(str_io, a_str)
                el = c_reader.read_numeric()
                assert_equal(el, val)
                # two sequential reads
                _write_stream(str_io, a_str, a_str)
                el = c_reader.read_numeric()
                assert_equal(el, val)
                el = c_reader.read_numeric()
                assert_equal(el, val)
def test_read_numeric_writeable():
    # the array returned by read_numeric must be writeable
    # make reader-like thing
    str_io = cStringIO()
    r = _make_readerlike(str_io, '<')
    c_reader = m5u.VarReader5(r)
    dt = np.dtype('<u2')
    a = _make_tag(dt, 30, mio5p.miUINT16, 0)
    # tobytes() rather than its deprecated alias tostring()
    a_str = a.tobytes()
    _write_stream(str_io, a_str)
    el = c_reader.read_numeric()
    assert_(el.flags.writeable is True)
def test_zero_byte_string():
    # Tests hack to allow chars of non-zero length, but 0 bytes
    # make reader-like thing
    str_io = cStringIO()
    r = _make_readerlike(str_io, boc.native_code)
    c_reader = m5u.VarReader5(r)
    tag_dt = np.dtype([('mdtype', 'u4'), ('byte_count', 'u4')])
    tag = np.zeros((1,), dtype=tag_dt)
    tag['mdtype'] = mio5p.miINT8
    tag['byte_count'] = 1
    hdr = m5u.VarHeader5()
    # Try when string is 1 length
    hdr.set_dims([1,])
    # tobytes() rather than its deprecated alias tostring()
    _write_stream(str_io, tag.tobytes() + b'        ')
    str_io.seek(0)
    val = c_reader.read_char(hdr)
    assert_equal(val, u(' '))
    # Now when string has 0 bytes 1 length
    tag['byte_count'] = 0
    _write_stream(str_io, tag.tobytes())
    str_io.seek(0)
    val = c_reader.read_char(hdr)
    assert_equal(val, u(' '))
    # Now when string has 0 bytes 4 length
    # (same stream content as above, re-read from the start)
    str_io.seek(0)
    hdr.set_dims([4,])
    val = c_reader.read_char(hdr)
    assert_array_equal(val, [u(' ')] * 4)
@@ -0,0 +1,57 @@
''' Jottings to work out format for __function_workspace__ matrix at end
of mat file.
'''
from __future__ import division, print_function, absolute_import
import os.path
import sys
import io
from numpy.compat import asstr
from scipy.io.matlab.mio5 import (MatlabObject, MatFile5Writer,
MatFile5Reader, MatlabFunction)
test_data_path = os.path.join(os.path.dirname(__file__), 'data')
def read_minimat_vars(rdr):
    """Read every variable left in `rdr`'s stream into a dict.

    Variables with an empty name are stored as ``var_0``, ``var_1``, ... in
    the order they are met.  The names of variables flagged global are
    collected under the ``'__globals__'`` key.
    """
    rdr.initialize_read()
    global_names = []
    mdict = {'__globals__': global_names}
    unnamed_count = 0
    while not rdr.end_of_stream():
        hdr, next_position = rdr.read_var_header()
        name = asstr(hdr.name)
        if name == '':
            name = 'var_%d' % unnamed_count
            unnamed_count += 1
        mdict[name] = rdr.read_var_array(hdr, process=False)
        # jump to the start of the next variable
        rdr.mat_stream.seek(next_position)
        if hdr.is_global:
            global_names.append(name)
    return mdict
def read_workspace_vars(fname):
    """Read the variables stored in the ``__function_workspace__`` of a mat file.

    Parameters
    ----------
    fname : str
        Path of the mat file to open.

    Returns
    -------
    mdict : dict
        Result of ``read_minimat_vars`` run over the workspace bytes.
    """
    # ``with`` guarantees the file is closed even if reading fails
    with open(fname, 'rb') as fp:
        rdr = MatFile5Reader(fp, struct_as_record=True)
        variables = rdr.get_variables()
        fws = variables['__function_workspace__']
        # tobytes() rather than its deprecated alias tostring()
        ws_bs = io.BytesIO(fws.tobytes())
        ws_bs.seek(2)
        # From here on the reader works on the in-memory workspace bytes
        rdr.mat_stream = ws_bs
        # Guess byte order.
        mi = rdr.mat_stream.read(2)
        rdr.byte_order = '<' if mi == b'IM' else '>'
        rdr.mat_stream.read(4)  # presumably byte padding
        mdict = read_minimat_vars(rdr)
    return mdict
def test_jottings():
    # example: reading the workspace of the sample file must not raise
    read_workspace_vars(os.path.join(test_data_path, 'parabola.mat'))
@@ -0,0 +1,46 @@
""" Testing
"""
from __future__ import division, print_function, absolute_import
import numpy as np
from numpy.testing import assert_array_equal, assert_array_almost_equal, \
assert_
from scipy.io.matlab.mio_utils import squeeze_element, chars_to_strings
def test_squeeze_element():
    # ordinary arrays are squeezed exactly as np.squeeze does
    row = np.zeros((1,3))
    assert_array_equal(np.squeeze(row), squeeze_element(row))
    # 0d output from squeeze gives scalar
    squeezed_scalar = squeeze_element(np.zeros((1,1), dtype=float))
    assert_(isinstance(squeezed_scalar, float))
    # Unless it's a structured array
    structured = np.zeros((1,1),dtype=[('f1', 'f')])
    assert_(isinstance(squeeze_element(structured), np.ndarray))
def test_chars_strings():
    # chars as strings
    strings = ['learn ', 'python', 'fast  ', 'here  ']
    str_arr = np.array(strings, dtype='U6')  # shape (4,)
    chars = [list(s) for s in strings]
    char_arr = np.array(chars, dtype='U1')  # shape (4,6)
    assert_array_equal(chars_to_strings(char_arr), str_arr)
    # the same data with extra leading axes: only the last axis is joined
    for leading_shape in ((2, 2), (1, 2, 2)):
        reshaped_chars = char_arr.reshape(leading_shape + (6,))
        reshaped_strs = str_arr.reshape(leading_shape)
        assert_array_equal(chars_to_strings(reshaped_chars), reshaped_strs)
    # Fortran ordered arrays
    char_arrf = np.array(chars, dtype='U1', order='F')  # shape (4,6)
    assert_array_equal(chars_to_strings(char_arrf), str_arr)
    # empty array
    empty_chars = np.array([['']], dtype='U1')
    empty_strs = np.array([''], dtype='U1')
    assert_array_equal(chars_to_strings(empty_chars), empty_strs)
@@ -0,0 +1,31 @@
""" Testing miobase module
"""
import numpy as np
from numpy.testing import assert_equal
from pytest import raises as assert_raises
from scipy.io.matlab.miobase import matdims
def test_matdims():
    # Test matdims dimension finder
    default_cases = (
        (np.array(1), (1, 1)),            # numpy scalar
        (np.array([1]), (1, 1)),          # 1d array, 1 element
        (np.array([1,2]), (2, 1)),        # 1d array, 2 elements
        (np.array([[2],[3]]), (2, 1)),    # 2d array, column vector
        (np.array([[2,3]]), (1, 2)),      # 2d array, row vector
        (np.array([[[2,3]]]), (1, 1, 2)),  # 3d array, rowish vector
        (np.array([]), (0, 0)),           # empty 1d array
        (np.array([[]]), (0, 0)),         # empty 2d
        (np.array([[[]]]), (0, 0, 0)),    # empty 3d
    )
    for arr, expected in default_cases:
        assert_equal(matdims(arr), expected)
    # Optional argument flips 1-D shape behavior.
    assert_equal(matdims(np.array([1,2]), 'row'), (1, 2))  # 1d array, 2 elements
    # The argument has to make sense though
    assert_raises(ValueError, matdims, np.array([1,2]), 'bizarre')
    # Check empty sparse matrices get their own shape
    from scipy.sparse import csr_matrix, csc_matrix
    assert_equal(matdims(csr_matrix(np.zeros((3, 3)))), (3, 3))
    assert_equal(matdims(csc_matrix(np.zeros((2, 2)))), (2, 2))
@@ -0,0 +1,35 @@
""" Test reading of files not conforming to matlab specification
We try and read any file that matlab reads, these files included
"""
from __future__ import division, print_function, absolute_import
from os.path import dirname, join as pjoin
from numpy.testing import assert_
from pytest import raises as assert_raises
from scipy.io.matlab.mio import loadmat
TEST_DATA_PATH = pjoin(dirname(__file__), 'data')
def test_multiple_fieldnames():
    # Example provided by Dharhas Pothina
    # Extracted using mio5.varmats_from_mat
    multi_fname = pjoin(TEST_DATA_PATH, 'nasty_duplicate_fieldnames.mat')
    loaded = loadmat(multi_fname)
    funny_names = loaded['Summary'].dtype.names
    # all three field names below must be present in the loaded struct
    expected_names = {'_1_Station_Q', '_2_Station_Q', '_3_Station_Q'}
    assert_(expected_names.issubset(funny_names))
def test_malformed1():
    # Example from gh-6072
    # Contains malformed header data, which previously resulted into a
    # buffer overflow.
    #
    # Should raise an exception, not segfault
    malformed_path = pjoin(TEST_DATA_PATH, 'malformed1.mat')
    with open(malformed_path, 'rb') as stream:
        assert_raises(ValueError, loadmat, stream)
@@ -0,0 +1,184 @@
""" Testing
"""
from __future__ import division, print_function, absolute_import
import os
import sys
import zlib
from io import BytesIO
if sys.version_info[0] >= 3:
cStringIO = BytesIO
else:
from cStringIO import StringIO as cStringIO
from tempfile import mkstemp
from contextlib import contextmanager
import numpy as np
from numpy.testing import assert_, assert_equal
from pytest import raises as assert_raises
from scipy.io.matlab.streams import (make_stream,
GenericStream, cStringStream, FileStream, ZlibInputStream,
_read_into, _read_string)
IS_PYPY = ('__pypy__' in sys.modules)
@contextmanager
def setup_test_file():
    """Yield three readable streams that all hold the same test bytes.

    Yields
    ------
    fs : file object
        A temporary file on disk, opened for binary reading.
    gs : BytesIO
        In-memory stream with the same content.
    cs : cStringIO
        Second in-memory stream with the same content.

    The temporary file is removed when the context exits, including when
    the body of the ``with`` statement raises.
    """
    val = b'a\x00string'
    fd, fname = mkstemp()
    try:
        with os.fdopen(fd, 'wb') as fs:
            fs.write(val)
        with open(fname, 'rb') as fs:
            gs = BytesIO(val)
            cs = cStringIO(val)
            yield fs, gs, cs
    finally:
        # An exception raised inside the caller's ``with`` body resurfaces at
        # the ``yield``; without ``finally`` the temporary file would be left
        # behind.
        os.unlink(fname)
def test_make_stream():
    with setup_test_file() as (fs, gs, cs):
        # test stream initialization
        generic = make_stream(gs)
        assert_(isinstance(generic, GenericStream))
        # the specialised stream classes are only checked on Python 2
        # outside PyPy
        py2_cpython = sys.version_info[0] < 3 and not IS_PYPY
        if py2_cpython:
            assert_(isinstance(make_stream(cs), cStringStream))
            assert_(isinstance(make_stream(fs), FileStream))
def test_tell_seek():
    # (arguments to seek, position expected afterwards), applied in order
    seek_steps = (
        ((0,), 0),      # absolute, to the start
        ((5,), 5),      # absolute
        ((2, 1), 7),    # relative to the current position
        ((-2, 2), 6),   # relative to the end
    )
    with setup_test_file() as (fs, gs, cs):
        for s in (fs, gs, cs):
            st = make_stream(s)
            for seek_args, expected_pos in seek_steps:
                res = st.seek(*seek_args)
                assert_equal(res, 0)
                assert_equal(st.tell(), expected_pos)
def test_read():
    with setup_test_file() as (fs, gs, cs):
        for s in (fs, gs, cs):
            st = make_stream(s)
            # plain reads: everything, then a 4-byte prefix
            st.seek(0)
            assert_equal(st.read(-1), b'a\x00string')
            st.seek(0)
            assert_equal(st.read(4), b'a\x00st')
            # read into, then read alloc: two full 4-byte reads succeed and
            # a further read past the end raises
            for reader in (_read_into, _read_string):
                st.seek(0)
                assert_equal(reader(st, 4), b'a\x00st')
                assert_equal(reader(st, 4), b'ring')
                assert_raises(IOError, reader, st, 2)
class TestZlibInputStream(object):
    """Tests for ZlibInputStream: reading, seeking and end-of-data checks."""

    def _get_data(self, size):
        """Return (compressed stream, compressed length, raw data).

        The raw data is `size` random bytes.
        """
        # tobytes() rather than its deprecated alias tostring()
        data = np.random.randint(0, 256, size).astype(np.uint8).tobytes()
        compressed_data = zlib.compress(data)
        stream = BytesIO(compressed_data)
        return stream, len(compressed_data), data

    def test_read(self):
        # data sizes and read sizes are chosen around this value
        block_size = 131072

        SIZES = [0, 1, 10, block_size//2, block_size-1,
                 block_size, block_size+1, 2*block_size-1]

        READ_SIZES = [block_size//2, block_size-1,
                      block_size, block_size+1]

        def check(size, read_size):
            # read the whole stream in chunks of at most `read_size` and
            # compare with the uncompressed original
            compressed_stream, compressed_data_len, data = self._get_data(size)
            stream = ZlibInputStream(compressed_stream, compressed_data_len)
            data2 = b''
            so_far = 0
            while True:
                block = stream.read(min(read_size,
                                        size - so_far))
                if not block:
                    break
                so_far += len(block)
                data2 += block
            assert_equal(data, data2)

        for size in SIZES:
            for read_size in READ_SIZES:
                check(size, read_size)

    def test_read_max_length(self):
        size = 1234
        # tobytes() rather than its deprecated alias tostring()
        data = np.random.randint(0, 256, size).astype(np.uint8).tobytes()
        compressed_data = zlib.compress(data)
        # trailing bytes after the compressed data must not be consumed
        compressed_stream = BytesIO(compressed_data + b"abbacaca")
        stream = ZlibInputStream(compressed_stream, len(compressed_data))

        stream.read(len(data))
        assert_equal(compressed_stream.tell(), len(compressed_data))

        assert_raises(IOError, stream.read, 1)

    def test_seek(self):
        compressed_stream, compressed_data_len, data = self._get_data(1024)

        stream = ZlibInputStream(compressed_stream, compressed_data_len)

        # absolute seek (default whence)
        stream.seek(123)
        p = 123
        assert_equal(stream.tell(), p)
        d1 = stream.read(11)
        assert_equal(d1, data[p:p+11])

        # relative seek forwards
        stream.seek(321, 1)
        p = 123+11+321
        assert_equal(stream.tell(), p)
        d2 = stream.read(21)
        assert_equal(d2, data[p:p+21])

        # absolute seek with explicit whence
        stream.seek(641, 0)
        p = 641
        assert_equal(stream.tell(), p)
        d3 = stream.read(11)
        assert_equal(d3, data[p:p+11])

        # seeking from the end, seeking backwards and an unknown whence
        # value are all rejected
        assert_raises(IOError, stream.seek, 10, 2)
        assert_raises(IOError, stream.seek, -1, 1)
        assert_raises(ValueError, stream.seek, 1, 123)

        # seeking past the end is accepted, but a read after it fails
        stream.seek(10000, 1)
        assert_raises(IOError, stream.read, 12)

    def test_all_data_read(self):
        compressed_stream, compressed_data_len, data = self._get_data(1024)
        stream = ZlibInputStream(compressed_stream, compressed_data_len)
        # not at the end until the position reaches the full data length
        assert_(not stream.all_data_read())
        stream.seek(512)
        assert_(not stream.all_data_read())
        stream.seek(1024)
        assert_(stream.all_data_read())