pruned venvs
This commit is contained in:
@@ -1,113 +0,0 @@
|
||||
# -*- encoding:utf-8 -*-
|
||||
"""
|
||||
==================================
|
||||
Input and output (:mod:`scipy.io`)
|
||||
==================================
|
||||
|
||||
.. currentmodule:: scipy.io
|
||||
|
||||
SciPy has many modules, classes, and functions available to read data
|
||||
from and write data to a variety of file formats.
|
||||
|
||||
.. seealso:: :ref:`numpy-reference.routines.io` (in Numpy)
|
||||
|
||||
MATLAB® files
|
||||
=============
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
loadmat - Read a MATLAB style mat file (version 4 through 7.1)
|
||||
savemat - Write a MATLAB style mat file (version 4 through 7.1)
|
||||
whosmat - List contents of a MATLAB style mat file (version 4 through 7.1)
|
||||
|
||||
IDL® files
|
||||
==========
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
readsav - Read an IDL 'save' file
|
||||
|
||||
Matrix Market files
|
||||
===================
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
mminfo - Query matrix info from Matrix Market formatted file
|
||||
mmread - Read matrix from Matrix Market formatted file
|
||||
mmwrite - Write matrix to Matrix Market formatted file
|
||||
|
||||
Unformatted Fortran files
|
||||
===============================
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
FortranFile - A file object for unformatted sequential Fortran files
|
||||
|
||||
Netcdf
|
||||
======
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
netcdf_file - A file object for NetCDF data
|
||||
netcdf_variable - A data object for the netcdf module
|
||||
|
||||
Harwell-Boeing files
|
||||
====================
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
hb_read -- read H-B file
|
||||
hb_write -- write H-B file
|
||||
|
||||
Wav sound files (:mod:`scipy.io.wavfile`)
|
||||
=========================================
|
||||
|
||||
.. module:: scipy.io.wavfile
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
read
|
||||
write
|
||||
WavFileWarning
|
||||
|
||||
Arff files (:mod:`scipy.io.arff`)
|
||||
=================================
|
||||
|
||||
.. module:: scipy.io.arff
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
loadarff
|
||||
MetaData
|
||||
ArffError
|
||||
ParseArffError
|
||||
|
||||
"""
|
||||
from __future__ import division, print_function, absolute_import

# matfile read and write
from .matlab import loadmat, savemat, whosmat, byteordercodes

# netCDF file support
from .netcdf import netcdf_file, netcdf_variable

# Fortran file support
from ._fortran import FortranFile

# Matrix Market, IDL 'save' and Harwell-Boeing readers/writers.
from .mmio import mminfo, mmread, mmwrite
from .idl import readsav
from .harwell_boeing import hb_read, hb_write

# Export every public (non-underscore) name imported above.
__all__ = [s for s in dir() if not s.startswith('_')]

# Attach the standard scipy per-subpackage test runner, then drop the
# helper name so it does not leak into the public namespace.
from scipy._lib._testutils import PytestTester
test = PytestTester(__name__)
del PytestTester
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,317 +0,0 @@
|
||||
"""
|
||||
Module to read / write Fortran unformatted sequential files.
|
||||
|
||||
This is in the spirit of code written by Neil Martinsen-Burrell and Joe Zuntz.
|
||||
|
||||
"""
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import warnings
|
||||
import numpy as np
|
||||
|
||||
__all__ = ['FortranFile']
|
||||
|
||||
|
||||
class FortranFile(object):
    """
    A file object for unformatted sequential files from Fortran code.

    Parameters
    ----------
    filename : file or str
        Open file object or filename.
    mode : {'r', 'w'}, optional
        Read-write mode, default is 'r'.
    header_dtype : dtype, optional
        Data type of the record headers.  Size and endianness must match
        the input/output file.

    Notes
    -----
    An unformatted sequential file is a series of records; each record is
    stored as a size header, the raw data bytes, and the size repeated
    (the trailing copy lets compilers implement the BACKSPACE statement).
    The width of the size header itself is compiler dependent, hence the
    ``header_dtype`` parameter.

    This class only supports files written with both leading and trailing
    record sizes.  It does not support the subrecord scheme used by Intel
    and gfortran compilers for records larger than 2GB with a 4-byte
    header.

    Since this is a non-standard file format, whose contents depend on the
    compiler and the endianness of the machine, caution is advised. Files
    from gfortran 4.8.0 and gfortran 4.1.2 on x86_64 are known to work.

    Consider using Fortran direct-access files or files from the newer
    Stream I/O, which can be easily read by `numpy.fromfile`.

    Examples
    --------
    To create an unformatted sequential Fortran file:

    >>> from scipy.io import FortranFile
    >>> f = FortranFile('test.unf', 'w')
    >>> f.write_record(np.array([1, 2, 3, 4, 5], dtype=np.int32))
    >>> f.close()

    To read this file:

    >>> f = FortranFile('test.unf', 'r')
    >>> a = f.read_ints(np.int32)  # array([1, 2, 3, 4, 5], dtype=int32)
    >>> f.close()
    """
    def __init__(self, filename, mode='r', header_dtype=np.uint32):
        if header_dtype is None:
            raise ValueError('Must specify dtype')

        header_dtype = np.dtype(header_dtype)
        if header_dtype.kind != 'u':
            # Signed headers are unusual; a negative size would be nonsense.
            warnings.warn("Given a dtype which is not unsigned.")

        if mode not in 'rw' or len(mode) != 1:
            raise ValueError('mode must be either r or w')

        # Accept an already-open file object; otherwise open in binary mode.
        self._fp = filename if hasattr(filename, 'seek') else open(filename, '%sb' % mode)
        self._header_dtype = header_dtype

    def _read_size(self):
        # A record header/footer is one integer giving the payload byte count.
        return int(np.fromfile(self._fp, dtype=self._header_dtype, count=1))

    def write_record(self, *items):
        """
        Write a record (including sizes) to the file.

        Parameters
        ----------
        *items : array_like
            The data arrays to write.

        Notes
        -----
        Data in multidimensional arrays is written in row-major order; to
        have Fortran programs read it back correctly, transpose the arrays
        yourself before writing them.
        """
        arrays = tuple(np.asarray(entry) for entry in items)
        byte_count = sum(entry.nbytes for entry in arrays)

        size_marker = np.array([byte_count], dtype=self._header_dtype)

        # Record layout: size, payload(s), size.
        size_marker.tofile(self._fp)
        for entry in arrays:
            entry.tofile(self._fp)
        size_marker.tofile(self._fp)

    def read_record(self, *dtypes, **kwargs):
        """
        Reads a record of a given type from the file.

        Parameters
        ----------
        *dtypes : dtypes, optional
            Data type(s) specifying the size and endianness of the data.

        Returns
        -------
        data : ndarray
            A one-dimensional array object.

        Notes
        -----
        A record holding a multi-dimensional array can be read by giving
        the shape inside the dtype, e.g. ``read_record('(4,5)i4')``.  The
        file data is **not** assumed to be in Fortran column-major order,
        so swap the order of the dimensions and transpose the result.

        Records containing several variables of mixed types must be read
        by passing one dtype per variable, e.g.
        ``read_record('<f4', np.dtype(('<i4', (4, 3))))``; the record size
        must then match the summed dtype sizes exactly.

        See Also
        --------
        read_reals
        read_ints
        """
        requested = kwargs.pop('dtype', None)
        if kwargs:
            raise ValueError("Unknown keyword arguments {}".format(tuple(kwargs.keys())))

        if requested is not None:
            dtypes = dtypes + (requested,)
        elif not dtypes:
            raise ValueError('Must specify at least one dtype')

        first_size = self._read_size()

        specs = tuple(np.dtype(entry) for entry in dtypes)
        block_size = sum(spec.itemsize for spec in specs)

        num_blocks, remainder = divmod(first_size, block_size)
        if remainder != 0:
            raise ValueError('Size obtained ({0}) is not a multiple of the '
                             'dtypes given ({1}).'.format(first_size, block_size))

        if len(specs) != 1 and first_size != block_size:
            # Fortran does not interleave mixed-type array items, and the
            # individual array lengths cannot be guessed from the record
            # size, so the caller must spell out each array's exact size.
            raise ValueError('Size obtained ({0}) does not match with the expected '
                             'size ({1}) of multi-item record'.format(first_size, block_size))

        data = []
        for spec in specs:
            chunk = np.fromfile(self._fp, dtype=spec, count=num_blocks)
            if spec.shape != () and num_blocks == 1:
                # Squeeze the outermost block dimension for array items.
                assert chunk.shape == (1,) + spec.shape
                chunk = chunk[0]
            data.append(chunk)

        second_size = self._read_size()
        if first_size != second_size:
            raise IOError('Sizes do not agree in the header and footer for '
                          'this record - check header dtype')

        # A single-dtype record unpacks to the bare array.
        return data[0] if len(specs) == 1 else tuple(data)

    def read_ints(self, dtype='i4'):
        """
        Reads a record of a given type from the file, defaulting to an integer
        type (``INTEGER*4`` in Fortran).

        Parameters
        ----------
        dtype : dtype, optional
            Data type specifying the size and endianness of the data.

        Returns
        -------
        data : ndarray
            A one-dimensional array object.

        See Also
        --------
        read_reals
        read_record
        """
        return self.read_record(dtype)

    def read_reals(self, dtype='f8'):
        """
        Reads a record of a given type from the file, defaulting to a floating
        point number (``real*8`` in Fortran).

        Parameters
        ----------
        dtype : dtype, optional
            Data type specifying the size and endianness of the data.

        Returns
        -------
        data : ndarray
            A one-dimensional array object.

        See Also
        --------
        read_ints
        read_record
        """
        return self.read_record(dtype)

    def close(self):
        """
        Closes the file. It is unsupported to call any other methods off
        this object after closing it. Note that this class supports the
        'with' statement in modern versions of Python, to call this
        automatically.
        """
        self._fp.close()

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close()
|
||||
BIN
Binary file not shown.
@@ -1,26 +0,0 @@
|
||||
"""
|
||||
Module to read ARFF files, which are the standard data format for WEKA.
|
||||
|
||||
ARFF is a text file format which support numerical, string and data values.
|
||||
The format can also represent missing data and sparse data.
|
||||
|
||||
Notes
|
||||
-----
|
||||
The ARFF support in ``scipy.io`` provides file reading functionality only.
|
||||
For more extensive ARFF functionality, see `liac-arff
|
||||
<https://github.com/renatopp/liac-arff>`_.
|
||||
|
||||
See the `WEKA website <http://weka.wikispaces.com/ARFF>`_
|
||||
for more details about the ARFF format and available datasets.
|
||||
|
||||
"""
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
from .arffread import *
|
||||
from . import arffread
|
||||
|
||||
__all__ = arffread.__all__
|
||||
|
||||
from scipy._lib._testutils import PytestTester
|
||||
test = PytestTester(__name__)
|
||||
del PytestTester
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
@@ -1,670 +0,0 @@
|
||||
# Last Change: Mon Aug 20 08:00 PM 2007 J
from __future__ import division, print_function, absolute_import

import re
import itertools
import datetime
from functools import partial

import numpy as np

from scipy._lib.six import next

"""A module to read arff files."""
# NOTE(review): the string above is not actually the module docstring,
# because Python only treats the *first* statement of a module as its
# docstring and imports precede it here.

__all__ = ['MetaData', 'loadarff', 'ArffError', 'ParseArffError']

# An Arff file is basically two parts:
#   - header
#   - data
#
# A header has each of its components starting by @META where META is one of
# the keyword (attribute of relation, for now).

# TODO:
#   - both integer and reals are treated as numeric -> the integer info
#     is lost!
#   - Replace ValueError by ParseError or something

# We know can handle the following:
#   - numeric and nominal attributes
#   - missing values for numeric attributes

# Match any line whose first non-blank character is '@' (meta line).
r_meta = re.compile(r'^\s*@')
# Match a comment
r_comment = re.compile(r'^%')
# Match an empty line
r_empty = re.compile(r'^\s+$')
# Match a header line, that is a line which starts by @ + a word
r_headerline = re.compile(r'^@\S*')
# Case-insensitive @data / @relation / @attribute markers.
r_datameta = re.compile(r'^@[Dd][Aa][Tt][Aa]')
r_relation = re.compile(r'^@[Rr][Ee][Ll][Aa][Tt][Ii][Oo][Nn]\s*(\S*)')
r_attribute = re.compile(r'^@[Aa][Tt][Tt][Rr][Ii][Bb][Uu][Tt][Ee]\s*(..*$)')

# To get attributes name enclosed with ''
r_comattrval = re.compile(r"'(..+)'\s+(..+$)")
# To get normal attributes
r_wcomattrval = re.compile(r"(\S+)\s+(..+$)")
|
||||
|
||||
#-------------------------
|
||||
# Module defined exception
|
||||
#-------------------------
|
||||
|
||||
|
||||
class ArffError(IOError):
    """Base exception for all errors raised while reading an ARFF file."""
    pass
|
||||
|
||||
|
||||
class ParseArffError(ArffError):
    """Raised when the ARFF header or an attribute declaration cannot be parsed."""
    pass
|
||||
|
||||
#------------------
|
||||
# Various utilities
|
||||
#------------------
|
||||
|
||||
# An attribute is defined as @attribute name value
|
||||
|
||||
|
||||
def parse_type(attrtype):
    """Given an arff attribute value (meta data), returns its type.

    Expect the value to be a name.

    Parameters
    ----------
    attrtype : str
        The type declaration part of an ``@attribute`` line.

    Returns
    -------
    str
        One of 'nominal', 'numeric', 'string', 'relational' or 'date'.

    Raises
    ------
    ParseArffError
        If the declaration matches none of the known types.  (The empty
        string previously crashed with IndexError; it now raises
        ParseArffError like any other unknown declaration.)
    """
    uattribute = attrtype.lower().strip()
    # A nominal declaration is an enumeration framed between braces.
    if uattribute.startswith('{'):
        return 'nominal'
    # Integer and real attributes are both mapped to 'numeric'; the
    # integer-ness of the data is lost (see the module-level TODO).
    for prefix, kind in (('real', 'numeric'),
                         ('integer', 'numeric'),
                         ('numeric', 'numeric'),
                         ('string', 'string'),
                         ('relational', 'relational'),
                         ('date', 'date')):
        if uattribute.startswith(prefix):
            return kind
    raise ParseArffError("unknown attribute %s" % uattribute)
|
||||
|
||||
|
||||
def get_nominal(attribute):
    """If attribute is nominal, returns a list of the values"""
    # Note: this does a bare comma split; it does not strip the framing
    # braces or surrounding whitespace (get_nom_val does that).
    return attribute.split(',')
|
||||
|
||||
|
||||
def read_data_list(ofile):
    """Read each line of the iterable and put it in a list.

    Parameters
    ----------
    ofile : iterator of str
        Line iterator positioned at the first data line (header already
        consumed).

    Returns
    -------
    list of str
        All remaining lines, first line included.

    Raises
    ------
    ValueError
        If the first line opens with '{' (sparse ARFF, unsupported).
    """
    data = [next(ofile)]
    if data[0].strip()[0] == '{':
        raise ValueError("This looks like a sparse ARFF: not supported yet")
    # extend() consumes the iterator directly; the intermediate list
    # comprehension in the original was a needless copy.
    data.extend(ofile)
    return data
|
||||
|
||||
|
||||
def get_ndata(ofile):
    """Read the whole file to get number of data attributes.

    Consumes the iterator entirely and returns the number of data lines.
    Raises ValueError for sparse ARFF files (first line opening with '{').
    """
    first = next(ofile)
    if first.strip()[0] == '{':
        raise ValueError("This looks like a sparse ARFF: not supported yet")
    # One for the line already consumed, plus the rest of the iterator.
    return 1 + sum(1 for _ in ofile)
|
||||
|
||||
|
||||
def maxnomlen(atrv):
    """Given a string containing a nominal type definition, returns the
    string len of the biggest component.

    A nominal type is defined as something framed between braces ({}).

    Parameters
    ----------
    atrv : str
        Nominal type definition

    Returns
    -------
    slen : int
        length of longest component

    Examples
    --------
    >>> maxnomlen("{floup, bouga, fl, ratata}")
    6
    """
    # Delegate the parsing to get_nom_val and take the longest value.
    return max(map(len, get_nom_val(atrv)))
|
||||
|
||||
|
||||
def get_nom_val(atrv):
    """Given a string containing a nominal type, returns a tuple of the
    possible values.

    A nominal type is defined as something framed between braces ({}).

    Parameters
    ----------
    atrv : str
        Nominal type definition

    Returns
    -------
    poss_vals : tuple
        possible values

    Examples
    --------
    >>> get_nom_val("{floup, bouga, fl, ratata}")
    ('floup', 'bouga', 'fl', 'ratata')
    """
    match = re.match(r'{(.+)}', atrv)
    if match is None:
        raise ValueError("This does not look like a nominal string")
    # Split the brace contents on commas and strip surrounding blanks.
    return tuple(part.strip() for part in match.group(1).split(','))
|
||||
|
||||
|
||||
def get_date_format(atrv):
    """Parse an ARFF ``date`` attribute declaration.

    Converts the Java SimpleDateFormat pattern found after the ``date``
    keyword into a C/strptime format string, and determines the finest
    time unit the pattern contains.

    Parameters
    ----------
    atrv : str
        Attribute declaration, e.g. ``"date yyyy-MM-dd"``.

    Returns
    -------
    pattern : str
        strptime-compatible format string.
    datetime_unit : str
        numpy datetime64 unit code of the finest component present
        ('Y', 'M', 'D', 'h', 'm' or 's').

    Raises
    ------
    ValueError
        If no date pattern is found, no supported component is present,
        or the pattern contains a time zone marker ('z'/'Z').
    """
    r_date = re.compile(r"[Dd][Aa][Tt][Ee]\s+[\"']?(.+?)[\"']?$")
    m = r_date.match(atrv)
    if not m:
        raise ValueError("Invalid or no date format")

    pattern = m.group(1).strip()
    # convert time pattern from Java's SimpleDateFormat to C's format
    datetime_unit = None
    if "yyyy" in pattern:
        pattern = pattern.replace("yyyy", "%Y")
        datetime_unit = "Y"
    elif "yy" in pattern:
        # BUG FIX: this branch used to read ``elif "yy":`` which is always
        # truthy, so every pattern was tagged with a year unit even when it
        # contained no year at all, and the "Invalid or unsupported date
        # format" error below became unreachable.
        pattern = pattern.replace("yy", "%y")
        datetime_unit = "Y"
    if "MM" in pattern:
        pattern = pattern.replace("MM", "%m")
        datetime_unit = "M"
    if "dd" in pattern:
        pattern = pattern.replace("dd", "%d")
        datetime_unit = "D"
    if "HH" in pattern:
        pattern = pattern.replace("HH", "%H")
        datetime_unit = "h"
    if "mm" in pattern:
        pattern = pattern.replace("mm", "%M")
        datetime_unit = "m"
    if "ss" in pattern:
        pattern = pattern.replace("ss", "%S")
        datetime_unit = "s"
    if "z" in pattern or "Z" in pattern:
        raise ValueError("Date type attributes with time zone not "
                         "supported, yet")

    if datetime_unit is None:
        raise ValueError("Invalid or unsupported date format")

    return pattern, datetime_unit
|
||||
|
||||
|
||||
def go_data(ofile):
    """Skip the header.

    The first next() call on the returned iterator yields the @data line."""
    def _before_data(line):
        return not r_datameta.match(line)
    return itertools.dropwhile(_before_data, ofile)
|
||||
|
||||
|
||||
#----------------
|
||||
# Parsing header
|
||||
#----------------
|
||||
def tokenize_attribute(iterable, attribute):
    """Parse one @attribute declaration from the header.

    Constraints:

    * The line must start with @attribute (case insensitive, leading
      space-like characters allowed).
    * One extra line is consumed from ``iterable`` and handed back so the
      caller can continue parsing from it.
    * Multi-line declarations are rejected.

    Parameters
    ----------
    iterable : iterator of str
        Line iterator positioned just after ``attribute``.
    attribute : str
        the attribute string.

    Returns
    -------
    name : str
        name of the attribute
    value : str
        type declaration of the attribute
    next : str
        next line to be parsed

    Examples
    --------
    >>> iterable = iter([0] * 10)  # dummy iterator
    >>> tokenize_attribute(iterable, r"@attribute floupi real")
    ('floupi', 'real', 0)
    >>> tokenize_attribute(iterable, r"  @attribute 'floupi 2' real   ")
    ('floupi 2', 'real', 0)
    """
    sattr = attribute.strip()
    mattr = r_attribute.match(sattr)
    if not mattr:
        raise ValueError("First line unparsable: %s" % sattr)

    # atrv is everything after the @attribute keyword.
    atrv = mattr.group(1)
    if r_comattrval.match(atrv):
        # Attribute name quoted with '', possibly containing spaces.
        name, type = tokenize_single_comma(atrv)
    elif r_wcomattrval.match(atrv):
        name, type = tokenize_single_wcomma(atrv)
    else:
        # Multi-line declarations do not seem to be supported by weka
        # itself, so they are rejected here as well.
        raise ValueError("multi line not supported yet")
    next_item = next(iterable)

    if type == 'relational':
        raise ValueError("relational attributes not supported yet")
    return name, type, next_item
|
||||
|
||||
|
||||
def tokenize_single_comma(val):
    """Split a quoted-name attribute declaration into (name, type)."""
    # XXX we match twice the same string (here and at the caller level). It is
    # stupid, but it is easier for now...
    m = r_comattrval.match(val)
    if not m:
        raise ValueError("Error while tokenizing single %s" % val)
    try:
        name = m.group(1).strip()
        type = m.group(2).strip()
    except IndexError:
        raise ValueError("Error while tokenizing attribute")
    return name, type
|
||||
|
||||
|
||||
def tokenize_single_wcomma(val):
    """Split an unquoted-name attribute declaration into (name, type)."""
    # XXX we match twice the same string (here and at the caller level). It is
    # stupid, but it is easier for now...
    m = r_wcomattrval.match(val)
    if not m:
        raise ValueError("Error while tokenizing single %s" % val)
    try:
        name = m.group(1).strip()
        type = m.group(2).strip()
    except IndexError:
        raise ValueError("Error while tokenizing attribute")
    return name, type
|
||||
|
||||
|
||||
def read_header(ofile):
    """Read the header of the iterable ofile.

    Parameters
    ----------
    ofile : iterator of str
        Line iterator over the file, positioned at the beginning.

    Returns
    -------
    relation : str or None
        Value captured from the @relation line, if one was seen.
    attributes : list of (str, str)
        (name, type-declaration) pair for each @attribute line, in the
        order they appear in the file.
    """
    i = next(ofile)

    # Pass first comments
    while r_comment.match(i):
        i = next(ofile)

    # Header is everything up to DATA attribute ?
    relation = None
    attributes = []
    while not r_datameta.match(i):
        m = r_headerline.match(i)
        if m:
            isattr = r_attribute.match(i)
            if isattr:
                # tokenize_attribute consumes one extra line and hands it
                # back, so the loop continues from that lookahead line.
                name, type, i = tokenize_attribute(ofile, i)
                attributes.append((name, type))
            else:
                isrel = r_relation.match(i)
                if isrel:
                    relation = isrel.group(1)
                else:
                    raise ValueError("Error parsing line %s" % i)
                i = next(ofile)
        else:
            # Neither a meta line nor data: skip (comments, blanks).
            i = next(ofile)

    return relation, attributes
|
||||
|
||||
|
||||
#--------------------
|
||||
# Parsing actual data
|
||||
#--------------------
|
||||
def safe_float(x):
    """given a string x, convert it to a float. If the stripped string is a ?,
    return a Nan (missing value).

    Parameters
    ----------
    x : str
        string to convert

    Returns
    -------
    f : float
        where float can be nan

    Examples
    --------
    >>> safe_float('1')
    1.0
    >>> safe_float('1\\n')
    1.0
    >>> safe_float('?\\n')
    nan
    """
    # '?' anywhere in the field marks a missing value.
    return np.nan if '?' in x else float(x)
|
||||
|
||||
|
||||
def safe_nominal(value, pvalue):
    """Validate a nominal field against its declared value set.

    Returns the stripped value when it belongs to ``pvalue`` or is the
    missing-data marker '?'; raises ValueError otherwise.
    """
    svalue = value.strip()
    if svalue in pvalue or svalue == '?':
        return svalue
    raise ValueError("%s value not in %s" % (str(svalue), str(pvalue)))
|
||||
|
||||
|
||||
def safe_date(value, date_format, datetime_unit):
    """Convert an ARFF date field to a numpy datetime64.

    After stripping blanks and quotes, '?' is the missing-data marker and
    maps to NaT; anything else is parsed with ``date_format`` (strptime)
    and cast to ``datetime64[datetime_unit]``.
    """
    date_str = value.strip().strip("'").strip('"')
    if date_str == '?':
        return np.datetime64('NaT', datetime_unit)
    parsed = datetime.datetime.strptime(date_str, date_format)
    return np.datetime64(parsed).astype("datetime64[%s]" % datetime_unit)
|
||||
|
||||
|
||||
class MetaData(object):
    """Small container to keep useful information on a ARFF dataset.

    Knows about attributes names and types.

    Iterating over a MetaData instance yields the attribute names in the
    order they were declared in the file; ``meta[name]`` returns the
    ``(type, range)`` pair for one attribute.

    Examples
    --------
    ::

        data, meta = loadarff('iris.arff')
        # This will print the attributes names of the iris.arff dataset
        for i in meta:
            print(i)
        # This works too
        meta.names()
        # Getting attribute type
        types = meta.types()
    """
    def __init__(self, rel, attr):
        self.name = rel
        # Declaration order matters, and plain dicts were unordered when
        # this was written, so names are tracked in a separate list.
        # XXX: may be better to implement an ordered dictionary
        self._attributes = {}
        self._attrnames = []
        for attr_name, declaration in attr:
            kind = parse_type(declaration)
            self._attrnames.append(attr_name)
            if kind == 'nominal':
                extra = get_nom_val(declaration)
            elif kind == 'date':
                extra = get_date_format(declaration)[0]
            else:
                extra = None
            self._attributes[attr_name] = (kind, extra)

    def __repr__(self):
        parts = ["Dataset: %s\n" % self.name]
        for attr_name in self._attrnames:
            kind, rng = self._attributes[attr_name]
            line = "\t%s's type is %s" % (attr_name, kind)
            if rng:
                line += ", range is %s" % str(rng)
            parts.append(line + '\n')
        return "".join(parts)

    def __iter__(self):
        return iter(self._attrnames)

    def __getitem__(self, key):
        return self._attributes[key]

    def names(self):
        """Return the list of attribute names."""
        return self._attrnames

    def types(self):
        """Return the list of attribute types."""
        return [self._attributes[attr_name][0] for attr_name in self._attrnames]
|
||||
|
||||
|
||||
def loadarff(f):
    """
    Read an arff file.

    The data is returned as a record array, which can be accessed much like
    a dictionary of numpy arrays. For example, if one of the attributes is
    called 'pressure', then its first 10 data points can be accessed from the
    ``data`` record array like so: ``data['pressure'][0:10]``

    Parameters
    ----------
    f : file-like or str
        File-like object to read from, or filename to open.

    Returns
    -------
    data : record array
        The data of the arff file, accessible by attribute names.
    meta : `MetaData`
        Contains information about the arff file such as name and
        type of attributes, the relation (name of the dataset), etc...

    Raises
    ------
    ParseArffError
        This is raised if the given file is not ARFF-formatted.
    NotImplementedError
        The ARFF file has an attribute which is not supported yet
        (string attributes, sparse data).

    Notes
    -----
    This function handles numeric, nominal and date attributes.  It cannot
    read files with sparse data ({} in the file) or string attributes, but
    it can read files with missing data (? in the file), representing the
    data points as NaNs.
    """
    # Only open (and later close) the file ourselves when given a path;
    # a caller-supplied file object stays under the caller's control.
    needs_close = not hasattr(f, 'read')
    ofile = open(f, 'rt') if needs_close else f
    try:
        return _loadarff(ofile)
    finally:
        if needs_close:
            ofile.close()
|
||||
|
||||
|
||||
def _loadarff(ofile):
|
||||
# Parse the header file
|
||||
try:
|
||||
rel, attr = read_header(ofile)
|
||||
except ValueError as e:
|
||||
msg = "Error while parsing header, error was: " + str(e)
|
||||
raise ParseArffError(msg)
|
||||
|
||||
# Check whether we have a string attribute (not supported yet)
|
||||
hasstr = False
|
||||
for name, value in attr:
|
||||
type = parse_type(value)
|
||||
if type == 'string':
|
||||
hasstr = True
|
||||
|
||||
meta = MetaData(rel, attr)
|
||||
|
||||
# XXX The following code is not great
|
||||
# Build the type descriptor descr and the list of convertors to convert
|
||||
# each attribute to the suitable type (which should match the one in
|
||||
# descr).
|
||||
|
||||
# This can be used once we want to support integer as integer values and
|
||||
# not as numeric anymore (using masked arrays ?).
|
||||
acls2dtype = {'real': float, 'integer': float, 'numeric': float}
|
||||
acls2conv = {'real': safe_float,
|
||||
'integer': safe_float,
|
||||
'numeric': safe_float}
|
||||
descr = []
|
||||
convertors = []
|
||||
if not hasstr:
|
||||
for name, value in attr:
|
||||
type = parse_type(value)
|
||||
if type == 'date':
|
||||
date_format, datetime_unit = get_date_format(value)
|
||||
descr.append((name, "datetime64[%s]" % datetime_unit))
|
||||
convertors.append(partial(safe_date, date_format=date_format,
|
||||
datetime_unit=datetime_unit))
|
||||
elif type == 'nominal':
|
||||
n = maxnomlen(value)
|
||||
descr.append((name, 'S%d' % n))
|
||||
pvalue = get_nom_val(value)
|
||||
convertors.append(partial(safe_nominal, pvalue=pvalue))
|
||||
else:
|
||||
descr.append((name, acls2dtype[type]))
|
||||
convertors.append(safe_float)
|
||||
#dc.append(acls2conv[type])
|
||||
#sdescr.append((name, acls2sdtype[type]))
|
||||
else:
|
||||
# How to support string efficiently ? Ideally, we should know the max
|
||||
# size of the string before allocating the numpy array.
|
||||
raise NotImplementedError("String attributes not supported yet, sorry")
|
||||
|
||||
ni = len(convertors)
|
||||
|
||||
def generator(row_iter, delim=','):
|
||||
# TODO: this is where we are spending times (~80%). I think things
|
||||
# could be made more efficiently:
|
||||
# - We could for example "compile" the function, because some values
|
||||
# do not change here.
|
||||
# - The function to convert a line to dtyped values could also be
|
||||
# generated on the fly from a string and be executed instead of
|
||||
# looping.
|
||||
# - The regex are overkill: for comments, checking that a line starts
|
||||
# by % should be enough and faster, and for empty lines, same thing
|
||||
# --> this does not seem to change anything.
|
||||
|
||||
# 'compiling' the range since it does not change
|
||||
# Note, I have already tried zipping the converters and
|
||||
# row elements and got slightly worse performance.
|
||||
elems = list(range(ni))
|
||||
|
||||
for raw in row_iter:
|
||||
# We do not abstract skipping comments and empty lines for
|
||||
# performance reasons.
|
||||
if r_comment.match(raw) or r_empty.match(raw):
|
||||
continue
|
||||
row = raw.split(delim)
|
||||
yield tuple([convertors[i](row[i]) for i in elems])
|
||||
|
||||
a = generator(ofile)
|
||||
# No error should happen here: it is a bug otherwise
|
||||
data = np.fromiter(a, descr)
|
||||
return data, meta
|
||||
|
||||
|
||||
#-----
|
||||
# Misc
|
||||
#-----
|
||||
def basic_stats(data):
|
||||
nbfac = data.size * 1. / (data.size - 1)
|
||||
return np.nanmin(data), np.nanmax(data), np.mean(data), np.std(data) * nbfac
|
||||
|
||||
|
||||
def print_attribute(name, tp, data):
|
||||
type = tp[0]
|
||||
if type == 'numeric' or type == 'real' or type == 'integer':
|
||||
min, max, mean, std = basic_stats(data)
|
||||
print("%s,%s,%f,%f,%f,%f" % (name, type, min, max, mean, std))
|
||||
else:
|
||||
msg = name + ",{"
|
||||
for i in range(len(tp[1])-1):
|
||||
msg += tp[1][i] + ","
|
||||
msg += tp[1][-1]
|
||||
msg += "}"
|
||||
print(msg)
|
||||
|
||||
|
||||
def test_weka(filename):
|
||||
data, meta = loadarff(filename)
|
||||
print(len(data.dtype))
|
||||
print(data.size)
|
||||
for i in meta:
|
||||
print_attribute(i, meta[i], data[i])
|
||||
|
||||
|
||||
# make sure nose does not find this as a test
|
||||
test_weka.__test__ = False
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
filename = sys.argv[1]
|
||||
test_weka(filename)
|
||||
@@ -1,13 +0,0 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
|
||||
def configuration(parent_package='io',top_path=None):
|
||||
from numpy.distutils.misc_util import Configuration
|
||||
config = Configuration('arff', parent_package, top_path)
|
||||
config.add_data_dir('tests')
|
||||
return config
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
from numpy.distutils.core import setup
|
||||
setup(**configuration(top_path='').todict())
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -1,225 +0,0 @@
|
||||
% 1. Title: Iris Plants Database
|
||||
%
|
||||
% 2. Sources:
|
||||
% (a) Creator: R.A. Fisher
|
||||
% (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
|
||||
% (c) Date: July, 1988
|
||||
%
|
||||
% 3. Past Usage:
|
||||
% - Publications: too many to mention!!! Here are a few.
|
||||
% 1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"
|
||||
% Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions
|
||||
% to Mathematical Statistics" (John Wiley, NY, 1950).
|
||||
% 2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
|
||||
% (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
|
||||
% 3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
|
||||
% Structure and Classification Rule for Recognition in Partially Exposed
|
||||
% Environments". IEEE Transactions on Pattern Analysis and Machine
|
||||
% Intelligence, Vol. PAMI-2, No. 1, 67-71.
|
||||
% -- Results:
|
||||
% -- very low misclassification rates (0% for the setosa class)
|
||||
% 4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE
|
||||
% Transactions on Information Theory, May 1972, 431-433.
|
||||
% -- Results:
|
||||
% -- very low misclassification rates again
|
||||
% 5. See also: 1988 MLC Proceedings, 54-64. Cheeseman et al's AUTOCLASS II
|
||||
% conceptual clustering system finds 3 classes in the data.
|
||||
%
|
||||
% 4. Relevant Information:
|
||||
% --- This is perhaps the best known database to be found in the pattern
|
||||
% recognition literature. Fisher's paper is a classic in the field
|
||||
% and is referenced frequently to this day. (See Duda & Hart, for
|
||||
% example.) The data set contains 3 classes of 50 instances each,
|
||||
% where each class refers to a type of iris plant. One class is
|
||||
% linearly separable from the other 2; the latter are NOT linearly
|
||||
% separable from each other.
|
||||
% --- Predicted attribute: class of iris plant.
|
||||
% --- This is an exceedingly simple domain.
|
||||
%
|
||||
% 5. Number of Instances: 150 (50 in each of three classes)
|
||||
%
|
||||
% 6. Number of Attributes: 4 numeric, predictive attributes and the class
|
||||
%
|
||||
% 7. Attribute Information:
|
||||
% 1. sepal length in cm
|
||||
% 2. sepal width in cm
|
||||
% 3. petal length in cm
|
||||
% 4. petal width in cm
|
||||
% 5. class:
|
||||
% -- Iris Setosa
|
||||
% -- Iris Versicolour
|
||||
% -- Iris Virginica
|
||||
%
|
||||
% 8. Missing Attribute Values: None
|
||||
%
|
||||
% Summary Statistics:
|
||||
% Min Max Mean SD Class Correlation
|
||||
% sepal length: 4.3 7.9 5.84 0.83 0.7826
|
||||
% sepal width: 2.0 4.4 3.05 0.43 -0.4194
|
||||
% petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)
|
||||
% petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)
|
||||
%
|
||||
% 9. Class Distribution: 33.3% for each of 3 classes.
|
||||
|
||||
@RELATION iris
|
||||
|
||||
@ATTRIBUTE sepallength REAL
|
||||
@ATTRIBUTE sepalwidth REAL
|
||||
@ATTRIBUTE petallength REAL
|
||||
@ATTRIBUTE petalwidth REAL
|
||||
@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
|
||||
|
||||
@DATA
|
||||
5.1,3.5,1.4,0.2,Iris-setosa
|
||||
4.9,3.0,1.4,0.2,Iris-setosa
|
||||
4.7,3.2,1.3,0.2,Iris-setosa
|
||||
4.6,3.1,1.5,0.2,Iris-setosa
|
||||
5.0,3.6,1.4,0.2,Iris-setosa
|
||||
5.4,3.9,1.7,0.4,Iris-setosa
|
||||
4.6,3.4,1.4,0.3,Iris-setosa
|
||||
5.0,3.4,1.5,0.2,Iris-setosa
|
||||
4.4,2.9,1.4,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
5.4,3.7,1.5,0.2,Iris-setosa
|
||||
4.8,3.4,1.6,0.2,Iris-setosa
|
||||
4.8,3.0,1.4,0.1,Iris-setosa
|
||||
4.3,3.0,1.1,0.1,Iris-setosa
|
||||
5.8,4.0,1.2,0.2,Iris-setosa
|
||||
5.7,4.4,1.5,0.4,Iris-setosa
|
||||
5.4,3.9,1.3,0.4,Iris-setosa
|
||||
5.1,3.5,1.4,0.3,Iris-setosa
|
||||
5.7,3.8,1.7,0.3,Iris-setosa
|
||||
5.1,3.8,1.5,0.3,Iris-setosa
|
||||
5.4,3.4,1.7,0.2,Iris-setosa
|
||||
5.1,3.7,1.5,0.4,Iris-setosa
|
||||
4.6,3.6,1.0,0.2,Iris-setosa
|
||||
5.1,3.3,1.7,0.5,Iris-setosa
|
||||
4.8,3.4,1.9,0.2,Iris-setosa
|
||||
5.0,3.0,1.6,0.2,Iris-setosa
|
||||
5.0,3.4,1.6,0.4,Iris-setosa
|
||||
5.2,3.5,1.5,0.2,Iris-setosa
|
||||
5.2,3.4,1.4,0.2,Iris-setosa
|
||||
4.7,3.2,1.6,0.2,Iris-setosa
|
||||
4.8,3.1,1.6,0.2,Iris-setosa
|
||||
5.4,3.4,1.5,0.4,Iris-setosa
|
||||
5.2,4.1,1.5,0.1,Iris-setosa
|
||||
5.5,4.2,1.4,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
5.0,3.2,1.2,0.2,Iris-setosa
|
||||
5.5,3.5,1.3,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
4.4,3.0,1.3,0.2,Iris-setosa
|
||||
5.1,3.4,1.5,0.2,Iris-setosa
|
||||
5.0,3.5,1.3,0.3,Iris-setosa
|
||||
4.5,2.3,1.3,0.3,Iris-setosa
|
||||
4.4,3.2,1.3,0.2,Iris-setosa
|
||||
5.0,3.5,1.6,0.6,Iris-setosa
|
||||
5.1,3.8,1.9,0.4,Iris-setosa
|
||||
4.8,3.0,1.4,0.3,Iris-setosa
|
||||
5.1,3.8,1.6,0.2,Iris-setosa
|
||||
4.6,3.2,1.4,0.2,Iris-setosa
|
||||
5.3,3.7,1.5,0.2,Iris-setosa
|
||||
5.0,3.3,1.4,0.2,Iris-setosa
|
||||
7.0,3.2,4.7,1.4,Iris-versicolor
|
||||
6.4,3.2,4.5,1.5,Iris-versicolor
|
||||
6.9,3.1,4.9,1.5,Iris-versicolor
|
||||
5.5,2.3,4.0,1.3,Iris-versicolor
|
||||
6.5,2.8,4.6,1.5,Iris-versicolor
|
||||
5.7,2.8,4.5,1.3,Iris-versicolor
|
||||
6.3,3.3,4.7,1.6,Iris-versicolor
|
||||
4.9,2.4,3.3,1.0,Iris-versicolor
|
||||
6.6,2.9,4.6,1.3,Iris-versicolor
|
||||
5.2,2.7,3.9,1.4,Iris-versicolor
|
||||
5.0,2.0,3.5,1.0,Iris-versicolor
|
||||
5.9,3.0,4.2,1.5,Iris-versicolor
|
||||
6.0,2.2,4.0,1.0,Iris-versicolor
|
||||
6.1,2.9,4.7,1.4,Iris-versicolor
|
||||
5.6,2.9,3.6,1.3,Iris-versicolor
|
||||
6.7,3.1,4.4,1.4,Iris-versicolor
|
||||
5.6,3.0,4.5,1.5,Iris-versicolor
|
||||
5.8,2.7,4.1,1.0,Iris-versicolor
|
||||
6.2,2.2,4.5,1.5,Iris-versicolor
|
||||
5.6,2.5,3.9,1.1,Iris-versicolor
|
||||
5.9,3.2,4.8,1.8,Iris-versicolor
|
||||
6.1,2.8,4.0,1.3,Iris-versicolor
|
||||
6.3,2.5,4.9,1.5,Iris-versicolor
|
||||
6.1,2.8,4.7,1.2,Iris-versicolor
|
||||
6.4,2.9,4.3,1.3,Iris-versicolor
|
||||
6.6,3.0,4.4,1.4,Iris-versicolor
|
||||
6.8,2.8,4.8,1.4,Iris-versicolor
|
||||
6.7,3.0,5.0,1.7,Iris-versicolor
|
||||
6.0,2.9,4.5,1.5,Iris-versicolor
|
||||
5.7,2.6,3.5,1.0,Iris-versicolor
|
||||
5.5,2.4,3.8,1.1,Iris-versicolor
|
||||
5.5,2.4,3.7,1.0,Iris-versicolor
|
||||
5.8,2.7,3.9,1.2,Iris-versicolor
|
||||
6.0,2.7,5.1,1.6,Iris-versicolor
|
||||
5.4,3.0,4.5,1.5,Iris-versicolor
|
||||
6.0,3.4,4.5,1.6,Iris-versicolor
|
||||
6.7,3.1,4.7,1.5,Iris-versicolor
|
||||
6.3,2.3,4.4,1.3,Iris-versicolor
|
||||
5.6,3.0,4.1,1.3,Iris-versicolor
|
||||
5.5,2.5,4.0,1.3,Iris-versicolor
|
||||
5.5,2.6,4.4,1.2,Iris-versicolor
|
||||
6.1,3.0,4.6,1.4,Iris-versicolor
|
||||
5.8,2.6,4.0,1.2,Iris-versicolor
|
||||
5.0,2.3,3.3,1.0,Iris-versicolor
|
||||
5.6,2.7,4.2,1.3,Iris-versicolor
|
||||
5.7,3.0,4.2,1.2,Iris-versicolor
|
||||
5.7,2.9,4.2,1.3,Iris-versicolor
|
||||
6.2,2.9,4.3,1.3,Iris-versicolor
|
||||
5.1,2.5,3.0,1.1,Iris-versicolor
|
||||
5.7,2.8,4.1,1.3,Iris-versicolor
|
||||
6.3,3.3,6.0,2.5,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
7.1,3.0,5.9,2.1,Iris-virginica
|
||||
6.3,2.9,5.6,1.8,Iris-virginica
|
||||
6.5,3.0,5.8,2.2,Iris-virginica
|
||||
7.6,3.0,6.6,2.1,Iris-virginica
|
||||
4.9,2.5,4.5,1.7,Iris-virginica
|
||||
7.3,2.9,6.3,1.8,Iris-virginica
|
||||
6.7,2.5,5.8,1.8,Iris-virginica
|
||||
7.2,3.6,6.1,2.5,Iris-virginica
|
||||
6.5,3.2,5.1,2.0,Iris-virginica
|
||||
6.4,2.7,5.3,1.9,Iris-virginica
|
||||
6.8,3.0,5.5,2.1,Iris-virginica
|
||||
5.7,2.5,5.0,2.0,Iris-virginica
|
||||
5.8,2.8,5.1,2.4,Iris-virginica
|
||||
6.4,3.2,5.3,2.3,Iris-virginica
|
||||
6.5,3.0,5.5,1.8,Iris-virginica
|
||||
7.7,3.8,6.7,2.2,Iris-virginica
|
||||
7.7,2.6,6.9,2.3,Iris-virginica
|
||||
6.0,2.2,5.0,1.5,Iris-virginica
|
||||
6.9,3.2,5.7,2.3,Iris-virginica
|
||||
5.6,2.8,4.9,2.0,Iris-virginica
|
||||
7.7,2.8,6.7,2.0,Iris-virginica
|
||||
6.3,2.7,4.9,1.8,Iris-virginica
|
||||
6.7,3.3,5.7,2.1,Iris-virginica
|
||||
7.2,3.2,6.0,1.8,Iris-virginica
|
||||
6.2,2.8,4.8,1.8,Iris-virginica
|
||||
6.1,3.0,4.9,1.8,Iris-virginica
|
||||
6.4,2.8,5.6,2.1,Iris-virginica
|
||||
7.2,3.0,5.8,1.6,Iris-virginica
|
||||
7.4,2.8,6.1,1.9,Iris-virginica
|
||||
7.9,3.8,6.4,2.0,Iris-virginica
|
||||
6.4,2.8,5.6,2.2,Iris-virginica
|
||||
6.3,2.8,5.1,1.5,Iris-virginica
|
||||
6.1,2.6,5.6,1.4,Iris-virginica
|
||||
7.7,3.0,6.1,2.3,Iris-virginica
|
||||
6.3,3.4,5.6,2.4,Iris-virginica
|
||||
6.4,3.1,5.5,1.8,Iris-virginica
|
||||
6.0,3.0,4.8,1.8,Iris-virginica
|
||||
6.9,3.1,5.4,2.1,Iris-virginica
|
||||
6.7,3.1,5.6,2.4,Iris-virginica
|
||||
6.9,3.1,5.1,2.3,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
6.8,3.2,5.9,2.3,Iris-virginica
|
||||
6.7,3.3,5.7,2.5,Iris-virginica
|
||||
6.7,3.0,5.2,2.3,Iris-virginica
|
||||
6.3,2.5,5.0,1.9,Iris-virginica
|
||||
6.5,3.0,5.2,2.0,Iris-virginica
|
||||
6.2,3.4,5.4,2.3,Iris-virginica
|
||||
5.9,3.0,5.1,1.8,Iris-virginica
|
||||
%
|
||||
%
|
||||
%
|
||||
@@ -1,8 +0,0 @@
|
||||
% This arff file contains some missing data
|
||||
@relation missing
|
||||
@attribute yop real
|
||||
@attribute yap real
|
||||
@data
|
||||
1,5
|
||||
2,4
|
||||
?,?
|
||||
@@ -1,11 +0,0 @@
|
||||
@RELATION iris
|
||||
|
||||
@ATTRIBUTE sepallength REAL
|
||||
@ATTRIBUTE sepalwidth REAL
|
||||
@ATTRIBUTE petallength REAL
|
||||
@ATTRIBUTE petalwidth REAL
|
||||
@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
|
||||
|
||||
@DATA
|
||||
|
||||
% This file has no data
|
||||
@@ -1,10 +0,0 @@
|
||||
@RELATION test1
|
||||
|
||||
@ATTRIBUTE attr0 REAL
|
||||
@ATTRIBUTE attr1 REAL
|
||||
@ATTRIBUTE attr2 REAL
|
||||
@ATTRIBUTE attr3 REAL
|
||||
@ATTRIBUTE class {class0, class1, class2, class3}
|
||||
|
||||
@DATA
|
||||
0.1, 0.2, 0.3, 0.4,class1
|
||||
@@ -1,15 +0,0 @@
|
||||
@RELATION test2
|
||||
|
||||
@ATTRIBUTE attr0 REAL
|
||||
@ATTRIBUTE attr1 real
|
||||
@ATTRIBUTE attr2 integer
|
||||
@ATTRIBUTE attr3 Integer
|
||||
@ATTRIBUTE attr4 Numeric
|
||||
@ATTRIBUTE attr5 numeric
|
||||
@ATTRIBUTE attr6 string
|
||||
@ATTRIBUTE attr7 STRING
|
||||
@ATTRIBUTE attr8 {bla}
|
||||
@ATTRIBUTE attr9 {bla, bla}
|
||||
|
||||
@DATA
|
||||
0.1, 0.2, 0.3, 0.4,class1
|
||||
@@ -1,6 +0,0 @@
|
||||
@RELATION test3
|
||||
|
||||
@ATTRIBUTE attr0 crap
|
||||
|
||||
@DATA
|
||||
0.1, 0.2, 0.3, 0.4,class1
|
||||
@@ -1,11 +0,0 @@
|
||||
@RELATION test5
|
||||
|
||||
@ATTRIBUTE attr0 REAL
|
||||
@ATTRIBUTE attr1 REAL
|
||||
@ATTRIBUTE attr2 REAL
|
||||
@ATTRIBUTE attr3 REAL
|
||||
@ATTRIBUTE class {class0, class1, class2, class3}
|
||||
@DATA
|
||||
0.1, 0.2, 0.3, 0.4,class1
|
||||
-0.1, -0.2, -0.3, -0.4,class2
|
||||
1, 2, 3, 4,class3
|
||||
@@ -1,26 +0,0 @@
|
||||
@RELATION test4
|
||||
|
||||
@ATTRIBUTE attr0 REAL
|
||||
@ATTRIBUTE attr1 REAL
|
||||
@ATTRIBUTE attr2 REAL
|
||||
@ATTRIBUTE attr3 REAL
|
||||
@ATTRIBUTE class {class0, class1, class2, class3}
|
||||
|
||||
@DATA
|
||||
|
||||
% lsdflkjhaksjdhf
|
||||
|
||||
% lsdflkjhaksjdhf
|
||||
|
||||
0.1, 0.2, 0.3, 0.4,class1
|
||||
% laksjdhf
|
||||
|
||||
% lsdflkjhaksjdhf
|
||||
-0.1, -0.2, -0.3, -0.4,class2
|
||||
|
||||
% lsdflkjhaksjdhf
|
||||
% lsdflkjhaksjdhf
|
||||
|
||||
% lsdflkjhaksjdhf
|
||||
|
||||
1, 2, 3, 4,class3
|
||||
@@ -1,12 +0,0 @@
|
||||
@RELATION test6
|
||||
|
||||
@ATTRIBUTE attr0 REAL
|
||||
@ATTRIBUTE attr1 REAL
|
||||
@ATTRIBUTE attr2 REAL
|
||||
@ATTRIBUTE attr3 REAL
|
||||
@ATTRIBUTE class {C}
|
||||
|
||||
@DATA
|
||||
0.1, 0.2, 0.3, 0.4,C
|
||||
-0.1, -0.2, -0.3, -0.4,C
|
||||
1, 2, 3, 4,C
|
||||
@@ -1,15 +0,0 @@
|
||||
@RELATION test7
|
||||
|
||||
@ATTRIBUTE attr_year DATE yyyy
|
||||
@ATTRIBUTE attr_month DATE yyyy-MM
|
||||
@ATTRIBUTE attr_date DATE yyyy-MM-dd
|
||||
@ATTRIBUTE attr_datetime_local DATE "yyyy-MM-dd HH:mm"
|
||||
@ATTRIBUTE attr_datetime_missing DATE "yyyy-MM-dd HH:mm"
|
||||
|
||||
@DATA
|
||||
1999,1999-01,1999-01-31,"1999-01-31 00:01",?
|
||||
2004,2004-12,2004-12-01,"2004-12-01 23:59","2004-12-01 23:59"
|
||||
1817,1817-04,1817-04-28,"1817-04-28 13:00",?
|
||||
2100,2100-09,2100-09-10,"2100-09-10 12:00",?
|
||||
2013,2013-11,2013-11-30,"2013-11-30 04:55","2013-11-30 04:55"
|
||||
1631,1631-10,1631-10-15,"1631-10-15 20:04","1631-10-15 20:04"
|
||||
@@ -1,12 +0,0 @@
|
||||
@RELATION test8
|
||||
|
||||
@ATTRIBUTE attr_datetime_utc DATE "yyyy-MM-dd HH:mm Z"
|
||||
@ATTRIBUTE attr_datetime_full DATE "yy-MM-dd HH:mm:ss z"
|
||||
|
||||
@DATA
|
||||
"1999-01-31 00:01 UTC","99-01-31 00:01:08 +0430"
|
||||
"2004-12-01 23:59 UTC","04-12-01 23:59:59 -0800"
|
||||
"1817-04-28 13:00 UTC","17-04-28 13:00:33 +1000"
|
||||
"2100-09-10 12:00 UTC","21-09-10 12:00:21 -0300"
|
||||
"2013-11-30 04:55 UTC","13-11-30 04:55:48 -1100"
|
||||
"1631-10-15 20:04 UTC","31-10-15 20:04:10 +0000"
|
||||
@@ -1,259 +0,0 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import datetime
|
||||
import os
|
||||
import sys
|
||||
from os.path import join as pjoin
|
||||
|
||||
if sys.version_info[0] >= 3:
|
||||
from io import StringIO
|
||||
else:
|
||||
from cStringIO import StringIO
|
||||
|
||||
import numpy as np
|
||||
|
||||
from numpy.testing import (assert_array_almost_equal,
|
||||
assert_array_equal, assert_equal, assert_)
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
from scipy.io.arff.arffread import loadarff
|
||||
from scipy.io.arff.arffread import read_header, parse_type, ParseArffError
|
||||
|
||||
|
||||
data_path = pjoin(os.path.dirname(__file__), 'data')
|
||||
|
||||
test1 = pjoin(data_path, 'test1.arff')
|
||||
test2 = pjoin(data_path, 'test2.arff')
|
||||
test3 = pjoin(data_path, 'test3.arff')
|
||||
|
||||
test4 = pjoin(data_path, 'test4.arff')
|
||||
test5 = pjoin(data_path, 'test5.arff')
|
||||
test6 = pjoin(data_path, 'test6.arff')
|
||||
test7 = pjoin(data_path, 'test7.arff')
|
||||
test8 = pjoin(data_path, 'test8.arff')
|
||||
expect4_data = [(0.1, 0.2, 0.3, 0.4, 'class1'),
|
||||
(-0.1, -0.2, -0.3, -0.4, 'class2'),
|
||||
(1, 2, 3, 4, 'class3')]
|
||||
expected_types = ['numeric', 'numeric', 'numeric', 'numeric', 'nominal']
|
||||
|
||||
missing = pjoin(data_path, 'missing.arff')
|
||||
expect_missing_raw = np.array([[1, 5], [2, 4], [np.nan, np.nan]])
|
||||
expect_missing = np.empty(3, [('yop', float), ('yap', float)])
|
||||
expect_missing['yop'] = expect_missing_raw[:, 0]
|
||||
expect_missing['yap'] = expect_missing_raw[:, 1]
|
||||
|
||||
|
||||
class TestData(object):
|
||||
def test1(self):
|
||||
# Parsing trivial file with nothing.
|
||||
self._test(test4)
|
||||
|
||||
def test2(self):
|
||||
# Parsing trivial file with some comments in the data section.
|
||||
self._test(test5)
|
||||
|
||||
def test3(self):
|
||||
# Parsing trivial file with nominal attribute of 1 character.
|
||||
self._test(test6)
|
||||
|
||||
def _test(self, test_file):
|
||||
data, meta = loadarff(test_file)
|
||||
for i in range(len(data)):
|
||||
for j in range(4):
|
||||
assert_array_almost_equal(expect4_data[i][j], data[i][j])
|
||||
assert_equal(meta.types(), expected_types)
|
||||
|
||||
def test_filelike(self):
|
||||
# Test reading from file-like object (StringIO)
|
||||
f1 = open(test1)
|
||||
data1, meta1 = loadarff(f1)
|
||||
f1.close()
|
||||
f2 = open(test1)
|
||||
data2, meta2 = loadarff(StringIO(f2.read()))
|
||||
f2.close()
|
||||
assert_(data1 == data2)
|
||||
assert_(repr(meta1) == repr(meta2))
|
||||
|
||||
@pytest.mark.skipif(sys.version_info < (3, 6),
|
||||
reason='Passing path-like objects to IO functions requires Python >= 3.6')
|
||||
def test_path(self):
|
||||
# Test reading from `pathlib.Path` object
|
||||
from pathlib import Path
|
||||
|
||||
with open(test1) as f1:
|
||||
data1, meta1 = loadarff(f1)
|
||||
|
||||
data2, meta2 = loadarff(Path(test1))
|
||||
|
||||
assert_(data1 == data2)
|
||||
assert_(repr(meta1) == repr(meta2))
|
||||
|
||||
class TestMissingData(object):
|
||||
def test_missing(self):
|
||||
data, meta = loadarff(missing)
|
||||
for i in ['yop', 'yap']:
|
||||
assert_array_almost_equal(data[i], expect_missing[i])
|
||||
|
||||
|
||||
class TestNoData(object):
|
||||
def test_nodata(self):
|
||||
# The file nodata.arff has no data in the @DATA section.
|
||||
# Reading it should result in an array with length 0.
|
||||
nodata_filename = os.path.join(data_path, 'nodata.arff')
|
||||
data, meta = loadarff(nodata_filename)
|
||||
expected_dtype = np.dtype([('sepallength', '<f8'),
|
||||
('sepalwidth', '<f8'),
|
||||
('petallength', '<f8'),
|
||||
('petalwidth', '<f8'),
|
||||
('class', 'S15')])
|
||||
assert_equal(data.dtype, expected_dtype)
|
||||
assert_equal(data.size, 0)
|
||||
|
||||
|
||||
class TestHeader(object):
|
||||
def test_type_parsing(self):
|
||||
# Test parsing type of attribute from their value.
|
||||
ofile = open(test2)
|
||||
rel, attrs = read_header(ofile)
|
||||
ofile.close()
|
||||
|
||||
expected = ['numeric', 'numeric', 'numeric', 'numeric', 'numeric',
|
||||
'numeric', 'string', 'string', 'nominal', 'nominal']
|
||||
|
||||
for i in range(len(attrs)):
|
||||
assert_(parse_type(attrs[i][1]) == expected[i])
|
||||
|
||||
def test_badtype_parsing(self):
|
||||
# Test parsing wrong type of attribute from their value.
|
||||
ofile = open(test3)
|
||||
rel, attrs = read_header(ofile)
|
||||
ofile.close()
|
||||
|
||||
for name, value in attrs:
|
||||
assert_raises(ParseArffError, parse_type, value)
|
||||
|
||||
def test_fullheader1(self):
|
||||
# Parsing trivial header with nothing.
|
||||
ofile = open(test1)
|
||||
rel, attrs = read_header(ofile)
|
||||
ofile.close()
|
||||
|
||||
# Test relation
|
||||
assert_(rel == 'test1')
|
||||
|
||||
# Test numerical attributes
|
||||
assert_(len(attrs) == 5)
|
||||
for i in range(4):
|
||||
assert_(attrs[i][0] == 'attr%d' % i)
|
||||
assert_(attrs[i][1] == 'REAL')
|
||||
|
||||
# Test nominal attribute
|
||||
assert_(attrs[4][0] == 'class')
|
||||
assert_(attrs[4][1] == '{class0, class1, class2, class3}')
|
||||
|
||||
def test_dateheader(self):
|
||||
ofile = open(test7)
|
||||
rel, attrs = read_header(ofile)
|
||||
ofile.close()
|
||||
|
||||
assert_(rel == 'test7')
|
||||
|
||||
assert_(len(attrs) == 5)
|
||||
|
||||
assert_(attrs[0][0] == 'attr_year')
|
||||
assert_(attrs[0][1] == 'DATE yyyy')
|
||||
|
||||
assert_(attrs[1][0] == 'attr_month')
|
||||
assert_(attrs[1][1] == 'DATE yyyy-MM')
|
||||
|
||||
assert_(attrs[2][0] == 'attr_date')
|
||||
assert_(attrs[2][1] == 'DATE yyyy-MM-dd')
|
||||
|
||||
assert_(attrs[3][0] == 'attr_datetime_local')
|
||||
assert_(attrs[3][1] == 'DATE "yyyy-MM-dd HH:mm"')
|
||||
|
||||
assert_(attrs[4][0] == 'attr_datetime_missing')
|
||||
assert_(attrs[4][1] == 'DATE "yyyy-MM-dd HH:mm"')
|
||||
|
||||
def test_dateheader_unsupported(self):
|
||||
ofile = open(test8)
|
||||
rel, attrs = read_header(ofile)
|
||||
ofile.close()
|
||||
|
||||
assert_(rel == 'test8')
|
||||
|
||||
assert_(len(attrs) == 2)
|
||||
assert_(attrs[0][0] == 'attr_datetime_utc')
|
||||
assert_(attrs[0][1] == 'DATE "yyyy-MM-dd HH:mm Z"')
|
||||
|
||||
assert_(attrs[1][0] == 'attr_datetime_full')
|
||||
assert_(attrs[1][1] == 'DATE "yy-MM-dd HH:mm:ss z"')
|
||||
|
||||
|
||||
class TestDateAttribute(object):
|
||||
def setup_method(self):
|
||||
self.data, self.meta = loadarff(test7)
|
||||
|
||||
def test_year_attribute(self):
|
||||
expected = np.array([
|
||||
'1999',
|
||||
'2004',
|
||||
'1817',
|
||||
'2100',
|
||||
'2013',
|
||||
'1631'
|
||||
], dtype='datetime64[Y]')
|
||||
|
||||
assert_array_equal(self.data["attr_year"], expected)
|
||||
|
||||
def test_month_attribute(self):
|
||||
expected = np.array([
|
||||
'1999-01',
|
||||
'2004-12',
|
||||
'1817-04',
|
||||
'2100-09',
|
||||
'2013-11',
|
||||
'1631-10'
|
||||
], dtype='datetime64[M]')
|
||||
|
||||
assert_array_equal(self.data["attr_month"], expected)
|
||||
|
||||
def test_date_attribute(self):
|
||||
expected = np.array([
|
||||
'1999-01-31',
|
||||
'2004-12-01',
|
||||
'1817-04-28',
|
||||
'2100-09-10',
|
||||
'2013-11-30',
|
||||
'1631-10-15'
|
||||
], dtype='datetime64[D]')
|
||||
|
||||
assert_array_equal(self.data["attr_date"], expected)
|
||||
|
||||
def test_datetime_local_attribute(self):
|
||||
expected = np.array([
|
||||
datetime.datetime(year=1999, month=1, day=31, hour=0, minute=1),
|
||||
datetime.datetime(year=2004, month=12, day=1, hour=23, minute=59),
|
||||
datetime.datetime(year=1817, month=4, day=28, hour=13, minute=0),
|
||||
datetime.datetime(year=2100, month=9, day=10, hour=12, minute=0),
|
||||
datetime.datetime(year=2013, month=11, day=30, hour=4, minute=55),
|
||||
datetime.datetime(year=1631, month=10, day=15, hour=20, minute=4)
|
||||
], dtype='datetime64[m]')
|
||||
|
||||
assert_array_equal(self.data["attr_datetime_local"], expected)
|
||||
|
||||
def test_datetime_missing(self):
|
||||
expected = np.array([
|
||||
'nat',
|
||||
'2004-12-01T23:59',
|
||||
'nat',
|
||||
'nat',
|
||||
'2013-11-30T04:55',
|
||||
'1631-10-15T20:04'
|
||||
], dtype='datetime64[m]')
|
||||
|
||||
assert_array_equal(self.data["attr_datetime_missing"], expected)
|
||||
|
||||
def test_datetime_timezone(self):
|
||||
assert_raises(ValueError, loadarff, test8)
|
||||
@@ -1,4 +0,0 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
from scipy.io.harwell_boeing.hb import MalformedHeader, HBInfo, HBFile, \
|
||||
HBMatrixType, hb_read, hb_write
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
-312
@@ -1,312 +0,0 @@
|
||||
"""
|
||||
Preliminary module to handle fortran formats for IO. Does not use this outside
|
||||
scipy.sparse io for now, until the API is deemed reasonable.
|
||||
|
||||
The *Format classes handle conversion between fortran and python format, and
|
||||
FortranFormatParser can create *Format instances from raw fortran format
|
||||
strings (e.g. '(3I4)', '(10I3)', etc...)
|
||||
"""
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
__all__ = ["BadFortranFormat", "FortranFormatParser", "IntFormat", "ExpFormat"]
|
||||
|
||||
|
||||
TOKENS = {
|
||||
"LPAR": r"\(",
|
||||
"RPAR": r"\)",
|
||||
"INT_ID": r"I",
|
||||
"EXP_ID": r"E",
|
||||
"INT": r"\d+",
|
||||
"DOT": r"\.",
|
||||
}
|
||||
|
||||
|
||||
class BadFortranFormat(SyntaxError):
|
||||
pass
|
||||
|
||||
|
||||
def number_digits(n):
|
||||
return int(np.floor(np.log10(np.abs(n))) + 1)
|
||||
|
||||
|
||||
class IntFormat(object):
|
||||
@classmethod
|
||||
def from_number(cls, n, min=None):
|
||||
"""Given an integer, returns a "reasonable" IntFormat instance to represent
|
||||
any number between 0 and n if n > 0, -n and n if n < 0
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n : int
|
||||
max number one wants to be able to represent
|
||||
min : int
|
||||
minimum number of characters to use for the format
|
||||
|
||||
Returns
|
||||
-------
|
||||
res : IntFormat
|
||||
IntFormat instance with reasonable (see Notes) computed width
|
||||
|
||||
Notes
|
||||
-----
|
||||
Reasonable should be understood as the minimal string length necessary
|
||||
without losing precision. For example, IntFormat.from_number(1) will
|
||||
return an IntFormat instance of width 2, so that any 0 and 1 may be
|
||||
represented as 1-character strings without loss of information.
|
||||
"""
|
||||
width = number_digits(n) + 1
|
||||
if n < 0:
|
||||
width += 1
|
||||
repeat = 80 // width
|
||||
return cls(width, min, repeat=repeat)
|
||||
|
||||
def __init__(self, width, min=None, repeat=None):
|
||||
self.width = width
|
||||
self.repeat = repeat
|
||||
self.min = min
|
||||
|
||||
def __repr__(self):
|
||||
r = "IntFormat("
|
||||
if self.repeat:
|
||||
r += "%d" % self.repeat
|
||||
r += "I%d" % self.width
|
||||
if self.min:
|
||||
r += ".%d" % self.min
|
||||
return r + ")"
|
||||
|
||||
@property
|
||||
def fortran_format(self):
|
||||
r = "("
|
||||
if self.repeat:
|
||||
r += "%d" % self.repeat
|
||||
r += "I%d" % self.width
|
||||
if self.min:
|
||||
r += ".%d" % self.min
|
||||
return r + ")"
|
||||
|
||||
@property
|
||||
def python_format(self):
|
||||
return "%" + str(self.width) + "d"
|
||||
|
||||
|
||||
class ExpFormat(object):
|
||||
@classmethod
|
||||
def from_number(cls, n, min=None):
|
||||
"""Given a float number, returns a "reasonable" ExpFormat instance to
|
||||
represent any number between -n and n.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n : float
|
||||
max number one wants to be able to represent
|
||||
min : int
|
||||
minimum number of characters to use for the format
|
||||
|
||||
Returns
|
||||
-------
|
||||
res : ExpFormat
|
||||
ExpFormat instance with reasonable (see Notes) computed width
|
||||
|
||||
Notes
|
||||
-----
|
||||
Reasonable should be understood as the minimal string length necessary
|
||||
to avoid losing precision.
|
||||
"""
|
||||
# len of one number in exp format: sign + 1|0 + "." +
|
||||
# number of digit for fractional part + 'E' + sign of exponent +
|
||||
# len of exponent
|
||||
finfo = np.finfo(n.dtype)
|
||||
# Number of digits for fractional part
|
||||
n_prec = finfo.precision + 1
|
||||
# Number of digits for exponential part
|
||||
n_exp = number_digits(np.max(np.abs([finfo.maxexp, finfo.minexp])))
|
||||
width = 1 + 1 + n_prec + 1 + n_exp + 1
|
||||
if n < 0:
|
||||
width += 1
|
||||
repeat = int(np.floor(80 / width))
|
||||
return cls(width, n_prec, min, repeat=repeat)
|
||||
|
||||
def __init__(self, width, significand, min=None, repeat=None):
|
||||
"""\
|
||||
Parameters
|
||||
----------
|
||||
width : int
|
||||
number of characters taken by the string (includes space).
|
||||
"""
|
||||
self.width = width
|
||||
self.significand = significand
|
||||
self.repeat = repeat
|
||||
self.min = min
|
||||
|
||||
def __repr__(self):
|
||||
r = "ExpFormat("
|
||||
if self.repeat:
|
||||
r += "%d" % self.repeat
|
||||
r += "E%d.%d" % (self.width, self.significand)
|
||||
if self.min:
|
||||
r += "E%d" % self.min
|
||||
return r + ")"
|
||||
|
||||
@property
|
||||
def fortran_format(self):
|
||||
r = "("
|
||||
if self.repeat:
|
||||
r += "%d" % self.repeat
|
||||
r += "E%d.%d" % (self.width, self.significand)
|
||||
if self.min:
|
||||
r += "E%d" % self.min
|
||||
return r + ")"
|
||||
|
||||
@property
|
||||
def python_format(self):
|
||||
return "%" + str(self.width-1) + "." + str(self.significand) + "E"
|
||||
|
||||
|
||||
class Token(object):
|
||||
def __init__(self, type, value, pos):
|
||||
self.type = type
|
||||
self.value = value
|
||||
self.pos = pos
|
||||
|
||||
def __str__(self):
|
||||
return """Token('%s', "%s")""" % (self.type, self.value)
|
||||
|
||||
def __repr__(self):
|
||||
return self.__str__()
|
||||
|
||||
|
||||
class Tokenizer(object):
|
||||
def __init__(self):
|
||||
self.tokens = list(TOKENS.keys())
|
||||
self.res = [re.compile(TOKENS[i]) for i in self.tokens]
|
||||
|
||||
def input(self, s):
|
||||
self.data = s
|
||||
self.curpos = 0
|
||||
self.len = len(s)
|
||||
|
||||
def next_token(self):
|
||||
curpos = self.curpos
|
||||
tokens = self.tokens
|
||||
|
||||
while curpos < self.len:
|
||||
for i, r in enumerate(self.res):
|
||||
m = r.match(self.data, curpos)
|
||||
if m is None:
|
||||
continue
|
||||
else:
|
||||
self.curpos = m.end()
|
||||
return Token(self.tokens[i], m.group(), self.curpos)
|
||||
raise SyntaxError("Unknown character at position %d (%s)"
|
||||
% (self.curpos, self.data[curpos]))
|
||||
|
||||
|
||||
# Grammar for fortran format:
|
||||
# format : LPAR format_string RPAR
|
||||
# format_string : repeated | simple
|
||||
# repeated : repeat simple
|
||||
# simple : int_fmt | exp_fmt
|
||||
# int_fmt : INT_ID width
|
||||
# exp_fmt : simple_exp_fmt
|
||||
# simple_exp_fmt : EXP_ID width DOT significand
|
||||
# extended_exp_fmt : EXP_ID width DOT significand EXP_ID ndigits
|
||||
# repeat : INT
|
||||
# width : INT
|
||||
# significand : INT
|
||||
# ndigits : INT
|
||||
|
||||
# Naive fortran formatter - parser is hand-made
|
||||
class FortranFormatParser(object):
|
||||
"""Parser for fortran format strings. The parse method returns a *Format
|
||||
instance.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Only ExpFormat (exponential format for floating values) and IntFormat
|
||||
(integer format) for now.
|
||||
"""
|
||||
def __init__(self):
|
||||
self.tokenizer = Tokenizer()
|
||||
|
||||
def parse(self, s):
|
||||
self.tokenizer.input(s)
|
||||
|
||||
tokens = []
|
||||
|
||||
try:
|
||||
while True:
|
||||
t = self.tokenizer.next_token()
|
||||
if t is None:
|
||||
break
|
||||
else:
|
||||
tokens.append(t)
|
||||
return self._parse_format(tokens)
|
||||
except SyntaxError as e:
|
||||
raise BadFortranFormat(str(e))
|
||||
|
||||
def _get_min(self, tokens):
|
||||
next = tokens.pop(0)
|
||||
if not next.type == "DOT":
|
||||
raise SyntaxError()
|
||||
next = tokens.pop(0)
|
||||
return next.value
|
||||
|
||||
def _expect(self, token, tp):
|
||||
if not token.type == tp:
|
||||
raise SyntaxError()
|
||||
|
||||
def _parse_format(self, tokens):
|
||||
if not tokens[0].type == "LPAR":
|
||||
raise SyntaxError("Expected left parenthesis at position "
|
||||
"%d (got '%s')" % (0, tokens[0].value))
|
||||
elif not tokens[-1].type == "RPAR":
|
||||
raise SyntaxError("Expected right parenthesis at position "
|
||||
"%d (got '%s')" % (len(tokens), tokens[-1].value))
|
||||
|
||||
tokens = tokens[1:-1]
|
||||
types = [t.type for t in tokens]
|
||||
if types[0] == "INT":
|
||||
repeat = int(tokens.pop(0).value)
|
||||
else:
|
||||
repeat = None
|
||||
|
||||
next = tokens.pop(0)
|
||||
if next.type == "INT_ID":
|
||||
next = self._next(tokens, "INT")
|
||||
width = int(next.value)
|
||||
if tokens:
|
||||
min = int(self._get_min(tokens))
|
||||
else:
|
||||
min = None
|
||||
return IntFormat(width, min, repeat)
|
||||
elif next.type == "EXP_ID":
|
||||
next = self._next(tokens, "INT")
|
||||
width = int(next.value)
|
||||
|
||||
next = self._next(tokens, "DOT")
|
||||
|
||||
next = self._next(tokens, "INT")
|
||||
significand = int(next.value)
|
||||
|
||||
if tokens:
|
||||
next = self._next(tokens, "EXP_ID")
|
||||
|
||||
next = self._next(tokens, "INT")
|
||||
min = int(next.value)
|
||||
else:
|
||||
min = None
|
||||
return ExpFormat(width, significand, min, repeat)
|
||||
else:
|
||||
raise SyntaxError("Invalid formater type %s" % next.value)
|
||||
|
||||
def _next(self, tokens, tp):
|
||||
if not len(tokens) > 0:
|
||||
raise SyntaxError()
|
||||
next = tokens.pop(0)
|
||||
self._expect(next, tp)
|
||||
return next
|
||||
@@ -1,547 +0,0 @@
|
||||
"""
|
||||
Implementation of Harwell-Boeing read/write.
|
||||
|
||||
At the moment not the full Harwell-Boeing format is supported. Supported
|
||||
features are:
|
||||
|
||||
- assembled, non-symmetric, real matrices
|
||||
- integer for pointer/indices
|
||||
- exponential format for float values, and int format
|
||||
|
||||
"""
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
# TODO:
|
||||
# - Add more support (symmetric/complex matrices, non-assembled matrices ?)
|
||||
|
||||
# XXX: reading is reasonably efficient (>= 85 % is in numpy.fromstring), but
|
||||
# takes a lot of memory. Being faster would require compiled code.
|
||||
# write is not efficient. Although not a terribly exciting task,
|
||||
# having reusable facilities to efficiently read/write fortran-formatted files
|
||||
# would be useful outside this module.
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
from scipy.sparse import csc_matrix
|
||||
from scipy.io.harwell_boeing._fortran_format_parser import \
|
||||
FortranFormatParser, IntFormat, ExpFormat
|
||||
|
||||
__all__ = ["MalformedHeader", "hb_read", "hb_write", "HBInfo", "HBFile",
|
||||
"HBMatrixType"]
|
||||
|
||||
|
||||
class MalformedHeader(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class LineOverflow(Warning):
|
||||
pass
|
||||
|
||||
|
||||
def _nbytes_full(fmt, nlines):
|
||||
"""Return the number of bytes to read to get every full lines for the
|
||||
given parsed fortran format."""
|
||||
return (fmt.repeat * fmt.width + 1) * (nlines - 1)
|
||||
|
||||
|
||||
class HBInfo(object):
|
||||
@classmethod
|
||||
def from_data(cls, m, title="Default title", key="0", mxtype=None, fmt=None):
|
||||
"""Create a HBInfo instance from an existing sparse matrix.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
m : sparse matrix
|
||||
the HBInfo instance will derive its parameters from m
|
||||
title : str
|
||||
Title to put in the HB header
|
||||
key : str
|
||||
Key
|
||||
mxtype : HBMatrixType
|
||||
type of the input matrix
|
||||
fmt : dict
|
||||
not implemented
|
||||
|
||||
Returns
|
||||
-------
|
||||
hb_info : HBInfo instance
|
||||
"""
|
||||
m = m.tocsc(copy=False)
|
||||
|
||||
pointer = m.indptr
|
||||
indices = m.indices
|
||||
values = m.data
|
||||
|
||||
nrows, ncols = m.shape
|
||||
nnon_zeros = m.nnz
|
||||
|
||||
if fmt is None:
|
||||
# +1 because HB use one-based indexing (Fortran), and we will write
|
||||
# the indices /pointer as such
|
||||
pointer_fmt = IntFormat.from_number(np.max(pointer+1))
|
||||
indices_fmt = IntFormat.from_number(np.max(indices+1))
|
||||
|
||||
if values.dtype.kind in np.typecodes["AllFloat"]:
|
||||
values_fmt = ExpFormat.from_number(-np.max(np.abs(values)))
|
||||
elif values.dtype.kind in np.typecodes["AllInteger"]:
|
||||
values_fmt = IntFormat.from_number(-np.max(np.abs(values)))
|
||||
else:
|
||||
raise NotImplementedError("type %s not implemented yet" % values.dtype.kind)
|
||||
else:
|
||||
raise NotImplementedError("fmt argument not supported yet.")
|
||||
|
||||
if mxtype is None:
|
||||
if not np.isrealobj(values):
|
||||
raise ValueError("Complex values not supported yet")
|
||||
if values.dtype.kind in np.typecodes["AllInteger"]:
|
||||
tp = "integer"
|
||||
elif values.dtype.kind in np.typecodes["AllFloat"]:
|
||||
tp = "real"
|
||||
else:
|
||||
raise NotImplementedError("type %s for values not implemented"
|
||||
% values.dtype)
|
||||
mxtype = HBMatrixType(tp, "unsymmetric", "assembled")
|
||||
else:
|
||||
raise ValueError("mxtype argument not handled yet.")
|
||||
|
||||
def _nlines(fmt, size):
|
||||
nlines = size // fmt.repeat
|
||||
if nlines * fmt.repeat != size:
|
||||
nlines += 1
|
||||
return nlines
|
||||
|
||||
pointer_nlines = _nlines(pointer_fmt, pointer.size)
|
||||
indices_nlines = _nlines(indices_fmt, indices.size)
|
||||
values_nlines = _nlines(values_fmt, values.size)
|
||||
|
||||
total_nlines = pointer_nlines + indices_nlines + values_nlines
|
||||
|
||||
return cls(title, key,
|
||||
total_nlines, pointer_nlines, indices_nlines, values_nlines,
|
||||
mxtype, nrows, ncols, nnon_zeros,
|
||||
pointer_fmt.fortran_format, indices_fmt.fortran_format,
|
||||
values_fmt.fortran_format)
|
||||
|
||||
@classmethod
|
||||
def from_file(cls, fid):
|
||||
"""Create a HBInfo instance from a file object containing a matrix in the
|
||||
HB format.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fid : file-like matrix
|
||||
File or file-like object containing a matrix in the HB format.
|
||||
|
||||
Returns
|
||||
-------
|
||||
hb_info : HBInfo instance
|
||||
"""
|
||||
# First line
|
||||
line = fid.readline().strip("\n")
|
||||
if not len(line) > 72:
|
||||
raise ValueError("Expected at least 72 characters for first line, "
|
||||
"got: \n%s" % line)
|
||||
title = line[:72]
|
||||
key = line[72:]
|
||||
|
||||
# Second line
|
||||
line = fid.readline().strip("\n")
|
||||
if not len(line.rstrip()) >= 56:
|
||||
raise ValueError("Expected at least 56 characters for second line, "
|
||||
"got: \n%s" % line)
|
||||
total_nlines = _expect_int(line[:14])
|
||||
pointer_nlines = _expect_int(line[14:28])
|
||||
indices_nlines = _expect_int(line[28:42])
|
||||
values_nlines = _expect_int(line[42:56])
|
||||
|
||||
rhs_nlines = line[56:72].strip()
|
||||
if rhs_nlines == '':
|
||||
rhs_nlines = 0
|
||||
else:
|
||||
rhs_nlines = _expect_int(rhs_nlines)
|
||||
if not rhs_nlines == 0:
|
||||
raise ValueError("Only files without right hand side supported for "
|
||||
"now.")
|
||||
|
||||
# Third line
|
||||
line = fid.readline().strip("\n")
|
||||
if not len(line) >= 70:
|
||||
raise ValueError("Expected at least 72 character for third line, got:\n"
|
||||
"%s" % line)
|
||||
|
||||
mxtype_s = line[:3].upper()
|
||||
if not len(mxtype_s) == 3:
|
||||
raise ValueError("mxtype expected to be 3 characters long")
|
||||
|
||||
mxtype = HBMatrixType.from_fortran(mxtype_s)
|
||||
if mxtype.value_type not in ["real", "integer"]:
|
||||
raise ValueError("Only real or integer matrices supported for "
|
||||
"now (detected %s)" % mxtype)
|
||||
if not mxtype.structure == "unsymmetric":
|
||||
raise ValueError("Only unsymmetric matrices supported for "
|
||||
"now (detected %s)" % mxtype)
|
||||
if not mxtype.storage == "assembled":
|
||||
raise ValueError("Only assembled matrices supported for now")
|
||||
|
||||
if not line[3:14] == " " * 11:
|
||||
raise ValueError("Malformed data for third line: %s" % line)
|
||||
|
||||
nrows = _expect_int(line[14:28])
|
||||
ncols = _expect_int(line[28:42])
|
||||
nnon_zeros = _expect_int(line[42:56])
|
||||
nelementals = _expect_int(line[56:70])
|
||||
if not nelementals == 0:
|
||||
raise ValueError("Unexpected value %d for nltvl (last entry of line 3)"
|
||||
% nelementals)
|
||||
|
||||
# Fourth line
|
||||
line = fid.readline().strip("\n")
|
||||
|
||||
ct = line.split()
|
||||
if not len(ct) == 3:
|
||||
raise ValueError("Expected 3 formats, got %s" % ct)
|
||||
|
||||
return cls(title, key,
|
||||
total_nlines, pointer_nlines, indices_nlines, values_nlines,
|
||||
mxtype, nrows, ncols, nnon_zeros,
|
||||
ct[0], ct[1], ct[2],
|
||||
rhs_nlines, nelementals)
|
||||
|
||||
def __init__(self, title, key,
|
||||
total_nlines, pointer_nlines, indices_nlines, values_nlines,
|
||||
mxtype, nrows, ncols, nnon_zeros,
|
||||
pointer_format_str, indices_format_str, values_format_str,
|
||||
right_hand_sides_nlines=0, nelementals=0):
|
||||
"""Do not use this directly, but the class ctrs (from_* functions)."""
|
||||
self.title = title
|
||||
self.key = key
|
||||
if title is None:
|
||||
title = "No Title"
|
||||
if len(title) > 72:
|
||||
raise ValueError("title cannot be > 72 characters")
|
||||
|
||||
if key is None:
|
||||
key = "|No Key"
|
||||
if len(key) > 8:
|
||||
warnings.warn("key is > 8 characters (key is %s)" % key, LineOverflow)
|
||||
|
||||
self.total_nlines = total_nlines
|
||||
self.pointer_nlines = pointer_nlines
|
||||
self.indices_nlines = indices_nlines
|
||||
self.values_nlines = values_nlines
|
||||
|
||||
parser = FortranFormatParser()
|
||||
pointer_format = parser.parse(pointer_format_str)
|
||||
if not isinstance(pointer_format, IntFormat):
|
||||
raise ValueError("Expected int format for pointer format, got %s"
|
||||
% pointer_format)
|
||||
|
||||
indices_format = parser.parse(indices_format_str)
|
||||
if not isinstance(indices_format, IntFormat):
|
||||
raise ValueError("Expected int format for indices format, got %s" %
|
||||
indices_format)
|
||||
|
||||
values_format = parser.parse(values_format_str)
|
||||
if isinstance(values_format, ExpFormat):
|
||||
if mxtype.value_type not in ["real", "complex"]:
|
||||
raise ValueError("Inconsistency between matrix type %s and "
|
||||
"value type %s" % (mxtype, values_format))
|
||||
values_dtype = np.float64
|
||||
elif isinstance(values_format, IntFormat):
|
||||
if mxtype.value_type not in ["integer"]:
|
||||
raise ValueError("Inconsistency between matrix type %s and "
|
||||
"value type %s" % (mxtype, values_format))
|
||||
# XXX: fortran int -> dtype association ?
|
||||
values_dtype = int
|
||||
else:
|
||||
raise ValueError("Unsupported format for values %r" % (values_format,))
|
||||
|
||||
self.pointer_format = pointer_format
|
||||
self.indices_format = indices_format
|
||||
self.values_format = values_format
|
||||
|
||||
self.pointer_dtype = np.int32
|
||||
self.indices_dtype = np.int32
|
||||
self.values_dtype = values_dtype
|
||||
|
||||
self.pointer_nlines = pointer_nlines
|
||||
self.pointer_nbytes_full = _nbytes_full(pointer_format, pointer_nlines)
|
||||
|
||||
self.indices_nlines = indices_nlines
|
||||
self.indices_nbytes_full = _nbytes_full(indices_format, indices_nlines)
|
||||
|
||||
self.values_nlines = values_nlines
|
||||
self.values_nbytes_full = _nbytes_full(values_format, values_nlines)
|
||||
|
||||
self.nrows = nrows
|
||||
self.ncols = ncols
|
||||
self.nnon_zeros = nnon_zeros
|
||||
self.nelementals = nelementals
|
||||
self.mxtype = mxtype
|
||||
|
||||
def dump(self):
|
||||
"""Gives the header corresponding to this instance as a string."""
|
||||
header = [self.title.ljust(72) + self.key.ljust(8)]
|
||||
|
||||
header.append("%14d%14d%14d%14d" %
|
||||
(self.total_nlines, self.pointer_nlines,
|
||||
self.indices_nlines, self.values_nlines))
|
||||
header.append("%14s%14d%14d%14d%14d" %
|
||||
(self.mxtype.fortran_format.ljust(14), self.nrows,
|
||||
self.ncols, self.nnon_zeros, 0))
|
||||
|
||||
pffmt = self.pointer_format.fortran_format
|
||||
iffmt = self.indices_format.fortran_format
|
||||
vffmt = self.values_format.fortran_format
|
||||
header.append("%16s%16s%20s" %
|
||||
(pffmt.ljust(16), iffmt.ljust(16), vffmt.ljust(20)))
|
||||
return "\n".join(header)
|
||||
|
||||
|
||||
def _expect_int(value, msg=None):
|
||||
try:
|
||||
return int(value)
|
||||
except ValueError:
|
||||
if msg is None:
|
||||
msg = "Expected an int, got %s"
|
||||
raise ValueError(msg % value)
|
||||
|
||||
|
||||
def _read_hb_data(content, header):
|
||||
# XXX: look at a way to reduce memory here (big string creation)
|
||||
ptr_string = "".join([content.read(header.pointer_nbytes_full),
|
||||
content.readline()])
|
||||
ptr = np.fromstring(ptr_string,
|
||||
dtype=int, sep=' ')
|
||||
|
||||
ind_string = "".join([content.read(header.indices_nbytes_full),
|
||||
content.readline()])
|
||||
ind = np.fromstring(ind_string,
|
||||
dtype=int, sep=' ')
|
||||
|
||||
val_string = "".join([content.read(header.values_nbytes_full),
|
||||
content.readline()])
|
||||
val = np.fromstring(val_string,
|
||||
dtype=header.values_dtype, sep=' ')
|
||||
|
||||
try:
|
||||
return csc_matrix((val, ind-1, ptr-1),
|
||||
shape=(header.nrows, header.ncols))
|
||||
except ValueError as e:
|
||||
raise e
|
||||
|
||||
|
||||
def _write_data(m, fid, header):
|
||||
m = m.tocsc(copy=False)
|
||||
|
||||
def write_array(f, ar, nlines, fmt):
|
||||
# ar_nlines is the number of full lines, n is the number of items per
|
||||
# line, ffmt the fortran format
|
||||
pyfmt = fmt.python_format
|
||||
pyfmt_full = pyfmt * fmt.repeat
|
||||
|
||||
# for each array to write, we first write the full lines, and special
|
||||
# case for partial line
|
||||
full = ar[:(nlines - 1) * fmt.repeat]
|
||||
for row in full.reshape((nlines-1, fmt.repeat)):
|
||||
f.write(pyfmt_full % tuple(row) + "\n")
|
||||
nremain = ar.size - full.size
|
||||
if nremain > 0:
|
||||
f.write((pyfmt * nremain) % tuple(ar[ar.size - nremain:]) + "\n")
|
||||
|
||||
fid.write(header.dump())
|
||||
fid.write("\n")
|
||||
# +1 is for fortran one-based indexing
|
||||
write_array(fid, m.indptr+1, header.pointer_nlines,
|
||||
header.pointer_format)
|
||||
write_array(fid, m.indices+1, header.indices_nlines,
|
||||
header.indices_format)
|
||||
write_array(fid, m.data, header.values_nlines,
|
||||
header.values_format)
|
||||
|
||||
|
||||
class HBMatrixType(object):
|
||||
"""Class to hold the matrix type."""
|
||||
# q2f* translates qualified names to fortran character
|
||||
_q2f_type = {
|
||||
"real": "R",
|
||||
"complex": "C",
|
||||
"pattern": "P",
|
||||
"integer": "I",
|
||||
}
|
||||
_q2f_structure = {
|
||||
"symmetric": "S",
|
||||
"unsymmetric": "U",
|
||||
"hermitian": "H",
|
||||
"skewsymmetric": "Z",
|
||||
"rectangular": "R"
|
||||
}
|
||||
_q2f_storage = {
|
||||
"assembled": "A",
|
||||
"elemental": "E",
|
||||
}
|
||||
|
||||
_f2q_type = dict([(j, i) for i, j in _q2f_type.items()])
|
||||
_f2q_structure = dict([(j, i) for i, j in _q2f_structure.items()])
|
||||
_f2q_storage = dict([(j, i) for i, j in _q2f_storage.items()])
|
||||
|
||||
@classmethod
|
||||
def from_fortran(cls, fmt):
|
||||
if not len(fmt) == 3:
|
||||
raise ValueError("Fortran format for matrix type should be 3 "
|
||||
"characters long")
|
||||
try:
|
||||
value_type = cls._f2q_type[fmt[0]]
|
||||
structure = cls._f2q_structure[fmt[1]]
|
||||
storage = cls._f2q_storage[fmt[2]]
|
||||
return cls(value_type, structure, storage)
|
||||
except KeyError:
|
||||
raise ValueError("Unrecognized format %s" % fmt)
|
||||
|
||||
def __init__(self, value_type, structure, storage="assembled"):
|
||||
self.value_type = value_type
|
||||
self.structure = structure
|
||||
self.storage = storage
|
||||
|
||||
if value_type not in self._q2f_type:
|
||||
raise ValueError("Unrecognized type %s" % value_type)
|
||||
if structure not in self._q2f_structure:
|
||||
raise ValueError("Unrecognized structure %s" % structure)
|
||||
if storage not in self._q2f_storage:
|
||||
raise ValueError("Unrecognized storage %s" % storage)
|
||||
|
||||
@property
|
||||
def fortran_format(self):
|
||||
return self._q2f_type[self.value_type] + \
|
||||
self._q2f_structure[self.structure] + \
|
||||
self._q2f_storage[self.storage]
|
||||
|
||||
def __repr__(self):
|
||||
return "HBMatrixType(%s, %s, %s)" % \
|
||||
(self.value_type, self.structure, self.storage)
|
||||
|
||||
|
||||
class HBFile(object):
|
||||
def __init__(self, file, hb_info=None):
|
||||
"""Create a HBFile instance.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
file : file-object
|
||||
StringIO work as well
|
||||
hb_info : HBInfo, optional
|
||||
Should be given as an argument for writing, in which case the file
|
||||
should be writable.
|
||||
"""
|
||||
self._fid = file
|
||||
if hb_info is None:
|
||||
self._hb_info = HBInfo.from_file(file)
|
||||
else:
|
||||
#raise IOError("file %s is not writable, and hb_info "
|
||||
# "was given." % file)
|
||||
self._hb_info = hb_info
|
||||
|
||||
@property
|
||||
def title(self):
|
||||
return self._hb_info.title
|
||||
|
||||
@property
|
||||
def key(self):
|
||||
return self._hb_info.key
|
||||
|
||||
@property
|
||||
def type(self):
|
||||
return self._hb_info.mxtype.value_type
|
||||
|
||||
@property
|
||||
def structure(self):
|
||||
return self._hb_info.mxtype.structure
|
||||
|
||||
@property
|
||||
def storage(self):
|
||||
return self._hb_info.mxtype.storage
|
||||
|
||||
def read_matrix(self):
|
||||
return _read_hb_data(self._fid, self._hb_info)
|
||||
|
||||
def write_matrix(self, m):
|
||||
return _write_data(m, self._fid, self._hb_info)
|
||||
|
||||
|
||||
def hb_read(path_or_open_file):
|
||||
"""Read HB-format file.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path_or_open_file : path-like or file-like
|
||||
If a file-like object, it is used as-is. Otherwise it is opened
|
||||
before reading.
|
||||
|
||||
Returns
|
||||
-------
|
||||
data : scipy.sparse.csc_matrix instance
|
||||
The data read from the HB file as a sparse matrix.
|
||||
|
||||
Notes
|
||||
-----
|
||||
At the moment not the full Harwell-Boeing format is supported. Supported
|
||||
features are:
|
||||
|
||||
- assembled, non-symmetric, real matrices
|
||||
- integer for pointer/indices
|
||||
- exponential format for float values, and int format
|
||||
|
||||
"""
|
||||
def _get_matrix(fid):
|
||||
hb = HBFile(fid)
|
||||
return hb.read_matrix()
|
||||
|
||||
if hasattr(path_or_open_file, 'read'):
|
||||
return _get_matrix(path_or_open_file)
|
||||
else:
|
||||
with open(path_or_open_file) as f:
|
||||
return _get_matrix(f)
|
||||
|
||||
|
||||
def hb_write(path_or_open_file, m, hb_info=None):
|
||||
"""Write HB-format file.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path_or_open_file : path-like or file-like
|
||||
If a file-like object, it is used as-is. Otherwise it is opened
|
||||
before writing.
|
||||
m : sparse-matrix
|
||||
the sparse matrix to write
|
||||
hb_info : HBInfo
|
||||
contains the meta-data for write
|
||||
|
||||
Returns
|
||||
-------
|
||||
None
|
||||
|
||||
Notes
|
||||
-----
|
||||
At the moment not the full Harwell-Boeing format is supported. Supported
|
||||
features are:
|
||||
|
||||
- assembled, non-symmetric, real matrices
|
||||
- integer for pointer/indices
|
||||
- exponential format for float values, and int format
|
||||
|
||||
"""
|
||||
m = m.tocsc(copy=False)
|
||||
|
||||
if hb_info is None:
|
||||
hb_info = HBInfo.from_data(m)
|
||||
|
||||
def _set_matrix(fid):
|
||||
hb = HBFile(fid, hb_info)
|
||||
return hb.write_matrix(m)
|
||||
|
||||
if hasattr(path_or_open_file, 'write'):
|
||||
return _set_matrix(path_or_open_file)
|
||||
else:
|
||||
with open(path_or_open_file, 'w') as f:
|
||||
return _set_matrix(f)
|
||||
@@ -1,14 +0,0 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
|
||||
def configuration(parent_package='',top_path=None):
|
||||
from numpy.distutils.misc_util import Configuration
|
||||
config = Configuration('harwell_boeing',parent_package,top_path)
|
||||
config.add_data_dir('tests')
|
||||
|
||||
return config
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
from numpy.distutils.core import setup
|
||||
setup(**configuration(top_path='').todict())
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
-77
@@ -1,77 +0,0 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import numpy as np
|
||||
|
||||
from numpy.testing import assert_equal
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
from scipy.io.harwell_boeing._fortran_format_parser import (
|
||||
FortranFormatParser, IntFormat, ExpFormat, BadFortranFormat,
|
||||
number_digits)
|
||||
|
||||
|
||||
class TestFortranFormatParser(object):
|
||||
def setup_method(self):
|
||||
self.parser = FortranFormatParser()
|
||||
|
||||
def _test_equal(self, format, ref):
|
||||
ret = self.parser.parse(format)
|
||||
assert_equal(ret.__dict__, ref.__dict__)
|
||||
|
||||
def test_simple_int(self):
|
||||
self._test_equal("(I4)", IntFormat(4))
|
||||
|
||||
def test_simple_repeated_int(self):
|
||||
self._test_equal("(3I4)", IntFormat(4, repeat=3))
|
||||
|
||||
def test_simple_exp(self):
|
||||
self._test_equal("(E4.3)", ExpFormat(4, 3))
|
||||
|
||||
def test_exp_exp(self):
|
||||
self._test_equal("(E8.3E3)", ExpFormat(8, 3, 3))
|
||||
|
||||
def test_repeat_exp(self):
|
||||
self._test_equal("(2E4.3)", ExpFormat(4, 3, repeat=2))
|
||||
|
||||
def test_repeat_exp_exp(self):
|
||||
self._test_equal("(2E8.3E3)", ExpFormat(8, 3, 3, repeat=2))
|
||||
|
||||
def test_wrong_formats(self):
|
||||
def _test_invalid(bad_format):
|
||||
assert_raises(BadFortranFormat, lambda: self.parser.parse(bad_format))
|
||||
_test_invalid("I4")
|
||||
_test_invalid("(E4)")
|
||||
_test_invalid("(E4.)")
|
||||
_test_invalid("(E4.E3)")
|
||||
|
||||
|
||||
class TestIntFormat(object):
|
||||
def test_to_fortran(self):
|
||||
f = [IntFormat(10), IntFormat(12, 10), IntFormat(12, 10, 3)]
|
||||
res = ["(I10)", "(I12.10)", "(3I12.10)"]
|
||||
|
||||
for i, j in zip(f, res):
|
||||
assert_equal(i.fortran_format, j)
|
||||
|
||||
def test_from_number(self):
|
||||
f = [10, -12, 123456789]
|
||||
r_f = [IntFormat(3, repeat=26), IntFormat(4, repeat=20),
|
||||
IntFormat(10, repeat=8)]
|
||||
for i, j in zip(f, r_f):
|
||||
assert_equal(IntFormat.from_number(i).__dict__, j.__dict__)
|
||||
|
||||
|
||||
class TestExpFormat(object):
|
||||
def test_to_fortran(self):
|
||||
f = [ExpFormat(10, 5), ExpFormat(12, 10), ExpFormat(12, 10, min=3),
|
||||
ExpFormat(10, 5, repeat=3)]
|
||||
res = ["(E10.5)", "(E12.10)", "(E12.10E3)", "(3E10.5)"]
|
||||
|
||||
for i, j in zip(f, res):
|
||||
assert_equal(i.fortran_format, j)
|
||||
|
||||
def test_from_number(self):
|
||||
f = np.array([1.0, -1.2])
|
||||
r_f = [ExpFormat(24, 16, repeat=3), ExpFormat(25, 16, repeat=3)]
|
||||
for i, j in zip(f, r_f):
|
||||
assert_equal(ExpFormat.from_number(i).__dict__, j.__dict__)
|
||||
@@ -1,71 +0,0 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import sys
|
||||
if sys.version_info[0] >= 3:
|
||||
from io import StringIO
|
||||
else:
|
||||
from StringIO import StringIO
|
||||
import tempfile
|
||||
|
||||
import numpy as np
|
||||
|
||||
from numpy.testing import assert_equal, \
|
||||
assert_array_almost_equal_nulp
|
||||
|
||||
from scipy.sparse import coo_matrix, csc_matrix, rand
|
||||
|
||||
from scipy.io import hb_read, hb_write
|
||||
|
||||
|
||||
SIMPLE = """\
|
||||
No Title |No Key
|
||||
9 4 1 4
|
||||
RUA 100 100 10 0
|
||||
(26I3) (26I3) (3E23.15)
|
||||
1 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
|
||||
3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
|
||||
3 3 3 3 3 3 3 4 4 4 6 6 6 6 6 6 6 6 6 6 6 8 9 9 9 9
|
||||
9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 11
|
||||
37 71 89 18 30 45 70 19 25 52
|
||||
2.971243799687726e-01 3.662366682877375e-01 4.786962174699534e-01
|
||||
6.490068647991184e-01 6.617490424831662e-02 8.870370343191623e-01
|
||||
4.196478590163001e-01 5.649603072111251e-01 9.934423887087086e-01
|
||||
6.912334991524289e-01
|
||||
"""
|
||||
|
||||
SIMPLE_MATRIX = coo_matrix(
|
||||
((0.297124379969, 0.366236668288, 0.47869621747, 0.649006864799,
|
||||
0.0661749042483, 0.887037034319, 0.419647859016,
|
||||
0.564960307211, 0.993442388709, 0.691233499152,),
|
||||
(np.array([[36, 70, 88, 17, 29, 44, 69, 18, 24, 51],
|
||||
[0, 4, 58, 61, 61, 72, 72, 73, 99, 99]]))))
|
||||
|
||||
|
||||
def assert_csc_almost_equal(r, l):
|
||||
r = csc_matrix(r)
|
||||
l = csc_matrix(l)
|
||||
assert_equal(r.indptr, l.indptr)
|
||||
assert_equal(r.indices, l.indices)
|
||||
assert_array_almost_equal_nulp(r.data, l.data, 10000)
|
||||
|
||||
|
||||
class TestHBReader(object):
|
||||
def test_simple(self):
|
||||
m = hb_read(StringIO(SIMPLE))
|
||||
assert_csc_almost_equal(m, SIMPLE_MATRIX)
|
||||
|
||||
|
||||
class TestHBReadWrite(object):
|
||||
|
||||
def check_save_load(self, value):
|
||||
with tempfile.NamedTemporaryFile(mode='w+t') as file:
|
||||
hb_write(file, value)
|
||||
file.file.seek(0)
|
||||
value_loaded = hb_read(file)
|
||||
assert_csc_almost_equal(value, value_loaded)
|
||||
|
||||
def test_simple(self):
|
||||
random_matrix = rand(10, 100, 0.1)
|
||||
for matrix_format in ('coo', 'csc', 'csr', 'bsr', 'dia', 'dok', 'lil'):
|
||||
matrix = random_matrix.asformat(matrix_format, copy=False)
|
||||
self.check_save_load(matrix)
|
||||
@@ -1,884 +0,0 @@
|
||||
# IDLSave - a python module to read IDL 'save' files
|
||||
# Copyright (c) 2010 Thomas P. Robitaille
|
||||
|
||||
# Many thanks to Craig Markwardt for publishing the Unofficial Format
|
||||
# Specification for IDL .sav files, without which this Python module would not
|
||||
# exist (http://cow.physics.wisc.edu/~craigm/idl/savefmt).
|
||||
|
||||
# This code was developed by with permission from ITT Visual Information
|
||||
# Systems. IDL(r) is a registered trademark of ITT Visual Information Systems,
|
||||
# Inc. for their Interactive Data Language software.
|
||||
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
__all__ = ['readsav']
|
||||
|
||||
import struct
|
||||
import numpy as np
|
||||
from numpy.compat import asstr
|
||||
import tempfile
|
||||
import zlib
|
||||
import warnings
|
||||
|
||||
# Define the different data types that can be found in an IDL save file
|
||||
DTYPE_DICT = {1: '>u1',
|
||||
2: '>i2',
|
||||
3: '>i4',
|
||||
4: '>f4',
|
||||
5: '>f8',
|
||||
6: '>c8',
|
||||
7: '|O',
|
||||
8: '|O',
|
||||
9: '>c16',
|
||||
10: '|O',
|
||||
11: '|O',
|
||||
12: '>u2',
|
||||
13: '>u4',
|
||||
14: '>i8',
|
||||
15: '>u8'}
|
||||
|
||||
# Define the different record types that can be found in an IDL save file
|
||||
RECTYPE_DICT = {0: "START_MARKER",
|
||||
1: "COMMON_VARIABLE",
|
||||
2: "VARIABLE",
|
||||
3: "SYSTEM_VARIABLE",
|
||||
6: "END_MARKER",
|
||||
10: "TIMESTAMP",
|
||||
12: "COMPILED",
|
||||
13: "IDENTIFICATION",
|
||||
14: "VERSION",
|
||||
15: "HEAP_HEADER",
|
||||
16: "HEAP_DATA",
|
||||
17: "PROMOTE64",
|
||||
19: "NOTICE",
|
||||
20: "DESCRIPTION"}
|
||||
|
||||
# Define a dictionary to contain structure definitions
|
||||
STRUCT_DICT = {}
|
||||
|
||||
|
||||
def _align_32(f):
|
||||
'''Align to the next 32-bit position in a file'''
|
||||
|
||||
pos = f.tell()
|
||||
if pos % 4 != 0:
|
||||
f.seek(pos + 4 - pos % 4)
|
||||
return
|
||||
|
||||
|
||||
def _skip_bytes(f, n):
|
||||
'''Skip `n` bytes'''
|
||||
f.read(n)
|
||||
return
|
||||
|
||||
|
||||
def _read_bytes(f, n):
|
||||
'''Read the next `n` bytes'''
|
||||
return f.read(n)
|
||||
|
||||
|
||||
def _read_byte(f):
|
||||
'''Read a single byte'''
|
||||
return np.uint8(struct.unpack('>B', f.read(4)[:1])[0])
|
||||
|
||||
|
||||
def _read_long(f):
|
||||
'''Read a signed 32-bit integer'''
|
||||
return np.int32(struct.unpack('>l', f.read(4))[0])
|
||||
|
||||
|
||||
def _read_int16(f):
|
||||
'''Read a signed 16-bit integer'''
|
||||
return np.int16(struct.unpack('>h', f.read(4)[2:4])[0])
|
||||
|
||||
|
||||
def _read_int32(f):
|
||||
'''Read a signed 32-bit integer'''
|
||||
return np.int32(struct.unpack('>i', f.read(4))[0])
|
||||
|
||||
|
||||
def _read_int64(f):
|
||||
'''Read a signed 64-bit integer'''
|
||||
return np.int64(struct.unpack('>q', f.read(8))[0])
|
||||
|
||||
|
||||
def _read_uint16(f):
|
||||
'''Read an unsigned 16-bit integer'''
|
||||
return np.uint16(struct.unpack('>H', f.read(4)[2:4])[0])
|
||||
|
||||
|
||||
def _read_uint32(f):
|
||||
'''Read an unsigned 32-bit integer'''
|
||||
return np.uint32(struct.unpack('>I', f.read(4))[0])
|
||||
|
||||
|
||||
def _read_uint64(f):
|
||||
'''Read an unsigned 64-bit integer'''
|
||||
return np.uint64(struct.unpack('>Q', f.read(8))[0])
|
||||
|
||||
|
||||
def _read_float32(f):
|
||||
'''Read a 32-bit float'''
|
||||
return np.float32(struct.unpack('>f', f.read(4))[0])
|
||||
|
||||
|
||||
def _read_float64(f):
|
||||
'''Read a 64-bit float'''
|
||||
return np.float64(struct.unpack('>d', f.read(8))[0])
|
||||
|
||||
|
||||
class Pointer(object):
|
||||
'''Class used to define pointers'''
|
||||
|
||||
def __init__(self, index):
|
||||
self.index = index
|
||||
return
|
||||
|
||||
|
||||
class ObjectPointer(Pointer):
|
||||
'''Class used to define object pointers'''
|
||||
pass
|
||||
|
||||
|
||||
def _read_string(f):
|
||||
'''Read a string'''
|
||||
length = _read_long(f)
|
||||
if length > 0:
|
||||
chars = _read_bytes(f, length)
|
||||
_align_32(f)
|
||||
chars = asstr(chars)
|
||||
else:
|
||||
chars = ''
|
||||
return chars
|
||||
|
||||
|
||||
def _read_string_data(f):
|
||||
'''Read a data string (length is specified twice)'''
|
||||
length = _read_long(f)
|
||||
if length > 0:
|
||||
length = _read_long(f)
|
||||
string_data = _read_bytes(f, length)
|
||||
_align_32(f)
|
||||
else:
|
||||
string_data = ''
|
||||
return string_data
|
||||
|
||||
|
||||
def _read_data(f, dtype):
    '''Read a single scalar value of IDL type code `dtype`.

    Type codes follow the IDL SAVE convention (1=byte, 2=int16, 3=int32,
    4=float32, 5=float64, 6=complex64, 7=string, 9=complex128,
    10/11=(object) pointers, 12=uint16, 13=uint32, 14=int64, 15=uint64).
    '''
    if dtype == 1:
        # Byte data is preceded by a 32-bit count that should always be 1.
        if _read_int32(f) != 1:
            raise Exception("Error occurred while reading byte variable")
        return _read_byte(f)
    if dtype == 2:
        return _read_int16(f)
    if dtype == 3:
        return _read_int32(f)
    if dtype == 4:
        return _read_float32(f)
    if dtype == 5:
        return _read_float64(f)
    if dtype == 6:
        # Complex values are stored as (real, imaginary) float pairs.
        real = _read_float32(f)
        imag = _read_float32(f)
        return np.complex64(real + imag * 1j)
    if dtype == 7:
        return _read_string_data(f)
    if dtype == 8:
        # Structures are handled by _read_structure, never here.
        raise Exception("Should not be here - please report this")
    if dtype == 9:
        real = _read_float64(f)
        imag = _read_float64(f)
        return np.complex128(real + imag * 1j)
    if dtype == 10:
        return Pointer(_read_int32(f))
    if dtype == 11:
        return ObjectPointer(_read_int32(f))
    if dtype == 12:
        return _read_uint16(f)
    if dtype == 13:
        return _read_uint32(f)
    if dtype == 14:
        return _read_int64(f)
    if dtype == 15:
        return _read_uint64(f)
    raise Exception("Unknown IDL type: %i - please report this" % dtype)
|
||||
|
||||
|
||||
def _read_structure(f, array_desc, struct_desc):
    '''
    Read a structure, with the array and structure descriptors given as
    `array_desc` and `structure_desc` respectively.

    Returns a numpy recarray with one field per structure tag; nested
    structures and arrays are stored as object fields.
    '''

    nrows = array_desc['nelements']
    columns = struct_desc['tagtable']

    # Build the recarray dtype.  Each field uses a (title, name) pair so the
    # record is addressable by both the lowercase and the original tag name.
    dtype = []
    for col in columns:
        if col['structure'] or col['array']:
            # Nested values are stored boxed as Python objects.
            dtype.append(((col['name'].lower(), col['name']), np.object_))
        else:
            if col['typecode'] in DTYPE_DICT:
                dtype.append(((col['name'].lower(), col['name']),
                                    DTYPE_DICT[col['typecode']]))
            else:
                raise Exception("Variable type %i not implemented" %
                                                            col['typecode'])

    structure = np.recarray((nrows, ), dtype=dtype)

    # Read values row-major: all tags of element 0, then element 1, ...
    # NOTE: `dtype` is reused here as the per-column type code, shadowing
    # the dtype list built above (no longer needed at this point).
    for i in range(nrows):
        for col in columns:
            dtype = col['typecode']
            if col['structure']:
                structure[col['name']][i] = _read_structure(f,
                                      struct_desc['arrtable'][col['name']],
                                      struct_desc['structtable'][col['name']])
            elif col['array']:
                structure[col['name']][i] = _read_array(f, dtype,
                                      struct_desc['arrtable'][col['name']])
            else:
                structure[col['name']][i] = _read_data(f, dtype)

    # Reshape structure if needed.  Dimensions are reversed because IDL
    # stores arrays in the opposite axis order from numpy.
    if array_desc['ndims'] > 1:
        dims = array_desc['dims'][:int(array_desc['ndims'])]
        dims.reverse()
        structure = structure.reshape(dims)

    return structure
|
||||
|
||||
|
||||
def _read_array(f, typecode, array_desc):
    '''
    Read an array of type `typecode`, with the array descriptor given as
    `array_desc`.

    Fixed-width numeric types are read in one bulk `frombuffer` call;
    everything else falls back to element-by-element reads into an
    object array.
    '''

    if typecode in [1, 3, 4, 5, 6, 9, 13, 14, 15]:

        if typecode == 1:
            # Byte arrays carry their own leading byte count; it should agree
            # with the descriptor but a mismatch is only worth a warning.
            nbytes = _read_int32(f)
            if nbytes != array_desc['nbytes']:
                warnings.warn("Not able to verify number of bytes from header")

        # Read bytes as numpy array
        array = np.frombuffer(f.read(array_desc['nbytes']),
                              dtype=DTYPE_DICT[typecode])

    elif typecode in [2, 12]:

        # These are 2 byte types, need to skip every two as they are not packed
        # (each 16-bit value occupies a 4-byte slot; [1::2] keeps the low half).

        array = np.frombuffer(f.read(array_desc['nbytes']*2),
                              dtype=DTYPE_DICT[typecode])[1::2]

    else:

        # Read bytes into list, one scalar at a time (strings, pointers, ...)
        array = []
        for i in range(array_desc['nelements']):
            dtype = typecode
            data = _read_data(f, dtype)
            array.append(data)

        array = np.array(array, dtype=np.object_)

    # Reshape array if needed; dims are reversed to convert IDL axis order
    # to numpy axis order.
    if array_desc['ndims'] > 1:
        dims = array_desc['dims'][:int(array_desc['ndims'])]
        dims.reverse()
        array = array.reshape(dims)

    # Go to next alignment position
    _align_32(f)

    return array
|
||||
|
||||
|
||||
def _read_record(f):
    '''Function to read in a full record.

    Reads the record header (type, offset of the next record, four unknown
    bytes), dispatches on the record type to read its payload, then seeks
    to the next record.  Returns a dict describing the record.
    '''

    record = {'rectype': _read_long(f)}

    # Absolute file offset of the next record, stored as two 32-bit halves.
    nextrec = _read_uint32(f)
    nextrec += _read_uint32(f) * 2**32

    # Four bytes of unknown purpose in every record header.
    _skip_bytes(f, 4)

    if not record['rectype'] in RECTYPE_DICT:
        raise Exception("Unknown RECTYPE: %i" % record['rectype'])

    record['rectype'] = RECTYPE_DICT[record['rectype']]

    if record['rectype'] in ["VARIABLE", "HEAP_DATA"]:

        if record['rectype'] == "VARIABLE":
            record['varname'] = _read_string(f)
        else:
            record['heap_index'] = _read_long(f)
            _skip_bytes(f, 4)

        rectypedesc = _read_typedesc(f)

        if rectypedesc['typecode'] == 0:

            # A zero type code with no payload (next record starts here)
            # encodes a NULL variable.
            if nextrec == f.tell():
                record['data'] = None  # Indicates NULL value
            else:
                raise ValueError("Unexpected type code: 0")

        else:

            varstart = _read_long(f)
            if varstart != 7:
                raise Exception("VARSTART is not 7")

            if rectypedesc['structure']:
                record['data'] = _read_structure(f, rectypedesc['array_desc'],
                                                 rectypedesc['struct_desc'])
            elif rectypedesc['array']:
                record['data'] = _read_array(f, rectypedesc['typecode'],
                                             rectypedesc['array_desc'])
            else:
                dtype = rectypedesc['typecode']
                record['data'] = _read_data(f, dtype)

    elif record['rectype'] == "TIMESTAMP":

        # 256 longs of padding precede the timestamp strings.
        _skip_bytes(f, 4*256)
        record['date'] = _read_string(f)
        record['user'] = _read_string(f)
        record['host'] = _read_string(f)

    elif record['rectype'] == "VERSION":

        record['format'] = _read_long(f)
        record['arch'] = _read_string(f)
        record['os'] = _read_string(f)
        record['release'] = _read_string(f)

    # NOTE(review): "IDENTIFICATON" spelling presumably matches the value in
    # RECTYPE_DICT (defined earlier in this file) — confirm before "fixing".
    elif record['rectype'] == "IDENTIFICATON":

        record['author'] = _read_string(f)
        record['title'] = _read_string(f)
        record['idcode'] = _read_string(f)

    elif record['rectype'] == "NOTICE":

        record['notice'] = _read_string(f)

    elif record['rectype'] == "DESCRIPTION":

        record['description'] = _read_string_data(f)

    elif record['rectype'] == "HEAP_HEADER":

        record['nvalues'] = _read_long(f)
        record['indices'] = []
        for i in range(record['nvalues']):
            record['indices'].append(_read_long(f))

    elif record['rectype'] == "COMMONBLOCK":

        record['nvars'] = _read_long(f)
        record['name'] = _read_string(f)
        record['varnames'] = []
        for i in range(record['nvars']):
            record['varnames'].append(_read_string(f))

    elif record['rectype'] == "END_MARKER":

        record['end'] = True

    elif record['rectype'] == "UNKNOWN":

        warnings.warn("Skipping UNKNOWN record")

    elif record['rectype'] == "SYSTEM_VARIABLE":

        warnings.warn("Skipping SYSTEM_VARIABLE record")

    else:

        raise Exception("record['rectype']=%s not implemented" %
                        record['rectype'])

    # Position at the start of the following record regardless of how much
    # of this record's payload was consumed.
    f.seek(nextrec)

    return record
|
||||
|
||||
|
||||
def _read_typedesc(f):
    '''Read a variable type descriptor.

    Returns a dict with the raw 'typecode' and 'varflags', boolean 'array'
    and 'structure' flags, and — when applicable — the nested array and
    structure descriptors.
    '''
    typecode = _read_long(f)
    varflags = _read_long(f)

    if varflags & 2 == 2:
        raise Exception("System variables not implemented")

    is_array = varflags & 4 == 4
    is_structure = varflags & 32 == 32

    typedesc = {'typecode': typecode,
                'varflags': varflags,
                'array': is_array,
                'structure': is_structure}

    # A structure always carries an array descriptor as well.
    if is_structure:
        typedesc['array_desc'] = _read_arraydesc(f)
        typedesc['struct_desc'] = _read_structdesc(f)
    elif is_array:
        typedesc['array_desc'] = _read_arraydesc(f)

    return typedesc
|
||||
|
||||
|
||||
def _read_arraydesc(f):
    '''Function to read in an array descriptor.

    Supports the 32-bit layout (ARRSTART == 8) and the experimental 64-bit
    layout (ARRSTART == 18).  Returns a dict with nbytes, nelements, ndims,
    nmax and the dims list.
    '''

    arraydesc = {'arrstart': _read_long(f)}

    if arraydesc['arrstart'] == 8:

        _skip_bytes(f, 4)

        arraydesc['nbytes'] = _read_long(f)
        arraydesc['nelements'] = _read_long(f)
        arraydesc['ndims'] = _read_long(f)

        _skip_bytes(f, 8)

        arraydesc['nmax'] = _read_long(f)

        # nmax dimension slots are stored even if ndims is smaller.
        arraydesc['dims'] = []
        for d in range(arraydesc['nmax']):
            arraydesc['dims'].append(_read_long(f))

    elif arraydesc['arrstart'] == 18:

        warnings.warn("Using experimental 64-bit array read")

        _skip_bytes(f, 8)

        arraydesc['nbytes'] = _read_uint64(f)
        arraydesc['nelements'] = _read_uint64(f)
        arraydesc['ndims'] = _read_long(f)

        _skip_bytes(f, 8)

        # The 64-bit layout always stores 8 dimension slots.
        arraydesc['nmax'] = 8

        arraydesc['dims'] = []
        for d in range(arraydesc['nmax']):
            # Each dimension is a zero followed by the actual value.
            v = _read_long(f)
            if v != 0:
                raise Exception("Expected a zero in ARRAY_DESC")
            arraydesc['dims'].append(_read_long(f))

    else:

        raise Exception("Unknown ARRSTART: %i" % arraydesc['arrstart'])

    return arraydesc
|
||||
|
||||
|
||||
def _read_structdesc(f):
    '''Function to read in a structure descriptor.

    New definitions are read in full and cached in the module-level
    STRUCT_DICT; predefined structures (PREDEF flag set) are looked up
    from that cache instead.
    '''

    structdesc = {}

    structstart = _read_long(f)
    if structstart != 9:
        raise Exception("STRUCTSTART should be 9")

    structdesc['name'] = _read_string(f)
    predef = _read_long(f)
    structdesc['ntags'] = _read_long(f)
    structdesc['nbytes'] = _read_long(f)

    # PREDEF is a bitmask: bit 0 = previously defined, bit 1 = inherits,
    # bit 2 = is a superclass.
    structdesc['predef'] = predef & 1
    structdesc['inherits'] = predef & 2
    structdesc['is_super'] = predef & 4

    if not structdesc['predef']:

        # Tag descriptors first, then all tag names, then array descriptors
        # for array tags, then structure descriptors for structure tags —
        # this grouping matches the on-disk order.
        structdesc['tagtable'] = []
        for t in range(structdesc['ntags']):
            structdesc['tagtable'].append(_read_tagdesc(f))

        for tag in structdesc['tagtable']:
            tag['name'] = _read_string(f)

        structdesc['arrtable'] = {}
        for tag in structdesc['tagtable']:
            if tag['array']:
                structdesc['arrtable'][tag['name']] = _read_arraydesc(f)

        structdesc['structtable'] = {}
        for tag in structdesc['tagtable']:
            if tag['structure']:
                structdesc['structtable'][tag['name']] = _read_structdesc(f)

        if structdesc['inherits'] or structdesc['is_super']:
            structdesc['classname'] = _read_string(f)
            structdesc['nsupclasses'] = _read_long(f)
            structdesc['supclassnames'] = []
            for s in range(structdesc['nsupclasses']):
                structdesc['supclassnames'].append(_read_string(f))
            structdesc['supclasstable'] = []
            for s in range(structdesc['nsupclasses']):
                structdesc['supclasstable'].append(_read_structdesc(f))

        # Cache so later PREDEF references can resolve to this definition.
        STRUCT_DICT[structdesc['name']] = structdesc

    else:

        if not structdesc['name'] in STRUCT_DICT:
            raise Exception("PREDEF=1 but can't find definition")

        structdesc = STRUCT_DICT[structdesc['name']]

    return structdesc
|
||||
|
||||
|
||||
def _read_tagdesc(f):
    '''Read a tag descriptor for one field of a structure.'''
    offset = _read_long(f)
    # A 32-bit offset of -1 is a sentinel meaning a 64-bit offset follows.
    if offset == -1:
        offset = _read_uint64(f)

    tagdesc = {'offset': offset}
    tagdesc['typecode'] = _read_long(f)
    tagflags = _read_long(f)

    tagdesc['array'] = tagflags & 4 == 4
    tagdesc['structure'] = tagflags & 32 == 32
    # Assume '10'x is scalar
    tagdesc['scalar'] = tagdesc['typecode'] in DTYPE_DICT

    return tagdesc
|
||||
|
||||
|
||||
def _replace_heap(variable, heap):
    '''Recursively resolve Pointer objects against the heap.

    Returns a ``(replace, value)`` pair: `replace` is True only when the
    caller must substitute `value` for its reference to `variable`
    (i.e. when `variable` itself was a Pointer); containers are mutated
    in place and return False.
    '''

    if isinstance(variable, Pointer):

        # Chase pointer chains until a non-pointer value (or None) remains.
        while isinstance(variable, Pointer):

            if variable.index == 0:
                # Index 0 is a NULL pointer.
                variable = None
            else:
                if variable.index in heap:
                    variable = heap[variable.index]
                else:
                    warnings.warn("Variable referenced by pointer not found "
                                  "in heap: variable will be set to None")
                    variable = None

        # The resolved value may itself contain pointers.
        replace, new = _replace_heap(variable, heap)

        if replace:
            variable = new

        return True, variable

    elif isinstance(variable, np.core.records.recarray):

        # Loop over records
        for ir, record in enumerate(variable):

            replace, new = _replace_heap(record, heap)

            if replace:
                variable[ir] = new

        return False, variable

    elif isinstance(variable, np.core.records.record):

        # Loop over values
        for iv, value in enumerate(variable):

            replace, new = _replace_heap(value, heap)

            if replace:
                variable[iv] = new

        return False, variable

    elif isinstance(variable, np.ndarray):

        # Loop over values if type is np.object_
        if variable.dtype.type is np.object_:

            for iv in range(variable.size):

                replace, new = _replace_heap(variable.item(iv), heap)

                if replace:
                    variable.itemset(iv, new)

        return False, variable

    else:

        # Plain scalar/string: nothing to resolve.
        return False, variable
|
||||
|
||||
|
||||
class AttrDict(dict):
    '''
    A case-insensitive dictionary with access via item, attribute, and call
    notations:

        >>> d = AttrDict()
        >>> d['Variable'] = 123
        >>> d['Variable']
        123
        >>> d.Variable
        123
        >>> d.variable
        123
        >>> d('VARIABLE')
        123
    '''

    def __init__(self, init=None):
        # Use None as the default instead of a mutable ``{}`` default
        # argument (shared across all calls), per standard Python practice.
        # NOTE(review): keys supplied via `init` are passed straight to
        # dict.__init__ and are NOT lowercased, matching the original
        # behavior — callers are expected to supply lowercase keys.
        dict.__init__(self, {} if init is None else init)

    def __getitem__(self, name):
        # Lowercase the key so lookups are case-insensitive.
        return super(AttrDict, self).__getitem__(name.lower())

    def __setitem__(self, key, value):
        # Store all keys lowercase so any casing retrieves the same entry.
        return super(AttrDict, self).__setitem__(key.lower(), value)

    # Attribute and call access share the item-access implementations.
    __getattr__ = __getitem__
    __setattr__ = __setitem__
    __call__ = __getitem__
|
||||
|
||||
|
||||
def readsav(file_name, idict=None, python_dict=False,
            uncompressed_file_name=None, verbose=False):
    """
    Read an IDL .sav file.

    Parameters
    ----------
    file_name : str
        Name of the IDL save file.
    idict : dict, optional
        Dictionary in which to insert .sav file variables.
    python_dict : bool, optional
        By default, the object return is not a Python dictionary, but a
        case-insensitive dictionary with item, attribute, and call access
        to variables. To get a standard Python dictionary, set this option
        to True.
    uncompressed_file_name : str, optional
        This option only has an effect for .sav files written with the
        /compress option. If a file name is specified, compressed .sav
        files are uncompressed to this file. Otherwise, readsav will use
        the `tempfile` module to determine a temporary filename
        automatically, and will remove the temporary file upon successfully
        reading it in.
    verbose : bool, optional
        Whether to print out information about the save file, including
        the records read, and available variables.

    Returns
    -------
    idl_dict : AttrDict or dict
        If `python_dict` is set to False (default), this function returns a
        case-insensitive dictionary with item, attribute, and call access
        to variables. If `python_dict` is set to True, this function
        returns a Python dictionary with all variable names in lowercase.
        If `idict` was specified, then variables are written to the
        dictionary specified, and the updated dictionary is returned.

    """

    # Initialize record and variable holders
    records = []
    if python_dict or idict:
        variables = {}
    else:
        variables = AttrDict()

    # Open the IDL file
    f = open(file_name, 'rb')

    # Read the signature, which should be 'SR'
    signature = _read_bytes(f, 2)
    if signature != b'SR':
        raise Exception("Invalid SIGNATURE: %s" % signature)

    # Next, the record format, which is '\x00\x04' for normal .sav
    # files, and '\x00\x06' for compressed .sav files.
    recfmt = _read_bytes(f, 2)

    if recfmt == b'\x00\x04':
        pass

    elif recfmt == b'\x00\x06':

        # Compressed files are expanded record-by-record into a normal-format
        # temporary file, which is then read in place of the original.
        if verbose:
            print("IDL Save file is compressed")

        if uncompressed_file_name:
            fout = open(uncompressed_file_name, 'w+b')
        else:
            fout = tempfile.NamedTemporaryFile(suffix='.sav')

        if verbose:
            print(" -> expanding to %s" % fout.name)

        # Write header
        fout.write(b'SR\x00\x04')

        # Cycle through records
        while True:

            # Read record type
            rectype = _read_long(f)
            fout.write(struct.pack('>l', int(rectype)))

            # Read position of next record and return as int
            nextrec = _read_uint32(f)
            nextrec += _read_uint32(f) * 2**32

            # Read the unknown 4 bytes
            unknown = f.read(4)

            # Check if the end of the file has been reached
            if RECTYPE_DICT[rectype] == 'END_MARKER':
                # Write nextrec as low/high 32-bit halves, then stop.
                fout.write(struct.pack('>I', int(nextrec) % 2**32))
                fout.write(struct.pack('>I', int((nextrec - (nextrec % 2**32)) / 2**32)))
                fout.write(unknown)
                break

            # Find current position
            pos = f.tell()

            # Decompress record
            rec_string = zlib.decompress(f.read(nextrec-pos))

            # Find new position of next record
            # (12 = record type + two next-record longs in the header)
            nextrec = fout.tell() + len(rec_string) + 12

            # Write out record
            fout.write(struct.pack('>I', int(nextrec % 2**32)))
            fout.write(struct.pack('>I', int((nextrec - (nextrec % 2**32)) / 2**32)))
            fout.write(unknown)
            fout.write(rec_string)

        # Close the original compressed file
        f.close()

        # Set f to be the decompressed file, and skip the first four bytes
        f = fout
        f.seek(4)

    else:
        raise Exception("Invalid RECFMT: %s" % recfmt)

    # Loop through records, and add them to the list
    while True:
        r = _read_record(f)
        records.append(r)
        if 'end' in r:
            if r['end']:
                break

    # Close the file
    f.close()

    # Find heap data variables
    heap = {}
    for r in records:
        if r['rectype'] == "HEAP_DATA":
            heap[r['heap_index']] = r['data']

    # Find all variables, resolving heap pointers into their values
    for r in records:
        if r['rectype'] == "VARIABLE":
            replace, new = _replace_heap(r['data'], heap)
            if replace:
                r['data'] = new
            variables[r['varname'].lower()] = r['data']

    if verbose:

        # Print out timestamp info about the file
        for record in records:
            if record['rectype'] == "TIMESTAMP":
                print("-"*50)
                print("Date: %s" % record['date'])
                print("User: %s" % record['user'])
                print("Host: %s" % record['host'])
                break

        # Print out version info about the file
        for record in records:
            if record['rectype'] == "VERSION":
                print("-"*50)
                print("Format: %s" % record['format'])
                print("Architecture: %s" % record['arch'])
                print("Operating System: %s" % record['os'])
                print("IDL Version: %s" % record['release'])
                break

        # Print out identification info about the file
        # (record type spelling matches _read_record / RECTYPE_DICT)
        for record in records:
            if record['rectype'] == "IDENTIFICATON":
                print("-"*50)
                print("Author: %s" % record['author'])
                print("Title: %s" % record['title'])
                print("ID Code: %s" % record['idcode'])
                break

        # Print out descriptions saved with the file
        for record in records:
            if record['rectype'] == "DESCRIPTION":
                print("-"*50)
                print("Description: %s" % record['description'])
                break

        print("-"*50)
        print("Successfully read %i records of which:" %
                                            (len(records)))

        # Create convenience list of record types
        rectypes = [r['rectype'] for r in records]

        for rt in set(rectypes):
            if rt != 'END_MARKER':
                print(" - %i are of type %s" % (rectypes.count(rt), rt))
        print("-"*50)

        if 'VARIABLE' in rectypes:
            print("Available variables:")
            for var in variables:
                print(" - %s [%s]" % (var, type(variables[var])))
            print("-"*50)

    if idict:
        for var in variables:
            idict[var] = variables[var]
        return idict
    else:
        return variables
|
||||
@@ -1,20 +0,0 @@
|
||||
"""
|
||||
Utilities for dealing with MATLAB(R) files
|
||||
|
||||
Notes
|
||||
-----
|
||||
MATLAB(R) is a registered trademark of The MathWorks, Inc., 3 Apple Hill
|
||||
Drive, Natick, MA 01760-2098, USA.
|
||||
|
||||
"""
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
# Matlab file read and write utilities
|
||||
from .mio import loadmat, savemat, whosmat
|
||||
from . import byteordercodes
|
||||
|
||||
__all__ = ['loadmat', 'savemat', 'whosmat', 'byteordercodes']
|
||||
|
||||
from scipy._lib._testutils import PytestTester
|
||||
test = PytestTester(__name__)
|
||||
del PytestTester
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -1,70 +0,0 @@
|
||||
''' Byteorder utilities for system - numpy byteorder encoding
|
||||
|
||||
Converts a variety of string codes for little endian, big endian,
|
||||
native byte order and swapped byte order to explicit numpy endian
|
||||
codes - one of '<' (little endian) or '>' (big endian)
|
||||
|
||||
'''
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import sys
|
||||
|
||||
# True when the host CPU is little-endian.
sys_is_le = sys.byteorder == 'little'
# numpy dtype byte-order codes for this platform and for the opposite order.
# (Conditional expressions replace the fragile ``cond and a or b`` idiom.)
native_code = '<' if sys_is_le else '>'
swapped_code = '>' if sys_is_le else '<'

# Accepted spellings for each byte-order family.  Lowercase 's' is included
# because to_numpy_code lowercases its input before matching and documents
# 's' as a legal value; 'S' is kept for backward compatibility with any
# code inspecting this table directly.
aliases = {'little': ('little', '<', 'l', 'le'),
           'big': ('big', '>', 'b', 'be'),
           'native': ('native', '='),
           'swapped': ('swapped', 's', 'S')}
|
||||
|
||||
|
||||
def to_numpy_code(code):
    """
    Convert various order codings to numpy format.

    Parameters
    ----------
    code : str or None
        The code to convert. It is converted to lower case before parsing.
        Legal values are:
        'little', 'big', 'l', 'b', 'le', 'be', '<', '>', 'native', '=',
        'swapped', 's', and None (meaning native order).

    Returns
    -------
    out_code : {'<', '>'}
        Here '<' is the numpy dtype code for little endian,
        and '>' is the code for big endian.

    Raises
    ------
    ValueError
        If `code` is not one of the recognized spellings.

    Examples
    --------
    >>> import sys
    >>> sys_is_le == (sys.byteorder == 'little')
    True
    >>> to_numpy_code('big')
    '>'
    >>> to_numpy_code('little')
    '<'
    >>> nc = to_numpy_code('native')
    >>> nc == '<' if sys_is_le else nc == '>'
    True
    >>> sc = to_numpy_code('swapped')
    >>> sc == '>' if sys_is_le else sc == '<'
    True

    """
    # Bug fix: check for None before calling .lower().  Previously the
    # ``code is None`` branch was unreachable because None.lower() raised
    # AttributeError first.
    if code is None:
        return native_code
    code = code.lower()
    if code in aliases['little']:
        return '<'
    elif code in aliases['big']:
        return '>'
    elif code in aliases['native']:
        return native_code
    elif code in aliases['swapped']:
        return swapped_code
    else:
        raise ValueError(
            'We cannot handle byte order %s' % code)
|
||||
@@ -1,326 +0,0 @@
|
||||
"""
|
||||
Module for reading and writing matlab (TM) .mat files
|
||||
"""
|
||||
# Authors: Travis Oliphant, Matthew Brett
|
||||
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
from scipy._lib.six import string_types
|
||||
|
||||
from .miobase import get_matfile_version, docfiller
|
||||
from .mio4 import MatFile4Reader, MatFile4Writer
|
||||
from .mio5 import MatFile5Reader, MatFile5Writer
|
||||
|
||||
__all__ = ['mat_reader_factory', 'loadmat', 'savemat', 'whosmat']
|
||||
|
||||
|
||||
def _open_file(file_like, appendmat):
|
||||
"""
|
||||
Open `file_like` and return as file-like object. First, check if object is
|
||||
already file-like; if so, return it as-is. Otherwise, try to pass it
|
||||
to open(). If that fails, and `file_like` is a string, and `appendmat` is true,
|
||||
append '.mat' and try again.
|
||||
"""
|
||||
try:
|
||||
file_like.read(0)
|
||||
return file_like, False
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
return open(file_like, 'rb'), True
|
||||
except IOError:
|
||||
# Probably "not found"
|
||||
if isinstance(file_like, string_types):
|
||||
if appendmat and not file_like.endswith('.mat'):
|
||||
file_like += '.mat'
|
||||
return open(file_like, 'rb'), True
|
||||
else:
|
||||
raise IOError('Reader needs file name or open file-like object')
|
||||
|
||||
@docfiller
def mat_reader_factory(file_name, appendmat=True, **kwargs):
    """
    Create reader for matlab .mat format files.

    Parameters
    ----------
    %(file_arg)s
    %(append_arg)s
    %(load_args)s
    %(struct_arg)s

    Returns
    -------
    matreader : MatFileReader object
        Initialized instance of MatFileReader class matching the mat file
        type detected in `filename`.
    file_opened : bool
        Whether the file was opened by this routine.

    """
    byte_stream, file_opened = _open_file(file_name, appendmat)
    major_version, _minor_version = get_matfile_version(byte_stream)
    if major_version == 0:
        reader = MatFile4Reader(byte_stream, **kwargs)
    elif major_version == 1:
        reader = MatFile5Reader(byte_stream, **kwargs)
    elif major_version == 2:
        # v7.3 files are HDF5 containers; out of scope for this reader.
        raise NotImplementedError('Please use HDF reader for matlab v7.3 files')
    else:
        raise TypeError('Did not recognize version %s' % major_version)
    return reader, file_opened
|
||||
|
||||
|
||||
@docfiller
def loadmat(file_name, mdict=None, appendmat=True, **kwargs):
    """
    Load MATLAB file.

    Parameters
    ----------
    file_name : str
        Name of the mat file (do not need .mat extension if
        appendmat==True). Can also pass open file-like object.
    mdict : dict, optional
        Dictionary in which to insert matfile variables.
    appendmat : bool, optional
        True to append the .mat extension to the end of the given
        filename, if not already present.
    byte_order : str or None, optional
        None by default, implying byte order guessed from mat
        file. Otherwise can be one of ('native', '=', 'little', '<',
        'BIG', '>').
    mat_dtype : bool, optional
        If True, return arrays in same dtype as would be loaded into
        MATLAB (instead of the dtype with which they are saved).
    squeeze_me : bool, optional
        Whether to squeeze unit matrix dimensions or not.
    chars_as_strings : bool, optional
        Whether to convert char arrays to string arrays.
    matlab_compatible : bool, optional
        Returns matrices as would be loaded by MATLAB (implies
        squeeze_me=False, chars_as_strings=False, mat_dtype=True,
        struct_as_record=True).
    struct_as_record : bool, optional
        Whether to load MATLAB structs as numpy record arrays, or as
        old-style numpy arrays with dtype=object. Setting this flag to
        False replicates the behavior of scipy version 0.7.x (returning
        numpy object arrays). The default setting is True, because it
        allows easier round-trip load and save of MATLAB files.
    verify_compressed_data_integrity : bool, optional
        Whether the length of compressed sequences in the MATLAB file
        should be checked, to ensure that they are not longer than we expect.
        It is advisable to enable this (the default) because overlong
        compressed sequences in MATLAB files generally indicate that the
        files have experienced some sort of corruption.
    variable_names : None or sequence
        If None (the default) - read all variables in file. Otherwise
        `variable_names` should be a sequence of strings, giving names of the
        MATLAB variables to read from the file. The reader will skip any
        variable with a name not in this sequence, possibly saving some read
        processing.

    Returns
    -------
    mat_dict : dict
        dictionary with variable names as keys, and loaded matrices as
        values.

    Notes
    -----
    v4 (Level 1.0), v6 and v7 to 7.2 matfiles are supported.

    You will need an HDF5 python library to read MATLAB 7.3 format mat
    files. Because scipy does not supply one, we do not implement the
    HDF5 / 7.3 interface here.

    Examples
    --------
    >>> from os.path import dirname, join as pjoin
    >>> import scipy.io as sio

    Get the filename for an example .mat file from the tests/data directory.

    >>> data_dir = pjoin(dirname(sio.__file__), 'matlab', 'tests', 'data')
    >>> mat_fname = pjoin(data_dir, 'testdouble_7.4_GLNX86.mat')

    Load the .mat file contents.

    >>> mat_contents = sio.loadmat(mat_fname)

    The result is a dictionary, one key/value pair for each variable:

    >>> sorted(mat_contents.keys())
    ['__globals__', '__header__', '__version__', 'testdouble']
    >>> mat_contents['testdouble']
    array([[0.        , 0.78539816, 1.57079633, 2.35619449, 3.14159265,
            3.92699082, 4.71238898, 5.49778714, 6.28318531]])

    By default SciPy reads MATLAB structs as structured NumPy arrays where the
    dtype fields are of type `object` and the names correspond to the MATLAB
    struct field names. This can be disabled by setting the optional argument
    `struct_as_record=False`.

    Get the filename for an example .mat file that contains a MATLAB struct
    called `teststruct` and load the contents.

    >>> matstruct_fname = pjoin(data_dir, 'teststruct_7.4_GLNX86.mat')
    >>> matstruct_contents = sio.loadmat(matstruct_fname)
    >>> teststruct = matstruct_contents['teststruct']
    >>> teststruct.dtype
    dtype([('stringfield', 'O'), ('doublefield', 'O'), ('complexfield', 'O')])

    The size of the structured array is the size of the MATLAB struct, not the
    number of elements in any particular field. The shape defaults to 2-D
    unless the optional argument `squeeze_me=True`, in which case all length 1
    dimensions are removed.

    >>> teststruct.size
    1
    >>> teststruct.shape
    (1, 1)

    Get the 'stringfield' of the first element in the MATLAB struct.

    >>> teststruct[0, 0]['stringfield']
    array(['Rats live on no evil star.'],
          dtype='<U26')

    Get the first element of the 'doublefield'.

    >>> teststruct['doublefield'][0, 0]
    array([[ 1.41421356,  2.71828183,  3.14159265]])

    Load the MATLAB struct, squeezing out length 1 dimensions, and get the item
    from the 'complexfield'.

    >>> matstruct_squeezed = sio.loadmat(matstruct_fname, squeeze_me=True)
    >>> matstruct_squeezed['teststruct'].shape
    ()
    >>> matstruct_squeezed['teststruct']['complexfield'].shape
    ()
    >>> matstruct_squeezed['teststruct']['complexfield'].item()
    array([ 1.41421356+1.41421356j,  2.71828183+2.71828183j,
            3.14159265+3.14159265j])
    """
    # `variable_names` is consumed here rather than forwarded to the reader
    # constructor, since it only applies to get_variables().
    variable_names = kwargs.pop('variable_names', None)
    reader, file_opened = mat_reader_factory(file_name, appendmat, **kwargs)
    loaded = reader.get_variables(variable_names)
    if mdict is None:
        mdict = loaded
    else:
        mdict.update(loaded)
    # Only close streams this call opened; caller-supplied file objects are
    # left open for the caller to manage.
    if file_opened:
        reader.mat_stream.close()
    return mdict
|
||||
|
||||
|
||||
@docfiller
def savemat(file_name, mdict,
            appendmat=True,
            format='5',
            long_field_names=False,
            do_compression=False,
            oned_as='row'):
    """
    Save a dictionary of names and arrays into a MATLAB-style .mat file.

    This saves the array objects in the given dictionary to a MATLAB-
    style .mat file.

    Parameters
    ----------
    file_name : str or file-like object
        Name of the .mat file (.mat extension not needed if ``appendmat ==
        True``).
        Can also pass open file_like object.
    mdict : dict
        Dictionary from which to save matfile variables.
    appendmat : bool, optional
        True (the default) to append the .mat extension to the end of the
        given filename, if not already present.
    format : {'5', '4'}, string, optional
        '5' (the default) for MATLAB 5 and up (to 7.2),
        '4' for MATLAB 4 .mat files.
    long_field_names : bool, optional
        False (the default) - maximum field name length in a structure is
        31 characters which is the documented maximum length.
        True - maximum field name length in a structure is 63 characters
        which works for MATLAB 7.6+.
    do_compression : bool, optional
        Whether or not to compress matrices on write. Default is False.
    oned_as : {'row', 'column'}, optional
        If 'column', write 1-D numpy arrays as column vectors.
        If 'row', write 1-D numpy arrays as row vectors.

    Raises
    ------
    ValueError
        If `format` is not '4' or '5', or if `long_field_names` is
        requested for a version 4 file.

    See also
    --------
    mio4.MatFile4Writer
    mio5.MatFile5Writer
    """
    file_opened = False
    if hasattr(file_name, 'write'):
        # File-like object already; use as-is
        file_stream = file_name
    else:
        if isinstance(file_name, string_types):
            if appendmat and not file_name.endswith('.mat'):
                file_name = file_name + ".mat"

        file_stream = open(file_name, 'wb')
        file_opened = True

    # try/finally guarantees a stream we opened ourselves is closed even
    # when validation or the writer raises (previously it leaked on error).
    try:
        if format == '4':
            if long_field_names:
                raise ValueError("Long field names are not available for version 4 files")
            MW = MatFile4Writer(file_stream, oned_as)
        elif format == '5':
            MW = MatFile5Writer(file_stream,
                                do_compression=do_compression,
                                unicode_strings=True,
                                long_field_names=long_field_names,
                                oned_as=oned_as)
        else:
            raise ValueError("Format should be '4' or '5'")
        MW.put_variables(mdict)
    finally:
        # Only close streams we opened; caller-provided file objects stay
        # open for the caller to manage.
        if file_opened:
            file_stream.close()
|
||||
|
||||
|
||||
@docfiller
def whosmat(file_name, appendmat=True, **kwargs):
    """
    List variables inside a MATLAB file.

    Parameters
    ----------
    %(file_arg)s
    %(append_arg)s
    %(load_args)s
    %(struct_arg)s

    Returns
    -------
    variables : list of tuples
        A list of tuples, where each tuple holds the matrix name (a string),
        its shape (tuple of ints), and its data class (a string).
        Possible data classes are: int8, uint8, int16, uint16, int32, uint32,
        int64, uint64, single, double, cell, struct, object, char, sparse,
        function, opaque, logical, unknown.

    Notes
    -----
    v4 (Level 1.0), v6 and v7 to 7.2 matfiles are supported.

    You will need an HDF5 python library to read matlab 7.3 format mat
    files. Because scipy does not supply one, we do not implement the
    HDF5 / 7.3 interface here.

    .. versionadded:: 0.12.0

    """
    # Bug fix: `appendmat` was accepted but never forwarded, so
    # ``appendmat=False`` was silently ignored by the factory.
    ML, file_opened = mat_reader_factory(file_name, appendmat, **kwargs)
    try:
        variables = ML.list_variables()
    finally:
        # Close the stream we opened even if listing fails.
        if file_opened:
            ML.mat_stream.close()
    return variables
|
||||
@@ -1,618 +0,0 @@
|
||||
''' Classes for read / write of matlab (TM) 4 files
|
||||
'''
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
from numpy.compat import asbytes, asstr
|
||||
|
||||
import scipy.sparse
|
||||
|
||||
from scipy._lib.six import string_types
|
||||
|
||||
from .miobase import (MatFileReader, docfiller, matdims, read_dtype,
|
||||
convert_dtypes, arr_to_chars, arr_dtype_number)
|
||||
|
||||
from .mio_utils import squeeze_element, chars_to_strings
|
||||
from functools import reduce
|
||||
|
||||
|
||||
# True when the host stores multi-byte integers least-significant byte
# first; used to pick '<' vs '>' when guessing / writing byte order.
SYS_LITTLE_ENDIAN = sys.byteorder == 'little'

# Mat4 data type codes: the "P" digit of the MOPT header integer.
miDOUBLE = 0
miSINGLE = 1
miINT32 = 2
miINT16 = 3
miUINT16 = 4
miUINT8 = 5

# Map from mat4 type code to numpy dtype string; 'header' describes the
# fixed 20-byte variable header that precedes each matrix on disk.
mdtypes_template = {
    miDOUBLE: 'f8',
    miSINGLE: 'f4',
    miINT32: 'i4',
    miINT16: 'i2',
    miUINT16: 'u2',
    miUINT8: 'u1',
    'header': [('mopt', 'i4'),
               ('mrows', 'i4'),
               ('ncols', 'i4'),
               ('imagf', 'i4'),
               ('namlen', 'i4')],
    'U1': 'U1',
    }

# Reverse map: numpy dtype string (without byte-order char) -> mat4 type
# code used when writing.  Complex dtypes map to the matching real type
# because real and imaginary parts are written as separate blocks.
np_to_mtypes = {
    'f8': miDOUBLE,
    'c32': miDOUBLE,
    'c24': miDOUBLE,
    'c16': miDOUBLE,
    'f4': miSINGLE,
    'c8': miSINGLE,
    'i4': miINT32,
    'i2': miINT16,
    'u2': miUINT16,
    'u1': miUINT8,
    'S1': miUINT8,
    }

# matrix classes: the "T" digit of the MOPT header integer.
mxFULL_CLASS = 0
mxCHAR_CLASS = 1
mxSPARSE_CLASS = 2

# Byte-order codes ("M" digit of MOPT); only 0 and 1 are supported, the
# others are listed so an informative warning can name them.
order_codes = {
    0: '<',
    1: '>',
    2: 'VAX D-float',  # !
    3: 'VAX G-float',
    4: 'Cray',  # !!
    }

# Human-readable class name for each matrix class, as reported by whosmat.
mclass_info = {
    mxFULL_CLASS: 'double',
    mxCHAR_CLASS: 'char',
    mxSPARSE_CLASS: 'sparse',
    }
|
||||
|
||||
|
||||
class VarHeader4(object):
    """Plain container for the header of one matlab 4 variable.

    Holds the name, numpy dtype, matrix class code, on-disk dimensions
    and complex flag decoded from the MOPT header.
    """
    # Mat4 files cannot mark variables as logical or global, so these are
    # constant class attributes rather than per-instance state.
    is_logical = False
    is_global = False

    def __init__(self, name, dtype, mclass, dims, is_complex):
        self.name = name
        self.dtype = dtype
        self.mclass = mclass
        self.dims = dims
        self.is_complex = is_complex
|
||||
|
||||
|
||||
class VarReader4(object):
    ''' Class to read matlab 4 variables '''

    def __init__(self, file_reader):
        # Borrow the stream and read options from the owning file reader
        # so this object is cheap to construct per read pass.
        self.file_reader = file_reader
        self.mat_stream = file_reader.mat_stream
        self.dtypes = file_reader.dtypes
        self.chars_as_strings = file_reader.chars_as_strings
        self.squeeze_me = file_reader.squeeze_me

    def read_header(self):
        ''' Read and return header for variable '''
        data = read_dtype(self.mat_stream, self.dtypes['header'])
        # The NUL-terminated name directly follows the fixed header.
        name = self.mat_stream.read(int(data['namlen'])).strip(b'\x00')
        if data['mopt'] < 0 or data['mopt'] > 5000:
            raise ValueError('Mat 4 mopt wrong format, byteswapping problem?')
        # MOPT packs four decimal digits: M (byte order), O (unused),
        # P (data type), T (matrix class); peel them off with divmod.
        M, rest = divmod(data['mopt'], 1000)  # order code
        if M not in (0, 1):
            warnings.warn("We do not support byte ordering '%s'; returned "
                          "data may be corrupt" % order_codes[M],
                          UserWarning)
        O, rest = divmod(rest, 100)  # unused, should be 0
        if O != 0:
            raise ValueError('O in MOPT integer should be 0, wrong format?')
        P, rest = divmod(rest, 10)  # data type code e.g miDOUBLE (see above)
        T = rest  # matrix type code e.g. mxFULL_CLASS (see above)
        dims = (data['mrows'], data['ncols'])
        is_complex = data['imagf'] == 1
        dtype = self.dtypes[P]
        return VarHeader4(
            name,
            dtype,
            T,
            dims,
            is_complex)

    def array_from_header(self, hdr, process=True):
        # Dispatch on the mat4 matrix class; `process` enables the
        # user-requested post-processing (chars -> strings, squeezing).
        mclass = hdr.mclass
        if mclass == mxFULL_CLASS:
            arr = self.read_full_array(hdr)
        elif mclass == mxCHAR_CLASS:
            arr = self.read_char_array(hdr)
            if process and self.chars_as_strings:
                arr = chars_to_strings(arr)
        elif mclass == mxSPARSE_CLASS:
            # no current processing (below) makes sense for sparse
            return self.read_sparse_array(hdr)
        else:
            raise TypeError('No reader for class code %s' % mclass)
        if process and self.squeeze_me:
            return squeeze_element(arr)
        return arr

    def read_sub_array(self, hdr, copy=True):
        ''' Mat4 read using header `hdr` dtype and dims

        Parameters
        ----------
        hdr : object
           object with attributes ``dtype``, ``dims``. dtype is assumed to be
           the correct endianness
        copy : bool, optional
           copies array before return if True (default True)
           (buffer is usually read only)

        Returns
        -------
        arr : ndarray
            of dtype given by `hdr` ``dtype`` and shape given by `hdr` ``dims``
        '''
        dt = hdr.dtype
        dims = hdr.dims
        # Total byte count = itemsize * product of dimensions.
        num_bytes = dt.itemsize
        for d in dims:
            num_bytes *= d
        buffer = self.mat_stream.read(int(num_bytes))
        if len(buffer) != num_bytes:
            raise ValueError("Not enough bytes to read matrix '%s'; is this "
                             "a badly-formed file? Consider listing matrices "
                             "with `whosmat` and loading named matrices with "
                             "`variable_names` kwarg to `loadmat`" % hdr.name)
        # mat4 matrices are stored column-major (Fortran order) on disk.
        arr = np.ndarray(shape=dims,
                         dtype=dt,
                         buffer=buffer,
                         order='F')
        if copy:
            arr = arr.copy()
        return arr

    def read_full_array(self, hdr):
        ''' Full (rather than sparse) matrix getter

        Read matrix (array) can be real or complex

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ndarray
            complex array if ``hdr.is_complex`` is True, otherwise a real
            numeric array
        '''
        if hdr.is_complex:
            # avoid array copy to save memory
            # Real block is stored first, then the imaginary block.
            res = self.read_sub_array(hdr, copy=False)
            res_j = self.read_sub_array(hdr, copy=False)
            return res + (res_j * 1j)
        return self.read_sub_array(hdr)

    def read_char_array(self, hdr):
        ''' latin-1 text matrix (char matrix) reader

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ndarray
            with dtype 'U1', shape given by `hdr` ``dims``
        '''
        # Stored as numeric codes; decode via latin-1 into a unicode
        # char array of the same shape.
        arr = self.read_sub_array(hdr).astype(np.uint8)
        S = arr.tostring().decode('latin-1')
        return np.ndarray(shape=hdr.dims,
                          dtype=np.dtype('U1'),
                          buffer=np.array(S)).copy()

    def read_sparse_array(self, hdr):
        ''' Read and return sparse matrix type

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ``scipy.sparse.coo_matrix``
            with dtype ``float`` and shape read from the sparse matrix data

        Notes
        -----
        MATLAB 4 real sparse arrays are saved in a N+1 by 3 array format, where
        N is the number of non-zero values. Column 1 values [0:N] are the
        (1-based) row indices of the each non-zero value, column 2 [0:N] are the
        column indices, column 3 [0:N] are the (real) values. The last values
        [-1,0:2] of the rows, column indices are shape[0] and shape[1]
        respectively of the output matrix. The last value for the values column
        is a padding 0. mrows and ncols values from the header give the shape of
        the stored matrix, here [N+1, 3]. Complex data is saved as a 4 column
        matrix, where the fourth column contains the imaginary component; the
        last value is again 0. Complex sparse data do *not* have the header
        ``imagf`` field set to True; the fact that the data are complex is only
        detectable because there are 4 storage columns
        '''
        res = self.read_sub_array(hdr)
        # Last row holds the matrix shape, not data (see Notes above).
        tmp = res[:-1,:]
        # All numbers are float64 in Matlab, but Scipy sparse expects int shape
        dims = (int(res[-1,0]), int(res[-1,1]))
        I = np.ascontiguousarray(tmp[:,0],dtype='intc')  # fixes byte order also
        J = np.ascontiguousarray(tmp[:,1],dtype='intc')
        I -= 1  # for 1-based indexing
        J -= 1
        if res.shape[1] == 3:
            V = np.ascontiguousarray(tmp[:,2],dtype='float')
        else:
            # 4 columns => complex values; imagf is not set for sparse.
            V = np.ascontiguousarray(tmp[:,2],dtype='complex')
            V.imag = tmp[:,3]
        return scipy.sparse.coo_matrix((V,(I,J)), dims)

    def shape_from_header(self, hdr):
        '''Read the shape of the array described by the header.
        The file position after this call is unspecified.
        '''
        mclass = hdr.mclass
        if mclass == mxFULL_CLASS:
            shape = tuple(map(int, hdr.dims))
        elif mclass == mxCHAR_CLASS:
            shape = tuple(map(int, hdr.dims))
            if self.chars_as_strings:
                # Last dimension becomes the string length after
                # chars_to_strings, so drop it from the reported shape.
                shape = shape[:-1]
        elif mclass == mxSPARSE_CLASS:
            dt = hdr.dtype
            dims = hdr.dims

            if not (len(dims) == 2 and dims[0] >= 1 and dims[1] >= 1):
                return ()

            # Read only the row and column counts
            # (they live in the last row of the first two on-disk columns).
            self.mat_stream.seek(dt.itemsize * (dims[0] - 1), 1)
            rows = np.ndarray(shape=(1,), dtype=dt,
                              buffer=self.mat_stream.read(dt.itemsize))
            self.mat_stream.seek(dt.itemsize * (dims[0] - 1), 1)
            cols = np.ndarray(shape=(1,), dtype=dt,
                              buffer=self.mat_stream.read(dt.itemsize))

            shape = (int(rows), int(cols))
        else:
            raise TypeError('No reader for class code %s' % mclass)

        if self.squeeze_me:
            shape = tuple([x for x in shape if x != 1])
        return shape
|
||||
|
||||
|
||||
class MatFile4Reader(MatFileReader):
    ''' Reader for Mat4 files '''
    @docfiller
    def __init__(self, mat_stream, *args, **kwargs):
        ''' Initialize matlab 4 file reader

    %(matstream_arg)s
    %(load_args)s
        '''
        super(MatFile4Reader, self).__init__(mat_stream, *args, **kwargs)
        # Created lazily by initialize_read, once byte order is known.
        self._matrix_reader = None

    def guess_byte_order(self):
        # Peek at the first MOPT integer: a value outside the valid
        # 0..5000 range means it was written with the opposite byte order
        # to this machine's.
        self.mat_stream.seek(0)
        mopt = read_dtype(self.mat_stream, np.dtype('i4'))
        self.mat_stream.seek(0)
        if mopt == 0:
            return '<'
        if mopt < 0 or mopt > 5000:
            # Number must have been byteswapped
            return SYS_LITTLE_ENDIAN and '>' or '<'
        # Not byteswapped
        return SYS_LITTLE_ENDIAN and '<' or '>'

    def initialize_read(self):
        ''' Run when beginning read of variables

        Sets up readers from parameters in `self`
        '''
        self.dtypes = convert_dtypes(mdtypes_template, self.byte_order)
        self._matrix_reader = VarReader4(self)

    def read_var_header(self):
        ''' Read and return header, next position

        Parameters
        ----------
        None

        Returns
        -------
        header : object
           object that can be passed to self.read_var_array, and that
           has attributes ``name`` and ``is_global``
        next_position : int
           position in stream of next variable
        '''
        hdr = self._matrix_reader.read_header()
        n = reduce(lambda x, y: x*y, hdr.dims, 1)  # fast product
        remaining_bytes = hdr.dtype.itemsize * n
        if hdr.is_complex and not hdr.mclass == mxSPARSE_CLASS:
            # Complex data is a real block followed by an equally-sized
            # imaginary block.
            remaining_bytes *= 2
        next_position = self.mat_stream.tell() + remaining_bytes
        return hdr, next_position

    def read_var_array(self, header, process=True):
        ''' Read array, given `header`

        Parameters
        ----------
        header : header object
           object with fields defining variable header
        process : {True, False}, optional
           If True, apply recursive post-processing during loading of array.

        Returns
        -------
        arr : array
           array with post-processing applied or not according to
           `process`.
        '''
        return self._matrix_reader.array_from_header(header, process)

    def get_variables(self, variable_names=None):
        ''' get variables from stream as dictionary

        Parameters
        ----------
        variable_names : None or str or sequence of str, optional
            variable name, or sequence of variable names to get from Mat file /
            file stream. If None, then get all variables in file
        '''
        if isinstance(variable_names, string_types):
            variable_names = [variable_names]
        elif variable_names is not None:
            # Copy so that removing found names below does not mutate the
            # caller's sequence.
            variable_names = list(variable_names)
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        mdict = {}
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = asstr(hdr.name)
            if variable_names is not None and name not in variable_names:
                # Skip unwanted variable without reading its data.
                self.mat_stream.seek(next_position)
                continue
            mdict[name] = self.read_var_array(hdr)
            self.mat_stream.seek(next_position)
            if variable_names is not None:
                variable_names.remove(name)
                if len(variable_names) == 0:
                    # All requested variables found; stop scanning.
                    break
        return mdict

    def list_variables(self):
        ''' list variables from stream '''
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        # Renamed from ``vars``, which shadowed the builtin of that name.
        variables = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = asstr(hdr.name)
            shape = self._matrix_reader.shape_from_header(hdr)
            info = mclass_info.get(hdr.mclass, 'unknown')
            variables.append((name, shape, info))

            self.mat_stream.seek(next_position)
        return variables
|
||||
|
||||
|
||||
def arr_to_2d(arr, oned_as='row'):
    ''' Make ``arr`` exactly two dimensional

    If `arr` has more than 2 dimensions, raise a ValueError

    Parameters
    ----------
    arr : array
    oned_as : {'row', 'column'}, optional
       Whether to reshape 1D vectors as row vectors or column vectors.
       See documentation for ``matdims`` for more detail

    Returns
    -------
    arr2d : array
       2D version of the array
    '''
    # matdims maps the input shape to the 2-D shape matlab would use.
    target_shape = matdims(arr, oned_as)
    if len(target_shape) > 2:
        raise ValueError('Matlab 4 files cannot save arrays with more than '
                         '2 dimensions')
    return arr.reshape(target_shape)
|
||||
|
||||
|
||||
class VarWriter4(object):
    # Writer for individual variables in a matlab 4 file; created by
    # MatFile4Writer.put_variables with the writer itself as source of
    # the output stream and the `oned_as` setting.
    def __init__(self, file_writer):
        self.file_stream = file_writer.file_stream
        self.oned_as = file_writer.oned_as

    def write_bytes(self, arr):
        # mat4 matrices are stored column-major (Fortran order) on disk.
        self.file_stream.write(arr.tostring(order='F'))

    def write_string(self, s):
        # Write raw bytes (e.g. the NUL-terminated variable name).
        self.file_stream.write(s)

    def write_header(self, name, shape, P=miDOUBLE, T=mxFULL_CLASS, imagf=0):
        ''' Write header for given data options

        Parameters
        ----------
        name : str
            name of variable
        shape : sequence
           Shape of array as it will be read in matlab
        P : int, optional
            code for mat4 data type, one of ``miDOUBLE, miSINGLE, miINT32,
            miINT16, miUINT16, miUINT8``
        T : int, optional
            code for mat4 matrix class, one of ``mxFULL_CLASS, mxCHAR_CLASS,
            mxSPARSE_CLASS``
        imagf : int, optional
            flag indicating complex
        '''
        header = np.empty((), mdtypes_template['header'])
        # MOPT digits: M = byte order of this machine, O = always 0.
        M = not SYS_LITTLE_ENDIAN
        O = 0
        header['mopt'] = (M * 1000 +
                          O * 100 +
                          P * 10 +
                          T)
        header['mrows'] = shape[0]
        header['ncols'] = shape[1]
        header['imagf'] = imagf
        # namlen counts the trailing NUL terminator as well.
        header['namlen'] = len(name) + 1
        self.write_bytes(header)
        self.write_string(asbytes(name + '\0'))

    def write(self, arr, name):
        ''' Write matrix `arr`, with name `name`

        Parameters
        ----------
        arr : array_like
           array to write
        name : str
           name in matlab workspace
        '''
        # we need to catch sparse first, because np.asarray returns an
        # an object array for scipy.sparse
        if scipy.sparse.issparse(arr):
            self.write_sparse(arr, name)
            return
        arr = np.asarray(arr)
        dt = arr.dtype
        if not dt.isnative:
            # Convert to native byte order before writing.
            arr = arr.astype(dt.newbyteorder('='))
        dtt = dt.type
        if dtt is np.object_:
            raise TypeError('Cannot save object arrays in Mat4')
        elif dtt is np.void:
            raise TypeError('Cannot save void type arrays')
        elif dtt in (np.unicode_, np.string_):
            self.write_char(arr, name)
            return
        self.write_numeric(arr, name)

    def write_numeric(self, arr, name):
        # Numeric (full, non-char) matrix writer.
        arr = arr_to_2d(arr, self.oned_as)
        imagf = arr.dtype.kind == 'c'
        try:
            P = np_to_mtypes[arr.dtype.str[1:]]
        except KeyError:
            # Unsupported dtype: fall back to double (complex double if
            # the data are complex).
            if imagf:
                arr = arr.astype('c128')
            else:
                arr = arr.astype('f8')
            P = miDOUBLE
        self.write_header(name,
                          arr.shape,
                          P=P,
                          T=mxFULL_CLASS,
                          imagf=imagf)
        if imagf:
            # Real block first, then the imaginary block.
            self.write_bytes(arr.real)
            self.write_bytes(arr.imag)
        else:
            self.write_bytes(arr)

    def write_char(self, arr, name):
        # Character matrix writer: stored as uint8 latin-1 codes.
        arr = arr_to_chars(arr)
        arr = arr_to_2d(arr, self.oned_as)
        dims = arr.shape
        self.write_header(
            name,
            dims,
            P=miUINT8,
            T=mxCHAR_CLASS)
        if arr.dtype.kind == 'U':
            # Recode unicode to latin1
            n_chars = np.product(dims)
            st_arr = np.ndarray(shape=(),
                                dtype=arr_dtype_number(arr, n_chars),
                                buffer=arr)
            st = st_arr.item().encode('latin-1')
            arr = np.ndarray(shape=dims, dtype='S1', buffer=st)
        self.write_bytes(arr)

    def write_sparse(self, arr, name):
        ''' Sparse matrices are 2D

        See docstring for VarReader4.read_sparse_array
        '''
        A = arr.tocoo()  # convert to sparse COO format (ijv)
        imagf = A.dtype.kind == 'c'
        # N+1 rows: last row stores the matrix shape; 4th column (if
        # complex) stores the imaginary parts.
        ijv = np.zeros((A.nnz + 1, 3+imagf), dtype='f8')
        ijv[:-1,0] = A.row
        ijv[:-1,1] = A.col
        ijv[:-1,0:2] += 1  # 1 based indexing
        if imagf:
            ijv[:-1,2] = A.data.real
            ijv[:-1,3] = A.data.imag
        else:
            ijv[:-1,2] = A.data
        ijv[-1,0:2] = A.shape
        self.write_header(
            name,
            ijv.shape,
            P=miDOUBLE,
            T=mxSPARSE_CLASS)
        self.write_bytes(ijv)
|
||||
|
||||
|
||||
class MatFile4Writer(object):
    ''' Class for writing matlab 4 format files '''

    def __init__(self, file_stream, oned_as=None):
        # `oned_as` controls how 1-D arrays are written ('row' default).
        self.file_stream = file_stream
        self.oned_as = 'row' if oned_as is None else oned_as
        self._matrix_writer = None

    def put_variables(self, mdict, write_header=None):
        ''' Write variables in `mdict` to stream

        Parameters
        ----------
        mdict : mapping
           mapping with method ``items`` returning name, contents pairs,
           where ``name`` is the name that will appear in the matlab
           workspace on file load, and ``contents`` is something
           writeable to a matlab file, such as a numpy array.
        write_header : {None, True, False}
           If True, then write the matlab file header before writing the
           variables. If None (the default) then write the file header
           if we are at position 0 in the stream. By setting False
           here, and setting the stream position to the end of the file,
           you can append variables to a matlab file
        '''
        # there is no header for a matlab 4 mat file, so we ignore the
        # ``write_header`` input argument. It's there for compatibility
        # with the matlab 5 version of this method
        self._matrix_writer = VarWriter4(self)
        for var_name, var_contents in mdict.items():
            self._matrix_writer.write(var_contents, var_name)
|
||||
@@ -1,849 +0,0 @@
|
||||
''' Classes for read / write of matlab (TM) 5 files
|
||||
|
||||
The matfile specification last found here:
|
||||
|
||||
https://www.mathworks.com/access/helpdesk/help/pdf_doc/matlab/matfile_format.pdf
|
||||
|
||||
(as of December 5 2008)
|
||||
'''
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
'''
|
||||
=================================
|
||||
Note on functions and mat files
|
||||
=================================
|
||||
|
||||
The document above does not give any hints as to the storage of matlab
|
||||
function handles, or anonymous function handles. I had therefore to
|
||||
guess the format of matlab arrays of ``mxFUNCTION_CLASS`` and
|
||||
``mxOPAQUE_CLASS`` by looking at example mat files.
|
||||
|
||||
``mxFUNCTION_CLASS`` stores all types of matlab functions. It seems to
|
||||
contain a struct matrix with a set pattern of fields. For anonymous
|
||||
functions, a sub-fields of one of these fields seems to contain the
|
||||
well-named ``mxOPAQUE_CLASS``. This seems to contain:
|
||||
|
||||
* array flags as for any matlab matrix
|
||||
* 3 int8 strings
|
||||
* a matrix
|
||||
|
||||
It seems that, whenever the mat file contains a ``mxOPAQUE_CLASS``
|
||||
instance, there is also an un-named matrix (name == '') at the end of
|
||||
the mat file. I'll call this the ``__function_workspace__`` matrix.
|
||||
|
||||
When I saved two anonymous functions in a mat file, or appended another
|
||||
anonymous function to the mat file, there was still only one
|
||||
``__function_workspace__`` un-named matrix at the end, but larger than
|
||||
that for a mat file with a single anonymous function, suggesting that
|
||||
the workspaces for the two functions had been merged.
|
||||
|
||||
The ``__function_workspace__`` matrix appears to be of double class
|
||||
(``mxCLASS_DOUBLE``), but stored as uint8, the memory for which is in
|
||||
the format of a mini .mat file, without the first 124 bytes of the file
|
||||
header (the description and the subsystem_offset), but with the version
|
||||
U2 bytes, and the S2 endian test bytes. There follow 4 zero bytes,
|
||||
presumably for 8 byte padding, and then a series of ``miMATRIX``
|
||||
entries, as in a standard mat file. The ``miMATRIX`` entries appear to
|
||||
be series of un-named (name == '') matrices, and may also contain arrays
|
||||
of this same mini-mat format.
|
||||
|
||||
I guess that:
|
||||
|
||||
* saving an anonymous function back to a mat file will need the
|
||||
associated ``__function_workspace__`` matrix saved as well for the
|
||||
anonymous function to work correctly.
|
||||
* appending to a mat file that has a ``__function_workspace__`` would
|
||||
involve first pulling off this workspace, appending, checking whether
|
||||
there were any more anonymous functions appended, and then somehow
|
||||
merging the relevant workspaces, and saving at the end of the mat
|
||||
file.
|
||||
|
||||
The mat files I was playing with are in ``tests/data``:
|
||||
|
||||
* sqr.mat
|
||||
* parabola.mat
|
||||
* some_functions.mat
|
||||
|
||||
See ``tests/test_mio.py:test_mio_funcs.py`` for a debugging
|
||||
script I was working with.
|
||||
|
||||
'''
|
||||
|
||||
# Small fragments of current code adapted from matfile.py by Heiko
|
||||
# Henkelmann
|
||||
|
||||
import os
|
||||
import time
|
||||
import sys
|
||||
import zlib
|
||||
|
||||
from io import BytesIO
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
from numpy.compat import asbytes, asstr
|
||||
|
||||
import scipy.sparse
|
||||
|
||||
from scipy._lib.six import string_types
|
||||
|
||||
from .byteordercodes import native_code, swapped_code
|
||||
|
||||
from .miobase import (MatFileReader, docfiller, matdims, read_dtype,
|
||||
arr_to_chars, arr_dtype_number, MatWriteError,
|
||||
MatReadError, MatReadWarning)
|
||||
|
||||
# Reader object for matlab 5 format variables
|
||||
from .mio5_utils import VarReader5
|
||||
|
||||
# Constants and helper objects
|
||||
from .mio5_params import (MatlabObject, MatlabFunction, MDTYPES, NP_TO_MTYPES,
|
||||
NP_TO_MXTYPES, miCOMPRESSED, miMATRIX, miINT8,
|
||||
miUTF8, miUINT32, mxCELL_CLASS, mxSTRUCT_CLASS,
|
||||
mxOBJECT_CLASS, mxCHAR_CLASS, mxSPARSE_CLASS,
|
||||
mxDOUBLE_CLASS, mclass_info)
|
||||
|
||||
from .streams import ZlibInputStream
|
||||
|
||||
|
||||
class MatFile5Reader(MatFileReader):
|
||||
''' Reader for Mat 5 mat files
|
||||
Adds the following attribute to base class
|
||||
|
||||
uint16_codec - char codec to use for uint16 char arrays
|
||||
(defaults to system default codec)
|
||||
|
||||
Uses variable reader that has the following stardard interface (see
|
||||
abstract class in ``miobase``::
|
||||
|
||||
__init__(self, file_reader)
|
||||
read_header(self)
|
||||
array_from_header(self)
|
||||
|
||||
and added interface::
|
||||
|
||||
set_stream(self, stream)
|
||||
read_full_tag(self)
|
||||
|
||||
'''
|
||||
@docfiller
|
||||
def __init__(self,
|
||||
mat_stream,
|
||||
byte_order=None,
|
||||
mat_dtype=False,
|
||||
squeeze_me=False,
|
||||
chars_as_strings=True,
|
||||
matlab_compatible=False,
|
||||
struct_as_record=True,
|
||||
verify_compressed_data_integrity=True,
|
||||
uint16_codec=None
|
||||
):
|
||||
'''Initializer for matlab 5 file format reader
|
||||
|
||||
%(matstream_arg)s
|
||||
%(load_args)s
|
||||
%(struct_arg)s
|
||||
uint16_codec : {None, string}
|
||||
Set codec to use for uint16 char arrays (e.g. 'utf-8').
|
||||
Use system default codec if None
|
||||
'''
|
||||
super(MatFile5Reader, self).__init__(
|
||||
mat_stream,
|
||||
byte_order,
|
||||
mat_dtype,
|
||||
squeeze_me,
|
||||
chars_as_strings,
|
||||
matlab_compatible,
|
||||
struct_as_record,
|
||||
verify_compressed_data_integrity
|
||||
)
|
||||
# Set uint16 codec
|
||||
if not uint16_codec:
|
||||
uint16_codec = sys.getdefaultencoding()
|
||||
self.uint16_codec = uint16_codec
|
||||
# placeholders for readers - see initialize_read method
|
||||
self._file_reader = None
|
||||
self._matrix_reader = None
|
||||
|
||||
def guess_byte_order(self):
|
||||
''' Guess byte order.
|
||||
Sets stream pointer to 0 '''
|
||||
self.mat_stream.seek(126)
|
||||
mi = self.mat_stream.read(2)
|
||||
self.mat_stream.seek(0)
|
||||
return mi == b'IM' and '<' or '>'
|
||||
|
||||
def read_file_header(self):
|
||||
''' Read in mat 5 file header '''
|
||||
hdict = {}
|
||||
hdr_dtype = MDTYPES[self.byte_order]['dtypes']['file_header']
|
||||
hdr = read_dtype(self.mat_stream, hdr_dtype)
|
||||
hdict['__header__'] = hdr['description'].item().strip(b' \t\n\000')
|
||||
v_major = hdr['version'] >> 8
|
||||
v_minor = hdr['version'] & 0xFF
|
||||
hdict['__version__'] = '%d.%d' % (v_major, v_minor)
|
||||
return hdict
|
||||
|
||||
def initialize_read(self):
|
||||
''' Run when beginning read of variables
|
||||
|
||||
Sets up readers from parameters in `self`
|
||||
'''
|
||||
# reader for top level stream. We need this extra top-level
|
||||
# reader because we use the matrix_reader object to contain
|
||||
# compressed matrices (so they have their own stream)
|
||||
self._file_reader = VarReader5(self)
|
||||
# reader for matrix streams
|
||||
self._matrix_reader = VarReader5(self)
|
||||
|
||||
def read_var_header(self):
|
||||
''' Read header, return header, next position
|
||||
|
||||
Header has to define at least .name and .is_global
|
||||
|
||||
Parameters
|
||||
----------
|
||||
None
|
||||
|
||||
Returns
|
||||
-------
|
||||
header : object
|
||||
object that can be passed to self.read_var_array, and that
|
||||
has attributes .name and .is_global
|
||||
next_position : int
|
||||
position in stream of next variable
|
||||
'''
|
||||
mdtype, byte_count = self._file_reader.read_full_tag()
|
||||
if not byte_count > 0:
|
||||
raise ValueError("Did not read any bytes")
|
||||
next_pos = self.mat_stream.tell() + byte_count
|
||||
if mdtype == miCOMPRESSED:
|
||||
# Make new stream from compressed data
|
||||
stream = ZlibInputStream(self.mat_stream, byte_count)
|
||||
self._matrix_reader.set_stream(stream)
|
||||
check_stream_limit = self.verify_compressed_data_integrity
|
||||
mdtype, byte_count = self._matrix_reader.read_full_tag()
|
||||
else:
|
||||
check_stream_limit = False
|
||||
self._matrix_reader.set_stream(self.mat_stream)
|
||||
if not mdtype == miMATRIX:
|
||||
raise TypeError('Expecting miMATRIX type here, got %d' % mdtype)
|
||||
header = self._matrix_reader.read_header(check_stream_limit)
|
||||
return header, next_pos
|
||||
|
||||
    def read_var_array(self, header, process=True):
        ''' Read array, given `header`

        Parameters
        ----------
        header : header object
           object with fields defining variable header
        process : {True, False} bool, optional
           If True, apply recursive post-processing during loading of
           array.

        Returns
        -------
        arr : array
           array with post-processing applied or not according to
           `process`.
        '''
        # Delegate to the matrix reader configured by read_var_header.
        return self._matrix_reader.array_from_header(header, process)
|
||||
|
||||
    def get_variables(self, variable_names=None):
        ''' get variables from stream as dictionary

        Parameters
        ----------
        variable_names : None or str or sequence of str, optional
            Name(s) of variables to get.  If None (the default), get all
            variables in the file.

        Returns
        -------
        mdict : dict
            Mapping of variable name to loaded value; also contains a
            ``__globals__`` list plus the entries returned by
            ``read_file_header``.
        '''
        if isinstance(variable_names, string_types):
            # Accept a single name as well as a sequence of names.
            variable_names = [variable_names]
        elif variable_names is not None:
            # Copy, because names are removed from this list when found.
            variable_names = list(variable_names)

        self.mat_stream.seek(0)
        # Here we pass all the parameters in self to the reading objects
        self.initialize_read()
        mdict = self.read_file_header()
        mdict['__globals__'] = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = asstr(hdr.name)
            if name in mdict:
                warnings.warn('Duplicate variable name "%s" in stream'
                              ' - replacing previous with new\n'
                              'Consider mio5.varmats_from_mat to split '
                              'file into single variable files' % name,
                              MatReadWarning, stacklevel=2)
            if name == '':
                # can only be a matlab 7 function workspace
                name = '__function_workspace__'
                # We want to keep this raw because mat_dtype processing
                # will break the format (uint8 as mxDOUBLE_CLASS)
                process = False
            else:
                process = True
            if variable_names is not None and name not in variable_names:
                # Not requested; skip to the start of the next variable.
                self.mat_stream.seek(next_position)
                continue
            try:
                res = self.read_var_array(hdr, process)
            except MatReadError as err:
                # Best-effort load: keep going, record the error message
                # in place of the variable's value.
                warnings.warn(
                    'Unreadable variable "%s", because "%s"' %
                    (name, err),
                    Warning, stacklevel=2)
                res = "Read error: %s" % err
            self.mat_stream.seek(next_position)
            mdict[name] = res
            if hdr.is_global:
                mdict['__globals__'].append(name)
            if variable_names is not None:
                variable_names.remove(name)
                if len(variable_names) == 0:
                    # All requested variables found; stop early.
                    break
        return mdict
|
||||
|
||||
def list_variables(self):
|
||||
''' list variables from stream '''
|
||||
self.mat_stream.seek(0)
|
||||
# Here we pass all the parameters in self to the reading objects
|
||||
self.initialize_read()
|
||||
self.read_file_header()
|
||||
vars = []
|
||||
while not self.end_of_stream():
|
||||
hdr, next_position = self.read_var_header()
|
||||
name = asstr(hdr.name)
|
||||
if name == '':
|
||||
# can only be a matlab 7 function workspace
|
||||
name = '__function_workspace__'
|
||||
|
||||
shape = self._matrix_reader.shape_from_header(hdr)
|
||||
if hdr.is_logical:
|
||||
info = 'logical'
|
||||
else:
|
||||
info = mclass_info.get(hdr.mclass, 'unknown')
|
||||
vars.append((name, shape, info))
|
||||
|
||||
self.mat_stream.seek(next_position)
|
||||
return vars
|
||||
|
||||
|
||||
def varmats_from_mat(file_obj):
    """ Pull variables out of mat 5 file as a sequence of mat file objects

    This can be useful with a difficult mat file, containing unreadable
    variables.  This routine pulls the variables out in raw form and puts them,
    unread, back into a file stream for saving or reading.  Another use is the
    pathological case where there is more than one variable of the same name in
    the file; this routine returns the duplicates, whereas the standard reader
    will overwrite duplicates in the returned dictionary.

    The file pointer in `file_obj` will be undefined.  File pointers for the
    returned file-like objects are set at 0.

    Parameters
    ----------
    file_obj : file-like
        file object containing mat file

    Returns
    -------
    named_mats : list
        list contains tuples of (name, BytesIO) where BytesIO is a file-like
        object containing mat file contents as for a single variable.  The
        BytesIO contains a string with the original header and a single var. If
        ``var_file_obj`` is an individual BytesIO instance, then save as a mat
        file with something like ``open('test.mat',
        'wb').write(var_file_obj.read())``

    Examples
    --------
    >>> import scipy.io

    BytesIO is from the ``io`` module in python 3, and is ``cStringIO`` for
    python < 3.

    >>> mat_fileobj = BytesIO()
    >>> scipy.io.savemat(mat_fileobj, {'b': np.arange(10), 'a': 'a string'})
    >>> varmats = varmats_from_mat(mat_fileobj)
    >>> sorted([name for name, str_obj in varmats])
    ['a', 'b']
    """
    rdr = MatFile5Reader(file_obj)
    file_obj.seek(0)
    # Raw read of top-level file header; replayed verbatim at the start
    # of every single-variable output file.
    hdr_len = MDTYPES[native_code]['dtypes']['file_header'].itemsize
    raw_hdr = file_obj.read(hdr_len)
    # Initialize variable reading
    file_obj.seek(0)
    rdr.initialize_read()
    # Return value unused; the call advances the stream past the header.
    mdict = rdr.read_file_header()
    next_position = file_obj.tell()
    named_mats = []
    while not rdr.end_of_stream():
        start_position = next_position
        hdr, next_position = rdr.read_var_header()
        name = asstr(hdr.name)
        # Read raw variable string (tag + data, uninterpreted).
        file_obj.seek(start_position)
        byte_count = next_position - start_position
        var_str = file_obj.read(byte_count)
        # write to stringio object
        out_obj = BytesIO()
        out_obj.write(raw_hdr)
        out_obj.write(var_str)
        out_obj.seek(0)
        named_mats.append((name, out_obj))
    return named_mats
|
||||
|
||||
|
||||
class EmptyStructMarker(object):
    """Sentinel type indicating an empty matlab struct should be
    written on output."""
|
||||
|
||||
|
||||
def to_writeable(source):
    ''' Convert input object ``source`` to something we can write

    Parameters
    ----------
    source : object
        Object to convert: ndarray, None, mapping, object with a
        ``__dict__``, or anything ``np.asanyarray`` accepts.

    Returns
    -------
    arr : None or ndarray or EmptyStructMarker
        If `source` cannot be converted to something we can write to a matfile,
        return None.  If `source` is equivalent to an empty dictionary, return
        ``EmptyStructMarker``.  Otherwise return `source` converted to an
        ndarray with contents for writing to matfile.
    '''
    if isinstance(source, np.ndarray):
        return source
    if source is None:
        return None
    # Objects that implement mappings
    is_mapping = (hasattr(source, 'keys') and hasattr(source, 'values') and
                  hasattr(source, 'items'))
    # Objects that don't implement mappings, but do have dicts
    if isinstance(source, np.generic):
        # Numpy scalars are never mappings (pypy issue workaround)
        pass
    elif not is_mapping and hasattr(source, '__dict__'):
        # Treat a generic object's public attributes as struct fields.
        source = dict((key, value) for key, value in source.__dict__.items()
                      if not key.startswith('_'))
        is_mapping = True
    if is_mapping:
        dtype = []
        values = []
        for field, value in source.items():
            # Only keep string field names that are legal matlab
            # identifiers (no leading underscore or digit).
            if (isinstance(field, string_types) and
                    field[0] not in '_0123456789'):
                dtype.append((str(field), object))
                values.append(value)
        if dtype:
            # 1x1 record array -> written as a matlab struct.
            return np.array([tuple(values)], dtype)
        else:
            return EmptyStructMarker
    # Next try and convert to an array
    narr = np.asanyarray(source)
    if narr.dtype.type in (object, np.object_) and \
       narr.shape == () and narr == source:
        # No interesting conversion possible
        return None
    return narr
|
||||
|
||||
|
||||
# Native byte ordered dtypes for convenience for writers
NDT_FILE_HDR = MDTYPES[native_code]['dtypes']['file_header']  # 128-byte file header
NDT_TAG_FULL = MDTYPES[native_code]['dtypes']['tag_full']  # 8-byte element tag
NDT_TAG_SMALL = MDTYPES[native_code]['dtypes']['tag_smalldata']  # tag with embedded data
NDT_ARRAY_FLAGS = MDTYPES[native_code]['dtypes']['array_flags']  # array flags sub-element
|
||||
|
||||
|
||||
class VarWriter5(object):
    ''' Generic matlab matrix writing class '''
    # Template tag for a miMATRIX element; ``byte_count`` is patched in
    # afterwards by ``update_matrix_tag``.
    mat_tag = np.zeros((), NDT_TAG_FULL)
    mat_tag['mdtype'] = miMATRIX

    def __init__(self, file_writer):
        self.file_stream = file_writer.file_stream
        self.unicode_strings = file_writer.unicode_strings
        self.long_field_names = file_writer.long_field_names
        self.oned_as = file_writer.oned_as
        # These are used for top level writes, and unset after
        self._var_name = None
        self._var_is_global = False

    def write_bytes(self, arr):
        ''' Write raw contents of `arr` to the stream in Fortran order '''
        # tobytes(), not tostring(): tostring was deprecated in numpy
        # 1.19 and removed in numpy 2.0; tobytes is the exact alias.
        self.file_stream.write(arr.tobytes(order='F'))

    def write_string(self, s):
        ''' Write byte string `s` directly to the stream '''
        self.file_stream.write(s)

    def write_element(self, arr, mdtype=None):
        ''' write tag and data '''
        if mdtype is None:
            mdtype = NP_TO_MTYPES[arr.dtype.str[1:]]
        # Array needs to be in native byte order
        if arr.dtype.byteorder == swapped_code:
            arr = arr.byteswap().newbyteorder()
        byte_count = arr.size*arr.itemsize
        # Data of <= 4 bytes can be packed into the tag itself.
        if byte_count <= 4:
            self.write_smalldata_element(arr, mdtype, byte_count)
        else:
            self.write_regular_element(arr, mdtype, byte_count)

    def write_smalldata_element(self, arr, mdtype, byte_count):
        # write tag with embedded data
        tag = np.zeros((), NDT_TAG_SMALL)
        tag['byte_count_mdtype'] = (byte_count << 16) + mdtype
        # if the data is < 4 bytes, the element is zero-padded as needed.
        tag['data'] = arr.tobytes(order='F')
        self.write_bytes(tag)

    def write_regular_element(self, arr, mdtype, byte_count):
        # write tag, data
        tag = np.zeros((), NDT_TAG_FULL)
        tag['mdtype'] = mdtype
        tag['byte_count'] = byte_count
        self.write_bytes(tag)
        self.write_bytes(arr)
        # pad to next 64-bit boundary
        bc_mod_8 = byte_count % 8
        if bc_mod_8:
            self.file_stream.write(b'\x00' * (8-bc_mod_8))

    def write_header(self,
                     shape,
                     mclass,
                     is_complex=False,
                     is_logical=False,
                     nzmax=0):
        ''' Write header for given data options

        Parameters
        ----------
        shape : sequence
           array shape
        mclass : int
           mat5 matrix class
        is_complex : bool, optional
           True if matrix is complex
        is_logical : bool, optional
           True if matrix is logical
        nzmax : int, optional
           max non zero elements for sparse arrays

        We get the name and the global flag from the object, and reset
        them to defaults after we've used them
        '''
        # get name and is_global from one-shot object store
        name = self._var_name
        is_global = self._var_is_global
        # initialize the top-level matrix tag, store position
        self._mat_tag_pos = self.file_stream.tell()
        self.write_bytes(self.mat_tag)
        # write array flags (complex, global, logical, class, nzmax)
        af = np.zeros((), NDT_ARRAY_FLAGS)
        af['data_type'] = miUINT32
        af['byte_count'] = 8
        flags = is_complex << 3 | is_global << 2 | is_logical << 1
        af['flags_class'] = mclass | flags << 8
        af['nzmax'] = nzmax
        self.write_bytes(af)
        # shape
        self.write_element(np.array(shape, dtype='i4'))
        # write name
        name = np.asarray(name)
        if name == '':  # empty string zero-terminated
            self.write_smalldata_element(name, miINT8, 0)
        else:
            self.write_element(name, miINT8)
        # reset the one-shot store to defaults
        self._var_name = ''
        self._var_is_global = False

    def update_matrix_tag(self, start_pos):
        ''' Fill in the byte count of the miMATRIX tag written at
        `start_pos`, now that the variable's full extent is known '''
        curr_pos = self.file_stream.tell()
        self.file_stream.seek(start_pos)
        # 8 is the tag's own size, excluded from its byte count.
        byte_count = curr_pos - start_pos - 8
        if byte_count >= 2**32:
            raise MatWriteError("Matrix too large to save with Matlab "
                                "5 format")
        self.mat_tag['byte_count'] = byte_count
        self.write_bytes(self.mat_tag)
        self.file_stream.seek(curr_pos)

    def write_top(self, arr, name, is_global):
        """ Write variable at top level of mat file

        Parameters
        ----------
        arr : array_like
            array-like object to create writer for
        name : str, optional
            name as it will appear in matlab workspace
            default is empty string
        is_global : {False, True}, optional
            whether variable will be global on load into matlab
        """
        # these are set before the top-level header write, and unset at
        # the end of the same write, because they do not apply for lower levels
        self._var_is_global = is_global
        self._var_name = name
        # write the header and data
        self.write(arr)

    def write(self, arr):
        ''' Write `arr` to stream at top and sub levels

        Parameters
        ----------
        arr : array_like
            array-like object to create writer for
        '''
        # store position, so we can update the matrix tag
        mat_tag_pos = self.file_stream.tell()
        # First check if these are sparse
        if scipy.sparse.issparse(arr):
            self.write_sparse(arr)
            self.update_matrix_tag(mat_tag_pos)
            return
        # Try to convert things that aren't arrays
        narr = to_writeable(arr)
        if narr is None:
            raise TypeError('Could not convert %s (type %s) to array'
                            % (arr, type(arr)))
        if isinstance(narr, MatlabObject):
            self.write_object(narr)
        elif isinstance(narr, MatlabFunction):
            raise MatWriteError('Cannot write matlab functions')
        elif narr is EmptyStructMarker:  # empty struct array
            self.write_empty_struct()
        elif narr.dtype.fields:  # struct array
            self.write_struct(narr)
        elif narr.dtype.hasobject:  # cell array
            self.write_cells(narr)
        elif narr.dtype.kind in ('U', 'S'):
            if self.unicode_strings:
                codec = 'UTF8'
            else:
                codec = 'ascii'
            self.write_char(narr, codec)
        else:
            self.write_numeric(narr)
        self.update_matrix_tag(mat_tag_pos)

    def write_numeric(self, arr):
        ''' Write a numeric (or boolean) array element '''
        imagf = arr.dtype.kind == 'c'
        logif = arr.dtype.kind == 'b'
        try:
            mclass = NP_TO_MXTYPES[arr.dtype.str[1:]]
        except KeyError:
            # No matching matlab type, probably complex256 / float128 / float96
            # Cast data to complex128 / float64.
            if imagf:
                arr = arr.astype('c128')
            elif logif:
                arr = arr.astype('i1')  # Should only contain 0/1
            else:
                arr = arr.astype('f8')
            mclass = mxDOUBLE_CLASS
        self.write_header(matdims(arr, self.oned_as),
                          mclass,
                          is_complex=imagf,
                          is_logical=logif)
        if imagf:
            # Complex data: real and imaginary parts as two elements.
            self.write_element(arr.real)
            self.write_element(arr.imag)
        else:
            self.write_element(arr)

    def write_char(self, arr, codec='ascii'):
        ''' Write string array `arr` with given `codec`
        '''
        if arr.size == 0 or np.all(arr == ''):
            # This an empty string array or a string array containing
            # only empty strings.  Matlab cannot distinguish between a
            # string array that is empty, and a string array containing
            # only empty strings, because it stores strings as arrays of
            # char.  There is no way of having an array of char that is
            # not empty, but contains an empty string. We have to
            # special-case the array-with-empty-strings because even
            # empty strings have zero padding, which would otherwise
            # appear in matlab as a string with a space.
            shape = (0,) * np.max([arr.ndim, 2])
            self.write_header(shape, mxCHAR_CLASS)
            self.write_smalldata_element(arr, miUTF8, 0)
            return
        # non-empty string.
        #
        # Convert to char array
        arr = arr_to_chars(arr)
        # We have to write the shape directly, because we are going
        # recode the characters, and the resulting stream of chars
        # may have a different length
        shape = arr.shape
        self.write_header(shape, mxCHAR_CLASS)
        if arr.dtype.kind == 'U' and arr.size:
            # Make one long string from all the characters.  We need to
            # transpose here, because we're flattening the array, before
            # we write the bytes.  The bytes have to be written in
            # Fortran order.
            # np.prod, not the numpy-2.0-removed np.product alias.
            n_chars = np.prod(shape)
            st_arr = np.ndarray(shape=(),
                                dtype=arr_dtype_number(arr, n_chars),
                                buffer=arr.T.copy())  # Fortran order
            # Recode with codec to give byte string
            st = st_arr.item().encode(codec)
            # Reconstruct as one-dimensional byte array
            arr = np.ndarray(shape=(len(st),),
                             dtype='S1',
                             buffer=st)
        self.write_element(arr, mdtype=miUTF8)

    def write_sparse(self, arr):
        ''' Sparse matrices are 2D
        '''
        A = arr.tocsc()  # convert to sparse CSC format
        A.sort_indices()  # MATLAB expects sorted row indices
        is_complex = (A.dtype.kind == 'c')
        is_logical = (A.dtype.kind == 'b')
        nz = A.nnz
        self.write_header(matdims(arr, self.oned_as),
                          mxSPARSE_CLASS,
                          is_complex=is_complex,
                          is_logical=is_logical,
                          # matlab won't load file with 0 nzmax
                          nzmax=1 if nz == 0 else nz)
        self.write_element(A.indices.astype('i4'))
        self.write_element(A.indptr.astype('i4'))
        self.write_element(A.data.real)
        if is_complex:
            self.write_element(A.data.imag)

    def write_cells(self, arr):
        ''' Write object (cell) array: one sub-element per cell '''
        self.write_header(matdims(arr, self.oned_as),
                          mxCELL_CLASS)
        # loop over data, column major
        A = np.atleast_2d(arr).flatten('F')
        for el in A:
            self.write(el)

    def write_empty_struct(self):
        ''' Write a 1x1 struct array with no fields '''
        self.write_header((1, 1), mxSTRUCT_CLASS)
        # max field name length set to 1 in an example matlab struct
        self.write_element(np.array(1, dtype=np.int32))
        # Field names element is empty
        self.write_element(np.array([], dtype=np.int8))

    def write_struct(self, arr):
        ''' Write record array `arr` as a matlab struct array '''
        self.write_header(matdims(arr, self.oned_as),
                          mxSTRUCT_CLASS)
        self._write_items(arr)

    def _write_items(self, arr):
        # write fieldnames
        fieldnames = [f[0] for f in arr.dtype.descr]
        # +1 for the terminating null matlab expects.
        length = max([len(fieldname) for fieldname in fieldnames])+1
        max_length = (self.long_field_names and 64) or 32
        if length > max_length:
            raise ValueError("Field names are restricted to %d characters" %
                             (max_length-1))
        self.write_element(np.array([length], dtype='i4'))
        self.write_element(
            np.array(fieldnames, dtype='S%d' % (length)),
            mdtype=miINT8)
        # field values, column major, field-fastest
        A = np.atleast_2d(arr).flatten('F')
        for el in A:
            for f in fieldnames:
                self.write(el[f])

    def write_object(self, arr):
        '''Same as writing structs, except different mx class, and extra
        classname element after header
        '''
        self.write_header(matdims(arr, self.oned_as),
                          mxOBJECT_CLASS)
        self.write_element(np.array(arr.classname, dtype='S'),
                           mdtype=miINT8)
        self._write_items(arr)
|
||||
|
||||
|
||||
class MatFile5Writer(object):
    ''' Class for writing mat5 files '''

    @docfiller
    def __init__(self, file_stream,
                 do_compression=False,
                 unicode_strings=False,
                 global_vars=None,
                 long_field_names=False,
                 oned_as='row'):
        ''' Initialize writer for matlab 5 format files

        Parameters
        ----------
        %(do_compression)s
        %(unicode_strings)s
        global_vars : None or sequence of strings, optional
            Names of variables to be marked as global for matlab
        %(long_fields)s
        %(oned_as)s
        '''
        self.file_stream = file_stream
        self.do_compression = do_compression
        self.unicode_strings = unicode_strings
        if global_vars:
            self.global_vars = global_vars
        else:
            self.global_vars = []
        self.long_field_names = long_field_names
        self.oned_as = oned_as
        # Created freshly on each put_variables call.
        self._matrix_writer = None

    def write_file_header(self):
        # write header
        hdr = np.zeros((), NDT_FILE_HDR)
        hdr['description'] = 'MATLAB 5.0 MAT-file Platform: %s, Created on: %s' \
            % (os.name, time.asctime())
        hdr['version'] = 0x0100
        # 'MI' byte pair in native order; readers use it to detect the
        # file's endianness.
        hdr['endian_test'] = np.ndarray(shape=(),
                                        dtype='S2',
                                        buffer=np.uint16(0x4d49))
        # tobytes(), not tostring(): tostring was deprecated in numpy
        # 1.19 and removed in numpy 2.0.
        self.file_stream.write(hdr.tobytes())

    def put_variables(self, mdict, write_header=None):
        ''' Write variables in `mdict` to stream

        Parameters
        ----------
        mdict : mapping
           mapping with method ``items`` returns name, contents pairs where
           ``name`` which will appear in the matlab workspace in file load, and
           ``contents`` is something writeable to a matlab file, such as a numpy
           array.
        write_header : {None, True, False}, optional
           If True, then write the matlab file header before writing the
           variables.  If None (the default) then write the file header
           if we are at position 0 in the stream.  By setting False
           here, and setting the stream position to the end of the file,
           you can append variables to a matlab file
        '''
        # write header if requested, or None and start of file
        if write_header is None:
            write_header = self.file_stream.tell() == 0
        if write_header:
            self.write_file_header()
        self._matrix_writer = VarWriter5(self)
        for name, var in mdict.items():
            # Skip private keys such as '__header__' / '__globals__'.
            if name[0] == '_':
                continue
            is_global = name in self.global_vars
            if self.do_compression:
                # Write the variable to a scratch stream, then emit a
                # miCOMPRESSED element containing the zlib-compressed data.
                stream = BytesIO()
                self._matrix_writer.file_stream = stream
                self._matrix_writer.write_top(var, asbytes(name), is_global)
                out_str = zlib.compress(stream.getvalue())
                tag = np.empty((), NDT_TAG_FULL)
                tag['mdtype'] = miCOMPRESSED
                tag['byte_count'] = len(out_str)
                # tobytes(), not the deprecated/removed tostring().
                self.file_stream.write(tag.tobytes())
                self.file_stream.write(out_str)
            else:  # not compressing
                self._matrix_writer.write_top(var, asbytes(name), is_global)
|
||||
@@ -1,254 +0,0 @@
|
||||
''' Constants and classes for matlab 5 read and write
|
||||
|
||||
See also mio5_utils.pyx where these same constants arise as c enums.
|
||||
|
||||
If you make changes in this file, don't forget to change mio5_utils.pyx
|
||||
'''
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import numpy as np
|
||||
|
||||
from .miobase import convert_dtypes
|
||||
|
||||
# MAT-file data type (mdtype) codes, as used in element tags.
miINT8 = 1
miUINT8 = 2
miINT16 = 3
miUINT16 = 4
miINT32 = 5
miUINT32 = 6
miSINGLE = 7
miDOUBLE = 9
miINT64 = 12
miUINT64 = 13
miMATRIX = 14
miCOMPRESSED = 15
miUTF8 = 16
miUTF16 = 17
miUTF32 = 18
|
||||
|
||||
# MATLAB array class (mxCLASS) codes from the array flags sub-element.
mxCELL_CLASS = 1
mxSTRUCT_CLASS = 2
# The March 2008 edition of "Matlab 7 MAT-File Format" says that
# mxOBJECT_CLASS = 3, whereas matrix.h says that mxLOGICAL = 3.
# Matlab 2008a appears to save logicals as type 9, so we assume that
# the document is correct.  See type 18, below.
mxOBJECT_CLASS = 3
mxCHAR_CLASS = 4
mxSPARSE_CLASS = 5
mxDOUBLE_CLASS = 6
mxSINGLE_CLASS = 7
mxINT8_CLASS = 8
mxUINT8_CLASS = 9
mxINT16_CLASS = 10
mxUINT16_CLASS = 11
mxINT32_CLASS = 12
mxUINT32_CLASS = 13
# The following are not in the March 2008 edition of "Matlab 7
# MAT-File Format," but were guessed from matrix.h.
mxINT64_CLASS = 14
mxUINT64_CLASS = 15
mxFUNCTION_CLASS = 16
# Not doing anything with these at the moment.
mxOPAQUE_CLASS = 17  # This appears to be a function workspace
# Thread 'saveing/loading symbol table of annymous functions', octave-maintainers, April-May 2007
# https://lists.gnu.org/archive/html/octave-maintainers/2007-04/msg00031.html
# https://lists.gnu.org/archive/html/octave-maintainers/2007-05/msg00032.html
# (Was/Deprecated: https://www-old.cae.wisc.edu/pipermail/octave-maintainers/2007-May/002824.html)
mxOBJECT_CLASS_FROM_MATRIX_H = 18
|
||||
|
||||
# Templates mapping mdtype codes to numpy dtype strings (byte order
# added by convert_dtypes), plus named struct layouts for the file
# header, element tags and array flags.
mdtypes_template = {
    miINT8: 'i1',
    miUINT8: 'u1',
    miINT16: 'i2',
    miUINT16: 'u2',
    miINT32: 'i4',
    miUINT32: 'u4',
    miSINGLE: 'f4',
    miDOUBLE: 'f8',
    miINT64: 'i8',
    miUINT64: 'u8',
    miUTF8: 'u1',
    miUTF16: 'u2',
    miUTF32: 'u4',
    # 128-byte top-of-file header (116 + 8 + 2 + 2 bytes).
    'file_header': [('description', 'S116'),
                    ('subsystem_offset', 'i8'),
                    ('version', 'u2'),
                    ('endian_test', 'S2')],
    'tag_full': [('mdtype', 'u4'), ('byte_count', 'u4')],
    'tag_smalldata': [('byte_count_mdtype', 'u4'), ('data', 'S4')],
    'array_flags': [('data_type', 'u4'),
                    ('byte_count', 'u4'),
                    ('flags_class', 'u4'),
                    ('nzmax', 'u4')],
    'U1': 'U1',
    }
|
||||
|
||||
# Numeric mxCLASS code -> numpy dtype string (byte order added later).
mclass_dtypes_template = {
    mxINT8_CLASS: 'i1',
    mxUINT8_CLASS: 'u1',
    mxINT16_CLASS: 'i2',
    mxUINT16_CLASS: 'u2',
    mxINT32_CLASS: 'i4',
    mxUINT32_CLASS: 'u4',
    mxINT64_CLASS: 'i8',
    mxUINT64_CLASS: 'u8',
    mxSINGLE_CLASS: 'f4',
    mxDOUBLE_CLASS: 'f8',
    }
|
||||
|
||||
# mxCLASS code -> human-readable class name (used e.g. by whosmat).
mclass_info = {
    mxINT8_CLASS: 'int8',
    mxUINT8_CLASS: 'uint8',
    mxINT16_CLASS: 'int16',
    mxUINT16_CLASS: 'uint16',
    mxINT32_CLASS: 'int32',
    mxUINT32_CLASS: 'uint32',
    mxINT64_CLASS: 'int64',
    mxUINT64_CLASS: 'uint64',
    mxSINGLE_CLASS: 'single',
    mxDOUBLE_CLASS: 'double',
    mxCELL_CLASS: 'cell',
    mxSTRUCT_CLASS: 'struct',
    mxOBJECT_CLASS: 'object',
    mxCHAR_CLASS: 'char',
    mxSPARSE_CLASS: 'sparse',
    mxFUNCTION_CLASS: 'function',
    mxOPAQUE_CLASS: 'opaque',
    }
|
||||
|
||||
# numpy dtype str (sans byte-order char) -> MAT data type code, for writing.
NP_TO_MTYPES = {
    'f8': miDOUBLE,
    'c32': miDOUBLE,
    'c24': miDOUBLE,
    'c16': miDOUBLE,
    'f4': miSINGLE,
    'c8': miSINGLE,
    'i8': miINT64,
    'i4': miINT32,
    'i2': miINT16,
    'i1': miINT8,
    'u8': miUINT64,
    'u4': miUINT32,
    'u2': miUINT16,
    'u1': miUINT8,
    'S1': miUINT8,
    'U1': miUTF16,
    'b1': miUINT8,  # not standard but seems MATLAB uses this (gh-4022)
    }
|
||||
|
||||
|
||||
# numpy dtype str (sans byte-order char) -> MAT array class code, for writing.
NP_TO_MXTYPES = {
    'f8': mxDOUBLE_CLASS,
    'c32': mxDOUBLE_CLASS,
    'c24': mxDOUBLE_CLASS,
    'c16': mxDOUBLE_CLASS,
    'f4': mxSINGLE_CLASS,
    'c8': mxSINGLE_CLASS,
    'i8': mxINT64_CLASS,
    'i4': mxINT32_CLASS,
    'i2': mxINT16_CLASS,
    'i1': mxINT8_CLASS,
    'u8': mxUINT64_CLASS,
    'u4': mxUINT32_CLASS,
    'u2': mxUINT16_CLASS,
    'u1': mxUINT8_CLASS,
    'S1': mxUINT8_CLASS,
    'b1': mxUINT8_CLASS,  # not standard but seems MATLAB uses this
    }
|
||||
|
||||
''' Before release v7.1 (release 14) matlab (TM) used the system
|
||||
default character encoding scheme padded out to 16-bits. Release 14
|
||||
and later use Unicode. When saving character data, R14 checks if it
|
||||
can be encoded in 7-bit ascii, and saves in that format if so.'''
|
||||
|
||||
# Unicode codec root names and element widths per character mdtype;
# codecs with width > 1 get an endianness suffix in _convert_codecs.
codecs_template = {
    miUTF8: {'codec': 'utf_8', 'width': 1},
    miUTF16: {'codec': 'utf_16', 'width': 2},
    miUTF32: {'codec': 'utf_32', 'width': 4},
    }
|
||||
|
||||
|
||||
def _convert_codecs(template, byte_order):
|
||||
''' Convert codec template mapping to byte order
|
||||
|
||||
Set codecs not on this system to None
|
||||
|
||||
Parameters
|
||||
----------
|
||||
template : mapping
|
||||
key, value are respectively codec name, and root name for codec
|
||||
(without byte order suffix)
|
||||
byte_order : {'<', '>'}
|
||||
code for little or big endian
|
||||
|
||||
Returns
|
||||
-------
|
||||
codecs : dict
|
||||
key, value are name, codec (as in .encode(codec))
|
||||
'''
|
||||
codecs = {}
|
||||
postfix = byte_order == '<' and '_le' or '_be'
|
||||
for k, v in template.items():
|
||||
codec = v['codec']
|
||||
try:
|
||||
" ".encode(codec)
|
||||
except LookupError:
|
||||
codecs[k] = None
|
||||
continue
|
||||
if v['width'] > 1:
|
||||
codec += postfix
|
||||
codecs[k] = codec
|
||||
return codecs.copy()
|
||||
|
||||
|
||||
# Pre-computed dtype / class / codec lookup tables, keyed by byte order
# code ('<' little-endian, '>' big-endian).
MDTYPES = {}
for _bytecode in '<>':
    _def = {'dtypes': convert_dtypes(mdtypes_template, _bytecode),
            'classes': convert_dtypes(mclass_dtypes_template, _bytecode),
            'codecs': _convert_codecs(codecs_template, _bytecode)}
    MDTYPES[_bytecode] = _def
|
||||
|
||||
|
||||
class mat_struct(object):
    ''' Placeholder for holding read data from structs

    We use instances of this class when the user passes False as a value to the
    ``struct_as_record`` parameter of the :func:`scipy.io.matlab.loadmat`
    function.
    '''
    # Attributes are set dynamically by the reader, one per struct field.
    pass
|
||||
|
||||
|
||||
class MatlabObject(np.ndarray):
    '''ndarray subclass holding a matlab object, carrying the matlab
    class name in the ``classname`` attribute.'''

    def __new__(cls, input_array, classname=None):
        # View the already-formed input array as this subclass, then
        # attach the matlab class name as an extra attribute.
        obj = np.asarray(input_array).view(cls)
        obj.classname = classname
        return obj

    def __array_finalize__(self, obj):
        # Propagate classname through views and slices; default to None
        # when there is no source object to copy it from.
        self.classname = getattr(obj, 'classname', None)
|
||||
# We do not need to return anything
|
||||
|
||||
|
||||
class MatlabFunction(np.ndarray):
    '''ndarray subclass whose type signals a matlab function; carries
    no extra state.'''

    def __new__(cls, input_array):
        return np.asarray(input_array).view(cls)
|
||||
|
||||
|
||||
class MatlabOpaque(np.ndarray):
    '''ndarray subclass whose type signals a matlab opaque matrix;
    carries no extra state.'''

    def __new__(cls, input_array):
        return np.asarray(input_array).view(cls)
|
||||
|
||||
|
||||
# Record dtype used for matlab opaque (mxOPAQUE_CLASS) matrices; every
# field is stored as a python object.
OPAQUE_DTYPE = np.dtype(
    [('s0', 'O'), ('s1', 'O'), ('s2', 'O'), ('arr', 'O')])
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -1,415 +0,0 @@
|
||||
# Authors: Travis Oliphant, Matthew Brett
|
||||
|
||||
"""
|
||||
Base classes for MATLAB file stream reading.
|
||||
|
||||
MATLAB is a registered trademark of the Mathworks inc.
|
||||
"""
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import sys
|
||||
import operator
|
||||
|
||||
from scipy._lib.six import reduce
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Ordinal of one element of a bytes object: indexing bytes already
# yields an int on Python 3, but a length-1 str on Python 2.
if sys.version_info[0] >= 3:
    byteord = int
else:
    byteord = ord
|
||||
|
||||
from scipy.misc import doccer
|
||||
|
||||
from . import byteordercodes as boc
|
||||
|
||||
|
||||
class MatReadError(Exception):
    """Exception raised when a MAT file cannot be read."""
    pass
|
||||
|
||||
|
||||
class MatWriteError(Exception):
    """Exception raised when data cannot be written to a MAT file."""
    pass
|
||||
|
||||
|
||||
class MatReadWarning(UserWarning):
    """Warning category for recoverable oddities met while reading."""
    pass
|
||||
|
||||
|
||||
# Reusable docstring fragments: ``docfiller`` (below) substitutes these
# into docstrings containing ``%(name)s`` placeholders, so the reader
# and writer entry points share one description of each parameter.
doc_dict = \
    {'file_arg':
         '''file_name : str
   Name of the mat file (do not need .mat extension if
   appendmat==True) Can also pass open file-like object.''',
     'append_arg':
         '''appendmat : bool, optional
   True to append the .mat extension to the end of the given
   filename, if not already present.''',
     'load_args':
         '''byte_order : str or None, optional
   None by default, implying byte order guessed from mat
   file. Otherwise can be one of ('native', '=', 'little', '<',
   'BIG', '>').
mat_dtype : bool, optional
   If True, return arrays in same dtype as would be loaded into
   MATLAB (instead of the dtype with which they are saved).
squeeze_me : bool, optional
   Whether to squeeze unit matrix dimensions or not.
chars_as_strings : bool, optional
   Whether to convert char arrays to string arrays.
matlab_compatible : bool, optional
   Returns matrices as would be loaded by MATLAB (implies
   squeeze_me=False, chars_as_strings=False, mat_dtype=True,
   struct_as_record=True).''',
     'struct_arg':
         '''struct_as_record : bool, optional
   Whether to load MATLAB structs as numpy record arrays, or as
   old-style numpy arrays with dtype=object. Setting this flag to
   False replicates the behavior of scipy version 0.7.x (returning
   numpy object arrays). The default setting is True, because it
   allows easier round-trip load and save of MATLAB files.''',
     'matstream_arg':
         '''mat_stream : file-like
   Object with file API, open for reading.''',
     'long_fields':
         '''long_field_names : bool, optional
   * False - maximum field name length in a structure is 31 characters
     which is the documented maximum length. This is the default.
   * True - maximum field name length in a structure is 63 characters
     which works for MATLAB 7.6''',
     'do_compression':
         '''do_compression : bool, optional
   Whether to compress matrices on write. Default is False.''',
     'oned_as':
         '''oned_as : {'row', 'column'}, optional
   If 'column', write 1-D numpy arrays as column vectors.
   If 'row', write 1D numpy arrays as row vectors.''',
     'unicode_strings':
         '''unicode_strings : bool, optional
   If True, write strings as Unicode, else MATLAB usual encoding.'''}

# Decorator that performs the %(name)s substitution on a function's
# docstring using the fragments above.
docfiller = doccer.filldoc(doc_dict)
|
||||
|
||||
'''
|
||||
|
||||
Note on architecture
|
||||
======================
|
||||
|
||||
There are three sets of parameters relevant for reading files. The
|
||||
first are *file read parameters* - containing options that are common
|
||||
for reading the whole file, and therefore every variable within that
|
||||
file. At the moment these are:
|
||||
|
||||
* mat_stream
|
||||
* dtypes (derived from byte code)
|
||||
* byte_order
|
||||
* chars_as_strings
|
||||
* squeeze_me
|
||||
* struct_as_record (MATLAB 5 files)
|
||||
* class_dtypes (derived from order code, MATLAB 5 files)
|
||||
* codecs (MATLAB 5 files)
|
||||
* uint16_codec (MATLAB 5 files)
|
||||
|
||||
Another set of parameters are those that apply only to the current
|
||||
variable being read - the *header*:
|
||||
|
||||
* header related variables (different for v4 and v5 mat files)
|
||||
* is_complex
|
||||
* mclass
|
||||
* var_stream
|
||||
|
||||
With the header, we need ``next_position`` to tell us where the next
|
||||
variable in the stream is.
|
||||
|
||||
Then, for each element in a matrix, there can be *element read
|
||||
parameters*. An element is, for example, one element in a MATLAB cell
|
||||
array. At the moment these are:
|
||||
|
||||
* mat_dtype
|
||||
|
||||
The file-reading object contains the *file read parameters*. The
|
||||
*header* is passed around as a data object, or may be read and discarded
|
||||
in a single function. The *element read parameters* - the mat_dtype in
|
||||
this instance, is passed into a general post-processing function - see
|
||||
``mio_utils`` for details.
|
||||
'''
|
||||
|
||||
|
||||
def convert_dtypes(dtype_template, order_code):
    ''' Convert dtypes in mapping to given order

    Parameters
    ----------
    dtype_template : mapping
        mapping whose values are acceptable to ``np.dtype``
    order_code : str
        byte-order code suitable for ``dtype.newbyteorder()``

    Returns
    -------
    dtypes : mapping
        copy of `dtype_template` in which every value has been replaced
        by ``np.dtype(val).newbyteorder(order_code)``
    '''
    # Copy first so the caller's template mapping is left untouched,
    # then rewrite each value in place on the copy.
    converted = dtype_template.copy()
    for key, template in list(converted.items()):
        converted[key] = np.dtype(template).newbyteorder(order_code)
    return converted
|
||||
|
||||
|
||||
def read_dtype(mat_stream, a_dtype):
    """
    Generic get of byte stream data of known type

    Parameters
    ----------
    mat_stream : file_like object
        MATLAB (tm) mat file stream
    a_dtype : dtype
        dtype of array to read. `a_dtype` is assumed to be correct
        endianness.

    Returns
    -------
    arr : ndarray
        0-dimensional array of dtype `a_dtype`, backed directly by the
        bytes just read from the stream.
    """
    # Read exactly one item's worth of bytes and wrap them without
    # copying; 'F' order is irrelevant for a scalar but kept for the
    # structured-dtype case.
    raw = mat_stream.read(a_dtype.itemsize)
    return np.ndarray(shape=(), dtype=a_dtype, buffer=raw, order='F')
|
||||
|
||||
|
||||
def get_matfile_version(fileobj):
    """
    Return major, minor tuple depending on apparent mat file type

    Where:

    #. 0,x -> version 4 format mat files
    #. 1,x -> version 5 format mat files
    #. 2,x -> version 7.3 format mat files (HDF format)

    Parameters
    ----------
    fileobj : file_like
        object implementing seek() and read()

    Returns
    -------
    major_version : {0, 1, 2}
        major MATLAB File format version
    minor_version : int
        minor MATLAB file format version

    Raises
    ------
    MatReadError
        If the file is empty.
    ValueError
        The matfile version is unknown.

    Notes
    -----
    Has the side effect of setting the file read pointer to 0
    """
    # Version 4 files always have a zero byte somewhere in the first
    # four bytes; later formats never do.
    fileobj.seek(0)
    first_bytes = fileobj.read(4)
    if not first_bytes:
        raise MatReadError("Mat file appears to be empty")
    if 0 in np.frombuffer(first_bytes, dtype=np.uint8):
        fileobj.seek(0)
        return (0,0)
    # Version 5 / 7.3 files keep a version integer plus an endian
    # marker ('IM' or 'MI') in header bytes 124-128.
    fileobj.seek(124)
    tst_str = fileobj.read(4)
    fileobj.seek(0)
    # If byte 2 is 'I' the file is little-endian: the major version
    # byte then sits at index 1, otherwise at index 0.
    maj_ind = int(tst_str[2] == b'I'[0])
    major = byteord(tst_str[maj_ind])
    minor = byteord(tst_str[1-maj_ind])
    if major in (1, 2):
        return (major, minor)
    raise ValueError('Unknown mat file type, version %s, %s' % (major, minor))
|
||||
|
||||
|
||||
def matdims(arr, oned_as='column'):
    """
    Determine equivalent MATLAB dimensions for given array

    Parameters
    ----------
    arr : ndarray
        Input array
    oned_as : {'column', 'row'}, optional
        Whether 1-D arrays are returned as MATLAB row or column matrices.
        Default is 'column'.

    Returns
    -------
    dims : tuple
        Shape tuple, in the form MATLAB expects it.

    Notes
    -----
    MATLAB has no 0- or 1-dimensional arrays: scalars become (1, 1) and
    1-D arrays become column vectors by default (row vectors with
    ``oned_as='row'``).  The column default preserves the accidental
    behavior of scipy versions up to and including 0.11.

    Examples
    --------
    >>> matdims(np.array(1))          # numpy scalar
    (1, 1)
    >>> matdims(np.array([1,2]))      # 1d array, 2 elements
    (2, 1)
    >>> matdims(np.array([1,2]), 'row')
    (1, 2)
    >>> matdims(np.array([[]]))       # empty 2d
    (0, 0)
    """
    shape = arr.shape
    # MATLAB scalars are 1x1 matrices.
    if shape == ():
        return (1,1)
    # Any zero-sized array maps to an all-zero shape of rank >= 2.
    if 0 in shape:
        return (0,) * max(arr.ndim, 2)
    if len(shape) != 1:
        # 2-D and higher pass through unchanged.
        return shape
    # 1-D arrays become row or column vectors per ``oned_as``.
    if oned_as == 'column':
        return shape + (1,)
    if oned_as == 'row':
        return (1,) + shape
    raise ValueError('1D option "%s" is strange'
                     % oned_as)
|
||||
|
||||
|
||||
class MatVarReader(object):
    ''' Abstract class defining required interface for var readers'''
    def __init__(self, file_reader):
        # Concrete subclasses keep a reference to the parent file
        # reader; the base class stores nothing.
        pass

    def read_header(self):
        ''' Returns header '''
        pass

    def array_from_header(self, header):
        ''' Reads array given header '''
        pass
|
||||
|
||||
|
||||
class MatFileReader(object):
    """ Base object for reading mat files

    To make this class functional, you will need to override the
    following methods:

    matrix_getter_factory - gives object to fetch next matrix from stream
    guess_byte_order - guesses file byte order from file
    """

    @docfiller
    def __init__(self, mat_stream,
                 byte_order=None,
                 mat_dtype=False,
                 squeeze_me=False,
                 chars_as_strings=True,
                 matlab_compatible=False,
                 struct_as_record=True,
                 verify_compressed_data_integrity=True
                 ):
        '''
        Initializer for mat file reader

        mat_stream : file-like
            object with file API, open for reading
        %(load_args)s
        '''
        # Initialize stream
        self.mat_stream = mat_stream
        # Per-instance dtype cache; filled in by subclasses.
        self.dtypes = {}
        # Byte order: guess from the file when not supplied, otherwise
        # normalize whatever spelling the caller used to a numpy code.
        if not byte_order:
            byte_order = self.guess_byte_order()
        else:
            byte_order = boc.to_numpy_code(byte_order)
        self.byte_order = byte_order
        self.struct_as_record = struct_as_record
        # matlab_compatible overrides the three individual flags below.
        if matlab_compatible:
            self.set_matlab_compatible()
        else:
            self.squeeze_me = squeeze_me
            self.chars_as_strings = chars_as_strings
            self.mat_dtype = mat_dtype
        self.verify_compressed_data_integrity = verify_compressed_data_integrity

    def set_matlab_compatible(self):
        ''' Sets options to return arrays as MATLAB loads them '''
        self.mat_dtype = True
        self.squeeze_me = False
        self.chars_as_strings = False

    def guess_byte_order(self):
        ''' As we do not know what file type we have, assume native '''
        return boc.native_code

    def end_of_stream(self):
        # Probe for EOF by attempting a 1-byte read, then restoring the
        # position.  NOTE(review): at EOF ``tell()`` is unchanged, so
        # ``seek(curpos-1)`` rewinds one byte before where we started —
        # harmless because callers stop reading once this returns True,
        # but worth confirming.
        b = self.mat_stream.read(1)
        curpos = self.mat_stream.tell()
        self.mat_stream.seek(curpos-1)
        return len(b) == 0
|
||||
|
||||
|
||||
def arr_dtype_number(arr, num):
    ''' Return dtype like that of `arr` but with item count `num`

    For example, an array of dtype ``<U6`` with ``num=1`` yields
    ``<U1``.
    '''
    # dtype.str is e.g. '<U6'; keep byte order + kind, swap the count.
    return np.dtype('{0}{1}'.format(arr.dtype.str[:2], num))
|
||||
|
||||
|
||||
def arr_to_chars(arr):
    ''' Convert string array to char array

    Each string element is split into single characters along a new
    trailing axis whose length is the string length encoded in the
    dtype; padding positions (empty chars) are replaced with spaces.
    '''
    shape = list(arr.shape) or [1]
    # e.g. dtype.str '<U6' -> trailing axis of length 6.
    shape.append(int(arr.dtype.str[2:]))
    # Reinterpret the same buffer as single-character elements.
    chars = np.ndarray(shape=shape,
                       dtype=np.dtype(arr.dtype.str[:2] + '1'),
                       buffer=arr)
    empties = chars == ''
    if not np.any(empties):
        return chars
    # Replace padding chars on a copy, leaving the caller's buffer
    # untouched.
    chars = chars.copy()
    chars[empties] = ' '
    return chars
|
||||
@@ -1,16 +0,0 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
|
||||
def configuration(parent_package='io',top_path=None):
    # Build configuration for the scipy.io.matlab subpackage: three
    # Cython-generated C extensions plus the tests data directory.
    from numpy.distutils.misc_util import Configuration
    config = Configuration('matlab', parent_package, top_path)
    config.add_extension('streams', sources=['streams.c'])
    config.add_extension('mio_utils', sources=['mio_utils.c'])
    config.add_extension('mio5_utils', sources=['mio5_utils.c'])
    config.add_data_dir('tests')
    return config


if __name__ == '__main__':
    # Allow building this subpackage standalone.
    from numpy.distutils.core import setup
    setup(**configuration(top_path='').todict())
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -1,4 +0,0 @@
|
||||
function [a, b] = afunc(c, d)
% A function
% Trivial fixture used by the function-handle round-trip tests:
% returns both inputs shifted by known constants.
a = c + 1;
b = d + 10;
|
||||
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -1,5 +0,0 @@
|
||||
Japanese:
|
||||
すべての人間は、生まれながらにして自由であり、
|
||||
かつ、尊厳と権利と について平等である。
|
||||
人間は、理性と良心とを授けられており、
|
||||
互いに同胞の精神をもって行動しなければならない。
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -1,50 +0,0 @@
|
||||
% Generates mat files for loadmat unit tests
% Uses save_matfile.m function
% This is the version for matlab 4

% work out matlab version and file suffix for test files
global FILEPREFIX FILESUFFIX
sepchar = '/';
if strcmp(computer, 'PCWIN'), sepchar = '\'; end
FILEPREFIX = [pwd sepchar 'data' sepchar];
mlv = version;
FILESUFFIX = ['_' mlv '_' computer '.mat'];

% basic double array
theta = 0:pi/4:2*pi;
save_matfile('testdouble', theta);

% string
save_matfile('teststring', '"Do nine men interpret?" "Nine men," I nod.')

% complex
save_matfile('testcomplex', cos(theta) + 1j*sin(theta));

% asymmetric array to check indexing
a = zeros(3, 5);
a(:,1) = [1:3]';
a(1,:) = 1:5;

% 2D matrix
save_matfile('testmatrix', a);

% minus number - tests signed int
save_matfile('testminus', -1);

% single character
save_matfile('testonechar', 'r');

% string array - rows of a char matrix must all have the same length,
% so pad the shorter names with trailing spaces to five characters
save_matfile('teststringarray', ['one  '; 'two  '; 'three']);

% sparse array
save_matfile('testsparse', sparse(a));

% sparse complex array
b = sparse(a);
b(1,1) = b(1,1) + j;
save_matfile('testsparsecomplex', b);

% Two variables in same file
save([FILEPREFIX 'testmulti' FILESUFFIX], 'a', 'theta')
|
||||
|
||||
@@ -1,100 +0,0 @@
|
||||
% Generates mat files for loadmat unit tests
% This is the version for matlab 5 and higher
% Uses save_matfile.m function

% work out matlab version and file suffix for test files
global FILEPREFIX FILESUFFIX
FILEPREFIX = [fullfile(pwd, 'data') filesep];
temp = ver('MATLAB');
mlv = temp.Version;
FILESUFFIX = ['_' mlv '_' computer '.mat'];

% basic double array
theta = 0:pi/4:2*pi;
save_matfile('testdouble', theta);

% string
save_matfile('teststring', '"Do nine men interpret?" "Nine men," I nod.')

% complex
save_matfile('testcomplex', cos(theta) + 1j*sin(theta));

% asymmetric array to check indexing
a = zeros(3, 5);
a(:,1) = [1:3]';
a(1,:) = 1:5;

% 2D matrix
save_matfile('testmatrix', a);

% minus number - tests signed int
save_matfile('testminus', -1);

% single character
save_matfile('testonechar', 'r');

% string array - rows of a char matrix must all have the same length,
% so pad the shorter names with trailing spaces to five characters
save_matfile('teststringarray', ['one  '; 'two  '; 'three']);

% sparse array
save_matfile('testsparse', sparse(a));

% sparse complex array
b = sparse(a);
b(1,1) = b(1,1) + j;
save_matfile('testsparsecomplex', b);

% Two variables in same file
save([FILEPREFIX 'testmulti' FILESUFFIX], 'a', 'theta')


% struct
save_matfile('teststruct', ...
    struct('stringfield','Rats live on no evil star.',...
           'doublefield',[sqrt(2) exp(1) pi],...
           'complexfield',(1+1j)*[sqrt(2) exp(1) pi]));

% cell
save_matfile('testcell', ...
    {['This cell contains this string and 3 arrays of increasing' ...
      ' length'], 1., 1.:2., 1.:3.});

% scalar cell
save_matfile('testscalarcell', {1})

% Empty cells in two cell matrices
save_matfile('testemptycell', {1, 2, [], [], 3});

% 3D matrix
save_matfile('test3dmatrix', reshape(1:24,[2 3 4]))

% nested cell array
save_matfile('testcellnest', {1, {2, 3, {4, 5}}});

% nested struct
save_matfile('teststructnest', struct('one', 1, 'two', ...
                                      struct('three', 'number 3')));

% array of struct
save_matfile('teststructarr', [struct('one', 1, 'two', 2) ...
                               struct('one', 'number 1', 'two', 'number 2')]);

% matlab object
save_matfile('testobject', inline('x'))

% array of matlab objects
%save_matfile('testobjarr', [inline('x') inline('x')])

% unicode test
if str2num(mlv) > 7  % function added 7.0.1
    fid = fopen([FILEPREFIX 'japanese_utf8.txt']);
    from_japan = fread(fid, 'uint8')';
    fclose(fid);
    save_matfile('testunicode', native2unicode(from_japan, 'utf-8'));
end

% func
if str2num(mlv) > 7  % function pointers added recently
    func = @afunc;
    save_matfile('testfunc', func);
end
|
||||
@@ -1,6 +0,0 @@
|
||||
function save_matfile(test_name, v)
% saves variable passed in m with filename from prefix
% The value is first bound to a variable literally named `test_name`
% (via eval) so that loadmat finds it under that key in the mat file.

global FILEPREFIX FILESUFFIX
eval([test_name ' = v;']);
save([FILEPREFIX test_name FILESUFFIX], test_name)
|
||||
@@ -1,31 +0,0 @@
|
||||
''' Tests for byteorder module '''
|
||||
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import sys
|
||||
|
||||
from numpy.testing import assert_
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
import scipy.io.matlab.byteordercodes as sibc
|
||||
|
||||
|
||||
def test_native():
    # sys_is_le should mirror the interpreter's reported byte order.
    native_is_le = sys.byteorder == 'little'
    assert_(sibc.sys_is_le == native_is_le)


def test_to_numpy():
    # 'native'/'swapped' resolve relative to the host byte order; the
    # remaining aliases map to fixed numpy codes '<' and '>'.
    if sys.byteorder == 'little':
        assert_(sibc.to_numpy_code('native') == '<')
        assert_(sibc.to_numpy_code('swapped') == '>')
    else:
        assert_(sibc.to_numpy_code('native') == '>')
        assert_(sibc.to_numpy_code('swapped') == '<')
    assert_(sibc.to_numpy_code('native') == sibc.to_numpy_code('='))
    assert_(sibc.to_numpy_code('big') == '>')
    for code in ('little', '<', 'l', 'L', 'le'):
        assert_(sibc.to_numpy_code(code) == '<')
    for code in ('big', '>', 'b', 'B', 'be'):
        assert_(sibc.to_numpy_code(code) == '>')
    # Unrecognized spellings must be rejected, not guessed at.
    assert_raises(ValueError, sibc.to_numpy_code, 'silly string')
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,185 +0,0 @@
|
||||
""" Testing mio5_utils Cython module
|
||||
|
||||
"""
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import sys
|
||||
|
||||
from io import BytesIO
|
||||
cStringIO = BytesIO
|
||||
|
||||
import numpy as np
|
||||
|
||||
from numpy.testing import assert_array_equal, assert_equal, assert_
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
from scipy._lib.six import u
|
||||
|
||||
import scipy.io.matlab.byteordercodes as boc
|
||||
import scipy.io.matlab.streams as streams
|
||||
import scipy.io.matlab.mio5_params as mio5p
|
||||
import scipy.io.matlab.mio5_utils as m5u
|
||||
|
||||
|
||||
def test_byteswap():
    # byteswap_u4 must agree with numpy's byteswap and be its own
    # inverse, across values touching each byte position.
    for val in (
        1,
        0x100,
        0x10000):
        a = np.array(val, dtype=np.uint32)
        b = a.byteswap()
        c = m5u.byteswap_u4(a)
        assert_equal(b.item(), c)
        d = m5u.byteswap_u4(c)
        assert_equal(a.item(), d)


def _make_tag(base_dt, val, mdtype, sde=False):
    ''' Makes a simple matlab tag, full or sde '''
    # Full tags are (u4 mdtype, u4 byte_count, data, pad-to-8); small
    # data element (SDE) tags pack count and type into one u4, with
    # field order depending on endianness.
    base_dt = np.dtype(base_dt)
    bo = boc.to_numpy_code(base_dt.byteorder)
    byte_count = base_dt.itemsize
    if not sde:
        udt = bo + 'u4'
        padding = 8 - (byte_count % 8)
        all_dt = [('mdtype', udt),
                  ('byte_count', udt),
                  ('val', base_dt)]
        if padding:
            all_dt.append(('padding', 'u1', padding))
    else:  # is sde
        udt = bo + 'u2'
        padding = 4-byte_count
        if bo == '<':  # little endian
            all_dt = [('mdtype', udt),
                      ('byte_count', udt),
                      ('val', base_dt)]
        else:  # big endian
            all_dt = [('byte_count', udt),
                      ('mdtype', udt),
                      ('val', base_dt)]
        if padding:
            all_dt.append(('padding', 'u1', padding))
    tag = np.zeros((1,), dtype=all_dt)
    tag['mdtype'] = mdtype
    tag['byte_count'] = byte_count
    tag['val'] = val
    return tag
|
||||
|
||||
|
||||
def _write_stream(stream, *strings):
|
||||
stream.truncate(0)
|
||||
stream.seek(0)
|
||||
for s in strings:
|
||||
stream.write(s)
|
||||
stream.seek(0)
|
||||
|
||||
|
||||
def _make_readerlike(stream, byte_order=boc.native_code):
    # Build a minimal stand-in for a MatFile5Reader, carrying just the
    # attributes VarReader5 reads off its parent reader.
    class R(object):
        pass
    r = R()
    r.mat_stream = stream
    r.byte_order = byte_order
    r.struct_as_record = True
    r.uint16_codec = sys.getdefaultencoding()
    r.chars_as_strings = False
    r.mat_dtype = False
    r.squeeze_me = False
    return r


def test_read_tag():
    # mainly to test errors
    # make reader-like thing
    str_io = BytesIO()
    r = _make_readerlike(str_io)
    c_reader = m5u.VarReader5(r)
    # Reading a tag from an empty stream must raise IOError.
    # This works for StringIO but _not_ cStringIO
    assert_raises(IOError, c_reader.read_tag)
    # bad SDE: byte_count of 5 cannot fit in a small data element.
    tag = _make_tag('i4', 1, mio5p.miINT32, sde=True)
    tag['byte_count'] = 5
    _write_stream(str_io, tag.tostring())
    assert_raises(ValueError, c_reader.read_tag)


def test_read_stream():
    # A raw stream wrapper should hand back exactly the bytes written.
    tag = _make_tag('i4', 1, mio5p.miINT32, sde=True)
    tag_str = tag.tostring()
    str_io = cStringIO(tag_str)
    st = streams.make_stream(str_io)
    s = streams._read_into(st, tag.itemsize)
    assert_equal(s, tag.tostring())


def test_read_numeric():
    # make reader-like thing
    str_io = cStringIO()
    r = _make_readerlike(str_io)
    # check simplest of tags: each base dtype, both byte orders, and
    # both full and small-data-element encodings.
    for base_dt, val, mdtype in (('u2', 30, mio5p.miUINT16),
                                 ('i4', 1, mio5p.miINT32),
                                 ('i2', -1, mio5p.miINT16)):
        for byte_code in ('<', '>'):
            r.byte_order = byte_code
            c_reader = m5u.VarReader5(r)
            assert_equal(c_reader.little_endian, byte_code == '<')
            assert_equal(c_reader.is_swapped, byte_code != boc.native_code)
            for sde_f in (False, True):
                dt = np.dtype(base_dt).newbyteorder(byte_code)
                a = _make_tag(dt, val, mdtype, sde_f)
                a_str = a.tostring()
                _write_stream(str_io, a_str)
                el = c_reader.read_numeric()
                assert_equal(el, val)
                # two sequential reads
                _write_stream(str_io, a_str, a_str)
                el = c_reader.read_numeric()
                assert_equal(el, val)
                el = c_reader.read_numeric()
                assert_equal(el, val)


def test_read_numeric_writeable():
    # Values handed back by read_numeric must own writeable memory.
    # make reader-like thing
    str_io = cStringIO()
    r = _make_readerlike(str_io, '<')
    c_reader = m5u.VarReader5(r)
    dt = np.dtype('<u2')
    a = _make_tag(dt, 30, mio5p.miUINT16, 0)
    a_str = a.tostring()
    _write_stream(str_io, a_str)
    el = c_reader.read_numeric()
    assert_(el.flags.writeable is True)


def test_zero_byte_string():
    # Tests hack to allow chars of non-zero length, but 0 bytes
    # make reader-like thing
    str_io = cStringIO()
    r = _make_readerlike(str_io, boc.native_code)
    c_reader = m5u.VarReader5(r)
    tag_dt = np.dtype([('mdtype', 'u4'), ('byte_count', 'u4')])
    tag = np.zeros((1,), dtype=tag_dt)
    tag['mdtype'] = mio5p.miINT8
    tag['byte_count'] = 1
    hdr = m5u.VarHeader5()
    # Try when string is 1 length
    hdr.set_dims([1,])
    _write_stream(str_io, tag.tostring() + b'        ')
    str_io.seek(0)
    val = c_reader.read_char(hdr)
    assert_equal(val, u(' '))
    # Now when string has 0 bytes 1 length
    tag['byte_count'] = 0
    _write_stream(str_io, tag.tostring())
    str_io.seek(0)
    val = c_reader.read_char(hdr)
    assert_equal(val, u(' '))
    # Now when string has 0 bytes 4 length
    str_io.seek(0)
    hdr.set_dims([4,])
    val = c_reader.read_char(hdr)
    assert_array_equal(val, [u(' ')] * 4)
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
''' Jottings to work out format for __function_workspace__ matrix at end
|
||||
of mat file.
|
||||
|
||||
'''
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import os.path
|
||||
import sys
|
||||
import io
|
||||
|
||||
from numpy.compat import asstr
|
||||
|
||||
from scipy.io.matlab.mio5 import (MatlabObject, MatFile5Writer,
|
||||
MatFile5Reader, MatlabFunction)
|
||||
|
||||
test_data_path = os.path.join(os.path.dirname(__file__), 'data')
|
||||
|
||||
|
||||
def read_minimat_vars(rdr):
    # Walk the (already positioned) reader's stream, collecting every
    # variable into a dict; unnamed variables get 'var_%d' keys and
    # globals are additionally listed under '__globals__'.
    rdr.initialize_read()
    mdict = {'__globals__': []}
    i = 0
    while not rdr.end_of_stream():
        hdr, next_position = rdr.read_var_header()
        name = asstr(hdr.name)
        if name == '':
            name = 'var_%d' % i
            i += 1
        res = rdr.read_var_array(hdr, process=False)
        # Skip to the next variable regardless of how much of this one
        # the array read consumed.
        rdr.mat_stream.seek(next_position)
        mdict[name] = res
        if hdr.is_global:
            mdict['__globals__'].append(name)
    return mdict
|
||||
|
||||
|
||||
def read_workspace_vars(fname):
    """Read variables hidden in a mat file's ``__function_workspace__``.

    Parameters
    ----------
    fname : str
        Path to a mat file that contains a ``__function_workspace__``
        matrix.

    Returns
    -------
    mdict : dict
        Mapping of variable names (``var_%d`` placeholders when
        unnamed) to arrays, plus a ``__globals__`` list of global
        variable names.
    """
    # Use a context manager so the file handle is closed even when
    # parsing raises (the original leaked the handle on error).
    with open(fname, 'rb') as fp:
        rdr = MatFile5Reader(fp, struct_as_record=True)
        vars = rdr.get_variables()
        fws = vars['__function_workspace__']
        # The workspace is itself a mini mat stream embedded in the
        # matrix bytes; skip the leading 2 bytes before re-parsing.
        ws_bs = io.BytesIO(fws.tostring())
        ws_bs.seek(2)
        rdr.mat_stream = ws_bs
        # Guess byte order from the 'IM' / 'MI' endian marker.
        mi = rdr.mat_stream.read(2)
        rdr.byte_order = mi == b'IM' and '<' or '>'
        rdr.mat_stream.read(4)  # presumably byte padding
        return read_minimat_vars(rdr)
|
||||
|
||||
|
||||
def test_jottings():
    # Smoke test: parsing the function workspace of a real fixture
    # file must not raise.
    # example
    fname = os.path.join(test_data_path, 'parabola.mat')
    ws_vars = read_workspace_vars(fname)
|
||||
@@ -1,46 +0,0 @@
|
||||
""" Testing
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import numpy as np
|
||||
|
||||
from numpy.testing import assert_array_equal, assert_array_almost_equal, \
|
||||
assert_
|
||||
|
||||
from scipy.io.matlab.mio_utils import squeeze_element, chars_to_strings
|
||||
|
||||
|
||||
def test_squeeze_element():
    # squeeze_element should match np.squeeze for ordinary arrays.
    a = np.zeros((1,3))
    assert_array_equal(np.squeeze(a), squeeze_element(a))
    # 0d output from squeeze gives scalar
    sq_int = squeeze_element(np.zeros((1,1), dtype=float))
    assert_(isinstance(sq_int, float))
    # Unless it's a structured array
    sq_sa = squeeze_element(np.zeros((1,1),dtype=[('f1', 'f')]))
    assert_(isinstance(sq_sa, np.ndarray))
|
||||
|
||||
|
||||
def test_chars_strings():
    # chars as strings
    # Each string must be exactly 6 characters so the list-of-chars
    # below forms a rectangular (4, 6) array; the shorter words are
    # padded with trailing spaces (unequal rows would make np.array
    # produce a ragged object array instead of a 'U1' char array).
    strings = ['learn ', 'python', 'fast  ', 'here  ']
    str_arr = np.array(strings, dtype='U6')  # shape (4,)
    chars = [list(s) for s in strings]
    char_arr = np.array(chars, dtype='U1')  # shape (4,6)
    assert_array_equal(chars_to_strings(char_arr), str_arr)
    # Higher-rank char arrays collapse only the last axis.
    ca2d = char_arr.reshape((2,2,6))
    sa2d = str_arr.reshape((2,2))
    assert_array_equal(chars_to_strings(ca2d), sa2d)
    ca3d = char_arr.reshape((1,2,2,6))
    sa3d = str_arr.reshape((1,2,2))
    assert_array_equal(chars_to_strings(ca3d), sa3d)
    # Fortran ordered arrays
    char_arrf = np.array(chars, dtype='U1', order='F')  # shape (4,6)
    assert_array_equal(chars_to_strings(char_arrf), str_arr)
    # empty array
    arr = np.array([['']], dtype='U1')
    out_arr = np.array([''], dtype='U1')
    assert_array_equal(chars_to_strings(arr), out_arr)
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
""" Testing miobase module
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
from numpy.testing import assert_equal
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
from scipy.io.matlab.miobase import matdims
|
||||
|
||||
|
||||
def test_matdims():
|
||||
# Test matdims dimension finder
|
||||
assert_equal(matdims(np.array(1)), (1, 1)) # numpy scalar
|
||||
assert_equal(matdims(np.array([1])), (1, 1)) # 1d array, 1 element
|
||||
assert_equal(matdims(np.array([1,2])), (2, 1)) # 1d array, 2 elements
|
||||
assert_equal(matdims(np.array([[2],[3]])), (2, 1)) # 2d array, column vector
|
||||
assert_equal(matdims(np.array([[2,3]])), (1, 2)) # 2d array, row vector
|
||||
# 3d array, rowish vector
|
||||
assert_equal(matdims(np.array([[[2,3]]])), (1, 1, 2))
|
||||
assert_equal(matdims(np.array([])), (0, 0)) # empty 1d array
|
||||
assert_equal(matdims(np.array([[]])), (0, 0)) # empty 2d
|
||||
assert_equal(matdims(np.array([[[]]])), (0, 0, 0)) # empty 3d
|
||||
# Optional argument flips 1-D shape behavior.
|
||||
assert_equal(matdims(np.array([1,2]), 'row'), (1, 2)) # 1d array, 2 elements
|
||||
# The argument has to make sense though
|
||||
assert_raises(ValueError, matdims, np.array([1,2]), 'bizarre')
|
||||
# Check empty sparse matrices get their own shape
|
||||
from scipy.sparse import csr_matrix, csc_matrix
|
||||
assert_equal(matdims(csr_matrix(np.zeros((3, 3)))), (3, 3))
|
||||
assert_equal(matdims(csc_matrix(np.zeros((2, 2)))), (2, 2))
|
||||
@@ -1,35 +0,0 @@
|
||||
""" Test reading of files not conforming to matlab specification
|
||||
|
||||
We try and read any file that matlab reads, these files included
|
||||
"""
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
from os.path import dirname, join as pjoin
|
||||
|
||||
from numpy.testing import assert_
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
from scipy.io.matlab.mio import loadmat
|
||||
|
||||
TEST_DATA_PATH = pjoin(dirname(__file__), 'data')
|
||||
|
||||
|
||||
def test_multiple_fieldnames():
|
||||
# Example provided by Dharhas Pothina
|
||||
# Extracted using mio5.varmats_from_mat
|
||||
multi_fname = pjoin(TEST_DATA_PATH, 'nasty_duplicate_fieldnames.mat')
|
||||
vars = loadmat(multi_fname)
|
||||
funny_names = vars['Summary'].dtype.names
|
||||
assert_(set(['_1_Station_Q', '_2_Station_Q',
|
||||
'_3_Station_Q']).issubset(funny_names))
|
||||
|
||||
|
||||
def test_malformed1():
|
||||
# Example from gh-6072
|
||||
# Contains malformed header data, which previously resulted into a
|
||||
# buffer overflow.
|
||||
#
|
||||
# Should raise an exception, not segfault
|
||||
fname = pjoin(TEST_DATA_PATH, 'malformed1.mat')
|
||||
with open(fname, 'rb') as f:
|
||||
assert_raises(ValueError, loadmat, f)
|
||||
@@ -1,184 +0,0 @@
|
||||
""" Testing
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import os
|
||||
import sys
|
||||
import zlib
|
||||
|
||||
from io import BytesIO
|
||||
|
||||
if sys.version_info[0] >= 3:
|
||||
cStringIO = BytesIO
|
||||
else:
|
||||
from cStringIO import StringIO as cStringIO
|
||||
|
||||
from tempfile import mkstemp
|
||||
from contextlib import contextmanager
|
||||
|
||||
import numpy as np
|
||||
|
||||
from numpy.testing import assert_, assert_equal
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
from scipy.io.matlab.streams import (make_stream,
|
||||
GenericStream, cStringStream, FileStream, ZlibInputStream,
|
||||
_read_into, _read_string)
|
||||
|
||||
IS_PYPY = ('__pypy__' in sys.modules)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def setup_test_file():
|
||||
val = b'a\x00string'
|
||||
fd, fname = mkstemp()
|
||||
|
||||
with os.fdopen(fd, 'wb') as fs:
|
||||
fs.write(val)
|
||||
with open(fname, 'rb') as fs:
|
||||
gs = BytesIO(val)
|
||||
cs = cStringIO(val)
|
||||
yield fs, gs, cs
|
||||
os.unlink(fname)
|
||||
|
||||
|
||||
def test_make_stream():
|
||||
with setup_test_file() as (fs, gs, cs):
|
||||
# test stream initialization
|
||||
assert_(isinstance(make_stream(gs), GenericStream))
|
||||
if sys.version_info[0] < 3 and not IS_PYPY:
|
||||
assert_(isinstance(make_stream(cs), cStringStream))
|
||||
assert_(isinstance(make_stream(fs), FileStream))
|
||||
|
||||
|
||||
def test_tell_seek():
|
||||
with setup_test_file() as (fs, gs, cs):
|
||||
for s in (fs, gs, cs):
|
||||
st = make_stream(s)
|
||||
res = st.seek(0)
|
||||
assert_equal(res, 0)
|
||||
assert_equal(st.tell(), 0)
|
||||
res = st.seek(5)
|
||||
assert_equal(res, 0)
|
||||
assert_equal(st.tell(), 5)
|
||||
res = st.seek(2, 1)
|
||||
assert_equal(res, 0)
|
||||
assert_equal(st.tell(), 7)
|
||||
res = st.seek(-2, 2)
|
||||
assert_equal(res, 0)
|
||||
assert_equal(st.tell(), 6)
|
||||
|
||||
|
||||
def test_read():
|
||||
with setup_test_file() as (fs, gs, cs):
|
||||
for s in (fs, gs, cs):
|
||||
st = make_stream(s)
|
||||
st.seek(0)
|
||||
res = st.read(-1)
|
||||
assert_equal(res, b'a\x00string')
|
||||
st.seek(0)
|
||||
res = st.read(4)
|
||||
assert_equal(res, b'a\x00st')
|
||||
# read into
|
||||
st.seek(0)
|
||||
res = _read_into(st, 4)
|
||||
assert_equal(res, b'a\x00st')
|
||||
res = _read_into(st, 4)
|
||||
assert_equal(res, b'ring')
|
||||
assert_raises(IOError, _read_into, st, 2)
|
||||
# read alloc
|
||||
st.seek(0)
|
||||
res = _read_string(st, 4)
|
||||
assert_equal(res, b'a\x00st')
|
||||
res = _read_string(st, 4)
|
||||
assert_equal(res, b'ring')
|
||||
assert_raises(IOError, _read_string, st, 2)
|
||||
|
||||
|
||||
class TestZlibInputStream(object):
|
||||
def _get_data(self, size):
|
||||
data = np.random.randint(0, 256, size).astype(np.uint8).tostring()
|
||||
compressed_data = zlib.compress(data)
|
||||
stream = BytesIO(compressed_data)
|
||||
return stream, len(compressed_data), data
|
||||
|
||||
def test_read(self):
|
||||
block_size = 131072
|
||||
|
||||
SIZES = [0, 1, 10, block_size//2, block_size-1,
|
||||
block_size, block_size+1, 2*block_size-1]
|
||||
|
||||
READ_SIZES = [block_size//2, block_size-1,
|
||||
block_size, block_size+1]
|
||||
|
||||
def check(size, read_size):
|
||||
compressed_stream, compressed_data_len, data = self._get_data(size)
|
||||
stream = ZlibInputStream(compressed_stream, compressed_data_len)
|
||||
data2 = b''
|
||||
so_far = 0
|
||||
while True:
|
||||
block = stream.read(min(read_size,
|
||||
size - so_far))
|
||||
if not block:
|
||||
break
|
||||
so_far += len(block)
|
||||
data2 += block
|
||||
assert_equal(data, data2)
|
||||
|
||||
for size in SIZES:
|
||||
for read_size in READ_SIZES:
|
||||
check(size, read_size)
|
||||
|
||||
def test_read_max_length(self):
|
||||
size = 1234
|
||||
data = np.random.randint(0, 256, size).astype(np.uint8).tostring()
|
||||
compressed_data = zlib.compress(data)
|
||||
compressed_stream = BytesIO(compressed_data + b"abbacaca")
|
||||
stream = ZlibInputStream(compressed_stream, len(compressed_data))
|
||||
|
||||
stream.read(len(data))
|
||||
assert_equal(compressed_stream.tell(), len(compressed_data))
|
||||
|
||||
assert_raises(IOError, stream.read, 1)
|
||||
|
||||
def test_seek(self):
|
||||
compressed_stream, compressed_data_len, data = self._get_data(1024)
|
||||
|
||||
stream = ZlibInputStream(compressed_stream, compressed_data_len)
|
||||
|
||||
stream.seek(123)
|
||||
p = 123
|
||||
assert_equal(stream.tell(), p)
|
||||
d1 = stream.read(11)
|
||||
assert_equal(d1, data[p:p+11])
|
||||
|
||||
stream.seek(321, 1)
|
||||
p = 123+11+321
|
||||
assert_equal(stream.tell(), p)
|
||||
d2 = stream.read(21)
|
||||
assert_equal(d2, data[p:p+21])
|
||||
|
||||
stream.seek(641, 0)
|
||||
p = 641
|
||||
assert_equal(stream.tell(), p)
|
||||
d3 = stream.read(11)
|
||||
assert_equal(d3, data[p:p+11])
|
||||
|
||||
assert_raises(IOError, stream.seek, 10, 2)
|
||||
assert_raises(IOError, stream.seek, -1, 1)
|
||||
assert_raises(ValueError, stream.seek, 1, 123)
|
||||
|
||||
stream.seek(10000, 1)
|
||||
assert_raises(IOError, stream.read, 12)
|
||||
|
||||
def test_all_data_read(self):
|
||||
compressed_stream, compressed_data_len, data = self._get_data(1024)
|
||||
stream = ZlibInputStream(compressed_stream, compressed_data_len)
|
||||
assert_(not stream.all_data_read())
|
||||
stream.seek(512)
|
||||
assert_(not stream.all_data_read())
|
||||
stream.seek(1024)
|
||||
assert_(stream.all_data_read())
|
||||
|
||||
@@ -1,835 +0,0 @@
|
||||
"""
|
||||
Matrix Market I/O in Python.
|
||||
See http://math.nist.gov/MatrixMarket/formats.html
|
||||
for information about the Matrix Market format.
|
||||
"""
|
||||
#
|
||||
# Author: Pearu Peterson <pearu@cens.ioc.ee>
|
||||
# Created: October, 2004
|
||||
#
|
||||
# References:
|
||||
# http://math.nist.gov/MatrixMarket/
|
||||
#
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from numpy import (asarray, real, imag, conj, zeros, ndarray, concatenate,
|
||||
ones, can_cast)
|
||||
from numpy.compat import asbytes, asstr
|
||||
|
||||
from scipy._lib.six import string_types
|
||||
from scipy.sparse import coo_matrix, isspmatrix
|
||||
|
||||
__all__ = ['mminfo', 'mmread', 'mmwrite', 'MMFile']
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
def mminfo(source):
|
||||
"""
|
||||
Return size and storage parameters from Matrix Market file-like 'source'.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
source : str or file-like
|
||||
Matrix Market filename (extension .mtx) or open file-like object
|
||||
|
||||
Returns
|
||||
-------
|
||||
rows : int
|
||||
Number of matrix rows.
|
||||
cols : int
|
||||
Number of matrix columns.
|
||||
entries : int
|
||||
Number of non-zero entries of a sparse matrix
|
||||
or rows*cols for a dense matrix.
|
||||
format : str
|
||||
Either 'coordinate' or 'array'.
|
||||
field : str
|
||||
Either 'real', 'complex', 'pattern', or 'integer'.
|
||||
symmetry : str
|
||||
Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.
|
||||
"""
|
||||
return MMFile.info(source)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def mmread(source):
|
||||
"""
|
||||
Reads the contents of a Matrix Market file-like 'source' into a matrix.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
source : str or file-like
|
||||
Matrix Market filename (extensions .mtx, .mtz.gz)
|
||||
or open file-like object.
|
||||
|
||||
Returns
|
||||
-------
|
||||
a : ndarray or coo_matrix
|
||||
Dense or sparse matrix depending on the matrix format in the
|
||||
Matrix Market file.
|
||||
"""
|
||||
return MMFile().read(source)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def mmwrite(target, a, comment='', field=None, precision=None, symmetry=None):
|
||||
"""
|
||||
Writes the sparse or dense array `a` to Matrix Market file-like `target`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
target : str or file-like
|
||||
Matrix Market filename (extension .mtx) or open file-like object.
|
||||
a : array like
|
||||
Sparse or dense 2D array.
|
||||
comment : str, optional
|
||||
Comments to be prepended to the Matrix Market file.
|
||||
field : None or str, optional
|
||||
Either 'real', 'complex', 'pattern', or 'integer'.
|
||||
precision : None or int, optional
|
||||
Number of digits to display for real or complex values.
|
||||
symmetry : None or str, optional
|
||||
Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.
|
||||
If symmetry is None the symmetry type of 'a' is determined by its
|
||||
values.
|
||||
"""
|
||||
MMFile().write(target, a, comment, field, precision, symmetry)
|
||||
|
||||
|
||||
###############################################################################
|
||||
class MMFile (object):
|
||||
__slots__ = ('_rows',
|
||||
'_cols',
|
||||
'_entries',
|
||||
'_format',
|
||||
'_field',
|
||||
'_symmetry')
|
||||
|
||||
@property
|
||||
def rows(self):
|
||||
return self._rows
|
||||
|
||||
@property
|
||||
def cols(self):
|
||||
return self._cols
|
||||
|
||||
@property
|
||||
def entries(self):
|
||||
return self._entries
|
||||
|
||||
@property
|
||||
def format(self):
|
||||
return self._format
|
||||
|
||||
@property
|
||||
def field(self):
|
||||
return self._field
|
||||
|
||||
@property
|
||||
def symmetry(self):
|
||||
return self._symmetry
|
||||
|
||||
@property
|
||||
def has_symmetry(self):
|
||||
return self._symmetry in (self.SYMMETRY_SYMMETRIC,
|
||||
self.SYMMETRY_SKEW_SYMMETRIC,
|
||||
self.SYMMETRY_HERMITIAN)
|
||||
|
||||
# format values
|
||||
FORMAT_COORDINATE = 'coordinate'
|
||||
FORMAT_ARRAY = 'array'
|
||||
FORMAT_VALUES = (FORMAT_COORDINATE, FORMAT_ARRAY)
|
||||
|
||||
@classmethod
|
||||
def _validate_format(self, format):
|
||||
if format not in self.FORMAT_VALUES:
|
||||
raise ValueError('unknown format type %s, must be one of %s' %
|
||||
(format, self.FORMAT_VALUES))
|
||||
|
||||
# field values
|
||||
FIELD_INTEGER = 'integer'
|
||||
FIELD_UNSIGNED = 'unsigned-integer'
|
||||
FIELD_REAL = 'real'
|
||||
FIELD_COMPLEX = 'complex'
|
||||
FIELD_PATTERN = 'pattern'
|
||||
FIELD_VALUES = (FIELD_INTEGER, FIELD_UNSIGNED, FIELD_REAL, FIELD_COMPLEX, FIELD_PATTERN)
|
||||
|
||||
@classmethod
|
||||
def _validate_field(self, field):
|
||||
if field not in self.FIELD_VALUES:
|
||||
raise ValueError('unknown field type %s, must be one of %s' %
|
||||
(field, self.FIELD_VALUES))
|
||||
|
||||
# symmetry values
|
||||
SYMMETRY_GENERAL = 'general'
|
||||
SYMMETRY_SYMMETRIC = 'symmetric'
|
||||
SYMMETRY_SKEW_SYMMETRIC = 'skew-symmetric'
|
||||
SYMMETRY_HERMITIAN = 'hermitian'
|
||||
SYMMETRY_VALUES = (SYMMETRY_GENERAL, SYMMETRY_SYMMETRIC,
|
||||
SYMMETRY_SKEW_SYMMETRIC, SYMMETRY_HERMITIAN)
|
||||
|
||||
@classmethod
|
||||
def _validate_symmetry(self, symmetry):
|
||||
if symmetry not in self.SYMMETRY_VALUES:
|
||||
raise ValueError('unknown symmetry type %s, must be one of %s' %
|
||||
(symmetry, self.SYMMETRY_VALUES))
|
||||
|
||||
DTYPES_BY_FIELD = {FIELD_INTEGER: 'intp',
|
||||
FIELD_UNSIGNED: 'uint64',
|
||||
FIELD_REAL: 'd',
|
||||
FIELD_COMPLEX: 'D',
|
||||
FIELD_PATTERN: 'd'}
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
@staticmethod
|
||||
def reader():
|
||||
pass
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
@staticmethod
|
||||
def writer():
|
||||
pass
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
@classmethod
|
||||
def info(self, source):
|
||||
"""
|
||||
Return size, storage parameters from Matrix Market file-like 'source'.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
source : str or file-like
|
||||
Matrix Market filename (extension .mtx) or open file-like object
|
||||
|
||||
Returns
|
||||
-------
|
||||
rows : int
|
||||
Number of matrix rows.
|
||||
cols : int
|
||||
Number of matrix columns.
|
||||
entries : int
|
||||
Number of non-zero entries of a sparse matrix
|
||||
or rows*cols for a dense matrix.
|
||||
format : str
|
||||
Either 'coordinate' or 'array'.
|
||||
field : str
|
||||
Either 'real', 'complex', 'pattern', or 'integer'.
|
||||
symmetry : str
|
||||
Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.
|
||||
"""
|
||||
|
||||
stream, close_it = self._open(source)
|
||||
|
||||
try:
|
||||
|
||||
# read and validate header line
|
||||
line = stream.readline()
|
||||
mmid, matrix, format, field, symmetry = \
|
||||
[asstr(part.strip()) for part in line.split()]
|
||||
if not mmid.startswith('%%MatrixMarket'):
|
||||
raise ValueError('source is not in Matrix Market format')
|
||||
if not matrix.lower() == 'matrix':
|
||||
raise ValueError("Problem reading file header: " + line)
|
||||
|
||||
# http://math.nist.gov/MatrixMarket/formats.html
|
||||
if format.lower() == 'array':
|
||||
format = self.FORMAT_ARRAY
|
||||
elif format.lower() == 'coordinate':
|
||||
format = self.FORMAT_COORDINATE
|
||||
|
||||
# skip comments
|
||||
while line.startswith(b'%'):
|
||||
line = stream.readline()
|
||||
|
||||
line = line.split()
|
||||
if format == self.FORMAT_ARRAY:
|
||||
if not len(line) == 2:
|
||||
raise ValueError("Header line not of length 2: " + line)
|
||||
rows, cols = map(int, line)
|
||||
entries = rows * cols
|
||||
else:
|
||||
if not len(line) == 3:
|
||||
raise ValueError("Header line not of length 3: " + line)
|
||||
rows, cols, entries = map(int, line)
|
||||
|
||||
return (rows, cols, entries, format, field.lower(),
|
||||
symmetry.lower())
|
||||
|
||||
finally:
|
||||
if close_it:
|
||||
stream.close()
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
@staticmethod
|
||||
def _open(filespec, mode='rb'):
|
||||
""" Return an open file stream for reading based on source.
|
||||
|
||||
If source is a file name, open it (after trying to find it with mtx and
|
||||
gzipped mtx extensions). Otherwise, just return source.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
filespec : str or file-like
|
||||
String giving file name or file-like object
|
||||
mode : str, optional
|
||||
Mode with which to open file, if `filespec` is a file name.
|
||||
|
||||
Returns
|
||||
-------
|
||||
fobj : file-like
|
||||
Open file-like object.
|
||||
close_it : bool
|
||||
True if the calling function should close this file when done,
|
||||
false otherwise.
|
||||
"""
|
||||
close_it = False
|
||||
if isinstance(filespec, string_types):
|
||||
close_it = True
|
||||
|
||||
# open for reading
|
||||
if mode[0] == 'r':
|
||||
|
||||
# determine filename plus extension
|
||||
if not os.path.isfile(filespec):
|
||||
if os.path.isfile(filespec+'.mtx'):
|
||||
filespec = filespec + '.mtx'
|
||||
elif os.path.isfile(filespec+'.mtx.gz'):
|
||||
filespec = filespec + '.mtx.gz'
|
||||
elif os.path.isfile(filespec+'.mtx.bz2'):
|
||||
filespec = filespec + '.mtx.bz2'
|
||||
# open filename
|
||||
if filespec.endswith('.gz'):
|
||||
import gzip
|
||||
stream = gzip.open(filespec, mode)
|
||||
elif filespec.endswith('.bz2'):
|
||||
import bz2
|
||||
stream = bz2.BZ2File(filespec, 'rb')
|
||||
else:
|
||||
stream = open(filespec, mode)
|
||||
|
||||
# open for writing
|
||||
else:
|
||||
if filespec[-4:] != '.mtx':
|
||||
filespec = filespec + '.mtx'
|
||||
stream = open(filespec, mode)
|
||||
else:
|
||||
stream = filespec
|
||||
|
||||
return stream, close_it
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
@staticmethod
|
||||
def _get_symmetry(a):
|
||||
m, n = a.shape
|
||||
if m != n:
|
||||
return MMFile.SYMMETRY_GENERAL
|
||||
issymm = True
|
||||
isskew = True
|
||||
isherm = a.dtype.char in 'FD'
|
||||
|
||||
# sparse input
|
||||
if isspmatrix(a):
|
||||
# check if number of nonzero entries of lower and upper triangle
|
||||
# matrix are equal
|
||||
a = a.tocoo()
|
||||
(row, col) = a.nonzero()
|
||||
if (row < col).sum() != (row > col).sum():
|
||||
return MMFile.SYMMETRY_GENERAL
|
||||
|
||||
# define iterator over symmetric pair entries
|
||||
a = a.todok()
|
||||
|
||||
def symm_iterator():
|
||||
for ((i, j), aij) in a.items():
|
||||
if i > j:
|
||||
aji = a[j, i]
|
||||
yield (aij, aji)
|
||||
|
||||
# non-sparse input
|
||||
else:
|
||||
# define iterator over symmetric pair entries
|
||||
def symm_iterator():
|
||||
for j in range(n):
|
||||
for i in range(j+1, n):
|
||||
aij, aji = a[i][j], a[j][i]
|
||||
yield (aij, aji)
|
||||
|
||||
# check for symmetry
|
||||
for (aij, aji) in symm_iterator():
|
||||
if issymm and aij != aji:
|
||||
issymm = False
|
||||
if isskew and aij != -aji:
|
||||
isskew = False
|
||||
if isherm and aij != conj(aji):
|
||||
isherm = False
|
||||
if not (issymm or isskew or isherm):
|
||||
break
|
||||
|
||||
# return symmetry value
|
||||
if issymm:
|
||||
return MMFile.SYMMETRY_SYMMETRIC
|
||||
if isskew:
|
||||
return MMFile.SYMMETRY_SKEW_SYMMETRIC
|
||||
if isherm:
|
||||
return MMFile.SYMMETRY_HERMITIAN
|
||||
return MMFile.SYMMETRY_GENERAL
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
@staticmethod
|
||||
def _field_template(field, precision):
|
||||
return {MMFile.FIELD_REAL: '%%.%ie\n' % precision,
|
||||
MMFile.FIELD_INTEGER: '%i\n',
|
||||
MMFile.FIELD_UNSIGNED: '%u\n',
|
||||
MMFile.FIELD_COMPLEX: '%%.%ie %%.%ie\n' %
|
||||
(precision, precision)
|
||||
}.get(field, None)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
def __init__(self, **kwargs):
|
||||
self._init_attrs(**kwargs)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
def read(self, source):
|
||||
"""
|
||||
Reads the contents of a Matrix Market file-like 'source' into a matrix.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
source : str or file-like
|
||||
Matrix Market filename (extensions .mtx, .mtz.gz)
|
||||
or open file object.
|
||||
|
||||
Returns
|
||||
-------
|
||||
a : ndarray or coo_matrix
|
||||
Dense or sparse matrix depending on the matrix format in the
|
||||
Matrix Market file.
|
||||
"""
|
||||
stream, close_it = self._open(source)
|
||||
|
||||
try:
|
||||
self._parse_header(stream)
|
||||
return self._parse_body(stream)
|
||||
|
||||
finally:
|
||||
if close_it:
|
||||
stream.close()
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
def write(self, target, a, comment='', field=None, precision=None,
|
||||
symmetry=None):
|
||||
"""
|
||||
Writes sparse or dense array `a` to Matrix Market file-like `target`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
target : str or file-like
|
||||
Matrix Market filename (extension .mtx) or open file-like object.
|
||||
a : array like
|
||||
Sparse or dense 2D array.
|
||||
comment : str, optional
|
||||
Comments to be prepended to the Matrix Market file.
|
||||
field : None or str, optional
|
||||
Either 'real', 'complex', 'pattern', or 'integer'.
|
||||
precision : None or int, optional
|
||||
Number of digits to display for real or complex values.
|
||||
symmetry : None or str, optional
|
||||
Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.
|
||||
If symmetry is None the symmetry type of 'a' is determined by its
|
||||
values.
|
||||
"""
|
||||
|
||||
stream, close_it = self._open(target, 'wb')
|
||||
|
||||
try:
|
||||
self._write(stream, a, comment, field, precision, symmetry)
|
||||
|
||||
finally:
|
||||
if close_it:
|
||||
stream.close()
|
||||
else:
|
||||
stream.flush()
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
def _init_attrs(self, **kwargs):
|
||||
"""
|
||||
Initialize each attributes with the corresponding keyword arg value
|
||||
or a default of None
|
||||
"""
|
||||
|
||||
attrs = self.__class__.__slots__
|
||||
public_attrs = [attr[1:] for attr in attrs]
|
||||
invalid_keys = set(kwargs.keys()) - set(public_attrs)
|
||||
|
||||
if invalid_keys:
|
||||
raise ValueError('''found %s invalid keyword arguments, please only
|
||||
use %s''' % (tuple(invalid_keys),
|
||||
public_attrs))
|
||||
|
||||
for attr in attrs:
|
||||
setattr(self, attr, kwargs.get(attr[1:], None))
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
def _parse_header(self, stream):
|
||||
rows, cols, entries, format, field, symmetry = \
|
||||
self.__class__.info(stream)
|
||||
self._init_attrs(rows=rows, cols=cols, entries=entries, format=format,
|
||||
field=field, symmetry=symmetry)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
def _parse_body(self, stream):
|
||||
rows, cols, entries, format, field, symm = (self.rows, self.cols,
|
||||
self.entries, self.format,
|
||||
self.field, self.symmetry)
|
||||
|
||||
try:
|
||||
from scipy.sparse import coo_matrix
|
||||
except ImportError:
|
||||
coo_matrix = None
|
||||
|
||||
dtype = self.DTYPES_BY_FIELD.get(field, None)
|
||||
|
||||
has_symmetry = self.has_symmetry
|
||||
is_integer = field == self.FIELD_INTEGER
|
||||
is_unsigned_integer = field == self.FIELD_UNSIGNED
|
||||
is_complex = field == self.FIELD_COMPLEX
|
||||
is_skew = symm == self.SYMMETRY_SKEW_SYMMETRIC
|
||||
is_herm = symm == self.SYMMETRY_HERMITIAN
|
||||
is_pattern = field == self.FIELD_PATTERN
|
||||
|
||||
if format == self.FORMAT_ARRAY:
|
||||
a = zeros((rows, cols), dtype=dtype)
|
||||
line = 1
|
||||
i, j = 0, 0
|
||||
if is_skew:
|
||||
a[i, j] = 0
|
||||
if i < rows - 1:
|
||||
i += 1
|
||||
while line:
|
||||
line = stream.readline()
|
||||
if not line or line.startswith(b'%'):
|
||||
continue
|
||||
if is_integer:
|
||||
aij = int(line)
|
||||
elif is_unsigned_integer:
|
||||
aij = int(line)
|
||||
elif is_complex:
|
||||
aij = complex(*map(float, line.split()))
|
||||
else:
|
||||
aij = float(line)
|
||||
a[i, j] = aij
|
||||
if has_symmetry and i != j:
|
||||
if is_skew:
|
||||
a[j, i] = -aij
|
||||
elif is_herm:
|
||||
a[j, i] = conj(aij)
|
||||
else:
|
||||
a[j, i] = aij
|
||||
if i < rows-1:
|
||||
i = i + 1
|
||||
else:
|
||||
j = j + 1
|
||||
if not has_symmetry:
|
||||
i = 0
|
||||
else:
|
||||
i = j
|
||||
if is_skew:
|
||||
a[i, j] = 0
|
||||
if i < rows-1:
|
||||
i += 1
|
||||
|
||||
if is_skew:
|
||||
if not (i in [0, j] and j == cols - 1):
|
||||
raise ValueError("Parse error, did not read all lines.")
|
||||
else:
|
||||
if not (i in [0, j] and j == cols):
|
||||
raise ValueError("Parse error, did not read all lines.")
|
||||
|
||||
elif format == self.FORMAT_COORDINATE and coo_matrix is None:
|
||||
# Read sparse matrix to dense when coo_matrix is not available.
|
||||
a = zeros((rows, cols), dtype=dtype)
|
||||
line = 1
|
||||
k = 0
|
||||
while line:
|
||||
line = stream.readline()
|
||||
if not line or line.startswith(b'%'):
|
||||
continue
|
||||
l = line.split()
|
||||
i, j = map(int, l[:2])
|
||||
i, j = i-1, j-1
|
||||
if is_integer:
|
||||
aij = int(l[2])
|
||||
elif is_unsigned_integer:
|
||||
aij = int(l[2])
|
||||
elif is_complex:
|
||||
aij = complex(*map(float, l[2:]))
|
||||
else:
|
||||
aij = float(l[2])
|
||||
a[i, j] = aij
|
||||
if has_symmetry and i != j:
|
||||
if is_skew:
|
||||
a[j, i] = -aij
|
||||
elif is_herm:
|
||||
a[j, i] = conj(aij)
|
||||
else:
|
||||
a[j, i] = aij
|
||||
k = k + 1
|
||||
if not k == entries:
|
||||
ValueError("Did not read all entries")
|
||||
|
||||
elif format == self.FORMAT_COORDINATE:
|
||||
# Read sparse COOrdinate format
|
||||
|
||||
if entries == 0:
|
||||
# empty matrix
|
||||
return coo_matrix((rows, cols), dtype=dtype)
|
||||
|
||||
I = zeros(entries, dtype='intc')
|
||||
J = zeros(entries, dtype='intc')
|
||||
if is_pattern:
|
||||
V = ones(entries, dtype='int8')
|
||||
elif is_integer:
|
||||
V = zeros(entries, dtype='intp')
|
||||
elif is_unsigned_integer:
|
||||
V = zeros(entries, dtype='uint64')
|
||||
elif is_complex:
|
||||
V = zeros(entries, dtype='complex')
|
||||
else:
|
||||
V = zeros(entries, dtype='float')
|
||||
|
||||
entry_number = 0
|
||||
for line in stream:
|
||||
if not line or line.startswith(b'%'):
|
||||
continue
|
||||
|
||||
if entry_number+1 > entries:
|
||||
raise ValueError("'entries' in header is smaller than "
|
||||
"number of entries")
|
||||
l = line.split()
|
||||
I[entry_number], J[entry_number] = map(int, l[:2])
|
||||
|
||||
if not is_pattern:
|
||||
if is_integer:
|
||||
V[entry_number] = int(l[2])
|
||||
elif is_unsigned_integer:
|
||||
V[entry_number] = int(l[2])
|
||||
elif is_complex:
|
||||
V[entry_number] = complex(*map(float, l[2:]))
|
||||
else:
|
||||
V[entry_number] = float(l[2])
|
||||
entry_number += 1
|
||||
if entry_number < entries:
|
||||
raise ValueError("'entries' in header is larger than "
|
||||
"number of entries")
|
||||
|
||||
I -= 1 # adjust indices (base 1 -> base 0)
|
||||
J -= 1
|
||||
|
||||
if has_symmetry:
|
||||
mask = (I != J) # off diagonal mask
|
||||
od_I = I[mask]
|
||||
od_J = J[mask]
|
||||
od_V = V[mask]
|
||||
|
||||
I = concatenate((I, od_J))
|
||||
J = concatenate((J, od_I))
|
||||
|
||||
if is_skew:
|
||||
od_V *= -1
|
||||
elif is_herm:
|
||||
od_V = od_V.conjugate()
|
||||
|
||||
V = concatenate((V, od_V))
|
||||
|
||||
a = coo_matrix((V, (I, J)), shape=(rows, cols), dtype=dtype)
|
||||
else:
|
||||
raise NotImplementedError(format)
|
||||
|
||||
return a
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
def _write(self, stream, a, comment='', field=None, precision=None,
|
||||
symmetry=None):
|
||||
if isinstance(a, list) or isinstance(a, ndarray) or \
|
||||
isinstance(a, tuple) or hasattr(a, '__array__'):
|
||||
rep = self.FORMAT_ARRAY
|
||||
a = asarray(a)
|
||||
if len(a.shape) != 2:
|
||||
raise ValueError('Expected 2 dimensional array')
|
||||
rows, cols = a.shape
|
||||
|
||||
if field is not None:
|
||||
|
||||
if field == self.FIELD_INTEGER:
|
||||
if not can_cast(a.dtype, 'intp'):
|
||||
raise OverflowError("mmwrite does not support integer "
|
||||
"dtypes larger than native 'intp'.")
|
||||
a = a.astype('intp')
|
||||
elif field == self.FIELD_REAL:
|
||||
if a.dtype.char not in 'fd':
|
||||
a = a.astype('d')
|
||||
elif field == self.FIELD_COMPLEX:
|
||||
if a.dtype.char not in 'FD':
|
||||
a = a.astype('D')
|
||||
|
||||
else:
|
||||
if not isspmatrix(a):
|
||||
raise ValueError('unknown matrix type: %s' % type(a))
|
||||
|
||||
rep = 'coordinate'
|
||||
rows, cols = a.shape
|
||||
|
||||
typecode = a.dtype.char
|
||||
|
||||
if precision is None:
|
||||
if typecode in 'fF':
|
||||
precision = 8
|
||||
else:
|
||||
precision = 16
|
||||
if field is None:
|
||||
kind = a.dtype.kind
|
||||
if kind == 'i':
|
||||
if not can_cast(a.dtype, 'intp'):
|
||||
raise OverflowError("mmwrite does not support integer "
|
||||
"dtypes larger than native 'intp'.")
|
||||
field = 'integer'
|
||||
elif kind == 'f':
|
||||
field = 'real'
|
||||
elif kind == 'c':
|
||||
field = 'complex'
|
||||
elif kind == 'u':
|
||||
field = 'unsigned-integer'
|
||||
else:
|
||||
raise TypeError('unexpected dtype kind ' + kind)
|
||||
|
||||
if symmetry is None:
|
||||
symmetry = self._get_symmetry(a)
|
||||
|
||||
# validate rep, field, and symmetry
|
||||
self.__class__._validate_format(rep)
|
||||
self.__class__._validate_field(field)
|
||||
self.__class__._validate_symmetry(symmetry)
|
||||
|
||||
# write initial header line
|
||||
stream.write(asbytes('%%MatrixMarket matrix {0} {1} {2}\n'.format(rep,
|
||||
field, symmetry)))
|
||||
|
||||
# write comments
|
||||
for line in comment.split('\n'):
|
||||
stream.write(asbytes('%%%s\n' % (line)))
|
||||
|
||||
template = self._field_template(field, precision)
|
||||
# write dense format
|
||||
if rep == self.FORMAT_ARRAY:
|
||||
# write shape spec
|
||||
stream.write(asbytes('%i %i\n' % (rows, cols)))
|
||||
|
||||
if field in (self.FIELD_INTEGER, self.FIELD_REAL, self.FIELD_UNSIGNED):
|
||||
if symmetry == self.SYMMETRY_GENERAL:
|
||||
for j in range(cols):
|
||||
for i in range(rows):
|
||||
stream.write(asbytes(template % a[i, j]))
|
||||
|
||||
elif symmetry == self.SYMMETRY_SKEW_SYMMETRIC:
|
||||
for j in range(cols):
|
||||
for i in range(j + 1, rows):
|
||||
stream.write(asbytes(template % a[i, j]))
|
||||
|
||||
else:
|
||||
for j in range(cols):
|
||||
for i in range(j, rows):
|
||||
stream.write(asbytes(template % a[i, j]))
|
||||
|
||||
elif field == self.FIELD_COMPLEX:
|
||||
|
||||
if symmetry == self.SYMMETRY_GENERAL:
|
||||
for j in range(cols):
|
||||
for i in range(rows):
|
||||
aij = a[i, j]
|
||||
stream.write(asbytes(template % (real(aij),
|
||||
imag(aij))))
|
||||
else:
|
||||
for j in range(cols):
|
||||
for i in range(j, rows):
|
||||
aij = a[i, j]
|
||||
stream.write(asbytes(template % (real(aij),
|
||||
imag(aij))))
|
||||
|
||||
elif field == self.FIELD_PATTERN:
|
||||
raise ValueError('pattern type inconsisted with dense format')
|
||||
|
||||
else:
|
||||
raise TypeError('Unknown field type %s' % field)
|
||||
|
||||
# write sparse format
|
||||
else:
|
||||
coo = a.tocoo() # convert to COOrdinate format
|
||||
|
||||
# if symmetry format used, remove values above main diagonal
|
||||
if symmetry != self.SYMMETRY_GENERAL:
|
||||
lower_triangle_mask = coo.row >= coo.col
|
||||
coo = coo_matrix((coo.data[lower_triangle_mask],
|
||||
(coo.row[lower_triangle_mask],
|
||||
coo.col[lower_triangle_mask])),
|
||||
shape=coo.shape)
|
||||
|
||||
# write shape spec
|
||||
stream.write(asbytes('%i %i %i\n' % (rows, cols, coo.nnz)))
|
||||
|
||||
template = self._field_template(field, precision-1)
|
||||
|
||||
if field == self.FIELD_PATTERN:
|
||||
for r, c in zip(coo.row+1, coo.col+1):
|
||||
stream.write(asbytes("%i %i\n" % (r, c)))
|
||||
elif field in (self.FIELD_INTEGER, self.FIELD_REAL, self.FIELD_UNSIGNED):
|
||||
for r, c, d in zip(coo.row+1, coo.col+1, coo.data):
|
||||
stream.write(asbytes(("%i %i " % (r, c)) +
|
||||
(template % d)))
|
||||
elif field == self.FIELD_COMPLEX:
|
||||
for r, c, d in zip(coo.row+1, coo.col+1, coo.data):
|
||||
stream.write(asbytes(("%i %i " % (r, c)) +
|
||||
(template % (d.real, d.imag))))
|
||||
else:
|
||||
raise TypeError('Unknown field type %s' % field)
|
||||
|
||||
|
||||
def _is_fromfile_compatible(stream):
|
||||
"""
|
||||
Check whether `stream` is compatible with numpy.fromfile.
|
||||
|
||||
Passing a gzipped file object to ``fromfile/fromstring`` doesn't work with
|
||||
Python3.
|
||||
"""
|
||||
if sys.version_info[0] < 3:
|
||||
return True
|
||||
|
||||
bad_cls = []
|
||||
try:
|
||||
import gzip
|
||||
bad_cls.append(gzip.GzipFile)
|
||||
except ImportError:
|
||||
pass
|
||||
try:
|
||||
import bz2
|
||||
bad_cls.append(bz2.BZ2File)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
bad_cls = tuple(bad_cls)
|
||||
return not isinstance(stream, bad_cls)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
if __name__ == '__main__':
|
||||
import time
|
||||
for filename in sys.argv[1:]:
|
||||
print('Reading', filename, '...', end=' ')
|
||||
sys.stdout.flush()
|
||||
t = time.time()
|
||||
mmread(filename)
|
||||
print('took %s seconds' % (time.time() - t))
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,20 +0,0 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
|
||||
def configuration(parent_package='',top_path=None):
|
||||
from numpy.distutils.misc_util import Configuration
|
||||
config = Configuration('io', parent_package, top_path)
|
||||
|
||||
config.add_extension('_test_fortran',
|
||||
sources=['_test_fortran.pyf', '_test_fortran.f'])
|
||||
|
||||
config.add_data_dir('tests')
|
||||
config.add_subpackage('matlab')
|
||||
config.add_subpackage('arff')
|
||||
config.add_subpackage('harwell_boeing')
|
||||
return config
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
from numpy.distutils.core import setup
|
||||
setup(**configuration(top_path='').todict())
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
@@ -1,159 +0,0 @@
|
||||
''' Tests for fortran sequential files '''
|
||||
|
||||
import tempfile
|
||||
import shutil
|
||||
from os import path, unlink
|
||||
from glob import iglob
|
||||
import re
|
||||
|
||||
from numpy.testing import assert_equal, assert_allclose
|
||||
import numpy as np
|
||||
|
||||
from scipy.io import FortranFile, _test_fortran
|
||||
|
||||
|
||||
DATA_PATH = path.join(path.dirname(__file__), 'data')
|
||||
|
||||
|
||||
def test_fortranfiles_read():
|
||||
for filename in iglob(path.join(DATA_PATH, "fortran-*-*x*x*.dat")):
|
||||
m = re.search(r'fortran-([^-]+)-(\d+)x(\d+)x(\d+).dat', filename, re.I)
|
||||
if not m:
|
||||
raise RuntimeError("Couldn't match %s filename to regex" % filename)
|
||||
|
||||
dims = (int(m.group(2)), int(m.group(3)), int(m.group(4)))
|
||||
|
||||
dtype = m.group(1).replace('s', '<')
|
||||
|
||||
f = FortranFile(filename, 'r', '<u4')
|
||||
data = f.read_record(dtype=dtype).reshape(dims, order='F')
|
||||
f.close()
|
||||
|
||||
expected = np.arange(np.prod(dims)).reshape(dims).astype(dtype)
|
||||
assert_equal(data, expected)
|
||||
|
||||
|
||||
def test_fortranfiles_mixed_record():
|
||||
filename = path.join(DATA_PATH, "fortran-mixed.dat")
|
||||
with FortranFile(filename, 'r', '<u4') as f:
|
||||
record = f.read_record('<i4,<f4,<i8,(2)<f8')
|
||||
|
||||
assert_equal(record['f0'][0], 1)
|
||||
assert_allclose(record['f1'][0], 2.3)
|
||||
assert_equal(record['f2'][0], 4)
|
||||
assert_allclose(record['f3'][0], [5.6, 7.8])
|
||||
|
||||
|
||||
def test_fortranfiles_write():
|
||||
for filename in iglob(path.join(DATA_PATH, "fortran-*-*x*x*.dat")):
|
||||
m = re.search(r'fortran-([^-]+)-(\d+)x(\d+)x(\d+).dat', filename, re.I)
|
||||
if not m:
|
||||
raise RuntimeError("Couldn't match %s filename to regex" % filename)
|
||||
dims = (int(m.group(2)), int(m.group(3)), int(m.group(4)))
|
||||
|
||||
dtype = m.group(1).replace('s', '<')
|
||||
data = np.arange(np.prod(dims)).reshape(dims).astype(dtype)
|
||||
|
||||
tmpdir = tempfile.mkdtemp()
|
||||
try:
|
||||
testFile = path.join(tmpdir,path.basename(filename))
|
||||
f = FortranFile(testFile, 'w','<u4')
|
||||
f.write_record(data.T)
|
||||
f.close()
|
||||
originalfile = open(filename, 'rb')
|
||||
newfile = open(testFile, 'rb')
|
||||
assert_equal(originalfile.read(), newfile.read(),
|
||||
err_msg=filename)
|
||||
originalfile.close()
|
||||
newfile.close()
|
||||
finally:
|
||||
shutil.rmtree(tmpdir)
|
||||
|
||||
|
||||
def test_fortranfile_read_mixed_record():
|
||||
# The data file fortran-3x3d-2i.dat contains the program that
|
||||
# produced it at the end.
|
||||
#
|
||||
# double precision :: a(3,3)
|
||||
# integer :: b(2)
|
||||
# ...
|
||||
# open(1, file='fortran-3x3d-2i.dat', form='unformatted')
|
||||
# write(1) a, b
|
||||
# close(1)
|
||||
#
|
||||
|
||||
filename = path.join(DATA_PATH, "fortran-3x3d-2i.dat")
|
||||
with FortranFile(filename, 'r', '<u4') as f:
|
||||
record = f.read_record('(3,3)f8', '2i4')
|
||||
|
||||
ax = np.arange(3*3).reshape(3, 3).astype(np.double)
|
||||
bx = np.array([-1, -2], dtype=np.int32)
|
||||
|
||||
assert_equal(record[0], ax.T)
|
||||
assert_equal(record[1], bx.T)
|
||||
|
||||
|
||||
def test_fortranfile_write_mixed_record(tmpdir):
|
||||
tf = path.join(str(tmpdir), 'test.dat')
|
||||
|
||||
records = [
|
||||
(('f4', 'f4', 'i4'), (np.float32(2), np.float32(3), np.int32(100))),
|
||||
(('4f4', '(3,3)f4', '8i4'), (np.random.randint(255, size=[4]).astype(np.float32),
|
||||
np.random.randint(255, size=[3, 3]).astype(np.float32),
|
||||
np.random.randint(255, size=[8]).astype(np.int32)))
|
||||
]
|
||||
|
||||
for dtype, a in records:
|
||||
with FortranFile(tf, 'w') as f:
|
||||
f.write_record(*a)
|
||||
|
||||
with FortranFile(tf, 'r') as f:
|
||||
b = f.read_record(*dtype)
|
||||
|
||||
assert_equal(len(a), len(b))
|
||||
|
||||
for aa, bb in zip(a, b):
|
||||
assert_equal(bb, aa)
|
||||
|
||||
|
||||
def test_fortran_roundtrip(tmpdir):
|
||||
filename = path.join(str(tmpdir), 'test.dat')
|
||||
|
||||
np.random.seed(1)
|
||||
|
||||
# double precision
|
||||
m, n, k = 5, 3, 2
|
||||
a = np.random.randn(m, n, k)
|
||||
with FortranFile(filename, 'w') as f:
|
||||
f.write_record(a.T)
|
||||
a2 = _test_fortran.read_unformatted_double(m, n, k, filename)
|
||||
with FortranFile(filename, 'r') as f:
|
||||
a3 = f.read_record('(2,3,5)f8').T
|
||||
assert_equal(a2, a)
|
||||
assert_equal(a3, a)
|
||||
|
||||
# integer
|
||||
m, n, k = 5, 3, 2
|
||||
a = np.random.randn(m, n, k).astype(np.int32)
|
||||
with FortranFile(filename, 'w') as f:
|
||||
f.write_record(a.T)
|
||||
a2 = _test_fortran.read_unformatted_int(m, n, k, filename)
|
||||
with FortranFile(filename, 'r') as f:
|
||||
a3 = f.read_record('(2,3,5)i4').T
|
||||
assert_equal(a2, a)
|
||||
assert_equal(a3, a)
|
||||
|
||||
# mixed
|
||||
m, n, k = 5, 3, 2
|
||||
a = np.random.randn(m, n)
|
||||
b = np.random.randn(k).astype(np.intc)
|
||||
with FortranFile(filename, 'w') as f:
|
||||
f.write_record(a.T, b.T)
|
||||
a2, b2 = _test_fortran.read_unformatted_mixed(m, n, k, filename)
|
||||
with FortranFile(filename, 'r') as f:
|
||||
a3, b3 = f.read_record('(3,5)f8', '2i4')
|
||||
a3 = a3.T
|
||||
assert_equal(a2, a)
|
||||
assert_equal(a3, a)
|
||||
assert_equal(b2, b)
|
||||
assert_equal(b3, b)
|
||||
@@ -1,442 +0,0 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
from os import path
|
||||
import warnings
|
||||
|
||||
DATA_PATH = path.join(path.dirname(__file__), 'data')
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import (assert_equal, assert_array_equal,
|
||||
assert_)
|
||||
from scipy._lib._numpy_compat import suppress_warnings
|
||||
|
||||
from scipy.io.idl import readsav
|
||||
|
||||
|
||||
def object_array(*args):
|
||||
"""Constructs a numpy array of objects"""
|
||||
array = np.empty(len(args), dtype=object)
|
||||
for i in range(len(args)):
|
||||
array[i] = args[i]
|
||||
return array
|
||||
|
||||
|
||||
def assert_identical(a, b):
|
||||
"""Assert whether value AND type are the same"""
|
||||
assert_equal(a, b)
|
||||
if type(b) is str:
|
||||
assert_equal(type(a), type(b))
|
||||
else:
|
||||
assert_equal(np.asarray(a).dtype.type, np.asarray(b).dtype.type)
|
||||
|
||||
|
||||
def assert_array_identical(a, b):
|
||||
"""Assert whether values AND type are the same"""
|
||||
assert_array_equal(a, b)
|
||||
assert_equal(a.dtype.type, b.dtype.type)
|
||||
|
||||
|
||||
# Define vectorized ID function for pointer arrays
|
||||
vect_id = np.vectorize(id)
|
||||
|
||||
|
||||
class TestIdict:
|
||||
|
||||
def test_idict(self):
|
||||
custom_dict = {'a': np.int16(999)}
|
||||
original_id = id(custom_dict)
|
||||
s = readsav(path.join(DATA_PATH, 'scalar_byte.sav'), idict=custom_dict, verbose=False)
|
||||
assert_equal(original_id, id(s))
|
||||
assert_('a' in s)
|
||||
assert_identical(s['a'], np.int16(999))
|
||||
assert_identical(s['i8u'], np.uint8(234))
|
||||
|
||||
|
||||
class TestScalars:
|
||||
# Test that scalar values are read in with the correct value and type
|
||||
|
||||
def test_byte(self):
|
||||
s = readsav(path.join(DATA_PATH, 'scalar_byte.sav'), verbose=False)
|
||||
assert_identical(s.i8u, np.uint8(234))
|
||||
|
||||
def test_int16(self):
|
||||
s = readsav(path.join(DATA_PATH, 'scalar_int16.sav'), verbose=False)
|
||||
assert_identical(s.i16s, np.int16(-23456))
|
||||
|
||||
def test_int32(self):
|
||||
s = readsav(path.join(DATA_PATH, 'scalar_int32.sav'), verbose=False)
|
||||
assert_identical(s.i32s, np.int32(-1234567890))
|
||||
|
||||
def test_float32(self):
|
||||
s = readsav(path.join(DATA_PATH, 'scalar_float32.sav'), verbose=False)
|
||||
assert_identical(s.f32, np.float32(-3.1234567e+37))
|
||||
|
||||
def test_float64(self):
|
||||
s = readsav(path.join(DATA_PATH, 'scalar_float64.sav'), verbose=False)
|
||||
assert_identical(s.f64, np.float64(-1.1976931348623157e+307))
|
||||
|
||||
def test_complex32(self):
|
||||
s = readsav(path.join(DATA_PATH, 'scalar_complex32.sav'), verbose=False)
|
||||
assert_identical(s.c32, np.complex64(3.124442e13-2.312442e31j))
|
||||
|
||||
def test_bytes(self):
|
||||
s = readsav(path.join(DATA_PATH, 'scalar_string.sav'), verbose=False)
|
||||
assert_identical(s.s, np.bytes_("The quick brown fox jumps over the lazy python"))
|
||||
|
||||
def test_structure(self):
|
||||
pass
|
||||
|
||||
def test_complex64(self):
|
||||
s = readsav(path.join(DATA_PATH, 'scalar_complex64.sav'), verbose=False)
|
||||
assert_identical(s.c64, np.complex128(1.1987253647623157e+112-5.1987258887729157e+307j))
|
||||
|
||||
def test_heap_pointer(self):
|
||||
pass
|
||||
|
||||
def test_object_reference(self):
|
||||
pass
|
||||
|
||||
def test_uint16(self):
|
||||
s = readsav(path.join(DATA_PATH, 'scalar_uint16.sav'), verbose=False)
|
||||
assert_identical(s.i16u, np.uint16(65511))
|
||||
|
||||
def test_uint32(self):
|
||||
s = readsav(path.join(DATA_PATH, 'scalar_uint32.sav'), verbose=False)
|
||||
assert_identical(s.i32u, np.uint32(4294967233))
|
||||
|
||||
def test_int64(self):
|
||||
s = readsav(path.join(DATA_PATH, 'scalar_int64.sav'), verbose=False)
|
||||
assert_identical(s.i64s, np.int64(-9223372036854774567))
|
||||
|
||||
def test_uint64(self):
|
||||
s = readsav(path.join(DATA_PATH, 'scalar_uint64.sav'), verbose=False)
|
||||
assert_identical(s.i64u, np.uint64(18446744073709529285))
|
||||
|
||||
|
||||
class TestCompressed(TestScalars):
|
||||
# Test that compressed .sav files can be read in
|
||||
|
||||
def test_compressed(self):
|
||||
s = readsav(path.join(DATA_PATH, 'various_compressed.sav'), verbose=False)
|
||||
|
||||
assert_identical(s.i8u, np.uint8(234))
|
||||
assert_identical(s.f32, np.float32(-3.1234567e+37))
|
||||
assert_identical(s.c64, np.complex128(1.1987253647623157e+112-5.1987258887729157e+307j))
|
||||
assert_equal(s.array5d.shape, (4, 3, 4, 6, 5))
|
||||
assert_identical(s.arrays.a[0], np.array([1, 2, 3], dtype=np.int16))
|
||||
assert_identical(s.arrays.b[0], np.array([4., 5., 6., 7.], dtype=np.float32))
|
||||
assert_identical(s.arrays.c[0], np.array([np.complex64(1+2j), np.complex64(7+8j)]))
|
||||
assert_identical(s.arrays.d[0], np.array([b"cheese", b"bacon", b"spam"], dtype=object))
|
||||
|
||||
|
||||
class TestArrayDimensions:
|
||||
# Test that multi-dimensional arrays are read in with the correct dimensions
|
||||
|
||||
def test_1d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'array_float32_1d.sav'), verbose=False)
|
||||
assert_equal(s.array1d.shape, (123, ))
|
||||
|
||||
def test_2d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'array_float32_2d.sav'), verbose=False)
|
||||
assert_equal(s.array2d.shape, (22, 12))
|
||||
|
||||
def test_3d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'array_float32_3d.sav'), verbose=False)
|
||||
assert_equal(s.array3d.shape, (11, 22, 12))
|
||||
|
||||
def test_4d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'array_float32_4d.sav'), verbose=False)
|
||||
assert_equal(s.array4d.shape, (4, 5, 8, 7))
|
||||
|
||||
def test_5d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'array_float32_5d.sav'), verbose=False)
|
||||
assert_equal(s.array5d.shape, (4, 3, 4, 6, 5))
|
||||
|
||||
def test_6d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'array_float32_6d.sav'), verbose=False)
|
||||
assert_equal(s.array6d.shape, (3, 6, 4, 5, 3, 4))
|
||||
|
||||
def test_7d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'array_float32_7d.sav'), verbose=False)
|
||||
assert_equal(s.array7d.shape, (2, 1, 2, 3, 4, 3, 2))
|
||||
|
||||
def test_8d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'array_float32_8d.sav'), verbose=False)
|
||||
assert_equal(s.array8d.shape, (4, 3, 2, 1, 2, 3, 5, 4))
|
||||
|
||||
|
||||
class TestStructures:
|
||||
|
||||
def test_scalars(self):
|
||||
s = readsav(path.join(DATA_PATH, 'struct_scalars.sav'), verbose=False)
|
||||
assert_identical(s.scalars.a, np.array(np.int16(1)))
|
||||
assert_identical(s.scalars.b, np.array(np.int32(2)))
|
||||
assert_identical(s.scalars.c, np.array(np.float32(3.)))
|
||||
assert_identical(s.scalars.d, np.array(np.float64(4.)))
|
||||
assert_identical(s.scalars.e, np.array([b"spam"], dtype=object))
|
||||
assert_identical(s.scalars.f, np.array(np.complex64(-1.+3j)))
|
||||
|
||||
def test_scalars_replicated(self):
|
||||
s = readsav(path.join(DATA_PATH, 'struct_scalars_replicated.sav'), verbose=False)
|
||||
assert_identical(s.scalars_rep.a, np.repeat(np.int16(1), 5))
|
||||
assert_identical(s.scalars_rep.b, np.repeat(np.int32(2), 5))
|
||||
assert_identical(s.scalars_rep.c, np.repeat(np.float32(3.), 5))
|
||||
assert_identical(s.scalars_rep.d, np.repeat(np.float64(4.), 5))
|
||||
assert_identical(s.scalars_rep.e, np.repeat(b"spam", 5).astype(object))
|
||||
assert_identical(s.scalars_rep.f, np.repeat(np.complex64(-1.+3j), 5))
|
||||
|
||||
def test_scalars_replicated_3d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'struct_scalars_replicated_3d.sav'), verbose=False)
|
||||
assert_identical(s.scalars_rep.a, np.repeat(np.int16(1), 24).reshape(4, 3, 2))
|
||||
assert_identical(s.scalars_rep.b, np.repeat(np.int32(2), 24).reshape(4, 3, 2))
|
||||
assert_identical(s.scalars_rep.c, np.repeat(np.float32(3.), 24).reshape(4, 3, 2))
|
||||
assert_identical(s.scalars_rep.d, np.repeat(np.float64(4.), 24).reshape(4, 3, 2))
|
||||
assert_identical(s.scalars_rep.e, np.repeat(b"spam", 24).reshape(4, 3, 2).astype(object))
|
||||
assert_identical(s.scalars_rep.f, np.repeat(np.complex64(-1.+3j), 24).reshape(4, 3, 2))
|
||||
|
||||
def test_arrays(self):
|
||||
s = readsav(path.join(DATA_PATH, 'struct_arrays.sav'), verbose=False)
|
||||
assert_array_identical(s.arrays.a[0], np.array([1, 2, 3], dtype=np.int16))
|
||||
assert_array_identical(s.arrays.b[0], np.array([4., 5., 6., 7.], dtype=np.float32))
|
||||
assert_array_identical(s.arrays.c[0], np.array([np.complex64(1+2j), np.complex64(7+8j)]))
|
||||
assert_array_identical(s.arrays.d[0], np.array([b"cheese", b"bacon", b"spam"], dtype=object))
|
||||
|
||||
def test_arrays_replicated(self):
|
||||
s = readsav(path.join(DATA_PATH, 'struct_arrays_replicated.sav'), verbose=False)
|
||||
|
||||
# Check column types
|
||||
assert_(s.arrays_rep.a.dtype.type is np.object_)
|
||||
assert_(s.arrays_rep.b.dtype.type is np.object_)
|
||||
assert_(s.arrays_rep.c.dtype.type is np.object_)
|
||||
assert_(s.arrays_rep.d.dtype.type is np.object_)
|
||||
|
||||
# Check column shapes
|
||||
assert_equal(s.arrays_rep.a.shape, (5, ))
|
||||
assert_equal(s.arrays_rep.b.shape, (5, ))
|
||||
assert_equal(s.arrays_rep.c.shape, (5, ))
|
||||
assert_equal(s.arrays_rep.d.shape, (5, ))
|
||||
|
||||
# Check values
|
||||
for i in range(5):
|
||||
assert_array_identical(s.arrays_rep.a[i],
|
||||
np.array([1, 2, 3], dtype=np.int16))
|
||||
assert_array_identical(s.arrays_rep.b[i],
|
||||
np.array([4., 5., 6., 7.], dtype=np.float32))
|
||||
assert_array_identical(s.arrays_rep.c[i],
|
||||
np.array([np.complex64(1+2j),
|
||||
np.complex64(7+8j)]))
|
||||
assert_array_identical(s.arrays_rep.d[i],
|
||||
np.array([b"cheese", b"bacon", b"spam"],
|
||||
dtype=object))
|
||||
|
||||
def test_arrays_replicated_3d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'struct_arrays_replicated_3d.sav'), verbose=False)
|
||||
|
||||
# Check column types
|
||||
assert_(s.arrays_rep.a.dtype.type is np.object_)
|
||||
assert_(s.arrays_rep.b.dtype.type is np.object_)
|
||||
assert_(s.arrays_rep.c.dtype.type is np.object_)
|
||||
assert_(s.arrays_rep.d.dtype.type is np.object_)
|
||||
|
||||
# Check column shapes
|
||||
assert_equal(s.arrays_rep.a.shape, (4, 3, 2))
|
||||
assert_equal(s.arrays_rep.b.shape, (4, 3, 2))
|
||||
assert_equal(s.arrays_rep.c.shape, (4, 3, 2))
|
||||
assert_equal(s.arrays_rep.d.shape, (4, 3, 2))
|
||||
|
||||
# Check values
|
||||
for i in range(4):
|
||||
for j in range(3):
|
||||
for k in range(2):
|
||||
assert_array_identical(s.arrays_rep.a[i, j, k],
|
||||
np.array([1, 2, 3], dtype=np.int16))
|
||||
assert_array_identical(s.arrays_rep.b[i, j, k],
|
||||
np.array([4., 5., 6., 7.],
|
||||
dtype=np.float32))
|
||||
assert_array_identical(s.arrays_rep.c[i, j, k],
|
||||
np.array([np.complex64(1+2j),
|
||||
np.complex64(7+8j)]))
|
||||
assert_array_identical(s.arrays_rep.d[i, j, k],
|
||||
np.array([b"cheese", b"bacon", b"spam"],
|
||||
dtype=object))
|
||||
|
||||
def test_inheritance(self):
|
||||
s = readsav(path.join(DATA_PATH, 'struct_inherit.sav'), verbose=False)
|
||||
assert_identical(s.fc.x, np.array([0], dtype=np.int16))
|
||||
assert_identical(s.fc.y, np.array([0], dtype=np.int16))
|
||||
assert_identical(s.fc.r, np.array([0], dtype=np.int16))
|
||||
assert_identical(s.fc.c, np.array([4], dtype=np.int16))
|
||||
|
||||
def test_arrays_corrupt_idl80(self):
|
||||
# test byte arrays with missing nbyte information from IDL 8.0 .sav file
|
||||
with suppress_warnings() as sup:
|
||||
sup.filter(UserWarning, "Not able to verify number of bytes from header")
|
||||
s = readsav(path.join(DATA_PATH,'struct_arrays_byte_idl80.sav'),
|
||||
verbose=False)
|
||||
|
||||
assert_identical(s.y.x[0], np.array([55,66], dtype=np.uint8))
|
||||
|
||||
|
||||
class TestPointers:
|
||||
# Check that pointers in .sav files produce references to the same object in Python
|
||||
|
||||
def test_pointers(self):
|
||||
s = readsav(path.join(DATA_PATH, 'scalar_heap_pointer.sav'), verbose=False)
|
||||
assert_identical(s.c64_pointer1, np.complex128(1.1987253647623157e+112-5.1987258887729157e+307j))
|
||||
assert_identical(s.c64_pointer2, np.complex128(1.1987253647623157e+112-5.1987258887729157e+307j))
|
||||
assert_(s.c64_pointer1 is s.c64_pointer2)
|
||||
|
||||
|
||||
class TestPointerArray:
|
||||
# Test that pointers in arrays are correctly read in
|
||||
|
||||
def test_1d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'array_float32_pointer_1d.sav'), verbose=False)
|
||||
assert_equal(s.array1d.shape, (123, ))
|
||||
assert_(np.all(s.array1d == np.float32(4.)))
|
||||
assert_(np.all(vect_id(s.array1d) == id(s.array1d[0])))
|
||||
|
||||
def test_2d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'array_float32_pointer_2d.sav'), verbose=False)
|
||||
assert_equal(s.array2d.shape, (22, 12))
|
||||
assert_(np.all(s.array2d == np.float32(4.)))
|
||||
assert_(np.all(vect_id(s.array2d) == id(s.array2d[0,0])))
|
||||
|
||||
def test_3d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'array_float32_pointer_3d.sav'), verbose=False)
|
||||
assert_equal(s.array3d.shape, (11, 22, 12))
|
||||
assert_(np.all(s.array3d == np.float32(4.)))
|
||||
assert_(np.all(vect_id(s.array3d) == id(s.array3d[0,0,0])))
|
||||
|
||||
def test_4d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'array_float32_pointer_4d.sav'), verbose=False)
|
||||
assert_equal(s.array4d.shape, (4, 5, 8, 7))
|
||||
assert_(np.all(s.array4d == np.float32(4.)))
|
||||
assert_(np.all(vect_id(s.array4d) == id(s.array4d[0,0,0,0])))
|
||||
|
||||
def test_5d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'array_float32_pointer_5d.sav'), verbose=False)
|
||||
assert_equal(s.array5d.shape, (4, 3, 4, 6, 5))
|
||||
assert_(np.all(s.array5d == np.float32(4.)))
|
||||
assert_(np.all(vect_id(s.array5d) == id(s.array5d[0,0,0,0,0])))
|
||||
|
||||
def test_6d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'array_float32_pointer_6d.sav'), verbose=False)
|
||||
assert_equal(s.array6d.shape, (3, 6, 4, 5, 3, 4))
|
||||
assert_(np.all(s.array6d == np.float32(4.)))
|
||||
assert_(np.all(vect_id(s.array6d) == id(s.array6d[0,0,0,0,0,0])))
|
||||
|
||||
def test_7d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'array_float32_pointer_7d.sav'), verbose=False)
|
||||
assert_equal(s.array7d.shape, (2, 1, 2, 3, 4, 3, 2))
|
||||
assert_(np.all(s.array7d == np.float32(4.)))
|
||||
assert_(np.all(vect_id(s.array7d) == id(s.array7d[0,0,0,0,0,0,0])))
|
||||
|
||||
def test_8d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'array_float32_pointer_8d.sav'), verbose=False)
|
||||
assert_equal(s.array8d.shape, (4, 3, 2, 1, 2, 3, 5, 4))
|
||||
assert_(np.all(s.array8d == np.float32(4.)))
|
||||
assert_(np.all(vect_id(s.array8d) == id(s.array8d[0,0,0,0,0,0,0,0])))
|
||||
|
||||
|
||||
class TestPointerStructures:
|
||||
# Test that structures are correctly read in
|
||||
|
||||
def test_scalars(self):
|
||||
s = readsav(path.join(DATA_PATH, 'struct_pointers.sav'), verbose=False)
|
||||
assert_identical(s.pointers.g, np.array(np.float32(4.), dtype=np.object_))
|
||||
assert_identical(s.pointers.h, np.array(np.float32(4.), dtype=np.object_))
|
||||
assert_(id(s.pointers.g[0]) == id(s.pointers.h[0]))
|
||||
|
||||
def test_pointers_replicated(self):
|
||||
s = readsav(path.join(DATA_PATH, 'struct_pointers_replicated.sav'), verbose=False)
|
||||
assert_identical(s.pointers_rep.g, np.repeat(np.float32(4.), 5).astype(np.object_))
|
||||
assert_identical(s.pointers_rep.h, np.repeat(np.float32(4.), 5).astype(np.object_))
|
||||
assert_(np.all(vect_id(s.pointers_rep.g) == vect_id(s.pointers_rep.h)))
|
||||
|
||||
def test_pointers_replicated_3d(self):
|
||||
s = readsav(path.join(DATA_PATH, 'struct_pointers_replicated_3d.sav'), verbose=False)
|
||||
s_expect = np.repeat(np.float32(4.), 24).reshape(4, 3, 2).astype(np.object_)
|
||||
assert_identical(s.pointers_rep.g, s_expect)
|
||||
assert_identical(s.pointers_rep.h, s_expect)
|
||||
assert_(np.all(vect_id(s.pointers_rep.g) == vect_id(s.pointers_rep.h)))
|
||||
|
||||
def test_arrays(self):
|
||||
s = readsav(path.join(DATA_PATH, 'struct_pointer_arrays.sav'), verbose=False)
|
||||
assert_array_identical(s.arrays.g[0], np.repeat(np.float32(4.), 2).astype(np.object_))
|
||||
assert_array_identical(s.arrays.h[0], np.repeat(np.float32(4.), 3).astype(np.object_))
|
||||
assert_(np.all(vect_id(s.arrays.g[0]) == id(s.arrays.g[0][0])))
|
||||
assert_(np.all(vect_id(s.arrays.h[0]) == id(s.arrays.h[0][0])))
|
||||
assert_(id(s.arrays.g[0][0]) == id(s.arrays.h[0][0]))
|
||||
|
||||
def test_arrays_replicated(self):
|
||||
s = readsav(path.join(DATA_PATH, 'struct_pointer_arrays_replicated.sav'), verbose=False)
|
||||
|
||||
# Check column types
|
||||
assert_(s.arrays_rep.g.dtype.type is np.object_)
|
||||
assert_(s.arrays_rep.h.dtype.type is np.object_)
|
||||
|
||||
# Check column shapes
|
||||
assert_equal(s.arrays_rep.g.shape, (5, ))
|
||||
assert_equal(s.arrays_rep.h.shape, (5, ))
|
||||
|
||||
# Check values
|
||||
for i in range(5):
|
||||
assert_array_identical(s.arrays_rep.g[i], np.repeat(np.float32(4.), 2).astype(np.object_))
|
||||
assert_array_identical(s.arrays_rep.h[i], np.repeat(np.float32(4.), 3).astype(np.object_))
|
||||
assert_(np.all(vect_id(s.arrays_rep.g[i]) == id(s.arrays_rep.g[0][0])))
|
||||
assert_(np.all(vect_id(s.arrays_rep.h[i]) == id(s.arrays_rep.h[0][0])))
|
||||
|
||||
def test_arrays_replicated_3d(self):
|
||||
pth = path.join(DATA_PATH, 'struct_pointer_arrays_replicated_3d.sav')
|
||||
s = readsav(pth, verbose=False)
|
||||
|
||||
# Check column types
|
||||
assert_(s.arrays_rep.g.dtype.type is np.object_)
|
||||
assert_(s.arrays_rep.h.dtype.type is np.object_)
|
||||
|
||||
# Check column shapes
|
||||
assert_equal(s.arrays_rep.g.shape, (4, 3, 2))
|
||||
assert_equal(s.arrays_rep.h.shape, (4, 3, 2))
|
||||
|
||||
# Check values
|
||||
for i in range(4):
|
||||
for j in range(3):
|
||||
for k in range(2):
|
||||
assert_array_identical(s.arrays_rep.g[i, j, k],
|
||||
np.repeat(np.float32(4.), 2).astype(np.object_))
|
||||
assert_array_identical(s.arrays_rep.h[i, j, k],
|
||||
np.repeat(np.float32(4.), 3).astype(np.object_))
|
||||
assert_(np.all(vect_id(s.arrays_rep.g[i, j, k]) == id(s.arrays_rep.g[0, 0, 0][0])))
|
||||
assert_(np.all(vect_id(s.arrays_rep.h[i, j, k]) == id(s.arrays_rep.h[0, 0, 0][0])))
|
||||
class TestTags:
|
||||
'''Test that sav files with description tag read at all'''
|
||||
|
||||
def test_description(self):
|
||||
s = readsav(path.join(DATA_PATH, 'scalar_byte_descr.sav'), verbose=False)
|
||||
assert_identical(s.i8u, np.uint8(234))
|
||||
|
||||
|
||||
def test_null_pointer():
|
||||
# Regression test for null pointers.
|
||||
s = readsav(path.join(DATA_PATH, 'null_pointer.sav'), verbose=False)
|
||||
assert_identical(s.point, None)
|
||||
assert_identical(s.check, np.int16(5))
|
||||
|
||||
|
||||
def test_invalid_pointer():
|
||||
# Regression test for invalid pointers (gh-4613).
|
||||
|
||||
# In some files in the wild, pointers can sometimes refer to a heap
|
||||
# variable that does not exist. In that case, we now gracefully fail for
|
||||
# that variable and replace the variable with None and emit a warning.
|
||||
# Since it's difficult to artificially produce such files, the file used
|
||||
# here has been edited to force the pointer reference to be invalid.
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
warnings.simplefilter("always")
|
||||
s = readsav(path.join(DATA_PATH, 'invalid_pointer.sav'), verbose=False)
|
||||
assert_(len(w) == 1)
|
||||
assert_(str(w[0].message) == ("Variable referenced by pointer not found in "
|
||||
"heap: variable will be set to None"))
|
||||
assert_identical(s['a'], np.array([None, None]))
|
||||
|
||||
@@ -1,673 +0,0 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
from tempfile import mkdtemp, mktemp
|
||||
import os
|
||||
import shutil
|
||||
|
||||
import numpy as np
|
||||
from numpy import array, transpose, pi
|
||||
from numpy.testing import (assert_equal,
|
||||
assert_array_equal, assert_array_almost_equal)
|
||||
import pytest
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
import scipy.sparse
|
||||
from scipy.io.mmio import mminfo, mmread, mmwrite
|
||||
|
||||
parametrize_args = [('integer', 'int'),
|
||||
('unsigned-integer', 'uint')]
|
||||
|
||||
|
||||
class TestMMIOArray(object):
    """Round-trip tests for dense ('array' format) Matrix Market files.

    Each test writes a matrix with ``mmwrite``, compares the header
    reported by ``mminfo`` against an expected
    ``(rows, cols, entries, format, field, symmetry)`` tuple, then reads
    the file back with ``mmread`` and compares the values.
    """

    def setup_method(self):
        # Fresh scratch directory and target .mtx path for every test.
        self.tmpdir = mkdtemp()
        self.fn = os.path.join(self.tmpdir, 'testfile.mtx')

    def teardown_method(self):
        shutil.rmtree(self.tmpdir)

    def check(self, a, info):
        # Write/read round trip with approximate comparison (float data).
        mmwrite(self.fn, a)
        assert_equal(mminfo(self.fn), info)
        b = mmread(self.fn)
        assert_array_almost_equal(a, b)

    def check_exact(self, a, info):
        # Write/read round trip requiring exact equality (integer data).
        mmwrite(self.fn, a)
        assert_equal(mminfo(self.fn), info)
        b = mmread(self.fn)
        assert_equal(a, b)

    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
    def test_simple_integer(self, typeval, dtype):
        self.check_exact(array([[1, 2], [3, 4]], dtype=dtype),
                         (2, 2, 4, 'array', typeval, 'general'))

    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
    def test_32bit_integer(self, typeval, dtype):
        # Values at the top of the signed 32-bit range.
        a = array([[2**31-1, 2**31-2], [2**31-3, 2**31-4]], dtype=dtype)
        self.check_exact(a, (2, 2, 4, 'array', typeval, 'general'))

    def test_64bit_integer(self):
        a = array([[2**31, 2**32], [2**63-2, 2**63-1]], dtype=np.int64)
        # On platforms with a 32-bit intp, writing these values must raise.
        if (np.intp(0).itemsize < 8):
            assert_raises(OverflowError, mmwrite, self.fn, a)
        else:
            self.check_exact(a, (2, 2, 4, 'array', 'integer', 'general'))

    def test_64bit_unsigned_integer(self):
        a = array([[2**31, 2**32], [2**64-2, 2**64-1]], dtype=np.uint64)
        self.check_exact(a, (2, 2, 4, 'array', 'unsigned-integer', 'general'))

    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
    def test_simple_upper_triangle_integer(self, typeval, dtype):
        self.check_exact(array([[0, 1], [0, 0]], dtype=dtype),
                         (2, 2, 4, 'array', typeval, 'general'))

    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
    def test_simple_lower_triangle_integer(self, typeval, dtype):
        self.check_exact(array([[0, 0], [1, 0]], dtype=dtype),
                         (2, 2, 4, 'array', typeval, 'general'))

    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
    def test_simple_rectangular_integer(self, typeval, dtype):
        self.check_exact(array([[1, 2, 3], [4, 5, 6]], dtype=dtype),
                         (2, 3, 6, 'array', typeval, 'general'))

    def test_simple_rectangular_float(self):
        self.check([[1, 2], [3.5, 4], [5, 6]],
                   (3, 2, 6, 'array', 'real', 'general'))

    def test_simple_float(self):
        self.check([[1, 2], [3, 4.0]],
                   (2, 2, 4, 'array', 'real', 'general'))

    def test_simple_complex(self):
        self.check([[1, 2], [3, 4j]],
                   (2, 2, 4, 'array', 'complex', 'general'))

    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
    def test_simple_symmetric_integer(self, typeval, dtype):
        self.check_exact(array([[1, 2], [2, 4]], dtype=dtype),
                         (2, 2, 4, 'array', typeval, 'symmetric'))

    def test_simple_skew_symmetric_integer(self):
        self.check_exact([[0, 2], [-2, 0]],
                         (2, 2, 4, 'array', 'integer', 'skew-symmetric'))

    def test_simple_skew_symmetric_float(self):
        self.check(array([[0, 2], [-2.0, 0.0]], 'f'),
                   (2, 2, 4, 'array', 'real', 'skew-symmetric'))

    def test_simple_hermitian_complex(self):
        self.check([[1, 2+3j], [2-3j, 4]],
                   (2, 2, 4, 'array', 'complex', 'hermitian'))

    def test_random_symmetric_float(self):
        sz = (20, 20)
        a = np.random.random(sz)
        # Symmetrize so mmwrite detects and records 'symmetric'.
        a = a + transpose(a)
        self.check(a, (20, 20, 400, 'array', 'real', 'symmetric'))

    def test_random_rectangular_float(self):
        sz = (20, 15)
        a = np.random.random(sz)
        self.check(a, (20, 15, 300, 'array', 'real', 'general'))
||||
class TestMMIOSparseCSR(TestMMIOArray):
    """Round-trip tests for sparse CSR matrices ('coordinate' format).

    Overrides the dense-array checks of :class:`TestMMIOArray` so that
    comparisons go through ``.todense()``, and re-states the expected
    ``mminfo`` tuples with the 'coordinate' format and sparse entry counts.
    """

    def setup_method(self):
        # Fresh scratch directory and target .mtx path for every test.
        self.tmpdir = mkdtemp()
        self.fn = os.path.join(self.tmpdir, 'testfile.mtx')

    def teardown_method(self):
        shutil.rmtree(self.tmpdir)

    def check(self, a, info):
        # Sparse round trip with approximate comparison (float data).
        mmwrite(self.fn, a)
        assert_equal(mminfo(self.fn), info)
        b = mmread(self.fn)
        assert_array_almost_equal(a.todense(), b.todense())

    def check_exact(self, a, info):
        # Sparse round trip requiring exact equality (integer data).
        mmwrite(self.fn, a)
        assert_equal(mminfo(self.fn), info)
        b = mmread(self.fn)
        assert_equal(a.todense(), b.todense())

    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
    def test_simple_integer(self, typeval, dtype):
        self.check_exact(scipy.sparse.csr_matrix([[1, 2], [3, 4]], dtype=dtype),
                         (2, 2, 4, 'coordinate', typeval, 'general'))

    def test_32bit_integer(self):
        a = scipy.sparse.csr_matrix(array([[2**31-1, -2**31+2],
                                           [2**31-3, 2**31-4]],
                                          dtype=np.int32))
        self.check_exact(a, (2, 2, 4, 'coordinate', 'integer', 'general'))

    def test_64bit_integer(self):
        a = scipy.sparse.csr_matrix(array([[2**32+1, 2**32+1],
                                           [-2**63+2, 2**63-2]],
                                          dtype=np.int64))
        # On platforms with a 32-bit intp, writing these values must raise.
        if (np.intp(0).itemsize < 8):
            assert_raises(OverflowError, mmwrite, self.fn, a)
        else:
            self.check_exact(a, (2, 2, 4, 'coordinate', 'integer', 'general'))

    def test_32bit_unsigned_integer(self):
        a = scipy.sparse.csr_matrix(array([[2**31-1, 2**31-2],
                                           [2**31-3, 2**31-4]],
                                          dtype=np.uint32))
        self.check_exact(a, (2, 2, 4, 'coordinate', 'unsigned-integer', 'general'))

    def test_64bit_unsigned_integer(self):
        a = scipy.sparse.csr_matrix(array([[2**32+1, 2**32+1],
                                           [2**64-2, 2**64-1]],
                                          dtype=np.uint64))
        self.check_exact(a, (2, 2, 4, 'coordinate', 'unsigned-integer', 'general'))

    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
    def test_simple_upper_triangle_integer(self, typeval, dtype):
        # Only one stored entry, hence entries == 1 in the info tuple.
        self.check_exact(scipy.sparse.csr_matrix([[0, 1], [0, 0]], dtype=dtype),
                         (2, 2, 1, 'coordinate', typeval, 'general'))

    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
    def test_simple_lower_triangle_integer(self, typeval, dtype):
        self.check_exact(scipy.sparse.csr_matrix([[0, 0], [1, 0]], dtype=dtype),
                         (2, 2, 1, 'coordinate', typeval, 'general'))

    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
    def test_simple_rectangular_integer(self, typeval, dtype):
        self.check_exact(scipy.sparse.csr_matrix([[1, 2, 3], [4, 5, 6]], dtype=dtype),
                         (2, 3, 6, 'coordinate', typeval, 'general'))

    def test_simple_rectangular_float(self):
        self.check(scipy.sparse.csr_matrix([[1, 2], [3.5, 4], [5, 6]]),
                   (3, 2, 6, 'coordinate', 'real', 'general'))

    def test_simple_float(self):
        self.check(scipy.sparse.csr_matrix([[1, 2], [3, 4.0]]),
                   (2, 2, 4, 'coordinate', 'real', 'general'))

    def test_simple_complex(self):
        self.check(scipy.sparse.csr_matrix([[1, 2], [3, 4j]]),
                   (2, 2, 4, 'coordinate', 'complex', 'general'))

    @pytest.mark.parametrize('typeval, dtype', parametrize_args)
    def test_simple_symmetric_integer(self, typeval, dtype):
        # Symmetric storage keeps only one triangle: 3 stored entries.
        self.check_exact(scipy.sparse.csr_matrix([[1, 2], [2, 4]], dtype=dtype),
                         (2, 2, 3, 'coordinate', typeval, 'symmetric'))

    def test_simple_skew_symmetric_integer(self):
        self.check_exact(scipy.sparse.csr_matrix([[1, 2], [-2, 4]]),
                         (2, 2, 3, 'coordinate', 'integer', 'skew-symmetric'))

    def test_simple_skew_symmetric_float(self):
        self.check(scipy.sparse.csr_matrix(array([[1, 2], [-2.0, 4]], 'f')),
                   (2, 2, 3, 'coordinate', 'real', 'skew-symmetric'))

    def test_simple_hermitian_complex(self):
        self.check(scipy.sparse.csr_matrix([[1, 2+3j], [2-3j, 4]]),
                   (2, 2, 3, 'coordinate', 'complex', 'hermitian'))

    def test_random_symmetric_float(self):
        sz = (20, 20)
        a = np.random.random(sz)
        # Symmetrize; symmetric storage of a dense 20x20 keeps 210 entries.
        a = a + transpose(a)
        a = scipy.sparse.csr_matrix(a)
        self.check(a, (20, 20, 210, 'coordinate', 'real', 'symmetric'))

    def test_random_rectangular_float(self):
        sz = (20, 15)
        a = np.random.random(sz)
        a = scipy.sparse.csr_matrix(a)
        self.check(a, (20, 15, 300, 'coordinate', 'real', 'general'))

    def test_simple_pattern(self):
        # 'pattern' field stores only the nonzero structure; values read
        # back as ones wherever the original had a nonzero.
        a = scipy.sparse.csr_matrix([[0, 1.5], [3.0, 2.5]])
        p = np.zeros_like(a.todense())
        p[a.todense() > 0] = 1
        info = (2, 2, 3, 'coordinate', 'pattern', 'general')
        mmwrite(self.fn, a, field='pattern')
        assert_equal(mminfo(self.fn), info)
        b = mmread(self.fn)
        assert_array_almost_equal(p, b.todense())
|
||||
# ---------------------------------------------------------------------------
# Literal Matrix Market fixtures used by TestMMIOReadLargeIntegers below.
# Values sit at the signed/unsigned 32- and 64-bit boundaries; the
# "_over64bit_" examples contain values that exceed 2**64 - 1 and so can
# never be represented, on any platform.
# ---------------------------------------------------------------------------

_32bit_integer_dense_example = '''\
%%MatrixMarket matrix array integer general
2 2
2147483647
2147483646
2147483647
2147483646
'''

_32bit_integer_sparse_example = '''\
%%MatrixMarket matrix coordinate integer symmetric
2 2 2
1 1 2147483647
2 2 2147483646
'''

_64bit_integer_dense_example = '''\
%%MatrixMarket matrix array integer general
2 2
2147483648
-9223372036854775806
-2147483648
9223372036854775807
'''

_64bit_integer_sparse_general_example = '''\
%%MatrixMarket matrix coordinate integer general
2 2 3
1 1 2147483648
1 2 9223372036854775807
2 2 9223372036854775807
'''

_64bit_integer_sparse_symmetric_example = '''\
%%MatrixMarket matrix coordinate integer symmetric
2 2 3
1 1 2147483648
1 2 -9223372036854775807
2 2 9223372036854775807
'''

_64bit_integer_sparse_skew_example = '''\
%%MatrixMarket matrix coordinate integer skew-symmetric
2 2 3
1 1 2147483648
1 2 -9223372036854775807
2 2 9223372036854775807
'''

# The last value below is 2**63, one past the largest signed 64-bit integer.
_over64bit_integer_dense_example = '''\
%%MatrixMarket matrix array integer general
2 2
2147483648
9223372036854775807
2147483648
9223372036854775808
'''

# The last value below exceeds 2**64 - 1.
_over64bit_integer_sparse_example = '''\
%%MatrixMarket matrix coordinate integer symmetric
2 2 2
1 1 2147483648
2 2 19223372036854775808
'''
|
||||
|
||||
class TestMMIOReadLargeIntegers(object):
    """Reading Matrix Market files whose integers need 64 bits (or more)."""

    def setup_method(self):
        # Fresh scratch directory and target .mtx path for every test.
        self.tmpdir = mkdtemp()
        self.fn = os.path.join(self.tmpdir, 'testfile.mtx')

    def teardown_method(self):
        shutil.rmtree(self.tmpdir)

    def check_read(self, example, a, info, dense, over32, over64):
        """Write the literal `example` file, then verify read behavior.

        Parameters
        ----------
        example : str
            Raw Matrix Market file contents.
        a : ndarray or None
            Expected matrix, or None when an OverflowError is expected.
        info : tuple
            Expected ``mminfo`` tuple.
        dense : bool
            Whether the example is in 'array' (dense) format.
        over32, over64 : bool
            Whether values exceed the 32- / 64-bit signed range; values
            beyond the platform's capacity must raise OverflowError.
        """
        with open(self.fn, 'w') as f:
            f.write(example)
        assert_equal(mminfo(self.fn), info)
        if (over32 and (np.intp(0).itemsize < 8)) or over64:
            assert_raises(OverflowError, mmread, self.fn)
        else:
            b = mmread(self.fn)
            if not dense:
                b = b.todense()
            assert_equal(a, b)

    def test_read_32bit_integer_dense(self):
        a = array([[2**31-1, 2**31-1],
                   [2**31-2, 2**31-2]], dtype=np.int64)
        self.check_read(_32bit_integer_dense_example,
                        a,
                        (2, 2, 4, 'array', 'integer', 'general'),
                        dense=True,
                        over32=False,
                        over64=False)

    def test_read_32bit_integer_sparse(self):
        a = array([[2**31-1, 0],
                   [0, 2**31-2]], dtype=np.int64)
        self.check_read(_32bit_integer_sparse_example,
                        a,
                        (2, 2, 2, 'coordinate', 'integer', 'symmetric'),
                        dense=False,
                        over32=False,
                        over64=False)

    def test_read_64bit_integer_dense(self):
        a = array([[2**31, -2**31],
                   [-2**63+2, 2**63-1]], dtype=np.int64)
        self.check_read(_64bit_integer_dense_example,
                        a,
                        (2, 2, 4, 'array', 'integer', 'general'),
                        dense=True,
                        over32=True,
                        over64=False)

    def test_read_64bit_integer_sparse_general(self):
        a = array([[2**31, 2**63-1],
                   [0, 2**63-1]], dtype=np.int64)
        self.check_read(_64bit_integer_sparse_general_example,
                        a,
                        (2, 2, 3, 'coordinate', 'integer', 'general'),
                        dense=False,
                        over32=True,
                        over64=False)

    def test_read_64bit_integer_sparse_symmetric(self):
        # Off-diagonal entry is mirrored across the diagonal.
        a = array([[2**31, -2**63+1],
                   [-2**63+1, 2**63-1]], dtype=np.int64)
        self.check_read(_64bit_integer_sparse_symmetric_example,
                        a,
                        (2, 2, 3, 'coordinate', 'integer', 'symmetric'),
                        dense=False,
                        over32=True,
                        over64=False)

    def test_read_64bit_integer_sparse_skew(self):
        # Skew-symmetric: the mirrored entry is negated.
        a = array([[2**31, -2**63+1],
                   [2**63-1, 2**63-1]], dtype=np.int64)
        self.check_read(_64bit_integer_sparse_skew_example,
                        a,
                        (2, 2, 3, 'coordinate', 'integer', 'skew-symmetric'),
                        dense=False,
                        over32=True,
                        over64=False)

    def test_read_over64bit_integer_dense(self):
        # Values beyond 64 bits can never be read; OverflowError expected.
        self.check_read(_over64bit_integer_dense_example,
                        None,
                        (2, 2, 4, 'array', 'integer', 'general'),
                        dense=True,
                        over32=True,
                        over64=True)

    def test_read_over64bit_integer_sparse(self):
        self.check_read(_over64bit_integer_sparse_example,
                        None,
                        (2, 2, 2, 'coordinate', 'integer', 'symmetric'),
                        dense=False,
                        over32=True,
                        over64=True)
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Literal Matrix Market fixtures used by TestMMIOCoordinate below, one per
# symmetry kind ('general', 'hermitian', 'skew-symmetric', 'symmetric') plus
# a 'pattern' (structure-only) example.
# ---------------------------------------------------------------------------

_general_example = '''\
%%MatrixMarket matrix coordinate real general
%=================================================================================
%
% This ASCII file represents a sparse MxN matrix with L
% nonzeros in the following Matrix Market format:
%
% +----------------------------------------------+
% |%%MatrixMarket matrix coordinate real general | <--- header line
% |%                                             | <--+
% |% comments                                    |    |-- 0 or more comment lines
% |%                                             | <--+
% |    M  N  L                                   | <--- rows, columns, entries
% |    I1  J1  A(I1, J1)                         | <--+
% |    I2  J2  A(I2, J2)                         |    |
% |    I3  J3  A(I3, J3)                         |    |-- L lines
% |        . . .                                 |    |
% |    IL JL  A(IL, JL)                          | <--+
% +----------------------------------------------+
%
% Indices are 1-based, i.e. A(1,1) is the first element.
%
%=================================================================================
  5  5  8
    1     1   1.000e+00
    2     2   1.050e+01
    3     3   1.500e-02
    1     4   6.000e+00
    4     2   2.505e+02
    4     4  -2.800e+02
    4     5   3.332e+01
    5     5   1.200e+01
'''

_hermitian_example = '''\
%%MatrixMarket matrix coordinate complex hermitian
  5  5  7
    1     1     1.0      0
    2     2    10.5      0
    4     2   250.5     22.22
    3     3     1.5e-2   0
    4     4    -2.8e2    0
    5     5    12.       0
    5     4     0       33.32
'''

_skew_example = '''\
%%MatrixMarket matrix coordinate real skew-symmetric
  5  5  7
    1     1     1.0
    2     2    10.5
    4     2   250.5
    3     3     1.5e-2
    4     4    -2.8e2
    5     5    12.
    5     4     0
'''

_symmetric_example = '''\
%%MatrixMarket matrix coordinate real symmetric
  5  5  7
    1     1     1.0
    2     2    10.5
    4     2   250.5
    3     3     1.5e-2
    4     4    -2.8e2
    5     5    12.
    5     4     8
'''

_symmetric_pattern_example = '''\
%%MatrixMarket matrix coordinate pattern symmetric
  5  5  7
    1     1
    2     2
    4     2
    3     3
    4     4
    5     5
    5     4
'''
||||
|
||||
|
||||
class TestMMIOCoordinate(object):
    """Read/write tests for coordinate-format Matrix Market files.

    Covers reading each symmetry variant from a literal fixture, write/read
    round trips (including via bz2/gzip compressed files), empty matrices,
    multiple sparse formats, and output precision control.
    """

    def setup_method(self):
        # Fresh scratch directory and target .mtx path for every test.
        self.tmpdir = mkdtemp()
        self.fn = os.path.join(self.tmpdir, 'testfile.mtx')

    def teardown_method(self):
        shutil.rmtree(self.tmpdir)

    def check_read(self, example, a, info):
        """Write the literal `example`, then verify mminfo() and values."""
        # Context manager ensures the handle is closed even if write()
        # fails (original used bare open()/close()).
        with open(self.fn, 'w') as f:
            f.write(example)
        assert_equal(mminfo(self.fn), info)
        b = mmread(self.fn).todense()
        assert_array_almost_equal(a, b)

    def test_read_general(self):
        a = [[1, 0, 0, 6, 0],
             [0, 10.5, 0, 0, 0],
             [0, 0, .015, 0, 0],
             [0, 250.5, 0, -280, 33.32],
             [0, 0, 0, 0, 12]]
        self.check_read(_general_example, a,
                        (5, 5, 8, 'coordinate', 'real', 'general'))

    def test_read_hermitian(self):
        # Mirrored entries are complex-conjugated.
        a = [[1, 0, 0, 0, 0],
             [0, 10.5, 0, 250.5 - 22.22j, 0],
             [0, 0, .015, 0, 0],
             [0, 250.5 + 22.22j, 0, -280, -33.32j],
             [0, 0, 0, 33.32j, 12]]
        self.check_read(_hermitian_example, a,
                        (5, 5, 7, 'coordinate', 'complex', 'hermitian'))

    def test_read_skew(self):
        # Mirrored entries are negated; diagonal comes through as stored.
        a = [[1, 0, 0, 0, 0],
             [0, 10.5, 0, -250.5, 0],
             [0, 0, .015, 0, 0],
             [0, 250.5, 0, -280, 0],
             [0, 0, 0, 0, 12]]
        self.check_read(_skew_example, a,
                        (5, 5, 7, 'coordinate', 'real', 'skew-symmetric'))

    def test_read_symmetric(self):
        a = [[1, 0, 0, 0, 0],
             [0, 10.5, 0, 250.5, 0],
             [0, 0, .015, 0, 0],
             [0, 250.5, 0, -280, 8],
             [0, 0, 0, 8, 12]]
        self.check_read(_symmetric_example, a,
                        (5, 5, 7, 'coordinate', 'real', 'symmetric'))

    def test_read_symmetric_pattern(self):
        # Pattern files carry structure only; entries read back as 1.
        a = [[1, 0, 0, 0, 0],
             [0, 1, 0, 1, 0],
             [0, 0, 1, 0, 0],
             [0, 1, 0, 1, 1],
             [0, 0, 0, 1, 1]]
        self.check_read(_symmetric_pattern_example, a,
                        (5, 5, 7, 'coordinate', 'pattern', 'symmetric'))

    def test_empty_write_read(self):
        # An all-zero matrix must round trip with 0 stored entries.
        # https://github.com/scipy/scipy/issues/1410 (Trac #883)
        b = scipy.sparse.coo_matrix((10, 10))
        mmwrite(self.fn, b)

        assert_equal(mminfo(self.fn),
                     (10, 10, 0, 'coordinate', 'real', 'symmetric'))
        a = b.todense()
        b = mmread(self.fn).todense()
        assert_array_almost_equal(a, b)

    def test_bzip2_py3(self):
        # mmread must transparently decompress .bz2 files (fix for #2152).
        try:
            # bz2 module isn't always built when building Python.
            import bz2
        except ImportError:
            return
        I = array([0, 0, 1, 2, 3, 3, 3, 4])
        J = array([0, 3, 1, 2, 1, 3, 4, 4])
        V = array([1.0, 6.0, 10.5, 0.015, 250.5, -280.0, 33.32, 12.0])

        b = scipy.sparse.coo_matrix((V, (I, J)), shape=(5, 5))

        mmwrite(self.fn, b)

        fn_bzip2 = "%s.bz2" % self.fn
        with open(self.fn, 'rb') as f_in:
            # Context manager guarantees the compressed file is flushed and
            # closed before mmread() opens it (original left it to a
            # manual close() that was skipped on error).
            with bz2.BZ2File(fn_bzip2, 'wb') as f_out:
                f_out.write(f_in.read())

        a = mmread(fn_bzip2).todense()
        assert_array_almost_equal(a, b.todense())

    def test_gzip_py3(self):
        # mmread must transparently decompress .gz files (fix for #2152).
        try:
            # gzip module can be missing from Python installation
            import gzip
        except ImportError:
            return
        I = array([0, 0, 1, 2, 3, 3, 3, 4])
        J = array([0, 3, 1, 2, 1, 3, 4, 4])
        V = array([1.0, 6.0, 10.5, 0.015, 250.5, -280.0, 33.32, 12.0])

        b = scipy.sparse.coo_matrix((V, (I, J)), shape=(5, 5))

        mmwrite(self.fn, b)

        fn_gzip = "%s.gz" % self.fn
        with open(self.fn, 'rb') as f_in:
            # Same exception-safety fix as in test_bzip2_py3 above.
            with gzip.open(fn_gzip, 'wb') as f_out:
                f_out.write(f_in.read())

        a = mmread(fn_gzip).todense()
        assert_array_almost_equal(a, b.todense())

    def test_real_write_read(self):
        I = array([0, 0, 1, 2, 3, 3, 3, 4])
        J = array([0, 3, 1, 2, 1, 3, 4, 4])
        V = array([1.0, 6.0, 10.5, 0.015, 250.5, -280.0, 33.32, 12.0])

        b = scipy.sparse.coo_matrix((V, (I, J)), shape=(5, 5))

        mmwrite(self.fn, b)

        assert_equal(mminfo(self.fn),
                     (5, 5, 8, 'coordinate', 'real', 'general'))
        a = b.todense()
        b = mmread(self.fn).todense()
        assert_array_almost_equal(a, b)

    def test_complex_write_read(self):
        I = array([0, 0, 1, 2, 3, 3, 3, 4])
        J = array([0, 3, 1, 2, 1, 3, 4, 4])
        V = array([1.0 + 3j, 6.0 + 2j, 10.50 + 0.9j, 0.015 + -4.4j,
                   250.5 + 0j, -280.0 + 5j, 33.32 + 6.4j, 12.00 + 0.8j])

        b = scipy.sparse.coo_matrix((V, (I, J)), shape=(5, 5))

        mmwrite(self.fn, b)

        assert_equal(mminfo(self.fn),
                     (5, 5, 8, 'coordinate', 'complex', 'general'))
        a = b.todense()
        b = mmread(self.fn).todense()
        assert_array_almost_equal(a, b)

    def test_sparse_formats(self):
        # The same matrix must round trip from csr, csc and coo formats.
        mats = []

        I = array([0, 0, 1, 2, 3, 3, 3, 4])
        J = array([0, 3, 1, 2, 1, 3, 4, 4])

        V = array([1.0, 6.0, 10.5, 0.015, 250.5, -280.0, 33.32, 12.0])
        mats.append(scipy.sparse.coo_matrix((V, (I, J)), shape=(5, 5)))

        V = array([1.0 + 3j, 6.0 + 2j, 10.50 + 0.9j, 0.015 + -4.4j,
                   250.5 + 0j, -280.0 + 5j, 33.32 + 6.4j, 12.00 + 0.8j])
        mats.append(scipy.sparse.coo_matrix((V, (I, J)), shape=(5, 5)))

        for mat in mats:
            expected = mat.todense()
            for fmt in ['csr', 'csc', 'coo']:
                fn = mktemp(dir=self.tmpdir)  # safe, we own tmpdir
                mmwrite(fn, mat.asformat(fmt))

                result = mmread(fn).todense()
                assert_array_almost_equal(result, expected)

    def test_precision(self):
        test_values = [pi] + [10**(i) for i in range(0, -10, -1)]
        test_precisions = range(1, 10)
        for value in test_values:
            for precision in test_precisions:
                # construct sparse matrix with test value at last main diagonal
                n = 10**precision + 1
                A = scipy.sparse.dok_matrix((n, n))
                A[n-1, n-1] = value
                # write matrix with test precision and read again
                mmwrite(self.fn, A, precision=precision)
                A = scipy.io.mmread(self.fn)
                # check for right entries in matrix
                assert_array_equal(A.row, [n-1])
                assert_array_equal(A.col, [n-1])
                assert_array_almost_equal(A.data,
                                          [float('%%.%dg' % precision % value)])
|
||||
@@ -1,544 +0,0 @@
|
||||
''' Tests for netcdf '''
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import os
|
||||
from os.path import join as pjoin, dirname
|
||||
import shutil
|
||||
import tempfile
|
||||
import warnings
|
||||
from io import BytesIO
|
||||
from glob import glob
|
||||
from contextlib import contextmanager
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_, assert_allclose, assert_equal
|
||||
from pytest import raises as assert_raises
|
||||
|
||||
from scipy.io.netcdf import netcdf_file, IS_PYPY
|
||||
|
||||
from scipy._lib._numpy_compat import suppress_warnings
|
||||
from scipy._lib._tmpdirs import in_tempdir
|
||||
|
||||
TEST_DATA_PATH = pjoin(dirname(__file__), 'data')
|
||||
|
||||
N_EG_ELS = 11 # number of elements for example variable
|
||||
VARTYPE_EG = 'b' # var type for example variable
|
||||
|
||||
|
||||
@contextmanager
def make_simple(*args, **kwargs):
    """Context manager yielding an example netcdf file.

    Creates a netcdf file (arguments are forwarded to ``netcdf_file``) with
    one record dimension 'time' holding ``N_EG_ELS`` values of type
    ``VARTYPE_EG``, flushes it, and closes it on exit.
    """
    f = netcdf_file(*args, **kwargs)
    f.history = 'Created for a test'
    f.createDimension('time', N_EG_ELS)
    time = f.createVariable('time', VARTYPE_EG, ('time',))
    time[:] = np.arange(N_EG_ELS)
    time.units = 'days since 2008-01-01'
    f.flush()
    try:
        yield f
    finally:
        # Original code called close() after a bare yield, so the file was
        # never closed when the managed block raised.
        f.close()
|
||||
|
||||
|
||||
def check_simple(ncfileobj):
    """Assert that *ncfileobj* contains the dataset written by make_simple."""
    assert_equal(ncfileobj.history, b'Created for a test')
    tvar = ncfileobj.variables['time']
    assert_equal(tvar.units, b'days since 2008-01-01')
    assert_equal(tvar.shape, (N_EG_ELS,))
    assert_equal(tvar[-1], N_EG_ELS - 1)
|
||||
|
||||
def assert_mask_matches(arr, expected_mask):
    """
    Assert that the mask of *arr* is effectively equal to *expected_mask*.

    Unlike numpy.ma.testutils.assert_mask_equal, this also accepts a plain
    ndarray, whose mask is treated as all False.

    Parameters
    ----------
    arr: ndarray or MaskedArray
        Array to test.
    expected_mask: array_like of booleans
        A list giving the expected mask.
    """
    assert_equal(np.ma.getmaskarray(arr), expected_mask)
|
||||
|
||||
|
||||
def test_read_write_files():
    """Round-trip the example dataset through a real file on disk.

    Exercises create/append/read in every combination of filename vs file
    object and mmap on/off.
    """
    cwd = os.getcwd()
    tmpdir = tempfile.mkdtemp()
    try:
        os.chdir(tmpdir)
        with make_simple('simple.nc', 'w') as f:
            pass
        # read the file we just created in 'a' mode
        with netcdf_file('simple.nc', 'a') as f:
            check_simple(f)
            # add something
            f._attributes['appendRan'] = 1

        # To read the NetCDF file we just created::
        with netcdf_file('simple.nc') as f:
            # Using mmap is the default (but not on pypy)
            assert_equal(f.use_mmap, not IS_PYPY)
            check_simple(f)
            assert_equal(f._attributes['appendRan'], 1)

        # Read it in append (and check mmap is off)
        with netcdf_file('simple.nc', 'a') as f:
            assert_(not f.use_mmap)
            check_simple(f)
            assert_equal(f._attributes['appendRan'], 1)

        # Now without mmap
        with netcdf_file('simple.nc', mmap=False) as f:
            assert_(not f.use_mmap)
            check_simple(f)

        # To read the NetCDF file we just created, as file object, no
        # mmap. When n * n_bytes(var_type) is not divisible by 4, this
        # raised an error in pupynere 1.0.12 and scipy rev 5893, because
        # calculated vsize was rounding up in units of 4 - see
        # https://www.unidata.ucar.edu/software/netcdf/docs/user_guide.html
        with open('simple.nc', 'rb') as fobj:
            with netcdf_file(fobj) as f:
                # by default, don't use mmap for file-like
                assert_(not f.use_mmap)
                check_simple(f)

        # Read file from fileobj, with mmap
        with suppress_warnings() as sup:
            if IS_PYPY:
                sup.filter(RuntimeWarning,
                           "Cannot close a netcdf_file opened with mmap=True.*")
            with open('simple.nc', 'rb') as fobj:
                with netcdf_file(fobj, mmap=True) as f:
                    assert_(f.use_mmap)
                    check_simple(f)

        # Again read it in append mode (adding another att)
        with open('simple.nc', 'r+b') as fobj:
            with netcdf_file(fobj, 'a') as f:
                assert_(not f.use_mmap)
                check_simple(f)
                f.createDimension('app_dim', 1)
                var = f.createVariable('app_var', 'i', ('app_dim',))
                var[:] = 42

        # And... check that app_var made it in...
        with netcdf_file('simple.nc') as f:
            check_simple(f)
            assert_equal(f.variables['app_var'][:], 42)

    finally:
        # Original code duplicated this cleanup in a bare except clause and
        # again after the try; try/finally covers both paths and also runs
        # when a non-Exception (e.g. KeyboardInterrupt) propagates.
        os.chdir(cwd)
        shutil.rmtree(tmpdir)
|
||||
|
||||
|
||||
def test_read_write_sio():
    """Round-trip the example dataset through in-memory BytesIO objects."""
    first = BytesIO()
    with make_simple(first, 'w') as f1:
        payload = first.getvalue()

    with netcdf_file(BytesIO(payload)) as f2:
        check_simple(f2)

    # mmap cannot be used with an in-memory stream: must raise ValueError
    assert_raises(ValueError, netcdf_file, BytesIO(payload), 'r', True)

    # 64-bit offset (version 2) write / read
    big = BytesIO()
    with make_simple(big, 'w', version=2) as f_64:
        payload = big.getvalue()

    with netcdf_file(BytesIO(payload)) as f_64:
        check_simple(f_64)
        assert_equal(f_64.version_byte, 2)

    # also when version 2 explicitly specified
    with netcdf_file(BytesIO(payload), version=2) as f_64:
        check_simple(f_64)
        assert_equal(f_64.version_byte, 2)
|
||||
|
||||
|
||||
def test_bytes():
    """Check the exact byte stream produced for a minimal dataset."""
    raw_file = BytesIO()
    f = netcdf_file(raw_file, mode='w')
    # Dataset only has a single variable, dimension and attribute to avoid
    # any ambiguity related to order.
    f.a = 'b'
    f.createDimension('dim', 1)
    var = f.createVariable('var', np.int16, ('dim',))
    var[0] = -9999
    var.c = 'd'
    f.sync()

    actual = raw_file.getvalue()

    # Expected raw classic-format ('CDF\x01') byte stream; layout follows
    # the NetCDF classic file format specification — magic, record count,
    # dimension list, global attributes, variable list, then data.
    expected = (b'CDF\x01'
                b'\x00\x00\x00\x00'
                b'\x00\x00\x00\x0a'
                b'\x00\x00\x00\x01'
                b'\x00\x00\x00\x03'
                b'dim\x00'
                b'\x00\x00\x00\x01'
                b'\x00\x00\x00\x0c'
                b'\x00\x00\x00\x01'
                b'\x00\x00\x00\x01'
                b'a\x00\x00\x00'
                b'\x00\x00\x00\x02'
                b'\x00\x00\x00\x01'
                b'b\x00\x00\x00'
                b'\x00\x00\x00\x0b'
                b'\x00\x00\x00\x01'
                b'\x00\x00\x00\x03'
                b'var\x00'
                b'\x00\x00\x00\x01'
                b'\x00\x00\x00\x00'
                b'\x00\x00\x00\x0c'
                b'\x00\x00\x00\x01'
                b'\x00\x00\x00\x01'
                b'c\x00\x00\x00'
                b'\x00\x00\x00\x02'
                b'\x00\x00\x00\x01'
                b'd\x00\x00\x00'
                b'\x00\x00\x00\x03'
                b'\x00\x00\x00\x04'
                b'\x00\x00\x00\x78'
                b'\xd8\xf1\x80\x01')

    assert_equal(actual, expected)
|
||||
|
||||
|
||||
def test_encoded_fill_value():
    """_get_encoded_fill_value: default, valid override, invalid override."""
    with netcdf_file(BytesIO(), mode='w') as f:
        f.createDimension('x', 1)
        v = f.createVariable('var', 'S1', ('x',))
        # default fill value for 1-byte string data
        assert_equal(v._get_encoded_fill_value(), b'\x00')
        # a correctly sized _FillValue is honored
        v._FillValue = b'\x01'
        assert_equal(v._get_encoded_fill_value(), b'\x01')
        # a wrongly sized _FillValue falls back to the default
        v._FillValue = b'\x00\x00'  # invalid, wrong size
        assert_equal(v._get_encoded_fill_value(), b'\x00')
|
||||
|
||||
|
||||
def test_read_example_data():
    """Every bundled .nc data file must open both with and without mmap."""
    for fname in glob(pjoin(TEST_DATA_PATH, '*.nc')):
        with netcdf_file(fname, 'r'):
            pass
        with netcdf_file(fname, 'r', mmap=False):
            pass
|
||||
|
||||
|
||||
def test_itemset_no_segfault_on_readonly():
    # Regression test for ticket #1202.
    # Open the test file in read-only mode and keep a variable around after
    # the file is closed.
    filename = pjoin(TEST_DATA_PATH, 'example_1.nc')
    with suppress_warnings() as sup:
        sup.filter(RuntimeWarning,
                   "Cannot close a netcdf_file opened with mmap=True, when netcdf_variables or arrays referring to its data still exist")
        with netcdf_file(filename, 'r', mmap=True) as f:
            tvar = f.variables['time']

    # assigning into the read-only variable should raise a
    # RuntimeError--not seg. fault!
    assert_raises(RuntimeError, tvar.assignValue, 42)
|
||||
|
||||
|
||||
def test_appending_issue_gh_8625():
    """Regression test for gh-8625: reopening in append mode must work."""
    buf = BytesIO()

    with make_simple(buf, mode='w') as f:
        f.createDimension('x', 2)
        f.createVariable('x', float, ('x',))
        f.variables['x'][...] = 1
        f.flush()
        snapshot = buf.getvalue()

    # reopen from the written bytes in append mode and modify the variable
    with netcdf_file(BytesIO(snapshot), mode='a') as f:
        f.variables['x'][...] = 2
|
||||
|
||||
|
||||
def test_write_invalid_dtype():
    """8-byte integer dtypes cannot be stored and must raise ValueError."""
    dtypes = ['int64', 'uint64']
    for name in ('int', 'uint'):
        # on 64-bit machines the platform int/uint are also 8 bytes wide
        if np.dtype(name).itemsize == 8:
            dtypes.append(name)

    with netcdf_file(BytesIO(), 'w') as f:
        f.createDimension('time', N_EG_ELS)
        for dt in dtypes:
            assert_raises(ValueError, f.createVariable, 'time', dt, ('time',))
|
||||
|
||||
|
||||
def test_flush_rewind():
    """Calling flush() twice must not grow the output (it rewinds)."""
    stream = BytesIO()
    with make_simple(stream, mode='w') as f:
        # createDimension returns nothing; original bound it to an unused
        # local `x`.
        f.createDimension('x', 4)
        v = f.createVariable('v', 'i2', ['x'])
        v[:] = 1
        f.flush()
        len_single = len(stream.getvalue())
        f.flush()
        len_double = len(stream.getvalue())

    assert_(len_single == len_double)
|
||||
|
||||
|
||||
def test_dtype_specifiers():
    # Numpy 1.7.0-dev had a bug where 'i2' wouldn't work.
    # Specifying np.int16 or similar only works from the same commit as this
    # comment was made.
    with make_simple(BytesIO(), mode='w') as f:
        f.createDimension('x', 4)
        # string code, numpy scalar type and dtype object must all work
        for name, spec in (('v1', 'i2'),
                           ('v2', np.int16),
                           ('v3', np.dtype(np.int16))):
            f.createVariable(name, spec, ['x'])
|
||||
|
||||
|
||||
def test_ticket_1720():
    """Regression test: 'f' (float32) variables round trip through memory."""
    values = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

    stream = BytesIO()
    with netcdf_file(stream, 'w') as f:
        f.history = 'Created for a test'
        f.createDimension('float_var', 10)
        fvar = f.createVariable('float_var', 'f', ('float_var',))
        fvar[:] = values
        fvar.units = 'metres'
        f.flush()
        contents = stream.getvalue()

    with netcdf_file(BytesIO(contents), 'r') as f:
        assert_equal(f.history, b'Created for a test')
        fvar = f.variables['float_var']
        assert_equal(fvar.units, b'metres')
        assert_equal(fvar.shape, (10,))
        assert_allclose(fvar[:], values)
|
||||
|
||||
|
||||
def test_mmaps_segfault():
    """mmap-backed variable data must neither warn nor crash after close."""
    filename = pjoin(TEST_DATA_PATH, 'example_1.nc')

    if not IS_PYPY:
        # reading and dropping the data inside the file's lifetime should
        # not emit any warning
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            with netcdf_file(filename, mmap=True) as f:
                lat = f.variables['lat'][:]
                del lat

    def grab_lat():
        with netcdf_file(filename, mmap=True) as f:
            return f.variables['lat'][:]

    # using the array after the file is closed should not crash
    with suppress_warnings() as sup:
        sup.filter(RuntimeWarning,
                   "Cannot close a netcdf_file opened with mmap=True, when netcdf_variables or arrays referring to its data still exist")
        data = grab_lat()
    data.sum()
|
||||
|
||||
|
||||
def test_zero_dimensional_var():
    """A scalar (zero-dimensional) variable is never a record variable."""
    stream = BytesIO()
    with make_simple(stream, 'w') as f:
        var = f.createVariable('zerodim', 'i2', [])
        # This is checking that .isrec returns a boolean - don't simplify it
        # to 'assert not ...'
        assert var.isrec is False, var.isrec
        f.flush()
|
||||
|
||||
|
||||
def test_byte_gatts():
    """Global "string" attributes behave like they did before py3k.

    Whether written as bytes or str, attributes read back as bytes
    (pre-py3k unicode / general bytes confusion).
    """
    with in_tempdir():
        filename = 'g_byte_atts.nc'
        nc = netcdf_file(filename, 'w')
        nc._attributes['holy'] = b'grail'
        nc._attributes['witch'] = 'floats'
        nc.close()

        nc = netcdf_file(filename, 'r')
        assert_equal(nc._attributes['holy'], b'grail')
        assert_equal(nc._attributes['witch'], b'floats')
        nc.close()
|
||||
|
||||
|
||||
def test_open_append():
    """Attributes accumulate across 'w' -> 'a' -> 'r' re-opens."""
    with in_tempdir():
        filename = 'append_dat.nc'

        # create the file with a single attribute
        nc = netcdf_file(filename, 'w')
        nc._attributes['Kilroy'] = 'was here'
        nc.close()

        # re-open for append: the old attribute is visible; add another
        nc = netcdf_file(filename, 'a')
        assert_equal(nc._attributes['Kilroy'], b'was here')
        nc._attributes['naughty'] = b'Zoot'
        nc.close()

        # re-open read-only and check both attributes survived
        nc = netcdf_file(filename, 'r')
        assert_equal(nc._attributes['Kilroy'], b'was here')
        assert_equal(nc._attributes['naughty'], b'Zoot')
        nc.close()
|
||||
|
||||
|
||||
def test_append_recordDimension():
    # Appending along an unlimited (record) dimension in 'a' mode must grow
    # every record variable, and must not leak the internal 'data' array
    # into the variable's user attributes.
    dataSize = 100

    with in_tempdir():
        # Create file with record time dimension
        with netcdf_file('withRecordDimension.nc', 'w') as f:
            f.createDimension('time', None)  # None makes 'time' the record dimension
            f.createVariable('time', 'd', ('time',))
            f.createDimension('x', dataSize)
            x = f.createVariable('x', 'd', ('x',))
            x[:] = np.array(range(dataSize))
            f.createDimension('y', dataSize)
            y = f.createVariable('y', 'd', ('y',))
            y[:] = np.array(range(dataSize))
            f.createVariable('testData', 'i', ('time', 'x', 'y'))
            f.flush()
            f.close()

        for i in range(2):
            # Open the file in append mode and add data
            with netcdf_file('withRecordDimension.nc', 'a') as f:
                f.variables['time'].data = np.append(f.variables["time"].data, i)
                f.variables['testData'][i, :, :] = np.ones((dataSize, dataSize))*i
                f.flush()

            # Read the file and check that append worked
            with netcdf_file('withRecordDimension.nc') as f:
                assert_equal(f.variables['time'][-1], i)
                assert_equal(f.variables['testData'][-1, :, :].copy(), np.ones((dataSize, dataSize))*i)
                assert_equal(f.variables['time'].data.shape[0], i+1)
                assert_equal(f.variables['testData'].data.shape[0], i+1)

        # Read the file and check that 'data' was not saved as user defined
        # attribute of testData variable during append operation
        with netcdf_file('withRecordDimension.nc') as f:
            with assert_raises(KeyError) as ar:
                f.variables['testData']._attributes['data']
            ex = ar.value
            assert_equal(ex.args[0], 'data')
|
||||
|
||||
def test_maskandscale():
    # With maskandscale=True, reading applies missing_value masking plus
    # scale_factor/add_offset decoding, and writing applies the inverse.
    t = np.linspace(20, 30, 15)
    t[3] = 100
    tm = np.ma.masked_greater(t, 99)  # the injected 100 becomes the masked entry
    fname = pjoin(TEST_DATA_PATH, 'example_2.nc')
    with netcdf_file(fname, maskandscale=True) as f:
        Temp = f.variables['Temperature']
        assert_equal(Temp.missing_value, 9999)
        assert_equal(Temp.add_offset, 20)
        assert_equal(Temp.scale_factor, np.float32(0.01))
        found = Temp[:].compressed()
        del Temp  # Remove ref to mmap, so file can be closed.
        # data was stored as int with scale 0.01, so compare at 2 decimals
        expected = np.round(tm.compressed(), 2)
        assert_allclose(found, expected)

    with in_tempdir():
        newfname = 'ms.nc'
        f = netcdf_file(newfname, 'w', maskandscale=True)
        f.createDimension('Temperature', len(tm))
        temp = f.createVariable('Temperature', 'i', ('Temperature',))
        temp.missing_value = 9999
        temp.scale_factor = 0.01
        temp.add_offset = 20
        temp[:] = tm
        f.close()

        # Round-trip: the rewritten file must decode to the same values.
        with netcdf_file(newfname, maskandscale=True) as f:
            Temp = f.variables['Temperature']
            assert_equal(Temp.missing_value, 9999)
            assert_equal(Temp.add_offset, 20)
            assert_equal(Temp.scale_factor, np.float32(0.01))
            expected = np.round(tm.compressed(), 2)
            found = Temp[:].compressed()
            del Temp  # release the mmap reference before the file closes
            assert_allclose(found, expected)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
# Test reading with masked values (_FillValue / missing_value)
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
def test_read_withValuesNearFillValue():
    # Regression test for ticket #5626
    path = pjoin(TEST_DATA_PATH, 'example_3_maskedvals.nc')
    with netcdf_file(path, maskandscale=True) as f:
        values = f.variables['var1_fillval0'][:]
    assert_mask_matches(values, [False, True, False])
|
||||
|
||||
def test_read_withNoFillValue():
    # For a variable with no fill value, reading data with maskandscale=True
    # should return unmasked data
    path = pjoin(TEST_DATA_PATH, 'example_3_maskedvals.nc')
    with netcdf_file(path, maskandscale=True) as f:
        values = f.variables['var2_noFillval'][:]
    assert_mask_matches(values, [False, False, False])
    assert_equal(values, [1, 2, 3])
|
||||
|
||||
def test_read_withFillValueAndMissingValue():
    # For a variable with both _FillValue and missing_value, the _FillValue
    # should be used
    IRRELEVANT_VALUE = 9999
    path = pjoin(TEST_DATA_PATH, 'example_3_maskedvals.nc')
    with netcdf_file(path, maskandscale=True) as f:
        values = f.variables['var3_fillvalAndMissingValue'][:]
    assert_mask_matches(values, [True, False, False])
    assert_equal(values, [IRRELEVANT_VALUE, 2, 3])
|
||||
|
||||
def test_read_withMissingValue():
    # For a variable with missing_value but not _FillValue, the missing_value
    # should be used
    path = pjoin(TEST_DATA_PATH, 'example_3_maskedvals.nc')
    with netcdf_file(path, maskandscale=True) as f:
        values = f.variables['var4_missingValue'][:]
    assert_mask_matches(values, [False, True, False])
|
||||
|
||||
def test_read_withFillValNaN():
    # A NaN _FillValue must still produce a mask on read.
    path = pjoin(TEST_DATA_PATH, 'example_3_maskedvals.nc')
    with netcdf_file(path, maskandscale=True) as f:
        values = f.variables['var5_fillvalNaN'][:]
    assert_mask_matches(values, [False, True, False])
|
||||
|
||||
def test_read_withChar():
    # Masking also applies to character-typed variables.
    path = pjoin(TEST_DATA_PATH, 'example_3_maskedvals.nc')
    with netcdf_file(path, maskandscale=True) as f:
        values = f.variables['var6_char'][:]
    assert_mask_matches(values, [False, True, False])
|
||||
|
||||
def test_read_with2dVar():
    # The mask is applied element-wise for multi-dimensional variables.
    path = pjoin(TEST_DATA_PATH, 'example_3_maskedvals.nc')
    with netcdf_file(path, maskandscale=True) as f:
        values = f.variables['var7_2d'][:]
    assert_mask_matches(values, [[True, False], [False, False], [False, True]])
|
||||
|
||||
def test_read_withMaskAndScaleFalse():
    # If a variable has a _FillValue (or missing_value) attribute, but is read
    # with maskandscale set to False, the result should be unmasked
    path = pjoin(TEST_DATA_PATH, 'example_3_maskedvals.nc')
    # Open file with mmap=False to avoid problems with closing a mmap'ed file
    # when arrays referring to its data still exist:
    with netcdf_file(path, maskandscale=False, mmap=False) as f:
        values = f.variables['var3_fillvalAndMissingValue'][:]
    assert_mask_matches(values, [False, False, False])
    assert_equal(values, [1, 2, 3])
|
||||
@@ -1,88 +0,0 @@
|
||||
"""
|
||||
Ensure that we can use pathlib.Path objects in all relevant IO functions.
|
||||
"""
|
||||
import sys
|
||||
|
||||
try:
|
||||
from pathlib import Path
|
||||
except ImportError:
|
||||
# Not available. No fallback import, since we'll skip the entire
|
||||
# test suite for Python < 3.6.
|
||||
pass
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_
|
||||
import pytest
|
||||
|
||||
import scipy.io
|
||||
import scipy.io.wavfile
|
||||
from scipy._lib._tmpdirs import tempdir
|
||||
import scipy.sparse
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.version_info < (3, 6),
|
||||
reason='Passing path-like objects to IO functions requires Python >= 3.6')
|
||||
class TestPaths(object):
|
||||
data = np.arange(5).astype(np.int64)
|
||||
|
||||
def test_savemat(self):
|
||||
with tempdir() as temp_dir:
|
||||
path = Path(temp_dir) / 'data.mat'
|
||||
scipy.io.savemat(path, {'data': self.data})
|
||||
assert_(path.is_file())
|
||||
|
||||
def test_loadmat(self):
|
||||
# Save data with string path, load with pathlib.Path
|
||||
with tempdir() as temp_dir:
|
||||
path = Path(temp_dir) / 'data.mat'
|
||||
scipy.io.savemat(str(path), {'data': self.data})
|
||||
|
||||
mat_contents = scipy.io.loadmat(path)
|
||||
assert_((mat_contents['data'] == self.data).all())
|
||||
|
||||
def test_whosmat(self):
|
||||
# Save data with string path, load with pathlib.Path
|
||||
with tempdir() as temp_dir:
|
||||
path = Path(temp_dir) / 'data.mat'
|
||||
scipy.io.savemat(str(path), {'data': self.data})
|
||||
|
||||
contents = scipy.io.whosmat(path)
|
||||
assert_(contents[0] == ('data', (1, 5), 'int64'))
|
||||
|
||||
def test_readsav(self):
|
||||
path = Path(__file__).parent / 'data/scalar_string.sav'
|
||||
scipy.io.readsav(path)
|
||||
|
||||
def test_hb_read(self):
|
||||
# Save data with string path, load with pathlib.Path
|
||||
with tempdir() as temp_dir:
|
||||
data = scipy.sparse.csr_matrix(scipy.sparse.eye(3))
|
||||
path = Path(temp_dir) / 'data.hb'
|
||||
scipy.io.harwell_boeing.hb_write(str(path), data)
|
||||
|
||||
data_new = scipy.io.harwell_boeing.hb_read(path)
|
||||
assert_((data_new != data).nnz == 0)
|
||||
|
||||
def test_hb_write(self):
|
||||
with tempdir() as temp_dir:
|
||||
data = scipy.sparse.csr_matrix(scipy.sparse.eye(3))
|
||||
path = Path(temp_dir) / 'data.hb'
|
||||
scipy.io.harwell_boeing.hb_write(path, data)
|
||||
assert_(path.is_file())
|
||||
|
||||
def test_netcdf_file(self):
|
||||
path = Path(__file__).parent / 'data/example_1.nc'
|
||||
scipy.io.netcdf.netcdf_file(path)
|
||||
|
||||
def test_wavfile_read(self):
|
||||
path = Path(__file__).parent / 'data/test-8000Hz-le-2ch-1byteu.wav'
|
||||
scipy.io.wavfile.read(path)
|
||||
|
||||
def test_wavfile_write(self):
|
||||
# Read from str path, write to Path
|
||||
input_path = Path(__file__).parent / 'data/test-8000Hz-le-2ch-1byteu.wav'
|
||||
rate, data = scipy.io.wavfile.read(str(input_path))
|
||||
|
||||
with tempdir() as temp_dir:
|
||||
output_path = Path(temp_dir) / input_path.name
|
||||
scipy.io.wavfile.write(output_path, rate, data)
|
||||
@@ -1,159 +0,0 @@
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
from io import BytesIO
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal, assert_, assert_array_equal
|
||||
from pytest import raises as assert_raises
|
||||
from scipy._lib._numpy_compat import suppress_warnings
|
||||
|
||||
from scipy.io import wavfile
|
||||
|
||||
|
||||
def datafile(fn):
    """Return the absolute path of test data file *fn*."""
    here = os.path.dirname(__file__)
    return os.path.join(here, 'data', fn)
|
||||
|
||||
|
||||
def test_read_1():
    """44100 Hz mono 32-bit PCM reads back with expected rate/dtype/shape."""
    fname = datafile('test-44100Hz-le-1ch-4bytes.wav')
    for use_mmap in (False, True):
        rate, data = wavfile.read(fname, mmap=use_mmap)

        assert_equal(rate, 44100)
        assert_(np.issubdtype(data.dtype, np.int32))
        assert_equal(data.shape, (4410,))

        del data  # drop any mmap reference before the next iteration
|
||||
|
||||
|
||||
def test_read_2():
    """8000 Hz stereo unsigned 8-bit PCM reads back correctly."""
    fname = datafile('test-8000Hz-le-2ch-1byteu.wav')
    for use_mmap in (False, True):
        rate, data = wavfile.read(fname, mmap=use_mmap)
        assert_equal(rate, 8000)
        assert_(np.issubdtype(data.dtype, np.uint8))
        assert_equal(data.shape, (800, 2))

        del data  # drop any mmap reference before the next iteration
|
||||
|
||||
def test_read_3():
    """44100 Hz stereo little-endian 32-bit float reads back correctly."""
    fname = datafile('test-44100Hz-2ch-32bit-float-le.wav')
    for use_mmap in (False, True):
        rate, data = wavfile.read(fname, mmap=use_mmap)
        assert_equal(rate, 44100)
        assert_(np.issubdtype(data.dtype, np.float32))
        assert_equal(data.shape, (441, 2))

        del data  # drop any mmap reference before the next iteration
|
||||
|
||||
def test_read_4():
    """A WAVEX file with an unknown extra chunk reads with only a warning."""
    fname = datafile('test-48000Hz-2ch-64bit-float-le-wavex.wav')
    for use_mmap in (False, True):
        with suppress_warnings() as sup:
            # the file contains a chunk the reader doesn't know; it warns
            sup.filter(wavfile.WavFileWarning,
                       "Chunk .non-data. not understood, skipping it")
            rate, data = wavfile.read(fname, mmap=use_mmap)

        assert_equal(rate, 48000)
        assert_(np.issubdtype(data.dtype, np.float64))
        assert_equal(data.shape, (480, 2))

        del data  # drop any mmap reference before the next iteration
|
||||
|
||||
|
||||
def test_read_5():
    """Big-endian (RIFX) 32-bit float data keeps its byte order on read."""
    fname = datafile('test-44100Hz-2ch-32bit-float-be.wav')
    for use_mmap in (False, True):
        rate, data = wavfile.read(fname, mmap=use_mmap)
        assert_equal(rate, 44100)
        assert_(np.issubdtype(data.dtype, np.float32))
        # dtype must be explicitly big-endian, or native on a BE machine
        assert_(data.dtype.byteorder == '>' or (sys.byteorder == 'big' and
                                                data.dtype.byteorder == '='))
        assert_equal(data.shape, (441, 2))

        del data  # drop any mmap reference before the next iteration
|
||||
|
||||
|
||||
def test_read_fail():
    """A non-WAV file (netCDF data) is rejected with ValueError.

    Uses a context manager so the file handle is closed even if
    ``assert_raises`` itself fails (the original leaked the handle then).
    """
    for mmap in [False, True]:
        with open(datafile('example_1.nc'), 'rb') as fp:
            assert_raises(ValueError, wavfile.read, fp, mmap=mmap)
|
||||
|
||||
|
||||
def test_read_early_eof():
    """A truncated WAV file raises ValueError instead of returning junk.

    Uses a context manager so the file handle is closed even if
    ``assert_raises`` itself fails (the original leaked the handle then).
    """
    for mmap in [False, True]:
        with open(datafile('test-44100Hz-le-1ch-4bytes-early-eof.wav'), 'rb') as fp:
            assert_raises(ValueError, wavfile.read, fp, mmap=mmap)
|
||||
|
||||
|
||||
def test_read_incomplete_chunk():
    """A WAV file with a cut-off chunk id raises ValueError.

    Uses a context manager so the file handle is closed even if
    ``assert_raises`` itself fails (the original leaked the handle then).
    """
    for mmap in [False, True]:
        with open(datafile('test-44100Hz-le-1ch-4bytes-incomplete-chunk.wav'), 'rb') as fp:
            assert_raises(ValueError, wavfile.read, fp, mmap=mmap)
|
||||
|
||||
|
||||
def _check_roundtrip(realfile, rate, dtype, channels):
    # Helper: write random data of the given dtype/shape with wavfile.write,
    # read it back (with and without mmap) and require an exact match.
    # realfile=True uses a real temp file on disk, else an in-memory BytesIO.
    if realfile:
        fd, tmpfile = tempfile.mkstemp(suffix='.wav')
        os.close(fd)  # only the path is needed; wavfile opens it itself
    else:
        tmpfile = BytesIO()
    try:
        data = np.random.rand(100, channels)
        if channels == 1:
            data = data[:,0]
        if dtype.kind == 'f':
            # The range of the float type should be in [-1, 1]
            data = data.astype(dtype)
        else:
            data = (data*128).astype(dtype)

        wavfile.write(tmpfile, rate, data)

        for mmap in [False, True]:
            rate2, data2 = wavfile.read(tmpfile, mmap=mmap)

            assert_equal(rate, rate2)
            # read() must always return native/little-endian data
            assert_(data2.dtype.byteorder in ('<', '=', '|'), msg=data2.dtype)
            assert_array_equal(data, data2)

            # release the mmap reference before the next read / cleanup
            del data2
    finally:
        if realfile:
            os.unlink(tmpfile)
|
||||
|
||||
|
||||
def test_write_roundtrip():
    # Exhaustively round-trip every supported dtype / endianness / rate /
    # channel combination through write() + read(), skipping combinations
    # the WAV spec does not allow.
    for realfile in (False, True):
        for dtypechar in ('i', 'u', 'f', 'g', 'q'):
            for size in (1, 2, 4, 8):
                if size == 1 and dtypechar == 'i':
                    # signed 8-bit integer PCM is not allowed
                    continue
                if size > 1 and dtypechar == 'u':
                    # unsigned > 8-bit integer PCM is not allowed
                    continue
                if (size == 1 or size == 2) and dtypechar == 'f':
                    # 8- or 16-bit float PCM is not expected
                    continue
                if dtypechar in 'gq':
                    # no size allowed for these types
                    if size == 1:
                        # NOTE: rebinds the loop variable so the dtype string
                        # below becomes e.g. '<g' with no size suffix
                        size = ''
                    else:
                        continue

                for endianness in ('>', '<'):
                    if size == 1 and endianness == '<':
                        # 1-byte samples have no endianness; test once
                        continue
                    for rate in (8000, 32000):
                        for channels in (1, 2, 5):
                            dt = np.dtype('%s%s%s' % (endianness, dtypechar, size))
                            _check_roundtrip(realfile, rate, dt, channels)
|
||||
|
||||
@@ -1,405 +0,0 @@
|
||||
"""
|
||||
Module to read / write wav files using numpy arrays
|
||||
|
||||
Functions
|
||||
---------
|
||||
`read`: Return the sample rate (in samples/sec) and data from a WAV file.
|
||||
|
||||
`write`: Write a numpy array as a WAV file.
|
||||
|
||||
"""
|
||||
from __future__ import division, print_function, absolute_import
|
||||
|
||||
import sys
|
||||
import numpy
|
||||
import struct
|
||||
import warnings
|
||||
|
||||
|
||||
__all__ = [
|
||||
'WavFileWarning',
|
||||
'read',
|
||||
'write'
|
||||
]
|
||||
|
||||
|
||||
class WavFileWarning(UserWarning):
    """Warning issued for non-fatal problems while reading a WAV file
    (e.g. unknown chunks that are skipped)."""
    pass
|
||||
|
||||
|
||||
# Format-tag values from the WAVE spec (RFC 2361 / mmreg.h).
WAVE_FORMAT_PCM = 0x0001         # integer PCM samples
WAVE_FORMAT_IEEE_FLOAT = 0x0003  # IEEE float samples
WAVE_FORMAT_EXTENSIBLE = 0xfffe  # real format is stored in the extension GUID
# Formats this module can actually decode.
KNOWN_WAVE_FORMATS = (WAVE_FORMAT_PCM, WAVE_FORMAT_IEEE_FLOAT)
|
||||
|
||||
# assumes file pointer is immediately
|
||||
# after the 'fmt ' id
|
||||
|
||||
|
||||
def _read_fmt_chunk(fid, is_big_endian):
    """Parse the body of a 'fmt ' subchunk (the id itself is already consumed).

    Returns
    -------
    size : int
        size of format subchunk in bytes (minus 8 for "fmt " and itself)
    format_tag : int
        PCM, float, or compressed format
    channels : int
        number of channels
    fs : int
        sampling frequency in samples per second
    bytes_per_second : int
        overall byte rate for the file
    block_align : int
        bytes per sample, including all channels
    bit_depth : int
        bits per sample
    """
    fmt = '>' if is_big_endian else '<'

    size = struct.unpack(fmt + 'I', fid.read(4))[0]
    bytes_read = 0

    # a compliant fmt chunk carries at least the 16-byte common fields
    if size < 16:
        raise ValueError("Binary structure of wave file is not compliant")

    (format_tag, channels, fs, bytes_per_second, block_align,
     bit_depth) = struct.unpack(fmt + 'HHIIHH', fid.read(16))
    bytes_read += 16

    if format_tag == WAVE_FORMAT_EXTENSIBLE and size >= (16 + 2):
        # the real format tag lives in the extension's GUID
        ext_chunk_size = struct.unpack(fmt + 'H', fid.read(2))[0]
        bytes_read += 2
        if ext_chunk_size >= 22:
            extensible_chunk_data = fid.read(22)
            bytes_read += 22
            raw_guid = extensible_chunk_data[6:22]
            # GUID template {XXXXXXXX-0000-0010-8000-00AA00389B71} (RFC-2361)
            # MS GUID byte order: first three groups are native byte order,
            # rest is Big Endian
            if is_big_endian:
                tail = b'\x00\x00\x00\x10\x80\x00\x00\xAA\x00\x38\x9B\x71'
            else:
                tail = b'\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71'
            if not raw_guid.endswith(tail):
                raise ValueError("Binary structure of wave file is not compliant")
            format_tag = struct.unpack(fmt + 'I', raw_guid[:4])[0]

    if format_tag not in KNOWN_WAVE_FORMATS:
        raise ValueError("Unknown wave file format")

    # move file pointer past any extra fmt bytes to the next chunk
    if size > bytes_read:
        fid.read(size - bytes_read)

    return (size, format_tag, channels, fs, bytes_per_second, block_align,
            bit_depth)
|
||||
|
||||
|
||||
# assumes file pointer is immediately after the 'data' id
|
||||
def _read_data_chunk(fid, format_tag, channels, bit_depth, is_big_endian,
|
||||
mmap=False):
|
||||
if is_big_endian:
|
||||
fmt = '>I'
|
||||
else:
|
||||
fmt = '<I'
|
||||
|
||||
# Size of the data subchunk in bytes
|
||||
size = struct.unpack(fmt, fid.read(4))[0]
|
||||
|
||||
# Number of bytes per sample
|
||||
bytes_per_sample = bit_depth//8
|
||||
if bit_depth == 8:
|
||||
dtype = 'u1'
|
||||
else:
|
||||
if is_big_endian:
|
||||
dtype = '>'
|
||||
else:
|
||||
dtype = '<'
|
||||
if format_tag == WAVE_FORMAT_PCM:
|
||||
dtype += 'i%d' % bytes_per_sample
|
||||
else:
|
||||
dtype += 'f%d' % bytes_per_sample
|
||||
if not mmap:
|
||||
data = numpy.frombuffer(fid.read(size), dtype=dtype)
|
||||
else:
|
||||
start = fid.tell()
|
||||
data = numpy.memmap(fid, dtype=dtype, mode='c', offset=start,
|
||||
shape=(size//bytes_per_sample,))
|
||||
fid.seek(start + size)
|
||||
|
||||
if channels > 1:
|
||||
data = data.reshape(-1, channels)
|
||||
return data
|
||||
|
||||
|
||||
def _skip_unknown_chunk(fid, is_big_endian):
|
||||
if is_big_endian:
|
||||
fmt = '>I'
|
||||
else:
|
||||
fmt = '<I'
|
||||
|
||||
data = fid.read(4)
|
||||
# call unpack() and seek() only if we have really read data from file
|
||||
# otherwise empty read at the end of the file would trigger
|
||||
# unnecessary exception at unpack() call
|
||||
# in case data equals somehow to 0, there is no need for seek() anyway
|
||||
if data:
|
||||
size = struct.unpack(fmt, data)[0]
|
||||
fid.seek(size, 1)
|
||||
|
||||
|
||||
def _read_riff_chunk(fid):
|
||||
str1 = fid.read(4) # File signature
|
||||
if str1 == b'RIFF':
|
||||
is_big_endian = False
|
||||
fmt = '<I'
|
||||
elif str1 == b'RIFX':
|
||||
is_big_endian = True
|
||||
fmt = '>I'
|
||||
else:
|
||||
# There are also .wav files with "FFIR" or "XFIR" signatures?
|
||||
raise ValueError("File format {}... not "
|
||||
"understood.".format(repr(str1)))
|
||||
|
||||
# Size of entire file
|
||||
file_size = struct.unpack(fmt, fid.read(4))[0] + 8
|
||||
|
||||
str2 = fid.read(4)
|
||||
if str2 != b'WAVE':
|
||||
raise ValueError("Not a WAV file.")
|
||||
|
||||
return file_size, is_big_endian
|
||||
|
||||
|
||||
def read(filename, mmap=False):
    """
    Open a WAV file

    Return the sample rate (in samples/sec) and data from a WAV file.

    Parameters
    ----------
    filename : string or open file handle
        Input wav file.
    mmap : bool, optional
        Whether to read data as memory-mapped.
        Only to be used on real files (Default: False).

        .. versionadded:: 0.12.0

    Returns
    -------
    rate : int
        Sample rate of wav file.
    data : numpy array
        Data read from wav file. Data-type is determined from the file;
        see Notes.

    Notes
    -----
    This function cannot read wav files with 24-bit data.

    Common data types: [1]_

    =====================  ===========  ===========  =============
         WAV format            Min          Max       NumPy dtype
    =====================  ===========  ===========  =============
    32-bit floating-point  -1.0         +1.0         float32
    32-bit PCM             -2147483648  +2147483647  int32
    16-bit PCM             -32768       +32767       int16
    8-bit PCM              0            255          uint8
    =====================  ===========  ===========  =============

    Note that 8-bit PCM is unsigned.

    References
    ----------
    .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
       Interface and Data Specifications 1.0", section "Data Format of the
       Samples", August 1991
       http://www.tactilemedia.com/info/MCI_Control_Info.html

    """
    if hasattr(filename, 'read'):
        # caller passed an open file-like object; mmap needs a real file
        fid = filename
        mmap = False
    else:
        fid = open(filename, 'rb')

    try:
        file_size, is_big_endian = _read_riff_chunk(fid)
        fmt_chunk_received = False
        # defaults used only until a 'fmt ' chunk is seen
        channels = 1
        bit_depth = 8
        format_tag = WAVE_FORMAT_PCM
        # walk the chunk list until the declared end of file
        while fid.tell() < file_size:
            # read the next chunk
            chunk_id = fid.read(4)

            if not chunk_id:
                raise ValueError("Unexpected end of file.")
            elif len(chunk_id) < 4:
                raise ValueError("Incomplete wav chunk.")

            if chunk_id == b'fmt ':
                fmt_chunk_received = True
                fmt_chunk = _read_fmt_chunk(fid, is_big_endian)
                format_tag, channels, fs = fmt_chunk[1:4]
                bit_depth = fmt_chunk[6]
                if bit_depth not in (8, 16, 32, 64, 96, 128):
                    raise ValueError("Unsupported bit depth: the wav file "
                                     "has {}-bit data.".format(bit_depth))
            elif chunk_id == b'fact':
                _skip_unknown_chunk(fid, is_big_endian)
            elif chunk_id == b'data':
                # 'data' must follow 'fmt ' so the sample layout is known
                if not fmt_chunk_received:
                    raise ValueError("No fmt chunk before data")
                data = _read_data_chunk(fid, format_tag, channels, bit_depth,
                                        is_big_endian, mmap)
            elif chunk_id == b'LIST':
                # Someday this could be handled properly but for now skip it
                _skip_unknown_chunk(fid, is_big_endian)
            elif chunk_id in (b'JUNK', b'Fake'):
                # Skip alignment chunks without warning
                _skip_unknown_chunk(fid, is_big_endian)
            else:
                warnings.warn("Chunk (non-data) not understood, skipping it.",
                              WavFileWarning)
                _skip_unknown_chunk(fid, is_big_endian)
    finally:
        if not hasattr(filename, 'read'):
            fid.close()
        else:
            # caller's handle: rewind instead of closing
            fid.seek(0)

    return fs, data
|
||||
|
||||
|
||||
def write(filename, rate, data):
    """
    Write a numpy array as a WAV file.

    Parameters
    ----------
    filename : string or open file handle
        Output wav file.
    rate : int
        The sample rate (in samples/sec).
    data : ndarray
        A 1-D or 2-D numpy array of either integer or float data-type.

    Notes
    -----
    * Writes a simple uncompressed WAV file.
    * To write multiple-channels, use a 2-D array of shape
      (Nsamples, Nchannels).
    * The bits-per-sample and PCM/float will be determined by the data-type.

    Common data types: [1]_

    =====================  ===========  ===========  =============
         WAV format            Min          Max       NumPy dtype
    =====================  ===========  ===========  =============
    32-bit floating-point  -1.0         +1.0         float32
    32-bit PCM             -2147483648  +2147483647  int32
    16-bit PCM             -32768       +32767       int16
    8-bit PCM              0            255          uint8
    =====================  ===========  ===========  =============

    Note that 8-bit PCM is unsigned.

    References
    ----------
    .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
       Interface and Data Specifications 1.0", section "Data Format of the
       Samples", August 1991
       http://www.tactilemedia.com/info/MCI_Control_Info.html

    """
    if hasattr(filename, 'write'):
        fid = filename
    else:
        fid = open(filename, 'wb')

    fs = rate

    try:
        # only signed int, float, and unsigned 8-bit are representable in WAV
        dkind = data.dtype.kind
        if not (dkind == 'i' or dkind == 'f' or (dkind == 'u' and
                                                 data.dtype.itemsize == 1)):
            raise ValueError("Unsupported data type '%s'" % data.dtype)

        header_data = b''

        # RIFF header; the 4 size bytes are patched in at the end
        header_data += b'RIFF'
        header_data += b'\x00\x00\x00\x00'
        header_data += b'WAVE'

        # fmt chunk
        header_data += b'fmt '
        if dkind == 'f':
            format_tag = WAVE_FORMAT_IEEE_FLOAT
        else:
            format_tag = WAVE_FORMAT_PCM
        if data.ndim == 1:
            channels = 1
        else:
            channels = data.shape[1]
        bit_depth = data.dtype.itemsize * 8
        bytes_per_second = fs*(bit_depth // 8)*channels
        block_align = channels * (bit_depth // 8)

        fmt_chunk_data = struct.pack('<HHIIHH', format_tag, channels, fs,
                                     bytes_per_second, block_align, bit_depth)
        if not (dkind == 'i' or dkind == 'u'):
            # add cbSize field for non-PCM files
            fmt_chunk_data += b'\x00\x00'

        header_data += struct.pack('<I', len(fmt_chunk_data))
        header_data += fmt_chunk_data

        # fact chunk (non-PCM files)
        if not (dkind == 'i' or dkind == 'u'):
            header_data += b'fact'
            header_data += struct.pack('<II', 4, data.shape[0])

        # check data size (needs to be immediately before the data chunk)
        # RIFF sizes are 32-bit unsigned, so the whole file must fit in 4 GiB
        if ((len(header_data)-4-4) + (4+4+data.nbytes)) > 0xFFFFFFFF:
            raise ValueError("Data exceeds wave file size limit")

        fid.write(header_data)

        # data chunk
        fid.write(b'data')
        fid.write(struct.pack('<I', data.nbytes))
        # samples must be stored little-endian; swap if the array is not
        if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and
                                           sys.byteorder == 'big'):
            data = data.byteswap()
        _array_tofile(fid, data)

        # Determine file size and place it in correct
        # position at start of the file.
        size = fid.tell()
        fid.seek(4)
        fid.write(struct.pack('<I', size-8))

    finally:
        if not hasattr(filename, 'write'):
            fid.close()
        else:
            # caller's handle: rewind instead of closing
            fid.seek(0)
|
||||
|
||||
|
||||
if sys.version_info[0] >= 3:
|
||||
def _array_tofile(fid, data):
|
||||
# ravel gives a c-contiguous buffer
|
||||
fid.write(data.ravel().view('b').data)
|
||||
else:
|
||||
def _array_tofile(fid, data):
|
||||
fid.write(data.tostring())
|
||||
Reference in New Issue
Block a user