Static code analysis and corrections

This commit is contained in:
Kristjan Komlosi
2019-07-17 16:06:09 +02:00
parent 674692c2fc
commit 21bfae9fbc
10086 changed files with 2102103 additions and 51 deletions
@@ -0,0 +1,32 @@
"""
Data IO api
"""
# flake8: noqa
from pandas.io.parsers import read_csv, read_table, read_fwf
from pandas.io.clipboards import read_clipboard
from pandas.io.excel import ExcelFile, ExcelWriter, read_excel
from pandas.io.pytables import HDFStore, get_store, read_hdf
from pandas.io.json import read_json
from pandas.io.html import read_html
from pandas.io.sql import read_sql, read_sql_table, read_sql_query
from pandas.io.sas import read_sas
from pandas.io.feather_format import read_feather
from pandas.io.parquet import read_parquet
from pandas.io.stata import read_stata
from pandas.io.pickle import read_pickle, to_pickle
from pandas.io.packers import read_msgpack, to_msgpack
from pandas.io.gbq import read_gbq
# deprecation, xref #13790
def Term(*args, **kwargs):
import warnings
warnings.warn("pd.Term is deprecated as it is not "
"applicable to user code. Instead use in-line "
"string expressions in the where clause when "
"searching in HDFStore",
FutureWarning, stacklevel=2)
from pandas.io.pytables import Term
return Term(*args, **kwargs)
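# Illustrative sketch: calling the deprecated pd.Term shim above emits a
# FutureWarning before delegating to pandas.io.pytables.Term. The query
# string here is a hypothetical example and may not build a usable Term
# outside of an HDFStore query.
import warnings
import pandas as pd

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    try:
        pd.Term("index > 5")
    except Exception:
        pass  # Term construction may fail without an HDFStore context
    assert any(issubclass(w.category, FutureWarning) for w in caught)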
@@ -0,0 +1,125 @@
"""
Pyperclip
A cross-platform clipboard module for Python. (only handles plain text for now)
By Al Sweigart al@inventwithpython.com
BSD License
Usage:
import pyperclip
pyperclip.copy('The text to be copied to the clipboard.')
spam = pyperclip.paste()
if not pyperclip.copy:
print("Copy functionality unavailable!")
On Windows, no additional modules are needed.
On Mac, the module uses pbcopy and pbpaste, which should come with the OS.
On Linux, install xclip or xsel via package manager. For example, in Debian:
sudo apt-get install xclip
Otherwise on Linux, you will need the gtk, qtpy or PyQt modules installed.
qtpy also requires a python-qt-bindings module: PyQt4, PyQt5, PySide, PySide2
gtk and PyQt4 modules are not available for Python 3,
and this module does not work with PyGObject yet.
"""
__version__ = '1.5.27'
import platform
import os
import subprocess
from .clipboards import (init_osx_clipboard,
init_gtk_clipboard, init_qt_clipboard,
init_xclip_clipboard, init_xsel_clipboard,
init_klipper_clipboard, init_no_clipboard)
from .windows import init_windows_clipboard
# `import qtpy` sys.exit()s if DISPLAY is not in the environment.
# Thus, we need to detect the presence of $DISPLAY manually
# and not load qtpy if it is absent.
HAS_DISPLAY = os.getenv("DISPLAY", False)
CHECK_CMD = "where" if platform.system() == "Windows" else "which"
def _executable_exists(name):
return subprocess.call([CHECK_CMD, name],
stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0
def determine_clipboard():
# Determine the OS/platform and set
# the copy() and paste() functions accordingly.
if 'cygwin' in platform.system().lower():
# FIXME: pyperclip currently does not support Cygwin,
# see https://github.com/asweigart/pyperclip/issues/55
pass
elif os.name == 'nt' or platform.system() == 'Windows':
return init_windows_clipboard()
if os.name == 'mac' or platform.system() == 'Darwin':
return init_osx_clipboard()
if HAS_DISPLAY:
# Determine which command/module is installed, if any.
try:
# Check if gtk is installed
import gtk # noqa
except ImportError:
pass
else:
return init_gtk_clipboard()
try:
# qtpy is a small abstraction layer that lets you write
# applications using a single api call to either PyQt or PySide
# https://pypi.org/project/QtPy
import qtpy # noqa
except ImportError:
# If qtpy isn't installed, fall back on importing PyQt5, then PyQt4
try:
import PyQt5 # noqa
except ImportError:
try:
import PyQt4 # noqa
except ImportError:
pass # fail fast for all non-ImportError exceptions.
else:
return init_qt_clipboard()
else:
return init_qt_clipboard()
pass
else:
return init_qt_clipboard()
if _executable_exists("xclip"):
return init_xclip_clipboard()
if _executable_exists("xsel"):
return init_xsel_clipboard()
if _executable_exists("klipper") and _executable_exists("qdbus"):
return init_klipper_clipboard()
return init_no_clipboard()
def set_clipboard(clipboard):
global copy, paste
clipboard_types = {'osx': init_osx_clipboard,
'gtk': init_gtk_clipboard,
'qt': init_qt_clipboard,
'xclip': init_xclip_clipboard,
'xsel': init_xsel_clipboard,
'klipper': init_klipper_clipboard,
'windows': init_windows_clipboard,
'no': init_no_clipboard}
copy, paste = clipboard_types[clipboard]()
copy, paste = determine_clipboard()
__all__ = ["copy", "paste"]
# pandas aliases
clipboard_get = paste
clipboard_set = copy
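# Illustrative usage sketch, assuming a working clipboard mechanism is
# available (e.g. xclip/xsel on Linux, pbcopy/pbpaste on macOS):
# determine_clipboard() picks a backend at import time, set_clipboard()
# can force a specific one, and pandas itself only uses the
# clipboard_get/clipboard_set aliases defined above.
from pandas.io.clipboard import clipboard_get, clipboard_set

clipboard_set(u"spam")             # copy text to the system clipboard
assert clipboard_get() == u"spam"  # paste it back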
@@ -0,0 +1,143 @@
import subprocess
from .exceptions import PyperclipException
from pandas.compat import PY2, text_type
EXCEPT_MSG = """
Pyperclip could not find a copy/paste mechanism for your system.
For more information, please visit https://pyperclip.readthedocs.org """
def init_osx_clipboard():
def copy_osx(text):
p = subprocess.Popen(['pbcopy', 'w'],
stdin=subprocess.PIPE, close_fds=True)
p.communicate(input=text.encode('utf-8'))
def paste_osx():
p = subprocess.Popen(['pbpaste', 'r'],
stdout=subprocess.PIPE, close_fds=True)
stdout, stderr = p.communicate()
return stdout.decode('utf-8')
return copy_osx, paste_osx
def init_gtk_clipboard():
import gtk
def copy_gtk(text):
global cb
cb = gtk.Clipboard()
cb.set_text(text)
cb.store()
def paste_gtk():
clipboardContents = gtk.Clipboard().wait_for_text()
# for python 2, returns None if the clipboard is blank.
if clipboardContents is None:
return ''
else:
return clipboardContents
return copy_gtk, paste_gtk
def init_qt_clipboard():
# $DISPLAY should exist
# Try to import from qtpy, but if that fails try PyQt5 then PyQt4
try:
from qtpy.QtWidgets import QApplication
except ImportError:
try:
from PyQt5.QtWidgets import QApplication
except ImportError:
from PyQt4.QtGui import QApplication
app = QApplication.instance()
if app is None:
app = QApplication([])
def copy_qt(text):
cb = app.clipboard()
cb.setText(text)
def paste_qt():
cb = app.clipboard()
return text_type(cb.text())
return copy_qt, paste_qt
def init_xclip_clipboard():
def copy_xclip(text):
p = subprocess.Popen(['xclip', '-selection', 'c'],
stdin=subprocess.PIPE, close_fds=True)
p.communicate(input=text.encode('utf-8'))
def paste_xclip():
p = subprocess.Popen(['xclip', '-selection', 'c', '-o'],
stdout=subprocess.PIPE, close_fds=True)
stdout, stderr = p.communicate()
return stdout.decode('utf-8')
return copy_xclip, paste_xclip
def init_xsel_clipboard():
def copy_xsel(text):
p = subprocess.Popen(['xsel', '-b', '-i'],
stdin=subprocess.PIPE, close_fds=True)
p.communicate(input=text.encode('utf-8'))
def paste_xsel():
p = subprocess.Popen(['xsel', '-b', '-o'],
stdout=subprocess.PIPE, close_fds=True)
stdout, stderr = p.communicate()
return stdout.decode('utf-8')
return copy_xsel, paste_xsel
def init_klipper_clipboard():
def copy_klipper(text):
p = subprocess.Popen(
['qdbus', 'org.kde.klipper', '/klipper', 'setClipboardContents',
text.encode('utf-8')],
stdin=subprocess.PIPE, close_fds=True)
p.communicate(input=None)
def paste_klipper():
p = subprocess.Popen(
['qdbus', 'org.kde.klipper', '/klipper', 'getClipboardContents'],
stdout=subprocess.PIPE, close_fds=True)
stdout, stderr = p.communicate()
# Workaround for https://bugs.kde.org/show_bug.cgi?id=342874
# TODO: https://github.com/asweigart/pyperclip/issues/43
clipboardContents = stdout.decode('utf-8')
# even if blank, Klipper will append a newline at the end
assert len(clipboardContents) > 0
# make sure that newline is there
assert clipboardContents.endswith('\n')
if clipboardContents.endswith('\n'):
clipboardContents = clipboardContents[:-1]
return clipboardContents
return copy_klipper, paste_klipper
def init_no_clipboard():
class ClipboardUnavailable(object):
def __call__(self, *args, **kwargs):
raise PyperclipException(EXCEPT_MSG)
if PY2:
def __nonzero__(self):
return False
else:
def __bool__(self):
return False
return ClipboardUnavailable(), ClipboardUnavailable()
@@ -0,0 +1,12 @@
import ctypes
class PyperclipException(RuntimeError):
pass
class PyperclipWindowsException(PyperclipException):
def __init__(self, message):
message += " ({err})".format(err=ctypes.WinError())
super(PyperclipWindowsException, self).__init__(message)
@@ -0,0 +1,153 @@
"""
This module implements clipboard handling on Windows using ctypes.
"""
import time
import contextlib
import ctypes
from ctypes import c_size_t, sizeof, c_wchar_p, get_errno, c_wchar
from .exceptions import PyperclipWindowsException
class CheckedCall(object):
def __init__(self, f):
super(CheckedCall, self).__setattr__("f", f)
def __call__(self, *args):
ret = self.f(*args)
if not ret and get_errno():
raise PyperclipWindowsException("Error calling " + self.f.__name__)
return ret
def __setattr__(self, key, value):
setattr(self.f, key, value)
def init_windows_clipboard():
from ctypes.wintypes import (HGLOBAL, LPVOID, DWORD, LPCSTR, INT, HWND,
HINSTANCE, HMENU, BOOL, UINT, HANDLE)
windll = ctypes.windll
safeCreateWindowExA = CheckedCall(windll.user32.CreateWindowExA)
safeCreateWindowExA.argtypes = [DWORD, LPCSTR, LPCSTR, DWORD, INT, INT,
INT, INT, HWND, HMENU, HINSTANCE, LPVOID]
safeCreateWindowExA.restype = HWND
safeDestroyWindow = CheckedCall(windll.user32.DestroyWindow)
safeDestroyWindow.argtypes = [HWND]
safeDestroyWindow.restype = BOOL
OpenClipboard = windll.user32.OpenClipboard
OpenClipboard.argtypes = [HWND]
OpenClipboard.restype = BOOL
safeCloseClipboard = CheckedCall(windll.user32.CloseClipboard)
safeCloseClipboard.argtypes = []
safeCloseClipboard.restype = BOOL
safeEmptyClipboard = CheckedCall(windll.user32.EmptyClipboard)
safeEmptyClipboard.argtypes = []
safeEmptyClipboard.restype = BOOL
safeGetClipboardData = CheckedCall(windll.user32.GetClipboardData)
safeGetClipboardData.argtypes = [UINT]
safeGetClipboardData.restype = HANDLE
safeSetClipboardData = CheckedCall(windll.user32.SetClipboardData)
safeSetClipboardData.argtypes = [UINT, HANDLE]
safeSetClipboardData.restype = HANDLE
safeGlobalAlloc = CheckedCall(windll.kernel32.GlobalAlloc)
safeGlobalAlloc.argtypes = [UINT, c_size_t]
safeGlobalAlloc.restype = HGLOBAL
safeGlobalLock = CheckedCall(windll.kernel32.GlobalLock)
safeGlobalLock.argtypes = [HGLOBAL]
safeGlobalLock.restype = LPVOID
safeGlobalUnlock = CheckedCall(windll.kernel32.GlobalUnlock)
safeGlobalUnlock.argtypes = [HGLOBAL]
safeGlobalUnlock.restype = BOOL
GMEM_MOVEABLE = 0x0002
CF_UNICODETEXT = 13
@contextlib.contextmanager
def window():
"""
Context that provides a valid Windows hwnd.
"""
# we really just need the hwnd, so setting "STATIC"
# as predefined lpClass is just fine.
hwnd = safeCreateWindowExA(0, b"STATIC", None, 0, 0, 0, 0, 0,
None, None, None, None)
try:
yield hwnd
finally:
safeDestroyWindow(hwnd)
@contextlib.contextmanager
def clipboard(hwnd):
"""
Context manager that opens the clipboard and prevents
other applications from modifying the clipboard content.
"""
# We may not get the clipboard handle immediately because
# some other application is accessing it (?)
# We try for at least 500ms to get the clipboard.
t = time.time() + 0.5
success = False
while time.time() < t:
success = OpenClipboard(hwnd)
if success:
break
time.sleep(0.01)
if not success:
raise PyperclipWindowsException("Error calling OpenClipboard")
try:
yield
finally:
safeCloseClipboard()
def copy_windows(text):
# This function is heavily based on
# http://msdn.com/ms649016#_win32_Copying_Information_to_the_Clipboard
with window() as hwnd:
# http://msdn.com/ms649048
# If an application calls OpenClipboard with hwnd set to NULL,
# EmptyClipboard sets the clipboard owner to NULL;
# this causes SetClipboardData to fail.
# => We need a valid hwnd to copy something.
with clipboard(hwnd):
safeEmptyClipboard()
if text:
# http://msdn.com/ms649051
# If the hMem parameter identifies a memory object,
# the object must have been allocated using the
# function with the GMEM_MOVEABLE flag.
count = len(text) + 1
handle = safeGlobalAlloc(GMEM_MOVEABLE,
count * sizeof(c_wchar))
locked_handle = safeGlobalLock(handle)
ctypes.memmove(c_wchar_p(locked_handle),
c_wchar_p(text), count * sizeof(c_wchar))
safeGlobalUnlock(handle)
safeSetClipboardData(CF_UNICODETEXT, handle)
def paste_windows():
with clipboard(None):
handle = safeGetClipboardData(CF_UNICODETEXT)
if not handle:
# GetClipboardData may return NULL with errno == NO_ERROR
# if the clipboard is empty.
# (Also, it may return a handle to an empty buffer,
# but technically that's not empty)
return ""
return c_wchar_p(handle).value
return copy_windows, paste_windows
@@ -0,0 +1,140 @@
""" io on the clipboard """
from pandas import compat, get_option, option_context, DataFrame
from pandas.compat import StringIO, PY2, PY3
import warnings
def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover
r"""
Read text from clipboard and pass to read_table. See read_table for the
full argument list
Parameters
----------
sep : str, default '\s+'.
A string or regex delimiter. The default of '\s+' denotes
one or more whitespace characters.
Returns
-------
parsed : DataFrame
"""
encoding = kwargs.pop('encoding', 'utf-8')
# only utf-8 is valid for passed value because that's what clipboard
# supports
if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
raise NotImplementedError(
'reading from clipboard only supports utf-8 encoding')
from pandas.io.clipboard import clipboard_get
from pandas.io.parsers import read_table
text = clipboard_get()
# try to decode (if needed on PY3)
# Strange. linux py33 doesn't complain, win py33 does
if PY3:
try:
text = compat.bytes_to_str(
text, encoding=(kwargs.get('encoding') or
get_option('display.encoding'))
)
except Exception:
pass
# Excel copies into clipboard with \t separation
# inspect no more than the first 10 lines; if they
# all contain an equal number (>0) of tabs, infer
# that this came from excel and set 'sep' accordingly
lines = text[:10000].split('\n')[:-1][:10]
# Need to remove leading white space, since read_table
# accepts:
# a b
# 0 1 2
# 1 3 4
counts = {x.lstrip().count('\t') for x in lines}
if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
sep = '\t'
# Edge case where sep is specified to be None, return to default
if sep is None and kwargs.get('delim_whitespace') is None:
sep = r'\s+'
# Regex separator currently only works with python engine.
# Default to python if separator is multi-character (regex)
if len(sep) > 1 and kwargs.get('engine') is None:
kwargs['engine'] = 'python'
elif len(sep) > 1 and kwargs.get('engine') == 'c':
warnings.warn('read_clipboard with regex separator does not work'
' properly with c engine')
# In PY2, the c table reader first encodes text with UTF-8 but the Python
# table reader uses the format of the passed string. For consistency,
# encode strings for python engine so that output from python and c
# engines produce consistent results
if kwargs.get('engine') == 'python' and PY2:
text = text.encode('utf-8')
return read_table(StringIO(text), sep=sep, **kwargs)
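# Hedged example, assuming a working clipboard backend: when every
# inspected line holds the same nonzero number of tabs (as when copying a
# range out of Excel), read_clipboard infers sep='\t' and parses the text
# like read_table.
import pandas as pd
from pandas.io.clipboard import clipboard_set

clipboard_set(u"a\tb\n1\t2\n3\t4\n")
df = pd.read_clipboard()           # tab inference sets sep to '\t'
assert list(df.columns) == ["a", "b"]
assert df.shape == (2, 2)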
def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover
"""
Attempt to write text representation of object to the system clipboard
The clipboard can be then pasted into Excel for example.
Parameters
----------
obj : the object to write to the clipboard
excel : boolean, defaults to True
if True, use the provided separator, writing in a csv
format for allowing easy pasting into excel.
if False, write a string representation of the object
to the clipboard
sep : optional, defaults to tab
other keywords are passed to to_csv
Notes
-----
Requirements for your platform
- Linux: xclip, or xsel (with gtk or PyQt4 modules)
- Windows: none (no additional modules needed)
- OS X: none (uses pbcopy and pbpaste, which ship with the OS)
"""
encoding = kwargs.pop('encoding', 'utf-8')
# testing if an invalid encoding is passed to clipboard
if encoding is not None and encoding.lower().replace('-', '') != 'utf8':
raise ValueError('clipboard only supports utf-8 encoding')
from pandas.io.clipboard import clipboard_set
if excel is None:
excel = True
if excel:
try:
if sep is None:
sep = '\t'
buf = StringIO()
# clipboard_set (pyperclip) expects unicode
obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs)
text = buf.getvalue()
if PY2:
text = text.decode('utf-8')
clipboard_set(text)
return
except TypeError:
warnings.warn('to_clipboard in excel mode requires a single '
'character separator.')
elif sep is not None:
warnings.warn('to_clipboard with excel=False ignores the sep argument')
if isinstance(obj, DataFrame):
# str(df) has various unhelpful defaults, like truncation
with option_context('display.max_colwidth', 999999):
objstr = obj.to_string(**kwargs)
else:
objstr = str(obj)
clipboard_set(objstr)
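# Hedged round-trip sketch, assuming a working clipboard backend: with
# excel=True (the default) the frame is written as tab-separated CSV text,
# so it can be pasted into a spreadsheet or read straight back with
# read_clipboard.
import pandas as pd

df = pd.DataFrame({"a": [1, 3], "b": [2, 4]})
df.to_clipboard(index=False)       # excel=True, sep defaults to '\t'
assert pd.read_clipboard().equals(df)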
@@ -0,0 +1,593 @@
"""Common IO api utilities"""
import os
import csv
import codecs
import mmap
from contextlib import contextmanager, closing
import zipfile
from pandas.compat import StringIO, BytesIO, string_types, text_type
from pandas import compat
from pandas.io.formats.printing import pprint_thing
import pandas.core.common as com
from pandas.core.dtypes.common import is_number, is_file_like
# compat
from pandas.errors import (ParserError, DtypeWarning, # noqa
EmptyDataError, ParserWarning)
# gh-12665: Alias for now and remove later.
CParserError = ParserError
# common NA values
# no longer excluding inf representations
# '1.#INF','-1.#INF', '1.#INF000000',
_NA_VALUES = set([
'-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A',
'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''
])
if compat.PY3:
from urllib.request import urlopen, pathname2url
_urlopen = urlopen
from urllib.parse import urlparse as parse_url
from urllib.parse import (uses_relative, uses_netloc, uses_params,
urlencode, urljoin)
from urllib.error import URLError
from http.client import HTTPException # noqa
else:
from urllib2 import urlopen as _urlopen
from urllib import urlencode, pathname2url # noqa
from urlparse import urlparse as parse_url
from urlparse import uses_relative, uses_netloc, uses_params, urljoin
from urllib2 import URLError # noqa
from httplib import HTTPException # noqa
from contextlib import contextmanager, closing # noqa
from functools import wraps # noqa
# @wraps(_urlopen)
@contextmanager
def urlopen(*args, **kwargs):
with closing(_urlopen(*args, **kwargs)) as f:
yield f
_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
_VALID_URLS.discard('')
class BaseIterator(object):
"""Subclass this and provide a "__next__()" method to obtain an iterator.
Useful only when the object being iterated is non-reusable (e.g. OK for a
parser, not for an in-memory table, yes for its iterator)."""
def __iter__(self):
return self
def __next__(self):
raise com.AbstractMethodError(self)
if not compat.PY3:
BaseIterator.next = lambda self: self.__next__()
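# Minimal sketch of the BaseIterator contract: subclasses only implement
# __next__ and raise StopIteration when exhausted; the compat shim above
# maps .next() onto __next__() for Python 2. _ListIterator is a
# hypothetical example class.
class _ListIterator(BaseIterator):

    def __init__(self, values):
        self._values = list(values)
        self._pos = 0

    def __next__(self):
        if self._pos >= len(self._values):
            raise StopIteration
        value = self._values[self._pos]
        self._pos += 1
        return value


assert list(_ListIterator([1, 2, 3])) == [1, 2, 3]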
def _is_url(url):
"""Check to see if a URL has a valid protocol.
Parameters
----------
url : str or unicode
Returns
-------
isurl : bool
If `url` has a valid protocol return True otherwise False.
"""
try:
return parse_url(url).scheme in _VALID_URLS
except Exception:
return False
def _expand_user(filepath_or_buffer):
"""Return the argument with an initial component of ~ or ~user
replaced by that user's home directory.
Parameters
----------
filepath_or_buffer : object to be converted if possible
Returns
-------
expanded_filepath_or_buffer : an expanded filepath or the
input if not expandable
"""
if isinstance(filepath_or_buffer, string_types):
return os.path.expanduser(filepath_or_buffer)
return filepath_or_buffer
def _validate_header_arg(header):
if isinstance(header, bool):
raise TypeError("Passing a bool to header is invalid. "
"Use header=None for no header or "
"header=int or list-like of ints to specify "
"the row(s) making up the column names")
def _stringify_path(filepath_or_buffer):
"""Attempt to convert a path-like object to a string.
Parameters
----------
filepath_or_buffer : object to be converted
Returns
-------
str_filepath_or_buffer : maybe a string version of the object
Notes
-----
Objects supporting the fspath protocol (python 3.6+) are coerced
according to their __fspath__ method.
For backwards compatibility with older pythons, pathlib.Path and
py.path objects are specially coerced.
Any other object is passed through unchanged, which includes bytes,
strings, buffers, or anything else that's not even path-like.
"""
try:
import pathlib
_PATHLIB_INSTALLED = True
except ImportError:
_PATHLIB_INSTALLED = False
try:
from py.path import local as LocalPath
_PY_PATH_INSTALLED = True
except ImportError:
_PY_PATH_INSTALLED = False
if hasattr(filepath_or_buffer, '__fspath__'):
return filepath_or_buffer.__fspath__()
if _PATHLIB_INSTALLED and isinstance(filepath_or_buffer, pathlib.Path):
return text_type(filepath_or_buffer)
if _PY_PATH_INSTALLED and isinstance(filepath_or_buffer, LocalPath):
return filepath_or_buffer.strpath
return filepath_or_buffer
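# Hedged example: path-like objects are reduced to plain strings so the
# downstream IO code only has to deal with str, while buffers and other
# non-path objects pass through unchanged (assumes pathlib is available).
import pathlib

assert _stringify_path(pathlib.Path("data.csv")) == "data.csv"
buf = StringIO("a,b\n1,2\n")
assert _stringify_path(buf) is buf    # buffers are returned untouched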
def is_s3_url(url):
"""Check for an s3, s3n, or s3a url"""
try:
return parse_url(url).scheme in ['s3', 's3n', 's3a']
except: # noqa
return False
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
compression=None, mode=None):
"""
If the filepath_or_buffer is a url, translate and return the buffer.
Otherwise passthrough.
Parameters
----------
filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
or buffer
encoding : the encoding to use to decode py3 bytes, default is 'utf-8'
mode : str, optional
Returns
-------
tuple of (filepath or buffer or S3File instance,
encoding (str),
compression (str),
should_close (bool))
"""
filepath_or_buffer = _stringify_path(filepath_or_buffer)
if _is_url(filepath_or_buffer):
req = _urlopen(filepath_or_buffer)
content_encoding = req.headers.get('Content-Encoding', None)
if content_encoding == 'gzip':
# Override compression based on Content-Encoding header
compression = 'gzip'
reader = BytesIO(req.read())
req.close()
return reader, encoding, compression, True
if is_s3_url(filepath_or_buffer):
from pandas.io import s3
return s3.get_filepath_or_buffer(filepath_or_buffer,
encoding=encoding,
compression=compression,
mode=mode)
if isinstance(filepath_or_buffer, (compat.string_types,
compat.binary_type,
mmap.mmap)):
return _expand_user(filepath_or_buffer), None, compression, False
if not is_file_like(filepath_or_buffer):
msg = "Invalid file path or buffer object type: {_type}"
raise ValueError(msg.format(_type=type(filepath_or_buffer)))
return filepath_or_buffer, None, compression, False
def file_path_to_url(path):
"""
converts an absolute native path to a FILE URL.
Parameters
----------
path : a path in native format
Returns
-------
a valid FILE URL
"""
return urljoin('file:', pathname2url(path))
_compression_to_extension = {
'gzip': '.gz',
'bz2': '.bz2',
'zip': '.zip',
'xz': '.xz',
}
def _infer_compression(filepath_or_buffer, compression):
"""
Get the compression method for filepath_or_buffer. If compression='infer',
the inferred compression method is returned. Otherwise, the input
compression method is returned unchanged, unless it's invalid, in which
case an error is raised.
Parameters
----------
filepath_or_buffer :
a path (str) or buffer
compression : str or None
the compression method including None for no compression and 'infer'
Returns
-------
string or None :
compression method
Raises
------
ValueError on invalid compression specified
"""
# No compression has been explicitly specified
if compression is None:
return None
# Infer compression
if compression == 'infer':
# Convert all path types (e.g. pathlib.Path) to strings
filepath_or_buffer = _stringify_path(filepath_or_buffer)
if not isinstance(filepath_or_buffer, compat.string_types):
# Cannot infer compression of a buffer, assume no compression
return None
# Infer compression from the filename/URL extension
for compression, extension in _compression_to_extension.items():
if filepath_or_buffer.endswith(extension):
return compression
return None
# Compression has been specified. Check that it's valid
if compression in _compression_to_extension:
return compression
msg = 'Unrecognized compression type: {}'.format(compression)
valid = ['infer', None] + sorted(_compression_to_extension)
msg += '\nValid compression types are {}'.format(valid)
raise ValueError(msg)
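# Hedged example of the inference rules above: with compression='infer'
# only string paths are inspected and the extension decides the method;
# buffers fall back to no compression, and an explicit method passes
# through after validation.
assert _infer_compression("data.csv.gz", "infer") == "gzip"
assert _infer_compression("data.csv", "infer") is None
assert _infer_compression(BytesIO(b"raw bytes"), "infer") is None
assert _infer_compression("data.csv.bz2", "bz2") == "bz2"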
def _get_handle(path_or_buf, mode, encoding=None, compression=None,
memory_map=False, is_text=True):
"""
Get file handle for given path/buffer and mode.
Parameters
----------
path_or_buf :
a path (str) or buffer
mode : str
mode to open path_or_buf with
encoding : str or None
compression : str or None
Supported compression protocols are gzip, bz2, zip, and xz
memory_map : boolean, default False
See parsers._parser_params for more information.
is_text : boolean, default True
whether file/buffer is in text format (csv, json, etc.), or in binary
mode (pickle, etc.)
Returns
-------
f : file-like
A file-like object
handles : list of file-like objects
A list of file-like object that were opened in this function.
"""
try:
from s3fs import S3File
need_text_wrapping = (BytesIO, S3File)
except ImportError:
need_text_wrapping = (BytesIO,)
handles = list()
f = path_or_buf
# Convert pathlib.Path/py.path.local or string
path_or_buf = _stringify_path(path_or_buf)
is_path = isinstance(path_or_buf, compat.string_types)
if compression:
if compat.PY2 and not is_path and encoding:
msg = 'compression with encoding is not yet supported in Python 2'
raise ValueError(msg)
# GZ Compression
if compression == 'gzip':
import gzip
if is_path:
f = gzip.open(path_or_buf, mode)
else:
f = gzip.GzipFile(fileobj=path_or_buf)
# BZ Compression
elif compression == 'bz2':
import bz2
if is_path:
f = bz2.BZ2File(path_or_buf, mode)
elif compat.PY2:
# Python 2's bz2 module can't take file objects, so have to
# run through decompress manually
f = StringIO(bz2.decompress(path_or_buf.read()))
path_or_buf.close()
else:
f = bz2.BZ2File(path_or_buf)
# ZIP Compression
elif compression == 'zip':
zf = BytesZipFile(path_or_buf, mode)
if zf.mode == 'w':
f = zf
elif zf.mode == 'r':
zip_names = zf.namelist()
if len(zip_names) == 1:
f = zf.open(zip_names.pop())
elif len(zip_names) == 0:
raise ValueError('Zero files found in ZIP file {}'
.format(path_or_buf))
else:
raise ValueError('Multiple files found in ZIP file.'
' Only one file per ZIP: {}'
.format(zip_names))
# XZ Compression
elif compression == 'xz':
lzma = compat.import_lzma()
f = lzma.LZMAFile(path_or_buf, mode)
# Unrecognized Compression
else:
msg = 'Unrecognized compression type: {}'.format(compression)
raise ValueError(msg)
handles.append(f)
elif is_path:
if compat.PY2:
# Python 2
f = open(path_or_buf, mode)
elif encoding:
# Python 3 and encoding
f = open(path_or_buf, mode, encoding=encoding)
elif is_text:
# Python 3 and no explicit encoding
f = open(path_or_buf, mode, errors='replace')
else:
# Python 3 and binary mode
f = open(path_or_buf, mode)
handles.append(f)
# in Python 3, convert BytesIO or fileobjects passed with an encoding
if compat.PY3 and is_text and\
(compression or isinstance(f, need_text_wrapping)):
from io import TextIOWrapper
f = TextIOWrapper(f, encoding=encoding)
handles.append(f)
if memory_map and hasattr(f, 'fileno'):
try:
g = MMapWrapper(f)
f.close()
f = g
except Exception:
# we catch any errors that may have occurred
# because that is consistent with the lower-level
# functionality of the C engine (pd.read_csv), so
# leave the file handler as is then
pass
return f, handles
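# Hedged usage sketch: write a small gzip-compressed CSV to a temporary
# file, open it back through _get_handle (combined with _infer_compression
# above), and close every handle the helper opened. The file name is a
# hypothetical example.
import gzip
import tempfile

path = os.path.join(tempfile.mkdtemp(), "example.csv.gz")
with gzip.open(path, "wb") as fh:
    fh.write(b"a,b\n1,2\n")

f, handles = _get_handle(path, "r", encoding="utf-8",
                         compression=_infer_compression(path, "infer"))
try:
    print(f.read())                 # a,b / 1,2
finally:
    for handle in reversed(handles):
        handle.close()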
class BytesZipFile(zipfile.ZipFile, BytesIO):
"""
Wrapper for the standard library class ZipFile that allows the returned
file-like handle to accept byte strings via the `write` method.
BytesIO provides the attributes of a file-like object, and ZipFile.writestr
writes byte strings into a member of the archive.
"""
# GH 17778
def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, **kwargs):
if mode in ['wb', 'rb']:
mode = mode.replace('b', '')
super(BytesZipFile, self).__init__(file, mode, compression, **kwargs)
def write(self, data):
super(BytesZipFile, self).writestr(self.filename, data)
@property
def closed(self):
return self.fp is None
class MMapWrapper(BaseIterator):
"""
Wrapper for Python's mmap class so that it can be properly read in
by Python's csv.reader class.
Parameters
----------
f : file object
File object to be mapped onto memory. Must support the 'fileno'
method or have an equivalent attribute
"""
def __init__(self, f):
self.mmap = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
def __getattr__(self, name):
return getattr(self.mmap, name)
def __iter__(self):
return self
def __next__(self):
newline = self.mmap.readline()
# readline returns bytes, not str, in Python 3,
# but Python's CSV reader expects str, so convert
# the output to str before continuing
if compat.PY3:
newline = compat.bytes_to_str(newline)
# mmap doesn't raise if reading past the allocated
# data but instead returns an empty string, so raise
# if that is returned
if newline == '':
raise StopIteration
return newline
if not compat.PY3:
MMapWrapper.next = lambda self: self.__next__()
class UTF8Recoder(BaseIterator):
"""
Iterator that reads an encoded stream and reencodes the input to UTF-8
"""
def __init__(self, f, encoding):
self.reader = codecs.getreader(encoding)(f)
def read(self, bytes=-1):
return self.reader.read(bytes).encode("utf-8")
def readline(self):
return self.reader.readline().encode("utf-8")
def next(self):
return next(self.reader).encode("utf-8")
if compat.PY3: # pragma: no cover
def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
# ignore encoding
return csv.reader(f, dialect=dialect, **kwds)
def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds):
return csv.writer(f, dialect=dialect, **kwds)
else:
class UnicodeReader(BaseIterator):
"""
A CSV reader which will iterate over lines in the CSV file "f",
which is encoded in the given encoding.
On Python 3, this is replaced (above) by csv.reader, which handles
unicode.
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
f = UTF8Recoder(f, encoding)
self.reader = csv.reader(f, dialect=dialect, **kwds)
def __next__(self):
row = next(self.reader)
return [compat.text_type(s, "utf-8") for s in row]
class UnicodeWriter(object):
"""
A CSV writer which will write rows to CSV file "f",
which is encoded in the given encoding.
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
# Redirect output to a queue
self.queue = StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = codecs.getincrementalencoder(encoding)()
self.quoting = kwds.get("quoting", None)
def writerow(self, row):
def _check_as_is(x):
return (self.quoting == csv.QUOTE_NONNUMERIC and
is_number(x)) or isinstance(x, str)
row = [x if _check_as_is(x)
else pprint_thing(x).encode("utf-8") for x in row]
self.writer.writerow([s for s in row])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
# ... and re-encode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)
def writerows(self, rows):
def _check_as_is(x):
return (self.quoting == csv.QUOTE_NONNUMERIC and
is_number(x)) or isinstance(x, str)
for i, row in enumerate(rows):
rows[i] = [x if _check_as_is(x)
else pprint_thing(x).encode("utf-8") for x in row]
self.writer.writerows([[s for s in row] for row in rows])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
# ... and re-encode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)
@@ -0,0 +1,63 @@
"""This module is designed for community supported date conversion functions"""
from pandas.compat import range, map
import numpy as np
from pandas._libs.tslibs import parsing
def parse_date_time(date_col, time_col):
date_col = _maybe_cast(date_col)
time_col = _maybe_cast(time_col)
return parsing.try_parse_date_and_time(date_col, time_col)
def parse_date_fields(year_col, month_col, day_col):
year_col = _maybe_cast(year_col)
month_col = _maybe_cast(month_col)
day_col = _maybe_cast(day_col)
return parsing.try_parse_year_month_day(year_col, month_col, day_col)
def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col,
second_col):
year_col = _maybe_cast(year_col)
month_col = _maybe_cast(month_col)
day_col = _maybe_cast(day_col)
hour_col = _maybe_cast(hour_col)
minute_col = _maybe_cast(minute_col)
second_col = _maybe_cast(second_col)
return parsing.try_parse_datetime_components(year_col, month_col, day_col,
hour_col, minute_col,
second_col)
def generic_parser(parse_func, *cols):
N = _check_columns(cols)
results = np.empty(N, dtype=object)
for i in range(N):
args = [c[i] for c in cols]
results[i] = parse_func(*args)
return results
def _maybe_cast(arr):
if not arr.dtype.type == np.object_:
arr = np.array(arr, dtype=object)
return arr
def _check_columns(cols):
if not len(cols):
raise AssertionError("There must be at least 1 column")
head, tail = cols[0], cols[1:]
N = len(head)
for i, n in enumerate(map(len, tail)):
if n != N:
raise AssertionError('All columns must have the same length: {0}; '
'column {1} has length {2}'.format(N, i, n))
return N
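# Hedged example: the helpers above can be passed as the date_parser
# argument of read_csv; each one casts its columns to object dtype and
# combines the pieces row by row. The arrays here are made-up sample data.
import numpy as np

years = np.array([2018, 2019])
months = np.array([1, 2])
days = np.array([15, 28])

dates = parse_date_fields(years, months, days)
print(dates[0])   # 2018-01-15 00:00:00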
File diff suppressed because it is too large
@@ -0,0 +1,112 @@
""" feather-format compat """
from distutils.version import LooseVersion
from pandas import DataFrame, RangeIndex, Int64Index
from pandas.compat import range
from pandas.io.common import _stringify_path
def _try_import():
# since pandas is a dependency of feather
# we need to import on first use
try:
import feather
except ImportError:
# give a nice error message
raise ImportError("the feather-format library is not installed\n"
"you can install via conda\n"
"conda install feather-format -c conda-forge\n"
"or via pip\n"
"pip install -U feather-format\n")
try:
LooseVersion(feather.__version__) >= LooseVersion('0.3.1')
except AttributeError:
raise ImportError("the feather-format library must be >= "
"version 0.3.1\n"
"you can install via conda\n"
"conda install feather-format -c conda-forge"
"or via pip\n"
"pip install -U feather-format\n")
return feather
def to_feather(df, path):
"""
Write a DataFrame to the feather-format
Parameters
----------
df : DataFrame
path : string file path, or file-like object
"""
path = _stringify_path(path)
if not isinstance(df, DataFrame):
raise ValueError("feather only support IO with DataFrames")
feather = _try_import()
valid_types = {'string', 'unicode'}
# validate index
# --------------
# validate that we have only a default index
# raise on anything else as we don't serialize the index
if not isinstance(df.index, Int64Index):
raise ValueError("feather does not support serializing {} "
"for the index; you can .reset_index()"
"to make the index into column(s)".format(
type(df.index)))
if not df.index.equals(RangeIndex.from_range(range(len(df)))):
raise ValueError("feather does not support serializing a "
"non-default index for the index; you "
"can .reset_index() to make the index "
"into column(s)")
if df.index.name is not None:
raise ValueError("feather does not serialize index meta-data on a "
"default index")
# validate columns
# ----------------
# must have valid column names (strings only)
if df.columns.inferred_type not in valid_types:
raise ValueError("feather must have string column names")
feather.write_dataframe(df, path)
def read_feather(path, nthreads=1):
"""
Load a feather-format object from the file path
.. versionadded:: 0.20.0
Parameters
----------
path : string file path, or file-like object
nthreads : int, default 1
Number of CPU threads to use when reading to pandas.DataFrame
.. versionadded:: 0.21.0
Returns
-------
type of object stored in file
"""
feather = _try_import()
path = _stringify_path(path)
if LooseVersion(feather.__version__) < LooseVersion('0.4.0'):
return feather.read_dataframe(path)
return feather.read_dataframe(path, nthreads=nthreads)
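# Hedged round-trip sketch, assuming the optional feather-format
# dependency is installed; the frame must carry a default RangeIndex, per
# the validation in to_feather above. "example.feather" is a hypothetical
# file name.
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
to_feather(df, "example.feather")
assert read_feather("example.feather").equals(df)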
@@ -0,0 +1,84 @@
"""
Internal module for console introspection
"""
import sys
import locale
from pandas.io.formats.terminal import get_terminal_size
# -----------------------------------------------------------------------------
# Global formatting options
_initial_defencoding = None
def detect_console_encoding():
"""
Try to find the most capable encoding supported by the console.
Slightly modified from the way IPython handles the same issue.
"""
global _initial_defencoding
encoding = None
try:
encoding = sys.stdout.encoding or sys.stdin.encoding
except AttributeError:
pass
# try again for something better
if not encoding or 'ascii' in encoding.lower():
try:
encoding = locale.getpreferredencoding()
except Exception:
pass
# when all else fails. this will usually be "ascii"
if not encoding or 'ascii' in encoding.lower():
encoding = sys.getdefaultencoding()
# GH3360, save the reported defencoding at import time
# MPL backends may change it. Make available for debugging.
if not _initial_defencoding:
_initial_defencoding = sys.getdefaultencoding()
return encoding
def get_console_size():
"""Return console size as tuple = (width, height).
Returns (None,None) in non-interactive session.
"""
from pandas import get_option
from pandas.core import common as com
display_width = get_option('display.width')
# deprecated.
display_height = get_option('display.max_rows')
# Consider:
# - interactive shell terminal: can detect term size
# - interactive non-shell terminal (ipnb/ipqtconsole): cannot detect term size
# - non-interactive script: should disregard term size
# In addition, width/height have default values, but setting them to 'None'
# signals that auto-detection should be used, but only in an interactive
# shell terminal.
# Simple. yeah.
if com.in_interactive_session():
if com.in_ipython_frontend():
# sane defaults for interactive non-shell terminal
# match default for width,height in config_init
from pandas.core.config import get_default_val
terminal_width = get_default_val('display.width')
terminal_height = get_default_val('display.max_rows')
else:
# pure terminal
terminal_width, terminal_height = get_terminal_size()
else:
terminal_width, terminal_height = None, None
# Note: if the user sets width/height to None (auto-detection) and we're
# in a non-interactive script, this will return (None, None); the caller
# needs to deal with that.
return (display_width or terminal_width, display_height or terminal_height)
@@ -0,0 +1,250 @@
"""Utilities for interpreting CSS from Stylers for formatting non-HTML outputs
"""
import re
import warnings
class CSSWarning(UserWarning):
"""This CSS syntax cannot currently be parsed"""
pass
class CSSResolver(object):
"""A callable for parsing and resolving CSS to atomic properties
"""
INITIAL_STYLE = {
}
def __call__(self, declarations_str, inherited=None):
""" the given declarations to atomic properties
Parameters
----------
declarations_str : str
A list of CSS declarations
inherited : dict, optional
Atomic properties indicating the inherited style context in which
declarations_str is to be resolved. ``inherited`` should already
be resolved, i.e. valid output of this method.
Returns
-------
props : dict
Atomic CSS 2.2 properties
Examples
--------
>>> resolve = CSSResolver()
>>> inherited = {'font-family': 'serif', 'font-weight': 'bold'}
>>> out = resolve('''
... border-color: BLUE RED;
... font-size: 1em;
... font-size: 2em;
... font-weight: normal;
... font-weight: inherit;
... ''', inherited)
>>> sorted(out.items()) # doctest: +NORMALIZE_WHITESPACE
[('border-bottom-color', 'blue'),
('border-left-color', 'red'),
('border-right-color', 'red'),
('border-top-color', 'blue'),
('font-family', 'serif'),
('font-size', '24pt'),
('font-weight', 'bold')]
"""
props = dict(self.atomize(self.parse(declarations_str)))
if inherited is None:
inherited = {}
# 1. resolve inherited, initial
for prop, val in inherited.items():
if prop not in props:
props[prop] = val
for prop, val in list(props.items()):
if val == 'inherit':
val = inherited.get(prop, 'initial')
if val == 'initial':
val = self.INITIAL_STYLE.get(prop)
if val is None:
# we do not define a complete initial stylesheet
del props[prop]
else:
props[prop] = val
# 2. resolve relative font size
if props.get('font-size'):
if 'font-size' in inherited:
em_pt = inherited['font-size']
assert em_pt[-2:] == 'pt'
em_pt = float(em_pt[:-2])
else:
em_pt = None
props['font-size'] = self.size_to_pt(
props['font-size'], em_pt, conversions=self.FONT_SIZE_RATIOS)
font_size = float(props['font-size'][:-2])
else:
font_size = None
# 3. TODO: resolve other font-relative units
for side in self.SIDES:
prop = 'border-{side}-width'.format(side=side)
if prop in props:
props[prop] = self.size_to_pt(
props[prop], em_pt=font_size,
conversions=self.BORDER_WIDTH_RATIOS)
for prop in ['margin-{side}'.format(side=side),
'padding-{side}'.format(side=side)]:
if prop in props:
# TODO: support %
props[prop] = self.size_to_pt(
props[prop], em_pt=font_size,
conversions=self.MARGIN_RATIOS)
return props
UNIT_RATIOS = {
'rem': ('pt', 12),
'ex': ('em', .5),
# 'ch':
'px': ('pt', .75),
'pc': ('pt', 12),
'in': ('pt', 72),
'cm': ('in', 1 / 2.54),
'mm': ('in', 1 / 25.4),
'q': ('mm', .25),
'!!default': ('em', 0),
}
FONT_SIZE_RATIOS = UNIT_RATIOS.copy()
FONT_SIZE_RATIOS.update({
'%': ('em', .01),
'xx-small': ('rem', .5),
'x-small': ('rem', .625),
'small': ('rem', .8),
'medium': ('rem', 1),
'large': ('rem', 1.125),
'x-large': ('rem', 1.5),
'xx-large': ('rem', 2),
'smaller': ('em', 1 / 1.2),
'larger': ('em', 1.2),
'!!default': ('em', 1),
})
MARGIN_RATIOS = UNIT_RATIOS.copy()
MARGIN_RATIOS.update({
'none': ('pt', 0),
})
BORDER_WIDTH_RATIOS = UNIT_RATIOS.copy()
BORDER_WIDTH_RATIOS.update({
'none': ('pt', 0),
'thick': ('px', 4),
'medium': ('px', 2),
'thin': ('px', 1),
# Default: medium only if solid
})
def size_to_pt(self, in_val, em_pt=None, conversions=UNIT_RATIOS):
def _error():
warnings.warn('Unhandled size: {val!r}'.format(val=in_val),
CSSWarning)
return self.size_to_pt('1!!default', conversions=conversions)
try:
val, unit = re.match(r'^(\S*?)([a-zA-Z%!].*)', in_val).groups()
except AttributeError:
return _error()
if val == '':
# hack for 'large' etc.
val = 1
else:
try:
val = float(val)
except ValueError:
return _error()
while unit != 'pt':
if unit == 'em':
if em_pt is None:
unit = 'rem'
else:
val *= em_pt
unit = 'pt'
continue
try:
unit, mul = conversions[unit]
except KeyError:
return _error()
val *= mul
val = round(val, 5)
if int(val) == val:
size_fmt = '{fmt:d}pt'.format(fmt=int(val))
else:
size_fmt = '{fmt:f}pt'.format(fmt=val)
return size_fmt
def atomize(self, declarations):
for prop, value in declarations:
attr = 'expand_' + prop.replace('-', '_')
try:
expand = getattr(self, attr)
except AttributeError:
yield prop, value
else:
for prop, value in expand(prop, value):
yield prop, value
SIDE_SHORTHANDS = {
1: [0, 0, 0, 0],
2: [0, 1, 0, 1],
3: [0, 1, 2, 1],
4: [0, 1, 2, 3],
}
SIDES = ('top', 'right', 'bottom', 'left')
def _side_expander(prop_fmt):
def expand(self, prop, value):
tokens = value.split()
try:
mapping = self.SIDE_SHORTHANDS[len(tokens)]
except KeyError:
warnings.warn('Could not expand "{prop}: {val}"'
.format(prop=prop, val=value), CSSWarning)
return
for key, idx in zip(self.SIDES, mapping):
yield prop_fmt.format(key), tokens[idx]
return expand
expand_border_color = _side_expander('border-{:s}-color')
expand_border_style = _side_expander('border-{:s}-style')
expand_border_width = _side_expander('border-{:s}-width')
expand_margin = _side_expander('margin-{:s}')
expand_padding = _side_expander('padding-{:s}')
def parse(self, declarations_str):
"""Generates (prop, value) pairs from declarations
In a future version may generate parsed tokens from tinycss/tinycss2
"""
for decl in declarations_str.split(';'):
if not decl.strip():
continue
prop, sep, val = decl.partition(':')
prop = prop.strip().lower()
# TODO: don't lowercase case sensitive parts of values (strings)
val = val.strip().lower()
if sep:
yield prop, val
else:
warnings.warn('Ill-formatted attribute: expected a colon '
'in {decl!r}'.format(decl=decl), CSSWarning)
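# Hedged worked examples: size_to_pt walks the UNIT_RATIOS table until it
# reaches points, and the _side_expander shorthands fan one declaration
# out to all four sides.
resolver = CSSResolver()

assert resolver.size_to_pt("16px") == "12pt"             # 16 * 0.75
assert resolver.size_to_pt("2em", em_pt=10) == "20pt"    # relative to a 10pt context
assert resolver.size_to_pt(
    "medium", conversions=CSSResolver.FONT_SIZE_RATIOS) == "12pt"

assert dict(resolver.expand_margin("margin", "1pt 2pt")) == {
    "margin-top": "1pt", "margin-right": "2pt",
    "margin-bottom": "1pt", "margin-left": "2pt"}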
@@ -0,0 +1,313 @@
# -*- coding: utf-8 -*-
"""
Module for formatting output data into CSV files.
"""
from __future__ import print_function
import warnings
import csv as csvlib
from zipfile import ZipFile
import numpy as np
from pandas.core.dtypes.missing import notna
from pandas.core.index import Index, MultiIndex
from pandas import compat
from pandas.compat import (StringIO, range, zip)
from pandas.io.common import (_get_handle, UnicodeWriter, _expand_user,
_stringify_path)
from pandas._libs import writers as libwriters
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.period import PeriodIndex
class CSVFormatter(object):
def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
float_format=None, cols=None, header=True, index=True,
index_label=None, mode='w', nanRep=None, encoding=None,
compression=None, quoting=None, line_terminator='\n',
chunksize=None, tupleize_cols=False, quotechar='"',
date_format=None, doublequote=True, escapechar=None,
decimal='.'):
self.obj = obj
if path_or_buf is None:
path_or_buf = StringIO()
self.path_or_buf = _expand_user(_stringify_path(path_or_buf))
self.sep = sep
self.na_rep = na_rep
self.float_format = float_format
self.decimal = decimal
self.header = header
self.index = index
self.index_label = index_label
self.mode = mode
self.encoding = encoding
self.compression = compression
if quoting is None:
quoting = csvlib.QUOTE_MINIMAL
self.quoting = quoting
if quoting == csvlib.QUOTE_NONE:
# prevents crash in _csv
quotechar = None
self.quotechar = quotechar
self.doublequote = doublequote
self.escapechar = escapechar
self.line_terminator = line_terminator
self.date_format = date_format
self.tupleize_cols = tupleize_cols
self.has_mi_columns = (isinstance(obj.columns, MultiIndex) and
not self.tupleize_cols)
# validate mi options
if self.has_mi_columns:
if cols is not None:
raise TypeError("cannot specify cols with a MultiIndex on the "
"columns")
if cols is not None:
if isinstance(cols, Index):
cols = cols.to_native_types(na_rep=na_rep,
float_format=float_format,
date_format=date_format,
quoting=self.quoting)
else:
cols = list(cols)
self.obj = self.obj.loc[:, cols]
# update columns to include possible multiplicity of dupes
# and make sure cols is just a list of labels
cols = self.obj.columns
if isinstance(cols, Index):
cols = cols.to_native_types(na_rep=na_rep,
float_format=float_format,
date_format=date_format,
quoting=self.quoting)
else:
cols = list(cols)
# save it
self.cols = cols
# preallocate data 2d list
self.blocks = self.obj._data.blocks
ncols = sum(b.shape[0] for b in self.blocks)
self.data = [None] * ncols
if chunksize is None:
chunksize = (100000 // (len(self.cols) or 1)) or 1
self.chunksize = int(chunksize)
self.data_index = obj.index
if (isinstance(self.data_index, (DatetimeIndex, PeriodIndex)) and
date_format is not None):
self.data_index = Index([x.strftime(date_format) if notna(x) else
'' for x in self.data_index])
self.nlevels = getattr(self.data_index, 'nlevels', 1)
if not index:
self.nlevels = 0
def save(self):
# create the writer & save
if self.encoding is None:
if compat.PY2:
encoding = 'ascii'
else:
encoding = 'utf-8'
else:
encoding = self.encoding
# GH 21227 internal compression is not used when file-like passed.
if self.compression and hasattr(self.path_or_buf, 'write'):
msg = ("compression has no effect when passing file-like "
"object as input.")
warnings.warn(msg, RuntimeWarning, stacklevel=2)
# when zip compression is called.
is_zip = isinstance(self.path_or_buf, ZipFile) or (
not hasattr(self.path_or_buf, 'write')
and self.compression == 'zip')
if is_zip:
# zipfile doesn't support writing string to archive. uses string
# buffer to receive csv writing and dump into zip compression
# file handle. GH 21241, 21118
f = StringIO()
close = False
elif hasattr(self.path_or_buf, 'write'):
f = self.path_or_buf
close = False
else:
f, handles = _get_handle(self.path_or_buf, self.mode,
encoding=encoding,
compression=self.compression)
close = True
try:
writer_kwargs = dict(lineterminator=self.line_terminator,
delimiter=self.sep, quoting=self.quoting,
doublequote=self.doublequote,
escapechar=self.escapechar,
quotechar=self.quotechar)
if encoding == 'ascii':
self.writer = csvlib.writer(f, **writer_kwargs)
else:
writer_kwargs['encoding'] = encoding
self.writer = UnicodeWriter(f, **writer_kwargs)
self._save()
finally:
if is_zip:
# GH 17778 handles zip compression separately.
buf = f.getvalue()
if hasattr(self.path_or_buf, 'write'):
self.path_or_buf.write(buf)
else:
f, handles = _get_handle(self.path_or_buf, self.mode,
encoding=encoding,
compression=self.compression)
f.write(buf)
close = True
if close:
f.close()
for _fh in handles:
_fh.close()
def _save_header(self):
writer = self.writer
obj = self.obj
index_label = self.index_label
cols = self.cols
has_mi_columns = self.has_mi_columns
header = self.header
encoded_labels = []
has_aliases = isinstance(header, (tuple, list, np.ndarray, Index))
if not (has_aliases or self.header):
return
if has_aliases:
if len(header) != len(cols):
raise ValueError(('Writing {ncols} cols but got {nalias} '
'aliases'.format(ncols=len(cols),
nalias=len(header))))
else:
write_cols = header
else:
write_cols = cols
if self.index:
# should write something for index label
if index_label is not False:
if index_label is None:
if isinstance(obj.index, MultiIndex):
index_label = []
for i, name in enumerate(obj.index.names):
if name is None:
name = ''
index_label.append(name)
else:
index_label = obj.index.name
if index_label is None:
index_label = ['']
else:
index_label = [index_label]
elif not isinstance(index_label,
(list, tuple, np.ndarray, Index)):
# given a string for a DF with Index
index_label = [index_label]
encoded_labels = list(index_label)
else:
encoded_labels = []
if not has_mi_columns or has_aliases:
encoded_labels += list(write_cols)
writer.writerow(encoded_labels)
else:
# write out the mi
columns = obj.columns
# write out the names for each level, then ALL of the values for
# each level
for i in range(columns.nlevels):
# we need at least 1 index column to write our col names
col_line = []
if self.index:
# name is the first column
col_line.append(columns.names[i])
if isinstance(index_label, list) and len(index_label) > 1:
col_line.extend([''] * (len(index_label) - 1))
col_line.extend(columns._get_level_values(i))
writer.writerow(col_line)
# Write out the index line if it's not empty.
# Otherwise, we will print out an extraneous
# blank line between the mi and the data rows.
if encoded_labels and set(encoded_labels) != set(['']):
encoded_labels.extend([''] * len(columns))
writer.writerow(encoded_labels)
def _save(self):
self._save_header()
nrows = len(self.data_index)
# write in chunksize bites
chunksize = self.chunksize
chunks = int(nrows / chunksize) + 1
for i in range(chunks):
start_i = i * chunksize
end_i = min((i + 1) * chunksize, nrows)
if start_i >= end_i:
break
self._save_chunk(start_i, end_i)
def _save_chunk(self, start_i, end_i):
data_index = self.data_index
# create the data for a chunk
slicer = slice(start_i, end_i)
for i in range(len(self.blocks)):
b = self.blocks[i]
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep,
float_format=self.float_format,
decimal=self.decimal,
date_format=self.date_format,
quoting=self.quoting)
for col_loc, col in zip(b.mgr_locs, d):
# self.data is a preallocated list
self.data[col_loc] = col
ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep,
float_format=self.float_format,
decimal=self.decimal,
date_format=self.date_format,
quoting=self.quoting)
libwriters.write_csv_rows(self.data, ix, self.nlevels,
self.cols, self.writer)
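# Hedged sketch: CSVFormatter is the engine behind DataFrame.to_csv; it
# can also be driven directly with a buffer or path, writing the header
# first and then the data in chunksize-sized blocks.
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
buf = StringIO()
CSVFormatter(df, path_or_buf=buf, index=False).save()
print(buf.getvalue())   # "a,b\n1,3\n2,4\n"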
@@ -0,0 +1,654 @@
"""Utilities for conversion to writer-agnostic Excel representation
"""
import re
import warnings
import itertools
import numpy as np
from pandas.compat import reduce
from pandas.io.formats.css import CSSResolver, CSSWarning
from pandas.io.formats.printing import pprint_thing
import pandas.core.common as com
from pandas.core.dtypes.common import is_float, is_scalar
from pandas.core.dtypes import missing
from pandas import Index, MultiIndex, PeriodIndex
from pandas.io.formats.format import get_level_lengths
class ExcelCell(object):
__fields__ = ('row', 'col', 'val', 'style', 'mergestart', 'mergeend')
__slots__ = __fields__
def __init__(self, row, col, val, style=None, mergestart=None,
mergeend=None):
self.row = row
self.col = col
self.val = val
self.style = style
self.mergestart = mergestart
self.mergeend = mergeend
header_style = {"font": {"bold": True},
"borders": {"top": "thin",
"right": "thin",
"bottom": "thin",
"left": "thin"},
"alignment": {"horizontal": "center",
"vertical": "top"}}
class CSSToExcelConverter(object):
"""A callable for converting CSS declarations to ExcelWriter styles
Supports parts of CSS 2.2, with minimal CSS 3.0 support (e.g. text-shadow),
focusing on font styling, backgrounds, borders and alignment.
Operates by first computing CSS styles in a fairly generic
way (see :meth:`compute_css`) then determining Excel style
properties from CSS properties (see :meth:`build_xlstyle`).
Parameters
----------
inherited : str, optional
CSS declarations understood to be the containing scope for the
CSS processed by :meth:`__call__`.
"""
# NB: Most of the methods here could be classmethods, as only __init__
# and __call__ make use of instance attributes. We leave them as
# instancemethods so that users can easily experiment with extensions
# without monkey-patching.
def __init__(self, inherited=None):
if inherited is not None:
inherited = self.compute_css(inherited,
self.compute_css.INITIAL_STYLE)
self.inherited = inherited
compute_css = CSSResolver()
def __call__(self, declarations_str):
"""Convert CSS declarations to ExcelWriter style
Parameters
----------
declarations_str : str
List of CSS declarations.
e.g. "font-weight: bold; background: blue"
Returns
-------
xlstyle : dict
A style as interpreted by ExcelWriter when found in
ExcelCell.style.
"""
# TODO: memoize?
properties = self.compute_css(declarations_str, self.inherited)
return self.build_xlstyle(properties)
def build_xlstyle(self, props):
out = {
'alignment': self.build_alignment(props),
'border': self.build_border(props),
'fill': self.build_fill(props),
'font': self.build_font(props),
}
# TODO: support number format
# TODO: handle cell width and height: needs support in pandas.io.excel
def remove_none(d):
"""Remove key where value is None, through nested dicts"""
for k, v in list(d.items()):
if v is None:
del d[k]
elif isinstance(v, dict):
remove_none(v)
if not v:
del d[k]
remove_none(out)
return out
VERTICAL_MAP = {
'top': 'top',
'text-top': 'top',
'middle': 'center',
'baseline': 'bottom',
'bottom': 'bottom',
'text-bottom': 'bottom',
# OpenXML also has 'justify', 'distributed'
}
def build_alignment(self, props):
# TODO: text-indent, padding-left -> alignment.indent
return {'horizontal': props.get('text-align'),
'vertical': self.VERTICAL_MAP.get(props.get('vertical-align')),
'wrap_text': (None if props.get('white-space') is None else
props['white-space'] not in
('nowrap', 'pre', 'pre-line'))
}
def build_border(self, props):
return {side: {
'style': self._border_style(props.get('border-{side}-style'
.format(side=side)),
props.get('border-{side}-width'
.format(side=side))),
'color': self.color_to_excel(
props.get('border-{side}-color'.format(side=side))),
} for side in ['top', 'right', 'bottom', 'left']}
def _border_style(self, style, width):
# convert styles and widths to openxml, one of:
# 'dashDot'
# 'dashDotDot'
# 'dashed'
# 'dotted'
# 'double'
# 'hair'
# 'medium'
# 'mediumDashDot'
# 'mediumDashDotDot'
# 'mediumDashed'
# 'slantDashDot'
# 'thick'
# 'thin'
if width is None and style is None:
return None
if style == 'none' or style == 'hidden':
return None
if width is None:
width = '2pt'
width = float(width[:-2])
if width < 1e-5:
return None
elif width < 1.3:
width_name = 'thin'
elif width < 2.8:
width_name = 'medium'
else:
width_name = 'thick'
if style in (None, 'groove', 'ridge', 'inset', 'outset'):
# not handled
style = 'solid'
if style == 'double':
return 'double'
if style == 'solid':
return width_name
if style == 'dotted':
if width_name in ('hair', 'thin'):
return 'dotted'
return 'mediumDashDotDot'
if style == 'dashed':
if width_name in ('hair', 'thin'):
return 'dashed'
return 'mediumDashed'
def build_fill(self, props):
# TODO: perhaps allow for special properties
# -excel-pattern-bgcolor and -excel-pattern-type
fill_color = props.get('background-color')
if fill_color not in (None, 'transparent', 'none'):
return {
'fgColor': self.color_to_excel(fill_color),
'patternType': 'solid',
}
BOLD_MAP = {'bold': True, 'bolder': True, '600': True, '700': True,
'800': True, '900': True,
'normal': False, 'lighter': False, '100': False, '200': False,
'300': False, '400': False, '500': False}
ITALIC_MAP = {'normal': False, 'italic': True, 'oblique': True}
def build_font(self, props):
size = props.get('font-size')
if size is not None:
assert size.endswith('pt')
size = float(size[:-2])
font_names_tmp = re.findall(r'''(?x)
(
"(?:[^"]|\\")+"
|
'(?:[^']|\\')+'
|
[^'",]+
)(?=,|\s*$)
''', props.get('font-family', ''))
font_names = []
for name in font_names_tmp:
if name[:1] == '"':
name = name[1:-1].replace('\\"', '"')
elif name[:1] == '\'':
name = name[1:-1].replace('\\\'', '\'')
else:
name = name.strip()
if name:
font_names.append(name)
family = None
for name in font_names:
if name == 'serif':
family = 1 # roman
break
elif name == 'sans-serif':
family = 2 # swiss
break
elif name == 'cursive':
family = 4 # script
break
elif name == 'fantasy':
family = 5 # decorative
break
decoration = props.get('text-decoration')
if decoration is not None:
decoration = decoration.split()
else:
decoration = ()
return {
'name': font_names[0] if font_names else None,
'family': family,
'size': size,
'bold': self.BOLD_MAP.get(props.get('font-weight')),
'italic': self.ITALIC_MAP.get(props.get('font-style')),
'underline': ('single' if
'underline' in decoration
else None),
'strike': ('line-through' in decoration) or None,
'color': self.color_to_excel(props.get('color')),
# shadow if nonzero digit before shadow color
'shadow': (bool(re.search('^[^#(]*[1-9]',
props['text-shadow']))
if 'text-shadow' in props else None),
# 'vertAlign':,
# 'charset': ,
# 'scheme': ,
# 'outline': ,
# 'condense': ,
}
NAMED_COLORS = {
'maroon': '800000',
'brown': 'A52A2A',
'red': 'FF0000',
'pink': 'FFC0CB',
'orange': 'FFA500',
'yellow': 'FFFF00',
'olive': '808000',
'green': '008000',
'purple': '800080',
'fuchsia': 'FF00FF',
'lime': '00FF00',
'teal': '008080',
'aqua': '00FFFF',
'blue': '0000FF',
'navy': '000080',
'black': '000000',
'gray': '808080',
'grey': '808080',
'silver': 'C0C0C0',
'white': 'FFFFFF',
}
def color_to_excel(self, val):
if val is None:
return None
if val.startswith('#') and len(val) == 7:
return val[1:].upper()
if val.startswith('#') and len(val) == 4:
return (val[1] * 2 + val[2] * 2 + val[3] * 2).upper()
try:
return self.NAMED_COLORS[val]
except KeyError:
warnings.warn('Unhandled color format: {val!r}'.format(val=val),
CSSWarning)
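# Illustrative usage sketch (added; not part of the original module). It shows
# the declarations-string -> ExcelWriter-style mapping documented on
# ``CSSToExcelConverter.__call__``; the CSS values are arbitrary examples and
# the printed dict is approximate.
# >>> converter = CSSToExcelConverter()
# >>> converter("font-weight: bold; text-align: center")
# {'alignment': {'horizontal': 'center'}, 'font': {'bold': True}}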
class ExcelFormatter(object):
"""
    Class for formatting a DataFrame to a list of ExcelCells.
Parameters
----------
df : DataFrame or Styler
    na_rep : string, default ''
        Missing value representation
float_format : string, default None
Format string for floating point numbers
cols : sequence, optional
Columns to write
header : boolean or list of string, default True
Write out column names. If a list of string is given it is
assumed to be aliases for the column names
index : boolean, default True
output row names (index)
index_label : string or sequence, default None
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the DataFrame uses MultiIndex.
merge_cells : boolean, default False
Format MultiIndex and Hierarchical Rows as merged cells.
inf_rep : string, default `'inf'`
representation for np.inf values (which aren't representable in Excel)
A `'-'` sign will be added in front of -inf.
style_converter : callable, optional
This translates Styler styles (CSS) into ExcelWriter styles.
Defaults to ``CSSToExcelConverter()``.
It should have signature css_declarations string -> excel style.
This is only called for body cells.
"""
def __init__(self, df, na_rep='', float_format=None, cols=None,
header=True, index=True, index_label=None, merge_cells=False,
inf_rep='inf', style_converter=None):
self.rowcounter = 0
self.na_rep = na_rep
if hasattr(df, 'render'):
self.styler = df
df = df.data
if style_converter is None:
style_converter = CSSToExcelConverter()
self.style_converter = style_converter
else:
self.styler = None
self.df = df
if cols is not None:
# all missing, raise
if not len(Index(cols) & df.columns):
raise KeyError(
"passes columns are not ALL present dataframe")
# deprecatedin gh-17295
# 1 missing is ok (for now)
if len(Index(cols) & df.columns) != len(cols):
warnings.warn(
"Not all names specified in 'columns' are found; "
"this will raise a KeyError in the future",
FutureWarning)
self.df = df.reindex(columns=cols)
self.columns = self.df.columns
self.float_format = float_format
self.index = index
self.index_label = index_label
self.header = header
self.merge_cells = merge_cells
self.inf_rep = inf_rep
def _format_value(self, val):
if is_scalar(val) and missing.isna(val):
val = self.na_rep
elif is_float(val):
if missing.isposinf_scalar(val):
val = self.inf_rep
elif missing.isneginf_scalar(val):
val = '-{inf}'.format(inf=self.inf_rep)
elif self.float_format is not None:
val = float(self.float_format % val)
return val
def _format_header_mi(self):
if self.columns.nlevels > 1:
if not self.index:
raise NotImplementedError("Writing to Excel with MultiIndex"
" columns and no index "
"('index'=False) is not yet "
"implemented.")
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
if not (has_aliases or self.header):
return
columns = self.columns
level_strs = columns.format(sparsify=self.merge_cells, adjoin=False,
names=False)
level_lengths = get_level_lengths(level_strs)
coloffset = 0
lnum = 0
if self.index and isinstance(self.df.index, MultiIndex):
coloffset = len(self.df.index[0]) - 1
if self.merge_cells:
            # Format multi-index as merged cells.
for lnum in range(len(level_lengths)):
name = columns.names[lnum]
yield ExcelCell(lnum, coloffset, name, header_style)
for lnum, (spans, levels, labels) in enumerate(zip(
level_lengths, columns.levels, columns.labels)):
values = levels.take(labels)
for i in spans:
if spans[i] > 1:
yield ExcelCell(lnum, coloffset + i + 1, values[i],
header_style, lnum,
coloffset + i + spans[i])
else:
yield ExcelCell(lnum, coloffset + i + 1, values[i],
header_style)
else:
# Format in legacy format with dots to indicate levels.
for i, values in enumerate(zip(*level_strs)):
v = ".".join(map(pprint_thing, values))
yield ExcelCell(lnum, coloffset + i + 1, v, header_style)
self.rowcounter = lnum
def _format_header_regular(self):
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
if has_aliases or self.header:
coloffset = 0
if self.index:
coloffset = 1
if isinstance(self.df.index, MultiIndex):
coloffset = len(self.df.index[0])
colnames = self.columns
if has_aliases:
if len(self.header) != len(self.columns):
raise ValueError('Writing {cols} cols but got {alias} '
'aliases'.format(cols=len(self.columns),
alias=len(self.header)))
else:
colnames = self.header
for colindex, colname in enumerate(colnames):
yield ExcelCell(self.rowcounter, colindex + coloffset, colname,
header_style)
def _format_header(self):
if isinstance(self.columns, MultiIndex):
gen = self._format_header_mi()
else:
gen = self._format_header_regular()
gen2 = ()
if self.df.index.names:
row = [x if x is not None else ''
for x in self.df.index.names] + [''] * len(self.columns)
if reduce(lambda x, y: x and y, map(lambda x: x != '', row)):
gen2 = (ExcelCell(self.rowcounter, colindex, val, header_style)
for colindex, val in enumerate(row))
self.rowcounter += 1
return itertools.chain(gen, gen2)
def _format_body(self):
if isinstance(self.df.index, MultiIndex):
return self._format_hierarchical_rows()
else:
return self._format_regular_rows()
def _format_regular_rows(self):
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
if has_aliases or self.header:
self.rowcounter += 1
# output index and index_label?
if self.index:
# check aliases
# if list only take first as this is not a MultiIndex
if (self.index_label and
isinstance(self.index_label, (list, tuple, np.ndarray,
Index))):
index_label = self.index_label[0]
# if string good to go
elif self.index_label and isinstance(self.index_label, str):
index_label = self.index_label
else:
index_label = self.df.index.names[0]
if isinstance(self.columns, MultiIndex):
self.rowcounter += 1
if index_label and self.header is not False:
yield ExcelCell(self.rowcounter - 1, 0, index_label,
header_style)
# write index_values
index_values = self.df.index
if isinstance(self.df.index, PeriodIndex):
index_values = self.df.index.to_timestamp()
for idx, idxval in enumerate(index_values):
yield ExcelCell(self.rowcounter + idx, 0, idxval, header_style)
coloffset = 1
else:
coloffset = 0
for cell in self._generate_body(coloffset):
yield cell
def _format_hierarchical_rows(self):
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
if has_aliases or self.header:
self.rowcounter += 1
gcolidx = 0
if self.index:
index_labels = self.df.index.names
# check for aliases
if (self.index_label and
isinstance(self.index_label, (list, tuple, np.ndarray,
Index))):
index_labels = self.index_label
# MultiIndex columns require an extra row
# with index names (blank if None) for
            # unambiguous round-trip, unless not merging,
# in which case the names all go on one row Issue #11328
if isinstance(self.columns, MultiIndex) and self.merge_cells:
self.rowcounter += 1
# if index labels are not empty go ahead and dump
if com._any_not_none(*index_labels) and self.header is not False:
for cidx, name in enumerate(index_labels):
yield ExcelCell(self.rowcounter - 1, cidx, name,
header_style)
if self.merge_cells:
# Format hierarchical rows as merged cells.
level_strs = self.df.index.format(sparsify=True, adjoin=False,
names=False)
level_lengths = get_level_lengths(level_strs)
for spans, levels, labels in zip(level_lengths,
self.df.index.levels,
self.df.index.labels):
values = levels.take(labels,
allow_fill=levels._can_hold_na,
fill_value=True)
for i in spans:
if spans[i] > 1:
yield ExcelCell(self.rowcounter + i, gcolidx,
values[i], header_style,
self.rowcounter + i + spans[i] - 1,
gcolidx)
else:
yield ExcelCell(self.rowcounter + i, gcolidx,
values[i], header_style)
gcolidx += 1
else:
# Format hierarchical rows with non-merged values.
for indexcolvals in zip(*self.df.index):
for idx, indexcolval in enumerate(indexcolvals):
yield ExcelCell(self.rowcounter + idx, gcolidx,
indexcolval, header_style)
gcolidx += 1
for cell in self._generate_body(gcolidx):
yield cell
def _generate_body(self, coloffset):
if self.styler is None:
styles = None
else:
styles = self.styler._compute().ctx
if not styles:
styles = None
xlstyle = None
# Write the body of the frame data series by series.
for colidx in range(len(self.columns)):
series = self.df.iloc[:, colidx]
for i, val in enumerate(series):
if styles is not None:
xlstyle = self.style_converter(';'.join(styles[i, colidx]))
yield ExcelCell(self.rowcounter + i, colidx + coloffset, val,
xlstyle)
def get_formatted_cells(self):
for cell in itertools.chain(self._format_header(),
self._format_body()):
cell.val = self._format_value(cell.val)
yield cell
def write(self, writer, sheet_name='Sheet1', startrow=0,
startcol=0, freeze_panes=None, engine=None):
"""
        Parameters
        ----------
        writer : string or ExcelWriter object
            File path or existing ExcelWriter
        sheet_name : string, default 'Sheet1'
            Name of sheet which will contain DataFrame
        startrow : int, default 0
            upper left cell row to dump data frame
        startcol : int, default 0
            upper left cell column to dump data frame
freeze_panes : tuple of integer (length 2), default None
Specifies the one-based bottommost row and rightmost column that
is to be frozen
engine : string, default None
write engine to use if writer is a path - you can also set this
via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``,
and ``io.excel.xlsm.writer``.
"""
from pandas.io.excel import ExcelWriter
from pandas.io.common import _stringify_path
if isinstance(writer, ExcelWriter):
need_save = False
else:
writer = ExcelWriter(_stringify_path(writer), engine=engine)
need_save = True
formatted_cells = self.get_formatted_cells()
writer.write_cells(formatted_cells, sheet_name,
startrow=startrow, startcol=startcol,
freeze_panes=freeze_panes)
if need_save:
writer.save()
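# Illustrative usage sketch (added; not part of the original module). The file
# name and frame are hypothetical; ``write`` accepts either a path or an
# existing ExcelWriter, as documented above, and needs an installed engine
# such as openpyxl or xlsxwriter.
# >>> import pandas as pd
# >>> df = pd.DataFrame({"a": [1, 2], "b": [3.5, float("inf")]})
# >>> ExcelFormatter(df, float_format="%.2f", inf_rep="inf").write(
# ...     "example.xlsx", sheet_name="Sheet1")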
File diff suppressed because it is too large
@@ -0,0 +1,506 @@
# -*- coding: utf-8 -*-
"""
Module for formatting output data in HTML.
"""
from __future__ import print_function
from distutils.version import LooseVersion
from textwrap import dedent
import pandas.core.common as com
from pandas.core.index import MultiIndex
from pandas import compat
from pandas.compat import (lzip, range, map, zip, u,
OrderedDict, unichr)
from pandas.core.config import get_option
from pandas.io.formats.printing import pprint_thing
from pandas.io.formats.format import (get_level_lengths,
buffer_put_lines)
from pandas.io.formats.format import TableFormatter
class HTMLFormatter(TableFormatter):
indent_delta = 2
def __init__(self, formatter, classes=None, max_rows=None, max_cols=None,
notebook=False, border=None, table_id=None):
self.fmt = formatter
self.classes = classes
self.frame = self.fmt.frame
self.columns = self.fmt.tr_frame.columns
self.elements = []
self.bold_rows = self.fmt.kwds.get('bold_rows', False)
self.escape = self.fmt.kwds.get('escape', True)
self.max_rows = max_rows or len(self.fmt.frame)
self.max_cols = max_cols or len(self.fmt.columns)
self.show_dimensions = self.fmt.show_dimensions
self.is_truncated = (self.max_rows < len(self.fmt.frame) or
self.max_cols < len(self.fmt.columns))
self.notebook = notebook
if border is None:
border = get_option('display.html.border')
self.border = border
self.table_id = table_id
def write(self, s, indent=0):
rs = pprint_thing(s)
self.elements.append(' ' * indent + rs)
def write_th(self, s, indent=0, tags=None):
if self.fmt.col_space is not None and self.fmt.col_space > 0:
tags = (tags or "")
tags += ('style="min-width: {colspace};"'
.format(colspace=self.fmt.col_space))
return self._write_cell(s, kind='th', indent=indent, tags=tags)
def write_td(self, s, indent=0, tags=None):
return self._write_cell(s, kind='td', indent=indent, tags=tags)
def _write_cell(self, s, kind='td', indent=0, tags=None):
if tags is not None:
start_tag = '<{kind} {tags}>'.format(kind=kind, tags=tags)
else:
start_tag = '<{kind}>'.format(kind=kind)
if self.escape:
# escape & first to prevent double escaping of &
esc = OrderedDict([('&', r'&amp;'), ('<', r'&lt;'),
('>', r'&gt;')])
else:
esc = {}
rs = pprint_thing(s, escape_chars=esc).strip()
self.write(u'{start}{rs}</{kind}>'
.format(start=start_tag, rs=rs, kind=kind), indent)
def write_tr(self, line, indent=0, indent_delta=4, header=False,
align=None, tags=None, nindex_levels=0):
if tags is None:
tags = {}
if align is None:
self.write('<tr>', indent)
else:
self.write('<tr style="text-align: {align};">'
.format(align=align), indent)
indent += indent_delta
for i, s in enumerate(line):
val_tag = tags.get(i, None)
if header or (self.bold_rows and i < nindex_levels):
self.write_th(s, indent, tags=val_tag)
else:
self.write_td(s, indent, tags=val_tag)
indent -= indent_delta
self.write('</tr>', indent)
def write_style(self):
# We use the "scoped" attribute here so that the desired
# style properties for the data frame are not then applied
# throughout the entire notebook.
template_first = """\
<style scoped>"""
template_last = """\
</style>"""
template_select = """\
.dataframe %s {
%s: %s;
}"""
element_props = [('tbody tr th:only-of-type',
'vertical-align',
'middle'),
('tbody tr th',
'vertical-align',
'top')]
if isinstance(self.columns, MultiIndex):
element_props.append(('thead tr th',
'text-align',
'left'))
if all((self.fmt.has_index_names,
self.fmt.index,
self.fmt.show_index_names)):
element_props.append(('thead tr:last-of-type th',
'text-align',
'right'))
else:
element_props.append(('thead th',
'text-align',
'right'))
template_mid = '\n\n'.join(map(lambda t: template_select % t,
element_props))
template = dedent('\n'.join((template_first,
template_mid,
template_last)))
if self.notebook:
self.write(template)
def write_result(self, buf):
indent = 0
id_section = ""
frame = self.frame
_classes = ['dataframe'] # Default class.
use_mathjax = get_option("display.html.use_mathjax")
if not use_mathjax:
_classes.append('tex2jax_ignore')
if self.classes is not None:
if isinstance(self.classes, str):
self.classes = self.classes.split()
if not isinstance(self.classes, (list, tuple)):
raise AssertionError('classes must be list or tuple, not {typ}'
.format(typ=type(self.classes)))
_classes.extend(self.classes)
if self.notebook:
div_style = ''
try:
import IPython
if IPython.__version__ < LooseVersion('3.0.0'):
div_style = ' style="max-width:1500px;overflow:auto;"'
except (ImportError, AttributeError):
pass
self.write('<div{style}>'.format(style=div_style))
self.write_style()
if self.table_id is not None:
id_section = ' id="{table_id}"'.format(table_id=self.table_id)
self.write('<table border="{border}" class="{cls}"{id_section}>'
.format(border=self.border, cls=' '.join(_classes),
id_section=id_section), indent)
indent += self.indent_delta
indent = self._write_header(indent)
indent = self._write_body(indent)
self.write('</table>', indent)
if self.should_show_dimensions:
by = chr(215) if compat.PY3 else unichr(215) # ×
self.write(u('<p>{rows} rows {by} {cols} columns</p>')
.format(rows=len(frame),
by=by,
cols=len(frame.columns)))
if self.notebook:
self.write('</div>')
buffer_put_lines(buf, self.elements)
def _write_header(self, indent):
truncate_h = self.fmt.truncate_h
row_levels = self.frame.index.nlevels
if not self.fmt.header:
# write nothing
return indent
def _column_header():
if self.fmt.index:
row = [''] * (self.frame.index.nlevels - 1)
else:
row = []
if isinstance(self.columns, MultiIndex):
if self.fmt.has_column_names and self.fmt.index:
row.append(single_column_table(self.columns.names))
else:
row.append('')
style = "text-align: {just};".format(just=self.fmt.justify)
row.extend([single_column_table(c, self.fmt.justify, style)
for c in self.columns])
else:
if self.fmt.index:
row.append(self.columns.name or '')
row.extend(self.columns)
return row
self.write('<thead>', indent)
row = []
indent += self.indent_delta
if isinstance(self.columns, MultiIndex):
template = 'colspan="{span:d}" halign="left"'
if self.fmt.sparsify:
# GH3547
sentinel = com.sentinel_factory()
else:
sentinel = None
levels = self.columns.format(sparsify=sentinel, adjoin=False,
names=False)
level_lengths = get_level_lengths(levels, sentinel)
inner_lvl = len(level_lengths) - 1
for lnum, (records, values) in enumerate(zip(level_lengths,
levels)):
if truncate_h:
# modify the header lines
ins_col = self.fmt.tr_col_num
if self.fmt.sparsify:
recs_new = {}
# Increment tags after ... col.
for tag, span in list(records.items()):
if tag >= ins_col:
recs_new[tag + 1] = span
elif tag + span > ins_col:
recs_new[tag] = span + 1
if lnum == inner_lvl:
values = (values[:ins_col] + (u('...'),) +
values[ins_col:])
else:
# sparse col headers do not receive a ...
values = (values[:ins_col] +
(values[ins_col - 1], ) +
values[ins_col:])
else:
recs_new[tag] = span
# if ins_col lies between tags, all col headers
# get ...
if tag + span == ins_col:
recs_new[ins_col] = 1
values = (values[:ins_col] + (u('...'),) +
values[ins_col:])
records = recs_new
inner_lvl = len(level_lengths) - 1
if lnum == inner_lvl:
records[ins_col] = 1
else:
recs_new = {}
for tag, span in list(records.items()):
if tag >= ins_col:
recs_new[tag + 1] = span
else:
recs_new[tag] = span
recs_new[ins_col] = 1
records = recs_new
values = (values[:ins_col] + [u('...')] +
values[ins_col:])
name = self.columns.names[lnum]
row = [''] * (row_levels - 1) + ['' if name is None else
pprint_thing(name)]
if row == [""] and self.fmt.index is False:
row = []
tags = {}
j = len(row)
for i, v in enumerate(values):
if i in records:
if records[i] > 1:
tags[j] = template.format(span=records[i])
else:
continue
j += 1
row.append(v)
self.write_tr(row, indent, self.indent_delta, tags=tags,
header=True)
else:
col_row = _column_header()
align = self.fmt.justify
if truncate_h:
ins_col = row_levels + self.fmt.tr_col_num
col_row.insert(ins_col, '...')
self.write_tr(col_row, indent, self.indent_delta, header=True,
align=align)
if all((self.fmt.has_index_names,
self.fmt.index,
self.fmt.show_index_names)):
row = ([x if x is not None else ''
for x in self.frame.index.names] +
[''] * min(len(self.columns), self.max_cols))
if truncate_h:
ins_col = row_levels + self.fmt.tr_col_num
row.insert(ins_col, '')
self.write_tr(row, indent, self.indent_delta, header=True)
indent -= self.indent_delta
self.write('</thead>', indent)
return indent
def _write_body(self, indent):
self.write('<tbody>', indent)
indent += self.indent_delta
fmt_values = {}
for i in range(min(len(self.columns), self.max_cols)):
fmt_values[i] = self.fmt._format_col(i)
# write values
if self.fmt.index:
if isinstance(self.frame.index, MultiIndex):
self._write_hierarchical_rows(fmt_values, indent)
else:
self._write_regular_rows(fmt_values, indent)
else:
for i in range(min(len(self.frame), self.max_rows)):
row = [fmt_values[j][i] for j in range(len(self.columns))]
self.write_tr(row, indent, self.indent_delta, tags=None)
indent -= self.indent_delta
self.write('</tbody>', indent)
indent -= self.indent_delta
return indent
def _write_regular_rows(self, fmt_values, indent):
truncate_h = self.fmt.truncate_h
truncate_v = self.fmt.truncate_v
ncols = len(self.fmt.tr_frame.columns)
nrows = len(self.fmt.tr_frame)
fmt = self.fmt._get_formatter('__index__')
if fmt is not None:
index_values = self.fmt.tr_frame.index.map(fmt)
else:
index_values = self.fmt.tr_frame.index.format()
row = []
for i in range(nrows):
if truncate_v and i == (self.fmt.tr_row_num):
str_sep_row = ['...' for ele in row]
self.write_tr(str_sep_row, indent, self.indent_delta,
tags=None, nindex_levels=1)
row = []
row.append(index_values[i])
row.extend(fmt_values[j][i] for j in range(ncols))
if truncate_h:
dot_col_ix = self.fmt.tr_col_num + 1
row.insert(dot_col_ix, '...')
self.write_tr(row, indent, self.indent_delta, tags=None,
nindex_levels=1)
def _write_hierarchical_rows(self, fmt_values, indent):
template = 'rowspan="{span}" valign="top"'
truncate_h = self.fmt.truncate_h
truncate_v = self.fmt.truncate_v
frame = self.fmt.tr_frame
ncols = len(frame.columns)
nrows = len(frame)
row_levels = self.frame.index.nlevels
idx_values = frame.index.format(sparsify=False, adjoin=False,
names=False)
idx_values = lzip(*idx_values)
if self.fmt.sparsify:
# GH3547
sentinel = com.sentinel_factory()
levels = frame.index.format(sparsify=sentinel, adjoin=False,
names=False)
level_lengths = get_level_lengths(levels, sentinel)
inner_lvl = len(level_lengths) - 1
if truncate_v:
# Insert ... row and adjust idx_values and
# level_lengths to take this into account.
ins_row = self.fmt.tr_row_num
inserted = False
for lnum, records in enumerate(level_lengths):
rec_new = {}
for tag, span in list(records.items()):
if tag >= ins_row:
rec_new[tag + 1] = span
elif tag + span > ins_row:
rec_new[tag] = span + 1
# GH 14882 - Make sure insertion done once
if not inserted:
dot_row = list(idx_values[ins_row - 1])
dot_row[-1] = u('...')
idx_values.insert(ins_row, tuple(dot_row))
inserted = True
else:
dot_row = list(idx_values[ins_row])
dot_row[inner_lvl - lnum] = u('...')
idx_values[ins_row] = tuple(dot_row)
else:
rec_new[tag] = span
# If ins_row lies between tags, all cols idx cols
# receive ...
if tag + span == ins_row:
rec_new[ins_row] = 1
if lnum == 0:
idx_values.insert(ins_row, tuple(
[u('...')] * len(level_lengths)))
# GH 14882 - Place ... in correct level
elif inserted:
dot_row = list(idx_values[ins_row])
dot_row[inner_lvl - lnum] = u('...')
idx_values[ins_row] = tuple(dot_row)
level_lengths[lnum] = rec_new
level_lengths[inner_lvl][ins_row] = 1
for ix_col in range(len(fmt_values)):
fmt_values[ix_col].insert(ins_row, '...')
nrows += 1
for i in range(nrows):
row = []
tags = {}
sparse_offset = 0
j = 0
for records, v in zip(level_lengths, idx_values[i]):
if i in records:
if records[i] > 1:
tags[j] = template.format(span=records[i])
else:
sparse_offset += 1
continue
j += 1
row.append(v)
row.extend(fmt_values[j][i] for j in range(ncols))
if truncate_h:
row.insert(row_levels - sparse_offset +
self.fmt.tr_col_num, '...')
self.write_tr(row, indent, self.indent_delta, tags=tags,
nindex_levels=len(levels) - sparse_offset)
else:
for i in range(len(frame)):
idx_values = list(zip(*frame.index.format(
sparsify=False, adjoin=False, names=False)))
row = []
row.extend(idx_values[i])
row.extend(fmt_values[j][i] for j in range(ncols))
if truncate_h:
row.insert(row_levels + self.fmt.tr_col_num, '...')
self.write_tr(row, indent, self.indent_delta, tags=None,
nindex_levels=frame.index.nlevels)
def single_column_table(column, align=None, style=None):
table = '<table'
if align is not None:
table += (' align="{align}"'.format(align=align))
if style is not None:
table += (' style="{style}"'.format(style=style))
table += '><tbody>'
for i in column:
table += ('<tr><td>{i!s}</td></tr>'.format(i=i))
table += '</tbody></table>'
return table
def single_row_table(row): # pragma: no cover
table = '<table><tbody><tr>'
for i in row:
table += ('<td>{i!s}</td>'.format(i=i))
table += '</tr></tbody></table>'
return table
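# Illustrative usage sketch (added; not part of the original module). The
# HTMLFormatter options above (classes, border, table_id) are normally reached
# through ``DataFrame.to_html``; the class name and id below are hypothetical.
# >>> import pandas as pd
# >>> df = pd.DataFrame({"a": [1, 2]})
# >>> html = df.to_html(classes="my-table", border=0, table_id="tbl1")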
@@ -0,0 +1,243 @@
# -*- coding: utf-8 -*-
"""
Module for formatting output data in Latex.
"""
from __future__ import print_function
from pandas.core.index import MultiIndex
from pandas import compat
from pandas.compat import range, map, zip, u
from pandas.io.formats.format import TableFormatter
import numpy as np
class LatexFormatter(TableFormatter):
""" Used to render a DataFrame to a LaTeX tabular/longtable environment
output.
Parameters
----------
formatter : `DataFrameFormatter`
column_format : str, default None
The columns format as specified in `LaTeX table format
<https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g 'rcl' for 3 columns
longtable : boolean, default False
Use a longtable environment instead of tabular.
See Also
--------
HTMLFormatter
"""
def __init__(self, formatter, column_format=None, longtable=False,
multicolumn=False, multicolumn_format=None, multirow=False):
self.fmt = formatter
self.frame = self.fmt.frame
self.bold_rows = self.fmt.kwds.get('bold_rows', False)
self.column_format = column_format
self.longtable = longtable
self.multicolumn = multicolumn
self.multicolumn_format = multicolumn_format
self.multirow = multirow
def write_result(self, buf):
"""
Render a DataFrame to a LaTeX tabular/longtable environment output.
"""
# string representation of the columns
if len(self.frame.columns) == 0 or len(self.frame.index) == 0:
info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}')
.format(name=type(self.frame).__name__,
col=self.frame.columns,
idx=self.frame.index))
strcols = [[info_line]]
else:
strcols = self.fmt._to_str_columns()
def get_col_type(dtype):
if issubclass(dtype.type, np.number):
return 'r'
else:
return 'l'
# reestablish the MultiIndex that has been joined by _to_str_column
if self.fmt.index and isinstance(self.frame.index, MultiIndex):
out = self.frame.index.format(
adjoin=False, sparsify=self.fmt.sparsify,
names=self.fmt.has_index_names, na_rep=self.fmt.na_rep
)
# index.format will sparsify repeated entries with empty strings
# so pad these with some empty space
def pad_empties(x):
for pad in reversed(x):
if pad:
break
return [x[0]] + [i if i else ' ' * len(pad) for i in x[1:]]
out = (pad_empties(i) for i in out)
# Add empty spaces for each column level
clevels = self.frame.columns.nlevels
out = [[' ' * len(i[-1])] * clevels + i for i in out]
# Add the column names to the last index column
cnames = self.frame.columns.names
if any(cnames):
new_names = [i if i else '{}' for i in cnames]
out[self.frame.index.nlevels - 1][:clevels] = new_names
# Get rid of old multiindex column and add new ones
strcols = out + strcols[1:]
column_format = self.column_format
if column_format is None:
dtypes = self.frame.dtypes._values
column_format = ''.join(map(get_col_type, dtypes))
if self.fmt.index:
index_format = 'l' * self.frame.index.nlevels
column_format = index_format + column_format
elif not isinstance(column_format,
compat.string_types): # pragma: no cover
raise AssertionError('column_format must be str or unicode, '
'not {typ}'.format(typ=type(column_format)))
if not self.longtable:
buf.write('\\begin{{tabular}}{{{fmt}}}\n'
.format(fmt=column_format))
buf.write('\\toprule\n')
else:
buf.write('\\begin{{longtable}}{{{fmt}}}\n'
.format(fmt=column_format))
buf.write('\\toprule\n')
ilevels = self.frame.index.nlevels
clevels = self.frame.columns.nlevels
nlevels = clevels
if self.fmt.has_index_names and self.fmt.show_index_names:
nlevels += 1
strrows = list(zip(*strcols))
self.clinebuf = []
for i, row in enumerate(strrows):
if i == nlevels and self.fmt.header:
buf.write('\\midrule\n') # End of header
if self.longtable:
buf.write('\\endhead\n')
buf.write('\\midrule\n')
buf.write('\\multicolumn{{{n}}}{{r}}{{{{Continued on next '
'page}}}} \\\\\n'.format(n=len(row)))
buf.write('\\midrule\n')
buf.write('\\endfoot\n\n')
buf.write('\\bottomrule\n')
buf.write('\\endlastfoot\n')
if self.fmt.kwds.get('escape', True):
# escape backslashes first
crow = [(x.replace('\\', '\\textbackslash ')
.replace('_', '\\_')
.replace('%', '\\%').replace('$', '\\$')
.replace('#', '\\#').replace('{', '\\{')
.replace('}', '\\}').replace('~', '\\textasciitilde ')
.replace('^', '\\textasciicircum ')
.replace('&', '\\&')
if (x and x != '{}') else '{}') for x in row]
else:
crow = [x if x else '{}' for x in row]
if self.bold_rows and self.fmt.index:
# bold row labels
crow = ['\\textbf{{{x}}}'.format(x=x)
if j < ilevels and x.strip() not in ['', '{}'] else x
for j, x in enumerate(crow)]
if i < clevels and self.fmt.header and self.multicolumn:
# sum up columns to multicolumns
crow = self._format_multicolumn(crow, ilevels)
if (i >= nlevels and self.fmt.index and self.multirow and
ilevels > 1):
# sum up rows to multirows
crow = self._format_multirow(crow, ilevels, i, strrows)
buf.write(' & '.join(crow))
buf.write(' \\\\\n')
if self.multirow and i < len(strrows) - 1:
self._print_cline(buf, i, len(strcols))
if not self.longtable:
buf.write('\\bottomrule\n')
buf.write('\\end{tabular}\n')
else:
buf.write('\\end{longtable}\n')
def _format_multicolumn(self, row, ilevels):
r"""
        Combine columns belonging to a group into a single multicolumn entry
according to self.multicolumn_format
e.g.:
a & & & b & c &
will become
\multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c}
"""
row2 = list(row[:ilevels])
ncol = 1
coltext = ''
def append_col():
# write multicolumn if needed
if ncol > 1:
row2.append('\\multicolumn{{{ncol:d}}}{{{fmt:s}}}{{{txt:s}}}'
.format(ncol=ncol, fmt=self.multicolumn_format,
txt=coltext.strip()))
# don't modify where not needed
else:
row2.append(coltext)
for c in row[ilevels:]:
# if next col has text, write the previous
if c.strip():
if coltext:
append_col()
coltext = c
ncol = 1
# if not, add it to the previous multicolumn
else:
ncol += 1
# write last column name
if coltext:
append_col()
return row2
def _format_multirow(self, row, ilevels, i, rows):
r"""
        Check following rows to decide whether this row should be a multirow
        e.g.:      becomes:
        a & 0 &    \multirow{2}{*}{a} & 0 &
          & 1 &    & 1 &
        b & 0 &    \cline{1-2}
                   b & 0 &
"""
for j in range(ilevels):
if row[j].strip():
nrow = 1
for r in rows[i + 1:]:
if not r[j].strip():
nrow += 1
else:
break
if nrow > 1:
# overwrite non-multirow entry
row[j] = '\\multirow{{{nrow:d}}}{{*}}{{{row:s}}}'.format(
nrow=nrow, row=row[j].strip())
# save when to end the current block with \cline
self.clinebuf.append([i + nrow - 1, j + 1])
return row
def _print_cline(self, buf, i, icol):
"""
Print clines after multirow-blocks are finished
"""
for cl in self.clinebuf:
if cl[0] == i:
buf.write('\\cline{{{cl:d}-{icol:d}}}\n'
.format(cl=cl[1], icol=icol))
# remove entries that have been written to buffer
self.clinebuf = [x for x in self.clinebuf if x[0] != i]
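# Illustrative usage sketch (added; not part of the original module).
# LatexFormatter is normally driven through ``DataFrame.to_latex``; the frame
# below is hypothetical and only meant to exercise the multirow/multicolumn
# branches above.
# >>> import pandas as pd
# >>> idx = pd.MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)])
# >>> df = pd.DataFrame({"x": [1, 2, 3]}, index=idx)
# >>> tex = df.to_latex(multirow=True, multicolumn=True, multicolumn_format="c")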
@@ -0,0 +1,263 @@
"""
printing tools
"""
import sys
from pandas.core.dtypes.inference import is_sequence
from pandas import compat
from pandas.compat import u
from pandas.core.config import get_option
def adjoin(space, *lists, **kwargs):
"""
Glues together two sets of strings using the amount of space requested.
The idea is to prettify.
    Parameters
    ----------
space : int
number of spaces for padding
lists : str
        list of str which are being joined
strlen : callable
function used to calculate the length of each str. Needed for unicode
handling.
justfunc : callable
function used to justify str. Needed for unicode handling.
"""
strlen = kwargs.pop('strlen', len)
justfunc = kwargs.pop('justfunc', justify)
out_lines = []
newLists = []
lengths = [max(map(strlen, x)) + space for x in lists[:-1]]
# not the last one
lengths.append(max(map(len, lists[-1])))
maxLen = max(map(len, lists))
for i, lst in enumerate(lists):
nl = justfunc(lst, lengths[i], mode='left')
nl.extend([' ' * lengths[i]] * (maxLen - len(lst)))
newLists.append(nl)
toJoin = zip(*newLists)
for lines in toJoin:
out_lines.append(_join_unicode(lines))
return _join_unicode(out_lines, sep='\n')
def justify(texts, max_len, mode='right'):
"""
Perform ljust, center, rjust against string or list-like
"""
if mode == 'left':
return [x.ljust(max_len) for x in texts]
elif mode == 'center':
return [x.center(max_len) for x in texts]
else:
return [x.rjust(max_len) for x in texts]
def _join_unicode(lines, sep=''):
try:
return sep.join(lines)
except UnicodeDecodeError:
sep = compat.text_type(sep)
return sep.join([x.decode('utf-8') if isinstance(x, str) else x
for x in lines])
# Unicode consolidation
# ---------------------
#
# pprinting utility functions for generating Unicode text or
# bytes(3.x)/str(2.x) representations of objects.
# Try to use these as much as possible rather than rolling your own.
#
# When to use
# -----------
#
# 1) If you're writing code internal to pandas (no I/O directly involved),
# use pprint_thing().
#
#    It will always return unicode text which can be handled by other
# parts of the package without breakage.
#
# 2) if you need to write something out to file, use
# pprint_thing_encoded(encoding).
#
# If no encoding is specified, it defaults to utf-8. Since encoding pure
# ascii with utf-8 is a no-op you can safely use the default utf-8 if you're
# working with straight ascii.
def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds):
"""
internal. pprinter for iterables. you should probably use pprint_thing()
    rather than calling this directly.
bounds length of printed sequence, depending on options
"""
if isinstance(seq, set):
fmt = u("{{{body}}}")
else:
fmt = u("[{body}]") if hasattr(seq, '__setitem__') else u("({body})")
if max_seq_items is False:
nitems = len(seq)
else:
nitems = max_seq_items or get_option("max_seq_items") or len(seq)
s = iter(seq)
r = []
for i in range(min(nitems, len(seq))): # handle sets, no slicing
r.append(pprint_thing(
next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds))
body = ", ".join(r)
if nitems < len(seq):
body += ", ..."
elif isinstance(seq, tuple) and len(seq) == 1:
body += ','
return fmt.format(body=body)
def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds):
"""
internal. pprinter for iterables. you should probably use pprint_thing()
    rather than calling this directly.
"""
fmt = u("{{{things}}}")
pairs = []
pfmt = u("{key}: {val}")
if max_seq_items is False:
nitems = len(seq)
else:
nitems = max_seq_items or get_option("max_seq_items") or len(seq)
for k, v in list(seq.items())[:nitems]:
pairs.append(
pfmt.format(
key=pprint_thing(k, _nest_lvl + 1,
max_seq_items=max_seq_items, **kwds),
val=pprint_thing(v, _nest_lvl + 1,
max_seq_items=max_seq_items, **kwds)))
if nitems < len(seq):
return fmt.format(things=", ".join(pairs) + ", ...")
else:
return fmt.format(things=", ".join(pairs))
def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False,
quote_strings=False, max_seq_items=None):
"""
This function is the sanctioned way of converting objects
to a unicode representation.
properly handles nested sequences containing unicode strings
(unicode(object) does not)
Parameters
----------
thing : anything to be formatted
_nest_lvl : internal use only. pprint_thing() is mutually-recursive
with pprint_sequence, this argument is used to keep track of the
current nesting level, and limit it.
escape_chars : list or dict, optional
Characters to escape. If a dict is passed the values are the
replacements
default_escapes : bool, default False
Whether the input escape characters replaces or adds to the defaults
max_seq_items : False, int, default None
        Pass through to other pretty printers to limit sequence printing
Returns
-------
result - unicode object on py2, str on py3. Always Unicode.
"""
def as_escaped_unicode(thing, escape_chars=escape_chars):
# Unicode is fine, else we try to decode using utf-8 and 'replace'
# if that's not it either, we have no way of knowing and the user
# should deal with it himself.
try:
result = compat.text_type(thing) # we should try this first
except UnicodeDecodeError:
# either utf-8 or we replace errors
result = str(thing).decode('utf-8', "replace")
translate = {'\t': r'\t', '\n': r'\n', '\r': r'\r', }
if isinstance(escape_chars, dict):
if default_escapes:
translate.update(escape_chars)
else:
translate = escape_chars
escape_chars = list(escape_chars.keys())
else:
escape_chars = escape_chars or tuple()
for c in escape_chars:
result = result.replace(c, translate[c])
return compat.text_type(result)
if (compat.PY3 and hasattr(thing, '__next__')) or hasattr(thing, 'next'):
return compat.text_type(thing)
elif (isinstance(thing, dict) and
_nest_lvl < get_option("display.pprint_nest_depth")):
result = _pprint_dict(thing, _nest_lvl, quote_strings=True,
max_seq_items=max_seq_items)
elif (is_sequence(thing) and
_nest_lvl < get_option("display.pprint_nest_depth")):
result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars,
quote_strings=quote_strings,
max_seq_items=max_seq_items)
elif isinstance(thing, compat.string_types) and quote_strings:
if compat.PY3:
fmt = u("'{thing}'")
else:
fmt = u("u'{thing}'")
result = fmt.format(thing=as_escaped_unicode(thing))
else:
result = as_escaped_unicode(thing)
return compat.text_type(result) # always unicode
def pprint_thing_encoded(object, encoding='utf-8', errors='replace', **kwds):
value = pprint_thing(object) # get unicode representation of object
return value.encode(encoding, errors, **kwds)
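# Illustrative sketch (added; not part of the original module) of the escaping
# and truncation behaviour documented above; outputs are approximate.
# >>> pprint_thing("a\tb", escape_chars=("\t",))
# 'a\\tb'
# >>> pprint_thing(list(range(10)), max_seq_items=3)
# '[0, 1, 2, ...]'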
def _enable_data_resource_formatter(enable):
if 'IPython' not in sys.modules:
# definitely not in IPython
return
from IPython import get_ipython
ip = get_ipython()
if ip is None:
# still not in IPython
return
formatters = ip.display_formatter.formatters
mimetype = "application/vnd.dataresource+json"
if enable:
if mimetype not in formatters:
# define tableschema formatter
from IPython.core.formatters import BaseFormatter
class TableSchemaFormatter(BaseFormatter):
print_method = '_repr_data_resource_'
_return_type = (dict,)
# register it:
formatters[mimetype] = TableSchemaFormatter()
# enable it if it's been disabled:
formatters[mimetype].enabled = True
else:
# unregister tableschema mime-type
if mimetype in formatters:
formatters[mimetype].enabled = False
File diff suppressed because it is too large
@@ -0,0 +1,70 @@
{# Update the template_structure.html document too #}
{%- block before_style -%}{%- endblock before_style -%}
{% block style %}
<style type="text/css" >
{% block table_styles %}
{% for s in table_styles %}
#T_{{uuid}} {{s.selector}} {
{% for p,val in s.props %}
{{p}}: {{val}};
{% endfor -%}
}
{%- endfor -%}
{% endblock table_styles %}
{% block before_cellstyle %}{% endblock before_cellstyle %}
{% block cellstyle %}
{%- for s in cellstyle %}
#T_{{uuid}}{{s.selector}} {
{% for p,val in s.props %}
{{p}}: {{val}};
{% endfor %}
}
{%- endfor -%}
{%- endblock cellstyle %}
</style>
{%- endblock style %}
{%- block before_table %}{% endblock before_table %}
{%- block table %}
<table id="T_{{uuid}}" {% if table_attributes %}{{ table_attributes }}{% endif %}>
{%- block caption %}
{%- if caption -%}
<caption>{{caption}}</caption>
{%- endif -%}
{%- endblock caption %}
{%- block thead %}
<thead>
{%- block before_head_rows %}{% endblock %}
{%- for r in head %}
{%- block head_tr scoped %}
<tr>
{%- for c in r %}
{%- if c.is_visible != False %}
<{{ c.type }} class="{{c.class}}" {{ c.attributes|join(" ") }}>{{c.value}}</{{ c.type }}>
{%- endif %}
{%- endfor %}
</tr>
{%- endblock head_tr %}
{%- endfor %}
{%- block after_head_rows %}{% endblock %}
</thead>
{%- endblock thead %}
{%- block tbody %}
<tbody>
{%- block before_rows %}{%- endblock before_rows %}
{%- for r in body %}
{%- block tr scoped %}
<tr>
{%- for c in r %}
{%- if c.is_visible != False %}
<{{ c.type }} id="T_{{ uuid }}{{ c.id }}" class="{{ c.class }}" {{ c.attributes|join(" ") }}>{{ c.display_value }}</{{ c.type }}>
{%- endif %}
{%- endfor %}
</tr>
{%- endblock tr %}
{%- endfor %}
{%- block after_rows %}{%- endblock after_rows %}
</tbody>
{%- endblock tbody %}
</table>
{%- endblock table %}
{%- block after_table %}{% endblock after_table %}
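{# Added illustrative note (not part of the original template): this template
   is rendered by the pandas Styler; a hedged Python sketch, with a
   hypothetical caption and attribute string:
   df.style.set_caption("Demo").set_table_attributes('class="demo"').render()
#}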
@@ -0,0 +1,145 @@
"""
get_terminal_size() -- return width and height of terminal as a tuple
code from:
http://stackoverflow.com/questions/566746/how-to-get-console-window-width-in-python
written by
Harco Kuppens (http://stackoverflow.com/users/825214/harco-kuppens)
It is mentioned in the stackoverflow response that this code works
on linux, os x, windows and cygwin (windows).
"""
from __future__ import print_function
import os
import shutil
from pandas.compat import PY3
__all__ = ['get_terminal_size', 'is_terminal']
def get_terminal_size():
"""
Detect terminal size and return tuple = (width, height).
Only to be used when running in a terminal. Note that the IPython notebook,
    IPython zmq frontends, or IDLE do not run in a terminal.
"""
import platform
if PY3:
return shutil.get_terminal_size()
current_os = platform.system()
tuple_xy = None
if current_os == 'Windows':
tuple_xy = _get_terminal_size_windows()
if tuple_xy is None:
tuple_xy = _get_terminal_size_tput()
        # needed for Windows' Python running inside Cygwin's xterm!
if current_os == 'Linux' or \
current_os == 'Darwin' or \
current_os.startswith('CYGWIN'):
tuple_xy = _get_terminal_size_linux()
if tuple_xy is None:
tuple_xy = (80, 25) # default value
return tuple_xy
def is_terminal():
"""
Detect if Python is running in a terminal.
Returns True if Python is running in a terminal or False if not.
"""
try:
ip = get_ipython()
except NameError: # assume standard Python interpreter in a terminal
return True
else:
if hasattr(ip, 'kernel'): # IPython as a Jupyter kernel
return False
else: # IPython in a terminal
return True
def _get_terminal_size_windows():
res = None
try:
from ctypes import windll, create_string_buffer
# stdin handle is -10
# stdout handle is -11
# stderr handle is -12
h = windll.kernel32.GetStdHandle(-12)
csbi = create_string_buffer(22)
res = windll.kernel32.GetConsoleScreenBufferInfo(h, csbi)
    except Exception:
return None
if res:
import struct
(bufx, bufy, curx, cury, wattr, left, top, right, bottom, maxx,
maxy) = struct.unpack("hhhhHhhhhhh", csbi.raw)
sizex = right - left + 1
sizey = bottom - top + 1
return sizex, sizey
else:
return None
def _get_terminal_size_tput():
# get terminal width
# src: http://stackoverflow.com/questions/263890/how-do-i-find-the-width
# -height-of-a-terminal-window
try:
import subprocess
proc = subprocess.Popen(["tput", "cols"],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
output = proc.communicate(input=None)
cols = int(output[0])
proc = subprocess.Popen(["tput", "lines"],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
output = proc.communicate(input=None)
rows = int(output[0])
return (cols, rows)
    except Exception:
return None
def _get_terminal_size_linux():
def ioctl_GWINSZ(fd):
try:
import fcntl
import termios
import struct
cr = struct.unpack(
'hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))
        except Exception:
return None
return cr
cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
if not cr:
try:
fd = os.open(os.ctermid(), os.O_RDONLY)
cr = ioctl_GWINSZ(fd)
os.close(fd)
        except Exception:
pass
if not cr or cr == (0, 0):
try:
from os import environ as env
cr = (env['LINES'], env['COLUMNS'])
        except Exception:
return None
return int(cr[1]), int(cr[0])
if __name__ == "__main__":
sizex, sizey = get_terminal_size()
print('width = {w} height = {h}'.format(w=sizex, h=sizey))
@@ -0,0 +1,119 @@
""" Google BigQuery support """
def _try_import():
# since pandas is a dependency of pandas-gbq
# we need to import on first use
try:
import pandas_gbq
except ImportError:
# give a nice error message
raise ImportError("Load data from Google BigQuery\n"
"\n"
"the pandas-gbq package is not installed\n"
"see the docs: https://pandas-gbq.readthedocs.io\n"
"\n"
"you can install via pip or conda:\n"
"pip install pandas-gbq\n"
"conda install pandas-gbq -c conda-forge\n")
return pandas_gbq
def read_gbq(query, project_id=None, index_col=None, col_order=None,
reauth=False, verbose=None, private_key=None, dialect='legacy',
**kwargs):
"""
Load data from Google BigQuery.
This function requires the `pandas-gbq package
<https://pandas-gbq.readthedocs.io>`__.
Authentication to the Google BigQuery service is via OAuth 2.0.
- If "private_key" is not provided:
By default "application default credentials" are used.
If default application credentials are not found or are restrictive,
user account credentials are used. In this case, you will be asked to
grant permissions for product name 'pandas GBQ'.
- If "private_key" is provided:
Service account credentials will be used to authenticate.
Parameters
----------
query : str
SQL-Like Query to return data values.
project_id : str
Google BigQuery Account project ID.
index_col : str, optional
Name of result column to use for index in results DataFrame.
col_order : list(str), optional
List of BigQuery column names in the desired order for results
DataFrame.
reauth : boolean, default False
Force Google BigQuery to reauthenticate the user. This is useful
if multiple accounts are used.
private_key : str, optional
Service account private key in JSON format. Can be file path
or string contents. This is useful for remote server
authentication (eg. Jupyter/IPython notebook on remote host).
dialect : str, default 'legacy'
SQL syntax dialect to use. Value can be one of:
``'legacy'``
Use BigQuery's legacy SQL dialect. For more information see
`BigQuery Legacy SQL Reference
<https://cloud.google.com/bigquery/docs/reference/legacy-sql>`__.
``'standard'``
Use BigQuery's standard SQL, which is
compliant with the SQL 2011 standard. For more information
see `BigQuery Standard SQL Reference
<https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__.
verbose : boolean, deprecated
*Deprecated in Pandas-GBQ 0.4.0.* Use the `logging module
to adjust verbosity instead
<https://pandas-gbq.readthedocs.io/en/latest/intro.html#logging>`__.
kwargs : dict
Arbitrary keyword arguments.
configuration (dict): query config parameters for job processing.
For example:
configuration = {'query': {'useQueryCache': False}}
For more information see `BigQuery SQL Reference
<https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__
Returns
-------
df: DataFrame
DataFrame representing results of query.
See Also
--------
pandas_gbq.read_gbq : This function in the pandas-gbq library.
pandas.DataFrame.to_gbq : Write a DataFrame to Google BigQuery.
"""
pandas_gbq = _try_import()
return pandas_gbq.read_gbq(
query, project_id=project_id,
index_col=index_col, col_order=col_order,
reauth=reauth, verbose=verbose,
private_key=private_key,
dialect=dialect,
**kwargs)
def to_gbq(dataframe, destination_table, project_id, chunksize=None,
verbose=None, reauth=False, if_exists='fail', private_key=None,
auth_local_webserver=False, table_schema=None):
pandas_gbq = _try_import()
return pandas_gbq.to_gbq(
dataframe, destination_table, project_id, chunksize=chunksize,
verbose=verbose, reauth=reauth, if_exists=if_exists,
private_key=private_key, auth_local_webserver=auth_local_webserver,
table_schema=table_schema)
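# Illustrative usage sketch (added; not part of the original module). The
# project id, query and configuration below are hypothetical placeholders;
# ``configuration`` is forwarded to pandas-gbq via **kwargs as documented in
# ``read_gbq``.
# >>> df = read_gbq("SELECT 1 AS x",
# ...               project_id="my-project",
# ...               dialect="standard",
# ...               configuration={"query": {"useQueryCache": False}})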
@@ -0,0 +1,987 @@
""":mod:`pandas.io.html` is a module containing functionality for dealing with
HTML IO.
"""
import os
import re
import numbers
import collections
from distutils.version import LooseVersion
import numpy as np
from pandas.core.dtypes.common import is_list_like
from pandas.errors import EmptyDataError
from pandas.io.common import _is_url, urlopen, _validate_header_arg
from pandas.io.parsers import TextParser
from pandas.compat import (lrange, lmap, u, string_types, iteritems,
raise_with_traceback, binary_type)
from pandas import Series
import pandas.core.common as com
from pandas.io.formats.printing import pprint_thing
_IMPORTS = False
_HAS_BS4 = False
_HAS_LXML = False
_HAS_HTML5LIB = False
def _importers():
# import things we need
    # but do so on a first-use basis
global _IMPORTS
if _IMPORTS:
return
global _HAS_BS4, _HAS_LXML, _HAS_HTML5LIB
try:
import bs4 # noqa
_HAS_BS4 = True
except ImportError:
pass
try:
import lxml # noqa
_HAS_LXML = True
except ImportError:
pass
try:
import html5lib # noqa
_HAS_HTML5LIB = True
except ImportError:
pass
_IMPORTS = True
#############
# READ HTML #
#############
_RE_WHITESPACE = re.compile(r'[\r\n]+|\s{2,}')
char_types = string_types + (binary_type,)
def _remove_whitespace(s, regex=_RE_WHITESPACE):
"""Replace extra whitespace inside of a string with a single space.
Parameters
----------
s : str or unicode
The string from which to remove extra whitespace.
regex : regex
The regular expression to use to remove extra whitespace.
Returns
-------
subd : str or unicode
`s` with all extra whitespace replaced with a single space.
"""
return regex.sub(' ', s.strip())
def _get_skiprows(skiprows):
"""Get an iterator given an integer, slice or container.
Parameters
----------
skiprows : int, slice, container
The iterator to use to skip rows; can also be a slice.
Raises
------
TypeError
* If `skiprows` is not a slice, integer, or Container
Returns
-------
it : iterable
A proper iterator to use to skip rows of a DataFrame.
"""
if isinstance(skiprows, slice):
return lrange(skiprows.start or 0, skiprows.stop, skiprows.step or 1)
elif isinstance(skiprows, numbers.Integral) or is_list_like(skiprows):
return skiprows
elif skiprows is None:
return 0
raise TypeError('%r is not a valid type for skipping rows' %
type(skiprows).__name__)
def _read(obj):
"""Try to read from a url, file or string.
Parameters
----------
obj : str, unicode, or file-like
Returns
-------
raw_text : str
"""
if _is_url(obj):
with urlopen(obj) as url:
text = url.read()
elif hasattr(obj, 'read'):
text = obj.read()
elif isinstance(obj, char_types):
text = obj
try:
if os.path.isfile(text):
with open(text, 'rb') as f:
return f.read()
except (TypeError, ValueError):
pass
else:
raise TypeError("Cannot read object of type %r" % type(obj).__name__)
return text
class _HtmlFrameParser(object):
"""Base class for parsers that parse HTML into DataFrames.
Parameters
----------
io : str or file-like
This can be either a string of raw HTML, a valid URL using the HTTP,
FTP, or FILE protocols or a file-like object.
match : str or regex
The text to match in the document.
attrs : dict
List of HTML <table> element attributes to match.
encoding : str
Encoding to be used by parser
displayed_only : bool
Whether or not items with "display:none" should be ignored
.. versionadded:: 0.23.0
Attributes
----------
io : str or file-like
raw HTML, URL, or file-like object
match : regex
The text to match in the raw HTML
attrs : dict-like
A dictionary of valid table attributes to use to search for table
elements.
encoding : str
Encoding to be used by parser
displayed_only : bool
Whether or not items with "display:none" should be ignored
.. versionadded:: 0.23.0
Notes
-----
To subclass this class effectively you must override the following methods:
* :func:`_build_doc`
* :func:`_text_getter`
* :func:`_parse_td`
* :func:`_parse_tables`
* :func:`_parse_tr`
* :func:`_parse_thead`
* :func:`_parse_tbody`
* :func:`_parse_tfoot`
See each method's respective documentation for details on their
functionality.
"""
def __init__(self, io, match, attrs, encoding, displayed_only):
self.io = io
self.match = match
self.attrs = attrs
self.encoding = encoding
self.displayed_only = displayed_only
def parse_tables(self):
tables = self._parse_tables(self._build_doc(), self.match, self.attrs)
return (self._build_table(table) for table in tables)
def _parse_raw_data(self, rows):
"""Parse the raw data into a list of lists.
Parameters
----------
        rows : iterable of node-like
            A list of row elements. Text extraction and column discovery are
            delegated to ``self._text_getter`` and ``self._parse_td``, which
            must be defined by subclasses.
Returns
-------
data : list of list of strings
"""
data = [[_remove_whitespace(self._text_getter(col)) for col in
self._parse_td(row)] for row in rows]
return data
def _text_getter(self, obj):
"""Return the text of an individual DOM node.
Parameters
----------
obj : node-like
A DOM node.
Returns
-------
text : str or unicode
The text from an individual DOM node.
"""
raise com.AbstractMethodError(self)
def _parse_td(self, obj):
"""Return the td elements from a row element.
Parameters
----------
obj : node-like
Returns
-------
columns : list of node-like
These are the elements of each row, i.e., the columns.
"""
raise com.AbstractMethodError(self)
def _parse_tables(self, doc, match, attrs):
"""Return all tables from the parsed DOM.
Parameters
----------
doc : tree-like
The DOM from which to parse the table element.
match : str or regular expression
The text to search for in the DOM tree.
attrs : dict
A dictionary of table attributes that can be used to disambiguate
multiple tables on a page.
Raises
------
ValueError
* If `match` does not match any text in the document.
Returns
-------
tables : list of node-like
A list of <table> elements to be parsed into raw data.
"""
raise com.AbstractMethodError(self)
def _parse_tr(self, table):
"""Return the list of row elements from the parsed table element.
Parameters
----------
table : node-like
A table element that contains row elements.
Returns
-------
rows : list of node-like
            A list of row elements of a table, usually <tr> or <th> elements.
"""
raise com.AbstractMethodError(self)
def _parse_thead(self, table):
"""Return the header of a table.
Parameters
----------
table : node-like
A table element that contains row elements.
Returns
-------
thead : node-like
A <thead>...</thead> element.
"""
raise com.AbstractMethodError(self)
def _parse_tbody(self, table):
"""Return the list of tbody elements from the parsed table element.
Parameters
----------
table : node-like
A table element that contains row elements.
Returns
-------
tbodys : list of node-like
A list of <tbody>...</tbody> elements
"""
raise com.AbstractMethodError(self)
def _parse_tfoot(self, table):
"""Return the footer of the table if any.
Parameters
----------
table : node-like
A table element that contains row elements.
Returns
-------
tfoot : node-like
A <tfoot>...</tfoot> element.
"""
raise com.AbstractMethodError(self)
def _build_doc(self):
"""Return a tree-like object that can be used to iterate over the DOM.
Returns
-------
obj : tree-like
"""
raise com.AbstractMethodError(self)
def _build_table(self, table):
header = self._parse_raw_thead(table)
body = self._parse_raw_tbody(table)
footer = self._parse_raw_tfoot(table)
return header, body, footer
def _parse_raw_thead(self, table):
thead = self._parse_thead(table)
res = []
if thead:
trs = self._parse_tr(thead[0])
for tr in trs:
cols = lmap(self._text_getter, self._parse_td(tr))
if any(col != '' for col in cols):
res.append(cols)
return res
def _parse_raw_tfoot(self, table):
tfoot = self._parse_tfoot(table)
res = []
if tfoot:
res = lmap(self._text_getter, self._parse_td(tfoot[0]))
return np.atleast_1d(
np.array(res).squeeze()) if res and len(res) == 1 else res
def _parse_raw_tbody(self, table):
tbodies = self._parse_tbody(table)
raw_data = []
if tbodies:
for tbody in tbodies:
raw_data.extend(self._parse_tr(tbody))
else:
raw_data.extend(self._parse_tr(table))
return self._parse_raw_data(raw_data)
def _handle_hidden_tables(self, tbl_list, attr_name):
"""Returns list of tables, potentially removing hidden elements
Parameters
----------
tbl_list : list of Tag or list of Element
Type of list elements will vary depending upon parser used
attr_name : str
Name of the accessor for retrieving HTML attributes
Returns
-------
list of Tag or list of Element
Return type matches `tbl_list`
"""
if not self.displayed_only:
return tbl_list
return [x for x in tbl_list if "display:none" not in
getattr(x, attr_name).get('style', '').replace(" ", "")]
class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser):
"""HTML to DataFrame parser that uses BeautifulSoup under the hood.
See Also
--------
pandas.io.html._HtmlFrameParser
pandas.io.html._LxmlFrameParser
Notes
-----
Documentation strings for this class are in the base class
:class:`pandas.io.html._HtmlFrameParser`.
"""
def __init__(self, *args, **kwargs):
super(_BeautifulSoupHtml5LibFrameParser, self).__init__(*args,
**kwargs)
from bs4 import SoupStrainer
self._strainer = SoupStrainer('table')
def _text_getter(self, obj):
return obj.text
def _parse_td(self, row):
return row.find_all(('td', 'th'))
def _parse_tr(self, element):
return element.find_all('tr')
def _parse_th(self, element):
return element.find_all('th')
def _parse_thead(self, table):
return table.find_all('thead')
def _parse_tbody(self, table):
return table.find_all('tbody')
def _parse_tfoot(self, table):
return table.find_all('tfoot')
def _parse_tables(self, doc, match, attrs):
element_name = self._strainer.name
tables = doc.find_all(element_name, attrs=attrs)
if not tables:
raise ValueError('No tables found')
result = []
unique_tables = set()
tables = self._handle_hidden_tables(tables, "attrs")
for table in tables:
if self.displayed_only:
for elem in table.find_all(
style=re.compile(r"display:\s*none")):
elem.decompose()
if (table not in unique_tables and
table.find(text=match) is not None):
result.append(table)
unique_tables.add(table)
if not result:
raise ValueError("No tables found matching pattern {patt!r}"
.format(patt=match.pattern))
return result
def _setup_build_doc(self):
raw_text = _read(self.io)
if not raw_text:
raise ValueError('No text parsed from document: {doc}'
.format(doc=self.io))
return raw_text
def _build_doc(self):
from bs4 import BeautifulSoup
return BeautifulSoup(self._setup_build_doc(), features='html5lib',
from_encoding=self.encoding)
def _build_xpath_expr(attrs):
"""Build an xpath expression to simulate bs4's ability to pass in kwargs to
search for attributes when using the lxml parser.
Parameters
----------
attrs : dict
A dict of HTML attributes. These are NOT checked for validity.
Returns
-------
expr : unicode
An XPath expression that checks for the given HTML attributes.
"""
# give class attribute as class_ because class is a python keyword
if 'class_' in attrs:
attrs['class'] = attrs.pop('class_')
s = [u("@{key}={val!r}").format(key=k, val=v) for k, v in iteritems(attrs)]
return u('[{expr}]').format(expr=' and '.join(s))
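# Illustrative note (not part of the original module): for attrs={'id': 'table'}
# the helper above builds "[@id='table']", which callers append to a broader
# XPath query, e.g.
#     >>> _build_xpath_expr({'id': 'table'})  # doctest: +SKIP
#     "[@id='table']"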
_re_namespace = {'re': 'http://exslt.org/regular-expressions'}
_valid_schemes = 'http', 'file', 'ftp'
class _LxmlFrameParser(_HtmlFrameParser):
"""HTML to DataFrame parser that uses lxml under the hood.
Warning
-------
This parser can only handle HTTP, FTP, and FILE urls.
See Also
--------
_HtmlFrameParser
_BeautifulSoupHtml5LibFrameParser
Notes
-----
Documentation strings for this class are in the base class
:class:`_HtmlFrameParser`.
"""
def __init__(self, *args, **kwargs):
super(_LxmlFrameParser, self).__init__(*args, **kwargs)
def _text_getter(self, obj):
return obj.text_content()
def _parse_td(self, row):
return row.xpath('.//td|.//th')
def _parse_tr(self, table):
return table.xpath('.//tr')
def _parse_tables(self, doc, match, kwargs):
pattern = match.pattern
# 1. check all descendants for the given pattern and only search tables
# 2. go up the tree until we find a table
query = '//table//*[re:test(text(), {patt!r})]/ancestor::table'
xpath_expr = u(query).format(patt=pattern)
# if any table attributes were given build an xpath expression to
# search for them
if kwargs:
xpath_expr += _build_xpath_expr(kwargs)
tables = doc.xpath(xpath_expr, namespaces=_re_namespace)
tables = self._handle_hidden_tables(tables, "attrib")
if self.displayed_only:
for table in tables:
# lxml utilizes XPATH 1.0 which does not have regex
# support. As a result, we find all elements with a style
# attribute and iterate them to check for display:none
for elem in table.xpath('.//*[@style]'):
if "display:none" in elem.attrib.get(
"style", "").replace(" ", ""):
elem.getparent().remove(elem)
if not tables:
raise ValueError("No tables found matching regex {patt!r}"
.format(patt=pattern))
return tables
def _build_doc(self):
"""
Raises
------
ValueError
* If a URL that lxml cannot parse is passed.
Exception
* Any other ``Exception`` thrown. For example, trying to parse a
URL that is syntactically correct on a machine with no internet
connection will fail.
See Also
--------
pandas.io.html._HtmlFrameParser._build_doc
"""
from lxml.html import parse, fromstring, HTMLParser
from lxml.etree import XMLSyntaxError
parser = HTMLParser(recover=True, encoding=self.encoding)
try:
if _is_url(self.io):
with urlopen(self.io) as f:
r = parse(f, parser=parser)
else:
# try to parse the input in the simplest way
r = parse(self.io, parser=parser)
try:
r = r.getroot()
except AttributeError:
pass
except (UnicodeDecodeError, IOError) as e:
# if the input is a blob of html goop
if not _is_url(self.io):
r = fromstring(self.io, parser=parser)
try:
r = r.getroot()
except AttributeError:
pass
else:
raise e
else:
if not hasattr(r, 'text_content'):
raise XMLSyntaxError("no text parsed from document", 0, 0, 0)
return r
def _parse_tbody(self, table):
return table.xpath('.//tbody')
def _parse_thead(self, table):
return table.xpath('.//thead')
def _parse_tfoot(self, table):
return table.xpath('.//tfoot')
def _parse_raw_thead(self, table):
expr = './/thead'
thead = table.xpath(expr)
res = []
if thead:
# Grab any directly descending table headers first
ths = thead[0].xpath('./th')
if ths:
cols = [_remove_whitespace(x.text_content()) for x in ths]
if any(col != '' for col in cols):
res.append(cols)
else:
trs = self._parse_tr(thead[0])
for tr in trs:
cols = [_remove_whitespace(x.text_content()) for x in
self._parse_td(tr)]
if any(col != '' for col in cols):
res.append(cols)
return res
def _parse_raw_tfoot(self, table):
expr = './/tfoot//th|.//tfoot//td'  # keep both branches relative to the table
return [_remove_whitespace(x.text_content()) for x in
table.xpath(expr)]
def _expand_elements(body):
lens = Series(lmap(len, body))
lens_max = lens.max()
not_max = lens[lens != lens_max]
empty = ['']
for ind, length in iteritems(not_max):
body[ind] += empty * (lens_max - length)
def _data_to_frame(**kwargs):
head, body, foot = kwargs.pop('data')
header = kwargs.pop('header')
kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])
if head:
rows = lrange(len(head))
body = head + body
if header is None: # special case when a table has <th> elements
header = 0 if rows == [0] else rows
if foot:
body += [foot]
# fill out elements of body that are "ragged"
_expand_elements(body)
tp = TextParser(body, header=header, **kwargs)
df = tp.read()
return df
_valid_parsers = {'lxml': _LxmlFrameParser, None: _LxmlFrameParser,
'html5lib': _BeautifulSoupHtml5LibFrameParser,
'bs4': _BeautifulSoupHtml5LibFrameParser}
def _parser_dispatch(flavor):
"""Choose the parser based on the input flavor.
Parameters
----------
flavor : str
The type of parser to use. This must be a valid backend.
Returns
-------
cls : _HtmlFrameParser subclass
The parser class based on the requested input flavor.
Raises
------
ValueError
* If `flavor` is not a valid backend.
ImportError
* If you do not have the requested `flavor`
"""
valid_parsers = list(_valid_parsers.keys())
if flavor not in valid_parsers:
raise ValueError('{invalid!r} is not a valid flavor, valid flavors '
'are {valid}'
.format(invalid=flavor, valid=valid_parsers))
if flavor in ('bs4', 'html5lib'):
if not _HAS_HTML5LIB:
raise ImportError("html5lib not found, please install it")
if not _HAS_BS4:
raise ImportError(
"BeautifulSoup4 (bs4) not found, please install it")
import bs4
if LooseVersion(bs4.__version__) <= LooseVersion('4.2.0'):
raise ValueError("A minimum version of BeautifulSoup 4.2.1 "
"is required")
else:
if not _HAS_LXML:
raise ImportError("lxml not found, please install it")
return _valid_parsers[flavor]
def _print_as_set(s):
return '{{{arg}}}'.format(arg=', '.join(pprint_thing(el) for el in s))
def _validate_flavor(flavor):
if flavor is None:
flavor = 'lxml', 'bs4'
elif isinstance(flavor, string_types):
flavor = flavor,
elif isinstance(flavor, collections.Iterable):
if not all(isinstance(flav, string_types) for flav in flavor):
raise TypeError('Object of type {typ!r} is not an iterable of '
'strings'
.format(typ=type(flavor).__name__))
else:
fmt = '{flavor!r}' if isinstance(flavor, string_types) else '{flavor}'
fmt += ' is not a valid flavor'
raise ValueError(fmt.format(flavor=flavor))
flavor = tuple(flavor)
valid_flavors = set(_valid_parsers)
flavor_set = set(flavor)
if not flavor_set & valid_flavors:
raise ValueError('{invalid} is not a valid set of flavors, valid '
'flavors are {valid}'
.format(invalid=_print_as_set(flavor_set),
valid=_print_as_set(valid_flavors)))
return flavor
def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
flavor = _validate_flavor(flavor)
compiled_match = re.compile(match) # you can pass a compiled regex here
# hack around python 3 deleting the exception variable
retained = None
for flav in flavor:
parser = _parser_dispatch(flav)
p = parser(io, compiled_match, attrs, encoding, displayed_only)
try:
tables = p.parse_tables()
except Exception as caught:
# if `io` is an io-like object, check if it's seekable
# and try to rewind it before trying the next parser
if hasattr(io, 'seekable') and io.seekable():
io.seek(0)
elif hasattr(io, 'seekable') and not io.seekable():
# if we couldn't rewind it, let the user know
raise ValueError('The flavor {} failed to parse your input. '
'Since you passed a non-rewindable file '
'object, we can\'t rewind it to try '
'another parser. Try read_html() with a '
'different flavor.'.format(flav))
retained = caught
else:
break
else:
raise_with_traceback(retained)
ret = []
for table in tables:
try:
ret.append(_data_to_frame(data=table, **kwargs))
except EmptyDataError: # empty table
continue
return ret
def read_html(io, match='.+', flavor=None, header=None, index_col=None,
skiprows=None, attrs=None, parse_dates=False,
tupleize_cols=None, thousands=',', encoding=None,
decimal='.', converters=None, na_values=None,
keep_default_na=True, displayed_only=True):
r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.
Parameters
----------
io : str or file-like
A URL, a file-like object, or a raw string containing HTML. Note that
lxml only accepts the http, ftp and file url protocols. If you have a
URL that starts with ``'https'`` you might try removing the ``'s'``.
match : str or compiled regular expression, optional
The set of tables containing text matching this regex or string will be
returned. Unless the HTML is extremely simple you will probably need to
pass a non-empty string here. Defaults to '.+' (match any non-empty
string). The default value will return all tables contained on a page.
This value is converted to a regular expression so that there is
consistent behavior between Beautiful Soup and lxml.
flavor : str or None, container of strings
The parsing engine to use. 'bs4' and 'html5lib' are synonymous with
each other, they are both there for backwards compatibility. The
default of ``None`` tries to use ``lxml`` to parse and if that fails it
falls back on ``bs4`` + ``html5lib``.
header : int or list-like or None, optional
The row (or list of rows for a :class:`~pandas.MultiIndex`) to use to
make the columns headers.
index_col : int or list-like or None, optional
The column (or list of columns) to use to create the index.
skiprows : int or list-like or slice or None, optional
0-based. Number of rows to skip after parsing the column header. If a
sequence of integers or a slice is given, will skip the rows indexed by
that sequence. Note that a single element sequence means 'skip the nth
row' whereas an integer means 'skip n rows'.
attrs : dict or None, optional
This is a dictionary of attributes that you can pass to use to identify
the table in the HTML. These are not checked for validity before being
passed to lxml or Beautiful Soup. However, these attributes must be
valid HTML table attributes to work correctly. For example, ::
attrs = {'id': 'table'}
is a valid attribute dictionary because the 'id' HTML tag attribute is
a valid HTML attribute for *any* HTML tag as per `this document
<http://www.w3.org/TR/html-markup/global-attributes.html>`__. ::
attrs = {'asdf': 'table'}
is *not* a valid attribute dictionary because 'asdf' is not a valid
HTML attribute even if it is a valid XML attribute. Valid HTML 4.01
table attributes can be found `here
<http://www.w3.org/TR/REC-html40/struct/tables.html#h-11.2>`__. A
working draft of the HTML 5 spec can be found `here
<http://www.w3.org/TR/html-markup/table.html>`__. It contains the
latest information on table attributes for the modern web.
parse_dates : bool, optional
See :func:`~pandas.read_csv` for more details.
tupleize_cols : bool, optional
If ``False`` try to parse multiple header rows into a
:class:`~pandas.MultiIndex`, otherwise return raw tuples. Defaults to
``False``.
.. deprecated:: 0.21.0
This argument will be removed and will always convert to MultiIndex
thousands : str, optional
Separator to use to parse thousands. Defaults to ``','``.
encoding : str or None, optional
The encoding used to decode the web page. Defaults to ``None``. ``None``
preserves the previous encoding behavior, which depends on the
underlying parser library (e.g., the parser library will try to use
the encoding provided by the document).
decimal : str, default '.'
Character to recognize as decimal point (e.g. use ',' for European
data).
.. versionadded:: 0.19.0
converters : dict, default None
Dict of functions for converting values in certain columns. Keys can
either be integers or column labels, values are functions that take one
input argument, the cell (not column) content, and return the
transformed content.
.. versionadded:: 0.19.0
na_values : iterable, default None
Custom NA values
.. versionadded:: 0.19.0
keep_default_na : bool, default True
If na_values are specified and keep_default_na is False the default NaN
values are overridden, otherwise they're appended to
.. versionadded:: 0.19.0
displayed_only : bool, default True
Whether elements with "display: none" should be parsed
.. versionadded:: 0.23.0
Returns
-------
dfs : list of DataFrames
Notes
-----
Before using this function you should read the :ref:`gotchas about the
HTML parsing libraries <io.html.gotchas>`.
Expect to do some cleanup after you call this function. For example, you
might need to manually assign column names if the column names are
converted to NaN when you pass the `header=0` argument. We try to assume as
little as possible about the structure of the table and push the
idiosyncrasies of the HTML contained in the table to the user.
This function searches for ``<table>`` elements and only for ``<tr>``
and ``<th>`` rows and ``<td>`` elements within each ``<tr>`` or ``<th>``
element in the table. ``<td>`` stands for "table data".
Similar to :func:`~pandas.read_csv` the `header` argument is applied
**after** `skiprows` is applied.
This function will *always* return a list of :class:`DataFrame` *or*
it will fail, e.g., it will *not* return an empty list.
Examples
--------
See the :ref:`read_html documentation in the IO section of the docs
<io.read_html>` for some examples of reading in HTML tables.
See Also
--------
pandas.read_csv
"""
_importers()
# Type check here. We don't want to parse only to fail because of an
# invalid value of an integer skiprows.
if isinstance(skiprows, numbers.Integral) and skiprows < 0:
raise ValueError('cannot skip rows starting from the end of the '
'data (you passed a negative value)')
_validate_header_arg(header)
return _parse(flavor=flavor, io=io, match=match, header=header,
index_col=index_col, skiprows=skiprows,
parse_dates=parse_dates, tupleize_cols=tupleize_cols,
thousands=thousands, attrs=attrs, encoding=encoding,
decimal=decimal, converters=converters, na_values=na_values,
keep_default_na=keep_default_na,
displayed_only=displayed_only)
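# Usage sketch (illustrative only; the HTML snippet is hypothetical):
#     >>> html = ('<table><tr><th>a</th><th>b</th></tr>'
#     ...         '<tr><td>1</td><td>2</td></tr></table>')
#     >>> read_html(html)[0]  # doctest: +SKIP
#        a  b
#     0  1  2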
@@ -0,0 +1,5 @@
from .json import to_json, read_json, loads, dumps # noqa
from .normalize import json_normalize # noqa
from .table_schema import build_table_schema # noqa
del json, normalize, table_schema # noqa
@@ -0,0 +1,929 @@
# pylint: disable-msg=E1101,W0613,W0603
from itertools import islice
import os
import numpy as np
import pandas._libs.json as json
from pandas._libs.tslib import iNaT
from pandas.compat import StringIO, long, u, to_str
from pandas import compat, isna
from pandas import Series, DataFrame, to_datetime, MultiIndex
from pandas.io.common import (get_filepath_or_buffer, _get_handle,
_infer_compression, _stringify_path,
BaseIterator)
from pandas.io.parsers import _validate_integer
import pandas.core.common as com
from pandas.core.reshape.concat import concat
from pandas.io.formats.printing import pprint_thing
from .normalize import _convert_to_line_delimits
from .table_schema import build_table_schema, parse_table_schema
from pandas.core.dtypes.common import is_period_dtype
loads = json.loads
dumps = json.dumps
TABLE_SCHEMA_VERSION = '0.20.0'
# interface to/from
def to_json(path_or_buf, obj, orient=None, date_format='epoch',
double_precision=10, force_ascii=True, date_unit='ms',
default_handler=None, lines=False, compression=None,
index=True):
if not index and orient not in ['split', 'table']:
raise ValueError("'index=False' is only valid when 'orient' is "
"'split' or 'table'")
path_or_buf = _stringify_path(path_or_buf)
if lines and orient != 'records':
raise ValueError(
"'lines' keyword only valid when 'orient' is records")
if orient == 'table' and isinstance(obj, Series):
obj = obj.to_frame(name=obj.name or 'values')
if orient == 'table' and isinstance(obj, DataFrame):
writer = JSONTableWriter
elif isinstance(obj, Series):
writer = SeriesWriter
elif isinstance(obj, DataFrame):
writer = FrameWriter
else:
raise NotImplementedError("'obj' should be a Series or a DataFrame")
s = writer(
obj, orient=orient, date_format=date_format,
double_precision=double_precision, ensure_ascii=force_ascii,
date_unit=date_unit, default_handler=default_handler,
index=index).write()
if lines:
s = _convert_to_line_delimits(s)
if isinstance(path_or_buf, compat.string_types):
fh, handles = _get_handle(path_or_buf, 'w', compression=compression)
try:
fh.write(s)
finally:
fh.close()
elif path_or_buf is None:
return s
else:
path_or_buf.write(s)
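# Illustrative sketch (not part of the original module): this helper backs
# ``DataFrame.to_json``/``Series.to_json``; with ``path_or_buf=None`` it
# returns the serialized string, e.g.
#     >>> to_json(None, DataFrame({'a': [1, 2]}))  # doctest: +SKIP
#     '{"a":{"0":1,"1":2}}'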
class Writer(object):
def __init__(self, obj, orient, date_format, double_precision,
ensure_ascii, date_unit, index, default_handler=None):
self.obj = obj
if orient is None:
orient = self._default_orient
self.orient = orient
self.date_format = date_format
self.double_precision = double_precision
self.ensure_ascii = ensure_ascii
self.date_unit = date_unit
self.default_handler = default_handler
self.index = index
self.is_copy = None
self._format_axes()
def _format_axes(self):
raise com.AbstractMethodError(self)
def write(self):
return self._write(self.obj, self.orient, self.double_precision,
self.ensure_ascii, self.date_unit,
self.date_format == 'iso', self.default_handler)
def _write(self, obj, orient, double_precision, ensure_ascii,
date_unit, iso_dates, default_handler):
return dumps(
obj,
orient=orient,
double_precision=double_precision,
ensure_ascii=ensure_ascii,
date_unit=date_unit,
iso_dates=iso_dates,
default_handler=default_handler
)
class SeriesWriter(Writer):
_default_orient = 'index'
def _format_axes(self):
if not self.obj.index.is_unique and self.orient == 'index':
raise ValueError("Series index must be unique for orient="
"'{orient}'".format(orient=self.orient))
def _write(self, obj, orient, double_precision, ensure_ascii,
date_unit, iso_dates, default_handler):
if not self.index and orient == 'split':
obj = {"name": obj.name, "data": obj.values}
return super(SeriesWriter, self)._write(obj, orient,
double_precision,
ensure_ascii, date_unit,
iso_dates, default_handler)
class FrameWriter(Writer):
_default_orient = 'columns'
def _format_axes(self):
""" try to axes if they are datelike """
if not self.obj.index.is_unique and self.orient in (
'index', 'columns'):
raise ValueError("DataFrame index must be unique for orient="
"'{orient}'.".format(orient=self.orient))
if not self.obj.columns.is_unique and self.orient in (
'index', 'columns', 'records'):
raise ValueError("DataFrame columns must be unique for orient="
"'{orient}'.".format(orient=self.orient))
def _write(self, obj, orient, double_precision, ensure_ascii,
date_unit, iso_dates, default_handler):
if not self.index and orient == 'split':
obj = obj.to_dict(orient='split')
del obj["index"]
return super(FrameWriter, self)._write(obj, orient,
double_precision,
ensure_ascii, date_unit,
iso_dates, default_handler)
class JSONTableWriter(FrameWriter):
_default_orient = 'records'
def __init__(self, obj, orient, date_format, double_precision,
ensure_ascii, date_unit, index, default_handler=None):
"""
Adds a `schema` attribute with the Table Schema, resets
the index (can't do in caller, because the schema inference needs
to know what the index is, forces orient to records, and forces
date_format to 'iso'.
"""
super(JSONTableWriter, self).__init__(
obj, orient, date_format, double_precision, ensure_ascii,
date_unit, index, default_handler=default_handler)
if date_format != 'iso':
msg = ("Trying to write with `orient='table'` and "
"`date_format='{fmt}'`. Table Schema requires dates "
"to be formatted with `date_format='iso'`"
.format(fmt=date_format))
raise ValueError(msg)
self.schema = build_table_schema(obj, index=self.index)
# NotImplemented on a column MultiIndex
if obj.ndim == 2 and isinstance(obj.columns, MultiIndex):
raise NotImplementedError(
"orient='table' is not supported for MultiIndex")
# TODO: Do this timedelta properly in objToJSON.c See GH #15137
if ((obj.ndim == 1) and (obj.name in set(obj.index.names)) or
len(obj.columns & obj.index.names)):
msg = "Overlapping names between the index and columns"
raise ValueError(msg)
obj = obj.copy()
timedeltas = obj.select_dtypes(include=['timedelta']).columns
if len(timedeltas):
obj[timedeltas] = obj[timedeltas].applymap(
lambda x: x.isoformat())
# Convert PeriodIndex to datetimes before serializing
if is_period_dtype(obj.index):
obj.index = obj.index.to_timestamp()
# exclude index from obj if index=False
if not self.index:
self.obj = obj.reset_index(drop=True)
else:
self.obj = obj.reset_index(drop=False)
self.date_format = 'iso'
self.orient = 'records'
self.index = index
def _write(self, obj, orient, double_precision, ensure_ascii,
date_unit, iso_dates, default_handler):
data = super(JSONTableWriter, self)._write(obj, orient,
double_precision,
ensure_ascii, date_unit,
iso_dates,
default_handler)
serialized = '{{"schema": {schema}, "data": {data}}}'.format(
schema=dumps(self.schema), data=data)
return serialized
def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
convert_axes=True, convert_dates=True, keep_default_dates=True,
numpy=False, precise_float=False, date_unit=None, encoding=None,
lines=False, chunksize=None, compression='infer'):
"""
Convert a JSON string to pandas object
Parameters
----------
path_or_buf : a valid JSON string or file-like, default: None
The string could be a URL. Valid URL schemes include http, ftp, s3, and
file. For file URLs, a host is expected. For instance, a local file
could be ``file://localhost/path/to/table.json``
orient : string,
Indication of expected JSON string format.
Compatible JSON strings can be produced by ``to_json()`` with a
corresponding orient value.
The set of possible orients is:
- ``'split'`` : dict like
``{index -> [index], columns -> [columns], data -> [values]}``
- ``'records'`` : list like
``[{column -> value}, ... , {column -> value}]``
- ``'index'`` : dict like ``{index -> {column -> value}}``
- ``'columns'`` : dict like ``{column -> {index -> value}}``
- ``'values'`` : just the values array
The allowed and default values depend on the value
of the `typ` parameter.
* when ``typ == 'series'``,
- allowed orients are ``{'split','records','index'}``
- default is ``'index'``
- The Series index must be unique for orient ``'index'``.
* when ``typ == 'frame'``,
- allowed orients are ``{'split','records','index',
'columns','values', 'table'}``
- default is ``'columns'``
- The DataFrame index must be unique for orients ``'index'`` and
``'columns'``.
- The DataFrame columns must be unique for orients ``'index'``,
``'columns'``, and ``'records'``.
.. versionadded:: 0.23.0
'table' as an allowed value for the ``orient`` argument
typ : type of object to recover (series or frame), default 'frame'
dtype : boolean or dict, default True
If True, infer dtypes; if a dict of column to dtype, then use those;
if False, then don't infer dtypes at all. Applies only to the data.
convert_axes : boolean, default True
Try to convert the axes to the proper dtypes.
convert_dates : boolean or list of str, default True
If True, try to parse the default datelike columns; if a list of
column names, parse those columns for dates. A column label is datelike if
* it ends with ``'_at'``,
* it ends with ``'_time'``,
* it begins with ``'timestamp'``,
* it is ``'modified'``, or
* it is ``'date'``
keep_default_dates : boolean, default True
If parsing dates, then parse the default datelike columns
numpy : boolean, default False
Direct decoding to numpy arrays. Supports numeric data only, but
non-numeric column and index labels are supported. Note also that the
JSON ordering MUST be the same for each term if numpy=True.
precise_float : boolean, default False
Set to enable usage of higher precision (strtod) function when
decoding string to double values. Default (False) is to use fast but
less precise builtin functionality
date_unit : string, default None
The timestamp unit to detect if converting dates. The default behaviour
is to try and detect the correct precision, but if this is not desired
then pass one of 's', 'ms', 'us' or 'ns' to force parsing only seconds,
milliseconds, microseconds or nanoseconds respectively.
lines : boolean, default False
Read the file as a json object per line.
.. versionadded:: 0.19.0
encoding : str, default is 'utf-8'
The encoding to use to decode py3 bytes.
.. versionadded:: 0.19.0
chunksize: integer, default None
Return JsonReader object for iteration.
See the `line-delimited json docs
<http://pandas.pydata.org/pandas-docs/stable/io.html#io-jsonl>`_
for more information on ``chunksize``.
This can only be passed if `lines=True`.
If this is None, the file will be read into memory all at once.
.. versionadded:: 0.21.0
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
For on-the-fly decompression of on-disk data. If 'infer', then use
gzip, bz2, zip or xz if path_or_buf is a string ending in
'.gz', '.bz2', '.zip', or '.xz', respectively, and no decompression
otherwise. If using 'zip', the ZIP file must contain only one data
file to be read in. Set to None for no decompression.
.. versionadded:: 0.21.0
Returns
-------
result : Series or DataFrame, depending on the value of `typ`.
Notes
-----
Specific to ``orient='table'``, if a :class:`DataFrame` with a literal
:class:`Index` name of `index` gets written with :func:`to_json`, the
subsequent read operation will incorrectly set the :class:`Index` name to
``None``. This is because `index` is also used by :func:`DataFrame.to_json`
to denote a missing :class:`Index` name, and the subsequent
:func:`read_json` operation cannot distinguish between the two. The same
limitation is encountered with a :class:`MultiIndex` and any names
beginning with ``'level_'``.
See Also
--------
DataFrame.to_json
Examples
--------
>>> df = pd.DataFrame([['a', 'b'], ['c', 'd']],
... index=['row 1', 'row 2'],
... columns=['col 1', 'col 2'])
Encoding/decoding a DataFrame using ``'split'`` formatted JSON:
>>> df.to_json(orient='split')
'{"columns":["col 1","col 2"],
"index":["row 1","row 2"],
"data":[["a","b"],["c","d"]]}'
>>> pd.read_json(_, orient='split')
col 1 col 2
row 1 a b
row 2 c d
Encoding/decoding a DataFrame using ``'index'`` formatted JSON:
>>> df.to_json(orient='index')
'{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}'
>>> pd.read_json(_, orient='index')
col 1 col 2
row 1 a b
row 2 c d
Encoding/decoding a DataFrame using ``'records'`` formatted JSON.
Note that index labels are not preserved with this encoding.
>>> df.to_json(orient='records')
'[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]'
>>> pd.read_json(_, orient='records')
col 1 col 2
0 a b
1 c d
Encoding with Table Schema
>>> df.to_json(orient='table')
'{"schema": {"fields": [{"name": "index", "type": "string"},
{"name": "col 1", "type": "string"},
{"name": "col 2", "type": "string"}],
"primaryKey": "index",
"pandas_version": "0.20.0"},
"data": [{"index": "row 1", "col 1": "a", "col 2": "b"},
{"index": "row 2", "col 1": "c", "col 2": "d"}]}'
"""
compression = _infer_compression(path_or_buf, compression)
filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
path_or_buf, encoding=encoding, compression=compression,
)
json_reader = JsonReader(
filepath_or_buffer, orient=orient, typ=typ, dtype=dtype,
convert_axes=convert_axes, convert_dates=convert_dates,
keep_default_dates=keep_default_dates, numpy=numpy,
precise_float=precise_float, date_unit=date_unit, encoding=encoding,
lines=lines, chunksize=chunksize, compression=compression,
)
if chunksize:
return json_reader
result = json_reader.read()
if should_close:
try:
filepath_or_buffer.close()
except: # noqa: flake8
pass
return result
class JsonReader(BaseIterator):
"""
JsonReader provides an interface for reading in a JSON file.
If initialized with ``lines=True`` and ``chunksize``, can be iterated over
``chunksize`` lines at a time. Otherwise, calling ``read`` reads in the
whole document.
"""
def __init__(self, filepath_or_buffer, orient, typ, dtype, convert_axes,
convert_dates, keep_default_dates, numpy, precise_float,
date_unit, encoding, lines, chunksize, compression):
self.path_or_buf = filepath_or_buffer
self.orient = orient
self.typ = typ
self.dtype = dtype
self.convert_axes = convert_axes
self.convert_dates = convert_dates
self.keep_default_dates = keep_default_dates
self.numpy = numpy
self.precise_float = precise_float
self.date_unit = date_unit
self.encoding = encoding
self.compression = compression
self.lines = lines
self.chunksize = chunksize
self.nrows_seen = 0
self.should_close = False
if self.chunksize is not None:
self.chunksize = _validate_integer("chunksize", self.chunksize, 1)
if not self.lines:
raise ValueError("chunksize can only be passed if lines=True")
data = self._get_data_from_filepath(filepath_or_buffer)
self.data = self._preprocess_data(data)
def _preprocess_data(self, data):
"""
At this point, the data either has a `read` attribute (e.g. a file
object or a StringIO) or is a string that is a JSON document.
If self.chunksize, we prepare the data for the `__next__` method.
Otherwise, we read it into memory for the `read` method.
"""
if hasattr(data, 'read') and not self.chunksize:
data = data.read()
if not hasattr(data, 'read') and self.chunksize:
data = StringIO(data)
return data
def _get_data_from_filepath(self, filepath_or_buffer):
"""
read_json accepts three input types:
1. filepath (string-like)
2. file-like object (e.g. open file object, StringIO)
3. JSON string
This method turns (1) into (2) to simplify the rest of the processing.
It returns input types (2) and (3) unchanged.
"""
data = filepath_or_buffer
exists = False
if isinstance(data, compat.string_types):
try:
exists = os.path.exists(filepath_or_buffer)
# gh-5874: if the filepath is too long will raise here
except (TypeError, ValueError):
pass
if exists or self.compression is not None:
data, _ = _get_handle(filepath_or_buffer, 'r',
encoding=self.encoding,
compression=self.compression)
self.should_close = True
self.open_stream = data
return data
def _combine_lines(self, lines):
"""Combines a list of JSON objects into one JSON object"""
lines = filter(None, map(lambda x: x.strip(), lines))
return '[' + ','.join(lines) + ']'
def read(self):
"""Read the whole JSON input into a pandas object"""
if self.lines and self.chunksize:
obj = concat(self)
elif self.lines:
data = to_str(self.data)
obj = self._get_object_parser(
self._combine_lines(data.split('\n'))
)
else:
obj = self._get_object_parser(self.data)
self.close()
return obj
def _get_object_parser(self, json):
"""parses a json document into a pandas object"""
typ = self.typ
dtype = self.dtype
kwargs = {
"orient": self.orient, "dtype": self.dtype,
"convert_axes": self.convert_axes,
"convert_dates": self.convert_dates,
"keep_default_dates": self.keep_default_dates, "numpy": self.numpy,
"precise_float": self.precise_float, "date_unit": self.date_unit
}
obj = None
if typ == 'frame':
obj = FrameParser(json, **kwargs).parse()
if typ == 'series' or obj is None:
if not isinstance(dtype, bool):
dtype = dict(data=dtype)
obj = SeriesParser(json, **kwargs).parse()
return obj
def close(self):
"""
If we opened a stream earlier, in _get_data_from_filepath, we should
close it. If an open stream or file was passed, we leave it open.
"""
if self.should_close:
try:
self.open_stream.close()
except (IOError, AttributeError):
pass
def __next__(self):
lines = list(islice(self.data, self.chunksize))
if lines:
lines_json = self._combine_lines(lines)
obj = self._get_object_parser(lines_json)
# Make sure that the returned objects have the right index.
obj.index = range(self.nrows_seen, self.nrows_seen + len(obj))
self.nrows_seen += len(obj)
return obj
self.close()
raise StopIteration
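# Usage sketch (illustrative; the line-delimited input is hypothetical): with
# ``lines=True`` and a ``chunksize``, ``read_json`` returns a JsonReader and
# each iteration yields ``chunksize`` rows, e.g.
#     >>> data = StringIO('{"a": 1}\n{"a": 2}\n{"a": 3}\n')
#     >>> [len(chunk) for chunk in read_json(data, lines=True, chunksize=2)]  # doctest: +SKIP
#     [2, 1]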
class Parser(object):
_STAMP_UNITS = ('s', 'ms', 'us', 'ns')
_MIN_STAMPS = {
's': long(31536000),
'ms': long(31536000000),
'us': long(31536000000000),
'ns': long(31536000000000000)}
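# note (added): _MIN_STAMPS holds roughly one year's worth of each unit past
# the epoch; in _try_convert_to_date, if any numeric value is at or below this
# threshold (and is not NaN/iNaT) the column is left unconverted rather than
# treated as epoch timestamps.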
def __init__(self, json, orient, dtype=True, convert_axes=True,
convert_dates=True, keep_default_dates=False, numpy=False,
precise_float=False, date_unit=None):
self.json = json
if orient is None:
orient = self._default_orient
self.orient = orient
self.dtype = dtype
if orient == "split":
numpy = False
if date_unit is not None:
date_unit = date_unit.lower()
if date_unit not in self._STAMP_UNITS:
raise ValueError('date_unit must be one of {units}'
.format(units=self._STAMP_UNITS))
self.min_stamp = self._MIN_STAMPS[date_unit]
else:
self.min_stamp = self._MIN_STAMPS['s']
self.numpy = numpy
self.precise_float = precise_float
self.convert_axes = convert_axes
self.convert_dates = convert_dates
self.date_unit = date_unit
self.keep_default_dates = keep_default_dates
self.obj = None
def check_keys_split(self, decoded):
"checks that dict has only the appropriate keys for orient='split'"
bad_keys = set(decoded.keys()).difference(set(self._split_keys))
if bad_keys:
bad_keys = ", ".join(bad_keys)
raise ValueError(u("JSON data had unexpected key(s): {bad_keys}")
.format(bad_keys=pprint_thing(bad_keys)))
def parse(self):
# try numpy
numpy = self.numpy
if numpy:
self._parse_numpy()
else:
self._parse_no_numpy()
if self.obj is None:
return None
if self.convert_axes:
self._convert_axes()
self._try_convert_types()
return self.obj
def _convert_axes(self):
""" try to convert axes """
for axis in self.obj._AXIS_NUMBERS.keys():
new_axis, result = self._try_convert_data(
axis, self.obj._get_axis(axis), use_dtypes=False,
convert_dates=True)
if result:
setattr(self.obj, axis, new_axis)
def _try_convert_types(self):
raise com.AbstractMethodError(self)
def _try_convert_data(self, name, data, use_dtypes=True,
convert_dates=True):
""" try to parse a ndarray like into a column by inferring dtype """
# don't try to coerce, unless a force conversion
if use_dtypes:
if self.dtype is False:
return data, False
elif self.dtype is True:
pass
else:
# dtype to force
dtype = (self.dtype.get(name)
if isinstance(self.dtype, dict) else self.dtype)
if dtype is not None:
try:
dtype = np.dtype(dtype)
return data.astype(dtype), True
except (TypeError, ValueError):
return data, False
if convert_dates:
new_data, result = self._try_convert_to_date(data)
if result:
return new_data, True
result = False
if data.dtype == 'object':
# try float
try:
data = data.astype('float64')
result = True
except (TypeError, ValueError):
pass
if data.dtype.kind == 'f':
if data.dtype != 'float64':
# coerce floats to 64
try:
data = data.astype('float64')
result = True
except (TypeError, ValueError):
pass
# don't coerce 0-len data
if len(data) and (data.dtype == 'float' or data.dtype == 'object'):
# coerce ints if we can
try:
new_data = data.astype('int64')
if (new_data == data).all():
data = new_data
result = True
except (TypeError, ValueError):
pass
# coerce ints to 64
if data.dtype == 'int':
# coerce to int64
try:
data = data.astype('int64')
result = True
except (TypeError, ValueError):
pass
return data, result
def _try_convert_to_date(self, data):
""" try to parse a ndarray like into a date column
try to coerce object in epoch/iso formats and
integer/float in epcoh formats, return a boolean if parsing
was successful """
# no conversion on empty
if not len(data):
return data, False
new_data = data
if new_data.dtype == 'object':
try:
new_data = data.astype('int64')
except (TypeError, ValueError, OverflowError):
pass
# ignore numbers that are out of range
if issubclass(new_data.dtype.type, np.number):
in_range = (isna(new_data.values) | (new_data > self.min_stamp) |
(new_data.values == iNaT))
if not in_range.all():
return data, False
date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS
for date_unit in date_units:
try:
new_data = to_datetime(new_data, errors='raise',
unit=date_unit)
except ValueError:
continue
except Exception:
break
return new_data, True
return data, False
def _try_convert_dates(self):
raise com.AbstractMethodError(self)
class SeriesParser(Parser):
_default_orient = 'index'
_split_keys = ('name', 'index', 'data')
def _parse_no_numpy(self):
json = self.json
orient = self.orient
if orient == "split":
decoded = {str(k): v for k, v in compat.iteritems(
loads(json, precise_float=self.precise_float))}
self.check_keys_split(decoded)
self.obj = Series(dtype=None, **decoded)
else:
self.obj = Series(
loads(json, precise_float=self.precise_float), dtype=None)
def _parse_numpy(self):
json = self.json
orient = self.orient
if orient == "split":
decoded = loads(json, dtype=None, numpy=True,
precise_float=self.precise_float)
decoded = {str(k): v for k, v in compat.iteritems(decoded)}
self.check_keys_split(decoded)
self.obj = Series(**decoded)
elif orient == "columns" or orient == "index":
self.obj = Series(*loads(json, dtype=None, numpy=True,
labelled=True,
precise_float=self.precise_float))
else:
self.obj = Series(loads(json, dtype=None, numpy=True,
precise_float=self.precise_float))
def _try_convert_types(self):
if self.obj is None:
return
obj, result = self._try_convert_data(
'data', self.obj, convert_dates=self.convert_dates)
if result:
self.obj = obj
class FrameParser(Parser):
_default_orient = 'columns'
_split_keys = ('columns', 'index', 'data')
def _parse_numpy(self):
json = self.json
orient = self.orient
if orient == "columns":
args = loads(json, dtype=None, numpy=True, labelled=True,
precise_float=self.precise_float)
if len(args):
args = (args[0].T, args[2], args[1])
self.obj = DataFrame(*args)
elif orient == "split":
decoded = loads(json, dtype=None, numpy=True,
precise_float=self.precise_float)
decoded = {str(k): v for k, v in compat.iteritems(decoded)}
self.check_keys_split(decoded)
self.obj = DataFrame(**decoded)
elif orient == "values":
self.obj = DataFrame(loads(json, dtype=None, numpy=True,
precise_float=self.precise_float))
else:
self.obj = DataFrame(*loads(json, dtype=None, numpy=True,
labelled=True,
precise_float=self.precise_float))
def _parse_no_numpy(self):
json = self.json
orient = self.orient
if orient == "columns":
self.obj = DataFrame(
loads(json, precise_float=self.precise_float), dtype=None)
elif orient == "split":
decoded = {str(k): v for k, v in compat.iteritems(
loads(json, precise_float=self.precise_float))}
self.check_keys_split(decoded)
self.obj = DataFrame(dtype=None, **decoded)
elif orient == "index":
self.obj = DataFrame(
loads(json, precise_float=self.precise_float), dtype=None).T
elif orient == 'table':
self.obj = parse_table_schema(json,
precise_float=self.precise_float)
else:
self.obj = DataFrame(
loads(json, precise_float=self.precise_float), dtype=None)
def _process_converter(self, f, filt=None):
""" take a conversion function and possibly recreate the frame """
if filt is None:
filt = lambda col, c: True
needs_new_obj = False
new_obj = dict()
for i, (col, c) in enumerate(self.obj.iteritems()):
if filt(col, c):
new_data, result = f(col, c)
if result:
c = new_data
needs_new_obj = True
new_obj[i] = c
if needs_new_obj:
# possibly handle dup columns
new_obj = DataFrame(new_obj, index=self.obj.index)
new_obj.columns = self.obj.columns
self.obj = new_obj
def _try_convert_types(self):
if self.obj is None:
return
if self.convert_dates:
self._try_convert_dates()
self._process_converter(
lambda col, c: self._try_convert_data(col, c, convert_dates=False))
def _try_convert_dates(self):
if self.obj is None:
return
# our columns to parse
convert_dates = self.convert_dates
if convert_dates is True:
convert_dates = []
convert_dates = set(convert_dates)
def is_ok(col):
""" return if this col is ok to try for a date parse """
if not isinstance(col, compat.string_types):
return False
col_lower = col.lower()
if (col_lower.endswith('_at') or
col_lower.endswith('_time') or
col_lower == 'modified' or
col_lower == 'date' or
col_lower == 'datetime' or
col_lower.startswith('timestamp')):
return True
return False
self._process_converter(
lambda col, c: self._try_convert_to_date(c),
lambda col, c: ((self.keep_default_dates and is_ok(col)) or
col in convert_dates))
@@ -0,0 +1,281 @@
# ---------------------------------------------------------------------
# JSON normalization routines
import copy
from collections import defaultdict
import numpy as np
from pandas._libs.writers import convert_json_to_lines
from pandas import compat, DataFrame
def _convert_to_line_delimits(s):
"""Helper function that converts json lists to line delimited json."""
# Determine whether we have a JSON list to turn into lines; otherwise just
# return the json object as-is (only lists can be converted)
if not (s[0] == '[' and s[-1] == ']'):
return s
s = s[1:-1]
return convert_json_to_lines(s)
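# Illustrative note (not part of the original module): a top-level JSON array
# such as '[{"a":1},{"a":2}]' has its enclosing brackets stripped and is handed
# to ``convert_json_to_lines``, which emits one JSON object per line; input
# that is not a bracketed list is returned unchanged.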
def nested_to_record(ds, prefix="", sep=".", level=0):
"""a simplified json_normalize
converts a nested dict into a flat dict ("record"), unlike json_normalize,
it does not attempt to extract a subset of the data.
Parameters
----------
ds : dict or list of dicts
prefix: the prefix, optional, default: ""
sep : string, default '.'
Nested records will generate names separated by sep,
e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar
.. versionadded:: 0.20.0
level : int, optional, default 0
The number of levels in the json string.
Returns
-------
d - dict or list of dicts, matching `ds`
Examples
--------
IN[52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2),
nested=dict(e=dict(c=1,d=2),d=2)))
Out[52]:
{'dict1.c': 1,
'dict1.d': 2,
'flat1': 1,
'nested.d': 2,
'nested.e.c': 1,
'nested.e.d': 2}
"""
singleton = False
if isinstance(ds, dict):
ds = [ds]
singleton = True
new_ds = []
for d in ds:
new_d = copy.deepcopy(d)
for k, v in d.items():
# each key gets renamed with prefix
if not isinstance(k, compat.string_types):
k = str(k)
if level == 0:
newkey = k
else:
newkey = prefix + sep + k
# only dicts get recurse-flattened
# only at level>1 do we rename the rest of the keys
if not isinstance(v, dict):
if level != 0: # so we skip copying for top level, common case
v = new_d.pop(k)
new_d[newkey] = v
continue
else:
v = new_d.pop(k)
new_d.update(nested_to_record(v, newkey, sep, level + 1))
new_ds.append(new_d)
if singleton:
return new_ds[0]
return new_ds
def json_normalize(data, record_path=None, meta=None,
meta_prefix=None,
record_prefix=None,
errors='raise',
sep='.'):
"""
"Normalize" semi-structured JSON data into a flat table
Parameters
----------
data : dict or list of dicts
Unserialized JSON objects
record_path : string or list of strings, default None
Path in each object to list of records. If not passed, data will be
assumed to be an array of records
meta : list of paths (string or list of strings), default None
Fields to use as metadata for each record in resulting table
record_prefix : string, default None
If not None, prefix the record field names with this string, e.g.
'foo.bar.' yields columns like foo.bar.field when the path to records
is ['foo', 'bar']
meta_prefix : string, default None
If not None, prefix the meta field names with this string
errors : {'raise', 'ignore'}, default 'raise'
* 'ignore' : will ignore KeyError if keys listed in meta are not
always present
* 'raise' : will raise KeyError if keys listed in meta are not
always present
.. versionadded:: 0.20.0
sep : string, default '.'
Nested records will generate names separated by sep,
e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar
.. versionadded:: 0.20.0
Returns
-------
frame : DataFrame
Examples
--------
>>> from pandas.io.json import json_normalize
>>> data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}},
... {'name': {'given': 'Mose', 'family': 'Regner'}},
... {'id': 2, 'name': 'Faye Raker'}]
>>> json_normalize(data)
id name name.family name.first name.given name.last
0 1.0 NaN NaN Coleen NaN Volk
1 NaN NaN Regner NaN Mose NaN
2 2.0 Faye Raker NaN NaN NaN NaN
>>> data = [{'state': 'Florida',
... 'shortname': 'FL',
... 'info': {
... 'governor': 'Rick Scott'
... },
... 'counties': [{'name': 'Dade', 'population': 12345},
... {'name': 'Broward', 'population': 40000},
... {'name': 'Palm Beach', 'population': 60000}]},
... {'state': 'Ohio',
... 'shortname': 'OH',
... 'info': {
... 'governor': 'John Kasich'
... },
... 'counties': [{'name': 'Summit', 'population': 1234},
... {'name': 'Cuyahoga', 'population': 1337}]}]
>>> result = json_normalize(data, 'counties', ['state', 'shortname',
... ['info', 'governor']])
>>> result
name population info.governor state shortname
0 Dade 12345 Rick Scott Florida FL
1 Broward 40000 Rick Scott Florida FL
2 Palm Beach 60000 Rick Scott Florida FL
3 Summit 1234 John Kasich Ohio OH
4 Cuyahoga 1337 John Kasich Ohio OH
>>> data = {'A': [1, 2]}
>>> json_normalize(data, 'A', record_prefix='Prefix.')
Prefix.0
0 1
1 2
"""
def _pull_field(js, spec):
result = js
if isinstance(spec, list):
for field in spec:
result = result[field]
else:
result = result[spec]
return result
if isinstance(data, list) and not data:
return DataFrame()
# A bit of a hackjob
if isinstance(data, dict):
data = [data]
if record_path is None:
if any([[isinstance(x, dict)
for x in compat.itervalues(y)] for y in data]):
# naive normalization, this is idempotent for flat records
# and potentially will inflate the data considerably for
# deeply nested structures:
# {VeryLong: {b: 1, c: 2}} -> {VeryLong.b: 1, VeryLong.c: 2}
#
# TODO: handle record value which are lists, at least error
# reasonably
data = nested_to_record(data, sep=sep)
return DataFrame(data)
elif not isinstance(record_path, list):
record_path = [record_path]
if meta is None:
meta = []
elif not isinstance(meta, list):
meta = [meta]
meta = [m if isinstance(m, list) else [m] for m in meta]
# Disastrously inefficient for now
records = []
lengths = []
meta_vals = defaultdict(list)
if not isinstance(sep, compat.string_types):
sep = str(sep)
meta_keys = [sep.join(val) for val in meta]
def _recursive_extract(data, path, seen_meta, level=0):
if len(path) > 1:
for obj in data:
for val, key in zip(meta, meta_keys):
if level + 1 == len(val):
seen_meta[key] = _pull_field(obj, val[-1])
_recursive_extract(obj[path[0]], path[1:],
seen_meta, level=level + 1)
else:
for obj in data:
recs = _pull_field(obj, path[0])
# For repeating the metadata later
lengths.append(len(recs))
for val, key in zip(meta, meta_keys):
if level + 1 > len(val):
meta_val = seen_meta[key]
else:
try:
meta_val = _pull_field(obj, val[level:])
except KeyError as e:
if errors == 'ignore':
meta_val = np.nan
else:
raise \
KeyError("Try running with "
"errors='ignore' as key "
"{err} is not always present"
.format(err=e))
meta_vals[key].append(meta_val)
records.extend(recs)
_recursive_extract(data, record_path, {}, level=0)
result = DataFrame(records)
if record_prefix is not None:
result = result.rename(
columns=lambda x: "{p}{c}".format(p=record_prefix, c=x))
# Data types, a problem
for k, v in compat.iteritems(meta_vals):
if meta_prefix is not None:
k = meta_prefix + k
if k in result:
raise ValueError('Conflicting metadata name {name}, '
'need distinguishing prefix '.format(name=k))
result[k] = np.array(v).repeat(lengths)
return result
@@ -0,0 +1,324 @@
"""
Table Schema builders
http://specs.frictionlessdata.io/json-table-schema/
"""
import warnings
import pandas._libs.json as json
from pandas import DataFrame
from pandas.api.types import CategoricalDtype
import pandas.core.common as com
from pandas.core.dtypes.common import (
is_integer_dtype, is_timedelta64_dtype, is_numeric_dtype,
is_bool_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
is_categorical_dtype, is_period_dtype, is_string_dtype
)
loads = json.loads
def as_json_table_type(x):
"""
Convert a NumPy / pandas type to its corresponding json_table.
Parameters
----------
x : array or dtype
Returns
-------
t : str
the Table Schema data types
Notes
-----
This table shows the relationship between NumPy / pandas dtypes,
and Table Schema dtypes.
===============  =================
Pandas type      Table Schema type
===============  =================
int64            integer
float64          number
bool             boolean
datetime64[ns]   datetime
timedelta64[ns]  duration
object           string
categorical      any
===============  =================
"""
if is_integer_dtype(x):
return 'integer'
elif is_bool_dtype(x):
return 'boolean'
elif is_numeric_dtype(x):
return 'number'
elif (is_datetime64_dtype(x) or is_datetime64tz_dtype(x) or
is_period_dtype(x)):
return 'datetime'
elif is_timedelta64_dtype(x):
return 'duration'
elif is_categorical_dtype(x):
return 'any'
elif is_string_dtype(x):
return 'string'
else:
return 'any'
def set_default_names(data):
"""Sets index names to 'index' for regular, or 'level_x' for Multi"""
if com._all_not_none(*data.index.names):
nms = data.index.names
if len(nms) == 1 and data.index.name == 'index':
warnings.warn("Index name of 'index' is not round-trippable")
elif len(nms) > 1 and any(x.startswith('level_') for x in nms):
warnings.warn("Index names beginning with 'level_' are not "
"round-trippable")
return data
data = data.copy()
if data.index.nlevels > 1:
names = [name if name is not None else 'level_{}'.format(i)
for i, name in enumerate(data.index.names)]
data.index.names = names
else:
data.index.name = data.index.name or 'index'
return data
def convert_pandas_type_to_json_field(arr, dtype=None):
dtype = dtype or arr.dtype
if arr.name is None:
name = 'values'
else:
name = arr.name
field = {'name': name,
'type': as_json_table_type(dtype)}
if is_categorical_dtype(arr):
if hasattr(arr, 'categories'):
cats = arr.categories
ordered = arr.ordered
else:
cats = arr.cat.categories
ordered = arr.cat.ordered
field['constraints'] = {"enum": list(cats)}
field['ordered'] = ordered
elif is_period_dtype(arr):
field['freq'] = arr.freqstr
elif is_datetime64tz_dtype(arr):
if hasattr(arr, 'dt'):
field['tz'] = arr.dt.tz.zone
else:
field['tz'] = arr.tz.zone
return field
def convert_json_field_to_pandas_type(field):
"""
Converts a JSON field descriptor into its corresponding NumPy / pandas type
Parameters
----------
field
A JSON field descriptor
Returns
-------
dtype
Raises
------
ValueError
If the type of the provided field is unknown or currently unsupported
Examples
--------
>>> convert_json_field_to_pandas_type({'name': 'an_int',
'type': 'integer'})
'int64'
>>> convert_json_field_to_pandas_type({'name': 'a_categorical',
'type': 'any',
'constraints': {'enum': [
'a', 'b', 'c']},
'ordered': True})
'CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)'
>>> convert_json_field_to_pandas_type({'name': 'a_datetime',
'type': 'datetime'})
'datetime64[ns]'
>>> convert_json_field_to_pandas_type({'name': 'a_datetime_with_tz',
'type': 'datetime',
'tz': 'US/Central'})
'datetime64[ns, US/Central]'
"""
typ = field['type']
if typ == 'string':
return 'object'
elif typ == 'integer':
return 'int64'
elif typ == 'number':
return 'float64'
elif typ == 'boolean':
return 'bool'
elif typ == 'duration':
return 'timedelta64'
elif typ == 'datetime':
if field.get('tz'):
return 'datetime64[ns, {tz}]'.format(tz=field['tz'])
else:
return 'datetime64[ns]'
elif typ == 'any':
if 'constraints' in field and 'ordered' in field:
return CategoricalDtype(categories=field['constraints']['enum'],
ordered=field['ordered'])
else:
return 'object'
raise ValueError("Unsupported or invalid field type: {}".format(typ))
def build_table_schema(data, index=True, primary_key=None, version=True):
"""
Create a Table schema from ``data``.
Parameters
----------
data : Series, DataFrame
index : bool, default True
Whether to include ``data.index`` in the schema.
primary_key : bool or None, default None
Column names to designate as the primary key.
The default `None` will set `'primaryKey'` to the index
level or levels if the index is unique.
version : bool, default True
Whether to include a field `pandas_version` with the version
of pandas that generated the schema.
Returns
-------
schema : dict
Examples
--------
>>> df = pd.DataFrame(
... {'A': [1, 2, 3],
... 'B': ['a', 'b', 'c'],
... 'C': pd.date_range('2016-01-01', freq='d', periods=3),
... }, index=pd.Index(range(3), name='idx'))
>>> build_table_schema(df)
{'fields': [{'name': 'idx', 'type': 'integer'},
{'name': 'A', 'type': 'integer'},
{'name': 'B', 'type': 'string'},
{'name': 'C', 'type': 'datetime'}],
'pandas_version': '0.20.0',
'primaryKey': ['idx']}
Notes
-----
See `as_json_table_type` for conversion types.
Timedeltas are converted to ISO 8601 duration format with
9 decimal places after the seconds field for nanosecond precision.
Categoricals are converted to the `any` dtype, and use the `enum` field
constraint to list the allowed values. The `ordered` attribute is included
in an `ordered` field.
"""
if index is True:
data = set_default_names(data)
schema = {}
fields = []
if index:
if data.index.nlevels > 1:
for level in data.index.levels:
fields.append(convert_pandas_type_to_json_field(level))
else:
fields.append(convert_pandas_type_to_json_field(data.index))
if data.ndim > 1:
for column, s in data.iteritems():
fields.append(convert_pandas_type_to_json_field(s))
else:
fields.append(convert_pandas_type_to_json_field(data))
schema['fields'] = fields
if index and data.index.is_unique and primary_key is None:
if data.index.nlevels == 1:
schema['primaryKey'] = [data.index.name]
else:
schema['primaryKey'] = data.index.names
elif primary_key is not None:
schema['primaryKey'] = primary_key
if version:
schema['pandas_version'] = '0.20.0'
return schema
def parse_table_schema(json, precise_float):
"""
Builds a DataFrame from a given schema
Parameters
----------
json : str
A JSON table schema
precise_float : boolean
Flag controlling precision when decoding string to double values, as
dictated by ``read_json``
Returns
-------
df : DataFrame
Raises
------
NotImplementedError
If the JSON table schema contains either timezone or timedelta data
Notes
-----
Because :func:`DataFrame.to_json` uses the string 'index' to denote a
name-less :class:`Index`, this function sets the name of the returned
:class:`DataFrame` to ``None`` when said string is encountered with a
normal :class:`Index`. For a :class:`MultiIndex`, the same limitation
applies to any strings beginning with 'level_'. Therefore, an
:class:`Index` name of 'index' and :class:`MultiIndex` names starting
with 'level_' are not supported.
See also
--------
build_table_schema : inverse function
pandas.read_json
"""
table = loads(json, precise_float=precise_float)
col_order = [field['name'] for field in table['schema']['fields']]
df = DataFrame(table['data'], columns=col_order)[col_order]
dtypes = {field['name']: convert_json_field_to_pandas_type(field)
for field in table['schema']['fields']}
# Cannot directly use as_type with timezone data on object; raise for now
if any(str(x).startswith('datetime64[ns, ') for x in dtypes.values()):
raise NotImplementedError('table="orient" can not yet read timezone '
'data')
# No ISO constructor for Timedelta as of yet, so need to raise
if 'timedelta64' in dtypes.values():
raise NotImplementedError('table="orient" can not yet read '
'ISO-formatted Timedelta data')
df = df.astype(dtypes)
df = df.set_index(table['schema']['primaryKey'])
if len(df.index.names) == 1:
if df.index.name == 'index':
df.index.name = None
else:
df.index.names = [None if x.startswith('level_') else x for x in
df.index.names]
return df
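# Round-trip sketch (illustrative only; assumes pandas imported as ``pd``):
#     >>> df = pd.DataFrame({'a': [1, 2]}, index=pd.Index(['x', 'y'], name='idx'))
#     >>> pd.read_json(df.to_json(orient='table'), orient='table').equals(df)  # doctest: +SKIP
#     True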
@@ -0,0 +1,50 @@
# coding: utf-8
from collections import namedtuple
from pandas.io.msgpack.exceptions import * # noqa
from pandas.io.msgpack._version import version # noqa
class ExtType(namedtuple('ExtType', 'code data')):
"""ExtType represents ext type in msgpack."""
def __new__(cls, code, data):
if not isinstance(code, int):
raise TypeError("code must be int")
if not isinstance(data, bytes):
raise TypeError("data must be bytes")
if not 0 <= code <= 127:
raise ValueError("code must be 0~127")
return super(ExtType, cls).__new__(cls, code, data)
import os # noqa
from pandas.io.msgpack._packer import Packer # noqa
from pandas.io.msgpack._unpacker import unpack, unpackb, Unpacker # noqa
def pack(o, stream, **kwargs):
"""
Pack object `o` and write it to `stream`
See :class:`Packer` for options.
"""
packer = Packer(**kwargs)
stream.write(packer.pack(o))
def packb(o, **kwargs):
"""
Pack object `o` and return packed bytes
See :class:`Packer` for options.
"""
return Packer(**kwargs).pack(o)
# alias for compatibility to simplejson/marshal/pickle.
load = unpack
loads = unpackb
dump = pack
dumps = packb
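# --- Illustrative usage sketch (not part of the original module) ------------
# A minimal pack/unpack round trip through the thin wrappers above, following
# the vendored msgpack 0.4.x API; the helper name below is hypothetical.
def _msgpack_roundtrip_example():
    from pandas.io.msgpack import packb, unpackb

    raw = packb({'a': 1, 'b': 'text'}, use_bin_type=True)
    # With an explicit encoding the keys/values come back as str objects.
    return unpackb(raw, encoding='utf-8')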
@@ -0,0 +1 @@
version = (0, 4, 6)
@@ -0,0 +1,32 @@
class UnpackException(Exception):
pass
class BufferFull(UnpackException):
pass
class OutOfData(UnpackException):
pass
class UnpackValueError(UnpackException, ValueError):
pass
class ExtraData(ValueError):
def __init__(self, unpacked, extra):
self.unpacked = unpacked
self.extra = extra
def __str__(self):
return "unpack(b) received extra data."
class PackException(Exception):
pass
class PackValueError(PackException, ValueError):
pass
@@ -0,0 +1,820 @@
"""
Msgpack serializer support for reading and writing pandas data structures
to disk
portions of msgpack_numpy package, by Lev Givon were incorporated
into this module (and tests_packers.py)
License
=======
Copyright (c) 2013, Lev Givon.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of Lev Givon nor the names of any
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
from datetime import datetime, date, timedelta
from dateutil.parser import parse
import os
from textwrap import dedent
import warnings
import numpy as np
from pandas import compat
from pandas.compat import u, u_safe
from pandas.core.dtypes.common import (
is_categorical_dtype, is_object_dtype,
needs_i8_conversion, pandas_dtype)
from pandas import (Timestamp, Period, Series, DataFrame, # noqa
Index, MultiIndex, Float64Index, Int64Index,
Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT,
Categorical, CategoricalIndex, IntervalIndex, Interval,
TimedeltaIndex)
from pandas.core.sparse.api import SparseSeries, SparseDataFrame
from pandas.core.sparse.array import BlockIndex, IntIndex
from pandas.core.generic import NDFrame
from pandas.errors import PerformanceWarning
from pandas.io.common import get_filepath_or_buffer, _stringify_path
from pandas.core.internals import BlockManager, make_block, _safe_reshape
import pandas.core.internals as internals
from pandas.io.msgpack import Unpacker as _Unpacker, Packer as _Packer, ExtType
from pandas.util._move import (
BadMove as _BadMove,
move_into_mutable_buffer as _move_into_mutable_buffer,
)
# check which compression libs we have installed
try:
import zlib
def _check_zlib():
pass
except ImportError:
def _check_zlib():
raise ImportError('zlib is not installed')
_check_zlib.__doc__ = dedent(
"""\
Check if zlib is installed.
Raises
------
ImportError
Raised when zlib is not installed.
""",
)
try:
import blosc
def _check_blosc():
pass
except ImportError:
def _check_blosc():
raise ImportError('blosc is not installed')
_check_blosc.__doc__ = dedent(
"""\
Check if blosc is installed.
Raises
------
ImportError
Raised when blosc is not installed.
""",
)
# until we can pass this into our conversion functions,
# this is pretty hacky
compressor = None
def to_msgpack(path_or_buf, *args, **kwargs):
"""
msgpack (serialize) object to input file path
THIS IS AN EXPERIMENTAL LIBRARY and the storage format
may not be stable until a future release.
Parameters
----------
path_or_buf : string File path, buffer-like, or None
if None, return generated string
args : an object or objects to serialize
encoding: encoding for unicode objects
append : boolean whether to append to an existing msgpack
(default is False)
compress : type of compressor (zlib or blosc), default to None (no
compression)
"""
global compressor
compressor = kwargs.pop('compress', None)
if compressor:
compressor = u(compressor)
append = kwargs.pop('append', None)
if append:
mode = 'a+b'
else:
mode = 'wb'
def writer(fh):
for a in args:
fh.write(pack(a, **kwargs))
path_or_buf = _stringify_path(path_or_buf)
if isinstance(path_or_buf, compat.string_types):
with open(path_or_buf, mode) as fh:
writer(fh)
elif path_or_buf is None:
buf = compat.BytesIO()
writer(buf)
return buf.getvalue()
else:
writer(path_or_buf)
def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs):
"""
Load msgpack pandas object from the specified
file path
THIS IS AN EXPERIMENTAL LIBRARY and the storage format
may not be stable until a future release.
Parameters
----------
path_or_buf : string File path, BytesIO like or string
encoding: Encoding for decoding msgpack str type
iterator : boolean, if True, return an iterator to the unpacker
(default is False)
Returns
-------
obj : type of object stored in file
"""
path_or_buf, _, _, should_close = get_filepath_or_buffer(path_or_buf)
if iterator:
return Iterator(path_or_buf)
def read(fh):
unpacked = list(unpack(fh, encoding=encoding, **kwargs))
if len(unpacked) == 1:
return unpacked[0]
if should_close:
try:
path_or_buf.close()
except: # noqa: flake8
pass
return unpacked
# see if we have an actual file
if isinstance(path_or_buf, compat.string_types):
try:
exists = os.path.exists(path_or_buf)
except (TypeError, ValueError):
exists = False
if exists:
with open(path_or_buf, 'rb') as fh:
return read(fh)
if isinstance(path_or_buf, compat.binary_type):
# treat as a binary-like
fh = None
try:
# We can't distinguish between a path and a buffer of bytes in
# Python 2 so instead assume the first byte of a valid path is
# less than 0x80.
if compat.PY3 or ord(path_or_buf[0]) >= 0x80:
fh = compat.BytesIO(path_or_buf)
return read(fh)
finally:
if fh is not None:
fh.close()
elif hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read):
# treat as a buffer like
return read(path_or_buf)
raise ValueError('path_or_buf needs to be a string file path or file-like')
dtype_dict = {21: np.dtype('M8[ns]'),
u('datetime64[ns]'): np.dtype('M8[ns]'),
u('datetime64[us]'): np.dtype('M8[us]'),
22: np.dtype('m8[ns]'),
u('timedelta64[ns]'): np.dtype('m8[ns]'),
u('timedelta64[us]'): np.dtype('m8[us]'),
# this is platform int, which we need to remap to np.int64
# for compat on windows platforms
7: np.dtype('int64'),
'category': 'category'
}
def dtype_for(t):
""" return my dtype mapping, whether number or name """
if t in dtype_dict:
return dtype_dict[t]
return np.typeDict.get(t, t)
c2f_dict = {'complex': np.float64,
'complex128': np.float64,
'complex64': np.float32}
# numpy 1.6.1 compat
if hasattr(np, 'float128'):
c2f_dict['complex256'] = np.float128
def c2f(r, i, ctype_name):
"""
Convert strings to complex number instance with specified numpy type.
"""
ftype = c2f_dict[ctype_name]
return np.typeDict[ctype_name](ftype(r) + 1j * ftype(i))
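# --- Illustrative usage sketch (not part of the original module) ------------
# c2f rebuilds a numpy complex scalar from the repr'd real/imag strings that
# encode() emits for np_scalar payloads; assumes a numpy version where
# np.typeDict is still available. The helper name is hypothetical.
def _c2f_example():
    return c2f('1.0', '2.0', 'complex128')   # numpy complex128 equal to (1+2j)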
def convert(values):
""" convert the numpy values to a list """
dtype = values.dtype
if is_categorical_dtype(values):
return values
elif is_object_dtype(dtype):
return values.ravel().tolist()
if needs_i8_conversion(dtype):
values = values.view('i8')
v = values.ravel()
if compressor == 'zlib':
_check_zlib()
# return string arrays like they are
if dtype == np.object_:
return v.tolist()
# convert to a bytes array
v = v.tostring()
return ExtType(0, zlib.compress(v))
elif compressor == 'blosc':
_check_blosc()
# return string arrays like they are
if dtype == np.object_:
return v.tolist()
# convert to a bytes array
v = v.tostring()
return ExtType(0, blosc.compress(v, typesize=dtype.itemsize))
# ndarray (on original dtype)
return ExtType(0, v.tostring())
def unconvert(values, dtype, compress=None):
as_is_ext = isinstance(values, ExtType) and values.code == 0
if as_is_ext:
values = values.data
if is_categorical_dtype(dtype):
return values
elif is_object_dtype(dtype):
return np.array(values, dtype=object)
dtype = pandas_dtype(dtype).base
if not as_is_ext:
values = values.encode('latin1')
if compress:
if compress == u'zlib':
_check_zlib()
decompress = zlib.decompress
elif compress == u'blosc':
_check_blosc()
decompress = blosc.decompress
else:
raise ValueError("compress must be one of 'zlib' or 'blosc'")
try:
return np.frombuffer(
_move_into_mutable_buffer(decompress(values)),
dtype=dtype,
)
except _BadMove as e:
# Pull the decompressed data off of the `_BadMove` exception.
# We don't just store this in the locals because we want to
# minimize the risk of giving users access to a `bytes` object
# whose data is also given to a mutable buffer.
values = e.args[0]
if len(values) > 1:
# The empty string and single characters are memoized in many
# string creating functions in the capi. This case should not
# warn even though we need to make a copy because we are only
# copying at most 1 byte.
warnings.warn(
'copying data after decompressing; this may mean that'
' decompress is caching its result',
PerformanceWarning,
)
# fall through to copying `np.fromstring`
# Copy the bytes into a numpy array.
buf = np.frombuffer(values, dtype=dtype)
buf = buf.copy() # required to not mutate the original data
buf.flags.writeable = True
return buf
def encode(obj):
"""
Data encoder
"""
tobj = type(obj)
if isinstance(obj, Index):
if isinstance(obj, RangeIndex):
return {u'typ': u'range_index',
u'klass': u(obj.__class__.__name__),
u'name': getattr(obj, 'name', None),
u'start': getattr(obj, '_start', None),
u'stop': getattr(obj, '_stop', None),
u'step': getattr(obj, '_step', None)}
elif isinstance(obj, PeriodIndex):
return {u'typ': u'period_index',
u'klass': u(obj.__class__.__name__),
u'name': getattr(obj, 'name', None),
u'freq': u_safe(getattr(obj, 'freqstr', None)),
u'dtype': u(obj.dtype.name),
u'data': convert(obj.asi8),
u'compress': compressor}
elif isinstance(obj, DatetimeIndex):
tz = getattr(obj, 'tz', None)
# store tz info and data as UTC
if tz is not None:
tz = u(tz.zone)
obj = obj.tz_convert('UTC')
return {u'typ': u'datetime_index',
u'klass': u(obj.__class__.__name__),
u'name': getattr(obj, 'name', None),
u'dtype': u(obj.dtype.name),
u'data': convert(obj.asi8),
u'freq': u_safe(getattr(obj, 'freqstr', None)),
u'tz': tz,
u'compress': compressor}
elif isinstance(obj, IntervalIndex):
return {u'typ': u'interval_index',
u'klass': u(obj.__class__.__name__),
u'name': getattr(obj, 'name', None),
u'left': getattr(obj, '_left', None),
u'right': getattr(obj, '_right', None),
u'closed': getattr(obj, '_closed', None)}
elif isinstance(obj, MultiIndex):
return {u'typ': u'multi_index',
u'klass': u(obj.__class__.__name__),
u'names': getattr(obj, 'names', None),
u'dtype': u(obj.dtype.name),
u'data': convert(obj.values),
u'compress': compressor}
else:
return {u'typ': u'index',
u'klass': u(obj.__class__.__name__),
u'name': getattr(obj, 'name', None),
u'dtype': u(obj.dtype.name),
u'data': convert(obj.values),
u'compress': compressor}
elif isinstance(obj, Categorical):
return {u'typ': u'category',
u'klass': u(obj.__class__.__name__),
u'name': getattr(obj, 'name', None),
u'codes': obj.codes,
u'categories': obj.categories,
u'ordered': obj.ordered,
u'compress': compressor}
elif isinstance(obj, Series):
if isinstance(obj, SparseSeries):
raise NotImplementedError(
'msgpack sparse series is not implemented'
)
# d = {'typ': 'sparse_series',
# 'klass': obj.__class__.__name__,
# 'dtype': obj.dtype.name,
# 'index': obj.index,
# 'sp_index': obj.sp_index,
# 'sp_values': convert(obj.sp_values),
# 'compress': compressor}
# for f in ['name', 'fill_value', 'kind']:
# d[f] = getattr(obj, f, None)
# return d
else:
return {u'typ': u'series',
u'klass': u(obj.__class__.__name__),
u'name': getattr(obj, 'name', None),
u'index': obj.index,
u'dtype': u(obj.dtype.name),
u'data': convert(obj.values),
u'compress': compressor}
elif issubclass(tobj, NDFrame):
if isinstance(obj, SparseDataFrame):
raise NotImplementedError(
'msgpack sparse frame is not implemented'
)
# d = {'typ': 'sparse_dataframe',
# 'klass': obj.__class__.__name__,
# 'columns': obj.columns}
# for f in ['default_fill_value', 'default_kind']:
# d[f] = getattr(obj, f, None)
# d['data'] = dict([(name, ss)
# for name, ss in compat.iteritems(obj)])
# return d
else:
data = obj._data
if not data.is_consolidated():
data = data.consolidate()
# the block manager
return {u'typ': u'block_manager',
u'klass': u(obj.__class__.__name__),
u'axes': data.axes,
u'blocks': [{u'locs': b.mgr_locs.as_array,
u'values': convert(b.values),
u'shape': b.values.shape,
u'dtype': u(b.dtype.name),
u'klass': u(b.__class__.__name__),
u'compress': compressor} for b in data.blocks]
}
elif isinstance(obj, (datetime, date, np.datetime64, timedelta,
np.timedelta64)) or obj is NaT:
if isinstance(obj, Timestamp):
tz = obj.tzinfo
if tz is not None:
tz = u(tz.zone)
freq = obj.freq
if freq is not None:
freq = u(freq.freqstr)
return {u'typ': u'timestamp',
u'value': obj.value,
u'freq': freq,
u'tz': tz}
if obj is NaT:
return {u'typ': u'nat'}
elif isinstance(obj, np.timedelta64):
return {u'typ': u'timedelta64',
u'data': obj.view('i8')}
elif isinstance(obj, timedelta):
return {u'typ': u'timedelta',
u'data': (obj.days, obj.seconds, obj.microseconds)}
elif isinstance(obj, np.datetime64):
return {u'typ': u'datetime64',
u'data': u(str(obj))}
elif isinstance(obj, datetime):
return {u'typ': u'datetime',
u'data': u(obj.isoformat())}
elif isinstance(obj, date):
return {u'typ': u'date',
u'data': u(obj.isoformat())}
raise Exception("cannot encode this datetimelike object: %s" % obj)
elif isinstance(obj, Period):
return {u'typ': u'period',
u'ordinal': obj.ordinal,
u'freq': u_safe(obj.freqstr)}
elif isinstance(obj, Interval):
return {u'typ': u'interval',
u'left': obj.left,
u'right': obj.right,
u'closed': obj.closed}
elif isinstance(obj, BlockIndex):
return {u'typ': u'block_index',
u'klass': u(obj.__class__.__name__),
u'blocs': obj.blocs,
u'blengths': obj.blengths,
u'length': obj.length}
elif isinstance(obj, IntIndex):
return {u'typ': u'int_index',
u'klass': u(obj.__class__.__name__),
u'indices': obj.indices,
u'length': obj.length}
elif isinstance(obj, np.ndarray):
return {u'typ': u'ndarray',
u'shape': obj.shape,
u'ndim': obj.ndim,
u'dtype': u(obj.dtype.name),
u'data': convert(obj),
u'compress': compressor}
elif isinstance(obj, np.number):
if np.iscomplexobj(obj):
return {u'typ': u'np_scalar',
u'sub_typ': u'np_complex',
u'dtype': u(obj.dtype.name),
u'real': u(obj.real.__repr__()),
u'imag': u(obj.imag.__repr__())}
else:
return {u'typ': u'np_scalar',
u'dtype': u(obj.dtype.name),
u'data': u(obj.__repr__())}
elif isinstance(obj, complex):
return {u'typ': u'np_complex',
u'real': u(obj.real.__repr__()),
u'imag': u(obj.imag.__repr__())}
return obj
def decode(obj):
"""
Decoder for deserializing numpy data types.
"""
typ = obj.get(u'typ')
if typ is None:
return obj
elif typ == u'timestamp':
freq = obj[u'freq'] if 'freq' in obj else obj[u'offset']
return Timestamp(obj[u'value'], tz=obj[u'tz'], freq=freq)
elif typ == u'nat':
return NaT
elif typ == u'period':
return Period(ordinal=obj[u'ordinal'], freq=obj[u'freq'])
elif typ == u'index':
dtype = dtype_for(obj[u'dtype'])
data = unconvert(obj[u'data'], dtype,
obj.get(u'compress'))
return globals()[obj[u'klass']](data, dtype=dtype, name=obj[u'name'])
elif typ == u'range_index':
return globals()[obj[u'klass']](obj[u'start'],
obj[u'stop'],
obj[u'step'],
name=obj[u'name'])
elif typ == u'multi_index':
dtype = dtype_for(obj[u'dtype'])
data = unconvert(obj[u'data'], dtype,
obj.get(u'compress'))
data = [tuple(x) for x in data]
return globals()[obj[u'klass']].from_tuples(data, names=obj[u'names'])
elif typ == u'period_index':
data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
d = dict(name=obj[u'name'], freq=obj[u'freq'])
return globals()[obj[u'klass']]._from_ordinals(data, **d)
elif typ == u'datetime_index':
data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
d = dict(name=obj[u'name'], freq=obj[u'freq'], verify_integrity=False)
result = globals()[obj[u'klass']](data, **d)
tz = obj[u'tz']
# reverse tz conversion
if tz is not None:
result = result.tz_localize('UTC').tz_convert(tz)
return result
elif typ == u'interval_index':
return globals()[obj[u'klass']].from_arrays(obj[u'left'],
obj[u'right'],
obj[u'closed'],
name=obj[u'name'])
elif typ == u'category':
from_codes = globals()[obj[u'klass']].from_codes
return from_codes(codes=obj[u'codes'],
categories=obj[u'categories'],
ordered=obj[u'ordered'])
elif typ == u'interval':
return Interval(obj[u'left'], obj[u'right'], obj[u'closed'])
elif typ == u'series':
dtype = dtype_for(obj[u'dtype'])
pd_dtype = pandas_dtype(dtype)
index = obj[u'index']
result = globals()[obj[u'klass']](unconvert(obj[u'data'], dtype,
obj[u'compress']),
index=index,
dtype=pd_dtype,
name=obj[u'name'])
return result
elif typ == u'block_manager':
axes = obj[u'axes']
def create_block(b):
values = _safe_reshape(unconvert(
b[u'values'], dtype_for(b[u'dtype']),
b[u'compress']), b[u'shape'])
# locs handles duplicate column names, and should be used instead
# of items; see GH 9618
if u'locs' in b:
placement = b[u'locs']
else:
placement = axes[0].get_indexer(b[u'items'])
return make_block(values=values,
klass=getattr(internals, b[u'klass']),
placement=placement,
dtype=b[u'dtype'])
blocks = [create_block(b) for b in obj[u'blocks']]
return globals()[obj[u'klass']](BlockManager(blocks, axes))
elif typ == u'datetime':
return parse(obj[u'data'])
elif typ == u'datetime64':
return np.datetime64(parse(obj[u'data']))
elif typ == u'date':
return parse(obj[u'data']).date()
elif typ == u'timedelta':
return timedelta(*obj[u'data'])
elif typ == u'timedelta64':
return np.timedelta64(int(obj[u'data']))
# elif typ == 'sparse_series':
# dtype = dtype_for(obj['dtype'])
# return globals()[obj['klass']](
# unconvert(obj['sp_values'], dtype, obj['compress']),
# sparse_index=obj['sp_index'], index=obj['index'],
# fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
# elif typ == 'sparse_dataframe':
# return globals()[obj['klass']](
# obj['data'], columns=obj['columns'],
# default_fill_value=obj['default_fill_value'],
# default_kind=obj['default_kind']
# )
# elif typ == 'sparse_panel':
# return globals()[obj['klass']](
# obj['data'], items=obj['items'],
# default_fill_value=obj['default_fill_value'],
# default_kind=obj['default_kind'])
elif typ == u'block_index':
return globals()[obj[u'klass']](obj[u'length'], obj[u'blocs'],
obj[u'blengths'])
elif typ == u'int_index':
return globals()[obj[u'klass']](obj[u'length'], obj[u'indices'])
elif typ == u'ndarray':
return unconvert(obj[u'data'], np.typeDict[obj[u'dtype']],
obj.get(u'compress')).reshape(obj[u'shape'])
elif typ == u'np_scalar':
if obj.get(u'sub_typ') == u'np_complex':
return c2f(obj[u'real'], obj[u'imag'], obj[u'dtype'])
else:
dtype = dtype_for(obj[u'dtype'])
try:
return dtype(obj[u'data'])
except Exception:
return dtype.type(obj[u'data'])
elif typ == u'np_complex':
return complex(obj[u'real'] + u'+' + obj[u'imag'] + u'j')
elif isinstance(obj, (dict, list, set)):
return obj
else:
return obj
def pack(o, default=encode,
encoding='utf-8', unicode_errors='strict', use_single_float=False,
autoreset=1, use_bin_type=1):
"""
Pack an object and return the packed bytes.
"""
return Packer(default=default, encoding=encoding,
unicode_errors=unicode_errors,
use_single_float=use_single_float,
autoreset=autoreset,
use_bin_type=use_bin_type).pack(o)
def unpack(packed, object_hook=decode,
list_hook=None, use_list=False, encoding='utf-8',
unicode_errors='strict', object_pairs_hook=None,
max_buffer_size=0, ext_hook=ExtType):
"""
Unpack a packed object, return an iterator
Note: packed lists will be returned as tuples
"""
return Unpacker(packed, object_hook=object_hook,
list_hook=list_hook,
use_list=use_list, encoding=encoding,
unicode_errors=unicode_errors,
object_pairs_hook=object_pairs_hook,
max_buffer_size=max_buffer_size,
ext_hook=ext_hook)
class Packer(_Packer):
def __init__(self, default=encode,
encoding='utf-8',
unicode_errors='strict',
use_single_float=False,
autoreset=1,
use_bin_type=1):
super(Packer, self).__init__(default=default,
encoding=encoding,
unicode_errors=unicode_errors,
use_single_float=use_single_float,
autoreset=autoreset,
use_bin_type=use_bin_type)
class Unpacker(_Unpacker):
def __init__(self, file_like=None, read_size=0, use_list=False,
object_hook=decode,
object_pairs_hook=None, list_hook=None, encoding='utf-8',
unicode_errors='strict', max_buffer_size=0, ext_hook=ExtType):
super(Unpacker, self).__init__(file_like=file_like,
read_size=read_size,
use_list=use_list,
object_hook=object_hook,
object_pairs_hook=object_pairs_hook,
list_hook=list_hook,
encoding=encoding,
unicode_errors=unicode_errors,
max_buffer_size=max_buffer_size,
ext_hook=ext_hook)
class Iterator(object):
""" manage the unpacking iteration,
close the file on completion """
def __init__(self, path, **kwargs):
self.path = path
self.kwargs = kwargs
def __iter__(self):
needs_closing = True
try:
# see if we have an actual file
if isinstance(self.path, compat.string_types):
try:
path_exists = os.path.exists(self.path)
except TypeError:
path_exists = False
if path_exists:
fh = open(self.path, 'rb')
else:
fh = compat.BytesIO(self.path)
else:
if not hasattr(self.path, 'read'):
fh = compat.BytesIO(self.path)
else:
# a file-like
needs_closing = False
fh = self.path
unpacker = unpack(fh)
for o in unpacker:
yield o
finally:
if needs_closing:
fh.close()
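# --- Illustrative usage sketch (not part of the original module) ------------
# Serialising a DataFrame with the experimental msgpack format defined above.
# `path` is a hypothetical location; compress='zlib' relies on _check_zlib.
def _packers_roundtrip_example(path='frame.msg'):
    import pandas as pd

    df = pd.DataFrame({'a': range(3)})
    pd.to_msgpack(path, df, compress='zlib')
    # A file holding a single object is returned as that object, not a list.
    return pd.read_msgpack(path)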
@@ -0,0 +1,288 @@
""" parquet compat """
from warnings import catch_warnings
from distutils.version import LooseVersion
from pandas import DataFrame, RangeIndex, Int64Index, get_option
from pandas.compat import string_types
import pandas.core.common as com
from pandas.io.common import get_filepath_or_buffer, is_s3_url
def get_engine(engine):
""" return our implementation """
if engine == 'auto':
engine = get_option('io.parquet.engine')
if engine == 'auto':
# try engines in this order
try:
return PyArrowImpl()
except ImportError:
pass
try:
return FastParquetImpl()
except ImportError:
pass
raise ImportError("Unable to find a usable engine; "
"tried using: 'pyarrow', 'fastparquet'.\n"
"pyarrow or fastparquet is required for parquet "
"support")
if engine not in ['pyarrow', 'fastparquet']:
raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")
if engine == 'pyarrow':
return PyArrowImpl()
elif engine == 'fastparquet':
return FastParquetImpl()
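# --- Illustrative usage sketch (not part of the original module) ------------
# Engine selection can also be driven by the pandas option consulted above;
# this assumes pyarrow is installed, otherwise get_engine raises ImportError.
def _engine_option_example():
    import pandas as pd

    pd.set_option('io.parquet.engine', 'pyarrow')
    return get_engine('auto')   # resolves to PyArrowImpl via the option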
class BaseImpl(object):
api = None # module
@staticmethod
def validate_dataframe(df):
if not isinstance(df, DataFrame):
raise ValueError("to_parquet only supports IO with DataFrames")
# must have value column names (strings only)
if df.columns.inferred_type not in {'string', 'unicode'}:
raise ValueError("parquet must have string column names")
# index level names must be strings
valid_names = all(
isinstance(name, string_types)
for name in df.index.names
if name is not None
)
if not valid_names:
raise ValueError("Index level names must be strings")
def write(self, df, path, compression, **kwargs):
raise com.AbstractMethodError(self)
def read(self, path, columns=None, **kwargs):
raise com.AbstractMethodError(self)
class PyArrowImpl(BaseImpl):
def __init__(self):
# since pandas is a dependency of pyarrow
# we need to import on first use
try:
import pyarrow
import pyarrow.parquet
except ImportError:
raise ImportError(
"pyarrow is required for parquet support\n\n"
"you can install via conda\n"
"conda install pyarrow -c conda-forge\n"
"\nor via pip\n"
"pip install -U pyarrow\n"
)
if LooseVersion(pyarrow.__version__) < '0.4.1':
raise ImportError(
"pyarrow >= 0.4.1 is required for parquet support\n\n"
"you can install via conda\n"
"conda install pyarrow -c conda-forge\n"
"\nor via pip\n"
"pip install -U pyarrow\n"
)
self._pyarrow_lt_060 = (
LooseVersion(pyarrow.__version__) < LooseVersion('0.6.0'))
self._pyarrow_lt_070 = (
LooseVersion(pyarrow.__version__) < LooseVersion('0.7.0'))
self.api = pyarrow
def write(self, df, path, compression='snappy',
coerce_timestamps='ms', **kwargs):
self.validate_dataframe(df)
if self._pyarrow_lt_070:
self._validate_write_lt_070(df)
path, _, _, _ = get_filepath_or_buffer(path, mode='wb')
if self._pyarrow_lt_060:
table = self.api.Table.from_pandas(df, timestamps_to_ms=True)
self.api.parquet.write_table(
table, path, compression=compression, **kwargs)
else:
table = self.api.Table.from_pandas(df)
self.api.parquet.write_table(
table, path, compression=compression,
coerce_timestamps=coerce_timestamps, **kwargs)
def read(self, path, columns=None, **kwargs):
path, _, _, should_close = get_filepath_or_buffer(path)
if self._pyarrow_lt_070:
result = self.api.parquet.read_pandas(path, columns=columns,
**kwargs).to_pandas()
else:
kwargs['use_pandas_metadata'] = True
result = self.api.parquet.read_table(path, columns=columns,
**kwargs).to_pandas()
if should_close:
try:
path.close()
except: # noqa: flake8
pass
return result
def _validate_write_lt_070(self, df):
# Compatibility shim for pyarrow < 0.7.0
# TODO: Remove in pandas 0.23.0
from pandas.core.indexes.multi import MultiIndex
if isinstance(df.index, MultiIndex):
msg = (
"Multi-index DataFrames are only supported "
"with pyarrow >= 0.7.0"
)
raise ValueError(msg)
# Validate index
if not isinstance(df.index, Int64Index):
msg = (
"pyarrow < 0.7.0 does not support serializing {} for the "
"index; you can .reset_index() to make the index into "
"column(s), or install the latest version of pyarrow or "
"fastparquet."
)
raise ValueError(msg.format(type(df.index)))
if not df.index.equals(RangeIndex(len(df))):
raise ValueError(
"pyarrow < 0.7.0 does not support serializing a non-default "
"index; you can .reset_index() to make the index into "
"column(s), or install the latest version of pyarrow or "
"fastparquet."
)
if df.index.name is not None:
raise ValueError(
"pyarrow < 0.7.0 does not serialize indexes with a name; you "
"can set the index.name to None or install the latest version "
"of pyarrow or fastparquet."
)
class FastParquetImpl(BaseImpl):
def __init__(self):
# since pandas is a dependency of fastparquet
# we need to import on first use
try:
import fastparquet
except ImportError:
raise ImportError(
"fastparquet is required for parquet support\n\n"
"you can install via conda\n"
"conda install fastparquet -c conda-forge\n"
"\nor via pip\n"
"pip install -U fastparquet"
)
if LooseVersion(fastparquet.__version__) < '0.1.0':
raise ImportError(
"fastparquet >= 0.1.0 is required for parquet "
"support\n\n"
"you can install via conda\n"
"conda install fastparquet -c conda-forge\n"
"\nor via pip\n"
"pip install -U fastparquet"
)
self.api = fastparquet
def write(self, df, path, compression='snappy', **kwargs):
self.validate_dataframe(df)
# thriftpy/protocol/compact.py:339:
# DeprecationWarning: tostring() is deprecated.
# Use tobytes() instead.
if is_s3_url(path):
# path is s3:// so we need to open the s3file in 'wb' mode.
# TODO: Support 'ab'
path, _, _, _ = get_filepath_or_buffer(path, mode='wb')
# And pass the opened s3file to the fastparquet internal impl.
kwargs['open_with'] = lambda path, _: path
else:
path, _, _, _ = get_filepath_or_buffer(path)
with catch_warnings(record=True):
self.api.write(path, df,
compression=compression, **kwargs)
def read(self, path, columns=None, **kwargs):
if is_s3_url(path):
# When path is s3:// an S3File is returned.
# We need to retain the original path (str) while also passing
# the S3File().open function to the fastparquet implementation.
s3, _, _, should_close = get_filepath_or_buffer(path)
try:
parquet_file = self.api.ParquetFile(path, open_with=s3.s3.open)
finally:
s3.close()
else:
path, _, _, _ = get_filepath_or_buffer(path)
parquet_file = self.api.ParquetFile(path)
return parquet_file.to_pandas(columns=columns, **kwargs)
def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
"""
Write a DataFrame to the parquet format.
Parameters
----------
df : DataFrame
path : string
File path
engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
Parquet library to use. If 'auto', then the option
``io.parquet.engine`` is used. The default ``io.parquet.engine``
behavior is to try 'pyarrow', falling back to 'fastparquet' if
'pyarrow' is unavailable.
compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
Name of the compression to use. Use ``None`` for no compression.
kwargs
Additional keyword arguments passed to the engine
"""
impl = get_engine(engine)
return impl.write(df, path, compression=compression, **kwargs)
def read_parquet(path, engine='auto', columns=None, **kwargs):
"""
Load a parquet object from the file path, returning a DataFrame.
.. versionadded:: 0.21.0
Parameters
----------
path : string
File path
columns : list, default=None
If not None, only these columns will be read from the file.
.. versionadded:: 0.21.1
engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
Parquet library to use. If 'auto', then the option
``io.parquet.engine`` is used. The default ``io.parquet.engine``
behavior is to try 'pyarrow', falling back to 'fastparquet' if
'pyarrow' is unavailable.
kwargs are passed to the engine
Returns
-------
DataFrame
"""
impl = get_engine(engine)
return impl.read(path, columns=columns, **kwargs)
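# --- Illustrative usage sketch (not part of the original module) ------------
# Writing and reading parquet through the dispatch above. Requires pyarrow or
# fastparquet (with snappy support); the file name is hypothetical.
def _parquet_roundtrip_example(path='frame.parquet'):
    import pandas as pd

    df = pd.DataFrame({'a': [1, 2, 3], 'b': ['x', 'y', 'z']})
    df.to_parquet(path, engine='auto', compression='snappy')
    # `columns` lets the engine read only a subset of the stored columns.
    return pd.read_parquet(path, columns=['a'])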
File diff suppressed because it is too large
@@ -0,0 +1,205 @@
""" pickle compat """
import warnings
import numpy as np
from numpy.lib.format import read_array, write_array
from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc, PY3
from pandas.core.dtypes.common import is_datetime64_dtype, _NS_DTYPE
from pandas.io.common import _get_handle, _infer_compression, _stringify_path
def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):
"""
Pickle (serialize) object to file.
Parameters
----------
obj : any object
Any python object.
path : str
File path where the pickled object will be stored.
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
A string representing the compression to use in the output file. By
default, infers from the file extension in specified path.
.. versionadded:: 0.20.0
protocol : int
Int which indicates which protocol should be used by the pickler,
default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
values for this parameter depend on the version of Python. For Python
2.x, possible values are 0, 1, 2. For Python >= 3.0, 3 is a valid value.
For Python >= 3.4, 4 is a valid value. A negative value for the
protocol parameter is equivalent to setting its value to
HIGHEST_PROTOCOL.
.. [1] https://docs.python.org/3/library/pickle.html
.. versionadded:: 0.21.0
See Also
--------
read_pickle : Load pickled pandas object (or any object) from file.
DataFrame.to_hdf : Write DataFrame to an HDF5 file.
DataFrame.to_sql : Write DataFrame to a SQL database.
DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
Examples
--------
>>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
>>> original_df
foo bar
0 0 5
1 1 6
2 2 7
3 3 8
4 4 9
>>> pd.to_pickle(original_df, "./dummy.pkl")
>>> unpickled_df = pd.read_pickle("./dummy.pkl")
>>> unpickled_df
foo bar
0 0 5
1 1 6
2 2 7
3 3 8
4 4 9
>>> import os
>>> os.remove("./dummy.pkl")
"""
path = _stringify_path(path)
inferred_compression = _infer_compression(path, compression)
f, fh = _get_handle(path, 'wb',
compression=inferred_compression,
is_text=False)
if protocol < 0:
protocol = pkl.HIGHEST_PROTOCOL
try:
f.write(pkl.dumps(obj, protocol=protocol))
finally:
for _f in fh:
_f.close()
def read_pickle(path, compression='infer'):
"""
Load pickled pandas object (or any object) from file.
.. warning::
Loading pickled data received from untrusted sources can be
unsafe. See `here <https://docs.python.org/3/library/pickle.html>`__.
Parameters
----------
path : str
File path where the pickled object will be loaded.
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
For on-the-fly decompression of on-disk data. If 'infer', then use
gzip, bz2, xz or zip if path ends in '.gz', '.bz2', '.xz',
or '.zip' respectively, and no decompression otherwise.
Set to None for no decompression.
.. versionadded:: 0.20.0
Returns
-------
unpickled : type of object stored in file
See Also
--------
DataFrame.to_pickle : Pickle (serialize) DataFrame object to file.
Series.to_pickle : Pickle (serialize) Series object to file.
read_hdf : Read HDF5 file into a DataFrame.
read_sql : Read SQL query or database table into a DataFrame.
read_parquet : Load a parquet object, returning a DataFrame.
Examples
--------
>>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
>>> original_df
foo bar
0 0 5
1 1 6
2 2 7
3 3 8
4 4 9
>>> pd.to_pickle(original_df, "./dummy.pkl")
>>> unpickled_df = pd.read_pickle("./dummy.pkl")
>>> unpickled_df
foo bar
0 0 5
1 1 6
2 2 7
3 3 8
4 4 9
>>> import os
>>> os.remove("./dummy.pkl")
"""
path = _stringify_path(path)
inferred_compression = _infer_compression(path, compression)
def read_wrapper(func):
# wrapper file handle open/close operation
f, fh = _get_handle(path, 'rb',
compression=inferred_compression,
is_text=False)
try:
return func(f)
finally:
for _f in fh:
_f.close()
def try_read(path, encoding=None):
# try with the current (c)pickle first; if we get a TypeError then
# fall back to the compat pickle to handle subclass changes.
# Pass encoding only if it's not None, as py2 doesn't handle the param.
# GH 6899
try:
with warnings.catch_warnings(record=True):
# We want to silence any warnings about, e.g. moved modules.
return read_wrapper(lambda f: pkl.load(f))
except Exception:
# reg/patched pickle
try:
return read_wrapper(
lambda f: pc.load(f, encoding=encoding, compat=False))
# compat pickle
except Exception:
return read_wrapper(
lambda f: pc.load(f, encoding=encoding, compat=True))
try:
return try_read(path)
except Exception:
if PY3:
return try_read(path, encoding='latin1')
raise
# compat with sparse pickle / unpickle
def _pickle_array(arr):
arr = arr.view(np.ndarray)
buf = BytesIO()
write_array(buf, arr)
return buf.getvalue()
def _unpickle_array(bytes):
arr = read_array(BytesIO(bytes))
# All datetimes should be stored as M8[ns]. When unpickling with
# numpy 1.6, it will read these as M8[us]. So this ensures all
# datetime64 types are read as M8[ns]
if is_datetime64_dtype(arr):
arr = arr.view(_NS_DTYPE)
return arr
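# --- Illustrative usage sketch (not part of the original module) ------------
# Round trip through the sparse pickle compat helpers above: the array is
# written in numpy's .npy format to an in-memory buffer and read back.
# The helper name is hypothetical and assumes the helpers above are in scope.
def _pickle_array_roundtrip_example():
    import numpy as np

    arr = np.arange(5, dtype='int64')
    payload = _pickle_array(arr)      # raw bytes in .npy format
    return _unpickle_array(payload)   # array equal to `arr`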
File diff suppressed because it is too large
@@ -0,0 +1,39 @@
""" s3 support for remote file interactivity """
from pandas import compat
try:
import s3fs
from botocore.exceptions import NoCredentialsError
except ImportError:
raise ImportError("The s3fs library is required to handle s3 files")
if compat.PY3:
from urllib.parse import urlparse as parse_url
else:
from urlparse import urlparse as parse_url
def _strip_schema(url):
"""Returns the url without the s3:// part"""
result = parse_url(url)
return result.netloc + result.path
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
compression=None, mode=None):
if mode is None:
mode = 'rb'
fs = s3fs.S3FileSystem(anon=False)
try:
filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode)
except (compat.FileNotFoundError, NoCredentialsError):
# boto3 has troubles when trying to access a public file
# when credentialed...
# An OSError is raised if you have credentials, but they
# aren't valid for that bucket.
# A NoCredentialsError is raised if you don't have creds
# for that bucket.
fs = s3fs.S3FileSystem(anon=True)
filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode)
return filepath_or_buffer, None, compression, True
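# --- Illustrative usage sketch (not part of the original module) ------------
# _strip_schema only removes the scheme; the bucket stays in the key handed
# to s3fs. The bucket/key below are hypothetical.
def _strip_schema_example():
    return _strip_schema('s3://my-bucket/data/frame.csv')   # 'my-bucket/data/frame.csv'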
@@ -0,0 +1 @@
from .sasreader import read_sas # noqa
@@ -0,0 +1,687 @@
"""
Read SAS7BDAT files
Based on code written by Jared Hobbs:
https://bitbucket.org/jaredhobbs/sas7bdat
See also:
https://github.com/BioStatMatt/sas7bdat
Partial documentation of the file format:
https://cran.r-project.org/web/packages/sas7bdat/vignettes/sas7bdat.pdf
Reference for binary data compression:
http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm
"""
import pandas as pd
from pandas import compat
from pandas.io.common import get_filepath_or_buffer, BaseIterator
from pandas.errors import EmptyDataError
import numpy as np
import struct
import pandas.io.sas.sas_constants as const
from pandas.io.sas._sas import Parser
class _subheader_pointer(object):
pass
class _column(object):
pass
# SAS7BDAT represents a SAS data file in SAS7BDAT format.
class SAS7BDATReader(BaseIterator):
"""
Read SAS files in SAS7BDAT format.
Parameters
----------
path_or_buf : path name or buffer
Name of SAS file or file-like object pointing to SAS file
contents.
index : column identifier, defaults to None
Column to use as index.
convert_dates : boolean, defaults to True
Attempt to convert dates to Pandas datetime values. Note that
some rarely used SAS date formats may be unsupported.
blank_missing : boolean, defaults to True
Convert empty strings to missing values (SAS uses blanks to
indicate missing character variables).
chunksize : int, defaults to None
Return SAS7BDATReader object for iteration; returns chunks with the
given number of lines.
encoding : string, defaults to None
String encoding.
convert_text : bool, defaults to True
If False, text variables are left as raw bytes.
convert_header_text : bool, defaults to True
If False, header text, including column names, is left as raw
bytes.
"""
def __init__(self, path_or_buf, index=None, convert_dates=True,
blank_missing=True, chunksize=None, encoding=None,
convert_text=True, convert_header_text=True):
self.index = index
self.convert_dates = convert_dates
self.blank_missing = blank_missing
self.chunksize = chunksize
self.encoding = encoding
self.convert_text = convert_text
self.convert_header_text = convert_header_text
self.default_encoding = "latin-1"
self.compression = ""
self.column_names_strings = []
self.column_names = []
self.column_types = []
self.column_formats = []
self.columns = []
self._current_page_data_subheader_pointers = []
self._cached_page = None
self._column_data_lengths = []
self._column_data_offsets = []
self._current_row_in_file_index = 0
self._current_row_on_page_index = 0
self._path_or_buf, _, _, _ = get_filepath_or_buffer(path_or_buf)
if isinstance(self._path_or_buf, compat.string_types):
self._path_or_buf = open(self._path_or_buf, 'rb')
self.handle = self._path_or_buf
self._get_properties()
self._parse_metadata()
def close(self):
try:
self.handle.close()
except AttributeError:
pass
def _get_properties(self):
# Check magic number
self._path_or_buf.seek(0)
self._cached_page = self._path_or_buf.read(288)
if self._cached_page[0:len(const.magic)] != const.magic:
self.close()
raise ValueError("magic number mismatch (not a SAS file?)")
# Get alignment information
align1, align2 = 0, 0
buf = self._read_bytes(const.align_1_offset, const.align_1_length)
if buf == const.u64_byte_checker_value:
align2 = const.align_2_value
self.U64 = True
self._int_length = 8
self._page_bit_offset = const.page_bit_offset_x64
self._subheader_pointer_length = const.subheader_pointer_length_x64
else:
self.U64 = False
self._page_bit_offset = const.page_bit_offset_x86
self._subheader_pointer_length = const.subheader_pointer_length_x86
self._int_length = 4
buf = self._read_bytes(const.align_2_offset, const.align_2_length)
if buf == const.align_1_checker_value:
align1 = const.align_2_value
total_align = align1 + align2
# Get endianness information
buf = self._read_bytes(const.endianness_offset,
const.endianness_length)
if buf == b'\x01':
self.byte_order = "<"
else:
self.byte_order = ">"
# Get encoding information
buf = self._read_bytes(const.encoding_offset, const.encoding_length)[0]
if buf in const.encoding_names:
self.file_encoding = const.encoding_names[buf]
else:
self.file_encoding = "unknown (code=%s)" % str(buf)
# Get platform information
buf = self._read_bytes(const.platform_offset, const.platform_length)
if buf == b'1':
self.platform = "unix"
elif buf == b'2':
self.platform = "windows"
else:
self.platform = "unknown"
buf = self._read_bytes(const.dataset_offset, const.dataset_length)
self.name = buf.rstrip(b'\x00 ')
if self.convert_header_text:
self.name = self.name.decode(
self.encoding or self.default_encoding)
buf = self._read_bytes(const.file_type_offset, const.file_type_length)
self.file_type = buf.rstrip(b'\x00 ')
if self.convert_header_text:
self.file_type = self.file_type.decode(
self.encoding or self.default_encoding)
# Timestamp is epoch 01/01/1960
epoch = pd.datetime(1960, 1, 1)
x = self._read_float(const.date_created_offset + align1,
const.date_created_length)
self.date_created = epoch + pd.to_timedelta(x, unit='s')
x = self._read_float(const.date_modified_offset + align1,
const.date_modified_length)
self.date_modified = epoch + pd.to_timedelta(x, unit='s')
self.header_length = self._read_int(const.header_size_offset + align1,
const.header_size_length)
# Read the rest of the header into cached_page.
buf = self._path_or_buf.read(self.header_length - 288)
self._cached_page += buf
if len(self._cached_page) != self.header_length:
self.close()
raise ValueError("The SAS7BDAT file appears to be truncated.")
self._page_length = self._read_int(const.page_size_offset + align1,
const.page_size_length)
self._page_count = self._read_int(const.page_count_offset + align1,
const.page_count_length)
buf = self._read_bytes(const.sas_release_offset + total_align,
const.sas_release_length)
self.sas_release = buf.rstrip(b'\x00 ')
if self.convert_header_text:
self.sas_release = self.sas_release.decode(
self.encoding or self.default_encoding)
buf = self._read_bytes(const.sas_server_type_offset + total_align,
const.sas_server_type_length)
self.server_type = buf.rstrip(b'\x00 ')
if self.convert_header_text:
self.server_type = self.server_type.decode(
self.encoding or self.default_encoding)
buf = self._read_bytes(const.os_version_number_offset + total_align,
const.os_version_number_length)
self.os_version = buf.rstrip(b'\x00 ')
if self.convert_header_text:
self.os_version = self.os_version.decode(
self.encoding or self.default_encoding)
buf = self._read_bytes(const.os_name_offset + total_align,
const.os_name_length)
buf = buf.rstrip(b'\x00 ')
if len(buf) > 0:
self.os_name = buf.decode(self.encoding or self.default_encoding)
else:
buf = self._read_bytes(const.os_maker_offset + total_align,
const.os_maker_length)
self.os_name = buf.rstrip(b'\x00 ')
if self.convert_header_text:
self.os_name = self.os_name.decode(
self.encoding or self.default_encoding)
def __next__(self):
da = self.read(nrows=self.chunksize or 1)
if da is None:
raise StopIteration
return da
# Read a single float of the given width (4 or 8).
def _read_float(self, offset, width):
if width not in (4, 8):
self.close()
raise ValueError("invalid float width")
buf = self._read_bytes(offset, width)
fd = "f" if width == 4 else "d"
return struct.unpack(self.byte_order + fd, buf)[0]
# Read a single signed integer of the given width (1, 2, 4 or 8).
def _read_int(self, offset, width):
if width not in (1, 2, 4, 8):
self.close()
raise ValueError("invalid int width")
buf = self._read_bytes(offset, width)
it = {1: "b", 2: "h", 4: "l", 8: "q"}[width]
iv = struct.unpack(self.byte_order + it, buf)[0]
return iv
def _read_bytes(self, offset, length):
if self._cached_page is None:
self._path_or_buf.seek(offset)
buf = self._path_or_buf.read(length)
if len(buf) < length:
self.close()
msg = "Unable to read {:d} bytes from file position {:d}."
raise ValueError(msg.format(length, offset))
return buf
else:
if offset + length > len(self._cached_page):
self.close()
raise ValueError("The cached page is too small.")
return self._cached_page[offset:offset + length]
def _parse_metadata(self):
done = False
while not done:
self._cached_page = self._path_or_buf.read(self._page_length)
if len(self._cached_page) <= 0:
break
if len(self._cached_page) != self._page_length:
self.close()
raise ValueError(
"Failed to read a meta data page from the SAS file.")
done = self._process_page_meta()
def _process_page_meta(self):
self._read_page_header()
pt = [const.page_meta_type, const.page_amd_type] + const.page_mix_types
if self._current_page_type in pt:
self._process_page_metadata()
return ((self._current_page_type in [256] + const.page_mix_types) or
(self._current_page_data_subheader_pointers is not None))
def _read_page_header(self):
bit_offset = self._page_bit_offset
tx = const.page_type_offset + bit_offset
self._current_page_type = self._read_int(tx, const.page_type_length)
tx = const.block_count_offset + bit_offset
self._current_page_block_count = self._read_int(
tx, const.block_count_length)
tx = const.subheader_count_offset + bit_offset
self._current_page_subheaders_count = (
self._read_int(tx, const.subheader_count_length))
def _process_page_metadata(self):
bit_offset = self._page_bit_offset
for i in range(self._current_page_subheaders_count):
pointer = self._process_subheader_pointers(
const.subheader_pointers_offset + bit_offset, i)
if pointer.length == 0:
continue
if pointer.compression == const.truncated_subheader_id:
continue
subheader_signature = self._read_subheader_signature(
pointer.offset)
subheader_index = (
self._get_subheader_index(subheader_signature,
pointer.compression, pointer.ptype))
self._process_subheader(subheader_index, pointer)
def _get_subheader_index(self, signature, compression, ptype):
index = const.subheader_signature_to_index.get(signature)
if index is None:
f1 = ((compression == const.compressed_subheader_id) or
(compression == 0))
f2 = (ptype == const.compressed_subheader_type)
if (self.compression != "") and f1 and f2:
index = const.SASIndex.data_subheader_index
else:
self.close()
raise ValueError("Unknown subheader signature")
return index
def _process_subheader_pointers(self, offset, subheader_pointer_index):
subheader_pointer_length = self._subheader_pointer_length
total_offset = (offset +
subheader_pointer_length * subheader_pointer_index)
subheader_offset = self._read_int(total_offset, self._int_length)
total_offset += self._int_length
subheader_length = self._read_int(total_offset, self._int_length)
total_offset += self._int_length
subheader_compression = self._read_int(total_offset, 1)
total_offset += 1
subheader_type = self._read_int(total_offset, 1)
x = _subheader_pointer()
x.offset = subheader_offset
x.length = subheader_length
x.compression = subheader_compression
x.ptype = subheader_type
return x
def _read_subheader_signature(self, offset):
subheader_signature = self._read_bytes(offset, self._int_length)
return subheader_signature
def _process_subheader(self, subheader_index, pointer):
offset = pointer.offset
length = pointer.length
if subheader_index == const.SASIndex.row_size_index:
processor = self._process_rowsize_subheader
elif subheader_index == const.SASIndex.column_size_index:
processor = self._process_columnsize_subheader
elif subheader_index == const.SASIndex.column_text_index:
processor = self._process_columntext_subheader
elif subheader_index == const.SASIndex.column_name_index:
processor = self._process_columnname_subheader
elif subheader_index == const.SASIndex.column_attributes_index:
processor = self._process_columnattributes_subheader
elif subheader_index == const.SASIndex.format_and_label_index:
processor = self._process_format_subheader
elif subheader_index == const.SASIndex.column_list_index:
processor = self._process_columnlist_subheader
elif subheader_index == const.SASIndex.subheader_counts_index:
processor = self._process_subheader_counts
elif subheader_index == const.SASIndex.data_subheader_index:
self._current_page_data_subheader_pointers.append(pointer)
return
else:
raise ValueError("unknown subheader index")
processor(offset, length)
def _process_rowsize_subheader(self, offset, length):
int_len = self._int_length
lcs_offset = offset
lcp_offset = offset
if self.U64:
lcs_offset += 682
lcp_offset += 706
else:
lcs_offset += 354
lcp_offset += 378
self.row_length = self._read_int(
offset + const.row_length_offset_multiplier * int_len, int_len)
self.row_count = self._read_int(
offset + const.row_count_offset_multiplier * int_len, int_len)
self.col_count_p1 = self._read_int(
offset + const.col_count_p1_multiplier * int_len, int_len)
self.col_count_p2 = self._read_int(
offset + const.col_count_p2_multiplier * int_len, int_len)
mx = const.row_count_on_mix_page_offset_multiplier * int_len
self._mix_page_row_count = self._read_int(offset + mx, int_len)
self._lcs = self._read_int(lcs_offset, 2)
self._lcp = self._read_int(lcp_offset, 2)
def _process_columnsize_subheader(self, offset, length):
int_len = self._int_length
offset += int_len
self.column_count = self._read_int(offset, int_len)
if (self.col_count_p1 + self.col_count_p2 !=
self.column_count):
print("Warning: column count mismatch (%d + %d != %d)\n",
self.col_count_p1, self.col_count_p2, self.column_count)
# Unknown purpose
def _process_subheader_counts(self, offset, length):
pass
def _process_columntext_subheader(self, offset, length):
offset += self._int_length
text_block_size = self._read_int(offset, const.text_block_size_length)
buf = self._read_bytes(offset, text_block_size)
cname_raw = buf[0:text_block_size].rstrip(b"\x00 ")
cname = cname_raw
if self.convert_header_text:
cname = cname.decode(self.encoding or self.default_encoding)
self.column_names_strings.append(cname)
if len(self.column_names_strings) == 1:
compression_literal = ""
for cl in const.compression_literals:
if cl in cname_raw:
compression_literal = cl
self.compression = compression_literal
offset -= self._int_length
offset1 = offset + 16
if self.U64:
offset1 += 4
buf = self._read_bytes(offset1, self._lcp)
compression_literal = buf.rstrip(b"\x00")
if compression_literal == b"":
self._lcs = 0
offset1 = offset + 32
if self.U64:
offset1 += 4
buf = self._read_bytes(offset1, self._lcp)
self.creator_proc = buf[0:self._lcp]
elif compression_literal == const.rle_compression:
offset1 = offset + 40
if self.U64:
offset1 += 4
buf = self._read_bytes(offset1, self._lcp)
self.creator_proc = buf[0:self._lcp]
elif self._lcs > 0:
self._lcp = 0
offset1 = offset + 16
if self.U64:
offset1 += 4
buf = self._read_bytes(offset1, self._lcs)
self.creator_proc = buf[0:self._lcp]
if self.convert_header_text:
if hasattr(self, "creator_proc"):
self.creator_proc = self.creator_proc.decode(
self.encoding or self.default_encoding)
def _process_columnname_subheader(self, offset, length):
int_len = self._int_length
offset += int_len
column_name_pointers_count = (length - 2 * int_len - 12) // 8
for i in range(column_name_pointers_count):
text_subheader = offset + const.column_name_pointer_length * \
(i + 1) + const.column_name_text_subheader_offset
col_name_offset = offset + const.column_name_pointer_length * \
(i + 1) + const.column_name_offset_offset
col_name_length = offset + const.column_name_pointer_length * \
(i + 1) + const.column_name_length_offset
idx = self._read_int(
text_subheader, const.column_name_text_subheader_length)
col_offset = self._read_int(
col_name_offset, const.column_name_offset_length)
col_len = self._read_int(
col_name_length, const.column_name_length_length)
name_str = self.column_names_strings[idx]
self.column_names.append(name_str[col_offset:col_offset + col_len])
def _process_columnattributes_subheader(self, offset, length):
int_len = self._int_length
column_attributes_vectors_count = (
length - 2 * int_len - 12) // (int_len + 8)
self.column_types = np.empty(
column_attributes_vectors_count, dtype=np.dtype('S1'))
self._column_data_lengths = np.empty(
column_attributes_vectors_count, dtype=np.int64)
self._column_data_offsets = np.empty(
column_attributes_vectors_count, dtype=np.int64)
for i in range(column_attributes_vectors_count):
col_data_offset = (offset + int_len +
const.column_data_offset_offset +
i * (int_len + 8))
col_data_len = (offset + 2 * int_len +
const.column_data_length_offset +
i * (int_len + 8))
col_types = (offset + 2 * int_len +
const.column_type_offset + i * (int_len + 8))
x = self._read_int(col_data_offset, int_len)
self._column_data_offsets[i] = x
x = self._read_int(col_data_len, const.column_data_length_length)
self._column_data_lengths[i] = x
x = self._read_int(col_types, const.column_type_length)
if x == 1:
self.column_types[i] = b'd'
else:
self.column_types[i] = b's'
def _process_columnlist_subheader(self, offset, length):
# unknown purpose
pass
def _process_format_subheader(self, offset, length):
int_len = self._int_length
text_subheader_format = (
offset +
const.column_format_text_subheader_index_offset +
3 * int_len)
col_format_offset = (offset +
const.column_format_offset_offset +
3 * int_len)
col_format_len = (offset +
const.column_format_length_offset +
3 * int_len)
text_subheader_label = (
offset +
const.column_label_text_subheader_index_offset +
3 * int_len)
col_label_offset = (offset +
const.column_label_offset_offset +
3 * int_len)
col_label_len = offset + const.column_label_length_offset + 3 * int_len
x = self._read_int(text_subheader_format,
const.column_format_text_subheader_index_length)
format_idx = min(x, len(self.column_names_strings) - 1)
format_start = self._read_int(
col_format_offset, const.column_format_offset_length)
format_len = self._read_int(
col_format_len, const.column_format_length_length)
label_idx = self._read_int(
text_subheader_label,
const.column_label_text_subheader_index_length)
label_idx = min(label_idx, len(self.column_names_strings) - 1)
label_start = self._read_int(
col_label_offset, const.column_label_offset_length)
label_len = self._read_int(col_label_len,
const.column_label_length_length)
label_names = self.column_names_strings[label_idx]
column_label = label_names[label_start: label_start + label_len]
format_names = self.column_names_strings[format_idx]
column_format = format_names[format_start: format_start + format_len]
current_column_number = len(self.columns)
col = _column()
col.col_id = current_column_number
col.name = self.column_names[current_column_number]
col.label = column_label
col.format = column_format
col.ctype = self.column_types[current_column_number]
col.length = self._column_data_lengths[current_column_number]
self.column_formats.append(column_format)
self.columns.append(col)
def read(self, nrows=None):
if (nrows is None) and (self.chunksize is not None):
nrows = self.chunksize
elif nrows is None:
nrows = self.row_count
if len(self.column_types) == 0:
self.close()
raise EmptyDataError("No columns to parse from file")
if self._current_row_in_file_index >= self.row_count:
return None
m = self.row_count - self._current_row_in_file_index
if nrows > m:
nrows = m
nd = (self.column_types == b'd').sum()
ns = (self.column_types == b's').sum()
self._string_chunk = np.empty((ns, nrows), dtype=np.object)
self._byte_chunk = np.empty((nd, 8 * nrows), dtype=np.uint8)
self._current_row_in_chunk_index = 0
p = Parser(self)
p.read(nrows)
rslt = self._chunk_to_dataframe()
if self.index is not None:
rslt = rslt.set_index(self.index)
return rslt
def _read_next_page(self):
self._current_page_data_subheader_pointers = []
self._cached_page = self._path_or_buf.read(self._page_length)
if len(self._cached_page) <= 0:
return True
elif len(self._cached_page) != self._page_length:
self.close()
msg = ("failed to read complete page from file "
"(read {:d} of {:d} bytes)")
raise ValueError(msg.format(len(self._cached_page),
self._page_length))
self._read_page_header()
if self._current_page_type == const.page_meta_type:
self._process_page_metadata()
pt = [const.page_meta_type, const.page_data_type]
pt += const.page_mix_types
if self._current_page_type not in pt:
return self._read_next_page()
return False
def _chunk_to_dataframe(self):
n = self._current_row_in_chunk_index
m = self._current_row_in_file_index
ix = range(m - n, m)
rslt = pd.DataFrame(index=ix)
js, jb = 0, 0
for j in range(self.column_count):
name = self.column_names[j]
if self.column_types[j] == b'd':
rslt[name] = self._byte_chunk[jb, :].view(
dtype=self.byte_order + 'd')
rslt[name] = np.asarray(rslt[name], dtype=np.float64)
if self.convert_dates:
unit = None
if self.column_formats[j] in const.sas_date_formats:
unit = 'd'
elif self.column_formats[j] in const.sas_datetime_formats:
unit = 's'
if unit:
rslt[name] = pd.to_datetime(rslt[name], unit=unit,
origin="1960-01-01")
jb += 1
elif self.column_types[j] == b's':
rslt[name] = self._string_chunk[js, :]
if self.convert_text and (self.encoding is not None):
rslt[name] = rslt[name].str.decode(
self.encoding or self.default_encoding)
if self.blank_missing:
ii = rslt[name].str.len() == 0
rslt.loc[ii, name] = np.nan
js += 1
else:
self.close()
raise ValueError("unknown column type %s" %
self.column_types[j])
return rslt
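# Illustrative sketch, not part of the reader itself: driving the chunked
# ``read`` method above through the public pandas API.  "example.sas7bdat"
# is a hypothetical path; as coded above, ``read()`` returns None once
# ``row_count`` rows have been consumed.
def _example_chunked_sas7bdat_read():
    rdr = pd.read_sas("example.sas7bdat", chunksize=5000)
    chunks = []
    while True:
        chunk = rdr.read()        # at most 5000 rows per call
        if chunk is None:         # file exhausted
            break
        chunks.append(chunk)
    rdr.close()
    return pd.concat(chunks)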
@@ -0,0 +1,171 @@
magic = (b"\x00\x00\x00\x00\x00\x00\x00\x00" +
b"\x00\x00\x00\x00\xc2\xea\x81\x60" +
b"\xb3\x14\x11\xcf\xbd\x92\x08\x00" +
b"\x09\xc7\x31\x8c\x18\x1f\x10\x11")
align_1_checker_value = b'3'
align_1_offset = 32
align_1_length = 1
align_1_value = 4
u64_byte_checker_value = b'3'
align_2_offset = 35
align_2_length = 1
align_2_value = 4
endianness_offset = 37
endianness_length = 1
platform_offset = 39
platform_length = 1
encoding_offset = 70
encoding_length = 1
dataset_offset = 92
dataset_length = 64
file_type_offset = 156
file_type_length = 8
date_created_offset = 164
date_created_length = 8
date_modified_offset = 172
date_modified_length = 8
header_size_offset = 196
header_size_length = 4
page_size_offset = 200
page_size_length = 4
page_count_offset = 204
page_count_length = 4
sas_release_offset = 216
sas_release_length = 8
sas_server_type_offset = 224
sas_server_type_length = 16
os_version_number_offset = 240
os_version_number_length = 16
os_maker_offset = 256
os_maker_length = 16
os_name_offset = 272
os_name_length = 16
page_bit_offset_x86 = 16
page_bit_offset_x64 = 32
subheader_pointer_length_x86 = 12
subheader_pointer_length_x64 = 24
page_type_offset = 0
page_type_length = 2
block_count_offset = 2
block_count_length = 2
subheader_count_offset = 4
subheader_count_length = 2
page_meta_type = 0
page_data_type = 256
page_amd_type = 1024
page_metc_type = 16384
page_comp_type = -28672
page_mix_types = [512, 640]
subheader_pointers_offset = 8
truncated_subheader_id = 1
compressed_subheader_id = 4
compressed_subheader_type = 1
text_block_size_length = 2
row_length_offset_multiplier = 5
row_count_offset_multiplier = 6
col_count_p1_multiplier = 9
col_count_p2_multiplier = 10
row_count_on_mix_page_offset_multiplier = 15
column_name_pointer_length = 8
column_name_text_subheader_offset = 0
column_name_text_subheader_length = 2
column_name_offset_offset = 2
column_name_offset_length = 2
column_name_length_offset = 4
column_name_length_length = 2
column_data_offset_offset = 8
column_data_length_offset = 8
column_data_length_length = 4
column_type_offset = 14
column_type_length = 1
column_format_text_subheader_index_offset = 22
column_format_text_subheader_index_length = 2
column_format_offset_offset = 24
column_format_offset_length = 2
column_format_length_offset = 26
column_format_length_length = 2
column_label_text_subheader_index_offset = 28
column_label_text_subheader_index_length = 2
column_label_offset_offset = 30
column_label_offset_length = 2
column_label_length_offset = 32
column_label_length_length = 2
rle_compression = b'SASYZCRL'
rdc_compression = b'SASYZCR2'
compression_literals = [rle_compression, rdc_compression]
# Incomplete list of encodings, using SAS nomenclature:
# http://support.sas.com/documentation/cdl/en/nlsref/61893/HTML/default/viewer.htm#a002607278.htm
encoding_names = {29: "latin1", 20: "utf-8", 33: "cyrillic", 60: "wlatin2",
61: "wcyrillic", 62: "wlatin1", 90: "ebcdic870"}
class SASIndex(object):
row_size_index = 0
column_size_index = 1
subheader_counts_index = 2
column_text_index = 3
column_name_index = 4
column_attributes_index = 5
format_and_label_index = 6
column_list_index = 7
data_subheader_index = 8
subheader_signature_to_index = {
b"\xF7\xF7\xF7\xF7": SASIndex.row_size_index,
b"\x00\x00\x00\x00\xF7\xF7\xF7\xF7": SASIndex.row_size_index,
b"\xF7\xF7\xF7\xF7\x00\x00\x00\x00": SASIndex.row_size_index,
b"\xF7\xF7\xF7\xF7\xFF\xFF\xFB\xFE": SASIndex.row_size_index,
b"\xF6\xF6\xF6\xF6": SASIndex.column_size_index,
b"\x00\x00\x00\x00\xF6\xF6\xF6\xF6": SASIndex.column_size_index,
b"\xF6\xF6\xF6\xF6\x00\x00\x00\x00": SASIndex.column_size_index,
b"\xF6\xF6\xF6\xF6\xFF\xFF\xFB\xFE": SASIndex.column_size_index,
b"\x00\xFC\xFF\xFF": SASIndex.subheader_counts_index,
b"\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index,
b"\x00\xFC\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.subheader_counts_index,
b"\xFF\xFF\xFF\xFF\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index,
b"\xFD\xFF\xFF\xFF": SASIndex.column_text_index,
b"\xFF\xFF\xFF\xFD": SASIndex.column_text_index,
b"\xFD\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_text_index,
b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD": SASIndex.column_text_index,
b"\xFF\xFF\xFF\xFF": SASIndex.column_name_index,
b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_name_index,
b"\xFC\xFF\xFF\xFF": SASIndex.column_attributes_index,
b"\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index,
b"\xFC\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_attributes_index,
b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index,
b"\xFE\xFB\xFF\xFF": SASIndex.format_and_label_index,
b"\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index,
b"\xFE\xFB\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.format_and_label_index,
b"\xFF\xFF\xFF\xFF\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index,
b"\xFE\xFF\xFF\xFF": SASIndex.column_list_index,
b"\xFF\xFF\xFF\xFE": SASIndex.column_list_index,
b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_list_index,
b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": SASIndex.column_list_index}
# List of frequently used SAS date and datetime formats
# http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm
# https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java
sas_date_formats = ("DATE", "DAY", "DDMMYY", "DOWNAME", "JULDAY", "JULIAN",
"MMDDYY", "MMYY", "MMYYC", "MMYYD", "MMYYP", "MMYYS",
"MMYYN", "MONNAME", "MONTH", "MONYY", "QTR", "QTRR",
"NENGO", "WEEKDATE", "WEEKDATX", "WEEKDAY", "WEEKV",
"WORDDATE", "WORDDATX", "YEAR", "YYMM", "YYMMC", "YYMMD",
"YYMMP", "YYMMS", "YYMMN", "YYMON", "YYMMDD", "YYQ",
"YYQC", "YYQD", "YYQP", "YYQS", "YYQN", "YYQR", "YYQRC",
"YYQRD", "YYQRP", "YYQRS", "YYQRN",
"YYMMDDP", "YYMMDDC", "E8601DA", "YYMMDDN", "MMDDYYC",
"MMDDYYS", "MMDDYYD", "YYMMDDS", "B8601DA", "DDMMYYN",
"YYMMDDD", "DDMMYYB", "DDMMYYP", "MMDDYYP", "YYMMDDB",
"MMDDYYN", "DDMMYYC", "DDMMYYD", "DDMMYYS",
"MINGUO")
sas_datetime_formats = ("DATETIME", "DTWKDATX",
"B8601DN", "B8601DT", "B8601DX", "B8601DZ", "B8601LX",
"E8601DN", "E8601DT", "E8601DX", "E8601DZ", "E8601LX",
"DATEAMPM", "DTDATE", "DTMONYY", "DTMONYY", "DTWKDATX",
"DTYEAR", "TOD", "MDYAMPM")
@@ -0,0 +1,465 @@
"""
Read a SAS XPort format file into a Pandas DataFrame.
Based on code from Jack Cushman (github.com/jcushman/xport).
The file format is defined here:
https://support.sas.com/techsup/technote/ts140.pdf
"""
from datetime import datetime
import pandas as pd
from pandas.io.common import get_filepath_or_buffer, BaseIterator
from pandas import compat
import struct
import numpy as np
from pandas.util._decorators import Appender
import warnings
_correct_line1 = ("HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!"
"000000000000000000000000000000 ")
_correct_header1 = ("HEADER RECORD*******MEMBER HEADER RECORD!!!!!!!"
"000000000000000001600000000")
_correct_header2 = ("HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!"
"000000000000000000000000000000 ")
_correct_obs_header = ("HEADER RECORD*******OBS HEADER RECORD!!!!!!!"
"000000000000000000000000000000 ")
_fieldkeys = ['ntype', 'nhfun', 'field_length', 'nvar0', 'name', 'label',
'nform', 'nfl', 'num_decimals', 'nfj', 'nfill', 'niform',
'nifl', 'nifd', 'npos', '_']
_base_params_doc = """\
Parameters
----------
filepath_or_buffer : string or file-like object
Path to SAS file or object implementing binary read method."""
_params2_doc = """\
index : identifier of index column
Identifier of column that should be used as index of the DataFrame.
encoding : string
Encoding for text data.
chunksize : int
Read file `chunksize` lines at a time, returns iterator."""
_format_params_doc = """\
format : string
File format, only `xport` is currently supported."""
_iterator_doc = """\
iterator : boolean, default False
Return XportReader object for reading file incrementally."""
_read_sas_doc = """Read a SAS file into a DataFrame.
%(_base_params_doc)s
%(_format_params_doc)s
%(_params2_doc)s
%(_iterator_doc)s
Returns
-------
DataFrame or XportReader
Examples
--------
Read a SAS Xport file:
>>> df = pandas.read_sas('filename.XPT')
Read an Xport file in 10,000 line chunks:
>>> itr = pandas.read_sas('filename.XPT', chunksize=10000)
>>> for chunk in itr:
...     do_something(chunk)
""" % {"_base_params_doc": _base_params_doc,
"_format_params_doc": _format_params_doc,
"_params2_doc": _params2_doc,
"_iterator_doc": _iterator_doc}
_xport_reader_doc = """\
Class for reading SAS Xport files.
%(_base_params_doc)s
%(_params2_doc)s
Attributes
----------
member_info : list
Contains information about the file
fields : list
Contains information about the variables in the file
""" % {"_base_params_doc": _base_params_doc,
"_params2_doc": _params2_doc}
_read_method_doc = """\
Read observations from SAS Xport file, returning as data frame.
Parameters
----------
nrows : int
Number of rows to read from data file; if None, read whole
file.
Returns
-------
A DataFrame.
"""
def _parse_date(datestr):
""" Given a date in xport format, return Python date. """
try:
# e.g. "16FEB11:10:07:55"
return datetime.strptime(datestr, "%d%b%y:%H:%M:%S")
except ValueError:
return pd.NaT
def _split_line(s, parts):
"""
Parameters
----------
s: string
Fixed-length string to split
parts: list of (name, length) pairs
Used to break up the string; the part named '_' is dropped from the output.
Returns
-------
Dict of name:contents of string at given location.
"""
out = {}
start = 0
for name, length in parts:
out[name] = s[start:start + length].strip()
start += length
del out['_']
return out
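# Illustrative sketch (made-up field layout): _split_line cuts a fixed-width
# record into named pieces, strips the blank padding, and drops the
# throwaway '_' field.
def _example_split_line():
    line = "height".ljust(8) + " " * 4 + "1.75"
    parts = [("name", 8), ("_", 4), ("value", 4)]
    return _split_line(line, parts)   # -> {'name': 'height', 'value': '1.75'}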
def _handle_truncated_float_vec(vec, nbytes):
# This feature is not well documented, but some SAS XPORT files
# have 2-7 byte "truncated" floats. To read these truncated
# floats, pad them with zeros on the right to make 8 byte floats.
#
# References:
# https://github.com/jcushman/xport/pull/3
# The R "foreign" library
if nbytes != 8:
vec1 = np.zeros(len(vec), np.dtype('S8'))
dtype = np.dtype('S%d,S%d' % (nbytes, 8 - nbytes))
vec2 = vec1.view(dtype=dtype)
vec2['f0'] = vec
return vec2
return vec
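# Illustrative sketch (hypothetical bytes): a column stored as 4-byte
# truncated floats is right-padded with zero bytes to the full 8-byte width
# so that _parse_float_vec can treat every value uniformly.
def _example_pad_truncated_floats():
    vec = np.array([b"ABCD"], dtype="S4")        # one 4-byte field
    padded = _handle_truncated_float_vec(vec, 4)
    return padded.tobytes()                      # -> b"ABCD\x00\x00\x00\x00"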
def _parse_float_vec(vec):
"""
Parse a vector of float values representing IBM 8 byte floats into
native 8 byte floats.
"""
dtype = np.dtype('>u4,>u4')
vec1 = vec.view(dtype=dtype)
xport1 = vec1['f0']
xport2 = vec1['f1']
# Start by setting first half of ieee number to first half of IBM
# number sans exponent
ieee1 = xport1 & 0x00ffffff
# Get the second half of the ibm number into the second half of
# the ieee number
ieee2 = xport2
# The fraction bit to the left of the binary point in the ieee
# format was set and the number was shifted 0, 1, 2, or 3
# places. This will tell us how to adjust the ibm exponent to be a
# power of 2 ieee exponent and how to shift the fraction bits to
# restore the correct magnitude.
shift = np.zeros(len(vec), dtype=np.uint8)
shift[np.where(xport1 & 0x00200000)] = 1
shift[np.where(xport1 & 0x00400000)] = 2
shift[np.where(xport1 & 0x00800000)] = 3
# shift the ieee number down the correct number of places then
# set the second half of the ieee number to be the second half
# of the ibm number shifted appropriately, ored with the bits
# from the first half that would have been shifted in if we
# could shift a double. All we are worried about are the low
# order 3 bits of the first half since we're only shifting by
# 1, 2, or 3.
ieee1 >>= shift
ieee2 = (xport2 >> shift) | ((xport1 & 0x00000007) << (29 + (3 - shift)))
# clear the 1 bit to the left of the binary point
ieee1 &= 0xffefffff
# set the exponent of the ieee number to be the actual exponent
# plus the shift count + 1023. Or this into the first half of the
# ieee number. The ibm exponent is excess 64 but is adjusted by 65
# since during conversion to ibm format the exponent is
# incremented by 1 and the fraction bits left 4 positions to the
# right of the radix point. (had to add >> 24 because C treats &
# 0x7f as 0x7f000000 and Python doesn't)
ieee1 |= ((((((xport1 >> 24) & 0x7f) - 65) << 2) +
shift + 1023) << 20) | (xport1 & 0x80000000)
ieee = np.empty((len(ieee1),), dtype='>u4,>u4')
ieee['f0'] = ieee1
ieee['f1'] = ieee2
ieee = ieee.view(dtype='>f8')
ieee = ieee.astype('f8')
return ieee
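# Worked example (single value): the IBM representation of 1.0 is the
# excess-64 exponent byte 0x41 (16 ** 1) followed by the fraction
# 0x10000000000000 (1/16), so the eight bytes below should decode to the
# IEEE double 1.0.
def _example_parse_ibm_float():
    vec = np.frombuffer(b"\x41\x10\x00\x00\x00\x00\x00\x00", dtype="S8")
    return _parse_float_vec(vec)   # -> array([1.])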
class XportReader(BaseIterator):
__doc__ = _xport_reader_doc
def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
chunksize=None):
self._encoding = encoding
self._lines_read = 0
self._index = index
self._chunksize = chunksize
if isinstance(filepath_or_buffer, str):
(filepath_or_buffer, encoding,
compression, should_close) = get_filepath_or_buffer(
filepath_or_buffer, encoding=encoding)
if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):
self.filepath_or_buffer = open(filepath_or_buffer, 'rb')
else:
# Copy to BytesIO, and ensure no encoding
contents = filepath_or_buffer.read()
try:
contents = contents.encode(self._encoding)
except (AttributeError, UnicodeError):
# contents is already bytes, or cannot be re-encoded; keep it as-is
pass
self.filepath_or_buffer = compat.BytesIO(contents)
self._read_header()
def close(self):
self.filepath_or_buffer.close()
def _get_row(self):
return self.filepath_or_buffer.read(80).decode()
def _read_header(self):
self.filepath_or_buffer.seek(0)
# read file header
line1 = self._get_row()
if line1 != _correct_line1:
self.close()
raise ValueError("Header record is not an XPORT file.")
line2 = self._get_row()
fif = [['prefix', 24], ['version', 8], ['OS', 8],
['_', 24], ['created', 16]]
file_info = _split_line(line2, fif)
if file_info['prefix'] != "SAS     SAS     SASLIB":
self.close()
raise ValueError("Header record has invalid prefix.")
file_info['created'] = _parse_date(file_info['created'])
self.file_info = file_info
line3 = self._get_row()
file_info['modified'] = _parse_date(line3[:16])
# read member header
header1 = self._get_row()
header2 = self._get_row()
headflag1 = header1.startswith(_correct_header1)
headflag2 = (header2 == _correct_header2)
if not (headflag1 and headflag2):
self.close()
raise ValueError("Member header not found")
# usually 140, could be 135
fieldnamelength = int(header1[-5:-2])
# member info
mem = [['prefix', 8], ['set_name', 8], ['sasdata', 8],
['version', 8], ['OS', 8], ['_', 24], ['created', 16]]
member_info = _split_line(self._get_row(), mem)
mem = [['modified', 16], ['_', 16], ['label', 40], ['type', 8]]
member_info.update(_split_line(self._get_row(), mem))
member_info['modified'] = _parse_date(member_info['modified'])
member_info['created'] = _parse_date(member_info['created'])
self.member_info = member_info
# read field names
types = {1: 'numeric', 2: 'char'}
fieldcount = int(self._get_row()[54:58])
datalength = fieldnamelength * fieldcount
# round up to nearest 80
if datalength % 80:
datalength += 80 - datalength % 80
fielddata = self.filepath_or_buffer.read(datalength)
fields = []
obs_length = 0
while len(fielddata) >= fieldnamelength:
# pull data for one field
field, fielddata = (fielddata[:fieldnamelength],
fielddata[fieldnamelength:])
# rest at end gets ignored, so if field is short, pad out
# to match struct pattern below
field = field.ljust(140)
fieldstruct = struct.unpack('>hhhh8s40s8shhh2s8shhl52s', field)
field = dict(zip(_fieldkeys, fieldstruct))
del field['_']
field['ntype'] = types[field['ntype']]
fl = field['field_length']
if field['ntype'] == 'numeric' and ((fl < 2) or (fl > 8)):
self.close()
msg = "Floating field width {0} is not between 2 and 8."
raise TypeError(msg.format(fl))
for k, v in field.items():
try:
field[k] = v.strip()
except AttributeError:
pass
obs_length += field['field_length']
fields += [field]
header = self._get_row()
if header != _correct_obs_header:
self.close()
raise ValueError("Observation header not found.")
self.fields = fields
self.record_length = obs_length
self.record_start = self.filepath_or_buffer.tell()
self.nobs = self._record_count()
self.columns = [x['name'].decode() for x in self.fields]
# Setup the dtype.
dtypel = []
for i, field in enumerate(self.fields):
dtypel.append(('s' + str(i), "S" + str(field['field_length'])))
dtype = np.dtype(dtypel)
self._dtype = dtype
def __next__(self):
return self.read(nrows=self._chunksize or 1)
def _record_count(self):
"""
Get number of records in file.
This is maybe suboptimal because we have to seek to the end of
the file.
Side effect: returns file position to record_start.
"""
self.filepath_or_buffer.seek(0, 2)
total_records_length = (self.filepath_or_buffer.tell() -
self.record_start)
if total_records_length % 80 != 0:
warnings.warn("xport file may be corrupted")
if self.record_length > 80:
self.filepath_or_buffer.seek(self.record_start)
return total_records_length // self.record_length
self.filepath_or_buffer.seek(-80, 2)
last_card = self.filepath_or_buffer.read(80)
last_card = np.frombuffer(last_card, dtype=np.uint64)
# 8 byte blank
ix = np.flatnonzero(last_card == 2314885530818453536)
if len(ix) == 0:
tail_pad = 0
else:
tail_pad = 8 * len(ix)
self.filepath_or_buffer.seek(self.record_start)
return (total_records_length - tail_pad) // self.record_length
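# Worked example of the padding heuristic above (hypothetical numbers,
# assuming no data word is itself eight blanks): with record_length == 40
# and three observations, the data occupy 120 bytes but the file is
# blank-padded to two full 80-byte cards (160 bytes).  The last card then
# ends in five all-blank 8-byte words, so tail_pad == 40 and
# (160 - 40) // 40 == 3 records are reported.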
def get_chunk(self, size=None):
"""
Reads lines from Xport file and returns as dataframe
Parameters
----------
size : int, defaults to None
Number of lines to read. If None, reads whole file.
Returns
-------
DataFrame
"""
if size is None:
size = self._chunksize
return self.read(nrows=size)
def _missing_double(self, vec):
v = vec.view(dtype='u1,u1,u2,u4')
miss = (v['f1'] == 0) & (v['f2'] == 0) & (v['f3'] == 0)
miss1 = (((v['f0'] >= 0x41) & (v['f0'] <= 0x5a)) |
(v['f0'] == 0x5f) | (v['f0'] == 0x2e))
miss &= miss1
return miss
@Appender(_read_method_doc)
def read(self, nrows=None):
if nrows is None:
nrows = self.nobs
read_lines = min(nrows, self.nobs - self._lines_read)
read_len = read_lines * self.record_length
if read_len <= 0:
self.close()
raise StopIteration
raw = self.filepath_or_buffer.read(read_len)
data = np.frombuffer(raw, dtype=self._dtype, count=read_lines)
df = pd.DataFrame(index=range(read_lines))
for j, x in enumerate(self.columns):
vec = data['s%d' % j]
ntype = self.fields[j]['ntype']
if ntype == "numeric":
vec = _handle_truncated_float_vec(
vec, self.fields[j]['field_length'])
miss = self._missing_double(vec)
v = _parse_float_vec(vec)
v[miss] = np.nan
elif self.fields[j]['ntype'] == 'char':
v = [y.rstrip() for y in vec]
if compat.PY3:
if self._encoding is not None:
v = [y.decode(self._encoding) for y in v]
df[x] = v
if self._index is None:
df.index = range(self._lines_read, self._lines_read + read_lines)
else:
df = df.set_index(self._index)
self._lines_read += read_lines
return df
@@ -0,0 +1,70 @@
"""
Read SAS sas7bdat or xport files.
"""
from pandas import compat
from pandas.io.common import _stringify_path
def read_sas(filepath_or_buffer, format=None, index=None, encoding=None,
chunksize=None, iterator=False):
"""
Read SAS files stored as either XPORT or SAS7BDAT format files.
Parameters
----------
filepath_or_buffer : string or file-like object
Path to the SAS file.
format : string {'xport', 'sas7bdat'} or None
If None, file format is inferred. If 'xport' or 'sas7bdat',
uses the corresponding format.
index : identifier of index column, defaults to None
Identifier of column that should be used as index of the DataFrame.
encoding : string, default is None
Encoding for text data. If None, text data are stored as raw bytes.
chunksize : int
Read file `chunksize` lines at a time, returns iterator.
iterator : bool, defaults to False
If True, returns an iterator for reading the file incrementally.
Returns
-------
DataFrame if iterator=False and chunksize=None, else SAS7BDATReader
or XportReader
"""
if format is None:
buffer_error_msg = ("If this is a buffer object rather "
"than a string name, you must specify "
"a format string")
filepath_or_buffer = _stringify_path(filepath_or_buffer)
if not isinstance(filepath_or_buffer, compat.string_types):
raise ValueError(buffer_error_msg)
fname = filepath_or_buffer.lower()
if fname.endswith(".xpt"):
format = "xport"
elif fname.endswith(".sas7bdat"):
format = "sas7bdat"
else:
raise ValueError("unable to infer format of SAS file")
if format.lower() == 'xport':
from pandas.io.sas.sas_xport import XportReader
reader = XportReader(filepath_or_buffer, index=index,
encoding=encoding,
chunksize=chunksize)
elif format.lower() == 'sas7bdat':
from pandas.io.sas.sas7bdat import SAS7BDATReader
reader = SAS7BDATReader(filepath_or_buffer, index=index,
encoding=encoding,
chunksize=chunksize)
else:
raise ValueError('unknown SAS format')
if iterator or chunksize:
return reader
data = reader.read()
reader.close()
return data
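# Illustrative usage sketch ("airline.sas7bdat" and "airline.xpt" are
# hypothetical file names): the format is inferred from the extension, and
# passing ``chunksize`` returns the underlying reader instead of a DataFrame.
def _example_read_sas_usage():
    import pandas as pd   # local import; this module only imports pandas.compat
    df = pd.read_sas("airline.sas7bdat", encoding="latin-1")
    rdr = pd.read_sas("airline.xpt", chunksize=1000)
    pieces = [chunk for chunk in rdr]   # each chunk is a <=1000-row DataFrame
    rdr.close()
    return df, pd.concat(pieces)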
File diff suppressed because it is too large
File diff suppressed because it is too large