Static code analysis and corrections

Kristjan Komlosi
2019-07-17 16:06:09 +02:00
parent 674692c2fc
commit 21bfae9fbc
10086 changed files with 2102103 additions and 51 deletions
@@ -0,0 +1,90 @@
from distutils.version import LooseVersion
import os
import pytest
import pandas.util.testing as tm
from pandas.io.parsers import read_csv
@pytest.fixture
def tips_file(datapath):
"""Path to the tips dataset"""
return datapath('io', 'parser', 'data', 'tips.csv')
@pytest.fixture
def jsonl_file(datapath):
"""Path a JSONL dataset"""
return datapath('io', 'parser', 'data', 'items.jsonl')
@pytest.fixture
def salaries_table(datapath):
"""DataFrame with the salaries dataset"""
return read_csv(datapath('io', 'parser', 'data', 'salaries.csv'), sep='\t')
@pytest.fixture
def s3_resource(tips_file, jsonl_file):
"""Fixture for mocking S3 interaction.
The primary bucket name is "pandas-test". The following datasets
are loaded:
- tips.csv
- tips.csv.gz
- tips.csv.bz2
- items.jsonl
A private bucket "cant_get_it" is also created. The boto3 s3 resource
is yielded by the fixture.
"""
pytest.importorskip('s3fs')
boto3 = pytest.importorskip('boto3')
botocore = pytest.importorskip('botocore')
if LooseVersion(botocore.__version__) < LooseVersion("1.11.0"):
# botocore leaks an uncatchable ResourceWarning before 1.11.0;
# see GH 23731 and https://github.com/boto/botocore/issues/1464
pytest.skip("botocore is leaking resources before 1.11.0")
with tm.ensure_safe_environment_variables():
# temporary workaround as moto fails for botocore >= 1.11 otherwise,
# see https://github.com/spulec/moto/issues/1924 & 1952
os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key")
os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret")
moto = pytest.importorskip('moto')
test_s3_files = [
('tips.csv', tips_file),
('tips.csv.gz', tips_file + '.gz'),
('tips.csv.bz2', tips_file + '.bz2'),
('items.jsonl', jsonl_file),
]
def add_tips_files(bucket_name):
for s3_key, file_name in test_s3_files:
with open(file_name, 'rb') as f:
conn.Bucket(bucket_name).put_object(
Key=s3_key,
Body=f)
try:
s3 = moto.mock_s3()
s3.start()
# see gh-16135
bucket = 'pandas-test'
conn = boto3.resource("s3", region_name="us-east-1")
conn.create_bucket(Bucket=bucket)
add_tips_files(bucket)
conn.create_bucket(Bucket='cant_get_it', ACL='private')
add_tips_files('cant_get_it')
yield conn
finally:
s3.stop()
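# Usage sketch (not part of the original file; the wiring is assumed): a test
# that requests the s3_resource fixture can read the mocked buckets through
# s3fs-style URLs, e.g.:
#
#     def test_read_csv_from_mock_s3(s3_resource, tips_file):
#         expected = read_csv(tips_file)
#         result = read_csv('s3://pandas-test/tips.csv')
#         tm.assert_frame_equal(result, expected)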
@@ -0,0 +1,74 @@
import pytest
from pandas.io.formats.console import detect_console_encoding
class MockEncoding(object): # TODO(py27): replace with mock
"""
Used to add a side effect when accessing the 'encoding' property. If the
side effect is a str, it is returned as the encoding. Otherwise, the side
effect should be an exception instance, which is raised on access.
"""
def __init__(self, encoding):
super(MockEncoding, self).__init__()
self.val = encoding
@property
def encoding(self):
return self.raise_or_return(self.val)
@staticmethod
def raise_or_return(val):
if isinstance(val, str):
return val
else:
raise val
@pytest.mark.parametrize('empty,filled', [
['stdin', 'stdout'],
['stdout', 'stdin']
])
def test_detect_console_encoding_from_stdout_stdin(monkeypatch, empty, filled):
# Ensure that sys.stdout.encoding or sys.stdin.encoding is used when
# it has a value.
# GH 21552
with monkeypatch.context() as context:
context.setattr('sys.{}'.format(empty), MockEncoding(''))
context.setattr('sys.{}'.format(filled), MockEncoding(filled))
assert detect_console_encoding() == filled
@pytest.mark.parametrize('encoding', [
AttributeError,
IOError,
'ascii'
])
def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding):
# GH 21552
with monkeypatch.context() as context:
context.setattr('locale.getpreferredencoding', lambda: 'foo')
context.setattr('sys.stdout', MockEncoding(encoding))
assert detect_console_encoding() == 'foo'
@pytest.mark.parametrize('std,locale', [
['ascii', 'ascii'],
['ascii', Exception],
[AttributeError, 'ascii'],
[AttributeError, Exception],
[IOError, 'ascii'],
[IOError, Exception]
])
def test_detect_console_encoding_fallback_to_default(monkeypatch, std, locale):
# When both the stdout/stdin encoding and the locale preferred encoding
# checks fail (or return 'ascii'), we should fall back to the sys default
# encoding.
# GH 21552
with monkeypatch.context() as context:
context.setattr(
'locale.getpreferredencoding',
lambda: MockEncoding.raise_or_return(locale)
)
context.setattr('sys.stdout', MockEncoding(std))
context.setattr('sys.getdefaultencoding', lambda: 'sysDefaultEncoding')
assert detect_console_encoding() == 'sysDefaultEncoding'
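# The three tests above pin down the fallback chain: stream encodings first,
# then the locale, then the interpreter default. A rough sketch of that logic
# (an assumption for illustration; the real implementation lives in
# pandas/io/formats/console.py):
#
#     import locale
#     import sys
#
#     def detect_console_encoding_sketch():
#         encoding = None
#         try:
#             encoding = sys.stdout.encoding or sys.stdin.encoding
#         except (AttributeError, IOError):
#             pass
#         if not encoding or encoding.lower() == 'ascii':
#             try:
#                 encoding = locale.getpreferredencoding()
#             except Exception:
#                 pass
#         if not encoding or encoding.lower() == 'ascii':
#             encoding = sys.getdefaultencoding()
#         return encoding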
@@ -0,0 +1,187 @@
import pytest
from pandas.util import testing as tm
from pandas.io.formats.css import CSSResolver, CSSWarning
def assert_resolves(css, props, inherited=None):
resolve = CSSResolver()
actual = resolve(css, inherited=inherited)
assert props == actual
def assert_same_resolution(css1, css2, inherited=None):
resolve = CSSResolver()
resolved1 = resolve(css1, inherited=inherited)
resolved2 = resolve(css2, inherited=inherited)
assert resolved1 == resolved2
@pytest.mark.parametrize('name,norm,abnorm', [
('whitespace', 'hello: world; foo: bar',
' \t hello \t :\n world \n ; \n foo: \tbar\n\n'),
('case', 'hello: world; foo: bar', 'Hello: WORLD; foO: bar'),
('empty-decl', 'hello: world; foo: bar',
'; hello: world;; foo: bar;\n; ;'),
('empty-list', '', ';'),
])
def test_css_parse_normalisation(name, norm, abnorm):
assert_same_resolution(norm, abnorm)
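# For orientation, CSSResolver maps a declaration string to a dict of
# resolved atomic properties; e.g. (value per the font-size cases asserted
# further below; treat this as an illustrative sketch):
#
#     resolve = CSSResolver()
#     resolve('font-size: 1em')
#     # -> {'font-size': '12pt'}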
@pytest.mark.parametrize(
'invalid_css,remainder', [
# No colon
('hello-world', ''),
('border-style: solid; hello-world', 'border-style: solid'),
('border-style: solid; hello-world; font-weight: bold',
'border-style: solid; font-weight: bold'),
# Unclosed string fail
# Invalid size
('font-size: blah', 'font-size: 1em'),
('font-size: 1a2b', 'font-size: 1em'),
('font-size: 1e5pt', 'font-size: 1em'),
('font-size: 1+6pt', 'font-size: 1em'),
('font-size: 1unknownunit', 'font-size: 1em'),
('font-size: 10', 'font-size: 1em'),
('font-size: 10 pt', 'font-size: 1em'),
])
def test_css_parse_invalid(invalid_css, remainder):
with tm.assert_produces_warning(CSSWarning):
assert_same_resolution(invalid_css, remainder)
# TODO: we should be checking that in other cases no warnings are raised
@pytest.mark.parametrize(
'shorthand,expansions',
[('margin', ['margin-top', 'margin-right',
'margin-bottom', 'margin-left']),
('padding', ['padding-top', 'padding-right',
'padding-bottom', 'padding-left']),
('border-width', ['border-top-width', 'border-right-width',
'border-bottom-width', 'border-left-width']),
('border-color', ['border-top-color', 'border-right-color',
'border-bottom-color', 'border-left-color']),
('border-style', ['border-top-style', 'border-right-style',
'border-bottom-style', 'border-left-style']),
])
def test_css_side_shorthands(shorthand, expansions):
top, right, bottom, left = expansions
assert_resolves('{shorthand}: 1pt'.format(shorthand=shorthand),
{top: '1pt', right: '1pt',
bottom: '1pt', left: '1pt'})
assert_resolves('{shorthand}: 1pt 4pt'.format(shorthand=shorthand),
{top: '1pt', right: '4pt',
bottom: '1pt', left: '4pt'})
assert_resolves('{shorthand}: 1pt 4pt 2pt'.format(shorthand=shorthand),
{top: '1pt', right: '4pt',
bottom: '2pt', left: '4pt'})
assert_resolves('{shorthand}: 1pt 4pt 2pt 0pt'.format(shorthand=shorthand),
{top: '1pt', right: '4pt',
bottom: '2pt', left: '0pt'})
with tm.assert_produces_warning(CSSWarning):
assert_resolves(
'{shorthand}: 1pt 1pt 1pt 1pt 1pt'.format(shorthand=shorthand), {})
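# The expansion rule exercised above follows the usual CSS convention: one
# value sets all four sides; two set (top/bottom, left/right); three set
# (top, left/right, bottom); four set (top, right, bottom, left); anything
# else warns. A sketch of that table (an assumption for illustration):
#
#     def expand_sides(values):
#         top, right, bottom, left = {
#             1: (0, 0, 0, 0),
#             2: (0, 1, 0, 1),
#             3: (0, 1, 2, 1),
#             4: (0, 1, 2, 3),
#         }[len(values)]
#         return values[top], values[right], values[bottom], values[left]
#
#     # expand_sides(['1pt', '4pt', '2pt']) -> ('1pt', '4pt', '2pt', '4pt')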
@pytest.mark.parametrize('style,inherited,equiv', [
('margin: 1px; margin: 2px', '',
'margin: 2px'),
('margin: 1px', 'margin: 2px',
'margin: 1px'),
('margin: 1px; margin: inherit', 'margin: 2px',
'margin: 2px'),
('margin: 1px; margin-top: 2px', '',
'margin-left: 1px; margin-right: 1px; ' +
'margin-bottom: 1px; margin-top: 2px'),
('margin-top: 2px', 'margin: 1px',
'margin: 1px; margin-top: 2px'),
('margin: 1px', 'margin-top: 2px',
'margin: 1px'),
('margin: 1px; margin-top: inherit', 'margin: 2px',
'margin: 1px; margin-top: 2px'),
])
def test_css_precedence(style, inherited, equiv):
resolve = CSSResolver()
inherited_props = resolve(inherited)
style_props = resolve(style, inherited=inherited_props)
equiv_props = resolve(equiv)
assert style_props == equiv_props
@pytest.mark.parametrize('style,equiv', [
('margin: 1px; margin-top: inherit',
'margin-bottom: 1px; margin-right: 1px; margin-left: 1px'),
('margin-top: inherit', ''),
('margin-top: initial', ''),
])
def test_css_none_absent(style, equiv):
assert_same_resolution(style, equiv)
@pytest.mark.parametrize('size,resolved', [
('xx-small', '6pt'),
('x-small', '{pt:f}pt'.format(pt=7.5)),
('small', '{pt:f}pt'.format(pt=9.6)),
('medium', '12pt'),
('large', '{pt:f}pt'.format(pt=13.5)),
('x-large', '18pt'),
('xx-large', '24pt'),
('8px', '6pt'),
('1.25pc', '15pt'),
('.25in', '18pt'),
('02.54cm', '72pt'),
('25.4mm', '72pt'),
('101.6q', '72pt'),
])
@pytest.mark.parametrize('relative_to', # invariant to inherited size
[None, '16pt'])
def test_css_absolute_font_size(size, relative_to, resolved):
if relative_to is None:
inherited = None
else:
inherited = {'font-size': relative_to}
assert_resolves('font-size: {size}'.format(size=size),
{'font-size': resolved}, inherited=inherited)
@pytest.mark.parametrize('size,relative_to,resolved', [
('1em', None, '12pt'),
('1.0em', None, '12pt'),
('1.25em', None, '15pt'),
('1em', '16pt', '16pt'),
('1.0em', '16pt', '16pt'),
('1.25em', '16pt', '20pt'),
('1rem', '16pt', '12pt'),
('1.0rem', '16pt', '12pt'),
('1.25rem', '16pt', '15pt'),
('100%', None, '12pt'),
('125%', None, '15pt'),
('100%', '16pt', '16pt'),
('125%', '16pt', '20pt'),
('2ex', None, '12pt'),
('2.0ex', None, '12pt'),
('2.50ex', None, '15pt'),
('inherit', '16pt', '16pt'),
('smaller', None, '10pt'),
('smaller', '18pt', '15pt'),
('larger', None, '{pt:f}pt'.format(pt=14.4)),
('larger', '15pt', '18pt'),
])
def test_css_relative_font_size(size, relative_to, resolved):
if relative_to is None:
inherited = None
else:
inherited = {'font-size': relative_to}
assert_resolves('font-size: {size}'.format(size=size),
{'font-size': resolved}, inherited=inherited)
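# The relative units above reduce to arithmetic on the inherited size (12pt
# when nothing is inherited): em and % scale the inherited size, rem scales
# the default size regardless of inheritance, and ex is treated as half an
# em. A minimal check of that reading (illustrative only):
#
#     def em_to_pt(em, inherited_pt=12.0):
#         return em * inherited_pt
#
#     assert em_to_pt(1.25, 16.0) == 20.0  # matches ('1.25em', '16pt', '20pt')
#     assert em_to_pt(1.25) == 15.0        # matches ('1.25em', None, '15pt')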
@@ -0,0 +1,196 @@
import numpy as np
from pandas.compat import u
import pandas as pd
from pandas import DataFrame
from pandas.util import testing as tm
import pandas.io.formats.format as fmt
class TestEngFormatter(object):
def test_eng_float_formatter(self):
df = DataFrame({'A': [1.41, 141., 14100, 1410000.]})
fmt.set_eng_float_format()
result = df.to_string()
expected = (' A\n'
'0 1.410E+00\n'
'1 141.000E+00\n'
'2 14.100E+03\n'
'3 1.410E+06')
assert result == expected
fmt.set_eng_float_format(use_eng_prefix=True)
result = df.to_string()
expected = (' A\n'
'0 1.410\n'
'1 141.000\n'
'2 14.100k\n'
'3 1.410M')
assert result == expected
fmt.set_eng_float_format(accuracy=0)
result = df.to_string()
expected = (' A\n'
'0 1E+00\n'
'1 141E+00\n'
'2 14E+03\n'
'3 1E+06')
assert result == expected
tm.reset_display_options()
def compare(self, formatter, input, output):
formatted_input = formatter(input)
assert formatted_input == output
def compare_all(self, formatter, in_out):
"""
Parameters
----------
formatter : EngFormatter
The formatter under test.
in_out : list of tuples
Each tuple is (number, expected_formatting); it is asserted that
formatter(number) == expected_formatting. *number* should be >= 0,
because formatter(-number) is also checked against the expected string
with its leading space replaced by '-'.
"""
for input, output in in_out:
self.compare(formatter, input, output)
self.compare(formatter, -input, "-" + output[1:])
def test_exponents_with_eng_prefix(self):
formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True)
f = np.sqrt(2)
in_out = [
(f * 10 ** -24, " 1.414y"), (f * 10 ** -23, " 14.142y"),
(f * 10 ** -22, " 141.421y"), (f * 10 ** -21, " 1.414z"),
(f * 10 ** -20, " 14.142z"), (f * 10 ** -19, " 141.421z"),
(f * 10 ** -18, " 1.414a"), (f * 10 ** -17, " 14.142a"),
(f * 10 ** -16, " 141.421a"), (f * 10 ** -15, " 1.414f"),
(f * 10 ** -14, " 14.142f"), (f * 10 ** -13, " 141.421f"),
(f * 10 ** -12, " 1.414p"), (f * 10 ** -11, " 14.142p"),
(f * 10 ** -10, " 141.421p"), (f * 10 ** -9, " 1.414n"),
(f * 10 ** -8, " 14.142n"), (f * 10 ** -7, " 141.421n"),
(f * 10 ** -6, " 1.414u"), (f * 10 ** -5, " 14.142u"),
(f * 10 ** -4, " 141.421u"), (f * 10 ** -3, " 1.414m"),
(f * 10 ** -2, " 14.142m"), (f * 10 ** -1, " 141.421m"),
(f * 10 ** 0, " 1.414"), (f * 10 ** 1, " 14.142"),
(f * 10 ** 2, " 141.421"), (f * 10 ** 3, " 1.414k"),
(f * 10 ** 4, " 14.142k"), (f * 10 ** 5, " 141.421k"),
(f * 10 ** 6, " 1.414M"), (f * 10 ** 7, " 14.142M"),
(f * 10 ** 8, " 141.421M"), (f * 10 ** 9, " 1.414G"),
(f * 10 ** 10, " 14.142G"), (f * 10 ** 11, " 141.421G"),
(f * 10 ** 12, " 1.414T"), (f * 10 ** 13, " 14.142T"),
(f * 10 ** 14, " 141.421T"), (f * 10 ** 15, " 1.414P"),
(f * 10 ** 16, " 14.142P"), (f * 10 ** 17, " 141.421P"),
(f * 10 ** 18, " 1.414E"), (f * 10 ** 19, " 14.142E"),
(f * 10 ** 20, " 141.421E"), (f * 10 ** 21, " 1.414Z"),
(f * 10 ** 22, " 14.142Z"), (f * 10 ** 23, " 141.421Z"),
(f * 10 ** 24, " 1.414Y"), (f * 10 ** 25, " 14.142Y"),
(f * 10 ** 26, " 141.421Y")]
self.compare_all(formatter, in_out)
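# Engineering notation keeps the power of ten a multiple of 3, so the
# mantissa always lands in [1, 1000) and maps onto the SI prefixes used
# above (y..Y). A sketch of the decomposition (an assumption mirroring what
# EngFormatter computes):
#
#     import math
#
#     def eng_decompose(x):
#         exp3 = int(math.floor(math.log10(abs(x)) / 3.0)) * 3
#         return x / 10 ** exp3, exp3
#
#     # eng_decompose(14100.0) -> (14.1, 3), shown as '14.100k' with
#     # use_eng_prefix=True, or '14.100E+03' without.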
def test_exponents_without_eng_prefix(self):
formatter = fmt.EngFormatter(accuracy=4, use_eng_prefix=False)
f = np.pi
in_out = [
(f * 10 ** -24, " 3.1416E-24"),
(f * 10 ** -23, " 31.4159E-24"),
(f * 10 ** -22, " 314.1593E-24"),
(f * 10 ** -21, " 3.1416E-21"),
(f * 10 ** -20, " 31.4159E-21"),
(f * 10 ** -19, " 314.1593E-21"),
(f * 10 ** -18, " 3.1416E-18"),
(f * 10 ** -17, " 31.4159E-18"),
(f * 10 ** -16, " 314.1593E-18"),
(f * 10 ** -15, " 3.1416E-15"),
(f * 10 ** -14, " 31.4159E-15"),
(f * 10 ** -13, " 314.1593E-15"),
(f * 10 ** -12, " 3.1416E-12"),
(f * 10 ** -11, " 31.4159E-12"),
(f * 10 ** -10, " 314.1593E-12"),
(f * 10 ** -9, " 3.1416E-09"),
(f * 10 ** -8, " 31.4159E-09"),
(f * 10 ** -7, " 314.1593E-09"),
(f * 10 ** -6, " 3.1416E-06"),
(f * 10 ** -5, " 31.4159E-06"),
(f * 10 ** -4, " 314.1593E-06"),
(f * 10 ** -3, " 3.1416E-03"),
(f * 10 ** -2, " 31.4159E-03"),
(f * 10 ** -1, " 314.1593E-03"),
(f * 10 ** 0, " 3.1416E+00"),
(f * 10 ** 1, " 31.4159E+00"),
(f * 10 ** 2, " 314.1593E+00"),
(f * 10 ** 3, " 3.1416E+03"),
(f * 10 ** 4, " 31.4159E+03"),
(f * 10 ** 5, " 314.1593E+03"),
(f * 10 ** 6, " 3.1416E+06"),
(f * 10 ** 7, " 31.4159E+06"),
(f * 10 ** 8, " 314.1593E+06"),
(f * 10 ** 9, " 3.1416E+09"),
(f * 10 ** 10, " 31.4159E+09"),
(f * 10 ** 11, " 314.1593E+09"),
(f * 10 ** 12, " 3.1416E+12"),
(f * 10 ** 13, " 31.4159E+12"),
(f * 10 ** 14, " 314.1593E+12"),
(f * 10 ** 15, " 3.1416E+15"),
(f * 10 ** 16, " 31.4159E+15"),
(f * 10 ** 17, " 314.1593E+15"),
(f * 10 ** 18, " 3.1416E+18"),
(f * 10 ** 19, " 31.4159E+18"),
(f * 10 ** 20, " 314.1593E+18"),
(f * 10 ** 21, " 3.1416E+21"),
(f * 10 ** 22, " 31.4159E+21"),
(f * 10 ** 23, " 314.1593E+21"),
(f * 10 ** 24, " 3.1416E+24"),
(f * 10 ** 25, " 31.4159E+24"),
(f * 10 ** 26, " 314.1593E+24")]
self.compare_all(formatter, in_out)
def test_rounding(self):
formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True)
in_out = [(5.55555, ' 5.556'), (55.5555, ' 55.556'),
(555.555, ' 555.555'), (5555.55, ' 5.556k'),
(55555.5, ' 55.556k'), (555555, ' 555.555k')]
self.compare_all(formatter, in_out)
formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True)
in_out = [(5.55555, ' 5.6'), (55.5555, ' 55.6'), (555.555, ' 555.6'),
(5555.55, ' 5.6k'), (55555.5, ' 55.6k'), (555555, ' 555.6k')]
self.compare_all(formatter, in_out)
formatter = fmt.EngFormatter(accuracy=0, use_eng_prefix=True)
in_out = [(5.55555, ' 6'), (55.5555, ' 56'), (555.555, ' 556'),
(5555.55, ' 6k'), (55555.5, ' 56k'), (555555, ' 556k')]
self.compare_all(formatter, in_out)
formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True)
result = formatter(0)
assert result == u(' 0.000')
def test_nan(self):
# Issue #11981
formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True)
result = formatter(np.nan)
assert result == u('NaN')
df = pd.DataFrame({'a': [1.5, 10.3, 20.5],
'b': [50.3, 60.67, 70.12],
'c': [100.2, 101.33, 120.33]})
pt = df.pivot_table(values='a', index='b', columns='c')
fmt.set_eng_float_format(accuracy=1)
result = pt.to_string()
assert 'NaN' in result
tm.reset_display_options()
def test_inf(self):
# Issue #11981
formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True)
result = formatter(np.inf)
assert result == u('inf')
File diff suppressed because it is too large
@@ -0,0 +1,204 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
import pandas as pd
from pandas import compat
import pandas.core.config as cf
import pandas.io.formats.format as fmt
import pandas.io.formats.printing as printing
def test_adjoin():
data = [['a', 'b', 'c'], ['dd', 'ee', 'ff'], ['ggg', 'hhh', 'iii']]
expected = 'a dd ggg\nb ee hhh\nc ff iii'
adjoined = printing.adjoin(2, *data)
assert (adjoined == expected)
def test_repr_binary_type():
import string
letters = string.ascii_letters
btype = compat.binary_type
try:
raw = btype(letters, encoding=cf.get_option('display.encoding'))
except TypeError:
raw = btype(letters)
b = compat.text_type(compat.bytes_to_str(raw))
res = printing.pprint_thing(b, quote_strings=True)
assert res == repr(b)
res = printing.pprint_thing(b, quote_strings=False)
assert res == b
class TestFormatBase(object):
def test_adjoin(self):
data = [['a', 'b', 'c'], ['dd', 'ee', 'ff'], ['ggg', 'hhh', 'iii']]
expected = 'a dd ggg\nb ee hhh\nc ff iii'
adjoined = printing.adjoin(2, *data)
assert adjoined == expected
def test_adjoin_unicode(self):
data = [[u'あ', 'b', 'c'], ['dd', u'ええ', 'ff'], ['ggg', 'hhh', u'いいい']]
expected = u'あ dd ggg\nb ええ hhh\nc ff いいい'
adjoined = printing.adjoin(2, *data)
assert adjoined == expected
adj = fmt.EastAsianTextAdjustment()
expected = u"""あ dd ggg
b ええ hhh
c ff いいい"""
adjoined = adj.adjoin(2, *data)
assert adjoined == expected
cols = adjoined.split('\n')
assert adj.len(cols[0]) == 13
assert adj.len(cols[1]) == 13
assert adj.len(cols[2]) == 16
expected = u"""あ dd ggg
b ええ hhh
c ff いいい"""
adjoined = adj.adjoin(7, *data)
assert adjoined == expected
cols = adjoined.split('\n')
assert adj.len(cols[0]) == 23
assert adj.len(cols[1]) == 23
assert adj.len(cols[2]) == 26
def test_justify(self):
adj = fmt.EastAsianTextAdjustment()
def just(x, *args, **kwargs):
# wrapper to test single str
return adj.justify([x], *args, **kwargs)[0]
assert just('abc', 5, mode='left') == 'abc '
assert just('abc', 5, mode='center') == ' abc '
assert just('abc', 5, mode='right') == ' abc'
assert just(u'abc', 5, mode='left') == 'abc '
assert just(u'abc', 5, mode='center') == ' abc '
assert just(u'abc', 5, mode='right') == ' abc'
assert just(u'パンダ', 5, mode='left') == u'パンダ'
assert just(u'パンダ', 5, mode='center') == u'パンダ'
assert just(u'パンダ', 5, mode='right') == u'パンダ'
assert just(u'パンダ', 10, mode='left') == u'パンダ '
assert just(u'パンダ', 10, mode='center') == u' パンダ '
assert just(u'パンダ', 10, mode='right') == u' パンダ'
def test_east_asian_len(self):
adj = fmt.EastAsianTextAdjustment()
assert adj.len('abc') == 3
assert adj.len(u'abc') == 3
assert adj.len(u'パンダ') == 6
assert adj.len(u'ﾊﾟﾝﾀﾞ') == 5
assert adj.len(u'パンダpanda') == 11
assert adj.len(u'ﾊﾟﾝﾀﾞpanda') == 10
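# The lengths above follow the Unicode east-asian-width rule: wide ('W') and
# fullwidth ('F') characters occupy two columns, everything else (including
# half-width katakana) one. A standalone sketch:
#
#     import unicodedata
#
#     def display_width(text):
#         return sum(2 if unicodedata.east_asian_width(c) in 'WF' else 1
#                    for c in text)
#
#     # display_width(u'パンダ') == 6; display_width(u'abc') == 3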
def test_ambiguous_width(self):
adj = fmt.EastAsianTextAdjustment()
assert adj.len(u'¡¡ab') == 4
with cf.option_context('display.unicode.ambiguous_as_wide', True):
adj = fmt.EastAsianTextAdjustment()
assert adj.len(u'¡¡ab') == 6
data = [[u'あ', 'b', 'c'], ['dd', u'ええ', 'ff'],
['ggg', u'¡¡ab', u'いいい']]
expected = u'あ dd ggg \nb ええ ¡¡ab\nc ff いいい'
adjoined = adj.adjoin(2, *data)
assert adjoined == expected
class TestTableSchemaRepr(object):
@classmethod
def setup_class(cls):
pytest.importorskip('IPython')
from IPython.core.interactiveshell import InteractiveShell
cls.display_formatter = InteractiveShell.instance().display_formatter
def test_publishes(self):
df = pd.DataFrame({"A": [1, 2]})
objects = [df['A'], df]  # series, then dataframe
expected_keys = [
{'text/plain', 'application/vnd.dataresource+json'},
{'text/plain', 'text/html', 'application/vnd.dataresource+json'},
]
opt = pd.option_context('display.html.table_schema', True)
for obj, expected in zip(objects, expected_keys):
with opt:
formatted = self.display_formatter.format(obj)
assert set(formatted[0].keys()) == expected
with_latex = pd.option_context('display.latex.repr', True)
with opt, with_latex:
formatted = self.display_formatter.format(obj)
expected = {'text/plain', 'text/html', 'text/latex',
'application/vnd.dataresource+json'}
assert set(formatted[0].keys()) == expected
def test_publishes_not_implemented(self):
# column MultiIndex
# GH 15996
midx = pd.MultiIndex.from_product([['A', 'B'], ['a', 'b', 'c']])
df = pd.DataFrame(np.random.randn(5, len(midx)), columns=midx)
opt = pd.option_context('display.html.table_schema', True)
with opt:
formatted = self.display_formatter.format(df)
expected = {'text/plain', 'text/html'}
assert set(formatted[0].keys()) == expected
def test_config_on(self):
df = pd.DataFrame({"A": [1, 2]})
with pd.option_context("display.html.table_schema", True):
result = df._repr_data_resource_()
assert result is not None
def test_config_default_off(self):
df = pd.DataFrame({"A": [1, 2]})
with pd.option_context("display.html.table_schema", False):
result = df._repr_data_resource_()
assert result is None
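# Usage sketch outside the test suite (assumed): enabling the option makes
# DataFrame/Series publish a Table Schema payload that notebook frontends
# can pick up under 'application/vnd.dataresource+json':
#
#     pd.set_option('display.html.table_schema', True)
#     payload = pd.DataFrame({"A": [1, 2]})._repr_data_resource_()  # a dict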
def test_enable_data_resource_formatter(self):
# GH 10491
formatters = self.display_formatter.formatters
mimetype = 'application/vnd.dataresource+json'
with pd.option_context('display.html.table_schema', True):
assert 'application/vnd.dataresource+json' in formatters
assert formatters[mimetype].enabled
# still there, just disabled
assert 'application/vnd.dataresource+json' in formatters
assert not formatters[mimetype].enabled
# able to re-set
with pd.option_context('display.html.table_schema', True):
assert 'application/vnd.dataresource+json' in formatters
assert formatters[mimetype].enabled
# smoke test that it works
self.display_formatter.format(cf)
File diff suppressed because it is too large
@@ -0,0 +1,563 @@
# -*- coding: utf-8 -*-
import os
import sys
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, compat
from pandas.util import testing as tm
class TestToCSV(object):
@pytest.mark.xfail((3, 6, 5) > sys.version_info >= (3, 5),
reason=("Python csv library bug "
"(see https://bugs.python.org/issue32255)"))
def test_to_csv_with_single_column(self):
# see gh-18676, https://bugs.python.org/issue32255
#
# Python's CSV library adds an extraneous '""'
# before the newline when the NaN-value is in
# the first row. Otherwise, only the newline
# character is added. This behavior is inconsistent
# and was patched in https://bugs.python.org/pull_request4672.
df1 = DataFrame([None, 1])
expected1 = """\
""
1.0
"""
with tm.ensure_clean('test.csv') as path:
df1.to_csv(path, header=None, index=None)
with open(path, 'r') as f:
assert f.read() == expected1
df2 = DataFrame([1, None])
expected2 = """\
1.0
""
"""
with tm.ensure_clean('test.csv') as path:
df2.to_csv(path, header=None, index=None)
with open(path, 'r') as f:
assert f.read() == expected2
def test_to_csv_default_encoding(self):
# GH17097
df = DataFrame({'col': [u"AAAAA", u"ÄÄÄÄÄ", u"ßßßßß", u"聞聞聞聞聞"]})
with tm.ensure_clean('test.csv') as path:
# the default to_csv encoding in Python 2 is ascii, and in
# Python 3 it is utf-8.
if pd.compat.PY2:
# non-ascii data requires passing encoding='utf-8' explicitly
with pytest.raises(UnicodeEncodeError, match='ascii'):
df.to_csv(path)
else:
df.to_csv(path)
tm.assert_frame_equal(pd.read_csv(path, index_col=0), df)
def test_to_csv_quotechar(self):
df = DataFrame({'col': [1, 2]})
expected = """\
"","col"
"0","1"
"1","2"
"""
with tm.ensure_clean('test.csv') as path:
df.to_csv(path, quoting=1) # 1=QUOTE_ALL
with open(path, 'r') as f:
assert f.read() == expected
expected = """\
$$,$col$
$0$,$1$
$1$,$2$
"""
with tm.ensure_clean('test.csv') as path:
df.to_csv(path, quoting=1, quotechar="$")
with open(path, 'r') as f:
assert f.read() == expected
with tm.ensure_clean('test.csv') as path:
with pytest.raises(TypeError, match='quotechar'):
df.to_csv(path, quoting=1, quotechar=None)
def test_to_csv_doublequote(self):
df = DataFrame({'col': ['a"a', '"bb"']})
expected = '''\
"","col"
"0","a""a"
"1","""bb"""
'''
with tm.ensure_clean('test.csv') as path:
df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL
with open(path, 'r') as f:
assert f.read() == expected
from _csv import Error
with tm.ensure_clean('test.csv') as path:
with pytest.raises(Error, match='escapechar'):
df.to_csv(path, doublequote=False) # no escapechar set
def test_to_csv_escapechar(self):
df = DataFrame({'col': ['a"a', '"bb"']})
expected = '''\
"","col"
"0","a\\"a"
"1","\\"bb\\""
'''
with tm.ensure_clean('test.csv') as path: # QUOTE_ALL
df.to_csv(path, quoting=1, doublequote=False, escapechar='\\')
with open(path, 'r') as f:
assert f.read() == expected
df = DataFrame({'col': ['a,a', ',bb,']})
expected = """\
,col
0,a\\,a
1,\\,bb\\,
"""
with tm.ensure_clean('test.csv') as path:
df.to_csv(path, quoting=3, escapechar='\\') # QUOTE_NONE
with open(path, 'r') as f:
assert f.read() == expected
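# For reference, the integer quoting levels passed in these tests are the
# csv module constants (a sanity-check sketch):
#
#     import csv
#     assert csv.QUOTE_ALL == 1   # quoting=1 above
#     assert csv.QUOTE_NONE == 3  # quoting=3 above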
def test_csv_to_string(self):
df = DataFrame({'col': [1, 2]})
expected_rows = [',col',
'0,1',
'1,2']
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv() == expected
def test_to_csv_decimal(self):
# see gh-781
df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]})
expected_rows = [',col1,col2,col3',
'0,1,a,10.1']
expected_default = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv() == expected_default
expected_rows = [';col1;col2;col3',
'0;1;a;10,1']
expected_european_excel = tm.convert_rows_list_to_csv_str(
expected_rows)
assert df.to_csv(decimal=',', sep=';') == expected_european_excel
expected_rows = [',col1,col2,col3',
'0,1,a,10.10']
expected_float_format_default = tm.convert_rows_list_to_csv_str(
expected_rows)
assert df.to_csv(float_format='%.2f') == expected_float_format_default
expected_rows = [';col1;col2;col3',
'0;1;a;10,10']
expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(decimal=',', sep=';',
float_format='%.2f') == expected_float_format
# see gh-11553: testing if decimal is taken into account for '0.0'
df = pd.DataFrame({'a': [0, 1.1], 'b': [2.2, 3.3], 'c': 1})
expected_rows = ['a,b,c',
'0^0,2^2,1',
'1^1,3^3,1']
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(index=False, decimal='^') == expected
# same but for an index
assert df.set_index('a').to_csv(decimal='^') == expected
# same for a multi-index
assert df.set_index(['a', 'b']).to_csv(decimal="^") == expected
def test_to_csv_float_format(self):
# testing if float_format is taken into account for the index
# GH 11553
df = pd.DataFrame({'a': [0, 1], 'b': [2.2, 3.3], 'c': 1})
expected_rows = ['a,b,c',
'0,2.20,1',
'1,3.30,1']
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index('a').to_csv(float_format='%.2f') == expected
# same for a multi-index
assert df.set_index(['a', 'b']).to_csv(
float_format='%.2f') == expected
def test_to_csv_na_rep(self):
# see gh-11553
#
# Testing if NaN values are correctly represented in the index.
df = DataFrame({'a': [0, np.NaN], 'b': [0, 1], 'c': [2, 3]})
expected_rows = ['a,b,c',
'0.0,0,2',
'_,1,3']
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index('a').to_csv(na_rep='_') == expected
assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected
# now with an index containing only NaNs
df = DataFrame({'a': np.NaN, 'b': [0, 1], 'c': [2, 3]})
expected_rows = ['a,b,c',
'_,0,2',
'_,1,3']
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index('a').to_csv(na_rep='_') == expected
assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected
# check if na_rep parameter does not break anything when no NaN
df = DataFrame({'a': 0, 'b': [0, 1], 'c': [2, 3]})
expected_rows = ['a,b,c',
'0,0,2',
'0,1,3']
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index('a').to_csv(na_rep='_') == expected
assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected
def test_to_csv_date_format(self):
# GH 10209
df_sec = DataFrame({'A': pd.date_range('20130101', periods=5, freq='s')
})
df_day = DataFrame({'A': pd.date_range('20130101', periods=5, freq='d')
})
expected_rows = [',A',
'0,2013-01-01 00:00:00',
'1,2013-01-01 00:00:01',
'2,2013-01-01 00:00:02',
'3,2013-01-01 00:00:03',
'4,2013-01-01 00:00:04']
expected_default_sec = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_sec.to_csv() == expected_default_sec
expected_rows = [',A',
'0,2013-01-01 00:00:00',
'1,2013-01-02 00:00:00',
'2,2013-01-03 00:00:00',
'3,2013-01-04 00:00:00',
'4,2013-01-05 00:00:00']
expected_ymdhms_day = tm.convert_rows_list_to_csv_str(expected_rows)
assert (df_day.to_csv(date_format='%Y-%m-%d %H:%M:%S') ==
expected_ymdhms_day)
expected_rows = [',A',
'0,2013-01-01',
'1,2013-01-01',
'2,2013-01-01',
'3,2013-01-01',
'4,2013-01-01']
expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_sec.to_csv(date_format='%Y-%m-%d') == expected_ymd_sec
expected_rows = [',A',
'0,2013-01-01',
'1,2013-01-02',
'2,2013-01-03',
'3,2013-01-04',
'4,2013-01-05']
expected_default_day = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_day.to_csv() == expected_default_day
assert df_day.to_csv(date_format='%Y-%m-%d') == expected_default_day
# see gh-7791
#
# Testing if date_format parameter is taken into account
# for multi-indexed DataFrames.
df_sec['B'] = 0
df_sec['C'] = 1
expected_rows = ['A,B,C',
'2013-01-01,0,1']
expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
df_sec_grouped = df_sec.groupby([pd.Grouper(key='A', freq='1h'), 'B'])
assert (df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d') ==
expected_ymd_sec)
def test_to_csv_multi_index(self):
# see gh-6618
df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]))
exp_rows = [',1',
',2',
'0,1']
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ['1', '2', '1']
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]),
index=pd.MultiIndex.from_arrays([[1], [2]]))
exp_rows = [',,1', ',,2', '1,2,1']
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ['1', '2', '1']
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
df = DataFrame(
[1], columns=pd.MultiIndex.from_arrays([['foo'], ['bar']]))
exp_rows = [',foo', ',bar', '0,1']
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ['foo', 'bar', '1']
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
@pytest.mark.parametrize("ind,expected", [
(pd.MultiIndex(levels=[[1.0]],
codes=[[0]],
names=["x"]),
"x,data\n1.0,1\n"),
(pd.MultiIndex(levels=[[1.], [2.]],
codes=[[0], [0]],
names=["x", "y"]),
"x,y,data\n1.0,2.0,1\n")
])
@pytest.mark.parametrize("klass", [
pd.DataFrame, pd.Series
])
def test_to_csv_single_level_multi_index(self, ind, expected, klass):
# see gh-19589
result = klass(pd.Series([1], ind, name="data")).to_csv(
line_terminator="\n", header=True)
assert result == expected
def test_to_csv_string_array_ascii(self):
# GH 10813
str_array = [{'names': ['foo', 'bar']}, {'names': ['baz', 'qux']}]
df = pd.DataFrame(str_array)
expected_ascii = '''\
,names
0,"['foo', 'bar']"
1,"['baz', 'qux']"
'''
with tm.ensure_clean('str_test.csv') as path:
df.to_csv(path, encoding='ascii')
with open(path, 'r') as f:
assert f.read() == expected_ascii
@pytest.mark.xfail
def test_to_csv_string_array_utf8(self):
# GH 10813
str_array = [{'names': ['foo', 'bar']}, {'names': ['baz', 'qux']}]
df = pd.DataFrame(str_array)
expected_utf8 = '''\
,names
0,"[u'foo', u'bar']"
1,"[u'baz', u'qux']"
'''
with tm.ensure_clean('unicode_test.csv') as path:
df.to_csv(path, encoding='utf-8')
with open(path, 'r') as f:
assert f.read() == expected_utf8
def test_to_csv_string_with_lf(self):
# GH 20353
data = {
'int': [1, 2, 3],
'str_lf': ['abc', 'd\nef', 'g\nh\n\ni']
}
df = pd.DataFrame(data)
with tm.ensure_clean('lf_test.csv') as path:
# case 1: the default line terminator (= os.linesep) (see PR 21406)
os_linesep = os.linesep.encode('utf-8')
expected_noarg = (
b'int,str_lf' + os_linesep +
b'1,abc' + os_linesep +
b'2,"d\nef"' + os_linesep +
b'3,"g\nh\n\ni"' + os_linesep
)
df.to_csv(path, index=False)
with open(path, 'rb') as f:
assert f.read() == expected_noarg
with tm.ensure_clean('lf_test.csv') as path:
# case 2: LF as line terminator
expected_lf = (
b'int,str_lf\n'
b'1,abc\n'
b'2,"d\nef"\n'
b'3,"g\nh\n\ni"\n'
)
df.to_csv(path, line_terminator='\n', index=False)
with open(path, 'rb') as f:
assert f.read() == expected_lf
with tm.ensure_clean('lf_test.csv') as path:
# case 3: CRLF as line terminator
# 'line_terminator' should not change inner element
expected_crlf = (
b'int,str_lf\r\n'
b'1,abc\r\n'
b'2,"d\nef"\r\n'
b'3,"g\nh\n\ni"\r\n'
)
df.to_csv(path, line_terminator='\r\n', index=False)
with open(path, 'rb') as f:
assert f.read() == expected_crlf
def test_to_csv_string_with_crlf(self):
# GH 20353
data = {
'int': [1, 2, 3],
'str_crlf': ['abc', 'd\r\nef', 'g\r\nh\r\n\r\ni']
}
df = pd.DataFrame(data)
with tm.ensure_clean('crlf_test.csv') as path:
# case 1: the default line terminator (= os.linesep) (see PR 21406)
os_linesep = os.linesep.encode('utf-8')
expected_noarg = (
b'int,str_crlf' + os_linesep +
b'1,abc' + os_linesep +
b'2,"d\r\nef"' + os_linesep +
b'3,"g\r\nh\r\n\r\ni"' + os_linesep
)
df.to_csv(path, index=False)
with open(path, 'rb') as f:
assert f.read() == expected_noarg
with tm.ensure_clean('crlf_test.csv') as path:
# case 2: LF as line terminator
expected_lf = (
b'int,str_crlf\n'
b'1,abc\n'
b'2,"d\r\nef"\n'
b'3,"g\r\nh\r\n\r\ni"\n'
)
df.to_csv(path, line_terminator='\n', index=False)
with open(path, 'rb') as f:
assert f.read() == expected_lf
with tm.ensure_clean('crlf_test.csv') as path:
# case 3: CRLF as line terminator
# 'line_terminator' should not change inner element
expected_crlf = (
b'int,str_crlf\r\n'
b'1,abc\r\n'
b'2,"d\r\nef"\r\n'
b'3,"g\r\nh\r\n\r\ni"\r\n'
)
df.to_csv(path, line_terminator='\r\n', index=False)
with open(path, 'rb') as f:
assert f.read() == expected_crlf
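# Both tests encode the same rule: line_terminator controls only the row
# separator, while newlines embedded in values are preserved verbatim and
# force the field to be quoted. A one-liner illustrating it (sketch):
#
#     pd.DataFrame({'a': ['x\ny']}).to_csv(index=False, line_terminator='\r\n')
#     # -> 'a\r\n"x\ny"\r\n'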
def test_to_csv_stdout_file(self, capsys):
# GH 21561
df = pd.DataFrame([['foo', 'bar'], ['baz', 'qux']],
columns=['name_1', 'name_2'])
expected_rows = [',name_1,name_2',
'0,foo,bar',
'1,baz,qux']
expected_ascii = tm.convert_rows_list_to_csv_str(expected_rows)
df.to_csv(sys.stdout, encoding='ascii')
captured = capsys.readouterr()
assert captured.out == expected_ascii
assert not sys.stdout.closed
@pytest.mark.xfail(
compat.is_platform_windows(),
reason=("Especially in Windows, file stream should not be passed"
"to csv writer without newline='' option."
"(https://docs.python.org/3.6/library/csv.html#csv.writer)"))
def test_to_csv_write_to_open_file(self):
# GH 21696
df = pd.DataFrame({'a': ['x', 'y', 'z']})
expected = '''\
manual header
x
y
z
'''
with tm.ensure_clean('test.txt') as path:
with open(path, 'w') as f:
f.write('manual header\n')
df.to_csv(f, header=None, index=None)
with open(path, 'r') as f:
assert f.read() == expected
@pytest.mark.skipif(compat.PY2, reason="Test case for python3")
def test_to_csv_write_to_open_file_with_newline_py3(self):
# see gh-21696
# see gh-20353
df = pd.DataFrame({'a': ['x', 'y', 'z']})
expected_rows = ["x",
"y",
"z"]
expected = ("manual header\n" +
tm.convert_rows_list_to_csv_str(expected_rows))
with tm.ensure_clean('test.txt') as path:
with open(path, 'w', newline='') as f:
f.write('manual header\n')
df.to_csv(f, header=None, index=None)
with open(path, 'rb') as f:
assert f.read() == bytes(expected, 'utf-8')
@pytest.mark.skipif(compat.PY3, reason="Test case for python2")
def test_to_csv_write_to_open_file_with_newline_py2(self):
# see gh-21696
# see gh-20353
df = pd.DataFrame({'a': ['x', 'y', 'z']})
expected_rows = ["x",
"y",
"z"]
expected = ("manual header\n" +
tm.convert_rows_list_to_csv_str(expected_rows))
with tm.ensure_clean('test.txt') as path:
with open(path, 'wb') as f:
f.write('manual header\n')
df.to_csv(f, header=None, index=None)
with open(path, 'rb') as f:
assert f.read() == expected
@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
def test_to_csv_compression(self, compression_only,
read_infer, to_infer):
# see gh-15008
compression = compression_only
if compression == "zip":
pytest.skip("{compression} is not supported "
"for to_csv".format(compression=compression))
# We'll complete file extension subsequently.
filename = "test."
if compression == "gzip":
filename += "gz"
else:
# xz --> .xz
# bz2 --> .bz2
filename += compression
df = DataFrame({"A": [1]})
to_compression = "infer" if to_infer else compression
read_compression = "infer" if read_infer else compression
with tm.ensure_clean(filename) as path:
df.to_csv(path, compression=to_compression)
result = pd.read_csv(path, index_col=0,
compression=read_compression)
tm.assert_frame_equal(result, df)
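# Round-trip sketch of the inference being tested (the temp filename is
# assumed):
#
#     df = DataFrame({"A": [1]})
#     with tm.ensure_clean("test.gz") as path:
#         df.to_csv(path, compression="infer")  # ".gz" suffix -> gzip
#         result = pd.read_csv(path, index_col=0, compression="infer")
#         tm.assert_frame_equal(result, df)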
@@ -0,0 +1,278 @@
"""Tests formatting as writer-agnostic ExcelCells
ExcelFormatter is tested implicitly in pandas/tests/io/test_excel.py
"""
import pytest
import pandas.util.testing as tm
from pandas.io.formats.css import CSSWarning
from pandas.io.formats.excel import CSSToExcelConverter
@pytest.mark.parametrize('css,expected', [
# FONT
# - name
('font-family: foo,bar', {'font': {'name': 'foo'}}),
('font-family: "foo bar",baz', {'font': {'name': 'foo bar'}}),
('font-family: foo,\nbar', {'font': {'name': 'foo'}}),
('font-family: foo, bar, baz', {'font': {'name': 'foo'}}),
('font-family: bar, foo', {'font': {'name': 'bar'}}),
('font-family: \'foo bar\', baz', {'font': {'name': 'foo bar'}}),
('font-family: \'foo \\\'bar\', baz', {'font': {'name': 'foo \'bar'}}),
('font-family: "foo \\"bar", baz', {'font': {'name': 'foo "bar'}}),
('font-family: "foo ,bar", baz', {'font': {'name': 'foo ,bar'}}),
# - family
('font-family: serif', {'font': {'name': 'serif', 'family': 1}}),
('font-family: Serif', {'font': {'name': 'serif', 'family': 1}}),
('font-family: roman, serif', {'font': {'name': 'roman', 'family': 1}}),
('font-family: roman, sans-serif', {'font': {'name': 'roman',
'family': 2}}),
('font-family: roman, sans serif', {'font': {'name': 'roman'}}),
('font-family: roman, sansserif', {'font': {'name': 'roman'}}),
('font-family: roman, cursive', {'font': {'name': 'roman', 'family': 4}}),
('font-family: roman, fantasy', {'font': {'name': 'roman', 'family': 5}}),
# - size
('font-size: 1em', {'font': {'size': 12}}),
('font-size: xx-small', {'font': {'size': 6}}),
('font-size: x-small', {'font': {'size': 7.5}}),
('font-size: small', {'font': {'size': 9.6}}),
('font-size: medium', {'font': {'size': 12}}),
('font-size: large', {'font': {'size': 13.5}}),
('font-size: x-large', {'font': {'size': 18}}),
('font-size: xx-large', {'font': {'size': 24}}),
('font-size: 50%', {'font': {'size': 6}}),
# - bold
('font-weight: 100', {'font': {'bold': False}}),
('font-weight: 200', {'font': {'bold': False}}),
('font-weight: 300', {'font': {'bold': False}}),
('font-weight: 400', {'font': {'bold': False}}),
('font-weight: normal', {'font': {'bold': False}}),
('font-weight: lighter', {'font': {'bold': False}}),
('font-weight: bold', {'font': {'bold': True}}),
('font-weight: bolder', {'font': {'bold': True}}),
('font-weight: 700', {'font': {'bold': True}}),
('font-weight: 800', {'font': {'bold': True}}),
('font-weight: 900', {'font': {'bold': True}}),
# - italic
('font-style: italic', {'font': {'italic': True}}),
('font-style: oblique', {'font': {'italic': True}}),
# - underline
('text-decoration: underline',
{'font': {'underline': 'single'}}),
('text-decoration: overline',
{}),
('text-decoration: none',
{}),
# - strike
('text-decoration: line-through',
{'font': {'strike': True}}),
('text-decoration: underline line-through',
{'font': {'strike': True, 'underline': 'single'}}),
('text-decoration: underline; text-decoration: line-through',
{'font': {'strike': True}}),
# - color
('color: red', {'font': {'color': 'FF0000'}}),
('color: #ff0000', {'font': {'color': 'FF0000'}}),
('color: #f0a', {'font': {'color': 'FF00AA'}}),
# - shadow
('text-shadow: none', {'font': {'shadow': False}}),
('text-shadow: 0px -0em 0px #CCC', {'font': {'shadow': False}}),
('text-shadow: 0px -0em 0px #999', {'font': {'shadow': False}}),
('text-shadow: 0px -0em 0px', {'font': {'shadow': False}}),
('text-shadow: 2px -0em 0px #CCC', {'font': {'shadow': True}}),
('text-shadow: 0px -2em 0px #CCC', {'font': {'shadow': True}}),
('text-shadow: 0px -0em 2px #CCC', {'font': {'shadow': True}}),
('text-shadow: 0px -0em 2px', {'font': {'shadow': True}}),
('text-shadow: 0px -2em', {'font': {'shadow': True}}),
# FILL
# - color, fillType
('background-color: red', {'fill': {'fgColor': 'FF0000',
'patternType': 'solid'}}),
('background-color: #ff0000', {'fill': {'fgColor': 'FF0000',
'patternType': 'solid'}}),
('background-color: #f0a', {'fill': {'fgColor': 'FF00AA',
'patternType': 'solid'}}),
# BORDER
# - style
('border-style: solid',
{'border': {'top': {'style': 'medium'},
'bottom': {'style': 'medium'},
'left': {'style': 'medium'},
'right': {'style': 'medium'}}}),
('border-style: solid; border-width: thin',
{'border': {'top': {'style': 'thin'},
'bottom': {'style': 'thin'},
'left': {'style': 'thin'},
'right': {'style': 'thin'}}}),
('border-top-style: solid; border-top-width: thin',
{'border': {'top': {'style': 'thin'}}}),
('border-top-style: solid; border-top-width: 1pt',
{'border': {'top': {'style': 'thin'}}}),
('border-top-style: solid',
{'border': {'top': {'style': 'medium'}}}),
('border-top-style: solid; border-top-width: medium',
{'border': {'top': {'style': 'medium'}}}),
('border-top-style: solid; border-top-width: 2pt',
{'border': {'top': {'style': 'medium'}}}),
('border-top-style: solid; border-top-width: thick',
{'border': {'top': {'style': 'thick'}}}),
('border-top-style: solid; border-top-width: 4pt',
{'border': {'top': {'style': 'thick'}}}),
('border-top-style: dotted',
{'border': {'top': {'style': 'mediumDashDotDot'}}}),
('border-top-style: dotted; border-top-width: thin',
{'border': {'top': {'style': 'dotted'}}}),
('border-top-style: dashed',
{'border': {'top': {'style': 'mediumDashed'}}}),
('border-top-style: dashed; border-top-width: thin',
{'border': {'top': {'style': 'dashed'}}}),
('border-top-style: double',
{'border': {'top': {'style': 'double'}}}),
# - color
('border-style: solid; border-color: #0000ff',
{'border': {'top': {'style': 'medium', 'color': '0000FF'},
'right': {'style': 'medium', 'color': '0000FF'},
'bottom': {'style': 'medium', 'color': '0000FF'},
'left': {'style': 'medium', 'color': '0000FF'}}}),
('border-top-style: double; border-top-color: blue',
{'border': {'top': {'style': 'double', 'color': '0000FF'}}}),
('border-top-style: solid; border-top-color: #06c',
{'border': {'top': {'style': 'medium', 'color': '0066CC'}}}),
# ALIGNMENT
# - horizontal
('text-align: center',
{'alignment': {'horizontal': 'center'}}),
('text-align: left',
{'alignment': {'horizontal': 'left'}}),
('text-align: right',
{'alignment': {'horizontal': 'right'}}),
('text-align: justify',
{'alignment': {'horizontal': 'justify'}}),
# - vertical
('vertical-align: top',
{'alignment': {'vertical': 'top'}}),
('vertical-align: text-top',
{'alignment': {'vertical': 'top'}}),
('vertical-align: middle',
{'alignment': {'vertical': 'center'}}),
('vertical-align: bottom',
{'alignment': {'vertical': 'bottom'}}),
('vertical-align: text-bottom',
{'alignment': {'vertical': 'bottom'}}),
# - wrap_text
('white-space: nowrap',
{'alignment': {'wrap_text': False}}),
('white-space: pre',
{'alignment': {'wrap_text': False}}),
('white-space: pre-line',
{'alignment': {'wrap_text': False}}),
('white-space: normal',
{'alignment': {'wrap_text': True}}),
# NUMBER FORMAT
('number-format: 0%',
{'number_format': {'format_code': '0%'}}),
])
def test_css_to_excel(css, expected):
convert = CSSToExcelConverter()
assert expected == convert(css)
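# Usage sketch: the converter is a callable from a CSS declaration string to
# the nested dict consumed by ExcelFormatter (values per the cases above):
#
#     convert = CSSToExcelConverter()
#     convert('font-weight: bold; color: red')
#     # -> {'font': {'bold': True, 'color': 'FF0000'}}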
def test_css_to_excel_multiple():
convert = CSSToExcelConverter()
actual = convert('''
font-weight: bold;
text-decoration: underline;
color: red;
border-width: thin;
text-align: center;
vertical-align: top;
unused: something;
''')
assert {"font": {"bold": True, "underline": "single", "color": "FF0000"},
"border": {"top": {"style": "thin"},
"right": {"style": "thin"},
"bottom": {"style": "thin"},
"left": {"style": "thin"}},
"alignment": {"horizontal": "center",
"vertical": "top"}} == actual
@pytest.mark.parametrize('css,inherited,expected', [
('font-weight: bold', '',
{'font': {'bold': True}}),
('', 'font-weight: bold',
{'font': {'bold': True}}),
('font-weight: bold', 'font-style: italic',
{'font': {'bold': True, 'italic': True}}),
('font-style: normal', 'font-style: italic',
{'font': {'italic': False}}),
('font-style: inherit', '', {}),
('font-style: normal; font-style: inherit', 'font-style: italic',
{'font': {'italic': True}}),
])
def test_css_to_excel_inherited(css, inherited, expected):
convert = CSSToExcelConverter(inherited)
assert expected == convert(css)
@pytest.mark.parametrize("input_color,output_color", (
[(name, rgb) for name, rgb in CSSToExcelConverter.NAMED_COLORS.items()] +
[("#" + rgb, rgb) for rgb in CSSToExcelConverter.NAMED_COLORS.values()] +
[("#F0F", "FF00FF"), ("#ABC", "AABBCC")])
)
def test_css_to_excel_good_colors(input_color, output_color):
# see gh-18392
css = ("border-top-color: {color}; "
"border-right-color: {color}; "
"border-bottom-color: {color}; "
"border-left-color: {color}; "
"background-color: {color}; "
"color: {color}").format(color=input_color)
expected = dict()
expected["fill"] = {
"patternType": "solid",
"fgColor": output_color
}
expected["font"] = {
"color": output_color
}
expected["border"] = {
k: {
"color": output_color,
} for k in ("top", "right", "bottom", "left")
}
with tm.assert_produces_warning(None):
convert = CSSToExcelConverter()
assert expected == convert(css)
@pytest.mark.parametrize("input_color", [None, "not-a-color"])
def test_css_to_excel_bad_colors(input_color):
# see gh-18392
css = ("border-top-color: {color}; "
"border-right-color: {color}; "
"border-bottom-color: {color}; "
"border-left-color: {color}; "
"background-color: {color}; "
"color: {color}").format(color=input_color)
expected = dict()
if input_color is not None:
expected["fill"] = {
"patternType": "solid"
}
with tm.assert_produces_warning(CSSWarning):
convert = CSSToExcelConverter()
assert expected == convert(css)
@@ -0,0 +1,602 @@
# -*- coding: utf-8 -*-
from datetime import datetime
from io import open
import re
import numpy as np
import pytest
from pandas.compat import StringIO, lrange, u
import pandas as pd
from pandas import DataFrame, Index, MultiIndex, compat, option_context
from pandas.util import testing as tm
import pandas.io.formats.format as fmt
def expected_html(datapath, name):
"""
Read HTML file from formats data directory.
Parameters
----------
datapath : pytest fixture
The datapath fixture injected into a test by pytest.
name : str
The name of the HTML file without the suffix.
Returns
-------
str : contents of HTML file.
"""
filename = '.'.join([name, 'html'])
filepath = datapath('io', 'formats', 'data', 'html', filename)
with open(filepath, encoding='utf-8') as f:
html = f.read()
return html.rstrip()
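# Usage sketch (with the datapath fixture injected by pytest):
#
#     expected = expected_html(datapath, 'gh12031_expected_output')
#     # reads .../io/formats/data/html/gh12031_expected_output.html and
#     # strips the trailing newline for comparison with DataFrame.to_html()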
@pytest.fixture(params=['mixed', 'empty'])
def biggie_df_fixture(request):
"""Fixture for a big mixed Dataframe and an empty Dataframe"""
if request.param == 'mixed':
df = DataFrame({'A': np.random.randn(200),
'B': tm.makeStringIndex(200)},
index=lrange(200))
df.loc[:20, 'A'] = np.nan
df.loc[:20, 'B'] = np.nan
return df
elif request.param == 'empty':
df = DataFrame(index=np.arange(200))
return df
@pytest.fixture(params=fmt._VALID_JUSTIFY_PARAMETERS)
def justify(request):
return request.param
@pytest.mark.parametrize('col_space', [30, 50])
def test_to_html_with_col_space(col_space):
df = DataFrame(np.random.random(size=(1, 3)))
# check that col_space affects HTML generation;
# the check is deliberately brittle.
result = df.to_html(col_space=col_space)
hdrs = [x for x in result.split(r"\n") if re.search(r"<th[>\s]", x)]
assert len(hdrs) > 0
for h in hdrs:
assert "min-width" in h
assert str(col_space) in h
def test_to_html_with_empty_string_label():
# GH 3547, to_html regards empty string labels as repeated labels
data = {'c1': ['a', 'b'], 'c2': ['a', ''], 'data': [1, 2]}
df = DataFrame(data).set_index(['c1', 'c2'])
result = df.to_html()
assert "rowspan" not in result
@pytest.mark.parametrize('df,expected', [
(DataFrame({u('\u03c3'): np.arange(10.)}), 'unicode_1'),
(DataFrame({'A': [u('\u03c3')]}), 'unicode_2')
])
def test_to_html_unicode(df, expected, datapath):
expected = expected_html(datapath, expected)
result = df.to_html()
assert result == expected
def test_to_html_decimal(datapath):
# GH 12031
df = DataFrame({'A': [6.0, 3.1, 2.2]})
result = df.to_html(decimal=',')
expected = expected_html(datapath, 'gh12031_expected_output')
assert result == expected
@pytest.mark.parametrize('kwargs,string,expected', [
(dict(), "<type 'str'>", 'escaped'),
(dict(escape=False), "<b>bold</b>", 'escape_disabled')
])
def test_to_html_escaped(kwargs, string, expected, datapath):
a = 'str<ing1 &amp;'
b = 'stri>ng2 &amp;'
test_dict = {'co<l1': {a: string,
b: string},
'co>l2': {a: string,
b: string}}
result = DataFrame(test_dict).to_html(**kwargs)
expected = expected_html(datapath, expected)
assert result == expected
@pytest.mark.parametrize('index_is_named', [True, False])
def test_to_html_multiindex_index_false(index_is_named, datapath):
# GH 8452
df = DataFrame({
'a': range(2),
'b': range(3, 5),
'c': range(5, 7),
'd': range(3, 5)
})
df.columns = MultiIndex.from_product([['a', 'b'], ['c', 'd']])
if index_is_named:
df.index = Index(df.index.values, name='idx')
result = df.to_html(index=False)
expected = expected_html(datapath, 'gh8452_expected_output')
assert result == expected
@pytest.mark.parametrize('multi_sparse,expected', [
(False, 'multiindex_sparsify_false_multi_sparse_1'),
(False, 'multiindex_sparsify_false_multi_sparse_2'),
(True, 'multiindex_sparsify_1'),
(True, 'multiindex_sparsify_2')
])
def test_to_html_multiindex_sparsify(multi_sparse, expected, datapath):
index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]],
names=['foo', None])
df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index)
if expected.endswith('2'):
df.columns = index[::2]
with option_context('display.multi_sparse', multi_sparse):
result = df.to_html()
expected = expected_html(datapath, expected)
assert result == expected
@pytest.mark.parametrize('max_rows,expected', [
(60, 'gh14882_expected_output_1'),
# Test that ... appears in a middle level
(56, 'gh14882_expected_output_2')
])
def test_to_html_multiindex_odd_even_truncate(max_rows, expected, datapath):
# GH 14882 - Issue on truncation with odd length DataFrame
index = MultiIndex.from_product([[100, 200, 300],
[10, 20, 30],
[1, 2, 3, 4, 5, 6, 7]],
names=['a', 'b', 'c'])
df = DataFrame({'n': range(len(index))}, index=index)
result = df.to_html(max_rows=max_rows)
expected = expected_html(datapath, expected)
assert result == expected
@pytest.mark.parametrize('df,formatters,expected', [
(DataFrame(
[[0, 1], [2, 3], [4, 5], [6, 7]],
columns=['foo', None], index=lrange(4)),
{'__index__': lambda x: 'abcd'[x]},
'index_formatter'),
(DataFrame(
{'months': [datetime(2016, 1, 1), datetime(2016, 2, 2)]}),
{'months': lambda x: x.strftime('%Y-%m')},
'datetime64_monthformatter'),
(DataFrame({'hod': pd.to_datetime(['10:10:10.100', '12:12:12.120'],
format='%H:%M:%S.%f')}),
{'hod': lambda x: x.strftime('%H:%M')},
'datetime64_hourformatter')
])
def test_to_html_formatters(df, formatters, expected, datapath):
expected = expected_html(datapath, expected)
result = df.to_html(formatters=formatters)
assert result == expected
def test_to_html_regression_GH6098():
df = DataFrame({
u('clé1'): [u('a'), u('a'), u('b'), u('b'), u('a')],
u('clé2'): [u('1er'), u('2ème'), u('1er'), u('2ème'), u('1er')],
'données1': np.random.randn(5),
'données2': np.random.randn(5)})
# it works
df.pivot_table(index=[u('clé1')], columns=[u('clé2')])._repr_html_()
def test_to_html_truncate(datapath):
index = pd.date_range(start='20010101', freq='D', periods=20)
df = DataFrame(index=index, columns=range(20))
result = df.to_html(max_rows=8, max_cols=4)
expected = expected_html(datapath, 'truncate')
assert result == expected
@pytest.mark.parametrize('sparsify,expected', [
(True, 'truncate_multi_index'),
(False, 'truncate_multi_index_sparse_off')
])
def test_to_html_truncate_multi_index(sparsify, expected, datapath):
arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
df = DataFrame(index=arrays, columns=arrays)
result = df.to_html(max_rows=7, max_cols=7, sparsify=sparsify)
expected = expected_html(datapath, expected)
assert result == expected
@pytest.mark.parametrize('option,result,expected', [
(None, lambda df: df.to_html(), '1'),
(None, lambda df: df.to_html(border=0), '0'),
(0, lambda df: df.to_html(), '0'),
(0, lambda df: df._repr_html_(), '0'),
])
def test_to_html_border(option, result, expected):
df = DataFrame({'A': [1, 2]})
if option is None:
result = result(df)
else:
with option_context('display.html.border', option):
result = result(df)
expected = 'border="{}"'.format(expected)
assert expected in result
def test_display_option_warning():
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
pd.options.html.border
@pytest.mark.parametrize('biggie_df_fixture', ['mixed'], indirect=True)
def test_to_html(biggie_df_fixture):
# TODO: split this test
df = biggie_df_fixture
s = df.to_html()
buf = StringIO()
retval = df.to_html(buf=buf)
assert retval is None
assert buf.getvalue() == s
assert isinstance(s, compat.string_types)
df.to_html(columns=['B', 'A'], col_space=17)
df.to_html(columns=['B', 'A'],
formatters={'A': lambda x: '{x:.1f}'.format(x=x)})
df.to_html(columns=['B', 'A'], float_format=str)
df.to_html(columns=['B', 'A'], col_space=12, float_format=str)
@pytest.mark.parametrize('biggie_df_fixture', ['empty'], indirect=True)
def test_to_html_empty_dataframe(biggie_df_fixture):
df = biggie_df_fixture
df.to_html()
def test_to_html_filename(biggie_df_fixture, tmpdir):
df = biggie_df_fixture
expected = df.to_html()
path = tmpdir.join('test.html')
df.to_html(path)
result = path.read()
assert result == expected
def test_to_html_with_no_bold():
df = DataFrame({'x': np.random.randn(5)})
html = df.to_html(bold_rows=False)
result = html[html.find("</thead>")]
assert '<strong' not in result
def test_to_html_columns_arg():
df = DataFrame(tm.getSeriesData())
result = df.to_html(columns=['A'])
assert '<th>B</th>' not in result
@pytest.mark.parametrize('columns,justify,expected', [
(MultiIndex.from_tuples(
list(zip(np.arange(2).repeat(2), np.mod(lrange(4), 2))),
names=['CL0', 'CL1']),
'left',
'multiindex_1'),
(MultiIndex.from_tuples(
list(zip(range(4), np.mod(lrange(4), 2)))),
'right',
'multiindex_2')
])
def test_to_html_multiindex(columns, justify, expected, datapath):
df = DataFrame([list('abcd'), list('efgh')], columns=columns)
result = df.to_html(justify=justify)
expected = expected_html(datapath, expected)
assert result == expected
def test_to_html_justify(justify, datapath):
df = DataFrame({'A': [6, 30000, 2],
'B': [1, 2, 70000],
'C': [223442, 0, 1]},
columns=['A', 'B', 'C'])
result = df.to_html(justify=justify)
expected = expected_html(datapath, 'justify').format(justify=justify)
assert result == expected
@pytest.mark.parametrize("justify", ["super-right", "small-left",
"noinherit", "tiny", "pandas"])
def test_to_html_invalid_justify(justify):
# GH 17527
df = DataFrame()
msg = "Invalid value for justify parameter"
with pytest.raises(ValueError, match=msg):
df.to_html(justify=justify)
def test_to_html_index(datapath):
# TODO: split this test
index = ['foo', 'bar', 'baz']
df = DataFrame({'A': [1, 2, 3],
'B': [1.2, 3.4, 5.6],
'C': ['one', 'two', np.nan]},
columns=['A', 'B', 'C'],
index=index)
expected_with_index = expected_html(datapath, 'index_1')
assert df.to_html() == expected_with_index
expected_without_index = expected_html(datapath, 'index_2')
result = df.to_html(index=False)
for i in index:
assert i not in result
assert result == expected_without_index
df.index = Index(['foo', 'bar', 'baz'], name='idx')
expected_with_index = expected_html(datapath, 'index_3')
assert df.to_html() == expected_with_index
assert df.to_html(index=False) == expected_without_index
tuples = [('foo', 'car'), ('foo', 'bike'), ('bar', 'car')]
df.index = MultiIndex.from_tuples(tuples)
expected_with_index = expected_html(datapath, 'index_4')
assert df.to_html() == expected_with_index
result = df.to_html(index=False)
for i in ['foo', 'bar', 'car', 'bike']:
assert i not in result
# must be the same result as normal index
assert result == expected_without_index
df.index = MultiIndex.from_tuples(tuples, names=['idx1', 'idx2'])
expected_with_index = expected_html(datapath, 'index_5')
assert df.to_html() == expected_with_index
assert df.to_html(index=False) == expected_without_index
@pytest.mark.parametrize('classes', [
"sortable draggable",
["sortable", "draggable"]
])
def test_to_html_with_classes(classes, datapath):
df = DataFrame()
expected = expected_html(datapath, 'with_classes')
result = df.to_html(classes=classes)
assert result == expected
def test_to_html_no_index_max_rows(datapath):
# GH 14998
df = DataFrame({"A": [1, 2, 3, 4]})
result = df.to_html(index=False, max_rows=1)
expected = expected_html(datapath, 'gh14998_expected_output')
assert result == expected
def test_to_html_multiindex_max_cols(datapath):
# GH 6131
index = MultiIndex(levels=[['ba', 'bb', 'bc'], ['ca', 'cb', 'cc']],
codes=[[0, 1, 2], [0, 1, 2]],
names=['b', 'c'])
columns = MultiIndex(levels=[['d'], ['aa', 'ab', 'ac']],
codes=[[0, 0, 0], [0, 1, 2]],
names=[None, 'a'])
data = np.array(
[[1., np.nan, np.nan], [np.nan, 2., np.nan], [np.nan, np.nan, 3.]])
df = DataFrame(data, index, columns)
result = df.to_html(max_cols=2)
expected = expected_html(datapath, 'gh6131_expected_output')
assert result == expected
def test_to_html_multi_indexes_index_false(datapath):
# GH 22579
df = DataFrame({'a': range(10), 'b': range(10, 20), 'c': range(10, 20),
'd': range(10, 20)})
df.columns = MultiIndex.from_product([['a', 'b'], ['c', 'd']])
df.index = MultiIndex.from_product([['a', 'b'],
['c', 'd', 'e', 'f', 'g']])
result = df.to_html(index=False)
expected = expected_html(datapath, 'gh22579_expected_output')
assert result == expected
@pytest.mark.parametrize('index_names', [True, False])
@pytest.mark.parametrize('header', [True, False])
@pytest.mark.parametrize('index', [True, False])
@pytest.mark.parametrize('column_index, column_type', [
(Index([0, 1]), 'unnamed_standard'),
(Index([0, 1], name='columns.name'), 'named_standard'),
(MultiIndex.from_product([['a'], ['b', 'c']]), 'unnamed_multi'),
(MultiIndex.from_product(
[['a'], ['b', 'c']], names=['columns.name.0',
'columns.name.1']), 'named_multi')
])
@pytest.mark.parametrize('row_index, row_type', [
(Index([0, 1]), 'unnamed_standard'),
(Index([0, 1], name='index.name'), 'named_standard'),
(MultiIndex.from_product([['a'], ['b', 'c']]), 'unnamed_multi'),
(MultiIndex.from_product(
[['a'], ['b', 'c']], names=['index.name.0',
'index.name.1']), 'named_multi')
])
def test_to_html_basic_alignment(
datapath, row_index, row_type, column_index, column_type,
index, header, index_names):
# GH 22747, GH 22579
df = DataFrame(np.zeros((2, 2), dtype=int),
index=row_index, columns=column_index)
result = df.to_html(
index=index, header=header, index_names=index_names)
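    # Map the parametrized flags onto the expected-file naming scheme:
    # a hidden axis becomes 'none', and 'named_*' falls back to
    # 'unnamed_*' when index_names=False suppresses the names.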
if not index:
row_type = 'none'
elif not index_names and row_type.startswith('named'):
row_type = 'un' + row_type
if not header:
column_type = 'none'
elif not index_names and column_type.startswith('named'):
column_type = 'un' + column_type
filename = 'index_' + row_type + '_columns_' + column_type
expected = expected_html(datapath, filename)
assert result == expected
@pytest.mark.parametrize('index_names', [True, False])
@pytest.mark.parametrize('header', [True, False])
@pytest.mark.parametrize('index', [True, False])
@pytest.mark.parametrize('column_index, column_type', [
(Index(np.arange(8)), 'unnamed_standard'),
(Index(np.arange(8), name='columns.name'), 'named_standard'),
(MultiIndex.from_product(
[['a', 'b'], ['c', 'd'], ['e', 'f']]), 'unnamed_multi'),
(MultiIndex.from_product(
[['a', 'b'], ['c', 'd'], ['e', 'f']], names=['foo', None, 'baz']),
'named_multi')
])
@pytest.mark.parametrize('row_index, row_type', [
(Index(np.arange(8)), 'unnamed_standard'),
(Index(np.arange(8), name='index.name'), 'named_standard'),
(MultiIndex.from_product(
[['a', 'b'], ['c', 'd'], ['e', 'f']]), 'unnamed_multi'),
(MultiIndex.from_product(
[['a', 'b'], ['c', 'd'], ['e', 'f']], names=['foo', None, 'baz']),
'named_multi')
])
def test_to_html_alignment_with_truncation(
datapath, row_index, row_type, column_index, column_type,
index, header, index_names):
# GH 22747, GH 22579
df = DataFrame(np.arange(64).reshape(8, 8),
index=row_index, columns=column_index)
result = df.to_html(
max_rows=4, max_cols=4,
index=index, header=header, index_names=index_names)
if not index:
row_type = 'none'
elif not index_names and row_type.startswith('named'):
row_type = 'un' + row_type
if not header:
column_type = 'none'
elif not index_names and column_type.startswith('named'):
column_type = 'un' + column_type
filename = 'trunc_df_index_' + row_type + '_columns_' + column_type
expected = expected_html(datapath, filename)
assert result == expected
@pytest.mark.parametrize('index', [False, 0])
def test_to_html_truncation_index_false_max_rows(datapath, index):
# GH 15019
data = [[1.764052, 0.400157],
[0.978738, 2.240893],
[1.867558, -0.977278],
[0.950088, -0.151357],
[-0.103219, 0.410599]]
df = DataFrame(data)
result = df.to_html(max_rows=4, index=index)
expected = expected_html(datapath, 'gh15019_expected_output')
assert result == expected
@pytest.mark.parametrize('index', [False, 0])
@pytest.mark.parametrize('col_index_named, expected_output', [
(False, 'gh22783_expected_output'),
(True, 'gh22783_named_columns_index')
])
def test_to_html_truncation_index_false_max_cols(
datapath, index, col_index_named, expected_output):
# GH 22783
data = [[1.764052, 0.400157, 0.978738, 2.240893, 1.867558],
[-0.977278, 0.950088, -0.151357, -0.103219, 0.410599]]
df = DataFrame(data)
if col_index_named:
df.columns.rename('columns.name', inplace=True)
result = df.to_html(max_cols=4, index=index)
expected = expected_html(datapath, expected_output)
assert result == expected
@pytest.mark.parametrize('notebook', [True, False])
def test_to_html_notebook_has_style(notebook):
df = DataFrame({"A": [1, 2, 3]})
result = df.to_html(notebook=notebook)
if notebook:
assert "tbody tr th:only-of-type" in result
assert "vertical-align: middle;" in result
assert "thead th" in result
else:
assert "tbody tr th:only-of-type" not in result
assert "vertical-align: middle;" not in result
assert "thead th" not in result
def test_to_html_with_index_names_false():
# GH 16493
df = DataFrame({"A": [1, 2]}, index=Index(['a', 'b'],
name='myindexname'))
result = df.to_html(index_names=False)
assert 'myindexname' not in result
def test_to_html_with_id():
# GH 8496
df = DataFrame({"A": [1, 2]}, index=Index(['a', 'b'],
name='myindexname'))
result = df.to_html(index_names=False, table_id="TEST_ID")
assert ' id="TEST_ID"' in result
@pytest.mark.parametrize('value,float_format,expected', [
(0.19999, '%.3f', 'gh21625_expected_output'),
(100.0, '%.0f', 'gh22270_expected_output'),
])
def test_to_html_float_format_no_fixed_width(
value, float_format, expected, datapath):
# GH 21625, GH 22270
df = DataFrame({'x': [value]})
expected = expected_html(datapath, expected)
result = df.to_html(float_format=float_format)
assert result == expected
@pytest.mark.parametrize("render_links,expected", [
(True, 'render_links_true'),
(False, 'render_links_false'),
])
def test_to_html_render_links(render_links, expected, datapath):
# GH 2679
data = [
[0, 'http://pandas.pydata.org/?q1=a&q2=b', 'pydata.org'],
[0, 'www.pydata.org', 'pydata.org']
]
df = DataFrame(data, columns=['foo', 'bar', None])
result = df.to_html(render_links=render_links)
expected = expected_html(datapath, expected)
assert result == expected
@@ -0,0 +1,737 @@
import codecs
from datetime import datetime
import pytest
from pandas.compat import u
import pandas as pd
from pandas import DataFrame, Series, compat
from pandas.util import testing as tm
@pytest.fixture
def frame():
return DataFrame(tm.getSeriesData())
class TestToLatex(object):
def test_to_latex_filename(self, frame):
with tm.ensure_clean('test.tex') as path:
frame.to_latex(path)
with open(path, 'r') as f:
assert frame.to_latex() == f.read()
# test with utf-8 and encoding option (GH 7061)
df = DataFrame([[u'au\xdfgangen']])
with tm.ensure_clean('test.tex') as path:
df.to_latex(path, encoding='utf-8')
with codecs.open(path, 'r', encoding='utf-8') as f:
assert df.to_latex() == f.read()
# test with utf-8 without encoding option
if compat.PY3: # python3: pandas default encoding is utf-8
with tm.ensure_clean('test.tex') as path:
df.to_latex(path)
with codecs.open(path, 'r', encoding='utf-8') as f:
assert df.to_latex() == f.read()
else:
# python2 default encoding is ascii, so an error should be raised
with tm.ensure_clean('test.tex') as path:
with pytest.raises(UnicodeEncodeError):
df.to_latex(path)
def test_to_latex(self, frame):
# it works!
frame.to_latex()
df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']})
withindex_result = df.to_latex()
withindex_expected = r"""\begin{tabular}{lrl}
\toprule
{} & a & b \\
\midrule
0 & 1 & b1 \\
1 & 2 & b2 \\
\bottomrule
\end{tabular}
"""
assert withindex_result == withindex_expected
withoutindex_result = df.to_latex(index=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
a & b \\
\midrule
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
assert withoutindex_result == withoutindex_expected
def test_to_latex_format(self, frame):
# GH Bug #9402
frame.to_latex(column_format='ccc')
df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']})
withindex_result = df.to_latex(column_format='ccc')
withindex_expected = r"""\begin{tabular}{ccc}
\toprule
{} & a & b \\
\midrule
0 & 1 & b1 \\
1 & 2 & b2 \\
\bottomrule
\end{tabular}
"""
assert withindex_result == withindex_expected
def test_to_latex_empty(self):
df = DataFrame()
result = df.to_latex()
expected = r"""\begin{tabular}{l}
\toprule
Empty DataFrame
Columns: Index([], dtype='object')
Index: Index([], dtype='object') \\
\bottomrule
\end{tabular}
"""
assert result == expected
result = df.to_latex(longtable=True)
expected = r"""\begin{longtable}{l}
\toprule
Empty DataFrame
Columns: Index([], dtype='object')
Index: Index([], dtype='object') \\
\end{longtable}
"""
assert result == expected
def test_to_latex_with_formatters(self):
df = DataFrame({'datetime64': [datetime(2016, 1, 1),
datetime(2016, 2, 5),
datetime(2016, 3, 3)],
'float': [1.0, 2.0, 3.0],
'int': [1, 2, 3],
'object': [(1, 2), True, False],
})
formatters = {'datetime64': lambda x: x.strftime('%Y-%m'),
'float': lambda x: '[{x: 4.1f}]'.format(x=x),
'int': lambda x: '0x{x:x}'.format(x=x),
'object': lambda x: '-{x!s}-'.format(x=x),
'__index__': lambda x: 'index: {x}'.format(x=x)}
result = df.to_latex(formatters=dict(formatters))
expected = r"""\begin{tabular}{llrrl}
\toprule
{} & datetime64 & float & int & object \\
\midrule
index: 0 & 2016-01 & [ 1.0] & 0x1 & -(1, 2)- \\
index: 1 & 2016-02 & [ 2.0] & 0x2 & -True- \\
index: 2 & 2016-03 & [ 3.0] & 0x3 & -False- \\
\bottomrule
\end{tabular}
"""
assert result == expected
def test_to_latex_multiindex(self):
df = DataFrame({('x', 'y'): ['a']})
result = df.to_latex()
expected = r"""\begin{tabular}{ll}
\toprule
{} & x \\
{} & y \\
\midrule
0 & a \\
\bottomrule
\end{tabular}
"""
assert result == expected
result = df.T.to_latex()
expected = r"""\begin{tabular}{lll}
\toprule
& & 0 \\
\midrule
x & y & a \\
\bottomrule
\end{tabular}
"""
assert result == expected
df = DataFrame.from_dict({
('c1', 0): pd.Series({x: x for x in range(4)}),
('c1', 1): pd.Series({x: x + 4 for x in range(4)}),
('c2', 0): pd.Series({x: x for x in range(4)}),
('c2', 1): pd.Series({x: x + 4 for x in range(4)}),
('c3', 0): pd.Series({x: x for x in range(4)}),
}).T
result = df.to_latex()
expected = r"""\begin{tabular}{llrrrr}
\toprule
& & 0 & 1 & 2 & 3 \\
\midrule
c1 & 0 & 0 & 1 & 2 & 3 \\
& 1 & 4 & 5 & 6 & 7 \\
c2 & 0 & 0 & 1 & 2 & 3 \\
& 1 & 4 & 5 & 6 & 7 \\
c3 & 0 & 0 & 1 & 2 & 3 \\
\bottomrule
\end{tabular}
"""
assert result == expected
# GH 14184
df = df.T
df.columns.names = ['a', 'b']
result = df.to_latex()
expected = r"""\begin{tabular}{lrrrrr}
\toprule
a & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\
b & 0 & 1 & 0 & 1 & 0 \\
\midrule
0 & 0 & 4 & 0 & 4 & 0 \\
1 & 1 & 5 & 1 & 5 & 1 \\
2 & 2 & 6 & 2 & 6 & 2 \\
3 & 3 & 7 & 3 & 7 & 3 \\
\bottomrule
\end{tabular}
"""
assert result == expected
# GH 10660
df = pd.DataFrame({'a': [0, 0, 1, 1],
'b': list('abab'),
'c': [1, 2, 3, 4]})
result = df.set_index(['a', 'b']).to_latex()
expected = r"""\begin{tabular}{llr}
\toprule
& & c \\
a & b & \\
\midrule
0 & a & 1 \\
& b & 2 \\
1 & a & 3 \\
& b & 4 \\
\bottomrule
\end{tabular}
"""
assert result == expected
result = df.groupby('a').describe().to_latex()
expected = r"""\begin{tabular}{lrrrrrrrr}
\toprule
{} & \multicolumn{8}{l}{c} \\
{} & count & mean & std & min & 25\% & 50\% & 75\% & max \\
a & & & & & & & & \\
\midrule
0 & 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 & 2.0 \\
1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 & 3.5 & 3.75 & 4.0 \\
\bottomrule
\end{tabular}
"""
assert result == expected
def test_to_latex_multiindex_dupe_level(self):
# see gh-14484
#
# If an index is repeated in subsequent rows, it should be
# replaced with a blank in the created table. This should
# ONLY happen if all higher order indices (to the left) are
# equal too. In this test, 'c' has to be printed both times
# because the higher order index 'A' != 'B'.
df = pd.DataFrame(index=pd.MultiIndex.from_tuples(
[('A', 'c'), ('B', 'c')]), columns=['col'])
result = df.to_latex()
expected = r"""\begin{tabular}{lll}
\toprule
& & col \\
\midrule
A & c & NaN \\
B & c & NaN \\
\bottomrule
\end{tabular}
"""
assert result == expected
def test_to_latex_multicolumnrow(self):
df = pd.DataFrame({
('c1', 0): {x: x for x in range(5)},
('c1', 1): {x: x + 5 for x in range(5)},
('c2', 0): {x: x for x in range(5)},
('c2', 1): {x: x + 5 for x in range(5)},
('c3', 0): {x: x for x in range(5)}
})
result = df.to_latex()
expected = r"""\begin{tabular}{lrrrrr}
\toprule
{} & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\
{} & 0 & 1 & 0 & 1 & 0 \\
\midrule
0 & 0 & 5 & 0 & 5 & 0 \\
1 & 1 & 6 & 1 & 6 & 1 \\
2 & 2 & 7 & 2 & 7 & 2 \\
3 & 3 & 8 & 3 & 8 & 3 \\
4 & 4 & 9 & 4 & 9 & 4 \\
\bottomrule
\end{tabular}
"""
assert result == expected
result = df.to_latex(multicolumn=False)
expected = r"""\begin{tabular}{lrrrrr}
\toprule
{} & c1 & & c2 & & c3 \\
{} & 0 & 1 & 0 & 1 & 0 \\
\midrule
0 & 0 & 5 & 0 & 5 & 0 \\
1 & 1 & 6 & 1 & 6 & 1 \\
2 & 2 & 7 & 2 & 7 & 2 \\
3 & 3 & 8 & 3 & 8 & 3 \\
4 & 4 & 9 & 4 & 9 & 4 \\
\bottomrule
\end{tabular}
"""
assert result == expected
result = df.T.to_latex(multirow=True)
expected = r"""\begin{tabular}{llrrrrr}
\toprule
& & 0 & 1 & 2 & 3 & 4 \\
\midrule
\multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\
& 1 & 5 & 6 & 7 & 8 & 9 \\
\cline{1-7}
\multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\
& 1 & 5 & 6 & 7 & 8 & 9 \\
\cline{1-7}
c3 & 0 & 0 & 1 & 2 & 3 & 4 \\
\bottomrule
\end{tabular}
"""
assert result == expected
df.index = df.T.index
result = df.T.to_latex(multirow=True, multicolumn=True,
multicolumn_format='c')
expected = r"""\begin{tabular}{llrrrrr}
\toprule
& & \multicolumn{2}{c}{c1} & \multicolumn{2}{c}{c2} & c3 \\
& & 0 & 1 & 0 & 1 & 0 \\
\midrule
\multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\
& 1 & 5 & 6 & 7 & 8 & 9 \\
\cline{1-7}
\multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\
& 1 & 5 & 6 & 7 & 8 & 9 \\
\cline{1-7}
c3 & 0 & 0 & 1 & 2 & 3 & 4 \\
\bottomrule
\end{tabular}
"""
assert result == expected
def test_to_latex_escape(self):
a = 'a'
b = 'b'
test_dict = {u('co$e^x$'): {a: "a",
b: "b"},
u('co^l1'): {a: "a",
b: "b"}}
unescaped_result = DataFrame(test_dict).to_latex(escape=False)
        # default: escape=True
        escaped_result = DataFrame(test_dict).to_latex()
unescaped_expected = r'''\begin{tabular}{lll}
\toprule
{} & co$e^x$ & co^l1 \\
\midrule
a & a & a \\
b & b & b \\
\bottomrule
\end{tabular}
'''
escaped_expected = r'''\begin{tabular}{lll}
\toprule
{} & co\$e\textasciicircum x\$ & co\textasciicircum l1 \\
\midrule
a & a & a \\
b & b & b \\
\bottomrule
\end{tabular}
'''
assert unescaped_result == unescaped_expected
assert escaped_result == escaped_expected
def test_to_latex_special_escape(self):
df = DataFrame([r"a\b\c", r"^a^b^c", r"~a~b~c"])
escaped_result = df.to_latex()
escaped_expected = r"""\begin{tabular}{ll}
\toprule
{} & 0 \\
\midrule
0 & a\textbackslash b\textbackslash c \\
1 & \textasciicircum a\textasciicircum b\textasciicircum c \\
2 & \textasciitilde a\textasciitilde b\textasciitilde c \\
\bottomrule
\end{tabular}
"""
assert escaped_result == escaped_expected
def test_to_latex_longtable(self, frame):
frame.to_latex(longtable=True)
df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']})
withindex_result = df.to_latex(longtable=True)
withindex_expected = r"""\begin{longtable}{lrl}
\toprule
{} & a & b \\
\midrule
\endhead
\midrule
\multicolumn{3}{r}{{Continued on next page}} \\
\midrule
\endfoot
\bottomrule
\endlastfoot
0 & 1 & b1 \\
1 & 2 & b2 \\
\end{longtable}
"""
assert withindex_result == withindex_expected
withoutindex_result = df.to_latex(index=False, longtable=True)
withoutindex_expected = r"""\begin{longtable}{rl}
\toprule
a & b \\
\midrule
\endhead
\midrule
\multicolumn{2}{r}{{Continued on next page}} \\
\midrule
\endfoot
\bottomrule
\endlastfoot
1 & b1 \\
2 & b2 \\
\end{longtable}
"""
assert withoutindex_result == withoutindex_expected
df = DataFrame({'a': [1, 2]})
with1column_result = df.to_latex(index=False, longtable=True)
assert r"\multicolumn{1}" in with1column_result
df = DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]})
with3columns_result = df.to_latex(index=False, longtable=True)
assert r"\multicolumn{3}" in with3columns_result
def test_to_latex_escape_special_chars(self):
special_characters = ['&', '%', '$', '#', '_', '{', '}', '~', '^',
'\\']
df = DataFrame(data=special_characters)
observed = df.to_latex()
expected = r"""\begin{tabular}{ll}
\toprule
{} & 0 \\
\midrule
0 & \& \\
1 & \% \\
2 & \$ \\
3 & \# \\
4 & \_ \\
5 & \{ \\
6 & \} \\
7 & \textasciitilde \\
8 & \textasciicircum \\
9 & \textbackslash \\
\bottomrule
\end{tabular}
"""
assert observed == expected
def test_to_latex_no_header(self):
# GH 7124
df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']})
withindex_result = df.to_latex(header=False)
withindex_expected = r"""\begin{tabular}{lrl}
\toprule
0 & 1 & b1 \\
1 & 2 & b2 \\
\bottomrule
\end{tabular}
"""
assert withindex_result == withindex_expected
withoutindex_result = df.to_latex(index=False, header=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
assert withoutindex_result == withoutindex_expected
def test_to_latex_specified_header(self):
# GH 7124
df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']})
withindex_result = df.to_latex(header=['AA', 'BB'])
withindex_expected = r"""\begin{tabular}{lrl}
\toprule
{} & AA & BB \\
\midrule
0 & 1 & b1 \\
1 & 2 & b2 \\
\bottomrule
\end{tabular}
"""
assert withindex_result == withindex_expected
withoutindex_result = df.to_latex(header=['AA', 'BB'], index=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
AA & BB \\
\midrule
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
assert withoutindex_result == withoutindex_expected
withoutescape_result = df.to_latex(header=['$A$', '$B$'], escape=False)
withoutescape_expected = r"""\begin{tabular}{lrl}
\toprule
{} & $A$ & $B$ \\
\midrule
0 & 1 & b1 \\
1 & 2 & b2 \\
\bottomrule
\end{tabular}
"""
assert withoutescape_result == withoutescape_expected
with pytest.raises(ValueError):
df.to_latex(header=['A'])
def test_to_latex_decimal(self, frame):
# GH 12031
frame.to_latex()
df = DataFrame({'a': [1.0, 2.1], 'b': ['b1', 'b2']})
withindex_result = df.to_latex(decimal=',')
withindex_expected = r"""\begin{tabular}{lrl}
\toprule
{} & a & b \\
\midrule
0 & 1,0 & b1 \\
1 & 2,1 & b2 \\
\bottomrule
\end{tabular}
"""
assert withindex_result == withindex_expected
def test_to_latex_series(self):
s = Series(['a', 'b', 'c'])
withindex_result = s.to_latex()
withindex_expected = r"""\begin{tabular}{ll}
\toprule
{} & 0 \\
\midrule
0 & a \\
1 & b \\
2 & c \\
\bottomrule
\end{tabular}
"""
assert withindex_result == withindex_expected
def test_to_latex_bold_rows(self):
# GH 16707
df = pd.DataFrame({'a': [1, 2], 'b': ['b1', 'b2']})
observed = df.to_latex(bold_rows=True)
expected = r"""\begin{tabular}{lrl}
\toprule
{} & a & b \\
\midrule
\textbf{0} & 1 & b1 \\
\textbf{1} & 2 & b2 \\
\bottomrule
\end{tabular}
"""
assert observed == expected
def test_to_latex_no_bold_rows(self):
# GH 16707
df = pd.DataFrame({'a': [1, 2], 'b': ['b1', 'b2']})
observed = df.to_latex(bold_rows=False)
expected = r"""\begin{tabular}{lrl}
\toprule
{} & a & b \\
\midrule
0 & 1 & b1 \\
1 & 2 & b2 \\
\bottomrule
\end{tabular}
"""
assert observed == expected
@pytest.mark.parametrize('name0', [None, 'named0'])
@pytest.mark.parametrize('name1', [None, 'named1'])
@pytest.mark.parametrize('axes', [[0], [1], [0, 1]])
def test_to_latex_multiindex_names(self, name0, name1, axes):
# GH 18667
names = [name0, name1]
mi = pd.MultiIndex.from_product([[1, 2], [3, 4]])
df = pd.DataFrame(-1, index=mi.copy(), columns=mi.copy())
for idx in axes:
df.axes[idx].names = names
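        # Build the expected header pieces: a separate index-names row is
        # emitted only when axis 0 carries names, and column names replace
        # the leading placeholders only when axis 1 carries names.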
idx_names = tuple(n or '{}' for n in names)
idx_names_row = ('%s & %s & & & & \\\\\n' % idx_names
if (0 in axes and any(names)) else '')
placeholder = '{}' if any(names) and 1 in axes else ' '
col_names = [n if (bool(n) and 1 in axes) else placeholder
for n in names]
observed = df.to_latex()
expected = r"""\begin{tabular}{llrrrr}
\toprule
& %s & \multicolumn{2}{l}{1} & \multicolumn{2}{l}{2} \\
& %s & 3 & 4 & 3 & 4 \\
%s\midrule
1 & 3 & -1 & -1 & -1 & -1 \\
& 4 & -1 & -1 & -1 & -1 \\
2 & 3 & -1 & -1 & -1 & -1 \\
& 4 & -1 & -1 & -1 & -1 \\
\bottomrule
\end{tabular}
""" % tuple(list(col_names) + [idx_names_row])
assert observed == expected
@pytest.mark.parametrize('one_row', [True, False])
def test_to_latex_multiindex_nans(self, one_row):
# GH 14249
df = pd.DataFrame({'a': [None, 1], 'b': [2, 3], 'c': [4, 5]})
if one_row:
df = df.iloc[[0]]
observed = df.set_index(['a', 'b']).to_latex()
expected = r"""\begin{tabular}{llr}
\toprule
& & c \\
a & b & \\
\midrule
NaN & 2 & 4 \\
"""
if not one_row:
expected += r"""1.0 & 3 & 5 \\
"""
expected += r"""\bottomrule
\end{tabular}
"""
assert observed == expected
def test_to_latex_non_string_index(self):
# GH 19981
observed = pd.DataFrame([[1, 2, 3]] * 2).set_index([0, 1]).to_latex()
expected = r"""\begin{tabular}{llr}
\toprule
& & 2 \\
0 & 1 & \\
\midrule
1 & 2 & 3 \\
& 2 & 3 \\
\bottomrule
\end{tabular}
"""
assert observed == expected
def test_to_latex_midrule_location(self):
# GH 18326
df = pd.DataFrame({'a': [1, 2]})
df.index.name = 'foo'
observed = df.to_latex(index_names=False)
expected = r"""\begin{tabular}{lr}
\toprule
{} & a \\
\midrule
0 & 1 \\
1 & 2 \\
\bottomrule
\end{tabular}
"""
assert observed == expected
def test_to_latex_multiindex_empty_name(self):
# GH 18669
mi = pd.MultiIndex.from_product([[1, 2]], names=[''])
df = pd.DataFrame(-1, index=mi, columns=range(4))
observed = df.to_latex()
expected = r"""\begin{tabular}{lrrrr}
\toprule
& 0 & 1 & 2 & 3 \\
{} & & & & \\
\midrule
1 & -1 & -1 & -1 & -1 \\
2 & -1 & -1 & -1 & -1 \\
\bottomrule
\end{tabular}
"""
assert observed == expected
def test_to_latex_float_format_no_fixed_width(self):
# GH 21625
df = DataFrame({'x': [0.19999]})
expected = r"""\begin{tabular}{lr}
\toprule
{} & x \\
\midrule
0 & 0.200 \\
\bottomrule
\end{tabular}
"""
assert df.to_latex(float_format='%.3f') == expected
# GH 22270
df = DataFrame({'x': [100.0]})
expected = r"""\begin{tabular}{lr}
\toprule
{} & x \\
\midrule
0 & 100 \\
\bottomrule
\end{tabular}
"""
assert df.to_latex(float_format='%.0f') == expected
@@ -0,0 +1,369 @@
#!/usr/bin/env python
"""
Self-contained script to write legacy storage (pickle/msgpack) files.

To use this script, create an environment in which you want to
generate pickles, say for 0.18.1, with your pandas clone in ~/pandas:
. activate pandas_0.18.1
cd ~/
$ python pandas/pandas/tests/io/generate_legacy_storage_files.py \
pandas/pandas/tests/io/data/legacy_pickle/0.18.1/ pickle
This script generates a storage file for the current arch, system,
and python version
pandas version: 0.18.1
output dir : pandas/pandas/tests/io/data/legacy_pickle/0.18.1/
storage format: pickle
created pickle file: 0.18.1_x86_64_darwin_3.5.2.pickle
The idea here is that you are using the *current* version of
generate_legacy_storage_files with an *older* version of pandas to
generate a pickle file. We then check this file into a current branch
and test it with test_pickle.py, which loads the *older* pickles and
compares them against data freshly generated with the current version
(master).

If we have cases where we changed a signature (e.g. we renamed
offset -> freq in Timestamp), then we have to execute conditionally in
generate_legacy_storage_files.py so that it runs under both the older
AND the newer version.
"""
from __future__ import print_function
from datetime import timedelta
from distutils.version import LooseVersion
import os
import platform as pl
import sys
from warnings import catch_warnings, filterwarnings
import numpy as np
from pandas.compat import u
import pandas
from pandas import (
Categorical, DataFrame, Index, MultiIndex, NaT, Panel, Period, Series,
SparseDataFrame, SparseSeries, Timestamp, bdate_range, date_range,
period_range, timedelta_range, to_msgpack)
from pandas.tseries.offsets import (
FY5253, BusinessDay, BusinessHour, CustomBusinessDay, DateOffset, Day,
Easter, Hour, LastWeekOfMonth, Minute, MonthBegin, MonthEnd, QuarterBegin,
QuarterEnd, SemiMonthBegin, SemiMonthEnd, Week, WeekOfMonth, YearBegin,
YearEnd)
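# pandas version of the environment running this script; used below to
# decide which objects this version can create and round-trip.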
_loose_version = LooseVersion(pandas.__version__)
def _create_sp_series():
nan = np.nan
# nan-based
arr = np.arange(15, dtype=np.float64)
arr[7:12] = nan
arr[-1:] = nan
bseries = SparseSeries(arr, kind='block')
bseries.name = u'bseries'
return bseries
def _create_sp_tsseries():
nan = np.nan
# nan-based
arr = np.arange(15, dtype=np.float64)
arr[7:12] = nan
arr[-1:] = nan
date_index = bdate_range('1/1/2011', periods=len(arr))
bseries = SparseSeries(arr, index=date_index, kind='block')
bseries.name = u'btsseries'
return bseries
def _create_sp_frame():
nan = np.nan
data = {u'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
u'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
u'C': np.arange(10).astype(np.int64),
u'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}
dates = bdate_range('1/1/2011', periods=10)
return SparseDataFrame(data, index=dates)
def create_data():
""" create the pickle/msgpack data """
data = {
u'A': [0., 1., 2., 3., np.nan],
u'B': [0, 1, 0, 1, 0],
u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'],
u'D': date_range('1/1/2009', periods=5),
u'E': [0., 1, Timestamp('20100101'), u'foo', 2.]
}
scalars = dict(timestamp=Timestamp('20130101'),
period=Period('2012', 'M'))
index = dict(int=Index(np.arange(10)),
date=date_range('20130101', periods=10),
period=period_range('2013-01-01', freq='M', periods=10),
float=Index(np.arange(10, dtype=np.float64)),
uint=Index(np.arange(10, dtype=np.uint64)),
timedelta=timedelta_range('00:00:00', freq='30T', periods=10))
if _loose_version >= LooseVersion('0.18'):
from pandas import RangeIndex
index['range'] = RangeIndex(10)
if _loose_version >= LooseVersion('0.21'):
from pandas import interval_range
index['interval'] = interval_range(0, periods=10)
mi = dict(reg2=MultiIndex.from_tuples(
tuple(zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo',
u'foo', u'qux', u'qux'],
[u'one', u'two', u'one', u'two', u'one',
u'two', u'one', u'two']])),
names=[u'first', u'second']))
series = dict(float=Series(data[u'A']),
int=Series(data[u'B']),
mixed=Series(data[u'E']),
ts=Series(np.arange(10).astype(np.int64),
index=date_range('20130101', periods=10)),
mi=Series(np.arange(5).astype(np.float64),
index=MultiIndex.from_tuples(
tuple(zip(*[[1, 1, 2, 2, 2],
[3, 4, 3, 4, 5]])),
names=[u'one', u'two'])),
dup=Series(np.arange(5).astype(np.float64),
index=[u'A', u'B', u'C', u'D', u'A']),
cat=Series(Categorical([u'foo', u'bar', u'baz'])),
dt=Series(date_range('20130101', periods=5)),
dt_tz=Series(date_range('20130101', periods=5,
tz='US/Eastern')),
period=Series([Period('2000Q1')] * 5))
mixed_dup_df = DataFrame(data)
mixed_dup_df.columns = list(u"ABCDA")
frame = dict(float=DataFrame({u'A': series[u'float'],
u'B': series[u'float'] + 1}),
int=DataFrame({u'A': series[u'int'],
u'B': series[u'int'] + 1}),
mixed=DataFrame({k: data[k]
for k in [u'A', u'B', u'C', u'D']}),
mi=DataFrame({u'A': np.arange(5).astype(np.float64),
u'B': np.arange(5).astype(np.int64)},
index=MultiIndex.from_tuples(
tuple(zip(*[[u'bar', u'bar', u'baz',
u'baz', u'baz'],
[u'one', u'two', u'one',
u'two', u'three']])),
names=[u'first', u'second'])),
dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
columns=[u'A', u'B', u'A']),
cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}),
cat_and_float=DataFrame({
u'A': Categorical([u'foo', u'bar', u'baz']),
u'B': np.arange(3).astype(np.int64)}),
mixed_dup=mixed_dup_df,
dt_mixed_tzs=DataFrame({
u'A': Timestamp('20130102', tz='US/Eastern'),
u'B': Timestamp('20130603', tz='CET')}, index=range(5)),
dt_mixed2_tzs=DataFrame({
u'A': Timestamp('20130102', tz='US/Eastern'),
u'B': Timestamp('20130603', tz='CET'),
u'C': Timestamp('20130603', tz='UTC')}, index=range(5))
)
with catch_warnings(record=True):
filterwarnings("ignore", "\\nPanel", FutureWarning)
mixed_dup_panel = Panel({u'ItemA': frame[u'float'],
u'ItemB': frame[u'int']})
mixed_dup_panel.items = [u'ItemA', u'ItemA']
panel = dict(float=Panel({u'ItemA': frame[u'float'],
u'ItemB': frame[u'float'] + 1}),
dup=Panel(
np.arange(30).reshape(3, 5, 2).astype(np.float64),
items=[u'A', u'B', u'A']),
mixed_dup=mixed_dup_panel)
cat = dict(int8=Categorical(list('abcdefg')),
int16=Categorical(np.arange(1000)),
int32=Categorical(np.arange(10000)))
timestamp = dict(normal=Timestamp('2011-01-01'),
nat=NaT,
tz=Timestamp('2011-01-01', tz='US/Eastern'))
if _loose_version < LooseVersion('0.19.2'):
timestamp['freq'] = Timestamp('2011-01-01', offset='D')
timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo',
offset='M')
else:
timestamp['freq'] = Timestamp('2011-01-01', freq='D')
timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo',
freq='M')
off = {'DateOffset': DateOffset(years=1),
'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
'MonthBegin': MonthBegin(1),
'MonthEnd': MonthEnd(1),
'QuarterBegin': QuarterBegin(1),
'QuarterEnd': QuarterEnd(1),
'Day': Day(1),
'YearBegin': YearBegin(1),
'YearEnd': YearEnd(1),
'Week': Week(1),
'Week_Tues': Week(2, normalize=False, weekday=1),
'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
'Easter': Easter(),
'Hour': Hour(1),
'Minute': Minute(1)}
return dict(series=series,
frame=frame,
panel=panel,
index=index,
scalars=scalars,
mi=mi,
sp_series=dict(float=_create_sp_series(),
ts=_create_sp_tsseries()),
sp_frame=dict(float=_create_sp_frame()),
cat=cat,
timestamp=timestamp,
offsets=off)
def create_pickle_data():
data = create_data()
# Pre-0.14.1 versions generated non-unpicklable mixed-type frames and
# panels if their columns/items were non-unique.
if _loose_version < LooseVersion('0.14.1'):
del data['frame']['mixed_dup']
del data['panel']['mixed_dup']
if _loose_version < LooseVersion('0.17.0'):
del data['series']['period']
del data['scalars']['period']
return data
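# Recursively coerce dict keys to unicode so the msgpack payload is
# consistent between py2 and py3.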
def _u(x):
return {u(k): _u(x[k]) for k in x} if isinstance(x, dict) else x
def create_msgpack_data():
data = create_data()
if _loose_version < LooseVersion('0.17.0'):
del data['frame']['mixed_dup']
del data['panel']['mixed_dup']
del data['frame']['dup']
del data['panel']['dup']
if _loose_version < LooseVersion('0.18.0'):
del data['series']['dt_tz']
del data['frame']['dt_mixed_tzs']
# Not supported
del data['sp_series']
del data['sp_frame']
del data['series']['cat']
del data['series']['period']
del data['frame']['cat_onecol']
del data['frame']['cat_and_float']
del data['scalars']['period']
if _loose_version < LooseVersion('0.23.0'):
del data['index']['interval']
del data['offsets']
return _u(data)
def platform_name():
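    # e.g. '0.18.1_x86_64_darwin_3.5.2': pandas version, machine arch,
    # OS name and python version (cf. the file name in the docstring)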
return '_'.join([str(pandas.__version__), str(pl.machine()),
str(pl.system().lower()), str(pl.python_version())])
def write_legacy_pickles(output_dir):
# make sure we are < 0.13 compat (in py3)
try:
from pandas.compat import zip, cPickle as pickle # noqa
except ImportError:
import pickle
version = pandas.__version__
print("This script generates a storage file for the current arch, system, "
"and python version")
print(" pandas version: {0}".format(version))
print(" output dir : {0}".format(output_dir))
print(" storage format: pickle")
pth = '{0}.pickle'.format(platform_name())
    with open(os.path.join(output_dir, pth), 'wb') as fh:
        pickle.dump(create_pickle_data(), fh, pickle.HIGHEST_PROTOCOL)
print("created pickle file: %s" % pth)
def write_legacy_msgpack(output_dir, compress):
version = pandas.__version__
print("This script generates a storage file for the current arch, "
"system, and python version")
print(" pandas version: {0}".format(version))
print(" output dir : {0}".format(output_dir))
print(" storage format: msgpack")
pth = '{0}.msgpack'.format(platform_name())
to_msgpack(os.path.join(output_dir, pth), create_msgpack_data(),
compress=compress)
print("created msgpack file: %s" % pth)
def write_legacy_file():
# force our cwd to be the first searched
sys.path.insert(0, '.')
if not (3 <= len(sys.argv) <= 4):
exit("Specify output directory and storage type: generate_legacy_"
"storage_files.py <output_dir> <storage_type> "
"<msgpack_compress_type>")
output_dir = str(sys.argv[1])
storage_type = str(sys.argv[2])
try:
compress_type = str(sys.argv[3])
except IndexError:
compress_type = None
if storage_type == 'pickle':
write_legacy_pickles(output_dir=output_dir)
elif storage_type == 'msgpack':
write_legacy_msgpack(output_dir=output_dir, compress=compress_type)
else:
exit("storage_type must be one of {'pickle', 'msgpack'}")
if __name__ == '__main__':
write_legacy_file()
@@ -0,0 +1,120 @@
import pytest
import pandas.util._test_decorators as td
import pandas as pd
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal
def test_compression_roundtrip(compression):
df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
[12.32112, 123123.2, 321321.2]],
index=['A', 'B'], columns=['X', 'Y', 'Z'])
with tm.ensure_clean() as path:
df.to_json(path, compression=compression)
assert_frame_equal(df, pd.read_json(path,
compression=compression))
# explicitly ensure file was compressed.
with tm.decompress_file(path, compression) as fh:
result = fh.read().decode('utf8')
assert_frame_equal(df, pd.read_json(result))
def test_read_zipped_json(datapath):
uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json")
uncompressed_df = pd.read_json(uncompressed_path)
compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip")
compressed_df = pd.read_json(compressed_path, compression='zip')
assert_frame_equal(uncompressed_df, compressed_df)
@td.skip_if_not_us_locale
def test_with_s3_url(compression, s3_resource):
# Bucket "pandas-test" created in tests/io/conftest.py
df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
with tm.ensure_clean() as path:
df.to_json(path, compression=compression)
with open(path, 'rb') as f:
s3_resource.Bucket("pandas-test").put_object(Key='test-1', Body=f)
roundtripped_df = pd.read_json('s3://pandas-test/test-1',
compression=compression)
assert_frame_equal(df, roundtripped_df)
def test_lines_with_compression(compression):
with tm.ensure_clean() as path:
df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
df.to_json(path, orient='records', lines=True,
compression=compression)
roundtripped_df = pd.read_json(path, lines=True,
compression=compression)
assert_frame_equal(df, roundtripped_df)
def test_chunksize_with_compression(compression):
with tm.ensure_clean() as path:
df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')
df.to_json(path, orient='records', lines=True,
compression=compression)
res = pd.read_json(path, lines=True, chunksize=1,
compression=compression)
roundtripped_df = pd.concat(res)
assert_frame_equal(df, roundtripped_df)
def test_write_unsupported_compression_type():
df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
with tm.ensure_clean() as path:
msg = "Unrecognized compression type: unsupported"
with pytest.raises(ValueError, match=msg):
df.to_json(path, compression="unsupported")
def test_read_unsupported_compression_type():
with tm.ensure_clean() as path:
msg = "Unrecognized compression type: unsupported"
with pytest.raises(ValueError, match=msg):
pd.read_json(path, compression="unsupported")
@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
def test_to_json_compression(compression_only,
read_infer, to_infer):
# see gh-15008
compression = compression_only
if compression == "zip":
pytest.skip("{compression} is not supported "
"for to_csv".format(compression=compression))
# We'll complete file extension subsequently.
filename = "test."
if compression == "gzip":
filename += "gz"
else:
# xz --> .xz
# bz2 --> .bz2
filename += compression
df = pd.DataFrame({"A": [1]})
to_compression = "infer" if to_infer else compression
read_compression = "infer" if read_infer else compression
with tm.ensure_clean(filename) as path:
df.to_json(path, compression=to_compression)
result = pd.read_json(path, compression=read_compression)
tm.assert_frame_equal(result, df)
@@ -0,0 +1,580 @@
"""Tests for Table Schema integration."""
from collections import OrderedDict
import json
import numpy as np
import pytest
from pandas.core.dtypes.dtypes import (
CategoricalDtype, DatetimeTZDtype, PeriodDtype)
import pandas as pd
from pandas import DataFrame
import pandas.util.testing as tm
from pandas.io.json.table_schema import (
as_json_table_type, build_table_schema, convert_json_field_to_pandas_type,
convert_pandas_type_to_json_field, set_default_names)
class TestBuildSchema(object):
def setup_method(self, method):
self.df = DataFrame(
{'A': [1, 2, 3, 4],
'B': ['a', 'b', 'c', 'c'],
'C': pd.date_range('2016-01-01', freq='d', periods=4),
'D': pd.timedelta_range('1H', periods=4, freq='T'),
},
index=pd.Index(range(4), name='idx'))
def test_build_table_schema(self):
result = build_table_schema(self.df, version=False)
expected = {
'fields': [{'name': 'idx', 'type': 'integer'},
{'name': 'A', 'type': 'integer'},
{'name': 'B', 'type': 'string'},
{'name': 'C', 'type': 'datetime'},
{'name': 'D', 'type': 'duration'},
],
'primaryKey': ['idx']
}
assert result == expected
result = build_table_schema(self.df)
assert "pandas_version" in result
def test_series(self):
s = pd.Series([1, 2, 3], name='foo')
result = build_table_schema(s, version=False)
expected = {'fields': [{'name': 'index', 'type': 'integer'},
{'name': 'foo', 'type': 'integer'}],
'primaryKey': ['index']}
assert result == expected
result = build_table_schema(s)
assert 'pandas_version' in result
def test_series_unnamed(self):
result = build_table_schema(pd.Series([1, 2, 3]), version=False)
expected = {'fields': [{'name': 'index', 'type': 'integer'},
{'name': 'values', 'type': 'integer'}],
'primaryKey': ['index']}
assert result == expected
def test_multiindex(self):
df = self.df.copy()
idx = pd.MultiIndex.from_product([('a', 'b'), (1, 2)])
df.index = idx
result = build_table_schema(df, version=False)
expected = {
'fields': [{'name': 'level_0', 'type': 'string'},
{'name': 'level_1', 'type': 'integer'},
{'name': 'A', 'type': 'integer'},
{'name': 'B', 'type': 'string'},
{'name': 'C', 'type': 'datetime'},
{'name': 'D', 'type': 'duration'},
],
'primaryKey': ['level_0', 'level_1']
}
assert result == expected
df.index.names = ['idx0', None]
expected['fields'][0]['name'] = 'idx0'
expected['primaryKey'] = ['idx0', 'level_1']
result = build_table_schema(df, version=False)
assert result == expected
class TestTableSchemaType(object):
@pytest.mark.parametrize('int_type', [
np.int, np.int16, np.int32, np.int64])
def test_as_json_table_type_int_data(self, int_type):
int_data = [1, 2, 3]
assert as_json_table_type(np.array(
int_data, dtype=int_type)) == 'integer'
@pytest.mark.parametrize('float_type', [
np.float, np.float16, np.float32, np.float64])
def test_as_json_table_type_float_data(self, float_type):
float_data = [1., 2., 3.]
assert as_json_table_type(np.array(
float_data, dtype=float_type)) == 'number'
@pytest.mark.parametrize('bool_type', [bool, np.bool])
def test_as_json_table_type_bool_data(self, bool_type):
bool_data = [True, False]
assert as_json_table_type(np.array(
bool_data, dtype=bool_type)) == 'boolean'
@pytest.mark.parametrize('date_data', [
pd.to_datetime(['2016']),
pd.to_datetime(['2016'], utc=True),
pd.Series(pd.to_datetime(['2016'])),
pd.Series(pd.to_datetime(['2016'], utc=True)),
pd.period_range('2016', freq='A', periods=3)
])
def test_as_json_table_type_date_data(self, date_data):
assert as_json_table_type(date_data) == 'datetime'
@pytest.mark.parametrize('str_data', [
pd.Series(['a', 'b']), pd.Index(['a', 'b'])])
def test_as_json_table_type_string_data(self, str_data):
assert as_json_table_type(str_data) == 'string'
@pytest.mark.parametrize('cat_data', [
pd.Categorical(['a']),
pd.Categorical([1]),
pd.Series(pd.Categorical([1])),
pd.CategoricalIndex([1]),
pd.Categorical([1])])
def test_as_json_table_type_categorical_data(self, cat_data):
assert as_json_table_type(cat_data) == 'any'
# ------
# dtypes
# ------
@pytest.mark.parametrize('int_dtype', [
np.int, np.int16, np.int32, np.int64])
def test_as_json_table_type_int_dtypes(self, int_dtype):
assert as_json_table_type(int_dtype) == 'integer'
@pytest.mark.parametrize('float_dtype', [
np.float, np.float16, np.float32, np.float64])
def test_as_json_table_type_float_dtypes(self, float_dtype):
assert as_json_table_type(float_dtype) == 'number'
@pytest.mark.parametrize('bool_dtype', [bool, np.bool])
def test_as_json_table_type_bool_dtypes(self, bool_dtype):
assert as_json_table_type(bool_dtype) == 'boolean'
@pytest.mark.parametrize('date_dtype', [
np.datetime64, np.dtype("<M8[ns]"), PeriodDtype('D'),
DatetimeTZDtype('ns', 'US/Central')])
def test_as_json_table_type_date_dtypes(self, date_dtype):
        # TODO: datetime.date? datetime.time?
assert as_json_table_type(date_dtype) == 'datetime'
@pytest.mark.parametrize('td_dtype', [
np.timedelta64, np.dtype("<m8[ns]")])
def test_as_json_table_type_timedelta_dtypes(self, td_dtype):
assert as_json_table_type(td_dtype) == 'duration'
@pytest.mark.parametrize('str_dtype', [object]) # TODO
def test_as_json_table_type_string_dtypes(self, str_dtype):
assert as_json_table_type(str_dtype) == 'string'
def test_as_json_table_type_categorical_dtypes(self):
# TODO: I think before is_categorical_dtype(Categorical)
# returned True, but now it's False. Figure out why or
# if it matters
assert as_json_table_type(pd.Categorical(['a'])) == 'any'
assert as_json_table_type(CategoricalDtype()) == 'any'
class TestTableOrient(object):
def setup_method(self, method):
self.df = DataFrame(
{'A': [1, 2, 3, 4],
'B': ['a', 'b', 'c', 'c'],
'C': pd.date_range('2016-01-01', freq='d', periods=4),
'D': pd.timedelta_range('1H', periods=4, freq='T'),
'E': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'])),
'F': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'],
ordered=True)),
'G': [1., 2., 3, 4.],
'H': pd.date_range('2016-01-01', freq='d', periods=4,
tz='US/Central'),
},
index=pd.Index(range(4), name='idx'))
def test_build_series(self):
s = pd.Series([1, 2], name='a')
s.index.name = 'id'
result = s.to_json(orient='table', date_format='iso')
result = json.loads(result, object_pairs_hook=OrderedDict)
assert "pandas_version" in result['schema']
result['schema'].pop('pandas_version')
fields = [{'name': 'id', 'type': 'integer'},
{'name': 'a', 'type': 'integer'}]
schema = {
'fields': fields,
'primaryKey': ['id'],
}
expected = OrderedDict([
('schema', schema),
('data', [OrderedDict([('id', 0), ('a', 1)]),
OrderedDict([('id', 1), ('a', 2)])])])
assert result == expected
def test_to_json(self):
df = self.df.copy()
df.index.name = 'idx'
result = df.to_json(orient='table', date_format='iso')
result = json.loads(result, object_pairs_hook=OrderedDict)
assert "pandas_version" in result['schema']
result['schema'].pop('pandas_version')
fields = [
{'name': 'idx', 'type': 'integer'},
{'name': 'A', 'type': 'integer'},
{'name': 'B', 'type': 'string'},
{'name': 'C', 'type': 'datetime'},
{'name': 'D', 'type': 'duration'},
{'constraints': {'enum': ['a', 'b', 'c']},
'name': 'E',
'ordered': False,
'type': 'any'},
{'constraints': {'enum': ['a', 'b', 'c']},
'name': 'F',
'ordered': True,
'type': 'any'},
{'name': 'G', 'type': 'number'},
{'name': 'H', 'type': 'datetime', 'tz': 'US/Central'}
]
schema = {
'fields': fields,
'primaryKey': ['idx'],
}
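        # Timedeltas serialize as ISO 8601 durations ('P0DT1H0M0S' is one
        # hour) and tz-aware timestamps are written in UTC, hence the
        # 06:00 'Z' times for midnight US/Central.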
data = [
OrderedDict([('idx', 0), ('A', 1), ('B', 'a'),
('C', '2016-01-01T00:00:00.000Z'),
('D', 'P0DT1H0M0S'),
('E', 'a'), ('F', 'a'), ('G', 1.),
('H', '2016-01-01T06:00:00.000Z')
]),
OrderedDict([('idx', 1), ('A', 2), ('B', 'b'),
('C', '2016-01-02T00:00:00.000Z'),
('D', 'P0DT1H1M0S'),
('E', 'b'), ('F', 'b'), ('G', 2.),
('H', '2016-01-02T06:00:00.000Z')
]),
OrderedDict([('idx', 2), ('A', 3), ('B', 'c'),
('C', '2016-01-03T00:00:00.000Z'),
('D', 'P0DT1H2M0S'),
('E', 'c'), ('F', 'c'), ('G', 3.),
('H', '2016-01-03T06:00:00.000Z')
]),
OrderedDict([('idx', 3), ('A', 4), ('B', 'c'),
('C', '2016-01-04T00:00:00.000Z'),
('D', 'P0DT1H3M0S'),
('E', 'c'), ('F', 'c'), ('G', 4.),
('H', '2016-01-04T06:00:00.000Z')
]),
]
expected = OrderedDict([('schema', schema), ('data', data)])
assert result == expected
def test_to_json_float_index(self):
data = pd.Series(1, index=[1., 2.])
result = data.to_json(orient='table', date_format='iso')
result = json.loads(result, object_pairs_hook=OrderedDict)
result['schema'].pop('pandas_version')
expected = (
OrderedDict([('schema', {
'fields': [{'name': 'index', 'type': 'number'},
{'name': 'values', 'type': 'integer'}],
'primaryKey': ['index']
}),
('data', [OrderedDict([('index', 1.0), ('values', 1)]),
OrderedDict([('index', 2.0), ('values', 1)])])])
)
assert result == expected
def test_to_json_period_index(self):
idx = pd.period_range('2016', freq='Q-JAN', periods=2)
data = pd.Series(1, idx)
result = data.to_json(orient='table', date_format='iso')
result = json.loads(result, object_pairs_hook=OrderedDict)
result['schema'].pop('pandas_version')
fields = [{'freq': 'Q-JAN', 'name': 'index', 'type': 'datetime'},
{'name': 'values', 'type': 'integer'}]
schema = {'fields': fields, 'primaryKey': ['index']}
data = [OrderedDict([('index', '2015-11-01T00:00:00.000Z'),
('values', 1)]),
OrderedDict([('index', '2016-02-01T00:00:00.000Z'),
('values', 1)])]
expected = OrderedDict([('schema', schema), ('data', data)])
assert result == expected
def test_to_json_categorical_index(self):
data = pd.Series(1, pd.CategoricalIndex(['a', 'b']))
result = data.to_json(orient='table', date_format='iso')
result = json.loads(result, object_pairs_hook=OrderedDict)
result['schema'].pop('pandas_version')
expected = (
OrderedDict([('schema',
{'fields': [{'name': 'index', 'type': 'any',
'constraints': {'enum': ['a', 'b']},
'ordered': False},
{'name': 'values', 'type': 'integer'}],
'primaryKey': ['index']}),
('data', [
OrderedDict([('index', 'a'),
('values', 1)]),
OrderedDict([('index', 'b'), ('values', 1)])])])
)
assert result == expected
def test_date_format_raises(self):
with pytest.raises(ValueError):
self.df.to_json(orient='table', date_format='epoch')
# others work
self.df.to_json(orient='table', date_format='iso')
self.df.to_json(orient='table')
@pytest.mark.parametrize('kind', [pd.Series, pd.Index])
def test_convert_pandas_type_to_json_field_int(self, kind):
data = [1, 2, 3]
result = convert_pandas_type_to_json_field(kind(data, name='name'))
expected = {"name": "name", "type": "integer"}
assert result == expected
@pytest.mark.parametrize('kind', [pd.Series, pd.Index])
def test_convert_pandas_type_to_json_field_float(self, kind):
data = [1., 2., 3.]
result = convert_pandas_type_to_json_field(kind(data, name='name'))
expected = {"name": "name", "type": "number"}
assert result == expected
@pytest.mark.parametrize('dt_args,extra_exp', [
({}, {}), ({'utc': True}, {'tz': 'UTC'})])
@pytest.mark.parametrize('wrapper', [None, pd.Series])
def test_convert_pandas_type_to_json_field_datetime(self, dt_args,
extra_exp, wrapper):
data = [1., 2., 3.]
data = pd.to_datetime(data, **dt_args)
if wrapper is pd.Series:
data = pd.Series(data, name='values')
result = convert_pandas_type_to_json_field(data)
expected = {"name": "values", "type": 'datetime'}
expected.update(extra_exp)
assert result == expected
def test_convert_pandas_type_to_json_period_range(self):
arr = pd.period_range('2016', freq='A-DEC', periods=4)
result = convert_pandas_type_to_json_field(arr)
expected = {"name": "values", "type": 'datetime', "freq": "A-DEC"}
assert result == expected
@pytest.mark.parametrize('kind', [pd.Categorical, pd.CategoricalIndex])
@pytest.mark.parametrize('ordered', [True, False])
def test_convert_pandas_type_to_json_field_categorical(self, kind,
ordered):
data = ['a', 'b', 'c']
if kind is pd.Categorical:
arr = pd.Series(kind(data, ordered=ordered), name='cats')
elif kind is pd.CategoricalIndex:
arr = kind(data, ordered=ordered, name='cats')
result = convert_pandas_type_to_json_field(arr)
expected = {"name": "cats", "type": "any",
"constraints": {"enum": data},
"ordered": ordered}
assert result == expected
@pytest.mark.parametrize("inp,exp", [
({'type': 'integer'}, 'int64'),
({'type': 'number'}, 'float64'),
({'type': 'boolean'}, 'bool'),
({'type': 'duration'}, 'timedelta64'),
({'type': 'datetime'}, 'datetime64[ns]'),
({'type': 'datetime', 'tz': 'US/Hawaii'}, 'datetime64[ns, US/Hawaii]'),
({'type': 'any'}, 'object'),
({'type': 'any', 'constraints': {'enum': ['a', 'b', 'c']},
'ordered': False}, CategoricalDtype(categories=['a', 'b', 'c'],
ordered=False)),
({'type': 'any', 'constraints': {'enum': ['a', 'b', 'c']},
'ordered': True}, CategoricalDtype(categories=['a', 'b', 'c'],
ordered=True)),
({'type': 'string'}, 'object')])
def test_convert_json_field_to_pandas_type(self, inp, exp):
field = {'name': 'foo'}
field.update(inp)
assert convert_json_field_to_pandas_type(field) == exp
@pytest.mark.parametrize("inp", ["geopoint", "geojson", "fake_type"])
def test_convert_json_field_to_pandas_type_raises(self, inp):
field = {'type': inp}
with pytest.raises(ValueError, match=("Unsupported or invalid field "
"type: {}".format(inp))):
convert_json_field_to_pandas_type(field)
def test_categorical(self):
s = pd.Series(pd.Categorical(['a', 'b', 'a']))
s.index.name = 'idx'
result = s.to_json(orient='table', date_format='iso')
result = json.loads(result, object_pairs_hook=OrderedDict)
result['schema'].pop('pandas_version')
fields = [{'name': 'idx', 'type': 'integer'},
{'constraints': {'enum': ['a', 'b']},
'name': 'values',
'ordered': False,
'type': 'any'}]
expected = OrderedDict([
('schema', {'fields': fields,
'primaryKey': ['idx']}),
('data', [OrderedDict([('idx', 0), ('values', 'a')]),
OrderedDict([('idx', 1), ('values', 'b')]),
OrderedDict([('idx', 2), ('values', 'a')])])])
assert result == expected
@pytest.mark.parametrize('idx,nm,prop', [
(pd.Index([1]), 'index', 'name'),
(pd.Index([1], name='myname'), 'myname', 'name'),
(pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')]),
['level_0', 'level_1'], 'names'),
(pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')],
names=['n1', 'n2']),
['n1', 'n2'], 'names'),
(pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')],
names=['n1', None]),
['n1', 'level_1'], 'names')
])
def test_set_names_unset(self, idx, nm, prop):
data = pd.Series(1, idx)
result = set_default_names(data)
assert getattr(result.index, prop) == nm
@pytest.mark.parametrize("idx", [
pd.Index([], name='index'),
pd.MultiIndex.from_arrays([['foo'], ['bar']],
names=('level_0', 'level_1')),
pd.MultiIndex.from_arrays([['foo'], ['bar']],
names=('foo', 'level_1'))
])
def test_warns_non_roundtrippable_names(self, idx):
# GH 19130
df = pd.DataFrame([[]], index=idx)
df.index.name = 'index'
with tm.assert_produces_warning():
set_default_names(df)
def test_timestamp_in_columns(self):
df = pd.DataFrame([[1, 2]], columns=[pd.Timestamp('2016'),
pd.Timedelta(10, unit='s')])
result = df.to_json(orient="table")
js = json.loads(result)
assert js['schema']['fields'][1]['name'] == 1451606400000
assert js['schema']['fields'][2]['name'] == 10000
@pytest.mark.parametrize('case', [
pd.Series([1], index=pd.Index([1], name='a'), name='a'),
pd.DataFrame({"A": [1]}, index=pd.Index([1], name="A")),
pd.DataFrame({"A": [1]}, index=pd.MultiIndex.from_arrays([
['a'], [1]], names=["A", "a"]))
])
def test_overlapping_names(self, case):
with pytest.raises(ValueError, match='Overlapping'):
case.to_json(orient='table')
def test_mi_falsey_name(self):
# GH 16203
df = pd.DataFrame(np.random.randn(4, 4),
index=pd.MultiIndex.from_product([('A', 'B'),
('a', 'b')]))
result = [x['name'] for x in build_table_schema(df)['fields']]
assert result == ['level_0', 'level_1', 0, 1, 2, 3]
class TestTableOrientReader(object):
@pytest.mark.parametrize("index_nm", [
None,
"idx",
pytest.param("index",
marks=pytest.mark.xfail),
'level_0'])
@pytest.mark.parametrize("vals", [
{'ints': [1, 2, 3, 4]},
{'objects': ['a', 'b', 'c', 'd']},
{'date_ranges': pd.date_range('2016-01-01', freq='d', periods=4)},
{'categoricals': pd.Series(pd.Categorical(['a', 'b', 'c', 'c']))},
{'ordered_cats': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'],
ordered=True))},
pytest.param({'floats': [1., 2., 3., 4.]},
marks=pytest.mark.xfail),
{'floats': [1.1, 2.2, 3.3, 4.4]},
{'bools': [True, False, False, True]}])
def test_read_json_table_orient(self, index_nm, vals, recwarn):
df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
out = df.to_json(orient="table")
result = pd.read_json(out, orient="table")
tm.assert_frame_equal(df, result)
@pytest.mark.parametrize("index_nm", [
None, "idx", "index"])
@pytest.mark.parametrize("vals", [
{'timedeltas': pd.timedelta_range('1H', periods=4, freq='T')},
{'timezones': pd.date_range('2016-01-01', freq='d', periods=4,
tz='US/Central')}])
def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
out = df.to_json(orient="table")
with pytest.raises(NotImplementedError, match='can not yet read '):
pd.read_json(out, orient="table")
def test_comprehensive(self):
df = DataFrame(
{'A': [1, 2, 3, 4],
'B': ['a', 'b', 'c', 'c'],
'C': pd.date_range('2016-01-01', freq='d', periods=4),
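             # 'D' (timedeltas) and 'H' (tz-aware datetimes) stay commented
             # out: they do not yet round-trip through orient='table'
             # (see test_read_json_table_orient_raises above).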
# 'D': pd.timedelta_range('1H', periods=4, freq='T'),
'E': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'])),
'F': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'],
ordered=True)),
'G': [1.1, 2.2, 3.3, 4.4],
# 'H': pd.date_range('2016-01-01', freq='d', periods=4,
# tz='US/Central'),
'I': [True, False, False, True],
},
index=pd.Index(range(4), name='idx'))
out = df.to_json(orient="table")
result = pd.read_json(out, orient="table")
tm.assert_frame_equal(df, result)
@pytest.mark.parametrize("index_names", [
[None, None], ['foo', 'bar'], ['foo', None], [None, 'foo'],
['index', 'foo']])
def test_multiindex(self, index_names):
# GH 18912
df = pd.DataFrame(
[["Arr", "alpha", [1, 2, 3, 4]],
["Bee", "Beta", [10, 20, 30, 40]]],
index=[["A", "B"], ["Null", "Eins"]],
columns=["Aussprache", "Griechisch", "Args"]
)
df.index.names = index_names
out = df.to_json(orient="table")
result = pd.read_json(out, orient="table")
tm.assert_frame_equal(df, result)
@pytest.mark.parametrize("strict_check", [
pytest.param(True, marks=pytest.mark.xfail),
False
])
def test_empty_frame_roundtrip(self, strict_check):
# GH 21287
df = pd.DataFrame([], columns=['a', 'b', 'c'])
expected = df.copy()
out = df.to_json(orient='table')
result = pd.read_json(out, orient='table')
# TODO: When DF coercion issue (#21345) is resolved tighten type checks
tm.assert_frame_equal(expected, result,
check_dtype=strict_check,
check_index_type=strict_check)
@@ -0,0 +1,462 @@
import json
import numpy as np
import pytest
from pandas import DataFrame, Index, compat
import pandas.util.testing as tm
from pandas.io.json import json_normalize
from pandas.io.json.normalize import nested_to_record
@pytest.fixture
def deep_nested():
# deeply nested data
return [{'country': 'USA',
'states': [{'name': 'California',
'cities': [{'name': 'San Francisco',
'pop': 12345},
{'name': 'Los Angeles',
'pop': 12346}]
},
{'name': 'Ohio',
'cities': [{'name': 'Columbus',
'pop': 1234},
{'name': 'Cleveland',
'pop': 1236}]}
]
},
{'country': 'Germany',
'states': [{'name': 'Bayern',
'cities': [{'name': 'Munich', 'pop': 12347}]
},
{'name': 'Nordrhein-Westfalen',
'cities': [{'name': 'Duesseldorf', 'pop': 1238},
{'name': 'Koeln', 'pop': 1239}]}
]
}
]
@pytest.fixture
def state_data():
return [
{'counties': [{'name': 'Dade', 'population': 12345},
{'name': 'Broward', 'population': 40000},
{'name': 'Palm Beach', 'population': 60000}],
'info': {'governor': 'Rick Scott'},
'shortname': 'FL',
'state': 'Florida'},
{'counties': [{'name': 'Summit', 'population': 1234},
{'name': 'Cuyahoga', 'population': 1337}],
'info': {'governor': 'John Kasich'},
'shortname': 'OH',
'state': 'Ohio'}]
@pytest.fixture
def author_missing_data():
return [
{'info': None},
{'info':
{'created_at': '11/08/1993', 'last_updated': '26/05/2012'},
'author_name':
{'first': 'Jane', 'last_name': 'Doe'}
}]
class TestJSONNormalize(object):
def test_simple_records(self):
recs = [{'a': 1, 'b': 2, 'c': 3},
{'a': 4, 'b': 5, 'c': 6},
{'a': 7, 'b': 8, 'c': 9},
{'a': 10, 'b': 11, 'c': 12}]
result = json_normalize(recs)
expected = DataFrame(recs)
tm.assert_frame_equal(result, expected)
def test_simple_normalize(self, state_data):
result = json_normalize(state_data[0], 'counties')
expected = DataFrame(state_data[0]['counties'])
tm.assert_frame_equal(result, expected)
result = json_normalize(state_data, 'counties')
expected = []
for rec in state_data:
expected.extend(rec['counties'])
expected = DataFrame(expected)
tm.assert_frame_equal(result, expected)
result = json_normalize(state_data, 'counties', meta='state')
expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])
tm.assert_frame_equal(result, expected)
def test_empty_array(self):
result = json_normalize([])
expected = DataFrame()
tm.assert_frame_equal(result, expected)
def test_simple_normalize_with_separator(self, deep_nested):
# GH 14883
result = json_normalize({'A': {'A': 1, 'B': 2}})
expected = DataFrame([[1, 2]], columns=['A.A', 'A.B'])
tm.assert_frame_equal(result.reindex_like(expected), expected)
result = json_normalize({'A': {'A': 1, 'B': 2}}, sep='_')
expected = DataFrame([[1, 2]], columns=['A_A', 'A_B'])
tm.assert_frame_equal(result.reindex_like(expected), expected)
result = json_normalize({'A': {'A': 1, 'B': 2}}, sep=u'\u03c3')
expected = DataFrame([[1, 2]], columns=[u'A\u03c3A', u'A\u03c3B'])
tm.assert_frame_equal(result.reindex_like(expected), expected)
result = json_normalize(deep_nested, ['states', 'cities'],
meta=['country', ['states', 'name']],
sep='_')
expected = Index(['name', 'pop',
'country', 'states_name']).sort_values()
assert result.columns.sort_values().equals(expected)
def test_value_array_record_prefix(self):
# GH 21536
result = json_normalize({'A': [1, 2]}, 'A', record_prefix='Prefix.')
expected = DataFrame([[1], [2]], columns=['Prefix.0'])
tm.assert_frame_equal(result, expected)
def test_nested_object_record_path(self):
# GH 22706
data = {'state': 'Florida',
'info': {
'governor': 'Rick Scott',
'counties': [{'name': 'Dade', 'population': 12345},
{'name': 'Broward', 'population': 40000},
{'name': 'Palm Beach', 'population': 60000}]}}
result = json_normalize(data, record_path=["info", "counties"])
expected = DataFrame([['Dade', 12345],
['Broward', 40000],
['Palm Beach', 60000]],
columns=['name', 'population'])
tm.assert_frame_equal(result, expected)
def test_more_deeply_nested(self, deep_nested):
result = json_normalize(deep_nested, ['states', 'cities'],
meta=['country', ['states', 'name']])
# meta_prefix={'states': 'state_'})
ex_data = {'country': ['USA'] * 4 + ['Germany'] * 3,
'states.name': ['California', 'California', 'Ohio', 'Ohio',
'Bayern', 'Nordrhein-Westfalen',
'Nordrhein-Westfalen'],
'name': ['San Francisco', 'Los Angeles', 'Columbus',
'Cleveland', 'Munich', 'Duesseldorf', 'Koeln'],
'pop': [12345, 12346, 1234, 1236, 12347, 1238, 1239]}
expected = DataFrame(ex_data, columns=result.columns)
tm.assert_frame_equal(result, expected)
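# For intuition, a sketch of the record_path/meta traversal asserted above
# (assuming the documented json_normalize semantics, not its internals):
#
#   for country in deep_nested:            # meta: 'country'
#       for state in country['states']:    # meta: ['states', 'name']
#           for city in state['cities']:   # one output row per city
#               row = dict(city,
#                          country=country['country'],
#                          **{'states.name': state['name']})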
def test_shallow_nested(self):
data = [{'state': 'Florida',
'shortname': 'FL',
'info': {
'governor': 'Rick Scott'
},
'counties': [{'name': 'Dade', 'population': 12345},
{'name': 'Broward', 'population': 40000},
{'name': 'Palm Beach', 'population': 60000}]},
{'state': 'Ohio',
'shortname': 'OH',
'info': {
'governor': 'John Kasich'
},
'counties': [{'name': 'Summit', 'population': 1234},
{'name': 'Cuyahoga', 'population': 1337}]}]
result = json_normalize(data, 'counties',
['state', 'shortname',
['info', 'governor']])
ex_data = {'name': ['Dade', 'Broward', 'Palm Beach', 'Summit',
'Cuyahoga'],
'state': ['Florida'] * 3 + ['Ohio'] * 2,
'shortname': ['FL', 'FL', 'FL', 'OH', 'OH'],
'info.governor': ['Rick Scott'] * 3 + ['John Kasich'] * 2,
'population': [12345, 40000, 60000, 1234, 1337]}
expected = DataFrame(ex_data, columns=result.columns)
tm.assert_frame_equal(result, expected)
def test_meta_name_conflict(self):
data = [{'foo': 'hello',
'bar': 'there',
'data': [{'foo': 'something', 'bar': 'else'},
{'foo': 'something2', 'bar': 'else2'}]}]
msg = (r"Conflicting metadata name (foo|bar),"
" need distinguishing prefix")
with pytest.raises(ValueError, match=msg):
json_normalize(data, 'data', meta=['foo', 'bar'])
result = json_normalize(data, 'data', meta=['foo', 'bar'],
meta_prefix='meta')
for val in ['metafoo', 'metabar', 'foo', 'bar']:
assert val in result
def test_meta_parameter_not_modified(self):
# GH 18610
data = [{'foo': 'hello',
'bar': 'there',
'data': [{'foo': 'something', 'bar': 'else'},
{'foo': 'something2', 'bar': 'else2'}]}]
COLUMNS = ['foo', 'bar']
result = json_normalize(data, 'data', meta=COLUMNS,
meta_prefix='meta')
assert COLUMNS == ['foo', 'bar']
for val in ['metafoo', 'metabar', 'foo', 'bar']:
assert val in result
def test_record_prefix(self, state_data):
result = json_normalize(state_data[0], 'counties')
expected = DataFrame(state_data[0]['counties'])
tm.assert_frame_equal(result, expected)
result = json_normalize(state_data, 'counties',
meta='state',
record_prefix='county_')
expected = []
for rec in state_data:
expected.extend(rec['counties'])
expected = DataFrame(expected)
expected = expected.rename(columns=lambda x: 'county_' + x)
expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])
tm.assert_frame_equal(result, expected)
def test_non_ascii_key(self):
if compat.PY3:
testjson = (
b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' +
b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]'
).decode('utf8')
else:
testjson = ('[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]')
testdata = {
u'sub.A': [1, 3],
u'sub.B': [2, 4],
b"\xc3\x9cnic\xc3\xb8de".decode('utf8'): [0, 1]
}
expected = DataFrame(testdata)
result = json_normalize(json.loads(testjson))
tm.assert_frame_equal(result, expected)
def test_missing_field(self, author_missing_data):
# GH20030:
result = json_normalize(author_missing_data)
ex_data = [
{'info': np.nan,
'author_name.first': np.nan,
'author_name.last_name': np.nan,
'info.created_at': np.nan,
'info.last_updated': np.nan},
{'info': None,
'author_name.first': 'Jane',
'author_name.last_name': 'Doe',
'info.created_at': '11/08/1993',
'info.last_updated': '26/05/2012'}
]
expected = DataFrame(ex_data)
tm.assert_frame_equal(result, expected)
class TestNestedToRecord(object):
def test_flat_stays_flat(self):
recs = [dict(flat1=1, flat2=2),
dict(flat1=3, flat2=4),
]
result = nested_to_record(recs)
expected = recs
assert result == expected
def test_one_level_deep_flattens(self):
data = dict(flat1=1,
dict1=dict(c=1, d=2))
result = nested_to_record(data)
expected = {'dict1.c': 1,
'dict1.d': 2,
'flat1': 1}
assert result == expected
def test_nested_flattens(self):
data = dict(flat1=1,
dict1=dict(c=1, d=2),
nested=dict(e=dict(c=1, d=2),
d=2))
result = nested_to_record(data)
expected = {'dict1.c': 1,
'dict1.d': 2,
'flat1': 1,
'nested.d': 2,
'nested.e.c': 1,
'nested.e.d': 2}
assert result == expected
def test_json_normalize_errors(self):
# GH14583: If meta keys are not always present
# a new option to set errors='ignore' has been implemented
i = {
"Trades": [{
"general": {
"tradeid": 100,
"trade_version": 1,
"stocks": [{
"symbol": "AAPL",
"name": "Apple",
"price": "0"
}, {
"symbol": "GOOG",
"name": "Google",
"price": "0"
}
]
}
}, {
"general": {
"tradeid": 100,
"stocks": [{
"symbol": "AAPL",
"name": "Apple",
"price": "0"
}, {
"symbol": "GOOG",
"name": "Google",
"price": "0"
}
]
}
}
]
}
j = json_normalize(data=i['Trades'],
record_path=[['general', 'stocks']],
meta=[['general', 'tradeid'],
['general', 'trade_version']],
errors='ignore')
expected = {'general.trade_version': {0: 1.0, 1: 1.0, 2: '', 3: ''},
'general.tradeid': {0: 100, 1: 100, 2: 100, 3: 100},
'name': {0: 'Apple', 1: 'Google', 2: 'Apple', 3: 'Google'},
'price': {0: '0', 1: '0', 2: '0', 3: '0'},
'symbol': {0: 'AAPL', 1: 'GOOG', 2: 'AAPL', 3: 'GOOG'}}
assert j.fillna('').to_dict() == expected
msg = ("Try running with errors='ignore' as key 'trade_version'"
" is not always present")
with pytest.raises(KeyError, match=msg):
json_normalize(
data=i['Trades'],
record_path=[['general', 'stocks']],
meta=[['general', 'tradeid'],
['general', 'trade_version']],
errors='raise')
def test_donot_drop_nonevalues(self):
# GH21356
data = [
{'info': None,
'author_name':
{'first': 'Smith', 'last_name': 'Appleseed'}
},
{'info':
{'created_at': '11/08/1993', 'last_updated': '26/05/2012'},
'author_name':
{'first': 'Jane', 'last_name': 'Doe'}
}
]
result = nested_to_record(data)
expected = [
{'info': None,
'author_name.first': 'Smith',
'author_name.last_name': 'Appleseed'},
{'author_name.first': 'Jane',
'author_name.last_name': 'Doe',
'info.created_at': '11/08/1993',
'info.last_updated': '26/05/2012'}]
assert result == expected
def test_nonetype_top_level_bottom_level(self):
# GH21158: If inner level json has a key with a null value
# make sure it doesn't call new_d.pop twice and raise an exception
data = {
"id": None,
"location": {
"country": {
"state": {
"id": None,
"town.info": {
"id": None,
"region": None,
"x": 49.151580810546875,
"y": -33.148521423339844,
"z": 27.572303771972656}}}
}
}
result = nested_to_record(data)
expected = {
'id': None,
'location.country.state.id': None,
'location.country.state.town.info.id': None,
'location.country.state.town.info.region': None,
'location.country.state.town.info.x': 49.151580810546875,
'location.country.state.town.info.y': -33.148521423339844,
'location.country.state.town.info.z': 27.572303771972656}
assert result == expected
def test_nonetype_multiple_levels(self):
# GH21158: If inner level json has a key with a null value
# make sure it doesn't call new_d.pop twice and raise an exception
data = {
"id": None,
"location": {
"id": None,
"country": {
"id": None,
"state": {
"id": None,
"town.info": {
"region": None,
"x": 49.151580810546875,
"y": -33.148521423339844,
"z": 27.572303771972656}}}
}
}
result = nested_to_record(data)
expected = {
'id': None,
'location.id': None,
'location.country.id': None,
'location.country.state.id': None,
'location.country.state.town.info.region': None,
'location.country.state.town.info.x': 49.151580810546875,
'location.country.state.town.info.y': -33.148521423339844,
'location.country.state.town.info.z': 27.572303771972656}
assert result == expected
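# A minimal re-implementation sketch of the flattening rule the tests above
# pin down (a hypothetical helper, not pandas' actual nested_to_record,
# which also handles lists of records and the None edge cases from GH21158):
def _flatten(d, prefix=''):
    out = {}
    for k, v in d.items():
        key = '{}.{}'.format(prefix, k) if prefix else k
        if isinstance(v, dict):
            out.update(_flatten(v, key))  # recurse, joining keys with '.'
        else:
            out[key] = v  # leaves (including None) are copied as-is
    return out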
File diff suppressed because it is too large
@@ -0,0 +1,172 @@
# -*- coding: utf-8 -*-
import pytest
from pandas.compat import StringIO
import pandas as pd
from pandas import DataFrame, read_json
import pandas.util.testing as tm
from pandas.util.testing import (
assert_frame_equal, assert_series_equal, ensure_clean)
from pandas.io.json.json import JsonReader
@pytest.fixture
def lines_json_df():
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
return df.to_json(lines=True, orient="records")
def test_read_jsonl():
# GH9180
result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
assert_frame_equal(result, expected)
def test_read_jsonl_unicode_chars():
# GH15132: non-ascii unicode characters
# \u201d == RIGHT DOUBLE QUOTATION MARK
# simulate file handle
json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
json = StringIO(json)
result = read_json(json, lines=True)
expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
columns=['a', 'b'])
assert_frame_equal(result, expected)
# simulate string
json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
result = read_json(json, lines=True)
expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
columns=['a', 'b'])
assert_frame_equal(result, expected)
def test_to_jsonl():
# GH9180
df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
result = df.to_json(orient="records", lines=True)
expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
assert result == expected
df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b'])
result = df.to_json(orient="records", lines=True)
expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
assert result == expected
assert_frame_equal(read_json(result, lines=True), df)
# GH15096: escaped characters in columns and data
df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]],
columns=["a\\", 'b'])
result = df.to_json(orient="records", lines=True)
expected = ('{"a\\\\":"foo\\\\","b":"bar"}\n'
'{"a\\\\":"foo\\"","b":"bar"}')
assert result == expected
assert_frame_equal(read_json(result, lines=True), df)
@pytest.mark.parametrize("chunksize", [1, 1.0])
def test_readjson_chunks(lines_json_df, chunksize):
# Basic test that read_json with chunksize set gives the same result as
# read_json without chunking
# GH17048: memory usage when lines=True
unchunked = read_json(StringIO(lines_json_df), lines=True)
reader = read_json(StringIO(lines_json_df), lines=True,
chunksize=chunksize)
chunked = pd.concat(reader)
assert_frame_equal(chunked, unchunked)
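# What chunksize buys here: read_json returns a JsonReader that yields
# DataFrames of up to `chunksize` lines each instead of materializing the
# whole input at once, so pd.concat over the reader must reproduce the
# unchunked frame exactly -- which is precisely what this test asserts.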
def test_readjson_chunksize_requires_lines(lines_json_df):
msg = "chunksize can only be passed if lines=True"
with pytest.raises(ValueError, match=msg):
pd.read_json(StringIO(lines_json_df), lines=False, chunksize=2)
def test_readjson_chunks_series():
# Test reading line-format JSON to Series with chunksize param
s = pd.Series({'A': 1, 'B': 2})
strio = StringIO(s.to_json(lines=True, orient="records"))
unchunked = pd.read_json(strio, lines=True, typ='Series')
strio = StringIO(s.to_json(lines=True, orient="records"))
chunked = pd.concat(pd.read_json(
strio, lines=True, typ='Series', chunksize=1
))
assert_series_equal(chunked, unchunked)
def test_readjson_each_chunk(lines_json_df):
# Other tests check that the final result of read_json with chunksize set
# is correct. This checks the intermediate chunks.
chunks = list(
pd.read_json(StringIO(lines_json_df), lines=True, chunksize=2)
)
assert chunks[0].shape == (2, 2)
assert chunks[1].shape == (1, 2)
def test_readjson_chunks_from_file():
with ensure_clean('test.json') as path:
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
df.to_json(path, lines=True, orient="records")
chunked = pd.concat(pd.read_json(path, lines=True, chunksize=1))
unchunked = pd.read_json(path, lines=True)
assert_frame_equal(unchunked, chunked)
@pytest.mark.parametrize("chunksize", [None, 1])
def test_readjson_chunks_closes(chunksize):
with ensure_clean('test.json') as path:
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
df.to_json(path, lines=True, orient="records")
reader = JsonReader(
path, orient=None, typ="frame", dtype=True, convert_axes=True,
convert_dates=True, keep_default_dates=True, numpy=False,
precise_float=False, date_unit=None, encoding=None,
lines=True, chunksize=chunksize, compression=None)
reader.read()
assert reader.open_stream.closed, \
"didn't close stream with chunksize = {}".format(chunksize)
@pytest.mark.parametrize("chunksize", [0, -1, 2.2, "foo"])
def test_readjson_invalid_chunksize(lines_json_df, chunksize):
msg = r"'chunksize' must be an integer >=1"
with pytest.raises(ValueError, match=msg):
pd.read_json(StringIO(lines_json_df), lines=True,
chunksize=chunksize)
@pytest.mark.parametrize("chunksize", [None, 1, 2])
def test_readjson_chunks_multiple_empty_lines(chunksize):
j = """
{"A":1,"B":4}
{"A":2,"B":5}
{"A":3,"B":6}
"""
orig = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
test = pd.read_json(j, lines=True, chunksize=chunksize)
if chunksize is not None:
test = pd.concat(test)
tm.assert_frame_equal(
orig, test, obj="chunksize: {chunksize}".format(chunksize=chunksize))
File diff suppressed because it is too large
@@ -0,0 +1,9 @@
from pandas.compat import PY3
# array compat
if PY3:
frombytes = lambda obj, data: obj.frombytes(data)
tobytes = lambda obj: obj.tobytes()
else:
frombytes = lambda obj, data: obj.fromstring(data)
tobytes = lambda obj: obj.tostring()
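# Usage sketch for the shim (standalone illustration, not part of pandas):
#
#   import array
#   a = array.array('b')
#   frombytes(a, b'\x01\x02')       # fill the array from raw bytes
#   assert tobytes(a) == b'\x01\x02'
#
# On Python 3 this maps to array.frombytes/tobytes; on Python 2 the older
# fromstring/tostring spellings are used instead.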
@@ -0,0 +1,21 @@
# coding: utf-8
from pandas.io.msgpack import packb, unpackb
from .common import frombytes
def test_unpack_buffer():
from array import array
buf = array('b')
frombytes(buf, packb((b'foo', b'bar')))
obj = unpackb(buf, use_list=1)
assert [b'foo', b'bar'] == obj
def test_unpack_bytearray():
buf = bytearray(packb(('foo', 'bar')))
obj = unpackb(buf, use_list=1)
assert [b'foo', b'bar'] == obj
expected_type = bytes
assert all(type(s) == expected_type for s in obj)
@@ -0,0 +1,115 @@
# coding: utf-8
from pandas.io.msgpack import packb, unpackb
def check(length, obj):
v = packb(obj)
assert len(v) == length, \
"%r length should be %r but get %r" % (obj, length, len(v))
assert unpackb(v, use_list=0) == obj
def test_1():
for o in [None, True, False, 0, 1, (1 << 6), (1 << 7) - 1, -1,
-((1 << 5) - 1), -(1 << 5)]:
check(1, o)
def test_2():
for o in [1 << 7, (1 << 8) - 1, -((1 << 5) + 1), -(1 << 7)]:
check(2, o)
def test_3():
for o in [1 << 8, (1 << 16) - 1, -((1 << 7) + 1), -(1 << 15)]:
check(3, o)
def test_5():
for o in [1 << 16, (1 << 32) - 1, -((1 << 15) + 1), -(1 << 31)]:
check(5, o)
def test_9():
for o in [1 << 32, (1 << 64) - 1, -((1 << 31) + 1), -(1 << 63), 1.0, 0.1,
-0.1, -1.0]:
check(9, o)
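# Why those byte counts hold (per the msgpack spec, stated here for
# orientation): every value is a one-byte type tag plus an optional
# fixed-width payload. nil, booleans and fixints fit entirely in the tag
# byte (1 byte total); uint8/int8 cost 1 + 1 = 2; uint16/int16 cost
# 1 + 2 = 3; uint32/int32 cost 1 + 4 = 5; uint64/int64/float64 cost
# 1 + 8 = 9.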
def check_raw(overhead, num):
check(num + overhead, b" " * num)
def test_fixraw():
check_raw(1, 0)
check_raw(1, (1 << 5) - 1)
def test_raw16():
check_raw(3, 1 << 5)
check_raw(3, (1 << 16) - 1)
def test_raw32():
check_raw(5, 1 << 16)
def check_array(overhead, num):
check(num + overhead, (None, ) * num)
def test_fixarray():
check_array(1, 0)
check_array(1, (1 << 4) - 1)
def test_array16():
check_array(3, 1 << 4)
check_array(3, (1 << 16) - 1)
def test_array32():
check_array(5, (1 << 16))
def match(obj, buf):
assert packb(obj) == buf
assert unpackb(buf, use_list=0) == obj
def test_match():
cases = [
(None, b'\xc0'),
(False, b'\xc2'),
(True, b'\xc3'),
(0, b'\x00'),
(127, b'\x7f'),
(128, b'\xcc\x80'),
(256, b'\xcd\x01\x00'),
(-1, b'\xff'),
(-33, b'\xd0\xdf'),
(-129, b'\xd1\xff\x7f'),
({1: 1}, b'\x81\x01\x01'),
(1.0, b"\xcb\x3f\xf0\x00\x00\x00\x00\x00\x00"),
((), b'\x90'),
(tuple(range(15)), (b"\x9f\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
b"\x0a\x0b\x0c\x0d\x0e")),
(tuple(range(16)), (b"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07"
b"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f")),
({}, b'\x80'),
({x: x for x in range(15)},
(b'\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07'
b'\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e')),
({x: x for x in range(16)},
(b'\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06'
b'\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e'
b'\x0f\x0f')),
]
for v, p in cases:
match(v, p)
def test_unicode():
assert unpackb(packb('foobar'), use_list=1) == b'foobar'
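# Note: without an encoding argument, unpackb hands raw msgpack strings
# back as bytes -- hence packb('foobar') round-trips to b'foobar' here.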
@@ -0,0 +1,39 @@
# coding: utf-8
from datetime import datetime
import pytest
from pandas.io.msgpack import packb, unpackb
class DummyException(Exception):
pass
class TestExceptions(object):
def test_raise_on_find_unsupported_value(self):
msg = "can\'t serialize datetime"
with pytest.raises(TypeError, match=msg):
packb(datetime.now())
def test_raise_from_object_hook(self):
def hook(_):
raise DummyException()
with pytest.raises(DummyException):
unpackb(packb({}), object_hook=hook)
with pytest.raises(DummyException):
unpackb(packb({'fizz': 'buzz'}), object_hook=hook)
with pytest.raises(DummyException):
unpackb(packb({'fizz': 'buzz'}), object_pairs_hook=hook)
with pytest.raises(DummyException):
unpackb(packb({'fizz': {'buzz': 'spam'}}), object_hook=hook)
with pytest.raises(DummyException):
unpackb(packb({'fizz': {'buzz': 'spam'}}), object_pairs_hook=hook)
def test_invalid_value(self):
msg = "Unpack failed: error"
with pytest.raises(ValueError, match=msg):
unpackb(b"\xd9\x97#DL_")
@@ -0,0 +1,63 @@
from __future__ import print_function
import array
import pandas.io.msgpack as msgpack
from pandas.io.msgpack import ExtType
from .common import frombytes, tobytes
def test_pack_ext_type():
def p(s):
packer = msgpack.Packer()
packer.pack_ext_type(0x42, s)
return packer.bytes()
assert p(b'A') == b'\xd4\x42A' # fixext 1
assert p(b'AB') == b'\xd5\x42AB' # fixext 2
assert p(b'ABCD') == b'\xd6\x42ABCD' # fixext 4
assert p(b'ABCDEFGH') == b'\xd7\x42ABCDEFGH' # fixext 8
assert p(b'A' * 16) == b'\xd8\x42' + b'A' * 16 # fixext 16
assert p(b'ABC') == b'\xc7\x03\x42ABC' # ext 8
assert p(b'A' * 0x0123) == b'\xc8\x01\x23\x42' + b'A' * 0x0123 # ext 16
assert (p(b'A' * 0x00012345) ==
b'\xc9\x00\x01\x23\x45\x42' + b'A' * 0x00012345) # ext 32
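# The layouts asserted above follow the msgpack ext-family framing:
# fixext 1/2/4/8/16 -> tag byte + typecode + fixed-size payload, while
# ext 8/16/32 -> tag byte + 1/2/4 length bytes + typecode + payload;
# e.g. b'\xc8\x01\x23\x42' reads as: ext16 tag, length 0x0123, type 0x42.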
def test_unpack_ext_type():
def check(b, expected):
assert msgpack.unpackb(b) == expected
check(b'\xd4\x42A', ExtType(0x42, b'A')) # fixext 1
check(b'\xd5\x42AB', ExtType(0x42, b'AB')) # fixext 2
check(b'\xd6\x42ABCD', ExtType(0x42, b'ABCD')) # fixext 4
check(b'\xd7\x42ABCDEFGH', ExtType(0x42, b'ABCDEFGH')) # fixext 8
check(b'\xd8\x42' + b'A' * 16, ExtType(0x42, b'A' * 16)) # fixext 16
check(b'\xc7\x03\x42ABC', ExtType(0x42, b'ABC')) # ext 8
check(b'\xc8\x01\x23\x42' + b'A' * 0x0123,
ExtType(0x42, b'A' * 0x0123)) # ext 16
check(b'\xc9\x00\x01\x23\x45\x42' + b'A' * 0x00012345,
ExtType(0x42, b'A' * 0x00012345)) # ext 32
def test_extension_type():
def default(obj):
print('default called', obj)
if isinstance(obj, array.array):
typecode = 123 # application specific typecode
data = tobytes(obj)
return ExtType(typecode, data)
raise TypeError("Unknown type object %r" % (obj, ))
def ext_hook(code, data):
print('ext_hook called', code, data)
assert code == 123
obj = array.array('d')
frombytes(obj, data)
return obj
obj = [42, b'hello', array.array('d', [1.1, 2.2, 3.3])]
s = msgpack.packb(obj, default=default)
obj2 = msgpack.unpackb(s, ext_hook=ext_hook)
assert obj == obj2
@@ -0,0 +1,91 @@
# coding: utf-8
from pandas.io.msgpack import unpackb
def check(src, should, use_list=0):
assert unpackb(src, use_list=use_list) == should
def testSimpleValue():
check(b"\x93\xc0\xc2\xc3", (None, False, True, ))
def testFixnum():
check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", ((0,
64,
127, ),
(-32,
-16,
-1, ), ))
def testFixArray():
check(b"\x92\x90\x91\x91\xc0", ((), ((None, ), ), ), )
def testFixRaw():
check(b"\x94\xa0\xa1a\xa2bc\xa3def", (b"", b"a", b"bc", b"def", ), )
def testFixMap():
check(b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80",
{False: {None: None},
True: {None: {}}}, )
def testUnsignedInt():
check(b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00"
b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00"
b"\xce\xff\xff\xff\xff",
(0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295))
def testSignedInt():
check(b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00"
b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00"
b"\xd2\xff\xff\xff\xff", (0,
-128,
-1,
0,
-32768,
-1,
0,
-2147483648,
-1, ))
def testRaw():
check(b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00"
b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab",
(b"", b"a", b"ab", b"", b"a", b"ab"))
def testArray():
check(b"\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00"
b"\x00\x00\x00\xdd\x00\x00\x00\x01\xc0\xdd\x00\x00\x00\x02"
b"\xc2\xc3", ((), (None, ), (False, True), (), (None, ),
(False, True)))
def testMap():
check(b"\x96"
b"\xde\x00\x00"
b"\xde\x00\x01\xc0\xc2"
b"\xde\x00\x02\xc0\xc2\xc3\xc2"
b"\xdf\x00\x00\x00\x00"
b"\xdf\x00\x00\x00\x01\xc0\xc2"
b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2", ({}, {None: False},
{True: False,
None: False}, {},
{None: False},
{True: False,
None: False}))
@@ -0,0 +1,109 @@
# coding: utf-8
from __future__ import (
absolute_import, division, print_function, unicode_literals)
import pytest
from pandas.io.msgpack import ExtType, Packer, Unpacker, packb, unpackb
class TestLimits(object):
def test_integer(self):
x = -(2 ** 63)
assert unpackb(packb(x)) == x
msg = (r"((long |Python )?(int )?too (big|large) to convert"
r"( to C (unsigned )?long))?")
with pytest.raises((OverflowError, ValueError), match=msg):
packb(x - 1)
x = 2 ** 64 - 1
assert unpackb(packb(x)) == x
with pytest.raises((OverflowError, ValueError), match=msg):
packb(x + 1)
def test_array_header(self):
packer = Packer()
packer.pack_array_header(2 ** 32 - 1)
with pytest.raises((OverflowError, ValueError)):
packer.pack_array_header(2 ** 32)
def test_map_header(self):
packer = Packer()
packer.pack_map_header(2 ** 32 - 1)
with pytest.raises((OverflowError, ValueError)):
packer.pack_map_header(2 ** 32)
def test_max_str_len(self):
d = 'x' * 3
packed = packb(d)
unpacker = Unpacker(max_str_len=3, encoding='utf-8')
unpacker.feed(packed)
assert unpacker.unpack() == d
unpacker = Unpacker(max_str_len=2, encoding='utf-8')
unpacker.feed(packed)
msg = "3 exceeds max_str_len"
with pytest.raises(ValueError, match=msg):
unpacker.unpack()
def test_max_bin_len(self):
d = b'x' * 3
packed = packb(d, use_bin_type=True)
unpacker = Unpacker(max_bin_len=3)
unpacker.feed(packed)
assert unpacker.unpack() == d
unpacker = Unpacker(max_bin_len=2)
unpacker.feed(packed)
msg = "3 exceeds max_bin_len"
with pytest.raises(ValueError, match=msg):
unpacker.unpack()
def test_max_array_len(self):
d = [1, 2, 3]
packed = packb(d)
unpacker = Unpacker(max_array_len=3)
unpacker.feed(packed)
assert unpacker.unpack() == d
unpacker = Unpacker(max_array_len=2)
unpacker.feed(packed)
msg = "3 exceeds max_array_len"
with pytest.raises(ValueError, match=msg):
unpacker.unpack()
def test_max_map_len(self):
d = {1: 2, 3: 4, 5: 6}
packed = packb(d)
unpacker = Unpacker(max_map_len=3)
unpacker.feed(packed)
assert unpacker.unpack() == d
unpacker = Unpacker(max_map_len=2)
unpacker.feed(packed)
msg = "3 exceeds max_map_len"
with pytest.raises(ValueError, match=msg):
unpacker.unpack()
def test_max_ext_len(self):
d = ExtType(42, b"abc")
packed = packb(d)
unpacker = Unpacker(max_ext_len=3)
unpacker.feed(packed)
assert unpacker.unpack() == d
unpacker = Unpacker(max_ext_len=2)
unpacker.feed(packed)
msg = "4 exceeds max_ext_len"
with pytest.raises(ValueError, match=msg):
unpacker.unpack()
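# Note the off-by-one-looking message above: for ExtType(42, b"abc") the
# limit check reports 4, not 3, so the typecode byte is evidently counted
# toward max_ext_len along with the 3-byte payload.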
@@ -0,0 +1,92 @@
# coding: utf-8
from pandas.io.msgpack import ExtType, packb, unpackb
def test_str8():
header = b'\xd9'
data = b'x' * 32
b = packb(data.decode(), use_bin_type=True)
assert len(b) == len(data) + 2
assert b[0:2] == header + b'\x20'
assert b[2:] == data
assert unpackb(b) == data
data = b'x' * 255
b = packb(data.decode(), use_bin_type=True)
assert len(b) == len(data) + 2
assert b[0:2] == header + b'\xff'
assert b[2:] == data
assert unpackb(b) == data
def test_bin8():
header = b'\xc4'
data = b''
b = packb(data, use_bin_type=True)
assert len(b) == len(data) + 2
assert b[0:2] == header + b'\x00'
assert b[2:] == data
assert unpackb(b) == data
data = b'x' * 255
b = packb(data, use_bin_type=True)
assert len(b) == len(data) + 2
assert b[0:2] == header + b'\xff'
assert b[2:] == data
assert unpackb(b) == data
def test_bin16():
header = b'\xc5'
data = b'x' * 256
b = packb(data, use_bin_type=True)
assert len(b) == len(data) + 3
assert b[0:1] == header
assert b[1:3] == b'\x01\x00'
assert b[3:] == data
assert unpackb(b) == data
data = b'x' * 65535
b = packb(data, use_bin_type=True)
assert len(b) == len(data) + 3
assert b[0:1] == header
assert b[1:3] == b'\xff\xff'
assert b[3:] == data
assert unpackb(b) == data
def test_bin32():
header = b'\xc6'
data = b'x' * 65536
b = packb(data, use_bin_type=True)
assert len(b) == len(data) + 5
assert b[0:1] == header
assert b[1:5] == b'\x00\x01\x00\x00'
assert b[5:] == data
assert unpackb(b) == data
def test_ext():
def check(ext, packed):
assert packb(ext) == packed
assert unpackb(packed) == ext
check(ExtType(0x42, b'Z'), b'\xd4\x42Z') # fixext 1
check(ExtType(0x42, b'ZZ'), b'\xd5\x42ZZ') # fixext 2
check(ExtType(0x42, b'Z' * 4), b'\xd6\x42' + b'Z' * 4) # fixext 4
check(ExtType(0x42, b'Z' * 8), b'\xd7\x42' + b'Z' * 8) # fixext 8
check(ExtType(0x42, b'Z' * 16), b'\xd8\x42' + b'Z' * 16) # fixext 16
# ext 8
check(ExtType(0x42, b''), b'\xc7\x00\x42')
check(ExtType(0x42, b'Z' * 255), b'\xc7\xff\x42' + b'Z' * 255)
# ext 16
check(ExtType(0x42, b'Z' * 256), b'\xc8\x01\x00\x42' + b'Z' * 256)
check(ExtType(0x42, b'Z' * 0xffff), b'\xc8\xff\xff\x42' + b'Z' * 0xffff)
# ext 32
check(ExtType(0x42, b'Z' * 0x10000),
b'\xc9\x00\x01\x00\x00\x42' + b'Z' * 0x10000)
# needs large memory
# check(ExtType(0x42, b'Z'*0xffffffff),
# b'\xc9\xff\xff\xff\xff\x42' + b'Z'*0xffffffff)
@@ -0,0 +1,74 @@
# coding: utf-8
import pytest
from pandas.io.msgpack import packb, unpackb
class DecodeError(Exception):
pass
class TestObj(object):
def _arr_to_str(self, arr):
return ''.join(str(c) for c in arr)
def bad_complex_decoder(self, o):
raise DecodeError("Ooops!")
def _decode_complex(self, obj):
if b'__complex__' in obj:
return complex(obj[b'real'], obj[b'imag'])
return obj
def _encode_complex(self, obj):
if isinstance(obj, complex):
return {b'__complex__': True, b'real': 1, b'imag': 2}
return obj
def test_encode_hook(self):
packed = packb([3, 1 + 2j], default=self._encode_complex)
unpacked = unpackb(packed, use_list=1)
assert unpacked[1] == {b'__complex__': True, b'real': 1, b'imag': 2}
def test_decode_hook(self):
packed = packb([3, {b'__complex__': True, b'real': 1, b'imag': 2}])
unpacked = unpackb(packed, object_hook=self._decode_complex,
use_list=1)
assert unpacked[1] == 1 + 2j
def test_decode_pairs_hook(self):
packed = packb([3, {1: 2, 3: 4}])
prod_sum = 1 * 2 + 3 * 4
unpacked = unpackb(
packed, object_pairs_hook=lambda l: sum(k * v for k, v in l),
use_list=1)
assert unpacked[1] == prod_sum
def test_only_one_obj_hook(self):
msg = "object_pairs_hook and object_hook are mutually exclusive"
with pytest.raises(TypeError, match=msg):
unpackb(b'', object_hook=lambda x: x,
object_pairs_hook=lambda x: x)
def test_bad_hook(self):
msg = r"can't serialize \(1\+2j\)"
with pytest.raises(TypeError, match=msg):
packed = packb([3, 1 + 2j], default=lambda o: o)
unpacked = unpackb(packed, use_list=1) # noqa
def test_array_hook(self):
packed = packb([1, 2, 3])
unpacked = unpackb(packed, list_hook=self._arr_to_str, use_list=1)
assert unpacked == '123'
def test_an_exception_in_objecthook1(self):
with pytest.raises(DecodeError, match='Ooops!'):
packed = packb({1: {'__complex__': True, 'real': 1, 'imag': 2}})
unpackb(packed, object_hook=self.bad_complex_decoder)
def test_an_exception_in_objecthook2(self):
with pytest.raises(DecodeError, match='Ooops!'):
packed = packb({1: [{'__complex__': True, 'real': 1, 'imag': 2}]})
unpackb(packed, list_hook=self.bad_complex_decoder, use_list=1)
@@ -0,0 +1,162 @@
# coding: utf-8
import struct
import pytest
from pandas.compat import OrderedDict, u
from pandas import compat
from pandas.io.msgpack import Packer, Unpacker, packb, unpackb
class TestPack(object):
def check(self, data, use_list=False):
re = unpackb(packb(data), use_list=use_list)
assert re == data
def testPack(self):
test_data = [
0, 1, 127, 128, 255, 256, 65535, 65536,
-1, -32, -33, -128, -129, -32768, -32769,
1.0,
b"", b"a", b"a" * 31, b"a" * 32,
None, True, False,
(), ((),), ((), None,),
{None: 0},
(1 << 23),
]
for td in test_data:
self.check(td)
def testPackUnicode(self):
test_data = [u(""), u("abcd"), [u("defgh")], u("Русский текст"), ]
for td in test_data:
re = unpackb(
packb(td, encoding='utf-8'), use_list=1, encoding='utf-8')
assert re == td
packer = Packer(encoding='utf-8')
data = packer.pack(td)
re = Unpacker(
compat.BytesIO(data), encoding='utf-8', use_list=1).unpack()
assert re == td
def testPackUTF32(self):
test_data = [
compat.u(""),
compat.u("abcd"),
[compat.u("defgh")],
compat.u("Русский текст"),
]
for td in test_data:
re = unpackb(
packb(td, encoding='utf-32'), use_list=1, encoding='utf-32')
assert re == td
def testPackBytes(self):
test_data = [b"", b"abcd", (b"defgh", ), ]
for td in test_data:
self.check(td)
def testIgnoreUnicodeErrors(self):
re = unpackb(
packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore',
use_list=1)
assert re == "abcdef"
def testStrictUnicodeUnpack(self):
msg = (r"'utf-*8' codec can't decode byte 0xed in position 3:"
" invalid continuation byte")
with pytest.raises(UnicodeDecodeError, match=msg):
unpackb(packb(b'abc\xeddef'), encoding='utf-8', use_list=1)
def testStrictUnicodePack(self):
msg = (r"'ascii' codec can't encode character u*'\\xed' in position 3:"
r" ordinal not in range\(128\)")
with pytest.raises(UnicodeEncodeError, match=msg):
packb(compat.u("abc\xeddef"), encoding='ascii',
unicode_errors='strict')
def testIgnoreErrorsPack(self):
re = unpackb(
packb(
compat.u("abcФФФdef"), encoding='ascii',
unicode_errors='ignore'), encoding='utf-8', use_list=1)
assert re == compat.u("abcdef")
def testNoEncoding(self):
msg = "Can't encode unicode string: no encoding is specified"
with pytest.raises(TypeError, match=msg):
packb(compat.u("abc"), encoding=None)
def testDecodeBinary(self):
re = unpackb(packb("abc"), encoding=None, use_list=1)
assert re == b"abc"
def testPackFloat(self):
assert packb(1.0,
use_single_float=True) == b'\xca' + struct.pack('>f', 1.0)
assert packb(
1.0, use_single_float=False) == b'\xcb' + struct.pack('>d', 1.0)
def testArraySize(self, sizes=[0, 5, 50, 1000]):
bio = compat.BytesIO()
packer = Packer()
for size in sizes:
bio.write(packer.pack_array_header(size))
for i in range(size):
bio.write(packer.pack(i))
bio.seek(0)
unpacker = Unpacker(bio, use_list=1)
for size in sizes:
assert unpacker.unpack() == list(range(size))
def test_manualreset(self, sizes=[0, 5, 50, 1000]):
packer = Packer(autoreset=False)
for size in sizes:
packer.pack_array_header(size)
for i in range(size):
packer.pack(i)
bio = compat.BytesIO(packer.bytes())
unpacker = Unpacker(bio, use_list=1)
for size in sizes:
assert unpacker.unpack() == list(range(size))
packer.reset()
assert packer.bytes() == b''
def testMapSize(self, sizes=[0, 5, 50, 1000]):
bio = compat.BytesIO()
packer = Packer()
for size in sizes:
bio.write(packer.pack_map_header(size))
for i in range(size):
bio.write(packer.pack(i)) # key
bio.write(packer.pack(i * 2)) # value
bio.seek(0)
unpacker = Unpacker(bio)
for size in sizes:
assert unpacker.unpack() == {i: i * 2 for i in range(size)}
def test_odict(self):
seq = [(b'one', 1), (b'two', 2), (b'three', 3), (b'four', 4)]
od = OrderedDict(seq)
assert unpackb(packb(od), use_list=1) == dict(seq)
def pair_hook(seq):
return list(seq)
assert unpackb(
packb(od), object_pairs_hook=pair_hook, use_list=1) == seq
def test_pairlist(self):
pairlist = [(b'a', 1), (2, b'b'), (b'foo', b'bar')]
packer = Packer()
packed = packer.pack_map_pairs(pairlist)
unpacked = unpackb(packed, object_pairs_hook=list)
assert pairlist == unpacked
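# Design note: object_pairs_hook receives the (key, value) pairs in wire
# order, which is why test_pairlist can round-trip an ordered pair list
# exactly, while test_odict's plain-dict comparison only checks contents.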
@@ -0,0 +1,71 @@
"""Test Unpacker's read_array_header and read_map_header methods"""
from pandas.io.msgpack import OutOfData, Unpacker, packb
UnexpectedTypeException = ValueError
def test_read_array_header():
unpacker = Unpacker()
unpacker.feed(packb(['a', 'b', 'c']))
assert unpacker.read_array_header() == 3
assert unpacker.unpack() == b'a'
assert unpacker.unpack() == b'b'
assert unpacker.unpack() == b'c'
try:
unpacker.unpack()
assert 0, 'should raise exception'
except OutOfData:
assert 1, 'okay'
def test_read_map_header():
unpacker = Unpacker()
unpacker.feed(packb({'a': 'A'}))
assert unpacker.read_map_header() == 1
assert unpacker.unpack() == b'a'
assert unpacker.unpack() == b'A'
try:
unpacker.unpack()
assert 0, 'should raise exception'
except OutOfData:
assert 1, 'okay'
def test_incorrect_type_array():
unpacker = Unpacker()
unpacker.feed(packb(1))
try:
unpacker.read_array_header()
assert 0, 'should raise exception'
except UnexpectedTypeException:
assert 1, 'okay'
def test_incorrect_type_map():
unpacker = Unpacker()
unpacker.feed(packb(1))
try:
unpacker.read_map_header()
assert 0, 'should raise exception'
except UnexpectedTypeException:
assert 1, 'okay'
def test_correct_type_nested_array():
unpacker = Unpacker()
unpacker.feed(packb({'a': ['b', 'c', 'd']}))
try:
unpacker.read_array_header()
assert 0, 'should raise exception'
except UnexpectedTypeException:
assert 1, 'okay'
def test_incorrect_type_nested_map():
unpacker = Unpacker()
unpacker.feed(packb([{'a': 'b'}]))
try:
unpacker.read_map_header()
assert 0, 'should raise exception'
except UnexpectedTypeException:
assert 1, 'okay'
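# The read_array_header/read_map_header calls exercised above enable a
# streaming style: read just the element count, then unpack() that many
# items one at a time instead of materializing the whole container.
# For example (a sketch mirroring test_read_array_header):
#
#   unpacker.feed(packb(['a', 'b', 'c']))
#   n = unpacker.read_array_header()
#   items = [unpacker.unpack() for _ in range(n)]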
@@ -0,0 +1,47 @@
# coding: utf-8
import io
import pandas.io.msgpack as msgpack
binarydata = bytes(bytearray(range(256)))
def gen_binary_data(idx):
return binarydata[:idx % 300]
def test_exceeding_unpacker_read_size():
dumpf = io.BytesIO()
packer = msgpack.Packer()
NUMBER_OF_STRINGS = 6
read_size = 16
# 5 ok for read_size=16, while 6 glibc detected *** python: double free or
# corruption (fasttop):
# 20 ok for read_size=256, while 25 segfaults / glibc detected *** python:
# double free or corruption (!prev)
# 40 ok for read_size=1024, while 50 introduces errors
# 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected ***
# python: double free or corruption (!prev):
for idx in range(NUMBER_OF_STRINGS):
data = gen_binary_data(idx)
dumpf.write(packer.pack(data))
f = io.BytesIO(dumpf.getvalue())
dumpf.close()
unpacker = msgpack.Unpacker(f, read_size=read_size, use_list=1)
read_count = 0
for idx, o in enumerate(unpacker):
assert type(o) == bytes
assert o == gen_binary_data(idx)
read_count += 1
assert read_count == NUMBER_OF_STRINGS
@@ -0,0 +1,104 @@
# coding: utf-8
import pytest
from pandas import compat
from pandas.io.msgpack import BufferFull, OutOfData, Unpacker
class TestPack(object):
def test_partial_data(self):
unpacker = Unpacker()
msg = "No more data to unpack"
for data in [b"\xa5", b"h", b"a", b"l", b"l"]:
unpacker.feed(data)
with pytest.raises(StopIteration, match=msg):
next(iter(unpacker))
unpacker.feed(b"o")
assert next(iter(unpacker)) == b"hallo"
def test_foobar(self):
unpacker = Unpacker(read_size=3, use_list=1)
unpacker.feed(b'foobar')
assert unpacker.unpack() == ord(b'f')
assert unpacker.unpack() == ord(b'o')
assert unpacker.unpack() == ord(b'o')
assert unpacker.unpack() == ord(b'b')
assert unpacker.unpack() == ord(b'a')
assert unpacker.unpack() == ord(b'r')
msg = "No more data to unpack"
with pytest.raises(OutOfData, match=msg):
unpacker.unpack()
unpacker.feed(b'foo')
unpacker.feed(b'bar')
k = 0
for o, e in zip(unpacker, 'foobarbaz'):
assert o == ord(e)
k += 1
assert k == len(b'foobar')
def test_foobar_skip(self):
unpacker = Unpacker(read_size=3, use_list=1)
unpacker.feed(b'foobar')
assert unpacker.unpack() == ord(b'f')
unpacker.skip()
assert unpacker.unpack() == ord(b'o')
unpacker.skip()
assert unpacker.unpack() == ord(b'a')
unpacker.skip()
msg = "No more data to unpack"
with pytest.raises(OutOfData, match=msg):
unpacker.unpack()
def test_maxbuffersize_read_size_exceeds_max_buffer_size(self):
msg = "read_size should be less or equal to max_buffer_size"
with pytest.raises(ValueError, match=msg):
Unpacker(read_size=5, max_buffer_size=3)
def test_maxbuffersize_bufferfull(self):
unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1)
unpacker.feed(b'foo')
with pytest.raises(BufferFull, match=r'^$'):
unpacker.feed(b'b')
def test_maxbuffersize(self):
unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1)
unpacker.feed(b'foo')
assert ord('f') == next(unpacker)
unpacker.feed(b'b')
assert ord('o') == next(unpacker)
assert ord('o') == next(unpacker)
assert ord('b') == next(unpacker)
def test_readbytes(self):
unpacker = Unpacker(read_size=3)
unpacker.feed(b'foobar')
assert unpacker.unpack() == ord(b'f')
assert unpacker.read_bytes(3) == b'oob'
assert unpacker.unpack() == ord(b'a')
assert unpacker.unpack() == ord(b'r')
# Test buffer refill
unpacker = Unpacker(compat.BytesIO(b'foobar'), read_size=3)
assert unpacker.unpack() == ord(b'f')
assert unpacker.read_bytes(3) == b'oob'
assert unpacker.unpack() == ord(b'a')
assert unpacker.unpack() == ord(b'r')
def test_issue124(self):
unpacker = Unpacker()
unpacker.feed(b'\xa1?\xa1!')
assert tuple(unpacker) == (b'?', b'!')
assert tuple(unpacker) == ()
unpacker.feed(b"\xa1?\xa1")
assert tuple(unpacker) == (b'?', )
assert tuple(unpacker) == ()
unpacker.feed(b"!")
assert tuple(unpacker) == (b'!', )
assert tuple(unpacker) == ()
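# Behaviour pinned down above: feed() buffers partial input, iteration
# yields only complete msgpack objects, and an exhausted Unpacker simply
# yields nothing until more bytes arrive -- no exception, no data loss.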
@@ -0,0 +1,26 @@
# coding: utf-8
from collections import namedtuple
from pandas.io.msgpack import packb
class MyList(list):
pass
class MyDict(dict):
pass
class MyTuple(tuple):
pass
MyNamedTuple = namedtuple('MyNamedTuple', 'x y')
def test_types():
assert packb(MyDict()) == packb(dict())
assert packb(MyList()) == packb(list())
assert packb(MyNamedTuple(1, 2)) == packb((1, 2))
@@ -0,0 +1,67 @@
from io import BytesIO
import sys
import pytest
from pandas.io.msgpack import ExtType, OutOfData, Unpacker, packb
class TestUnpack(object):
def test_unpack_array_header_from_file(self):
f = BytesIO(packb([1, 2, 3, 4]))
unpacker = Unpacker(f)
assert unpacker.read_array_header() == 4
assert unpacker.unpack() == 1
assert unpacker.unpack() == 2
assert unpacker.unpack() == 3
assert unpacker.unpack() == 4
msg = "No more data to unpack"
with pytest.raises(OutOfData, match=msg):
unpacker.unpack()
def test_unpacker_hook_refcnt(self):
if not hasattr(sys, 'getrefcount'):
pytest.skip('no sys.getrefcount()')
result = []
def hook(x):
result.append(x)
return x
basecnt = sys.getrefcount(hook)
up = Unpacker(object_hook=hook, list_hook=hook)
assert sys.getrefcount(hook) >= basecnt + 2
up.feed(packb([{}]))
up.feed(packb([{}]))
assert up.unpack() == [{}]
assert up.unpack() == [{}]
assert result == [{}, [{}], {}, [{}]]
del up
assert sys.getrefcount(hook) == basecnt
def test_unpacker_ext_hook(self):
class MyUnpacker(Unpacker):
def __init__(self):
super(MyUnpacker, self).__init__(ext_hook=self._hook,
encoding='utf-8')
def _hook(self, code, data):
if code == 1:
return int(data)
else:
return ExtType(code, data)
unpacker = MyUnpacker()
unpacker.feed(packb({'a': 1}, encoding='utf-8'))
assert unpacker.unpack() == {'a': 1}
unpacker.feed(packb({'a': ExtType(1, b'123')}, encoding='utf-8'))
assert unpacker.unpack() == {'a': 123}
unpacker.feed(packb({'a': ExtType(2, b'321')}, encoding='utf-8'))
assert unpacker.unpack() == {'a': ExtType(2, b'321')}
@@ -0,0 +1,30 @@
"""Tests for cases where the user seeks to obtain packed msgpack objects"""
import io
from pandas.io.msgpack import Unpacker, packb
def test_write_bytes():
unpacker = Unpacker()
unpacker.feed(b'abc')
f = io.BytesIO()
assert unpacker.unpack(f.write) == ord('a')
assert f.getvalue() == b'a'
f = io.BytesIO()
assert unpacker.skip(f.write) is None
assert f.getvalue() == b'b'
f = io.BytesIO()
assert unpacker.skip() is None
assert f.getvalue() == b''
def test_write_bytes_multi_buffer():
long_val = (5) * 100
expected = packb(long_val)
unpacker = Unpacker(io.BytesIO(expected), read_size=3, max_buffer_size=3)
f = io.BytesIO()
unpacked = unpacker.unpack(f.write)
assert unpacked == long_val
assert f.getvalue() == expected
@@ -0,0 +1,85 @@
import os
import pytest
from pandas import read_csv, read_table
class BaseParser(object):
engine = None
low_memory = True
float_precision_choices = []
def update_kwargs(self, kwargs):
kwargs = kwargs.copy()
kwargs.update(dict(engine=self.engine,
low_memory=self.low_memory))
return kwargs
def read_csv(self, *args, **kwargs):
kwargs = self.update_kwargs(kwargs)
return read_csv(*args, **kwargs)
def read_table(self, *args, **kwargs):
kwargs = self.update_kwargs(kwargs)
return read_table(*args, **kwargs)
class CParser(BaseParser):
engine = "c"
float_precision_choices = [None, "high", "round_trip"]
class CParserHighMemory(CParser):
low_memory = False
class CParserLowMemory(CParser):
low_memory = True
class PythonParser(BaseParser):
engine = "python"
float_precision_choices = [None]
@pytest.fixture
def csv_dir_path(datapath):
return datapath("io", "parser", "data")
@pytest.fixture
def csv1(csv_dir_path):
return os.path.join(csv_dir_path, "test1.csv")
_cParserHighMemory = CParserHighMemory()
_cParserLowMemory = CParserLowMemory()
_pythonParser = PythonParser()
_py_parsers_only = [_pythonParser]
_c_parsers_only = [_cParserHighMemory, _cParserLowMemory]
_all_parsers = _c_parsers_only + _py_parsers_only
_py_parser_ids = ["python"]
_c_parser_ids = ["c_high", "c_low"]
_all_parser_ids = _c_parser_ids + _py_parser_ids
@pytest.fixture(params=_all_parsers,
ids=_all_parser_ids)
def all_parsers(request):
return request.param
@pytest.fixture(params=_c_parsers_only,
ids=_c_parser_ids)
def c_parser_only(request):
return request.param
@pytest.fixture(params=_py_parsers_only,
ids=_py_parser_ids)
def python_parser_only(request):
return request.param
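# Typical consumption of these fixtures (a sketch of the pattern the test
# modules below rely on; c_parser_only and python_parser_only work the
# same way, just over a narrower set of parsers):
#
#   def test_roundtrip(all_parsers):
#       parser = all_parsers          # c_high, c_low or python flavour
#       result = parser.read_csv(StringIO("a,b\n1,2"))
#       ...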
@@ -0,0 +1,591 @@
# -*- coding: utf-8 -*-
"""
Tests that apply specifically to the CParser. Unless specifically stated
as a CParser-specific issue, the goal is to eventually move as many of
these tests as possible out of this module once the Python parser can
accept further arguments when parsing.
"""
from io import TextIOWrapper
import mmap
import os
import tarfile
import numpy as np
import pytest
from pandas.compat import PY3, BytesIO, StringIO, lrange, range
from pandas.errors import ParserError
import pandas.util._test_decorators as td
from pandas import DataFrame, concat
import pandas.util.testing as tm
@pytest.mark.parametrize(
"malformed",
["1\r1\r1\r 1\r 1\r",
"1\r1\r1\r 1\r 1\r11\r",
"1\r1\r1\r 1\r 1\r11\r1\r"],
ids=["words pointer", "stream pointer", "lines pointer"])
def test_buffer_overflow(c_parser_only, malformed):
# see gh-9205: test certain malformed input files that cause
# buffer overflows in tokenizer.c
msg = "Buffer overflow caught - possible malformed input file."
parser = c_parser_only
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(malformed))
def test_buffer_rd_bytes(c_parser_only):
# see gh-12098: src->buffer in the C parser can be freed twice leading
# to a segfault if a corrupt gzip file is read with 'read_csv', and the
# buffer is filled more than once before gzip raises an Exception.
data = "\x1F\x8B\x08\x00\x00\x00\x00\x00\x00\x03\xED\xC3\x41\x09" \
"\x00\x00\x08\x00\xB1\xB7\xB6\xBA\xFE\xA5\xCC\x21\x6C\xB0" \
"\xA6\x4D" + "\x55" * 267 + \
"\x7D\xF7\x00\x91\xE0\x47\x97\x14\x38\x04\x00" \
"\x1f\x8b\x08\x00VT\x97V\x00\x03\xed]\xefO"
parser = c_parser_only
for _ in range(100):
try:
parser.read_csv(StringIO(data), compression="gzip",
delim_whitespace=True)
except Exception:
pass
def test_delim_whitespace_custom_terminator(c_parser_only):
# See gh-12912
data = "a b c~1 2 3~4 5 6~7 8 9"
parser = c_parser_only
df = parser.read_csv(StringIO(data), lineterminator="~",
delim_whitespace=True)
expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
columns=["a", "b", "c"])
tm.assert_frame_equal(df, expected)
def test_dtype_and_names_error(c_parser_only):
# see gh-8833: passing both dtype and names
# resulting in an error reporting issue
parser = c_parser_only
data = """
1.0 1
2.0 2
3.0 3
"""
# base cases
result = parser.read_csv(StringIO(data), sep=r"\s+", header=None)
expected = DataFrame([[1.0, 1], [2.0, 2], [3.0, 3]])
tm.assert_frame_equal(result, expected)
result = parser.read_csv(StringIO(data), sep=r"\s+",
header=None, names=["a", "b"])
expected = DataFrame(
[[1.0, 1], [2.0, 2], [3.0, 3]], columns=["a", "b"])
tm.assert_frame_equal(result, expected)
# fallback casting
result = parser.read_csv(StringIO(
data), sep=r"\s+", header=None,
names=["a", "b"], dtype={"a": np.int32})
expected = DataFrame([[1, 1], [2, 2], [3, 3]],
columns=["a", "b"])
expected["a"] = expected["a"].astype(np.int32)
tm.assert_frame_equal(result, expected)
data = """
1.0 1
nan 2
3.0 3
"""
# fallback casting, but not castable
with pytest.raises(ValueError, match="cannot safely convert"):
parser.read_csv(StringIO(data), sep=r"\s+", header=None,
names=["a", "b"], dtype={"a": np.int32})
@pytest.mark.parametrize("match,kwargs", [
# For each of these cases, all of the dtypes are valid, just unsupported.
(("the dtype datetime64 is not supported for parsing, "
"pass this column using parse_dates instead"),
dict(dtype={"A": "datetime64", "B": "float64"})),
(("the dtype datetime64 is not supported for parsing, "
"pass this column using parse_dates instead"),
dict(dtype={"A": "datetime64", "B": "float64"},
parse_dates=["B"])),
("the dtype timedelta64 is not supported for parsing",
dict(dtype={"A": "timedelta64", "B": "float64"})),
("the dtype <U8 is not supported for parsing",
dict(dtype={"A": "U8"}))
], ids=["dt64-0", "dt64-1", "td64", "<U8"])
def test_unsupported_dtype(c_parser_only, match, kwargs):
parser = c_parser_only
df = DataFrame(np.random.rand(5, 2), columns=list(
"AB"), index=["1A", "1B", "1C", "1D", "1E"])
with tm.ensure_clean("__unsupported_dtype__.csv") as path:
df.to_csv(path)
with pytest.raises(TypeError, match=match):
parser.read_csv(path, index_col=0, **kwargs)
@td.skip_if_32bit
def test_precise_conversion(c_parser_only):
from decimal import Decimal
parser = c_parser_only
normal_errors = []
precise_errors = []
# test numbers between 1 and 2
for num in np.linspace(1., 2., num=500):
# 25 decimal digits of precision
text = "a\n{0:.25}".format(num)
normal_val = float(parser.read_csv(StringIO(text))["a"][0])
precise_val = float(parser.read_csv(
StringIO(text), float_precision="high")["a"][0])
roundtrip_val = float(parser.read_csv(
StringIO(text), float_precision="round_trip")["a"][0])
actual_val = Decimal(text[2:])
def error(val):
return abs(Decimal("{0:.100}".format(val)) - actual_val)
normal_errors.append(error(normal_val))
precise_errors.append(error(precise_val))
# round-trip should match float()
assert roundtrip_val == float(text[2:])
assert sum(precise_errors) <= sum(normal_errors)
assert max(precise_errors) <= max(normal_errors)
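# For orientation: float_precision=None uses the C parser's fast inline
# float converter, "high" switches to a more careful conversion, and
# "round_trip" is expected to match Python's float() exactly -- which is
# why the test can assert equality for round_trip but only aggregate
# error bounds for the other two modes.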
def test_usecols_dtypes(c_parser_only):
parser = c_parser_only
data = """\
1,2,3
4,5,6
7,8,9
10,11,12"""
result = parser.read_csv(StringIO(data), usecols=(0, 1, 2),
names=("a", "b", "c"),
header=None,
converters={"a": str},
dtype={"b": int, "c": float})
result2 = parser.read_csv(StringIO(data), usecols=(0, 2),
names=("a", "b", "c"),
header=None,
converters={"a": str},
dtype={"b": int, "c": float})
assert (result.dtypes == [object, np.int, np.float]).all()
assert (result2.dtypes == [object, np.float]).all()
def test_disable_bool_parsing(c_parser_only):
# see gh-2090
parser = c_parser_only
data = """A,B,C
Yes,No,Yes
No,Yes,Yes
Yes,,Yes
No,No,No"""
result = parser.read_csv(StringIO(data), dtype=object)
assert (result.dtypes == object).all()
result = parser.read_csv(StringIO(data), dtype=object, na_filter=False)
assert result["B"][2] == ""
def test_custom_lineterminator(c_parser_only):
parser = c_parser_only
data = "a,b,c~1,2,3~4,5,6"
result = parser.read_csv(StringIO(data), lineterminator="~")
expected = parser.read_csv(StringIO(data.replace("~", "\n")))
tm.assert_frame_equal(result, expected)
def test_parse_ragged_csv(c_parser_only):
parser = c_parser_only
data = """1,2,3
1,2,3,4
1,2,3,4,5
1,2
1,2,3,4"""
nice_data = """1,2,3,,
1,2,3,4,
1,2,3,4,5
1,2,,,
1,2,3,4,"""
result = parser.read_csv(StringIO(data), header=None,
names=["a", "b", "c", "d", "e"])
expected = parser.read_csv(StringIO(nice_data), header=None,
names=["a", "b", "c", "d", "e"])
tm.assert_frame_equal(result, expected)
# too many columns: would cause a segfault if not handled carefully
data = "1,2\n3,4,5"
result = parser.read_csv(StringIO(data), header=None,
names=lrange(50))
expected = parser.read_csv(StringIO(data), header=None,
names=lrange(3)).reindex(columns=lrange(50))
tm.assert_frame_equal(result, expected)
def test_tokenize_CR_with_quoting(c_parser_only):
# see gh-3453
parser = c_parser_only
data = " a,b,c\r\"a,b\",\"e,d\",\"f,f\""
result = parser.read_csv(StringIO(data), header=None)
expected = parser.read_csv(StringIO(data.replace("\r", "\n")),
header=None)
tm.assert_frame_equal(result, expected)
result = parser.read_csv(StringIO(data))
expected = parser.read_csv(StringIO(data.replace("\r", "\n")))
tm.assert_frame_equal(result, expected)
def test_grow_boundary_at_cap(c_parser_only):
# See gh-12494
#
# Cause of error was that the C parser
# was not increasing the buffer size when
# the desired space would fill the buffer
# to capacity, which would later cause a
# buffer overflow error when checking the
# EOF terminator of the CSV stream.
parser = c_parser_only
def test_empty_header_read(count):
s = StringIO("," * count)
expected = DataFrame(columns=[
"Unnamed: {i}".format(i=i)
for i in range(count + 1)])
df = parser.read_csv(s)
tm.assert_frame_equal(df, expected)
for cnt in range(1, 101):
test_empty_header_read(cnt)
def test_parse_trim_buffers(c_parser_only):
# This test is part of a bugfix for gh-13703. It attempts to
# stress the system memory allocator, to cause it to move the
# stream buffer and either let the OS reclaim the region, or let
# other memory requests of the parser otherwise modify the contents
# of the memory space where it was formerly located.
# This test is designed to cause a `segfault` with unpatched
# `tokenizer.c`. Sometimes the test fails on `segfault`, other
# times it fails due to memory corruption, which causes the
# loaded DataFrame to differ from the expected one.
parser = c_parser_only
# Generate a large mixed-type CSV file on-the-fly (one record is
# approx 1.5KiB).
record_ = \
"""9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.99,Z""" \
"""ZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-ZZZZZ,""" \
"""ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,9""" \
"""99,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,,,9,9,""" \
"""9,9,99,99,999,999,ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.""" \
"""99,ZZ-ZZZZ,ZZ-ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.""" \
"""99,999.99,,,ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZ""" \
"""ZZZ,ZZZ-ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZ""" \
"""ZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.99,,,,Z""" \
"""ZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.99,9,9,9.99,""" \
"""9.99,,,,9.99,9.99,,99,,99,9.99,9.99,,,ZZZ,ZZZ,,999.99,,""" \
"""999.99,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,""" \
""",,,,,ZZZ-ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999""" \
""",9.999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.""" \
"""999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-ZZZZ,""" \
""",9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-ZZZZ,ZZZ-Z""" \
"""ZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-ZZZZ,ZZ-ZZZZ,ZZ""" \
""",999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,99.99,99.99""" \
""",,,9.99,9.99,9.99,9.99,ZZZ-ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-""" \
"""9.99,-9.99,-9.99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9""" \
""".99,-9.99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,""" \
""",,,-9.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.""" \
"""99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZ""" \
"""ZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.99,ZZ""" \
"""-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.99,,,ZZ-ZZZZZ""" \
"""ZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ""" \
""",9999,999.99,ZZZ-ZZZZ,-9.99,-9.99,ZZZ-ZZZZ,99:99:99,,99""" \
""",99,,9.99,,-99.99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9""" \
""".99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,,"""
# Set the number of lines so that a call to `parser_trim_buffers`
# is triggered: after a couple of full chunks are consumed a
# relatively small 'residual' chunk would cause reallocation
# within the parser.
chunksize, n_lines = 128, 2 * 128 + 15
csv_data = "\n".join([record_] * n_lines) + "\n"
# We will use StringIO to load the CSV from this text buffer.
# pd.read_csv() will iterate over the file in chunks and will
# finally read a residual chunk of really small size.
# Generate the expected output: manually create the dataframe
# by splitting by comma and repeating the `n_lines` times.
row = tuple(val_ if val_ else np.nan
for val_ in record_.split(","))
expected = DataFrame([row for _ in range(n_lines)],
dtype=object, columns=None, index=None)
# Iterate over the CSV file in chunks of `chunksize` lines
chunks_ = parser.read_csv(StringIO(csv_data), header=None,
dtype=object, chunksize=chunksize)
result = concat(chunks_, axis=0, ignore_index=True)
# Check for data corruption if there was no segfault
tm.assert_frame_equal(result, expected)
# This extra test was added to replicate the fault in gh-5291.
# Force 'utf-8' encoding, so that `_string_convert` would take
# a different execution branch.
chunks_ = parser.read_csv(StringIO(csv_data), header=None,
dtype=object, chunksize=chunksize,
encoding="utf_8")
result = concat(chunks_, axis=0, ignore_index=True)
tm.assert_frame_equal(result, expected)
def test_internal_null_byte(c_parser_only):
# see gh-14012
#
# The null byte ('\x00') should not be used as a
# true line terminator, escape character, or comment
# character, only as a placeholder to indicate that
# none was specified.
#
# This test should be moved to test_common.py ONLY when
# Python's csv class supports parsing '\x00'.
parser = c_parser_only
names = ["a", "b", "c"]
data = "1,2,3\n4,\x00,6\n7,8,9"
expected = DataFrame([[1, 2.0, 3], [4, np.nan, 6],
[7, 8, 9]], columns=names)
result = parser.read_csv(StringIO(data), names=names)
tm.assert_frame_equal(result, expected)
def test_read_nrows_large(c_parser_only):
# gh-7626 - Read only nrows of data for large inputs (>262144 bytes)
parser = c_parser_only
header_narrow = "\t".join(["COL_HEADER_" + str(i)
for i in range(10)]) + "\n"
data_narrow = "\t".join(["somedatasomedatasomedata1"
for _ in range(10)]) + "\n"
header_wide = "\t".join(["COL_HEADER_" + str(i)
for i in range(15)]) + "\n"
data_wide = "\t".join(["somedatasomedatasomedata2"
for _ in range(15)]) + "\n"
test_input = (header_narrow + data_narrow * 1050 +
header_wide + data_wide * 2)
df = parser.read_csv(StringIO(test_input), sep="\t", nrows=1010)
assert df.size == 1010 * 10
def test_float_precision_round_trip_with_text(c_parser_only):
# see gh-15140 - This should not segfault on Python 2.7+
parser = c_parser_only
df = parser.read_csv(StringIO("a"), header=None,
float_precision="round_trip")
tm.assert_frame_equal(df, DataFrame({0: ["a"]}))
def test_large_difference_in_columns(c_parser_only):
# see gh-14125
parser = c_parser_only
count = 10000
large_row = ("X," * count)[:-1] + "\n"
normal_row = "XXXXXX XXXXXX,111111111111111\n"
test_input = (large_row + normal_row * 6)[:-1]
result = parser.read_csv(StringIO(test_input), header=None, usecols=[0])
rows = test_input.split("\n")
expected = DataFrame([row.split(",")[0] for row in rows])
tm.assert_frame_equal(result, expected)
def test_data_after_quote(c_parser_only):
# see gh-15910
parser = c_parser_only
data = "a\n1\n\"b\"a"
result = parser.read_csv(StringIO(data))
expected = DataFrame({"a": ["1", "ba"]})
tm.assert_frame_equal(result, expected)
def test_comment_whitespace_delimited(c_parser_only, capsys):
parser = c_parser_only
test_input = """\
1 2
2 2 3
3 2 3 # 3 fields
4 2 3# 3 fields
5 2 # 2 fields
6 2# 2 fields
7 # 1 field, NaN
8# 1 field, NaN
9 2 3 # skipped line
# comment"""
df = parser.read_csv(StringIO(test_input), comment="#", header=None,
delimiter="\\s+", skiprows=0,
error_bad_lines=False)
captured = capsys.readouterr()
# skipped lines 2, 3, 4, 9
for line_num in (2, 3, 4, 9):
assert "Skipping line {}".format(line_num) in captured.err
expected = DataFrame([[1, 2],
[5, 2],
[6, 2],
[7, np.nan],
[8, np.nan]])
tm.assert_frame_equal(df, expected)
def test_file_like_no_next(c_parser_only):
# gh-16530: the file-like need not have a "next" or "__next__"
# attribute despite having an "__iter__" attribute.
#
# NOTE: This is only true for the C engine, not Python engine.
class NoNextBuffer(StringIO):
def __next__(self):
raise AttributeError("No next method")
next = __next__
parser = c_parser_only
data = "a\n1"
expected = DataFrame({"a": [1]})
result = parser.read_csv(NoNextBuffer(data))
tm.assert_frame_equal(result, expected)
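# (The C engine presumably pulls data through .read() rather than by
# iterating the handle, which is why __next__ can be absent -- or even
# raise, as above -- without affecting parsing.)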
def test_buffer_rd_bytes_bad_unicode(c_parser_only):
# see gh-22748
parser = c_parser_only
t = BytesIO(b"\xB0")
if PY3:
msg = "'utf-8' codec can't encode character"
t = TextIOWrapper(t, encoding="ascii", errors="surrogateescape")
else:
msg = "'utf8' codec can't decode byte"
with pytest.raises(UnicodeError, match=msg):
parser.read_csv(t, encoding="UTF-8")
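# Editorial sketch (not part of the original suite; Python 3 semantics,
# the PY2 branch above matches a plain decode error instead): decoding
# b"\xb0" as ASCII with errors="surrogateescape" smuggles the byte
# through as the lone surrogate "\udcb0", which the utf-8 codec then
# refuses to re-encode -- hence the message matched above.
def _sketch_surrogateescape_roundtrip():
    raw = b"\xb0"
    text = raw.decode("ascii", errors="surrogateescape")
    assert text == "\udcb0"
    try:
        text.encode("utf-8")
    except UnicodeEncodeError as exc:
        return str(exc)  # "'utf-8' codec can't encode character ..."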
@pytest.mark.parametrize("tar_suffix", [".tar", ".tar.gz"])
def test_read_tarfile(c_parser_only, csv_dir_path, tar_suffix):
# see gh-16530
#
# Unfortunately, Python's CSV library can't handle
# tarfile objects (expects string, not bytes when
# iterating through a file-like).
parser = c_parser_only
tar_path = os.path.join(csv_dir_path, "tar_csv" + tar_suffix)
with tarfile.open(tar_path, "r") as tar:
data_file = tar.extractfile("tar_data.csv")
out = parser.read_csv(data_file)
expected = DataFrame({"a": [1]})
tm.assert_frame_equal(out, expected)
@pytest.mark.high_memory
def test_bytes_exceed_2gb(c_parser_only):
# see gh-16798
#
# Read from a "CSV" that has a column larger than 2GB.
parser = c_parser_only
if parser.low_memory:
pytest.skip("not a high_memory test")
csv = StringIO("strings\n" + "\n".join(
["x" * (1 << 20) for _ in range(2100)]))
df = parser.read_csv(csv)
assert not df.empty
def test_chunk_whitespace_on_boundary(c_parser_only):
# see gh-9735: this issue is C parser-specific (bug when
# parsing whitespace and characters at chunk boundary)
#
# This test case has a field too large for the Python parser / CSV library.
parser = c_parser_only
chunk1 = "a" * (1024 * 256 - 2) + "\na"
chunk2 = "\n a"
result = parser.read_csv(StringIO(chunk1 + chunk2), header=None)
expected = DataFrame(["a" * (1024 * 256 - 2), "a", " a"])
tm.assert_frame_equal(result, expected)
def test_file_handles_mmap(c_parser_only, csv1):
# gh-14418
#
# Don't close user provided file handles.
parser = c_parser_only
with open(csv1, "r") as f:
m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
parser.read_csv(m)
if PY3:
assert not m.closed
m.close()
def test_file_binary_mode(c_parser_only):
# see gh-23779
parser = c_parser_only
expected = DataFrame([[1, 2, 3], [4, 5, 6]])
with tm.ensure_clean() as path:
with open(path, "w") as f:
f.write("1,2,3\n4,5,6")
with open(path, "rb") as f:
result = parser.read_csv(f, header=None)
tm.assert_frame_equal(result, expected)
@@ -0,0 +1,136 @@
# -*- coding: utf-8 -*-
"""
Tests that comments are properly handled during parsing
for all of the parsers defined in parsers.py
"""
import numpy as np
import pytest
from pandas.compat import StringIO
from pandas import DataFrame
import pandas.util.testing as tm
@pytest.mark.parametrize("na_values", [None, ["NaN"]])
def test_comment(all_parsers, na_values):
parser = all_parsers
data = """A,B,C
1,2.,4.#hello world
5.,NaN,10.0
"""
expected = DataFrame([[1., 2., 4.], [5., np.nan, 10.]],
columns=["A", "B", "C"])
result = parser.read_csv(StringIO(data), comment="#",
na_values=na_values)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("read_kwargs", [
dict(),
dict(lineterminator="*"),
dict(delim_whitespace=True),
])
def test_line_comment(all_parsers, read_kwargs):
parser = all_parsers
data = """# empty
A,B,C
1,2.,4.#hello world
#ignore this line
5.,NaN,10.0
"""
if read_kwargs.get("delim_whitespace"):
data = data.replace(",", " ")
elif read_kwargs.get("lineterminator"):
if parser.engine != "c":
pytest.skip("Custom terminator not supported with Python engine")
data = data.replace("\n", read_kwargs.get("lineterminator"))
read_kwargs["comment"] = "#"
result = parser.read_csv(StringIO(data), **read_kwargs)
expected = DataFrame([[1., 2., 4.], [5., np.nan, 10.]],
columns=["A", "B", "C"])
tm.assert_frame_equal(result, expected)
def test_comment_skiprows(all_parsers):
parser = all_parsers
data = """# empty
random line
# second empty line
1,2,3
A,B,C
1,2.,4.
5.,NaN,10.0
"""
# This should ignore the first four lines (including comments).
expected = DataFrame([[1., 2., 4.], [5., np.nan, 10.]],
columns=["A", "B", "C"])
result = parser.read_csv(StringIO(data), comment="#", skiprows=4)
tm.assert_frame_equal(result, expected)
def test_comment_header(all_parsers):
parser = all_parsers
data = """# empty
# second empty line
1,2,3
A,B,C
1,2.,4.
5.,NaN,10.0
"""
# Header should begin at the second non-comment line.
expected = DataFrame([[1., 2., 4.], [5., np.nan, 10.]],
columns=["A", "B", "C"])
result = parser.read_csv(StringIO(data), comment="#", header=1)
tm.assert_frame_equal(result, expected)
def test_comment_skiprows_header(all_parsers):
parser = all_parsers
data = """# empty
# second empty line
# third empty line
X,Y,Z
1,2,3
A,B,C
1,2.,4.
5.,NaN,10.0
"""
    # skiprows=4 drops the first four lines (comments included), and
    # header=1 then takes the second remaining line ("A,B,C",
    # physical line 6) as the header.
expected = DataFrame([[1., 2., 4.], [5., np.nan, 10.]],
columns=["A", "B", "C"])
result = parser.read_csv(StringIO(data), comment="#", skiprows=4, header=1)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("comment_char", ["#", "~", "&", "^", "*", "@"])
def test_custom_comment_char(all_parsers, comment_char):
parser = all_parsers
data = "a,b,c\n1,2,3#ignore this!\n4,5,6#ignorethistoo"
result = parser.read_csv(StringIO(data.replace("#", comment_char)),
comment=comment_char)
expected = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("header", ["infer", None])
def test_comment_first_line(all_parsers, header):
# see gh-4623
parser = all_parsers
data = "# notes\na,b,c\n# more notes\n1,2,3"
if header is None:
expected = DataFrame({0: ["a", "1"], 1: ["b", "2"], 2: ["c", "3"]})
else:
expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"])
result = parser.read_csv(StringIO(data), comment="#", header=header)
tm.assert_frame_equal(result, expected)
@@ -0,0 +1,154 @@
# -*- coding: utf-8 -*-
"""
Tests compressed data parsing functionality for all
of the parsers defined in parsers.py
"""
import os
import zipfile
import pytest
import pandas as pd
import pandas.util.testing as tm
@pytest.fixture(params=[True, False])
def buffer(request):
return request.param
@pytest.fixture
def parser_and_data(all_parsers, csv1):
parser = all_parsers
with open(csv1, "rb") as f:
data = f.read()
expected = parser.read_csv(csv1)
return parser, data, expected
@pytest.mark.parametrize("compression", ["zip", "infer", "zip2"])
def test_zip(parser_and_data, compression):
parser, data, expected = parser_and_data
with tm.ensure_clean("test_file.zip") as path:
with zipfile.ZipFile(path, mode="w") as tmp:
tmp.writestr("test_file", data)
if compression == "zip2":
with open(path, "rb") as f:
result = parser.read_csv(f, compression="zip")
else:
result = parser.read_csv(path, compression=compression)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("compression", ["zip", "infer"])
def test_zip_error_multiple_files(parser_and_data, compression):
parser, data, expected = parser_and_data
with tm.ensure_clean("combined_zip.zip") as path:
inner_file_names = ["test_file", "second_file"]
with zipfile.ZipFile(path, mode="w") as tmp:
for file_name in inner_file_names:
tmp.writestr(file_name, data)
with pytest.raises(ValueError, match="Multiple files"):
parser.read_csv(path, compression=compression)
def test_zip_error_no_files(parser_and_data):
parser, _, _ = parser_and_data
with tm.ensure_clean() as path:
with zipfile.ZipFile(path, mode="w"):
pass
with pytest.raises(ValueError, match="Zero files"):
parser.read_csv(path, compression="zip")
def test_zip_error_invalid_zip(parser_and_data):
parser, _, _ = parser_and_data
with tm.ensure_clean() as path:
with open(path, "wb") as f:
with pytest.raises(zipfile.BadZipfile,
match="File is not a zip file"):
parser.read_csv(f, compression="zip")
@pytest.mark.parametrize("filename", [None, "test.{ext}"])
def test_compression(parser_and_data, compression_only, buffer, filename):
parser, data, expected = parser_and_data
compress_type = compression_only
ext = "gz" if compress_type == "gzip" else compress_type
filename = filename if filename is None else filename.format(ext=ext)
if filename and buffer:
pytest.skip("Cannot deduce compression from "
"buffer of compressed data.")
with tm.ensure_clean(filename=filename) as path:
tm.write_to_compressed(compress_type, path, data)
compression = "infer" if filename else compress_type
if buffer:
with open(path, "rb") as f:
result = parser.read_csv(f, compression=compression)
else:
result = parser.read_csv(path, compression=compression)
tm.assert_frame_equal(result, expected)
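# Editorial sketch (hypothetical helper; standard library + pandas only):
# the "infer" mode exercised above keys off the filename suffix, which is
# why a buffer of compressed bytes cannot use it -- there is no name to
# inspect.
def _sketch_infer_gzip(dir_path):
    import gzip
    import pandas as pd
    path = dir_path + "/example.csv.gz"  # hypothetical location
    with gzip.open(path, "wb") as f:
        f.write(b"a,b\n1,2\n")
    # The ".gz" suffix is enough for read_csv to pick the gzip codec.
    return pd.read_csv(path, compression="infer")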
@pytest.mark.parametrize("ext", [None, "gz", "bz2"])
def test_infer_compression(all_parsers, csv1, buffer, ext):
# see gh-9770
parser = all_parsers
kwargs = dict(index_col=0, parse_dates=True)
expected = parser.read_csv(csv1, **kwargs)
kwargs["compression"] = "infer"
if buffer:
with open(csv1) as f:
result = parser.read_csv(f, **kwargs)
else:
ext = "." + ext if ext else ""
result = parser.read_csv(csv1 + ext, **kwargs)
tm.assert_frame_equal(result, expected)
def test_compression_utf16_encoding(all_parsers, csv_dir_path):
# see gh-18071
parser = all_parsers
path = os.path.join(csv_dir_path, "utf16_ex_small.zip")
result = parser.read_csv(path, encoding="utf-16",
compression="zip", sep="\t")
expected = pd.DataFrame({
u"Country": [u"Venezuela", u"Venezuela"],
u"Twitter": [u"Hugo Chávez Frías", u"Henrique Capriles R."]
})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("invalid_compression", ["sfark", "bz3", "zipper"])
def test_invalid_compression(all_parsers, invalid_compression):
parser = all_parsers
compress_kwargs = dict(compression=invalid_compression)
msg = ("Unrecognized compression "
"type: {compression}".format(**compress_kwargs))
with pytest.raises(ValueError, match=msg):
parser.read_csv("test_file.zip", **compress_kwargs)
@@ -0,0 +1,158 @@
# -*- coding: utf-8 -*-
"""
Tests column conversion functionality during parsing
for all of the parsers defined in parsers.py
"""
import numpy as np
import pytest
from pandas.compat import StringIO, lmap, parse_date
import pandas as pd
from pandas import DataFrame, Index
import pandas.util.testing as tm
def test_converters_type_must_be_dict(all_parsers):
parser = all_parsers
data = """index,A,B,C,D
foo,2,3,4,5
"""
with pytest.raises(TypeError, match="Type converters.+"):
parser.read_csv(StringIO(data), converters=0)
@pytest.mark.parametrize("column", [3, "D"])
@pytest.mark.parametrize("converter", [
parse_date,
lambda x: int(x.split("/")[2]) # Produce integer.
])
def test_converters(all_parsers, column, converter):
parser = all_parsers
data = """A,B,C,D
a,1,2,01/01/2009
b,3,4,01/02/2009
c,4,5,01/03/2009
"""
result = parser.read_csv(StringIO(data), converters={column: converter})
expected = parser.read_csv(StringIO(data))
expected["D"] = expected["D"].map(converter)
tm.assert_frame_equal(result, expected)
def test_converters_no_implicit_conv(all_parsers):
# see gh-2184
parser = all_parsers
data = """000102,1.2,A\n001245,2,B"""
converters = {0: lambda x: x.strip()}
result = parser.read_csv(StringIO(data), header=None,
converters=converters)
    # Column 0 should not be cast to numeric and should remain object dtype.
expected = DataFrame([["000102", 1.2, "A"], ["001245", 2, "B"]])
tm.assert_frame_equal(result, expected)
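# (Without the strip converter above, the leading-zero fields in column 0
# would presumably be inferred as the integers 102 and 1245; the converter
# is what pins the column to object dtype and preserves "000102".)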
def test_converters_euro_decimal_format(all_parsers):
# see gh-583
converters = dict()
parser = all_parsers
data = """Id;Number1;Number2;Text1;Text2;Number3
1;1521,1541;187101,9543;ABC;poi;4,7387
2;121,12;14897,76;DEF;uyt;0,3773
3;878,158;108013,434;GHI;rez;2,7356"""
converters["Number1"] = converters["Number2"] =\
converters["Number3"] = lambda x: float(x.replace(",", "."))
result = parser.read_csv(StringIO(data), sep=";", converters=converters)
expected = DataFrame([[1, 1521.1541, 187101.9543, "ABC", "poi", 4.7387],
[2, 121.12, 14897.76, "DEF", "uyt", 0.3773],
[3, 878.158, 108013.434, "GHI", "rez", 2.7356]],
columns=["Id", "Number1", "Number2",
"Text1", "Text2", "Number3"])
tm.assert_frame_equal(result, expected)
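# Editorial aside: read_csv's decimal parameter covers this case without
# converters. A minimal sketch, assuming the same `data` as above:
#
#     parser.read_csv(StringIO(data), sep=";", decimal=",")
#
# should yield the same floats for the three Number columns.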
def test_converters_corner_with_nans(all_parsers):
parser = all_parsers
data = """id,score,days
1,2,12
2,2-5,
3,,14+
4,6-12,2"""
# Example converters.
def convert_days(x):
x = x.strip()
if not x:
return np.nan
is_plus = x.endswith("+")
if is_plus:
x = int(x[:-1]) + 1
else:
x = int(x)
return x
def convert_days_sentinel(x):
x = x.strip()
if not x:
return np.nan
is_plus = x.endswith("+")
if is_plus:
x = int(x[:-1]) + 1
else:
x = int(x)
return x
def convert_score(x):
x = x.strip()
if not x:
return np.nan
if x.find("-") > 0:
val_min, val_max = lmap(int, x.split("-"))
val = 0.5 * (val_min + val_max)
else:
val = float(x)
return val
results = []
for day_converter in [convert_days, convert_days_sentinel]:
result = parser.read_csv(StringIO(data),
converters={"score": convert_score,
"days": day_converter},
na_values=["", None])
assert pd.isna(result["days"][1])
results.append(result)
tm.assert_frame_equal(results[0], results[1])
def test_converter_index_col_bug(all_parsers):
# see gh-1835
parser = all_parsers
data = "A;B\n1;2\n3;4"
rs = parser.read_csv(StringIO(data), sep=";", index_col="A",
converters={"A": lambda x: x})
xp = DataFrame({"B": [2, 4]}, index=Index([1, 3], name="A"))
tm.assert_frame_equal(rs, xp)
@@ -0,0 +1,135 @@
# -*- coding: utf-8 -*-
"""
Tests that dialects are properly handled during parsing
for all of the parsers defined in parsers.py
"""
import csv
import pytest
from pandas.compat import StringIO
from pandas.errors import ParserWarning
from pandas import DataFrame
import pandas.util.testing as tm
@pytest.fixture
def custom_dialect():
dialect_name = "weird"
dialect_kwargs = dict(doublequote=False, escapechar="~", delimiter=":",
skipinitialspace=False, quotechar="~", quoting=3)
return dialect_name, dialect_kwargs
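# (For reference: quoting=3 in the fixture above is the numeric value of
# csv.QUOTE_NONE, so the "~" quotechar is effectively inert when reading.)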
def test_dialect(all_parsers):
parser = all_parsers
data = """\
label1,label2,label3
index1,"a,c,e
index2,b,d,f
"""
dia = csv.excel()
dia.quoting = csv.QUOTE_NONE
df = parser.read_csv(StringIO(data), dialect=dia)
data = """\
label1,label2,label3
index1,a,c,e
index2,b,d,f
"""
exp = parser.read_csv(StringIO(data))
exp.replace("a", "\"a", inplace=True)
tm.assert_frame_equal(df, exp)
def test_dialect_str(all_parsers):
dialect_name = "mydialect"
parser = all_parsers
data = """\
fruit:vegetable
apple:broccoli
pear:tomato
"""
exp = DataFrame({
"fruit": ["apple", "pear"],
"vegetable": ["broccoli", "tomato"]
})
with tm.with_csv_dialect(dialect_name, delimiter=":"):
df = parser.read_csv(StringIO(data), dialect=dialect_name)
tm.assert_frame_equal(df, exp)
def test_invalid_dialect(all_parsers):
class InvalidDialect(object):
pass
data = "a\n1"
parser = all_parsers
msg = "Invalid dialect"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), dialect=InvalidDialect)
@pytest.mark.parametrize("arg", [None, "doublequote", "escapechar",
"skipinitialspace", "quotechar", "quoting"])
@pytest.mark.parametrize("value", ["dialect", "default", "other"])
def test_dialect_conflict_except_delimiter(all_parsers, custom_dialect,
arg, value):
# see gh-23761.
dialect_name, dialect_kwargs = custom_dialect
parser = all_parsers
expected = DataFrame({"a": [1], "b": [2]})
data = "a:b\n1:2"
warning_klass = None
kwds = dict()
# arg=None tests when we pass in the dialect without any other arguments.
if arg is not None:
if "value" == "dialect": # No conflict --> no warning.
kwds[arg] = dialect_kwargs[arg]
elif "value" == "default": # Default --> no warning.
from pandas.io.parsers import _parser_defaults
kwds[arg] = _parser_defaults[arg]
else: # Non-default + conflict with dialect --> warning.
warning_klass = ParserWarning
kwds[arg] = "blah"
with tm.with_csv_dialect(dialect_name, **dialect_kwargs):
with tm.assert_produces_warning(warning_klass):
result = parser.read_csv(StringIO(data),
dialect=dialect_name, **kwds)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kwargs,warning_klass", [
(dict(sep=","), None), # sep is default --> sep_override=True
(dict(sep="."), ParserWarning), # sep isn't default --> sep_override=False
(dict(delimiter=":"), None), # No conflict
(dict(delimiter=None), None), # Default arguments --> sep_override=True
(dict(delimiter=","), ParserWarning), # Conflict
(dict(delimiter="."), ParserWarning), # Conflict
], ids=["sep-override-true", "sep-override-false",
"delimiter-no-conflict", "delimiter-default-arg",
"delimiter-conflict", "delimiter-conflict2"])
def test_dialect_conflict_delimiter(all_parsers, custom_dialect,
kwargs, warning_klass):
# see gh-23761.
dialect_name, dialect_kwargs = custom_dialect
parser = all_parsers
expected = DataFrame({"a": [1], "b": [2]})
data = "a:b\n1:2"
with tm.with_csv_dialect(dialect_name, **dialect_kwargs):
with tm.assert_produces_warning(warning_klass):
result = parser.read_csv(StringIO(data),
dialect=dialect_name, **kwargs)
tm.assert_frame_equal(result, expected)
@@ -0,0 +1,514 @@
# -*- coding: utf-8 -*-
"""
Tests dtype specification during parsing
for all of the parsers defined in parsers.py
"""
import os
import numpy as np
import pytest
from pandas.compat import StringIO
from pandas.errors import ParserWarning
from pandas.core.dtypes.dtypes import CategoricalDtype
import pandas as pd
from pandas import (
Categorical, DataFrame, Index, MultiIndex, Series, Timestamp, concat)
import pandas.util.testing as tm
@pytest.mark.parametrize("dtype", [str, object])
@pytest.mark.parametrize("check_orig", [True, False])
def test_dtype_all_columns(all_parsers, dtype, check_orig):
# see gh-3795, gh-6607
parser = all_parsers
df = DataFrame(np.random.rand(5, 2).round(4), columns=list("AB"),
index=["1A", "1B", "1C", "1D", "1E"])
with tm.ensure_clean("__passing_str_as_dtype__.csv") as path:
df.to_csv(path)
result = parser.read_csv(path, dtype=dtype, index_col=0)
if check_orig:
expected = df.copy()
result = result.astype(float)
else:
expected = df.astype(str)
tm.assert_frame_equal(result, expected)
def test_dtype_all_columns_empty(all_parsers):
# see gh-12048
parser = all_parsers
result = parser.read_csv(StringIO("A,B"), dtype=str)
expected = DataFrame({"A": [], "B": []}, index=[], dtype=str)
tm.assert_frame_equal(result, expected)
def test_dtype_per_column(all_parsers):
parser = all_parsers
data = """\
one,two
1,2.5
2,3.5
3,4.5
4,5.5"""
expected = DataFrame([[1, "2.5"], [2, "3.5"], [3, "4.5"], [4, "5.5"]],
columns=["one", "two"])
expected["one"] = expected["one"].astype(np.float64)
expected["two"] = expected["two"].astype(object)
result = parser.read_csv(StringIO(data), dtype={"one": np.float64,
1: str})
tm.assert_frame_equal(result, expected)
def test_invalid_dtype_per_column(all_parsers):
parser = all_parsers
data = """\
one,two
1,2.5
2,3.5
3,4.5
4,5.5"""
with pytest.raises(TypeError, match="data type 'foo' not understood"):
parser.read_csv(StringIO(data), dtype={"one": "foo", 1: "int"})
@pytest.mark.parametrize("dtype", [
"category",
CategoricalDtype(),
{"a": "category",
"b": "category",
"c": CategoricalDtype()}
])
def test_categorical_dtype(all_parsers, dtype):
# see gh-10153
parser = all_parsers
data = """a,b,c
1,a,3.4
1,a,3.4
2,b,4.5"""
expected = DataFrame({"a": Categorical(["1", "1", "2"]),
"b": Categorical(["a", "a", "b"]),
"c": Categorical(["3.4", "3.4", "4.5"])})
actual = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(actual, expected)
@pytest.mark.parametrize("dtype", [
{"b": "category"},
{1: "category"}
])
def test_categorical_dtype_single(all_parsers, dtype):
# see gh-10153
parser = all_parsers
data = """a,b,c
1,a,3.4
1,a,3.4
2,b,4.5"""
expected = DataFrame({"a": [1, 1, 2],
"b": Categorical(["a", "a", "b"]),
"c": [3.4, 3.4, 4.5]})
actual = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(actual, expected)
def test_categorical_dtype_unsorted(all_parsers):
# see gh-10153
parser = all_parsers
data = """a,b,c
1,b,3.4
1,b,3.4
2,a,4.5"""
expected = DataFrame({"a": Categorical(["1", "1", "2"]),
"b": Categorical(["b", "b", "a"]),
"c": Categorical(["3.4", "3.4", "4.5"])})
actual = parser.read_csv(StringIO(data), dtype="category")
tm.assert_frame_equal(actual, expected)
def test_categorical_dtype_missing(all_parsers):
# see gh-10153
parser = all_parsers
data = """a,b,c
1,b,3.4
1,nan,3.4
2,a,4.5"""
expected = DataFrame({"a": Categorical(["1", "1", "2"]),
"b": Categorical(["b", np.nan, "a"]),
"c": Categorical(["3.4", "3.4", "4.5"])})
actual = parser.read_csv(StringIO(data), dtype="category")
tm.assert_frame_equal(actual, expected)
@pytest.mark.slow
def test_categorical_dtype_high_cardinality_numeric(all_parsers):
# see gh-18186
parser = all_parsers
data = np.sort([str(i) for i in range(524289)])
expected = DataFrame({"a": Categorical(data, ordered=True)})
actual = parser.read_csv(StringIO("a\n" + "\n".join(data)),
dtype="category")
actual["a"] = actual["a"].cat.reorder_categories(
np.sort(actual.a.cat.categories), ordered=True)
tm.assert_frame_equal(actual, expected)
def test_categorical_dtype_latin1(all_parsers, csv_dir_path):
# see gh-10153
pth = os.path.join(csv_dir_path, "unicode_series.csv")
parser = all_parsers
encoding = "latin-1"
expected = parser.read_csv(pth, header=None, encoding=encoding)
expected[1] = Categorical(expected[1])
actual = parser.read_csv(pth, header=None, encoding=encoding,
dtype={1: "category"})
tm.assert_frame_equal(actual, expected)
def test_categorical_dtype_utf16(all_parsers, csv_dir_path):
# see gh-10153
pth = os.path.join(csv_dir_path, "utf16_ex.txt")
parser = all_parsers
encoding = "utf-16"
sep = ","
expected = parser.read_csv(pth, sep=sep, encoding=encoding)
expected = expected.apply(Categorical)
actual = parser.read_csv(pth, sep=sep, encoding=encoding, dtype="category")
tm.assert_frame_equal(actual, expected)
def test_categorical_dtype_chunksize_infer_categories(all_parsers):
# see gh-10153
parser = all_parsers
data = """a,b
1,a
1,b
1,b
2,c"""
expecteds = [DataFrame({"a": [1, 1],
"b": Categorical(["a", "b"])}),
DataFrame({"a": [1, 2],
"b": Categorical(["b", "c"])},
index=[2, 3])]
actuals = parser.read_csv(StringIO(data), dtype={"b": "category"},
chunksize=2)
for actual, expected in zip(actuals, expecteds):
tm.assert_frame_equal(actual, expected)
def test_categorical_dtype_chunksize_explicit_categories(all_parsers):
# see gh-10153
parser = all_parsers
data = """a,b
1,a
1,b
1,b
2,c"""
cats = ["a", "b", "c"]
expecteds = [DataFrame({"a": [1, 1],
"b": Categorical(["a", "b"],
categories=cats)}),
DataFrame({"a": [1, 2],
"b": Categorical(["b", "c"],
categories=cats)},
index=[2, 3])]
dtype = CategoricalDtype(cats)
actuals = parser.read_csv(StringIO(data), dtype={"b": dtype}, chunksize=2)
for actual, expected in zip(actuals, expecteds):
tm.assert_frame_equal(actual, expected)
@pytest.mark.parametrize("ordered", [False, True])
@pytest.mark.parametrize("categories", [
["a", "b", "c"],
["a", "c", "b"],
["a", "b", "c", "d"],
["c", "b", "a"],
])
def test_categorical_category_dtype(all_parsers, categories, ordered):
parser = all_parsers
data = """a,b
1,a
1,b
1,b
2,c"""
expected = DataFrame({
"a": [1, 1, 1, 2],
"b": Categorical(["a", "b", "b", "c"],
categories=categories,
ordered=ordered)
})
dtype = {"b": CategoricalDtype(categories=categories,
ordered=ordered)}
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
def test_categorical_category_dtype_unsorted(all_parsers):
parser = all_parsers
data = """a,b
1,a
1,b
1,b
2,c"""
dtype = CategoricalDtype(["c", "b", "a"])
expected = DataFrame({
"a": [1, 1, 1, 2],
"b": Categorical(["a", "b", "b", "c"], categories=["c", "b", "a"])
})
result = parser.read_csv(StringIO(data), dtype={"b": dtype})
tm.assert_frame_equal(result, expected)
def test_categorical_coerces_numeric(all_parsers):
parser = all_parsers
dtype = {"b": CategoricalDtype([1, 2, 3])}
data = "b\n1\n1\n2\n3"
expected = DataFrame({"b": Categorical([1, 1, 2, 3])})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
def test_categorical_coerces_datetime(all_parsers):
parser = all_parsers
dtype = {"b": CategoricalDtype(pd.date_range("2017", "2019", freq="AS"))}
data = "b\n2017-01-01\n2018-01-01\n2019-01-01"
expected = DataFrame({"b": Categorical(dtype["b"].categories)})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
def test_categorical_coerces_timestamp(all_parsers):
parser = all_parsers
dtype = {"b": CategoricalDtype([Timestamp("2014")])}
data = "b\n2014-01-01\n2014-01-01T00:00:00"
expected = DataFrame({"b": Categorical([Timestamp("2014")] * 2)})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
def test_categorical_coerces_timedelta(all_parsers):
parser = all_parsers
dtype = {"b": CategoricalDtype(pd.to_timedelta(["1H", "2H", "3H"]))}
data = "b\n1H\n2H\n3H"
expected = DataFrame({"b": Categorical(dtype["b"].categories)})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("data", [
"b\nTrue\nFalse\nNA\nFalse",
"b\ntrue\nfalse\nNA\nfalse",
"b\nTRUE\nFALSE\nNA\nFALSE",
"b\nTrue\nFalse\nNA\nFALSE",
])
def test_categorical_dtype_coerces_boolean(all_parsers, data):
# see gh-20498
parser = all_parsers
dtype = {"b": CategoricalDtype([False, True])}
expected = DataFrame({"b": Categorical([True, False, None, False])})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
def test_categorical_unexpected_categories(all_parsers):
parser = all_parsers
dtype = {"b": CategoricalDtype(["a", "b", "d", "e"])}
data = "b\nd\na\nc\nd" # Unexpected c
expected = DataFrame({"b": Categorical(list("dacd"),
dtype=dtype["b"])})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
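# Editorial sketch (reuses the Categorical/CategoricalDtype imports at the
# top of this file): the same coercion is visible when constructing the
# Categorical directly -- values absent from the dtype's categories, here
# "c", come back as NaN rather than raising.
def _sketch_unexpected_category():
    dtype = CategoricalDtype(["a", "b", "d", "e"])
    cat = Categorical(list("dacd"), dtype=dtype)
    return cat.isna().tolist()  # [False, False, True, False]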
def test_empty_pass_dtype(all_parsers):
parser = all_parsers
data = "one,two"
result = parser.read_csv(StringIO(data), dtype={"one": "u1"})
expected = DataFrame({"one": np.empty(0, dtype="u1"),
"two": np.empty(0, dtype=np.object)},
index=Index([], dtype=object))
tm.assert_frame_equal(result, expected)
def test_empty_with_index_pass_dtype(all_parsers):
parser = all_parsers
data = "one,two"
result = parser.read_csv(StringIO(data), index_col=["one"],
dtype={"one": "u1", 1: "f"})
expected = DataFrame({"two": np.empty(0, dtype="f")},
index=Index([], dtype="u1", name="one"))
tm.assert_frame_equal(result, expected)
def test_empty_with_multi_index_pass_dtype(all_parsers):
parser = all_parsers
data = "one,two,three"
result = parser.read_csv(StringIO(data), index_col=["one", "two"],
dtype={"one": "u1", 1: "f8"})
exp_idx = MultiIndex.from_arrays([np.empty(0, dtype="u1"),
np.empty(0, dtype=np.float64)],
names=["one", "two"])
expected = DataFrame({"three": np.empty(0, dtype=np.object)},
index=exp_idx)
tm.assert_frame_equal(result, expected)
def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers):
parser = all_parsers
data = "one,one"
result = parser.read_csv(StringIO(data), dtype={"one": "u1", "one.1": "f"})
expected = DataFrame({"one": np.empty(0, dtype="u1"),
"one.1": np.empty(0, dtype="f")},
index=Index([], dtype=object))
tm.assert_frame_equal(result, expected)
def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers):
parser = all_parsers
data = "one,one"
result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"})
expected = DataFrame({"one": np.empty(0, dtype="u1"),
"one.1": np.empty(0, dtype="f")},
index=Index([], dtype=object))
tm.assert_frame_equal(result, expected)
def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers):
# see gh-9424
parser = all_parsers
expected = concat([Series([], name="one", dtype="u1"),
Series([], name="one.1", dtype="f")], axis=1)
expected.index = expected.index.astype(object)
data = "one,one"
result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"})
tm.assert_frame_equal(result, expected)
def test_empty_with_dup_column_pass_dtype_by_indexes_warn(all_parsers):
# see gh-9424
parser = all_parsers
expected = concat([Series([], name="one", dtype="u1"),
Series([], name="one.1", dtype="f")], axis=1)
expected.index = expected.index.astype(object)
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
data = ""
result = parser.read_csv(StringIO(data), names=["one", "one"],
dtype={0: "u1", 1: "f"})
tm.assert_frame_equal(result, expected)
def test_raise_on_passed_int_dtype_with_nas(all_parsers):
# see gh-2631
parser = all_parsers
data = """YEAR, DOY, a
2001,106380451,10
2001,,11
2001,106380451,67"""
msg = ("Integer column has NA values" if parser.engine == "c" else
"Unable to convert column DOY")
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), dtype={"DOY": np.int64},
skipinitialspace=True)
def test_dtype_with_converters(all_parsers):
parser = all_parsers
data = """a,b
1.1,2.2
1.2,2.3"""
    # Dtype spec is ignored if converters are specified.
with tm.assert_produces_warning(ParserWarning):
result = parser.read_csv(StringIO(data), dtype={"a": "i8"},
converters={"a": lambda x: str(x)})
expected = DataFrame({"a": ["1.1", "1.2"], "b": [2.2, 2.3]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("dtype,expected", [
(np.float64, DataFrame(columns=["a", "b"], dtype=np.float64)),
("category", DataFrame({"a": Categorical([]),
"b": Categorical([])},
index=[])),
(dict(a="category", b="category"),
DataFrame({"a": Categorical([]),
"b": Categorical([])},
index=[])),
("datetime64[ns]", DataFrame(columns=["a", "b"], dtype="datetime64[ns]")),
("timedelta64[ns]", DataFrame({"a": Series([], dtype="timedelta64[ns]"),
"b": Series([], dtype="timedelta64[ns]")},
index=[])),
(dict(a=np.int64,
b=np.int32), DataFrame({"a": Series([], dtype=np.int64),
"b": Series([], dtype=np.int32)},
index=[])),
({0: np.int64, 1: np.int32}, DataFrame({"a": Series([], dtype=np.int64),
"b": Series([], dtype=np.int32)},
index=[])),
({"a": np.int64, 1: np.int32}, DataFrame({"a": Series([], dtype=np.int64),
"b": Series([], dtype=np.int32)},
index=[])),
])
def test_empty_dtype(all_parsers, dtype, expected):
# see gh-14712
parser = all_parsers
data = "a,b"
result = parser.read_csv(StringIO(data), header=0, dtype=dtype)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("dtype", list(np.typecodes["AllInteger"] +
np.typecodes["Float"]))
def test_numeric_dtype(all_parsers, dtype):
data = "0\n1"
parser = all_parsers
expected = DataFrame([0, 1], dtype=dtype)
result = parser.read_csv(StringIO(data), header=None, dtype=dtype)
tm.assert_frame_equal(expected, result)
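# (For reference: np.typecodes["AllInteger"] is the string of one-letter
# codes "bBhHiIlLqQ" and np.typecodes["Float"] is "efdg", so the
# parametrization above covers every supported integer and float width.)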
@@ -0,0 +1,428 @@
# -*- coding: utf-8 -*-
"""
Tests that the file header is properly handled or inferred
during parsing for all of the parsers defined in parsers.py
"""
from collections import namedtuple
import numpy as np
import pytest
from pandas.compat import StringIO, u
from pandas.errors import ParserError
from pandas import DataFrame, Index, MultiIndex
import pandas.util.testing as tm
def test_read_with_bad_header(all_parsers):
parser = all_parsers
msg = r"but only \d+ lines in file"
with pytest.raises(ValueError, match=msg):
s = StringIO(",,")
parser.read_csv(s, header=[10])
@pytest.mark.parametrize("header", [True, False])
def test_bool_header_arg(all_parsers, header):
# see gh-6114
parser = all_parsers
data = """\
MyColumn
a
b
a
b"""
msg = "Passing a bool to header is invalid"
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), header=header)
def test_no_header_prefix(all_parsers):
parser = all_parsers
data = """1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
result = parser.read_csv(StringIO(data), prefix="Field", header=None)
expected = DataFrame([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10],
[11, 12, 13, 14, 15]],
columns=["Field0", "Field1", "Field2",
"Field3", "Field4"])
tm.assert_frame_equal(result, expected)
def test_header_with_index_col(all_parsers):
parser = all_parsers
data = """foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
names = ["A", "B", "C"]
result = parser.read_csv(StringIO(data), names=names)
expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
index=["foo", "bar", "baz"],
columns=["A", "B", "C"])
tm.assert_frame_equal(result, expected)
def test_header_not_first_line(all_parsers):
parser = all_parsers
data = """got,to,ignore,this,line
got,to,ignore,this,line
index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
"""
data2 = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
"""
result = parser.read_csv(StringIO(data), header=2, index_col=0)
expected = parser.read_csv(StringIO(data2), header=0, index_col=0)
tm.assert_frame_equal(result, expected)
def test_header_multi_index(all_parsers):
parser = all_parsers
expected = tm.makeCustomDataframe(
5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
data = """\
C0,,C_l0_g0,C_l0_g1,C_l0_g2
C1,,C_l1_g0,C_l1_g1,C_l1_g2
C2,,C_l2_g0,C_l2_g1,C_l2_g2
C3,,C_l3_g0,C_l3_g1,C_l3_g2
R0,R1,,,
R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2
R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2
R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2
R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2
R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2
"""
result = parser.read_csv(StringIO(data), header=[0, 1, 2, 3],
index_col=[0, 1])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kwargs,msg", [
(dict(index_col=["foo", "bar"]), ("index_col must only contain "
"row numbers when specifying "
"a multi-index header")),
(dict(index_col=[0, 1], names=["foo", "bar"]), ("cannot specify names "
"when specifying a "
"multi-index header")),
(dict(index_col=[0, 1], usecols=["foo", "bar"]), ("cannot specify "
"usecols when "
"specifying a "
"multi-index header")),
])
def test_header_multi_index_invalid(all_parsers, kwargs, msg):
data = """\
C0,,C_l0_g0,C_l0_g1,C_l0_g2
C1,,C_l1_g0,C_l1_g1,C_l1_g2
C2,,C_l2_g0,C_l2_g1,C_l2_g2
C3,,C_l3_g0,C_l3_g1,C_l3_g2
R0,R1,,,
R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2
R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2
R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2
R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2
R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2
"""
parser = all_parsers
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=[0, 1, 2, 3], **kwargs)
_TestTuple = namedtuple("names", ["first", "second"])
@pytest.mark.parametrize("kwargs", [
dict(header=[0, 1]),
dict(skiprows=3,
names=[("a", "q"), ("a", "r"), ("a", "s"),
("b", "t"), ("c", "u"), ("c", "v")]),
dict(skiprows=3,
names=[_TestTuple("a", "q"), _TestTuple("a", "r"),
_TestTuple("a", "s"), _TestTuple("b", "t"),
_TestTuple("c", "u"), _TestTuple("c", "v")])
])
def test_header_multi_index_common_format1(all_parsers, kwargs):
parser = all_parsers
expected = DataFrame([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
index=["one", "two"],
columns=MultiIndex.from_tuples(
[("a", "q"), ("a", "r"), ("a", "s"),
("b", "t"), ("c", "u"), ("c", "v")]))
data = """,a,a,a,b,c,c
,q,r,s,t,u,v
,,,,,,
one,1,2,3,4,5,6
two,7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), index_col=0, **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kwargs", [
dict(header=[0, 1]),
dict(skiprows=2,
names=[("a", "q"), ("a", "r"), ("a", "s"),
("b", "t"), ("c", "u"), ("c", "v")]),
dict(skiprows=2,
names=[_TestTuple("a", "q"), _TestTuple("a", "r"),
_TestTuple("a", "s"), _TestTuple("b", "t"),
_TestTuple("c", "u"), _TestTuple("c", "v")])
])
def test_header_multi_index_common_format2(all_parsers, kwargs):
parser = all_parsers
expected = DataFrame([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
index=["one", "two"],
columns=MultiIndex.from_tuples(
[("a", "q"), ("a", "r"), ("a", "s"),
("b", "t"), ("c", "u"), ("c", "v")]))
data = """,a,a,a,b,c,c
,q,r,s,t,u,v
one,1,2,3,4,5,6
two,7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), index_col=0, **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kwargs", [
dict(header=[0, 1]),
dict(skiprows=2,
names=[("a", "q"), ("a", "r"), ("a", "s"),
("b", "t"), ("c", "u"), ("c", "v")]),
dict(skiprows=2,
names=[_TestTuple("a", "q"), _TestTuple("a", "r"),
_TestTuple("a", "s"), _TestTuple("b", "t"),
_TestTuple("c", "u"), _TestTuple("c", "v")])
])
def test_header_multi_index_common_format3(all_parsers, kwargs):
parser = all_parsers
expected = DataFrame([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
index=["one", "two"],
columns=MultiIndex.from_tuples(
[("a", "q"), ("a", "r"), ("a", "s"),
("b", "t"), ("c", "u"), ("c", "v")]))
expected = expected.reset_index(drop=True)
data = """a,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), index_col=None, **kwargs)
tm.assert_frame_equal(result, expected)
def test_header_multi_index_common_format_malformed1(all_parsers):
parser = all_parsers
expected = DataFrame(np.array(
[[2, 3, 4, 5, 6], [8, 9, 10, 11, 12]], dtype="int64"),
index=Index([1, 7]),
columns=MultiIndex(levels=[[u("a"), u("b"), u("c")],
[u("r"), u("s"), u("t"),
u("u"), u("v")]],
codes=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]],
names=[u("a"), u("q")]))
data = """a,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), header=[0, 1], index_col=0)
tm.assert_frame_equal(expected, result)
def test_header_multi_index_common_format_malformed2(all_parsers):
parser = all_parsers
expected = DataFrame(np.array(
[[2, 3, 4, 5, 6], [8, 9, 10, 11, 12]], dtype="int64"),
index=Index([1, 7]),
columns=MultiIndex(levels=[[u("a"), u("b"), u("c")],
[u("r"), u("s"), u("t"),
u("u"), u("v")]],
codes=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]],
names=[None, u("q")]))
data = """,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), header=[0, 1], index_col=0)
tm.assert_frame_equal(expected, result)
def test_header_multi_index_common_format_malformed3(all_parsers):
parser = all_parsers
expected = DataFrame(np.array(
[[3, 4, 5, 6], [9, 10, 11, 12]], dtype="int64"),
index=MultiIndex(levels=[[1, 7], [2, 8]],
codes=[[0, 1], [0, 1]]),
columns=MultiIndex(levels=[[u("a"), u("b"), u("c")],
[u("s"), u("t"), u("u"), u("v")]],
codes=[[0, 1, 2, 2], [0, 1, 2, 3]],
names=[None, u("q")]))
data = """,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1])
tm.assert_frame_equal(expected, result)
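# (Reminder for the MultiIndex(levels=..., codes=...) constructions above:
# each array in `codes` holds integer positions into the matching `levels`
# array, so codes=[[0, 0, 1, 2, 2], ...] spells out a, a, b, c, c.)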
@pytest.mark.parametrize("data,header", [
("1,2,3\n4,5,6", None),
("foo,bar,baz\n1,2,3\n4,5,6", 0),
])
def test_header_names_backward_compat(all_parsers, data, header):
# see gh-2539
parser = all_parsers
expected = parser.read_csv(StringIO("1,2,3\n4,5,6"),
names=["a", "b", "c"])
result = parser.read_csv(StringIO(data), names=["a", "b", "c"],
header=header)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kwargs", [
dict(), dict(index_col=False)
])
def test_read_only_header_no_rows(all_parsers, kwargs):
# See gh-7773
parser = all_parsers
expected = DataFrame(columns=["a", "b", "c"])
result = parser.read_csv(StringIO("a,b,c"), **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kwargs,names", [
(dict(), [0, 1, 2, 3, 4]),
(dict(prefix="X"), ["X0", "X1", "X2", "X3", "X4"]),
(dict(names=["foo", "bar", "baz", "quux", "panda"]),
["foo", "bar", "baz", "quux", "panda"])
])
def test_no_header(all_parsers, kwargs, names):
parser = all_parsers
data = """1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
expected = DataFrame([[1, 2, 3, 4, 5],
[6, 7, 8, 9, 10],
[11, 12, 13, 14, 15]], columns=names)
result = parser.read_csv(StringIO(data), header=None, **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("header", [
["a", "b"],
"string_header"
])
def test_non_int_header(all_parsers, header):
# see gh-16338
msg = "header must be integer or list of integers"
data = """1,2\n3,4"""
parser = all_parsers
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=header)
def test_singleton_header(all_parsers):
# see gh-7757
data = """a,b,c\n0,1,2\n1,2,3"""
parser = all_parsers
expected = DataFrame({"a": [0, 1], "b": [1, 2], "c": [2, 3]})
result = parser.read_csv(StringIO(data), header=[0])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("data,expected", [
("A,A,A,B\none,one,one,two\n0,40,34,0.1",
DataFrame([[0, 40, 34, 0.1]],
columns=MultiIndex.from_tuples(
[("A", "one"), ("A", "one.1"),
("A", "one.2"), ("B", "two")]))),
("A,A,A,B\none,one,one.1,two\n0,40,34,0.1",
DataFrame([[0, 40, 34, 0.1]],
columns=MultiIndex.from_tuples(
[("A", "one"), ("A", "one.1"),
("A", "one.1.1"), ("B", "two")]))),
("A,A,A,B,B\none,one,one.1,two,two\n0,40,34,0.1,0.1",
DataFrame([[0, 40, 34, 0.1, 0.1]],
columns=MultiIndex.from_tuples(
[("A", "one"), ("A", "one.1"),
("A", "one.1.1"), ("B", "two"),
("B", "two.1")])))
])
def test_mangles_multi_index(all_parsers, data, expected):
# see gh-18062
parser = all_parsers
result = parser.read_csv(StringIO(data), header=[0, 1])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("index_col", [None, [0]])
@pytest.mark.parametrize("columns", [None,
(["", "Unnamed"]),
(["Unnamed", ""]),
(["Unnamed", "NotUnnamed"])])
def test_multi_index_unnamed(all_parsers, index_col, columns):
# see gh-23687
#
# When specifying a multi-index header, make sure that
# we don't error just because one of the rows in our header
# has ALL column names containing the string "Unnamed". The
# correct condition to check is whether the row contains
# ALL columns that did not have names (and instead were given
# placeholder ones).
parser = all_parsers
header = [0, 1]
if index_col is None:
data = ",".join(columns or ["", ""]) + "\n0,1\n2,3\n4,5\n"
else:
data = (",".join([""] + (columns or ["", ""])) +
"\n,0,1\n0,2,3\n1,4,5\n")
if columns is None:
msg = (r"Passed header=\[0,1\] are too "
r"many rows for this multi_index of columns")
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), header=header,
index_col=index_col)
else:
result = parser.read_csv(StringIO(data), header=header,
index_col=index_col)
template = "Unnamed: {i}_level_0"
exp_columns = []
for i, col in enumerate(columns):
if not col: # Unnamed.
col = template.format(i=i if index_col is None else i + 1)
exp_columns.append(col)
columns = MultiIndex.from_tuples(zip(exp_columns, ["0", "1"]))
expected = DataFrame([[2, 3], [4, 5]], columns=columns)
tm.assert_frame_equal(result, expected)
@@ -0,0 +1,152 @@
# -*- coding: utf-8 -*-
"""
Tests that the specified index column (a.k.a "index_col")
is properly handled or inferred during parsing for all of
the parsers defined in parsers.py
"""
import pytest
from pandas.compat import StringIO
from pandas import DataFrame, Index, MultiIndex
import pandas.util.testing as tm
@pytest.mark.parametrize("with_header", [True, False])
def test_index_col_named(all_parsers, with_header):
parser = all_parsers
no_header = """\
KORD1,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
KORD2,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
KORD3,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
KORD4,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" # noqa
header = "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n" # noqa
if with_header:
data = header + no_header
result = parser.read_csv(StringIO(data), index_col="ID")
expected = parser.read_csv(StringIO(data), header=0).set_index("ID")
tm.assert_frame_equal(result, expected)
else:
data = no_header
msg = "Index ID invalid"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), index_col="ID")
def test_index_col_named2(all_parsers):
parser = all_parsers
data = """\
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo
"""
expected = DataFrame({"a": [1, 5, 9], "b": [2, 6, 10],
"c": [3, 7, 11], "d": [4, 8, 12]},
index=Index(["hello", "world", "foo"],
name="message"))
names = ["a", "b", "c", "d", "message"]
result = parser.read_csv(StringIO(data), names=names,
index_col=["message"])
tm.assert_frame_equal(result, expected)
def test_index_col_is_true(all_parsers):
# see gh-9798
data = "a,b\n1,2"
parser = all_parsers
with pytest.raises(ValueError, match="The value of index_col "
"couldn't be 'True'"):
parser.read_csv(StringIO(data), index_col=True)
def test_infer_index_col(all_parsers):
data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
parser = all_parsers
result = parser.read_csv(StringIO(data))
expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
index=["foo", "bar", "baz"],
columns=["A", "B", "C"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("index_col,kwargs", [
(None, dict(columns=["x", "y", "z"])),
(False, dict(columns=["x", "y", "z"])),
(0, dict(columns=["y", "z"], index=Index([], name="x"))),
(1, dict(columns=["x", "z"], index=Index([], name="y"))),
("x", dict(columns=["y", "z"], index=Index([], name="x"))),
("y", dict(columns=["x", "z"], index=Index([], name="y"))),
([0, 1], dict(columns=["z"], index=MultiIndex.from_arrays(
[[]] * 2, names=["x", "y"]))),
(["x", "y"], dict(columns=["z"], index=MultiIndex.from_arrays(
[[]] * 2, names=["x", "y"]))),
([1, 0], dict(columns=["z"], index=MultiIndex.from_arrays(
[[]] * 2, names=["y", "x"]))),
(["y", "x"], dict(columns=["z"], index=MultiIndex.from_arrays(
[[]] * 2, names=["y", "x"]))),
])
def test_index_col_empty_data(all_parsers, index_col, kwargs):
data = "x,y,z"
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=index_col)
expected = DataFrame([], **kwargs)
tm.assert_frame_equal(result, expected)
def test_empty_with_index_col_false(all_parsers):
# see gh-10413
data = "x,y"
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=False)
expected = DataFrame([], columns=["x", "y"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("index_names", [
["", ""],
["foo", ""],
["", "bar"],
["foo", "bar"],
["NotReallyUnnamed", "Unnamed: 0"],
])
def test_multi_index_naming(all_parsers, index_names):
parser = all_parsers
# We don't want empty index names being replaced with "Unnamed: 0"
data = ",".join(index_names + ["col\na,c,1\na,d,2\nb,c,3\nb,d,4"])
result = parser.read_csv(StringIO(data), index_col=[0, 1])
expected = DataFrame({"col": [1, 2, 3, 4]},
index=MultiIndex.from_product([["a", "b"],
["c", "d"]]))
expected.index.names = [name if name else None for name in index_names]
tm.assert_frame_equal(result, expected)
def test_multi_index_naming_not_all_at_beginning(all_parsers):
parser = all_parsers
data = ",Unnamed: 2,\na,c,1\na,d,2\nb,c,3\nb,d,4"
result = parser.read_csv(StringIO(data), index_col=[0, 2])
expected = DataFrame({"Unnamed: 2": ["c", "d", "c", "d"]},
index=MultiIndex(
levels=[['a', 'b'], [1, 2, 3, 4]],
codes=[[0, 0, 1, 1], [0, 1, 2, 3]]))
tm.assert_frame_equal(result, expected)
@@ -0,0 +1,119 @@
# -*- coding: utf-8 -*-
"""
Tests that duplicate columns are handled appropriately when parsed by the
CSV engine. In general, the expected result is that they are either thoroughly
de-duplicated (if mangling requested) or ignored otherwise.
"""
import pytest
from pandas.compat import StringIO
from pandas import DataFrame
import pandas.util.testing as tm
@pytest.mark.parametrize("kwargs", [dict(), dict(mangle_dupe_cols=True)])
def test_basic(all_parsers, kwargs):
# TODO: add test for condition "mangle_dupe_cols=False"
# once it is actually supported (gh-12935)
parser = all_parsers
data = "a,a,b,b,b\n1,2,3,4,5"
result = parser.read_csv(StringIO(data), sep=",", **kwargs)
expected = DataFrame([[1, 2, 3, 4, 5]],
columns=["a", "a.1", "b", "b.1", "b.2"])
tm.assert_frame_equal(result, expected)
def test_basic_names(all_parsers):
# See gh-7160
parser = all_parsers
data = "a,b,a\n0,1,2\n3,4,5"
expected = DataFrame([[0, 1, 2], [3, 4, 5]],
columns=["a", "b", "a.1"])
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
def test_basic_names_warn(all_parsers):
# See gh-7160
parser = all_parsers
data = "0,1,2\n3,4,5"
expected = DataFrame([[0, 1, 2], [3, 4, 5]],
columns=["a", "b", "a.1"])
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
result = parser.read_csv(StringIO(data), names=["a", "b", "a"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("data,expected", [
("a,a,a.1\n1,2,3",
DataFrame([[1, 2, 3]], columns=["a", "a.1", "a.1.1"])),
("a,a,a.1,a.1.1,a.1.1.1,a.1.1.1.1\n1,2,3,4,5,6",
DataFrame([[1, 2, 3, 4, 5, 6]], columns=["a", "a.1", "a.1.1", "a.1.1.1",
"a.1.1.1.1", "a.1.1.1.1.1"])),
("a,a,a.3,a.1,a.2,a,a\n1,2,3,4,5,6,7",
DataFrame([[1, 2, 3, 4, 5, 6, 7]], columns=["a", "a.1", "a.3", "a.1.1",
"a.2", "a.2.1", "a.3.1"]))
])
def test_thorough_mangle_columns(all_parsers, data, expected):
# see gh-17060
parser = all_parsers
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
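# (The rule under test: each repeated name gets a ".N" suffix, and when a
# suffixed name collides with a column appearing later, the suffixing is
# applied again -- hence "thorough" mangling.)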
@pytest.mark.parametrize("data,names,expected", [
("a,b,b\n1,2,3",
["a.1", "a.1", "a.1.1"],
DataFrame([["a", "b", "b"], ["1", "2", "3"]],
columns=["a.1", "a.1.1", "a.1.1.1"])),
("a,b,c,d,e,f\n1,2,3,4,5,6",
["a", "a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1"],
DataFrame([["a", "b", "c", "d", "e", "f"],
["1", "2", "3", "4", "5", "6"]],
columns=["a", "a.1", "a.1.1", "a.1.1.1",
"a.1.1.1.1", "a.1.1.1.1.1"])),
("a,b,c,d,e,f,g\n1,2,3,4,5,6,7",
["a", "a", "a.3", "a.1", "a.2", "a", "a"],
DataFrame([["a", "b", "c", "d", "e", "f", "g"],
["1", "2", "3", "4", "5", "6", "7"]],
columns=["a", "a.1", "a.3", "a.1.1",
"a.2", "a.2.1", "a.3.1"])),
])
def test_thorough_mangle_names(all_parsers, data, names, expected):
# see gh-17095
parser = all_parsers
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
result = parser.read_csv(StringIO(data), names=names)
tm.assert_frame_equal(result, expected)
def test_mangled_unnamed_placeholders(all_parsers):
# xref gh-13017
orig_key = "0"
parser = all_parsers
orig_value = [1, 2, 3]
df = DataFrame({orig_key: orig_value})
# This test recursively updates `df`.
for i in range(3):
expected = DataFrame()
for j in range(i + 1):
expected["Unnamed: 0" + ".1" * j] = [0, 1, 2]
expected[orig_key] = orig_value
df = parser.read_csv(StringIO(df.to_csv()))
tm.assert_frame_equal(df, expected)
@@ -0,0 +1,145 @@
# -*- coding: utf-8 -*-
"""
Tests multithreading behaviour for reading and
parsing files for each parser defined in parsers.py
"""
from __future__ import division
from multiprocessing.pool import ThreadPool
import numpy as np
from pandas.compat import BytesIO, range
import pandas as pd
from pandas import DataFrame
import pandas.util.testing as tm
def _construct_dataframe(num_rows):
"""
Construct a DataFrame for testing.
Parameters
----------
num_rows : int
The number of rows for our DataFrame.
Returns
-------
df : DataFrame
"""
df = DataFrame(np.random.rand(num_rows, 5), columns=list("abcde"))
df["foo"] = "foo"
df["bar"] = "bar"
df["baz"] = "baz"
df["date"] = pd.date_range("20000101 09:00:00",
periods=num_rows,
freq="s")
df["int"] = np.arange(num_rows, dtype="int64")
return df
def test_multi_thread_string_io_read_csv(all_parsers):
# see gh-11786
parser = all_parsers
max_row_range = 10000
num_files = 100
bytes_to_df = [
"\n".join(
["%d,%d,%d" % (i, i, i) for i in range(max_row_range)]
).encode() for _ in range(num_files)]
files = [BytesIO(b) for b in bytes_to_df]
# Read all files in many threads.
    pool = ThreadPool(8)
    results = pool.map(parser.read_csv, files)
    pool.close()
    pool.join()
first_result = results[0]
for result in results:
tm.assert_frame_equal(first_result, result)
def _generate_multi_thread_dataframe(parser, path, num_rows, num_tasks):
"""
Generate a DataFrame via multi-thread.
Parameters
----------
parser : BaseParser
The parser object to use for reading the data.
path : str
The location of the CSV file to read.
num_rows : int
The number of rows to read per task.
num_tasks : int
The number of tasks to use for reading this DataFrame.
Returns
-------
df : DataFrame
"""
def reader(arg):
"""
Create a reader for part of the CSV.
Parameters
----------
arg : tuple
A tuple of the following:
* start : int
The starting row to start for parsing CSV
* nrows : int
The number of rows to read.
Returns
-------
df : DataFrame
"""
start, nrows = arg
if not start:
return parser.read_csv(path, index_col=0, header=0,
nrows=nrows, parse_dates=["date"])
return parser.read_csv(path, index_col=0, header=None,
skiprows=int(start) + 1,
nrows=nrows, parse_dates=[9])
tasks = [
(num_rows * i // num_tasks,
num_rows // num_tasks) for i in range(num_tasks)
]
    pool = ThreadPool(processes=num_tasks)
    results = pool.map(reader, tasks)
    pool.close()
    pool.join()
header = results[0].columns
for r in results[1:]:
r.columns = header
final_dataframe = pd.concat(results)
return final_dataframe
def test_multi_thread_path_multipart_read_csv(all_parsers):
# see gh-11786
num_tasks = 4
num_rows = 100000
parser = all_parsers
file_name = "__thread_pool_reader__.csv"
df = _construct_dataframe(num_rows)
with tm.ensure_clean(file_name) as path:
df.to_csv(path)
final_dataframe = _generate_multi_thread_dataframe(parser, path,
num_rows, num_tasks)
tm.assert_frame_equal(df, final_dataframe)
@@ -0,0 +1,441 @@
# -*- coding: utf-8 -*-
"""
Tests that NA values are properly handled during
parsing for all of the parsers defined in parsers.py
"""
import numpy as np
import pytest
from pandas.compat import StringIO, range
from pandas import DataFrame, Index, MultiIndex
import pandas.util.testing as tm
import pandas.io.common as com
def test_string_nas(all_parsers):
parser = all_parsers
data = """A,B,C
a,b,c
d,,f
,g,h
"""
result = parser.read_csv(StringIO(data))
expected = DataFrame([["a", "b", "c"],
["d", np.nan, "f"],
[np.nan, "g", "h"]],
columns=["A", "B", "C"])
tm.assert_frame_equal(result, expected)
def test_detect_string_na(all_parsers):
parser = all_parsers
data = """A,B
foo,bar
NA,baz
NaN,nan
"""
expected = DataFrame([["foo", "bar"], [np.nan, "baz"],
[np.nan, np.nan]], columns=["A", "B"])
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("na_values", [
["-999.0", "-999"],
[-999, -999.0],
[-999.0, -999],
["-999.0"], ["-999"],
[-999.0], [-999]
])
@pytest.mark.parametrize("data", [
"""A,B
-999,1.2
2,-999
3,4.5
""",
"""A,B
-999,1.200
2,-999.000
3,4.500
"""
])
def test_non_string_na_values(all_parsers, data, na_values):
# see gh-3611: with an odd float format, we can't match
# the string "999.0" exactly but still need float matching
parser = all_parsers
expected = DataFrame([[np.nan, 1.2], [2.0, np.nan],
[3.0, 4.5]], columns=["A", "B"])
result = parser.read_csv(StringIO(data), na_values=na_values)
tm.assert_frame_equal(result, expected)
def test_default_na_values(all_parsers):
_NA_VALUES = {"-1.#IND", "1.#QNAN", "1.#IND", "-1.#QNAN", "#N/A",
"N/A", "n/a", "NA", "#NA", "NULL", "null", "NaN", "nan",
"-NaN", "-nan", "#N/A N/A", ""}
assert _NA_VALUES == com._NA_VALUES
parser = all_parsers
nv = len(_NA_VALUES)
def f(i, v):
if i == 0:
buf = ""
else:
buf = "".join([","] * i)
buf = "{0}{1}".format(buf, v)
if i < nv - 1:
buf = "{0}{1}".format(buf, "".join([","] * (nv - i - 1)))
return buf
data = StringIO("\n".join(f(i, v) for i, v in enumerate(_NA_VALUES)))
expected = DataFrame(np.nan, columns=range(nv), index=range(nv))
result = parser.read_csv(data, header=None)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("na_values", ["baz", ["baz"]])
def test_custom_na_values(all_parsers, na_values):
parser = all_parsers
data = """A,B,C
ignore,this,row
1,NA,3
-1.#IND,5,baz
7,8,NaN
"""
expected = DataFrame([[1., np.nan, 3], [np.nan, 5, np.nan],
[7, 8, np.nan]], columns=["A", "B", "C"])
result = parser.read_csv(StringIO(data), na_values=na_values, skiprows=[1])
tm.assert_frame_equal(result, expected)
def test_bool_na_values(all_parsers):
data = """A,B,C
True,False,True
NA,True,False
False,NA,True"""
parser = all_parsers
result = parser.read_csv(StringIO(data))
expected = DataFrame({"A": np.array([True, np.nan, False], dtype=object),
"B": np.array([False, True, np.nan], dtype=object),
"C": [True, False, True]})
tm.assert_frame_equal(result, expected)
def test_na_value_dict(all_parsers):
data = """A,B,C
foo,bar,NA
bar,foo,foo
foo,bar,NA
bar,foo,foo"""
parser = all_parsers
df = parser.read_csv(StringIO(data),
na_values={"A": ["foo"], "B": ["bar"]})
expected = DataFrame({"A": [np.nan, "bar", np.nan, "bar"],
"B": [np.nan, "foo", np.nan, "foo"],
"C": [np.nan, "foo", np.nan, "foo"]})
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize("index_col,expected", [
([0], DataFrame({"b": [np.nan], "c": [1], "d": [5]},
index=Index([0], name="a"))),
([0, 2], DataFrame({"b": [np.nan], "d": [5]},
index=MultiIndex.from_tuples(
[(0, 1)], names=["a", "c"]))),
(["a", "c"], DataFrame({"b": [np.nan], "d": [5]},
index=MultiIndex.from_tuples(
[(0, 1)], names=["a", "c"]))),
])
def test_na_value_dict_multi_index(all_parsers, index_col, expected):
data = """\
a,b,c,d
0,NA,1,5
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), na_values=set(),
index_col=index_col)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kwargs,expected", [
(dict(), DataFrame({"A": ["a", "b", np.nan, "d", "e", np.nan, "g"],
"B": [1, 2, 3, 4, 5, 6, 7],
"C": ["one", "two", "three", np.nan, "five",
np.nan, "seven"]})),
(dict(na_values={"A": [], "C": []}, keep_default_na=False),
DataFrame({"A": ["a", "b", "", "d", "e", "nan", "g"],
"B": [1, 2, 3, 4, 5, 6, 7],
"C": ["one", "two", "three", "nan", "five", "", "seven"]})),
(dict(na_values=["a"], keep_default_na=False),
DataFrame({"A": [np.nan, "b", "", "d", "e", "nan", "g"],
"B": [1, 2, 3, 4, 5, 6, 7],
"C": ["one", "two", "three", "nan", "five", "", "seven"]})),
(dict(na_values={"A": [], "C": []}),
DataFrame({"A": ["a", "b", np.nan, "d", "e", np.nan, "g"],
"B": [1, 2, 3, 4, 5, 6, 7],
"C": ["one", "two", "three", np.nan,
"five", np.nan, "seven"]})),
])
def test_na_values_keep_default(all_parsers, kwargs, expected):
data = """\
A,B,C
a,1,one
b,2,two
,3,three
d,4,nan
e,5,five
nan,6,
g,7,seven
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
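# A compact sketch of the keep_default_na / na_values interaction
# exercised above (hypothetical helper, using only the public
# read_csv API):
def _sketch_keep_default_na():
    import pandas as pd
    from pandas.compat import StringIO

    data = "a\nNA\nfoo"
    # Defaults on, plus a custom sentinel: both "NA" and "foo" -> NaN.
    both = pd.read_csv(StringIO(data), na_values=["foo"])
    # Defaults off: only the explicit sentinel "foo" -> NaN, while
    # "NA" survives as a plain string.
    only_custom = pd.read_csv(StringIO(data), na_values=["foo"],
                              keep_default_na=False)
    return both, only_custom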
def test_no_na_values_no_keep_default(all_parsers):
# see gh-4318: passing na_values=None and
# keep_default_na=False yields "None" as an NA value
data = """\
A,B,C
a,1,None
b,2,two
,3,None
d,4,nan
e,5,five
nan,6,
g,7,seven
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), keep_default_na=False)
expected = DataFrame({"A": ["a", "b", "", "d", "e", "nan", "g"],
"B": [1, 2, 3, 4, 5, 6, 7],
"C": ["None", "two", "None", "nan",
"five", "", "seven"]})
tm.assert_frame_equal(result, expected)
def test_no_keep_default_na_dict_na_values(all_parsers):
# see gh-19227
data = "a,b\n,2"
parser = all_parsers
result = parser.read_csv(StringIO(data), na_values={"b": ["2"]},
keep_default_na=False)
expected = DataFrame({"a": [""], "b": [np.nan]})
tm.assert_frame_equal(result, expected)
def test_no_keep_default_na_dict_na_scalar_values(all_parsers):
# see gh-19227
#
# Scalar values shouldn't cause the parsing to crash or fail.
data = "a,b\n1,2"
parser = all_parsers
df = parser.read_csv(StringIO(data), na_values={"b": 2},
keep_default_na=False)
expected = DataFrame({"a": [1], "b": [np.nan]})
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize("col_zero_na_values", [
113125, "113125"
])
def test_no_keep_default_na_dict_na_values_diff_reprs(all_parsers,
col_zero_na_values):
# see gh-19227
data = """\
113125,"blah","/blaha",kjsdkj,412.166,225.874,214.008
729639,"qwer","",asdfkj,466.681,,252.373
"""
parser = all_parsers
expected = DataFrame({0: [np.nan, 729639.0],
1: [np.nan, "qwer"],
2: ["/blaha", np.nan],
3: ["kjsdkj", "asdfkj"],
4: [412.166, 466.681],
5: ["225.874", ""],
6: [np.nan, 252.373]})
result = parser.read_csv(StringIO(data), header=None,
keep_default_na=False,
na_values={2: "", 6: "214.008",
1: "blah", 0: col_zero_na_values})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("na_filter,row_data", [
(True, [[1, "A"], [np.nan, np.nan], [3, "C"]]),
(False, [["1", "A"], ["nan", "B"], ["3", "C"]]),
])
def test_na_values_na_filter_override(all_parsers, na_filter, row_data):
data = """\
A,B
1,A
nan,B
3,C
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), na_values=["B"],
na_filter=na_filter)
expected = DataFrame(row_data, columns=["A", "B"])
tm.assert_frame_equal(result, expected)
def test_na_trailing_columns(all_parsers):
parser = all_parsers
data = """Date,Currency,Symbol,Type,Units,UnitPrice,Cost,Tax
2012-03-14,USD,AAPL,BUY,1000
2012-05-12,USD,SBUX,SELL,500"""
# Trailing columns should be all NaN.
result = parser.read_csv(StringIO(data))
expected = DataFrame([
["2012-03-14", "USD", "AAPL", "BUY", 1000, np.nan, np.nan, np.nan],
["2012-05-12", "USD", "SBUX", "SELL", 500, np.nan, np.nan, np.nan],
], columns=["Date", "Currency", "Symbol", "Type",
"Units", "UnitPrice", "Cost", "Tax"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("na_values,row_data", [
(1, [[np.nan, 2.0], [2.0, np.nan]]),
({"a": 2, "b": 1}, [[1.0, 2.0], [np.nan, np.nan]]),
])
def test_na_values_scalar(all_parsers, na_values, row_data):
# see gh-12224
parser = all_parsers
names = ["a", "b"]
data = "1,2\n2,1"
result = parser.read_csv(StringIO(data), names=names, na_values=na_values)
expected = DataFrame(row_data, columns=names)
tm.assert_frame_equal(result, expected)
def test_na_values_dict_aliasing(all_parsers):
parser = all_parsers
na_values = {"a": 2, "b": 1}
na_values_copy = na_values.copy()
names = ["a", "b"]
data = "1,2\n2,1"
expected = DataFrame([[1.0, 2.0], [np.nan, np.nan]], columns=names)
result = parser.read_csv(StringIO(data), names=names, na_values=na_values)
tm.assert_frame_equal(result, expected)
tm.assert_dict_equal(na_values, na_values_copy)
def test_na_values_dict_col_index(all_parsers):
# see gh-14203
data = "a\nfoo\n1"
parser = all_parsers
na_values = {0: "foo"}
result = parser.read_csv(StringIO(data), na_values=na_values)
expected = DataFrame({"a": [np.nan, 1]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("data,kwargs,expected", [
(str(2**63) + "\n" + str(2**63 + 1),
dict(na_values=[2**63]), DataFrame([str(2**63), str(2**63 + 1)])),
(str(2**63) + ",1" + "\n,2",
dict(), DataFrame([[str(2**63), 1], ['', 2]])),
(str(2**63) + "\n1",
dict(na_values=[2**63]), DataFrame([np.nan, 1])),
])
def test_na_values_uint64(all_parsers, data, kwargs, expected):
# see gh-14983
parser = all_parsers
result = parser.read_csv(StringIO(data), header=None, **kwargs)
tm.assert_frame_equal(result, expected)
def test_empty_na_values_no_default_with_index(all_parsers):
# see gh-15835
data = "a,1\nb,2"
parser = all_parsers
expected = DataFrame({"1": [2]}, index=Index(["b"], name="a"))
result = parser.read_csv(StringIO(data), index_col=0,
keep_default_na=False)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("na_filter,index_data", [
(False, ["", "5"]),
(True, [np.nan, 5.0]),
])
def test_no_na_filter_on_index(all_parsers, na_filter, index_data):
# see gh-5239
#
# Don't parse NA-values in index unless na_filter=True
parser = all_parsers
data = "a,b,c\n1,,3\n4,5,6"
expected = DataFrame({"a": [1, 4], "c": [3, 6]},
index=Index(index_data, name="b"))
result = parser.read_csv(StringIO(data), index_col=[1],
na_filter=na_filter)
tm.assert_frame_equal(result, expected)
def test_inf_na_values_with_int_index(all_parsers):
# see gh-17128
parser = all_parsers
data = "idx,col1,col2\n1,3,4\n2,inf,-inf"
# Don't fail with OverflowError with inf's and integer index column.
out = parser.read_csv(StringIO(data), index_col=[0],
na_values=["inf", "-inf"])
expected = DataFrame({"col1": [3, np.nan], "col2": [4, np.nan]},
index=Index([1, 2], name="idx"))
tm.assert_frame_equal(out, expected)
@pytest.mark.parametrize("na_filter", [True, False])
def test_na_values_with_dtype_str_and_na_filter(all_parsers, na_filter):
# see gh-20377
parser = all_parsers
data = "a,b,c\n1,,3\n4,5,6"
# na_filter=True --> missing value becomes NaN.
# na_filter=False --> missing value remains empty string.
empty = np.nan if na_filter else ""
expected = DataFrame({"a": ["1", "4"],
"b": [empty, "5"],
"c": ["3", "6"]})
result = parser.read_csv(StringIO(data), na_filter=na_filter, dtype=str)
tm.assert_frame_equal(result, expected)
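# The na_filter switch in isolation (a sketch using only the public
# read_csv API; the helper name is hypothetical):
def _sketch_na_filter():
    import pandas as pd
    from pandas.compat import StringIO

    data = "a,b\n1,\n2,5"
    # na_filter=True (the default) scans for NA sentinels: "" -> NaN.
    filtered = pd.read_csv(StringIO(data))
    # na_filter=False skips the scan entirely, which is faster for data
    # known to be clean; the empty field stays an empty string.
    unfiltered = pd.read_csv(StringIO(data), na_filter=False, dtype=str)
    return filtered, unfiltered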
@pytest.mark.parametrize("data, na_values", [
("false,1\n,1\ntrue", None),
("false,1\nnull,1\ntrue", None),
("false,1\nnan,1\ntrue", None),
("false,1\nfoo,1\ntrue", 'foo'),
("false,1\nfoo,1\ntrue", ['foo']),
("false,1\nfoo,1\ntrue", {'a': 'foo'}),
])
def test_cast_NA_to_bool_raises_error(all_parsers, data, na_values):
parser = all_parsers
msg = ("(Bool column has NA values in column [0a])|"
"(cannot safely convert passed user dtype of "
"bool for object dtyped data in column 0)")
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=None, names=['a', 'b'],
dtype={'a': 'bool'}, na_values=na_values)
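# Background for the error above (a sketch, not parser internals):
# NumPy's bool dtype has no missing-value representation, and casting
# NaN to bool silently yields True, so the parsers raise rather than
# corrupt the data.
def _sketch_why_bool_na_is_unsafe():
    import numpy as np

    return np.array([np.nan]).astype(bool)  # array([ True])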
@@ -0,0 +1,204 @@
# -*- coding: utf-8 -*-
"""
Tests the parsers' ability to read and parse non-local files,
which therefore require a network connection to be read.
"""
import logging
import numpy as np
import pytest
from pandas.compat import BytesIO, StringIO
import pandas.util._test_decorators as td
from pandas import DataFrame
import pandas.util.testing as tm
from pandas.io.parsers import read_csv
@pytest.mark.network
@pytest.mark.parametrize(
"compress_type, extension", [
('gzip', '.gz'), ('bz2', '.bz2'), ('zip', '.zip'),
pytest.param('xz', '.xz', marks=td.skip_if_no_lzma)
]
)
@pytest.mark.parametrize('mode', ['explicit', 'infer'])
@pytest.mark.parametrize('engine', ['python', 'c'])
def test_compressed_urls(salaries_table, compress_type, extension, mode,
engine):
check_compressed_urls(salaries_table, compress_type, extension, mode,
engine)
@tm.network
def check_compressed_urls(salaries_table, compression, extension, mode,
engine):
# test reading compressed urls with various engines and
# extension inference
base_url = ('https://github.com/pandas-dev/pandas/raw/master/'
'pandas/tests/io/parser/data/salaries.csv')
url = base_url + extension
if mode != 'explicit':
compression = mode
url_table = read_csv(url, sep='\t', compression=compression, engine=engine)
tm.assert_frame_equal(url_table, salaries_table)
@pytest.fixture
def tips_df(datapath):
"""DataFrame with the tips dataset."""
return read_csv(datapath('io', 'parser', 'data', 'tips.csv'))
@pytest.mark.usefixtures("s3_resource")
@td.skip_if_not_us_locale()
class TestS3(object):
def test_parse_public_s3_bucket(self, tips_df):
pytest.importorskip('s3fs')
# More of an integration test due to the not-public contents portion;
# we can probably mock this, though.
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
df = read_csv('s3://pandas-test/tips.csv' +
ext, compression=comp)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(df, tips_df)
# Read public file from bucket with not-public contents
df = read_csv('s3://cant_get_it/tips.csv')
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(df, tips_df)
def test_parse_public_s3n_bucket(self, tips_df):
# Read from AWS s3 as "s3n" URL
df = read_csv('s3n://pandas-test/tips.csv', nrows=10)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(tips_df.iloc[:10], df)
def test_parse_public_s3a_bucket(self, tips_df):
# Read from AWS s3 as "s3a" URL
df = read_csv('s3a://pandas-test/tips.csv', nrows=10)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(tips_df.iloc[:10], df)
def test_parse_public_s3_bucket_nrows(self, tips_df):
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
df = read_csv('s3://pandas-test/tips.csv' +
ext, nrows=10, compression=comp)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(tips_df.iloc[:10], df)
def test_parse_public_s3_bucket_chunked(self, tips_df):
# Read with a chunksize
chunksize = 5
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
chunksize=chunksize, compression=comp)
assert df_reader.chunksize == chunksize
for i_chunk in [0, 1, 2]:
# Read a couple of chunks and make sure we see them
# properly.
df = df_reader.get_chunk()
assert isinstance(df, DataFrame)
assert not df.empty
true_df = tips_df.iloc[
chunksize * i_chunk: chunksize * (i_chunk + 1)]
tm.assert_frame_equal(true_df, df)
def test_parse_public_s3_bucket_chunked_python(self, tips_df):
# Read with a chunksize using the Python parser
chunksize = 5
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
chunksize=chunksize, compression=comp,
engine='python')
assert df_reader.chunksize == chunksize
for i_chunk in [0, 1, 2]:
# Read a couple of chunks and make sure we see them properly.
df = df_reader.get_chunk()
assert isinstance(df, DataFrame)
assert not df.empty
true_df = tips_df.iloc[
chunksize * i_chunk: chunksize * (i_chunk + 1)]
tm.assert_frame_equal(true_df, df)
def test_parse_public_s3_bucket_python(self, tips_df):
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
compression=comp)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(df, tips_df)
def test_infer_s3_compression(self, tips_df):
for ext in ['', '.gz', '.bz2']:
df = read_csv('s3://pandas-test/tips.csv' + ext,
engine='python', compression='infer')
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(df, tips_df)
def test_parse_public_s3_bucket_nrows_python(self, tips_df):
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
nrows=10, compression=comp)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(tips_df.iloc[:10], df)
def test_s3_fails(self):
with pytest.raises(IOError):
read_csv('s3://nyqpug/asdf.csv')
# Receive a permission error when trying to read a private bucket.
# It's irrelevant here that this isn't actually a table.
with pytest.raises(IOError):
read_csv('s3://cant_get_it/')
def test_read_csv_handles_boto_s3_object(self,
s3_resource,
tips_file):
# see gh-16135
s3_object = s3_resource.meta.client.get_object(
Bucket='pandas-test',
Key='tips.csv')
result = read_csv(BytesIO(s3_object["Body"].read()), encoding='utf8')
assert isinstance(result, DataFrame)
assert not result.empty
expected = read_csv(tips_file)
tm.assert_frame_equal(result, expected)
def test_read_csv_chunked_download(self, s3_resource, caplog):
# 8 MB; s3fs uses 5 MB chunks
df = DataFrame(np.random.randn(100000, 4), columns=list('abcd'))
str_buf = StringIO()
df.to_csv(str_buf)
buf = BytesIO(str_buf.getvalue().encode('utf-8'))
s3_resource.Bucket("pandas-test").put_object(
Key="large-file.csv",
Body=buf)
with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
read_csv("s3://pandas-test/large-file.csv", nrows=5)
# log of fetch_range (start, stop)
assert ((0, 5505024) in {x.args[-2:] for x in caplog.records})
@@ -0,0 +1,849 @@
# -*- coding: utf-8 -*-
"""
Tests date parsing functionality for all of the
parsers defined in parsers.py
"""
from datetime import date, datetime
import numpy as np
import pytest
import pytz
from pandas._libs.tslib import Timestamp
from pandas._libs.tslibs import parsing
from pandas.compat import StringIO, lrange, parse_date
from pandas.compat.numpy import np_array_datetime64_compat
import pandas as pd
from pandas import DataFrame, DatetimeIndex, Index, MultiIndex
from pandas.core.indexes.datetimes import date_range
import pandas.util.testing as tm
import pandas.io.date_converters as conv
import pandas.io.parsers as parsers
def test_separator_date_conflict(all_parsers):
# Regression test for gh-4678
#
# Make sure thousands separator and
# date parsing do not conflict.
parser = all_parsers
data = "06-02-2013;13:00;1-000.215"
expected = DataFrame([[datetime(2013, 6, 2, 13, 0, 0), 1000.215]],
columns=["Date", 2])
df = parser.read_csv(StringIO(data), sep=";", thousands="-",
parse_dates={"Date": [0, 1]}, header=None)
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize("keep_date_col", [True, False])
def test_multiple_date_col_custom(all_parsers, keep_date_col):
data = """\
KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000
"""
parser = all_parsers
def date_parser(*date_cols):
"""
Test date parser.
Parameters
----------
date_cols : args
The list of data columns to parse.
Returns
-------
parsed : Series
"""
return parsing.try_parse_dates(parsers._concat_date_cols(date_cols))
result = parser.read_csv(StringIO(data), header=None,
date_parser=date_parser, prefix="X",
parse_dates={"actual": [1, 2],
"nominal": [1, 3]},
keep_date_col=keep_date_col)
expected = DataFrame([
[datetime(1999, 1, 27, 19, 0), datetime(1999, 1, 27, 18, 56),
"KORD", "19990127", " 19:00:00", " 18:56:00",
0.81, 2.81, 7.2, 0.0, 280.0],
[datetime(1999, 1, 27, 20, 0), datetime(1999, 1, 27, 19, 56),
"KORD", "19990127", " 20:00:00", " 19:56:00",
0.01, 2.21, 7.2, 0.0, 260.0],
[datetime(1999, 1, 27, 21, 0), datetime(1999, 1, 27, 20, 56),
"KORD", "19990127", " 21:00:00", " 20:56:00",
-0.59, 2.21, 5.7, 0.0, 280.0],
[datetime(1999, 1, 27, 21, 0), datetime(1999, 1, 27, 21, 18),
"KORD", "19990127", " 21:00:00", " 21:18:00",
-0.99, 2.01, 3.6, 0.0, 270.0],
[datetime(1999, 1, 27, 22, 0), datetime(1999, 1, 27, 21, 56),
"KORD", "19990127", " 22:00:00", " 21:56:00",
-0.59, 1.71, 5.1, 0.0, 290.0],
[datetime(1999, 1, 27, 23, 0), datetime(1999, 1, 27, 22, 56),
"KORD", "19990127", " 23:00:00", " 22:56:00",
-0.59, 1.71, 4.6, 0.0, 280.0],
], columns=["actual", "nominal", "X0", "X1", "X2",
"X3", "X4", "X5", "X6", "X7", "X8"])
if not keep_date_col:
expected = expected.drop(["X1", "X2", "X3"], axis=1)
elif parser.engine == "python":
expected["X1"] = expected["X1"].astype(np.int64)
# Python can sometimes be flaky about how
# the aggregated columns are entered, so
# this standardizes the order.
result = result[expected.columns]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("keep_date_col", [True, False])
def test_multiple_date_col(all_parsers, keep_date_col):
data = """\
KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), header=None,
prefix="X", parse_dates=[[1, 2], [1, 3]],
keep_date_col=keep_date_col)
expected = DataFrame([
[datetime(1999, 1, 27, 19, 0), datetime(1999, 1, 27, 18, 56),
"KORD", "19990127", " 19:00:00", " 18:56:00",
0.81, 2.81, 7.2, 0.0, 280.0],
[datetime(1999, 1, 27, 20, 0), datetime(1999, 1, 27, 19, 56),
"KORD", "19990127", " 20:00:00", " 19:56:00",
0.01, 2.21, 7.2, 0.0, 260.0],
[datetime(1999, 1, 27, 21, 0), datetime(1999, 1, 27, 20, 56),
"KORD", "19990127", " 21:00:00", " 20:56:00",
-0.59, 2.21, 5.7, 0.0, 280.0],
[datetime(1999, 1, 27, 21, 0), datetime(1999, 1, 27, 21, 18),
"KORD", "19990127", " 21:00:00", " 21:18:00",
-0.99, 2.01, 3.6, 0.0, 270.0],
[datetime(1999, 1, 27, 22, 0), datetime(1999, 1, 27, 21, 56),
"KORD", "19990127", " 22:00:00", " 21:56:00",
-0.59, 1.71, 5.1, 0.0, 290.0],
[datetime(1999, 1, 27, 23, 0), datetime(1999, 1, 27, 22, 56),
"KORD", "19990127", " 23:00:00", " 22:56:00",
-0.59, 1.71, 4.6, 0.0, 280.0],
], columns=["X1_X2", "X1_X3", "X0", "X1", "X2",
"X3", "X4", "X5", "X6", "X7", "X8"])
if not keep_date_col:
expected = expected.drop(["X1", "X2", "X3"], axis=1)
elif parser.engine == "python":
expected["X1"] = expected["X1"].astype(np.int64)
tm.assert_frame_equal(result, expected)
def test_date_col_as_index_col(all_parsers):
data = """\
KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
KORD,19990127 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
KORD,19990127 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), header=None, prefix="X",
parse_dates=[1], index_col=1)
index = Index([datetime(1999, 1, 27, 19, 0), datetime(1999, 1, 27, 20, 0),
datetime(1999, 1, 27, 21, 0), datetime(1999, 1, 27, 21, 0),
datetime(1999, 1, 27, 22, 0)], name="X1")
expected = DataFrame([
["KORD", " 18:56:00", 0.81, 2.81, 7.2, 0.0, 280.0],
["KORD", " 19:56:00", 0.01, 2.21, 7.2, 0.0, 260.0],
["KORD", " 20:56:00", -0.59, 2.21, 5.7, 0.0, 280.0],
["KORD", " 21:18:00", -0.99, 2.01, 3.6, 0.0, 270.0],
["KORD", " 21:56:00", -0.59, 1.71, 5.1, 0.0, 290.0],
], columns=["X0", "X2", "X3", "X4", "X5", "X6", "X7"], index=index)
tm.assert_frame_equal(result, expected)
def test_multiple_date_cols_int_cast(all_parsers):
data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n"
"KORD,19990127, 20:00:00, 19:56:00, 0.0100\n"
"KORD,19990127, 21:00:00, 20:56:00, -0.5900\n"
"KORD,19990127, 21:00:00, 21:18:00, -0.9900\n"
"KORD,19990127, 22:00:00, 21:56:00, -0.5900\n"
"KORD,19990127, 23:00:00, 22:56:00, -0.5900")
parse_dates = {"actual": [1, 2], "nominal": [1, 3]}
parser = all_parsers
result = parser.read_csv(StringIO(data), header=None,
date_parser=conv.parse_date_time,
parse_dates=parse_dates, prefix="X")
expected = DataFrame([
[datetime(1999, 1, 27, 19, 0), datetime(1999, 1, 27, 18, 56),
"KORD", 0.81],
[datetime(1999, 1, 27, 20, 0), datetime(1999, 1, 27, 19, 56),
"KORD", 0.01],
[datetime(1999, 1, 27, 21, 0), datetime(1999, 1, 27, 20, 56),
"KORD", -0.59],
[datetime(1999, 1, 27, 21, 0), datetime(1999, 1, 27, 21, 18),
"KORD", -0.99],
[datetime(1999, 1, 27, 22, 0), datetime(1999, 1, 27, 21, 56),
"KORD", -0.59],
[datetime(1999, 1, 27, 23, 0), datetime(1999, 1, 27, 22, 56),
"KORD", -0.59],
], columns=["actual", "nominal", "X0", "X4"])
# Python can sometimes be flaky about how
# the aggregated columns are entered, so
# this standardizes the order.
result = result[expected.columns]
tm.assert_frame_equal(result, expected)
def test_multiple_date_col_timestamp_parse(all_parsers):
parser = all_parsers
data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25
05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25"""
result = parser.read_csv(StringIO(data), parse_dates=[[0, 1]],
header=None, date_parser=Timestamp)
expected = DataFrame([
[Timestamp("05/31/2012, 15:30:00.029"),
1306.25, 1, "E", 0, np.nan, 1306.25],
[Timestamp("05/31/2012, 15:30:00.029"),
1306.25, 8, "E", 0, np.nan, 1306.25]
], columns=["0_1", 2, 3, 4, 5, 6, 7])
tm.assert_frame_equal(result, expected)
def test_multiple_date_cols_with_header(all_parsers):
parser = all_parsers
data = """\
ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir
KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000"""
result = parser.read_csv(StringIO(data), parse_dates={"nominal": [1, 2]})
expected = DataFrame([
[datetime(1999, 1, 27, 19, 0), "KORD", " 18:56:00",
0.81, 2.81, 7.2, 0.0, 280.0],
[datetime(1999, 1, 27, 20, 0), "KORD", " 19:56:00",
0.01, 2.21, 7.2, 0.0, 260.0],
[datetime(1999, 1, 27, 21, 0), "KORD", " 20:56:00",
-0.59, 2.21, 5.7, 0.0, 280.0],
[datetime(1999, 1, 27, 21, 0), "KORD", " 21:18:00",
-0.99, 2.01, 3.6, 0.0, 270.0],
[datetime(1999, 1, 27, 22, 0), "KORD", " 21:56:00",
-0.59, 1.71, 5.1, 0.0, 290.0],
[datetime(1999, 1, 27, 23, 0), "KORD", " 22:56:00",
-0.59, 1.71, 4.6, 0.0, 280.0],
], columns=["nominal", "ID", "ActualTime", "TDew",
"TAir", "Windspeed", "Precip", "WindDir"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("data,parse_dates,msg", [
("""\
date_NominalTime,date,NominalTime
KORD1,19990127, 19:00:00
KORD2,19990127, 20:00:00""", [[1, 2]], ("New date column already "
"in dict date_NominalTime")),
("""\
ID,date,nominalTime
KORD,19990127, 19:00:00
KORD,19990127, 20:00:00""", dict(ID=[1, 2]), "Date column ID already in dict")
])
def test_multiple_date_col_name_collision(all_parsers, data, parse_dates, msg):
parser = all_parsers
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), parse_dates=parse_dates)
def test_date_parser_int_bug(all_parsers):
# see gh-3071
parser = all_parsers
data = ("posix_timestamp,elapsed,sys,user,queries,query_time,rows,"
"accountid,userid,contactid,level,silo,method\n"
"1343103150,0.062353,0,4,6,0.01690,3,"
"12345,1,-1,3,invoice_InvoiceResource,search\n")
result = parser.read_csv(
StringIO(data), index_col=0, parse_dates=[0],
date_parser=lambda x: datetime.utcfromtimestamp(int(x)))
expected = DataFrame([[0.062353, 0, 4, 6, 0.01690, 3, 12345, 1, -1,
3, "invoice_InvoiceResource", "search"]],
columns=["elapsed", "sys", "user", "queries",
"query_time", "rows", "accountid",
"userid", "contactid", "level",
"silo", "method"],
index=Index([Timestamp("2012-07-24 04:12:30")],
name="posix_timestamp"))
tm.assert_frame_equal(result, expected)
def test_nat_parse(all_parsers):
# see gh-3062
parser = all_parsers
df = DataFrame({"A": np.asarray(lrange(10), dtype="float64"),
                "B": pd.Timestamp("20010101")})
df.iloc[3:6, :] = np.nan
with tm.ensure_clean("__nat_parse_.csv") as path:
df.to_csv(path)
result = parser.read_csv(path, index_col=0, parse_dates=["B"])
tm.assert_frame_equal(result, df)
def test_csv_custom_parser(all_parsers):
data = """A,B,C
20090101,a,1,2
20090102,b,3,4
20090103,c,4,5
"""
parser = all_parsers
result = parser.read_csv(
StringIO(data),
date_parser=lambda x: datetime.strptime(x, "%Y%m%d"))
expected = parser.read_csv(StringIO(data), parse_dates=True)
tm.assert_frame_equal(result, expected)
def test_parse_dates_implicit_first_col(all_parsers):
data = """A,B,C
20090101,a,1,2
20090102,b,3,4
20090103,c,4,5
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), parse_dates=True)
expected = parser.read_csv(StringIO(data), index_col=0,
parse_dates=True)
tm.assert_frame_equal(result, expected)
def test_parse_dates_string(all_parsers):
data = """date,A,B,C
20090101,a,1,2
20090102,b,3,4
20090103,c,4,5
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col="date",
parse_dates=["date"])
index = date_range("1/1/2009", periods=3)
index.name = "date"
expected = DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 4],
"C": [2, 4, 5]}, index=index)
tm.assert_frame_equal(result, expected)
# The bug in https://github.com/dateutil/dateutil/issues/217
# has been addressed, but we just don't pass in `yearfirst`.
@pytest.mark.xfail(reason="yearfirst is not surfaced in read_*")
@pytest.mark.parametrize("parse_dates", [
[["date", "time"]],
[[0, 1]]
])
def test_yy_format_with_year_first(all_parsers, parse_dates):
data = """date,time,B,C
090131,0010,1,2
090228,1020,3,4
090331,0830,5,6
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=0,
parse_dates=parse_dates)
index = DatetimeIndex([datetime(2009, 1, 31, 0, 10, 0),
datetime(2009, 2, 28, 10, 20, 0),
datetime(2009, 3, 31, 8, 30, 0)],
dtype=object, name="date_time")
expected = DataFrame({"B": [1, 3, 5], "C": [2, 4, 6]}, index=index)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("parse_dates", [[0, 2], ["a", "c"]])
def test_parse_dates_column_list(all_parsers, parse_dates):
data = "a,b,c\n01/01/2010,1,15/02/2010"
parser = all_parsers
expected = DataFrame({"a": [datetime(2010, 1, 1)], "b": [1],
"c": [datetime(2010, 2, 15)]})
expected = expected.set_index(["a", "b"])
result = parser.read_csv(StringIO(data), index_col=[0, 1],
parse_dates=parse_dates, dayfirst=True)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("index_col", [[0, 1], [1, 0]])
def test_multi_index_parse_dates(all_parsers, index_col):
data = """index1,index2,A,B,C
20090101,one,a,1,2
20090101,two,b,3,4
20090101,three,c,4,5
20090102,one,a,1,2
20090102,two,b,3,4
20090102,three,c,4,5
20090103,one,a,1,2
20090103,two,b,3,4
20090103,three,c,4,5
"""
parser = all_parsers
index = MultiIndex.from_product([
(datetime(2009, 1, 1), datetime(2009, 1, 2),
datetime(2009, 1, 3)), ("one", "two", "three")],
names=["index1", "index2"])
# Out of order.
if index_col == [1, 0]:
index = index.swaplevel(0, 1)
expected = DataFrame([["a", 1, 2], ["b", 3, 4], ["c", 4, 5],
["a", 1, 2], ["b", 3, 4], ["c", 4, 5],
["a", 1, 2], ["b", 3, 4], ["c", 4, 5]],
columns=["A", "B", "C"], index=index)
result = parser.read_csv(StringIO(data), index_col=index_col,
parse_dates=True)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kwargs", [
dict(dayfirst=True), dict(day_first=True)
])
def test_parse_dates_custom_euro_format(all_parsers, kwargs):
parser = all_parsers
data = """foo,bar,baz
31/01/2010,1,2
01/02/2010,1,NA
02/02/2010,1,2
"""
if "dayfirst" in kwargs:
df = parser.read_csv(StringIO(data), names=["time", "Q", "NTU"],
date_parser=lambda d: parse_date(d, **kwargs),
header=0, index_col=0, parse_dates=True,
na_values=["NA"])
exp_index = Index([datetime(2010, 1, 31), datetime(2010, 2, 1),
datetime(2010, 2, 2)], name="time")
expected = DataFrame({"Q": [1, 1, 1], "NTU": [2, np.nan, 2]},
index=exp_index, columns=["Q", "NTU"])
tm.assert_frame_equal(df, expected)
else:
msg = "got an unexpected keyword argument 'day_first'"
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), names=["time", "Q", "NTU"],
date_parser=lambda d: parse_date(d, **kwargs),
skiprows=[0], index_col=0, parse_dates=True,
na_values=["NA"])
def test_parse_tz_aware(all_parsers):
# See gh-1693
parser = all_parsers
data = "Date,x\n2012-06-13T01:39:00Z,0.5"
result = parser.read_csv(StringIO(data), index_col=0,
parse_dates=True)
expected = DataFrame({"x": [0.5]}, index=Index([Timestamp(
"2012-06-13 01:39:00+00:00")], name="Date"))
tm.assert_frame_equal(result, expected)
assert result.index.tz is pytz.utc
@pytest.mark.parametrize("parse_dates,index_col", [
({"nominal": [1, 2]}, "nominal"),
({"nominal": [1, 2]}, 0),
([[1, 2]], 0),
])
def test_multiple_date_cols_index(all_parsers, parse_dates, index_col):
parser = all_parsers
data = """
ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir
KORD1,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
KORD2,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
KORD3,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
KORD4,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000
"""
expected = DataFrame([
[datetime(1999, 1, 27, 19, 0), "KORD1", " 18:56:00",
0.81, 2.81, 7.2, 0.0, 280.0],
[datetime(1999, 1, 27, 20, 0), "KORD2", " 19:56:00",
0.01, 2.21, 7.2, 0.0, 260.0],
[datetime(1999, 1, 27, 21, 0), "KORD3", " 20:56:00",
-0.59, 2.21, 5.7, 0.0, 280.0],
[datetime(1999, 1, 27, 21, 0), "KORD4", " 21:18:00",
-0.99, 2.01, 3.6, 0.0, 270.0],
[datetime(1999, 1, 27, 22, 0), "KORD5", " 21:56:00",
-0.59, 1.71, 5.1, 0.0, 290.0],
[datetime(1999, 1, 27, 23, 0), "KORD6", " 22:56:00",
-0.59, 1.71, 4.6, 0.0, 280.0],
], columns=["nominal", "ID", "ActualTime", "TDew",
"TAir", "Windspeed", "Precip", "WindDir"])
expected = expected.set_index("nominal")
if not isinstance(parse_dates, dict):
expected.index.name = "date_NominalTime"
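# The list form of parse_dates derives the combined column's name by
# joining the source column names with "_" ("date_NominalTime"); the
# dict form lets the caller pick the name ("nominal").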
result = parser.read_csv(StringIO(data), parse_dates=parse_dates,
index_col=index_col)
tm.assert_frame_equal(result, expected)
def test_multiple_date_cols_chunked(all_parsers):
parser = all_parsers
data = """\
ID,date,nominalTime,actualTime,A,B,C,D,E
KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000
"""
expected = DataFrame([
[datetime(1999, 1, 27, 19, 0), "KORD", " 18:56:00",
0.81, 2.81, 7.2, 0.0, 280.0],
[datetime(1999, 1, 27, 20, 0), "KORD", " 19:56:00",
0.01, 2.21, 7.2, 0.0, 260.0],
[datetime(1999, 1, 27, 21, 0), "KORD", " 20:56:00",
-0.59, 2.21, 5.7, 0.0, 280.0],
[datetime(1999, 1, 27, 21, 0), "KORD", " 21:18:00",
-0.99, 2.01, 3.6, 0.0, 270.0],
[datetime(1999, 1, 27, 22, 0), "KORD", " 21:56:00",
-0.59, 1.71, 5.1, 0.0, 290.0],
[datetime(1999, 1, 27, 23, 0), "KORD", " 22:56:00",
-0.59, 1.71, 4.6, 0.0, 280.0],
], columns=["nominal", "ID", "actualTime", "A", "B", "C", "D", "E"])
expected = expected.set_index("nominal")
reader = parser.read_csv(StringIO(data), parse_dates={"nominal": [1, 2]},
index_col="nominal", chunksize=2)
chunks = list(reader)
tm.assert_frame_equal(chunks[0], expected[:2])
tm.assert_frame_equal(chunks[1], expected[2:4])
tm.assert_frame_equal(chunks[2], expected[4:])
def test_multiple_date_col_named_index_compat(all_parsers):
parser = all_parsers
data = """\
ID,date,nominalTime,actualTime,A,B,C,D,E
KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000
"""
with_indices = parser.read_csv(StringIO(data),
parse_dates={"nominal": [1, 2]},
index_col="nominal")
with_names = parser.read_csv(StringIO(data), index_col="nominal",
parse_dates={"nominal": [
"date", "nominalTime"]})
tm.assert_frame_equal(with_indices, with_names)
def test_multiple_date_col_multiple_index_compat(all_parsers):
parser = all_parsers
data = """\
ID,date,nominalTime,actualTime,A,B,C,D,E
KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000
"""
result = parser.read_csv(StringIO(data), index_col=["nominal", "ID"],
parse_dates={"nominal": [1, 2]})
expected = parser.read_csv(StringIO(data),
parse_dates={"nominal": [1, 2]})
expected = expected.set_index(["nominal", "ID"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kwargs", [dict(), dict(index_col="C")])
def test_read_with_parse_dates_scalar_non_bool(all_parsers, kwargs):
# see gh-5636
parser = all_parsers
msg = ("Only booleans, lists, and dictionaries "
"are accepted for the 'parse_dates' parameter")
data = """A,B,C
1,2,2003-11-1"""
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), parse_dates="C", **kwargs)
@pytest.mark.parametrize("parse_dates", [
(1,), np.array([4, 5]), {1, 3}
])
def test_read_with_parse_dates_invalid_type(all_parsers, parse_dates):
parser = all_parsers
msg = ("Only booleans, lists, and dictionaries "
"are accepted for the 'parse_dates' parameter")
data = """A,B,C
1,2,2003-11-1"""
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), parse_dates=parse_dates)
def test_parse_dates_empty_string(all_parsers):
# see gh-2263
parser = all_parsers
data = "Date,test\n2012-01-01,1\n,2"
result = parser.read_csv(StringIO(data), parse_dates=["Date"],
na_filter=False)
expected = DataFrame([[datetime(2012, 1, 1), 1], [pd.NaT, 2]],
columns=["Date", "test"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("data,kwargs,expected", [
("a\n04.15.2016", dict(parse_dates=["a"]),
DataFrame([datetime(2016, 4, 15)], columns=["a"])),
("a\n04.15.2016", dict(parse_dates=True, index_col=0),
DataFrame(index=DatetimeIndex(["2016-04-15"], name="a"))),
("a,b\n04.15.2016,09.16.2013", dict(parse_dates=["a", "b"]),
DataFrame([[datetime(2016, 4, 15), datetime(2013, 9, 16)]],
columns=["a", "b"])),
("a,b\n04.15.2016,09.16.2013", dict(parse_dates=True, index_col=[0, 1]),
DataFrame(index=MultiIndex.from_tuples(
[(datetime(2016, 4, 15), datetime(2013, 9, 16))], names=["a", "b"]))),
])
def test_parse_dates_no_convert_thousands(all_parsers, data, kwargs, expected):
# see gh-14066
parser = all_parsers
result = parser.read_csv(StringIO(data), thousands=".", **kwargs)
tm.assert_frame_equal(result, expected)
def test_parse_date_time_multi_level_column_name(all_parsers):
data = """\
D,T,A,B
date, time,a,b
2001-01-05, 09:00:00, 0.0, 10.
2001-01-06, 00:00:00, 1.0, 11.
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), header=[0, 1],
parse_dates={"date_time": [0, 1]},
date_parser=conv.parse_date_time)
expected_data = [[datetime(2001, 1, 5, 9, 0, 0), 0., 10.],
[datetime(2001, 1, 6, 0, 0, 0), 1., 11.]]
expected = DataFrame(expected_data,
columns=["date_time", ("A", "a"), ("B", "b")])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("data,kwargs,expected", [
("""\
date,time,a,b
2001-01-05, 10:00:00, 0.0, 10.
2001-01-05, 00:00:00, 1., 11.
""", dict(header=0, parse_dates={"date_time": [0, 1]}),
DataFrame([[datetime(2001, 1, 5, 10, 0, 0), 0.0, 10],
[datetime(2001, 1, 5, 0, 0, 0), 1.0, 11.0]],
columns=["date_time", "a", "b"])),
(("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n"
"KORD,19990127, 20:00:00, 19:56:00, 0.0100\n"
"KORD,19990127, 21:00:00, 20:56:00, -0.5900\n"
"KORD,19990127, 21:00:00, 21:18:00, -0.9900\n"
"KORD,19990127, 22:00:00, 21:56:00, -0.5900\n"
"KORD,19990127, 23:00:00, 22:56:00, -0.5900"),
dict(header=None, parse_dates={"actual": [1, 2], "nominal": [1, 3]}),
DataFrame([
[datetime(1999, 1, 27, 19, 0), datetime(1999, 1, 27, 18, 56),
"KORD", 0.81],
[datetime(1999, 1, 27, 20, 0), datetime(1999, 1, 27, 19, 56),
"KORD", 0.01],
[datetime(1999, 1, 27, 21, 0), datetime(1999, 1, 27, 20, 56),
"KORD", -0.59],
[datetime(1999, 1, 27, 21, 0), datetime(1999, 1, 27, 21, 18),
"KORD", -0.99],
[datetime(1999, 1, 27, 22, 0), datetime(1999, 1, 27, 21, 56),
"KORD", -0.59],
[datetime(1999, 1, 27, 23, 0), datetime(1999, 1, 27, 22, 56),
"KORD", -0.59]], columns=["actual", "nominal", 0, 4])),
])
def test_parse_date_time(all_parsers, data, kwargs, expected):
parser = all_parsers
result = parser.read_csv(StringIO(data), date_parser=conv.parse_date_time,
**kwargs)
# Python can sometimes be flaky about how
# the aggregated columns are entered, so
# this standardizes the order.
result = result[expected.columns]
tm.assert_frame_equal(result, expected)
def test_parse_date_fields(all_parsers):
parser = all_parsers
data = ("year,month,day,a\n2001,01,10,10.\n"
"2001,02,1,11.")
result = parser.read_csv(StringIO(data), header=0,
parse_dates={"ymd": [0, 1, 2]},
date_parser=conv.parse_date_fields)
expected = DataFrame([[datetime(2001, 1, 10), 10.],
[datetime(2001, 2, 1), 11.]], columns=["ymd", "a"])
tm.assert_frame_equal(result, expected)
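# A rough scalar equivalent of conv.parse_date_fields, for intuition
# (a sketch; the real converter operates on whole arrays at once):
def _sketch_parse_date_fields(years, months, days):
    from datetime import datetime

    return [datetime(int(y), int(m), int(d))
            for y, m, d in zip(years, months, days)]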
def test_parse_date_all_fields(all_parsers):
parser = all_parsers
data = """\
year,month,day,hour,minute,second,a,b
2001,01,05,10,00,0,0.0,10.
2001,01,5,10,0,00,1.,11.
"""
result = parser.read_csv(StringIO(data), header=0,
date_parser=conv.parse_all_fields,
parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]})
expected = DataFrame([[datetime(2001, 1, 5, 10, 0, 0), 0.0, 10.0],
[datetime(2001, 1, 5, 10, 0, 0), 1.0, 11.0]],
columns=["ymdHMS", "a", "b"])
tm.assert_frame_equal(result, expected)
def test_datetime_fractional_seconds(all_parsers):
parser = all_parsers
data = """\
year,month,day,hour,minute,second,a,b
2001,01,05,10,00,0.123456,0.0,10.
2001,01,5,10,0,0.500000,1.,11.
"""
result = parser.read_csv(StringIO(data), header=0,
date_parser=conv.parse_all_fields,
parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]})
expected = DataFrame([[datetime(2001, 1, 5, 10, 0, 0,
microsecond=123456), 0.0, 10.0],
[datetime(2001, 1, 5, 10, 0, 0,
microsecond=500000), 1.0, 11.0]],
columns=["ymdHMS", "a", "b"])
tm.assert_frame_equal(result, expected)
def test_generic(all_parsers):
parser = all_parsers
data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11."
result = parser.read_csv(StringIO(data), header=0,
parse_dates={"ym": [0, 1]},
date_parser=lambda y, m: date(year=int(y),
month=int(m),
day=1))
expected = DataFrame([[date(2001, 1, 1), 10, 10.],
[date(2001, 2, 1), 1, 11.]],
columns=["ym", "day", "a"])
tm.assert_frame_equal(result, expected)
def test_date_parser_resolution_if_not_ns(all_parsers):
# see gh-10245
parser = all_parsers
data = """\
date,time,prn,rxstatus
2013-11-03,19:00:00,126,00E80000
2013-11-03,19:00:00,23,00E80000
2013-11-03,19:00:00,13,00E80000
"""
def date_parser(dt, time):
return np_array_datetime64_compat(dt + "T" + time + "Z",
dtype="datetime64[s]")
result = parser.read_csv(StringIO(data), date_parser=date_parser,
parse_dates={"datetime": ["date", "time"]},
index_col=["datetime", "prn"])
datetimes = np_array_datetime64_compat(["2013-11-03T19:00:00Z"] * 3,
dtype="datetime64[s]")
expected = DataFrame(data={"rxstatus": ["00E80000"] * 3},
index=MultiIndex.from_tuples(
[(datetimes[0], 126), (datetimes[1], 23),
(datetimes[2], 13)], names=["datetime", "prn"]))
tm.assert_frame_equal(result, expected)
def test_parse_date_column_with_empty_string(all_parsers):
# see gh-6428
parser = all_parsers
data = "case,opdate\n7,10/18/2006\n7,10/18/2008\n621, "
result = parser.read_csv(StringIO(data), parse_dates=["opdate"])
expected_data = [[7, "10/18/2006"],
[7, "10/18/2008"],
[621, " "]]
expected = DataFrame(expected_data, columns=["case", "opdate"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("data,expected", [
("a\n135217135789158401\n1352171357E+5",
DataFrame({"a": [135217135789158401,
135217135700000]}, dtype="float64")),
("a\n99999999999\n123456789012345\n1234E+0",
DataFrame({"a": [99999999999,
123456789012345,
1234]}, dtype="float64"))
])
@pytest.mark.parametrize("parse_dates", [True, False])
def test_parse_date_float(all_parsers, data, expected, parse_dates):
# see gh-2697
#
# Date parsing should fail, so we leave the data untouched
# (i.e. float precision should remain unchanged).
parser = all_parsers
result = parser.read_csv(StringIO(data), parse_dates=parse_dates)
tm.assert_frame_equal(result, expected)
def test_parse_timezone(all_parsers):
# see gh-22256
parser = all_parsers
data = """dt,val
2018-01-04 09:01:00+09:00,23350
2018-01-04 09:02:00+09:00,23400
2018-01-04 09:03:00+09:00,23400
2018-01-04 09:04:00+09:00,23400
2018-01-04 09:05:00+09:00,23400"""
result = parser.read_csv(StringIO(data), parse_dates=["dt"])
dti = pd.date_range(start="2018-01-04 09:01:00",
end="2018-01-04 09:05:00", freq="1min",
tz=pytz.FixedOffset(540))
expected_data = {"dt": dti, "val": [23350, 23400, 23400, 23400, 23400]}
expected = DataFrame(expected_data)
tm.assert_frame_equal(result, expected)
@@ -0,0 +1,301 @@
# -*- coding: utf-8 -*-
"""
Tests that apply specifically to the Python parser. Unless specifically
stated as a Python-specific issue, the goal is to eventually move as
many of these tests as possible out of this module, as soon as the C
parser can accept further arguments when parsing.
"""
import csv
import pytest
import pandas.compat as compat
from pandas.compat import BytesIO, StringIO, u
from pandas.errors import ParserError
from pandas import DataFrame, Index, MultiIndex
import pandas.util.testing as tm
def test_default_separator(python_parser_only):
# see gh-17333
#
# csv.Sniffer in Python treats "o" as the separator.
data = "aob\n1o2\n3o4"
parser = python_parser_only
expected = DataFrame({"a": [1, 3], "b": [2, 4]})
result = parser.read_csv(StringIO(data), sep=None)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("skipfooter", ["foo", 1.5, True])
def test_invalid_skipfooter_non_int(python_parser_only, skipfooter):
# see gh-15925 (comment)
data = "a\n1\n2"
parser = python_parser_only
msg = "skipfooter must be an integer"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), skipfooter=skipfooter)
def test_invalid_skipfooter_negative(python_parser_only):
# see gh-15925 (comment)
data = "a\n1\n2"
parser = python_parser_only
msg = "skipfooter cannot be negative"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), skipfooter=-1)
@pytest.mark.parametrize("kwargs", [
dict(sep=None),
dict(delimiter="|")
])
def test_sniff_delimiter(python_parser_only, kwargs):
data = """index|A|B|C
foo|1|2|3
bar|4|5|6
baz|7|8|9
"""
parser = python_parser_only
result = parser.read_csv(StringIO(data), index_col=0, **kwargs)
expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
columns=["A", "B", "C"],
index=Index(["foo", "bar", "baz"], name="index"))
tm.assert_frame_equal(result, expected)
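# With sep=None, the python engine defers to the stdlib's csv.Sniffer
# to guess the delimiter from a sample (a sketch of that building
# block; the helper name is hypothetical):
def _sketch_sniff(sample):
    import csv

    return csv.Sniffer().sniff(sample).delimiter

# _sketch_sniff("index|A|B|C\nfoo|1|2|3\nbar|4|5|6") should return "|".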
@pytest.mark.parametrize("encoding", [None, "utf-8"])
def test_sniff_delimiter_encoding(python_parser_only, encoding):
parser = python_parser_only
data = """ignore this
ignore this too
index|A|B|C
foo|1|2|3
bar|4|5|6
baz|7|8|9
"""
if encoding is not None:
data = u(data).encode(encoding)
data = BytesIO(data)
if compat.PY3:
from io import TextIOWrapper
data = TextIOWrapper(data, encoding=encoding)
else:
data = StringIO(data)
result = parser.read_csv(data, index_col=0, sep=None,
skiprows=2, encoding=encoding)
expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
columns=["A", "B", "C"],
index=Index(["foo", "bar", "baz"], name="index"))
tm.assert_frame_equal(result, expected)
def test_single_line(python_parser_only):
# see gh-6607: sniff separator
parser = python_parser_only
result = parser.read_csv(StringIO("1,2"), names=["a", "b"],
header=None, sep=None)
expected = DataFrame({"a": [1], "b": [2]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kwargs", [dict(skipfooter=2), dict(nrows=3)])
def test_skipfooter(python_parser_only, kwargs):
# see gh-6607
data = """A,B,C
1,2,3
4,5,6
7,8,9
want to skip this
also also skip this
"""
parser = python_parser_only
result = parser.read_csv(StringIO(data), **kwargs)
expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
columns=["A", "B", "C"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("compression,klass", [
("gzip", "GzipFile"),
("bz2", "BZ2File"),
])
def test_decompression_regex_sep(python_parser_only, csv1, compression, klass):
# see gh-6607
parser = python_parser_only
with open(csv1, "rb") as f:
data = f.read()
data = data.replace(b",", b"::")
expected = parser.read_csv(csv1)
module = pytest.importorskip(compression)
klass = getattr(module, klass)
with tm.ensure_clean() as path:
tmp = klass(path, mode="wb")
tmp.write(data)
tmp.close()
result = parser.read_csv(path, sep="::",
compression=compression)
tm.assert_frame_equal(result, expected)
def test_read_csv_buglet_4x_multi_index(python_parser_only):
# see gh-6607
data = """ A B C D E
one two three four
a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640
a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744
x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""
parser = python_parser_only
expected = DataFrame([[-0.5109, -2.3358, -0.4645, 0.05076, 0.3640],
[0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
[-0.6662, -0.5243, -0.3580, 0.89145, 2.5838]],
columns=["A", "B", "C", "D", "E"],
index=MultiIndex.from_tuples([
("a", "b", 10.0032, 5),
("a", "q", 20, 4),
("x", "q", 30, 3),
], names=["one", "two", "three", "four"]))
result = parser.read_csv(StringIO(data), sep=r"\s+")
tm.assert_frame_equal(result, expected)
def test_read_csv_buglet_4x_multi_index2(python_parser_only):
# see gh-6893
data = " A B C\na b c\n1 3 7 0 3 6\n3 1 4 1 5 9"
parser = python_parser_only
expected = DataFrame.from_records(
[(1, 3, 7, 0, 3, 6), (3, 1, 4, 1, 5, 9)],
columns=list("abcABC"), index=list("abc"))
result = parser.read_csv(StringIO(data), sep=r"\s+")
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("add_footer", [True, False])
def test_skipfooter_with_decimal(python_parser_only, add_footer):
# see gh-6971
data = "1#2\n3#4"
parser = python_parser_only
expected = DataFrame({"a": [1.2, 3.4]})
if add_footer:
# The stray footer line should not mess with the
# casting of the first two lines if we skip it.
kwargs = dict(skipfooter=1)
data += "\nFooter"
else:
kwargs = dict()
result = parser.read_csv(StringIO(data), names=["a"],
decimal="#", **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("sep", ["::", "#####", "!!!", "123", "#1!c5",
"%!c!d", "@@#4:2", "_!pd#_"])
@pytest.mark.parametrize("encoding", ["utf-16", "utf-16-be", "utf-16-le",
"utf-32", "cp037"])
def test_encoding_non_utf8_multichar_sep(python_parser_only, sep, encoding):
# see gh-3404
expected = DataFrame({"a": [1], "b": [2]})
parser = python_parser_only
data = "1" + sep + "2"
encoded_data = data.encode(encoding)
result = parser.read_csv(BytesIO(encoded_data), sep=sep,
names=["a", "b"], encoding=encoding)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("quoting", [csv.QUOTE_MINIMAL, csv.QUOTE_NONE])
def test_multi_char_sep_quotes(python_parser_only, quoting):
# see gh-13374
kwargs = dict(sep=",,")
parser = python_parser_only
data = 'a,,b\n1,,a\n2,,"2,,b"'
msg = "ignored when a multi-char delimiter is used"
def fail_read():
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), quoting=quoting, **kwargs)
if quoting == csv.QUOTE_NONE:
# We expect no match, so there should be an assertion
# error out of the inner context manager.
with pytest.raises(AssertionError):
fail_read()
else:
fail_read()
def test_none_delimiter(python_parser_only, capsys):
# see gh-13374 and gh-17465
parser = python_parser_only
data = "a,b,c\n0,1,2\n3,4,5,6\n7,8,9"
expected = DataFrame({"a": [0, 7], "b": [1, 8], "c": [2, 9]})
# We expect the third line in the data to be
# skipped because it is malformed, but we do
# not expect any errors to occur.
result = parser.read_csv(StringIO(data), header=0,
sep=None, warn_bad_lines=True,
error_bad_lines=False)
tm.assert_frame_equal(result, expected)
captured = capsys.readouterr()
assert "Skipping line 3" in captured.err
@pytest.mark.parametrize("data", [
'a\n1\n"b"a', 'a,b,c\ncat,foo,bar\ndog,foo,"baz'])
@pytest.mark.parametrize("skipfooter", [0, 1])
def test_skipfooter_bad_row(python_parser_only, data, skipfooter):
# see gh-13879 and gh-15910
msg = "parsing errors in the skipped footer rows"
parser = python_parser_only
def fail_read():
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), skipfooter=skipfooter)
if skipfooter:
fail_read()
else:
# We expect no match, so there should be an assertion
# error out of the inner context manager.
with pytest.raises(AssertionError):
fail_read()
def test_malformed_skipfooter(python_parser_only):
parser = python_parser_only
data = """ignore
A,B,C
1,2,3 # comment
1,2,3,4,5
2,3,4
footer
"""
msg = "Expected 3 fields in line 4, saw 5"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), header=1,
comment="#", skipfooter=1)
@@ -0,0 +1,158 @@
# -*- coding: utf-8 -*-
"""
Tests that quoting specifications are properly handled
during parsing for all of the parsers defined in parsers.py
"""
import csv
import pytest
from pandas.compat import PY2, StringIO, u
from pandas.errors import ParserError
from pandas import DataFrame
import pandas.util.testing as tm
@pytest.mark.parametrize("kwargs,msg", [
(dict(quotechar="foo"), '"quotechar" must be a(n)? 1-character string'),
(dict(quotechar=None, quoting=csv.QUOTE_MINIMAL),
"quotechar must be set if quoting enabled"),
(dict(quotechar=2), '"quotechar" must be string, not int')
])
def test_bad_quote_char(all_parsers, kwargs, msg):
data = "1,2,3"
parser = all_parsers
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
@pytest.mark.parametrize("quoting,msg", [
("foo", '"quoting" must be an integer'),
(5, 'bad "quoting" value'), # quoting must be in the range [0, 3]
])
def test_bad_quoting(all_parsers, quoting, msg):
data = "1,2,3"
parser = all_parsers
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), quoting=quoting)
def test_quote_char_basic(all_parsers):
parser = all_parsers
data = 'a,b,c\n1,2,"cat"'
expected = DataFrame([[1, 2, "cat"]],
columns=["a", "b", "c"])
result = parser.read_csv(StringIO(data), quotechar='"')
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("quote_char", ["~", "*", "%", "$", "@", "P"])
def test_quote_char_various(all_parsers, quote_char):
parser = all_parsers
expected = DataFrame([[1, 2, "cat"]],
columns=["a", "b", "c"])
data = 'a,b,c\n1,2,"cat"'
new_data = data.replace('"', quote_char)
result = parser.read_csv(StringIO(new_data), quotechar=quote_char)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("quoting", [csv.QUOTE_MINIMAL, csv.QUOTE_NONE])
@pytest.mark.parametrize("quote_char", ["", None])
def test_null_quote_char(all_parsers, quoting, quote_char):
kwargs = dict(quotechar=quote_char, quoting=quoting)
data = "a,b,c\n1,2,3"
parser = all_parsers
if quoting != csv.QUOTE_NONE:
# Sanity checking.
msg = "quotechar must be set if quoting enabled"
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
else:
expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"])
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kwargs,exp_data", [
(dict(), [[1, 2, "foo"]]), # Test default.
# QUOTE_MINIMAL only applies to CSV writing, so no effect on reading.
(dict(quotechar='"', quoting=csv.QUOTE_MINIMAL), [[1, 2, "foo"]]),
# QUOTE_ALL only applies to CSV writing, so no effect on reading.
(dict(quotechar='"', quoting=csv.QUOTE_ALL), [[1, 2, "foo"]]),
# QUOTE_NONE tells the reader to do no special handling
# of quote characters and leave them alone.
(dict(quotechar='"', quoting=csv.QUOTE_NONE), [[1, 2, '"foo"']]),
# QUOTE_NONNUMERIC tells the reader to cast
# all non-quoted fields to float
(dict(quotechar='"', quoting=csv.QUOTE_NONNUMERIC), [[1.0, 2.0, "foo"]])
])
def test_quoting_various(all_parsers, kwargs, exp_data):
data = '1,2,"foo"'
parser = all_parsers
columns = ["a", "b", "c"]
result = parser.read_csv(StringIO(data), names=columns, **kwargs)
expected = DataFrame(exp_data, columns=columns)
tm.assert_frame_equal(result, expected)
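# For reference, the stdlib quoting constants exercised above and their
# effect on *reading* (QUOTE_MINIMAL=0, QUOTE_ALL=1, QUOTE_NONNUMERIC=2,
# QUOTE_NONE=3): the first two are write-side options with no effect on
# reading, QUOTE_NONNUMERIC makes the reader cast unquoted fields to
# float, and QUOTE_NONE leaves quote characters untouched.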
@pytest.mark.parametrize("doublequote,exp_data", [
(True, [[3, '4 " 5']]),
(False, [[3, '4 " 5"']]),
])
def test_double_quote(all_parsers, doublequote, exp_data):
parser = all_parsers
data = 'a,b\n3,"4 "" 5"'
result = parser.read_csv(StringIO(data), quotechar='"',
doublequote=doublequote)
expected = DataFrame(exp_data, columns=["a", "b"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("quotechar", [
u('"'),
pytest.param(u('\u0001'), marks=pytest.mark.skipif(
PY2, reason="Python 2.x does not handle unicode well."))])
def test_quotechar_unicode(all_parsers, quotechar):
# see gh-14477
data = "a\n1"
parser = all_parsers
expected = DataFrame({"a": [1]})
result = parser.read_csv(StringIO(data), quotechar=quotechar)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("balanced", [True, False])
def test_unbalanced_quoting(all_parsers, balanced):
# see gh-22789.
parser = all_parsers
data = "a,b,c\n1,2,\"3"
if balanced:
# Re-balance the quoting and read in without errors.
expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"])
result = parser.read_csv(StringIO(data + '"'))
tm.assert_frame_equal(result, expected)
else:
msg = ("EOF inside string starting at row 1" if parser.engine == "c"
else "unexpected end of data")
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data))
@@ -0,0 +1,580 @@
# -*- coding: utf-8 -*-
"""
Tests the 'read_fwf' function in parsers.py. This
test suite is independent of the others because the
engine is set to 'python-fwf' internally.
"""
from datetime import datetime
import numpy as np
import pytest
import pandas.compat as compat
from pandas.compat import BytesIO, StringIO
import pandas as pd
from pandas import DataFrame, DatetimeIndex
import pandas.util.testing as tm
from pandas.io.parsers import EmptyDataError, read_csv, read_fwf
def test_basic():
data = """\
A B C D
201158 360.242940 149.910199 11950.7
201159 444.953632 166.985655 11788.4
201160 364.136849 183.628767 11806.2
201161 413.836124 184.375703 11916.8
201162 502.953953 173.237159 12468.3
"""
result = read_fwf(StringIO(data))
expected = DataFrame([[201158, 360.242940, 149.910199, 11950.7],
[201159, 444.953632, 166.985655, 11788.4],
[201160, 364.136849, 183.628767, 11806.2],
[201161, 413.836124, 184.375703, 11916.8],
[201162, 502.953953, 173.237159, 12468.3]],
columns=["A", "B", "C", "D"])
tm.assert_frame_equal(result, expected)
def test_colspecs():
data = """\
A B C D E
201158 360.242940 149.910199 11950.7
201159 444.953632 166.985655 11788.4
201160 364.136849 183.628767 11806.2
201161 413.836124 184.375703 11916.8
201162 502.953953 173.237159 12468.3
"""
colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)]
result = read_fwf(StringIO(data), colspecs=colspecs)
expected = DataFrame([[2011, 58, 360.242940, 149.910199, 11950.7],
[2011, 59, 444.953632, 166.985655, 11788.4],
[2011, 60, 364.136849, 183.628767, 11806.2],
[2011, 61, 413.836124, 184.375703, 11916.8],
[2011, 62, 502.953953, 173.237159, 12468.3]],
columns=["A", "B", "C", "D", "E"])
tm.assert_frame_equal(result, expected)
def test_widths():
data = """\
A B C D E
2011 58 360.242940 149.910199 11950.7
2011 59 444.953632 166.985655 11788.4
2011 60 364.136849 183.628767 11806.2
2011 61 413.836124 184.375703 11916.8
2011 62 502.953953 173.237159 12468.3
"""
result = read_fwf(StringIO(data), widths=[5, 5, 13, 13, 7])
expected = DataFrame([[2011, 58, 360.242940, 149.910199, 11950.7],
[2011, 59, 444.953632, 166.985655, 11788.4],
[2011, 60, 364.136849, 183.628767, 11806.2],
[2011, 61, 413.836124, 184.375703, 11916.8],
[2011, 62, 502.953953, 173.237159, 12468.3]],
columns=["A", "B", "C", "D", "E"])
tm.assert_frame_equal(result, expected)
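# --- Illustrative sketch; the helper name is hypothetical ---
# 'widths' is shorthand for contiguous 'colspecs': running sums of the
# widths yield the half-open (start, end) pairs that read_fwf slices
# each line with.
def _widths_to_colspecs(widths):
    colspecs, start = [], 0
    for width in widths:
        colspecs.append((start, start + width))
        start += width
    return colspecs

# e.g. _widths_to_colspecs([5, 5, 13, 13, 7])
# -> [(0, 5), (5, 10), (10, 23), (23, 36), (36, 43)]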
def test_non_space_filler():
# From Thomas Kluyver:
#
    # Some fixed-width files pad with a non-space filler character;
    # this is supported by passing that character as the 'delimiter':
#
# http://publib.boulder.ibm.com/infocenter/dmndhelp/v6r1mx/index.jsp?topic=/com.ibm.wbit.612.help.config.doc/topics/rfixwidth.html
data = """\
A~~~~B~~~~C~~~~~~~~~~~~D~~~~~~~~~~~~E
201158~~~~360.242940~~~149.910199~~~11950.7
201159~~~~444.953632~~~166.985655~~~11788.4
201160~~~~364.136849~~~183.628767~~~11806.2
201161~~~~413.836124~~~184.375703~~~11916.8
201162~~~~502.953953~~~173.237159~~~12468.3
"""
colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)]
result = read_fwf(StringIO(data), colspecs=colspecs, delimiter="~")
expected = DataFrame([[2011, 58, 360.242940, 149.910199, 11950.7],
[2011, 59, 444.953632, 166.985655, 11788.4],
[2011, 60, 364.136849, 183.628767, 11806.2],
[2011, 61, 413.836124, 184.375703, 11916.8],
[2011, 62, 502.953953, 173.237159, 12468.3]],
columns=["A", "B", "C", "D", "E"])
tm.assert_frame_equal(result, expected)
def test_over_specified():
data = """\
A B C D E
201158 360.242940 149.910199 11950.7
201159 444.953632 166.985655 11788.4
201160 364.136849 183.628767 11806.2
201161 413.836124 184.375703 11916.8
201162 502.953953 173.237159 12468.3
"""
colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)]
with pytest.raises(ValueError, match="must specify only one of"):
read_fwf(StringIO(data), colspecs=colspecs, widths=[6, 10, 10, 7])
def test_under_specified():
data = """\
A B C D E
201158 360.242940 149.910199 11950.7
201159 444.953632 166.985655 11788.4
201160 364.136849 183.628767 11806.2
201161 413.836124 184.375703 11916.8
201162 502.953953 173.237159 12468.3
"""
with pytest.raises(ValueError, match="Must specify either"):
read_fwf(StringIO(data), colspecs=None, widths=None)
def test_read_csv_compat():
csv_data = """\
A,B,C,D,E
2011,58,360.242940,149.910199,11950.7
2011,59,444.953632,166.985655,11788.4
2011,60,364.136849,183.628767,11806.2
2011,61,413.836124,184.375703,11916.8
2011,62,502.953953,173.237159,12468.3
"""
expected = read_csv(StringIO(csv_data), engine="python")
fwf_data = """\
A B C D E
201158 360.242940 149.910199 11950.7
201159 444.953632 166.985655 11788.4
201160 364.136849 183.628767 11806.2
201161 413.836124 184.375703 11916.8
201162 502.953953 173.237159 12468.3
"""
colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)]
result = read_fwf(StringIO(fwf_data), colspecs=colspecs)
tm.assert_frame_equal(result, expected)
def test_bytes_io_input():
if not compat.PY3:
pytest.skip("Bytes-related test - only needs to work on Python 3")
result = read_fwf(BytesIO("שלום\nשלום".encode('utf8')),
widths=[2, 2], encoding="utf8")
expected = DataFrame([["של", "ום"]], columns=["של", "ום"])
tm.assert_frame_equal(result, expected)
def test_fwf_colspecs_is_list_or_tuple():
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
msg = "column specifications must be a list or tuple.+"
with pytest.raises(TypeError, match=msg):
read_fwf(StringIO(data), colspecs={"a": 1}, delimiter=",")
def test_fwf_colspecs_is_list_or_tuple_of_two_element_tuples():
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
msg = "Each column specification must be.+"
with pytest.raises(TypeError, match=msg):
read_fwf(StringIO(data), [("a", 1)])
@pytest.mark.parametrize("colspecs,exp_data", [
([(0, 3), (3, None)], [[123, 456], [456, 789]]),
([(None, 3), (3, 6)], [[123, 456], [456, 789]]),
([(0, None), (3, None)], [[123456, 456], [456789, 789]]),
([(None, None), (3, 6)], [[123456, 456], [456789, 789]]),
])
def test_fwf_colspecs_none(colspecs, exp_data):
# see gh-7079
data = """\
123456
456789
"""
expected = DataFrame(exp_data)
result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
tm.assert_frame_equal(result, expected)
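# --- Illustrative note, not part of the original suite ---
# A None bound in a colspec behaves like an open slice bound, so the
# parametrized cases above reduce to ordinary string slicing:
def _demo_open_colspec_bounds():
    line = "123456"
    assert line[0:3] == "123" and line[3:None] == "456"
    assert line[None:3] == "123" and line[0:None] == "123456"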
@pytest.mark.parametrize("infer_nrows,exp_data", [
# infer_nrows --> colspec == [(2, 3), (5, 6)]
(1, [[1, 2], [3, 8]]),
# infer_nrows > number of rows
(10, [[1, 2], [123, 98]]),
])
def test_fwf_colspecs_infer_nrows(infer_nrows, exp_data):
# see gh-15138
data = """\
1 2
123 98
"""
expected = DataFrame(exp_data)
result = read_fwf(StringIO(data), infer_nrows=infer_nrows, header=None)
tm.assert_frame_equal(result, expected)
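# --- Illustrative sketch; a deliberate simplification of the real
# inference, with a hypothetical helper name. The idea: mark every
# character position that is non-blank somewhere in the first
# `infer_nrows` data rows, then turn each maximal run of marked
# positions into a column.
def _sketch_infer_colspecs(rows, infer_nrows):
    import re
    marked = set()
    for row in rows[:infer_nrows]:
        for match in re.finditer(r"\S+", row):
            marked.update(range(match.start(), match.end()))
    spans, start = [], None
    for pos in range(max(marked) + 2):
        if pos in marked and start is None:
            start = pos
        elif pos not in marked and start is not None:
            spans.append((start, pos))
            start = None
    return spans

# _sketch_infer_colspecs(["  1  2"], 1) -> [(2, 3), (5, 6)], matching
# the colspec noted in the parametrization above.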
def test_fwf_regression():
# see gh-3594
#
# Turns out "T060" is parsable as a datetime slice!
tz_list = [1, 10, 20, 30, 60, 80, 100]
widths = [16] + [8] * len(tz_list)
names = ["SST"] + ["T%03d" % z for z in tz_list[1:]]
data = """ 2009164202000 9.5403 9.4105 8.6571 7.8372 6.0612 5.8843 5.5192
2009164203000 9.5435 9.2010 8.6167 7.8176 6.0804 5.8728 5.4869
2009164204000 9.5873 9.1326 8.4694 7.5889 6.0422 5.8526 5.4657
2009164205000 9.5810 9.0896 8.4009 7.4652 6.0322 5.8189 5.4379
2009164210000 9.6034 9.0897 8.3822 7.4905 6.0908 5.7904 5.4039
"""
result = read_fwf(StringIO(data), index_col=0, header=None, names=names,
widths=widths, parse_dates=True,
date_parser=lambda s: datetime.strptime(s, "%Y%j%H%M%S"))
expected = DataFrame([
[9.5403, 9.4105, 8.6571, 7.8372, 6.0612, 5.8843, 5.5192],
[9.5435, 9.2010, 8.6167, 7.8176, 6.0804, 5.8728, 5.4869],
[9.5873, 9.1326, 8.4694, 7.5889, 6.0422, 5.8526, 5.4657],
[9.5810, 9.0896, 8.4009, 7.4652, 6.0322, 5.8189, 5.4379],
[9.6034, 9.0897, 8.3822, 7.4905, 6.0908, 5.7904, 5.4039],
], index=DatetimeIndex(["2009-06-13 20:20:00", "2009-06-13 20:30:00",
"2009-06-13 20:40:00", "2009-06-13 20:50:00",
"2009-06-13 21:00:00"]),
columns=["SST", "T010", "T020", "T030", "T060", "T080", "T100"])
tm.assert_frame_equal(result, expected)
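# --- Illustrative note, not part of the original suite ---
# "%j" is strptime's day-of-year directive, so the 13-digit stamps
# above split as %Y%j%H%M%S; day 164 of 2009 is June 13, which is how
# the expected DatetimeIndex was derived.
def _demo_day_of_year_format():
    assert (datetime.strptime("2009164202000", "%Y%j%H%M%S") ==
            datetime(2009, 6, 13, 20, 20))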
def test_fwf_for_uint8():
data = """1421302965.213420 PRI=3 PGN=0xef00 DST=0x17 SRC=0x28 04 154 00 00 00 00 00 127
1421302964.226776 PRI=6 PGN=0xf002 SRC=0x47 243 00 00 255 247 00 00 71""" # noqa
df = read_fwf(StringIO(data),
colspecs=[(0, 17), (25, 26), (33, 37),
(49, 51), (58, 62), (63, 1000)],
names=["time", "pri", "pgn", "dst", "src", "data"],
converters={
"pgn": lambda x: int(x, 16),
"src": lambda x: int(x, 16),
"dst": lambda x: int(x, 16),
"data": lambda x: len(x.split(" "))})
expected = DataFrame([[1421302965.213420, 3, 61184, 23, 40, 8],
[1421302964.226776, 6, 61442, None, 71, 8]],
columns=["time", "pri", "pgn",
"dst", "src", "data"])
expected["dst"] = expected["dst"].astype(object)
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize("comment", ["#", "~", "!"])
def test_fwf_comment(comment):
data = """\
1 2. 4 #hello world
5 NaN 10.0
"""
data = data.replace("#", comment)
colspecs = [(0, 3), (4, 9), (9, 25)]
expected = DataFrame([[1, 2., 4], [5, np.nan, 10.]])
result = read_fwf(StringIO(data), colspecs=colspecs,
header=None, comment=comment)
tm.assert_almost_equal(result, expected)
@pytest.mark.parametrize("thousands", [",", "#", "~"])
def test_fwf_thousands(thousands):
data = """\
1 2,334.0 5
10 13 10.
"""
data = data.replace(",", thousands)
colspecs = [(0, 3), (3, 11), (12, 16)]
expected = DataFrame([[1, 2334., 5], [10, 13, 10.]])
result = read_fwf(StringIO(data), header=None,
colspecs=colspecs, thousands=thousands)
tm.assert_almost_equal(result, expected)
@pytest.mark.parametrize("header", [True, False])
def test_bool_header_arg(header):
# see gh-6114
data = """\
MyColumn
a
b
a
b"""
msg = "Passing a bool to header is invalid"
with pytest.raises(TypeError, match=msg):
read_fwf(StringIO(data), header=header)
def test_full_file():
# File with all values.
test = """index A B C
2000-01-03T00:00:00 0.980268513777 3 foo
2000-01-04T00:00:00 1.04791624281 -4 bar
2000-01-05T00:00:00 0.498580885705 73 baz
2000-01-06T00:00:00 1.12020151869 1 foo
2000-01-07T00:00:00 0.487094399463 0 bar
2000-01-10T00:00:00 0.836648671666 2 baz
2000-01-11T00:00:00 0.157160753327 34 foo"""
colspecs = ((0, 19), (21, 35), (38, 40), (42, 45))
expected = read_fwf(StringIO(test), colspecs=colspecs)
result = read_fwf(StringIO(test))
tm.assert_frame_equal(result, expected)
def test_full_file_with_missing():
# File with missing values.
test = """index A B C
2000-01-03T00:00:00 0.980268513777 3 foo
2000-01-04T00:00:00 1.04791624281 -4 bar
0.498580885705 73 baz
2000-01-06T00:00:00 1.12020151869 1 foo
2000-01-07T00:00:00 0 bar
2000-01-10T00:00:00 0.836648671666 2 baz
34"""
colspecs = ((0, 19), (21, 35), (38, 40), (42, 45))
expected = read_fwf(StringIO(test), colspecs=colspecs)
result = read_fwf(StringIO(test))
tm.assert_frame_equal(result, expected)
def test_full_file_with_spaces():
# File with spaces in columns.
test = """
Account Name Balance CreditLimit AccountCreated
101 Keanu Reeves 9315.45 10000.00 1/17/1998
312 Gerard Butler 90.00 1000.00 8/6/2003
868 Jennifer Love Hewitt 0 17000.00 5/25/1985
761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006
317 Bill Murray 789.65 5000.00 2/5/2007
""".strip("\r\n")
colspecs = ((0, 7), (8, 28), (30, 38), (42, 53), (56, 70))
expected = read_fwf(StringIO(test), colspecs=colspecs)
result = read_fwf(StringIO(test))
tm.assert_frame_equal(result, expected)
def test_full_file_with_spaces_and_missing():
# File with spaces and missing values in columns.
test = """
Account Name Balance CreditLimit AccountCreated
101 10000.00 1/17/1998
312 Gerard Butler 90.00 1000.00 8/6/2003
868 5/25/1985
761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006
317 Bill Murray 789.65
""".strip("\r\n")
colspecs = ((0, 7), (8, 28), (30, 38), (42, 53), (56, 70))
expected = read_fwf(StringIO(test), colspecs=colspecs)
result = read_fwf(StringIO(test))
tm.assert_frame_equal(result, expected)
def test_messed_up_data():
# Completely messed up file.
test = """
Account Name Balance Credit Limit Account Created
101 10000.00 1/17/1998
312 Gerard Butler 90.00 1000.00
761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006
317 Bill Murray 789.65
""".strip("\r\n")
colspecs = ((2, 10), (15, 33), (37, 45), (49, 61), (64, 79))
expected = read_fwf(StringIO(test), colspecs=colspecs)
result = read_fwf(StringIO(test))
tm.assert_frame_equal(result, expected)
def test_multiple_delimiters():
test = r"""
col1~~~~~col2 col3++++++++++++++++++col4
~~22.....11.0+++foo~~~~~~~~~~Keanu Reeves
33+++122.33\\\bar.........Gerard Butler
++44~~~~12.01 baz~~Jennifer Love Hewitt
~~55 11+++foo++++Jada Pinkett-Smith
..66++++++.03~~~bar Bill Murray
""".strip("\r\n")
delimiter = " +~.\\"
colspecs = ((0, 4), (7, 13), (15, 19), (21, 41))
expected = read_fwf(StringIO(test), colspecs=colspecs, delimiter=delimiter)
result = read_fwf(StringIO(test), delimiter=delimiter)
tm.assert_frame_equal(result, expected)
def test_variable_width_unicode():
if not compat.PY3:
pytest.skip("Bytes-related test - only needs to work on Python 3")
data = """
שלום שלום
ום שלל
של ום
""".strip("\r\n")
encoding = "utf8"
kwargs = dict(header=None, encoding=encoding)
expected = read_fwf(BytesIO(data.encode(encoding)),
colspecs=[(0, 4), (5, 9)], **kwargs)
result = read_fwf(BytesIO(data.encode(encoding)), **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("dtype", [
dict(), {"a": "float64", "b": str, "c": "int32"}
])
def test_dtype(dtype):
data = """ a b c
1 2 3.2
3 4 5.2
"""
colspecs = [(0, 5), (5, 10), (10, None)]
result = read_fwf(StringIO(data), colspecs=colspecs, dtype=dtype)
expected = pd.DataFrame({
"a": [1, 3], "b": [2, 4],
"c": [3.2, 5.2]}, columns=["a", "b", "c"])
for col, dt in dtype.items():
expected[col] = expected[col].astype(dt)
tm.assert_frame_equal(result, expected)
def test_skiprows_inference():
# see gh-11256
data = """
Text contained in the file header
DataCol1 DataCol2
0.0 1.0
101.6 956.1
""".strip()
skiprows = 2
expected = read_csv(StringIO(data), skiprows=skiprows,
delim_whitespace=True)
result = read_fwf(StringIO(data), skiprows=skiprows)
tm.assert_frame_equal(result, expected)
def test_skiprows_by_index_inference():
data = """
To be skipped
Not To Be Skipped
Once more to be skipped
123 34 8 123
456 78 9 456
""".strip()
skiprows = [0, 2]
expected = read_csv(StringIO(data), skiprows=skiprows,
delim_whitespace=True)
result = read_fwf(StringIO(data), skiprows=skiprows)
tm.assert_frame_equal(result, expected)
def test_skiprows_inference_empty():
data = """
AA BBB C
12 345 6
78 901 2
""".strip()
msg = "No rows from which to infer column width"
with pytest.raises(EmptyDataError, match=msg):
read_fwf(StringIO(data), skiprows=3)
def test_whitespace_preservation():
# see gh-16772
header = None
csv_data = """
a ,bbb
cc,dd """
fwf_data = """
a bbb
ccdd """
result = read_fwf(StringIO(fwf_data), widths=[3, 3],
header=header, skiprows=[0], delimiter="\n\t")
expected = read_csv(StringIO(csv_data), header=header)
tm.assert_frame_equal(result, expected)
def test_default_delimiter():
header = None
csv_data = """
a,bbb
cc,dd"""
fwf_data = """
a \tbbb
cc\tdd """
result = read_fwf(StringIO(fwf_data), widths=[3, 3],
header=header, skiprows=[0])
expected = read_csv(StringIO(csv_data), header=header)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("infer", [True, False, None])
def test_fwf_compression(compression_only, infer):
data = """1111111111
2222222222
3333333333""".strip()
compression = compression_only
extension = "gz" if compression == "gzip" else compression
kwargs = dict(widths=[5, 5], names=["one", "two"])
expected = read_fwf(StringIO(data), **kwargs)
if compat.PY3:
data = bytes(data, encoding="utf-8")
with tm.ensure_clean(filename="tmp." + extension) as path:
tm.write_to_compressed(compression, path, data)
if infer is not None:
kwargs["compression"] = "infer" if infer else compression
result = read_fwf(path, **kwargs)
tm.assert_frame_equal(result, expected)
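# --- Illustrative sketch; a simplified version of the suffix lookup,
# with hypothetical names. compression="infer" resolves the codec from
# the filename suffix, which is why the temporary file above is named
# "tmp.<extension>".
_SUFFIX_TO_CODEC = {".gz": "gzip", ".bz2": "bz2",
                    ".zip": "zip", ".xz": "xz"}

def _sketch_infer_compression(path):
    import os
    _, ext = os.path.splitext(path)
    return _SUFFIX_TO_CODEC.get(ext)

# _sketch_infer_compression("tmp.gz") -> "gzip"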
@@ -0,0 +1,222 @@
# -*- coding: utf-8 -*-
"""
Tests that skipped rows are properly handled during
parsing for all of the parsers defined in parsers.py
"""
from datetime import datetime
import numpy as np
import pytest
from pandas.compat import StringIO, lrange, range
from pandas.errors import EmptyDataError
from pandas import DataFrame, Index
import pandas.util.testing as tm
@pytest.mark.parametrize("skiprows", [lrange(6), 6])
def test_skip_rows_bug(all_parsers, skiprows):
# see gh-505
parser = all_parsers
text = """#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
1/1/2000,1.,2.,3.
1/2/2000,4,5,6
1/3/2000,7,8,9
"""
result = parser.read_csv(StringIO(text), skiprows=skiprows, header=None,
index_col=0, parse_dates=True)
index = Index([datetime(2000, 1, 1), datetime(2000, 1, 2),
datetime(2000, 1, 3)], name=0)
expected = DataFrame(np.arange(1., 10.).reshape((3, 3)),
columns=[1, 2, 3], index=index)
tm.assert_frame_equal(result, expected)
def test_deep_skip_rows(all_parsers):
# see gh-4382
parser = all_parsers
data = "a,b,c\n" + "\n".join([",".join([str(i), str(i + 1), str(i + 2)])
for i in range(10)])
condensed_data = "a,b,c\n" + "\n".join([
",".join([str(i), str(i + 1), str(i + 2)])
for i in [0, 1, 2, 3, 4, 6, 8, 9]])
result = parser.read_csv(StringIO(data), skiprows=[6, 8])
condensed_result = parser.read_csv(StringIO(condensed_data))
tm.assert_frame_equal(result, condensed_result)
def test_skip_rows_blank(all_parsers):
# see gh-9832
parser = all_parsers
text = """#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
1/1/2000,1.,2.,3.
1/2/2000,4,5,6
1/3/2000,7,8,9
"""
data = parser.read_csv(StringIO(text), skiprows=6, header=None,
index_col=0, parse_dates=True)
index = Index([datetime(2000, 1, 1), datetime(2000, 1, 2),
datetime(2000, 1, 3)], name=0)
expected = DataFrame(np.arange(1., 10.).reshape((3, 3)),
columns=[1, 2, 3],
index=index)
tm.assert_frame_equal(data, expected)
@pytest.mark.parametrize("data,kwargs,expected", [
("""id,text,num_lines
1,"line 11
line 12",2
2,"line 21
line 22",2
3,"line 31",1""",
dict(skiprows=[1]),
DataFrame([[2, "line 21\nline 22", 2],
[3, "line 31", 1]], columns=["id", "text", "num_lines"])),
("a,b,c\n~a\n b~,~e\n d~,~f\n f~\n1,2,~12\n 13\n 14~",
dict(quotechar="~", skiprows=[2]),
DataFrame([["a\n b", "e\n d", "f\n f"]], columns=["a", "b", "c"])),
(("Text,url\n~example\n "
"sentence\n one~,url1\n~"
"example\n sentence\n two~,url2\n~"
"example\n sentence\n three~,url3"),
dict(quotechar="~", skiprows=[1, 3]),
DataFrame([['example\n sentence\n two', 'url2']],
columns=["Text", "url"]))
])
def test_skip_row_with_newline(all_parsers, data, kwargs, expected):
# see gh-12775 and gh-10911
parser = all_parsers
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
def test_skip_row_with_quote(all_parsers):
# see gh-12775 and gh-10911
parser = all_parsers
data = """id,text,num_lines
1,"line '11' line 12",2
2,"line '21' line 22",2
3,"line '31' line 32",1"""
exp_data = [[2, "line '21' line 22", 2],
[3, "line '31' line 32", 1]]
expected = DataFrame(exp_data, columns=[
"id", "text", "num_lines"])
result = parser.read_csv(StringIO(data), skiprows=[1])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("data,exp_data", [
("""id,text,num_lines
1,"line \n'11' line 12",2
2,"line \n'21' line 22",2
3,"line \n'31' line 32",1""",
[[2, "line \n'21' line 22", 2],
[3, "line \n'31' line 32", 1]]),
("""id,text,num_lines
1,"line '11\n' line 12",2
2,"line '21\n' line 22",2
3,"line '31\n' line 32",1""",
[[2, "line '21\n' line 22", 2],
[3, "line '31\n' line 32", 1]]),
("""id,text,num_lines
1,"line '11\n' \r\tline 12",2
2,"line '21\n' \r\tline 22",2
3,"line '31\n' \r\tline 32",1""",
[[2, "line '21\n' \r\tline 22", 2],
[3, "line '31\n' \r\tline 32", 1]]),
])
def test_skip_row_with_newline_and_quote(all_parsers, data, exp_data):
# see gh-12775 and gh-10911
parser = all_parsers
result = parser.read_csv(StringIO(data), skiprows=[1])
expected = DataFrame(exp_data, columns=["id", "text", "num_lines"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("line_terminator", [
"\n", # "LF"
"\r\n", # "CRLF"
"\r" # "CR"
])
def test_skiprows_lineterminator(all_parsers, line_terminator):
# see gh-9079
parser = all_parsers
data = "\n".join(["SMOSMANIA ThetaProbe-ML2X ",
"2007/01/01 01:00 0.2140 U M ",
"2007/01/01 02:00 0.2141 M O ",
"2007/01/01 04:00 0.2142 D M "])
expected = DataFrame([["2007/01/01", "01:00", 0.2140, "U", "M"],
["2007/01/01", "02:00", 0.2141, "M", "O"],
["2007/01/01", "04:00", 0.2142, "D", "M"]],
columns=["date", "time", "var", "flag",
"oflag"])
if parser.engine == "python" and line_terminator == "\r":
pytest.skip("'CR' not respect with the Python parser yet")
data = data.replace("\n", line_terminator)
result = parser.read_csv(StringIO(data), skiprows=1, delim_whitespace=True,
names=["date", "time", "var", "flag", "oflag"])
tm.assert_frame_equal(result, expected)
def test_skiprows_infield_quote(all_parsers):
# see gh-14459
parser = all_parsers
data = "a\"\nb\"\na\n1"
expected = DataFrame({"a": [1]})
result = parser.read_csv(StringIO(data), skiprows=2)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kwargs,expected", [
(dict(), DataFrame({"1": [3, 5]})),
(dict(header=0, names=["foo"]), DataFrame({"foo": [3, 5]}))
])
def test_skip_rows_callable(all_parsers, kwargs, expected):
parser = all_parsers
data = "a\n1\n2\n3\n4\n5"
result = parser.read_csv(StringIO(data),
skiprows=lambda x: x % 2 == 0,
**kwargs)
tm.assert_frame_equal(result, expected)
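# --- Illustrative note, not part of the original suite ---
# A callable skiprows receives the 0-based row number (the header line
# is row 0) and the row is skipped when it returns True; the lambda
# above therefore keeps rows 1, 3 and 5 -- the values 1, 3 and 5.
def _demo_skiprows_callable():
    rows = ["a", "1", "2", "3", "4", "5"]
    kept = [row for i, row in enumerate(rows) if not i % 2 == 0]
    assert kept == ["1", "3", "5"]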
def test_skip_rows_skip_all(all_parsers):
parser = all_parsers
data = "a\n1\n2\n3\n4\n5"
msg = "No columns to parse from file"
with pytest.raises(EmptyDataError, match=msg):
parser.read_csv(StringIO(data), skiprows=lambda x: True)
def test_skip_rows_bad_callable(all_parsers):
msg = "by zero"
parser = all_parsers
data = "a\n1\n2\n3\n4\n5"
with pytest.raises(ZeroDivisionError, match=msg):
parser.read_csv(StringIO(data), skiprows=lambda x: 1 / 0)
@@ -0,0 +1,353 @@
# -*- coding: utf-8 -*-
"""
Tests the TextReader class in parsers.pyx, which
is integral to the C engine in parsers.py
"""
import os
import numpy as np
from numpy import nan
import pytest
import pandas._libs.parsers as parser
from pandas._libs.parsers import TextReader
import pandas.compat as compat
from pandas.compat import BytesIO, StringIO, map
from pandas import DataFrame
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal
from pandas.io.parsers import TextFileReader, read_csv
class TestTextReader(object):
@pytest.fixture(autouse=True)
def setup_method(self, datapath):
self.dirpath = datapath('io', 'parser', 'data')
self.csv1 = os.path.join(self.dirpath, 'test1.csv')
self.csv2 = os.path.join(self.dirpath, 'test2.csv')
self.xls1 = os.path.join(self.dirpath, 'test.xls')
def test_file_handle(self):
with open(self.csv1, 'rb') as f:
reader = TextReader(f)
reader.read()
def test_string_filename(self):
reader = TextReader(self.csv1, header=None)
reader.read()
def test_file_handle_mmap(self):
with open(self.csv1, 'rb') as f:
reader = TextReader(f, memory_map=True, header=None)
reader.read()
def test_StringIO(self):
with open(self.csv1, 'rb') as f:
text = f.read()
src = BytesIO(text)
reader = TextReader(src, header=None)
reader.read()
def test_string_factorize(self):
# should this be optional?
data = 'a\nb\na\nb\na'
reader = TextReader(StringIO(data), header=None)
result = reader.read()
assert len(set(map(id, result[0]))) == 2
def test_skipinitialspace(self):
data = ('a, b\n'
'a, b\n'
'a, b\n'
'a, b')
reader = TextReader(StringIO(data), skipinitialspace=True,
header=None)
result = reader.read()
tm.assert_numpy_array_equal(result[0], np.array(['a', 'a', 'a', 'a'],
dtype=np.object_))
tm.assert_numpy_array_equal(result[1], np.array(['b', 'b', 'b', 'b'],
dtype=np.object_))
def test_parse_booleans(self):
data = 'True\nFalse\nTrue\nTrue'
reader = TextReader(StringIO(data), header=None)
result = reader.read()
assert result[0].dtype == np.bool_
def test_delimit_whitespace(self):
data = 'a b\na\t\t "b"\n"a"\t \t b'
reader = TextReader(StringIO(data), delim_whitespace=True,
header=None)
result = reader.read()
tm.assert_numpy_array_equal(result[0], np.array(['a', 'a', 'a'],
dtype=np.object_))
tm.assert_numpy_array_equal(result[1], np.array(['b', 'b', 'b'],
dtype=np.object_))
def test_embedded_newline(self):
data = 'a\n"hello\nthere"\nthis'
reader = TextReader(StringIO(data), header=None)
result = reader.read()
expected = np.array(['a', 'hello\nthere', 'this'], dtype=np.object_)
tm.assert_numpy_array_equal(result[0], expected)
def test_euro_decimal(self):
data = '12345,67\n345,678'
reader = TextReader(StringIO(data), delimiter=':',
decimal=',', header=None)
result = reader.read()
expected = np.array([12345.67, 345.678])
tm.assert_almost_equal(result[0], expected)
def test_integer_thousands(self):
data = '123,456\n12,500'
reader = TextReader(StringIO(data), delimiter=':',
thousands=',', header=None)
result = reader.read()
expected = np.array([123456, 12500], dtype=np.int64)
tm.assert_almost_equal(result[0], expected)
def test_integer_thousands_alt(self):
data = '123.456\n12.500'
reader = TextFileReader(StringIO(data), delimiter=':',
thousands='.', header=None)
result = reader.read()
expected = DataFrame([123456, 12500])
tm.assert_frame_equal(result, expected)
def test_skip_bad_lines(self, capsys):
# too many lines, see #2430 for why
data = ('a:b:c\n'
'd:e:f\n'
'g:h:i\n'
'j:k:l:m\n'
'l:m:n\n'
'o:p:q:r')
reader = TextReader(StringIO(data), delimiter=':',
header=None)
msg = (r"Error tokenizing data\. C error: Expected 3 fields in"
" line 4, saw 4")
with pytest.raises(parser.ParserError, match=msg):
reader.read()
reader = TextReader(StringIO(data), delimiter=':',
header=None,
error_bad_lines=False,
warn_bad_lines=False)
result = reader.read()
expected = {0: np.array(['a', 'd', 'g', 'l'], dtype=object),
1: np.array(['b', 'e', 'h', 'm'], dtype=object),
2: np.array(['c', 'f', 'i', 'n'], dtype=object)}
assert_array_dicts_equal(result, expected)
reader = TextReader(StringIO(data), delimiter=':',
header=None,
error_bad_lines=False,
warn_bad_lines=True)
reader.read()
captured = capsys.readouterr()
assert 'Skipping line 4' in captured.err
assert 'Skipping line 6' in captured.err
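    # --- Illustrative sketch, not part of the original suite ---
    # The same switches exist on the high-level read_csv API:
    # error_bad_lines=False silently drops over-wide rows, and
    # warn_bad_lines controls whether each drop is reported on stderr.
    def _demo_skip_bad_lines_high_level(self):
        data = ('a:b:c\n'
                'd:e:f\n'
                'g:h:i\n'
                'j:k:l:m\n'
                'l:m:n\n'
                'o:p:q:r')
        df = read_csv(StringIO(data), sep=':', header=None,
                      error_bad_lines=False, warn_bad_lines=False)
        assert df.shape[0] == 4  # the two 4-field rows were dropped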
def test_header_not_enough_lines(self):
data = ('skip this\n'
'skip this\n'
'a,b,c\n'
'1,2,3\n'
'4,5,6')
reader = TextReader(StringIO(data), delimiter=',', header=2)
header = reader.header
expected = [['a', 'b', 'c']]
assert header == expected
recs = reader.read()
expected = {0: np.array([1, 4], dtype=np.int64),
1: np.array([2, 5], dtype=np.int64),
2: np.array([3, 6], dtype=np.int64)}
assert_array_dicts_equal(recs, expected)
def test_escapechar(self):
data = ('\\"hello world\"\n'
'\\"hello world\"\n'
'\\"hello world\"')
reader = TextReader(StringIO(data), delimiter=',', header=None,
escapechar='\\')
result = reader.read()
expected = {0: np.array(['"hello world"'] * 3, dtype=object)}
assert_array_dicts_equal(result, expected)
def test_eof_has_eol(self):
# handling of new line at EOF
pass
def test_na_substitution(self):
pass
def test_numpy_string_dtype(self):
data = """\
a,1
aa,2
aaa,3
aaaa,4
aaaaa,5"""
def _make_reader(**kwds):
return TextReader(StringIO(data), delimiter=',', header=None,
**kwds)
reader = _make_reader(dtype='S5,i4')
result = reader.read()
assert result[0].dtype == 'S5'
ex_values = np.array(['a', 'aa', 'aaa', 'aaaa', 'aaaaa'], dtype='S5')
assert (result[0] == ex_values).all()
assert result[1].dtype == 'i4'
reader = _make_reader(dtype='S4')
result = reader.read()
assert result[0].dtype == 'S4'
ex_values = np.array(['a', 'aa', 'aaa', 'aaaa', 'aaaa'], dtype='S4')
assert (result[0] == ex_values).all()
assert result[1].dtype == 'S4'
def test_pass_dtype(self):
data = """\
one,two
1,a
2,b
3,c
4,d"""
def _make_reader(**kwds):
return TextReader(StringIO(data), delimiter=',', **kwds)
reader = _make_reader(dtype={'one': 'u1', 1: 'S1'})
result = reader.read()
assert result[0].dtype == 'u1'
assert result[1].dtype == 'S1'
reader = _make_reader(dtype={'one': np.uint8, 1: object})
result = reader.read()
assert result[0].dtype == 'u1'
assert result[1].dtype == 'O'
reader = _make_reader(dtype={'one': np.dtype('u1'),
1: np.dtype('O')})
result = reader.read()
assert result[0].dtype == 'u1'
assert result[1].dtype == 'O'
def test_usecols(self):
data = """\
a,b,c
1,2,3
4,5,6
7,8,9
10,11,12"""
def _make_reader(**kwds):
return TextReader(StringIO(data), delimiter=',', **kwds)
reader = _make_reader(usecols=(1, 2))
result = reader.read()
exp = _make_reader().read()
assert len(result) == 2
assert (result[1] == exp[1]).all()
assert (result[2] == exp[2]).all()
def test_cr_delimited(self):
def _test(text, **kwargs):
nice_text = text.replace('\r', '\r\n')
result = TextReader(StringIO(text), **kwargs).read()
expected = TextReader(StringIO(nice_text), **kwargs).read()
assert_array_dicts_equal(result, expected)
data = 'a,b,c\r1,2,3\r4,5,6\r7,8,9\r10,11,12'
_test(data, delimiter=',')
data = 'a b c\r1 2 3\r4 5 6\r7 8 9\r10 11 12'
_test(data, delim_whitespace=True)
data = 'a,b,c\r1,2,3\r4,5,6\r,88,9\r10,11,12'
_test(data, delimiter=',')
sample = ('A,B,C,D,E,F,G,H,I,J,K,L,M,N,O\r'
'AAAAA,BBBBB,0,0,0,0,0,0,0,0,0,0,0,0,0\r'
',BBBBB,0,0,0,0,0,0,0,0,0,0,0,0,0')
_test(sample, delimiter=',')
data = 'A B C\r 2 3\r4 5 6'
_test(data, delim_whitespace=True)
data = 'A B C\r2 3\r4 5 6'
_test(data, delim_whitespace=True)
def test_empty_field_eof(self):
data = 'a,b,c\n1,2,3\n4,,'
result = TextReader(StringIO(data), delimiter=',').read()
expected = {0: np.array([1, 4], dtype=np.int64),
1: np.array(['2', ''], dtype=object),
2: np.array(['3', ''], dtype=object)}
assert_array_dicts_equal(result, expected)
# GH5664
a = DataFrame([['b'], [nan]], columns=['a'], index=['a', 'c'])
b = DataFrame([[1, 1, 1, 0], [1, 1, 1, 0]],
columns=list('abcd'),
index=[1, 1])
c = DataFrame([[1, 2, 3, 4], [6, nan, nan, nan],
[8, 9, 10, 11], [13, 14, nan, nan]],
columns=list('abcd'),
index=[0, 5, 7, 12])
for _ in range(100):
df = read_csv(StringIO('a,b\nc\n'), skiprows=0,
names=['a'], engine='c')
assert_frame_equal(df, a)
df = read_csv(StringIO('1,1,1,1,0\n' * 2 + '\n' * 2),
names=list("abcd"), engine='c')
assert_frame_equal(df, b)
df = read_csv(StringIO('0,1,2,3,4\n5,6\n7,8,9,10,11\n12,13,14'),
names=list('abcd'), engine='c')
assert_frame_equal(df, c)
def test_empty_csv_input(self):
# GH14867
df = read_csv(StringIO(), chunksize=20, header=None,
names=['a', 'b', 'c'])
assert isinstance(df, TextFileReader)
def assert_array_dicts_equal(left, right):
    for k, v in compat.iteritems(left):
        # assert_numpy_array_equal raises on mismatch and returns None,
        # so wrapping it in a bare `assert` would always fail.
        tm.assert_numpy_array_equal(np.asarray(v),
                                    np.asarray(right[k]))
@@ -0,0 +1,140 @@
# -*- coding: utf-8 -*-
"""
Tests that features that are currently unsupported in
either the Python or C parser are actually enforced
and are clearly communicated to the user.
Ultimately, the goal is to remove test cases from this
test suite as new feature support is added to the parsers.
"""
import pytest
from pandas.compat import StringIO
from pandas.errors import ParserError
import pandas.util.testing as tm
import pandas.io.parsers as parsers
from pandas.io.parsers import read_csv
@pytest.fixture(params=["python", "python-fwf"], ids=lambda val: val)
def python_engine(request):
return request.param
class TestUnsupportedFeatures(object):
def test_mangle_dupe_cols_false(self):
# see gh-12935
data = 'a b c\n1 2 3'
msg = 'is not supported'
for engine in ('c', 'python'):
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine=engine,
mangle_dupe_cols=False)
def test_c_engine(self):
# see gh-6607
data = 'a b c\n1 2 3'
msg = 'does not support'
# specify C engine with unsupported options (raise)
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine='c',
sep=None, delim_whitespace=False)
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine='c', sep=r'\s')
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine='c', sep='\t', quotechar=chr(128))
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine='c', skipfooter=1)
# specify C-unsupported options without python-unsupported options
with tm.assert_produces_warning(parsers.ParserWarning):
read_csv(StringIO(data), sep=None, delim_whitespace=False)
with tm.assert_produces_warning(parsers.ParserWarning):
read_csv(StringIO(data), sep=r'\s')
with tm.assert_produces_warning(parsers.ParserWarning):
read_csv(StringIO(data), sep='\t', quotechar=chr(128))
with tm.assert_produces_warning(parsers.ParserWarning):
read_csv(StringIO(data), skipfooter=1)
text = """ A B C D E
one two three four
a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640
a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744
x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""
msg = 'Error tokenizing data'
with pytest.raises(ParserError, match=msg):
read_csv(StringIO(text), sep='\\s+')
with pytest.raises(ParserError, match=msg):
read_csv(StringIO(text), engine='c', sep='\\s+')
msg = "Only length-1 thousands markers supported"
data = """A|B|C
1|2,334|5
10|13|10.
"""
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), thousands=',,')
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), thousands='')
msg = "Only length-1 line terminators supported"
data = 'a,b,c~~1,2,3~~4,5,6'
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), lineterminator='~~')
def test_python_engine(self, python_engine):
from pandas.io.parsers import _python_unsupported as py_unsupported
data = """1,2,3,,
1,2,3,4,
1,2,3,4,5
1,2,,,
1,2,3,4,"""
for default in py_unsupported:
msg = ('The %r option is not supported '
'with the %r engine' % (default, python_engine))
kwargs = {default: object()}
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine=python_engine, **kwargs)
def test_python_engine_file_no_next(self, python_engine):
# see gh-16530
class NoNextBuffer(object):
def __init__(self, csv_data):
self.data = csv_data
def __iter__(self):
return self
def read(self):
return self.data
data = "a\n1"
msg = "The 'python' engine cannot iterate"
with pytest.raises(ValueError, match=msg):
read_csv(NoNextBuffer(data), engine=python_engine)
class TestDeprecatedFeatures(object):
@pytest.mark.parametrize("engine", ["c", "python"])
@pytest.mark.parametrize("kwargs", [{"tupleize_cols": True},
{"tupleize_cols": False}])
def test_deprecated_args(self, engine, kwargs):
data = "1,2,3"
with tm.assert_produces_warning(
FutureWarning, check_stacklevel=False):
read_csv(StringIO(data), engine=engine, **kwargs)
@@ -0,0 +1,534 @@
# -*- coding: utf-8 -*-
"""
Tests the usecols functionality during parsing
for all of the parsers defined in parsers.py
"""
import numpy as np
import pytest
from pandas._libs.tslib import Timestamp
from pandas.compat import StringIO
from pandas import DataFrame, Index
import pandas.util.testing as tm
_msg_validate_usecols_arg = ("'usecols' must either be list-like "
"of all strings, all unicode, all "
"integers or a callable.")
_msg_validate_usecols_names = ("Usecols do not match columns, columns "
"expected but not found: {0}")
def test_raise_on_mixed_dtype_usecols(all_parsers):
# See gh-12678
data = """a,b,c
1000,2000,3000
4000,5000,6000
"""
usecols = [0, "b", 2]
parser = all_parsers
with pytest.raises(ValueError, match=_msg_validate_usecols_arg):
parser.read_csv(StringIO(data), usecols=usecols)
@pytest.mark.parametrize("usecols", [(1, 2), ("b", "c")])
def test_usecols(all_parsers, usecols):
data = """\
a,b,c
1,2,3
4,5,6
7,8,9
10,11,12"""
parser = all_parsers
result = parser.read_csv(StringIO(data), usecols=usecols)
expected = DataFrame([[2, 3], [5, 6], [8, 9],
[11, 12]], columns=["b", "c"])
tm.assert_frame_equal(result, expected)
def test_usecols_with_names(all_parsers):
data = """\
a,b,c
1,2,3
4,5,6
7,8,9
10,11,12"""
parser = all_parsers
names = ["foo", "bar"]
result = parser.read_csv(StringIO(data), names=names,
usecols=[1, 2], header=0)
expected = DataFrame([[2, 3], [5, 6], [8, 9],
[11, 12]], columns=names)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("names,usecols", [
(["b", "c"], [1, 2]),
(["a", "b", "c"], ["b", "c"])
])
def test_usecols_relative_to_names(all_parsers, names, usecols):
data = """\
1,2,3
4,5,6
7,8,9
10,11,12"""
parser = all_parsers
result = parser.read_csv(StringIO(data), names=names,
header=None, usecols=usecols)
expected = DataFrame([[2, 3], [5, 6], [8, 9],
[11, 12]], columns=["b", "c"])
tm.assert_frame_equal(result, expected)
def test_usecols_relative_to_names2(all_parsers):
# see gh-5766
data = """\
1,2,3
4,5,6
7,8,9
10,11,12"""
parser = all_parsers
result = parser.read_csv(StringIO(data), names=["a", "b"],
header=None, usecols=[0, 1])
expected = DataFrame([[1, 2], [4, 5], [7, 8],
[10, 11]], columns=["a", "b"])
tm.assert_frame_equal(result, expected)
def test_usecols_name_length_conflict(all_parsers):
data = """\
1,2,3
4,5,6
7,8,9
10,11,12"""
parser = all_parsers
msg = ("Number of passed names did not "
"match number of header fields in the file"
if parser.engine == "python" else
"Passed header names mismatches usecols")
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), names=["a", "b"],
header=None, usecols=[1])
def test_usecols_single_string(all_parsers):
# see gh-20558
parser = all_parsers
data = """foo, bar, baz
1000, 2000, 3000
4000, 5000, 6000"""
with pytest.raises(ValueError, match=_msg_validate_usecols_arg):
parser.read_csv(StringIO(data), usecols="foo")
@pytest.mark.parametrize("data", ["a,b,c,d\n1,2,3,4\n5,6,7,8",
"a,b,c,d\n1,2,3,4,\n5,6,7,8,"])
def test_usecols_index_col_false(all_parsers, data):
# see gh-9082
parser = all_parsers
usecols = ["a", "c", "d"]
expected = DataFrame({"a": [1, 5], "c": [3, 7], "d": [4, 8]})
result = parser.read_csv(StringIO(data), usecols=usecols, index_col=False)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("index_col", ["b", 0])
@pytest.mark.parametrize("usecols", [["b", "c"], [1, 2]])
def test_usecols_index_col_conflict(all_parsers, usecols, index_col):
# see gh-4201: test that index_col as integer reflects usecols
parser = all_parsers
data = "a,b,c,d\nA,a,1,one\nB,b,2,two"
expected = DataFrame({"c": [1, 2]}, index=Index(["a", "b"], name="b"))
result = parser.read_csv(StringIO(data), usecols=usecols,
index_col=index_col)
tm.assert_frame_equal(result, expected)
def test_usecols_index_col_conflict2(all_parsers):
# see gh-4201: test that index_col as integer reflects usecols
parser = all_parsers
data = "a,b,c,d\nA,a,1,one\nB,b,2,two"
expected = DataFrame({"b": ["a", "b"], "c": [1, 2], "d": ("one", "two")})
expected = expected.set_index(["b", "c"])
result = parser.read_csv(StringIO(data), usecols=["b", "c", "d"],
index_col=["b", "c"])
tm.assert_frame_equal(result, expected)
def test_usecols_implicit_index_col(all_parsers):
# see gh-2654
parser = all_parsers
data = "a,b,c\n4,apple,bat,5.7\n8,orange,cow,10"
result = parser.read_csv(StringIO(data), usecols=["a", "b"])
expected = DataFrame({"a": ["apple", "orange"],
"b": ["bat", "cow"]}, index=[4, 8])
tm.assert_frame_equal(result, expected)
def test_usecols_regex_sep(all_parsers):
# see gh-2733
parser = all_parsers
data = "a b c\n4 apple bat 5.7\n8 orange cow 10"
result = parser.read_csv(StringIO(data), sep=r"\s+", usecols=("a", "b"))
expected = DataFrame({"a": ["apple", "orange"],
"b": ["bat", "cow"]}, index=[4, 8])
tm.assert_frame_equal(result, expected)
def test_usecols_with_whitespace(all_parsers):
parser = all_parsers
data = "a b c\n4 apple bat 5.7\n8 orange cow 10"
result = parser.read_csv(StringIO(data), delim_whitespace=True,
usecols=("a", "b"))
expected = DataFrame({"a": ["apple", "orange"],
"b": ["bat", "cow"]}, index=[4, 8])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("usecols,expected", [
# Column selection by index.
([0, 1], DataFrame(data=[[1000, 2000], [4000, 5000]],
columns=["2", "0"])),
# Column selection by name.
(["0", "1"], DataFrame(data=[[2000, 3000], [5000, 6000]],
columns=["0", "1"])),
])
def test_usecols_with_integer_like_header(all_parsers, usecols, expected):
parser = all_parsers
data = """2,0,1
1000,2000,3000
4000,5000,6000"""
result = parser.read_csv(StringIO(data), usecols=usecols)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]])
def test_usecols_with_parse_dates(all_parsers, usecols):
# see gh-9755
data = """a,b,c,d,e
0,1,20140101,0900,4
0,1,20140102,1000,4"""
parser = all_parsers
parse_dates = [[1, 2]]
cols = {
"a": [0, 0],
"c_d": [
Timestamp("2014-01-01 09:00:00"),
Timestamp("2014-01-02 10:00:00")
]
}
expected = DataFrame(cols, columns=["c_d", "a"])
result = parser.read_csv(StringIO(data), usecols=usecols,
parse_dates=parse_dates)
tm.assert_frame_equal(result, expected)
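# --- Illustrative note, not part of the original suite ---
# parse_dates=[[1, 2]] joins columns 1 and 2 with a space before
# parsing and names the merged result "c_d" ("c" + "_" + "d"); the
# usecols selection must still cover both source columns.
def _demo_merged_date_columns():
    import pandas as pd
    merged = "20140101" + " " + "0900"
    assert (pd.to_datetime(merged, format="%Y%m%d %H%M") ==
            Timestamp("2014-01-01 09:00:00"))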
def test_usecols_with_parse_dates2(all_parsers):
# see gh-13604
parser = all_parsers
data = """2008-02-07 09:40,1032.43
2008-02-07 09:50,1042.54
2008-02-07 10:00,1051.65"""
names = ["date", "values"]
usecols = names[:]
parse_dates = [0]
index = Index([Timestamp("2008-02-07 09:40"),
Timestamp("2008-02-07 09:50"),
Timestamp("2008-02-07 10:00")],
name="date")
cols = {"values": [1032.43, 1042.54, 1051.65]}
expected = DataFrame(cols, index=index)
result = parser.read_csv(StringIO(data), parse_dates=parse_dates,
index_col=0, usecols=usecols,
header=None, names=names)
tm.assert_frame_equal(result, expected)
def test_usecols_with_parse_dates3(all_parsers):
# see gh-14792
parser = all_parsers
data = """a,b,c,d,e,f,g,h,i,j
2016/09/21,1,1,2,3,4,5,6,7,8"""
usecols = list("abcdefghij")
parse_dates = [0]
cols = {"a": Timestamp("2016-09-21"),
"b": [1], "c": [1], "d": [2],
"e": [3], "f": [4], "g": [5],
"h": [6], "i": [7], "j": [8]}
expected = DataFrame(cols, columns=usecols)
result = parser.read_csv(StringIO(data), usecols=usecols,
parse_dates=parse_dates)
tm.assert_frame_equal(result, expected)
def test_usecols_with_parse_dates4(all_parsers):
data = "a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8"
usecols = list("abcdefghij")
parse_dates = [[0, 1]]
parser = all_parsers
cols = {"a_b": "2016/09/21 1",
"c": [1], "d": [2], "e": [3], "f": [4],
"g": [5], "h": [6], "i": [7], "j": [8]}
expected = DataFrame(cols, columns=["a_b"] + list("cdefghij"))
result = parser.read_csv(StringIO(data), usecols=usecols,
parse_dates=parse_dates)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]])
@pytest.mark.parametrize("names", [
list("abcde"), # Names span all columns in original data.
list("acd"), # Names span only the selected columns.
])
def test_usecols_with_parse_dates_and_names(all_parsers, usecols, names):
# see gh-9755
s = """0,1,20140101,0900,4
0,1,20140102,1000,4"""
parse_dates = [[1, 2]]
parser = all_parsers
cols = {
"a": [0, 0],
"c_d": [
Timestamp("2014-01-01 09:00:00"),
Timestamp("2014-01-02 10:00:00")
]
}
expected = DataFrame(cols, columns=["c_d", "a"])
result = parser.read_csv(StringIO(s), names=names,
parse_dates=parse_dates,
usecols=usecols)
tm.assert_frame_equal(result, expected)
def test_usecols_with_unicode_strings(all_parsers):
# see gh-13219
data = """AAA,BBB,CCC,DDD
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
parser = all_parsers
exp_data = {
"AAA": {
0: 0.056674972999999997,
1: 2.6132309819999997,
2: 3.5689350380000002
},
"BBB": {0: 8, 1: 2, 2: 7}
}
expected = DataFrame(exp_data)
result = parser.read_csv(StringIO(data), usecols=[u"AAA", u"BBB"])
tm.assert_frame_equal(result, expected)
def test_usecols_with_single_byte_unicode_strings(all_parsers):
# see gh-13219
data = """A,B,C,D
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
parser = all_parsers
exp_data = {
"A": {
0: 0.056674972999999997,
1: 2.6132309819999997,
2: 3.5689350380000002
},
"B": {0: 8, 1: 2, 2: 7}
}
expected = DataFrame(exp_data)
result = parser.read_csv(StringIO(data), usecols=[u"A", u"B"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("usecols", [[u"AAA", b"BBB"], [b"AAA", u"BBB"]])
def test_usecols_with_mixed_encoding_strings(all_parsers, usecols):
data = """AAA,BBB,CCC,DDD
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
parser = all_parsers
with pytest.raises(ValueError, match=_msg_validate_usecols_arg):
parser.read_csv(StringIO(data), usecols=usecols)
@pytest.mark.parametrize("usecols", [
["あああ", "いい"],
[u"あああ", u"いい"]
])
def test_usecols_with_multi_byte_characters(all_parsers, usecols):
data = """あああ,いい,ううう,ええええ
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
parser = all_parsers
exp_data = {
"あああ": {
0: 0.056674972999999997,
1: 2.6132309819999997,
2: 3.5689350380000002
},
"いい": {0: 8, 1: 2, 2: 7}
}
expected = DataFrame(exp_data)
result = parser.read_csv(StringIO(data), usecols=usecols)
tm.assert_frame_equal(result, expected)
def test_empty_usecols(all_parsers):
data = "a,b,c\n1,2,3\n4,5,6"
expected = DataFrame()
parser = all_parsers
result = parser.read_csv(StringIO(data), usecols=set())
tm.assert_frame_equal(result, expected)
def test_np_array_usecols(all_parsers):
# see gh-12546
parser = all_parsers
data = "a,b,c\n1,2,3"
usecols = np.array(["a", "b"])
expected = DataFrame([[1, 2]], columns=usecols)
result = parser.read_csv(StringIO(data), usecols=usecols)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("usecols,expected", [
(lambda x: x.upper() in ["AAA", "BBB", "DDD"],
DataFrame({
"AaA": {
0: 0.056674972999999997,
1: 2.6132309819999997,
2: 3.5689350380000002
},
"bBb": {0: 8, 1: 2, 2: 7},
"ddd": {0: "a", 1: "b", 2: "a"}
})),
(lambda x: False, DataFrame()),
])
def test_callable_usecols(all_parsers, usecols, expected):
# see gh-14154
data = """AaA,bBb,CCC,ddd
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
parser = all_parsers
result = parser.read_csv(StringIO(data), usecols=usecols)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("usecols", [["a", "c"], lambda x: x in ["a", "c"]])
def test_incomplete_first_row(all_parsers, usecols):
# see gh-6710
data = "1,2\n1,2,3"
parser = all_parsers
names = ["a", "b", "c"]
expected = DataFrame({"a": [1, 1], "c": [np.nan, 3]})
result = parser.read_csv(StringIO(data), names=names, usecols=usecols)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("data,usecols,kwargs,expected", [
# see gh-8985
("19,29,39\n" * 2 + "10,20,30,40", [0, 1, 2],
dict(header=None), DataFrame([[19, 29, 39], [19, 29, 39], [10, 20, 30]])),
# see gh-9549
(("A,B,C\n1,2,3\n3,4,5\n1,2,4,5,1,6\n"
"1,2,3,,,1,\n1,2,3\n5,6,7"), ["A", "B", "C"],
dict(), DataFrame({"A": [1, 3, 1, 1, 1, 5],
"B": [2, 4, 2, 2, 2, 6],
"C": [3, 5, 4, 3, 3, 7]})),
])
def test_uneven_length_cols(all_parsers, data, usecols, kwargs, expected):
# see gh-8985
parser = all_parsers
result = parser.read_csv(StringIO(data), usecols=usecols, **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("usecols,kwargs,expected,msg", [
(["a", "b", "c", "d"], dict(),
DataFrame({"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}), None),
(["a", "b", "c", "f"], dict(), None,
_msg_validate_usecols_names.format(r"\['f'\]")),
(["a", "b", "f"], dict(), None,
_msg_validate_usecols_names.format(r"\['f'\]")),
(["a", "b", "f", "g"], dict(), None,
_msg_validate_usecols_names.format(r"\[('f', 'g'|'g', 'f')\]")),
# see gh-14671
(None, dict(header=0, names=["A", "B", "C", "D"]),
DataFrame({"A": [1, 5], "B": [2, 6], "C": [3, 7],
"D": [4, 8]}), None),
(["A", "B", "C", "f"], dict(header=0, names=["A", "B", "C", "D"]),
None, _msg_validate_usecols_names.format(r"\['f'\]")),
(["A", "B", "f"], dict(names=["A", "B", "C", "D"]),
None, _msg_validate_usecols_names.format(r"\['f'\]")),
])
def test_raises_on_usecols_names_mismatch(all_parsers, usecols,
kwargs, expected, msg):
data = "a,b,c,d\n1,2,3,4\n5,6,7,8"
kwargs.update(usecols=usecols)
parser = all_parsers
if expected is None:
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
else:
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.xfail(
reason="see gh-16469: works on the C engine but not the Python engine",
strict=False)
@pytest.mark.parametrize("usecols", [["A", "C"], [0, 2]])
def test_usecols_subset_names_mismatch_orig_columns(all_parsers, usecols):
data = "a,b,c,d\n1,2,3,4\n5,6,7,8"
names = ["A", "B", "C", "D"]
parser = all_parsers
result = parser.read_csv(StringIO(data), header=0,
names=names, usecols=usecols)
expected = DataFrame({"A": [1, 5], "C": [3, 7]})
tm.assert_frame_equal(result, expected)
@@ -0,0 +1,25 @@
import pytest
from pandas.compat import StringIO
from pandas import read_sas
import pandas.util.testing as tm
class TestSas(object):
def test_sas_buffer_format(self):
# see gh-14947
b = StringIO("")
msg = ("If this is a buffer object rather than a string "
"name, you must specify a format string")
with pytest.raises(ValueError, match=msg):
read_sas(b)
def test_sas_read_no_format_or_extension(self):
# see gh-24548
msg = ("unable to infer format of SAS file")
with tm.ensure_clean('test_file_no_extension') as path:
with pytest.raises(ValueError, match=msg):
read_sas(path)
@@ -0,0 +1,227 @@
import io
import os
import numpy as np
import pytest
from pandas.compat import PY2
from pandas.errors import EmptyDataError
import pandas.util._test_decorators as td
import pandas as pd
import pandas.util.testing as tm
# https://github.com/cython/cython/issues/1720
@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning")
class TestSAS7BDAT(object):
@pytest.fixture(autouse=True)
def setup_method(self, datapath):
self.dirpath = datapath("io", "sas", "data")
self.data = []
self.test_ix = [list(range(1, 16)), [16]]
for j in 1, 2:
fname = os.path.join(
self.dirpath, "test_sas7bdat_{j}.csv".format(j=j))
df = pd.read_csv(fname)
epoch = pd.datetime(1960, 1, 1)
t1 = pd.to_timedelta(df["Column4"], unit='d')
df["Column4"] = epoch + t1
t2 = pd.to_timedelta(df["Column12"], unit='d')
df["Column12"] = epoch + t2
for k in range(df.shape[1]):
col = df.iloc[:, k]
if col.dtype == np.int64:
df.iloc[:, k] = df.iloc[:, k].astype(np.float64)
elif col.dtype == np.dtype('O'):
if PY2:
f = lambda x: (x.decode('utf-8') if
isinstance(x, str) else x)
df.iloc[:, k] = df.iloc[:, k].apply(f)
self.data.append(df)
def test_from_file(self):
for j in 0, 1:
df0 = self.data[j]
for k in self.test_ix[j]:
fname = os.path.join(
self.dirpath, "test{k}.sas7bdat".format(k=k))
df = pd.read_sas(fname, encoding='utf-8')
tm.assert_frame_equal(df, df0)
def test_from_buffer(self):
for j in 0, 1:
df0 = self.data[j]
for k in self.test_ix[j]:
fname = os.path.join(
self.dirpath, "test{k}.sas7bdat".format(k=k))
with open(fname, 'rb') as f:
byts = f.read()
buf = io.BytesIO(byts)
rdr = pd.read_sas(buf, format="sas7bdat",
iterator=True, encoding='utf-8')
df = rdr.read()
tm.assert_frame_equal(df, df0, check_exact=False)
rdr.close()
def test_from_iterator(self):
for j in 0, 1:
df0 = self.data[j]
for k in self.test_ix[j]:
fname = os.path.join(
self.dirpath, "test{k}.sas7bdat".format(k=k))
rdr = pd.read_sas(fname, iterator=True, encoding='utf-8')
df = rdr.read(2)
tm.assert_frame_equal(df, df0.iloc[0:2, :])
df = rdr.read(3)
tm.assert_frame_equal(df, df0.iloc[2:5, :])
rdr.close()
@td.skip_if_no('pathlib')
def test_path_pathlib(self):
from pathlib import Path
for j in 0, 1:
df0 = self.data[j]
for k in self.test_ix[j]:
fname = Path(os.path.join(
self.dirpath, "test{k}.sas7bdat".format(k=k)))
df = pd.read_sas(fname, encoding='utf-8')
tm.assert_frame_equal(df, df0)
@td.skip_if_no('py.path')
def test_path_localpath(self):
from py.path import local as LocalPath
for j in 0, 1:
df0 = self.data[j]
for k in self.test_ix[j]:
fname = LocalPath(os.path.join(
self.dirpath, "test{k}.sas7bdat".format(k=k)))
df = pd.read_sas(fname, encoding='utf-8')
tm.assert_frame_equal(df, df0)
def test_iterator_loop(self):
# github #13654
for j in 0, 1:
for k in self.test_ix[j]:
                for chunksize in 3, 5, 10, 11:
                    fname = os.path.join(
                        self.dirpath, "test{k}.sas7bdat".format(k=k))
                    rdr = pd.read_sas(fname, chunksize=chunksize,
                                      encoding='utf-8')
y = 0
for x in rdr:
y += x.shape[0]
assert y == rdr.row_count
rdr.close()
def test_iterator_read_too_much(self):
# github #14734
k = self.test_ix[0][0]
fname = os.path.join(self.dirpath, "test{k}.sas7bdat".format(k=k))
rdr = pd.read_sas(fname, format="sas7bdat",
iterator=True, encoding='utf-8')
d1 = rdr.read(rdr.row_count + 20)
rdr.close()
rdr = pd.read_sas(fname, iterator=True, encoding="utf-8")
d2 = rdr.read(rdr.row_count + 20)
tm.assert_frame_equal(d1, d2)
rdr.close()
def test_encoding_options(datapath):
fname = datapath("io", "sas", "data", "test1.sas7bdat")
df1 = pd.read_sas(fname)
df2 = pd.read_sas(fname, encoding='utf-8')
for col in df1.columns:
try:
df1[col] = df1[col].str.decode('utf-8')
except AttributeError:
pass
tm.assert_frame_equal(df1, df2)
from pandas.io.sas.sas7bdat import SAS7BDATReader
rdr = SAS7BDATReader(fname, convert_header_text=False)
df3 = rdr.read()
rdr.close()
for x, y in zip(df1.columns, df3.columns):
        assert x == y.decode()
def test_productsales(datapath):
fname = datapath("io", "sas", "data", "productsales.sas7bdat")
df = pd.read_sas(fname, encoding='utf-8')
fname = datapath("io", "sas", "data", "productsales.csv")
df0 = pd.read_csv(fname, parse_dates=['MONTH'])
vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"]
df0[vn] = df0[vn].astype(np.float64)
tm.assert_frame_equal(df, df0)
def test_12659(datapath):
fname = datapath("io", "sas", "data", "test_12659.sas7bdat")
df = pd.read_sas(fname)
fname = datapath("io", "sas", "data", "test_12659.csv")
df0 = pd.read_csv(fname)
df0 = df0.astype(np.float64)
tm.assert_frame_equal(df, df0)
def test_airline(datapath):
fname = datapath("io", "sas", "data", "airline.sas7bdat")
df = pd.read_sas(fname)
fname = datapath("io", "sas", "data", "airline.csv")
df0 = pd.read_csv(fname)
df0 = df0.astype(np.float64)
tm.assert_frame_equal(df, df0, check_exact=False)
def test_date_time(datapath):
# Support of different SAS date/datetime formats (PR #15871)
fname = datapath("io", "sas", "data", "datetime.sas7bdat")
df = pd.read_sas(fname)
fname = datapath("io", "sas", "data", "datetime.csv")
df0 = pd.read_csv(fname, parse_dates=['Date1', 'Date2', 'DateTime',
'DateTimeHi', 'Taiw'])
# GH 19732: Timestamps imported from sas will incur floating point errors
df.iloc[:, 3] = df.iloc[:, 3].dt.round('us')
tm.assert_frame_equal(df, df0)
def test_compact_numerical_values(datapath):
# Regression test for #21616
fname = datapath("io", "sas", "data", "cars.sas7bdat")
df = pd.read_sas(fname, encoding='latin-1')
# The two columns CYL and WGT in cars.sas7bdat have column
# width < 8 and only contain integral values.
# Test that pandas doesn't corrupt the numbers by adding
# decimals.
result = df['WGT']
expected = df['WGT'].round()
tm.assert_series_equal(result, expected, check_exact=True)
result = df['CYL']
expected = df['CYL'].round()
tm.assert_series_equal(result, expected, check_exact=True)
def test_many_columns(datapath):
# Test for looking for column information in more places (PR #22628)
fname = datapath("io", "sas", "data", "many_columns.sas7bdat")
df = pd.read_sas(fname, encoding='latin-1')
fname = datapath("io", "sas", "data", "many_columns.csv")
df0 = pd.read_csv(fname, encoding='latin-1')
tm.assert_frame_equal(df, df0)
def test_inconsistent_number_of_rows(datapath):
# Regression test for issue #16615. (PR #22628)
fname = datapath("io", "sas", "data", "load_log.sas7bdat")
df = pd.read_sas(fname, encoding='latin-1')
assert len(df) == 2097
def test_zero_variables(datapath):
# Check if the SAS file has zero variables (PR #18184)
fname = datapath("io", "sas", "data", "zero_variables.sas7bdat")
with pytest.raises(EmptyDataError):
pd.read_sas(fname)
@@ -0,0 +1,146 @@
import os
import numpy as np
import pytest
import pandas as pd
import pandas.util.testing as tm
from pandas.io.sas.sasreader import read_sas
# CSV versions of test xpt files were obtained using the R foreign library
# Numbers in a SAS xport file are always float64, so we need to convert
# integer columns before making comparisons.
def numeric_as_float(data):
for v in data.columns:
        if data[v].dtype == np.dtype('int64'):
data[v] = data[v].astype(np.float64)
class TestXport(object):
@pytest.fixture(autouse=True)
def setup_method(self, datapath):
self.dirpath = datapath("io", "sas", "data")
self.file01 = os.path.join(self.dirpath, "DEMO_G.xpt")
self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt")
self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt")
self.file04 = os.path.join(self.dirpath, "paxraw_d_short.xpt")
def test1_basic(self):
# Tests with DEMO_G.xpt (all numeric file)
# Compare to this
data_csv = pd.read_csv(self.file01.replace(".xpt", ".csv"))
numeric_as_float(data_csv)
# Read full file
data = read_sas(self.file01, format="xport")
tm.assert_frame_equal(data, data_csv)
num_rows = data.shape[0]
# Test reading beyond end of file
reader = read_sas(self.file01, format="xport", iterator=True)
data = reader.read(num_rows + 100)
assert data.shape[0] == num_rows
reader.close()
# Test incremental read with `read` method.
reader = read_sas(self.file01, format="xport", iterator=True)
data = reader.read(10)
reader.close()
tm.assert_frame_equal(data, data_csv.iloc[0:10, :])
# Test incremental read with `get_chunk` method.
reader = read_sas(self.file01, format="xport", chunksize=10)
data = reader.get_chunk()
reader.close()
tm.assert_frame_equal(data, data_csv.iloc[0:10, :])
# Test read in loop
m = 0
reader = read_sas(self.file01, format="xport", chunksize=100)
for x in reader:
m += x.shape[0]
reader.close()
assert m == num_rows
# Read full file with `read_sas` method
data = read_sas(self.file01)
tm.assert_frame_equal(data, data_csv)
def test1_index(self):
# Tests with DEMO_G.xpt using index (all numeric file)
# Compare to this
data_csv = pd.read_csv(self.file01.replace(".xpt", ".csv"))
data_csv = data_csv.set_index("SEQN")
numeric_as_float(data_csv)
# Read full file
data = read_sas(self.file01, index="SEQN", format="xport")
tm.assert_frame_equal(data, data_csv, check_index_type=False)
# Test incremental read with `read` method.
reader = read_sas(self.file01, index="SEQN", format="xport",
iterator=True)
data = reader.read(10)
reader.close()
tm.assert_frame_equal(data, data_csv.iloc[0:10, :],
check_index_type=False)
# Test incremental read with `get_chunk` method.
reader = read_sas(self.file01, index="SEQN", format="xport",
chunksize=10)
data = reader.get_chunk()
reader.close()
tm.assert_frame_equal(data, data_csv.iloc[0:10, :],
check_index_type=False)
def test1_incremental(self):
# Test with DEMO_G.xpt, reading full file incrementally
data_csv = pd.read_csv(self.file01.replace(".xpt", ".csv"))
data_csv = data_csv.set_index("SEQN")
numeric_as_float(data_csv)
reader = read_sas(self.file01, index="SEQN", chunksize=1000)
all_data = [x for x in reader]
data = pd.concat(all_data, axis=0)
tm.assert_frame_equal(data, data_csv, check_index_type=False)
def test2(self):
# Test with SSHSV1_A.xpt
# Compare to this
data_csv = pd.read_csv(self.file02.replace(".xpt", ".csv"))
numeric_as_float(data_csv)
data = read_sas(self.file02)
tm.assert_frame_equal(data, data_csv)
def test_multiple_types(self):
# Test with DRXFCD_G.xpt (contains text and numeric variables)
# Compare to this
data_csv = pd.read_csv(self.file03.replace(".xpt", ".csv"))
data = read_sas(self.file03, encoding="utf-8")
tm.assert_frame_equal(data, data_csv)
def test_truncated_float_support(self):
# Test with paxraw_d_short.xpt, a shortened version of:
# http://wwwn.cdc.gov/Nchs/Nhanes/2005-2006/PAXRAW_D.ZIP
# This file has truncated floats (5 bytes in this case).
# GH 11713
data_csv = pd.read_csv(self.file04.replace(".xpt", ".csv"))
data = read_sas(self.file04, format="xport")
tm.assert_frame_equal(data.astype('int64'), data_csv)
@@ -0,0 +1,227 @@
# -*- coding: utf-8 -*-
from textwrap import dedent
import numpy as np
from numpy.random import randint
import pytest
from pandas.compat import PY2
import pandas as pd
from pandas import DataFrame, get_option, read_clipboard
from pandas.util import testing as tm
from pandas.util.testing import makeCustomDataframe as mkdf
from pandas.io.clipboard.exceptions import PyperclipException
try:
DataFrame({'A': [1, 2]}).to_clipboard()
_DEPS_INSTALLED = 1
except (PyperclipException, RuntimeError):
_DEPS_INSTALLED = 0
def build_kwargs(sep, excel):
kwargs = {}
if excel != 'default':
kwargs['excel'] = excel
if sep != 'default':
kwargs['sep'] = sep
return kwargs
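# Minimal sketch of the sentinel convention above (illustrative only,
# not collected by pytest): 'default' means "do not pass the argument
# at all", letting to_clipboard fall back to its own defaults.
def _example_build_kwargs():
    assert build_kwargs('default', 'default') == {}
    assert build_kwargs('\t', True) == {'excel': True, 'sep': '\t'}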
@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii',
'colwidth', 'mixed', 'float', 'int'])
def df(request):
data_type = request.param
if data_type == 'delims':
return pd.DataFrame({'a': ['"a,\t"b|c', 'd\tef´'],
'b': ['hi\'j', 'k\'\'lm']})
elif data_type == 'utf8':
return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'],
'b': ['øπ∆˚¬', 'œ∑´®']})
elif data_type == 'string':
return mkdf(5, 3, c_idx_type='s', r_idx_type='i',
c_idx_names=[None], r_idx_names=[None])
elif data_type == 'long':
max_rows = get_option('display.max_rows')
return mkdf(max_rows + 1, 3,
data_gen_f=lambda *args: randint(2),
c_idx_type='s', r_idx_type='i',
c_idx_names=[None], r_idx_names=[None])
elif data_type == 'nonascii':
return pd.DataFrame({'en': 'in English'.split(),
'es': 'en español'.split()})
elif data_type == 'colwidth':
_cw = get_option('display.max_colwidth') + 1
return mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw,
c_idx_type='s', r_idx_type='i',
c_idx_names=[None], r_idx_names=[None])
elif data_type == 'mixed':
return DataFrame({'a': np.arange(1.0, 6.0) + 0.01,
'b': np.arange(1, 6),
'c': list('abcde')})
elif data_type == 'float':
return mkdf(5, 3, data_gen_f=lambda r, c: float(r) + 0.01,
c_idx_type='s', r_idx_type='i',
c_idx_names=[None], r_idx_names=[None])
elif data_type == 'int':
return mkdf(5, 3, data_gen_f=lambda *args: randint(2),
c_idx_type='s', r_idx_type='i',
c_idx_names=[None], r_idx_names=[None])
else:
raise ValueError("unsupported df fixture parameter: {}".format(data_type))
@pytest.fixture
def mock_clipboard(monkeypatch, request):
"""Fixture mocking clipboard IO.
This mocks pandas.io.clipboard.clipboard_get and
pandas.io.clipboard.clipboard_set.
This uses a local dict for storing data. The dictionary
key used is the test ID, available with ``request.node.name``.
This returns the local dictionary, for direct manipulation by
tests.
"""
# our local clipboard for tests
_mock_data = {}
def _mock_set(data):
_mock_data[request.node.name] = data
def _mock_get():
return _mock_data[request.node.name]
monkeypatch.setattr("pandas.io.clipboard.clipboard_set", _mock_set)
monkeypatch.setattr("pandas.io.clipboard.clipboard_get", _mock_get)
yield _mock_data
@pytest.mark.clipboard
def test_mock_clipboard(mock_clipboard):
import pandas.io.clipboard
pandas.io.clipboard.clipboard_set("abc")
assert "abc" in set(mock_clipboard.values())
result = pandas.io.clipboard.clipboard_get()
assert result == "abc"
@pytest.mark.single
@pytest.mark.clipboard
@pytest.mark.skipif(not _DEPS_INSTALLED,
reason="clipboard primitives not installed")
@pytest.mark.usefixtures("mock_clipboard")
class TestClipboard(object):
def check_round_trip_frame(self, data, excel=None, sep=None,
encoding=None):
data.to_clipboard(excel=excel, sep=sep, encoding=encoding)
result = read_clipboard(sep=sep or '\t', index_col=0,
encoding=encoding)
tm.assert_frame_equal(data, result, check_dtype=False)
# Test that default arguments copy as tab delimited
def test_round_trip_frame(self, df):
self.check_round_trip_frame(df)
# Test that explicit delimiters are respected
@pytest.mark.parametrize('sep', ['\t', ',', '|'])
def test_round_trip_frame_sep(self, df, sep):
self.check_round_trip_frame(df, sep=sep)
# Test white space separator
def test_round_trip_frame_string(self, df):
df.to_clipboard(excel=False, sep=None)
result = read_clipboard()
assert df.to_string() == result.to_string()
assert df.shape == result.shape
# Multi-character separators (such as the two-character string r'\t')
# are not supported in to_clipboard; test that they are not silently
# passed through
def test_excel_sep_warning(self, df):
with tm.assert_produces_warning():
df.to_clipboard(excel=True, sep=r'\t')
# Separator is ignored when excel=False and should produce a warning
def test_copy_delim_warning(self, df):
with tm.assert_produces_warning():
df.to_clipboard(excel=False, sep='\t')
# Tests that the default behavior of to_clipboard is tab
# delimited and excel=True
@pytest.mark.parametrize('sep', ['\t', None, 'default'])
@pytest.mark.parametrize('excel', [True, None, 'default'])
def test_clipboard_copy_tabs_default(self, sep, excel, df, request,
mock_clipboard):
kwargs = build_kwargs(sep, excel)
df.to_clipboard(**kwargs)
if PY2:
# to_clipboard copies unicode, to_csv produces bytes. This is
# expected behavior
result = mock_clipboard[request.node.name].encode('utf-8')
expected = df.to_csv(sep='\t')
assert result == expected
else:
assert mock_clipboard[request.node.name] == df.to_csv(sep='\t')
# Tests reading of white space separated tables
@pytest.mark.parametrize('sep', [None, 'default'])
@pytest.mark.parametrize('excel', [False])
def test_clipboard_copy_strings(self, sep, excel, df):
kwargs = build_kwargs(sep, excel)
df.to_clipboard(**kwargs)
result = read_clipboard(sep=r'\s+')
assert result.to_string() == df.to_string()
assert df.shape == result.shape
def test_read_clipboard_infer_excel(self, request,
mock_clipboard):
# gh-19010: avoid warnings
clip_kwargs = dict(engine="python")
text = dedent("""
John James Charlie Mingus
1 2
4 Harry Carney
""".strip())
mock_clipboard[request.node.name] = text
df = pd.read_clipboard(**clip_kwargs)
# excel data is parsed correctly
assert df.iloc[1, 1] == 'Harry Carney'
# differing tab counts between lines don't trigger excel parsing
text = dedent("""
a\t b
1 2
3 4
""".strip())
mock_clipboard[request.node.name] = text
res = pd.read_clipboard(**clip_kwargs)
text = dedent("""
a b
1 2
3 4
""".strip())
mock_clipboard[request.node.name] = text
exp = pd.read_clipboard(**clip_kwargs)
tm.assert_frame_equal(res, exp)
def test_invalid_encoding(self, df):
# test that an unsupported encoding raises on both write and read
with pytest.raises(ValueError):
df.to_clipboard(encoding='ascii')
with pytest.raises(NotImplementedError):
pd.read_clipboard(encoding='ascii')
@pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8'])
def test_round_trip_valid_encodings(self, enc, df):
self.check_round_trip_frame(df, encoding=enc)
@@ -0,0 +1,357 @@
"""
Tests for the pandas.io.common functionality
"""
import mmap
import os
import pytest
from pandas.compat import FileNotFoundError, StringIO, is_platform_windows
import pandas.util._test_decorators as td
import pandas as pd
import pandas.util.testing as tm
import pandas.io.common as icom
class CustomFSPath(object):
"""For testing fspath on unknown objects"""
def __init__(self, path):
self.path = path
def __fspath__(self):
return self.path
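# Illustrative sketch of the protocol CustomFSPath implements (not
# collected by pytest): on Python >= 3.6, os.fspath() unwraps any
# object defining __fspath__; earlier versions lack os.fspath.
def _example_fspath():
    assert os.fspath(CustomFSPath('foo/bar.csv')) == 'foo/bar.csv'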
# Functions that consume a string path and return a string or path-like object
path_types = [str, CustomFSPath]
try:
from pathlib import Path
path_types.append(Path)
except ImportError:
pass
try:
from py.path import local as LocalPath
path_types.append(LocalPath)
except ImportError:
pass
HERE = os.path.abspath(os.path.dirname(__file__))
# https://github.com/cython/cython/issues/1720
@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning")
class TestCommonIOCapabilities(object):
data1 = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
def test_expand_user(self):
filename = '~/sometest'
expanded_name = icom._expand_user(filename)
assert expanded_name != filename
assert os.path.isabs(expanded_name)
assert os.path.expanduser(filename) == expanded_name
def test_expand_user_normal_path(self):
filename = '/somefolder/sometest'
expanded_name = icom._expand_user(filename)
assert expanded_name == filename
assert os.path.expanduser(filename) == expanded_name
@td.skip_if_no('pathlib')
def test_stringify_path_pathlib(self):
rel_path = icom._stringify_path(Path('.'))
assert rel_path == '.'
redundant_path = icom._stringify_path(Path('foo//bar'))
assert redundant_path == os.path.join('foo', 'bar')
@td.skip_if_no('py.path')
def test_stringify_path_localpath(self):
path = os.path.join('foo', 'bar')
abs_path = os.path.abspath(path)
lpath = LocalPath(path)
assert icom._stringify_path(lpath) == abs_path
def test_stringify_path_fspath(self):
p = CustomFSPath('foo/bar.csv')
result = icom._stringify_path(p)
assert result == 'foo/bar.csv'
@pytest.mark.parametrize('extension,expected', [
('', None),
('.gz', 'gzip'),
('.bz2', 'bz2'),
('.zip', 'zip'),
('.xz', 'xz'),
])
@pytest.mark.parametrize('path_type', path_types)
def test_infer_compression_from_path(self, extension, expected, path_type):
path = path_type('foo/bar.csv' + extension)
compression = icom._infer_compression(path, compression='infer')
assert compression == expected
def test_get_filepath_or_buffer_with_path(self):
filename = '~/sometest'
filepath_or_buffer, _, _, should_close = icom.get_filepath_or_buffer(
filename)
assert filepath_or_buffer != filename
assert os.path.isabs(filepath_or_buffer)
assert os.path.expanduser(filename) == filepath_or_buffer
assert not should_close
def test_get_filepath_or_buffer_with_buffer(self):
input_buffer = StringIO()
filepath_or_buffer, _, _, should_close = icom.get_filepath_or_buffer(
input_buffer)
assert filepath_or_buffer == input_buffer
assert not should_close
def test_iterator(self):
reader = pd.read_csv(StringIO(self.data1), chunksize=1)
result = pd.concat(reader, ignore_index=True)
expected = pd.read_csv(StringIO(self.data1))
tm.assert_frame_equal(result, expected)
# GH12153
it = pd.read_csv(StringIO(self.data1), chunksize=1)
first = next(it)
tm.assert_frame_equal(first, expected.iloc[[0]])
tm.assert_frame_equal(pd.concat(it), expected.iloc[1:])
@pytest.mark.parametrize('reader, module, error_class, fn_ext', [
(pd.read_csv, 'os', FileNotFoundError, 'csv'),
(pd.read_fwf, 'os', FileNotFoundError, 'txt'),
(pd.read_excel, 'xlrd', FileNotFoundError, 'xlsx'),
(pd.read_feather, 'feather', Exception, 'feather'),
(pd.read_hdf, 'tables', FileNotFoundError, 'h5'),
(pd.read_stata, 'os', FileNotFoundError, 'dta'),
(pd.read_sas, 'os', FileNotFoundError, 'sas7bdat'),
(pd.read_json, 'os', ValueError, 'json'),
(pd.read_msgpack, 'os', ValueError, 'mp'),
(pd.read_pickle, 'os', FileNotFoundError, 'pickle'),
])
def test_read_non_existent(self, reader, module, error_class, fn_ext):
pytest.importorskip(module)
path = os.path.join(HERE, 'data', 'does_not_exist.' + fn_ext)
msg1 = (r"File (b')?.+does_not_exist\.{}'? does not exist"
.format(fn_ext))
msg2 = (r"\[Errno 2\] No such file or directory: '.+does_not_exist"
r"\.{}'").format(fn_ext)
msg3 = "Expected object or value"
msg4 = "path_or_buf needs to be a string file path or file-like"
msg5 = (r"\[Errno 2\] File .+does_not_exist\.{} does not exist:"
r" '.+does_not_exist\.{}'").format(fn_ext, fn_ext)
with pytest.raises(error_class, match=r"({}|{}|{}|{}|{})".format(
msg1, msg2, msg3, msg4, msg5)):
reader(path)
@pytest.mark.parametrize('reader, module, error_class, fn_ext', [
(pd.read_csv, 'os', FileNotFoundError, 'csv'),
(pd.read_fwf, 'os', FileNotFoundError, 'txt'),
(pd.read_excel, 'xlrd', FileNotFoundError, 'xlsx'),
(pd.read_feather, 'feather', Exception, 'feather'),
(pd.read_hdf, 'tables', FileNotFoundError, 'h5'),
(pd.read_stata, 'os', FileNotFoundError, 'dta'),
(pd.read_sas, 'os', FileNotFoundError, 'sas7bdat'),
(pd.read_json, 'os', ValueError, 'json'),
(pd.read_msgpack, 'os', ValueError, 'mp'),
(pd.read_pickle, 'os', FileNotFoundError, 'pickle'),
])
def test_read_expands_user_home_dir(self, reader, module,
error_class, fn_ext, monkeypatch):
pytest.importorskip(module)
path = os.path.join('~', 'does_not_exist.' + fn_ext)
monkeypatch.setattr(icom, '_expand_user',
lambda x: os.path.join('foo', x))
msg1 = (r"File (b')?.+does_not_exist\.{}'? does not exist"
.format(fn_ext))
msg2 = (r"\[Errno 2\] No such file or directory:"
r" '.+does_not_exist\.{}'").format(fn_ext)
msg3 = "Unexpected character found when decoding 'false'"
msg4 = "path_or_buf needs to be a string file path or file-like"
msg5 = (r"\[Errno 2\] File .+does_not_exist\.{} does not exist:"
r" '.+does_not_exist\.{}'").format(fn_ext, fn_ext)
with pytest.raises(error_class, match=r"({}|{}|{}|{}|{})".format(
msg1, msg2, msg3, msg4, msg5)):
reader(path)
def test_read_non_existent_read_table(self):
path = os.path.join(HERE, 'data', 'does_not_exist.' + 'csv')
msg1 = r"File b'.+does_not_exist\.csv' does not exist"
msg2 = (r"\[Errno 2\] File .+does_not_exist\.csv does not exist:"
r" '.+does_not_exist\.csv'")
with pytest.raises(FileNotFoundError, match=r"({}|{})".format(
msg1, msg2)):
with tm.assert_produces_warning(FutureWarning):
pd.read_table(path)
@pytest.mark.parametrize('reader, module, path', [
(pd.read_csv, 'os', ('io', 'data', 'iris.csv')),
(pd.read_fwf, 'os', ('io', 'data', 'fixed_width_format.txt')),
(pd.read_excel, 'xlrd', ('io', 'data', 'test1.xlsx')),
(pd.read_feather, 'feather', ('io', 'data', 'feather-0_3_1.feather')),
(pd.read_hdf, 'tables', ('io', 'data', 'legacy_hdf',
'datetimetz_object.h5')),
(pd.read_stata, 'os', ('io', 'data', 'stata10_115.dta')),
(pd.read_sas, 'os', ('io', 'sas', 'data', 'test1.sas7bdat')),
(pd.read_json, 'os', ('io', 'json', 'data', 'tsframe_v012.json')),
(pd.read_msgpack, 'os', ('io', 'msgpack', 'data', 'frame.mp')),
(pd.read_pickle, 'os', ('io', 'data', 'categorical_0_14_1.pickle')),
])
def test_read_fspath_all(self, reader, module, path, datapath):
pytest.importorskip(module)
path = datapath(*path)
mypath = CustomFSPath(path)
result = reader(mypath)
expected = reader(path)
if path.endswith('.pickle'):
# categorical
tm.assert_categorical_equal(result, expected)
else:
tm.assert_frame_equal(result, expected)
def test_read_fspath_all_read_table(self, datapath):
path = datapath('io', 'data', 'iris.csv')
mypath = CustomFSPath(path)
with tm.assert_produces_warning(FutureWarning):
result = pd.read_table(mypath)
with tm.assert_produces_warning(FutureWarning):
expected = pd.read_table(path)
if path.endswith('.pickle'):
# categorical
tm.assert_categorical_equal(result, expected)
else:
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize('writer_name, writer_kwargs, module', [
('to_csv', {}, 'os'),
('to_excel', {'engine': 'xlwt'}, 'xlwt'),
('to_feather', {}, 'feather'),
('to_html', {}, 'os'),
('to_json', {}, 'os'),
('to_latex', {}, 'os'),
('to_msgpack', {}, 'os'),
('to_pickle', {}, 'os'),
('to_stata', {}, 'os'),
])
def test_write_fspath_all(self, writer_name, writer_kwargs, module):
p1 = tm.ensure_clean('string')
p2 = tm.ensure_clean('fspath')
df = pd.DataFrame({"A": [1, 2]})
with p1 as string, p2 as fspath:
pytest.importorskip(module)
mypath = CustomFSPath(fspath)
writer = getattr(df, writer_name)
writer(string, **writer_kwargs)
with open(string, 'rb') as f:
expected = f.read()
writer(mypath, **writer_kwargs)
with open(fspath, 'rb') as f:
result = f.read()
assert result == expected
def test_write_fspath_hdf5(self):
# Same test as write_fspath_all, except HDF5 files aren't
# necessarily byte-for-byte identical for a given dataframe, so we'll
# have to read and compare equality
pytest.importorskip('tables')
df = pd.DataFrame({"A": [1, 2]})
p1 = tm.ensure_clean('string')
p2 = tm.ensure_clean('fspath')
with p1 as string, p2 as fspath:
mypath = CustomFSPath(fspath)
df.to_hdf(mypath, key='bar')
df.to_hdf(string, key='bar')
result = pd.read_hdf(fspath, key='bar')
expected = pd.read_hdf(string, key='bar')
tm.assert_frame_equal(result, expected)
@pytest.fixture
def mmap_file(datapath):
return datapath('io', 'data', 'test_mmap.csv')
class TestMMapWrapper(object):
def test_constructor_bad_file(self, mmap_file):
non_file = StringIO('I am not a file')
non_file.fileno = lambda: -1
# the error raised is different on Windows
if is_platform_windows():
msg = "The parameter is incorrect"
err = OSError
else:
msg = "[Errno 22]"
err = mmap.error
with pytest.raises(err, match=msg):
icom.MMapWrapper(non_file)
target = open(mmap_file, 'r')
target.close()
msg = "I/O operation on closed file"
with pytest.raises(ValueError, match=msg):
icom.MMapWrapper(target)
def test_get_attr(self, mmap_file):
with open(mmap_file, 'r') as target:
wrapper = icom.MMapWrapper(target)
attrs = dir(wrapper.mmap)
attrs = [attr for attr in attrs
if not attr.startswith('__')]
attrs.append('__next__')
for attr in attrs:
assert hasattr(wrapper, attr)
assert not hasattr(wrapper, 'foo')
def test_next(self, mmap_file):
with open(mmap_file, 'r') as target:
wrapper = icom.MMapWrapper(target)
lines = target.readlines()
for line in lines:
next_line = next(wrapper)
assert next_line.strip() == line.strip()
with pytest.raises(StopIteration, match=r'^$'):
next(wrapper)
def test_unknown_engine(self):
with tm.ensure_clean() as path:
df = tm.makeDataFrame()
df.to_csv(path)
with pytest.raises(ValueError, match='Unknown engine'):
pd.read_csv(path, engine='pyt')
@@ -0,0 +1,116 @@
import contextlib
import os
import warnings
import pytest
import pandas as pd
import pandas.util.testing as tm
import pandas.io.common as icom
@contextlib.contextmanager
def catch_to_csv_depr():
# Catching warnings because Series.to_csv has
# been deprecated. Remove this context when
# Series.to_csv has been aligned.
with warnings.catch_warnings(record=True):
warnings.simplefilter("ignore", FutureWarning)
yield
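# Illustrative usage of the context manager above (hypothetical helper,
# not collected by pytest): it only keeps the Series.to_csv deprecation
# warning from leaking into the assertions below.
def _example_catch_to_csv_depr():
    with tm.ensure_clean() as path:
        with catch_to_csv_depr():
            pd.Series([0.1, 0.2]).to_csv(path)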
@pytest.mark.parametrize('obj', [
pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567],
[12.32112, 123123.2, 321321.2]],
columns=['X', 'Y', 'Z']),
pd.Series(100 * [0.123456, 0.234567, 0.567567], name='X')])
@pytest.mark.parametrize('method', ['to_pickle', 'to_json', 'to_csv'])
def test_compression_size(obj, method, compression_only):
with tm.ensure_clean() as path:
with catch_to_csv_depr():
getattr(obj, method)(path, compression=compression_only)
compressed_size = os.path.getsize(path)
getattr(obj, method)(path, compression=None)
uncompressed_size = os.path.getsize(path)
assert uncompressed_size > compressed_size
@pytest.mark.parametrize('obj', [
pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567],
[12.32112, 123123.2, 321321.2]],
columns=['X', 'Y', 'Z']),
pd.Series(100 * [0.123456, 0.234567, 0.567567], name='X')])
@pytest.mark.parametrize('method', ['to_csv', 'to_json'])
def test_compression_size_fh(obj, method, compression_only):
with tm.ensure_clean() as path:
f, handles = icom._get_handle(path, 'w', compression=compression_only)
with catch_to_csv_depr():
with f:
getattr(obj, method)(f)
assert not f.closed
assert f.closed
compressed_size = os.path.getsize(path)
with tm.ensure_clean() as path:
f, handles = icom._get_handle(path, 'w', compression=None)
with catch_to_csv_depr():
with f:
getattr(obj, method)(f)
assert not f.closed
assert f.closed
uncompressed_size = os.path.getsize(path)
assert uncompressed_size > compressed_size
@pytest.mark.parametrize('write_method, write_kwargs, read_method', [
('to_csv', {'index': False}, pd.read_csv),
('to_json', {}, pd.read_json),
('to_pickle', {}, pd.read_pickle),
])
def test_dataframe_compression_defaults_to_infer(
write_method, write_kwargs, read_method, compression_only):
# GH22004
df = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]], columns=['X', 'Y', 'Z'])
extension = icom._compression_to_extension[compression_only]
with tm.ensure_clean('compressed' + extension) as path:
getattr(df, write_method)(path, **write_kwargs)
output = read_method(path, compression=compression_only)
tm.assert_frame_equal(output, df)
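# Minimal end-to-end sketch of the GH22004 behavior tested above
# (illustrative only, not collected by pytest): with the default
# compression='infer', writer and reader both resolve the codec from
# the '.gz' extension without it being spelled out.
def _example_compression_infer_roundtrip():
    df = pd.DataFrame({'X': [1.0, 3.4]})
    with tm.ensure_clean('frame.csv.gz') as path:
        df.to_csv(path, index=False)
        result = pd.read_csv(path)
    tm.assert_frame_equal(result, df)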
@pytest.mark.parametrize('write_method,write_kwargs,read_method,read_kwargs', [
('to_csv', {'index': False, 'header': True},
pd.read_csv, {'squeeze': True}),
('to_json', {}, pd.read_json, {'typ': 'series'}),
('to_pickle', {}, pd.read_pickle, {}),
])
def test_series_compression_defaults_to_infer(
write_method, write_kwargs, read_method, read_kwargs,
compression_only):
# GH22004
ser = pd.Series([0, 5, -2, 10], name='X')
extension = icom._compression_to_extension[compression_only]
with tm.ensure_clean('compressed' + extension) as path:
getattr(ser, write_method)(path, **write_kwargs)
output = read_method(path, compression=compression_only, **read_kwargs)
tm.assert_series_equal(output, ser, check_names=False)
def test_compression_warning(compression_only):
# Assert that passing a file object to to_csv while explicitly specifying a
# compression protocol triggers a RuntimeWarning, as per GH21227.
# Note that pytest has an issue that causes assert_produces_warning to fail
# in Python 2 if the warning has occurred in previous tests
# (see https://git.io/fNEBm & https://git.io/fNEBC). Hence, should this
# test fail in just Python 2 builds, it likely indicates that other tests
# are producing RuntimeWarnings, thereby triggering the pytest bug.
df = pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567],
[12.32112, 123123.2, 321321.2]],
columns=['X', 'Y', 'Z'])
with tm.ensure_clean() as path:
f, handles = icom._get_handle(path, 'w', compression=compression_only)
with tm.assert_produces_warning(RuntimeWarning,
check_stacklevel=False):
with f:
df.to_csv(f, compression=compression_only)
@@ -0,0 +1,43 @@
from datetime import datetime
import numpy as np
import pandas.util.testing as tm
import pandas.io.date_converters as conv
def test_parse_date_time():
dates = np.array(['2007/1/3', '2008/2/4'], dtype=object)
times = np.array(['05:07:09', '06:08:00'], dtype=object)
expected = np.array([datetime(2007, 1, 3, 5, 7, 9),
datetime(2008, 2, 4, 6, 8, 0)])
result = conv.parse_date_time(dates, times)
tm.assert_numpy_array_equal(result, expected)
def test_parse_date_fields():
days = np.array([3, 4])
months = np.array([1, 2])
years = np.array([2007, 2008])
result = conv.parse_date_fields(years, months, days)
expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)])
tm.assert_numpy_array_equal(result, expected)
def test_parse_all_fields():
hours = np.array([5, 6])
minutes = np.array([7, 8])
seconds = np.array([9, 0])
days = np.array([3, 4])
years = np.array([2007, 2008])
months = np.array([1, 2])
result = conv.parse_all_fields(years, months, days,
hours, minutes, seconds)
expected = np.array([datetime(2007, 1, 3, 5, 7, 9),
datetime(2008, 2, 4, 6, 8, 0)])
tm.assert_numpy_array_equal(result, expected)
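# Illustrative sketch (not part of the original module, not collected
# by pytest) of how these converters are typically consumed: read_csv
# groups the named columns and hands their values to date_parser.
def _example_read_csv_date_parser():
    from pandas.compat import StringIO
    from pandas.io.parsers import read_csv
    data = 'date,time,val\n2007/1/3,05:07:09,1\n2008/2/4,06:08:00,2\n'
    result = read_csv(StringIO(data),
                      parse_dates={'datetime': ['date', 'time']},
                      date_parser=conv.parse_date_time)
    assert str(result['datetime'].dtype) == 'datetime64[ns]'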
File diff suppressed because it is too large
@@ -0,0 +1,158 @@
""" test feather-format compat """
from distutils.version import LooseVersion
import numpy as np
import pytest
import pandas as pd
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, ensure_clean
from pandas.io.feather_format import read_feather, to_feather # noqa:E402
pyarrow = pytest.importorskip('pyarrow')
pyarrow_version = LooseVersion(pyarrow.__version__)
@pytest.mark.single
class TestFeather(object):
def check_error_on_write(self, df, exc):
# check that we are raising the exception
# on writing
with pytest.raises(exc):
with ensure_clean() as path:
to_feather(df, path)
def check_round_trip(self, df, expected=None, **kwargs):
if expected is None:
expected = df
with ensure_clean() as path:
to_feather(df, path)
result = read_feather(path, **kwargs)
assert_frame_equal(result, expected)
def test_error(self):
for obj in [pd.Series([1, 2, 3]), 1, 'foo', pd.Timestamp('20130101'),
np.array([1, 2, 3])]:
self.check_error_on_write(obj, ValueError)
def test_basic(self):
df = pd.DataFrame({'string': list('abc'),
'int': list(range(1, 4)),
'uint': np.arange(3, 6).astype('u1'),
'float': np.arange(4.0, 7.0, dtype='float64'),
'float_with_null': [1., np.nan, 3],
'bool': [True, False, True],
'bool_with_null': [True, np.nan, False],
'cat': pd.Categorical(list('abc')),
'dt': pd.date_range('20130101', periods=3),
'dttz': pd.date_range('20130101', periods=3,
tz='US/Eastern'),
'dt_with_null': [pd.Timestamp('20130101'), pd.NaT,
pd.Timestamp('20130103')],
'dtns': pd.date_range('20130101', periods=3,
freq='ns')})
assert df.dttz.dtype.tz.zone == 'US/Eastern'
self.check_round_trip(df)
def test_duplicate_columns(self):
# https://github.com/wesm/feather/issues/53
# not currently able to handle duplicate columns
df = pd.DataFrame(np.arange(12).reshape(4, 3),
columns=list('aaa')).copy()
self.check_error_on_write(df, ValueError)
def test_stringify_columns(self):
df = pd.DataFrame(np.arange(12).reshape(4, 3)).copy()
self.check_error_on_write(df, ValueError)
def test_read_columns(self):
# GH 24025
df = pd.DataFrame({'col1': list('abc'),
'col2': list(range(1, 4)),
'col3': list('xyz'),
'col4': list(range(4, 7))})
columns = ['col1', 'col3']
self.check_round_trip(df, expected=df[columns],
columns=columns)
def test_unsupported_other(self):
# period
df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)})
# Some versions raise ValueError, others raise ArrowInvalid.
self.check_error_on_write(df, Exception)
def test_rw_nthreads(self):
df = pd.DataFrame({'A': np.arange(100000)})
expected_warning = (
"the 'nthreads' keyword is deprecated, "
"use 'use_threads' instead"
)
# TODO: make the warning work with check_stacklevel=True
with tm.assert_produces_warning(
FutureWarning, check_stacklevel=False) as w:
self.check_round_trip(df, nthreads=2)
# we have an extra FutureWarning because of GH 23752
assert any(expected_warning in str(x) for x in w)
# TODO: make the warning work with check_stacklevel=True
with tm.assert_produces_warning(
FutureWarning, check_stacklevel=False) as w:
self.check_round_trip(df, nthreads=1)
# we have an extra FutureWarning because of GH 23752
assert any(expected_warning in str(x) for x in w)
def test_rw_use_threads(self):
df = pd.DataFrame({'A': np.arange(100000)})
self.check_round_trip(df, use_threads=True)
self.check_round_trip(df, use_threads=False)
def test_write_with_index(self):
df = pd.DataFrame({'A': [1, 2, 3]})
self.check_round_trip(df)
# non-default index
for index in [[2, 3, 4],
pd.date_range('20130101', periods=3),
list('abc'),
[1, 3, 4],
pd.MultiIndex.from_tuples([('a', 1), ('a', 2),
('b', 1)]),
]:
df.index = index
self.check_error_on_write(df, ValueError)
# index with meta-data
df.index = [0, 1, 2]
df.index.name = 'foo'
self.check_error_on_write(df, ValueError)
# column multi-index
df.index = [0, 1, 2]
df.columns = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)])
self.check_error_on_write(df, ValueError)
def test_path_pathlib(self):
df = tm.makeDataFrame().reset_index()
result = tm.round_trip_pathlib(df.to_feather, pd.read_feather)
tm.assert_frame_equal(df, result)
def test_path_localpath(self):
df = tm.makeDataFrame().reset_index()
result = tm.round_trip_localpath(df.to_feather, pd.read_feather)
tm.assert_frame_equal(df, result)
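# Standalone sketch (illustrative only, not collected by pytest;
# requires pyarrow) of the round trip the TestFeather helpers exercise
# through the module-level API imported above.
def _example_feather_round_trip():
    df = pd.DataFrame({'a': [1, 2, 3]})
    with ensure_clean() as path:
        to_feather(df, path)
        result = read_feather(path)
    tm.assert_frame_equal(result, df)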
@@ -0,0 +1,153 @@
from datetime import datetime
import os
import platform
import numpy as np
import pytest
import pytz
from pandas.compat import range
import pandas as pd
from pandas import DataFrame, compat
import pandas.util.testing as tm
api_exceptions = pytest.importorskip("google.api_core.exceptions")
bigquery = pytest.importorskip("google.cloud.bigquery")
service_account = pytest.importorskip("google.oauth2.service_account")
pandas_gbq = pytest.importorskip("pandas_gbq")
PROJECT_ID = None
PRIVATE_KEY_JSON_PATH = None
PRIVATE_KEY_JSON_CONTENTS = None
if compat.PY3:
DATASET_ID = 'pydata_pandas_bq_testing_py3'
else:
DATASET_ID = 'pydata_pandas_bq_testing_py2'
TABLE_ID = 'new_test'
DESTINATION_TABLE = "{0}.{1}".format(DATASET_ID + "1", TABLE_ID)
VERSION = platform.python_version()
def _skip_if_no_project_id():
if not _get_project_id():
pytest.skip(
"Cannot run integration tests without a project id")
def _skip_if_no_private_key_path():
if not _get_private_key_path():
pytest.skip("Cannot run integration tests without a "
"private key json file path")
def _in_travis_environment():
return 'TRAVIS_BUILD_DIR' in os.environ and \
'GBQ_PROJECT_ID' in os.environ
def _get_project_id():
if _in_travis_environment():
return os.environ.get('GBQ_PROJECT_ID')
return PROJECT_ID or os.environ.get('GBQ_PROJECT_ID')
def _get_private_key_path():
if _in_travis_environment():
return os.path.join(*[os.environ.get('TRAVIS_BUILD_DIR'), 'ci',
'travis_gbq.json'])
private_key_path = PRIVATE_KEY_JSON_PATH
if not private_key_path:
private_key_path = os.environ.get('GBQ_GOOGLE_APPLICATION_CREDENTIALS')
return private_key_path
def _get_credentials():
private_key_path = _get_private_key_path()
if private_key_path:
return service_account.Credentials.from_service_account_file(
private_key_path)
def _get_client():
project_id = _get_project_id()
credentials = _get_credentials()
return bigquery.Client(project=project_id, credentials=credentials)
def make_mixed_dataframe_v2(test_size):
# create df to test for all BQ datatypes except RECORD
bools = np.random.randint(2, size=(1, test_size)).astype(bool)
flts = np.random.randn(1, test_size)
ints = np.random.randint(1, 10, size=(1, test_size))
strs = np.random.randint(1, 10, size=(1, test_size)).astype(str)
times = [datetime.now(pytz.timezone('US/Arizona'))
for t in range(test_size)]
return DataFrame({'bools': bools[0],
'flts': flts[0],
'ints': ints[0],
'strs': strs[0],
'times': times[0]},
index=range(test_size))
def test_read_gbq_without_dialect_warns_future_change(monkeypatch):
# Default dialect is changing to standard SQL. See:
# https://github.com/pydata/pandas-gbq/issues/195
def mock_read_gbq(*args, **kwargs):
return DataFrame([[1.0]])
monkeypatch.setattr(pandas_gbq, 'read_gbq', mock_read_gbq)
with tm.assert_produces_warning(FutureWarning):
pd.read_gbq("SELECT 1")
@pytest.mark.single
class TestToGBQIntegrationWithServiceAccountKeyPath(object):
@classmethod
def setup_class(cls):
# - GLOBAL CLASS FIXTURES -
# put here any instruction you want to execute only *ONCE* *BEFORE*
# executing *ALL* tests described below.
_skip_if_no_project_id()
_skip_if_no_private_key_path()
cls.client = _get_client()
cls.dataset = cls.client.dataset(DATASET_ID + "1")
try:
# Clean-up previous test runs.
cls.client.delete_dataset(cls.dataset, delete_contents=True)
except api_exceptions.NotFound:
pass # It's OK if the dataset doesn't already exist.
cls.client.create_dataset(bigquery.Dataset(cls.dataset))
@classmethod
def teardown_class(cls):
# - GLOBAL CLASS FIXTURES -
# put here any instruction you want to execute only *ONCE* *AFTER*
# executing all tests.
cls.client.delete_dataset(cls.dataset, delete_contents=True)
def test_roundtrip(self):
destination_table = DESTINATION_TABLE + "1"
test_size = 20001
df = make_mixed_dataframe_v2(test_size)
df.to_gbq(destination_table, _get_project_id(), chunksize=None,
credentials=_get_credentials())
result = pd.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}"
.format(destination_table),
project_id=_get_project_id(),
credentials=_get_credentials(),
dialect="standard")
assert result['num_rows'][0] == test_size
@@ -0,0 +1,72 @@
import numpy as np
import pytest
from pandas.compat import StringIO
from pandas import DataFrame, date_range, read_csv
from pandas.util import _test_decorators as td
from pandas.util.testing import assert_frame_equal
from pandas.io.common import is_gcs_url
def test_is_gcs_url():
assert is_gcs_url("gcs://pandas/somethingelse.com")
assert is_gcs_url("gs://pandas/somethingelse.com")
assert not is_gcs_url("s3://pandas/somethingelse.com")
@td.skip_if_no('gcsfs')
def test_read_csv_gcs(monkeypatch):
df1 = DataFrame({'int': [1, 3], 'float': [2.0, np.nan], 'str': ['t', 's'],
'dt': date_range('2018-06-18', periods=2)})
class MockGCSFileSystem(object):
def open(*args):
return StringIO(df1.to_csv(index=False))
monkeypatch.setattr('gcsfs.GCSFileSystem', MockGCSFileSystem)
df2 = read_csv('gs://test/test.csv', parse_dates=['dt'])
assert_frame_equal(df1, df2)
@td.skip_if_no('gcsfs')
def test_to_csv_gcs(monkeypatch):
df1 = DataFrame({'int': [1, 3], 'float': [2.0, np.nan], 'str': ['t', 's'],
'dt': date_range('2018-06-18', periods=2)})
s = StringIO()
class MockGCSFileSystem(object):
def open(*args):
return s
monkeypatch.setattr('gcsfs.GCSFileSystem', MockGCSFileSystem)
df1.to_csv('gs://test/test.csv', index=True)
df2 = read_csv(StringIO(s.getvalue()), parse_dates=['dt'], index_col=0)
assert_frame_equal(df1, df2)
@td.skip_if_no('gcsfs')
def test_gcs_get_filepath_or_buffer(monkeypatch):
df1 = DataFrame({'int': [1, 3], 'float': [2.0, np.nan], 'str': ['t', 's'],
'dt': date_range('2018-06-18', periods=2)})
def mock_get_filepath_or_buffer(*args, **kwargs):
return (StringIO(df1.to_csv(index=False)),
None, None, False)
monkeypatch.setattr('pandas.io.gcs.get_filepath_or_buffer',
mock_get_filepath_or_buffer)
df2 = read_csv('gs://test/test.csv', parse_dates=['dt'])
assert_frame_equal(df1, df2)
@pytest.mark.skipif(td.safe_import('gcsfs'),
reason='Only check when gcsfs not installed')
def test_gcs_not_present_exception():
with pytest.raises(ImportError) as e:
read_csv('gs://test/test.csv')
assert 'gcsfs library is required' in str(e.value)
File diff suppressed because it is too large
@@ -0,0 +1,954 @@
import datetime
from distutils.version import LooseVersion
import glob
import os
from warnings import catch_warnings
import numpy as np
import pytest
from pandas._libs.tslib import iNaT
from pandas.compat import PY3, u
from pandas.errors import PerformanceWarning
import pandas
from pandas import (
Categorical, DataFrame, Index, Interval, MultiIndex, NaT, Panel, Period,
Series, Timestamp, bdate_range, compat, date_range, period_range)
from pandas.tests.test_panel import assert_panel_equal
import pandas.util.testing as tm
from pandas.util.testing import (
assert_categorical_equal, assert_frame_equal, assert_index_equal,
assert_series_equal, ensure_clean)
from pandas.io.packers import read_msgpack, to_msgpack
nan = np.nan
try:
import blosc # NOQA
except ImportError:
_BLOSC_INSTALLED = False
else:
_BLOSC_INSTALLED = True
try:
import zlib # NOQA
except ImportError:
_ZLIB_INSTALLED = False
else:
_ZLIB_INSTALLED = True
@pytest.fixture(scope='module')
def current_packers_data():
# our current version packers data
from pandas.tests.io.generate_legacy_storage_files import (
create_msgpack_data)
return create_msgpack_data()
@pytest.fixture(scope='module')
def all_packers_data():
# all of our current version packers data
from pandas.tests.io.generate_legacy_storage_files import (
create_data)
return create_data()
def check_arbitrary(a, b):
if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)):
assert len(a) == len(b)
for a_, b_ in zip(a, b):
check_arbitrary(a_, b_)
elif isinstance(a, Panel):
assert_panel_equal(a, b)
elif isinstance(a, DataFrame):
assert_frame_equal(a, b)
elif isinstance(a, Series):
assert_series_equal(a, b)
elif isinstance(a, Index):
assert_index_equal(a, b)
elif isinstance(a, Categorical):
# Temporary workaround:
# Categorical.categories is changed from str to bytes in PY3,
# maybe the same as GH 13591
if PY3 and b.categories.inferred_type == 'string':
pass
else:
tm.assert_categorical_equal(a, b)
elif a is NaT:
assert b is NaT
elif isinstance(a, Timestamp):
assert a == b
assert a.freq == b.freq
else:
assert a == b
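# Minimal usage sketch for check_arbitrary above (illustrative only,
# not collected by pytest): containers recurse element-wise and pandas
# objects dispatch to the matching assert_* helper.
def _example_check_arbitrary():
    check_arbitrary((Series([1.0]), NaT), (Series([1.0]), NaT))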
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class TestPackers(object):
def setup_method(self, method):
self.path = '__%s__.msg' % tm.rands(10)
def teardown_method(self, method):
pass
def encode_decode(self, x, compress=None, **kwargs):
with ensure_clean(self.path) as p:
to_msgpack(p, x, compress=compress, **kwargs)
return read_msgpack(p, **kwargs)
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class TestAPI(TestPackers):
def test_string_io(self):
df = DataFrame(np.random.randn(10, 2))
s = df.to_msgpack(None)
result = read_msgpack(s)
tm.assert_frame_equal(result, df)
s = df.to_msgpack()
result = read_msgpack(s)
tm.assert_frame_equal(result, df)
s = df.to_msgpack()
result = read_msgpack(compat.BytesIO(s))
tm.assert_frame_equal(result, df)
s = to_msgpack(None, df)
result = read_msgpack(s)
tm.assert_frame_equal(result, df)
with ensure_clean(self.path) as p:
s = df.to_msgpack()
with open(p, 'wb') as fh:
fh.write(s)
result = read_msgpack(p)
tm.assert_frame_equal(result, df)
def test_path_pathlib(self):
df = tm.makeDataFrame()
result = tm.round_trip_pathlib(df.to_msgpack, read_msgpack)
tm.assert_frame_equal(df, result)
def test_path_localpath(self):
df = tm.makeDataFrame()
result = tm.round_trip_localpath(df.to_msgpack, read_msgpack)
tm.assert_frame_equal(df, result)
def test_iterator_with_string_io(self):
dfs = [DataFrame(np.random.randn(10, 2)) for i in range(5)]
s = to_msgpack(None, *dfs)
for i, result in enumerate(read_msgpack(s, iterator=True)):
tm.assert_frame_equal(result, dfs[i])
def test_invalid_arg(self):
# GH10369
class A(object):
def __init__(self):
self.read = 0
msg = (r"Invalid file path or buffer object type: <(class|type)"
r" '{}'>")
with pytest.raises(ValueError, match=msg.format('NoneType')):
read_msgpack(path_or_buf=None)
with pytest.raises(ValueError, match=msg.format('dict')):
read_msgpack(path_or_buf={})
with pytest.raises(ValueError, match=msg.format(r'.*\.A')):
read_msgpack(path_or_buf=A())
class TestNumpy(TestPackers):
def test_numpy_scalar_float(self):
x = np.float32(np.random.rand())
x_rec = self.encode_decode(x)
tm.assert_almost_equal(x, x_rec)
def test_numpy_scalar_complex(self):
x = np.complex64(np.random.rand() + 1j * np.random.rand())
x_rec = self.encode_decode(x)
assert np.allclose(x, x_rec)
def test_scalar_float(self):
x = np.random.rand()
x_rec = self.encode_decode(x)
tm.assert_almost_equal(x, x_rec)
def test_scalar_bool(self):
x = np.bool_(1)
x_rec = self.encode_decode(x)
tm.assert_almost_equal(x, x_rec)
x = np.bool_(0)
x_rec = self.encode_decode(x)
tm.assert_almost_equal(x, x_rec)
def test_scalar_complex(self):
x = np.random.rand() + 1j * np.random.rand()
x_rec = self.encode_decode(x)
assert np.allclose(x, x_rec)
def test_list_numpy_float(self):
x = [np.float32(np.random.rand()) for i in range(5)]
x_rec = self.encode_decode(x)
# current msgpack cannot distinguish list/tuple
tm.assert_almost_equal(tuple(x), x_rec)
x_rec = self.encode_decode(tuple(x))
tm.assert_almost_equal(tuple(x), x_rec)
def test_list_numpy_float_complex(self):
if not hasattr(np, 'complex128'):
pytest.skip('numpy can not handle complex128')
x = [np.float32(np.random.rand()) for i in range(5)] + \
[np.complex128(np.random.rand() + 1j * np.random.rand())
for i in range(5)]
x_rec = self.encode_decode(x)
assert np.allclose(x, x_rec)
def test_list_float(self):
x = [np.random.rand() for i in range(5)]
x_rec = self.encode_decode(x)
# current msgpack cannot distinguish list/tuple
tm.assert_almost_equal(tuple(x), x_rec)
x_rec = self.encode_decode(tuple(x))
tm.assert_almost_equal(tuple(x), x_rec)
def test_list_float_complex(self):
x = [np.random.rand() for i in range(5)] + \
[(np.random.rand() + 1j * np.random.rand()) for i in range(5)]
x_rec = self.encode_decode(x)
assert np.allclose(x, x_rec)
def test_dict_float(self):
x = {'foo': 1.0, 'bar': 2.0}
x_rec = self.encode_decode(x)
tm.assert_almost_equal(x, x_rec)
def test_dict_complex(self):
x = {'foo': 1.0 + 1.0j, 'bar': 2.0 + 2.0j}
x_rec = self.encode_decode(x)
tm.assert_dict_equal(x, x_rec)
for key in x:
tm.assert_class_equal(x[key], x_rec[key], obj="complex value")
def test_dict_numpy_float(self):
x = {'foo': np.float32(1.0), 'bar': np.float32(2.0)}
x_rec = self.encode_decode(x)
tm.assert_almost_equal(x, x_rec)
def test_dict_numpy_complex(self):
x = {'foo': np.complex128(1.0 + 1.0j),
'bar': np.complex128(2.0 + 2.0j)}
x_rec = self.encode_decode(x)
tm.assert_dict_equal(x, x_rec)
for key in x:
tm.assert_class_equal(x[key], x_rec[key], obj="numpy complex128")
def test_numpy_array_float(self):
# run multiple times
for n in range(10):
x = np.random.rand(10)
for dtype in ['float32', 'float64']:
x = x.astype(dtype)
x_rec = self.encode_decode(x)
tm.assert_almost_equal(x, x_rec)
def test_numpy_array_complex(self):
x = (np.random.rand(5) + 1j * np.random.rand(5)).astype(np.complex128)
x_rec = self.encode_decode(x)
assert all(a == b for a, b in zip(x, x_rec))
assert x.dtype == x_rec.dtype
def test_list_mixed(self):
x = [1.0, np.float32(3.5), np.complex128(4.25), u('foo'), np.bool_(1)]
x_rec = self.encode_decode(x)
# current msgpack cannot distinguish list/tuple
tm.assert_almost_equal(tuple(x), x_rec)
x_rec = self.encode_decode(tuple(x))
tm.assert_almost_equal(tuple(x), x_rec)
class TestBasic(TestPackers):
def test_timestamp(self):
for i in [Timestamp(
'20130101'), Timestamp('20130101', tz='US/Eastern'),
Timestamp('201301010501')]:
i_rec = self.encode_decode(i)
assert i == i_rec
def test_nat(self):
nat_rec = self.encode_decode(NaT)
assert NaT is nat_rec
def test_datetimes(self):
for i in [datetime.datetime(2013, 1, 1),
datetime.datetime(2013, 1, 1, 5, 1),
datetime.date(2013, 1, 1),
np.datetime64(datetime.datetime(2013, 1, 5, 2, 15))]:
i_rec = self.encode_decode(i)
assert i == i_rec
def test_timedeltas(self):
for i in [datetime.timedelta(days=1),
datetime.timedelta(days=1, seconds=10),
np.timedelta64(1000000)]:
i_rec = self.encode_decode(i)
assert i == i_rec
def test_periods(self):
# 13463
for i in [Period('2010-09', 'M'), Period('2014-Q1', 'Q')]:
i_rec = self.encode_decode(i)
assert i == i_rec
def test_intervals(self):
# 19967
for i in [Interval(0, 1), Interval(0, 1, 'left'),
Interval(10, 25., 'right')]:
i_rec = self.encode_decode(i)
assert i == i_rec
class TestIndex(TestPackers):
def setup_method(self, method):
super(TestIndex, self).setup_method(method)
self.d = {
'string': tm.makeStringIndex(100),
'date': tm.makeDateIndex(100),
'int': tm.makeIntIndex(100),
'rng': tm.makeRangeIndex(100),
'float': tm.makeFloatIndex(100),
'empty': Index([]),
'tuple': Index(zip(['foo', 'bar', 'baz'], [1, 2, 3])),
'period': Index(period_range('2012-1-1', freq='M', periods=3)),
'date2': Index(date_range('2013-01-1', periods=10)),
'bdate': Index(bdate_range('2013-01-02', periods=10)),
'cat': tm.makeCategoricalIndex(100),
'interval': tm.makeIntervalIndex(100),
'timedelta': tm.makeTimedeltaIndex(100, 'H')
}
self.mi = {
'reg': MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'),
('foo', 'two'),
('qux', 'one'), ('qux', 'two')],
names=['first', 'second']),
}
def test_basic_index(self):
for s, i in self.d.items():
i_rec = self.encode_decode(i)
tm.assert_index_equal(i, i_rec)
# datetime with no freq (GH5506)
i = Index([Timestamp('20130101'), Timestamp('20130103')])
i_rec = self.encode_decode(i)
tm.assert_index_equal(i, i_rec)
# datetime with timezone
i = Index([Timestamp('20130101 9:00:00'), Timestamp(
'20130103 11:00:00')]).tz_localize('US/Eastern')
i_rec = self.encode_decode(i)
tm.assert_index_equal(i, i_rec)
def test_multi_index(self):
for s, i in self.mi.items():
i_rec = self.encode_decode(i)
tm.assert_index_equal(i, i_rec)
def test_unicode(self):
i = tm.makeUnicodeIndex(100)
i_rec = self.encode_decode(i)
tm.assert_index_equal(i, i_rec)
def test_categorical_index(self):
# GH15487
df = DataFrame(np.random.randn(10, 2))
df = df.astype({0: 'category'}).set_index(0)
result = self.encode_decode(df)
tm.assert_frame_equal(result, df)
class TestSeries(TestPackers):
def setup_method(self, method):
super(TestSeries, self).setup_method(method)
self.d = {}
s = tm.makeStringSeries()
s.name = 'string'
self.d['string'] = s
s = tm.makeObjectSeries()
s.name = 'object'
self.d['object'] = s
s = Series(iNaT, dtype='M8[ns]', index=range(5))
self.d['date'] = s
data = {
'A': [0., 1., 2., 3., np.nan],
'B': [0, 1, 0, 1, 0],
'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
'D': date_range('1/1/2009', periods=5),
'E': [0., 1, Timestamp('20100101'), 'foo', 2.],
'F': [Timestamp('20130102', tz='US/Eastern')] * 2 +
[Timestamp('20130603', tz='CET')] * 3,
'G': [Timestamp('20130102', tz='US/Eastern')] * 5,
'H': Categorical([1, 2, 3, 4, 5]),
'I': Categorical([1, 2, 3, 4, 5], ordered=True),
'J': (np.bool_(1), 2, 3, 4, 5),
}
self.d['float'] = Series(data['A'])
self.d['int'] = Series(data['B'])
self.d['mixed'] = Series(data['E'])
self.d['dt_tz_mixed'] = Series(data['F'])
self.d['dt_tz'] = Series(data['G'])
self.d['cat_unordered'] = Series(data['H'])
self.d['cat_ordered'] = Series(data['I'])
self.d['numpy_bool_mixed'] = Series(data['J'])
def test_basic(self):
# run multiple times here
for n in range(10):
for s, i in self.d.items():
i_rec = self.encode_decode(i)
assert_series_equal(i, i_rec)
class TestCategorical(TestPackers):
def setup_method(self, method):
super(TestCategorical, self).setup_method(method)
self.d = {}
self.d['plain_str'] = Categorical(['a', 'b', 'c', 'd', 'e'])
self.d['plain_str_ordered'] = Categorical(['a', 'b', 'c', 'd', 'e'],
ordered=True)
self.d['plain_int'] = Categorical([5, 6, 7, 8])
self.d['plain_int_ordered'] = Categorical([5, 6, 7, 8], ordered=True)
def test_basic(self):
# run multiple times here
for n in range(10):
for s, i in self.d.items():
i_rec = self.encode_decode(i)
assert_categorical_equal(i, i_rec)
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class TestNDFrame(TestPackers):
def setup_method(self, method):
super(TestNDFrame, self).setup_method(method)
data = {
'A': [0., 1., 2., 3., np.nan],
'B': [0, 1, 0, 1, 0],
'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
'D': date_range('1/1/2009', periods=5),
'E': [0., 1, Timestamp('20100101'), 'foo', 2.],
'F': [Timestamp('20130102', tz='US/Eastern')] * 5,
'G': [Timestamp('20130603', tz='CET')] * 5,
'H': Categorical(['a', 'b', 'c', 'd', 'e']),
'I': Categorical(['a', 'b', 'c', 'd', 'e'], ordered=True),
}
self.frame = {
'float': DataFrame(dict(A=data['A'], B=Series(data['A']) + 1)),
'int': DataFrame(dict(A=data['B'], B=Series(data['B']) + 1)),
'mixed': DataFrame(data)}
self.panel = {
'float': Panel(dict(ItemA=self.frame['float'],
ItemB=self.frame['float'] + 1))}
def test_basic_frame(self):
for s, i in self.frame.items():
i_rec = self.encode_decode(i)
assert_frame_equal(i, i_rec)
def test_basic_panel(self):
with catch_warnings(record=True):
for s, i in self.panel.items():
i_rec = self.encode_decode(i)
assert_panel_equal(i, i_rec)
def test_multi(self):
i_rec = self.encode_decode(self.frame)
for k in self.frame.keys():
assert_frame_equal(self.frame[k], i_rec[k])
packed_items = tuple([self.frame['float'], self.frame['float'].A,
self.frame['float'].B, None])
l_rec = self.encode_decode(packed_items)
check_arbitrary(packed_items, l_rec)
# this is an oddity in that packed lists will be returned as tuples
packed_items = [self.frame['float'], self.frame['float'].A,
self.frame['float'].B, None]
l_rec = self.encode_decode(packed_items)
assert isinstance(l_rec, tuple)
check_arbitrary(packed_items, l_rec)
def test_iterator(self):
packed_items = [self.frame['float'], self.frame['float'].A,
self.frame['float'].B, None]
with ensure_clean(self.path) as path:
to_msgpack(path, *packed_items)
for i, packed in enumerate(read_msgpack(path, iterator=True)):
check_arbitrary(packed, packed_items[i])
def tests_datetimeindex_freq_issue(self):
# GH 5947
# inferring freq on the datetimeindex
df = DataFrame([1, 2, 3], index=date_range('1/1/2013', '1/3/2013'))
result = self.encode_decode(df)
assert_frame_equal(result, df)
df = DataFrame([1, 2], index=date_range('1/1/2013', '1/2/2013'))
result = self.encode_decode(df)
assert_frame_equal(result, df)
def test_dataframe_duplicate_column_names(self):
# GH 9618
expected_1 = DataFrame(columns=['a', 'a'])
expected_2 = DataFrame(columns=[1] * 100)
expected_2.loc[0] = np.random.randn(100)
expected_3 = DataFrame(columns=[1, 1])
expected_3.loc[0] = ['abc', np.nan]
result_1 = self.encode_decode(expected_1)
result_2 = self.encode_decode(expected_2)
result_3 = self.encode_decode(expected_3)
assert_frame_equal(result_1, expected_1)
assert_frame_equal(result_2, expected_2)
assert_frame_equal(result_3, expected_3)
class TestSparse(TestPackers):
def _check_roundtrip(self, obj, comparator, **kwargs):
# currently these are not implemented
# i_rec = self.encode_decode(obj)
# comparator(obj, i_rec, **kwargs)
msg = r"msgpack sparse (series|frame) is not implemented"
with pytest.raises(NotImplementedError, match=msg):
self.encode_decode(obj)
def test_sparse_series(self):
s = tm.makeStringSeries()
s[3:5] = np.nan
ss = s.to_sparse()
self._check_roundtrip(ss, tm.assert_series_equal,
check_series_type=True)
ss2 = s.to_sparse(kind='integer')
self._check_roundtrip(ss2, tm.assert_series_equal,
check_series_type=True)
ss3 = s.to_sparse(fill_value=0)
self._check_roundtrip(ss3, tm.assert_series_equal,
check_series_type=True)
def test_sparse_frame(self):
s = tm.makeDataFrame()
s.loc[3:5, 1:3] = np.nan
s.loc[8:10, -2] = np.nan
ss = s.to_sparse()
self._check_roundtrip(ss, tm.assert_frame_equal,
check_frame_type=True)
ss2 = s.to_sparse(kind='integer')
self._check_roundtrip(ss2, tm.assert_frame_equal,
check_frame_type=True)
ss3 = s.to_sparse(fill_value=0)
self._check_roundtrip(ss3, tm.assert_frame_equal,
check_frame_type=True)
class TestCompression(TestPackers):
"""See https://github.com/pandas-dev/pandas/pull/9783
"""
def setup_method(self, method):
try:
from sqlalchemy import create_engine
self._create_sql_engine = create_engine
except ImportError:
self._SQLALCHEMY_INSTALLED = False
else:
self._SQLALCHEMY_INSTALLED = True
super(TestCompression, self).setup_method(method)
data = {
'A': np.arange(1000, dtype=np.float64),
'B': np.arange(1000, dtype=np.int32),
'C': list(100 * 'abcdefghij'),
'D': date_range(datetime.datetime(2015, 4, 1), periods=1000),
'E': [datetime.timedelta(days=x) for x in range(1000)],
}
self.frame = {
'float': DataFrame({k: data[k] for k in ['A', 'A']}),
'int': DataFrame({k: data[k] for k in ['B', 'B']}),
'mixed': DataFrame(data),
}
def test_plain(self):
i_rec = self.encode_decode(self.frame)
for k in self.frame.keys():
assert_frame_equal(self.frame[k], i_rec[k])
def _test_compression(self, compress):
i_rec = self.encode_decode(self.frame, compress=compress)
for k in self.frame.keys():
value = i_rec[k]
expected = self.frame[k]
assert_frame_equal(value, expected)
# make sure that we can write to the new frames
for block in value._data.blocks:
assert block.values.flags.writeable
def test_compression_zlib(self):
if not _ZLIB_INSTALLED:
pytest.skip('no zlib')
self._test_compression('zlib')
def test_compression_blosc(self):
if not _BLOSC_INSTALLED:
pytest.skip('no blosc')
self._test_compression('blosc')
def _test_compression_warns_when_decompress_caches(
self, monkeypatch, compress):
not_garbage = []
control = [] # copied data
compress_module = globals()[compress]
real_decompress = compress_module.decompress
def decompress(ob):
"""mock decompress function that delegates to the real
decompress but caches the result and a copy of the result.
"""
res = real_decompress(ob)
not_garbage.append(res) # hold a reference to this bytes object
control.append(bytearray(res)) # copy the data here to check later
return res
# types mapped to values to add in place.
rhs = {
np.dtype('float64'): 1.0,
np.dtype('int32'): 1,
np.dtype('object'): 'a',
np.dtype('datetime64[ns]'): np.timedelta64(1, 'ns'),
np.dtype('timedelta64[ns]'): np.timedelta64(1, 'ns'),
}
with monkeypatch.context() as m, \
tm.assert_produces_warning(PerformanceWarning) as ws:
m.setattr(compress_module, 'decompress', decompress)
i_rec = self.encode_decode(self.frame, compress=compress)
for k in self.frame.keys():
value = i_rec[k]
expected = self.frame[k]
assert_frame_equal(value, expected)
# make sure that we can write to the new frames even though
# we needed to copy the data
for block in value._data.blocks:
assert block.values.flags.writeable
# mutate the data in some way
block.values[0] += rhs[block.dtype]
for w in ws:
# check the messages from our warnings
assert str(w.message) == ('copying data after decompressing; '
'this may mean that decompress is '
'caching its result')
for buf, control_buf in zip(not_garbage, control):
# make sure none of our mutations above affected the
# original buffers
assert buf == control_buf
def test_compression_warns_when_decompress_caches_zlib(self, monkeypatch):
if not _ZLIB_INSTALLED:
pytest.skip('no zlib')
self._test_compression_warns_when_decompress_caches(
monkeypatch, 'zlib')
def test_compression_warns_when_decompress_caches_blosc(self, monkeypatch):
if not _BLOSC_INSTALLED:
pytest.skip('no blosc')
self._test_compression_warns_when_decompress_caches(
monkeypatch, 'blosc')
def _test_small_strings_no_warn(self, compress):
empty = np.array([], dtype='uint8')
with tm.assert_produces_warning(None):
empty_unpacked = self.encode_decode(empty, compress=compress)
tm.assert_numpy_array_equal(empty_unpacked, empty)
assert empty_unpacked.flags.writeable
char = np.array([ord(b'a')], dtype='uint8')
with tm.assert_produces_warning(None):
char_unpacked = self.encode_decode(char, compress=compress)
tm.assert_numpy_array_equal(char_unpacked, char)
assert char_unpacked.flags.writeable
# if this test fails I am sorry because the interpreter is now in a
# bad state where b'a' points to 98 == ord(b'b').
char_unpacked[0] = ord(b'b')
# we compare the ord of bytes b'a' with unicode u'a' because they
# should always be the same (unless we were able to mutate the shared
# character singleton, in which case ord(b'a') == ord(b'b')).
assert ord(b'a') == ord(u'a')
tm.assert_numpy_array_equal(
char_unpacked,
np.array([ord(b'b')], dtype='uint8'),
)
def test_small_strings_no_warn_zlib(self):
if not _ZLIB_INSTALLED:
pytest.skip('no zlib')
self._test_small_strings_no_warn('zlib')
def test_small_strings_no_warn_blosc(self):
if not _BLOSC_INSTALLED:
pytest.skip('no blosc')
self._test_small_strings_no_warn('blosc')
def test_readonly_axis_blosc(self):
# GH11880
if not _BLOSC_INSTALLED:
pytest.skip('no blosc')
df1 = DataFrame({'A': list('abcd')})
df2 = DataFrame(df1, index=[1., 2., 3., 4.])
assert 1 in self.encode_decode(df1['A'], compress='blosc')
assert 1. in self.encode_decode(df2['A'], compress='blosc')
def test_readonly_axis_zlib(self):
# GH11880
df1 = DataFrame({'A': list('abcd')})
df2 = DataFrame(df1, index=[1., 2., 3., 4.])
assert 1 in self.encode_decode(df1['A'], compress='zlib')
assert 1. in self.encode_decode(df2['A'], compress='zlib')
def test_readonly_axis_blosc_to_sql(self):
# GH11880
if not _BLOSC_INSTALLED:
pytest.skip('no blosc')
if not self._SQLALCHEMY_INSTALLED:
pytest.skip('no sqlalchemy')
expected = DataFrame({'A': list('abcd')})
df = self.encode_decode(expected, compress='blosc')
eng = self._create_sql_engine("sqlite:///:memory:")
df.to_sql('test', eng, if_exists='append')
result = pandas.read_sql_table('test', eng, index_col='index')
result.index.names = [None]
assert_frame_equal(expected, result)
def test_readonly_axis_zlib_to_sql(self):
# GH11880
if not _ZLIB_INSTALLED:
pytest.skip('no zlib')
if not self._SQLALCHEMY_INSTALLED:
pytest.skip('no sqlalchemy')
expected = DataFrame({'A': list('abcd')})
df = self.encode_decode(expected, compress='zlib')
eng = self._create_sql_engine("sqlite:///:memory:")
df.to_sql('test', eng, if_exists='append')
result = pandas.read_sql_table('test', eng, index_col='index')
result.index.names = [None]
assert_frame_equal(expected, result)
class TestEncoding(TestPackers):
def setup_method(self, method):
super(TestEncoding, self).setup_method(method)
data = {
'A': [compat.u('\u2019')] * 1000,
'B': np.arange(1000, dtype=np.int32),
'C': list(100 * 'abcdefghij'),
'D': date_range(datetime.datetime(2015, 4, 1), periods=1000),
'E': [datetime.timedelta(days=x) for x in range(1000)],
'G': [400] * 1000
}
self.frame = {
'float': DataFrame({k: data[k] for k in ['A', 'A']}),
'int': DataFrame({k: data[k] for k in ['B', 'B']}),
'mixed': DataFrame(data),
}
self.utf_encodings = ['utf8', 'utf16', 'utf32']
def test_utf(self):
# GH10581
for encoding in self.utf_encodings:
for frame in compat.itervalues(self.frame):
result = self.encode_decode(frame, encoding=encoding)
assert_frame_equal(result, frame)
def test_default_encoding(self):
for frame in compat.itervalues(self.frame):
result = frame.to_msgpack()
expected = frame.to_msgpack(encoding='utf8')
assert result == expected
result = self.encode_decode(frame)
assert_frame_equal(result, frame)
files = glob.glob(os.path.join(os.path.dirname(__file__), "data",
"legacy_msgpack", "*", "*.msgpack"))
@pytest.fixture(params=files)
def legacy_packer(request, datapath):
return datapath(request.param)
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class TestMsgpack(object):
"""
How to add msgpack tests:
1. Install pandas version intended to output the msgpack.
2. Execute "generate_legacy_storage_files.py" to create the msgpack.
$ python generate_legacy_storage_files.py <output_dir> msgpack
3. Move the created msgpack to "data/legacy_msgpack/<version>" directory.
"""
minimum_structure = {'series': ['float', 'int', 'mixed',
'ts', 'mi', 'dup'],
'frame': ['float', 'int', 'mixed', 'mi'],
'panel': ['float'],
'index': ['int', 'date', 'period'],
'mi': ['reg2']}
def check_min_structure(self, data, version):
for typ, v in self.minimum_structure.items():
assert typ in data, '"{0}" not found in unpacked data'.format(typ)
for kind in v:
msg = '"{0}" not found in data["{1}"]'.format(kind, typ)
assert kind in data[typ], msg
def compare(self, current_data, all_data, vf, version):
# GH12277 encoding default used to be latin-1, now utf-8
if LooseVersion(version) < LooseVersion('0.18.0'):
data = read_msgpack(vf, encoding='latin-1')
else:
data = read_msgpack(vf)
self.check_min_structure(data, version)
for typ, dv in data.items():
assert typ in all_data, ('unpacked data contains '
'extra key "{0}"'
.format(typ))
for dt, result in dv.items():
assert dt in current_data[typ], ('data["{0}"] contains extra '
'key "{1}"'.format(typ, dt))
try:
expected = current_data[typ][dt]
except KeyError:
continue
# use a specific comparator
# if available
comp_method = "compare_{typ}_{dt}".format(typ=typ, dt=dt)
comparator = getattr(self, comp_method, None)
if comparator is not None:
comparator(result, expected, typ, version)
else:
check_arbitrary(result, expected)
return data
def compare_series_dt_tz(self, result, expected, typ, version):
# 8260
# dtype is object < 0.17.0
if LooseVersion(version) < LooseVersion('0.17.0'):
expected = expected.astype(object)
tm.assert_series_equal(result, expected)
else:
tm.assert_series_equal(result, expected)
def compare_frame_dt_mixed_tzs(self, result, expected, typ, version):
# 8260
# dtype is object < 0.17.0
if LooseVersion(version) < LooseVersion('0.17.0'):
expected = expected.astype(object)
tm.assert_frame_equal(result, expected)
else:
tm.assert_frame_equal(result, expected)
def test_msgpacks_legacy(self, current_packers_data, all_packers_data,
legacy_packer, datapath):
version = os.path.basename(os.path.dirname(legacy_packer))
# GH12142 0.17 files packed in P2 can't be read in P3
if (compat.PY3 and version.startswith('0.17.') and
legacy_packer.split('.')[-4][-1] == '2'):
msg = "Files packed in Py2 can't be read in Py3 ({})"
pytest.skip(msg.format(version))
try:
with catch_warnings(record=True):
self.compare(current_packers_data, all_packers_data,
legacy_packer, version)
except ImportError:
# blosc not installed
pass
def test_msgpack_period_freq(self):
# https://github.com/pandas-dev/pandas/issues/24135
s = Series(np.random.rand(5), index=date_range('20130101', periods=5))
r = read_msgpack(s.to_msgpack())
repr(r)
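# The version-gated decoding inside ``TestMsgpack.compare`` is easy to miss;
# the following is a minimal standalone sketch of that logic (hypothetical
# helper name, not called by any test here).
def _read_legacy_msgpack(vf, version):
    if LooseVersion(version) < LooseVersion('0.18.0'):
        # files written before 0.18.0 defaulted to latin-1 (GH12277)
        return read_msgpack(vf, encoding='latin-1')
    return read_msgpack(vf)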
@@ -0,0 +1,541 @@
""" test parquet compat """
import datetime
from distutils.version import LooseVersion
import os
from warnings import catch_warnings
import numpy as np
import pytest
from pandas.compat import PY3
import pandas.util._test_decorators as td
import pandas as pd
from pandas.util import testing as tm
from pandas.io.parquet import (
FastParquetImpl, PyArrowImpl, get_engine, read_parquet, to_parquet)
try:
import pyarrow # noqa
_HAVE_PYARROW = True
except ImportError:
_HAVE_PYARROW = False
try:
import fastparquet # noqa
_HAVE_FASTPARQUET = True
except ImportError:
_HAVE_FASTPARQUET = False
# setup engines & skips
@pytest.fixture(params=[
pytest.param('fastparquet',
marks=pytest.mark.skipif(not _HAVE_FASTPARQUET,
reason='fastparquet is '
'not installed')),
pytest.param('pyarrow',
marks=pytest.mark.skipif(not _HAVE_PYARROW,
reason='pyarrow is '
'not installed'))])
def engine(request):
return request.param
@pytest.fixture
def pa():
if not _HAVE_PYARROW:
pytest.skip("pyarrow is not installed")
return 'pyarrow'
@pytest.fixture
def fp():
if not _HAVE_FASTPARQUET:
pytest.skip("fastparquet is not installed")
return 'fastparquet'
@pytest.fixture
def df_compat():
return pd.DataFrame({'A': [1, 2, 3], 'B': 'foo'})
@pytest.fixture
def df_cross_compat():
df = pd.DataFrame({'a': list('abc'),
'b': list(range(1, 4)),
# 'c': np.arange(3, 6).astype('u1'),
'd': np.arange(4.0, 7.0, dtype='float64'),
'e': [True, False, True],
'f': pd.date_range('20130101', periods=3),
# 'g': pd.date_range('20130101', periods=3,
# tz='US/Eastern'),
# 'h': pd.date_range('20130101', periods=3, freq='ns')
})
return df
@pytest.fixture
def df_full():
return pd.DataFrame(
{'string': list('abc'),
'string_with_nan': ['a', np.nan, 'c'],
'string_with_none': ['a', None, 'c'],
'bytes': [b'foo', b'bar', b'baz'],
'unicode': [u'foo', u'bar', u'baz'],
'int': list(range(1, 4)),
'uint': np.arange(3, 6).astype('u1'),
'float': np.arange(4.0, 7.0, dtype='float64'),
'float_with_nan': [2., np.nan, 3.],
'bool': [True, False, True],
'datetime': pd.date_range('20130101', periods=3),
'datetime_with_nat': [pd.Timestamp('20130101'),
pd.NaT,
pd.Timestamp('20130103')]})
def check_round_trip(df, engine=None, path=None,
write_kwargs=None, read_kwargs=None,
expected=None, check_names=True,
repeat=2):
"""Verify parquet serializer and deserializer produce the same results.
Performs a pandas to disk and disk to pandas round trip,
then compares the 2 resulting DataFrames to verify equality.
Parameters
----------
df: Dataframe
engine: str, optional
'pyarrow' or 'fastparquet'
path: str, optional
write_kwargs: dict of str:str, optional
read_kwargs: dict of str:str, optional
expected: DataFrame, optional
Expected deserialization result, otherwise will be equal to `df`
check_names: list of str, optional
Closed set of column names to be compared
repeat: int, optional
How many times to repeat the test
"""
write_kwargs = write_kwargs or {'compression': None}
read_kwargs = read_kwargs or {}
if expected is None:
expected = df
if engine:
write_kwargs['engine'] = engine
read_kwargs['engine'] = engine
def compare(repeat):
for _ in range(repeat):
df.to_parquet(path, **write_kwargs)
with catch_warnings(record=True):
actual = read_parquet(path, **read_kwargs)
tm.assert_frame_equal(expected, actual,
check_names=check_names)
if path is None:
with tm.ensure_clean() as path:
compare(repeat)
else:
compare(repeat)
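# Illustrative usage of ``check_round_trip`` (a sketch with a hypothetical
# helper name, not collected by pytest): read back a column subset and
# compare it against a narrowed ``expected``.
def _example_check_round_trip():
    df = pd.DataFrame({'x': [1, 2, 3], 'y': list('abc')})
    check_round_trip(df, engine='pyarrow',
                     read_kwargs={'columns': ['x']},
                     expected=df[['x']])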
def test_invalid_engine(df_compat):
with pytest.raises(ValueError):
check_round_trip(df_compat, 'foo', 'bar')
def test_options_py(df_compat, pa):
# use the set option
with pd.option_context('io.parquet.engine', 'pyarrow'):
check_round_trip(df_compat)
def test_options_fp(df_compat, fp):
# use the set option
with pd.option_context('io.parquet.engine', 'fastparquet'):
check_round_trip(df_compat)
def test_options_auto(df_compat, fp, pa):
# use the set option
with pd.option_context('io.parquet.engine', 'auto'):
check_round_trip(df_compat)
def test_options_get_engine(fp, pa):
assert isinstance(get_engine('pyarrow'), PyArrowImpl)
assert isinstance(get_engine('fastparquet'), FastParquetImpl)
with pd.option_context('io.parquet.engine', 'pyarrow'):
assert isinstance(get_engine('auto'), PyArrowImpl)
assert isinstance(get_engine('pyarrow'), PyArrowImpl)
assert isinstance(get_engine('fastparquet'), FastParquetImpl)
with pd.option_context('io.parquet.engine', 'fastparquet'):
assert isinstance(get_engine('auto'), FastParquetImpl)
assert isinstance(get_engine('pyarrow'), PyArrowImpl)
assert isinstance(get_engine('fastparquet'), FastParquetImpl)
with pd.option_context('io.parquet.engine', 'auto'):
assert isinstance(get_engine('auto'), PyArrowImpl)
assert isinstance(get_engine('pyarrow'), PyArrowImpl)
assert isinstance(get_engine('fastparquet'), FastParquetImpl)
def test_cross_engine_pa_fp(df_cross_compat, pa, fp):
# cross-compat with differing reading/writing engines
df = df_cross_compat
with tm.ensure_clean() as path:
df.to_parquet(path, engine=pa, compression=None)
result = read_parquet(path, engine=fp)
tm.assert_frame_equal(result, df)
result = read_parquet(path, engine=fp, columns=['a', 'd'])
tm.assert_frame_equal(result, df[['a', 'd']])
def test_cross_engine_fp_pa(df_cross_compat, pa, fp):
# cross-compat with differing reading/writing engines
df = df_cross_compat
with tm.ensure_clean() as path:
df.to_parquet(path, engine=fp, compression=None)
with catch_warnings(record=True):
result = read_parquet(path, engine=pa)
tm.assert_frame_equal(result, df)
result = read_parquet(path, engine=pa, columns=['a', 'd'])
tm.assert_frame_equal(result, df[['a', 'd']])
class Base(object):
def check_error_on_write(self, df, engine, exc):
# check that we are raising the exception on writing
with tm.ensure_clean() as path:
with pytest.raises(exc):
to_parquet(df, path, engine, compression=None)
class TestBasic(Base):
def test_error(self, engine):
for obj in [pd.Series([1, 2, 3]), 1, 'foo', pd.Timestamp('20130101'),
np.array([1, 2, 3])]:
self.check_error_on_write(obj, engine, ValueError)
def test_columns_dtypes(self, engine):
df = pd.DataFrame({'string': list('abc'),
'int': list(range(1, 4))})
# unicode
df.columns = [u'foo', u'bar']
check_round_trip(df, engine)
def test_columns_dtypes_invalid(self, engine):
df = pd.DataFrame({'string': list('abc'),
'int': list(range(1, 4))})
# numeric
df.columns = [0, 1]
self.check_error_on_write(df, engine, ValueError)
if PY3:
# bytes on PY3, on PY2 these are str
df.columns = [b'foo', b'bar']
self.check_error_on_write(df, engine, ValueError)
# python object
df.columns = [datetime.datetime(2011, 1, 1, 0, 0),
datetime.datetime(2011, 1, 1, 1, 1)]
self.check_error_on_write(df, engine, ValueError)
@pytest.mark.parametrize('compression', [None, 'gzip', 'snappy', 'brotli'])
def test_compression(self, engine, compression):
if compression == 'snappy':
pytest.importorskip('snappy')
elif compression == 'brotli':
pytest.importorskip('brotli')
df = pd.DataFrame({'A': [1, 2, 3]})
check_round_trip(df, engine, write_kwargs={'compression': compression})
def test_read_columns(self, engine):
# GH18154
df = pd.DataFrame({'string': list('abc'),
'int': list(range(1, 4))})
expected = pd.DataFrame({'string': list('abc')})
check_round_trip(df, engine, expected=expected,
read_kwargs={'columns': ['string']})
def test_write_index(self, engine):
check_names = engine != 'fastparquet'
df = pd.DataFrame({'A': [1, 2, 3]})
check_round_trip(df, engine)
indexes = [
[2, 3, 4],
pd.date_range('20130101', periods=3),
list('abc'),
[1, 3, 4],
]
# non-default index
for index in indexes:
df.index = index
check_round_trip(df, engine, check_names=check_names)
# index with meta-data
df.index = [0, 1, 2]
df.index.name = 'foo'
check_round_trip(df, engine)
def test_write_multiindex(self, pa):
# Not supported in fastparquet as of 0.1.3, nor in older pyarrow versions
engine = pa
df = pd.DataFrame({'A': [1, 2, 3]})
index = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)])
df.index = index
check_round_trip(df, engine)
def test_write_column_multiindex(self, engine):
# column multi-index
mi_columns = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)])
df = pd.DataFrame(np.random.randn(4, 3), columns=mi_columns)
self.check_error_on_write(df, engine, ValueError)
def test_multiindex_with_columns(self, pa):
engine = pa
dates = pd.date_range('01-Jan-2018', '01-Dec-2018', freq='MS')
df = pd.DataFrame(np.random.randn(2 * len(dates), 3),
columns=list('ABC'))
index1 = pd.MultiIndex.from_product(
[['Level1', 'Level2'], dates],
names=['level', 'date'])
index2 = index1.copy(names=None)
for index in [index1, index2]:
df.index = index
check_round_trip(df, engine)
check_round_trip(df, engine, read_kwargs={'columns': ['A', 'B']},
expected=df[['A', 'B']])
def test_write_ignoring_index(self, engine):
# ENH 20768
# Ensure index=False omits the index from the written Parquet file.
df = pd.DataFrame({'a': [1, 2, 3], 'b': ['q', 'r', 's']})
write_kwargs = {
'compression': None,
'index': False,
}
# Because we're dropping the index, we expect the loaded dataframe to
# have the default integer index.
expected = df.reset_index(drop=True)
check_round_trip(df, engine, write_kwargs=write_kwargs,
expected=expected)
# Ignore custom index
df = pd.DataFrame({'a': [1, 2, 3], 'b': ['q', 'r', 's']},
index=['zyx', 'wvu', 'tsr'])
check_round_trip(df, engine, write_kwargs=write_kwargs,
expected=expected)
# Ignore multi-indexes as well.
arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
df = pd.DataFrame({'one': [i for i in range(8)],
'two': [-i for i in range(8)]}, index=arrays)
expected = df.reset_index(drop=True)
check_round_trip(df, engine, write_kwargs=write_kwargs,
expected=expected)
class TestParquetPyArrow(Base):
def test_basic(self, pa, df_full):
df = df_full
# additional supported types for pyarrow
df['datetime_tz'] = pd.date_range('20130101', periods=3,
tz='Europe/Brussels')
df['bool_with_none'] = [True, None, True]
check_round_trip(df, pa)
# TODO: This doesn't fail on all systems; track down which
@pytest.mark.xfail(reason="pyarrow fails on this (ARROW-1883)",
strict=False)
def test_basic_subset_columns(self, pa, df_full):
# GH18628
df = df_full
# additional supported types for pyarrow
df['datetime_tz'] = pd.date_range('20130101', periods=3,
tz='Europe/Brussels')
check_round_trip(df, pa, expected=df[['string', 'int']],
read_kwargs={'columns': ['string', 'int']})
def test_duplicate_columns(self, pa):
# not currently able to handle duplicate columns
df = pd.DataFrame(np.arange(12).reshape(4, 3),
columns=list('aaa')).copy()
self.check_error_on_write(df, pa, ValueError)
def test_unsupported(self, pa):
# period
df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)})
# pyarrow 0.11 raises ArrowTypeError
# older pyarrows raise ArrowInvalid
self.check_error_on_write(df, pa, Exception)
# timedelta
df = pd.DataFrame({'a': pd.timedelta_range('1 day',
periods=3)})
self.check_error_on_write(df, pa, NotImplementedError)
# mixed python objects
df = pd.DataFrame({'a': ['a', 1, 2.0]})
# pyarrow 0.11 raises ArrowTypeError
# older pyarrows raise ArrowInvalid
self.check_error_on_write(df, pa, Exception)
def test_categorical(self, pa):
# supported in >= 0.7.0
df = pd.DataFrame({'a': pd.Categorical(list('abc'))})
# de-serialized as object
expected = df.assign(a=df.a.astype(object))
check_round_trip(df, pa, expected=expected)
def test_s3_roundtrip(self, df_compat, s3_resource, pa):
# GH #19134
check_round_trip(df_compat, pa,
path='s3://pandas-test/pyarrow.parquet')
def test_partition_cols_supported(self, pa, df_full):
# GH #23283
partition_cols = ['bool', 'int']
df = df_full
with tm.ensure_clean_dir() as path:
df.to_parquet(path, partition_cols=partition_cols,
compression=None)
import pyarrow.parquet as pq
dataset = pq.ParquetDataset(path, validate_schema=False)
assert len(dataset.partitions.partition_names) == 2
assert dataset.partitions.partition_names == set(partition_cols)
class TestParquetFastParquet(Base):
@td.skip_if_no('fastparquet', min_version="0.2.1")
def test_basic(self, fp, df_full):
df = df_full
# additional supported types for fastparquet
if LooseVersion(fastparquet.__version__) >= LooseVersion('0.1.4'):
df['datetime_tz'] = pd.date_range('20130101', periods=3,
tz='US/Eastern')
df['timedelta'] = pd.timedelta_range('1 day', periods=3)
check_round_trip(df, fp)
@pytest.mark.skip(reason="not supported")
def test_duplicate_columns(self, fp):
# not currently able to handle duplicate columns
df = pd.DataFrame(np.arange(12).reshape(4, 3),
columns=list('aaa')).copy()
self.check_error_on_write(df, fp, ValueError)
def test_bool_with_none(self, fp):
df = pd.DataFrame({'a': [True, None, False]})
expected = pd.DataFrame({'a': [1.0, np.nan, 0.0]}, dtype='float16')
check_round_trip(df, fp, expected=expected)
def test_unsupported(self, fp):
# period
df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)})
self.check_error_on_write(df, fp, ValueError)
# mixed
df = pd.DataFrame({'a': ['a', 1, 2.0]})
self.check_error_on_write(df, fp, ValueError)
def test_categorical(self, fp):
if LooseVersion(fastparquet.__version__) < LooseVersion("0.1.3"):
pytest.skip("CategoricalDtype not supported for older fp")
df = pd.DataFrame({'a': pd.Categorical(list('abc'))})
check_round_trip(df, fp)
def test_filter_row_groups(self, fp):
d = {'a': list(range(0, 3))}
df = pd.DataFrame(d)
with tm.ensure_clean() as path:
df.to_parquet(path, fp, compression=None,
row_group_offsets=1)
result = read_parquet(path, fp, filters=[('a', '==', 0)])
assert len(result) == 1
def test_s3_roundtrip(self, df_compat, s3_resource, fp):
# GH #19134
check_round_trip(df_compat, fp,
path='s3://pandas-test/fastparquet.parquet')
def test_partition_cols_supported(self, fp, df_full):
# GH #23283
partition_cols = ['bool', 'int']
df = df_full
with tm.ensure_clean_dir() as path:
df.to_parquet(path, engine="fastparquet",
partition_cols=partition_cols, compression=None)
assert os.path.exists(path)
import fastparquet
actual_partition_cols = fastparquet.ParquetFile(path, False).cats
assert len(actual_partition_cols) == 2
def test_partition_on_supported(self, fp, df_full):
# GH #23283
partition_cols = ['bool', 'int']
df = df_full
with tm.ensure_clean_dir() as path:
df.to_parquet(path, engine="fastparquet", compression=None,
partition_on=partition_cols)
assert os.path.exists(path)
import fastparquet
actual_partition_cols = fastparquet.ParquetFile(path, False).cats
assert len(actual_partition_cols) == 2
def test_error_on_using_partition_cols_and_partition_on(self, fp, df_full):
# GH #23283
partition_cols = ['bool', 'int']
df = df_full
with pytest.raises(ValueError):
with tm.ensure_clean_dir() as path:
df.to_parquet(path, engine="fastparquet", compression=None,
partition_on=partition_cols,
partition_cols=partition_cols)
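# Partitioned writes produce hive-style ``column=value`` directories; the
# following is a minimal sketch of the layout the partition tests above rely
# on (hypothetical helper, pyarrow assumed installed, not collected by pytest).
def _example_partition_layout():
    df = pd.DataFrame({'key': ['a', 'a', 'b'], 'val': [1, 2, 3]})
    with tm.ensure_clean_dir() as path:
        df.to_parquet(path, engine='pyarrow', compression=None,
                      partition_cols=['key'])
        # expect subdirectories like 'key=a' and 'key=b'
        assert any(d.startswith('key=') for d in os.listdir(path))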
@@ -0,0 +1,481 @@
# pylint: disable=E1101,E1103,W0232
"""
manage legacy pickle tests
How to add pickle tests:
1. Install pandas version intended to output the pickle.
2. Execute "generate_legacy_storage_files.py" to create the pickle.
$ python generate_legacy_storage_files.py <output_dir> pickle
3. Move the created pickle to "data/legacy_pickle/<version>" directory.
"""
from distutils.version import LooseVersion
import glob
import os
import shutil
from warnings import catch_warnings, simplefilter
import pytest
from pandas.compat import PY3, is_platform_little_endian
import pandas.util._test_decorators as td
import pandas as pd
from pandas import Index
import pandas.util.testing as tm
from pandas.tseries.offsets import Day, MonthEnd
@pytest.fixture(scope='module')
def current_pickle_data():
# our current version pickle data
from pandas.tests.io.generate_legacy_storage_files import (
create_pickle_data)
return create_pickle_data()
# ---------------------
# comparison functions
# ---------------------
def compare_element(result, expected, typ, version=None):
if isinstance(expected, Index):
tm.assert_index_equal(expected, result)
return
if typ.startswith('sp_'):
comparator = getattr(tm, "assert_%s_equal" % typ)
comparator(result, expected, exact_indices=False)
elif typ == 'timestamp':
if expected is pd.NaT:
assert result is pd.NaT
else:
assert result == expected
assert result.freq == expected.freq
else:
comparator = getattr(tm, "assert_%s_equal" %
typ, tm.assert_almost_equal)
comparator(result, expected)
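# The name-based comparator lookup above is the crux of ``compare_element``;
# a minimal sketch of how it resolves (illustrative only, hypothetical helper
# not used by the tests).
def _example_comparator_dispatch():
    # 'frame' resolves to tm.assert_frame_equal ...
    assert getattr(tm, "assert_%s_equal" % 'frame',
                   tm.assert_almost_equal) is tm.assert_frame_equal
    # ... while an unknown typ falls back to tm.assert_almost_equal
    assert getattr(tm, "assert_%s_equal" % 'mystery',
                   tm.assert_almost_equal) is tm.assert_almost_equal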
def compare(data, vf, version):
# py3 compat when reading py2 pickle
try:
data = pd.read_pickle(vf)
except ValueError as e:
if 'unsupported pickle protocol:' in str(e):
# trying to read a py3 pickle in py2
return
else:
raise
m = globals()
for typ, dv in data.items():
for dt, result in dv.items():
try:
expected = data[typ][dt]
except KeyError:
if version in ('0.10.1', '0.11.0') and dt == 'reg':
break
else:
raise
# use a specific comparator
# if available
comparator = "compare_{typ}_{dt}".format(typ=typ, dt=dt)
comparator = m.get(comparator, m['compare_element'])
comparator(result, expected, typ, version)
return data
def compare_sp_series_ts(res, exp, typ, version):
# SparseTimeSeries integrated into SparseSeries in 0.12.0
# and deprecated in 0.17.0
if version and LooseVersion(version) <= LooseVersion("0.12.0"):
tm.assert_sp_series_equal(res, exp, check_series_type=False)
else:
tm.assert_sp_series_equal(res, exp)
def compare_series_ts(result, expected, typ, version):
# GH 7748
tm.assert_series_equal(result, expected)
assert result.index.freq == expected.index.freq
assert not result.index.freq.normalize
tm.assert_series_equal(result > 0, expected > 0)
# GH 9291
freq = result.index.freq
assert freq + Day(1) == Day(2)
res = freq + pd.Timedelta(hours=1)
assert isinstance(res, pd.Timedelta)
assert res == pd.Timedelta(days=1, hours=1)
res = freq + pd.Timedelta(nanoseconds=1)
assert isinstance(res, pd.Timedelta)
assert res == pd.Timedelta(days=1, nanoseconds=1)
def compare_series_dt_tz(result, expected, typ, version):
# 8260
# dtype is object < 0.17.0
if LooseVersion(version) < LooseVersion('0.17.0'):
expected = expected.astype(object)
tm.assert_series_equal(result, expected)
else:
tm.assert_series_equal(result, expected)
def compare_series_cat(result, expected, typ, version):
# Categorical dtype is added in 0.15.0
# ordered is changed in 0.16.0
if LooseVersion(version) < LooseVersion('0.15.0'):
tm.assert_series_equal(result, expected, check_dtype=False,
check_categorical=False)
elif LooseVersion(version) < LooseVersion('0.16.0'):
tm.assert_series_equal(result, expected, check_categorical=False)
else:
tm.assert_series_equal(result, expected)
def compare_frame_dt_mixed_tzs(result, expected, typ, version):
# 8260
# dtype is object < 0.17.0
if LooseVersion(version) < LooseVersion('0.17.0'):
expected = expected.astype(object)
tm.assert_frame_equal(result, expected)
else:
tm.assert_frame_equal(result, expected)
def compare_frame_cat_onecol(result, expected, typ, version):
# Categorical dtype is added in 0.15.0
# ordered is changed in 0.16.0
if LooseVersion(version) < LooseVersion('0.15.0'):
tm.assert_frame_equal(result, expected, check_dtype=False,
check_categorical=False)
elif LooseVersion(version) < LooseVersion('0.16.0'):
tm.assert_frame_equal(result, expected, check_categorical=False)
else:
tm.assert_frame_equal(result, expected)
def compare_frame_cat_and_float(result, expected, typ, version):
compare_frame_cat_onecol(result, expected, typ, version)
def compare_index_period(result, expected, typ, version):
tm.assert_index_equal(result, expected)
assert isinstance(result.freq, MonthEnd)
assert result.freq == MonthEnd()
assert result.freqstr == 'M'
tm.assert_index_equal(result.shift(2), expected.shift(2))
def compare_sp_frame_float(result, expected, typ, version):
if LooseVersion(version) <= LooseVersion('0.18.1'):
tm.assert_sp_frame_equal(result, expected, exact_indices=False,
check_dtype=False)
else:
tm.assert_sp_frame_equal(result, expected)
files = glob.glob(os.path.join(os.path.dirname(__file__), "data",
"legacy_pickle", "*", "*.pickle"))
@pytest.fixture(params=files)
def legacy_pickle(request, datapath):
return datapath(request.param)
# ---------------------
# tests
# ---------------------
def test_pickles(current_pickle_data, legacy_pickle):
if not is_platform_little_endian():
pytest.skip("known failure on non-little endian")
version = os.path.basename(os.path.dirname(legacy_pickle))
with catch_warnings(record=True):
simplefilter("ignore")
compare(current_pickle_data, legacy_pickle, version)
def test_round_trip_current(current_pickle_data):
try:
import cPickle as c_pickle
def c_pickler(obj, path):
with open(path, 'wb') as fh:
c_pickle.dump(obj, fh, protocol=-1)
def c_unpickler(path):
with open(path, 'rb') as fh:
fh.seek(0)
return c_pickle.load(fh)
except ImportError:
c_pickler = None
c_unpickler = None
import pickle as python_pickle
def python_pickler(obj, path):
with open(path, 'wb') as fh:
python_pickle.dump(obj, fh, protocol=-1)
def python_unpickler(path):
with open(path, 'rb') as fh:
fh.seek(0)
return python_pickle.load(fh)
data = current_pickle_data
for typ, dv in data.items():
for dt, expected in dv.items():
for writer in [pd.to_pickle, c_pickler, python_pickler]:
if writer is None:
continue
with tm.ensure_clean() as path:
# test writing with each pickler
writer(expected, path)
# test reading with each unpickler
result = pd.read_pickle(path)
compare_element(result, expected, typ)
if c_unpickler is not None:
result = c_unpickler(path)
compare_element(result, expected, typ)
result = python_unpickler(path)
compare_element(result, expected, typ)
def test_pickle_v0_14_1(datapath):
cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False,
categories=['a', 'b', 'c', 'd'])
pickle_path = datapath('io', 'data', 'categorical_0_14_1.pickle')
# This code was executed once on v0.14.1 to generate the pickle:
#
# cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'],
# name='foobar')
# with open(pickle_path, 'wb') as f: pickle.dump(cat, f)
#
tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path))
def test_pickle_v0_15_2(datapath):
# ordered -> _ordered
# GH 9347
cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False,
categories=['a', 'b', 'c', 'd'])
pickle_path = datapath('io', 'data', 'categorical_0_15_2.pickle')
# This code was executed once on v0.15.2 to generate the pickle:
#
# cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'],
# name='foobar')
# with open(pickle_path, 'wb') as f: pickle.dump(cat, f)
#
tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path))
def test_pickle_path_pathlib():
df = tm.makeDataFrame()
result = tm.round_trip_pathlib(df.to_pickle, pd.read_pickle)
tm.assert_frame_equal(df, result)
def test_pickle_path_localpath():
df = tm.makeDataFrame()
result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle)
tm.assert_frame_equal(df, result)
# ---------------------
# test pickle compression
# ---------------------
@pytest.fixture
def get_random_path():
return u'__%s__.pickle' % tm.rands(10)
class TestCompression(object):
_compression_to_extension = {
None: ".none",
'gzip': '.gz',
'bz2': '.bz2',
'zip': '.zip',
'xz': '.xz',
}
def compress_file(self, src_path, dest_path, compression):
if compression is None:
shutil.copyfile(src_path, dest_path)
return
if compression == 'gzip':
import gzip
f = gzip.open(dest_path, "w")
elif compression == 'bz2':
import bz2
f = bz2.BZ2File(dest_path, "w")
elif compression == 'zip':
import zipfile
with zipfile.ZipFile(dest_path, "w",
compression=zipfile.ZIP_DEFLATED) as f:
f.write(src_path, os.path.basename(src_path))
elif compression == 'xz':
lzma = pd.compat.import_lzma()
f = lzma.LZMAFile(dest_path, "w")
else:
msg = 'Unrecognized compression type: {}'.format(compression)
raise ValueError(msg)
if compression != "zip":
with open(src_path, "rb") as fh, f:
f.write(fh.read())
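def _example_compress_file_usage(self):
    # A minimal sketch (hypothetical helper, not collected by pytest):
    # pickle a frame uncompressed, gzip it with ``compress_file``, then
    # read it back through pd.read_pickle with explicit compression.
    with tm.ensure_clean('.raw') as raw, tm.ensure_clean('.gz') as gz:
        df = tm.makeDataFrame()
        df.to_pickle(raw, compression=None)
        self.compress_file(raw, gz, compression='gzip')
        tm.assert_frame_equal(df, pd.read_pickle(gz, compression='gzip'))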
def test_write_explicit(self, compression, get_random_path):
base = get_random_path
path1 = base + ".compressed"
path2 = base + ".raw"
with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
df = tm.makeDataFrame()
# write to compressed file
df.to_pickle(p1, compression=compression)
# decompress
with tm.decompress_file(p1, compression=compression) as f:
with open(p2, "wb") as fh:
fh.write(f.read())
# read decompressed file
df2 = pd.read_pickle(p2, compression=None)
tm.assert_frame_equal(df, df2)
@pytest.mark.parametrize('compression', ['', 'None', 'bad', '7z'])
def test_write_explicit_bad(self, compression, get_random_path):
with pytest.raises(ValueError, match="Unrecognized compression type"):
with tm.ensure_clean(get_random_path) as path:
df = tm.makeDataFrame()
df.to_pickle(path, compression=compression)
@pytest.mark.parametrize('ext', [
'', '.gz', '.bz2', '.no_compress',
pytest.param('.xz', marks=td.skip_if_no_lzma)
])
def test_write_infer(self, ext, get_random_path):
base = get_random_path
path1 = base + ext
path2 = base + ".raw"
compression = None
for c in self._compression_to_extension:
if self._compression_to_extension[c] == ext:
compression = c
break
with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
df = tm.makeDataFrame()
# write to compressed file by inferred compression method
df.to_pickle(p1)
# decompress
with tm.decompress_file(p1, compression=compression) as f:
with open(p2, "wb") as fh:
fh.write(f.read())
# read decompressed file
df2 = pd.read_pickle(p2, compression=None)
tm.assert_frame_equal(df, df2)
def test_read_explicit(self, compression, get_random_path):
base = get_random_path
path1 = base + ".raw"
path2 = base + ".compressed"
with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
df = tm.makeDataFrame()
# write to uncompressed file
df.to_pickle(p1, compression=None)
# compress
self.compress_file(p1, p2, compression=compression)
# read compressed file
df2 = pd.read_pickle(p2, compression=compression)
tm.assert_frame_equal(df, df2)
@pytest.mark.parametrize('ext', [
'', '.gz', '.bz2', '.zip', '.no_compress',
pytest.param('.xz', marks=td.skip_if_no_lzma)
])
def test_read_infer(self, ext, get_random_path):
base = get_random_path
path1 = base + ".raw"
path2 = base + ext
compression = None
for c in self._compression_to_extension:
if self._compression_to_extension[c] == ext:
compression = c
break
with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
df = tm.makeDataFrame()
# write to uncompressed file
df.to_pickle(p1, compression=None)
# compress
self.compress_file(p1, p2, compression=compression)
# read compressed file by inferred compression method
df2 = pd.read_pickle(p2)
tm.assert_frame_equal(df, df2)
# ---------------------
# test pickle protocol
# ---------------------
class TestProtocol(object):
@pytest.mark.parametrize('protocol', [-1, 0, 1, 2])
def test_read(self, protocol, get_random_path):
with tm.ensure_clean(get_random_path) as path:
df = tm.makeDataFrame()
df.to_pickle(path, protocol=protocol)
df2 = pd.read_pickle(path)
tm.assert_frame_equal(df, df2)
@pytest.mark.parametrize('protocol', [3, 4])
@pytest.mark.skipif(PY3, reason="Testing invalid parameters for Python 2")
def test_read_bad_versions(self, protocol, get_random_path):
# For Python 2, HIGHEST_PROTOCOL should be 2.
msg = ("pickle protocol {protocol} asked for; the highest available "
"protocol is 2").format(protocol=protocol)
with pytest.raises(ValueError, match=msg):
with tm.ensure_clean(get_random_path) as path:
df = tm.makeDataFrame()
df.to_pickle(path, protocol=protocol)
File diff suppressed because it is too large
@@ -0,0 +1,29 @@
import pytest
from pandas.compat import BytesIO
from pandas import read_csv
from pandas.io.common import is_s3_url
class TestS3URL(object):
def test_is_s3_url(self):
assert is_s3_url("s3://pandas/somethingelse.com")
assert not is_s3_url("s4://pandas/somethingelse.com")
def test_streaming_s3_objects():
# GH17135
# botocore gained iteration support in 1.10.47, can now be used in read_*
pytest.importorskip('botocore', minversion='1.10.47')
from botocore.response import StreamingBody
data = [
b'foo,bar,baz\n1,2,3\n4,5,6\n',
b'just,the,header\n',
]
for el in data:
body = StreamingBody(BytesIO(el), content_length=len(el))
read_csv(body)
File diff suppressed because it is too large
File diff suppressed because it is too large