[arrow] branch master updated: ARROW-2756: [Python] Remove redundant imports and minor fixes in parquet tests

wesm Tue, 24 Jul 2018 14:13:11 -0700

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/master by this push:
     new 0269203  ARROW-2756: [Python] Remove redundant imports and minor fixes in parquet tests
0269203 is described below

commit 0269203ded5c3ea93993ceab7ecea18731fe8a31
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Tue Jul 24 17:12:49 2018 -0400

    ARROW-2756: [Python] Remove redundant imports and minor fixes in parquet tests
    
    Author: Krisztián Szűcs <[email protected]>
    
    Closes #2182 from kszucs/cleanup_parquet_tests and squashes the following commits:
    
    45271ff2 <Krisztián Szűcs> missing import
    303fae1f <Krisztián Szűcs> remove parquet decorators from new tests after rebase
    08adaacd <Krisztián Szűcs> fallback and import pq in the test cases
    c5e81dc5 <Krisztián Szűcs> double check roundtrip
    a78cd5ec <Krisztián Szűcs> remove parquet decorator from newly created test case
    d9ee17c6 <Krisztián Szűcs> remove redundant roundtrip check
    de2cde1f <Krisztián Szűcs> remove print statements
    573b9610 <Krisztián Szűcs> remove two additional imports
    408ee6e6 <Krisztián Szűcs> pytestmark the whole module and remove redundant imports
---
 python/pyarrow/tests/test_hdfs.py    |  12 +--
 python/pyarrow/tests/test_parquet.py | 158 +++++++++--------------------------
 2 files changed, 47 insertions(+), 123 deletions(-)

diff --git a/python/pyarrow/tests/test_hdfs.py b/python/pyarrow/tests/test_hdfs.py
index 4840aee..81b03b6 100644
--- a/python/pyarrow/tests/test_hdfs.py
+++ b/python/pyarrow/tests/test_hdfs.py
@@ -296,7 +296,7 @@ class HdfsTestCases(object):
         expected = pa.concat_tables(test_data)
         return expected
 
-    @test_parquet.parquet
+    @pytest.mark.parquet
     def test_read_multiple_parquet_files(self):
 
         tmpdir = pjoin(self.tmp_path, 'multi-parquet-' + guid())
@@ -310,7 +310,7 @@ class HdfsTestCases(object):
                                .sort_values(by='index').reset_index(drop=True),
                                expected.to_pandas())
 
-    @test_parquet.parquet
+    @pytest.mark.parquet
     def test_read_multiple_parquet_files_with_uri(self):
         import pyarrow.parquet as pq
 
@@ -326,7 +326,7 @@ class HdfsTestCases(object):
                                .sort_values(by='index').reset_index(drop=True),
                                expected.to_pandas())
 
-    @test_parquet.parquet
+    @pytest.mark.parquet
     def test_read_write_parquet_files_with_uri(self):
         import pyarrow.parquet as pq
 
@@ -346,20 +346,20 @@ class HdfsTestCases(object):
 
         pdt.assert_frame_equal(result, df)
 
-    @test_parquet.parquet
+    @pytest.mark.parquet
     def test_read_common_metadata_files(self):
         tmpdir = pjoin(self.tmp_path, 'common-metadata-' + guid())
         self.hdfs.mkdir(tmpdir)
         test_parquet._test_read_common_metadata_files(self.hdfs, tmpdir)
 
-    @test_parquet.parquet
+    @pytest.mark.parquet
     def test_write_to_dataset_with_partitions(self):
         tmpdir = pjoin(self.tmp_path, 'write-partitions-' + guid())
         self.hdfs.mkdir(tmpdir)
         test_parquet._test_write_to_dataset_with_partitions(
             tmpdir, filesystem=self.hdfs)
 
-    @test_parquet.parquet
+    @pytest.mark.parquet
     def test_write_to_dataset_no_partitions(self):
         tmpdir = pjoin(self.tmp_path, 'write-no_partitions-' + guid())
         self.hdfs.mkdir(tmpdir)
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 1d30737..c9c1a96 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -29,16 +29,16 @@ import pytest
 from pyarrow.compat import guid, u, BytesIO, unichar
 from pyarrow.tests import util
 from pyarrow.filesystem import LocalFileSystem
-import pyarrow as pa
 from .pandas_examples import dataframe_with_arrays, dataframe_with_lists
 
 import numpy as np
 import pandas as pd
-
+import pyarrow as pa
 import pandas.util.testing as tm
 
+# Marks all of the tests in this module
 # Ignore these with pytest ... -m 'not parquet'
-parquet = pytest.mark.parquet
+pytestmark = pytest.mark.parquet
 
 
 def _write_table(table, path, **kwargs):
@@ -53,6 +53,7 @@ def _write_table(table, path, **kwargs):
 
 def _read_table(*args, **kwargs):
     import pyarrow.parquet as pq
+
     return pq.read_table(*args, **kwargs)
 
 
@@ -68,12 +69,9 @@ def _check_roundtrip(table, expected=None, **params):
     if expected is None:
         expected = table
 
+    # intentionally check twice
     result = _roundtrip_table(table, **params)
-    if not result.equals(expected):
-        print(expected)
-        print(result)
-        assert result.equals(expected)
-
+    assert result.equals(expected)
     result = _roundtrip_table(result, **params)
     assert result.equals(expected)
 
@@ -89,22 +87,21 @@ def _roundtrip_pandas_dataframe(df, write_kwargs):
     return table1.to_pandas()
 
 
-@parquet
-def test_single_pylist_column_roundtrip(tmpdir):
-    for dtype in [int, float]:
-        filename = tmpdir.join('single_{}_column.parquet'
-                               .format(dtype.__name__))
-        data = [pa.array(list(map(dtype, range(5))))]
-        table = pa.Table.from_arrays(data, names=['a'])
-        _write_table(table, filename.strpath)
-        table_read = _read_table(filename.strpath)
-        for col_written, col_read in zip(table.itercolumns(),
-                                         table_read.itercolumns()):
-            assert col_written.name == col_read.name
-            assert col_read.data.num_chunks == 1
-            data_written = col_written.data.chunk(0)
-            data_read = col_read.data.chunk(0)
-            assert data_written.equals(data_read)
+@pytest.mark.parametrize('dtype', [int, float])
+def test_single_pylist_column_roundtrip(tmpdir, dtype):
+    filename = tmpdir.join('single_{}_column.parquet'
+                           .format(dtype.__name__))
+    data = [pa.array(list(map(dtype, range(5))))]
+    table = pa.Table.from_arrays(data, names=['a'])
+    _write_table(table, filename.strpath)
+    table_read = _read_table(filename.strpath)
+    for col_written, col_read in zip(table.itercolumns(),
+                                     table_read.itercolumns()):
+        assert col_written.name == col_read.name
+        assert col_read.data.num_chunks == 1
+        data_written = col_written.data.chunk(0)
+        data_read = col_read.data.chunk(0)
+        assert data_written.equals(data_read)
 
 
 def alltypes_sample(size=10000, seed=0, categorical=False):
@@ -136,10 +133,10 @@ def alltypes_sample(size=10000, seed=0, categorical=False):
     return pd.DataFrame(arrays)
 
 
-@parquet
 @pytest.mark.parametrize('chunk_size', [None, 1000])
 def test_pandas_parquet_2_0_rountrip(tmpdir, chunk_size):
     import pyarrow.parquet as pq
+
     df = alltypes_sample(size=10000, categorical=True)
 
     filename = tmpdir.join('pandas_rountrip.parquet')
@@ -157,7 +154,6 @@ def test_pandas_parquet_2_0_rountrip(tmpdir, chunk_size):
     tm.assert_frame_equal(df, df_read, check_categorical=False)
 
 
-@parquet
 def test_chunked_table_write():
     # ARROW-232
     df = alltypes_sample(size=10)
@@ -175,7 +171,6 @@ def test_chunked_table_write():
     _check_roundtrip(table, version='2.0')
 
 
-@parquet
 def test_empty_table_roundtrip():
     df = alltypes_sample(size=10)
     # The nanosecond->us conversion is a nuisance, so we just avoid it here
@@ -192,7 +187,6 @@ def test_empty_table_roundtrip():
     _check_roundtrip(table, version='2.0')
 
 
-@parquet
 def test_empty_lists_table_roundtrip():
     # ARROW-2744: Shouldn't crash when writing an array of empty lists
     arr = pa.array([[], []], type=pa.list_(pa.int32()))
@@ -200,7 +194,6 @@ def test_empty_lists_table_roundtrip():
     _check_roundtrip(table)
 
 
-@parquet
 def test_pandas_parquet_datetime_tz():
     import pyarrow.parquet as pq
 
@@ -227,7 +220,6 @@ def test_pandas_parquet_datetime_tz():
     tm.assert_frame_equal(df, df_read)
 
 
-@parquet
 def test_pandas_parquet_custom_metadata(tmpdir):
     import pyarrow.parquet as pq
 
@@ -247,7 +239,6 @@ def test_pandas_parquet_custom_metadata(tmpdir):
     assert js['index_columns'] == ['__index_level_0__']
 
 
-@parquet
 def test_pandas_parquet_column_multiindex(tmpdir):
     import pyarrow.parquet as pq
 
@@ -269,7 +260,6 @@ def test_pandas_parquet_column_multiindex(tmpdir):
     tm.assert_frame_equal(df, df_read)
 
 
-@parquet
 def test_pandas_parquet_2_0_rountrip_read_pandas_no_index_written(tmpdir):
     import pyarrow.parquet as pq
 
@@ -296,7 +286,6 @@ def test_pandas_parquet_2_0_rountrip_read_pandas_no_index_written(tmpdir):
     tm.assert_frame_equal(df, df_read)
 
 
-@parquet
 def test_pandas_parquet_1_0_rountrip(tmpdir):
     size = 10000
     np.random.seed(0)
@@ -328,7 +317,6 @@ def test_pandas_parquet_1_0_rountrip(tmpdir):
     tm.assert_frame_equal(df, df_read)
 
 
-@parquet
 @pytest.mark.skipif(sys.version_info < (3, 6), reason="need Python 3.6")
 def test_path_objects(tmpdir):
     # Test compatibility with PEP 519 path-like objects
@@ -341,7 +329,6 @@ def test_path_objects(tmpdir):
     tm.assert_frame_equal(df, df_read)
 
 
-@parquet
 def test_pandas_column_selection(tmpdir):
     size = 10000
     np.random.seed(0)
@@ -390,7 +377,6 @@ def _test_dataframe(size=10000, seed=0):
     return df
 
 
-@parquet
 def test_pandas_parquet_native_file_roundtrip(tmpdir):
     df = _test_dataframe(10000)
     arrow_table = pa.Table.from_pandas(df)
@@ -402,7 +388,6 @@ def test_pandas_parquet_native_file_roundtrip(tmpdir):
     tm.assert_frame_equal(df, df_read)
 
 
-@parquet
 def test_parquet_incremental_file_build(tmpdir):
     import pyarrow.parquet as pq
 
@@ -431,7 +416,6 @@ def test_parquet_incremental_file_build(tmpdir):
     tm.assert_frame_equal(result.to_pandas(), expected)
 
 
-@parquet
 def test_read_pandas_column_subset(tmpdir):
     import pyarrow.parquet as pq
 
@@ -445,7 +429,6 @@ def test_read_pandas_column_subset(tmpdir):
     tm.assert_frame_equal(df[['strings', 'uint8']], df_read)
 
 
-@parquet
 def test_pandas_parquet_empty_roundtrip(tmpdir):
     df = _test_dataframe(0)
     arrow_table = pa.Table.from_pandas(df)
@@ -457,7 +440,6 @@ def test_pandas_parquet_empty_roundtrip(tmpdir):
     tm.assert_frame_equal(df, df_read)
 
 
-@parquet
 def test_pandas_parquet_pyfile_roundtrip(tmpdir):
     filename = tmpdir.join('pandas_pyfile_roundtrip.parquet').strpath
     size = 5
@@ -481,7 +463,6 @@ def test_pandas_parquet_pyfile_roundtrip(tmpdir):
     tm.assert_frame_equal(df, df_read)
 
 
-@parquet
 def test_pandas_parquet_configuration_options(tmpdir):
     size = 10000
     np.random.seed(0)
@@ -531,7 +512,6 @@ def make_sample_file(df):
     return pq.ParquetFile(buf)
 
 
-@parquet
 def test_parquet_metadata_api():
     df = alltypes_sample(size=10000)
     df = df.reindex(columns=sorted(df.columns))
@@ -583,7 +563,6 @@ def test_parquet_metadata_api():
     assert rg_meta.num_columns == ncols + 1  # +1 for index
 
 
-@parquet
 @pytest.mark.parametrize(
     'data, dtype, min_value, max_value, null_count, num_values',
     [
@@ -626,7 +605,6 @@ def test_parquet_column_statistics_api(
     assert stat.num_values == num_values
 
 
-@parquet
 def test_compare_schemas():
     df = alltypes_sample(size=10000)
 
@@ -643,7 +621,6 @@ def test_compare_schemas():
     assert not fileh.schema[0].equals(fileh.schema[1])
 
 
-@parquet
 def test_column_of_arrays(tmpdir):
     df, schema = dataframe_with_arrays()
 
@@ -656,7 +633,6 @@ def test_column_of_arrays(tmpdir):
     tm.assert_frame_equal(df, df_read)
 
 
-@parquet
 def test_coerce_timestamps(tmpdir):
     from collections import OrderedDict
     # ARROW-622
@@ -699,7 +675,6 @@ def test_coerce_timestamps(tmpdir):
                      coerce_timestamps='unknown')
 
 
-@parquet
 def test_column_of_lists(tmpdir):
     df, schema = dataframe_with_lists()
 
@@ -712,7 +687,6 @@ def test_column_of_lists(tmpdir):
     tm.assert_frame_equal(df, df_read)
 
 
-@parquet
 def test_date_time_types():
     t1 = pa.date32()
     data1 = np.array([17259, 17260, 17261], dtype='int32')
@@ -799,7 +773,6 @@ def test_date_time_types():
     _assert_unsupported(a7)
 
 
-@parquet
 def test_large_list_records():
     # This was fixed in PARQUET-1100
 
@@ -816,7 +789,6 @@ def test_large_list_records():
     _check_roundtrip(table)
 
 
-@parquet
 def test_sanitized_spark_field_names():
     a0 = pa.array([0, 1, 2, 3, 4])
     name = 'prohib; ,\t{}'
@@ -828,7 +800,6 @@ def test_sanitized_spark_field_names():
     assert result.schema[0].name == expected_name
 
 
-@parquet
 def test_spark_flavor_preserves_pandas_metadata():
     df = _test_dataframe(size=100)
     df.index = np.arange(0, 10 * len(df), 10)
@@ -839,7 +810,6 @@ def test_spark_flavor_preserves_pandas_metadata():
     tm.assert_frame_equal(result, df)
 
 
-@parquet
 def test_fixed_size_binary():
     t0 = pa.binary(10)
     data = [b'fooooooooo', None, b'barooooooo', b'quxooooooo']
@@ -850,7 +820,6 @@ def test_fixed_size_binary():
     _check_roundtrip(table)
 
 
-@parquet
 def test_multithreaded_read():
     df = alltypes_sample(size=10000)
 
@@ -868,7 +837,6 @@ def test_multithreaded_read():
     assert table1.equals(table2)
 
 
-@parquet
 def test_min_chunksize():
     data = pd.DataFrame([np.arange(4)], columns=['A', 'B', 'C', 'D'])
     table = pa.Table.from_pandas(data.reset_index())
@@ -885,7 +853,6 @@ def test_min_chunksize():
         _write_table(table, buf, chunk_size=0)
 
 
-@parquet
 def test_pass_separate_metadata():
     import pyarrow.parquet as pq
 
@@ -907,7 +874,6 @@ def test_pass_separate_metadata():
     tm.assert_frame_equal(df, fileh.read().to_pandas())
 
 
-@parquet
 def test_read_single_row_group():
     import pyarrow.parquet as pq
 
@@ -932,7 +898,6 @@ def test_read_single_row_group():
     tm.assert_frame_equal(df, result.to_pandas())
 
 
-@parquet
 def test_read_single_row_group_with_column_subset():
     import pyarrow.parquet as pq
 
@@ -953,7 +918,6 @@ def test_read_single_row_group_with_column_subset():
     tm.assert_frame_equal(df[cols], result.to_pandas())
 
 
-@parquet
 def test_scan_contents():
     import pyarrow.parquet as pq
 
@@ -972,7 +936,6 @@ def test_scan_contents():
     assert pf.scan_contents(df.columns[:4]) == 10000
 
 
-@parquet
 def test_parquet_piece_read(tmpdir):
     import pyarrow.parquet as pq
 
@@ -988,7 +951,6 @@ def test_parquet_piece_read(tmpdir):
     assert result.equals(table)
 
 
-@parquet
 def test_parquet_piece_basics():
     import pyarrow.parquet as pq
 
@@ -1009,7 +971,6 @@ def test_parquet_piece_basics():
     assert piece1 != piece3
 
 
-@parquet
 def test_partition_set_dictionary_type():
     import pyarrow.parquet as pq
 
@@ -1024,7 +985,6 @@ def test_partition_set_dictionary_type():
         set3.dictionary
 
 
-@parquet
 def test_read_partitioned_directory(tmpdir):
     fs = LocalFileSystem.get_instance()
     base_path = str(tmpdir)
@@ -1032,15 +992,14 @@ def test_read_partitioned_directory(tmpdir):
     _partition_test_for_filesystem(fs, base_path)
 
 
-@parquet
 def test_create_parquet_dataset_multi_threaded(tmpdir):
+    import pyarrow.parquet as pq
+
     fs = LocalFileSystem.get_instance()
     base_path = str(tmpdir)
 
     _partition_test_for_filesystem(fs, base_path)
 
-    import pyarrow.parquet as pq
-
     manifest = pq.ParquetManifest(base_path, filesystem=fs,
                                   metadata_nthreads=1)
     dataset = pq.ParquetDataset(base_path, filesystem=fs, metadata_nthreads=16)
@@ -1051,13 +1010,12 @@ def test_create_parquet_dataset_multi_threaded(tmpdir):
     assert len(partitions.levels) == len(manifest.partitions.levels)
 
 
-@parquet
 def test_equivalency(tmpdir):
+    import pyarrow.parquet as pq
+
     fs = LocalFileSystem.get_instance()
     base_path = str(tmpdir)
 
-    import pyarrow.parquet as pq
-
     integer_keys = [0, 1]
     string_keys = ['a', 'b', 'c']
     boolean_keys = [True, False]
@@ -1089,13 +1047,12 @@ def test_equivalency(tmpdir):
     assert False not in result_df['boolean'].values
 
 
-@parquet
 def test_cutoff_exclusive_integer(tmpdir):
+    import pyarrow.parquet as pq
+
     fs = LocalFileSystem.get_instance()
     base_path = str(tmpdir)
 
-    import pyarrow.parquet as pq
-
     integer_keys = [0, 1, 2, 3, 4]
     partition_spec = [
         ['integers', integer_keys],
@@ -1125,17 +1082,16 @@ def test_cutoff_exclusive_integer(tmpdir):
     assert result_list == [2, 3]
 
 
-@parquet
 @pytest.mark.xfail(
     raises=TypeError,
     reason='Loss of type information in creation of categoricals.'
 )
 def test_cutoff_exclusive_datetime(tmpdir):
+    import pyarrow.parquet as pq
+
     fs = LocalFileSystem.get_instance()
     base_path = str(tmpdir)
 
-    import pyarrow.parquet as pq
-
     date_keys = [
         datetime.date(2018, 4, 9),
         datetime.date(2018, 4, 10),
@@ -1174,13 +1130,12 @@ def test_cutoff_exclusive_datetime(tmpdir):
     assert result_df['dates'].values == expected
 
 
-@parquet
 def test_inclusive_integer(tmpdir):
+    import pyarrow.parquet as pq
+
     fs = LocalFileSystem.get_instance()
     base_path = str(tmpdir)
 
-    import pyarrow.parquet as pq
-
     integer_keys = [0, 1, 2, 3, 4]
     partition_spec = [
         ['integers', integer_keys],
@@ -1210,13 +1165,12 @@ def test_inclusive_integer(tmpdir):
     assert result_list == [2, 3]
 
 
-@parquet
 def test_inclusive_set(tmpdir):
+    import pyarrow.parquet as pq
+
     fs = LocalFileSystem.get_instance()
     base_path = str(tmpdir)
 
-    import pyarrow.parquet as pq
-
     integer_keys = [0, 1]
     string_keys = ['a', 'b', 'c']
     boolean_keys = [True, False]
@@ -1248,13 +1202,12 @@ def test_inclusive_set(tmpdir):
     assert False not in result_df['boolean'].values
 
 
-@parquet
 def test_invalid_pred_op(tmpdir):
+    import pyarrow.parquet as pq
+
     fs = LocalFileSystem.get_instance()
     base_path = str(tmpdir)
 
-    import pyarrow.parquet as pq
-
     integer_keys = [0, 1, 2, 3, 4]
     partition_spec = [
         ['integers', integer_keys],
@@ -1308,10 +1261,9 @@ def s3_example():
 
 
 @pytest.mark.s3
-@parquet
 def test_read_partitioned_directory_s3fs(s3_example):
-    from pyarrow.filesystem import S3FSWrapper
     import pyarrow.parquet as pq
+    from pyarrow.filesystem import S3FSWrapper
 
     fs, bucket_uri = s3_example
     wrapper = S3FSWrapper(fs)
@@ -1423,7 +1375,6 @@ def _test_read_common_metadata_files(fs, base_path):
     assert dataset2.schema.equals(dataset.schema)
 
 
-@parquet
 def test_read_common_metadata_files(tmpdir):
     base_path = str(tmpdir)
     fs = LocalFileSystem.get_instance()
@@ -1458,14 +1409,12 @@ def _test_read_metadata_files(fs, base_path):
     assert dataset.schema.equals(metadata_schema)
 
 
-@parquet
 def test_read_metadata_files(tmpdir):
     base_path = str(tmpdir)
     fs = LocalFileSystem.get_instance()
     _test_read_metadata_files(fs, base_path)
 
 
-@parquet
 def test_read_schema(tmpdir):
     import pyarrow.parquet as pq
 
@@ -1499,7 +1448,6 @@ def _touch(path):
         pass
 
 
-@parquet
 def test_read_multiple_files(tmpdir):
     import pyarrow.parquet as pq
 
@@ -1581,7 +1529,6 @@ def test_read_multiple_files(tmpdir):
         read_multiple_files(mixed_paths)
 
 
-@parquet
 def test_dataset_read_pandas(tmpdir):
     import pyarrow.parquet as pq
 
@@ -1615,12 +1562,11 @@ def test_dataset_read_pandas(tmpdir):
     tm.assert_frame_equal(result, expected)
 
 
-@parquet
 @pytest.mark.parametrize('preserve_index', [True, False])
 def test_dataset_read_pandas_common_metadata(tmpdir, preserve_index):
-    # ARROW-1103
     import pyarrow.parquet as pq
 
+    # ARROW-1103
     nfiles = 5
     size = 5
 
@@ -1674,7 +1620,6 @@ def _make_example_multifile_dataset(base_path, nfiles=10, file_nrows=5):
     return paths
 
 
-@parquet
 def test_ignore_private_directories(tmpdir):
     import pyarrow.parquet as pq
 
@@ -1691,7 +1636,6 @@ def test_ignore_private_directories(tmpdir):
     assert set(paths) == set(x.path for x in dataset.pieces)
 
 
-@parquet
 def test_ignore_hidden_files(tmpdir):
     import pyarrow.parquet as pq
 
@@ -1711,7 +1655,6 @@ def test_ignore_hidden_files(tmpdir):
     assert set(paths) == set(x.path for x in dataset.pieces)
 
 
-@parquet
 def test_multiindex_duplicate_values(tmpdir):
     num_rows = 3
     numbers = list(range(num_rows))
@@ -1733,7 +1676,6 @@ def test_multiindex_duplicate_values(tmpdir):
     tm.assert_frame_equal(result_df, df)
 
 
-@parquet
 def test_write_error_deletes_incomplete_file(tmpdir):
     # ARROW-1285
     df = pd.DataFrame({'a': list('abc'),
@@ -1758,7 +1700,6 @@ def test_write_error_deletes_incomplete_file(tmpdir):
     assert not os.path.exists(filename)
 
 
-@parquet
 def test_read_non_existent_file(tmpdir):
     import pyarrow.parquet as pq
 
@@ -1769,7 +1710,6 @@ def test_read_non_existent_file(tmpdir):
         assert path in e.args[0]
 
 
-@parquet
 def test_read_table_doesnt_warn():
     import pyarrow.parquet as pq
 
@@ -1784,9 +1724,9 @@ def test_read_table_doesnt_warn():
 def _test_write_to_dataset_with_partitions(base_path,
                                            filesystem=None,
                                            schema=None):
-    # ARROW-1400
     import pyarrow.parquet as pq
 
+    # ARROW-1400
     output_df = pd.DataFrame({'group1': list('aaabbbbccc'),
                               'group2': list('eefeffgeee'),
                               'num': list(range(10)),
@@ -1832,9 +1772,9 @@ def _test_write_to_dataset_with_partitions(base_path,
 
 
 def _test_write_to_dataset_no_partitions(base_path, filesystem=None):
-    # ARROW-1400
     import pyarrow.parquet as pq
 
+    # ARROW-1400
     output_df = pd.DataFrame({'group1': list('aaabbbbccc'),
                               'group2': list('eefeffgeee'),
                               'num': list(range(10)),
@@ -1865,12 +1805,10 @@ def _test_write_to_dataset_no_partitions(base_path, filesystem=None):
     assert output_df.equals(input_df)
 
 
-@parquet
 def test_write_to_dataset_with_partitions(tmpdir):
     _test_write_to_dataset_with_partitions(str(tmpdir))
 
 
-@parquet
 def test_write_to_dataset_with_partitions_and_schema(tmpdir):
     schema = pa.schema([pa.field('group1', type=pa.string()),
                         pa.field('group2', type=pa.string()),
@@ -1880,13 +1818,11 @@ def test_write_to_dataset_with_partitions_and_schema(tmpdir):
     _test_write_to_dataset_with_partitions(str(tmpdir), schema=schema)
 
 
-@parquet
 def test_write_to_dataset_no_partitions(tmpdir):
     _test_write_to_dataset_no_partitions(str(tmpdir))
 
 
 @pytest.mark.large_memory
-@parquet
 def test_large_table_int32_overflow():
     size = np.iinfo('int32').max + 1
 
@@ -1899,7 +1835,6 @@ def test_large_table_int32_overflow():
     _write_table(table, f)
 
 
-@parquet
 def test_index_column_name_duplicate(tmpdir):
     data = {
         'close': {
@@ -1924,11 +1859,10 @@ def test_index_column_name_duplicate(tmpdir):
     tm.assert_frame_equal(result_df, dfx)
 
 
-@parquet
 def test_parquet_nested_convenience(tmpdir):
-    # ARROW-1684
     import pyarrow.parquet as pq
 
+    # ARROW-1684
     df = pd.DataFrame({
         'a': [[1, 2, 3], None, [4, 5], []],
         'b': [[1.], None, None, [6., 7.]],
@@ -1946,7 +1880,6 @@ def test_parquet_nested_convenience(tmpdir):
     tm.assert_frame_equal(read.to_pandas(), df)
 
 
-@parquet
 def test_backwards_compatible_index_naming():
     expected_string = b"""\
 carat        cut  color  clarity  depth  table  price     x     y     z
@@ -1969,7 +1902,6 @@ carat        cut  color  clarity  depth  table  price     x     y     z
     tm.assert_frame_equal(result, expected)
 
 
-@parquet
 def test_backwards_compatible_index_multi_level_named():
     expected_string = b"""\
 carat        cut  color  clarity  depth  table  price     x     y     z
@@ -1995,7 +1927,6 @@ carat        cut  color  clarity  depth  table  price     x     y     z
     tm.assert_frame_equal(result, expected)
 
 
-@parquet
 def test_backwards_compatible_index_multi_level_some_named():
     expected_string = b"""\
 carat        cut  color  clarity  depth  table  price     x     y     z
@@ -2022,7 +1953,6 @@ carat        cut  color  clarity  depth  table  price     x     y     z
     tm.assert_frame_equal(result, expected)
 
 
-@parquet
 def test_backwards_compatible_column_metadata_handling():
     expected = pd.DataFrame(
         {'a': [1, 2, 3], 'b': [.1, .2, .3],
@@ -2045,7 +1975,6 @@ def test_backwards_compatible_column_metadata_handling():
     tm.assert_frame_equal(result, expected[['a']].reset_index(drop=True))
 
 
-@parquet
 def test_decimal_roundtrip(tmpdir):
     num_values = 10
 
@@ -2072,7 +2001,6 @@ def test_decimal_roundtrip(tmpdir):
     tm.assert_frame_equal(result, expected)
 
 
-@parquet
 @pytest.mark.xfail(
     raises=pa.ArrowException, reason='Parquet does not support negative scale'
 )
@@ -2087,9 +2015,7 @@ def test_decimal_roundtrip_negative_scale(tmpdir):
     tm.assert_frame_equal(result, expected)
 
 
-@parquet
 def test_parquet_writer_context_obj(tmpdir):
-
     import pyarrow.parquet as pq
 
     df = _test_dataframe(100)
@@ -2115,9 +2041,7 @@ def test_parquet_writer_context_obj(tmpdir):
     tm.assert_frame_equal(result.to_pandas(), expected)
 
 
-@parquet
 def test_parquet_writer_context_obj_with_exception(tmpdir):
-
     import pyarrow.parquet as pq
 
     df = _test_dataframe(100)

[arrow] branch master updated: ARROW-2756: [Python] Remove redundant imports and minor fixes in parquet tests

Reply via email to