[ https://issues.apache.org/jira/browse/ARROW-586?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Adam Szałucha updated ARROW-586: -------------------------------- Description: When I try to read a Parquet file saved by Apache Spark, I get the following error: --------------------------------------------------------------------------- ArrowException Traceback (most recent call last) <ipython-input-14-6dfa089b9299> in <module>() ----> 1 table = pq.read_multiple_files(files2) /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/parquet.py in read_multiple_files(paths, columns, filesystem, nthreads, metadata, schema) 141 142 if metadata is None and schema is None: --> 143 schema = open_file(paths[0]).schema 144 elif schema is None: 145 schema = metadata.schema /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/parquet.py in open_file(path, meta) 132 if filesystem is None: 133 def open_file(path, meta=None): --> 134 return ParquetFile(path, metadata=meta) 135 else: 136 def open_file(path, meta=None): /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/parquet.py in __init__(self, source, metadata) 36 def __init__(self, source, metadata=None): 37 self.reader = ParquetReader() ---> 38 self.reader.open(source, metadata=metadata) 39 40 @property /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/_parquet.pyx in pyarrow._parquet.ParquetReader.open (/Users/travis/miniconda3/conda-bld/recipe_1485750760150/work/arrow-7ac320bde52ae47007dadac7398e22a203c6a48d/python/build/temp.macosx-10.9-x86_64-3.6/_parquet.cxx:7144)() /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/io.pyx in pyarrow.io.get_reader (/Users/travis/miniconda3/conda-bld/recipe_1485750760150/work/arrow-7ac320bde52ae47007dadac7398e22a203c6a48d/python/build/temp.macosx-10.9-x86_64-3.6/io.cxx:9489)() /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/io.pyx in pyarrow.io.MemoryMappedFile.__cinit__ 
(/Users/travis/miniconda3/conda-bld/recipe_1485750760150/work/arrow-7ac320bde52ae47007dadac7398e22a203c6a48d/python/build/temp.macosx-10.9-x86_64-3.6/io.cxx:7732)() /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/error.pyx in pyarrow.error.check_status (/Users/travis/miniconda3/conda-bld/recipe_1485750760150/work/arrow-7ac320bde52ae47007dadac7398e22a203c6a48d/python/build/temp.macosx-10.9-x86_64-3.6/error.cxx:1197)() ArrowException: IOError: Failed to open file: part-00000-2066b71b-c55f-411a-a682-4cc94ddb6d16.snappy.parquet was: When I try to read parquet file I get the following error --------------------------------------------------------------------------- ArrowException Traceback (most recent call last) <ipython-input-14-6dfa089b9299> in <module>() ----> 1 table = pq.read_multiple_files(files2) /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/parquet.py in read_multiple_files(paths, columns, filesystem, nthreads, metadata, schema) 141 142 if metadata is None and schema is None: --> 143 schema = open_file(paths[0]).schema 144 elif schema is None: 145 schema = metadata.schema /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/parquet.py in open_file(path, meta) 132 if filesystem is None: 133 def open_file(path, meta=None): --> 134 return ParquetFile(path, metadata=meta) 135 else: 136 def open_file(path, meta=None): /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/parquet.py in __init__(self, source, metadata) 36 def __init__(self, source, metadata=None): 37 self.reader = ParquetReader() ---> 38 self.reader.open(source, metadata=metadata) 39 40 @property /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/_parquet.pyx in pyarrow._parquet.ParquetReader.open (/Users/travis/miniconda3/conda-bld/recipe_1485750760150/work/arrow-7ac320bde52ae47007dadac7398e22a203c6a48d/python/build/temp.macosx-10.9-x86_64-3.6/_parquet.cxx:7144)() /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/io.pyx in 
pyarrow.io.get_reader (/Users/travis/miniconda3/conda-bld/recipe_1485750760150/work/arrow-7ac320bde52ae47007dadac7398e22a203c6a48d/python/build/temp.macosx-10.9-x86_64-3.6/io.cxx:9489)() /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/io.pyx in pyarrow.io.MemoryMappedFile.__cinit__ (/Users/travis/miniconda3/conda-bld/recipe_1485750760150/work/arrow-7ac320bde52ae47007dadac7398e22a203c6a48d/python/build/temp.macosx-10.9-x86_64-3.6/io.cxx:7732)() /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/error.pyx in pyarrow.error.check_status (/Users/travis/miniconda3/conda-bld/recipe_1485750760150/work/arrow-7ac320bde52ae47007dadac7398e22a203c6a48d/python/build/temp.macosx-10.9-x86_64-3.6/error.cxx:1197)() ArrowException: IOError: Failed to open file: part-00000-2066b71b-c55f-411a-a682-4cc94ddb6d16.snappy.parquet > Problem with reading parquet files saved by Apache Spark > -------------------------------------------------------- > > Key: ARROW-586 > URL: https://issues.apache.org/jira/browse/ARROW-586 > Project: Apache Arrow > Issue Type: Bug > Components: Python > Affects Versions: 0.1.0 > Reporter: Adam Szałucha > > When I try to read parquet file saved by Apache Spark I get the following > error > --------------------------------------------------------------------------- > ArrowException Traceback (most recent call last) > <ipython-input-14-6dfa089b9299> in <module>() > ----> 1 table = pq.read_multiple_files(files2) > /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/parquet.py in > read_multiple_files(paths, columns, filesystem, nthreads, metadata, schema) > 141 > 142 if metadata is None and schema is None: > --> 143 schema = open_file(paths[0]).schema > 144 elif schema is None: > 145 schema = metadata.schema > /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/parquet.py in > open_file(path, meta) > 132 if filesystem is None: > 133 def open_file(path, meta=None): > --> 134 return ParquetFile(path, metadata=meta) > 135 
else: > 136 def open_file(path, meta=None): > /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/parquet.py in > __init__(self, source, metadata) > 36 def __init__(self, source, metadata=None): > 37 self.reader = ParquetReader() > ---> 38 self.reader.open(source, metadata=metadata) > 39 > 40 @property > /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/_parquet.pyx in > pyarrow._parquet.ParquetReader.open > (/Users/travis/miniconda3/conda-bld/recipe_1485750760150/work/arrow-7ac320bde52ae47007dadac7398e22a203c6a48d/python/build/temp.macosx-10.9-x86_64-3.6/_parquet.cxx:7144)() > /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/io.pyx in > pyarrow.io.get_reader > (/Users/travis/miniconda3/conda-bld/recipe_1485750760150/work/arrow-7ac320bde52ae47007dadac7398e22a203c6a48d/python/build/temp.macosx-10.9-x86_64-3.6/io.cxx:9489)() > /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/io.pyx in > pyarrow.io.MemoryMappedFile.__cinit__ > (/Users/travis/miniconda3/conda-bld/recipe_1485750760150/work/arrow-7ac320bde52ae47007dadac7398e22a203c6a48d/python/build/temp.macosx-10.9-x86_64-3.6/io.cxx:7732)() > /Users/adremja/miniconda3/lib/python3.6/site-packages/pyarrow/error.pyx in > pyarrow.error.check_status > (/Users/travis/miniconda3/conda-bld/recipe_1485750760150/work/arrow-7ac320bde52ae47007dadac7398e22a203c6a48d/python/build/temp.macosx-10.9-x86_64-3.6/error.cxx:1197)() > ArrowException: IOError: Failed to open file: > part-00000-2066b71b-c55f-411a-a682-4cc94ddb6d16.snappy.parquet -- This message was sent by Atlassian JIRA (v6.3.15#6346)