This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new c91e874  ARROW-3843: [C++][Python] Allow a "degenerate" Parquet file with no columns
c91e874 is described below

commit c91e8747efd1c1cdcbac02528c6c20462c4574df
Author: Wes McKinney <[email protected]>
AuthorDate: Fri Mar 22 09:41:02 2019 -0500

    ARROW-3843: [C++][Python] Allow a "degenerate" Parquet file with no columns
    
    Author: Wes McKinney <[email protected]>
    
    Closes #3985 from wesm/ARROW-3843 and squashes the following commits:
    
    2aba9262d <Wes McKinney> Add C++ unit test for zero-columns. Distinguish from degenerate root node
    1588a08b0 <Wes McKinney> Allow a degenerate Parquet file with no columns
---
 cpp/src/parquet/schema-test.cc       | 7 +++++++
 cpp/src/parquet/schema.cc            | 9 +++++++--
 python/pyarrow/tests/test_parquet.py | 7 +++++++
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/cpp/src/parquet/schema-test.cc b/cpp/src/parquet/schema-test.cc
index 3d7eb0e..ecb7f26 100644
--- a/cpp/src/parquet/schema-test.cc
+++ b/cpp/src/parquet/schema-test.cc
@@ -448,6 +448,13 @@ TEST_F(TestSchemaConverter, NestedExample) {
   ASSERT_TRUE(check_for_parent_consistency(group_));
 }
 
+TEST_F(TestSchemaConverter, ZeroColumns) {
+  // ARROW-3843
+  SchemaElement elements[1];
+  elements[0] = NewGroup("schema", FieldRepetitionType::REPEATED, 0, 0);
+  ASSERT_NO_THROW(Convert(elements, 1));
+}
+
 TEST_F(TestSchemaConverter, InvalidRoot) {
   // According to the Parquet specification, the first element in the
   // list<SchemaElement> is a group whose children (and their descendants)
diff --git a/cpp/src/parquet/schema.cc b/cpp/src/parquet/schema.cc
index 431f307..0a5668d 100644
--- a/cpp/src/parquet/schema.cc
+++ b/cpp/src/parquet/schema.cc
@@ -370,9 +370,14 @@ void PrimitiveNode::ToParquet(void* opaque_element) const {
 std::unique_ptr<Node> FlatSchemaConverter::Convert() {
   const SchemaElement& root = elements_[0];
 
-  // Validate the root node
   if (root.num_children == 0) {
-    throw ParquetException("Root node did not have children");
+    if (length_ == 1) {
+      // Degenerate case of Parquet file with no columns
+      return GroupNode::FromParquet(static_cast<const void*>(&root), next_id(), {});
+    } else {
+      throw ParquetException(
+          "Parquet schema had multiple nodes but root had no children");
+    }
   }
 
   // Relaxing this restriction as some implementations don't set this
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 34d0956..39479e3 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -236,6 +236,13 @@ def test_empty_table_roundtrip():
     _check_roundtrip(table, version='2.0')
 
 
+@pytest.mark.pandas
+def test_empty_table_no_columns():
+    df = pd.DataFrame()
+    empty = pa.Table.from_pandas(df, preserve_index=False)
+    _check_roundtrip(empty)
+
+
 def test_empty_lists_table_roundtrip():
     # ARROW-2744: Shouldn't crash when writing an array of empty lists
     arr = pa.array([[], []], type=pa.list_(pa.int32()))

Reply via email to