This is an automated email from the ASF dual-hosted git repository.

gabor pushed a commit to branch parquet-1.11.x
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/parquet-1.11.x by this push:
     new af713b5  PARQUET-2094: Handle negative values in page headers (#933)
af713b5 is described below

commit af713b590539eeef836ea9974de011e0c89b8115
Author: Gabor Szadovszky <[email protected]>
AuthorDate: Thu Sep 30 10:07:06 2021 +0200

    PARQUET-2094: Handle negative values in page headers (#933)
    
    (cherry picked from commit 1695d92cc07288713a9f2230f3aac61e2dc6a8e4)
---
 .../format/InvalidParquetMetadataException.java    | 30 +++++++++++++++
 .../apache/parquet/format/MetadataValidator.java   | 44 ++++++++++++++++++++++
 .../main/java/org/apache/parquet/format/Util.java  |  2 +-
 .../java/org/apache/parquet/format/TestUtil.java   | 20 +++++++++-
 4 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java
new file mode 100644
index 0000000..c0852bb
--- /dev/null
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.parquet.format;
+
+/**
+ * A specific RuntimeException thrown when invalid values are found in the Parquet file metadata (including the footer,
+ * page header etc.).
+ *
+ * The only constructor is package-private, so instances can be created only by code in the
+ * org.apache.parquet.format package; the supplied message is passed unchanged to RuntimeException.
+ */
+public class InvalidParquetMetadataException extends RuntimeException {
+  InvalidParquetMetadataException(String message) {
+    super(message);
+  }
+}
diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java
new file mode 100644
index 0000000..b3738ec
--- /dev/null
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.parquet.format;
+
+/**
+ * Utility class to validate different types of Parquet metadata (e.g. footer, page headers etc.).
+ *
+ * <p>Validation failures are reported by throwing {@link InvalidParquetMetadataException}.
+ */
+public class MetadataValidator {
+
+  /**
+   * Checks the specified page header and hands it back unchanged when it is acceptable.
+   *
+   * <p>Currently only the compressed page size is checked: it must not be negative.
+   *
+   * @param pageHeader the page header to validate
+   * @return the same {@code pageHeader} instance that was passed in
+   * @throws InvalidParquetMetadataException if the compressed page size is negative
+   */
+  static PageHeader validate(PageHeader pageHeader) {
+    int compressedPageSize = pageHeader.getCompressed_page_size();
+    if (compressedPageSize < 0) {
+      // Build the message only on failure so that valid headers pay no formatting cost.
+      throw new InvalidParquetMetadataException(
+          "Compressed page size must not be negative but was: " + compressedPageSize);
+    }
+    return pageHeader;
+  }
+
+  private MetadataValidator() {
+    // Private constructor to prevent instantiation
+  }
+
+}
diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java
index d09d007..2c3dc72 100644
--- a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java
@@ -75,7 +75,7 @@ public class Util {
   }
 
   public static PageHeader readPageHeader(InputStream from) throws IOException 
{
-    return read(from, new PageHeader());
+    return MetadataValidator.validate(read(from, new PageHeader()));
   }
 
   public static void writeFileMetaData(org.apache.parquet.format.FileMetaData 
fileMetadata, OutputStream to) throws IOException {
diff --git a/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java
index 1adf099..685e251 100644
--- a/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java
+++ b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java
@@ -23,13 +23,16 @@ import static junit.framework.Assert.assertEquals;
 import static junit.framework.Assert.assertNull;
 import static org.apache.parquet.format.Util.readFileMetaData;
 import static org.apache.parquet.format.Util.writeFileMetaData;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
+import java.io.IOException;
 
 import org.junit.Test;
-
 import org.apache.parquet.format.Util.DefaultFileMetaDataConsumer;
+
 public class TestUtil {
 
   @Test
@@ -77,6 +80,21 @@ public class TestUtil {
     assertEquals(md, md6);
   }
 
+  /**
+   * Verifies that reading back a serialized page header carrying a negative size (-50) is rejected
+   * with an {@link InvalidParquetMetadataException} whose message names the compressed page size.
+   */
+  @Test
+  public void testInvalidPageHeader() throws IOException {
+    // Serialize a header with a negative size; writing is expected to succeed, only reading validates.
+    PageHeader ph = new PageHeader(PageType.DATA_PAGE, 100, -50);
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    Util.writePageHeader(ph, out);
+
+    try {
+      Util.readPageHeader(in(out));
+      fail("Expected InvalidParquetMetadataException but none was thrown");
+    } catch (InvalidParquetMetadataException e) {
+      assertTrue("Exception message does not contain the expected parts",
+          e.getMessage().contains("Compressed page size"));
+    }
+  }
+
   private ByteArrayInputStream in(ByteArrayOutputStream baos) {
     return new ByteArrayInputStream(baos.toByteArray());
   }

Reply via email to