nastra commented on code in PR #13859:
URL: https://github.com/apache/iceberg/pull/13859#discussion_r2354444963


##########
spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java:
##########
@@ -439,4 +467,83 @@ protected void assertNoLeak(String testName, 
Consumer<BufferAllocator> testFunct
       allocator.close();
     }
   }
+
+  private void assertIdenticalFileContents(
+      File actual, File expected, Schema schema, boolean vectorized) throws 
IOException {
+    try (CloseableIterable<Record> expectedIterator =
+        Parquet.read(Files.localInput(expected))
+            .project(schema)
+            .createReaderFunc(msgType -> 
GenericParquetReaders.buildReader(schema, msgType))
+            .build()) {
+      List<Record> expectedRecords = Lists.newArrayList(expectedIterator);
+      if (vectorized) {
+        assertRecordsMatch(
+            schema, expectedRecords.size(), expectedRecords, actual, false, 
BATCH_SIZE);
+      } else {
+        try (CloseableIterable<InternalRow> actualIterator =
+            Parquet.read(Files.localInput(actual))
+                .project(schema)
+                .createReaderFunc(msgType -> 
SparkParquetReaders.buildReader(schema, msgType))
+                .build()) {
+          List<InternalRow> actualRecords = Lists.newArrayList(actualIterator);
+          assertThat(actualRecords).hasSameSizeAs(expectedRecords);
+          for (int i = 0; i < actualRecords.size(); i++) {
+            GenericsHelpers.assertEqualsUnsafe(
+                schema.asStruct(), expectedRecords.get(i), 
actualRecords.get(i));
+          }
+        }
+      }
+    }
+  }
+
+  static Stream<Arguments> goldenFilesAndEncodings() {
+    return GOLDEN_FILE_ENCODINGS.stream()
+        .flatMap(
+            encoding ->
+                GOLDEN_FILE_TYPES.entrySet().stream()
+                    .flatMap(
+                        e ->
+                            Stream.of(true, false)
+                                .map(
+                                    vectorized ->
+                                        Arguments.of(
+                                            encoding, e.getKey(), 
e.getValue(), vectorized))));
+  }
+
+  private File resourceUrlToLocalFile(URL url) throws IOException, 
URISyntaxException {
+    if ("file".equals(url.getProtocol())) {
+      return Paths.get(url.toURI()).toFile();
+    }
+
+    String name = Paths.get(url.getPath()).getFileName().toString(); // e.g., 
string.parquet
+    String suffix = name.contains(".") ? name.substring(name.lastIndexOf('.')) 
: "";
+    File tmp = File.createTempFile("golden-", suffix, temp.toFile());
+    try (InputStream in = url.openStream()) {
+      java.nio.file.Files.copy(in, tmp.toPath(), REPLACE_EXISTING);
+    }
+    return tmp;
+  }
+
+  @ParameterizedTest
+  @MethodSource("goldenFilesAndEncodings")
+  public void testGoldenFiles(
+      String encoding, String typeName, PrimitiveType primitiveType, boolean 
vectorized)
+      throws Exception {
+    Path goldenResourcePath = Paths.get("encodings", encoding, typeName + 
".parquet");
+    URL goldenFileUrl = 
getClass().getClassLoader().getResource(goldenResourcePath.toString());
+    assumeThat(goldenFileUrl).isNotNull().as("type/encoding pair exists");
+
+    Path plainResourcePath = Paths.get("encodings", PLAIN, typeName + 
".parquet");
+    URL plainFileUrl = 
getClass().getClassLoader().getResource(plainResourcePath.toString());
+    if (plainFileUrl == null) {

Review Comment:
   nit: might as well use `Preconditions.checkState` here instead of the explicit `plainFileUrl == null` check



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to