nastra commented on code in PR #13859: URL: https://github.com/apache/iceberg/pull/13859#discussion_r2354445663
########## spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java: ########## @@ -439,4 +467,83 @@ protected void assertNoLeak(String testName, Consumer<BufferAllocator> testFunct allocator.close(); } } + + private void assertIdenticalFileContents( + File actual, File expected, Schema schema, boolean vectorized) throws IOException { + try (CloseableIterable<Record> expectedIterator = + Parquet.read(Files.localInput(expected)) + .project(schema) + .createReaderFunc(msgType -> GenericParquetReaders.buildReader(schema, msgType)) + .build()) { + List<Record> expectedRecords = Lists.newArrayList(expectedIterator); + if (vectorized) { + assertRecordsMatch( + schema, expectedRecords.size(), expectedRecords, actual, false, BATCH_SIZE); + } else { + try (CloseableIterable<InternalRow> actualIterator = + Parquet.read(Files.localInput(actual)) + .project(schema) + .createReaderFunc(msgType -> SparkParquetReaders.buildReader(schema, msgType)) + .build()) { + List<InternalRow> actualRecords = Lists.newArrayList(actualIterator); + assertThat(actualRecords).hasSameSizeAs(expectedRecords); + for (int i = 0; i < actualRecords.size(); i++) { + GenericsHelpers.assertEqualsUnsafe( + schema.asStruct(), expectedRecords.get(i), actualRecords.get(i)); + } + } + } + } + } + + static Stream<Arguments> goldenFilesAndEncodings() { + return GOLDEN_FILE_ENCODINGS.stream() + .flatMap( + encoding -> + GOLDEN_FILE_TYPES.entrySet().stream() + .flatMap( + e -> + Stream.of(true, false) + .map( + vectorized -> + Arguments.of( + encoding, e.getKey(), e.getValue(), vectorized)))); + } + + private File resourceUrlToLocalFile(URL url) throws IOException, URISyntaxException { + if ("file".equals(url.getProtocol())) { + return Paths.get(url.toURI()).toFile(); + } + + String name = Paths.get(url.getPath()).getFileName().toString(); // e.g., string.parquet + String suffix = name.contains(".") ? 
name.substring(name.lastIndexOf('.')) : ""; + File tmp = File.createTempFile("golden-", suffix, temp.toFile()); + try (InputStream in = url.openStream()) { + java.nio.file.Files.copy(in, tmp.toPath(), REPLACE_EXISTING); + } + return tmp; + } + + @ParameterizedTest + @MethodSource("goldenFilesAndEncodings") + public void testGoldenFiles( + String encoding, String typeName, PrimitiveType primitiveType, boolean vectorized) + throws Exception { + Path goldenResourcePath = Paths.get("encodings", encoding, typeName + ".parquet"); + URL goldenFileUrl = getClass().getClassLoader().getResource(goldenResourcePath.toString()); + assumeThat(goldenFileUrl).isNotNull().as("type/encoding pair exists"); Review Comment: `.as()` needs to come before the final assertion, as otherwise the description is going to be ignored, e.g. `assumeThat(goldenFileUrl).as("type/encoding pair exists").isNotNull();` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org