MaxNevermind commented on code in PR #1273:
URL: https://github.com/apache/parquet-mr/pull/1273#discussion_r1526967636
##########
parquet-hadoop/src/main/java/org/apache/parquet/hadoop/rewrite/ParquetRewriter.java:
##########
@@ -904,4 +932,208 @@ public byte[] getDictPageAAD() {
return this.dictPageAAD;
}
}
+
+ private void processBlocksFromReaderWithStitching(IndexCache indexCache)
throws IOException {
+ // TODO add the test for empty files joins, it should merge schemas
+ LOG.info("Rewriting input fileLeft: {}, remaining filesLeft: {}",
reader.getFile(), inputFiles.size());
+ int rowGroupIdx = 0;
+ List<BlockMetaData> blocks = reader.getFooter().getBlocks();
+ for (BlockMetaData blockMetaData: blocks) {
+ writer.startBlock(blockMetaData.getRowCount());
+
+ // Writing the left side
+ indexCache.setBlockMetadata(blockMetaData);
+ List<ColumnChunkMetaData> chunksL = blockMetaData.getColumns();
+ for (ColumnChunkMetaData chunk : chunksL) {
+ if (chunk.isEncrypted()) { // TODO add that detail to docs
+ throw new IOException("Column " + chunk.getPath().toDotString() + "
is encrypted");
+ }
+ ColumnDescriptor descriptorL = descriptorsMap.get(chunk.getPath());
+ if (descriptorL != null) { // descriptorL might be NULL if a column is
from the right side of a join
+ reader.setStreamPosition(chunk.getStartingPos());
+ BloomFilter bloomFilter = indexCache.getBloomFilter(chunk);
+ ColumnIndex columnIndex = indexCache.getColumnIndex(chunk);
+ OffsetIndex offsetIndex = indexCache.getOffsetIndex(chunk);
+ writer.appendColumnChunk(descriptorL, reader.getStream(), chunk,
bloomFilter, columnIndex, offsetIndex);
+ }
+ }
+
+ // Writing the right side
+ for (RightColumnWriter writer: columnWritersR) {
Review Comment:
done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]