vinothchandar commented on a change in pull request #5179:
URL: https://github.com/apache/hudi/pull/5179#discussion_r839059458
##########
File path:
hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java
##########
@@ -117,30 +133,119 @@ public void trySave(int taskPartitionId) {
}
}
+ private String getMetafileExtension() {
+ // To be backwards compatible, there is no extension to the properties
file base partition metafile
+ return format.isPresent() ? format.get().getFileExtension() : "";
+ }
+
+ /**
+ * Write the partition metadata in the correct format in the given file path.
+ *
+ * @param filePath Path of the file to write
+ * @throws IOException
+ */
+ private void writeMetafile(Path filePath) throws IOException {
+ if (format.isPresent()) {
+ Schema schema = HoodieAvroUtils.getRecordKeySchema();
+
+ switch (format.get()) {
+ case PARQUET:
+ // Since we are only interested in saving metadata to the footer,
the schema, blocksizes and other
+ // parameters are not important.
+ MessageType type =
Types.buildMessage().optional(PrimitiveTypeName.INT64).named("dummyint").named("dummy");
+ HoodieAvroWriteSupport writeSupport = new
HoodieAvroWriteSupport(type, schema, Option.empty());
+ try (ParquetWriter writer = new ParquetWriter(filePath,
writeSupport, CompressionCodecName.UNCOMPRESSED, 1024, 1024)) {
+ for (String key : props.stringPropertyNames()) {
+ writeSupport.addFooterMetadata(key, props.getProperty(key));
+ }
+ }
+ break;
+ case ORC:
+ // Since we are only interested in saving metadata to the footer,
the schema, blocksizes and other
+ // parameters are not important.
+ OrcFile.WriterOptions writerOptions =
OrcFile.writerOptions(fs.getConf()).fileSystem(fs)
+ .setSchema(AvroOrcUtils.createOrcSchema(schema));
+ try (Writer writer = OrcFile.createWriter(filePath, writerOptions)) {
+ for (String key : props.stringPropertyNames()) {
+ writer.addUserMetadata(key,
ByteBuffer.wrap(props.getProperty(key).getBytes()));
+ }
+ }
+ break;
+ default:
+ throw new HoodieException("Unsupported format for partition
metafiles: " + format.get());
+ }
+ } else {
+ // Backwards compatible properties file format
+ FSDataOutputStream os = fs.create(filePath, true);
+ props.store(os, "partition metadata");
+ os.hsync();
+ os.hflush();
+ os.close();
+ }
+ }
+
/**
* Read out the metadata for this partition.
*/
public void readFromFS() throws IOException {
- FSDataInputStream is = null;
+ Option<Path> metafilePath = getPartitionMetafilePath(fs, partitionPath);
+ if (!metafilePath.isPresent()) {
+ throw new HoodieException("Partition metafile not found in path " +
partitionPath);
+ }
+
try {
- Path metaFile = new Path(partitionPath, HOODIE_PARTITION_METAFILE);
- is = fs.open(metaFile);
- props.load(is);
- } catch (IOException ioe) {
- throw new HoodieException("Error reading Hoodie partition metadata for "
+ partitionPath, ioe);
- } finally {
- if (is != null) {
- is.close();
+ BaseFileUtils reader =
BaseFileUtils.getInstance(metafilePath.toString());
+ Map<String, String> metadata = reader.readFooter(fs.getConf(), true,
metafilePath.get(), PARTITION_DEPTH_KEY, COMMIT_TIME_KEY);
+ props.clear();
+ metadata.forEach((k, v) -> props.put(k, v));
+ } catch (UnsupportedOperationException e) {
+ // Properties file format
+ FSDataInputStream is = null;
+ try {
+ is = fs.open(metafilePath.get());
+ props.load(is);
+ } catch (IOException ioe) {
+ throw new HoodieException("Error reading Hoodie partition metadata
from " + metafilePath, ioe);
+ } finally {
+ if (is != null) {
+ is.close();
+ }
}
}
}
// methods related to partition meta data
public static boolean hasPartitionMetadata(FileSystem fs, Path
partitionPath) {
+ return getPartitionMetafilePath(fs, partitionPath).isPresent();
+ }
+
+ /**
+ * Returns the name of the partition metadata.
+ *
+ * @param fs
+ * @param partitionPath
+ * @return Name of the partition metafile or empty option
+ */
+ public static Option<Path> getPartitionMetafilePath(FileSystem fs, Path
partitionPath) {
+ // The partition listing is a costly operation so instead we are searching
for existence of the files instead.
+ // This is in expected order as properties file based partition metafiles
should be the most common.
try {
- return fs.exists(new Path(partitionPath,
HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
+ Path metafilePath = new Path(partitionPath,
HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX);
Review comment:
Take that back a bit. It will be tricky for older query engine versions,
since they don't have this piece of code.
Can we check `tableConfig.getPartitionMetafileFormat()` and only check for
the old props file if it's not set? Otherwise, we will make one extra call for
the new meta file formats.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]