vinothchandar commented on a change in pull request #5179:
URL: https://github.com/apache/hudi/pull/5179#discussion_r840218221
##########
File path:
hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java
##########
@@ -117,30 +133,119 @@ public void trySave(int taskPartitionId) {
}
}
+ private String getMetafileExtension() {
+ // To be backwards compatible, there is no extension to the properties
file base partition metafile
+ return format.isPresent() ? format.get().getFileExtension() : "";
+ }
+
+ /**
+ * Write the partition metadata in the correct format in the given file path.
+ *
+ * @param filePath Path of the file to write
+ * @throws IOException
+ */
+ private void writeMetafile(Path filePath) throws IOException {
+ if (format.isPresent()) {
+ Schema schema = HoodieAvroUtils.getRecordKeySchema();
+
+ switch (format.get()) {
+ case PARQUET:
+ // Since we are only interested in saving metadata to the footer,
the schema, blocksizes and other
+ // parameters are not important.
+ MessageType type =
Types.buildMessage().optional(PrimitiveTypeName.INT64).named("dummyint").named("dummy");
+ HoodieAvroWriteSupport writeSupport = new
HoodieAvroWriteSupport(type, schema, Option.empty());
Review comment:
cc @xushiyan and @vingov. This could potentially be an issue when
testing.
##########
File path:
hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java
##########
@@ -117,30 +133,119 @@ public void trySave(int taskPartitionId) {
}
}
+ private String getMetafileExtension() {
+ // To be backwards compatible, there is no extension to the properties
file base partition metafile
+ return format.isPresent() ? format.get().getFileExtension() : "";
+ }
+
+ /**
+ * Write the partition metadata in the correct format in the given file path.
+ *
+ * @param filePath Path of the file to write
+ * @throws IOException
+ */
+ private void writeMetafile(Path filePath) throws IOException {
+ if (format.isPresent()) {
+ Schema schema = HoodieAvroUtils.getRecordKeySchema();
+
+ switch (format.get()) {
+ case PARQUET:
+ // Since we are only interested in saving metadata to the footer,
the schema, blocksizes and other
+ // parameters are not important.
+ MessageType type =
Types.buildMessage().optional(PrimitiveTypeName.INT64).named("dummyint").named("dummy");
+ HoodieAvroWriteSupport writeSupport = new
HoodieAvroWriteSupport(type, schema, Option.empty());
+ try (ParquetWriter writer = new ParquetWriter(filePath,
writeSupport, CompressionCodecName.UNCOMPRESSED, 1024, 1024)) {
+ for (String key : props.stringPropertyNames()) {
+ writeSupport.addFooterMetadata(key, props.getProperty(key));
+ }
+ }
+ break;
+ case ORC:
+ // Since we are only interested in saving metadata to the footer,
the schema, blocksizes and other
+ // parameters are not important.
+ OrcFile.WriterOptions writerOptions =
OrcFile.writerOptions(fs.getConf()).fileSystem(fs)
+ .setSchema(AvroOrcUtils.createOrcSchema(schema));
+ try (Writer writer = OrcFile.createWriter(filePath, writerOptions)) {
+ for (String key : props.stringPropertyNames()) {
+ writer.addUserMetadata(key,
ByteBuffer.wrap(props.getProperty(key).getBytes()));
+ }
+ }
+ break;
+ default:
Review comment:
Okay, makes sense. I was worried that if we flip this config, we would
write the file in HFile (since that's the base format) for the MDT (metadata
table). I guess it will throw this error. Let's punt; you are right.
##########
File path:
hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java
##########
@@ -117,30 +133,119 @@ public void trySave(int taskPartitionId) {
}
}
+ private String getMetafileExtension() {
+ // To be backwards compatible, there is no extension to the properties
file base partition metafile
+ return format.isPresent() ? format.get().getFileExtension() : "";
+ }
+
+ /**
+ * Write the partition metadata in the correct format in the given file path.
+ *
+ * @param filePath Path of the file to write
+ * @throws IOException
+ */
+ private void writeMetafile(Path filePath) throws IOException {
+ if (format.isPresent()) {
+ Schema schema = HoodieAvroUtils.getRecordKeySchema();
+
+ switch (format.get()) {
+ case PARQUET:
+ // Since we are only interested in saving metadata to the footer,
the schema, blocksizes and other
+ // parameters are not important.
+ MessageType type =
Types.buildMessage().optional(PrimitiveTypeName.INT64).named("dummyint").named("dummy");
+ HoodieAvroWriteSupport writeSupport = new
HoodieAvroWriteSupport(type, schema, Option.empty());
+ try (ParquetWriter writer = new ParquetWriter(filePath,
writeSupport, CompressionCodecName.UNCOMPRESSED, 1024, 1024)) {
Review comment:
Okay, makes sense. Can we file a code cleanup task?
##########
File path:
hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java
##########
@@ -117,30 +133,119 @@ public void trySave(int taskPartitionId) {
}
}
+ private String getMetafileExtension() {
+ // To be backwards compatible, there is no extension to the properties
file base partition metafile
+ return format.isPresent() ? format.get().getFileExtension() : "";
+ }
+
+ /**
+ * Write the partition metadata in the correct format in the given file path.
+ *
+ * @param filePath Path of the file to write
+ * @throws IOException
+ */
+ private void writeMetafile(Path filePath) throws IOException {
+ if (format.isPresent()) {
+ Schema schema = HoodieAvroUtils.getRecordKeySchema();
+
+ switch (format.get()) {
+ case PARQUET:
+ // Since we are only interested in saving metadata to the footer,
the schema, blocksizes and other
+ // parameters are not important.
+ MessageType type =
Types.buildMessage().optional(PrimitiveTypeName.INT64).named("dummyint").named("dummy");
+ HoodieAvroWriteSupport writeSupport = new
HoodieAvroWriteSupport(type, schema, Option.empty());
+ try (ParquetWriter writer = new ParquetWriter(filePath,
writeSupport, CompressionCodecName.UNCOMPRESSED, 1024, 1024)) {
+ for (String key : props.stringPropertyNames()) {
+ writeSupport.addFooterMetadata(key, props.getProperty(key));
+ }
+ }
+ break;
+ case ORC:
+ // Since we are only interested in saving metadata to the footer,
the schema, blocksizes and other
+ // parameters are not important.
+ OrcFile.WriterOptions writerOptions =
OrcFile.writerOptions(fs.getConf()).fileSystem(fs)
+ .setSchema(AvroOrcUtils.createOrcSchema(schema));
+ try (Writer writer = OrcFile.createWriter(filePath, writerOptions)) {
+ for (String key : props.stringPropertyNames()) {
+ writer.addUserMetadata(key,
ByteBuffer.wrap(props.getProperty(key).getBytes()));
+ }
+ }
+ break;
+ default:
+ throw new HoodieException("Unsupported format for partition
metafiles: " + format.get());
+ }
+ } else {
+ // Backwards compatible properties file format
+ FSDataOutputStream os = fs.create(filePath, true);
+ props.store(os, "partition metadata");
+ os.hsync();
+ os.hflush();
+ os.close();
+ }
+ }
+
/**
* Read out the metadata for this partition.
*/
public void readFromFS() throws IOException {
- FSDataInputStream is = null;
+ Option<Path> metafilePath = getPartitionMetafilePath(fs, partitionPath);
+ if (!metafilePath.isPresent()) {
+ throw new HoodieException("Partition metafile not found in path " +
partitionPath);
+ }
+
try {
- Path metaFile = new Path(partitionPath, HOODIE_PARTITION_METAFILE);
- is = fs.open(metaFile);
- props.load(is);
- } catch (IOException ioe) {
- throw new HoodieException("Error reading Hoodie partition metadata for "
+ partitionPath, ioe);
- } finally {
- if (is != null) {
- is.close();
+ BaseFileUtils reader =
BaseFileUtils.getInstance(metafilePath.toString());
+ Map<String, String> metadata = reader.readFooter(fs.getConf(), true,
metafilePath.get(), PARTITION_DEPTH_KEY, COMMIT_TIME_KEY);
+ props.clear();
+ metadata.forEach((k, v) -> props.put(k, v));
+ } catch (UnsupportedOperationException e) {
+ // Properties file format
+ FSDataInputStream is = null;
+ try {
+ is = fs.open(metafilePath.get());
+ props.load(is);
+ } catch (IOException ioe) {
+ throw new HoodieException("Error reading Hoodie partition metadata
from " + metafilePath, ioe);
+ } finally {
+ if (is != null) {
+ is.close();
+ }
}
}
}
// methods related to partition meta data
public static boolean hasPartitionMetadata(FileSystem fs, Path
partitionPath) {
+ return getPartitionMetafilePath(fs, partitionPath).isPresent();
+ }
+
+ /**
+ * Returns the name of the partition metadata.
+ *
+ * @param fs
+ * @param partitionPath
+ * @return Name of the partition metafile or empty option
+ */
+ public static Option<Path> getPartitionMetafilePath(FileSystem fs, Path
partitionPath) {
+ // The partition listing is a costly operation so instead we are searching
for existence of the files instead.
+ // This is in expected order as properties file based partition metafiles
should be the most common.
Review comment:
Duh, you are right. I would still like to avoid the exists call. Let me
think about it a little more.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]