This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new e9706df28 ORC-1724: JsonFileDump utility should print user metadata
e9706df28 is described below
commit e9706df28a37762ee90183961b105b21c60d951b
Author: sychen <[email protected]>
AuthorDate: Tue Jul 9 12:15:51 2024 -0700
ORC-1724: JsonFileDump utility should print user metadata
### What changes were proposed in this pull request?
This PR aims to make `JsonFileDump` include user metadata in its JSON output.
### Why are the changes needed?
ORC-223 added user metadata to the non-JSON file dump output, but the JSON
output still omits it.
### How was this patch tested?
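Added a unit test: `TestJsonFileDump` now writes user metadata, and the expected output in `orc-file-dump.json` was updated accordingly.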
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #1946 from cxzl25/ORC-1724.
Authored-by: sychen <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
java/tools/src/java/org/apache/orc/tools/JsonFileDump.java | 13 +++++++++++++
.../src/test/org/apache/orc/tools/TestJsonFileDump.java | 4 ++++
java/tools/src/test/resources/orc-file-dump.json | 4 ++++
3 files changed, 21 insertions(+)
diff --git a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
index d6166ea91..9737222da 100644
--- a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
@@ -49,6 +49,8 @@ import org.apache.orc.util.BloomFilterIO;
import java.io.IOException;
import java.io.StringWriter;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
@@ -222,6 +224,17 @@ public class JsonFileDump {
writer.name("numDeletes").value(acidStats.deletes);
writer.name("numUpdates").value(acidStats.updates);
}
+ List<String> keys = reader.getMetadataKeys();
+ keys.remove(OrcAcidUtils.ACID_STATS);
+ if (!keys.isEmpty()) {
+ writer.name("userMetadata").beginObject();
+ for (String key : keys) {
+ writer.name(key);
+ ByteBuffer byteBuffer = reader.getMetadataValue(key);
+ writer.value(String.valueOf(StandardCharsets.UTF_8.decode(byteBuffer)));
+ }
+ writer.endObject();
+ }
writer.name("status").value("OK");
rows.close();
diff --git a/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java b/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java
index 225d7c34d..0ffbea703 100644
--- a/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java
+++ b/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java
@@ -117,6 +117,10 @@ public class TestJsonFileDump {
writer.addRowBatch(batch);
}
+ writer.addUserMetadata("hive.acid.key.index",
+ StandardCharsets.UTF_8.encode("1,1,1;2,3,5;"));
+ writer.addUserMetadata("some.user.property",
+ StandardCharsets.UTF_8.encode("foo#bar$baz&"));
writer.close();
PrintStream origOut = System.out;
String outputFilename = "orc-file-dump.json";
diff --git a/java/tools/src/test/resources/orc-file-dump.json b/java/tools/src/test/resources/orc-file-dump.json
index ed821943e..15fdba74a 100644
--- a/java/tools/src/test/resources/orc-file-dump.json
+++ b/java/tools/src/test/resources/orc-file-dump.json
@@ -1380,5 +1380,9 @@
"rawDataSize": 2144730,
"paddingLength": 0,
"paddingRatio": 0.0,
+ "userMetadata": {
+ "hive.acid.key.index": "1,1,1;2,3,5;",
+ "some.user.property": "foo#bar$baz&"
+ },
"status": "OK"
}