This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new bc195d61f ORC-1724: JsonFileDump utility should print user metadata
bc195d61f is described below

commit bc195d61fa8b350e18d0fd12d2344b009bfca311
Author: sychen <[email protected]>
AuthorDate: Tue Jul 9 12:15:51 2024 -0700

    ORC-1724: JsonFileDump utility should print user metadata
    
    ### What changes were proposed in this pull request?
    This PR aims to make JsonFileDump output user metadata.
    
    ### Why are the changes needed?
    ORC-223 implemented the output of user metadata in the non-JSON format,
    but the JSON format does not output user metadata.
    
    ### How was this patch tested?
    Added a unit test (UT) in TestJsonFileDump.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #1946 from cxzl25/ORC-1724.
    
    Authored-by: sychen <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
    (cherry picked from commit e9706df28a37762ee90183961b105b21c60d951b)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 java/tools/src/java/org/apache/orc/tools/JsonFileDump.java  | 13 +++++++++++++
 .../src/test/org/apache/orc/tools/TestJsonFileDump.java     |  4 ++++
 java/tools/src/test/resources/orc-file-dump.json            |  4 ++++
 3 files changed, 21 insertions(+)

diff --git a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
index d6166ea91..9737222da 100644
--- a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
@@ -49,6 +49,8 @@ import org.apache.orc.util.BloomFilterIO;
 
 import java.io.IOException;
 import java.io.StringWriter;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -222,6 +224,17 @@ public class JsonFileDump {
           writer.name("numDeletes").value(acidStats.deletes);
           writer.name("numUpdates").value(acidStats.updates);
         }
+        List<String> keys = reader.getMetadataKeys();
+        keys.remove(OrcAcidUtils.ACID_STATS);
+        if (!keys.isEmpty()) {
+          writer.name("userMetadata").beginObject();
+          for (String key : keys) {
+            writer.name(key);
+            ByteBuffer byteBuffer = reader.getMetadataValue(key);
+            writer.value(String.valueOf(StandardCharsets.UTF_8.decode(byteBuffer)));
+          }
+          writer.endObject();
+        }
         writer.name("status").value("OK");
         rows.close();
 
diff --git a/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java b/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java
index 225d7c34d..0ffbea703 100644
--- a/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java
+++ b/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java
@@ -117,6 +117,10 @@ public class TestJsonFileDump {
       writer.addRowBatch(batch);
     }
 
+    writer.addUserMetadata("hive.acid.key.index",
+        StandardCharsets.UTF_8.encode("1,1,1;2,3,5;"));
+    writer.addUserMetadata("some.user.property",
+        StandardCharsets.UTF_8.encode("foo#bar$baz&"));
     writer.close();
     PrintStream origOut = System.out;
     String outputFilename = "orc-file-dump.json";
diff --git a/java/tools/src/test/resources/orc-file-dump.json b/java/tools/src/test/resources/orc-file-dump.json
index ed821943e..15fdba74a 100644
--- a/java/tools/src/test/resources/orc-file-dump.json
+++ b/java/tools/src/test/resources/orc-file-dump.json
@@ -1380,5 +1380,9 @@
   "rawDataSize": 2144730,
   "paddingLength": 0,
   "paddingRatio": 0.0,
+  "userMetadata": {
+    "hive.acid.key.index": "1,1,1;2,3,5;",
+    "some.user.property": "foo#bar$baz&"
+  },
   "status": "OK"
 }

Reply via email to