This is an automated email from the ASF dual-hosted git repository.

william pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new c157afaac ORC-1740: Avoid the dump tool repeatedly parsing ColumnStatistics
c157afaac is described below

commit c157afaac0a20306594b70887ebe7f322e41a67f
Author: sychen <[email protected]>
AuthorDate: Wed Jul 10 15:51:30 2024 -0700

    ORC-1740: Avoid the dump tool repeatedly parsing ColumnStatistics
    
    ### What changes were proposed in this pull request?
    This PR aims to avoid the dump tool repeatedly parsing ColumnStatistics.
    
    ### Why are the changes needed?
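    `org.apache.orc.StripeStatistics#getColumnStatistics` always generates statistical information for all columns. The dump tools called it in both the loop condition and the loop body of their per-column loops, so when there are many columns the parsing performance decreases noticeably.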
    
    
https://github.com/apache/orc/blob/c38e20d862ce19395558e092dd42033a000fe22d/java/core/src/java/org/apache/orc/StripeStatistics.java#L57-L66
    
    ### How was this patch tested?
    Local testing and the existing unit tests.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #1972 from cxzl25/ORC-1740.
    
    Authored-by: sychen <[email protected]>
    Signed-off-by: William Hyun <[email protected]>
---
 java/tools/src/java/org/apache/orc/tools/FileDump.java     | 5 +++--
 java/tools/src/java/org/apache/orc/tools/JsonFileDump.java | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/java/tools/src/java/org/apache/orc/tools/FileDump.java b/java/tools/src/java/org/apache/orc/tools/FileDump.java
index c23505310..55016ddcd 100644
--- a/java/tools/src/java/org/apache/orc/tools/FileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/FileDump.java
@@ -357,9 +357,10 @@ public final class FileDump {
     for (int n = 0; n < stripeStats.size(); n++) {
       System.out.println("  Stripe " + (n + 1) + ":");
       StripeStatistics ss = stripeStats.get(n);
-      for (int i = 0; i < ss.getColumnStatistics().length; ++i) {
+      ColumnStatistics[] columnStatistics = ss.getColumnStatistics();
+      for (int i = 0; i < columnStatistics.length; ++i) {
         System.out.println("    Column " + i + ": " +
-            ss.getColumnStatistics()[i].toString());
+            columnStatistics[i].toString());
       }
     }
     ColumnStatistics[] stats = reader.getStatistics();
diff --git a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
index 9737222da..7d893a54c 100644
--- a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
@@ -112,10 +112,11 @@ public class JsonFileDump {
           writer.name("stripeNumber").value(n + 1);
           StripeStatistics ss = stripeStatistics.get(n);
           writer.name("columnStatistics").beginArray();
-          for (int i = 0; i < ss.getColumnStatistics().length; i++) {
+          ColumnStatistics[] columnStatistics = ss.getColumnStatistics();
+          for (int i = 0; i < columnStatistics.length; i++) {
             writer.beginObject();
             writer.name("columnId").value(i);
-            writeColumnStatistics(writer, ss.getColumnStatistics()[i]);
+            writeColumnStatistics(writer, columnStatistics[i]);
             writer.endObject();
           }
           writer.endArray();

Reply via email to