This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit dd942965474d36bbd48b51c4e351ba4a311f8752
Author: Michael Smith <[email protected]>
AuthorDate: Mon Jun 17 17:14:04 2024 -0700

    IMPALA-13166: Optimize ExprSubstitutionMap allocation
    
    Allocates HashMap and Lists with known sizes to avoid resizing them
    during initial construction. HashMap uses default capacity of 16 and
    load factor of .75, and we increase initial capacity as needed.
    
    Has no measurable impact on performance, but may help with garbage
    collection.
    
    Change-Id: If5e56401508f30e3066b1a3758d69d89a288adcd
    Reviewed-on: http://gerrit.cloudera.org:8080/21533
    Reviewed-by: Michael Smith <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../impala/analysis/ExprSubstitutionMap.java       | 24 +++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git 
a/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java 
b/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java
index 0b4e6372f..e2fbbe88d 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java
@@ -64,8 +64,14 @@ public final class ExprSubstitutionMap {
   }
 
   private void buildMap() {
-    // Build lookup map and ensure keys are unique.
-    substitutions_ = new HashMap<>();
+    // Build lookup map and ensure keys are unique. Set initial size to avoid 
rehash.
+    // https://docs.oracle.com/javase/8/docs/api/java/util/HashMap.html 
describes
+    // HashMap's arguments (and defaults): initial capacity (16) and load 
factor (0.75).
+    // "If the initial capacity is greater than the maximum number of entries 
divided
+    // by the load factor, no rehash operation will ever occur." Set initial 
capacity to
+    // current size / 0.75 (i.e. (size+1) * 4/3 to round up), with floor of 16 
to allow
+    // space for later puts.
+    substitutions_ = new HashMap<>(Math.max(16, (lhs_.size() + 1) * 4 / 3));
     List<Integer> toRemove = new ArrayList<>();
     for (int i = 0; i < lhs_.size(); i++) {
       Expr existingVal = substitutions_.putIfAbsent(lhs_.get(i), rhs_.get(i));
@@ -142,6 +148,16 @@ public final class ExprSubstitutionMap {
     return result;
   }
 
+  /*
+   * Concatenates two lists without triggering intermediate resizes in the 
resulting list.
+   */
+  private static List<Expr> concat(List<Expr> lhs, List<Expr> rhs) {
+    List<Expr> result = new ArrayList<>(lhs.size() + rhs.size());
+    result.addAll(lhs);
+    result.addAll(rhs);
+    return result;
+  }
+
   /**
    * Returns the union of two substitution maps. Always returns a non-null map.
    */
@@ -150,9 +166,7 @@ public final class ExprSubstitutionMap {
     if (f == null && g == null) return new ExprSubstitutionMap();
     if (f == null) return g;
     if (g == null) return f;
-    return new ExprSubstitutionMap(
-      Lists.newArrayList(Iterables.concat(f.lhs_, g.lhs_)),
-      Lists.newArrayList(Iterables.concat(f.rhs_, g.rhs_)));
+    return new ExprSubstitutionMap(concat(f.lhs_, g.lhs_), concat(f.rhs_, 
g.rhs_));
   }
 
   /**

Reply via email to