This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 9063b83254465ad4e40e5d269a917637265a8645
Author: Michael Smith <[email protected]>
AuthorDate: Mon Jun 17 16:27:25 2024 -0700

    IMPALA-13166: Speed up getFullyQualifiedRawPath
    
    Caches getFullyQualifiedRawPath for repeat calls, and uses
    Arrays.asList to avoid copying primitive arrays. Returns an
    ImmutableList as more compact than an ArrayList.
    
    Implements equals and hashCode on Path to provide a cleaner interface
    for lookups on Path.
    
    Speeds up PlannerTest#testManyExpressionPerformance from 21s to 18s in
    local testing.
    
    Change-Id: If02d3b20a980508deafd2109a10676ab01e50981
    Reviewed-on: http://gerrit.cloudera.org:8080/21530
    Reviewed-by: Michael Smith <[email protected]>
    Tested-by: Michael Smith <[email protected]>
---
 .../java/org/apache/impala/analysis/Analyzer.java  | 24 +++-----
 .../apache/impala/analysis/CollectionTableRef.java |  3 +-
 .../impala/analysis/CreateFunctionStmtBase.java    |  3 +-
 .../main/java/org/apache/impala/analysis/Path.java | 45 +++++++++++---
 .../org/apache/impala/analysis/AnalyzerTest.java   | 72 +++++++++++-----------
 5 files changed, 84 insertions(+), 63 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java 
b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 86ab75c1a..4655c4091 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -891,9 +891,9 @@ public class Analyzer {
   // Set of lowercase ambiguous implicit table aliases.
   private final Set<String> ambiguousAliases_ = new HashSet<>();
 
-  // Map from lowercase fully-qualified path to its slot descriptor. Only 
contains paths
-  // that have a scalar or struct type as destination (see registerSlotRef()).
-  private final Map<List<String>, SlotDescriptor> slotPathMap_ = new 
HashMap<>();
+  // Map from path to its slot descriptor. Only contains paths that have a 
scalar or
+  // struct type as destination (see registerSlotRef()).
+  private final Map<Path, SlotDescriptor> slotPathMap_ = new HashMap<>();
 
   // Indicates whether this analyzer/block is guaranteed to have an empty 
result set
   // due to a limit 0 or constant conjunct evaluating to false.
@@ -1428,10 +1428,10 @@ public class Analyzer {
   }
 
   /**
-   * Given a list of {"table alias", "column alias"}, return the 
SlotDescriptor.
+   * Given a Path, return the SlotDescriptor.
    */
-  public SlotDescriptor getSlotDescriptor(List<String> qualifiedColumnName) {
-    return slotPathMap_.get(qualifiedColumnName);
+  public SlotDescriptor getSlotDescriptor(Path slotPath) {
+    return slotPathMap_.get(slotPath);
   }
 
   /**
@@ -1793,7 +1793,7 @@ public class Analyzer {
       List<String> key = slotPath.getFullyQualifiedRawPath();
       Preconditions.checkState(key.stream().allMatch(s -> 
s.equals(s.toLowerCase())),
           "Slot paths should be lower case: " + key);
-      SlotDescriptor existingSlotDesc = slotPathMap_.get(key);
+      SlotDescriptor existingSlotDesc = slotPathMap_.get(slotPath);
       if (existingSlotDesc != null) return existingSlotDesc;
 
       SlotDescriptor existingInTuple = findPathInCurrentTuple(slotPath);
@@ -1817,13 +1817,9 @@ public class Analyzer {
     TupleDescriptor currentTupleDesc = tupleStack_.peek();
     Preconditions.checkNotNull(currentTupleDesc);
 
-    final List<String> slotPathFQR = slotPath.getFullyQualifiedRawPath();
-    Preconditions.checkNotNull(slotPathFQR);
-
     for (SlotDescriptor slotDesc : currentTupleDesc.getSlots()) {
-      final List<String> tupleSlotFQR = 
slotDesc.getPath().getFullyQualifiedRawPath();
-      if (slotPathFQR.equals(tupleSlotFQR) &&
-            !globalState_.duplicateCollectionSlots.contains(slotDesc.getId())) 
{
+      if (slotPath.equals(slotDesc.getPath()) &&
+          !globalState_.duplicateCollectionSlots.contains(slotDesc.getId())) {
         return slotDesc;
       }
     }
@@ -1868,7 +1864,7 @@ public class Analyzer {
     Preconditions.checkState(slotPath.isRootedAtTuple());
     desc.setPath(slotPath);
     if (insertIntoSlotPathMap) {
-      slotPathMap_.put(slotPath.getFullyQualifiedRawPath(), desc);
+      slotPathMap_.put(slotPath, desc);
     }
     registerColumnPrivReq(desc);
   }
diff --git 
a/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java 
b/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java
index 1199bd086..d339f0dde 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java
@@ -136,8 +136,7 @@ public class CollectionTableRef extends TableRef {
       // substituted in SelectStmt.resolveInlineViewRefs()
       // TODO: currently we cannot use the same array twice (e.g. self join) 
in this
       //       case
-      SlotDescriptor parentSlotDesc = analyzer.getSlotDescriptor(
-          resolvedPath_.getFullyQualifiedRawPath());
+      SlotDescriptor parentSlotDesc = 
analyzer.getSlotDescriptor(resolvedPath_);
       collectionExpr_ = new SlotRef(parentSlotDesc);
       collectionExpr_ =
         collectionExpr_.trySubstitute(sourceView.getBaseTblSmap(), analyzer, 
true);
diff --git 
a/fe/src/main/java/org/apache/impala/analysis/CreateFunctionStmtBase.java 
b/fe/src/main/java/org/apache/impala/analysis/CreateFunctionStmtBase.java
index 6f82ac58e..9657496e5 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateFunctionStmtBase.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateFunctionStmtBase.java
@@ -17,6 +17,7 @@
 
 package org.apache.impala.analysis;
 
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 
@@ -163,7 +164,7 @@ public abstract class CreateFunctionStmtBase extends 
StatementBase {
     // Forbid unsupported and complex types.
     if (hasSignature()) {
       List<Type> refdTypes = Lists.newArrayList(fn_.getReturnType());
-      refdTypes.addAll(Lists.newArrayList(fn_.getArgs()));
+      refdTypes.addAll(Arrays.asList(fn_.getArgs()));
       for (Type t: refdTypes) {
         if (!t.isSupported() || t.isComplexType()) {
           throw new AnalysisException(
diff --git a/fe/src/main/java/org/apache/impala/analysis/Path.java 
b/fe/src/main/java/org/apache/impala/analysis/Path.java
index 6c4e6728a..2ae4a4fac 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Path.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Path.java
@@ -18,6 +18,7 @@
 package org.apache.impala.analysis;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 
 import com.google.common.base.MoreObjects;
@@ -35,6 +36,7 @@ import org.apache.impala.util.AcidUtils;
 
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 
 import org.slf4j.Logger;
@@ -168,6 +170,10 @@ public class Path {
   // Resolved path before we resolved it again inside the table masking view.
   private Path pathBeforeMasking_ = null;
 
+  // Fully-qualified raw path. Populated on first call to 
getFullyQualifiedRawPath.
+  // Its inputs are all private final fields, so value can't change after init.
+  private List<String> fullyQualifiedRawPath_ = null;
+
   /**
    * Constructs a Path rooted at the given rootDesc.
    */
@@ -444,7 +450,8 @@ public class Path {
    *
    * @param preferAlias {@code boolean} specifying if a path that represents 
an alias
    *                    should use that alias in the return or if the alias 
should be
-   *                    resolved to its actual db, table, and column.
+   *                    resolved to its actual db, table, and column. 
Canonical identity
+   *                    paths use {@code true} here for e.g. equals() and 
hashCode().
    *
    * @return {@link List} of {@link String}s with each element of the list 
containing an
    *         individual piece of the path. Element {@code 0} contains the 
database,
@@ -453,22 +460,40 @@ public class Path {
    */
   public List<String> getFullyQualifiedRawPath(boolean preferAlias) {
     Preconditions.checkState(rootTable_ != null || rootDesc_ != null);
-    List<String> result = Lists.newArrayListWithCapacity(rawPath_.size() + 2);
+    ImmutableList.Builder<String> builder = ImmutableList.builder();
     if (rootDesc_ != null && (preferAlias || rootTable_ == null)) {
-      result.addAll(Lists.newArrayList(rootDesc_.getAlias().split("\\.")));
+      builder.addAll(Arrays.asList(rootDesc_.getAlias().split("\\.")));
     } else {
-      result.add(rootTable_.getDb().getName());
-      result.add(rootTable_.getName());
+      builder.add(rootTable_.getDb().getName());
+      builder.add(rootTable_.getName());
       if (rootTable_ instanceof IcebergMetadataTable) {
-        result.add(((IcebergMetadataTable)rootTable_).getMetadataTableName());
+        builder.add(((IcebergMetadataTable)rootTable_).getMetadataTableName());
       }
     }
-    result.addAll(rawPath_);
-    return result;
+    builder.addAll(rawPath_);
+    return builder.build();
   }
 
+  /**
+   * Returns the fully-qualified raw path, preferring aliases for the root if 
available.
+   * Caches the result for repeated calls, as all inputs to the path are 
marked final.
+   */
   public List<String> getFullyQualifiedRawPath() {
-    return getFullyQualifiedRawPath(true);
+    if (fullyQualifiedRawPath_ == null) {
+      fullyQualifiedRawPath_ = getFullyQualifiedRawPath(true);
+    }
+    return fullyQualifiedRawPath_;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    return obj instanceof Path && getFullyQualifiedRawPath().equals(
+        ((Path)obj).getFullyQualifiedRawPath());
+  }
+
+  @Override
+  public int hashCode() {
+    return getFullyQualifiedRawPath().hashCode();
   }
 
   /**
@@ -618,7 +643,7 @@ public class Path {
 
   public static Path createRelPath(Path rootPath, String... fieldNames) {
     Preconditions.checkState(rootPath.isResolved());
-    Path result = new Path(rootPath, Lists.newArrayList(fieldNames));
+    Path result = new Path(rootPath, Arrays.asList(fieldNames));
     return result;
   }
 }
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java 
b/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
index 845784092..992fdb813 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
@@ -143,24 +143,24 @@ public class AnalyzerTest extends FrontendTestBase {
     descTbl.computeMemLayout();
 
     assertEquals(89.0f, tupleDesc.getAvgSerializedSize(), 0.0);
-    checkLayoutParams("functional.alltypes.timestamp_col", 16, 0, 80, 0, 
analyzer);
-    checkLayoutParams("functional.alltypes.date_string_col", 12, 16, 80, 1, 
analyzer);
-    checkLayoutParams("functional.alltypes.string_col", 12, 28, 80, 2, 
analyzer);
-    checkLayoutParams("functional.alltypes.bigint_col", 8, 40, 80, 3, 
analyzer);
-    checkLayoutParams("functional.alltypes.double_col", 8, 48, 80, 4, 
analyzer);
-    checkLayoutParams("functional.alltypes.id", 4, 56, 80, 5, analyzer);
-    checkLayoutParams("functional.alltypes.int_col", 4, 60, 80, 6, analyzer);
-    checkLayoutParams("functional.alltypes.float_col", 4, 64, 80, 7, analyzer);
-    checkLayoutParams("functional.alltypes.year", 4, 68, 81, 0, analyzer);
-    checkLayoutParams("functional.alltypes.month", 4, 72, 81, 1, analyzer);
-    checkLayoutParams("functional.alltypes.smallint_col", 2, 76, 81, 2, 
analyzer);
-    checkLayoutParams("functional.alltypes.bool_col", 1, 78, 81, 3, analyzer);
-    checkLayoutParams("functional.alltypes.tinyint_col", 1, 79, 81, 4, 
analyzer);
+    checkLayoutParams(tupleDesc, "timestamp_col", 16, 0, 80, 0, analyzer);
+    checkLayoutParams(tupleDesc, "date_string_col", 12, 16, 80, 1, analyzer);
+    checkLayoutParams(tupleDesc, "string_col", 12, 28, 80, 2, analyzer);
+    checkLayoutParams(tupleDesc, "bigint_col", 8, 40, 80, 3, analyzer);
+    checkLayoutParams(tupleDesc, "double_col", 8, 48, 80, 4, analyzer);
+    checkLayoutParams(tupleDesc, "id", 4, 56, 80, 5, analyzer);
+    checkLayoutParams(tupleDesc, "int_col", 4, 60, 80, 6, analyzer);
+    checkLayoutParams(tupleDesc, "float_col", 4, 64, 80, 7, analyzer);
+    checkLayoutParams(tupleDesc, "year", 4, 68, 81, 0, analyzer);
+    checkLayoutParams(tupleDesc, "month", 4, 72, 81, 1, analyzer);
+    checkLayoutParams(tupleDesc, "smallint_col", 2, 76, 81, 2, analyzer);
+    checkLayoutParams(tupleDesc, "bool_col", 1, 78, 81, 3, analyzer);
+    checkLayoutParams(tupleDesc, "tinyint_col", 1, 79, 81, 4, analyzer);
 
     Assert.assertEquals(12, dateTblTupleDesc.getAvgSerializedSize(), 0.0);
-    checkLayoutParams("functional.date_tbl.id_col", 4, 0, 12, 0, analyzer);
-    checkLayoutParams("functional.date_tbl.date_col", 4, 4, 12, 1, analyzer);
-    checkLayoutParams("functional.date_tbl.date_part", 4, 8, 12, 2, analyzer);
+    checkLayoutParams(dateTblTupleDesc, "id_col", 4, 0, 12, 0, analyzer);
+    checkLayoutParams(dateTblTupleDesc, "date_col", 4, 4, 12, 1, analyzer);
+    checkLayoutParams(dateTblTupleDesc, "date_part", 4, 8, 12, 2, analyzer);
   }
 
   private void testNonNullable() throws AnalysisException {
@@ -225,27 +225,27 @@ public class AnalyzerTest extends FrontendTestBase {
 
     assertEquals(64.0f, tupleDesc.getAvgSerializedSize(), 0.0);
     // Check non-materialized slots.
-    checkLayoutParams("functional.alltypes.id", 0, -1, 0, 0, analyzer);
-    checkLayoutParams("functional.alltypes.double_col", 0, -1, 0, 0, analyzer);
-    checkLayoutParams("functional.alltypes.string_col", 0, -1, 0, 0, analyzer);
+    checkLayoutParams(tupleDesc, "id", 0, -1, 0, 0, analyzer);
+    checkLayoutParams(tupleDesc, "double_col", 0, -1, 0, 0, analyzer);
+    checkLayoutParams(tupleDesc, "string_col", 0, -1, 0, 0, analyzer);
     // Check materialized slots.
-    checkLayoutParams("functional.alltypes.timestamp_col", 16, 0, 56, 0, 
analyzer);
-    checkLayoutParams("functional.alltypes.date_string_col", 12, 16, 56, 1, 
analyzer);
-    checkLayoutParams("functional.alltypes.bigint_col", 8, 28, 56, 2, 
analyzer);
-    checkLayoutParams("functional.alltypes.int_col", 4, 36, 56, 3, analyzer);
-    checkLayoutParams("functional.alltypes.float_col", 4, 40, 56, 4, analyzer);
-    checkLayoutParams("functional.alltypes.year", 4, 44, 56, 5, analyzer);
-    checkLayoutParams("functional.alltypes.month", 4, 48, 56, 6, analyzer);
-    checkLayoutParams("functional.alltypes.smallint_col", 2, 52, 56, 7, 
analyzer);
-    checkLayoutParams("functional.alltypes.bool_col", 1, 54, 57, 0, analyzer);
-    checkLayoutParams("functional.alltypes.tinyint_col", 1, 55, 57, 1, 
analyzer);
+    checkLayoutParams(tupleDesc, "timestamp_col", 16, 0, 56, 0, analyzer);
+    checkLayoutParams(tupleDesc, "date_string_col", 12, 16, 56, 1, analyzer);
+    checkLayoutParams(tupleDesc, "bigint_col", 8, 28, 56, 2, analyzer);
+    checkLayoutParams(tupleDesc, "int_col", 4, 36, 56, 3, analyzer);
+    checkLayoutParams(tupleDesc, "float_col", 4, 40, 56, 4, analyzer);
+    checkLayoutParams(tupleDesc, "year", 4, 44, 56, 5, analyzer);
+    checkLayoutParams(tupleDesc, "month", 4, 48, 56, 6, analyzer);
+    checkLayoutParams(tupleDesc, "smallint_col", 2, 52, 56, 7, analyzer);
+    checkLayoutParams(tupleDesc, "bool_col", 1, 54, 57, 0, analyzer);
+    checkLayoutParams(tupleDesc, "tinyint_col", 1, 55, 57, 1, analyzer);
 
     Assert.assertEquals(4, dateTblTupleDesc.getAvgSerializedSize(), 0.0);
     // Non-materialized slots.
-    checkLayoutParams("functional.date_tbl.id_col", 0, -1, 0, 0, analyzer);
-    checkLayoutParams("functional.date_tbl.date_col", 0, -1, 0, 0, analyzer);
+    checkLayoutParams(dateTblTupleDesc, "id_col", 0, -1, 0, 0, analyzer);
+    checkLayoutParams(dateTblTupleDesc, "date_col", 0, -1, 0, 0, analyzer);
     // Materialized slot.
-    checkLayoutParams("functional.date_tbl.date_part", 4, 0, 4, 0, analyzer);
+    checkLayoutParams(dateTblTupleDesc, "date_part", 4, 0, 4, 0, analyzer);
   }
 
   private void checkLayoutParams(SlotDescriptor d, int byteSize, int 
byteOffset,
@@ -256,10 +256,10 @@ public class AnalyzerTest extends FrontendTestBase {
     assertEquals(nullIndicatorBit, d.getNullIndicatorBit());
   }
 
-  private void checkLayoutParams(String colAlias, int byteSize, int byteOffset,
-      int nullIndicatorByte, int nullIndicatorBit, Analyzer analyzer) {
-    List<String> colAliasRawPath = Arrays.asList(colAlias.split("\\."));
-    SlotDescriptor d = analyzer.getSlotDescriptor(colAliasRawPath);
+  private void checkLayoutParams(TupleDescriptor desc, String colAlias, int 
byteSize,
+      int byteOffset, int nullIndicatorByte, int nullIndicatorBit, Analyzer 
analyzer) {
+    Path colAliasPath = new Path(desc, Arrays.asList(colAlias.split("\\.")));
+    SlotDescriptor d = analyzer.getSlotDescriptor(colAliasPath);
     checkLayoutParams(d, byteSize, byteOffset, nullIndicatorByte, 
nullIndicatorBit);
   }
 

Reply via email to