This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 9063b83254465ad4e40e5d269a917637265a8645 Author: Michael Smith <[email protected]> AuthorDate: Mon Jun 17 16:27:25 2024 -0700 IMPALA-13166: Speed up getFullyQualifiedRawPath Caches getFullyQualifiedRawPath for repeat calls, and uses Arrays.asList to avoid copying primitive arrays. Returns an ImmutableList as more compact than an ArrayList. Implements equals and hashCode on Path to provide a cleaner interface for lookups on Path. Speeds up PlannerTest#testManyExpressionPerformance from 21s to 18s in local testing. Change-Id: If02d3b20a980508deafd2109a10676ab01e50981 Reviewed-on: http://gerrit.cloudera.org:8080/21530 Reviewed-by: Michael Smith <[email protected]> Tested-by: Michael Smith <[email protected]> --- .../java/org/apache/impala/analysis/Analyzer.java | 24 +++----- .../apache/impala/analysis/CollectionTableRef.java | 3 +- .../impala/analysis/CreateFunctionStmtBase.java | 3 +- .../main/java/org/apache/impala/analysis/Path.java | 45 +++++++++++--- .../org/apache/impala/analysis/AnalyzerTest.java | 72 +++++++++++----------- 5 files changed, 84 insertions(+), 63 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java index 86ab75c1a..4655c4091 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java +++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java @@ -891,9 +891,9 @@ public class Analyzer { // Set of lowercase ambiguous implicit table aliases. private final Set<String> ambiguousAliases_ = new HashSet<>(); - // Map from lowercase fully-qualified path to its slot descriptor. Only contains paths - // that have a scalar or struct type as destination (see registerSlotRef()). - private final Map<List<String>, SlotDescriptor> slotPathMap_ = new HashMap<>(); + // Map from path to its slot descriptor. Only contains paths that have a scalar or + // struct type as destination (see registerSlotRef()). + private final Map<Path, SlotDescriptor> slotPathMap_ = new HashMap<>(); // Indicates whether this analyzer/block is guaranteed to have an empty result set // due to a limit 0 or constant conjunct evaluating to false. @@ -1428,10 +1428,10 @@ public class Analyzer { } /** - * Given a list of {"table alias", "column alias"}, return the SlotDescriptor. + * Given a Path, return the SlotDescriptor. */ - public SlotDescriptor getSlotDescriptor(List<String> qualifiedColumnName) { - return slotPathMap_.get(qualifiedColumnName); + public SlotDescriptor getSlotDescriptor(Path slotPath) { + return slotPathMap_.get(slotPath); } /** @@ -1793,7 +1793,7 @@ public class Analyzer { List<String> key = slotPath.getFullyQualifiedRawPath(); Preconditions.checkState(key.stream().allMatch(s -> s.equals(s.toLowerCase())), "Slot paths should be lower case: " + key); - SlotDescriptor existingSlotDesc = slotPathMap_.get(key); + SlotDescriptor existingSlotDesc = slotPathMap_.get(slotPath); if (existingSlotDesc != null) return existingSlotDesc; SlotDescriptor existingInTuple = findPathInCurrentTuple(slotPath); @@ -1817,13 +1817,9 @@ public class Analyzer { TupleDescriptor currentTupleDesc = tupleStack_.peek(); Preconditions.checkNotNull(currentTupleDesc); - final List<String> slotPathFQR = slotPath.getFullyQualifiedRawPath(); - Preconditions.checkNotNull(slotPathFQR); - for (SlotDescriptor slotDesc : currentTupleDesc.getSlots()) { - final List<String> tupleSlotFQR = slotDesc.getPath().getFullyQualifiedRawPath(); - if (slotPathFQR.equals(tupleSlotFQR) && - !globalState_.duplicateCollectionSlots.contains(slotDesc.getId())) { + if (slotPath.equals(slotDesc.getPath()) && + !globalState_.duplicateCollectionSlots.contains(slotDesc.getId())) { return slotDesc; } } @@ -1868,7 +1864,7 @@ public class Analyzer { Preconditions.checkState(slotPath.isRootedAtTuple()); desc.setPath(slotPath); if (insertIntoSlotPathMap) { - slotPathMap_.put(slotPath.getFullyQualifiedRawPath(), desc); + slotPathMap_.put(slotPath, desc); } registerColumnPrivReq(desc); } diff --git a/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java b/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java index 1199bd086..d339f0dde 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java +++ b/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java @@ -136,8 +136,7 @@ public class CollectionTableRef extends TableRef { // substituted in SelectStmt.resolveInlineViewRefs() // TODO: currently we cannot use the same array twice (e.g. self join) in this // case - SlotDescriptor parentSlotDesc = analyzer.getSlotDescriptor( - resolvedPath_.getFullyQualifiedRawPath()); + SlotDescriptor parentSlotDesc = analyzer.getSlotDescriptor(resolvedPath_); collectionExpr_ = new SlotRef(parentSlotDesc); collectionExpr_ = collectionExpr_.trySubstitute(sourceView.getBaseTblSmap(), analyzer, true); diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateFunctionStmtBase.java b/fe/src/main/java/org/apache/impala/analysis/CreateFunctionStmtBase.java index 6f82ac58e..9657496e5 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CreateFunctionStmtBase.java +++ b/fe/src/main/java/org/apache/impala/analysis/CreateFunctionStmtBase.java @@ -17,6 +17,7 @@ package org.apache.impala.analysis; +import java.util.Arrays; import java.util.List; import java.util.Map; @@ -163,7 +164,7 @@ public abstract class CreateFunctionStmtBase extends StatementBase { // Forbid unsupported and complex types. if (hasSignature()) { List<Type> refdTypes = Lists.newArrayList(fn_.getReturnType()); - refdTypes.addAll(Lists.newArrayList(fn_.getArgs())); + refdTypes.addAll(Arrays.asList(fn_.getArgs())); for (Type t: refdTypes) { if (!t.isSupported() || t.isComplexType()) { throw new AnalysisException( diff --git a/fe/src/main/java/org/apache/impala/analysis/Path.java b/fe/src/main/java/org/apache/impala/analysis/Path.java index 6c4e6728a..2ae4a4fac 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Path.java +++ b/fe/src/main/java/org/apache/impala/analysis/Path.java @@ -18,6 +18,7 @@ package org.apache.impala.analysis; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import com.google.common.base.MoreObjects; @@ -35,6 +36,7 @@ import org.apache.impala.util.AcidUtils; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import org.slf4j.Logger; @@ -168,6 +170,10 @@ public class Path { // Resolved path before we resolved it again inside the table masking view. private Path pathBeforeMasking_ = null; + // Fully-qualified raw path. Populated on first call to getFullyQualifiedRawPath. + // Its inputs are all private final fields, so value can't change after init. + private List<String> fullyQualifiedRawPath_ = null; + /** * Constructs a Path rooted at the given rootDesc. */ @@ -444,7 +450,8 @@ public class Path { * * @param preferAlias {@code boolean} specifying if a path that represents an alias * should use that alias in the return or if the alias should be - * resolved to its actual db, table, and column. + * resolved to its actual db, table, and column. Canonical identity + * paths use {@code true} here for e.g. equals() and hashCode(). * * @return {@link List} of {@link String}s with each element of the list containing an * individual piece of the path. Element {@code 0} contains the database, @@ -453,22 +460,40 @@ public class Path { */ public List<String> getFullyQualifiedRawPath(boolean preferAlias) { Preconditions.checkState(rootTable_ != null || rootDesc_ != null); - List<String> result = Lists.newArrayListWithCapacity(rawPath_.size() + 2); + ImmutableList.Builder<String> builder = ImmutableList.builder(); if (rootDesc_ != null && (preferAlias || rootTable_ == null)) { - result.addAll(Lists.newArrayList(rootDesc_.getAlias().split("\\."))); + builder.addAll(Arrays.asList(rootDesc_.getAlias().split("\\."))); } else { - result.add(rootTable_.getDb().getName()); - result.add(rootTable_.getName()); + builder.add(rootTable_.getDb().getName()); + builder.add(rootTable_.getName()); if (rootTable_ instanceof IcebergMetadataTable) { - result.add(((IcebergMetadataTable)rootTable_).getMetadataTableName()); + builder.add(((IcebergMetadataTable)rootTable_).getMetadataTableName()); } } - result.addAll(rawPath_); - return result; + builder.addAll(rawPath_); + return builder.build(); } + /** + * Returns the fully-qualified raw path, preferring aliases for the root if available. + * Caches the result for repeated calls, as all inputs to the path are marked final. + */ public List<String> getFullyQualifiedRawPath() { - return getFullyQualifiedRawPath(true); + if (fullyQualifiedRawPath_ == null) { + fullyQualifiedRawPath_ = getFullyQualifiedRawPath(true); + } + return fullyQualifiedRawPath_; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof Path && getFullyQualifiedRawPath().equals( + ((Path)obj).getFullyQualifiedRawPath()); + } + + @Override + public int hashCode() { + return getFullyQualifiedRawPath().hashCode(); } /** @@ -618,7 +643,7 @@ public class Path { public static Path createRelPath(Path rootPath, String... fieldNames) { Preconditions.checkState(rootPath.isResolved()); - Path result = new Path(rootPath, Lists.newArrayList(fieldNames)); + Path result = new Path(rootPath, Arrays.asList(fieldNames)); return result; } } diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java index 845784092..992fdb813 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java @@ -143,24 +143,24 @@ public class AnalyzerTest extends FrontendTestBase { descTbl.computeMemLayout(); assertEquals(89.0f, tupleDesc.getAvgSerializedSize(), 0.0); - checkLayoutParams("functional.alltypes.timestamp_col", 16, 0, 80, 0, analyzer); - checkLayoutParams("functional.alltypes.date_string_col", 12, 16, 80, 1, analyzer); - checkLayoutParams("functional.alltypes.string_col", 12, 28, 80, 2, analyzer); - checkLayoutParams("functional.alltypes.bigint_col", 8, 40, 80, 3, analyzer); - checkLayoutParams("functional.alltypes.double_col", 8, 48, 80, 4, analyzer); - checkLayoutParams("functional.alltypes.id", 4, 56, 80, 5, analyzer); - checkLayoutParams("functional.alltypes.int_col", 4, 60, 80, 6, analyzer); - checkLayoutParams("functional.alltypes.float_col", 4, 64, 80, 7, analyzer); - checkLayoutParams("functional.alltypes.year", 4, 68, 81, 0, analyzer); - checkLayoutParams("functional.alltypes.month", 4, 72, 81, 1, analyzer); - checkLayoutParams("functional.alltypes.smallint_col", 2, 76, 81, 2, analyzer); - checkLayoutParams("functional.alltypes.bool_col", 1, 78, 81, 3, analyzer); - checkLayoutParams("functional.alltypes.tinyint_col", 1, 79, 81, 4, analyzer); + checkLayoutParams(tupleDesc, "timestamp_col", 16, 0, 80, 0, analyzer); + checkLayoutParams(tupleDesc, "date_string_col", 12, 16, 80, 1, analyzer); + checkLayoutParams(tupleDesc, "string_col", 12, 28, 80, 2, analyzer); + checkLayoutParams(tupleDesc, "bigint_col", 8, 40, 80, 3, analyzer); + checkLayoutParams(tupleDesc, "double_col", 8, 48, 80, 4, analyzer); + checkLayoutParams(tupleDesc, "id", 4, 56, 80, 5, analyzer); + checkLayoutParams(tupleDesc, "int_col", 4, 60, 80, 6, analyzer); + checkLayoutParams(tupleDesc, "float_col", 4, 64, 80, 7, analyzer); + checkLayoutParams(tupleDesc, "year", 4, 68, 81, 0, analyzer); + checkLayoutParams(tupleDesc, "month", 4, 72, 81, 1, analyzer); + checkLayoutParams(tupleDesc, "smallint_col", 2, 76, 81, 2, analyzer); + checkLayoutParams(tupleDesc, "bool_col", 1, 78, 81, 3, analyzer); + checkLayoutParams(tupleDesc, "tinyint_col", 1, 79, 81, 4, analyzer); Assert.assertEquals(12, dateTblTupleDesc.getAvgSerializedSize(), 0.0); - checkLayoutParams("functional.date_tbl.id_col", 4, 0, 12, 0, analyzer); - checkLayoutParams("functional.date_tbl.date_col", 4, 4, 12, 1, analyzer); - checkLayoutParams("functional.date_tbl.date_part", 4, 8, 12, 2, analyzer); + checkLayoutParams(dateTblTupleDesc, "id_col", 4, 0, 12, 0, analyzer); + checkLayoutParams(dateTblTupleDesc, "date_col", 4, 4, 12, 1, analyzer); + checkLayoutParams(dateTblTupleDesc, "date_part", 4, 8, 12, 2, analyzer); } private void testNonNullable() throws AnalysisException { @@ -225,27 +225,27 @@ public class AnalyzerTest extends FrontendTestBase { assertEquals(64.0f, tupleDesc.getAvgSerializedSize(), 0.0); // Check non-materialized slots. - checkLayoutParams("functional.alltypes.id", 0, -1, 0, 0, analyzer); - checkLayoutParams("functional.alltypes.double_col", 0, -1, 0, 0, analyzer); - checkLayoutParams("functional.alltypes.string_col", 0, -1, 0, 0, analyzer); + checkLayoutParams(tupleDesc, "id", 0, -1, 0, 0, analyzer); + checkLayoutParams(tupleDesc, "double_col", 0, -1, 0, 0, analyzer); + checkLayoutParams(tupleDesc, "string_col", 0, -1, 0, 0, analyzer); // Check materialized slots. - checkLayoutParams("functional.alltypes.timestamp_col", 16, 0, 56, 0, analyzer); - checkLayoutParams("functional.alltypes.date_string_col", 12, 16, 56, 1, analyzer); - checkLayoutParams("functional.alltypes.bigint_col", 8, 28, 56, 2, analyzer); - checkLayoutParams("functional.alltypes.int_col", 4, 36, 56, 3, analyzer); - checkLayoutParams("functional.alltypes.float_col", 4, 40, 56, 4, analyzer); - checkLayoutParams("functional.alltypes.year", 4, 44, 56, 5, analyzer); - checkLayoutParams("functional.alltypes.month", 4, 48, 56, 6, analyzer); - checkLayoutParams("functional.alltypes.smallint_col", 2, 52, 56, 7, analyzer); - checkLayoutParams("functional.alltypes.bool_col", 1, 54, 57, 0, analyzer); - checkLayoutParams("functional.alltypes.tinyint_col", 1, 55, 57, 1, analyzer); + checkLayoutParams(tupleDesc, "timestamp_col", 16, 0, 56, 0, analyzer); + checkLayoutParams(tupleDesc, "date_string_col", 12, 16, 56, 1, analyzer); + checkLayoutParams(tupleDesc, "bigint_col", 8, 28, 56, 2, analyzer); + checkLayoutParams(tupleDesc, "int_col", 4, 36, 56, 3, analyzer); + checkLayoutParams(tupleDesc, "float_col", 4, 40, 56, 4, analyzer); + checkLayoutParams(tupleDesc, "year", 4, 44, 56, 5, analyzer); + checkLayoutParams(tupleDesc, "month", 4, 48, 56, 6, analyzer); + checkLayoutParams(tupleDesc, "smallint_col", 2, 52, 56, 7, analyzer); + checkLayoutParams(tupleDesc, "bool_col", 1, 54, 57, 0, analyzer); + checkLayoutParams(tupleDesc, "tinyint_col", 1, 55, 57, 1, analyzer); Assert.assertEquals(4, dateTblTupleDesc.getAvgSerializedSize(), 0.0); // Non-materialized slots. - checkLayoutParams("functional.date_tbl.id_col", 0, -1, 0, 0, analyzer); - checkLayoutParams("functional.date_tbl.date_col", 0, -1, 0, 0, analyzer); + checkLayoutParams(dateTblTupleDesc, "id_col", 0, -1, 0, 0, analyzer); + checkLayoutParams(dateTblTupleDesc, "date_col", 0, -1, 0, 0, analyzer); // Materialized slot. - checkLayoutParams("functional.date_tbl.date_part", 4, 0, 4, 0, analyzer); + checkLayoutParams(dateTblTupleDesc, "date_part", 4, 0, 4, 0, analyzer); } private void checkLayoutParams(SlotDescriptor d, int byteSize, int byteOffset, @@ -256,10 +256,10 @@ public class AnalyzerTest extends FrontendTestBase { assertEquals(nullIndicatorBit, d.getNullIndicatorBit()); } - private void checkLayoutParams(String colAlias, int byteSize, int byteOffset, - int nullIndicatorByte, int nullIndicatorBit, Analyzer analyzer) { - List<String> colAliasRawPath = Arrays.asList(colAlias.split("\\.")); - SlotDescriptor d = analyzer.getSlotDescriptor(colAliasRawPath); + private void checkLayoutParams(TupleDescriptor desc, String colAlias, int byteSize, + int byteOffset, int nullIndicatorByte, int nullIndicatorBit, Analyzer analyzer) { + Path colAliasPath = new Path(desc, Arrays.asList(colAlias.split("\\."))); + SlotDescriptor d = analyzer.getSlotDescriptor(colAliasPath); checkLayoutParams(d, byteSize, byteOffset, nullIndicatorByte, nullIndicatorBit); }
