This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit bbfa4becf18ce9c60dc6c57ae3f97c5c4b6ccd99 Merge: f9c6fe50c6 e15750431c Author: Michael Blow <michael.b...@couchbase.com> AuthorDate: Fri Sep 19 19:46:33 2025 -0400 Merge branch 'gerrit/phoenix' into 'master' * [ASTERIXDB-3641] Sampling query taking long time to run * [ASTERIXDB-3636][STO] Fix buffer reservations understimation * [NO ISSUE][*HYR][MISC] += Lazy helper for deferred initialization * [NO ISSUE][RT] Notify joblet cleanup if joblet creation fails * [NO ISSUE][*DB][EXT] Avoid serialization of JsonFactory * [ASTERIXDB-3641] Sampling query taking long time to run * [NO ISSUE][*DB][RT] Whisper logging for active partition,txn block msgs * [NO ISSUE][*DB][NET] Upgrade Netty to 4.1.125.Final * [ASTERIXDB-3644] Callback for result consumption * [NO ISSUE] Split countn_01 into multiple queries * [NO ISSUE][HYR][STO] FileMapManager performance improvements * [NO ISSUE][HYR][STO] Add missing serialVersionUID * [NO ISSUE] Fix result for pushdown test * [ASTERIXDB-3646][COMP] Missing fields when selecting all fields with window function Change-Id: I24dcea00986058def1eaac1cf317147c5b3e1196 .../active/message/ActivePartitionMessage.java | 5 + .../apache/asterix/optimizer/rules/cbo/Stats.java | 52 ++++++++-- .../optimizer/rules/pushdown/PushdownContext.java | 2 +- .../api/http/server/QueryResultApiServlet.java | 12 ++- .../apache/asterix/app/nc/NCAppRuntimeContext.java | 11 +-- .../apache/asterix/test/common/TestExecutor.java | 7 ++ .../aggregate/countn_01/countn_01.3.query.sqlpp | 18 +--- .../aggregate/countn_01/countn_01.5.query.sqlpp | 2 +- ...tn_01.5.query.sqlpp => countn_01.6.query.sqlpp} | 14 +-- ...tn_01.5.query.sqlpp => countn_01.7.query.sqlpp} | 0 .../async-deferred/AsyncDeferredQueries.xml | 2 +- .../window_subquery/window_query.001.ddl.sqlpp} | 25 ++--- .../window_subquery/window_query.002.update.sqlpp} | 35 ++++--- .../window_subquery/window_query.003.query.sqlpp} | 20 ++-- .../window_subquery/window_query.004.query.sqlpp} | 20 ++-- .../results/aggregate/countn_01/countn_01.1.adm | 5 +- .../results/aggregate/countn_01/countn_01.3.adm | 2 +- .../results/aggregate/countn_01/countn_01.4.adm | 1 + .../countn_01/{countn_01.3.adm => countn_01.5.adm} | 0 .../api/cluster_state_1/cluster_state_1.1.regexadm | 4 - .../cluster_state_1_full.1.regexadm | 4 - .../cluster_state_1_less.1.regexadm | 4 - .../same-datasource-diff-scan.004.plan | 64 ++++++------ .../same-datasource-diff-scan.005.plan | 96 +++++++++--------- .../same-datasource-diff-scan.006.plan | 98 +++++++++---------- .../same-datasource-diff-scan.007.plan | 66 ++++++------- .../pushdown/window_subquery/window_query.002.adm | 1 + .../pushdown/window_subquery/window_query.003.adm | 1 + .../same-datasource-diff-scan.004.plan | 72 +++++++------- .../same-datasource-diff-scan.005.plan | 106 ++++++++++---------- .../same-datasource-diff-scan.006.plan | 108 ++++++++++----------- .../same-datasource-diff-scan.007.plan | 64 ++++++------ .../same-datasource-diff-scan.004.plan | 72 +++++++------- .../same-datasource-diff-scan.005.plan | 106 ++++++++++---------- .../same-datasource-diff-scan.006.plan | 108 ++++++++++----------- .../same-datasource-diff-scan.007.plan | 64 ++++++------ .../src/test/resources/runtimets/sqlpp_queries.xml | 5 + ...tractParquetDeltaBinaryPackingValuesWriter.java | 2 + .../bytes/encoder/ParquetDeltaByteArrayWriter.java | 4 + .../ParquetDeltaLengthByteArrayValuesWriter.java | 3 + .../ParquetPlainFixedLengthValuesWriter.java | 2 + .../ParquetPlainVariableLengthValuesWriter.java | 2 + .../operation/lsm/flush/FlushColumnMetadata.java | 25 +++-- .../lsm/flush/FlushColumnTupleWriter.java | 12 +++ .../lsm/flush/NoWriteFlushColumnMetadata.java | 45 +++++++-- .../lsm/merge/MergeColumnTupleWriter.java | 9 ++ .../lsm/merge/MergeColumnWriteMetadata.java | 13 ++- .../asterix/column/values/IColumnValuesWriter.java | 5 + .../values/writer/AbstractColumnValuesWriter.java | 2 + .../values/writer/BooleanColumnValuesWriter.java | 9 ++ .../values/writer/DoubleColumnValuesWriter.java | 9 ++ .../values/writer/FloatColumnValuesWriter.java | 9 ++ .../values/writer/LongColumnValuesWriter.java | 15 ++- .../values/writer/NoOpColumnValuesWriter.java | 9 ++ .../writer/NullMissingColumnValuesWriter.java | 9 ++ .../values/writer/StringColumnValuesWriter.java | 12 +++ .../values/writer/UUIDColumnValuesWriter.java | 11 ++- .../values/writer/DummyColumnValuesWriter.java | 5 + .../asterix/common/config/StorageProperties.java | 33 ++++--- .../parser/factory/JSONDataParserFactory.java | 6 +- .../asterix/runtime/message/TxnIdBlockRequest.java | 5 + asterixdb/pom.xml | 2 +- ...4j-persistence-extension--1.3.0_MIT_License.txt | 3 +- .../algebricks/algebricks-core/pom.xml | 2 +- .../hyracks/api/result/IResultDirectory.java | 2 + .../api/result/IResultPartitionManager.java | 4 +- .../hyracks/api/result/ResultDirectoryRecord.java | 4 + .../apache/hyracks/api/result/ResultJobRecord.java | 23 ++++- .../java/org/apache/hyracks/api/util/Lazy.java | 40 ++++---- .../hyracks/client/result/ResultDirectory.java | 5 + .../client/result/ResultDirectoryRemoteProxy.java | 5 + hyracks-fullstack/hyracks/hyracks-cloud/pom.xml | 2 +- .../hyracks/control/cc/ClusterControllerIPCI.java | 7 ++ .../control/cc/result/IResultDirectoryService.java | 2 + .../control/cc/result/ResultDirectoryService.java | 12 +++ .../cc/work/ReportResultPartitionConsumedWork.java | 57 +++++++++++ .../control/common/base/IClusterController.java | 2 + .../hyracks/control/common/ipc/CCNCFunctions.java | 34 +++++++ .../common/ipc/ClusterControllerRemoteProxy.java | 7 ++ .../control/nc/result/ResultPartitionManager.java | 15 ++- .../control/nc/result/ResultPartitionReader.java | 4 +- .../nc/work/FailedJobletCreationCleanupWork.java | 61 ++++++++++++ .../hyracks/control/nc/work/StartTasksWork.java | 6 +- .../hyracks/hyracks-data/hyracks-data-std/pom.xml | 2 +- .../hyracks/hyracks-dataflow-common/pom.xml | 2 +- .../examples/btree/helper/RuntimeContext.java | 3 +- .../hyracks/hyracks-storage-am-btree/pom.xml | 2 +- .../hyracks/hyracks-storage-am-common/pom.xml | 2 +- .../hyracks-storage-am-lsm-btree-column/pom.xml | 2 +- .../column/api/AbstractColumnTupleWriter.java | 6 ++ .../column/impls/btree/ColumnBTreeBulkloader.java | 19 ++-- .../hyracks/hyracks-storage-am-lsm-common/pom.xml | 2 +- .../am/lsm/common/impls/VirtualBufferCache.java | 33 +++---- .../hyracks/hyracks-storage-common/pom.xml | 4 + .../common/buffercache/ColumnBufferPool.java | 99 ++++++++++--------- .../common/buffercache/FreeColumnBufferPool.java | 17 ++++ .../common/buffercache/IColumnBufferPool.java | 4 +- .../storage/common/file/FileMapManager.java | 54 +++++++++-- .../storage/common/file/IFileMapManager.java | 19 ++++ .../common/file/SynchronizedFileMapManager.java | 87 +++++++++++++++++ .../support/TestStorageManagerComponentHolder.java | 3 +- .../storage/common/ColumnBufferPoolTest.java | 16 +-- hyracks-fullstack/hyracks/hyracks-util/pom.xml | 2 +- hyracks-fullstack/pom.xml | 4 +- 104 files changed, 1421 insertions(+), 792 deletions(-) diff --cc asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java index 7ffffd53b7,78cf365daa..aa73da13e0 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java @@@ -175,13 -173,17 +175,14 @@@ public class Stats boolean unnestOp1 = joinEnum.findUnnestOp(joinEnum.leafInputs.get(idx1 - 1)); boolean unnestOp2 = joinEnum.findUnnestOp(joinEnum.leafInputs.get(idx2 - 1)); boolean unnestOp = unnestOp1 || unnestOp2; + boolean okOp = acceptableOp(joinExpr.getFunctionIdentifier()); - Index.SampleIndexDetails idxDetails1 = (Index.SampleIndexDetails) index1.getIndexDetails(); - Index.SampleIndexDetails idxDetails2 = (Index.SampleIndexDetails) index2.getIndexDetails(); - if (((idxDetails1.getSourceCardinality() < idxDetails1.getSampleCardinalityTarget()) - || (idxDetails2.getSourceCardinality() < idxDetails2.getSampleCardinalityTarget()) - || (!(joinExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.EQ))) - || exprUsedVars.size() > 2) && !unnestOp && okOp) { //* if there are more than 2 variables, it is not a simple join like r.a op s.a + ILogicalOperator leafInput1 = joinEnum.leafInputs.get(idx1 - 1); + ILogicalOperator leafInput2 = joinEnum.leafInputs.get(idx2 - 1); + LogicalVariable var1 = exprUsedVars.get(0); + LogicalVariable var2 = exprUsedVars.get(1); + // If there are more than 2 variables, it is not a simple join like r.a op s.a - if (!unnestOp && (exprUsedVars.size() > 2 ++ if (okOp && !unnestOp && (exprUsedVars.size() > 2 + || isJoinSelFromSamplesApplicable(leafInput1, leafInput2, index1, index2, var1, var2))) { double sels = findJoinSelFromSamples(joinEnum.leafInputs.get(idx1 - 1), joinEnum.leafInputs.get(idx2 - 1), index1, index2, joinExpr, jOp); if (sels == 0.0) { @@@ -195,28 -197,45 +196,67 @@@ } } + private boolean isJoinSelFromSamplesApplicable(ILogicalOperator leafInput1, ILogicalOperator leafInput2, + Index index1, Index index2, LogicalVariable var1, LogicalVariable var2) throws AlgebricksException { + Index.SampleIndexDetails details1 = (Index.SampleIndexDetails) index1.getIndexDetails(); + Index.SampleIndexDetails details2 = (Index.SampleIndexDetails) index2.getIndexDetails(); + if (details1.getSourceCardinality() >= details1.getSampleCardinalityTarget() + && details2.getSourceCardinality() >= details2.getSampleCardinalityTarget()) { + return false; + } + double numDistinct1 = computeNumDistinct(leafInput1, var1, index1); + if (numDistinct1 < 0) { + return false; + } + double avgNumRowsPerValue1 = details1.getSourceCardinality() / numDistinct1; + double numDistinct2 = computeNumDistinct(leafInput2, var2, index2); + if (numDistinct2 < 0) { + return false; + } + double avgNumRowsPerValue2 = details2.getSourceCardinality() / numDistinct2; + return avgNumRowsPerValue1 * avgNumRowsPerValue2 * Math.min(numDistinct1, numDistinct2) <= Math + .max(Math.max(details1.getSourceCardinality(), details2.getSourceCardinality()), 750000); + } + + private double computeNumDistinct(ILogicalOperator leafInput, LogicalVariable var, Index index) + throws AlgebricksException { + List<List<IAObject>> result = runSamplingQueryDistinct(this.optCtx, leafInput, var, index); + if (result == null) { + return -1; // Negative value indicates failure + } + double numDistincts = findPredicateCardinality(result, true); + Index.SampleIndexDetails details = (Index.SampleIndexDetails) index.getIndexDetails(); + if (numDistincts == 0) { + numDistincts = details.getSourceCardinality(); // All values are nulls + } + if (numDistincts == 0) { + numDistincts = 1; // Sample is empty + } + return numDistincts; + } + + private boolean acceptableOp(FunctionIdentifier functionIdentifier) { + if (functionIdentifier.equals(AlgebricksBuiltinFunctions.NEQ)) { + return true; + } + if (functionIdentifier.equals(AlgebricksBuiltinFunctions.LT)) { + return true; + } + if (functionIdentifier.equals(AlgebricksBuiltinFunctions.GT)) { + return true; + } + if (functionIdentifier.equals(AlgebricksBuiltinFunctions.GE)) { + return true; + } + if (functionIdentifier.equals(AlgebricksBuiltinFunctions.LE)) { + return true; + } + if (functionIdentifier.equals(BuiltinFunctions.IF_MISSING_OR_NULL)) { // added this for q16 in CH2 + return true; + } + return false; + } + private double naiveJoinSelectivity(List<LogicalVariable> exprUsedVars, double card1, double card2, int idx1, int idx2, boolean unnestOp1, boolean unnestOp2) throws AlgebricksException { ILogicalOperator leafInput;