This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit bbfa4becf18ce9c60dc6c57ae3f97c5c4b6ccd99
Merge: f9c6fe50c6 e15750431c
Author: Michael Blow <michael.b...@couchbase.com>
AuthorDate: Fri Sep 19 19:46:33 2025 -0400

    Merge branch 'gerrit/phoenix' into 'master'
    
     * [ASTERIXDB-3641] Sampling query taking long time to run
     * [ASTERIXDB-3636][STO] Fix buffer reservations underestimation
     * [NO ISSUE][*HYR][MISC] += Lazy helper for deferred initialization
     * [NO ISSUE][RT] Notify joblet cleanup if joblet creation fails
     * [NO ISSUE][*DB][EXT] Avoid serialization of JsonFactory
     * [ASTERIXDB-3641] Sampling query taking long time to run
     * [NO ISSUE][*DB][RT] Whisper logging for active partition,txn block msgs
     * [NO ISSUE][*DB][NET] Upgrade Netty to 4.1.125.Final
     * [ASTERIXDB-3644] Callback for result consumption
     * [NO ISSUE] Split countn_01 into multiple queries
     * [NO ISSUE][HYR][STO] FileMapManager performance improvements
     * [NO ISSUE][HYR][STO] Add missing serialVersionUID
     * [NO ISSUE] Fix result for pushdown test
     * [ASTERIXDB-3646][COMP] Missing fields when selecting all fields with window function
    
    Change-Id: I24dcea00986058def1eaac1cf317147c5b3e1196

 .../active/message/ActivePartitionMessage.java     |   5 +
 .../apache/asterix/optimizer/rules/cbo/Stats.java  |  52 ++++++++--
 .../optimizer/rules/pushdown/PushdownContext.java  |   2 +-
 .../api/http/server/QueryResultApiServlet.java     |  12 ++-
 .../apache/asterix/app/nc/NCAppRuntimeContext.java |  11 +--
 .../apache/asterix/test/common/TestExecutor.java   |   7 ++
 .../aggregate/countn_01/countn_01.3.query.sqlpp    |  18 +---
 .../aggregate/countn_01/countn_01.5.query.sqlpp    |   2 +-
 ...tn_01.5.query.sqlpp => countn_01.6.query.sqlpp} |  14 +--
 ...tn_01.5.query.sqlpp => countn_01.7.query.sqlpp} |   0
 .../async-deferred/AsyncDeferredQueries.xml        |   2 +-
 .../window_subquery/window_query.001.ddl.sqlpp}    |  25 ++---
 .../window_subquery/window_query.002.update.sqlpp} |  35 ++++---
 .../window_subquery/window_query.003.query.sqlpp}  |  20 ++--
 .../window_subquery/window_query.004.query.sqlpp}  |  20 ++--
 .../results/aggregate/countn_01/countn_01.1.adm    |   5 +-
 .../results/aggregate/countn_01/countn_01.3.adm    |   2 +-
 .../results/aggregate/countn_01/countn_01.4.adm    |   1 +
 .../countn_01/{countn_01.3.adm => countn_01.5.adm} |   0
 .../api/cluster_state_1/cluster_state_1.1.regexadm |   4 -
 .../cluster_state_1_full.1.regexadm                |   4 -
 .../cluster_state_1_less.1.regexadm                |   4 -
 .../same-datasource-diff-scan.004.plan             |  64 ++++++------
 .../same-datasource-diff-scan.005.plan             |  96 +++++++++---------
 .../same-datasource-diff-scan.006.plan             |  98 +++++++++----------
 .../same-datasource-diff-scan.007.plan             |  66 ++++++-------
 .../pushdown/window_subquery/window_query.002.adm  |   1 +
 .../pushdown/window_subquery/window_query.003.adm  |   1 +
 .../same-datasource-diff-scan.004.plan             |  72 +++++++-------
 .../same-datasource-diff-scan.005.plan             | 106 ++++++++++----------
 .../same-datasource-diff-scan.006.plan             | 108 ++++++++++-----------
 .../same-datasource-diff-scan.007.plan             |  64 ++++++------
 .../same-datasource-diff-scan.004.plan             |  72 +++++++-------
 .../same-datasource-diff-scan.005.plan             | 106 ++++++++++----------
 .../same-datasource-diff-scan.006.plan             | 108 ++++++++++-----------
 .../same-datasource-diff-scan.007.plan             |  64 ++++++------
 .../src/test/resources/runtimets/sqlpp_queries.xml |   5 +
 ...tractParquetDeltaBinaryPackingValuesWriter.java |   2 +
 .../bytes/encoder/ParquetDeltaByteArrayWriter.java |   4 +
 .../ParquetDeltaLengthByteArrayValuesWriter.java   |   3 +
 .../ParquetPlainFixedLengthValuesWriter.java       |   2 +
 .../ParquetPlainVariableLengthValuesWriter.java    |   2 +
 .../operation/lsm/flush/FlushColumnMetadata.java   |  25 +++--
 .../lsm/flush/FlushColumnTupleWriter.java          |  12 +++
 .../lsm/flush/NoWriteFlushColumnMetadata.java      |  45 +++++++--
 .../lsm/merge/MergeColumnTupleWriter.java          |   9 ++
 .../lsm/merge/MergeColumnWriteMetadata.java        |  13 ++-
 .../asterix/column/values/IColumnValuesWriter.java |   5 +
 .../values/writer/AbstractColumnValuesWriter.java  |   2 +
 .../values/writer/BooleanColumnValuesWriter.java   |   9 ++
 .../values/writer/DoubleColumnValuesWriter.java    |   9 ++
 .../values/writer/FloatColumnValuesWriter.java     |   9 ++
 .../values/writer/LongColumnValuesWriter.java      |  15 ++-
 .../values/writer/NoOpColumnValuesWriter.java      |   9 ++
 .../writer/NullMissingColumnValuesWriter.java      |   9 ++
 .../values/writer/StringColumnValuesWriter.java    |  12 +++
 .../values/writer/UUIDColumnValuesWriter.java      |  11 ++-
 .../values/writer/DummyColumnValuesWriter.java     |   5 +
 .../asterix/common/config/StorageProperties.java   |  33 ++++---
 .../parser/factory/JSONDataParserFactory.java      |   6 +-
 .../asterix/runtime/message/TxnIdBlockRequest.java |   5 +
 asterixdb/pom.xml                                  |   2 +-
 ...4j-persistence-extension--1.3.0_MIT_License.txt |   3 +-
 .../algebricks/algebricks-core/pom.xml             |   2 +-
 .../hyracks/api/result/IResultDirectory.java       |   2 +
 .../api/result/IResultPartitionManager.java        |   4 +-
 .../hyracks/api/result/ResultDirectoryRecord.java  |   4 +
 .../apache/hyracks/api/result/ResultJobRecord.java |  23 ++++-
 .../java/org/apache/hyracks/api/util/Lazy.java     |  40 ++++----
 .../hyracks/client/result/ResultDirectory.java     |   5 +
 .../client/result/ResultDirectoryRemoteProxy.java  |   5 +
 hyracks-fullstack/hyracks/hyracks-cloud/pom.xml    |   2 +-
 .../hyracks/control/cc/ClusterControllerIPCI.java  |   7 ++
 .../control/cc/result/IResultDirectoryService.java |   2 +
 .../control/cc/result/ResultDirectoryService.java  |  12 +++
 .../cc/work/ReportResultPartitionConsumedWork.java |  57 +++++++++++
 .../control/common/base/IClusterController.java    |   2 +
 .../hyracks/control/common/ipc/CCNCFunctions.java  |  34 +++++++
 .../common/ipc/ClusterControllerRemoteProxy.java   |   7 ++
 .../control/nc/result/ResultPartitionManager.java  |  15 ++-
 .../control/nc/result/ResultPartitionReader.java   |   4 +-
 .../nc/work/FailedJobletCreationCleanupWork.java   |  61 ++++++++++++
 .../hyracks/control/nc/work/StartTasksWork.java    |   6 +-
 .../hyracks/hyracks-data/hyracks-data-std/pom.xml  |   2 +-
 .../hyracks/hyracks-dataflow-common/pom.xml        |   2 +-
 .../examples/btree/helper/RuntimeContext.java      |   3 +-
 .../hyracks/hyracks-storage-am-btree/pom.xml       |   2 +-
 .../hyracks/hyracks-storage-am-common/pom.xml      |   2 +-
 .../hyracks-storage-am-lsm-btree-column/pom.xml    |   2 +-
 .../column/api/AbstractColumnTupleWriter.java      |   6 ++
 .../column/impls/btree/ColumnBTreeBulkloader.java  |  19 ++--
 .../hyracks/hyracks-storage-am-lsm-common/pom.xml  |   2 +-
 .../am/lsm/common/impls/VirtualBufferCache.java    |  33 +++----
 .../hyracks/hyracks-storage-common/pom.xml         |   4 +
 .../common/buffercache/ColumnBufferPool.java       |  99 ++++++++++---------
 .../common/buffercache/FreeColumnBufferPool.java   |  17 ++++
 .../common/buffercache/IColumnBufferPool.java      |   4 +-
 .../storage/common/file/FileMapManager.java        |  54 +++++++++--
 .../storage/common/file/IFileMapManager.java       |  19 ++++
 .../common/file/SynchronizedFileMapManager.java    |  87 +++++++++++++++++
 .../support/TestStorageManagerComponentHolder.java |   3 +-
 .../storage/common/ColumnBufferPoolTest.java       |  16 +--
 hyracks-fullstack/hyracks/hyracks-util/pom.xml     |   2 +-
 hyracks-fullstack/pom.xml                          |   4 +-
 104 files changed, 1421 insertions(+), 792 deletions(-)

diff --cc 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 7ffffd53b7,78cf365daa..aa73da13e0
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@@ -175,13 -173,17 +175,14 @@@ public class Stats 
              boolean unnestOp1 = 
joinEnum.findUnnestOp(joinEnum.leafInputs.get(idx1 - 1));
              boolean unnestOp2 = 
joinEnum.findUnnestOp(joinEnum.leafInputs.get(idx2 - 1));
              boolean unnestOp = unnestOp1 || unnestOp2;
 +            boolean okOp = acceptableOp(joinExpr.getFunctionIdentifier());
-             Index.SampleIndexDetails idxDetails1 = (Index.SampleIndexDetails) 
index1.getIndexDetails();
-             Index.SampleIndexDetails idxDetails2 = (Index.SampleIndexDetails) 
index2.getIndexDetails();
-             if (((idxDetails1.getSourceCardinality() < 
idxDetails1.getSampleCardinalityTarget())
-                     || (idxDetails2.getSourceCardinality() < 
idxDetails2.getSampleCardinalityTarget())
-                     || 
(!(joinExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.EQ)))
-                     || exprUsedVars.size() > 2) && !unnestOp && okOp) { //* 
if there are more than 2 variables, it is not a simple join like r.a op s.a
+             ILogicalOperator leafInput1 = joinEnum.leafInputs.get(idx1 - 1);
+             ILogicalOperator leafInput2 = joinEnum.leafInputs.get(idx2 - 1);
+             LogicalVariable var1 = exprUsedVars.get(0);
+             LogicalVariable var2 = exprUsedVars.get(1);
+             // If there are more than 2 variables, it is not a simple join 
like r.a op s.a
 -            if (!unnestOp && (exprUsedVars.size() > 2
++            if (okOp && !unnestOp && (exprUsedVars.size() > 2
+                     || isJoinSelFromSamplesApplicable(leafInput1, leafInput2, 
index1, index2, var1, var2))) {
                  double sels = 
findJoinSelFromSamples(joinEnum.leafInputs.get(idx1 - 1),
                          joinEnum.leafInputs.get(idx2 - 1), index1, index2, 
joinExpr, jOp);
                  if (sels == 0.0) {
@@@ -195,28 -197,45 +196,67 @@@
          }
      }
  
+     private boolean isJoinSelFromSamplesApplicable(ILogicalOperator 
leafInput1, ILogicalOperator leafInput2,
+             Index index1, Index index2, LogicalVariable var1, LogicalVariable 
var2) throws AlgebricksException {
+         Index.SampleIndexDetails details1 = (Index.SampleIndexDetails) 
index1.getIndexDetails();
+         Index.SampleIndexDetails details2 = (Index.SampleIndexDetails) 
index2.getIndexDetails();
+         if (details1.getSourceCardinality() >= 
details1.getSampleCardinalityTarget()
+                 && details2.getSourceCardinality() >= 
details2.getSampleCardinalityTarget()) {
+             return false;
+         }
+         double numDistinct1 = computeNumDistinct(leafInput1, var1, index1);
+         if (numDistinct1 < 0) {
+             return false;
+         }
+         double avgNumRowsPerValue1 = details1.getSourceCardinality() / 
numDistinct1;
+         double numDistinct2 = computeNumDistinct(leafInput2, var2, index2);
+         if (numDistinct2 < 0) {
+             return false;
+         }
+         double avgNumRowsPerValue2 = details2.getSourceCardinality() / 
numDistinct2;
+         return avgNumRowsPerValue1 * avgNumRowsPerValue2 * 
Math.min(numDistinct1, numDistinct2) <= Math
+                 .max(Math.max(details1.getSourceCardinality(), 
details2.getSourceCardinality()), 750000);
+     }
+ 
+     private double computeNumDistinct(ILogicalOperator leafInput, 
LogicalVariable var, Index index)
+             throws AlgebricksException {
+         List<List<IAObject>> result = runSamplingQueryDistinct(this.optCtx, 
leafInput, var, index);
+         if (result == null) {
+             return -1; // Negative value indicates failure
+         }
+         double numDistincts = findPredicateCardinality(result, true);
+         Index.SampleIndexDetails details = (Index.SampleIndexDetails) 
index.getIndexDetails();
+         if (numDistincts == 0) {
+             numDistincts = details.getSourceCardinality(); // All values are 
nulls
+         }
+         if (numDistincts == 0) {
+             numDistincts = 1; // Sample is empty
+         }
+         return numDistincts;
+     }
+ 
 +    private boolean acceptableOp(FunctionIdentifier functionIdentifier) {
 +        if (functionIdentifier.equals(AlgebricksBuiltinFunctions.NEQ)) {
 +            return true;
 +        }
 +        if (functionIdentifier.equals(AlgebricksBuiltinFunctions.LT)) {
 +            return true;
 +        }
 +        if (functionIdentifier.equals(AlgebricksBuiltinFunctions.GT)) {
 +            return true;
 +        }
 +        if (functionIdentifier.equals(AlgebricksBuiltinFunctions.GE)) {
 +            return true;
 +        }
 +        if (functionIdentifier.equals(AlgebricksBuiltinFunctions.LE)) {
 +            return true;
 +        }
 +        if (functionIdentifier.equals(BuiltinFunctions.IF_MISSING_OR_NULL)) { 
// added this for q16 in CH2
 +            return true;
 +        }
 +        return false;
 +    }
 +
      private double naiveJoinSelectivity(List<LogicalVariable> exprUsedVars, 
double card1, double card2, int idx1,
              int idx2, boolean unnestOp1, boolean unnestOp2) throws 
AlgebricksException {
          ILogicalOperator leafInput;

Reply via email to