This is an automated email from the ASF dual-hosted git repository.
capistrant pushed a commit to branch 34.0.0
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/34.0.0 by this push:
new c77d8d5a1d9 Enable Dart ResultIterator to consider queryTimeout and
enhance CI with stack traces (#18263) (#18284)
c77d8d5a1d9 is described below
commit c77d8d5a1d9f9296ff587037052624fc79430c44
Author: Lucas Capistrant <[email protected]>
AuthorDate: Fri Jul 18 12:43:03 2025 -0500
Enable Dart ResultIterator to consider queryTimeout and enhance CI with
stack traces (#18263) (#18284)
* Revert "undo jfr related"
This reverts commit 9a58c15216faa3c5587a54ded33eeae5a7961895.
* add-tag
* add capture jstack every minute
* add timeout to the Dart query result reader (DartQueryMaker.ResultIterator)
Co-authored-by: Zoltan Haindrich <[email protected]>
---
.../scripts/{run_unit-tests => collect_jstacks} | 17 +++++++++-------
.github/scripts/run_unit-tests | 2 +-
.github/scripts/setup_test_profiling_env.sh | 15 +++++++-------
.github/workflows/worker.yml | 17 ++++++++++++++--
.../msq/dart/controller/sql/DartQueryMaker.java | 23 ++++++++++++++++++++--
.../java/org/apache/druid/msq/exec/Controller.java | 3 +++
.../org/apache/druid/msq/exec/ControllerImpl.java | 6 ++++++
.../java/org/apache/druid/query/QueryContext.java | 10 ++++++++++
8 files changed, 73 insertions(+), 20 deletions(-)
diff --git a/.github/scripts/run_unit-tests b/.github/scripts/collect_jstacks
similarity index 72%
copy from .github/scripts/run_unit-tests
copy to .github/scripts/collect_jstacks
index 91bdbfcbbc0..7f4d2a6a160 100755
--- a/.github/scripts/run_unit-tests
+++ b/.github/scripts/collect_jstacks
@@ -14,12 +14,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+#--------------------------------------------------------------------
-set -e
-set -x
+echo "@ capturing threaddumps every minute for all java processes" >&2
-OPTS+="-pl
!integration-tests,!:druid-it-tools,!:druid-it-image,!:druid-it-cases"
-OPTS+=" -Dsurefire.failIfNoSpecifiedTests=false -P skip-static-checks
-Dweb.console.skip=true"
-OPTS+=" -Djacoco.destFile=target/jacoco-${HASH}.exec"
-
-mvn -B $OPTS test "$@"
+while :;do
+ sleep 60
+ t=`date +%s`
+ echo "@ capturing threaddumps $t" >&2
+ pgrep java | while read pid;do
+ jstack $pid > target/jstack.$pid.$t
+ done
+done
diff --git a/.github/scripts/run_unit-tests b/.github/scripts/run_unit-tests
index 91bdbfcbbc0..0e13bd3206c 100755
--- a/.github/scripts/run_unit-tests
+++ b/.github/scripts/run_unit-tests
@@ -22,4 +22,4 @@ OPTS+="-pl
!integration-tests,!:druid-it-tools,!:druid-it-image,!:druid-it-cases
OPTS+=" -Dsurefire.failIfNoSpecifiedTests=false -P skip-static-checks
-Dweb.console.skip=true"
OPTS+=" -Djacoco.destFile=target/jacoco-${HASH}.exec"
-mvn -B $OPTS test "$@"
+mvn -B $OPTS test "-DjfrProfilerArgLine=$JFR_PROFILER_ARG_LINE" "$@"
diff --git a/.github/scripts/setup_test_profiling_env.sh
b/.github/scripts/setup_test_profiling_env.sh
index b3bf4dc1844..0becd0cc257 100755
--- a/.github/scripts/setup_test_profiling_env.sh
+++ b/.github/scripts/setup_test_profiling_env.sh
@@ -21,8 +21,9 @@ JAR_INPUT_FILE="jfr-profiler-1.0.0.jar"
JAR_OUTPUT_FILE="jfr-profiler.jar"
ENV_VAR="JFR_PROFILER_ARG_LINE"
-if [ "$#" -ne 5 ]; then
- echo "usage: $0 <jdk_version> <run_id> <run_number> <run_attempt> <module>"
+if [ "$#" -lt 1 ]; then
+ echo "usage: $0 <jdk_version> [<tag>=<value> ...]"
+ exit 1
fi
if [[ "$1" -ge "17" ]];
@@ -35,18 +36,16 @@ then
# Extract the version number using grep and awk
jvm_version=$(echo "$output" | grep "version" | awk -F '"' '{print $2}')
+ shift
+ tags="${@/#/-Djfr.profiler.tags.}"
echo $ENV_VAR=-javaagent:"$PWD"/$JAR_OUTPUT_FILE \
-Djfr.profiler.http.username=druid-ci \
-Djfr.profiler.http.password=w3Fb6PW8LIo849mViEkbgA== \
-Djfr.profiler.tags.project=druid \
- -Djfr.profiler.tags.run_id=$2 \
- -Djfr.profiler.tags.run_number=$3 \
- -Djfr.profiler.tags.run_attempt=$4 \
- -Djfr.profiler.tags.module=$5 \
- -Djfr.profiler.tags.jvm_version=$jvm_version
+ -Djfr.profiler.tags.jvm_version=$jvm_version \
+ "${tags[@]}"
else
echo $ENV_VAR=\"\"
fi
-
diff --git a/.github/workflows/worker.yml b/.github/workflows/worker.yml
index 7c0b7842e4e..f79cc8ad11b 100644
--- a/.github/workflows/worker.yml
+++ b/.github/workflows/worker.yml
@@ -41,6 +41,14 @@ on:
env:
SEGMENT_DOWNLOAD_TIMEOUT_MINS: 5
+ INPUTS_JDK: ${{ inputs.jdk }}
+ INPUTS_KEY: ${{ inputs.key }}
+ GITHUB_RUN_ID: ${{ github.run_id }}
+ GITHUB_RUN_NUMBER: ${{ github.run_number }}
+ GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
+ GITHUB_EVENT_REF: ${{ github.event_name }}-${{ github.event.number ||
github.ref_name }}
+ GITHUB_RUN_URL: ${{ github.server_url }}/${{ github.repository
}}/actions/runs/${{ github.run_id }}
+ GITHUB_WORKFLOW_REF: ${{ github.workflow_ref }}
jobs:
execute:
@@ -65,8 +73,12 @@ jobs:
pattern: ${{ inputs.artifacts_to_download }}
merge-multiple: true
- - name: Calculate hash for artifact name
- run: echo "HASH=$(echo -n "${{ inputs.key }}" | sha256sum | cut -c-8)"
>> $GITHUB_ENV
+ - name: Prepare GITHUB_ENV
+ run: |
+ set -x
+ ./.github/scripts/setup_test_profiling_env.sh ${{ inputs.jdk }}
run_id=$GITHUB_RUN_ID run_number=$GITHUB_RUN_NUMBER
run_attempt=$GITHUB_RUN_ATTEMPT key=${{ inputs.key }}
event_ref=$GITHUB_EVENT_REF run_url=$GITHUB_RUN_URL >> $GITHUB_ENV
+ echo "HASH=$(echo -n "${{ inputs.key }}" | sha256sum | cut -c-8)" >>
$GITHUB_ENV
+ ./.github/scripts/collect_jstacks &
- name: 'Execute: ${{ inputs.script }}'
run: ${{ inputs.script }}
@@ -83,3 +95,4 @@ jobs:
**/core.[0-9]*
**/TEST-*.xml
**/target/jacoco*.exec
+ **/target/jstack*
diff --git
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/sql/DartQueryMaker.java
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/sql/DartQueryMaker.java
index 60563d6daf4..c40a59f208b 100644
---
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/sql/DartQueryMaker.java
+++
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/sql/DartQueryMaker.java
@@ -22,6 +22,7 @@ package org.apache.druid.msq.dart.controller.sql;
import com.google.common.base.Throwables;
import com.google.common.collect.Iterators;
import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.druid.error.DruidException;
import org.apache.druid.io.LimitedOutputStream;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.Either;
@@ -67,6 +68,7 @@ import org.apache.druid.sql.calcite.run.SqlResults;
import javax.annotation.Nullable;
import java.io.ByteArrayOutputStream;
+import java.time.Duration;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
@@ -79,6 +81,7 @@ import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
/**
@@ -362,12 +365,13 @@ public class DartQueryMaker implements QueryMaker
class ResultIteratorMaker implements BaseSequence.IteratorMaker<Object[],
ResultIterator>
{
private final ControllerHolder controllerHolder;
- private final ResultIterator resultIterator = new ResultIterator();
+ private final ResultIterator resultIterator;
private boolean made;
public ResultIteratorMaker(ControllerHolder holder)
{
this.controllerHolder = holder;
+ this.resultIterator = new
ResultIterator(controllerHolder.getController().getQueryContext().getTimeoutDuration());
submitController();
}
@@ -464,6 +468,14 @@ public class DartQueryMaker implements QueryMaker
private volatile boolean complete;
+ @Nullable
+ private final Duration timeout;
+
+ public ResultIterator(@Nullable Duration timeout)
+ {
+ this.timeout = timeout;
+ }
+
@Override
public boolean hasNext()
{
@@ -482,7 +494,14 @@ public class DartQueryMaker implements QueryMaker
{
if (current == null) {
try {
- current = rowBuffer.take();
+ if (timeout != null) {
+ current = rowBuffer.poll(timeout.toMillis(),
TimeUnit.MILLISECONDS);
+ if (current == null) {
+ throw DruidException.defensive("Result reader timed out [%s]",
timeout);
+ }
+ } else {
+ current = rowBuffer.take();
+ }
}
catch (InterruptedException e) {
Thread.currentThread().interrupt();
diff --git
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/Controller.java
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/Controller.java
index d2fc2bd5b43..1370a89da23 100644
---
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/Controller.java
+++
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/Controller.java
@@ -28,6 +28,7 @@ import org.apache.druid.msq.indexing.error.CancellationReason;
import org.apache.druid.msq.indexing.error.MSQErrorReport;
import org.apache.druid.msq.kernel.StageId;
import org.apache.druid.msq.statistics.PartialKeyStatisticsInformation;
+import org.apache.druid.query.QueryContext;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.sql.calcite.run.SqlEngine;
@@ -135,4 +136,6 @@ public interface Controller
TaskReport.ReportMap liveReports();
ControllerContext getControllerContext();
+
+ QueryContext getQueryContext();
}
diff --git
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java
index a68a58ae476..0851fa7be7b 100644
---
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java
+++
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java
@@ -2942,4 +2942,10 @@ public class ControllerImpl implements Controller
{
return context;
}
+
+ @Override
+ public QueryContext getQueryContext()
+ {
+ return querySpec.getContext();
+ }
}
diff --git a/processing/src/main/java/org/apache/druid/query/QueryContext.java
b/processing/src/main/java/org/apache/druid/query/QueryContext.java
index d5368425ed0..b90be50e89a 100644
--- a/processing/src/main/java/org/apache/druid/query/QueryContext.java
+++ b/processing/src/main/java/org/apache/druid/query/QueryContext.java
@@ -34,6 +34,7 @@ import org.apache.druid.query.filter.TypedInFilter;
import javax.annotation.Nullable;
import java.io.IOException;
+import java.time.Duration;
import java.util.Collections;
import java.util.Map;
import java.util.Objects;
@@ -505,6 +506,15 @@ public class QueryContext
);
}
+ @Nullable
+ public Duration getTimeoutDuration()
+ {
+ if (hasTimeout()) {
+ return Duration.ofMillis(getTimeout());
+ }
+ return null;
+ }
+
public long getDefaultTimeout()
{
final long defaultTimeout = getLong(QueryContexts.DEFAULT_TIMEOUT_KEY,
QueryContexts.DEFAULT_TIMEOUT_MILLIS);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]