FrankChen021 commented on code in PR #19511:
URL: https://github.com/apache/druid/pull/19511#discussion_r3298113952
##########
indexing-service/src/main/java/org/apache/druid/indexing/compact/CascadingReindexingTemplate.java:
##########
@@ -328,86 +316,32 @@ public List<CompactionJob> createCompactionJobs(
return Collections.emptyList();
}
- final List<CompactionJob> allJobs = new ArrayList<>();
- final DateTime currentTime = jobParams.getScheduleStartTime();
-
- SegmentTimeline timeline = jobParams.getTimeline(dataSource);
-
+ final SegmentTimeline timeline = jobParams.getTimeline(dataSource);
if (timeline == null || timeline.isEmpty()) {
LOG.warn("Segment timeline null or empty for [%s] skipping creating
compaction jobs.", dataSource);
return Collections.emptyList();
}
- List<IntervalPartitioningInfo> searchIntervals =
generateAlignedSearchIntervals(currentTime);
- if (searchIntervals.isEmpty()) {
- LOG.warn("No search intervals generated for dataSource[%s], no
reindexing jobs will be created", dataSource);
- return Collections.emptyList();
- }
+ final ReindexingPlan plan = new
ReindexingPlanner(this).plan(jobParams.getScheduleStartTime(), timeline);
Review Comment:
[P2] Job creation ignores planner validation errors
`ReindexingPlanner.plan()` now converts invalid granularity timelines into a
`PlanValidationError` and an empty interval list for API preview, but the job
creation path never checks `plan.getValidationError()`. Invalid cascading
configs that previously failed job generation can now silently produce no jobs,
masking a broken supervisor as an empty run. Please preserve the old failure or
surface the validation error before returning jobs.
##########
indexing-service/src/main/java/org/apache/druid/indexing/compact/ReindexingPlanner.java:
##########
@@ -0,0 +1,279 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.indexing.compact;
+
+import org.apache.druid.java.util.common.IAE;
+import org.apache.druid.java.util.common.logger.Logger;
+import org.apache.druid.server.compaction.IntervalPartitioningInfo;
+import
org.apache.druid.server.coordinator.InlineSchemaDataSourceCompactionConfig;
+import org.apache.druid.timeline.SegmentTimeline;
+import org.joda.time.DateTime;
+import org.joda.time.Interval;
+import org.joda.time.Period;
+
+import javax.annotation.Nullable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Builds a {@link ReindexingPlan} from a {@link CascadingReindexingTemplate}
for a given reference
+ * time and the live segment timeline. The plan is the single source of truth
for both
+ * compaction-job creation and the API timeline view, ensuring the two cannot
drift.
+ *
+ * <p>The data range bounds the planned intervals (no-overlap intervals become
+ * {@link ReindexingPlan.IntervalDisposition#SKIPPED_NO_DATA}), {@code
skipOffsetFromLatest} is
+ * anchored on the live data, and intervals that extend past the skip boundary
are truncated or
+ * marked {@link ReindexingPlan.IntervalDisposition#SKIPPED_BEYOND_BOUNDARY}.
+ *
+ * <p>Validation failures (misconfigured granularity ordering, no rules) are
caught and surfaced
+ * on the returned plan as a {@link ReindexingPlan.PlanValidationError} so
HTTP callers can render
+ * them in the response rather than as 500s.
+ */
+final class ReindexingPlanner
+{
+ private static final Logger LOG = new Logger(ReindexingPlanner.class);
+
+ private final CascadingReindexingTemplate template;
+
+ ReindexingPlanner(CascadingReindexingTemplate template)
+ {
+ this.template = template;
+ }
+
+ /**
+ * Build a plan for the given reference time. The plan reflects exactly what
+ * {@link CascadingReindexingTemplate#createCompactionJobs} would do given
the same
+ * {@code timeline} and reference time.
+ *
+ * @param timeline live segment timeline for the datasource; must be
non-null and non-empty —
+ * callers are responsible for short-circuiting when no data
exists rather than
+ * calling this with a fabricated empty timeline.
+ */
+ ReindexingPlan plan(DateTime referenceTime, SegmentTimeline timeline)
+ {
+ final String dataSource = template.getDataSource();
+
+ if (!template.getReindexingRuleProvider().isReady()) {
+ LOG.info(
+ "Rule provider [%s] is not ready for dataSource[%s], returning empty
plan",
+ template.getReindexingRuleProvider().getType(),
+ dataSource
+ );
+ return new ReindexingPlan(dataSource, referenceTime, null,
Collections.emptyList(), null);
+ }
+
+ final List<IntervalPartitioningInfo> searchIntervals;
+ try {
+ searchIntervals = template.generateAlignedSearchIntervals(referenceTime);
+ }
+ catch (SegmentGranularityTimelineValidationException e) {
+ LOG.warn(e, "Granularity timeline validation failed for dataSource[%s]",
dataSource);
+ return new ReindexingPlan(
+ dataSource,
+ referenceTime,
+ null,
+ Collections.emptyList(),
+ new ReindexingPlan.PlanValidationError(
+
ReindexingPlan.PlanValidationError.Type.INVALID_GRANULARITY_TIMELINE,
+ e.getMessage(),
+ e.getOlderInterval(),
+ e.getOlderGranularity(),
+ e.getNewerInterval(),
+ e.getNewerGranularity()
+ )
+ );
+ }
+ catch (IAE e) {
+ LOG.warn(e, "Validation failed while planning timeline for
dataSource[%s]", dataSource);
+ return new ReindexingPlan(
+ dataSource,
+ referenceTime,
+ null,
+ Collections.emptyList(),
+ new ReindexingPlan.PlanValidationError(
+ ReindexingPlan.PlanValidationError.Type.VALIDATION_ERROR,
+ e.getMessage()
+ )
+ );
+ }
+
+ if (searchIntervals.isEmpty()) {
+ LOG.debug("No search intervals generated for dataSource[%s]",
dataSource);
+ return new ReindexingPlan(dataSource, referenceTime, null,
Collections.emptyList(), null);
+ }
+
+ final ReindexingPlan.SkipOffsetResolution skipOffsetResolution =
buildSkipOffsetResolution(timeline, referenceTime);
+ final Interval dataRangeWithSkipOffset =
computeDataRangeWithSkipOffset(timeline, referenceTime);
+ if (dataRangeWithSkipOffset == null) {
+ LOG.debug("All data for dataSource[%s] is within skip offsets; no
intervals will be planned", dataSource);
+ return new ReindexingPlan(dataSource, referenceTime,
skipOffsetResolution, Collections.emptyList(), null);
+ }
+
+ final List<ReindexingPlan.PlannedInterval> planned = new
ArrayList<>(searchIntervals.size());
+ for (int i = 0; i < searchIntervals.size(); i++) {
+ final IntervalPartitioningInfo originalInfo = searchIntervals.get(i);
+ final Interval originalInterval = originalInfo.getInterval();
+
+ if (!originalInterval.overlaps(dataRangeWithSkipOffset)) {
Review Comment:
[P2] Skip-offset-only intervals are mislabeled as no-data
The no-overlap branch also catches intervals that are entirely after
`dataRangeWithSkipOffset` because of `skipOffsetFromNow` or
`skipOffsetFromLatest`. Those intervals still overlap the live timeline, but
they are recorded as `SKIPPED_NO_DATA`; `ReindexingTimelineView` filters that
disposition out, so the preview hides intervals skipped only by the skip-offset
boundary. Please distinguish live intervals beyond the truncation boundary and
mark them `SKIPPED_BEYOND_BOUNDARY`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]