Copilot commented on code in PR #17280:
URL: https://github.com/apache/iotdb/pull/17280#discussion_r2915391387
##########
iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/process/window/partition/Partition.java:
##########
@@ -27,185 +28,195 @@
import org.apache.tsfile.utils.Binary;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
public class Partition {
- private final List<TsBlock> tsBlocks;
+ private final List<Column[]> segments;
private int cachedPositionCount = -1;
public Partition(List<TsBlock> tsBlocks, int startIndexInFirstBlock, int
endIndexInLastBlock) {
+ this.segments = new ArrayList<>(tsBlocks.size());
if (tsBlocks.size() == 1) {
int length = endIndexInLastBlock - startIndexInFirstBlock;
- this.tsBlocks =
-
Collections.singletonList(tsBlocks.get(0).getRegion(startIndexInFirstBlock,
length));
- return;
+ TsBlock region = tsBlocks.get(0).getRegion(startIndexInFirstBlock,
length);
+ segments.add(region.getValueColumns());
+ } else {
+ TsBlock firstBlock = tsBlocks.get(0).subTsBlock(startIndexInFirstBlock);
+ segments.add(firstBlock.getValueColumns());
+ for (int i = 1; i < tsBlocks.size() - 1; i++) {
+ segments.add(tsBlocks.get(i).getValueColumns());
+ }
+ TsBlock lastBlock = tsBlocks.get(tsBlocks.size() - 1).getRegion(0,
endIndexInLastBlock);
+ segments.add(lastBlock.getValueColumns());
}
+ }
- this.tsBlocks = new ArrayList<>(tsBlocks.size());
- // First TsBlock
- TsBlock firstBlock = tsBlocks.get(0).subTsBlock(startIndexInFirstBlock);
- this.tsBlocks.add(firstBlock);
- // Middle TsBlock
- for (int i = 1; i < tsBlocks.size() - 1; i++) {
- this.tsBlocks.add(tsBlocks.get(i));
+ public Partition(List<Slice> slices) {
+ this.segments = new ArrayList<>(slices.size());
+ for (Slice slice : slices) {
+ segments.add(slice.getRequiredColumns());
}
- // Last TsBlock
- TsBlock lastBlock = tsBlocks.get(tsBlocks.size() - 1).getRegion(0,
endIndexInLastBlock);
- this.tsBlocks.add(lastBlock);
+ }
+
+ private Partition(List<Column[]> segments, boolean directSegments) {
+ this.segments = segments;
}
public int getPositionCount() {
if (cachedPositionCount == -1) {
- // Lazy initialized
cachedPositionCount = 0;
- for (TsBlock block : tsBlocks) {
- cachedPositionCount += block.getPositionCount();
+ for (Column[] segment : segments) {
+ cachedPositionCount += segment[0].getPositionCount();
}
Review Comment:
`getPositionCount()` (and `getPartitionIndex()` below) assume every segment
has at least one column and uses `segment[0]` to derive the row count. If a
window partition is built with 0 required/value columns (possible for window
functions like `row_number()` or `count(*)` with no input symbols), this will
throw `ArrayIndexOutOfBoundsException`. Consider tracking segment row counts
separately (e.g., store `int[] segmentPositionCounts` from
`TsBlock.getPositionCount()` / `Slice.getSize()`), so partitions with zero
columns are still supported.
##########
iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/process/window/partition/Partition.java:
##########
@@ -27,185 +28,195 @@
import org.apache.tsfile.utils.Binary;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
public class Partition {
- private final List<TsBlock> tsBlocks;
+ private final List<Column[]> segments;
private int cachedPositionCount = -1;
public Partition(List<TsBlock> tsBlocks, int startIndexInFirstBlock, int
endIndexInLastBlock) {
+ this.segments = new ArrayList<>(tsBlocks.size());
if (tsBlocks.size() == 1) {
int length = endIndexInLastBlock - startIndexInFirstBlock;
- this.tsBlocks =
-
Collections.singletonList(tsBlocks.get(0).getRegion(startIndexInFirstBlock,
length));
- return;
+ TsBlock region = tsBlocks.get(0).getRegion(startIndexInFirstBlock,
length);
+ segments.add(region.getValueColumns());
+ } else {
+ TsBlock firstBlock = tsBlocks.get(0).subTsBlock(startIndexInFirstBlock);
+ segments.add(firstBlock.getValueColumns());
+ for (int i = 1; i < tsBlocks.size() - 1; i++) {
+ segments.add(tsBlocks.get(i).getValueColumns());
+ }
+ TsBlock lastBlock = tsBlocks.get(tsBlocks.size() - 1).getRegion(0,
endIndexInLastBlock);
+ segments.add(lastBlock.getValueColumns());
}
+ }
- this.tsBlocks = new ArrayList<>(tsBlocks.size());
- // First TsBlock
- TsBlock firstBlock = tsBlocks.get(0).subTsBlock(startIndexInFirstBlock);
- this.tsBlocks.add(firstBlock);
- // Middle TsBlock
- for (int i = 1; i < tsBlocks.size() - 1; i++) {
- this.tsBlocks.add(tsBlocks.get(i));
+ public Partition(List<Slice> slices) {
+ this.segments = new ArrayList<>(slices.size());
+ for (Slice slice : slices) {
+ segments.add(slice.getRequiredColumns());
}
- // Last TsBlock
- TsBlock lastBlock = tsBlocks.get(tsBlocks.size() - 1).getRegion(0,
endIndexInLastBlock);
- this.tsBlocks.add(lastBlock);
+ }
+
+ private Partition(List<Column[]> segments, boolean directSegments) {
Review Comment:
The private constructor `Partition(List<Column[]> segments, boolean
directSegments)` takes a `directSegments` flag that is never read. This is
confusing and makes the API harder to understand/maintain. Either remove the
parameter or store it and document what behavioral difference it is meant to
control.
```suggestion
private Partition(List<Column[]> segments) {
```
##########
iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/process/window/TableWindowOperator.java:
##########
@@ -144,178 +140,86 @@ public ListenableFuture<?> isBlocked() {
public TsBlock next() throws Exception {
long startTime = System.nanoTime();
- // Transform is not finished
if (!cachedPartitionExecutors.isEmpty()) {
TsBlock tsBlock = transform(startTime);
if (tsBlock != null) {
return tsBlock;
}
- // Receive more data when result TsBlock builder is not full
- // In this case, all partition executors are done
}
if (inputOperator.hasNextWithTimer()) {
- // This TsBlock is pre-sorted with PARTITION BY and ORDER BY channels
TsBlock preSortedBlock = inputOperator.nextWithTimer();
- // StreamSort Operator sometimes returns null
if (preSortedBlock == null || preSortedBlock.isEmpty()) {
return null;
}
- cachedPartitionExecutors = partition(preSortedBlock);
- if (cachedPartitionExecutors.isEmpty()) {
- // No partition found
- // i.e., partition crosses multiple TsBlocks
- return null;
- }
+ partitionRecognizer.addTsBlock(preSortedBlock);
+ processRecognizerStates();
- // May return null if builder is not full
- return transform(startTime);
- } else if (!cachedTsBlocks.isEmpty()) {
- // Form last partition
- TsBlock lastTsBlock = cachedTsBlocks.get(cachedTsBlocks.size() - 1);
- int endIndexOfLastTsBlock = lastTsBlock.getPositionCount();
- PartitionExecutor partitionExecutor =
- new PartitionExecutor(
- cachedTsBlocks,
- inputDataTypes,
- startIndexInFirstBlock,
- endIndexOfLastTsBlock,
- outputChannels,
- windowFunctions,
- frameInfoList,
- sortChannels);
- cachedPartitionExecutors.addLast(partitionExecutor);
- cachedTsBlocks.clear();
- releaseAllCachedTsBlockMemory();
-
- TsBlock tsBlock = transform(startTime);
- if (tsBlock == null) {
- // TsBlockBuilder is not full
- // Force build since this is the last partition
- tsBlock =
- tsBlockBuilder.build(
- new RunLengthEncodedColumn(
- TIME_COLUMN_TEMPLATE, tsBlockBuilder.getPositionCount()));
- tsBlockBuilder.reset();
+ if (!cachedPartitionExecutors.isEmpty()) {
+ return transform(startTime);
+ }
+ return null;
+ } else if (!noMoreDataSignaled) {
+ partitionRecognizer.noMoreData();
+ noMoreDataSignaled = true;
+ processRecognizerStates();
+
+ if (!cachedPartitionExecutors.isEmpty()) {
+ TsBlock tsBlock = transform(startTime);
+ if (tsBlock == null && !tsBlockBuilder.isEmpty()) {
+ tsBlock = getTsBlockFromTsBlockBuilder();
+ }
+ return tsBlock;
}
-
- return tsBlock;
} else if (!tsBlockBuilder.isEmpty()) {
- // Return remaining data in result TsBlockBuilder
- // This happens when last partition is too large
- // And TsBlockBuilder is not full at the end of transform
return getTsBlockFromTsBlockBuilder();
}
return null;
}
- private LinkedList<PartitionExecutor> partition(TsBlock tsBlock) {
- LinkedList<PartitionExecutor> partitionExecutors = new LinkedList<>();
-
- int partitionStartInCurrentBlock = 0;
- int partitionEndInCurrentBlock = partitionStartInCurrentBlock + 1;
-
- // In this stage, we only consider partition channels
- List<Column> partitionColumns = extractPartitionColumns(tsBlock);
-
- // Previous TsBlocks forms a partition
- if (!cachedTsBlocks.isEmpty()) {
- TsBlock lastTsBlock = cachedTsBlocks.get(cachedTsBlocks.size() - 1);
- int endIndexOfLastTsBlock = lastTsBlock.getPositionCount();
-
- // Whether the first row of current TsBlock is not equal to
- // last row of previous cached TsBlocks
- List<Column> lastPartitionColumns = extractPartitionColumns(lastTsBlock);
- if (!partitionComparator.equal(
- partitionColumns, 0, lastPartitionColumns, endIndexOfLastTsBlock -
1)) {
- PartitionExecutor partitionExecutor =
- new PartitionExecutor(
- cachedTsBlocks,
- inputDataTypes,
- startIndexInFirstBlock,
- endIndexOfLastTsBlock,
- outputChannels,
- windowFunctions,
- frameInfoList,
- sortChannels);
-
- partitionExecutors.addLast(partitionExecutor);
- cachedTsBlocks.clear();
- releaseAllCachedTsBlockMemory();
- startIndexInFirstBlock = -1;
+ private void processRecognizerStates() {
+ while (true) {
+ PartitionState state = partitionRecognizer.nextState();
+ switch (state.getStateType()) {
+ case INIT:
+ case NEED_MORE_DATA:
+ return;
+ case FINISHED:
+ finalizeCurrentPartition();
+ return;
+ case NEW_PARTITION:
+ finalizeCurrentPartition();
+ addSliceToCache(state.getSlice());
+ break;
+ case ITERATING:
+ addSliceToCache(state.getSlice());
+ break;
}
}
+ }
- // Try to find all partitions
- int count = tsBlock.getPositionCount();
- while (count == 1 || partitionEndInCurrentBlock < count) {
- // Try to find one partition
- while (partitionEndInCurrentBlock < count
- && partitionComparator.equalColumns(
- partitionColumns, partitionStartInCurrentBlock,
partitionEndInCurrentBlock)) {
- partitionEndInCurrentBlock++;
- }
-
- if (partitionEndInCurrentBlock != count) {
- // Find partition
- PartitionExecutor partitionExecutor;
- if (partitionStartInCurrentBlock != 0 || startIndexInFirstBlock == -1)
{
- // Small partition within this TsBlock
- partitionExecutor =
- new PartitionExecutor(
- Collections.singletonList(tsBlock),
- inputDataTypes,
- partitionStartInCurrentBlock,
- partitionEndInCurrentBlock,
- outputChannels,
- windowFunctions,
- frameInfoList,
- sortChannels);
- } else {
- // Large partition crosses multiple TsBlocks
- reserveOneTsBlockMemory(tsBlock);
- cachedTsBlocks.add(tsBlock);
- partitionExecutor =
- new PartitionExecutor(
- cachedTsBlocks,
- inputDataTypes,
- startIndexInFirstBlock,
- partitionEndInCurrentBlock,
- outputChannels,
- windowFunctions,
- frameInfoList,
- sortChannels);
- // Clear TsBlock of last partition
- cachedTsBlocks.clear();
- releaseAllCachedTsBlockMemory();
- }
- partitionExecutors.addLast(partitionExecutor);
-
- partitionStartInCurrentBlock = partitionEndInCurrentBlock;
- // Reset cross-TsBlock tracking after partition completion
- startIndexInFirstBlock = -1;
- } else {
- // Last partition of TsBlock
- // The beginning of next TsBlock may have rows in this partition
- if (startIndexInFirstBlock == -1) {
- startIndexInFirstBlock = partitionStartInCurrentBlock;
- }
- reserveOneTsBlockMemory(tsBlock);
- cachedTsBlocks.add(tsBlock);
- // For count == 1
- break;
- }
+ private void finalizeCurrentPartition() {
+ if (!partitionCache.isEmpty()) {
+ Partition partition = new Partition(partitionCache.getSlices());
+ PartitionExecutor partitionExecutor =
+ new PartitionExecutor(
+ partition,
+ inputDataTypes,
+ outputChannels,
+ windowFunctions,
+ frameInfoList,
+ sortChannels);
+ cachedPartitionExecutors.addLast(partitionExecutor);
+ releasePartitionCacheMemory();
+ partitionCache.clear();
}
Review Comment:
`releasePartitionCacheMemory()` releases memory based on
`partitionCache.getEstimatedSize()`, but `PartitionCache.clear()` does not
reset its internal `estimatedSize` field. After the first partition is
finalized, subsequent partitions will cause `released` to be too large (and
`totalMemorySize` can go negative), leading to incorrect cumulative memory
accounting. Fix by resetting `estimatedSize` to 0 when clearing the cache (or
provide a dedicated reset method and call it alongside `clear()`).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]