Caideyipi commented on code in PR #17770:
URL: https://github.com/apache/iotdb/pull/17770#discussion_r3303309407


##########
iotdb-core/datanode/src/main/java/org/apache/iotdb/db/pipe/event/common/tsfile/parser/scan/TsFileInsertionEventScanParser.java:
##########
@@ -319,8 +324,8 @@ private Tablet getNextTablet() {
                 PipeMemoryWeightUtil.calculateTabletRowCountAndMemory(data);
             tablet =
                 new Tablet(
-                    currentDevice.toString(), currentMeasurements, 
rowCountAndMemorySize.getLeft());
-            tablet.initBitMaps();
+                    currentDeviceString, currentMeasurements, 
rowCountAndMemorySize.getLeft());
+            PipeTabletUtils.initEmptyBitMaps(tablet);

Review Comment:
   Agreed. The empty bitmap shell is not needed now because markNullValue 
creates the array and the per-column BitMap lazily. Removed these eager init 
calls from the TsFile parser paths in 4879c3a.



##########
iotdb-core/datanode/src/main/java/org/apache/iotdb/db/pipe/event/common/tablet/PipeTabletUtils.java:
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.db.pipe.event.common.tablet;
+
+import org.apache.iotdb.db.i18n.DataNodePipeMessages;
+
+import org.apache.tsfile.common.conf.TSFileConfig;
+import org.apache.tsfile.enums.TSDataType;
+import org.apache.tsfile.utils.Binary;
+import org.apache.tsfile.utils.BitMap;
+import org.apache.tsfile.write.UnSupportedDataTypeException;
+import org.apache.tsfile.write.record.Tablet;
+import org.apache.tsfile.write.schema.IMeasurementSchema;
+import org.apache.tsfile.write.schema.MeasurementSchema;
+
+import java.time.LocalDate;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+public final class PipeTabletUtils {
+
+  private PipeTabletUtils() {}
+
+  public static final class TabletStringInternPool {
+
+    private final Map<String, String> internedStrings = new HashMap<>();
+
+    public String intern(final String value) {
+      if (Objects.isNull(value)) {
+        return null;
+      }
+
+      final String internedValue = internedStrings.get(value);
+      if (Objects.nonNull(internedValue)) {
+        return internedValue;
+      }
+
+      internedStrings.put(value, value);
+      return value;
+    }
+
+    public void intern(final String[] values) {
+      if (Objects.isNull(values)) {
+        return;
+      }
+
+      for (int i = 0; i < values.length; ++i) {
+        values[i] = intern(values[i]);
+      }
+    }
+
+    public void intern(final List<String> values) {
+      if (Objects.isNull(values)) {
+        return;
+      }
+
+      for (int i = 0; i < values.size(); ++i) {
+        values.set(i, intern(values.get(i)));
+      }
+    }
+
+    public Tablet intern(final Tablet tablet) {
+      if (Objects.isNull(tablet)) {
+        return null;
+      }
+
+      tablet.setInsertTargetName(intern(tablet.getDeviceId()));
+      internMeasurementSchemas(tablet.getSchemas());
+      return tablet;
+    }
+
+    public void internMeasurementSchemas(final List<IMeasurementSchema> 
schemas) {
+      if (Objects.isNull(schemas)) {
+        return;
+      }
+
+      for (final IMeasurementSchema schema : schemas) {
+        if (schema instanceof MeasurementSchema) {
+          intern((MeasurementSchema) schema);
+        }
+      }
+    }
+
+    public MeasurementSchema intern(final MeasurementSchema schema) {
+      if (Objects.isNull(schema)) {
+        return null;
+      }
+
+      schema.setMeasurementName(intern(schema.getMeasurementName()));
+      schema.setProps(intern(schema.getProps()));
+      return schema;
+    }
+
+    private Map<String, String> intern(final Map<String, String> props) {
+      if (Objects.isNull(props) || props.isEmpty()) {
+        return props;
+      }
+
+      final Map<String, String> internedProps = new HashMap<>(props.size());
+      for (final Map.Entry<String, String> entry : props.entrySet()) {
+        internedProps.put(intern(entry.getKey()), intern(entry.getValue()));
+      }
+      return internedProps;
+    }
+  }
+
+  public static Tablet internTablet(
+      final Tablet tablet, final TabletStringInternPool 
tabletStringInternPool) {
+    return Objects.nonNull(tabletStringInternPool) ? 
tabletStringInternPool.intern(tablet) : tablet;
+  }
+
+  public static void initEmptyBitMaps(final Tablet tablet) {
+    tablet.setBitMaps(new BitMap[getColumnCount(tablet)]);
+  }
+
+  public static void compactBitMaps(final Tablet tablet) {
+    if (Objects.isNull(tablet)) {
+      return;
+    }
+    tablet.setBitMaps(compactBitMaps(tablet.getBitMaps(), 
tablet.getRowSize()));
+  }
+
+  public static BitMap[] compactBitMaps(final BitMap[] bitMaps, final int 
rowCount) {
+    if (Objects.isNull(bitMaps)) {
+      return null;
+    }
+
+    boolean hasMarkedBitMap = false;
+    for (int i = 0; i < bitMaps.length; ++i) {
+      if (Objects.nonNull(bitMaps[i])
+          && bitMaps[i].isAllUnmarked(Math.min(rowCount, 
bitMaps[i].getSize()))) {
+        bitMaps[i] = null;
+      }
+      if (Objects.nonNull(bitMaps[i])) {
+        hasMarkedBitMap = true;
+      }
+    }
+
+    return hasMarkedBitMap ? bitMaps : null;
+  }
+
+  public static BitMap[] copyBitMapsOrCreateEmpty(final Tablet tablet) {
+    final BitMap[] bitMaps = tablet.getBitMaps();
+    return Objects.nonNull(bitMaps)
+        ? Arrays.copyOf(bitMaps, bitMaps.length)
+        : new BitMap[getColumnCount(tablet)];
+  }
+
+  public static void markNullValue(final Tablet tablet, final int rowIndex, 
final int columnIndex) {
+    final BitMap[] bitMaps = ensureBitMaps(tablet, columnIndex + 1);
+    if (Objects.isNull(bitMaps[columnIndex])) {
+      bitMaps[columnIndex] = new BitMap(tablet.getMaxRowNumber());
+    }
+    bitMaps[columnIndex].mark(rowIndex);
+  }
+
+  public static void putTimestamp(final Tablet tablet, final int rowIndex, 
final long timestamp) {
+    tablet.getTimestamps()[rowIndex] = timestamp;
+    tablet.setRowSize(Math.max(tablet.getRowSize(), rowIndex + 1));
+  }
+
+  public static void putValue(
+      final Tablet tablet,
+      final int rowIndex,
+      final int columnIndex,
+      final TSDataType dataType,
+      final Object value) {
+    switch (dataType) {

Review Comment:
   These helpers keep row filling independent from Tablet.addValue: 
putTimestamp is the only place that advances rowSize after a row has at least 
one non-null value, and putValue writes by column index while preserving 
DATE/Binary conversions. I also removed the obsolete initEmptyBitMaps helper 
and made bitmap allocation entirely lazy through markNullValue.



##########
iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/planner/plan/node/write/InsertTabletNode.java:
##########
@@ -441,6 +445,42 @@ protected BitMap[] initBitmaps(int columnSize, int 
rowSize) {
     return bitMaps;
   }
 
+  protected BitMap[] initBitmapsForSplit(int columnSize, int rowSize) {
+    if (this.bitMaps == null) {
+      return null;
+    }
+
+    final int sourceRowCount = rowCount > 0 ? rowCount : times == null ? 0 : 
times.length;
+    final BitMap[] splitBitMaps = new BitMap[columnSize];
+    boolean hasBitMap = false;
+    for (int i = 0; i < columnSize && i < this.bitMaps.length; ++i) {
+      if (this.bitMaps[i] != null
+          && !this.bitMaps[i].isAllUnmarked(Math.min(sourceRowCount, 
this.bitMaps[i].getSize()))) {
+        splitBitMaps[i] = new BitMap(rowSize);
+        hasBitMap = true;
+      }
+    }
+    return hasBitMap ? splitBitMaps : null;
+  }
+
+  protected static BitMap[] compactBitMaps(final BitMap[] bitMaps, final int 
rowCount) {
+    if (bitMaps == null) {
+      return null;
+    }
+
+    boolean hasBitMap = false;
+    for (int i = 0; i < bitMaps.length; ++i) {
+      if (bitMaps[i] != null
+          && bitMaps[i].isAllUnmarked(Math.min(rowCount, 
bitMaps[i].getSize()))) {
+        bitMaps[i] = null;
+      }
+      if (bitMaps[i] != null) {
+        hasBitMap = true;
+      }
+    }
+    return hasBitMap ? bitMaps : null;
+  }

Review Comment:
   Fixed in c16fa23. The duplicated loop is now in BitMapUtils.compactBitMaps, 
and InsertTabletNode delegates to it.



##########
iotdb-core/datanode/src/main/java/org/apache/iotdb/db/pipe/event/common/tsfile/parser/scan/TsFileInsertionEventScanParser.java:
##########
@@ -412,37 +418,68 @@ private boolean putValueToColumns(final BatchData data, 
final Tablet tablet, fin
             case TEXT:
             case BLOB:
             case STRING:
-              tablet.addValue(rowIndex, i, Binary.EMPTY_VALUE.getValues());
+              PipeTabletUtils.putValue(
+                  tablet, rowIndex, i, tablet.getSchemas().get(i).getType(), 
Binary.EMPTY_VALUE);
           }
-          tablet.getBitMaps()[i].mark(rowIndex);
+          PipeTabletUtils.markNullValue(tablet, rowIndex, i);
           continue;
         }
 
         isNeedFillTime = true;
         switch (tablet.getSchemas().get(i).getType()) {
           case BOOLEAN:
-            tablet.addValue(rowIndex, i, primitiveType.getBoolean());
+            PipeTabletUtils.putValue(
+                tablet,
+                rowIndex,
+                i,
+                tablet.getSchemas().get(i).getType(),
+                primitiveType.getBoolean());

Review Comment:
   The point is to avoid Tablet.addValue plus eager tablet.initBitMaps. 
putValue writes directly to the preallocated value arrays by column index, and 
markNullValue allocates only the column bitmap that actually has nulls. 
PipeRowCollector was migrated to the same path as well.



##########
iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/statement/crud/InsertTabletStatement.java:
##########
@@ -900,6 +900,24 @@ private Object convertColumnToTablet(
     return columnValue;
   }
 
+  private static BitMap[] compactBitMaps(final BitMap[] bitMaps, final int 
rowCount) {
+    if (bitMaps == null) {
+      return null;
+    }
+
+    boolean hasBitMap = false;
+    for (int i = 0; i < bitMaps.length; ++i) {
+      if (bitMaps[i] != null
+          && bitMaps[i].isAllUnmarked(Math.min(rowCount, 
bitMaps[i].getSize()))) {
+        bitMaps[i] = null;
+      }
+      if (bitMaps[i] != null) {
+        hasBitMap = true;
+      }
+    }
+    return hasBitMap ? bitMaps : null;
+  }
+

Review Comment:
   Fixed in c16fa23 as well. InsertTabletStatement now uses the shared 
BitMapUtils.compactBitMaps implementation.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to