This is an automated email from the ASF dual-hosted git repository.

jiangtian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 92308f2ba5a Only delete data, but don't delete tsfile when performing 
a drop column statement on the table model (#16936)
92308f2ba5a is described below

commit 92308f2ba5a7cc8e48f346aa4ee3f145f8c41537
Author: libo <[email protected]>
AuthorDate: Mon Jan 12 17:07:27 2026 +0800

    Only delete data, but don't delete tsfile when performing a drop column 
statement on the table model (#16936)
    
    * Remove the code that checks whether the port is occupied, and resolve the 
problem that files can't be renamed successfully.
    
    * Only delete data, but don't delete tsfile when performing a drop column 
statement on the table model or a drop tag statement on the tree model.
    
    * Fix NPE
    
    * Don't delete the tsfile when a tag is used in the where clause of a delete 
statement on the table model.
    
    * stash
    
    * Don't delete the corresponding tsfiles directly if the IDPredicate type is 
not NOP when deleting table data by tag.
    
    * An incompatibility exception occurs when merging statistics; rewrite the 
statistics in the current chunk metadata to resolve the "Statistics classes 
mismatched: class org.apache.tsfile.file.metadata.statistics.BinaryStatistics 
vs. class org.apache.tsfile.file.metadata.statistics.IntegerStatistics" 
exception.
    
    * Don't merge statistics when the two types are not compatible;
    fix the statistics regeneration logic.
    
    * Fix the out-of-bounds problem from the unit test.
    
    * Use a constant variable instead of frequently creating an empty Binary 
object.
    
    * When the statement to execute is to drop a column, put it in the mods 
file instead of deleting the tsfile.
    
    * Repair and add the logic involving the DATE data type.
    
    * Fix the out-of-bounds problem in SchemaUtilsTest.
    
    * When executing a query statement, get the chunkMetadata at the current 
index itself, to avoid reading the chunkMetadata of other indexes in the array.
    Place a null value when the chunkMetadata is not compatible with the 
target data type.
    
    * Fix the issue "Only the target component should be written, not all of 
them".
    
    * Fix unit test.
    
    * Fix unit test.
---
 .../relational/it/db/it/IoTDBDeletionTableIT.java  | 158 +++++++++++
 .../it/schema/IoTDBAlterColumnTypeIT.java          |   1 +
 .../schemaregion/utils/ResourceByPathUtils.java    |  26 +-
 .../db/storageengine/dataregion/DataRegion.java    |  21 +-
 .../dataregion/modification/DeletionPredicate.java |   4 +
 .../org/apache/iotdb/db/utils/SchemaUtils.java     | 294 +++++++++++++--------
 .../org/apache/iotdb/db/utils/SchemaUtilsTest.java | 149 ++++++++++-
 7 files changed, 516 insertions(+), 137 deletions(-)

diff --git 
a/integration-test/src/test/java/org/apache/iotdb/relational/it/db/it/IoTDBDeletionTableIT.java
 
b/integration-test/src/test/java/org/apache/iotdb/relational/it/db/it/IoTDBDeletionTableIT.java
index 41ac6334030..e6939c5226a 100644
--- 
a/integration-test/src/test/java/org/apache/iotdb/relational/it/db/it/IoTDBDeletionTableIT.java
+++ 
b/integration-test/src/test/java/org/apache/iotdb/relational/it/db/it/IoTDBDeletionTableIT.java
@@ -33,8 +33,11 @@ import 
org.apache.iotdb.itbase.exception.ParallelRequestTimeoutException;
 import org.apache.iotdb.rpc.IoTDBConnectionException;
 import org.apache.iotdb.rpc.StatementExecutionException;
 
+import org.apache.tsfile.enums.ColumnCategory;
+import org.apache.tsfile.enums.TSDataType;
 import org.apache.tsfile.read.common.RowRecord;
 import org.apache.tsfile.read.common.TimeRange;
+import org.apache.tsfile.write.record.Tablet;
 import org.awaitility.Awaitility;
 import org.junit.After;
 import org.junit.AfterClass;
@@ -60,6 +63,7 @@ import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.sql.Statement;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Locale;
 import java.util.Random;
@@ -74,6 +78,7 @@ import java.util.concurrent.atomic.AtomicLong;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
+import static 
org.apache.iotdb.relational.it.session.IoTDBSessionRelationalIT.genValue;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
@@ -2289,6 +2294,159 @@ public class IoTDBDeletionTableIT {
     cleanData(testNum);
   }
 
+  @Test
+  public void testDeleteDataByTag() throws IoTDBConnectionException, 
StatementExecutionException {
+    try (ITableSession session = 
EnvFactory.getEnv().getTableSessionConnectionWithDB("test")) {
+      session.executeNonQueryStatement(
+          "CREATE TABLE IF NOT EXISTS delete_by_tag (deviceId STRING TAG, s1 
INT32 FIELD)");
+
+      session.executeNonQueryStatement(
+          "insert into delete_by_tag (time, deviceId, s1) values (1, 'sensor', 
1)");
+      session.executeNonQueryStatement(
+          "insert into delete_by_tag (time, deviceId, s1) values (2, 'sensor', 
2)");
+      session.executeNonQueryStatement(
+          "insert into delete_by_tag (time, deviceId, s1) values (3, 'sensor', 
3)");
+      session.executeNonQueryStatement(
+          "insert into delete_by_tag (time, deviceId, s1) values (4, 'sensor', 
4)");
+
+      session.executeNonQueryStatement("DELETE FROM delete_by_tag WHERE 
deviceId = 'sensor'");
+
+      SessionDataSet dataSet =
+          session.executeQueryStatement("select * from delete_by_tag order by 
time");
+      assertFalse(dataSet.hasNext());
+
+      session.executeNonQueryStatement(
+          "insert into delete_by_tag (time, deviceId, s1) values (1, 'sensor', 
1)");
+      session.executeNonQueryStatement(
+          "insert into delete_by_tag (time, deviceId, s1) values (2, 'sensor', 
2)");
+      session.executeNonQueryStatement(
+          "insert into delete_by_tag (time, deviceId, s1) values (3, 'sensor', 
3)");
+      session.executeNonQueryStatement(
+          "insert into delete_by_tag (time, deviceId, s1) values (4, 'sensor', 
4)");
+      session.executeNonQueryStatement("FLUSH");
+
+      session.executeNonQueryStatement("DELETE FROM delete_by_tag WHERE 
deviceId = 'sensor'");
+
+      dataSet = session.executeQueryStatement("select * from delete_by_tag 
order by time");
+      assertFalse(dataSet.hasNext());
+    } finally {
+      try (ITableSession session = 
EnvFactory.getEnv().getTableSessionConnectionWithDB("test")) {
+        session.executeNonQueryStatement("DROP TABLE IF EXISTS delete_by_tag");
+      }
+    }
+  }
+
+  @Test
+  public void testDropAndAlter() throws IoTDBConnectionException, 
StatementExecutionException {
+    try (ITableSession session = 
EnvFactory.getEnv().getTableSessionConnectionWithDB("test")) {
+      session.executeNonQueryStatement("CREATE TABLE IF NOT EXISTS 
drop_and_alter (s1 int32)");
+
+      // time=1 and time=2 are INT32 and deleted by drop column
+      Tablet tablet =
+          new Tablet(
+              "drop_and_alter",
+              Collections.singletonList("s1"),
+              Collections.singletonList(TSDataType.INT32),
+              Collections.singletonList(ColumnCategory.FIELD));
+      tablet.addTimestamp(0, 1);
+      tablet.addValue("s1", 0, genValue(TSDataType.INT32, 1));
+      session.insert(tablet);
+      tablet.reset();
+
+      session.executeNonQueryStatement("FLUSH");
+
+      tablet =
+          new Tablet(
+              "drop_and_alter",
+              Collections.singletonList("s1"),
+              Collections.singletonList(TSDataType.INT32),
+              Collections.singletonList(ColumnCategory.FIELD));
+      tablet.addTimestamp(0, 2);
+      tablet.addValue("s1", 0, genValue(TSDataType.INT32, 2));
+      session.insert(tablet);
+      tablet.reset();
+
+      session.executeNonQueryStatement("ALTER TABLE drop_and_alter DROP COLUMN 
s1");
+
+      // time=3 and time=4 are STRING
+      tablet =
+          new Tablet(
+              "drop_and_alter",
+              Collections.singletonList("s1"),
+              Collections.singletonList(TSDataType.STRING),
+              Collections.singletonList(ColumnCategory.FIELD));
+      tablet.addTimestamp(0, 3);
+      tablet.addValue("s1", 0, genValue(TSDataType.STRING, 3));
+      session.insert(tablet);
+      tablet.reset();
+
+      session.executeNonQueryStatement("FLUSH");
+
+      tablet =
+          new Tablet(
+              "drop_and_alter",
+              Collections.singletonList("s1"),
+              Collections.singletonList(TSDataType.STRING),
+              Collections.singletonList(ColumnCategory.FIELD));
+      tablet.addTimestamp(0, 4);
+      tablet.addValue("s1", 0, genValue(TSDataType.STRING, 4));
+      session.insert(tablet);
+      tablet.reset();
+
+      session.executeNonQueryStatement("ALTER TABLE drop_and_alter DROP COLUMN 
s1");
+      session.executeNonQueryStatement("ALTER TABLE drop_and_alter ADD COLUMN 
s1 TEXT");
+
+      // time=5 and time=6 are TEXT
+      tablet =
+          new Tablet(
+              "drop_and_alter",
+              Collections.singletonList("s1"),
+              Collections.singletonList(TSDataType.TEXT),
+              Collections.singletonList(ColumnCategory.FIELD));
+      tablet.addTimestamp(0, 5);
+      tablet.addValue("s1", 0, genValue(TSDataType.STRING, 5));
+      session.insert(tablet);
+      tablet.reset();
+
+      session.executeNonQueryStatement("FLUSH");
+
+      tablet =
+          new Tablet(
+              "drop_and_alter",
+              Collections.singletonList("s1"),
+              Collections.singletonList(TSDataType.TEXT),
+              Collections.singletonList(ColumnCategory.FIELD));
+      tablet.addTimestamp(0, 6);
+      tablet.addValue("s1", 0, genValue(TSDataType.STRING, 6));
+      session.insert(tablet);
+      tablet.reset();
+
+      SessionDataSet dataSet =
+          session.executeQueryStatement("select * from drop_and_alter order by 
time");
+      // s1 is dropped but the time should remain
+      RowRecord rec;
+      int cnt = 0;
+      for (int i = 1; i < 7; i++) {
+        rec = dataSet.next();
+        assertEquals(i, rec.getFields().get(0).getLongV());
+        LOGGER.error(
+            "time is {}, value is {}, value type is {}",
+            rec.getFields().get(0).getLongV(),
+            rec.getFields().get(1),
+            rec.getFields().get(1).getDataType());
+        //        assertNull(rec.getFields().get(1).getDataType());
+        //        Assert.assertEquals(TSDataType.TEXT, 
rec.getFields().get(1).getDataType());
+        cnt++;
+      }
+      Assert.assertEquals(6, cnt);
+      assertFalse(dataSet.hasNext());
+    } finally {
+      try (ITableSession session = 
EnvFactory.getEnv().getTableSessionConnectionWithDB("test")) {
+        session.executeNonQueryStatement("DROP TABLE IF EXISTS 
drop_and_alter");
+      }
+    }
+  }
+
   private static void prepareDatabase() {
     try (Connection connection = 
EnvFactory.getEnv().getConnection(BaseEnv.TABLE_SQL_DIALECT);
         Statement statement = connection.createStatement()) {
diff --git 
a/integration-test/src/test/java/org/apache/iotdb/relational/it/schema/IoTDBAlterColumnTypeIT.java
 
b/integration-test/src/test/java/org/apache/iotdb/relational/it/schema/IoTDBAlterColumnTypeIT.java
index d2f01f928b8..53f15a192e8 100644
--- 
a/integration-test/src/test/java/org/apache/iotdb/relational/it/schema/IoTDBAlterColumnTypeIT.java
+++ 
b/integration-test/src/test/java/org/apache/iotdb/relational/it/schema/IoTDBAlterColumnTypeIT.java
@@ -140,6 +140,7 @@ public class IoTDBAlterColumnTypeIT {
       throws IoTDBConnectionException, StatementExecutionException {
     try (ITableSession session = 
EnvFactory.getEnv().getTableSessionConnectionWithDB("test")) {
       session.executeNonQueryStatement("SET CONFIGURATION 
enable_unseq_space_compaction='false'");
+      session.executeNonQueryStatement("SET CONFIGURATION 
enable_seq_space_compaction='false'");
       if (from == TSDataType.DATE && !to.isCompatible(from)) {
         throw new NotSupportedException("Not supported DATE type.");
       }
diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/utils/ResourceByPathUtils.java
 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/utils/ResourceByPathUtils.java
index 188c4ec6529..2bfbdec4e3b 100644
--- 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/utils/ResourceByPathUtils.java
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/schemaengine/schemaregion/utils/ResourceByPathUtils.java
@@ -268,19 +268,19 @@ class AlignedResourceByPathUtils extends 
ResourceByPathUtils {
       modified = (modified || alignedChunkMetadata.isModified());
       TSDataType targetDataType = 
alignedFullPath.getSchemaList().get(index).getType();
       if (targetDataType.equals(TSDataType.STRING)
-          && (alignedChunkMetadata.getValueChunkMetadataList().stream()
-                  .filter(iChunkMetadata -> iChunkMetadata.getDataType() != 
targetDataType)
-                  .count()
-              > 0)) {
+          && ((alignedChunkMetadata.getValueChunkMetadataList().get(index) != 
null)
+              && 
(alignedChunkMetadata.getValueChunkMetadataList().get(index).getDataType()
+                  != targetDataType))) {
         // create new statistics object via new data type, and merge 
statistics information
-        alignedChunkMetadata =
-            
SchemaUtils.rewriteAlignedChunkMetadataStatistics(alignedChunkMetadata, 
targetDataType);
+        SchemaUtils.rewriteAlignedChunkMetadataStatistics(
+            alignedChunkMetadata, index, targetDataType);
         alignedChunkMetadata.setModified(true);
       }
       if (!useFakeStatistics) {
         
timeStatistics.mergeStatistics(alignedChunkMetadata.getTimeChunkMetadata().getStatistics());
         for (int i = 0; i < valueTimeSeriesMetadataList.size(); i++) {
-          if (alignedChunkMetadata.getValueChunkMetadataList().get(i) != null) 
{
+          if (!alignedChunkMetadata.getValueChunkMetadataList().isEmpty()
+              && alignedChunkMetadata.getValueChunkMetadataList().get(i) != 
null) {
             exist[i] = true;
             valueTimeSeriesMetadataList
                 .get(i)
@@ -542,8 +542,18 @@ class MeasurementResourceByPathUtils extends 
ResourceByPathUtils {
     boolean isModified = false;
     for (IChunkMetadata chunkMetadata : chunkMetadataList) {
       isModified = (isModified || chunkMetadata.isModified());
+      TSDataType targetDataType = fullPath.getMeasurementSchema().getType();
+      if (targetDataType.equals(TSDataType.STRING)
+          && (chunkMetadata.getDataType() != targetDataType)) {
+        // create new statistics object via new data type, and merge 
statistics information
+        SchemaUtils.rewriteNonAlignedChunkMetadataStatistics(
+            (ChunkMetadata) chunkMetadata, targetDataType);
+        chunkMetadata.setModified(true);
+      }
       if (!useFakeStatistics) {
-        seriesStatistics.mergeStatistics(chunkMetadata.getStatistics());
+        if (chunkMetadata != null && 
targetDataType.isCompatible(chunkMetadata.getDataType())) {
+          seriesStatistics.mergeStatistics(chunkMetadata.getStatistics());
+        }
         continue;
       }
       startTime = Math.min(startTime, chunkMetadata.getStartTime());
diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/DataRegion.java
 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/DataRegion.java
index e0b9ab91cd9..746e042ab14 100644
--- 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/DataRegion.java
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/DataRegion.java
@@ -116,6 +116,7 @@ import 
org.apache.iotdb.db.storageengine.dataregion.flush.TsFileFlushPolicy;
 import org.apache.iotdb.db.storageengine.dataregion.memtable.IMemTable;
 import org.apache.iotdb.db.storageengine.dataregion.memtable.TsFileProcessor;
 import 
org.apache.iotdb.db.storageengine.dataregion.memtable.TsFileProcessorInfo;
+import org.apache.iotdb.db.storageengine.dataregion.modification.IDPredicate;
 import org.apache.iotdb.db.storageengine.dataregion.modification.ModEntry;
 import 
org.apache.iotdb.db.storageengine.dataregion.modification.ModificationFile;
 import 
org.apache.iotdb.db.storageengine.dataregion.modification.TableDeletionEntry;
@@ -3235,18 +3236,14 @@ public class DataRegion implements IDataRegionForQuery {
     List<TsFileResource> deletedByMods = new ArrayList<>();
     List<TsFileResource> deletedByFiles = new ArrayList<>();
     boolean isDropMeasurementExist = false;
-    boolean isDropTagExist = false;
+    IDPredicate.IDPredicateType idPredicateType = null;
 
     if (deletion instanceof TableDeletionEntry) {
-      TableDeletionEntry entry = (TableDeletionEntry) deletion;
-      isDropMeasurementExist = 
!entry.getPredicate().getMeasurementNames().isEmpty();
-    } else {
-      TreeDeletionEntry entry = (TreeDeletionEntry) deletion;
-      if (entry.getPathPattern() instanceof MeasurementPath) {
-        Map<String, String> tagMap = ((MeasurementPath) 
entry.getPathPattern()).getTagMap();
-        isDropTagExist = (tagMap != null) && !tagMap.isEmpty();
-      }
+      TableDeletionEntry tableDeletionEntry = (TableDeletionEntry) deletion;
+      isDropMeasurementExist = 
!tableDeletionEntry.getPredicate().getMeasurementNames().isEmpty();
+      idPredicateType = tableDeletionEntry.getPredicate().getIdPredicateType();
     }
+
     for (TsFileResource sealedTsFile : sealedTsFiles) {
       if (canSkipDelete(sealedTsFile, deletion)) {
         continue;
@@ -3310,7 +3307,9 @@ public class DataRegion implements IDataRegionForQuery {
                   fileStartTime,
                   fileEndTime);
             }
-            if (isFileFullyMatchedByTime(deletion, fileStartTime, 
fileEndTime)) {
+            if (isFileFullyMatchedByTime(deletion, fileStartTime, fileEndTime)
+                && idPredicateType.equals(IDPredicate.IDPredicateType.NOP)
+                && !isDropMeasurementExist) {
               ++matchSize;
             } else {
               deletedByMods.add(sealedTsFile);
@@ -3343,7 +3342,7 @@ public class DataRegion implements IDataRegionForQuery {
       } // else do nothing
     }
 
-    if (!deletedByFiles.isEmpty() && !isDropMeasurementExist && 
!isDropTagExist) {
+    if (!deletedByFiles.isEmpty()) {
       deleteTsFileCompletely(deletedByFiles);
       if (logger.isDebugEnabled()) {
         logger.debug(
diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/modification/DeletionPredicate.java
 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/modification/DeletionPredicate.java
index 0a22da2a90a..877c94f7081 100644
--- 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/modification/DeletionPredicate.java
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/modification/DeletionPredicate.java
@@ -76,6 +76,10 @@ public class DeletionPredicate implements 
StreamSerializable, BufferSerializable
     return idPredicate;
   }
 
+  public IDPredicate.IDPredicateType getIdPredicateType() {
+    return this.idPredicate.type;
+  }
+
   public String getTableName() {
     return tableName;
   }
diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/SchemaUtils.java 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/SchemaUtils.java
index c16f5339ee8..320f00483f5 100644
--- 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/SchemaUtils.java
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/SchemaUtils.java
@@ -27,7 +27,6 @@ import org.apache.iotdb.db.utils.constant.SqlConstant;
 import org.apache.tsfile.enums.TSDataType;
 import org.apache.tsfile.file.metadata.AbstractAlignedChunkMetadata;
 import org.apache.tsfile.file.metadata.AbstractAlignedTimeSeriesMetadata;
-import org.apache.tsfile.file.metadata.AlignedChunkMetadata;
 import org.apache.tsfile.file.metadata.ChunkMetadata;
 import org.apache.tsfile.file.metadata.IChunkMetadata;
 import org.apache.tsfile.file.metadata.TimeseriesMetadata;
@@ -41,7 +40,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
@@ -55,6 +53,7 @@ public class SchemaUtils {
 
   private static final Map<TSDataType, Class> dataTypeColumnClassMap;
   public static final Logger logger = 
LoggerFactory.getLogger(SchemaUtils.class);
+  private static final Binary EMPTY_BINARY = new Binary("", 
StandardCharsets.UTF_8);
 
   static {
     dataTypeColumnClassMap = new HashMap<>();
@@ -399,121 +398,31 @@ public class SchemaUtils {
     }
   }
 
-  public static AbstractAlignedChunkMetadata 
rewriteAlignedChunkMetadataStatistics(
-      AbstractAlignedChunkMetadata alignedChunkMetadata, TSDataType 
targetDataType) {
-    List<IChunkMetadata> newValueChunkMetadataList = new ArrayList<>();
-    for (IChunkMetadata valueChunkMetadata : 
alignedChunkMetadata.getValueChunkMetadataList()) {
+  public static void rewriteAlignedChunkMetadataStatistics(
+      AbstractAlignedChunkMetadata alignedChunkMetadata, int index, TSDataType 
targetDataType) {
+    IChunkMetadata valueChunkMetadata = 
alignedChunkMetadata.getValueChunkMetadataList().get(index);
+    if (valueChunkMetadata != null
+        && targetDataType.isCompatible(valueChunkMetadata.getDataType())) {
       Statistics<?> statistics = Statistics.getStatsByType(targetDataType);
-      switch (valueChunkMetadata.getDataType()) {
-        case INT32:
-        case DATE:
-        case INT64:
-        case TIMESTAMP:
-        case FLOAT:
-        case DOUBLE:
-        case BOOLEAN:
-          if (targetDataType == TSDataType.STRING) {
-            Binary[] binaryValues = new Binary[4];
-            binaryValues[0] =
-                new Binary(
-                    
valueChunkMetadata.getStatistics().getFirstValue().toString(),
-                    StandardCharsets.UTF_8);
-            binaryValues[1] =
-                new Binary(
-                    
valueChunkMetadata.getStatistics().getLastValue().toString(),
-                    StandardCharsets.UTF_8);
-            if (valueChunkMetadata.getDataType() == TSDataType.BOOLEAN) {
-              binaryValues[2] = new Binary(Boolean.FALSE.toString(), 
StandardCharsets.UTF_8);
-              binaryValues[3] = new Binary(Boolean.TRUE.toString(), 
StandardCharsets.UTF_8);
-            } else {
-              binaryValues[2] =
-                  new Binary(
-                      
valueChunkMetadata.getStatistics().getMinValue().toString(),
-                      StandardCharsets.UTF_8);
-              binaryValues[3] =
-                  new Binary(
-                      
valueChunkMetadata.getStatistics().getMaxValue().toString(),
-                      StandardCharsets.UTF_8);
-            }
-            long[] longValues = new long[4];
-            longValues[0] = valueChunkMetadata.getStatistics().getStartTime();
-            longValues[1] = valueChunkMetadata.getStatistics().getEndTime();
-            longValues[2] = longValues[1];
-            longValues[3] = longValues[1];
-            statistics.update(longValues, binaryValues, binaryValues.length);
-          } else if (targetDataType == TSDataType.TEXT) {
-            Binary[] binaryValues = new Binary[2];
-            if (valueChunkMetadata.getDataType() == TSDataType.BOOLEAN) {
-              binaryValues[0] = new Binary(Boolean.FALSE.toString(), 
StandardCharsets.UTF_8);
-              binaryValues[1] = new Binary(Boolean.TRUE.toString(), 
StandardCharsets.UTF_8);
-            } else {
-              binaryValues[0] =
-                  new Binary(
-                      
valueChunkMetadata.getStatistics().getMinValue().toString(),
-                      StandardCharsets.UTF_8);
-              binaryValues[1] =
-                  new Binary(
-                      
valueChunkMetadata.getStatistics().getMaxValue().toString(),
-                      StandardCharsets.UTF_8);
-            }
-            long[] longValues = new long[2];
-            longValues[0] = valueChunkMetadata.getStatistics().getStartTime();
-            longValues[1] = valueChunkMetadata.getStatistics().getEndTime();
-            statistics.update(longValues, binaryValues, binaryValues.length);
-          } else {
-            statistics = valueChunkMetadata.getStatistics();
-          }
-          break;
-        case STRING:
-          if (targetDataType == TSDataType.TEXT) {
-            Binary[] binaryValues = new Binary[2];
-            binaryValues[0] =
-                new Binary(
-                    Arrays.asList(TSDataType.TEXT, TSDataType.BLOB)
-                            .contains(valueChunkMetadata.getDataType())
-                        ? ""
-                        : 
valueChunkMetadata.getStatistics().getMinValue().toString(),
-                    StandardCharsets.UTF_8);
-            binaryValues[1] =
-                new Binary(
-                    Arrays.asList(TSDataType.TEXT, TSDataType.BLOB)
-                            .contains(valueChunkMetadata.getDataType())
-                        ? ""
-                        : 
valueChunkMetadata.getStatistics().getMaxValue().toString(),
-                    StandardCharsets.UTF_8);
-            long[] longValues = new long[2];
-            longValues[0] = valueChunkMetadata.getStatistics().getStartTime();
-            longValues[1] = valueChunkMetadata.getStatistics().getEndTime();
-            statistics.update(longValues, binaryValues, binaryValues.length);
-          } else {
-            statistics = valueChunkMetadata.getStatistics();
-          }
-          break;
-        case TEXT:
-        case BLOB:
-          if (targetDataType == TSDataType.STRING) {
-            Binary[] binaryValues = new Binary[2];
-            binaryValues[0] = new Binary("", StandardCharsets.UTF_8);
-            binaryValues[1] = new Binary("", StandardCharsets.UTF_8);
-            long[] longValues = new long[2];
-            longValues[0] = valueChunkMetadata.getStatistics().getStartTime();
-            longValues[1] = valueChunkMetadata.getStatistics().getEndTime();
-            statistics.update(longValues, binaryValues, binaryValues.length);
-          } else {
-            statistics = valueChunkMetadata.getStatistics();
-          }
-          break;
-        default:
-          break;
-      }
+      statistics = getNewStatistics(valueChunkMetadata, targetDataType, 
statistics);
 
       ChunkMetadata newChunkMetadata = (ChunkMetadata) valueChunkMetadata;
       newChunkMetadata.setTsDataType(targetDataType);
       newChunkMetadata.setStatistics(statistics);
-      newValueChunkMetadataList.add(newChunkMetadata);
+    } else {
+      alignedChunkMetadata.getValueChunkMetadataList().set(index, null);
+    }
+  }
+
+  public static void rewriteNonAlignedChunkMetadataStatistics(
+      ChunkMetadata chunkMetadata, TSDataType targetDataType) {
+    if (chunkMetadata != null && 
targetDataType.isCompatible(chunkMetadata.getDataType())) {
+      Statistics<?> statistics = Statistics.getStatsByType(targetDataType);
+      statistics = getNewStatistics(chunkMetadata, targetDataType, statistics);
+
+      chunkMetadata.setTsDataType(targetDataType);
+      chunkMetadata.setStatistics(statistics);
     }
-    return new AlignedChunkMetadata(
-        alignedChunkMetadata.getTimeChunkMetadata(), 
newValueChunkMetadataList);
   }
 
   public static TSEncoding getDataTypeCompatibleEncoding(TSDataType dataType, 
TSEncoding encoding) {
@@ -522,4 +431,167 @@ public class SchemaUtils {
     }
     return encoding;
   }
+
+  public static Statistics<?> getNewStatistics(
+      IChunkMetadata chunkMetadata, TSDataType targetDataType, Statistics<?> statistics) {
+    switch (chunkMetadata.getDataType()) {
+      case INT32:
+      case INT64:
+      case TIMESTAMP:
+      case FLOAT:
+      case DOUBLE:
+      case BOOLEAN:
+        if (targetDataType == TSDataType.STRING) {
+          Binary[] binaryValues = new Binary[4];
+          binaryValues[0] =
+              new Binary(
+                  chunkMetadata.getStatistics().getFirstValue().toString(), StandardCharsets.UTF_8);
+          binaryValues[1] =
+              new Binary(
+                  chunkMetadata.getStatistics().getLastValue().toString(), StandardCharsets.UTF_8);
+          if (chunkMetadata.getDataType() == TSDataType.BOOLEAN) {
+            binaryValues[2] = new Binary(Boolean.FALSE.toString(), StandardCharsets.UTF_8);
+            binaryValues[3] = new Binary(Boolean.TRUE.toString(), StandardCharsets.UTF_8);
+          } else {
+            binaryValues[2] =
+                new Binary(
+                    chunkMetadata.getStatistics().getMinValue().toString(), StandardCharsets.UTF_8);
+            binaryValues[3] =
+                new Binary(
+                    chunkMetadata.getStatistics().getMaxValue().toString(), StandardCharsets.UTF_8);
+          }
+          long[] longValues = new long[4];
+          longValues[0] = chunkMetadata.getStatistics().getStartTime();
+          longValues[1] = chunkMetadata.getStatistics().getEndTime();
+          longValues[2] = longValues[1];
+          longValues[3] = longValues[1];
+          statistics.update(longValues, binaryValues, binaryValues.length);
+        } else if (targetDataType == TSDataType.TEXT) {
+          Binary[] binaryValues = new Binary[2];
+          if (chunkMetadata.getDataType() == TSDataType.BOOLEAN) {
+            binaryValues[0] = new Binary(Boolean.FALSE.toString(), StandardCharsets.UTF_8);
+            binaryValues[1] = new Binary(Boolean.TRUE.toString(), StandardCharsets.UTF_8);
+          } else {
+            binaryValues[0] =
+                new Binary(
+                    chunkMetadata.getStatistics().getMinValue().toString(), StandardCharsets.UTF_8);
+            binaryValues[1] =
+                new Binary(
+                    chunkMetadata.getStatistics().getMaxValue().toString(), StandardCharsets.UTF_8);
+          }
+          long[] longValues = new long[2];
+          longValues[0] = chunkMetadata.getStatistics().getStartTime();
+          longValues[1] = chunkMetadata.getStatistics().getEndTime();
+          statistics.update(longValues, binaryValues, binaryValues.length);
+        } else {
+          statistics = chunkMetadata.getStatistics();
+        }
+        break;
+      case DATE:
+        if (targetDataType == TSDataType.STRING) {
+          Binary[] binaryValues = new Binary[4];
+          binaryValues[0] =
+              new Binary(
+                  TSDataType.getDateStringValue(
+                      (Integer) chunkMetadata.getStatistics().getFirstValue()),
+                  StandardCharsets.UTF_8);
+          binaryValues[1] =
+              new Binary(
+                  TSDataType.getDateStringValue(
+                      (Integer) chunkMetadata.getStatistics().getLastValue()),
+                  StandardCharsets.UTF_8);
+          binaryValues[2] =
+              new Binary(
+                  TSDataType.getDateStringValue(
+                      (Integer) chunkMetadata.getStatistics().getMinValue()),
+                  StandardCharsets.UTF_8);
+          binaryValues[3] =
+              new Binary(
+                  TSDataType.getDateStringValue(
+                      (Integer) chunkMetadata.getStatistics().getMaxValue()),
+                  StandardCharsets.UTF_8);
+          long[] longValues = new long[4];
+          longValues[0] = chunkMetadata.getStatistics().getStartTime();
+          longValues[1] = chunkMetadata.getStatistics().getEndTime();
+          longValues[2] = longValues[1];
+          longValues[3] = longValues[1];
+          statistics.update(longValues, binaryValues, binaryValues.length);
+        } else if (targetDataType == TSDataType.TEXT) {
+          Binary[] binaryValues = new Binary[2];
+          binaryValues[0] =
+              new Binary(
+                  TSDataType.getDateStringValue(
+                      (Integer) chunkMetadata.getStatistics().getFirstValue()),
+                  StandardCharsets.UTF_8);
+          binaryValues[1] =
+              new Binary(
+                  TSDataType.getDateStringValue(
+                      (Integer) chunkMetadata.getStatistics().getLastValue()),
+                  StandardCharsets.UTF_8);
+          long[] longValues = new long[2];
+          longValues[0] = chunkMetadata.getStatistics().getStartTime();
+          longValues[1] = chunkMetadata.getStatistics().getEndTime();
+          statistics.update(longValues, binaryValues, binaryValues.length);
+        }
+        break;
+      case STRING:
+        if (targetDataType == TSDataType.TEXT) {
+          Binary[] binaryValues = new Binary[2];
+          binaryValues[0] =
+              new Binary(
+                  chunkMetadata.getStatistics().getMinValue().toString(), StandardCharsets.UTF_8);
+          binaryValues[1] =
+              new Binary(
+                  chunkMetadata.getStatistics().getMaxValue().toString(), StandardCharsets.UTF_8);
+          long[] longValues = new long[2];
+          longValues[0] = chunkMetadata.getStatistics().getStartTime();
+          longValues[1] = chunkMetadata.getStatistics().getEndTime();
+          statistics.update(longValues, binaryValues, binaryValues.length);
+        } else if (targetDataType == TSDataType.BLOB) {
+          statistics.update(
+              chunkMetadata.getStatistics().getStartTime(),
+              new Binary(
+                  chunkMetadata.getStatistics().getMinValue().toString(), StandardCharsets.UTF_8));
+          statistics.update(
+              chunkMetadata.getStatistics().getEndTime(),
+              new Binary(
+                  chunkMetadata.getStatistics().getMaxValue().toString(), StandardCharsets.UTF_8));
+        } else {
+          statistics = chunkMetadata.getStatistics();
+        }
+        break;
+      case TEXT:
+        if (targetDataType == TSDataType.STRING) {
+          Binary[] binaryValues = new Binary[2];
+          binaryValues[0] = (Binary) chunkMetadata.getStatistics().getFirstValue();
+          binaryValues[1] = (Binary) chunkMetadata.getStatistics().getLastValue();
+          long[] longValues = new long[2];
+          longValues[0] = chunkMetadata.getStatistics().getStartTime();
+          longValues[1] = chunkMetadata.getStatistics().getEndTime();
+          statistics.update(longValues, binaryValues, binaryValues.length);
+        } else if (targetDataType == TSDataType.BLOB) {
+          statistics.update(chunkMetadata.getStatistics().getStartTime(), EMPTY_BINARY);
+          statistics.update(chunkMetadata.getStatistics().getEndTime(), EMPTY_BINARY);
+        } else {
+          statistics = chunkMetadata.getStatistics();
+        }
+        break;
+      case BLOB:
+        if (targetDataType == TSDataType.STRING || targetDataType == TSDataType.TEXT) {
+          Binary[] binaryValues = new Binary[2];
+          binaryValues[0] = EMPTY_BINARY;
+          binaryValues[1] = EMPTY_BINARY;
+          long[] longValues = new long[2];
+          longValues[0] = chunkMetadata.getStatistics().getStartTime();
+          longValues[1] = chunkMetadata.getStatistics().getEndTime();
+          statistics.update(longValues, binaryValues, binaryValues.length);
+        } else {
+          statistics = chunkMetadata.getStatistics();
+        }
+        break;
+      default:
+        break;
+    }
+    return statistics;
+  }
 }
diff --git a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/utils/SchemaUtilsTest.java b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/utils/SchemaUtilsTest.java
index 17dffd69785..cffc16f22ac 100644
--- a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/utils/SchemaUtilsTest.java
+++ b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/utils/SchemaUtilsTest.java
@@ -29,13 +29,19 @@ import org.apache.tsfile.file.metadata.IChunkMetadata;
 import org.apache.tsfile.file.metadata.enums.CompressionType;
 import org.apache.tsfile.file.metadata.enums.TSEncoding;
 import org.apache.tsfile.file.metadata.statistics.Statistics;
+import org.apache.tsfile.utils.Binary;
 import org.junit.Assert;
 import org.junit.Test;
 
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
+
+import static org.apache.tsfile.file.metadata.statistics.Statistics.canMerge;
 
 public class SchemaUtilsTest {
   @Test
@@ -85,7 +91,9 @@ public class SchemaUtilsTest {
   public void rewriteAlignedChunkMetadataStatistics() {
     for (TSDataType targetDataType : Arrays.asList(TSDataType.STRING, TSDataType.TEXT)) {
       for (TSDataType tsDataType : TSDataType.values()) {
-        if (tsDataType == TSDataType.UNKNOWN) {
+        if (tsDataType == TSDataType.UNKNOWN
+            || tsDataType == TSDataType.VECTOR
+            || tsDataType == TSDataType.OBJECT) {
           continue;
         }
         List<IChunkMetadata> valueChunkMetadatas =
@@ -100,15 +108,142 @@ public class SchemaUtilsTest {
         AlignedChunkMetadata alignedChunkMetadata =
             new AlignedChunkMetadata(new ChunkMetadata(), valueChunkMetadatas);
         try {
-          AbstractAlignedChunkMetadata abstractAlignedChunkMetadata =
-              SchemaUtils.rewriteAlignedChunkMetadataStatistics(
-                  alignedChunkMetadata, targetDataType);
-          Assert.assertEquals(
-              targetDataType,
-              abstractAlignedChunkMetadata.getValueChunkMetadataList().get(0).getDataType());
+          SchemaUtils.rewriteAlignedChunkMetadataStatistics(
+              alignedChunkMetadata, 0, targetDataType);
+          if (alignedChunkMetadata != null
+              && !alignedChunkMetadata.getValueChunkMetadataList().isEmpty()) {
+            Assert.assertEquals(
+                targetDataType,
+                alignedChunkMetadata.getValueChunkMetadataList().get(0).getDataType());
+          }
+        } catch (ClassCastException e) {
+          Assert.fail(e.getMessage());
+        }
+      }
+    }
+  }
+
+  @Test
+  public void mergeMetadataStatistics() throws Exception {
+    Set<TSDataType> unsupportTsDataType = new HashSet<>();
+    unsupportTsDataType.add(TSDataType.UNKNOWN);
+    unsupportTsDataType.add(TSDataType.VECTOR);
+    for (TSDataType sourceDataType : Arrays.asList(TSDataType.DOUBLE)) {
+      for (TSDataType targetDataType : Arrays.asList(TSDataType.TEXT, TSDataType.BLOB)) {
+
+        if (sourceDataType.equals(targetDataType)) {
+          continue;
+        }
+        if (unsupportTsDataType.contains(sourceDataType)
+            || unsupportTsDataType.contains(targetDataType)) {
+          continue;
+        }
+
+        System.out.println("from " + sourceDataType + " to " + targetDataType);
+
+        // Aligned series
+        Statistics<?> s1 = Statistics.getStatsByType(sourceDataType);
+        s1.update(new long[] {1, 2}, new double[] {1.0, 2.0}, 2);
+        Statistics<?> s2 = Statistics.getStatsByType(TSDataType.DOUBLE);
+        s2.update(new long[] {1, 2}, new double[] {1.0, 2.0}, 2);
+        List<IChunkMetadata> valueChunkMetadatas =
+            Arrays.asList(
+                new ChunkMetadata(
+                    "s0",
+                    sourceDataType,
+                    SchemaUtils.getDataTypeCompatibleEncoding(sourceDataType, TSEncoding.RLE),
+                    CompressionType.LZ4,
+                    0,
+                    s1),
+                new ChunkMetadata(
+                    "s1",
+                    TSDataType.DOUBLE,
+                    SchemaUtils.getDataTypeCompatibleEncoding(TSDataType.DOUBLE, TSEncoding.RLE),
+                    CompressionType.LZ4,
+                    0,
+                    s2));
+        IChunkMetadata alignedChunkMetadata =
+            new AlignedChunkMetadata(new ChunkMetadata(), valueChunkMetadatas);
+
+        Statistics<?> s3 = Statistics.getStatsByType(targetDataType);
+        if (targetDataType == TSDataType.BLOB) {
+          s3.update(3, new Binary("3", StandardCharsets.UTF_8));
+          s3.update(4, new Binary("4", StandardCharsets.UTF_8));
+        } else {
+          s3.update(
+              new long[] {1, 2},
+              new Binary[] {
+                new Binary("3", StandardCharsets.UTF_8), new Binary("4", StandardCharsets.UTF_8),
+              },
+              2);
+        }
+        Statistics<?> s4 = Statistics.getStatsByType(targetDataType);
+        if (targetDataType == TSDataType.BLOB) {
+          s3.update(4, new Binary("4", StandardCharsets.UTF_8));
+        } else {
+          s4.update(
+              new long[] {1, 2},
+              new Binary[] {
+                new Binary("5", StandardCharsets.UTF_8), new Binary("6", StandardCharsets.UTF_8),
+              },
+              2);
+        }
+        List<IChunkMetadata> targetChunkMetadatas =
+            Arrays.asList(
+                new ChunkMetadata(
+                    "s0",
+                    targetDataType,
+                    SchemaUtils.getDataTypeCompatibleEncoding(targetDataType, TSEncoding.RLE),
+                    CompressionType.LZ4,
+                    0,
+                    s3),
+                new ChunkMetadata(
+                    "s1",
+                    targetDataType,
+                    SchemaUtils.getDataTypeCompatibleEncoding(targetDataType, TSEncoding.RLE),
+                    CompressionType.LZ4,
+                    0,
+                    s4));
+        AbstractAlignedChunkMetadata abstractAlignedChunkMetadata =
+            (AbstractAlignedChunkMetadata) alignedChunkMetadata;
+        try {
+          for (int i = 0; i < 2; i++) {
+            SchemaUtils.rewriteAlignedChunkMetadataStatistics(
+                abstractAlignedChunkMetadata, i, targetDataType);
+          }
         } catch (ClassCastException e) {
           Assert.fail(e.getMessage());
         }
+
+        for (int i = 0; i < targetChunkMetadatas.size(); i++) {
+          if (!abstractAlignedChunkMetadata.getValueChunkMetadataList().isEmpty()
+              && abstractAlignedChunkMetadata.getValueChunkMetadataList().get(i) != null) {
+            if (targetChunkMetadatas.get(i).getStatistics().getClass()
+                    == abstractAlignedChunkMetadata
+                        .getValueChunkMetadataList()
+                        .get(i)
+                        .getStatistics()
+                        .getClass()
+                || canMerge(
+                    abstractAlignedChunkMetadata
+                        .getValueChunkMetadataList()
+                        .get(i)
+                        .getStatistics()
+                        .getType(),
+                    targetChunkMetadatas.get(i).getStatistics().getType())) {
+              targetChunkMetadatas
+                  .get(i)
+                  .getStatistics()
+                  .mergeStatistics(
+                      abstractAlignedChunkMetadata
+                          .getValueChunkMetadataList()
+                          .get(i)
+                          .getStatistics());
+            } else {
+              throw new Exception("unsupported");
+            }
+          }
+        }
       }
     }
   }


Reply via email to