jt2594838 commented on code in PR #16936:
URL: https://github.com/apache/iotdb/pull/16936#discussion_r2674535597
##########
iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/SchemaUtils.java:
##########
@@ -516,10 +415,120 @@ public static AbstractAlignedChunkMetadata
rewriteAlignedChunkMetadataStatistics
alignedChunkMetadata.getTimeChunkMetadata(),
newValueChunkMetadataList);
}
+ public static void rewriteNonAlignedChunkMetadataStatistics(
+ ChunkMetadata chunkMetadata, TSDataType targetDataType) {
+ Statistics<?> statistics = Statistics.getStatsByType(targetDataType);
+ statistics = getNewStatistics(chunkMetadata, targetDataType, statistics);
+
+ chunkMetadata.setTsDataType(targetDataType);
+ chunkMetadata.setStatistics(statistics);
+ }
+
public static TSEncoding getDataTypeCompatibleEncoding(TSDataType dataType,
TSEncoding encoding) {
if (!encoding.isSupported(dataType)) {
return EncodingInferenceUtils.getDefaultEncoding(dataType);
}
return encoding;
}
+
+ public static Statistics<?> getNewStatistics(
+ IChunkMetadata chunkMetadata, TSDataType targetDataType, Statistics<?>
statistics) {
+ switch (chunkMetadata.getDataType()) {
+ case INT32:
+ case DATE:
+ case INT64:
+ case TIMESTAMP:
+ case FLOAT:
+ case DOUBLE:
+ case BOOLEAN:
+ if (targetDataType == TSDataType.STRING) {
+ Binary[] binaryValues = new Binary[4];
+ binaryValues[0] =
+ new Binary(
+ chunkMetadata.getStatistics().getFirstValue().toString(),
StandardCharsets.UTF_8);
+ binaryValues[1] =
+ new Binary(
+ chunkMetadata.getStatistics().getLastValue().toString(),
StandardCharsets.UTF_8);
+ if (chunkMetadata.getDataType() == TSDataType.BOOLEAN) {
+ binaryValues[2] = new Binary(Boolean.FALSE.toString(),
StandardCharsets.UTF_8);
+ binaryValues[3] = new Binary(Boolean.TRUE.toString(),
StandardCharsets.UTF_8);
+ } else {
+ binaryValues[2] =
+ new Binary(
+ chunkMetadata.getStatistics().getMinValue().toString(),
StandardCharsets.UTF_8);
+ binaryValues[3] =
+ new Binary(
+ chunkMetadata.getStatistics().getMaxValue().toString(),
StandardCharsets.UTF_8);
+ }
+ long[] longValues = new long[4];
+ longValues[0] = chunkMetadata.getStatistics().getStartTime();
+ longValues[1] = chunkMetadata.getStatistics().getEndTime();
+ longValues[2] = longValues[1];
+ longValues[3] = longValues[1];
+ statistics.update(longValues, binaryValues, binaryValues.length);
+ } else if (targetDataType == TSDataType.TEXT) {
+ Binary[] binaryValues = new Binary[2];
+ if (chunkMetadata.getDataType() == TSDataType.BOOLEAN) {
+ binaryValues[0] = new Binary(Boolean.FALSE.toString(),
StandardCharsets.UTF_8);
+ binaryValues[1] = new Binary(Boolean.TRUE.toString(),
StandardCharsets.UTF_8);
+ } else {
+ binaryValues[0] =
+ new Binary(
+ chunkMetadata.getStatistics().getMinValue().toString(),
StandardCharsets.UTF_8);
+ binaryValues[1] =
+ new Binary(
+ chunkMetadata.getStatistics().getMaxValue().toString(),
StandardCharsets.UTF_8);
+ }
+ long[] longValues = new long[2];
+ longValues[0] = chunkMetadata.getStatistics().getStartTime();
+ longValues[1] = chunkMetadata.getStatistics().getEndTime();
+ statistics.update(longValues, binaryValues, binaryValues.length);
+ } else {
+ statistics = chunkMetadata.getStatistics();
+ }
+ break;
+ case STRING:
+ if (targetDataType == TSDataType.TEXT) {
+ Binary[] binaryValues = new Binary[2];
+ binaryValues[0] =
+ new Binary(
+ Arrays.asList(TSDataType.TEXT, TSDataType.BLOB)
+ .contains(chunkMetadata.getDataType())
+ ? ""
+ : chunkMetadata.getStatistics().getMinValue().toString(),
+ StandardCharsets.UTF_8);
Review Comment:
Why is the data type of the chunk metadata checked again here? This code is
already inside the switch on that data type, so the check looks redundant.
##########
iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/SchemaUtils.java:
##########
@@ -516,10 +415,120 @@ public static AbstractAlignedChunkMetadata
rewriteAlignedChunkMetadataStatistics
alignedChunkMetadata.getTimeChunkMetadata(),
newValueChunkMetadataList);
}
+ public static void rewriteNonAlignedChunkMetadataStatistics(
+ ChunkMetadata chunkMetadata, TSDataType targetDataType) {
+ Statistics<?> statistics = Statistics.getStatsByType(targetDataType);
+ statistics = getNewStatistics(chunkMetadata, targetDataType, statistics);
+
+ chunkMetadata.setTsDataType(targetDataType);
+ chunkMetadata.setStatistics(statistics);
+ }
+
public static TSEncoding getDataTypeCompatibleEncoding(TSDataType dataType,
TSEncoding encoding) {
if (!encoding.isSupported(dataType)) {
return EncodingInferenceUtils.getDefaultEncoding(dataType);
}
return encoding;
}
+
+ public static Statistics<?> getNewStatistics(
+ IChunkMetadata chunkMetadata, TSDataType targetDataType, Statistics<?>
statistics) {
+ switch (chunkMetadata.getDataType()) {
+ case INT32:
+ case DATE:
+ case INT64:
+ case TIMESTAMP:
+ case FLOAT:
+ case DOUBLE:
+ case BOOLEAN:
+ if (targetDataType == TSDataType.STRING) {
+ Binary[] binaryValues = new Binary[4];
+ binaryValues[0] =
+ new Binary(
+ chunkMetadata.getStatistics().getFirstValue().toString(),
StandardCharsets.UTF_8);
+ binaryValues[1] =
+ new Binary(
+ chunkMetadata.getStatistics().getLastValue().toString(),
StandardCharsets.UTF_8);
+ if (chunkMetadata.getDataType() == TSDataType.BOOLEAN) {
+ binaryValues[2] = new Binary(Boolean.FALSE.toString(),
StandardCharsets.UTF_8);
+ binaryValues[3] = new Binary(Boolean.TRUE.toString(),
StandardCharsets.UTF_8);
+ } else {
+ binaryValues[2] =
+ new Binary(
+ chunkMetadata.getStatistics().getMinValue().toString(),
StandardCharsets.UTF_8);
+ binaryValues[3] =
+ new Binary(
+ chunkMetadata.getStatistics().getMaxValue().toString(),
StandardCharsets.UTF_8);
+ }
+ long[] longValues = new long[4];
+ longValues[0] = chunkMetadata.getStatistics().getStartTime();
+ longValues[1] = chunkMetadata.getStatistics().getEndTime();
+ longValues[2] = longValues[1];
+ longValues[3] = longValues[1];
+ statistics.update(longValues, binaryValues, binaryValues.length);
+ } else if (targetDataType == TSDataType.TEXT) {
+ Binary[] binaryValues = new Binary[2];
+ if (chunkMetadata.getDataType() == TSDataType.BOOLEAN) {
+ binaryValues[0] = new Binary(Boolean.FALSE.toString(),
StandardCharsets.UTF_8);
+ binaryValues[1] = new Binary(Boolean.TRUE.toString(),
StandardCharsets.UTF_8);
+ } else {
+ binaryValues[0] =
+ new Binary(
+ chunkMetadata.getStatistics().getMinValue().toString(),
StandardCharsets.UTF_8);
+ binaryValues[1] =
+ new Binary(
+ chunkMetadata.getStatistics().getMaxValue().toString(),
StandardCharsets.UTF_8);
+ }
+ long[] longValues = new long[2];
+ longValues[0] = chunkMetadata.getStatistics().getStartTime();
+ longValues[1] = chunkMetadata.getStatistics().getEndTime();
+ statistics.update(longValues, binaryValues, binaryValues.length);
+ } else {
+ statistics = chunkMetadata.getStatistics();
+ }
+ break;
+ case STRING:
+ if (targetDataType == TSDataType.TEXT) {
+ Binary[] binaryValues = new Binary[2];
+ binaryValues[0] =
+ new Binary(
+ Arrays.asList(TSDataType.TEXT, TSDataType.BLOB)
+ .contains(chunkMetadata.getDataType())
+ ? ""
+ : chunkMetadata.getStatistics().getMinValue().toString(),
+ StandardCharsets.UTF_8);
+ binaryValues[1] =
+ new Binary(
+ Arrays.asList(TSDataType.TEXT, TSDataType.BLOB)
+ .contains(chunkMetadata.getDataType())
+ ? ""
+ : chunkMetadata.getStatistics().getMaxValue().toString(),
+ StandardCharsets.UTF_8);
+ long[] longValues = new long[2];
+ longValues[0] = chunkMetadata.getStatistics().getStartTime();
+ longValues[1] = chunkMetadata.getStatistics().getEndTime();
+ statistics.update(longValues, binaryValues, binaryValues.length);
+ } else {
+ statistics = chunkMetadata.getStatistics();
+ }
+ break;
+ case TEXT:
+ case BLOB:
+ if (targetDataType == TSDataType.STRING) {
+ Binary[] binaryValues = new Binary[2];
+ binaryValues[0] = new Binary("", StandardCharsets.UTF_8);
+ binaryValues[1] = new Binary("", StandardCharsets.UTF_8);
Review Comment:
Maybe create a constant to use as the placeholder for this.
##########
iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/SchemaUtils.java:
##########
@@ -516,10 +415,120 @@ public static AbstractAlignedChunkMetadata
rewriteAlignedChunkMetadataStatistics
alignedChunkMetadata.getTimeChunkMetadata(),
newValueChunkMetadataList);
}
+ public static void rewriteNonAlignedChunkMetadataStatistics(
+ ChunkMetadata chunkMetadata, TSDataType targetDataType) {
+ Statistics<?> statistics = Statistics.getStatsByType(targetDataType);
+ statistics = getNewStatistics(chunkMetadata, targetDataType, statistics);
+
+ chunkMetadata.setTsDataType(targetDataType);
+ chunkMetadata.setStatistics(statistics);
+ }
+
public static TSEncoding getDataTypeCompatibleEncoding(TSDataType dataType,
TSEncoding encoding) {
if (!encoding.isSupported(dataType)) {
return EncodingInferenceUtils.getDefaultEncoding(dataType);
}
return encoding;
}
+
+ public static Statistics<?> getNewStatistics(
+ IChunkMetadata chunkMetadata, TSDataType targetDataType, Statistics<?>
statistics) {
+ switch (chunkMetadata.getDataType()) {
+ case INT32:
+ case DATE:
+ case INT64:
+ case TIMESTAMP:
+ case FLOAT:
+ case DOUBLE:
+ case BOOLEAN:
+ if (targetDataType == TSDataType.STRING) {
+ Binary[] binaryValues = new Binary[4];
+ binaryValues[0] =
+ new Binary(
+ chunkMetadata.getStatistics().getFirstValue().toString(),
StandardCharsets.UTF_8);
+ binaryValues[1] =
+ new Binary(
+ chunkMetadata.getStatistics().getLastValue().toString(),
StandardCharsets.UTF_8);
+ if (chunkMetadata.getDataType() == TSDataType.BOOLEAN) {
+ binaryValues[2] = new Binary(Boolean.FALSE.toString(),
StandardCharsets.UTF_8);
+ binaryValues[3] = new Binary(Boolean.TRUE.toString(),
StandardCharsets.UTF_8);
+ } else {
+ binaryValues[2] =
+ new Binary(
+ chunkMetadata.getStatistics().getMinValue().toString(),
StandardCharsets.UTF_8);
+ binaryValues[3] =
+ new Binary(
+ chunkMetadata.getStatistics().getMaxValue().toString(),
StandardCharsets.UTF_8);
+ }
Review Comment:
Doesn't DATE need special handling here?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]