jt2594838 commented on code in PR #16699:
URL: https://github.com/apache/iotdb/pull/16699#discussion_r2513361144


##########
iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/schema/table/TsFileTableSchemaUtil.java:
##########
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.commons.schema.table;
+
+import org.apache.iotdb.commons.schema.table.column.TsTableColumnCategory;
+import org.apache.iotdb.commons.schema.table.column.TsTableColumnSchema;
+
+import org.apache.tsfile.enums.ColumnCategory;
+import org.apache.tsfile.enums.TSDataType;
+import org.apache.tsfile.file.metadata.TableSchema;
+import org.apache.tsfile.utils.ReadWriteIOUtils;
+import org.apache.tsfile.write.schema.IMeasurementSchema;
+import org.apache.tsfile.write.schema.MeasurementSchema;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+/** Utility class for converting between TsTable and TSFile TableSchema */
+public class TsFileTableSchemaUtil {
+
+  private TsFileTableSchemaUtil() {
+    // Utility class, prevent instantiation
+  }
+
+  /** Column category filter for efficient parsing */
+  public enum ColumnCategoryFilter {
+    /** Include TAG and FIELD only (exclude TIME and ATTRIBUTE) - for TsFile writing */
+    NO_ATTRIBUTE(
+        cat -> cat != TsTableColumnCategory.TIME && cat != TsTableColumnCategory.ATTRIBUTE);
+
+    private final java.util.function.Predicate<TsTableColumnCategory> predicate;
+
+    ColumnCategoryFilter(final java.util.function.Predicate<TsTableColumnCategory> predicate) {
+      this.predicate = predicate;
+    }
+
+    public boolean test(final TsTableColumnCategory category) {
+      return predicate.test(category);
+    }
+  }
+
+  /**
+   * High-performance method to convert TsTable ByteBuffer to TableSchema with column filtering.
+   * Only parses and includes columns that match the filter, skipping others for efficiency. This is
+   * critical for tables with many columns (e.g., 10,000+ columns).
+   *
+   * @param tsTableBuffer ByteBuffer containing serialized TsTable
+   * @param filter Column category filter to determine which columns to include
+   * @return TableSchema object
+   */
+  private static TableSchema tsTableBufferToTableSchemaInternal(
+      final ByteBuffer tsTableBuffer, final ColumnCategoryFilter filter) {
+    final String tableName = ReadWriteIOUtils.readString(tsTableBuffer);
+    final int columnNum = ReadWriteIOUtils.readInt(tsTableBuffer);
+
+    final List<IMeasurementSchema> measurementSchemas = new ArrayList<>(columnNum);
+    final List<ColumnCategory> columnTypes = new ArrayList<>(columnNum);
+
+    for (int i = 0; i < columnNum; i++) {
+      final byte categoryByte = ReadWriteIOUtils.readByte(tsTableBuffer);
+      final TsTableColumnCategory category = TsTableColumnCategory.deserialize(categoryByte);
+
+      // Early filtering: if we don't need this column, skip all its data
+      if (!filter.test(category)) {
+        skipColumnData(tsTableBuffer, category);
+        continue;
+      }
+
+      // Only parse data for columns we need
+      final String columnName = ReadWriteIOUtils.readString(tsTableBuffer);
+      final TSDataType dataType = ReadWriteIOUtils.readDataType(tsTableBuffer);
+
+      if (category == TsTableColumnCategory.FIELD) {
+        ReadWriteIOUtils.readEncoding(tsTableBuffer);
+        ReadWriteIOUtils.readCompressionType(tsTableBuffer);
+      }
+
+      ReadWriteIOUtils.readMap(tsTableBuffer); // Column props
+
+      measurementSchemas.add(new MeasurementSchema(columnName, dataType));
+      columnTypes.add(category.toTsFileColumnType());
+    }
+
+    ReadWriteIOUtils.readMap(tsTableBuffer); // Table props (skip)
+
+    return new TableSchema(tableName, measurementSchemas, columnTypes);
+  }
+
+  /**
+   * Fast skip column data without parsing. Critical for performance with many columns.
+   *
+   * @param buffer ByteBuffer to skip data from
+   * @param category Column category to determine how much data to skip
+   */
+  private static void skipColumnData(
+      final ByteBuffer buffer, final TsTableColumnCategory category) {
+    // Skip column name
+    ReadWriteIOUtils.readString(buffer);
+    // Skip data type
+    ReadWriteIOUtils.readDataType(buffer);
+    // Skip encoding and compression for FIELD columns
+    if (category == TsTableColumnCategory.FIELD) {
+      ReadWriteIOUtils.readEncoding(buffer);
+      ReadWriteIOUtils.readCompressionType(buffer);
+    }
+    // Skip column props
+    ReadWriteIOUtils.readMap(buffer);

Review Comment:
   Consider adding `skipString` and `skipMap` helpers that only advance the buffer position instead of creating temporary objects.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to