This is an automated email from the ASF dual-hosted git repository.

colinlee pushed a commit to branch support_dataframe_to_tsfile
in repository https://gitbox.apache.org/repos/asf/tsfile.git

commit 9e48483996ee585a64802fcdc5e65cc8442507ae
Author: ColinLee <[email protected]>
AuthorDate: Wed Feb 11 00:25:38 2026 +0800

    tmp code.
---
 cpp/src/common/constant/tsfile_constant.h          |   4 ++--
 cpp/src/common/global.cc                           |   2 +-
 cpp/src/cwrapper/tsfile_cwrapper.cc                |   5 -----
 cpp/src/utils/db_utils.h                           |   6 ++++--
 .../tests/resources/table_with_time_column.tsfile  | Bin 0 -> 644 bytes
 python/tests/test_dataframe.py                     |  24 ++++++++++-----------
 python/tests/test_load_tsfile_from_iotdb.py        |  23 ++++++++++++++------
 python/tests/test_to_tsfile.py                     |  12 +++++------
 python/tests/test_write_and_read.py                |  16 +++++++-------
 python/tsfile/constants.py                         |  18 ++++++++--------
 python/tsfile/schema.py                            |   8 ++++---
 11 files changed, 63 insertions(+), 55 deletions(-)

diff --git a/cpp/src/common/constant/tsfile_constant.h b/cpp/src/common/constant/tsfile_constant.h
index d3f4dec1..096c645a 100644
--- a/cpp/src/common/constant/tsfile_constant.h
+++ b/cpp/src/common/constant/tsfile_constant.h
@@ -37,15 +37,15 @@ static const std::string BACK_QUOTE_STRING = "`";
 static const std::string DOUBLE_BACK_QUOTE_STRING = "``";
 
 static const unsigned char TIME_COLUMN_MASK = 0x80;
+static const std::string TIME_COLUMN_NAME = "time";
 static const unsigned char VALUE_COLUMN_MASK = 0x40;
-
-static const std::string TIME_COLUMN_ID = "";
 static const int NO_STR_TO_READ = -1;
 
 static const std::regex IDENTIFIER_PATTERN("([a-zA-Z0-9_\\u2E80-\\u9FFF]+)");
 static const std::regex NODE_NAME_PATTERN(
     "(\\*{0,2}[a-zA-Z0-9_\\u2E80-\\u9FFF]+\\*{0,2})");
 static const int DEFAULT_SEGMENT_NUM_FOR_TABLE_NAME = 3;
+
 }  // namespace storage
 
 #endif
diff --git a/cpp/src/common/global.cc b/cpp/src/common/global.cc
index 37b8c1bb..fd1d0132 100644
--- a/cpp/src/common/global.cc
+++ b/cpp/src/common/global.cc
@@ -122,7 +122,7 @@ int init_common() {
     g_time_column_schema.data_type_ = INT64;
     g_time_column_schema.encoding_ = PLAIN;
     g_time_column_schema.compression_ = UNCOMPRESSED;
-    g_time_column_schema.column_name_ = std::string("time");
+    g_time_column_schema.column_name_ = storage::TIME_COLUMN_NAME;
     return ret;
 }
 
diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc
index f384698b..539d5b96 100644
--- a/cpp/src/cwrapper/tsfile_cwrapper.cc
+++ b/cpp/src/cwrapper/tsfile_cwrapper.cc
@@ -116,11 +116,6 @@ TsFileWriter tsfile_writer_new(WriteFile file, TableSchema* schema,
             *err_code = common::E_INVALID_SCHEMA;
             return nullptr;
         }
-        // Ignore time column definition.
-        if (cur_schema.column_category == TIME) {
-            continue;
-        }
-
         column_schemas.emplace_back(
             cur_schema.column_name,
             static_cast<common::TSDataType>(cur_schema.data_type),
diff --git a/cpp/src/utils/db_utils.h b/cpp/src/utils/db_utils.h
index 85d99b1a..5a1dea8d 100644
--- a/cpp/src/utils/db_utils.h
+++ b/cpp/src/utils/db_utils.h
@@ -37,6 +37,7 @@ namespace common {
 extern TSEncoding get_value_encoder(TSDataType data_type);
 extern CompressionType get_default_compressor();
 
+// TODO: remove this.
 typedef struct FileID {
     int64_t seq_;  // timestamp when create
     int32_t version_;
@@ -64,13 +65,14 @@ typedef struct FileID {
 #endif
 } FileID;
 
+// TODO: remove this.
 typedef uint16_t NodeID;
 struct TsID {
     NodeID db_nid_;
     NodeID device_nid_;
     NodeID measurement_nid_;
 
-    TsID() : db_nid_(0), device_nid_(0), measurement_nid_(0){};
+    TsID() : db_nid_(0), device_nid_(0), measurement_nid_(0) {};
 
     TsID(NodeID db_nid, NodeID device_nid, NodeID measurement_nid)
         : db_nid_(db_nid),
@@ -157,7 +159,7 @@ struct TsID {
  * This enumeration class defines the supported categories for columns within a
  * table schema, distinguishing between tag and field columns.
  */
-enum class ColumnCategory { TAG = 0, FIELD = 1 };
+enum class ColumnCategory { TAG = 0, FIELD = 1, ATTRIBUTE = 2, TIME = 3 };
 
 /**
  * @brief Represents the schema information for a single column.
diff --git a/python/tests/resources/table_with_time_column.tsfile b/python/tests/resources/table_with_time_column.tsfile
new file mode 100644
index 00000000..66be782a
Binary files /dev/null and b/python/tests/resources/table_with_time_column.tsfile differ
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index 09d0001b..de49bc1c 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -22,7 +22,7 @@ import numpy as np
 import pandas as pd
 import pytest
 
-from tsfile import ColumnSchema, TableSchema, TSDataType
+from tsfile import ColumnSchema, TableSchema, TSDataType, TIME_COLUMN
 from tsfile import TsFileTableWriter, ColumnCategory
 from tsfile import to_dataframe
 from tsfile.exceptions import ColumnNotExistError, TypeMismatchError
@@ -70,10 +70,10 @@ def test_write_dataframe_basic():
             writer.write_dataframe(df)
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('time').reset_index(drop=True))
         assert df_read.shape == (100, 4)
-        assert df_read["time"].equals(df_sorted["time"])
+        assert df_read[TIME_COLUMN].equals(df_sorted["time"])
         assert df_read["device"].equals(df_sorted["device"])
         assert df_read["value"].equals(df_sorted["value"])
         assert df_read["value2"].equals(df_sorted["value2"])
@@ -99,12 +99,12 @@ def test_write_dataframe_with_index():
             df.index = [i * 10 for i in range(50)]  # Set index as timestamps
             writer.write_dataframe(df)
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = df.sort_index()
         df_sorted = convert_to_nullable_types(df_sorted.reset_index(drop=True))
         time_series = pd.Series(df.sort_index().index.values, dtype='Int64')
         assert df_read.shape == (50, 3)
-        assert df_read["time"].equals(time_series)
+        assert df_read[TIME_COLUMN].equals(time_series)
         assert df_read["device"].equals(df_sorted["device"])
         assert df_read["value"].equals(df_sorted["value"])
     finally:
@@ -130,10 +130,10 @@ def test_write_dataframe_case_insensitive():
             writer.write_dataframe(df)
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('Time').reset_index(drop=True))
         assert df_read.shape == (30, 3)
-        assert df_read["time"].equals(df_sorted["Time"])
+        assert df_read[TIME_COLUMN].equals(df_sorted["Time"])
         assert df_read["device"].equals(df_sorted["Device"])
         assert df_read["value"].equals(df_sorted["VALUE"])
     finally:
@@ -218,7 +218,7 @@ def test_write_dataframe_all_datatypes():
             writer.write_dataframe(df)
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('time').reset_index(drop=True))
         assert df_read.shape == (50, 11)
         assert df_read["bool_col"].equals(df_sorted["bool_col"])
@@ -257,10 +257,10 @@ def test_write_dataframe_schema_time_column():
             writer.write_dataframe(df)
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('time').reset_index(drop=True))
         assert df_read.shape == (50, 3)
-        assert df_read["time"].equals(df_sorted["time"])
+        assert df_read[TIME_COLUMN].equals(df_sorted[TIME_COLUMN])
         assert df_read["device"].equals(df_sorted["device"])
         assert df_read["value"].equals(df_sorted["value"])
     finally:
@@ -286,7 +286,7 @@ def test_write_dataframe_schema_time_and_dataframe_time():
             writer.write_dataframe(df)
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = convert_to_nullable_types(
             df.sort_values('Time').rename(columns=str.lower).reset_index(drop=True)
         )
@@ -312,7 +312,7 @@ def test_write_dataframe_empty():
                 'time': [],
                 'value': []
             })
-            with pytest.raises(ValueError) as err:
+            with pytest.raises(ValueError):
                 writer.write_dataframe(df)
 
     finally:
diff --git a/python/tests/test_load_tsfile_from_iotdb.py b/python/tests/test_load_tsfile_from_iotdb.py
index d865dd35..8dcc0b1c 100644
--- a/python/tests/test_load_tsfile_from_iotdb.py
+++ b/python/tests/test_load_tsfile_from_iotdb.py
@@ -15,12 +15,13 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-
+import math
 import os
 
 import numpy as np
 
 import tsfile as ts
+from tsfile import TIME_COLUMN
 
 
 def test_load_tsfile_from_iotdb():
@@ -31,8 +32,8 @@ def test_load_tsfile_from_iotdb():
 
     ## --------
     assert len(df) == 105, "row count mismatch"
-    assert df["time"].isna().sum() == 0
-    assert int(df["time"].sum()) == 15960
+    assert df[TIME_COLUMN].isna().sum() == 0
+    assert int(df[TIME_COLUMN].sum()) == 15960
     assert df["temperature"].isna().sum() == 5
     assert df["status"].isna().sum() == 5
     assert (df["status"] == True).sum() == 50
@@ -44,8 +45,8 @@ def test_load_tsfile_from_iotdb():
     df = ts.to_dataframe(simple_tabl1_path)
     ## ---------
     assert len(df) == 60
-    assert df["time"].isna().sum() == 0
-    assert df["time"].sum() == (
+    assert df[TIME_COLUMN].isna().sum() == 0
+    assert df[TIME_COLUMN].sum() == (
             (1760106020000 + 1760106049000) * 30 // 2 +
             (1760106080000 + 1760106109000) * 30 // 2
     )
@@ -78,8 +79,8 @@ def test_load_tsfile_from_iotdb():
     df = ts.to_dataframe(simple_tabl2_path)
     ## ---------
     assert len(df) == 40
-    assert df["time"].isna().sum() == 0
-    assert int(df["time"].sum()) == 70404242080000
+    assert df[TIME_COLUMN].isna().sum() == 0
+    assert int(df[TIME_COLUMN].sum()) == 70404242080000
 
     assert df["s0"].isna().sum() == 0
     assert df["s1"].isna().sum() == 0
@@ -109,3 +110,11 @@ def test_load_tsfile_from_iotdb():
 
     assert df["s9"].isna().sum() == 5
     ## ---------
+    table_with_time_column_path = os.path.join(dir_path, 'table_with_time_column.tsfile')
+    df = ts.to_dataframe(table_with_time_column_path)
+
+    assert len(df) == 25
+    assert math.isclose(df["temperature"].sum(), 2.5, rel_tol=1e-9)
+    assert math.isclose(df["humidity"].sum(), 2.5, rel_tol=1e-9)
+    assert (df["region_id"] == "loc").sum() == 25
+
diff --git a/python/tests/test_to_tsfile.py b/python/tests/test_to_tsfile.py
index c3a970e3..a35d5e89 100644
--- a/python/tests/test_to_tsfile.py
+++ b/python/tests/test_to_tsfile.py
@@ -22,7 +22,7 @@ import numpy as np
 import pandas as pd
 import pytest
 
-from tsfile import to_dataframe, TsFileReader, ColumnCategory
+from tsfile import to_dataframe, TsFileReader, ColumnCategory, TIME_COLUMN
 from tsfile.utils import dataframe_to_tsfile
 
 
@@ -132,11 +132,11 @@ def test_dataframe_to_tsfile_custom_time_column():
         dataframe_to_tsfile(df, tsfile_path, table_name="test_table", time_column="timestamp")
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('timestamp').reset_index(drop=True))
 
         assert df_read.shape == (30, 3)
-        assert df_read["time"].equals(df_sorted["timestamp"])
+        assert df_read[TIME_COLUMN].equals(df_sorted["timestamp"])
         assert df_read["device"].equals(df_sorted["device"])
         assert df_read["value"].equals(df_sorted["value"])
     finally:
@@ -181,7 +181,7 @@ def test_dataframe_to_tsfile_with_tag_columns():
         dataframe_to_tsfile(df, tsfile_path, table_name="test_table", tag_column=["device", "location"])
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('time').reset_index(drop=True))
 
         assert df_read.shape == (20, 4)
@@ -214,7 +214,7 @@ def test_dataframe_to_tsfile_tag_time_unsorted():
 
         assert df_read.shape == (10, 3)
         assert df_read["device"].equals(df_expected["device"])
-        assert df_read["time"].equals(df_expected["time"])
+        assert df_read[TIME_COLUMN].equals(df_expected["time"])
         assert df_read["value"].equals(df_expected["value"])
     finally:
         if os.path.exists(tsfile_path):
@@ -244,7 +244,7 @@ def test_dataframe_to_tsfile_all_datatypes():
         dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('time').reset_index(drop=True))
 
         assert df_read.shape == (50, 11)
diff --git a/python/tests/test_write_and_read.py b/python/tests/test_write_and_read.py
index 3cef99c4..57294a84 100644
--- a/python/tests/test_write_and_read.py
+++ b/python/tests/test_write_and_read.py
@@ -25,7 +25,7 @@ import pytest
 from pandas import Float64Dtype
 from pandas.core.dtypes.common import is_integer_dtype
 
-from tsfile import ColumnSchema, TableSchema, TSEncoding
+from tsfile import ColumnSchema, TableSchema, TSEncoding, TIME_COLUMN
 from tsfile import Compressor
 from tsfile import TSDataType
 from tsfile import Tablet, RowRecord, Field
@@ -170,7 +170,7 @@ def test_tree_query_to_dataframe_variants():
         assert df_all.shape[0] == total_rows
         for measurement in all_measurements:
             assert measurement in df_all.columns
-        assert "time" in df_all.columns
+        assert TIME_COLUMN in df_all.columns
         path_columns = sorted(
             [col for col in df_all.columns if col.startswith("col_")],
             key=lambda name: int(name.split("_")[1]),
@@ -179,7 +179,7 @@ def test_tree_query_to_dataframe_variants():
 
         for _, row in df_all.iterrows():
             device = _extract_device(row, path_columns)
-            timestamp = int(row["time"])
+            timestamp = int(row[TIME_COLUMN])
             assert (device, timestamp) in expected_values
             expected_row = expected_values[(device, timestamp)]
             for measurement in all_measurements:
@@ -201,7 +201,7 @@ def test_tree_query_to_dataframe_variants():
                 assert measurement not in df_subset.columns
         for _, row in df_subset.iterrows():
             device = _extract_device(row, path_columns)
-            timestamp = int(row["time"])
+            timestamp = int(row[TIME_COLUMN])
             expected_row = expected_values[(device, timestamp)]
             for measurement in requested_columns:
                 value = row.get(measurement)
@@ -227,7 +227,7 @@ def test_tree_query_to_dataframe_variants():
         iter_rows = 0
         for batch in iterator:
             assert isinstance(batch, pd.DataFrame)
-            assert set(batch.columns).issuperset({"time", "level"})
+            assert set(batch.columns).issuperset({TIME_COLUMN, "level"})
             iter_rows += len(batch)
         assert iter_rows == 18
 
@@ -242,7 +242,7 @@ def test_tree_query_to_dataframe_variants():
         iter_rows = 0
         for batch in iterator:
             assert isinstance(batch, pd.DataFrame)
-            assert set(batch.columns).issuperset({"time", "level"})
+            assert set(batch.columns).issuperset({TIME_COLUMN, "level"})
             iter_rows += len(batch)
         assert iter_rows == 9
 
@@ -384,7 +384,7 @@ def test_table_writer_and_reader():
                                     0, 10) as result:
                 cur_line = 0
                 while result.next():
-                    cur_time = result.get_value_by_name("time")
+                    cur_time = result.get_value_by_name(TIME_COLUMN)
                     assert result.get_value_by_name("device") == "device" + str(cur_time)
                     assert result.is_null_by_name("device") == False
                     assert result.is_null_by_name("value") == False
@@ -545,7 +545,7 @@ def test_tsfile_to_df():
         df1 = to_dataframe("table_write_to_df.tsfile")
         assert df1.shape == (4097, 4)
         assert df1["value2"].sum() == 100 * (1 + 4096) / 2 * 4096
-        assert is_integer_dtype(df1["time"])
+        assert is_integer_dtype(df1[TIME_COLUMN])
         assert df1["value"].dtype == Float64Dtype()
         assert is_integer_dtype(df1["value2"])
         df2 = to_dataframe("table_write_to_df.tsfile", column_names=["device", "value2"])
diff --git a/python/tsfile/constants.py b/python/tsfile/constants.py
index 6f233e27..18da3aef 100644
--- a/python/tsfile/constants.py
+++ b/python/tsfile/constants.py
@@ -15,10 +15,12 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-from datetime import datetime
 from enum import unique, IntEnum
+
 import numpy as np
 
+TIME_COLUMN = "time"
+
 @unique
 class TSDataType(IntEnum):
     BOOLEAN = 0
@@ -103,7 +105,7 @@ class TSDataType(IntEnum):
                 return cls.STRING
         except (ImportError, AttributeError):
             pass
-        
+
         if hasattr(dtype, 'type'):
             dtype = dtype.type
             if dtype is np.bool_:
@@ -118,12 +120,12 @@ class TSDataType(IntEnum):
                 return cls.DOUBLE
             elif dtype is np.object_:
                 return cls.STRING
-        
+
         dtype_str = str(dtype)
 
         if 'stringdtype' in dtype_str.lower() or dtype_str.startswith('string'):
             return cls.STRING
-        
+
         dtype_map = {
             'bool': cls.BOOLEAN,
             'boolean': cls.BOOLEAN,
@@ -137,17 +139,17 @@ class TSDataType(IntEnum):
             'object': cls.STRING,
             'string': cls.STRING,
         }
-        
+
         if dtype_str in dtype_map:
             return dtype_map[dtype_str]
-        
+
         dtype_lower = dtype_str.lower()
         if dtype_lower in dtype_map:
             return dtype_map[dtype_lower]
 
         if 'object_' in dtype_lower or dtype_str == "<class 'numpy.object_'>":
             return cls.STRING
-        
+
         if dtype_str.startswith('datetime64'):
             return cls.TIMESTAMP
 
@@ -163,8 +165,6 @@ _TSDATATYPE_COMPATIBLE_SOURCES = {
 }
 
 
-
-
 @unique
 class TSEncoding(IntEnum):
     PLAIN = 0
diff --git a/python/tsfile/schema.py b/python/tsfile/schema.py
index 91732eee..f0fa39b1 100644
--- a/python/tsfile/schema.py
+++ b/python/tsfile/schema.py
@@ -119,15 +119,17 @@ class TableSchema:
         self.table_name = table_name.lower()
         if len(columns) == 0:
             raise ValueError("Columns cannot be empty")
-        self.columns = columns
-        for column in self.columns:
+        self.columns = []
+        for column in columns:
             if column.get_category() == ColumnCategory.TIME:
                 if self.time_column is not None:
                     raise ValueError(
                         f"Table '{self.table_name}' cannot have multiple time columns: "
-                        f"'{self.time_column.name}' and '{column.name}'"
+                        f"'{self.time_column.get_column_name()}' and '{column.get_column_name()}'"
                     )
                 self.time_column = column
+            else:
+                self.columns.append(column)
 
     def get_table_name(self):
         return self.table_name

Reply via email to