This is an automated email from the ASF dual-hosted git repository. colinlee pushed a commit to branch colin_python_V4 in repository https://gitbox.apache.org/repos/asf/tsfile.git
commit 11414986199fcb3fd50aaafcaff31a77efb1a76a Author: colin <shuoli...@163.com> AuthorDate: Fri Feb 28 19:27:43 2025 +0800 fix code dump. --- cpp/src/common/tablet.cc | 18 +++++++++++------ python/setup.py | 2 +- python/tests/test_write.py | 45 ++++++++++++++++++++++++++++++----------- python/tsfile/schema.py | 12 ++++++++++- python/tsfile/tablet.py | 21 +++++++++++++------ python/tsfile/tsfile_cpp.pxd | 2 +- python/tsfile/tsfile_py_cpp.pyx | 11 +++++++--- python/tsfile/tsfile_reader.pyx | 3 +-- 8 files changed, 82 insertions(+), 32 deletions(-) diff --git a/cpp/src/common/tablet.cc b/cpp/src/common/tablet.cc index 54f39df5..ac4a2708 100644 --- a/cpp/src/common/tablet.cc +++ b/cpp/src/common/tablet.cc @@ -30,6 +30,7 @@ namespace storage { int Tablet::init() { ASSERT(timestamps_ == nullptr); timestamps_ = (int64_t *)malloc(sizeof(int64_t) * max_row_num_); + cur_row_size_ = 0; size_t schema_count = schema_vec_->size(); std::pair<std::map<std::string, int>::iterator, bool> ins_res; @@ -137,6 +138,7 @@ int Tablet::add_timestamp(uint32_t row_index, int64_t timestamp) { } timestamps_[row_index] = timestamp; cur_row_size_ = std::max(row_index + 1, cur_row_size_); + return E_OK; } @@ -257,6 +259,12 @@ int Tablet::add_value(uint32_t row_index, uint32_t schema_index, return ret; } +template <> +int Tablet::add_value(uint32_t row_index, uint32_t schema_index, + const char *val) { + return add_value(row_index, schema_index, String(val)); +} + template <typename T> int Tablet::add_value(uint32_t row_index, const std::string &measurement_name, T val) { @@ -271,11 +279,10 @@ int Tablet::add_value(uint32_t row_index, const std::string &measurement_name, return ret; } -template<> -int Tablet::add_value(uint32_t row_index, - const std::string &measurement_name, - const char *val) { - add_value(row_index, measurement_name, String(val)); +template <> +int Tablet::add_value(uint32_t row_index, const std::string &measurement_name, + const char *val) { + return add_value(row_index, measurement_name, String(val)); } template int Tablet::add_value(uint32_t row_index, uint32_t schema_index, @@ -304,7 +311,6 @@ template int Tablet::add_value(uint32_t row_index, template int Tablet::add_value(uint32_t row_index, const std::string &measurement_name, String val); - void Tablet::set_column_categories( const std::vector<ColumnCategory> &column_categories) { column_categories_ = column_categories; diff --git a/python/setup.py b/python/setup.py index 7449babd..84727278 100644 --- a/python/setup.py +++ b/python/setup.py @@ -62,7 +62,7 @@ tsfile_py_include_file = os.path.join(project_dir, "tsfile", "tsfile_cwrapper.h" copy_tsfile_header(tsfile_c_include_file, tsfile_py_include_file) ## Copy shared library -tsfile_shared_source_dir = os.path.join(project_dir, "..", "cpp", "build","Release", "lib") +tsfile_shared_source_dir = os.path.join(project_dir, "..", "cpp", "target", "build", "lib") tsfile_shared_dir = os.path.join(project_dir, "tsfile") if system == "Darwin": diff --git a/python/tests/test_write.py b/python/tests/test_write.py index 0f5cd423..56b20728 100644 --- a/python/tests/test_write.py +++ b/python/tests/test_write.py @@ -24,18 +24,39 @@ from tsfile import ColumnSchema, TableSchema from tsfile import Tablet, RowRecord, Field from tsfile import TSDataType -def test_row_record_write(): - try: - writer = TsFileWriter("record_write.tsfile") - timeseries = TimeseriesSchema("level1", TSDataType.INT64) - writer.register_timeseries("root.device1", timeseries) - - record = RowRecord("root.device1", 10,[Field("level1", 10, TSDataType.INT64)]) - writer.write_row_record(record) - writer.close() - finally: - if os.path.exists("record_write.tsfile"): - os.remove("record_write.tsfile") +# def test_row_record_write(): +# try: +# writer = TsFileWriter("record_write.tsfile") +# timeseries = TimeseriesSchema("level1", TSDataType.INT64) +# writer.register_timeseries("root.device1", timeseries) +# +# record = RowRecord("root.device1", 10,[Field("level1", 10, TSDataType.INT64)]) +# writer.write_row_record(record) +# writer.close() +# finally: +# if os.path.exists("record_write.tsfile"): +# os.remove("record_write.tsfile") +# +# def test_tablet_write(): +# try: +# writer = TsFileWriter("tablet_write.tsfile") +# timeseries1 = TimeseriesSchema("level1", TSDataType.INT64) +# timeseries2 = TimeseriesSchema("level2", TSDataType.DOUBLE) +# device = DeviceSchema("root.device1", [timeseries1, timeseries2]) +# writer.register_device(device) +# +# tablet = Tablet(["level1", "level2"], [TSDataType.INT64, TSDataType.DOUBLE], 100) +# tablet.set_table_name("root.device1") +# for i in range(100): +# tablet.add_timestamp(i, i) +# tablet.add_value_by_index(0, i, i + 1) +# tablet.add_value_by_name("level2", i, i * 0.1) +# +# writer.write_tablet(tablet) +# writer.close() +# finally: +# if os.path.exists("tablet_write.tsfile"): +# os.remove("tablet_write.tsfile") def test_tablet_write(): try: diff --git a/python/tsfile/schema.py b/python/tsfile/schema.py index f8b7e8bc..e790a5f2 100644 --- a/python/tsfile/schema.py +++ b/python/tsfile/schema.py @@ -21,6 +21,10 @@ from .constants import TSDataType, ColumnCategory, TSEncoding, Compressor class TimeseriesSchema: + """ + Metadata schema for a time series (name, data type, encoding, compression). + """ + timeseries_name = None data_type = None encoding_type = None @@ -47,6 +51,8 @@ class TimeseriesSchema: class DeviceSchema: + """Represents a device entity containing multiple time series.""" + device_name = None timeseries_list = None @@ -62,10 +68,12 @@ class DeviceSchema: class ColumnSchema: + """Defines schema for a table column (name, datatype, category).""" + column_name = None data_type = None - def __init__(self, column_name: str, data_type: TSDataType, category: ColumnCategory = ColumnCategory.FIELD): + def __init__(self, column_name: str, data_type: TSDataType, category: ColumnCategory = ColumnCategory.FIELD): self.column_name = column_name self.data_type = data_type self.category = category @@ -81,6 +89,7 @@ class ColumnSchema: class TableSchema: + """Schema definition for a table structure.""" table_name = None columns = None @@ -96,6 +105,7 @@ class TableSchema: class ResultSetMetaData: + """Metadata container for query result sets (columns, types, table name).""" column_list = None data_types = None table_name = None diff --git a/python/tsfile/tablet.py b/python/tsfile/tablet.py index 5e28bff5..d7aa9d48 100644 --- a/python/tsfile/tablet.py +++ b/python/tsfile/tablet.py @@ -27,9 +27,20 @@ from .constants import TSDataType, ColumnCategory class Tablet(object): + """ + A pre-allocated columnar data container for batch data with type constraints. - def __init__(self, column_name_list: list[str], type_list: list[TSDataType], - max_row_num: int = 1024): + Initializes: + - column_name_list: Ordered names for data columns + - type_list: TSDataType values specifying allowed types per column + - max_row_num: Pre-allocated row capacity (default 1024) + + Creates timestamp buffer and typed data columns, with value range validation ranges + for numeric types. + """ + + def __init__(self, column_name_list: list[str], type_list: list[TSDataType], + max_row_num: int = 1024): self.timestamp_list = [None for _ in range(max_row_num)] self.data_list: List[List[Union[int, float, bool, str, bytes, None]]] = [ [None for _ in range(max_row_num)] for _ in range(len(column_name_list)) @@ -46,7 +57,7 @@ class Tablet(object): TSDataType.DOUBLE: (np.finfo(np.float64).min, np.finfo(np.float64).max), } - def _check_index(self, col_index : int, row_index : int): + def _check_index(self, col_index: int, row_index: int): if not (0 <= col_index < len(self.column_name_list)): raise IndexError(f"column index {col_index} out of range [0, {len(self.column_name_list) - 1}]") @@ -141,12 +152,10 @@ class Tablet(object): f"Row index {row_index} out of range [0, {self.max_row_num - 1}]" ) return self.data_list[col_index][row_index] + def get_value_list_by_name(self, column_name: str): try: col_index = self.column_name_list.index(column_name) except ValueError: raise ValueError(f"Column '{column_name}' does not exist") from None return self.data_list[col_index] - - - \ No newline at end of file diff --git a/python/tsfile/tsfile_cpp.pxd b/python/tsfile/tsfile_cpp.pxd index 5ad31e9f..9ebe8eee 100644 --- a/python/tsfile/tsfile_cpp.pxd +++ b/python/tsfile/tsfile_cpp.pxd @@ -141,7 +141,7 @@ cdef extern from "./tsfile_cwrapper.h": ErrorCode tablet_add_value_by_index_float(Tablet tablet, uint32_t row_index, uint32_t column_index, float value); ErrorCode tablet_add_value_by_index_bool(Tablet tablet, uint32_t row_index, uint32_t column_index, bint value); ErrorCode tablet_add_value_by_index_string(Tablet tablet, uint32_t row_index, - uint32_t column_index, char * value); + uint32_t column_index, const char * value); void free_tablet(Tablet * tablet); diff --git a/python/tsfile/tsfile_py_cpp.pyx b/python/tsfile/tsfile_py_cpp.pyx index 78334c2f..3f7c22cd 100644 --- a/python/tsfile/tsfile_py_cpp.pyx +++ b/python/tsfile/tsfile_py_cpp.pyx @@ -25,6 +25,7 @@ from libc.stdlib cimport malloc from libc.string cimport strdup from cpython.exc cimport PyErr_SetObject from cpython.unicode cimport PyUnicode_AsUTF8String, PyUnicode_AsUTF8 +from cpython.bytes cimport PyBytes_AsString from tsfile.exceptions import ERROR_MAPPING from tsfile.schema import ResultSetMetaData as ResultSetMetaDataPy @@ -185,7 +186,7 @@ cdef Tablet to_c_tablet(object tablet): cdef char** columns_names cdef TSDataType* column_types cdef bytes row_bytes - cdef char *row_str + cdef const char *row_str if tablet.get_target_name() is not None: device_id_bytes = PyUnicode_AsUTF8String(tablet.get_target_name()) @@ -215,6 +216,7 @@ cdef Tablet to_c_tablet(object tablet): continue timestamp = timestamp_py tablet_add_timestamp(ctablet, row, timestamp) + print("insert timestamp " + str(timestamp)) for col in range(column_num): data_type = to_c_data_type(tablet.get_data_type_list()[col]) @@ -247,11 +249,14 @@ cdef Tablet to_c_tablet(object tablet): if value[row] is not None: tablet_add_value_by_index_double(ctablet, row, col, value[row]) + # STRING elif data_type == TS_DATATYPE_STRING: for row in range(max_row_num): if value[row] is not None: - row_bytes = PyUnicode_AsUTF8String(value[row]) - row_str = row_bytes + py_value = value[row] + row_bytes = PyUnicode_AsUTF8String(py_value) + row_str = PyBytes_AsString(row_bytes) + print("begin to write:" + value[row]) tablet_add_value_by_index_string(ctablet, row, col, row_str) diff --git a/python/tsfile/tsfile_reader.pyx b/python/tsfile/tsfile_reader.pyx index d10eec1c..6f3f0a3a 100644 --- a/python/tsfile/tsfile_reader.pyx +++ b/python/tsfile/tsfile_reader.pyx @@ -75,7 +75,6 @@ cdef class ResultSetPy: """ cdef ErrorCode code = 0 self.check_result_set_invalid() - print("get next here") has_next = tsfile_result_set_next(self.result, &code) check_error(code) return has_next @@ -290,7 +289,7 @@ cdef class TsFileReaderPy: # result_set_bak to avoid runtime error. result_set_bak = list(self.activate_result_set_list) for result_set in result_set_bak: - result_set.set_invalid_result_set(True) + result_set.set_invalid_result_set() cdef ErrorCode err_code err_code = tsfile_reader_close(self.reader)