jt2594838 commented on code in PR #388: URL: https://github.com/apache/tsfile/pull/388#discussion_r1967080346
########## cpp/src/cwrapper/tsfile_cwrapper.cc: ########## @@ -29,17 +29,31 @@ static bool is_init = false; Tablet tablet_new_with_device(const char *device_id, char **column_name_list, - TSDataType *data_types, int column_num, - int max_rows) { + TSDataType *data_types, ColumnCategory *category, + int column_num, int max_rows) { std::vector<std::string> measurement_list; Review Comment: `tablet_new_with_device` is for the tree model, which does not need categories. ########## python/tsfile/constants.py: ########## @@ -45,6 +45,32 @@ def to_py_type(self): elif self == TSDataType.TEXT or self == TSDataType.STRING: return str + def to_pandas_dtype(self): + """ + Convert datatype to pandas dtype + """ + if self == TSDataType.BOOLEAN: + return "bool" + elif self == TSDataType.INT32: + return "int32" + elif self == TSDataType.INT64: + return "int64" + elif self == TSDataType.FLOAT: + return "float32" + elif self == TSDataType.DOUBLE: + return "float64" + elif self == TSDataType.TEXT or self == TSDataType.STRING: + return "object" + elif self == TSDataType.TIMESTAMP: + return "datetime64[ns]" + elif self == TSDataType.DATE: + return "datetime64[ns]" Review Comment: Double-check this. Dates are stored as int32 values like 20250112; is that representation compatible with "datetime64[ns]"? 
########## python/tsfile/schema.py: ########## @@ -17,57 +17,110 @@ # from typing import List -from .constants import TSDataType, Category, TSEncoding, Compressor +from .constants import TSDataType, ColumnCategory, TSEncoding, Compressor + class TimeseriesSchema: timeseries_name = None data_type = None encoding_type = None compression_type = None - def __init__(self, timeseries_name : str, data_type : TSDataType, encoding_type : TSEncoding = None, compression_type : Compressor = None): + def __init__(self, timeseries_name: str, data_type: TSDataType, encoding_type: TSEncoding = TSEncoding.PLAIN, + compression_type: Compressor = Compressor.UNCOMPRESSED): self.timeseries_name = timeseries_name self.data_type = data_type - self.encoding_type = encoding_type if encoding_type is not None else TSEncoding.PLAIN - self.compression_type = compression_type if compression_type is not None else Compressor.UNCOMPRESSED + self.encoding_type = encoding_type + self.compression_type = compression_type + + def get_timeseries_name(self): + return self.timeseries_name + + def get_data_type(self): + return self.data_type + + def get_encoding_type(self): + return self.encoding_type + + def get_compression_type(self): + return self.compression_type + class DeviceSchema: device_name = None timeseries_list = None - def __init__(self, device_name : str, timeseries_list : List[TimeseriesSchema]): + + def __init__(self, device_name: str, timeseries_list: List[TimeseriesSchema]): self.device_name = device_name self.timeseries_list = timeseries_list + def get_device_name(self): + return self.device_name + + def get_timeseries_list(self): + return self.timeseries_list + + class ColumnSchema: column_name = None data_type = None - category = None - def __init__(self, column_name : str, data_type : TSDataType, category : Category): + + def __init__(self, column_name: str, data_type: TSDataType, category: ColumnCategory = ColumnCategory.FIELD): self.column_name = column_name self.data_type = 
data_type self.category = category + def get_column_name(self): + return self.column_name + + def get_data_type(self): + return self.data_type + + def get_category(self): + return self.category + class TableSchema: table_name = None columns = None - def __init__(self, table_name : str, columns : List[ColumnSchema]): + + def __init__(self, table_name: str, columns: List[ColumnSchema]): self.table_name = table_name self.columns = columns + def get_table_name(self): + return self.table_name + + def get_columns(self): + return self.columns + + class ResultSetMetaData: column_list = None data_types = None - device_name = None - def __init__(self, column_list : List[str], data_types : List[TSDataType]): + device_id = None + + def __init__(self, column_list: List[str], data_types: List[TSDataType]): self.column_list = column_list self.data_types = data_types - def set_device_name(self, device_name : str): - self.device_name = device_name - def get_data_type(self, column_index : int) -> TSDataType: + def set_device_name(self, device_id: str): + self.device_id = device_id Review Comment: The method name `set_device_name` no longer matches the renamed `device_id` field; rename the method as well. ########## python/tsfile/tablet.py: ########## @@ -87,11 +91,9 @@ def add_timestamp(self, row_index: int, timestamp: int): self.timestamp_list[row_index] = timestamp def _check_numeric_range(self, value: Union[int, float], data_type: TSDataType): + if math.isnan(value) or math.isinf(value): + return Review Comment: This early return should also require a non-integer data type, i.e. add `and (data_type != INT32 and data_type != INT64)`. ########## python/tsfile/tsfile_reader.pyx: ########## @@ -100,69 +154,122 @@ cdef class ResultSetPy: """ Checks whether the field with the specified column name in the result set is null. """ + self.check_result_set_invalid() ind = self.metadata.get_column_name_index(name) return self.is_null_by_index(ind) + def check_result_set_invalid(self): + if self.not_invalid_result_set: + raise Exception("Invalid result set. 
TsFile Reader not exists") + + def get_result_set_invalid(self): + return self.not_invalid_result_set + def close(self): """ Close result set, free C resource. :return: """ - free_tsfile_result_set(&self.result) + if self.result != NULL: + free_tsfile_result_set(&self.result) + + + if self.tsfile_reader is not None: + reader = self.tsfile_reader() + if reader is not None: + reader.notify_result_set_discard(self) + + self.result = NULL + self.not_invalid_result_set = True + + def set_invalid_result_set(self, invalid : bool): + self.not_invalid_result_set = invalid + self.close() Review Comment: Double-check this: `close()` unconditionally sets `not_invalid_result_set = True`, so the value assigned from `invalid` just before the call is overwritten. ########## python/tsfile/tsfile_reader.pyx: ########## @@ -29,14 +37,23 @@ cdef class ResultSetPy: """ Get data from a query result. """ + __pyx_allow_weakref__ = True cdef ResultSet result - cdef public object metadata - cdef public object device_name + cdef object metadata + cdef object device_name + cdef object not_invalid_result_set Review Comment: Avoid the double negation in the name `not_invalid_result_set`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@tsfile.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org