Hi all,
I am currently working on CTsfile and CPPTsfile. There is already some C++ code
that needs some organizing work, such as removing dependencies on other
modules, adjusting the compilation chain, and removing company tags from the
code. Currently, there is a header file interface definition for CTsFile that is
up for discussion. This definition will also be used to wrap the C++ code, so
that users can use our code in as similar a way as possible.
The header file definition and a simple example of the tsfile read/write
process can be found in the attachment.
I've noticed that the code uses void* to store data. When constructing
expressions, it's necessary to convert certain data into a specific format,
like "(void*)&(float){30.0f}" in "Expression tempreature_condition = {
"temperature", {(void*)&(float){30.0f}, TS_TYPE_FLOAT}, GT, NULL, 0 };",
which might increase the usage cost for developers. It's also feasible to use
char* to express the constant conditions within an Expression as strings;
however, this method requires an additional type conversion.
All suggestions and ideas are open for discussion.
Colin Lee.
#include "CTsFile.h"
#include <stdio.h>
#include <stdbool.h>
#include <limits.h>
#include <stdint.h>
// Print an error code on stdout and make the enclosing function return false.
// The expansion is unconditional: it prints and returns every time it is
// reached, whatever value is passed, so callers guard it with their own check.
// Only usable inside functions whose return type accepts `false`.
#define HANDLE_ERRNO(code)                            \
    do {                                              \
        printf("Error opening file: %d\n", (code));   \
        return false;                                 \
    } while (0)
// Program entry point; the tsfile driver code is left out of this example.
int main(void)
{
    // Some code about tsfile
    return 0;
}
// Example: read the "temperature" and "humidity" columns of "table1" from
// "file.tsfile", keeping only rows where
//   location = "Antarctic" AND temperature > 30.0 AND humidity < 80,
// and print every returned row.
//
// Returns true once the iterator is exhausted; returns false when the reader,
// the table schema or the query iterator could not be obtained.
//
// NOTE(review): no reader-close or schema-release call is declared in the
// visible API, so `reader` and `schema` are not released here.
bool query_data_from_tsfile() {
    TsFileConf conf = { 512 };
    // Not named `errno`: that identifier is reserved for the <errno.h> macro.
    ErrCode err_code = 0;
    TsFileReader reader = tsreader_open("file.tsfile", &conf, &err_code);
    if (reader == NULL) {
        HANDLE_ERRNO(err_code);
    }

    TableSchema *schema = tsfile_get_table_schema(reader, "table1");
    if (schema == NULL) {
        printf("Error getting schema\n");
        return false;
    }

    // Query: location = Antarctic and temperature > 30.0 and humidity < 80
    Expression tempreature_condition = {
        "temperature",
        {(void*)&(float){30.0f}, TS_TYPE_FLOAT},
        GT,
        NULL,
        0
    };
    Expression humidity_condition = {
        "humidity",
        {(void*)&(int32_t){80}, TS_TYPE_INT32},
        LT,
        NULL,
        0
    };
    // Inner node: AND over the two measurement conditions above.
    Expression *columns_filter[] = {
        &tempreature_condition,
        &humidity_condition
    };
    Expression column_condition = {
        NULL,
        {NULL, 0},
        AND,
        columns_filter,
        2
    };
    Expression id_condition = {
        "location",
        {"Antarctic", TS_TYPE_TEXT},
        EQ,
        NULL,
        0
    };

    // `const char *` elements so the array converts to the `const char **`
    // parameter of tsfile_query without a qualifier mismatch.
    const char *columns_selected[] = {
        "temperature",
        "humidity"
    };

    // timestamp is `long long`, so the open range uses the LLONG_* limits;
    // LONG_MIN/LONG_MAX are only 32 bits wide on LLP64 platforms.
    TsDataIterator iterator = tsfile_query(
        reader, "table1", columns_selected, 2, LLONG_MIN, LLONG_MAX,
        &id_condition, &column_condition
    );
    if (iterator == NULL) {
        printf("Error creating query iterator\n");
        return false;
    }

    DataResult result;
    while ((result = tsfile_next(iterator, 10)) != NULL) {
        // NOTE(review): cur_num is taken to be the number of filled rows in
        // this batch (max_capacity being the allocated size) - confirm.
        for (int i = 0; i < result->cur_num; i++) {
            printf("time: %lld \n", result->times[i]);
            printf("temperature: %f \n", ((float *)result->value[0])[i]);
            printf("humidity: %d \n", ((int32_t *)result->value[1])[i]);
        }
        // Release each batch while it is still non-NULL; after the loop
        // `result` is always NULL, so destroying it there frees nothing.
        // NOTE(review): assumes the caller owns every batch returned by
        // tsfile_next - confirm against the implementation.
        destory_tablet(result);
    }
    clean_data_iterator(iterator);
    return true;
}
// Example: create "file.ts", register "table1", fill one tablet with
// temperature/humidity points and write it to the file.
//
// Returns true when every step succeeds.  On the first failing step the
// tablet and writer acquired so far are released, the error code is reported
// through HANDLE_ERRNO and false is returned.
//
// NOTE(review): error code 0 is treated as success, matching the value the
// error variable is initialised with - confirm against the ErrCode contract.
bool insert_data_to_tsfile() {
    // Create a tsfile
    TsFileConf conf = { 512 };
    // Not named `errno`: that identifier is reserved for the <errno.h> macro.
    ErrCode ret = 0;
    Tablet tablet;
    bool tablet_created = false;
    TsFileWriter writer = tswriter_open("file.ts", &conf, &ret);
    if (writer == NULL || ret != 0) {
        // Report and leave before anything else is set up.
        HANDLE_ERRNO(ret);
    }

    // Register table with one column.  The API takes an array of
    // ColumnSchema pointers, hence the companion pointer arrays below.
    ColumnSchema level[] = {
        {"level", TS_COLUMN_MEASUREMENT | TS_TYPE_INT32 | TS_ENCODING_PLAIN},
    };
    ColumnSchema *level_columns[] = { &level[0] };
    ret = tsfile_register_table(writer, "table1", level_columns, 1);
    if (ret != 0) {
        goto fail;
    }

    // Register table with three columns
    ColumnSchema cols[] = {
        {"location", TS_COLUMN_ID | TS_TYPE_TEXT | TS_ENCODING_PLAIN},
        {"temperature", TS_COLUMN_MEASUREMENT | TS_TYPE_FLOAT | TS_ENCODING_TS_2DIFF | TS_COMPRESS_LZ4},
        {"humidity", TS_COLUMN_MEASUREMENT | TS_TYPE_INT32 | TS_ENCODING_TS_2DIFF | TS_COMPRESS_LZ4}
    };
    ColumnSchema *table_columns[] = { &cols[0], &cols[1], &cols[2] };
    ret = tsfile_register_table(writer, "table1", table_columns, 3);
    if (ret != 0) {
        goto fail;
    }
    // table1: location(id), temperature, humidity, level

    // create tablet to insert data
    ColumnSchema cols_to_insert_schema[] = {
        {"temperature", TS_TYPE_FLOAT},
        {"humidity", TS_TYPE_INT32}
    };
    ColumnSchema *insert_columns[] = {
        &cols_to_insert_schema[0],
        &cols_to_insert_schema[1]
    };
    tablet = create_tablet("table1", insert_columns, 2, 10);
    tablet_created = true;

    // insert data, one column at a time.  The column name travels as a
    // one-element `const char *` array, which is what `const char **` expects
    // (`&"temperature"` is a pointer to a char array, not to a char pointer).
    const char *temperature_column[] = { "temperature" };
    ret = tablet_insert_datapoint(&tablet, 1, temperature_column, (void*)&(float){1.0f}, 1);
    if (ret != 0) {
        goto fail;
    }
    int32_t humidity = 80;
    const char *humidity_column[] = { "humidity" };
    ret = tablet_insert_datapoint(&tablet, 1, humidity_column, &humidity, 1);
    if (ret != 0) {
        goto fail;
    }

    // insert data for both columns in one call
    void *value[2] = {
        (void*)&(float){1.2f},
        (void*)&(int32_t){82}
    };
    const char *cols_to_insert[2] = {
        "temperature",
        "humidity"
    };
    // NOTE(review): tablet_insert_datapoints (plural) is not declared in the
    // visible header; only tablet_insert_datapoint is - confirm the name.
    ret = tablet_insert_datapoints(&tablet, 1, cols_to_insert, value, 2);
    if (ret != 0) {
        goto fail;
    }

    ret = tsfile_write_tablet_datapoint(writer, &tablet);
    if (ret != 0) {
        goto fail;
    }

    // Cleared first so the failure path never destroys the tablet twice.
    tablet_created = false;
    ret = destory_tablet(&tablet);
    if (ret != 0) {
        goto fail;
    }
    ret = tsfile_flush(writer);
    if (ret != 0) {
        goto fail;
    }
    ret = tsfile_close(writer);
    if (ret != 0) {
        // The writer was already handed to tsfile_close; do not close it again.
        HANDLE_ERRNO(ret);
    }
    return true;

fail:
    // Best-effort cleanup; the code of the step that failed stays in `ret`.
    if (tablet_created) {
        (void)destory_tablet(&tablet);
    }
    (void)tsfile_close(writer);
    // Prints the code and returns false from this function.
    HANDLE_ERRNO(ret);
}
// Bit set carried by ColumnSchema.schema_info (see the TS_* flag macros).
typedef long long int SchemaInfo;
// Error code type used by the API calls.
typedef int ErrCode;
// Time value used for row times and for query ranges.
typedef long long int timestamp;
// Configuration passed to TsFileReader and TsFileWriter, for example to cap
// the memory used while writing or querying.
// Further members can be added later as needed.
typedef struct tsfile_conf {
    int mem_threshold_kb;   // memory threshold; unit presumably KB per the name
} TsFileConf;
// Tsfile's reader and writer are handles: pointers to state whose layout is
// defined by the implementation according to the actual design.
// Only incomplete struct types are declared here, because a struct with an
// empty body is not valid ISO C; callers just pass the handles around.
typedef struct tsfile_reader *TsFileReader;
typedef struct tsfile_writer *TsFileWriter;
// Schema of a single column.
//   column_name - name of the column.
//   schema_info - bit set describing the column's data type, encoding and
//                 compression; individual settings are read back with bit
//                 operations.
typedef struct column_schema {
    char *column_name;
    SchemaInfo schema_info;
} ColumnSchema;
// Schema of a table: its name plus column_num column schemas.
// The spellings `table_shcema` / `column_shcema` are part of the declared
// interface and are kept unchanged.
typedef struct table_shcema {
    char *table_name;
    ColumnSchema **column_shcema;
    int column_num;
} TableSchema;
// Batch-insertion buffer; one has to be created before inserting data.
//   value  - pointer array holding the data to insert; the schema info tells
//            how to interpret the data array behind each pointer.
//   bitmap - bitmap array indicating whether the data has been inserted.
typedef struct tablet {
    char *table_name;
    ColumnSchema **column_shcema;
    int column_num;
    timestamp *times;
    bool **bitmap;
    void **value;
    int cur_num;
    int max_capacity;
} Tablet;
// TsDataIterator is the iterator over a query result.
// Like the reader and writer it is a handle: a pointer to state whose layout
// belongs to the implementation.  The struct is left incomplete here because
// a struct with an empty body is not valid ISO C.
typedef struct data_iterator *TsDataIterator;
// Result of a query.  It reuses the Tablet layout, so a DataResult is a
// pointer to a Tablet.
typedef Tablet *DataResult;
// Flag bits combined into a SchemaInfo value.  Every definition is
// parenthesized so it stays intact inside larger expressions such as
// `~TS_TYPE_INT32` or `x == TS_COLUMN_MEASUREMENT`.
// For COLUMN TYPE 0 - 7
#define TS_COLUMN_ID (1 << 0)
#define TS_COLUMN_MEASUREMENT (1 << 1)
// For VALUE TYPE 8 - 15
#define TS_TYPE_INT32 (1 << 8)
#define TS_TYPE_BOOLEAN (1 << 9)
#define TS_TYPE_FLOAT (1 << 10)
#define TS_TYPE_DOUBLE (1 << 11)
#define TS_TYPE_INT64 (1 << 12)
#define TS_TYPE_TEXT (1 << 13)
// For ENCODING TYPE 16 - 31
#define TS_ENCODING_PLAIN (1 << 16)
#define TS_ENCODING_TS_2DIFF (1 << 17)
// For COMPRESS TYPE 32 --
// Bits 32 and up do not fit in a 32-bit int, and shifting an int by its width
// or more is undefined, so these are computed in long long (the type behind
// SchemaInfo).
#define TS_COMPRESS_UNCOMPRESS (1LL << 32)
#define TS_COMPRESS_LZ4 (1LL << 33)
// Operator carried by an Expression node.  The numeric values are spelled out
// and are the same ones the implicit numbering would assign.
typedef enum operator_type {
    OR       = 0,
    AND      = 1,
    NOT      = 2,
    LT       = 3,
    LE       = 4,
    EQ       = 5,
    GT       = 6,
    GE       = 7,
    NE       = 8,
    IN       = 9,
    ISNULL   = 10,
    BETWEEN  = 11,
    CONSTANT = 12,
    COLUMN   = 13
} OperatorType;
// Constant operand of a condition inside an Expression.
//   value_condition - untyped pointer to the constant's value.
//   value_type      - integer describing the data type of that value.
typedef struct constant {
    void *value_condition;
    int value_type;
} Constant;
// Expression is one node of the expression tree used in a query.
// Which members are meaningful depends on operator_type: in the read example
// a comparison node fills column_name and constant_condition and leaves the
// children NULL/0, while an AND node fills child_expressions and child_num.
typedef struct expression {
    char *column_name;
    Constant constant_condition;
    OperatorType operator_type;
    // Spelled with the struct tag: the `Expression` typedef name does not
    // exist yet at this point inside its own definition.
    struct expression **child_expressions;
    int child_num;
} Expression;
// OPEN API
// Open a tsfile for reading or writing.
//   filename - path of the tsfile.
//   conf     - pointer to the TsFileConf struct.
//   flags    - open mode, like O_CREAT, O_RDWR and O_TRUNC in POSIX.
//   err_code - receives the error code (the examples pass the address of an
//              ErrCode and print it on failure).  It is deliberately not named
//              `errno`, which <errno.h> defines as a macro.
// C has no function overloading, so every variant needs its own name.  The
// original names are kept for the (filename, conf, err_code) forms that the
// examples call; the names of the other variants are proposals.
TsFileReader tsreader_open(const char *filename, TsFileConf *conf, ErrCode *err_code);
TsFileReader tsreader_open_default(const char *filename, ErrCode *err_code);
TsFileWriter tswriter_open(const char *filename, TsFileConf *conf, ErrCode *err_code);
TsFileWriter tswriter_open_default(const char *filename, ErrCode *err_code);
TsFileWriter tswriter_open_with_flags(const char *filename, int flags, TsFileConf *conf, ErrCode *err_code);
// Get schema from reader or writer.
// C cannot overload on the handle type, so the writer variants carry their
// own names (proposals); the reader variants keep the original names, which
// is what the read example calls.
TableSchema *tsfile_get_table_schema(TsFileReader reader, const char *table_name);
TableSchema *tsfile_writer_get_table_schema(TsFileWriter writer, const char *table_name);
ColumnSchema *tsfile_get_column_schema(TsFileReader reader, const char *table_name, const char *column_name);
ColumnSchema *tsfile_writer_get_column_schema(TsFileWriter writer, const char *table_name, const char *column_name);
// Create table with writer.
// The original name is kept for the (table_name, column_schema, column_num)
// form used by the write example.  The TableSchema form needs a distinct name
// because C has no overloading; the name chosen here is a proposal.
ErrCode tsfile_register_table(TsFileWriter writer, const char *table_name, ColumnSchema **column_schema, int column_num);
ErrCode tsfile_register_table_schema(TsFileWriter writer, TableSchema *table_schema);
// Create tablet and insert data.
// The original name is kept for the form used by the write example.  The
// TableSchema form needs a distinct name because C has no overloading; the
// name chosen here is a proposal.
Tablet create_tablet(const char *table_name, ColumnSchema **column_schema, int column_num, int max_line);
Tablet create_tablet_from_schema(TableSchema *table_schema, int max_line);
// Insert data into tablet.
ErrCode tablet_insert_datapoint(Tablet *tablet, timestamp time, const char **column_names, void *value, int column_num);
// Some APIs much more convenient to use.
// This typed variant needs its own name because C has no overloading; the
// name chosen here is a proposal.
ErrCode tablet_insert_datapoint_float(Tablet *tablet, timestamp time, const char *column_name, float value);
// Insert a tablet into the tsfile writer; the result is an ErrCode.
ErrCode tsfile_write_tablet_datapoint(TsFileWriter writer,
                                      Tablet *tablet);
// Destroy a tablet after insert; the result is an ErrCode.
// The identifier keeps its `destory` spelling because the examples call it
// by that name.
ErrCode destory_tablet(Tablet *tablet);
// Flush data to disk
ErrCode tsfile_flush(TsFileWriter writer);
// Close the writer.  Declared because the write example calls
// tsfile_close(writer) and checks its ErrCode result; without a prototype
// that call is an implicit function declaration.
ErrCode tsfile_close(TsFileWriter writer);
// Query data from a tsfile reader and get an iterator over the result.
//   columns / column_num    - names of the columns to return and their count.
//   start_time / end_time   - time range of the query (whether the bounds are
//                             inclusive is not stated here - TODO confirm).
//   id_condition            - expression tree; the read example passes its
//                             condition on the "location" id column here.
//   column_condition        - expression tree; the read example passes its
//                             conditions on the measurement columns here.
TsDataIterator tsfile_query(TsFileReader reader,
                            const char *table_name,
                            const char **columns,
                            int column_num,
                            timestamp start_time,
                            timestamp end_time,
                            Expression *id_condition,
                            Expression *column_condition);
// Get the next data from the iterator.
// line_num is presumably the number of rows requested per call - TODO confirm.
// The read example keeps fetching until the call yields NULL.
DataResult tsfile_next(TsDataIterator iterator,
                       int line_num);
// Free the memory held by a query; takes the iterator that tsfile_query
// produced.
void clean_data_iterator(TsDataIterator iterator);