jt2594838 commented on code in PR #345:
URL: https://github.com/apache/tsfile/pull/345#discussion_r1897221081


##########
cpp/src/common/device_id.h:
##########
@@ -0,0 +1,212 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef COMMON_DEVICE_ID_H
+#define COMMON_DEVICE_ID_H
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <numeric>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "common/allocator/byte_stream.h"
+#include "utils/errno_define.h"
+
+class IDeviceID {
+   public:
+    virtual ~IDeviceID() = default;
+    virtual int serialize(common::ByteStream& write_stream) { return 0; }
+    virtual std::string get_table_name() { return ""; }
+    virtual int segment_num() { return 0; }
+    virtual std::vector<std::string> get_segments() const { return {}; }
+    virtual std::string get_device_name() const { return ""; };
+    virtual bool operator<(const IDeviceID& other) { return 0; }
+    virtual bool operator==(const IDeviceID& other) { return false; }
+    virtual bool operator!=(const IDeviceID& other) { return false; }
+};
+
+struct IDeviceIDComparator {
+    bool operator()(const std::shared_ptr<IDeviceID>& lhs,
+                    const std::shared_ptr<IDeviceID>& rhs) const {
+        return *lhs < *rhs;
+    }
+};
+
+class StringArrayDeviceID : public IDeviceID {
+   public:
+    explicit StringArrayDeviceID(const std::vector<std::string>& segments)
+        : segments_(formalize(segments)) {}
+
+    explicit StringArrayDeviceID(const std::string& device_id_string)
+        : segments_(split_device_id_string(device_id_string)) {}
+
+    ~StringArrayDeviceID() override = default;
+
+    std::string get_device_name() const override {
+        return std::accumulate(std::next(segments_.begin()), segments_.end(),
+                               segments_.front(),
+                               [](std::string a, const std::string& b) {
+                                   return std::move(a) + "." + b;
+                               });
+    };
+
+    int serialize(common::ByteStream& write_stream) override {
+        int ret = common::E_OK;
+        if (RET_FAIL(common::SerializationUtil::write_var_int(segment_num(),
+                                                              write_stream))) {
+            return ret;
+        }
+        for (const auto& segment : segments_) {
+            if (RET_FAIL(common::SerializationUtil::write_var_int(
+                    segment.size(), write_stream))) {
+                return ret;
+            } else if (RET_FAIL(write_stream.write_buf(segment.c_str(),
+                                                       segment.size()))) {
+                return ret;
+            }
+        }
+        return ret;
+    }
+
+    std::string get_table_name() override {
+        return segments_.empty() ? "" : segments_[0];
+    }
+
+    int segment_num() override { return static_cast<int>(segments_.size()); }
+
+    std::vector<std::string> get_segments() const override { return segments_; 
}
+
+    virtual bool operator<(const IDeviceID& other) override {
+        auto other_segments = other.get_segments();
+        return std::lexicographical_compare(segments_.begin(), segments_.end(),
+                                            other_segments.begin(),
+                                            other_segments.end());
+    }
+
+    bool operator==(const IDeviceID& other) override {
+        auto other_segments = other.get_segments();
+        return (segments_.size() == other_segments.size()) &&
+               std::equal(segments_.begin(), segments_.end(),
+                          other_segments.begin());
+    }
+
+    bool operator!=(const IDeviceID& other) override {
+        return !(*this == other);
+    }
+
+   private:
+    std::vector<std::string> segments_;
+
+    static std::vector<std::string> formalize(
+        const std::vector<std::string>& segments) {
+        auto it =
+            std::find_if(segments.rbegin(), segments.rend(),
+                         [](const std::string& seg) { return !seg.empty(); });
+        return std::vector<std::string>(segments.begin(), it.base());
+    }
+
+    static std::vector<std::string> split_device_id_string(
+        std::basic_string<char> device_id_string) {
+        std::vector<std::string> splits;
+        std::istringstream stream(device_id_string);
+        std::string segment;
+        while (std::getline(stream, segment, '.')) {
+            splits.push_back(segment);
+        }
+        return splits;
+    }

Review Comment:
   Split a tree model deviceId may not be as simple, because it may contain 
escaped characters like "root.db1.`d1.1`.s1". 
   @zwhzzz0821 has introduced antlr for this. You may use that.



##########
cpp/src/common/schema.h:
##########
@@ -84,5 +113,147 @@ struct MeasurementSchemaGroup {
     TimeChunkWriter *time_chunk_writer_ = nullptr;
 };
 
+enum class ColumnCategory { ID, MEASUREMENT };
+
+class TableSchema {
+   public:
+    TableSchema() = default;
+    TableSchema(const std::string &table_name,
+                const std::vector<std::shared_ptr<MeasurementSchema>>
+                    &measurement_schemas,
+                const std::vector<ColumnCategory> &column_categories)
+        : table_name_(table_name),
+          measurement_schemas_(measurement_schemas),
+          column_categories_(column_categories) {
+        int idx = 0;
+        for (auto &measurement_schema : measurement_schemas_) {
+            column_pos_index_.insert(
+                std::make_pair(measurement_schema->measurement_name_, idx++));
+        }
+    }
+
+    TableSchema(TableSchema &&other) noexcept
+        : table_name_(std::move(other.table_name_)),
+          measurement_schemas_(std::move(other.measurement_schemas_)),
+          column_categories_(std::move(other.column_categories_)) {}
+
+    TableSchema(const TableSchema &other) = default;
+
+    int serialize_to(common::ByteStream &out) {
+        int ret = common::E_OK;
+        if (RET_FAIL(common::SerializationUtil::write_var_uint(
+                measurement_schemas_.size(), out))) {
+        } else {
+            for (size_t i = 0; IS_SUCC(ret) && i < measurement_schemas_.size();
+                 i++) {
+                auto column_schema = measurement_schemas_[i];
+                auto column_category = column_categories_[i];
+                // column_schema-
+                common::SerializationUtil::write_i32(
+                    static_cast<int32_t>(column_category), out);

Review Comment:
   Column schema? And column_category should not be i32.



##########
cpp/src/common/tablet.h:
##########
@@ -109,12 +144,16 @@ class Tablet {
 
    private:
     int max_row_num_;
-    std::string device_id_;
+    int cur_row_size_;
+    std::string insert_target_name_;
     std::shared_ptr<std::vector<MeasurementSchema>> schema_vec_;
     std::map<std::string, int> schema_map_;
     int64_t *timestamps_;
     void **value_matrix_;
     common::BitMap *bitmaps_;
+    std::vector<ColumnCategory> column_categories_;
+    std::vector<int> id_column_indexes_;
+    bool owned_schemas_;

Review Comment:
   What is this for?



##########
cpp/src/common/schema.h:
##########
@@ -84,5 +113,147 @@ struct MeasurementSchemaGroup {
     TimeChunkWriter *time_chunk_writer_ = nullptr;
 };
 
+enum class ColumnCategory { ID, MEASUREMENT };

Review Comment:
   Rename to TAG and FIELD. Also, since ATTRIBUTE and TIME are removed, please 
double-confirm the consistency of serialization with the Java edition.



##########
cpp/src/common/device_id.h:
##########
@@ -0,0 +1,212 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef COMMON_DEVICE_ID_H
+#define COMMON_DEVICE_ID_H
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <numeric>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "common/allocator/byte_stream.h"
+#include "utils/errno_define.h"
+
+class IDeviceID {
+   public:
+    virtual ~IDeviceID() = default;
+    virtual int serialize(common::ByteStream& write_stream) { return 0; }
+    virtual std::string get_table_name() { return ""; }
+    virtual int segment_num() { return 0; }
+    virtual std::vector<std::string> get_segments() const { return {}; }
+    virtual std::string get_device_name() const { return ""; };
+    virtual bool operator<(const IDeviceID& other) { return 0; }
+    virtual bool operator==(const IDeviceID& other) { return false; }
+    virtual bool operator!=(const IDeviceID& other) { return false; }
+};
+
+struct IDeviceIDComparator {
+    bool operator()(const std::shared_ptr<IDeviceID>& lhs,
+                    const std::shared_ptr<IDeviceID>& rhs) const {
+        return *lhs < *rhs;
+    }
+};
+
+class StringArrayDeviceID : public IDeviceID {
+   public:
+    explicit StringArrayDeviceID(const std::vector<std::string>& segments)
+        : segments_(formalize(segments)) {}
+
+    explicit StringArrayDeviceID(const std::string& device_id_string)
+        : segments_(split_device_id_string(device_id_string)) {}
+
+    ~StringArrayDeviceID() override = default;
+
+    std::string get_device_name() const override {
+        return std::accumulate(std::next(segments_.begin()), segments_.end(),
+                               segments_.front(),
+                               [](std::string a, const std::string& b) {
+                                   return std::move(a) + "." + b;
+                               });
+    };
+
+    int serialize(common::ByteStream& write_stream) override {
+        int ret = common::E_OK;
+        if (RET_FAIL(common::SerializationUtil::write_var_int(segment_num(),
+                                                              write_stream))) {
+            return ret;
+        }
+        for (const auto& segment : segments_) {
+            if (RET_FAIL(common::SerializationUtil::write_var_int(
+                    segment.size(), write_stream))) {
+                return ret;
+            } else if (RET_FAIL(write_stream.write_buf(segment.c_str(),
+                                                       segment.size()))) {
+                return ret;
+            }
+        }
+        return ret;
+    }
+
+    std::string get_table_name() override {
+        return segments_.empty() ? "" : segments_[0];
+    }
+
+    int segment_num() override { return static_cast<int>(segments_.size()); }
+
+    std::vector<std::string> get_segments() const override { return segments_; 
}
+
+    virtual bool operator<(const IDeviceID& other) override {
+        auto other_segments = other.get_segments();
+        return std::lexicographical_compare(segments_.begin(), segments_.end(),
+                                            other_segments.begin(),
+                                            other_segments.end());
+    }
+
+    bool operator==(const IDeviceID& other) override {
+        auto other_segments = other.get_segments();
+        return (segments_.size() == other_segments.size()) &&
+               std::equal(segments_.begin(), segments_.end(),
+                          other_segments.begin());
+    }
+
+    bool operator!=(const IDeviceID& other) override {
+        return !(*this == other);
+    }
+
+   private:
+    std::vector<std::string> segments_;
+
+    static std::vector<std::string> formalize(
+        const std::vector<std::string>& segments) {
+        auto it =
+            std::find_if(segments.rbegin(), segments.rend(),
+                         [](const std::string& seg) { return !seg.empty(); });
+        return std::vector<std::string>(segments.begin(), it.base());
+    }
+
+    static std::vector<std::string> split_device_id_string(
+        std::basic_string<char> device_id_string) {
+        std::vector<std::string> splits;
+        std::istringstream stream(device_id_string);
+        std::string segment;
+        while (std::getline(stream, segment, '.')) {
+            splits.push_back(segment);
+        }
+        return splits;
+    }
+};
+
+class PlainDeviceID : public IDeviceID {

Review Comment:
   This class is unnecessary, and all should be converted to 
StringArrayDeviceId directly.



##########
cpp/src/common/schema.h:
##########
@@ -68,6 +70,33 @@ struct MeasurementSchema {
           compression_type_(compression_type),
           chunk_writer_(nullptr),
           value_chunk_writer_(nullptr) {}
+
+    int serialize_to(common::ByteStream &out) {
+        int ret = common::E_OK;
+        if (RET_FAIL(
+                common::SerializationUtil::write_str(measurement_name_, out))) 
{
+        } else if (RET_FAIL(
+                       common::SerializationUtil::write_i32(data_type_, out))) 
{
+        } else if (RET_FAIL(
+                       common::SerializationUtil::write_i32(encoding_, out))) {
+        } else if (RET_FAIL(common::SerializationUtil::write_i32(
+                       compression_type_, out))) {
+        }
+        if (ret == common::E_OK) {
+            if 
(RET_FAIL(common::SerializationUtil::write_str(measurement_name_,
+                                                              out))) {

Review Comment:
   I think this should be the size of the map.



##########
cpp/src/common/schema.h:
##########
@@ -84,5 +113,147 @@ struct MeasurementSchemaGroup {
     TimeChunkWriter *time_chunk_writer_ = nullptr;
 };
 
+enum class ColumnCategory { ID, MEASUREMENT };
+
+class TableSchema {
+   public:
+    TableSchema() = default;
+    TableSchema(const std::string &table_name,
+                const std::vector<std::shared_ptr<MeasurementSchema>>
+                    &measurement_schemas,

Review Comment:
   Rename to column_schemas. (No change to the class name for now)



##########
cpp/src/common/schema.h:
##########
@@ -68,6 +70,33 @@ struct MeasurementSchema {
           compression_type_(compression_type),
           chunk_writer_(nullptr),
           value_chunk_writer_(nullptr) {}
+
+    int serialize_to(common::ByteStream &out) {
+        int ret = common::E_OK;
+        if (RET_FAIL(
+                common::SerializationUtil::write_str(measurement_name_, out))) 
{
+        } else if (RET_FAIL(
+                       common::SerializationUtil::write_i32(data_type_, out))) 
{
+        } else if (RET_FAIL(
+                       common::SerializationUtil::write_i32(encoding_, out))) {
+        } else if (RET_FAIL(common::SerializationUtil::write_i32(
+                       compression_type_, out))) {

Review Comment:
   I do not think they are int32.



##########
cpp/src/common/schema.h:
##########
@@ -84,5 +113,147 @@ struct MeasurementSchemaGroup {
     TimeChunkWriter *time_chunk_writer_ = nullptr;
 };
 
+enum class ColumnCategory { ID, MEASUREMENT };

Review Comment:
   Rename to TAG and FIELD. Also, since ATTRIBUTE and TIME are not included (I 
am ok with that), please double-check the consistency of serialization with the 
Java edition.



##########
cpp/src/common/schema.h:
##########
@@ -84,5 +113,147 @@ struct MeasurementSchemaGroup {
     TimeChunkWriter *time_chunk_writer_ = nullptr;
 };
 
+enum class ColumnCategory { ID, MEASUREMENT };
+
+class TableSchema {
+   public:
+    TableSchema() = default;
+    TableSchema(const std::string &table_name,
+                const std::vector<std::shared_ptr<MeasurementSchema>>
+                    &measurement_schemas,
+                const std::vector<ColumnCategory> &column_categories)
+        : table_name_(table_name),
+          measurement_schemas_(measurement_schemas),
+          column_categories_(column_categories) {

Review Comment:
   Table name and column names should be converted to lower cases.



##########
cpp/src/common/tablet.h:
##########
@@ -85,17 +88,49 @@ class Tablet {
                            return MeasurementSchema(name, type);
                        });
     }
+
+    Tablet(const std::string &insert_target_name,
+           const std::vector<std::string> &column_names,
+           const std::vector<common::TSDataType> &data_types,
+           const std::vector<ColumnCategory> &column_categories,
+           int max_rows = DEFAULT_MAX_ROWS, bool has_column_categories = true)

Review Comment:
   This constructor is explicitly for the Table model, so column_categories 
must be provided, and has_column_categories can be removed.



##########
cpp/src/common/tablet.cc:
##########
@@ -141,4 +183,33 @@ template int Tablet::add_value(uint32_t row_index,
                                const std::string &measurement_name, float val);
 template int Tablet::add_value(uint32_t row_index,
                                const std::string &measurement_name, double 
val);
+void Tablet::set_column_categories(const std::vector<ColumnCategory>& 
column_categories) {
+    column_categories_ = column_categories;
+    id_column_indexes_.clear();
+    for (size_t i = 0; i < column_categories_.size(); i++) {
+        ColumnCategory columnCategory = column_categories_[i];
+        if (columnCategory == ColumnCategory::ID) {
+            id_column_indexes_.push_back(i);
+        }
+    }
+}
+
+std::shared_ptr<IDeviceID> Tablet::get_device_id(int i) const {
+    std::vector<std::string> id_array;
+    id_array.push_back(insert_target_name_);
+    for (auto id_column_idx : id_column_indexes_) {
+        // TODO: support TEXT
+        common::TSDataType data_type = INVALID_DATATYPE;
+        void* value_ptr = get_value(i, id_column_idx, data_type);
+        switch (data_type) {
+            case INT64:
+                
id_array.push_back(to_string(*static_cast<int64_t*>(value_ptr)));
+                break;
+            default:
+                break ;
+        }
+    }

Review Comment:
   TAG columns should all be STRING for now. You may add the single support 
here.



##########
cpp/src/common/tablet.cc:
##########
@@ -104,6 +105,47 @@ int Tablet::add_value(uint32_t row_index, uint32_t 
schema_index, T val) {
     return ret;
 }
 
+void* Tablet::get_value(int row_index, uint32_t schema_index, 
common::TSDataType& data_type) const {
+    if (LIKELY(schema_index >= schema_vec_->size())) {
+        return nullptr;
+    }

Review Comment:
   I think this should be UNLIKELY.



##########
cpp/src/common/tablet.cc:
##########
@@ -104,6 +105,47 @@ int Tablet::add_value(uint32_t row_index, uint32_t 
schema_index, T val) {
     return ret;
 }
 
+void* Tablet::get_value(int row_index, uint32_t schema_index, 
common::TSDataType& data_type) const {
+    if (LIKELY(schema_index >= schema_vec_->size())) {
+        return nullptr;
+    }
+    const MeasurementSchema& schema = schema_vec_->at(schema_index);
+
+    void* column_values = value_matrix_[schema_index];
+    data_type = schema.data_type_;
+    if (!bitmaps_[schema_index].test(row_index)) {
+        return nullptr;
+    }

Review Comment:
   I think a marked position represents null in the Java edition. Please be 
consistent (or am I misunderstanding `test()`?).
   ![Uploading image.png…]()
   



##########
cpp/src/common/tablet.cc:
##########
@@ -82,6 +82,7 @@ int Tablet::add_timestamp(uint32_t row_index, int64_t 
timestamp) {
         return E_OUT_OF_RANGE;
     }
     timestamps_[row_index] = timestamp;
+    cur_row_size_++;
     return E_OK;

Review Comment:
   Well, although I guess no one will do this, if someone really inserts 
timestamps into the same position, then the cur_row_size_ will be wrong.
   Maybe you should update it to max(cur_row_size, row_index).



##########
cpp/src/common/tablet.h:
##########
@@ -85,17 +88,49 @@ class Tablet {
                            return MeasurementSchema(name, type);
                        });
     }
+
+    Tablet(const std::string &insert_target_name,
+           const std::vector<std::string> &column_names,
+           const std::vector<common::TSDataType> &data_types,
+           const std::vector<ColumnCategory> &column_categories,
+           int max_rows = DEFAULT_MAX_ROWS, bool has_column_categories = true)
+        : max_row_num_(max_rows),
+          cur_row_size_(0),
+          insert_target_name_(insert_target_name),
+          timestamps_(nullptr),
+          value_matrix_(nullptr),
+          bitmaps_(nullptr),
+          owned_schemas_(false) {
+        schema_vec_ = std::make_shared<std::vector<MeasurementSchema>>();
+        for (size_t i = 0; i < column_names.size(); i++) {
+            schema_vec_->emplace_back(
+                MeasurementSchema(column_names[i], data_types[i], 
common::PLAIN,
+                                  common::UNCOMPRESSED));
+        }
+        if (has_column_categories) {
+            set_column_categories(column_categories);
+        }
+    }
+
     ~Tablet() { destroy(); }
 
     int init();
     void destroy();
     size_t get_column_count() const { return schema_vec_->size(); }
+    int get_cur_row_size() const { return cur_row_size_; }
+    void set_row_size(int row_size) { cur_row_size_ = row_size; }

Review Comment:
   Actually, we should not provide `set_row_size` afterwards. The real row size 
should be updated along the calls of add_timestamp and add_value.



##########
cpp/src/common/tsfile_common.cc:
##########
@@ -173,6 +174,58 @@ int TSMIterator::get_next(String &ret_device_name, String 
&ret_measurement_name,
     return ret;
 }
 
+int TsFileMeta::serialize_to_table_model(common::ByteStream &out) {
+    int ret = common::E_OK;
+
+    if (RET_SUCC(common::SerializationUtil::write_var_uint(
+            table_metadata_index_node_map_.size(), out))) {
+        for (auto &idx_nodes_iter : table_metadata_index_node_map_) {
+            if (RET_FAIL(common::SerializationUtil::write_mystring(
+                    idx_nodes_iter.first, out))) {
+            } else if (RET_SUCC(idx_nodes_iter.second->serialize_to(out))) {
+            } else {
+                return ret;
+            }
+        }
+    }
+
+    if (RET_SUCC(common::SerializationUtil::write_var_uint(
+            table_schemas_.size(), out))) {
+        for (auto &table_schema_iter : table_schemas_) {
+            if (RET_FAIL(common::SerializationUtil::write_str(
+                    table_schema_iter.first, out))) {

Review Comment:
   Should return here?



##########
cpp/src/common/tsfile_common.h:
##########
@@ -287,15 +294,17 @@ struct ChunkMeta {
 };
 
 struct ChunkGroupMeta {
-    // std::string device_name_;
-    common::String device_name_;
+    std::shared_ptr<IDeviceID> device_name_;
+    common::String device_name_str_;

Review Comment:
   Use consistent terms like device_id.



##########
cpp/src/common/tsfile_common.h:
##########
@@ -868,6 +886,8 @@ struct TsFileMeta {
         return ret;
     }
 
+    int serialize_to_table_model(common::ByteStream &out);

Review Comment:
   Just serialize. We do not distinguish the two models in serialization.



##########
cpp/src/common/tsfile_common.cc:
##########
@@ -173,6 +174,58 @@ int TSMIterator::get_next(String &ret_device_name, String 
&ret_measurement_name,
     return ret;
 }
 
+int TsFileMeta::serialize_to_table_model(common::ByteStream &out) {
+    int ret = common::E_OK;
+
+    if (RET_SUCC(common::SerializationUtil::write_var_uint(
+            table_metadata_index_node_map_.size(), out))) {
+        for (auto &idx_nodes_iter : table_metadata_index_node_map_) {
+            if (RET_FAIL(common::SerializationUtil::write_mystring(
+                    idx_nodes_iter.first, out))) {
+            } else if (RET_SUCC(idx_nodes_iter.second->serialize_to(out))) {
+            } else {
+                return ret;
+            }
+        }
+    }
+
+    if (RET_SUCC(common::SerializationUtil::write_var_uint(
+            table_schemas_.size(), out))) {
+        for (auto &table_schema_iter : table_schemas_) {
+            if (RET_FAIL(common::SerializationUtil::write_str(
+                    table_schema_iter.first, out))) {
+            } else if (RET_SUCC(common::SerializationUtil::write_str(
+                           table_schema_iter.first, out))) {
+            } else {
+                return ret;
+            }
+        }
+    }
+
+    if (RET_FAIL(common::SerializationUtil::write_i64(meta_offset_, out))) {
+        return ret;
+    }
+    if (bloom_filter_ != nullptr) {
+        if (RET_FAIL(bloom_filter_->serialize_to(out))) {
+            return ret;
+        }
+    }
+
+    if (RET_SUCC(common::SerializationUtil::write_var_int(
+            tsfile_properties_.size(), out))) {
+        for (auto tsfile_property : tsfile_properties_) {
+            if (RET_FAIL(common::SerializationUtil::write_str(
+                    tsfile_property.first, out))) {

Review Comment:
   Also here?



##########
cpp/src/common/tablet.h:
##########
@@ -85,17 +88,49 @@ class Tablet {
                            return MeasurementSchema(name, type);
                        });
     }
+
+    Tablet(const std::string &insert_target_name,
+           const std::vector<std::string> &column_names,
+           const std::vector<common::TSDataType> &data_types,
+           const std::vector<ColumnCategory> &column_categories,
+           int max_rows = DEFAULT_MAX_ROWS, bool has_column_categories = true)
+        : max_row_num_(max_rows),
+          cur_row_size_(0),
+          insert_target_name_(insert_target_name),
+          timestamps_(nullptr),
+          value_matrix_(nullptr),
+          bitmaps_(nullptr),
+          owned_schemas_(false) {
+        schema_vec_ = std::make_shared<std::vector<MeasurementSchema>>();
+        for (size_t i = 0; i < column_names.size(); i++) {
+            schema_vec_->emplace_back(
+                MeasurementSchema(column_names[i], data_types[i], 
common::PLAIN,
+                                  common::UNCOMPRESSED));

Review Comment:
   Use the default encoding and compression of each type.



##########
cpp/src/common/tsfile_common.cc:
##########
@@ -173,6 +174,58 @@ int TSMIterator::get_next(String &ret_device_name, String 
&ret_measurement_name,
     return ret;
 }
 
+int TsFileMeta::serialize_to_table_model(common::ByteStream &out) {
+    int ret = common::E_OK;
+
+    if (RET_SUCC(common::SerializationUtil::write_var_uint(
+            table_metadata_index_node_map_.size(), out))) {
+        for (auto &idx_nodes_iter : table_metadata_index_node_map_) {
+            if (RET_FAIL(common::SerializationUtil::write_mystring(
+                    idx_nodes_iter.first, out))) {

Review Comment:
   Should return here?



##########
cpp/src/common/tsfile_common.h:
##########
@@ -287,15 +294,17 @@ struct ChunkMeta {
 };
 
 struct ChunkGroupMeta {
-    // std::string device_name_;
-    common::String device_name_;
+    std::shared_ptr<IDeviceID> device_name_;
+    common::String device_name_str_;
     common::SimpleList<ChunkMeta *> chunk_meta_list_;
 
     explicit ChunkGroupMeta(common::PageArena *pa_ptr)
-        : device_name_(), chunk_meta_list_(pa_ptr) {}
+        : chunk_meta_list_(pa_ptr) {}
 
-    FORCE_INLINE int init(const std::string &dev_name, common::PageArena &pa) {
-        return device_name_.dup_from(dev_name, pa);
+    FORCE_INLINE int init(std::shared_ptr<IDeviceID> device_id,
+                          common::PageArena &pa) {
+        device_name_ = device_id;
+        return device_name_str_.dup_from(device_id->get_device_name(), pa);

Review Comment:
   Avoid using the string form. Concatenating the segments with "." may cause 
trouble if a segment contains "." already.



##########
cpp/src/common/tsfile_common.cc:
##########
@@ -173,6 +174,58 @@ int TSMIterator::get_next(String &ret_device_name, String 
&ret_measurement_name,
     return ret;
 }
 
+int TsFileMeta::serialize_to_table_model(common::ByteStream &out) {
+    int ret = common::E_OK;
+
+    if (RET_SUCC(common::SerializationUtil::write_var_uint(
+            table_metadata_index_node_map_.size(), out))) {
+        for (auto &idx_nodes_iter : table_metadata_index_node_map_) {
+            if (RET_FAIL(common::SerializationUtil::write_mystring(
+                    idx_nodes_iter.first, out))) {
+            } else if (RET_SUCC(idx_nodes_iter.second->serialize_to(out))) {
+            } else {
+                return ret;
+            }
+        }
+    }
+
+    if (RET_SUCC(common::SerializationUtil::write_var_uint(
+            table_schemas_.size(), out))) {
+        for (auto &table_schema_iter : table_schemas_) {
+            if (RET_FAIL(common::SerializationUtil::write_str(
+                    table_schema_iter.first, out))) {
+            } else if (RET_SUCC(common::SerializationUtil::write_str(
+                           table_schema_iter.first, out))) {
+            } else {
+                return ret;
+            }
+        }
+    }
+
+    if (RET_FAIL(common::SerializationUtil::write_i64(meta_offset_, out))) {
+        return ret;
+    }
+    if (bloom_filter_ != nullptr) {
+        if (RET_FAIL(bloom_filter_->serialize_to(out))) {
+            return ret;
+        }
+    }

Review Comment:
   Even if the bloom_filter is null, a zero should be written.



##########
cpp/src/common/device_id.h:
##########
@@ -0,0 +1,212 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef COMMON_DEVICE_ID_H
+#define COMMON_DEVICE_ID_H
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <numeric>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "common/allocator/byte_stream.h"
+#include "utils/errno_define.h"
+
+class IDeviceID {
+   public:
+    virtual ~IDeviceID() = default;
+    virtual int serialize(common::ByteStream& write_stream) { return 0; }
+    virtual std::string get_table_name() { return ""; }
+    virtual int segment_num() { return 0; }
+    virtual std::vector<std::string> get_segments() const { return {}; }
+    virtual std::string get_device_name() const { return ""; };
+    virtual bool operator<(const IDeviceID& other) { return 0; }
+    virtual bool operator==(const IDeviceID& other) { return false; }
+    virtual bool operator!=(const IDeviceID& other) { return false; }
+};
+
+struct IDeviceIDComparator {
+    bool operator()(const std::shared_ptr<IDeviceID>& lhs,
+                    const std::shared_ptr<IDeviceID>& rhs) const {
+        return *lhs < *rhs;
+    }
+};
+
+class StringArrayDeviceID : public IDeviceID {
+   public:
+    explicit StringArrayDeviceID(const std::vector<std::string>& segments)
+        : segments_(formalize(segments)) {}
+
+    explicit StringArrayDeviceID(const std::string& device_id_string)
+        : segments_(split_device_id_string(device_id_string)) {}
+
+    ~StringArrayDeviceID() override = default;
+
+    std::string get_device_name() const override {
+        return std::accumulate(std::next(segments_.begin()), segments_.end(),
+                               segments_.front(),
+                               [](std::string a, const std::string& b) {
+                                   return std::move(a) + "." + b;
+                               });
+    };
+
+    int serialize(common::ByteStream& write_stream) override {
+        int ret = common::E_OK;
+        if (RET_FAIL(common::SerializationUtil::write_var_int(segment_num(),
+                                                              write_stream))) {
+            return ret;
+        }
+        for (const auto& segment : segments_) {
+            if (RET_FAIL(common::SerializationUtil::write_var_int(
+                    segment.size(), write_stream))) {
+                return ret;
+            } else if (RET_FAIL(write_stream.write_buf(segment.c_str(),
+                                                       segment.size()))) {
+                return ret;
+            }
+        }
+        return ret;
+    }
+
+    std::string get_table_name() override {
+        return segments_.empty() ? "" : segments_[0];
+    }
+
+    int segment_num() override { return static_cast<int>(segments_.size()); }
+
+    std::vector<std::string> get_segments() const override { return segments_; 
}
+
+    virtual bool operator<(const IDeviceID& other) override {
+        auto other_segments = other.get_segments();
+        return std::lexicographical_compare(segments_.begin(), segments_.end(),
+                                            other_segments.begin(),
+                                            other_segments.end());
+    }
+
+    bool operator==(const IDeviceID& other) override {
+        auto other_segments = other.get_segments();
+        return (segments_.size() == other_segments.size()) &&
+               std::equal(segments_.begin(), segments_.end(),
+                          other_segments.begin());
+    }
+
+    bool operator!=(const IDeviceID& other) override {
+        return !(*this == other);
+    }
+
+   private:
+    std::vector<std::string> segments_;
+
+    static std::vector<std::string> formalize(
+        const std::vector<std::string>& segments) {
+        auto it =
+            std::find_if(segments.rbegin(), segments.rend(),
+                         [](const std::string& seg) { return !seg.empty(); });
+        return std::vector<std::string>(segments.begin(), it.base());
+    }
+
+    static std::vector<std::string> split_device_id_string(
+        std::basic_string<char> device_id_string) {
+        std::vector<std::string> splits;
+        std::istringstream stream(device_id_string);
+        std::string segment;
+        while (std::getline(stream, segment, '.')) {
+            splits.push_back(segment);
+        }
+        return splits;
+    }

Review Comment:
   Also, there is a default strategy for generating table names in the Java 
edition. Please refer to that and avoid inconsistency.
   For example, the table name converted from path "root.db1.t1.d1" will be 
"root.db1.t1" instead of "root".



##########
cpp/src/writer/tsfile_writer.h:
##########
@@ -61,13 +65,26 @@ class TsFileWriter {
     int register_aligned_timeseries(
         const std::string &device_id,
         const std::vector<MeasurementSchema *> &measurement_schemas);
+    void register_table(const std::shared_ptr<TableSchema>& table_schema);
     int write_record(const TsRecord &record);
     int write_tablet(const Tablet &tablet);
     int write_record_aligned(const TsRecord &record);
     int write_tablet_aligned(const Tablet &tablet);
-    std::map<std::string, MeasurementSchemaGroup *> *get_schema_group_map() {
-        return &schemas_;
-    }
+    int write_table(const Tablet &tablet);

Review Comment:
   Would be better to create a new class like TsFileTabletWriter wrapping this 
class, and only public the Table model interfaces in it.



##########
cpp/src/file/tsfile_io_writer.cc:
##########
@@ -405,30 +417,67 @@ int TsFileIOWriter::write_file_index() {
     }
 
     if (IS_SUCC(ret)) {
-        MetaIndexNode *device_index_root_node = nullptr;
-        if (RET_FAIL(build_device_level(device_map, device_index_root_node,
-                                        writing_mm))) {
+        if (!generate_table_schema_) {
+            MetaIndexNode *device_index_root_node = nullptr;
+            if (RET_FAIL(build_device_level(device_map, device_index_root_node,
+                                            writing_mm))) {
+            } else {
+                TsFileMeta tsfile_meta;
+                tsfile_meta.index_node_ = device_index_root_node;
+                tsfile_meta.meta_offset_ = meta_offset;
+                tsfile_meta.bloom_filter_ = &filter;
+                int64_t tsfile_meta_offset = cur_file_position();
+                uint32_t size = 0;  // cppcheck-suppress unreadVariable
+                OFFSET_DEBUG("before tsfile_meta written");
+                if (RET_FAIL(tsfile_meta.serialize_to(write_stream_))) {
+                } else if (RET_FAIL(filter.serialize_to(write_stream_))) {
+                } else {
+                    int64_t tsfile_meta_end_offset = cur_file_position();
+                    size =
+                        (uint32_t)(tsfile_meta_end_offset - 
tsfile_meta_offset);
+                    ret = SerializationUtil::write_ui32(size, write_stream_);
+                }
+#if DEBUG_SE
+                std::cout << "writer tsfile_meta: " << tsfile_meta
+                          << ", tsfile_meta_offset=" << tsfile_meta_offset
+                          << ", size=" << size << std::endl;
+#endif
+                tsfile_meta.index_node_ =
+                    nullptr;  // memory management is delegated to writing_mm
+                tsfile_meta.bloom_filter_ = nullptr;
+            }
         } else {

Review Comment:
   I do not think `generate_table_schema_` should control this. There are no 
two ways of constructing Metadata, only one way.



##########
cpp/src/writer/tsfile_writer.cc:
##########
@@ -163,10 +172,12 @@ int TsFileWriter::register_timeseries(
     return register_timeseries(device_id, ms, false);
 }
 
-int TsFileWriter::register_timeseries(const std::string &device_id,
+int TsFileWriter::register_timeseries(const std::string &device_path,
                                       MeasurementSchema *measurement_schema,
                                       bool is_aligned) {
-    DeviceSchemaIter device_iter = schemas_.find(device_id);
+    std::shared_ptr<IDeviceID> device_id =
+        std::make_shared<PlainDeviceID>(device_path);
+    DeviceSchemasMapIter device_iter = schemas_.find(device_id);

Review Comment:
   PlainDeviceID is not for the tree model, it is DEPRECATED,



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to