This is an automated email from the ASF dual-hosted git repository.

leaves12138 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new d0dcdad  feat: add core options and utility helpers
d0dcdad is described below

commit d0dcdadaaf2e50df203a57985216c3c039c5bfeb
Author: Yonghao Fang <[email protected]>
AuthorDate: Tue Jun 9 09:40:56 2026 +0800

    feat: add core options and utility helpers
    
    Merge PR #59: add core options and utility helpers.
---
 include/paimon/utils/special_field_ids.h           |  47 ++++++++
 src/paimon/core/options/changelog_producer.h       |  34 ++++++
 src/paimon/core/options/compress_options.h         |  30 +++++
 src/paimon/core/options/expire_config.h            |  62 ++++++++++
 src/paimon/core/options/external_path_strategy.h   |  32 +++++
 src/paimon/core/options/lookup_compact_mode.h      |  30 +++++
 src/paimon/core/options/lookup_strategy.h          |  53 ++++++++
 src/paimon/core/options/lookup_strategy_test.cpp   |  46 +++++++
 src/paimon/core/options/merge_engine.h             |  34 ++++++
 src/paimon/core/options/sort_engine.h              |  31 +++++
 src/paimon/core/options/sort_order.h               |  30 +++++
 src/paimon/core/utils/commit_increment.h           |  65 ++++++++++
 src/paimon/core/utils/duration.h                   |  47 ++++++++
 src/paimon/core/utils/file_utils.cpp               |  77 ++++++++++++
 src/paimon/core/utils/file_utils.h                 |  58 +++++++++
 src/paimon/core/utils/file_utils_test.cpp          |  56 +++++++++
 src/paimon/core/utils/offset_row.h                 | 134 +++++++++++++++++++++
 src/paimon/core/utils/offset_row_test.cpp          | 109 +++++++++++++++++
 src/paimon/core/utils/partition_path_utils.cpp     | 129 ++++++++++++++++++++
 src/paimon/core/utils/partition_path_utils.h       |  77 ++++++++++++
 .../core/utils/partition_path_utils_test.cpp       | 106 ++++++++++++++++
 src/paimon/core/utils/path_factory.h               |  36 ++++++
 src/paimon/core/utils/special_field_ids.cpp        |  31 +++++
 23 files changed, 1354 insertions(+)

diff --git a/include/paimon/utils/special_field_ids.h b/include/paimon/utils/special_field_ids.h
new file mode 100644
index 0000000..accf07f
--- /dev/null
+++ b/include/paimon/utils/special_field_ids.h
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <limits>
+
+namespace paimon {
+
+/// A utility class exposing the special (system-reserved) field IDs used in metadata.
+///
+/// Every ID is computed as a fixed offset below std::numeric_limits<int32_t>::max().
+class SpecialFieldIds {
+ protected:
+    /// System defined constant for field id boundary. Value: INT32_MAX - 10000.
+    /// Declared protected, so it is only reachable from this class and its subclasses.
+    static const int32_t CPP_FIELD_ID_END = std::numeric_limits<int32_t>::max() - 10000;
+
+ public:
+    /// Special field ID reserved for sequence number. Value: INT32_MAX - 1
+    static const int32_t SEQUENCE_NUMBER = std::numeric_limits<int32_t>::max() - 1;
+    /// Special field ID reserved for value kind. Value: INT32_MAX - 2
+    static const int32_t VALUE_KIND = std::numeric_limits<int32_t>::max() - 2;
+    /// Special field ID reserved for row kind. Value: INT32_MAX - 3
+    static const int32_t ROW_KIND = std::numeric_limits<int32_t>::max() - 3;
+    /// Special field ID reserved for row ID. Value: INT32_MAX - 5
+    /// (INT32_MAX - 4 is not assigned by this class.)
+    static const int32_t ROW_ID = std::numeric_limits<int32_t>::max() - 5;
+
+    /// Special field ID reserved for index score. Value: CPP_FIELD_ID_END - 1,
+    /// i.e. INT32_MAX - 10001.
+    static const int32_t INDEX_SCORE = CPP_FIELD_ID_END - 1;
+};
+
+}  // namespace paimon
diff --git a/src/paimon/core/options/changelog_producer.h b/src/paimon/core/options/changelog_producer.h
new file mode 100644
index 0000000..744f767
--- /dev/null
+++ b/src/paimon/core/options/changelog_producer.h
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+namespace paimon {
+/// Specifies the changelog producer for table.
+///
+/// Enumerator values start at 1, so a value-initialized ChangelogProducer{} (0) matches none of
+/// the enumerators.
+enum class ChangelogProducer {
+    /// No changelog file.
+    NONE = 1,
+    /// Double write to a changelog file when flushing memory table, the changelog is from input.
+    INPUT = 2,
+    /// Generate changelog files with each full compaction.
+    FULL_COMPACTION = 3,
+    /// Generate changelog files through 'lookup' before committing the data writing.
+    LOOKUP = 4
+};
+}  // namespace paimon
diff --git a/src/paimon/core/options/compress_options.h b/src/paimon/core/options/compress_options.h
new file mode 100644
index 0000000..6f18e38
--- /dev/null
+++ b/src/paimon/core/options/compress_options.h
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+#include <cstdint>
+#include <string>
+
+namespace paimon {
+/// Options of compression.
+///
+/// Plain aggregate with no default member initializers: `CompressOptions opts;` leaves
+/// `zstd_level` indeterminate, while `CompressOptions opts{};` zero-initializes it.
+struct CompressOptions {
+    /// Compression setting as a string (presumably the codec name -- confirm against callers).
+    std::string compress;
+    /// Compression level (presumably only consulted for zstd -- confirm against callers).
+    int32_t zstd_level;
+};
+}  // namespace paimon
diff --git a/src/paimon/core/options/expire_config.h b/src/paimon/core/options/expire_config.h
new file mode 100644
index 0000000..492930d
--- /dev/null
+++ b/src/paimon/core/options/expire_config.h
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+
+namespace paimon {
+
+/// Bundle of snapshot-expiration settings exposed through read-only accessors.
+class ExpireConfig {
+ public:
+    /// Creates a config whose numeric settings are all 0 and whose directory cleanup flag is
+    /// false.
+    ///
+    /// The members carry default initializers so that a default-constructed instance never
+    /// holds indeterminate values (reading those would be undefined behavior).
+    ExpireConfig() = default;
+
+    /// Creates a config from explicit values; each argument is stored verbatim and returned by
+    /// the accessor of the same name.
+    ///
+    /// @param snapshot_retain_max value returned by GetSnapshotRetainMax()
+    /// @param snapshot_retain_min value returned by GetSnapshotRetainMin()
+    /// @param snapshot_time_retain_ms value returned by GetSnapshotTimeRetainMs()
+    /// @param snapshot_max_deletes value returned by GetSnapshotMaxDeletes()
+    /// @param snapshot_clean_empty_directories value returned by CleanEmptyDirectories()
+    ExpireConfig(int32_t snapshot_retain_max, int32_t snapshot_retain_min,
+                 int64_t snapshot_time_retain_ms, int32_t snapshot_max_deletes,
+                 bool snapshot_clean_empty_directories)
+        : snapshot_retain_max_(snapshot_retain_max),
+          snapshot_retain_min_(snapshot_retain_min),
+          snapshot_time_retain_ms_(snapshot_time_retain_ms),
+          snapshot_max_deletes_(snapshot_max_deletes),
+          snapshot_clean_empty_directories_(snapshot_clean_empty_directories) {}
+
+    /// @return the configured snapshot_retain_min value.
+    int32_t GetSnapshotRetainMin() const {
+        return snapshot_retain_min_;
+    }
+    /// @return the configured snapshot_retain_max value.
+    int32_t GetSnapshotRetainMax() const {
+        return snapshot_retain_max_;
+    }
+    /// @return the configured snapshot_time_retain_ms value.
+    int64_t GetSnapshotTimeRetainMs() const {
+        return snapshot_time_retain_ms_;
+    }
+    /// @return the configured snapshot_max_deletes value.
+    int32_t GetSnapshotMaxDeletes() const {
+        return snapshot_max_deletes_;
+    }
+    /// @return the configured snapshot_clean_empty_directories flag.
+    bool CleanEmptyDirectories() const {
+        return snapshot_clean_empty_directories_;
+    }
+
+ private:
+    int32_t snapshot_retain_max_ = 0;
+    int32_t snapshot_retain_min_ = 0;
+    int64_t snapshot_time_retain_ms_ = 0;
+    int32_t snapshot_max_deletes_ = 0;
+    bool snapshot_clean_empty_directories_ = false;
+};
+
+}  // namespace paimon
diff --git a/src/paimon/core/options/external_path_strategy.h b/src/paimon/core/options/external_path_strategy.h
new file mode 100644
index 0000000..6b6a0c9
--- /dev/null
+++ b/src/paimon/core/options/external_path_strategy.h
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+namespace paimon {
+/// Specifies the strategy for selecting external storage paths.
+///
+/// Enumerator values start at 1, so a value-initialized ExternalPathStrategy{} (0) matches none
+/// of the enumerators.
+enum class ExternalPathStrategy {
+    /// Do not choose any external storage, data will still be written to the default warehouse
+    /// path.
+    NONE = 1,
+    /// Select a specific file system as the external path. Currently supported are S3 and OSS.
+    SPECIFIC_FS = 2,
+    /// When writing a new file, a path is chosen from data-file.external-paths in turn.
+    ROUND_ROBIN = 3
+};
+}  // namespace paimon
diff --git a/src/paimon/core/options/lookup_compact_mode.h b/src/paimon/core/options/lookup_compact_mode.h
new file mode 100644
index 0000000..a1e6c92
--- /dev/null
+++ b/src/paimon/core/options/lookup_compact_mode.h
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+namespace paimon {
+/// The compact mode for lookup compaction.
+///
+/// Enumerator values start at 1, so a value-initialized LookupCompactMode{} (0) matches none of
+/// the enumerators.
+enum class LookupCompactMode {
+    /// Lookup compaction will use ForceUpLevel0Compaction strategy to radically compact new
+    /// files.
+    RADICAL = 1,
+    /// Lookup compaction will use UniversalCompaction strategy to gently compact new files.
+    GENTLE = 2
+};
+}  // namespace paimon
diff --git a/src/paimon/core/options/lookup_strategy.h b/src/paimon/core/options/lookup_strategy.h
new file mode 100644
index 0000000..094aae5
--- /dev/null
+++ b/src/paimon/core/options/lookup_strategy.h
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+namespace paimon {
+/// Strategy for lookup.
+///
+/// Holds three caller-supplied flags plus the derived `need_lookup` flag. All members are const,
+/// so an instance cannot be modified or assigned after construction; create one with From().
+struct LookupStrategy {
+    /// Builds a strategy from the individual flags.
+    ///
+    /// `need_lookup` becomes true when at least one of the four arguments is true;
+    /// `force_lookup` only contributes to `need_lookup` and is not stored separately.
+    static LookupStrategy From(bool is_first_row, bool produce_changelog, bool deletion_vector,
+                               bool force_lookup) {
+        return {is_first_row, produce_changelog, deletion_vector, force_lookup};
+    }
+
+    /// Two strategies are equal when all four flags match.
+    bool operator==(const LookupStrategy& other) const {
+        if (need_lookup != other.need_lookup) {
+            return false;
+        }
+        if (is_first_row != other.is_first_row) {
+            return false;
+        }
+        if (produce_changelog != other.produce_changelog) {
+            return false;
+        }
+        return deletion_vector == other.deletion_vector;
+    }
+
+    const bool need_lookup;
+    const bool is_first_row;
+    const bool produce_changelog;
+    const bool deletion_vector;
+
+ private:
+    // Initializers follow the member declaration order (need_lookup first).
+    LookupStrategy(bool first_row, bool changelog, bool dv, bool force)
+        : need_lookup(changelog || dv || first_row || force),
+          is_first_row(first_row),
+          produce_changelog(changelog),
+          deletion_vector(dv) {}
+};
+
+}  // namespace paimon
diff --git a/src/paimon/core/options/lookup_strategy_test.cpp b/src/paimon/core/options/lookup_strategy_test.cpp
new file mode 100644
index 0000000..0273271
--- /dev/null
+++ b/src/paimon/core/options/lookup_strategy_test.cpp
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/options/lookup_strategy.h"
+
+#include "gtest/gtest.h"
+
+namespace paimon::test {
+
+// From() stores the three flags verbatim and derives need_lookup from them.
+TEST(LookupStrategyTest, TestFrom) {
+    const bool is_first_row = true;
+    const bool produce_changelog = false;
+    const bool deletion_vector = false;
+    const bool force_lookup = false;
+
+    const LookupStrategy strategy =
+        LookupStrategy::From(is_first_row, produce_changelog, deletion_vector, force_lookup);
+
+    // is_first_row alone is enough to require a lookup.
+    ASSERT_TRUE(strategy.need_lookup);
+    ASSERT_TRUE(strategy.is_first_row);
+    ASSERT_FALSE(strategy.produce_changelog);
+    ASSERT_FALSE(strategy.deletion_vector);
+}
+
+// need_lookup is false only when every flag is false; each remaining flag enables it on its own.
+TEST(LookupStrategyTest, TestNeedLookupCombinations) {
+    const auto need_lookup = [](bool is_first_row, bool produce_changelog, bool deletion_vector,
+                                bool force_lookup) {
+        return LookupStrategy::From(is_first_row, produce_changelog, deletion_vector,
+                                    force_lookup)
+            .need_lookup;
+    };
+
+    ASSERT_FALSE(need_lookup(false, false, false, false));
+    ASSERT_TRUE(need_lookup(false, /*produce_changelog=*/true, false, false));
+    ASSERT_TRUE(need_lookup(false, false, /*deletion_vector=*/true, false));
+    ASSERT_TRUE(need_lookup(false, false, false, /*force_lookup=*/true));
+}
+
+}  // namespace paimon::test
diff --git a/src/paimon/core/options/merge_engine.h b/src/paimon/core/options/merge_engine.h
new file mode 100644
index 0000000..0c30306
--- /dev/null
+++ b/src/paimon/core/options/merge_engine.h
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+namespace paimon {
+/// Specifies the merge engine for table with primary key.
+///
+/// Enumerator values start at 1, so a value-initialized MergeEngine{} (0) matches none of the
+/// enumerators.
+enum class MergeEngine {
+    /// De-duplicate and keep the last row.
+    DEDUPLICATE = 1,
+    /// Partial update non-null fields.
+    PARTIAL_UPDATE = 2,
+    /// Aggregate fields with same primary key.
+    AGGREGATE = 3,
+    /// De-duplicate and keep the first row.
+    FIRST_ROW = 4
+};
+}  // namespace paimon
diff --git a/src/paimon/core/options/sort_engine.h b/src/paimon/core/options/sort_engine.h
new file mode 100644
index 0000000..871895b
--- /dev/null
+++ b/src/paimon/core/options/sort_engine.h
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+namespace paimon {
+/// Specifies the sort engine for table with primary key.
+///
+/// Enumerator values start at 1, so a value-initialized SortEngine{} (0) matches none of the
+/// enumerators.
+enum class SortEngine {
+    /// Use min-heap for multiway sorting.
+    MIN_HEAP = 1,
+    /// Use loser-tree for multiway sorting. Compared with heapsort, loser-tree has fewer
+    /// comparisons and is more efficient.
+    LOSER_TREE = 2
+};
+}  // namespace paimon
diff --git a/src/paimon/core/options/sort_order.h b/src/paimon/core/options/sort_order.h
new file mode 100644
index 0000000..1e8dd02
--- /dev/null
+++ b/src/paimon/core/options/sort_order.h
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+namespace paimon {
+/// Specifies the sort order for field sequence id.
+///
+/// Enumerator values start at 1, so a value-initialized SortOrder{} (0) matches none of the
+/// enumerators.
+enum class SortOrder {
+    /// The sequence.field sort order is ascending.
+    ASCENDING = 1,
+    /// The sequence.field sort order is descending.
+    DESCENDING = 2
+};
+}  // namespace paimon
diff --git a/src/paimon/core/utils/commit_increment.h b/src/paimon/core/utils/commit_increment.h
new file mode 100644
index 0000000..e371821
--- /dev/null
+++ b/src/paimon/core/utils/commit_increment.h
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <memory>
+
+#include "paimon/core/compact/compact_deletion_file.h"
+#include "paimon/core/io/compact_increment.h"
+#include "paimon/core/io/data_increment.h"
+
+namespace paimon {
+
+/// Changes to commit.
+///
+/// Bundles a DataIncrement, a CompactIncrement and a shared CompactDeletionFile. The two
+/// increments are copied into this object at construction; the deletion file is held through a
+/// shared_ptr.
+class CommitIncrement {
+ public:
+    /// Stores copies of both increments and shares ownership of `compact_deletion_file`.
+    CommitIncrement(const DataIncrement& data_increment, const CompactIncrement& compact_increment,
+                    const std::shared_ptr<CompactDeletionFile>& compact_deletion_file)
+        : data_increment_(data_increment),
+          compact_increment_(compact_increment),
+          compact_deletion_file_(compact_deletion_file) {}
+
+    /// Access to the stored data increment (read-only and mutable overloads).
+    const DataIncrement& GetNewFilesIncrement() const { return data_increment_; }
+    DataIncrement& GetNewFilesIncrement() { return data_increment_; }
+
+    /// Access to the stored compact increment (read-only and mutable overloads).
+    const CompactIncrement& GetCompactIncrement() const { return compact_increment_; }
+    CompactIncrement& GetCompactIncrement() { return compact_increment_; }
+
+    /// Returns a copy of the shared_ptr handed to the constructor.
+    std::shared_ptr<CompactDeletionFile> GetCompactDeletionFile() const {
+        return compact_deletion_file_;
+    }
+
+ private:
+    DataIncrement data_increment_;
+    CompactIncrement compact_increment_;
+    std::shared_ptr<CompactDeletionFile> compact_deletion_file_;
+};
+
+}  // namespace paimon
diff --git a/src/paimon/core/utils/duration.h b/src/paimon/core/utils/duration.h
new file mode 100644
index 0000000..ba64355
--- /dev/null
+++ b/src/paimon/core/utils/duration.h
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <chrono>
+#include <cstdint>
+
+namespace paimon {
+
+/// Calculate operation duration.
+///
+/// A stopwatch that starts at construction and reports elapsed time in whole milliseconds.
+/// It is backed by std::chrono::steady_clock, which is guaranteed to be monotonic, so the
+/// elapsed time can never be negative (std::chrono::high_resolution_clock gives no such
+/// guarantee and may alias the adjustable system clock).
+class Duration {
+ public:
+    /// Starts timing immediately.
+    Duration() : start_(Clock::now()) {}
+
+    /// @return milliseconds elapsed since construction or since the last Reset(),
+    ///         truncated to whole milliseconds.
+    uint64_t Get() const {
+        return ElapsedMs(Clock::now());
+    }
+
+    /// Restarts the stopwatch.
+    ///
+    /// The clock is sampled once, so the returned value and the new start point refer to the
+    /// same instant and no time is lost between them.
+    ///
+    /// @return milliseconds elapsed up to the moment of the reset.
+    uint64_t Reset() {
+        const Clock::time_point now = Clock::now();
+        const uint64_t elapsed_ms = ElapsedMs(now);
+        start_ = now;
+        return elapsed_ms;
+    }
+
+ private:
+    using Clock = std::chrono::steady_clock;
+
+    // Whole milliseconds between start_ and `now`.
+    uint64_t ElapsedMs(Clock::time_point now) const {
+        const auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start_);
+        return static_cast<uint64_t>(elapsed.count());
+    }
+
+    Clock::time_point start_;
+};
+
+}  // namespace paimon
diff --git a/src/paimon/core/utils/file_utils.cpp b/src/paimon/core/utils/file_utils.cpp
new file mode 100644
index 0000000..1a803e3
--- /dev/null
+++ b/src/paimon/core/utils/file_utils.cpp
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/utils/file_utils.h"
+
+#include <algorithm>
+#include <optional>
+#include <utility>
+
+#include "fmt/format.h"
+#include "paimon/common/utils/path_util.h"
+#include "paimon/common/utils/string_utils.h"
+#include "paimon/fs/file_system.h"
+#include "paimon/result.h"
+
+namespace paimon {
+
+// Appends to `files` the numeric suffix of every file in `dir` whose name starts with `prefix`.
+// Returns Status::Invalid at the first suffix that does not convert to int64_t; numbers parsed
+// before that point remain in `files`.
+Status FileUtils::ListVersionedFiles(const std::shared_ptr<FileSystem>& fs, const std::string& dir,
+                                     const std::string& prefix, std::vector<int64_t>* files) {
+    std::vector<std::string> suffixes;
+    PAIMON_RETURN_NOT_OK(ListOriginalVersionedFiles(fs, dir, prefix, &suffixes));
+    for (const std::string& suffix : suffixes) {
+        const std::optional<int64_t> version = StringUtils::StringToValue<int64_t>(suffix);
+        if (!version.has_value()) {
+            return Status::Invalid(fmt::format("fail to convert {} to number", suffix));
+        }
+        files->push_back(*version);
+    }
+    return Status::OK();
+}
+
+// Appends to `files` the name of every matching file in `dir` with the leading `prefix` removed.
+// Matching is delegated to ListVersionedFileStatus, so each name is at least prefix.size() long.
+Status FileUtils::ListOriginalVersionedFiles(const std::shared_ptr<FileSystem>& fs,
+                                             const std::string& dir, const std::string& prefix,
+                                             std::vector<std::string>* files) {
+    std::vector<std::unique_ptr<BasicFileStatus>> statuses;
+    PAIMON_RETURN_NOT_OK(ListVersionedFileStatus(fs, dir, prefix, &statuses));
+    const auto prefix_length = prefix.size();
+    for (const auto& status : statuses) {
+        const std::string name = PathUtil::GetName(status->GetPath());
+        files->push_back(name.substr(prefix_length));
+    }
+    return Status::OK();
+}
+
+// Moves into `file_status_list` the status of every entry of `dir` whose file name starts with
+// `prefix`. A directory that does not exist is not an error: nothing is appended and OK is
+// returned.
+Status FileUtils::ListVersionedFileStatus(
+    const std::shared_ptr<FileSystem>& fs, const std::string& dir, const std::string& prefix,
+    std::vector<std::unique_ptr<BasicFileStatus>>* file_status_list) {
+    PAIMON_ASSIGN_OR_RAISE(bool dir_exists, fs->Exists(dir));
+    if (!dir_exists) {
+        return Status::OK();
+    }
+
+    std::vector<std::unique_ptr<BasicFileStatus>> entries;
+    PAIMON_RETURN_NOT_OK(fs->ListDir(dir, &entries));
+    for (auto& entry : entries) {
+        const std::string name = PathUtil::GetName(entry->GetPath());
+        if (!StringUtils::StartsWith(name, prefix)) {
+            continue;
+        }
+        // Ownership moves to the caller's list; `entries` keeps an empty slot.
+        file_status_list->push_back(std::move(entry));
+    }
+    return Status::OK();
+}
+
+}  // namespace paimon
diff --git a/src/paimon/core/utils/file_utils.h b/src/paimon/core/utils/file_utils.h
new file mode 100644
index 0000000..cbb5f76
--- /dev/null
+++ b/src/paimon/core/utils/file_utils.h
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "paimon/status.h"
+#include "paimon/type_fwd.h"
+
+namespace paimon {
+class BasicFileStatus;
+class FileSystem;
+
+/// Utils for file reading and writing.
+///
+/// All helpers select the entries of a directory whose file name starts with a given prefix.
+/// A directory that does not exist yields no entries and an OK status. Results are appended to
+/// the output vector; existing contents are kept.
+class FileUtils {
+ public:
+    /// List versioned files for the directory.
+    ///
+    /// The prefix is stripped from each matching file name and the remainder is parsed as an
+    /// int64_t.
+    ///
+    /// @param fs file system used to check and list `dir`
+    /// @param dir directory to scan
+    /// @param prefix file-name prefix selecting the versioned files
+    /// @param files output vector receiving the parsed numbers; must not be null
+    /// @return Status OK on success; Invalid if a remainder cannot be converted to a number (the
+    ///         numbers parsed before it stay in `files`); otherwise the non-OK status of the
+    ///         underlying file system call
+    static Status ListVersionedFiles(const std::shared_ptr<FileSystem>& fs, const std::string& dir,
+                                     const std::string& prefix, std::vector<int64_t>* files);
+
+    /// List original versioned files for the directory.
+    ///
+    /// Same selection as ListVersionedFiles, but the remainder after the prefix is returned as a
+    /// string without any numeric conversion.
+    ///
+    /// @param fs file system used to check and list `dir`
+    /// @param dir directory to scan
+    /// @param prefix file-name prefix selecting the versioned files
+    /// @param files output vector receiving the file names with `prefix` removed; must not be null
+    /// @return Status OK on success, otherwise the non-OK status of the underlying file system
+    ///         call
+    static Status ListOriginalVersionedFiles(const std::shared_ptr<FileSystem>& fs,
+                                             const std::string& dir, const std::string& prefix,
+                                             std::vector<std::string>* files);
+
+    /// List versioned file status for the directory.
+    ///
+    /// @param fs file system used to check and list `dir`
+    /// @param dir directory to scan
+    /// @param prefix file-name prefix selecting the versioned files
+    /// @param file_status_list output vector receiving the status objects of the matching
+    ///        entries; must not be null
+    /// @return Status OK on success, otherwise the non-OK status of the underlying file system
+    ///         call
+    static Status ListVersionedFileStatus(
+        const std::shared_ptr<FileSystem>& fs, const std::string& dir, const std::string& prefix,
+        std::vector<std::unique_ptr<BasicFileStatus>>* file_status_list);
+};
+
+}  // namespace paimon
diff --git a/src/paimon/core/utils/file_utils_test.cpp b/src/paimon/core/utils/file_utils_test.cpp
new file mode 100644
index 0000000..ef3db4d
--- /dev/null
+++ b/src/paimon/core/utils/file_utils_test.cpp
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/utils/file_utils.h"
+
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "paimon/fs/local/local_file_system.h"
+#include "paimon/testing/utils/testharness.h"
+
+namespace paimon::test {
+
+TEST(FileUtilsTest, TestSimple) {
+    // The fixture's snapshot directory is expected to yield five "snapshot-" versions.
+    const std::string snapshot_dir =
+        paimon::test::GetDataDir() + "/orc/append_09.db/append_09/snapshot/";
+    auto local_fs = std::make_shared<LocalFileSystem>();
+    std::vector<int64_t> versions;
+    ASSERT_OK(
+        FileUtils::ListVersionedFiles(std::move(local_fs), snapshot_dir, "snapshot-", &versions));
+    ASSERT_EQ(versions.size(), 5u);
+}
+
+TEST(FileUtilsTest, TestNotExist) {
+    // Listing a directory that does not exist must succeed and report nothing.
+    const std::string missing_dir =
+        paimon::test::GetDataDir() + "/orc/append_09.db/append_09/not_exist/";
+    auto local_fs = std::make_shared<LocalFileSystem>();
+    std::vector<int64_t> versions;
+    ASSERT_OK(
+        FileUtils::ListVersionedFiles(std::move(local_fs), missing_dir, "snapshot-", &versions));
+    ASSERT_TRUE(versions.empty());
+}
+
+TEST(FileUtilsTest, TestNotNumber) {
+    // Per the test name, the "manifest-" files do not carry a numeric version, so the
+    // listing is expected to fail.
+    const std::string manifest_dir =
+        paimon::test::GetDataDir() + "/orc/append_09.db/append_09/manifest/";
+    auto local_fs = std::make_shared<LocalFileSystem>();
+    std::vector<int64_t> versions;
+    ASSERT_NOK(
+        FileUtils::ListVersionedFiles(std::move(local_fs), manifest_dir, "manifest-", &versions));
+}
+
+}  // namespace paimon::test
diff --git a/src/paimon/core/utils/offset_row.h 
b/src/paimon/core/utils/offset_row.h
new file mode 100644
index 0000000..1fbe671
--- /dev/null
+++ b/src/paimon/core/utils/offset_row.h
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "fmt/format.h"
+#include "paimon/common/data/binary_string.h"
+#include "paimon/common/data/internal_row.h"
+#include "paimon/data/decimal.h"
+#include "paimon/data/timestamp.h"
+#include "paimon/memory/bytes.h"
+#include "paimon/result.h"
+
+namespace paimon {
+class Bytes;
+class InternalArray;
+class InternalMap;
+class RowKind;
+
+/// An `InternalRow` view over another row: field `pos` of the view is read from field
+/// `offset + pos` of the wrapped row.
+///
+/// The wrapped row is stored as a reference and is not owned, so it must outlive this object.
+class OffsetRow : public InternalRow {
+ public:
+    /// @param row The row to wrap; only a reference to it is kept.
+    /// @param arity The value reported by GetFieldCount().
+    /// @param offset The amount added to every position before delegating to `row`.
+    OffsetRow(const InternalRow& row, int32_t arity, int32_t offset)
+        : row_(row), arity_(arity), offset_(offset) {}
+
+    /// Returns the arity given at construction, not the wrapped row's field count.
+    int32_t GetFieldCount() const override {
+        return arity_;
+    }
+
+    /// Forwards to the wrapped row.
+    Result<const RowKind*> GetRowKind() const override {
+        return row_.GetRowKind();
+    }
+
+    /// No-op: the kind is neither stored nor forwarded (the wrapped row is a const reference).
+    void SetRowKind(const RowKind* kind) override {}
+
+    // Every accessor below delegates to the wrapped row at the translated position.
+
+    bool IsNullAt(int32_t pos) const override {
+        return row_.IsNullAt(ToInnerPos(pos));
+    }
+
+    bool GetBoolean(int32_t pos) const override {
+        return row_.GetBoolean(ToInnerPos(pos));
+    }
+
+    char GetByte(int32_t pos) const override {
+        return row_.GetByte(ToInnerPos(pos));
+    }
+
+    int16_t GetShort(int32_t pos) const override {
+        return row_.GetShort(ToInnerPos(pos));
+    }
+
+    int32_t GetInt(int32_t pos) const override {
+        return row_.GetInt(ToInnerPos(pos));
+    }
+
+    int32_t GetDate(int32_t pos) const override {
+        return row_.GetDate(ToInnerPos(pos));
+    }
+
+    int64_t GetLong(int32_t pos) const override {
+        return row_.GetLong(ToInnerPos(pos));
+    }
+
+    float GetFloat(int32_t pos) const override {
+        return row_.GetFloat(ToInnerPos(pos));
+    }
+
+    double GetDouble(int32_t pos) const override {
+        return row_.GetDouble(ToInnerPos(pos));
+    }
+
+    BinaryString GetString(int32_t pos) const override {
+        return row_.GetString(ToInnerPos(pos));
+    }
+
+    std::string_view GetStringView(int32_t pos) const override {
+        return row_.GetStringView(ToInnerPos(pos));
+    }
+
+    Decimal GetDecimal(int32_t pos, int32_t precision, int32_t scale) const override {
+        return row_.GetDecimal(ToInnerPos(pos), precision, scale);
+    }
+
+    Timestamp GetTimestamp(int32_t pos, int32_t precision) const override {
+        return row_.GetTimestamp(ToInnerPos(pos), precision);
+    }
+
+    std::shared_ptr<Bytes> GetBinary(int32_t pos) const override {
+        return row_.GetBinary(ToInnerPos(pos));
+    }
+
+    std::shared_ptr<InternalArray> GetArray(int32_t pos) const override {
+        return row_.GetArray(ToInnerPos(pos));
+    }
+
+    std::shared_ptr<InternalMap> GetMap(int32_t pos) const override {
+        return row_.GetMap(ToInnerPos(pos));
+    }
+
+    std::shared_ptr<InternalRow> GetRow(int32_t pos, int32_t num_fields) const override {
+        return row_.GetRow(ToInnerPos(pos), num_fields);
+    }
+
+    /// Describes the view itself (arity and offset); field values are not printed.
+    std::string ToString() const override {
+        return fmt::format("OffsetRow, arity {}, offset {}", arity_, offset_);
+    }
+
+ private:
+    /// Translates a position of this view into the matching position of the wrapped row.
+    int32_t ToInnerPos(int32_t pos) const {
+        return offset_ + pos;
+    }
+
+    const InternalRow& row_;
+    int32_t arity_;
+    int32_t offset_;
+};
+}  // namespace paimon
diff --git a/src/paimon/core/utils/offset_row_test.cpp 
b/src/paimon/core/utils/offset_row_test.cpp
new file mode 100644
index 0000000..ec0c30d
--- /dev/null
+++ b/src/paimon/core/utils/offset_row_test.cpp
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/utils/offset_row.h"
+
+#include <utility>
+#include <variant>
+
+#include "arrow/api.h"
+#include "arrow/ipc/json_simple.h"
+#include "gtest/gtest.h"
+#include "paimon/common/data/binary_row.h"
+#include "paimon/common/data/columnar/columnar_map.h"
+#include "paimon/common/data/data_define.h"
+#include "paimon/common/data/generic_row.h"
+#include "paimon/common/data/internal_array.h"
+#include "paimon/common/data/internal_map.h"
+#include "paimon/common/types/row_kind.h"
+#include "paimon/common/utils/decimal_utils.h"
+#include "paimon/memory/memory_pool.h"
+#include "paimon/testing/utils/binary_row_generator.h"
+
+namespace paimon::test {
+TEST(OffsetRowTest, TestSimple) {
+    auto pool = GetDefaultPool();
+
+    // Build a 17-field source row with one field per accessor under test.
+    GenericRow source(17);
+    source.SetField(0, false);
+    source.SetField(1, true);
+    source.SetField(2, static_cast<char>(1));
+    source.SetField(3, static_cast<int16_t>(2));
+    source.SetField(4, static_cast<int32_t>(3));
+    source.SetField(5, static_cast<int64_t>(4));
+    source.SetField(6, static_cast<float>(5.1));
+    source.SetField(7, 6.12);
+
+    auto text = BinaryString::FromString("abcd", pool.get());
+    source.SetField(8, text);
+    std::shared_ptr<Bytes> blob = Bytes::AllocateBytes("efgh", pool.get());
+    source.SetField(9, blob);
+    std::string fruit = "apple";
+    source.SetField(10, std::string_view(fruit.data(), fruit.size()));
+
+    Timestamp timestamp(100, 20);
+    source.SetField(11, timestamp);
+    Decimal big_decimal(/*precision=*/30, /*scale=*/20,
+                        DecimalUtils::StrToInt128("12345678998765432145678").value());
+    source.SetField(12, big_decimal);
+
+    auto long_array = std::make_shared<BinaryArray>(BinaryArray::FromLongArray(
+        {static_cast<int64_t>(10), static_cast<int64_t>(20)}, pool.get()));
+    source.SetField(13, long_array);
+
+    std::shared_ptr<InternalRow> nested_row =
+        BinaryRowGenerator::GenerateRowPtr({100, 200}, pool.get());
+    source.SetField(14, nested_row);
+
+    auto map_keys =
+        arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, 2, 3]").ValueOrDie();
+    auto map_values =
+        arrow::ipc::internal::json::ArrayFromJSON(arrow::int64(), "[2, 4, 6]").ValueOrDie();
+    auto columnar_map =
+        std::make_shared<ColumnarMap>(map_keys, map_values, pool, /*offset=*/0, /*length=*/3);
+    source.SetField(15, columnar_map);
+    // Field 16 is deliberately left unset, so it reads back as null.
+    ASSERT_EQ(source.GetFieldCount(), 17);
+
+    // With offset 1, position p of the view reads field p + 1 of the source row.
+    OffsetRow shifted(source, /*arity=*/16, /*offset=*/1);
+    ASSERT_EQ(shifted.GetRowKind().value(), RowKind::Insert());
+    ASSERT_EQ(shifted.GetFieldCount(), 16);
+    ASSERT_TRUE(shifted.GetBoolean(0));
+    ASSERT_EQ(shifted.GetByte(1), static_cast<char>(1));
+    ASSERT_EQ(shifted.GetShort(2), static_cast<int16_t>(2));
+    ASSERT_EQ(shifted.GetInt(3), static_cast<int32_t>(3));
+    ASSERT_EQ(shifted.GetDate(3), static_cast<int32_t>(3));
+    ASSERT_EQ(shifted.GetLong(4), static_cast<int64_t>(4));
+    ASSERT_EQ(shifted.GetFloat(5), static_cast<float>(5.1));
+    ASSERT_EQ(shifted.GetDouble(6), static_cast<double>(6.12));
+    ASSERT_EQ(shifted.GetString(7), text);
+    ASSERT_EQ(*shifted.GetBinary(8), *blob);
+    ASSERT_EQ(std::string(shifted.GetStringView(9)), fruit);
+    ASSERT_EQ(shifted.GetTimestamp(10, /*precision=*/9), timestamp);
+    ASSERT_EQ(shifted.GetDecimal(11, /*precision=*/30, /*scale=*/20), big_decimal);
+    ASSERT_EQ(shifted.GetArray(12)->ToLongArray().value(), long_array->ToLongArray().value());
+
+    auto actual_nested = std::dynamic_pointer_cast<BinaryRow>(shifted.GetRow(13, 2));
+    auto expected_nested = std::dynamic_pointer_cast<BinaryRow>(nested_row);
+    ASSERT_EQ(*actual_nested, *expected_nested);
+
+    ASSERT_EQ(shifted.GetMap(14)->KeyArray()->ToIntArray().value(),
+              columnar_map->KeyArray()->ToIntArray().value());
+    ASSERT_EQ(shifted.GetMap(14)->ValueArray()->ToLongArray().value(),
+              columnar_map->ValueArray()->ToLongArray().value());
+    ASSERT_TRUE(shifted.IsNullAt(15));
+    ASSERT_EQ(shifted.ToString(), "OffsetRow, arity 16, offset 1");
+}
+
+}  // namespace paimon::test
diff --git a/src/paimon/core/utils/partition_path_utils.cpp 
b/src/paimon/core/utils/partition_path_utils.cpp
new file mode 100644
index 0000000..632feb8
--- /dev/null
+++ b/src/paimon/core/utils/partition_path_utils.cpp
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/utils/partition_path_utils.h"
+
+#include <array>
+#include <cstdint>
+#include <optional>
+
+#include "paimon/status.h"
+
+namespace paimon {
+
+/// Returns the set of ASCII codes that must be percent-escaped in a partition path.
+///
+/// Bit `i` is set when the character with code `i` needs escaping: every control character
+/// below the space character, DEL (0x7F), and a fixed list of printable characters.
+/// The set is built once, on first use, and never modified afterwards.
+const std::bitset<128>& PartitionPathUtils::CharToEscape() {
+    static const std::bitset<128> escape_set = []() {
+        std::bitset<128> bits;
+        // All control characters in [0x00, ' ').
+        for (size_t code = 0; code < static_cast<size_t>(' '); ++code) {
+            bits.set(code);
+        }
+        // Printable characters to escape, plus DEL. The array size is deduced from the
+        // initializer list so that no value-initialized filler element can sneak in.
+        constexpr char kSpecialChars[] = {'"', '#', '%', '\'', '*',    '/', ':', '=',
+                                          '?', '\\', '\x7F', '{', '}', '[', ']', '^'};
+        for (char c : kSpecialChars) {
+            bits.set(static_cast<unsigned char>(c));
+        }
+        return bits;
+    }();
+    return escape_set;
+}
+
+/// Builds `key=value/` segments, in spec order, from the escaped keys and values.
+///
+/// An empty spec produces an empty string; otherwise the result always ends with
+/// PATH_SEPARATOR. The first escaping failure is returned as the error.
+Result<std::string> PartitionPathUtils::GeneratePartitionPath(
+    const std::vector<std::pair<std::string, std::string>>& partition_spec) {
+    std::string partition_path;
+    for (const auto& entry : partition_spec) {
+        PAIMON_ASSIGN_OR_RAISE(std::string escaped_key, EscapePathName(entry.first));
+        PAIMON_ASSIGN_OR_RAISE(std::string escaped_value, EscapePathName(entry.second));
+        // Joining segments with the separator and then appending one more is the same as
+        // terminating every segment with the separator.
+        partition_path += escaped_key;
+        partition_path += "=";
+        partition_path += escaped_value;
+        partition_path += PATH_SEPARATOR;
+    }
+    return partition_path;
+}
+
+/// Percent-escapes every character of `path` for which NeedsEscaping() is true.
+///
+/// Returns Status::Invalid for an empty `path`. When nothing needs escaping the input is
+/// returned as is, without building an intermediate stream.
+Result<std::string> PartitionPathUtils::EscapePathName(const std::string& path) {
+    if (path.empty()) {
+        return Status::Invalid("path should not be empty");
+    }
+
+    // Locate the first character that has to be escaped.
+    size_t first_escaped = 0;
+    while (first_escaped < path.size() && !NeedsEscaping(path[first_escaped])) {
+        ++first_escaped;
+    }
+    if (first_escaped == path.size()) {
+        return path;
+    }
+
+    std::stringstream escaped;
+    // Everything before the first escaped character is copied verbatim.
+    escaped.write(path.data(), static_cast<std::streamsize>(first_escaped));
+    for (size_t i = first_escaped; i < path.size(); ++i) {
+        const char current = path[i];
+        if (NeedsEscaping(current)) {
+            EscapeChar(current, &escaped);
+        } else {
+            escaped << current;
+        }
+    }
+    return escaped.str();
+}
+
+/// Appends `c` to `*ss_ptr` as '%' followed by exactly two uppercase hexadecimal digits.
+void PartitionPathUtils::EscapeChar(char c, std::stringstream* ss_ptr) {
+    static constexpr char kHexDigits[] = "0123456789ABCDEF";
+    // Go through unsigned char so that bytes >= 0x80 are not sign-extended.
+    const auto byte = static_cast<unsigned char>(c);
+    std::stringstream& out = *ss_ptr;
+    out << '%' << kHexDigits[byte >> 4] << kHexDigits[byte & 0x0F];
+}
+
+/// Returns one path per prefix of `partition_spec`: entry `i` holds the escaped
+/// `key=value/` segments of the first `i + 1` spec entries, each ending with PATH_SEPARATOR.
+///
+/// An empty spec yields an empty vector. The first escaping failure is returned as the error.
+Result<std::vector<std::string>> PartitionPathUtils::GenerateHierarchicalPartitionPaths(
+    const std::vector<std::pair<std::string, std::string>>& partition_spec) {
+    std::vector<std::string> hierarchy;
+    // Grows by one segment per spec entry; a copy is stored after every step.
+    std::string prefix;
+    for (const auto& entry : partition_spec) {
+        PAIMON_ASSIGN_OR_RAISE(std::string key_part, EscapePathName(entry.first));
+        PAIMON_ASSIGN_OR_RAISE(std::string value_part, EscapePathName(entry.second));
+        prefix += key_part;
+        prefix += "=";
+        prefix += value_part;
+        prefix += PATH_SEPARATOR;
+        hierarchy.emplace_back(prefix);
+    }
+    return hierarchy;
+}
+
+}  // namespace paimon
diff --git a/src/paimon/core/utils/partition_path_utils.h 
b/src/paimon/core/utils/partition_path_utils.h
new file mode 100644
index 0000000..48d9adf
--- /dev/null
+++ b/src/paimon/core/utils/partition_path_utils.h
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <array>
+#include <bitset>
+#include <cstddef>
+#include <map>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "paimon/result.h"
+
+namespace paimon {
+
+/// Utilities that turn partition specs (ordered key/value pairs) into escaped partition paths.
+class PartitionPathUtils {
+ public:
+    /// Separator written after every `key=value` segment of a generated path.
+    static constexpr char PATH_SEPARATOR[] = "/";
+
+    // Static-only utility class: it can be neither constructed nor destroyed.
+    PartitionPathUtils() = delete;
+    ~PartitionPathUtils() = delete;
+
+    /// Makes the partition path for a partition spec.
+    ///
+    /// Keys and values are escaped with EscapePathName() and written in spec order as
+    /// `key=value` segments, each followed by PATH_SEPARATOR (for example `f1=v1/f0=v0/`).
+    ///
+    /// @param partition_spec The partition spec; an empty spec yields an empty string.
+    /// @return The escaped partition path, or the error raised while escaping a key or value.
+    static Result<std::string> GeneratePartitionPath(
+        const std::vector<std::pair<std::string, std::string>>& partition_spec);
+
+    /// Escapes a path name.
+    ///
+    /// Each character for which NeedsEscaping() is true is replaced by '%' followed by two
+    /// uppercase hex digits; all other bytes, including non-ASCII ones, are kept unchanged.
+    ///
+    /// @param path The path to escape; must not be empty.
+    /// @return The escaped path name, or an Invalid status when `path` is empty.
+    static Result<std::string> EscapePathName(const std::string& path);
+
+    /// Generates all hierarchical paths from a partition spec.
+    ///
+    /// For example, if the partition spec is (pt1: '0601', pt2: '12', pt3: '30'), this method
+    /// returns the following list (starting at index 0); note that every path ends with
+    /// PATH_SEPARATOR:
+    ///
+    /// <ul>
+    /// <li>pt1=0601/
+    /// <li>pt1=0601/pt2=12/
+    /// <li>pt1=0601/pt2=12/pt3=30/
+    /// </ul>
+    static Result<std::vector<std::string>> GenerateHierarchicalPartitionPaths(
+        const std::vector<std::pair<std::string, std::string>>& partition_spec);
+
+ private:
+    /// Returns the set of ASCII codes that have to be escaped (bit `i` set => escape code `i`).
+    static const std::bitset<128>& CharToEscape();
+
+    /// Tells whether `c` must be escaped. A `char` whose value converts to an index outside
+    /// the set (e.g. a negative `char`, which becomes a huge `size_t`) is never escaped.
+    static bool NeedsEscaping(char c) {
+        const std::bitset<128>& escape_set = CharToEscape();
+        const auto index = static_cast<size_t>(c);
+        return index < escape_set.size() && escape_set.test(index);
+    }
+
+    /// Appends the percent-escaped form of `c` to `*ss_ptr`.
+    /// NOTE(review): partition_path_utils_test.cpp calls this directly although it is private
+    /// here; confirm how the test build is granted access.
+    static void EscapeChar(char c, std::stringstream* ss_ptr);
+};
+
+}  // namespace paimon
diff --git a/src/paimon/core/utils/partition_path_utils_test.cpp 
b/src/paimon/core/utils/partition_path_utils_test.cpp
new file mode 100644
index 0000000..a10729b
--- /dev/null
+++ b/src/paimon/core/utils/partition_path_utils_test.cpp
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/utils/partition_path_utils.h"
+
+#include "gtest/gtest.h"
+#include "paimon/status.h"
+#include "paimon/testing/utils/testharness.h"
+
+namespace paimon::test {
+
+TEST(PartitionPathUtilsTest, TestEmptyInput) {
+    // An empty partition spec maps to an empty path (no trailing separator either).
+    const std::vector<std::pair<std::string, std::string>> empty_spec;
+    ASSERT_OK_AND_ASSIGN(std::string path, PartitionPathUtils::GeneratePartitionPath(empty_spec));
+    ASSERT_TRUE(path.empty());
+}
+
+TEST(PartitionPathUtilsTest, TestSimple) {
+    // Segments keep the order of the spec, and non-ASCII text passes through unescaped.
+    const std::vector<std::pair<std::string, std::string>> spec = {
+        {"f1", "v1"},
+        {"f2", "这是一段不是特别长的中文"},
+        {"f0", "v0"},
+    };
+    ASSERT_OK_AND_ASSIGN(std::string path, PartitionPathUtils::GeneratePartitionPath(spec));
+    ASSERT_EQ(path, "f1=v1/f2=这是一段不是特别长的中文/f0=v0/");
+}
+
+TEST(PartitionPathUtilsTest, TestCharToEscape) {
+    // '=', '/', '?', '\n', '[' and ']' are percent-escaped in both keys and values.
+    const std::vector<std::pair<std::string, std::string>> spec = {
+        {"f0", "v0"},
+        {"f1", "v1="},
+        {"/f2?", "这是一段不是特别长\n的[中文]"},
+    };
+    ASSERT_OK_AND_ASSIGN(std::string path, PartitionPathUtils::GeneratePartitionPath(spec));
+    ASSERT_EQ(path, "f0=v0/f1=v1%3D/%2Ff2%3F=这是一段不是特别长%0A的%5B中文%5D/");
+}
+
+TEST(PartitionPathUtilsTest, testGenerateHierarchicalPartitionPaths) {
+    const std::vector<std::pair<std::string, std::string>> spec = {
+        {"f2", "这是一段不是特别长的中文"},
+        {"f0", "v0"},
+        {"f1", "v1"},
+    };
+    // One path per spec prefix, each terminated by the path separator.
+    const std::vector<std::string> expected_paths = {
+        "f2=这是一段不是特别长的中文/",
+        "f2=这是一段不是特别长的中文/f0=v0/",
+        "f2=这是一段不是特别长的中文/f0=v0/f1=v1/",
+    };
+    ASSERT_OK_AND_ASSIGN(std::vector<std::string> actual_paths,
+                         PartitionPathUtils::GenerateHierarchicalPartitionPaths(spec));
+    ASSERT_EQ(actual_paths.size(), 3u);
+    ASSERT_EQ(actual_paths, expected_paths);
+}
+
+TEST(PartitionPathUtilsTest, EscapeChar) {
+    // Escapes one character into a fresh stream and returns the text that was produced.
+    auto escape = [](char c) {
+        std::stringstream out;
+        PartitionPathUtils::EscapeChar(c, &out);
+        return out.str();
+    };
+
+    ASSERT_EQ(escape(' '), "%20");
+    ASSERT_EQ(escape('/'), "%2F");
+    // Codes below 0x10 are padded to two hex digits.
+    ASSERT_EQ(escape('\n'), "%0A");
+    // EscapeChar escapes unconditionally, even characters that never need escaping.
+    ASSERT_EQ(escape('A'), "%41");
+}
+
+TEST(PartitionPathUtilsTest, EscapePathName) {
+    // An empty name is rejected.
+    ASSERT_NOK_WITH_MSG(PartitionPathUtils::EscapePathName(""), "path should not be empty");
+
+    // A name without special characters comes back unchanged.
+    ASSERT_OK_AND_ASSIGN(std::string plain, PartitionPathUtils::EscapePathName("normal_path"));
+    ASSERT_EQ(plain, "normal_path");
+
+    // A space is kept as is, while '/' and '=' are percent-escaped.
+    ASSERT_OK_AND_ASSIGN(std::string with_slash, PartitionPathUtils::EscapePathName("a b/c"));
+    ASSERT_EQ(with_slash, "a b%2Fc");
+
+    ASSERT_OK_AND_ASSIGN(std::string mostly_special, PartitionPathUtils::EscapePathName(" /="));
+    ASSERT_EQ(mostly_special, " %2F%3D");
+}
+
+}  // namespace paimon::test
diff --git a/src/paimon/core/utils/path_factory.h 
b/src/paimon/core/utils/path_factory.h
new file mode 100644
index 0000000..9bd61d4
--- /dev/null
+++ b/src/paimon/core/utils/path_factory.h
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <string>
+
+namespace paimon {
+
+/// Interface for building file paths: NewPath() takes no input, ToPath() takes a file name.
+class PathFactory {
+ public:
+    PathFactory() = default;
+    virtual ~PathFactory() = default;
+
+    virtual std::string NewPath() const = 0;
+    virtual std::string ToPath(const std::string& file_name) const = 0;
+};
+
+}  // namespace paimon
diff --git a/src/paimon/core/utils/special_field_ids.cpp 
b/src/paimon/core/utils/special_field_ids.cpp
new file mode 100644
index 0000000..7b07837
--- /dev/null
+++ b/src/paimon/core/utils/special_field_ids.cpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/utils/special_field_ids.h"
+
+namespace paimon {
+
+const int32_t SpecialFieldIds::CPP_FIELD_ID_END;  // Definitions without initializers.
+const int32_t SpecialFieldIds::SEQUENCE_NUMBER;  // NOTE(review): presumably set in the header.
+const int32_t SpecialFieldIds::VALUE_KIND;
+const int32_t SpecialFieldIds::ROW_KIND;
+const int32_t SpecialFieldIds::ROW_ID;
+const int32_t SpecialFieldIds::INDEX_SCORE;
+
+}  // namespace paimon

Reply via email to