This is an automated email from the ASF dual-hosted git repository.
leaves12138 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new d0dcdad feat: add core options and utility helpers
d0dcdad is described below
commit d0dcdadaaf2e50df203a57985216c3c039c5bfeb
Author: Yonghao Fang <[email protected]>
AuthorDate: Tue Jun 9 09:40:56 2026 +0800
feat: add core options and utility helpers
Merge PR #59: add core options and utility helpers.
---
include/paimon/utils/special_field_ids.h | 47 ++++++++
src/paimon/core/options/changelog_producer.h | 34 ++++++
src/paimon/core/options/compress_options.h | 30 +++++
src/paimon/core/options/expire_config.h | 62 ++++++++++
src/paimon/core/options/external_path_strategy.h | 32 +++++
src/paimon/core/options/lookup_compact_mode.h | 30 +++++
src/paimon/core/options/lookup_strategy.h | 53 ++++++++
src/paimon/core/options/lookup_strategy_test.cpp | 46 +++++++
src/paimon/core/options/merge_engine.h | 34 ++++++
src/paimon/core/options/sort_engine.h | 31 +++++
src/paimon/core/options/sort_order.h | 30 +++++
src/paimon/core/utils/commit_increment.h | 65 ++++++++++
src/paimon/core/utils/duration.h | 47 ++++++++
src/paimon/core/utils/file_utils.cpp | 77 ++++++++++++
src/paimon/core/utils/file_utils.h | 58 +++++++++
src/paimon/core/utils/file_utils_test.cpp | 56 +++++++++
src/paimon/core/utils/offset_row.h | 134 +++++++++++++++++++++
src/paimon/core/utils/offset_row_test.cpp | 109 +++++++++++++++++
src/paimon/core/utils/partition_path_utils.cpp | 129 ++++++++++++++++++++
src/paimon/core/utils/partition_path_utils.h | 77 ++++++++++++
.../core/utils/partition_path_utils_test.cpp | 106 ++++++++++++++++
src/paimon/core/utils/path_factory.h | 36 ++++++
src/paimon/core/utils/special_field_ids.cpp | 31 +++++
23 files changed, 1354 insertions(+)
diff --git a/include/paimon/utils/special_field_ids.h
b/include/paimon/utils/special_field_ids.h
new file mode 100644
index 0000000..accf07f
--- /dev/null
+++ b/include/paimon/utils/special_field_ids.h
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <limits>
+
+namespace paimon {
+
+/// A utility class for accessing special field IDs used in metadata.
+class SpecialFieldIds {
+ protected:
+ /// System defined constant for field id boundary. Value: INT32_MAX - 10000
+ static const int32_t CPP_FIELD_ID_END =
std::numeric_limits<int32_t>::max() - 10000;
+
+ public:
+ /// Special field ID reserved for sequence number. Value: INT32_MAX - 1
+ static const int32_t SEQUENCE_NUMBER = std::numeric_limits<int32_t>::max()
- 1;
+ /// Special field ID reserved for value kind. Value: INT32_MAX - 2
+ static const int32_t VALUE_KIND = std::numeric_limits<int32_t>::max() - 2;
+ /// Special field ID reserved for row kind. Value: INT32_MAX - 3
+ static const int32_t ROW_KIND = std::numeric_limits<int32_t>::max() - 3;
+ /// Special field ID reserved for row ID. Value: INT32_MAX - 5
+ static const int32_t ROW_ID = std::numeric_limits<int32_t>::max() - 5;
+
+ /// Special field ID reserved for index score. Value: CPP_FIELD_ID_END - 1
+ static const int32_t INDEX_SCORE = CPP_FIELD_ID_END - 1;
+};
+
+} // namespace paimon
diff --git a/src/paimon/core/options/changelog_producer.h
b/src/paimon/core/options/changelog_producer.h
new file mode 100644
index 0000000..744f767
--- /dev/null
+++ b/src/paimon/core/options/changelog_producer.h
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+namespace paimon {
+/// Specifies the changelog producer for table.
+enum class ChangelogProducer {
+ // No changelog file.
+ NONE = 1,
+ // Double write to a changelog file when flushing memory table, the
changelog is from input.
+ INPUT = 2,
+ // Generate changelog files with each full compaction.
+ FULL_COMPACTION = 3,
+ // Generate changelog files through 'lookup' before committing the data
writing.
+ LOOKUP = 4
+};
+} // namespace paimon
diff --git a/src/paimon/core/options/compress_options.h
b/src/paimon/core/options/compress_options.h
new file mode 100644
index 0000000..6f18e38
--- /dev/null
+++ b/src/paimon/core/options/compress_options.h
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+#include <cstdint>
+#include <string>
+
+namespace paimon {
+/// Options of compression.
+struct CompressOptions {
+ std::string compress;
+ int32_t zstd_level;
+};
+} // namespace paimon
diff --git a/src/paimon/core/options/expire_config.h
b/src/paimon/core/options/expire_config.h
new file mode 100644
index 0000000..492930d
--- /dev/null
+++ b/src/paimon/core/options/expire_config.h
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+
+namespace paimon {
+
+class ExpireConfig {
+ public:
+ ExpireConfig() = default;
+ ExpireConfig(int32_t snapshot_retain_max, int32_t snapshot_retain_min,
+ int64_t snapshot_time_retain_ms, int32_t snapshot_max_deletes,
+ bool snapshot_clean_empty_directories)
+ : snapshot_retain_max_(snapshot_retain_max),
+ snapshot_retain_min_(snapshot_retain_min),
+ snapshot_time_retain_ms_(snapshot_time_retain_ms),
+ snapshot_max_deletes_(snapshot_max_deletes),
+ snapshot_clean_empty_directories_(snapshot_clean_empty_directories)
{}
+
+ int32_t GetSnapshotRetainMin() const {
+ return snapshot_retain_min_;
+ }
+ int32_t GetSnapshotRetainMax() const {
+ return snapshot_retain_max_;
+ }
+ int64_t GetSnapshotTimeRetainMs() const {
+ return snapshot_time_retain_ms_;
+ }
+ int32_t GetSnapshotMaxDeletes() const {
+ return snapshot_max_deletes_;
+ }
+ bool CleanEmptyDirectories() const {
+ return snapshot_clean_empty_directories_;
+ }
+
+ private:
+ int32_t snapshot_retain_max_;
+ int32_t snapshot_retain_min_;
+ int64_t snapshot_time_retain_ms_;
+ int32_t snapshot_max_deletes_;
+ bool snapshot_clean_empty_directories_;
+};
+
+} // namespace paimon
diff --git a/src/paimon/core/options/external_path_strategy.h
b/src/paimon/core/options/external_path_strategy.h
new file mode 100644
index 0000000..6b6a0c9
--- /dev/null
+++ b/src/paimon/core/options/external_path_strategy.h
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+namespace paimon {
+/// Specifies the strategy for selecting external storage paths.
+enum class ExternalPathStrategy {
+ // Do not choose any external storage, data will still be written to the
default warehouse path.
+ NONE = 1,
+ // Select a specific file system as the external path. Currently supported
are S3 and OSS.
+ SPECIFIC_FS = 2,
+ // When writing a new file, a path is chosen from data-file.external-paths
in turn.
+ ROUND_ROBIN = 3
+};
+} // namespace paimon
diff --git a/src/paimon/core/options/lookup_compact_mode.h
b/src/paimon/core/options/lookup_compact_mode.h
new file mode 100644
index 0000000..a1e6c92
--- /dev/null
+++ b/src/paimon/core/options/lookup_compact_mode.h
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+namespace paimon {
+/// The compact mode for lookup compaction.
+enum class LookupCompactMode {
+ /// Lookup compaction will use ForceUpLevel0Compaction strategy to
radically compact new files.
+ RADICAL = 1,
+ /// Lookup compaction will use UniversalCompaction strategy to gently
compact new files.
+ GENTLE = 2
+};
+} // namespace paimon
diff --git a/src/paimon/core/options/lookup_strategy.h
b/src/paimon/core/options/lookup_strategy.h
new file mode 100644
index 0000000..094aae5
--- /dev/null
+++ b/src/paimon/core/options/lookup_strategy.h
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+namespace paimon {
+/// Strategy for lookup.
+struct LookupStrategy {
+ public:
+ bool operator==(const LookupStrategy& other) const {
+ if (this == &other) {
+ return true;
+ }
+ return is_first_row == other.is_first_row && produce_changelog ==
other.produce_changelog &&
+ deletion_vector == other.deletion_vector && need_lookup ==
other.need_lookup;
+ }
+
+ static LookupStrategy From(bool is_first_row, bool produce_changelog, bool
deletion_vector,
+ bool force_lookup) {
+ return LookupStrategy(is_first_row, produce_changelog,
deletion_vector, force_lookup);
+ }
+
+ const bool need_lookup;
+ const bool is_first_row;
+ const bool produce_changelog;
+ const bool deletion_vector;
+
+ private:
+ LookupStrategy(bool _is_first_row, bool _produce_changelog, bool
_deletion_vector,
+ bool _force_lookup)
+ : need_lookup(_produce_changelog || _deletion_vector || _is_first_row
|| _force_lookup),
+ is_first_row(_is_first_row),
+ produce_changelog(_produce_changelog),
+ deletion_vector(_deletion_vector) {}
+};
+
+} // namespace paimon
diff --git a/src/paimon/core/options/lookup_strategy_test.cpp
b/src/paimon/core/options/lookup_strategy_test.cpp
new file mode 100644
index 0000000..0273271
--- /dev/null
+++ b/src/paimon/core/options/lookup_strategy_test.cpp
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/options/lookup_strategy.h"
+
+#include "gtest/gtest.h"
+
+namespace paimon::test {
+
+TEST(LookupStrategyTest, TestFrom) {
+ auto strategy = LookupStrategy::From(
+ /*is_first_row=*/true,
+ /*produce_changelog=*/false,
+ /*deletion_vector=*/false,
+ /*force_lookup=*/false);
+
+ ASSERT_TRUE(strategy.need_lookup);
+ ASSERT_TRUE(strategy.is_first_row);
+ ASSERT_FALSE(strategy.produce_changelog);
+ ASSERT_FALSE(strategy.deletion_vector);
+}
+
+TEST(LookupStrategyTest, TestNeedLookupCombinations) {
+ ASSERT_FALSE(LookupStrategy::From(false, false, false, false).need_lookup);
+ ASSERT_TRUE(LookupStrategy::From(false, true, false, false).need_lookup);
+ ASSERT_TRUE(LookupStrategy::From(false, false, true, false).need_lookup);
+ ASSERT_TRUE(LookupStrategy::From(false, false, false, true).need_lookup);
+}
+
+} // namespace paimon::test
diff --git a/src/paimon/core/options/merge_engine.h
b/src/paimon/core/options/merge_engine.h
new file mode 100644
index 0000000..0c30306
--- /dev/null
+++ b/src/paimon/core/options/merge_engine.h
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+namespace paimon {
+/// Specifies the merge engine for table with primary key.
+enum class MergeEngine {
+ // De-duplicate and keep the last row.
+ DEDUPLICATE = 1,
+ // Partial update non-null fields.
+ PARTIAL_UPDATE = 2,
+ // Aggregate fields with same primary key.
+ AGGREGATE = 3,
+ // De-duplicate and keep the first row.
+ FIRST_ROW = 4
+};
+} // namespace paimon
diff --git a/src/paimon/core/options/sort_engine.h
b/src/paimon/core/options/sort_engine.h
new file mode 100644
index 0000000..871895b
--- /dev/null
+++ b/src/paimon/core/options/sort_engine.h
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+namespace paimon {
+/// Specifies the sort engine for table with primary key.
+enum class SortEngine {
+ // Use min-heap for multiway sorting.
+ MIN_HEAP = 1,
+ // Use loser-tree for multiway sorting. Compared with heapsort, loser-tree
has fewer comparisons
+ // and is more efficient.
+ LOSER_TREE = 2
+};
+} // namespace paimon
diff --git a/src/paimon/core/options/sort_order.h
b/src/paimon/core/options/sort_order.h
new file mode 100644
index 0000000..1e8dd02
--- /dev/null
+++ b/src/paimon/core/options/sort_order.h
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+namespace paimon {
+/// Specifies the sort order for field sequence id.
+enum class SortOrder {
+ // specifies sequence.field sort order is ascending.
+ ASCENDING = 1,
+ // specifies sequence.field sort order is descending.
+ DESCENDING = 2
+};
+} // namespace paimon
diff --git a/src/paimon/core/utils/commit_increment.h
b/src/paimon/core/utils/commit_increment.h
new file mode 100644
index 0000000..e371821
--- /dev/null
+++ b/src/paimon/core/utils/commit_increment.h
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <memory>
+
+#include "paimon/core/compact/compact_deletion_file.h"
+#include "paimon/core/io/compact_increment.h"
+#include "paimon/core/io/data_increment.h"
+
+namespace paimon {
+
+// Changes to commit.
+class CommitIncrement {
+ public:
+ CommitIncrement(const DataIncrement& data_increment, const
CompactIncrement& compact_increment,
+ const std::shared_ptr<CompactDeletionFile>&
compact_deletion_file)
+ : data_increment_(data_increment),
+ compact_increment_(compact_increment),
+ compact_deletion_file_(compact_deletion_file) {}
+
+ const DataIncrement& GetNewFilesIncrement() const {
+ return data_increment_;
+ }
+
+ const CompactIncrement& GetCompactIncrement() const {
+ return compact_increment_;
+ }
+
+ DataIncrement& GetNewFilesIncrement() {
+ return data_increment_;
+ }
+
+ CompactIncrement& GetCompactIncrement() {
+ return compact_increment_;
+ }
+
+ std::shared_ptr<CompactDeletionFile> GetCompactDeletionFile() const {
+ return compact_deletion_file_;
+ }
+
+ private:
+ DataIncrement data_increment_;
+ CompactIncrement compact_increment_;
+ std::shared_ptr<CompactDeletionFile> compact_deletion_file_;
+};
+
+} // namespace paimon
diff --git a/src/paimon/core/utils/duration.h b/src/paimon/core/utils/duration.h
new file mode 100644
index 0000000..ba64355
--- /dev/null
+++ b/src/paimon/core/utils/duration.h
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <chrono>
+
+namespace paimon {
+
+// Calculate operation duration.
+class Duration {
+ public:
+ Duration() : start_(std::chrono::high_resolution_clock::now()) {}
+
+ uint64_t Get() {
+ return std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::high_resolution_clock::now() - start_)
+ .count();
+ }
+
+ uint64_t Reset() {
+ uint64_t duration = Get();
+ start_ = std::chrono::high_resolution_clock::now();
+ return duration;
+ }
+
+ private:
+ std::chrono::high_resolution_clock::time_point start_;
+};
+
+} // namespace paimon
diff --git a/src/paimon/core/utils/file_utils.cpp
b/src/paimon/core/utils/file_utils.cpp
new file mode 100644
index 0000000..1a803e3
--- /dev/null
+++ b/src/paimon/core/utils/file_utils.cpp
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/utils/file_utils.h"
+
+#include <algorithm>
+#include <optional>
+#include <utility>
+
+#include "fmt/format.h"
+#include "paimon/common/utils/path_util.h"
+#include "paimon/common/utils/string_utils.h"
+#include "paimon/fs/file_system.h"
+#include "paimon/result.h"
+
+namespace paimon {
+
+Status FileUtils::ListVersionedFiles(const std::shared_ptr<FileSystem>& fs,
const std::string& dir,
+ const std::string& prefix,
std::vector<int64_t>* files) {
+ std::vector<std::string> file_strs;
+ PAIMON_RETURN_NOT_OK(ListOriginalVersionedFiles(fs, dir, prefix,
&file_strs));
+ for (const auto& file_str : file_strs) {
+ std::optional<int64_t> file_number =
StringUtils::StringToValue<int64_t>(file_str);
+ if (file_number == std::nullopt) {
+ return Status::Invalid(fmt::format("fail to convert {} to number",
file_str));
+ }
+ files->emplace_back(file_number.value());
+ }
+ return Status::OK();
+}
+
+Status FileUtils::ListOriginalVersionedFiles(const
std::shared_ptr<FileSystem>& fs,
+ const std::string& dir, const
std::string& prefix,
+ std::vector<std::string>* files) {
+ std::vector<std::unique_ptr<BasicFileStatus>> file_status_list;
+ PAIMON_RETURN_NOT_OK(ListVersionedFileStatus(fs, dir, prefix,
&file_status_list));
+ for (auto& file_status : file_status_list) {
+ std::string file_name = PathUtil::GetName(file_status->GetPath());
+ files->emplace_back(file_name.substr(prefix.size()));
+ }
+ return Status::OK();
+}
+
+Status FileUtils::ListVersionedFileStatus(
+ const std::shared_ptr<FileSystem>& fs, const std::string& dir, const
std::string& prefix,
+ std::vector<std::unique_ptr<BasicFileStatus>>* file_status_list) {
+ PAIMON_ASSIGN_OR_RAISE(bool exist, fs->Exists(dir));
+ if (exist) {
+ std::vector<std::unique_ptr<BasicFileStatus>> file_statuses;
+ PAIMON_RETURN_NOT_OK(fs->ListDir(dir, &file_statuses));
+ for (auto& file_status : file_statuses) {
+ std::string file_name = PathUtil::GetName(file_status->GetPath());
+ if (StringUtils::StartsWith(file_name, prefix)) {
+ file_status_list->emplace_back(std::move(file_status));
+ }
+ }
+ }
+ return Status::OK();
+}
+
+} // namespace paimon
diff --git a/src/paimon/core/utils/file_utils.h
b/src/paimon/core/utils/file_utils.h
new file mode 100644
index 0000000..cbb5f76
--- /dev/null
+++ b/src/paimon/core/utils/file_utils.h
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "paimon/status.h"
+#include "paimon/type_fwd.h"
+
+namespace paimon {
+class BasicFileStatus;
+class FileSystem;
+
+/// Utils for file reading and writing.
+class FileUtils {
+ public:
+ /// List versioned files for the directory.
+ ///
+ /// @return Status
+ static Status ListVersionedFiles(const std::shared_ptr<FileSystem>& fs,
const std::string& dir,
+ const std::string& prefix,
std::vector<int64_t>* files);
+
+ /// List original versioned files for the directory.
+ ///
+ /// @return Status
+ static Status ListOriginalVersionedFiles(const
std::shared_ptr<FileSystem>& fs,
+ const std::string& dir, const
std::string& prefix,
+ std::vector<std::string>* files);
+
+ /// List versioned file status for the directory.
+ ///
+ /// @return Status
+ static Status ListVersionedFileStatus(
+ const std::shared_ptr<FileSystem>& fs, const std::string& dir, const
std::string& prefix,
+ std::vector<std::unique_ptr<BasicFileStatus>>* file_status_list);
+};
+
+} // namespace paimon
diff --git a/src/paimon/core/utils/file_utils_test.cpp
b/src/paimon/core/utils/file_utils_test.cpp
new file mode 100644
index 0000000..ef3db4d
--- /dev/null
+++ b/src/paimon/core/utils/file_utils_test.cpp
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/utils/file_utils.h"
+
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "paimon/fs/local/local_file_system.h"
+#include "paimon/testing/utils/testharness.h"
+
+namespace paimon::test {
+
+TEST(FileUtilsTest, TestSimple) {
+ std::string test_data_path =
+ paimon::test::GetDataDir() + "/orc/append_09.db/append_09/snapshot/";
+ std::vector<int64_t> files;
+ auto fs = std::make_shared<LocalFileSystem>();
+ ASSERT_OK(FileUtils::ListVersionedFiles(std::move(fs), test_data_path,
"snapshot-", &files));
+ ASSERT_EQ(files.size(), 5u);
+}
+
+TEST(FileUtilsTest, TestNotExist) {
+ std::string test_data_path =
+ paimon::test::GetDataDir() + "/orc/append_09.db/append_09/not_exist/";
+ std::vector<int64_t> files;
+ auto fs = std::make_shared<LocalFileSystem>();
+ ASSERT_OK(FileUtils::ListVersionedFiles(std::move(fs), test_data_path,
"snapshot-", &files));
+ ASSERT_EQ(files.size(), 0u);
+}
+
+TEST(FileUtilsTest, TestNotNumber) {
+ std::string test_data_path =
+ paimon::test::GetDataDir() + "/orc/append_09.db/append_09/manifest/";
+ std::vector<int64_t> files;
+ auto fs = std::make_shared<LocalFileSystem>();
+ ASSERT_NOK(FileUtils::ListVersionedFiles(std::move(fs), test_data_path,
"manifest-", &files));
+}
+
+} // namespace paimon::test
diff --git a/src/paimon/core/utils/offset_row.h
b/src/paimon/core/utils/offset_row.h
new file mode 100644
index 0000000..1fbe671
--- /dev/null
+++ b/src/paimon/core/utils/offset_row.h
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "fmt/format.h"
+#include "paimon/common/data/binary_string.h"
+#include "paimon/common/data/internal_row.h"
+#include "paimon/data/decimal.h"
+#include "paimon/data/timestamp.h"
+#include "paimon/memory/bytes.h"
+#include "paimon/result.h"
+
+namespace paimon {
+class Bytes;
+class InternalArray;
+class InternalMap;
+class RowKind;
+
+/// A `InternalRow` to wrap row with offset.
+class OffsetRow : public InternalRow {
+ public:
+ OffsetRow(const InternalRow& row, int32_t arity, int32_t offset)
+ : row_(row), arity_(arity), offset_(offset) {}
+
+ int32_t GetFieldCount() const override {
+ return arity_;
+ }
+
+ Result<const RowKind*> GetRowKind() const override {
+ return row_.GetRowKind();
+ }
+
+ void SetRowKind(const RowKind* kind) override {}
+
+ bool IsNullAt(int32_t pos) const override {
+ return row_.IsNullAt(offset_ + pos);
+ }
+
+ bool GetBoolean(int32_t pos) const override {
+ return row_.GetBoolean(offset_ + pos);
+ }
+
+ char GetByte(int32_t pos) const override {
+ return row_.GetByte(offset_ + pos);
+ }
+
+ int16_t GetShort(int32_t pos) const override {
+ return row_.GetShort(offset_ + pos);
+ }
+
+ int32_t GetInt(int32_t pos) const override {
+ return row_.GetInt(offset_ + pos);
+ }
+
+ int32_t GetDate(int32_t pos) const override {
+ return row_.GetDate(offset_ + pos);
+ }
+
+ int64_t GetLong(int32_t pos) const override {
+ return row_.GetLong(offset_ + pos);
+ }
+
+ float GetFloat(int32_t pos) const override {
+ return row_.GetFloat(offset_ + pos);
+ }
+
+ double GetDouble(int32_t pos) const override {
+ return row_.GetDouble(offset_ + pos);
+ }
+
+ BinaryString GetString(int32_t pos) const override {
+ return row_.GetString(offset_ + pos);
+ }
+
+ std::string_view GetStringView(int32_t pos) const override {
+ return row_.GetStringView(offset_ + pos);
+ }
+
+ Decimal GetDecimal(int32_t pos, int32_t precision, int32_t scale) const
override {
+ return row_.GetDecimal(offset_ + pos, precision, scale);
+ }
+
+ Timestamp GetTimestamp(int32_t pos, int32_t precision) const override {
+ return row_.GetTimestamp(offset_ + pos, precision);
+ }
+
+ std::shared_ptr<Bytes> GetBinary(int32_t pos) const override {
+ return row_.GetBinary(offset_ + pos);
+ }
+
+ std::shared_ptr<InternalArray> GetArray(int32_t pos) const override {
+ return row_.GetArray(offset_ + pos);
+ }
+
+ std::shared_ptr<InternalMap> GetMap(int32_t pos) const override {
+ return row_.GetMap(offset_ + pos);
+ }
+
+ std::shared_ptr<InternalRow> GetRow(int32_t pos, int32_t num_fields) const
override {
+ return row_.GetRow(offset_ + pos, num_fields);
+ }
+
+ std::string ToString() const override {
+ return fmt::format("OffsetRow, arity {}, offset {}", arity_, offset_);
+ }
+
+ private:
+ const InternalRow& row_;
+ int32_t arity_;
+ int32_t offset_;
+};
+} // namespace paimon
diff --git a/src/paimon/core/utils/offset_row_test.cpp
b/src/paimon/core/utils/offset_row_test.cpp
new file mode 100644
index 0000000..ec0c30d
--- /dev/null
+++ b/src/paimon/core/utils/offset_row_test.cpp
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/utils/offset_row.h"
+
+#include <utility>
+#include <variant>
+
+#include "arrow/api.h"
+#include "arrow/ipc/json_simple.h"
+#include "gtest/gtest.h"
+#include "paimon/common/data/binary_row.h"
+#include "paimon/common/data/columnar/columnar_map.h"
+#include "paimon/common/data/data_define.h"
+#include "paimon/common/data/generic_row.h"
+#include "paimon/common/data/internal_array.h"
+#include "paimon/common/data/internal_map.h"
+#include "paimon/common/types/row_kind.h"
+#include "paimon/common/utils/decimal_utils.h"
+#include "paimon/memory/memory_pool.h"
+#include "paimon/testing/utils/binary_row_generator.h"
+
+namespace paimon::test {
+TEST(OffsetRowTest, TestSimple) {
+ auto pool = GetDefaultPool();
+ // generate internal row
+ GenericRow internal_row(17);
+ internal_row.SetField(0, false);
+ internal_row.SetField(1, true);
+ internal_row.SetField(2, static_cast<char>(1));
+ internal_row.SetField(3, static_cast<int16_t>(2));
+ internal_row.SetField(4, static_cast<int32_t>(3));
+ internal_row.SetField(5, static_cast<int64_t>(4));
+ internal_row.SetField(6, static_cast<float>(5.1));
+ internal_row.SetField(7, 6.12);
+ auto str = BinaryString::FromString("abcd", pool.get());
+ internal_row.SetField(8, str);
+ std::shared_ptr<Bytes> bytes = Bytes::AllocateBytes("efgh", pool.get());
+ internal_row.SetField(9, bytes);
+ std::string str9 = "apple";
+ internal_row.SetField(10, std::string_view(str9.data(), str9.size()));
+
+ Timestamp ts(100, 20);
+ internal_row.SetField(11, ts);
+ Decimal decimal(/*precision=*/30, /*scale=*/20,
+
DecimalUtils::StrToInt128("12345678998765432145678").value());
+ internal_row.SetField(12, decimal);
+
+ auto array = std::make_shared<BinaryArray>(BinaryArray::FromLongArray(
+ {static_cast<int64_t>(10), static_cast<int64_t>(20)}, pool.get()));
+ internal_row.SetField(13, array);
+
+ std::shared_ptr<InternalRow> binary_row =
+ BinaryRowGenerator::GenerateRowPtr({100, 200}, pool.get());
+ internal_row.SetField(14, binary_row);
+
+ auto key = arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1,
2, 3]").ValueOrDie();
+ auto value =
+ arrow::ipc::internal::json::ArrayFromJSON(arrow::int64(), "[2, 4,
6]").ValueOrDie();
+ auto map = std::make_shared<ColumnarMap>(key, value, pool, /*offset=*/0,
/*length=*/3);
+ internal_row.SetField(15, map);
+ // do not set value at pos 16, therefore, pos 16 is null
+ ASSERT_EQ(internal_row.GetFieldCount(), 17);
+
+ OffsetRow row(internal_row, /*arity=*/16, /*offset=*/1);
+ ASSERT_EQ(row.GetRowKind().value(), RowKind::Insert());
+ ASSERT_EQ(row.GetFieldCount(), 16);
+ ASSERT_EQ(row.GetBoolean(0), true);
+ ASSERT_EQ(row.GetByte(1), static_cast<char>(1));
+ ASSERT_EQ(row.GetShort(2), static_cast<int16_t>(2));
+ ASSERT_EQ(row.GetInt(3), static_cast<int32_t>(3));
+ ASSERT_EQ(row.GetDate(3), static_cast<int32_t>(3));
+ ASSERT_EQ(row.GetLong(4), static_cast<int64_t>(4));
+ ASSERT_EQ(row.GetFloat(5), static_cast<float>(5.1));
+ ASSERT_EQ(row.GetDouble(6), static_cast<double>(6.12));
+ ASSERT_EQ(row.GetString(7), str);
+ ASSERT_EQ(*row.GetBinary(8), *bytes);
+ ASSERT_EQ(std::string(row.GetStringView(9)), str9);
+ ASSERT_EQ(row.GetTimestamp(10, /*precision=*/9), ts);
+ ASSERT_EQ(row.GetDecimal(11, /*precision=*/30, /*scale=*/20), decimal);
+ ASSERT_EQ(row.GetArray(12)->ToLongArray().value(),
array->ToLongArray().value());
+ auto binary_row_result =
std::dynamic_pointer_cast<BinaryRow>(row.GetRow(13, 2));
+ auto binary_row_expected =
std::dynamic_pointer_cast<BinaryRow>(binary_row);
+ ASSERT_EQ(*binary_row_result, *binary_row_expected);
+ ASSERT_EQ(row.GetMap(14)->KeyArray()->ToIntArray().value(),
+ map->KeyArray()->ToIntArray().value());
+ ASSERT_EQ(row.GetMap(14)->ValueArray()->ToLongArray().value(),
+ map->ValueArray()->ToLongArray().value());
+ ASSERT_TRUE(row.IsNullAt(15));
+ ASSERT_EQ(row.ToString(), "OffsetRow, arity 16, offset 1");
+}
+
+} // namespace paimon::test
diff --git a/src/paimon/core/utils/partition_path_utils.cpp
b/src/paimon/core/utils/partition_path_utils.cpp
new file mode 100644
index 0000000..632feb8
--- /dev/null
+++ b/src/paimon/core/utils/partition_path_utils.cpp
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/utils/partition_path_utils.h"
+
+#include <array>
+#include <cstdint>
+#include <optional>
+
+#include "paimon/status.h"
+
+namespace paimon {
+
+const std::bitset<128>& PartitionPathUtils::CharToEscape() {
+ constexpr auto char_to_escape = []() {
+ std::bitset<128> bitset;
+ for (char c = 0; c < ' '; c++) {
+ bitset.set(static_cast<unsigned char>(c));
+ }
+ std::array<char, 48> clist = {
+ '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006',
'\u0007', '\u0008',
+ '\u0009', '\n', '\u000B', '\u000C', '\r', '\u000E',
'\u000F', '\u0010',
+ '\u0011', '\u0012', '\u0013', '\u0014', '\u0015', '\u0016',
'\u0017', '\u0018',
+ '\u0019', '\u001A', '\u001B', '\u001C', '\u001D', '\u001E',
'\u001F', '"',
+ '#', '%', '\'', '*', '/', ':', '=',
'?',
+ '\\', '\u007F', '{', '}', '[', ']', '^'};
+ for (char c : clist) {
+ bitset.set(static_cast<unsigned char>(c));
+ }
+ return bitset;
+ };
+ static std::bitset<128> bitset = char_to_escape();
+ return bitset;
+}
+
+Result<std::string> PartitionPathUtils::GeneratePartitionPath(
+ const std::vector<std::pair<std::string, std::string>>& partition_spec) {
+ if (partition_spec.empty()) {
+ return std::string();
+ }
+ std::stringstream ss;
+ int32_t i = 0;
+ for (const auto& [key, value] : partition_spec) {
+ if (i > 0) {
+ ss << PATH_SEPARATOR;
+ }
+ PAIMON_ASSIGN_OR_RAISE(std::string key_esc, EscapePathName(key));
+ PAIMON_ASSIGN_OR_RAISE(std::string value_esc, EscapePathName(value));
+ ss << key_esc << "=" << value_esc;
+ i++;
+ }
+ ss << PATH_SEPARATOR;
+ return ss.str();
+}
+
+Result<std::string> PartitionPathUtils::EscapePathName(const std::string&
path) {
+ if (path.empty()) {
+ return Status::Invalid("path should not be empty");
+ }
+
+ std::optional<std::stringstream> ss;
+ for (size_t i = 0; i < path.size(); i++) {
+ char c = path[i];
+ if (NeedsEscaping(c)) {
+ if (ss == std::nullopt) {
+ ss = std::stringstream();
+ for (size_t j = 0; j < i; j++) {
+ ss.value() << path[j];
+ }
+ }
+ EscapeChar(c, &ss.value());
+ } else if (ss != std::nullopt) {
+ ss.value() << c;
+ }
+ }
+ if (ss == std::nullopt) {
+ return path;
+ }
+ return ss.value().str();
+}
+
+void PartitionPathUtils::EscapeChar(char c, std::stringstream* ss_ptr) {
+ auto& ss = *ss_ptr;
+ ss << '%';
+ auto uc = static_cast<unsigned char>(c);
+ if (uc < 16) {
+ ss << '0';
+ }
+ std::stringstream hex_ss;
+ hex_ss << std::hex << std::uppercase << static_cast<int32_t>(uc);
+ ss << hex_ss.str();
+}
+
+Result<std::vector<std::string>>
PartitionPathUtils::GenerateHierarchicalPartitionPaths(
+ const std::vector<std::pair<std::string, std::string>>& partition_spec) {
+ std::vector<std::string> paths;
+ if (partition_spec.empty()) {
+ return paths;
+ }
+ std::string suffix_buf;
+ for (const auto& [key, value] : partition_spec) {
+ PAIMON_ASSIGN_OR_RAISE(std::string escaped_key, EscapePathName(key));
+ PAIMON_ASSIGN_OR_RAISE(std::string escaped_value,
EscapePathName(value));
+ suffix_buf.append(escaped_key);
+ suffix_buf.append("=");
+ suffix_buf.append(escaped_value);
+ suffix_buf.append(PATH_SEPARATOR);
+ paths.push_back(suffix_buf);
+ }
+ return paths;
+}
+
+} // namespace paimon
diff --git a/src/paimon/core/utils/partition_path_utils.h
b/src/paimon/core/utils/partition_path_utils.h
new file mode 100644
index 0000000..48d9adf
--- /dev/null
+++ b/src/paimon/core/utils/partition_path_utils.h
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <array>
+#include <bitset>
+#include <cstddef>
+#include <map>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "paimon/result.h"
+
+namespace paimon {
+
+// Utils for file system.
+class PartitionPathUtils {
+ public:
+ static constexpr char PATH_SEPARATOR[] = "/";
+
+ PartitionPathUtils() = delete;
+ ~PartitionPathUtils() = delete;
+ /// Make partition path from partition spec.
+ ///
+ /// @param partition_spec The partition spec.
+ /// @return An escaped, valid partition name.
+ static Result<std::string> GeneratePartitionPath(
+ const std::vector<std::pair<std::string, std::string>>&
partition_spec);
+
+ /// Escapes a path name.
+ ///
+ /// @param path The path to escape.
+ /// @return An escaped path name.
+ static Result<std::string> EscapePathName(const std::string& path);
+
+ /// Generate all hierarchical paths from partition spec.
+ ///
+ /// For example, if the partition spec is (pt1: '0601', pt2: '12', pt3:
'30'), this method
+ /// will return a list (start from index 0):
+ ///
+ /// <ul>
+ /// <li>pt1=0601
+ /// <li>pt1=0601/pt2=12
+ /// <li>pt1=0601/pt2=12/pt3=30
+ /// </ul>
+ static Result<std::vector<std::string>> GenerateHierarchicalPartitionPaths(
+ const std::vector<std::pair<std::string, std::string>>&
partition_spec);
+
+ private:
+ static const std::bitset<128>& CharToEscape();
+ static bool NeedsEscaping(char c) {
+ return static_cast<size_t>(c) < CharToEscape().size() &&
CharToEscape().test(c);
+ }
+
+ static void EscapeChar(char c, std::stringstream* ss_ptr);
+};
+
+} // namespace paimon
diff --git a/src/paimon/core/utils/partition_path_utils_test.cpp
b/src/paimon/core/utils/partition_path_utils_test.cpp
new file mode 100644
index 0000000..a10729b
--- /dev/null
+++ b/src/paimon/core/utils/partition_path_utils_test.cpp
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/utils/partition_path_utils.h"
+
+#include "gtest/gtest.h"
+#include "paimon/status.h"
+#include "paimon/testing/utils/testharness.h"
+
+namespace paimon::test {
+
+TEST(PartitionPathUtilsTest, TestEmptyInput) {
+ std::vector<std::pair<std::string, std::string>> partition_spec;
+ ASSERT_OK_AND_ASSIGN(std::string partition_path_str,
+
PartitionPathUtils::GeneratePartitionPath(partition_spec));
+ ASSERT_EQ(partition_path_str, "");
+}
+
+TEST(PartitionPathUtilsTest, TestSimple) {
+ std::vector<std::pair<std::string, std::string>> partition_spec = {
+ {"f1", "v1"},
+ {"f2", "这是一段不是特别长的中文"},
+ {"f0", "v0"},
+ };
+ ASSERT_OK_AND_ASSIGN(std::string partition_path_str,
+
PartitionPathUtils::GeneratePartitionPath(partition_spec));
+ ASSERT_EQ(partition_path_str, "f1=v1/f2=这是一段不是特别长的中文/f0=v0/");
+}
+
+TEST(PartitionPathUtilsTest, TestCharToEscape) {
+ std::vector<std::pair<std::string, std::string>> partition_spec = {
+ {"f0", "v0"},
+ {"f1", "v1="},
+ {"/f2?", "这是一段不是特别长\n的[中文]"},
+ };
+ ASSERT_OK_AND_ASSIGN(std::string partition_path_str,
+
PartitionPathUtils::GeneratePartitionPath(partition_spec));
+ ASSERT_EQ(partition_path_str,
"f0=v0/f1=v1%3D/%2Ff2%3F=这是一段不是特别长%0A的%5B中文%5D/");
+}
+
+TEST(PartitionPathUtilsTest, testGenerateHierarchicalPartitionPaths) {
+ std::vector<std::pair<std::string, std::string>> partition_spec = {
+ {"f2", "这是一段不是特别长的中文"},
+ {"f0", "v0"},
+ {"f1", "v1"},
+ };
+ ASSERT_OK_AND_ASSIGN(std::vector<std::string> partition_path_strs,
+
PartitionPathUtils::GenerateHierarchicalPartitionPaths(partition_spec));
+ ASSERT_EQ(partition_path_strs.size(), 3u);
+ ASSERT_EQ(partition_path_strs[0], "f2=这是一段不是特别长的中文/");
+ ASSERT_EQ(partition_path_strs[1], "f2=这是一段不是特别长的中文/f0=v0/");
+ ASSERT_EQ(partition_path_strs[2], "f2=这是一段不是特别长的中文/f0=v0/f1=v1/");
+}
+
+TEST(PartitionPathUtilsTest, EscapeChar) {
+ std::stringstream ss;
+ PartitionPathUtils::EscapeChar(' ', &ss);
+ ASSERT_EQ(ss.str(), "%20");
+
+ ss.str("");
+ ss.clear();
+ PartitionPathUtils::EscapeChar('/', &ss);
+ ASSERT_EQ(ss.str(), "%2F");
+
+ ss.str("");
+ ss.clear();
+ PartitionPathUtils::EscapeChar('\n', &ss);
+ ASSERT_EQ(ss.str(), "%0A");
+
+ ss.str("");
+ ss.clear();
+ PartitionPathUtils::EscapeChar('A', &ss);
+ ASSERT_EQ(ss.str(), "%41");
+}
+
+TEST(PartitionPathUtilsTest, EscapePathName) {
+ ASSERT_NOK_WITH_MSG(PartitionPathUtils::EscapePathName(""), "path should
not be empty");
+
+ ASSERT_OK_AND_ASSIGN(std::string escape_path,
+ PartitionPathUtils::EscapePathName("normal_path"));
+ ASSERT_EQ(escape_path, "normal_path");
+
+ ASSERT_OK_AND_ASSIGN(escape_path, PartitionPathUtils::EscapePathName("a
b/c"));
+ ASSERT_EQ(escape_path, "a b%2Fc");
+
+ ASSERT_OK_AND_ASSIGN(escape_path, PartitionPathUtils::EscapePathName("
/="));
+ ASSERT_EQ(escape_path, " %2F%3D");
+}
+
+} // namespace paimon::test
diff --git a/src/paimon/core/utils/path_factory.h
b/src/paimon/core/utils/path_factory.h
new file mode 100644
index 0000000..9bd61d4
--- /dev/null
+++ b/src/paimon/core/utils/path_factory.h
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <string>
+
+namespace paimon {
+
+/// Path factory to create a path.
+class PathFactory {
+ public:
+ PathFactory() = default;
+ virtual ~PathFactory() = default;
+
+ virtual std::string NewPath() const = 0;
+ virtual std::string ToPath(const std::string& file_name) const = 0;
+};
+
+} // namespace paimon
diff --git a/src/paimon/core/utils/special_field_ids.cpp
b/src/paimon/core/utils/special_field_ids.cpp
new file mode 100644
index 0000000..7b07837
--- /dev/null
+++ b/src/paimon/core/utils/special_field_ids.cpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/utils/special_field_ids.h"
+
+namespace paimon {
+
+const int32_t SpecialFieldIds::CPP_FIELD_ID_END;
+const int32_t SpecialFieldIds::SEQUENCE_NUMBER;
+const int32_t SpecialFieldIds::VALUE_KIND;
+const int32_t SpecialFieldIds::ROW_KIND;
+const int32_t SpecialFieldIds::ROW_ID;
+const int32_t SpecialFieldIds::INDEX_SCORE;
+
+} // namespace paimon