Copilot commented on code in PR #63:
URL: https://github.com/apache/paimon-cpp/pull/63#discussion_r3378019076


##########
include/paimon/read_context.h:
##########
@@ -0,0 +1,353 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "paimon/predicate/predicate.h"
+#include "paimon/result.h"
+#include "paimon/type_fwd.h"
+#include "paimon/utils/read_ahead_cache.h"
+#include "paimon/visibility.h"
+
+namespace paimon {
+class Executor;
+class MemoryPool;
+class Predicate;
+class FileSystem;
+
+/// `ReadContext` is some configuration for read operations.
+///
+/// Please do not use this class directly, use `ReadContextBuilder` to build a 
`ReadContext` which
+/// has input validation.
+/// @see ReadContextBuilder
+class PAIMON_EXPORT ReadContext {
+ public:
+    ReadContext(const std::string& path, const std::string& branch,
+                const std::vector<std::string>& read_schema,
+                const std::vector<int32_t>& read_field_ids,
+                const std::shared_ptr<Predicate>& predicate, bool 
enable_predicate_filter,
+                bool enable_prefetch, uint32_t prefetch_batch_count,
+                uint32_t prefetch_max_parallel_num, bool 
enable_multi_thread_row_to_batch,
+                uint32_t row_to_batch_thread_number, const 
std::optional<std::string>& table_schema,
+                const std::shared_ptr<MemoryPool>& memory_pool,
+                const std::shared_ptr<Executor>& executor,
+                const std::shared_ptr<FileSystem>& specific_file_system,
+                const std::map<std::string, std::string>& 
fs_scheme_to_identifier_map,
+                const std::map<std::string, std::string>& options,
+                PrefetchCacheMode prefetch_cache_mode, const CacheConfig& 
cache_config);
+    ~ReadContext();
+
+    const std::string& GetPath() const {
+        return path_;
+    }
+
+    const std::string& GetBranch() const {
+        return branch_;
+    }
+
+    const std::map<std::string, std::string>& 
GetFileSystemSchemeToIdentifierMap() const {
+        return fs_scheme_to_identifier_map_;
+    }
+
+    const std::map<std::string, std::string>& GetOptions() const {
+        return options_;
+    }
+
+    const std::vector<std::string>& GetReadSchema() const {
+        return read_schema_;
+    }
+
+    const std::vector<int32_t>& GetReadFieldIds() const {
+        return read_field_ids_;
+    }
+
+    const std::shared_ptr<Predicate>& GetPredicate() const {
+        return predicate_;
+    }
+
+    bool EnablePredicateFilter() const {
+        return enable_predicate_filter_;
+    }
+    bool EnablePrefetch() const {
+        return enable_prefetch_;
+    }
+    uint32_t GetPrefetchBatchCount() const {
+        return prefetch_batch_count_;
+    }
+    uint32_t GetPrefetchMaxParallelNum() const {
+        return prefetch_max_parallel_num_;
+    }
+    bool EnableMultiThreadRowToBatch() const {
+        return enable_multi_thread_row_to_batch_;
+    }
+    uint32_t GetRowToBatchThreadNumber() const {
+        return row_to_batch_thread_number_;
+    }
+    const std::optional<std::string>& GetSpecificTableSchema() {
+        return table_schema_;
+    }

Review Comment:
   `GetSpecificTableSchema()` is not `const`, unlike the other getters, so it 
cannot be called through a `const ReadContext&` or pointer-to-`const` (common 
for context/config objects). Make it a `const` method (and keep returning 
`const std::optional<std::string>&`) to align with the rest of the API.



##########
src/paimon/core/operation/abstract_split_read.cpp:
##########
@@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/core/operation/abstract_split_read.h"
+
+#include <cassert>
+#include <cstddef>
+#include <utility>
+
+#include "arrow/type.h"
+#include "paimon/common/reader/delegating_prefetch_reader.h"
+#include "paimon/common/reader/predicate_batch_reader.h"
+#include "paimon/common/reader/prefetch_file_batch_reader_impl.h"
+#include "paimon/common/table/special_fields.h"
+#include "paimon/common/types/data_field.h"
+#include "paimon/common/utils/object_utils.h"
+#include "paimon/core/io/complete_row_tracking_fields_reader.h"
+#include "paimon/core/io/data_file_meta.h"
+#include "paimon/core/io/data_file_path_factory.h"
+#include "paimon/core/io/field_mapping_reader.h"
+#include "paimon/core/operation/internal_read_context.h"
+#include "paimon/core/partition/partition_info.h"
+#include "paimon/core/schema/table_schema.h"
+#include "paimon/core/table/source/data_split_impl.h"
+#include "paimon/core/utils/field_mapping.h"
+#include "paimon/format/file_format.h"
+#include "paimon/format/file_format_factory.h"
+#include "paimon/fs/file_system.h"
+#include "paimon/status.h"
+
+namespace paimon {
+class BinaryRow;
+class Executor;
+class FileStorePathFactory;
+class MemoryPool;
+class Predicate;
+
+AbstractSplitRead::AbstractSplitRead(const 
std::shared_ptr<FileStorePathFactory>& path_factory,
+                                     const 
std::shared_ptr<InternalReadContext>& context,
+                                     std::unique_ptr<SchemaManager>&& 
schema_manager,
+                                     const std::shared_ptr<MemoryPool>& 
memory_pool,
+                                     const std::shared_ptr<Executor>& executor)
+    : pool_(memory_pool),
+      executor_(executor),
+      path_factory_(path_factory),
+      options_(context->GetCoreOptions()),
+      raw_read_schema_(context->GetReadSchema()),
+      context_(context),
+      schema_manager_(std::move(schema_manager)) {}
+
+Result<std::vector<std::unique_ptr<FileBatchReader>>> 
AbstractSplitRead::CreateRawFileReaders(
+    const BinaryRow& partition, const 
std::vector<std::shared_ptr<DataFileMeta>>& data_files,
+    const std::shared_ptr<arrow::Schema>& read_schema, const 
std::shared_ptr<Predicate>& predicate,
+    DeletionVector::Factory dv_factory, const 
std::optional<std::vector<Range>>& row_ranges,
+    const std::shared_ptr<DataFilePathFactory>& data_file_path_factory) const {
+    if (data_files.empty()) {
+        return std::vector<std::unique_ptr<FileBatchReader>>();
+    }
+    PAIMON_ASSIGN_OR_RAISE(
+        std::unique_ptr<FieldMappingBuilder> field_mapping_builder,
+        FieldMappingBuilder::Create(read_schema, context_->GetPartitionKeys(), 
predicate));
+
+    std::vector<std::unique_ptr<FileBatchReader>> raw_file_readers;
+    raw_file_readers.reserve(data_files.size());
+    for (const auto& file : data_files) {
+        auto data_file_path = data_file_path_factory->ToPath(file);
+        PAIMON_ASSIGN_OR_RAISE(std::string data_file_identifier, 
file->FileFormat());
+        PAIMON_ASSIGN_OR_RAISE(std::unique_ptr<ReaderBuilder> reader_builder,
+                               PrepareReaderBuilder(data_file_identifier));
+        PAIMON_ASSIGN_OR_RAISE(
+            std::unique_ptr<FileBatchReader> file_reader,
+            CreateFieldMappingReader(data_file_path, file, partition, 
reader_builder.get(),
+                                     field_mapping_builder.get(), dv_factory, 
row_ranges,
+                                     data_file_path_factory));
+        if (file_reader) {
+            raw_file_readers.push_back(std::move(file_reader));
+        }
+    }
+    return std::move(raw_file_readers);
+}
+
+bool AbstractSplitRead::NeedCompleteRowTrackingFields(
+    bool row_tracking_enabled, const std::shared_ptr<arrow::Schema>& 
read_schema) {
+    if (row_tracking_enabled &&
+        (read_schema->GetFieldIndex(SpecialFields::RowId().Name()) != -1 ||
+         read_schema->GetFieldIndex(SpecialFields::SequenceNumber().Name()) != 
-1)) {
+        return true;
+    }
+    return false;
+}
+
+std::unordered_map<std::string, DeletionFile> 
AbstractSplitRead::CreateDeletionFileMap(
+    const DataSplitImpl& data_split) {
+    return CreateDeletionFileMap(data_split.DataFiles(), 
data_split.DeletionFiles());
+}
+
+std::unordered_map<std::string, DeletionFile> 
AbstractSplitRead::CreateDeletionFileMap(
+    const std::vector<std::shared_ptr<DataFileMeta>>& data_files,
+    const std::vector<std::optional<DeletionFile>>& deletion_files) {
+    std::unordered_map<std::string, DeletionFile> deletion_file_map;
+    if (deletion_files.empty()) {
+        return deletion_file_map;
+    }
+    assert(deletion_files.size() == data_files.size());
+    size_t file_count = deletion_files.size();
+    for (size_t i = 0; i < file_count; i++) {
+        if (deletion_files[i] != std::nullopt) {
+            deletion_file_map.emplace(data_files[i]->file_name, 
deletion_files[i].value());
+        }
+    }

Review Comment:
   `assert` is compiled out when `NDEBUG` is defined (typical release builds). 
If `deletion_files.size() > data_files.size()`, this loop then indexes 
`data_files[i]` out of bounds, which is undefined behavior. Replace the assert 
with a runtime guard (e.g., early return, or iterate up to `min(sizes)` and/or 
surface an error) so release builds are safe.



##########
src/paimon/core/operation/read_context.cpp:
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/read_context.h"
+
+#include <utility>
+
+#include "paimon/common/utils/path_util.h"
+#include "paimon/core/utils/branch_manager.h"
+#include "paimon/executor.h"
+#include "paimon/memory/memory_pool.h"
+#include "paimon/status.h"
+
+namespace paimon {
+class Predicate;
+
+ReadContext::ReadContext(
+    const std::string& path, const std::string& branch, const 
std::vector<std::string>& read_schema,
+    const std::vector<int32_t>& read_field_ids, const 
std::shared_ptr<Predicate>& predicate,
+    bool enable_predicate_filter, bool enable_prefetch, uint32_t 
prefetch_batch_count,
+    uint32_t prefetch_max_parallel_num, bool enable_multi_thread_row_to_batch,
+    uint32_t row_to_batch_thread_number, const std::optional<std::string>& 
table_schema,
+    const std::shared_ptr<MemoryPool>& memory_pool, const 
std::shared_ptr<Executor>& executor,
+    const std::shared_ptr<FileSystem>& specific_file_system,
+    const std::map<std::string, std::string>& fs_scheme_to_identifier_map,
+    const std::map<std::string, std::string>& options, PrefetchCacheMode 
prefetch_cache_mode,
+    const CacheConfig& cache_config)
+    : path_(path),
+      branch_(branch),
+      read_schema_(read_schema),
+      read_field_ids_(read_field_ids),
+      predicate_(predicate),
+      enable_predicate_filter_(enable_predicate_filter),
+      enable_prefetch_(enable_prefetch),
+      prefetch_batch_count_(prefetch_batch_count),
+      prefetch_max_parallel_num_(prefetch_max_parallel_num),
+      enable_multi_thread_row_to_batch_(enable_multi_thread_row_to_batch),
+      row_to_batch_thread_number_(row_to_batch_thread_number),
+      table_schema_(table_schema),
+      memory_pool_(memory_pool),
+      executor_(executor),
+      specific_file_system_(specific_file_system),
+      fs_scheme_to_identifier_map_(fs_scheme_to_identifier_map),
+      options_(options),
+      prefetch_cache_mode_(prefetch_cache_mode),
+      cache_config_(cache_config) {}
+
+ReadContext::~ReadContext() = default;
+
+class ReadContextBuilder::Impl {
+ public:
+    friend class ReadContextBuilder;
+    void Reset() {
+        branch_ = BranchManager::DEFAULT_MAIN_BRANCH;
+        read_field_names_.clear();
+        read_field_ids_.clear();
+        fs_scheme_to_identifier_map_.clear();
+        options_.clear();
+        predicate_.reset();
+        enable_predicate_filter_ = false;
+        enable_prefetch_ = false;
+        prefetch_cache_mode_ = PrefetchCacheMode::ALWAYS;
+        prefetch_batch_count_ = 600;
+        prefetch_max_parallel_num_ = 3;
+        enable_multi_thread_row_to_batch_ = false;
+        row_to_batch_thread_number_ = 1;
+        table_schema_ = std::nullopt;
+        memory_pool_ = GetDefaultPool();
+        executor_.reset();
+        specific_file_system_.reset();
+        cache_config_ = CacheConfig();
+    }
+
+ private:
+    std::string path_;
+    std::string branch_ = BranchManager::DEFAULT_MAIN_BRANCH;
+    std::vector<std::string> read_field_names_;
+    std::vector<int32_t> read_field_ids_;
+    std::map<std::string, std::string> fs_scheme_to_identifier_map_;
+    std::map<std::string, std::string> options_;
+    std::shared_ptr<Predicate> predicate_;
+    bool enable_predicate_filter_ = false;
+    bool enable_prefetch_ = false;
+    uint32_t prefetch_batch_count_ = 600;
+    uint32_t prefetch_max_parallel_num_ = 3;
+    bool enable_multi_thread_row_to_batch_ = false;
+    uint32_t row_to_batch_thread_number_ = 1;
+    std::optional<std::string> table_schema_;
+    std::shared_ptr<MemoryPool> memory_pool_ = GetDefaultPool();
+    std::shared_ptr<Executor> executor_;
+    std::shared_ptr<FileSystem> specific_file_system_;
+    PrefetchCacheMode prefetch_cache_mode_ = PrefetchCacheMode::ALWAYS;
+    CacheConfig cache_config_;
+};
+
+ReadContextBuilder::ReadContextBuilder(const std::string& path)
+    : impl_(std::make_unique<ReadContextBuilder::Impl>()) {
+    impl_->path_ = path;
+}
+
+ReadContextBuilder::~ReadContextBuilder() = default;
+
+ReadContextBuilder::ReadContextBuilder(ReadContextBuilder&&) noexcept = 
default;
+ReadContextBuilder& ReadContextBuilder::operator=(ReadContextBuilder&&) 
noexcept = default;
+
+ReadContextBuilder& ReadContextBuilder::AddOption(const std::string& key,
+                                                  const std::string& value) {
+    impl_->options_[key] = value;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetOptions(const std::map<std::string, 
std::string>& opts) {
+    impl_->options_ = opts;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetReadSchema(
+    const std::vector<std::string>& read_field_names) {
+    impl_->read_field_names_ = read_field_names;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetReadFieldIds(
+    const std::vector<int32_t>& read_field_ids) {
+    impl_->read_field_ids_ = read_field_ids;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetPredicate(const 
std::shared_ptr<Predicate>& predicate) {
+    impl_->predicate_ = predicate;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::EnablePredicateFilter(bool enabled) {
+    impl_->enable_predicate_filter_ = enabled;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::EnablePrefetch(bool enabled) {
+    impl_->enable_prefetch_ = enabled;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetPrefetchBatchCount(uint32_t 
batch_count) {
+    impl_->prefetch_batch_count_ = batch_count;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetPrefetchMaxParallelNum(uint32_t 
max_parallel_num) {
+    impl_->prefetch_max_parallel_num_ = max_parallel_num;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::EnableMultiThreadRowToBatch(bool 
enabled) {
+    impl_->enable_multi_thread_row_to_batch_ = enabled;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetRowToBatchThreadNumber(uint32_t 
thread_number) {
+    impl_->row_to_batch_thread_number_ = thread_number;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithMemoryPool(
+    const std::shared_ptr<MemoryPool>& memory_pool) {
+    impl_->memory_pool_ = memory_pool;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithExecutor(const 
std::shared_ptr<Executor>& executor) {
+    impl_->executor_ = executor;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetTableSchema(const std::string& 
table_schema) {
+    impl_->table_schema_ = table_schema;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithBranch(const std::string& branch) {
+    impl_->branch_ = branch;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithFileSystemSchemeToIdentifierMap(
+    const std::map<std::string, std::string>& fs_scheme_to_identifier_map) {
+    impl_->fs_scheme_to_identifier_map_ = fs_scheme_to_identifier_map;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithFileSystem(
+    const std::shared_ptr<FileSystem>& file_system) {
+    impl_->specific_file_system_ = file_system;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetPrefetchCacheMode(PrefetchCacheMode 
mode) {
+    impl_->prefetch_cache_mode_ = mode;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithCacheConfig(const CacheConfig& 
cache_config) {
+    impl_->cache_config_ = cache_config;
+    return *this;
+}
+
+Result<std::unique_ptr<ReadContext>> ReadContextBuilder::Finish() {
+    PAIMON_ASSIGN_OR_RAISE(impl_->path_, 
PathUtil::NormalizePath(impl_->path_));
+    if (impl_->path_.empty()) {
+        return Status::Invalid("cannot read with empty table path");
+    }
+    if (impl_->enable_prefetch_ && impl_->prefetch_batch_count_ <= 0) {
+        return Status::Invalid("prefetch batch count should be greater than 
0");
+    }
+    if (impl_->enable_prefetch_ &&
+        impl_->prefetch_batch_count_ < impl_->prefetch_max_parallel_num_) {
+        return Status::Invalid(
+            "prefetch batch count should be greater than or equal to prefetch 
max parallel num");
+    }
+    if (!impl_->executor_) {
+        // If the user do not set executor, create default executor by 
prefetch batch count
+        uint32_t thread_count = impl_->enable_prefetch_ ? 
impl_->prefetch_max_parallel_num_ : 1;
+        impl_->executor_ = CreateDefaultExecutor(thread_count);
+    }

Review Comment:
   The comment says the default executor is created “by prefetch batch count”, 
but the implementation uses `prefetch_max_parallel_num_` (or 1). Update the 
comment to match the actual behavior to avoid misleading future 
changes/debugging.



##########
include/paimon/read_context.h:
##########
@@ -0,0 +1,353 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "paimon/predicate/predicate.h"
+#include "paimon/result.h"
+#include "paimon/type_fwd.h"
+#include "paimon/utils/read_ahead_cache.h"
+#include "paimon/visibility.h"
+
+namespace paimon {
+class Executor;
+class MemoryPool;
+class Predicate;
+class FileSystem;
+
+/// `ReadContext` is some configuration for read operations.
+///
+/// Please do not use this class directly, use `ReadContextBuilder` to build a 
`ReadContext` which
+/// has input validation.
+/// @see ReadContextBuilder
+class PAIMON_EXPORT ReadContext {
+ public:
+    ReadContext(const std::string& path, const std::string& branch,
+                const std::vector<std::string>& read_schema,
+                const std::vector<int32_t>& read_field_ids,
+                const std::shared_ptr<Predicate>& predicate, bool 
enable_predicate_filter,
+                bool enable_prefetch, uint32_t prefetch_batch_count,
+                uint32_t prefetch_max_parallel_num, bool 
enable_multi_thread_row_to_batch,
+                uint32_t row_to_batch_thread_number, const 
std::optional<std::string>& table_schema,
+                const std::shared_ptr<MemoryPool>& memory_pool,
+                const std::shared_ptr<Executor>& executor,
+                const std::shared_ptr<FileSystem>& specific_file_system,
+                const std::map<std::string, std::string>& 
fs_scheme_to_identifier_map,
+                const std::map<std::string, std::string>& options,
+                PrefetchCacheMode prefetch_cache_mode, const CacheConfig& 
cache_config);
+    ~ReadContext();
+
+    const std::string& GetPath() const {
+        return path_;
+    }
+
+    const std::string& GetBranch() const {
+        return branch_;
+    }
+
+    const std::map<std::string, std::string>& 
GetFileSystemSchemeToIdentifierMap() const {
+        return fs_scheme_to_identifier_map_;
+    }
+
+    const std::map<std::string, std::string>& GetOptions() const {
+        return options_;
+    }
+
+    const std::vector<std::string>& GetReadSchema() const {
+        return read_schema_;
+    }
+
+    const std::vector<int32_t>& GetReadFieldIds() const {
+        return read_field_ids_;
+    }
+
+    const std::shared_ptr<Predicate>& GetPredicate() const {
+        return predicate_;
+    }
+
+    bool EnablePredicateFilter() const {
+        return enable_predicate_filter_;
+    }
+    bool EnablePrefetch() const {
+        return enable_prefetch_;
+    }
+    uint32_t GetPrefetchBatchCount() const {
+        return prefetch_batch_count_;
+    }
+    uint32_t GetPrefetchMaxParallelNum() const {
+        return prefetch_max_parallel_num_;
+    }
+    bool EnableMultiThreadRowToBatch() const {
+        return enable_multi_thread_row_to_batch_;
+    }
+    uint32_t GetRowToBatchThreadNumber() const {
+        return row_to_batch_thread_number_;
+    }
+    const std::optional<std::string>& GetSpecificTableSchema() {
+        return table_schema_;
+    }
+    std::shared_ptr<MemoryPool> GetMemoryPool() const {
+        return memory_pool_;
+    }
+    std::shared_ptr<Executor> GetExecutor() const {
+        return executor_;
+    }
+    std::shared_ptr<FileSystem> GetSpecificFileSystem() const {
+        return specific_file_system_;
+    }
+
+    PrefetchCacheMode GetPrefetchCacheMode() const {
+        return prefetch_cache_mode_;
+    }
+
+    const CacheConfig& GetCacheConfig() const {
+        return cache_config_;
+    }
+
+ private:
+    std::string path_;
+    std::string branch_;
+    std::vector<std::string> read_schema_;
+    std::vector<int32_t> read_field_ids_;
+    std::shared_ptr<Predicate> predicate_;
+    bool enable_predicate_filter_;
+    bool enable_prefetch_;
+    uint32_t prefetch_batch_count_;
+    uint32_t prefetch_max_parallel_num_;
+    bool enable_multi_thread_row_to_batch_;
+    uint32_t row_to_batch_thread_number_;
+    std::optional<std::string> table_schema_;
+    std::shared_ptr<MemoryPool> memory_pool_;
+    std::shared_ptr<Executor> executor_;
+    std::shared_ptr<FileSystem> specific_file_system_;
+    std::map<std::string, std::string> fs_scheme_to_identifier_map_;
+    std::map<std::string, std::string> options_;
+    PrefetchCacheMode prefetch_cache_mode_;
+    CacheConfig cache_config_;
+};
+
+/// `ReadContextBuilder` used to build a `ReadContext`, has input validation.
+class PAIMON_EXPORT ReadContextBuilder {
+ public:
+    /// Constructs a `ReadContextBuilder` with required parameters.
+    /// @param path The root path of the table.
+    explicit ReadContextBuilder(const std::string& path);
+
+    ~ReadContextBuilder();
+
+    ReadContextBuilder(ReadContextBuilder&&) noexcept;
+    ReadContextBuilder& operator=(ReadContextBuilder&&) noexcept;
+
+    /// Set the schema fields to read from the table.
+    ///
+    /// If not set, all fields from the table schema will be read. This is 
useful for
+    /// projection pushdown to reduce I/O and improve performance by reading 
only
+    /// the required columns.
+    ///
+    /// @param read_field_names Vector of field names to read from the table.
+    /// @return Reference to this builder for method chaining.
+    /// @note Currently supports top-level field selection. Future versions 
may support
+    ///       nested field selection using ArrowSchema for more granular 
projection
+    ReadContextBuilder& SetReadSchema(const std::vector<std::string>& 
read_field_names);
+    /// Set the schema fields to read from the table.
+    ///
+    /// If not set, all fields from the table schema will be read. This is 
useful for
+    /// projection pushdown to reduce I/O and improve performance by reading 
only
+    /// the required columns.
+    ///
+    /// @param read_field_ids Vector of field ids to read from the table.
+    /// @return Reference to this builder for method chaining.
+    /// @note Currently supports top-level field selection. Future versions 
may support
+    ///       nested field selection using ArrowSchema for more granular 
projection.
+    /// @note SetReadFieldIds() and SetReadSchema() are mutually exclusive.
+    ///       Calling both will ignore the read schema set by SetReadSchema().
+    ReadContextBuilder& SetReadFieldIds(const std::vector<int32_t>& 
read_field_ids);
+
+    /// Set a configuration options map to set some option entries which are 
not defined in the
+    /// table schema or whose values you want to overwrite.
+    /// @note The options map will clear the options added by `AddOption()` 
before.
+    /// @param options The configuration options map.
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& SetOptions(const std::map<std::string, std::string>& 
options);
+
+    /// Add a single configuration option which is not defined in the table 
schema or whose value
+    /// you want to overwrite.
+    ///
+    /// If you want to add multiple options, call `AddOption()` multiple times 
or use `SetOptions()`
+    /// instead.
+    /// @param key The option key.
+    /// @param value The option value.
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& AddOption(const std::string& key, const std::string& 
value);
+
+    /// Set a predicate for filtering data during reading.
+    ///
+    /// The predicate is used for both partition pruning and data filtering.
+    /// It can significantly improve performance by reducing the amount of data
+    /// that needs to be read and processed.
+    ///
+    /// @param predicate Shared pointer to the predicate for data filtering.
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& SetPredicate(const std::shared_ptr<Predicate>& 
predicate);
+
+    /// Whether to perform precise filtering according to predicates for data 
read from format
+    /// reader.
+    /// @param enabled Whether to enable precise filtering (default: false)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& EnablePredicateFilter(bool enabled);
+
+    /// Enable or disable prefetching of data batches from individual files.
+    ///
+    /// When enabled, the reader will prefetch multiple batches in parallel to
+    /// improve throughput by overlapping I/O with computation. This is 
particularly
+    /// beneficial for high-latency storage systems.
+    ///
+    /// @param enabled Whether to enable prefetching (default: false)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& EnablePrefetch(bool enabled);
+
+    /// Set prefetch cache mode for read operations.
+    ///
+    /// A prefetch cache is used to prebuffer data ranges before they are 
needed,
+    /// which can improve read performance by reducing redundant I/O 
operations.
+    /// @param mode (default: PrefetchCacheMode::ALWAYS)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& SetPrefetchCacheMode(PrefetchCacheMode mode);
+
+    /// Set the cache configuration for prefetch read operations.
+    ///
+    /// @param config The cache configuration to use.
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& WithCacheConfig(const CacheConfig& config);
+
+    /// Set the total number of batches to prefetch across all files.
+    ///
+    /// This controls the memory usage and parallelism of the prefetching 
mechanism.
+    /// Higher values can improve throughput but consume more memory.
+    ///
+    /// @param batch_count Total number of batches to prefetch (default: 600)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& SetPrefetchBatchCount(uint32_t batch_count);
+
+    /// Set the maximum number of parallel prefetch operations.
+    ///
+    /// This limits the number of concurrent I/O operations to prevent 
overwhelming
+    /// the storage system or consuming excessive system resources.
+    ///
+    /// @param parallel_num Maximum parallel prefetch operations (default: 3)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& SetPrefetchMaxParallelNum(uint32_t parallel_num);
+
+    /// Enable or disable multi-threaded row-to-batch conversion in 
merge-on-read scenarios.
+    ///
+    /// When enabled, multiple threads are used to convert row data to batch 
format
+    /// during merge operations, which can improve performance for 
CPU-intensive
+    /// merge operations.
+    ///
+    /// @param enabled Whether to enable multi-threaded conversion (default: 
false)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& EnableMultiThreadRowToBatch(bool enabled);
+
+    /// Set the number of threads for row-to-batch conversion in merge-on-read 
scenarios.
+    ///
+    /// This controls the parallelism of row-to-batch conversion during merge 
operations.
+    /// Higher values can improve performance but may affect result ordering.
+    ///
+    /// @param thread_number Number of conversion threads (default: 1)
+    /// @return Reference to this builder for method chaining.
+    /// @note If thread_number > 1, Arrow batches from the reader may not be 
in primary key order.
+    ReadContextBuilder& SetRowToBatchThreadNumber(uint32_t thread_number);
+
+    /// Set custom memory pool for memory management.
+    /// @param memory_pool The memory pool to use.
+    /// @return Reference to this builder for method chaining.
+    /// @note If not set, the default system memory pool will be used.
+    ReadContextBuilder& WithMemoryPool(const std::shared_ptr<MemoryPool>& 
memory_pool);
+
+    /// Set custom executor for task execution.
+    /// @param executor The executor to use.
+    /// @return Reference to this builder for method chaining.
+    /// @note If not set, the default system executor will be used.
+    ReadContextBuilder& WithExecutor(const std::shared_ptr<Executor>& 
executor);
+
+    /// Set the table schema as a string to avoid schema loading I/O 
operations.
+    ///
+    /// This optimization allows the reader to use a pre-loaded schema instead 
of
+    /// reading it from the table metadata, which can improve performance 
especially
+    /// in scenarios with many small read operations.
+    ///
+    /// @param table_schema String representation of the table schema.
+    /// @return Reference to this builder for method chaining.
+    /// @note The user must ensure that the schema string is valid and matches 
the table.
+    /// @note If not set, the schema will be loaded from the table path.
+    ReadContextBuilder& SetTableSchema(const std::string& table_schema);
+
+    /// Set the specific branch to read from in a versioned table.
+    ///
+    /// Paimon supports branching for data versioning and time travel queries.
+    /// This method allows reading from a specific branch instead of the main 
branch.
+    ///
+    /// @param branch Name of the branch to read from.
+    /// @return Reference to this builder for method chaining.
+    /// @note Default branch is "main" if not specified.
+    ReadContextBuilder& WithBranch(const std::string& branch);
+
+    /// Sets a mapping from URI schemes (e.g., "file", "oss") to registered 
file system
+    /// identifiers. This allows selecting different pre-registered file 
system implementations
+    /// based on the URI scheme at runtime.
+    ///
+    /// @param fs_scheme_to_identifier_map Map from URI scheme (like "oss") to 
the corresponding
+    /// file system identifier.
+    /// @return Reference to this builder for method chaining.
+    /// @note
+    ///   - This method is intended for environments where multiple file 
systems are pre-registered.
+    ///   - The specified identifiers must correspond to file systems that 
have been registered at
+    ///   compile time or initialization.
+    ///   - Cannot be used together with `WithFileSystem()`.
+    ///   - If not set, use default file system (configured in 
`Options::FILE_SYSTEM`).
+    /// Example:
+    ///   builder.WithFileSystemSchemeToIdentifierMap({{"oss", "jindo"}, 
{"file", "local"}});
+    ///
+    ReadContextBuilder& WithFileSystemSchemeToIdentifierMap(
+        const std::map<std::string, std::string>& fs_scheme_to_identifier_map);
+
+    /// Sets a custom file system instance to be used for all file operations 
in this read context.
+    /// This bypasses the global file system registry and uses the provided 
implementation directly.
+    ///
+    /// @param file_system The file system to use.
+    /// @return Reference to this builder for method chaining.
+    /// @note If not set, use default file system (configured in 
`Options::FILE_SYSTEM`)
+    ReadContextBuilder& WithFileSystem(const std::shared_ptr<FileSystem>& 
file_system);

Review Comment:
   The public API documentation states `WithFileSystemSchemeToIdentifierMap()` 
cannot be used together with `WithFileSystem()`, but 
`ReadContextBuilder::Finish()` does not enforce this. Add validation in 
`Finish()` to return an invalid status when both are set, or clearly define and 
document precedence (and implement accordingly).



##########
src/paimon/core/operation/split_read.h:
##########
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "paimon/executor.h"
+#include "paimon/memory/memory_pool.h"
+#include "paimon/read_context.h"

Review Comment:
   These headers aren’t referenced in this file’s declarations (the interface 
only uses `Split`, `BatchReader`, and `Result`). Consider removing them and/or 
forward-declaring to reduce transitive include cost and speed up builds.



##########
include/paimon/read_context.h:
##########
@@ -0,0 +1,353 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "paimon/predicate/predicate.h"
+#include "paimon/result.h"
+#include "paimon/type_fwd.h"
+#include "paimon/utils/read_ahead_cache.h"
+#include "paimon/visibility.h"
+
+namespace paimon {
+class Executor;
+class MemoryPool;
+class Predicate;
+class FileSystem;
+
+/// `ReadContext` holds the configuration for read operations.
+///
+/// Do not construct this class directly; use `ReadContextBuilder`, which performs input
+/// validation, to build a `ReadContext`.
+/// @see ReadContextBuilder
+class PAIMON_EXPORT ReadContext {
+ public:
+    /// Constructs a `ReadContext` from fully specified settings.
+    ///
+    /// Prefer `ReadContextBuilder`; the documentation of its setters describes the meaning of
+    /// each setting passed here.
+    ReadContext(const std::string& path, const std::string& branch,
+                const std::vector<std::string>& read_schema,
+                const std::vector<int32_t>& read_field_ids,
+                const std::shared_ptr<Predicate>& predicate, bool enable_predicate_filter,
+                bool enable_prefetch, uint32_t prefetch_batch_count,
+                uint32_t prefetch_max_parallel_num, bool enable_multi_thread_row_to_batch,
+                uint32_t row_to_batch_thread_number, const std::optional<std::string>& table_schema,
+                const std::shared_ptr<MemoryPool>& memory_pool,
+                const std::shared_ptr<Executor>& executor,
+                const std::shared_ptr<FileSystem>& specific_file_system,
+                const std::map<std::string, std::string>& fs_scheme_to_identifier_map,
+                const std::map<std::string, std::string>& options,
+                PrefetchCacheMode prefetch_cache_mode, const CacheConfig& cache_config);
+    ~ReadContext();
+
+    /// @return The stored table path.
+    const std::string& GetPath() const {
+        return path_;
+    }
+
+    /// @return The stored branch name.
+    const std::string& GetBranch() const {
+        return branch_;
+    }
+
+    /// @return The stored map from URI scheme to file system identifier.
+    const std::map<std::string, std::string>& GetFileSystemSchemeToIdentifierMap() const {
+        return fs_scheme_to_identifier_map_;
+    }
+
+    /// @return The stored configuration options map.
+    const std::map<std::string, std::string>& GetOptions() const {
+        return options_;
+    }
+
+    /// @return The stored read schema (see `ReadContextBuilder::SetReadSchema()`).
+    const std::vector<std::string>& GetReadSchema() const {
+        return read_schema_;
+    }
+
+    /// @return The stored read field ids (see `ReadContextBuilder::SetReadFieldIds()`).
+    const std::vector<int32_t>& GetReadFieldIds() const {
+        return read_field_ids_;
+    }
+
+    /// @return The stored predicate (see `ReadContextBuilder::SetPredicate()`).
+    const std::shared_ptr<Predicate>& GetPredicate() const {
+        return predicate_;
+    }
+
+    /// @return The stored predicate-filter flag.
+    bool EnablePredicateFilter() const {
+        return enable_predicate_filter_;
+    }
+    /// @return The stored prefetch flag.
+    bool EnablePrefetch() const {
+        return enable_prefetch_;
+    }
+    /// @return The stored prefetch batch count.
+    uint32_t GetPrefetchBatchCount() const {
+        return prefetch_batch_count_;
+    }
+    /// @return The stored maximum number of parallel prefetch operations.
+    uint32_t GetPrefetchMaxParallelNum() const {
+        return prefetch_max_parallel_num_;
+    }
+    /// @return The stored multi-threaded row-to-batch conversion flag.
+    bool EnableMultiThreadRowToBatch() const {
+        return enable_multi_thread_row_to_batch_;
+    }
+    /// @return The stored row-to-batch conversion thread number.
+    uint32_t GetRowToBatchThreadNumber() const {
+        return row_to_batch_thread_number_;
+    }
+    /// @return The stored optional table schema string (see
+    ///         `ReadContextBuilder::SetTableSchema()`).
+    /// @note Const-qualified, like every other accessor, so that it can be called on a
+    ///       `const ReadContext`.
+    const std::optional<std::string>& GetSpecificTableSchema() const {
+        return table_schema_;
+    }
+    /// @return The stored memory pool pointer.
+    std::shared_ptr<MemoryPool> GetMemoryPool() const {
+        return memory_pool_;
+    }
+    /// @return The stored executor pointer.
+    std::shared_ptr<Executor> GetExecutor() const {
+        return executor_;
+    }
+    /// @return The stored file system pointer (see `ReadContextBuilder::WithFileSystem()`).
+    std::shared_ptr<FileSystem> GetSpecificFileSystem() const {
+        return specific_file_system_;
+    }
+
+    /// @return The stored prefetch cache mode.
+    PrefetchCacheMode GetPrefetchCacheMode() const {
+        return prefetch_cache_mode_;
+    }
+
+    /// @return The stored prefetch cache configuration.
+    const CacheConfig& GetCacheConfig() const {
+        return cache_config_;
+    }
+
+ private:
+    std::string path_;
+    std::string branch_;
+    std::vector<std::string> read_schema_;
+    std::vector<int32_t> read_field_ids_;
+    std::shared_ptr<Predicate> predicate_;
+    bool enable_predicate_filter_;
+    bool enable_prefetch_;
+    uint32_t prefetch_batch_count_;
+    uint32_t prefetch_max_parallel_num_;
+    bool enable_multi_thread_row_to_batch_;
+    uint32_t row_to_batch_thread_number_;
+    std::optional<std::string> table_schema_;
+    std::shared_ptr<MemoryPool> memory_pool_;
+    std::shared_ptr<Executor> executor_;
+    std::shared_ptr<FileSystem> specific_file_system_;
+    std::map<std::string, std::string> fs_scheme_to_identifier_map_;
+    std::map<std::string, std::string> options_;
+    PrefetchCacheMode prefetch_cache_mode_;
+    CacheConfig cache_config_;
+};
+
+/// `ReadContextBuilder` is used to build a `ReadContext` and performs input validation.
+class PAIMON_EXPORT ReadContextBuilder {
+ public:
+    /// Constructs a `ReadContextBuilder` with required parameters.
+    /// @param path The root path of the table.
+    explicit ReadContextBuilder(const std::string& path);
+
+    ~ReadContextBuilder();
+
+    ReadContextBuilder(ReadContextBuilder&&) noexcept;
+    ReadContextBuilder& operator=(ReadContextBuilder&&) noexcept;
+
+    /// Set the schema fields to read from the table.
+    ///
+    /// If not set, all fields from the table schema will be read. This is 
useful for
+    /// projection pushdown to reduce I/O and improve performance by reading 
only
+    /// the required columns.
+    ///
+    /// @param read_field_names Vector of field names to read from the table.
+    /// @return Reference to this builder for method chaining.
+    /// @note Currently supports top-level field selection. Future versions 
may support
+    ///       nested field selection using ArrowSchema for more granular projection.
+    ReadContextBuilder& SetReadSchema(const std::vector<std::string>& 
read_field_names);
+    /// Set the schema fields to read from the table.
+    ///
+    /// If not set, all fields from the table schema will be read. This is 
useful for
+    /// projection pushdown to reduce I/O and improve performance by reading 
only
+    /// the required columns.
+    ///
+    /// @param read_field_ids Vector of field ids to read from the table.
+    /// @return Reference to this builder for method chaining.
+    /// @note Currently supports top-level field selection. Future versions 
may support
+    ///       nested field selection using ArrowSchema for more granular 
projection.
+    /// @note SetReadFieldIds() and SetReadSchema() are mutually exclusive.
+    ///       Calling both will ignore the read schema set by SetReadSchema().
+    ReadContextBuilder& SetReadFieldIds(const std::vector<int32_t>& 
read_field_ids);
+
+    /// Set a configuration options map to set some option entries which are 
not defined in the
+    /// table schema or whose values you want to overwrite.
+    /// @note Calling this replaces any options previously added with `AddOption()`.
+    /// @param options The configuration options map.
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& SetOptions(const std::map<std::string, std::string>& 
options);
+
+    /// Add a single configuration option which is not defined in the table 
schema or whose value
+    /// you want to overwrite.
+    ///
+    /// If you want to add multiple options, call `AddOption()` multiple times 
or use `SetOptions()`
+    /// instead.
+    /// @param key The option key.
+    /// @param value The option value.
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& AddOption(const std::string& key, const std::string& 
value);
+
+    /// Set a predicate for filtering data during reading.
+    ///
+    /// The predicate is used for both partition pruning and data filtering.
+    /// It can significantly improve performance by reducing the amount of data
+    /// that needs to be read and processed.
+    ///
+    /// @param predicate Shared pointer to the predicate for data filtering.
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& SetPredicate(const std::shared_ptr<Predicate>& 
predicate);
+
+    /// Whether to perform precise filtering according to predicates for data 
read from format
+    /// reader.
+    /// @param enabled Whether to enable precise filtering (default: false)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& EnablePredicateFilter(bool enabled);
+
+    /// Enable or disable prefetching of data batches from individual files.
+    ///
+    /// When enabled, the reader will prefetch multiple batches in parallel to
+    /// improve throughput by overlapping I/O with computation. This is 
particularly
+    /// beneficial for high-latency storage systems.
+    ///
+    /// @param enabled Whether to enable prefetching (default: false)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& EnablePrefetch(bool enabled);
+
+    /// Set prefetch cache mode for read operations.
+    ///
+    /// A prefetch cache is used to prebuffer data ranges before they are 
needed,
+    /// which can improve read performance by reducing redundant I/O 
operations.
+    /// @param mode (default: PrefetchCacheMode::ALWAYS)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& SetPrefetchCacheMode(PrefetchCacheMode mode);
+
+    /// Set the cache configuration for prefetch read operations.
+    ///
+    /// @param config The cache configuration to use.
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& WithCacheConfig(const CacheConfig& config);
+
+    /// Set the total number of batches to prefetch across all files.
+    ///
+    /// This controls the memory usage and parallelism of the prefetching 
mechanism.
+    /// Higher values can improve throughput but consume more memory.
+    ///
+    /// @param batch_count Total number of batches to prefetch (default: 600)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& SetPrefetchBatchCount(uint32_t batch_count);
+
+    /// Set the maximum number of parallel prefetch operations.
+    ///
+    /// This limits the number of concurrent I/O operations to prevent 
overwhelming
+    /// the storage system or consuming excessive system resources.
+    ///
+    /// @param parallel_num Maximum parallel prefetch operations (default: 3)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& SetPrefetchMaxParallelNum(uint32_t parallel_num);
+
+    /// Enable or disable multi-threaded row-to-batch conversion in 
merge-on-read scenarios.
+    ///
+    /// When enabled, multiple threads are used to convert row data to batch 
format
+    /// during merge operations, which can improve performance for 
CPU-intensive
+    /// merge operations.
+    ///
+    /// @param enabled Whether to enable multi-threaded conversion (default: 
false)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& EnableMultiThreadRowToBatch(bool enabled);
+
+    /// Set the number of threads for row-to-batch conversion in merge-on-read 
scenarios.
+    ///
+    /// This controls the parallelism of row-to-batch conversion during merge 
operations.
+    /// Higher values can improve performance but may affect result ordering.
+    ///
+    /// @param thread_number Number of conversion threads (default: 1)
+    /// @return Reference to this builder for method chaining.
+    /// @note If thread_number > 1, Arrow batches from the reader may not be 
in primary key order.
+    ReadContextBuilder& SetRowToBatchThreadNumber(uint32_t thread_number);
+
+    /// Set custom memory pool for memory management.
+    /// @param memory_pool The memory pool to use.
+    /// @return Reference to this builder for method chaining.
+    /// @note If not set, the default system memory pool will be used.
+    ReadContextBuilder& WithMemoryPool(const std::shared_ptr<MemoryPool>& 
memory_pool);
+
+    /// Set custom executor for task execution.
+    /// @param executor The executor to use.
+    /// @return Reference to this builder for method chaining.
+    /// @note If not set, the default system executor will be used.
+    ReadContextBuilder& WithExecutor(const std::shared_ptr<Executor>& 
executor);
+
+    /// Set the table schema as a string to avoid schema loading I/O 
operations.
+    ///
+    /// This optimization allows the reader to use a pre-loaded schema instead 
of
+    /// reading it from the table metadata, which can improve performance 
especially
+    /// in scenarios with many small read operations.
+    ///
+    /// @param table_schema String representation of the table schema.
+    /// @return Reference to this builder for method chaining.
+    /// @note The user must ensure that the schema string is valid and matches 
the table.
+    /// @note If not set, the schema will be loaded from the table path.
+    ReadContextBuilder& SetTableSchema(const std::string& table_schema);
+
+    /// Set the specific branch to read from in a versioned table.
+    ///
+    /// Paimon supports branching for data versioning and time travel queries.
+    /// This method allows reading from a specific branch instead of the main 
branch.
+    ///
+    /// @param branch Name of the branch to read from.
+    /// @return Reference to this builder for method chaining.
+    /// @note Default branch is "main" if not specified.
+    ReadContextBuilder& WithBranch(const std::string& branch);
+
+    /// Sets a mapping from URI schemes (e.g., "file", "oss") to registered 
file system
+    /// identifiers. This allows selecting different pre-registered file 
system implementations
+    /// based on the URI scheme at runtime.
+    ///
+    /// @param fs_scheme_to_identifier_map Map from URI scheme (like "oss") to 
the corresponding
+    /// file system identifier.
+    /// @return Reference to this builder for method chaining.
+    /// @note
+    ///   - This method is intended for environments where multiple file 
systems are pre-registered.
+    ///   - The specified identifiers must correspond to file systems that 
have been registered at
+    ///   compile time or initialization.
+    ///   - Cannot be used together with `WithFileSystem()`.

Review Comment:
   The public API documentation states `WithFileSystemSchemeToIdentifierMap()` 
cannot be used together with `WithFileSystem()`, but 
`ReadContextBuilder::Finish()` does not enforce this. Add validation in 
`Finish()` to return an invalid status when both are set, or clearly define and 
document precedence (and implement accordingly).



##########
src/paimon/core/operation/abstract_split_read.h:
##########
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "arrow/type_fwd.h"
+#include "paimon/core/core_options.h"
+#include "paimon/core/deletionvectors/deletion_vector.h"
+#include "paimon/core/io/field_mapping_reader.h"
+#include "paimon/core/operation/internal_read_context.h"
+#include "paimon/core/operation/split_read.h"
+#include "paimon/core/schema/schema_manager.h"
+#include "paimon/core/table/source/data_split_impl.h"
+#include "paimon/core/table/source/deletion_file.h"
+#include "paimon/core/utils/file_store_path_factory.h"
+#include "paimon/format/reader_builder.h"
+#include "paimon/reader/batch_reader.h"
+#include "paimon/reader/file_batch_reader.h"
+#include "paimon/result.h"
+#include "paimon/status.h"
+
+namespace arrow {
+class Schema;
+}  // namespace arrow
+
+namespace paimon {
+class BinaryRow;
+class DataField;
+class DataFilePathFactory;
+class DataSplitImpl;
+class Executor;
+class FieldMappingBuilder;
+class FileStorePathFactory;
+class InternalReadContext;
+class MemoryPool;
+class Predicate;
+struct DataFileMeta;
+class TableSchema;
+
+class AbstractSplitRead : public SplitRead {
+ public:
+    ~AbstractSplitRead() override = default;
+
+    Result<std::vector<std::unique_ptr<FileBatchReader>>> CreateRawFileReaders(
+        const BinaryRow& partition, const 
std::vector<std::shared_ptr<DataFileMeta>>& data_files,
+        const std::shared_ptr<arrow::Schema>& read_schema,
+        const std::shared_ptr<Predicate>& predicate, DeletionVector::Factory 
dv_factory,
+        const std::optional<std::vector<Range>>& row_ranges,
+        const std::shared_ptr<DataFilePathFactory>& data_file_path_factory) 
const;
+
+ protected:
+    AbstractSplitRead(const std::shared_ptr<FileStorePathFactory>& 
path_factory,
+                      const std::shared_ptr<InternalReadContext>& context,
+                      std::unique_ptr<SchemaManager>&& schema_manager,
+                      const std::shared_ptr<MemoryPool>& memory_pool,
+                      const std::shared_ptr<Executor>& executor);
+
+    static std::unordered_map<std::string, DeletionFile> CreateDeletionFileMap(
+        const DataSplitImpl& data_split);
+
+    static std::unordered_map<std::string, DeletionFile> CreateDeletionFileMap(
+        const std::vector<std::shared_ptr<DataFileMeta>>& data_files,
+        const std::vector<std::optional<DeletionFile>>& deletion_files);
+
+    Result<std::unique_ptr<BatchReader>> ApplyPredicateFilterIfNeeded(
+        std::unique_ptr<BatchReader>&& reader, const 
std::shared_ptr<Predicate>& predicate) const;
+
+ protected:
+    // return nullptr if file is skipped by index or dv
+    virtual Result<std::unique_ptr<FileBatchReader>> 
ApplyIndexAndDvReaderIfNeeded(
+        std::unique_ptr<FileBatchReader>&& file_reader, const 
std::shared_ptr<DataFileMeta>& file,
+        const std::shared_ptr<arrow::Schema>& data_schema,
+        const std::shared_ptr<arrow::Schema>& read_schema,
+        const std::shared_ptr<Predicate>& predicate, DeletionVector::Factory 
dv_factory,
+        const std::optional<std::vector<Range>>& row_ranges,
+        const std::shared_ptr<DataFilePathFactory>& data_file_path_factory) 
const = 0;
+
+    // 1. project write cols to data schema
+    // 2. add partition fields (if write cols not contain)
+    // 3. add row tracking fields
+    static Result<std::vector<DataField>> 
ProjectFieldsForRowTrackingAndDataEvolution(
+        const std::shared_ptr<TableSchema>& data_schema,
+        const std::optional<std::vector<std::string>>& write_cols);
+
+ private:
+    Result<std::unique_ptr<ReaderBuilder>> PrepareReaderBuilder(
+        const std::string& format_identifier) const;
+
+    Result<std::unique_ptr<FileBatchReader>> CreateFileBatchReader(
+        const std::shared_ptr<DataFileMeta>& file_meta, const std::string& 
data_file_path,
+        const ReaderBuilder* reader_builder) const;
+
+    // return nullptr if data file is skipped by index or dv
+    Result<std::unique_ptr<FileBatchReader>> CreateFieldMappingReader(
+        const std::string& data_file_path, const 
std::shared_ptr<DataFileMeta>& file_meta,
+        const BinaryRow& partition, const ReaderBuilder* reader_builder,
+        const FieldMappingBuilder* field_mapping_builder, 
DeletionVector::Factory dv_factory,
+        const std::optional<std::vector<Range>>& row_ranges,
+        const std::shared_ptr<DataFilePathFactory>& data_file_path_factory) 
const;
+
+    static bool NeedCompleteRowTrackingFields(bool row_tracking_enabled,
+                                              const 
std::shared_ptr<arrow::Schema>& read_schema);
+
+ protected:
+    std::shared_ptr<MemoryPool> pool_;
+    std::shared_ptr<Executor> executor_;
+    std::shared_ptr<FileStorePathFactory> path_factory_;
+    CoreOptions options_;
+    // user recall schema
+    std::shared_ptr<arrow::Schema> raw_read_schema_;

Review Comment:
   The comment “user recall schema” is unclear/incorrect phrasing. Please 
reword to something precise like “user requested read schema” or “raw 
user-provided read schema” so the intent is unambiguous.



##########
src/paimon/core/operation/read_context.cpp:
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/read_context.h"
+
+#include <utility>
+
+#include "paimon/common/utils/path_util.h"
+#include "paimon/core/utils/branch_manager.h"
+#include "paimon/executor.h"
+#include "paimon/memory/memory_pool.h"
+#include "paimon/status.h"
+
+namespace paimon {
+class Predicate;
+
+ReadContext::ReadContext(
+    const std::string& path, const std::string& branch, const 
std::vector<std::string>& read_schema,
+    const std::vector<int32_t>& read_field_ids, const 
std::shared_ptr<Predicate>& predicate,
+    bool enable_predicate_filter, bool enable_prefetch, uint32_t 
prefetch_batch_count,
+    uint32_t prefetch_max_parallel_num, bool enable_multi_thread_row_to_batch,
+    uint32_t row_to_batch_thread_number, const std::optional<std::string>& 
table_schema,
+    const std::shared_ptr<MemoryPool>& memory_pool, const 
std::shared_ptr<Executor>& executor,
+    const std::shared_ptr<FileSystem>& specific_file_system,
+    const std::map<std::string, std::string>& fs_scheme_to_identifier_map,
+    const std::map<std::string, std::string>& options, PrefetchCacheMode 
prefetch_cache_mode,
+    const CacheConfig& cache_config)
+    : path_(path),
+      branch_(branch),
+      read_schema_(read_schema),
+      read_field_ids_(read_field_ids),
+      predicate_(predicate),
+      enable_predicate_filter_(enable_predicate_filter),
+      enable_prefetch_(enable_prefetch),
+      prefetch_batch_count_(prefetch_batch_count),
+      prefetch_max_parallel_num_(prefetch_max_parallel_num),
+      enable_multi_thread_row_to_batch_(enable_multi_thread_row_to_batch),
+      row_to_batch_thread_number_(row_to_batch_thread_number),
+      table_schema_(table_schema),
+      memory_pool_(memory_pool),
+      executor_(executor),
+      specific_file_system_(specific_file_system),
+      fs_scheme_to_identifier_map_(fs_scheme_to_identifier_map),
+      options_(options),
+      prefetch_cache_mode_(prefetch_cache_mode),
+      cache_config_(cache_config) {}
+
+ReadContext::~ReadContext() = default;
+
+class ReadContextBuilder::Impl {
+ public:
+    friend class ReadContextBuilder;
+    void Reset() {
+        branch_ = BranchManager::DEFAULT_MAIN_BRANCH;
+        read_field_names_.clear();
+        read_field_ids_.clear();
+        fs_scheme_to_identifier_map_.clear();
+        options_.clear();
+        predicate_.reset();
+        enable_predicate_filter_ = false;
+        enable_prefetch_ = false;
+        prefetch_cache_mode_ = PrefetchCacheMode::ALWAYS;
+        prefetch_batch_count_ = 600;
+        prefetch_max_parallel_num_ = 3;
+        enable_multi_thread_row_to_batch_ = false;
+        row_to_batch_thread_number_ = 1;
+        table_schema_ = std::nullopt;
+        memory_pool_ = GetDefaultPool();
+        executor_.reset();
+        specific_file_system_.reset();
+        cache_config_ = CacheConfig();
+    }
+
+ private:
+    std::string path_;
+    std::string branch_ = BranchManager::DEFAULT_MAIN_BRANCH;
+    std::vector<std::string> read_field_names_;
+    std::vector<int32_t> read_field_ids_;
+    std::map<std::string, std::string> fs_scheme_to_identifier_map_;
+    std::map<std::string, std::string> options_;
+    std::shared_ptr<Predicate> predicate_;
+    bool enable_predicate_filter_ = false;
+    bool enable_prefetch_ = false;
+    uint32_t prefetch_batch_count_ = 600;
+    uint32_t prefetch_max_parallel_num_ = 3;
+    bool enable_multi_thread_row_to_batch_ = false;
+    uint32_t row_to_batch_thread_number_ = 1;
+    std::optional<std::string> table_schema_;
+    std::shared_ptr<MemoryPool> memory_pool_ = GetDefaultPool();
+    std::shared_ptr<Executor> executor_;
+    std::shared_ptr<FileSystem> specific_file_system_;
+    PrefetchCacheMode prefetch_cache_mode_ = PrefetchCacheMode::ALWAYS;
+    CacheConfig cache_config_;
+};
+
+ReadContextBuilder::ReadContextBuilder(const std::string& path)
+    : impl_(std::make_unique<ReadContextBuilder::Impl>()) {
+    impl_->path_ = path;
+}
+
+ReadContextBuilder::~ReadContextBuilder() = default;
+
+ReadContextBuilder::ReadContextBuilder(ReadContextBuilder&&) noexcept = 
default;
+ReadContextBuilder& ReadContextBuilder::operator=(ReadContextBuilder&&) 
noexcept = default;
+
+ReadContextBuilder& ReadContextBuilder::AddOption(const std::string& key,
+                                                  const std::string& value) {
+    impl_->options_[key] = value;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetOptions(const std::map<std::string, 
std::string>& opts) {
+    impl_->options_ = opts;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetReadSchema(
+    const std::vector<std::string>& read_field_names) {
+    impl_->read_field_names_ = read_field_names;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetReadFieldIds(
+    const std::vector<int32_t>& read_field_ids) {
+    impl_->read_field_ids_ = read_field_ids;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetPredicate(const 
std::shared_ptr<Predicate>& predicate) {
+    impl_->predicate_ = predicate;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::EnablePredicateFilter(bool enabled) {
+    impl_->enable_predicate_filter_ = enabled;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::EnablePrefetch(bool enabled) {
+    impl_->enable_prefetch_ = enabled;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetPrefetchBatchCount(uint32_t 
batch_count) {
+    impl_->prefetch_batch_count_ = batch_count;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetPrefetchMaxParallelNum(uint32_t 
max_parallel_num) {
+    impl_->prefetch_max_parallel_num_ = max_parallel_num;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::EnableMultiThreadRowToBatch(bool 
enabled) {
+    impl_->enable_multi_thread_row_to_batch_ = enabled;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetRowToBatchThreadNumber(uint32_t 
thread_number) {
+    impl_->row_to_batch_thread_number_ = thread_number;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithMemoryPool(
+    const std::shared_ptr<MemoryPool>& memory_pool) {
+    impl_->memory_pool_ = memory_pool;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithExecutor(const 
std::shared_ptr<Executor>& executor) {
+    impl_->executor_ = executor;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetTableSchema(const std::string& 
table_schema) {
+    impl_->table_schema_ = table_schema;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithBranch(const std::string& branch) {
+    impl_->branch_ = branch;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithFileSystemSchemeToIdentifierMap(
+    const std::map<std::string, std::string>& fs_scheme_to_identifier_map) {
+    impl_->fs_scheme_to_identifier_map_ = fs_scheme_to_identifier_map;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithFileSystem(
+    const std::shared_ptr<FileSystem>& file_system) {
+    impl_->specific_file_system_ = file_system;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetPrefetchCacheMode(PrefetchCacheMode 
mode) {
+    impl_->prefetch_cache_mode_ = mode;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithCacheConfig(const CacheConfig& 
cache_config) {
+    impl_->cache_config_ = cache_config;
+    return *this;
+}
+
+Result<std::unique_ptr<ReadContext>> ReadContextBuilder::Finish() {
+    PAIMON_ASSIGN_OR_RAISE(impl_->path_, 
PathUtil::NormalizePath(impl_->path_));
+    if (impl_->path_.empty()) {
+        return Status::Invalid("cannot read with empty table path");
+    }
+    if (impl_->enable_prefetch_ && impl_->prefetch_batch_count_ <= 0) {
+        return Status::Invalid("prefetch batch count should be greater than 
0");
+    }
+    if (impl_->enable_prefetch_ &&
+        impl_->prefetch_batch_count_ < impl_->prefetch_max_parallel_num_) {
+        return Status::Invalid(
+            "prefetch batch count should be greater than or equal to prefetch 
max parallel num");
+    }
+    if (!impl_->executor_) {
+        // If the user do not set executor, create default executor by 
prefetch batch count
+        uint32_t thread_count = impl_->enable_prefetch_ ? 
impl_->prefetch_max_parallel_num_ : 1;
+        impl_->executor_ = CreateDefaultExecutor(thread_count);
+    }
+
+    if (impl_->enable_multi_thread_row_to_batch_ && 
impl_->row_to_batch_thread_number_ <= 0) {
+        return Status::Invalid("row to batch thread number should be greater 
than 0");
+    }

Review Comment:
   `prefetch_batch_count_` and `row_to_batch_thread_number_` are `uint32_t`, so 
`<= 0` is equivalent to `== 0` and can be misleading. Prefer `== 0` for clarity 
(and consistency with unsigned types).



##########
src/paimon/core/operation/abstract_split_read.cpp:
##########
@@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/core/operation/abstract_split_read.h"
+
+#include <cassert>
+#include <cstddef>
+#include <utility>
+
+#include "arrow/type.h"
+#include "paimon/common/reader/delegating_prefetch_reader.h"
+#include "paimon/common/reader/predicate_batch_reader.h"
+#include "paimon/common/reader/prefetch_file_batch_reader_impl.h"
+#include "paimon/common/table/special_fields.h"
+#include "paimon/common/types/data_field.h"
+#include "paimon/common/utils/object_utils.h"
+#include "paimon/core/io/complete_row_tracking_fields_reader.h"
+#include "paimon/core/io/data_file_meta.h"
+#include "paimon/core/io/data_file_path_factory.h"
+#include "paimon/core/io/field_mapping_reader.h"
+#include "paimon/core/operation/internal_read_context.h"
+#include "paimon/core/partition/partition_info.h"
+#include "paimon/core/schema/table_schema.h"
+#include "paimon/core/table/source/data_split_impl.h"
+#include "paimon/core/utils/field_mapping.h"
+#include "paimon/format/file_format.h"
+#include "paimon/format/file_format_factory.h"
+#include "paimon/fs/file_system.h"
+#include "paimon/status.h"
+
+namespace paimon {
+class BinaryRow;
+class Executor;
+class FileStorePathFactory;
+class MemoryPool;
+class Predicate;
+
+AbstractSplitRead::AbstractSplitRead(const 
std::shared_ptr<FileStorePathFactory>& path_factory,
+                                     const 
std::shared_ptr<InternalReadContext>& context,
+                                     std::unique_ptr<SchemaManager>&& 
schema_manager,
+                                     const std::shared_ptr<MemoryPool>& 
memory_pool,
+                                     const std::shared_ptr<Executor>& executor)
+    : pool_(memory_pool),
+      executor_(executor),
+      path_factory_(path_factory),
+      options_(context->GetCoreOptions()),
+      raw_read_schema_(context->GetReadSchema()),
+      context_(context),
+      schema_manager_(std::move(schema_manager)) {}
+
+Result<std::vector<std::unique_ptr<FileBatchReader>>> 
AbstractSplitRead::CreateRawFileReaders(
+    const BinaryRow& partition, const 
std::vector<std::shared_ptr<DataFileMeta>>& data_files,
+    const std::shared_ptr<arrow::Schema>& read_schema, const 
std::shared_ptr<Predicate>& predicate,
+    DeletionVector::Factory dv_factory, const 
std::optional<std::vector<Range>>& row_ranges,
+    const std::shared_ptr<DataFilePathFactory>& data_file_path_factory) const {
+    if (data_files.empty()) {
+        return std::vector<std::unique_ptr<FileBatchReader>>();
+    }
+    PAIMON_ASSIGN_OR_RAISE(
+        std::unique_ptr<FieldMappingBuilder> field_mapping_builder,
+        FieldMappingBuilder::Create(read_schema, context_->GetPartitionKeys(), 
predicate));
+
+    std::vector<std::unique_ptr<FileBatchReader>> raw_file_readers;
+    raw_file_readers.reserve(data_files.size());
+    for (const auto& file : data_files) {
+        auto data_file_path = data_file_path_factory->ToPath(file);
+        PAIMON_ASSIGN_OR_RAISE(std::string data_file_identifier, 
file->FileFormat());
+        PAIMON_ASSIGN_OR_RAISE(std::unique_ptr<ReaderBuilder> reader_builder,
+                               PrepareReaderBuilder(data_file_identifier));
+        PAIMON_ASSIGN_OR_RAISE(
+            std::unique_ptr<FileBatchReader> file_reader,
+            CreateFieldMappingReader(data_file_path, file, partition, 
reader_builder.get(),
+                                     field_mapping_builder.get(), dv_factory, 
row_ranges,
+                                     data_file_path_factory));
+        if (file_reader) {
+            raw_file_readers.push_back(std::move(file_reader));
+        }
+    }
+    return std::move(raw_file_readers);

Review Comment:
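   Returning a local via `std::move` is redundant and, when the local has the 
function's return type, inhibits NRVO. Here the `std::vector` is converted to a 
`Result`, so NRVO does not apply either way; prefer a plain `return 
raw_file_readers;`, which is moved implicitly provided `Result` has a 
constructor taking `T&&`. This is small, but this path may be hot when building 
many readers.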



##########
include/paimon/read_context.h:
##########
@@ -0,0 +1,353 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "paimon/predicate/predicate.h"
+#include "paimon/result.h"
+#include "paimon/type_fwd.h"
+#include "paimon/utils/read_ahead_cache.h"
+#include "paimon/visibility.h"
+
+namespace paimon {
+class Executor;
+class MemoryPool;
+class Predicate;
+class FileSystem;
+
+/// `ReadContext` is some configuration for read operations.
+///
+/// Please do not use this class directly, use `ReadContextBuilder` to build a 
`ReadContext` which
+/// has input validation.
+/// @see ReadContextBuilder
+class PAIMON_EXPORT ReadContext {
+ public:
+    ReadContext(const std::string& path, const std::string& branch,
+                const std::vector<std::string>& read_schema,
+                const std::vector<int32_t>& read_field_ids,
+                const std::shared_ptr<Predicate>& predicate, bool 
enable_predicate_filter,
+                bool enable_prefetch, uint32_t prefetch_batch_count,
+                uint32_t prefetch_max_parallel_num, bool 
enable_multi_thread_row_to_batch,
+                uint32_t row_to_batch_thread_number, const 
std::optional<std::string>& table_schema,
+                const std::shared_ptr<MemoryPool>& memory_pool,
+                const std::shared_ptr<Executor>& executor,
+                const std::shared_ptr<FileSystem>& specific_file_system,
+                const std::map<std::string, std::string>& 
fs_scheme_to_identifier_map,
+                const std::map<std::string, std::string>& options,
+                PrefetchCacheMode prefetch_cache_mode, const CacheConfig& 
cache_config);
+    ~ReadContext();
+
+    const std::string& GetPath() const {
+        return path_;
+    }
+
+    const std::string& GetBranch() const {
+        return branch_;
+    }
+
+    const std::map<std::string, std::string>& 
GetFileSystemSchemeToIdentifierMap() const {
+        return fs_scheme_to_identifier_map_;
+    }
+
+    const std::map<std::string, std::string>& GetOptions() const {
+        return options_;
+    }
+
+    const std::vector<std::string>& GetReadSchema() const {
+        return read_schema_;
+    }
+
+    const std::vector<int32_t>& GetReadFieldIds() const {
+        return read_field_ids_;
+    }
+
+    const std::shared_ptr<Predicate>& GetPredicate() const {
+        return predicate_;
+    }
+
+    bool EnablePredicateFilter() const {
+        return enable_predicate_filter_;
+    }
+    bool EnablePrefetch() const {
+        return enable_prefetch_;
+    }
+    uint32_t GetPrefetchBatchCount() const {
+        return prefetch_batch_count_;
+    }
+    uint32_t GetPrefetchMaxParallelNum() const {
+        return prefetch_max_parallel_num_;
+    }
+    bool EnableMultiThreadRowToBatch() const {
+        return enable_multi_thread_row_to_batch_;
+    }
+    uint32_t GetRowToBatchThreadNumber() const {
+        return row_to_batch_thread_number_;
+    }
+    const std::optional<std::string>& GetSpecificTableSchema() {
+        return table_schema_;
+    }
+    std::shared_ptr<MemoryPool> GetMemoryPool() const {
+        return memory_pool_;
+    }
+    std::shared_ptr<Executor> GetExecutor() const {
+        return executor_;
+    }
+    std::shared_ptr<FileSystem> GetSpecificFileSystem() const {
+        return specific_file_system_;
+    }
+
+    PrefetchCacheMode GetPrefetchCacheMode() const {
+        return prefetch_cache_mode_;
+    }
+
+    const CacheConfig& GetCacheConfig() const {
+        return cache_config_;
+    }
+
+ private:
+    std::string path_;
+    std::string branch_;
+    std::vector<std::string> read_schema_;
+    std::vector<int32_t> read_field_ids_;
+    std::shared_ptr<Predicate> predicate_;
+    bool enable_predicate_filter_;
+    bool enable_prefetch_;
+    uint32_t prefetch_batch_count_;
+    uint32_t prefetch_max_parallel_num_;
+    bool enable_multi_thread_row_to_batch_;
+    uint32_t row_to_batch_thread_number_;
+    std::optional<std::string> table_schema_;
+    std::shared_ptr<MemoryPool> memory_pool_;
+    std::shared_ptr<Executor> executor_;
+    std::shared_ptr<FileSystem> specific_file_system_;
+    std::map<std::string, std::string> fs_scheme_to_identifier_map_;
+    std::map<std::string, std::string> options_;
+    PrefetchCacheMode prefetch_cache_mode_;
+    CacheConfig cache_config_;
+};
+
+/// `ReadContextBuilder` used to build a `ReadContext`, has input validation.
+class PAIMON_EXPORT ReadContextBuilder {
+ public:
+    /// Constructs a `ReadContextBuilder` with required parameters.
+    /// @param path The root path of the table.
+    explicit ReadContextBuilder(const std::string& path);
+
+    ~ReadContextBuilder();
+
+    ReadContextBuilder(ReadContextBuilder&&) noexcept;
+    ReadContextBuilder& operator=(ReadContextBuilder&&) noexcept;
+
+    /// Set the schema fields to read from the table.
+    ///
+    /// If not set, all fields from the table schema will be read. This is 
useful for
+    /// projection pushdown to reduce I/O and improve performance by reading 
only
+    /// the required columns.
+    ///
+    /// @param read_field_names Vector of field names to read from the table.
+    /// @return Reference to this builder for method chaining.
+    /// @note Currently supports top-level field selection. Future versions 
may support
+    ///       nested field selection using ArrowSchema for more granular 
projection
+    ReadContextBuilder& SetReadSchema(const std::vector<std::string>& 
read_field_names);
+    /// Set the schema fields to read from the table.
+    ///
+    /// If not set, all fields from the table schema will be read. This is 
useful for
+    /// projection pushdown to reduce I/O and improve performance by reading 
only
+    /// the required columns.
+    ///
+    /// @param read_field_ids Vector of field ids to read from the table.
+    /// @return Reference to this builder for method chaining.
+    /// @note Currently supports top-level field selection. Future versions 
may support
+    ///       nested field selection using ArrowSchema for more granular 
projection.
+    /// @note SetReadFieldIds() and SetReadSchema() are mutually exclusive.
+    ///       Calling both will ignore the read schema set by SetReadSchema().
+    ReadContextBuilder& SetReadFieldIds(const std::vector<int32_t>& 
read_field_ids);
+
+    /// Set a configuration options map to set some option entries which are 
not defined in the
+    /// table schema or whose values you want to overwrite.
+    /// @note The options map will clear the options added by `AddOption()` 
before.
+    /// @param options The configuration options map.
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& SetOptions(const std::map<std::string, std::string>& 
options);
+
+    /// Add a single configuration option which is not defined in the table 
schema or whose value
+    /// you want to overwrite.
+    ///
+    /// If you want to add multiple options, call `AddOption()` multiple times 
or use `SetOptions()`
+    /// instead.
+    /// @param key The option key.
+    /// @param value The option value.
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& AddOption(const std::string& key, const std::string& 
value);
+
+    /// Set a predicate for filtering data during reading.
+    ///
+    /// The predicate is used for both partition pruning and data filtering.
+    /// It can significantly improve performance by reducing the amount of data
+    /// that needs to be read and processed.
+    ///
+    /// @param predicate Shared pointer to the predicate for data filtering.
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& SetPredicate(const std::shared_ptr<Predicate>& 
predicate);
+
+    /// Whether to perform precise filtering according to predicates for data 
read from format
+    /// reader.
+    /// @param enabled Whether to enable precise filtering (default: false)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& EnablePredicateFilter(bool enabled);
+
+    /// Enable or disable prefetching of data batches from individual files.
+    ///
+    /// When enabled, the reader will prefetch multiple batches in parallel to
+    /// improve throughput by overlapping I/O with computation. This is 
particularly
+    /// beneficial for high-latency storage systems.
+    ///
+    /// @param enabled Whether to enable prefetching (default: false)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& EnablePrefetch(bool enabled);
+
+    /// Set prefetch cache mode for read operations.
+    ///
+    /// A prefetch cache is used to prebuffer data ranges before they are 
needed,
+    /// which can improve read performance by reducing redundant I/O 
operations.
+    /// @param mode (default: PrefetchCacheMode::ALWAYS)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& SetPrefetchCacheMode(PrefetchCacheMode mode);
+
+    /// Set the cache configuration for prefetch read operations.
+    ///
+    /// @param config The cache configuration to use.
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& WithCacheConfig(const CacheConfig& config);
+
+    /// Set the total number of batches to prefetch across all files.
+    ///
+    /// This controls the memory usage and parallelism of the prefetching 
mechanism.
+    /// Higher values can improve throughput but consume more memory.
+    ///
+    /// @param batch_count Total number of batches to prefetch (default: 600)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& SetPrefetchBatchCount(uint32_t batch_count);
+
+    /// Set the maximum number of parallel prefetch operations.
+    ///
+    /// This limits the number of concurrent I/O operations to prevent 
overwhelming
+    /// the storage system or consuming excessive system resources.
+    ///
+    /// @param parallel_num Maximum parallel prefetch operations (default: 3)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& SetPrefetchMaxParallelNum(uint32_t parallel_num);
+
+    /// Enable or disable multi-threaded row-to-batch conversion in 
merge-on-read scenarios.
+    ///
+    /// When enabled, multiple threads are used to convert row data to batch 
format
+    /// during merge operations, which can improve performance for 
CPU-intensive
+    /// merge operations.
+    ///
+    /// @param enabled Whether to enable multi-threaded conversion (default: 
false)
+    /// @return Reference to this builder for method chaining.
+    ReadContextBuilder& EnableMultiThreadRowToBatch(bool enabled);
+
+    /// Set the number of threads for row-to-batch conversion in merge-on-read 
scenarios.
+    ///
+    /// This controls the parallelism of row-to-batch conversion during merge 
operations.
+    /// Higher values can improve performance but may affect result ordering.
+    ///
+    /// @param thread_number Number of conversion threads (default: 1)
+    /// @return Reference to this builder for method chaining.
+    /// @note If thread_number > 1, Arrow batches from the reader may not be 
in primary key order.
+    ReadContextBuilder& SetRowToBatchThreadNumber(uint32_t thread_number);
+
+    /// Set custom memory pool for memory management.
+    /// @param memory_pool The memory pool to use.
+    /// @return Reference to this builder for method chaining.
+    /// @note If not set, the default system memory pool will be used.
+    ReadContextBuilder& WithMemoryPool(const std::shared_ptr<MemoryPool>& 
memory_pool);
+
+    /// Set custom executor for task execution.
+    /// @param executor The executor to use.
+    /// @return Reference to this builder for method chaining.
+    /// @note If not set, the default system executor will be used.
+    ReadContextBuilder& WithExecutor(const std::shared_ptr<Executor>& 
executor);
+
+    /// Set the table schema as a string to avoid schema loading I/O 
operations.
+    ///
+    /// This optimization allows the reader to use a pre-loaded schema instead 
of
+    /// reading it from the table metadata, which can improve performance 
especially
+    /// in scenarios with many small read operations.
+    ///
+    /// @param table_schema String representation of the table schema.
+    /// @return Reference to this builder for method chaining.
+    /// @note The user must ensure that the schema string is valid and matches 
the table.
+    /// @note If not set, the schema will be loaded from the table path.
+    ReadContextBuilder& SetTableSchema(const std::string& table_schema);
+
+    /// Set the specific branch to read from in a versioned table.
+    ///
+    /// Paimon supports branching for data versioning and time travel queries.
+    /// This method allows reading from a specific branch instead of the main 
branch.
+    ///
+    /// @param branch Name of the branch to read from.
+    /// @return Reference to this builder for method chaining.
+    /// @note Default branch is "main" if not specified.
+    ReadContextBuilder& WithBranch(const std::string& branch);
+
+    /// Sets a mapping from URI schemes (e.g., "file", "oss") to registered 
file system
+    /// identifiers. This allows selecting different pre-registered file 
system implementations
+    /// based on the URI scheme at runtime.
+    ///
+    /// @param fs_scheme_to_identifier_map Map from URI scheme (like "oss") to 
the corresponding
+    /// file system identifier.
+    /// @return Reference to this builder for method chaining.
+    /// @note
+    ///   - This method is intended for environments where multiple file 
systems are pre-registered.
+    ///   - The specified identifiers must correspond to file systems that 
have been registered at
+    ///   compile time or initialization.
+    ///   - Cannot be used together with `WithFileSystem()`.
+    ///   - If not set, use default file system (configured in 
`Options::FILE_SYSTEM`).
+    /// Example:
+    ///   builder.WithFileSystemSchemeToIdentifierMap({{"oss", "jindo"}, 
{"file", "local"}});
+    ///
+    ReadContextBuilder& WithFileSystemSchemeToIdentifierMap(
+        const std::map<std::string, std::string>& fs_scheme_to_identifier_map);
+
+    /// Sets a custom file system instance to be used for all file operations 
in this read context.

Review Comment:
   The public API documentation states that 
`WithFileSystemSchemeToIdentifierMap()` cannot be used together with 
`WithFileSystem()`, but `ReadContextBuilder::Finish()` does not enforce this. 
Either add validation in `Finish()` that returns `Status::Invalid` when both 
are set, or clearly define and document the precedence (and implement it 
accordingly).



##########
src/paimon/core/operation/read_context.cpp:
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon/read_context.h"
+
+#include <utility>
+
+#include "paimon/common/utils/path_util.h"
+#include "paimon/core/utils/branch_manager.h"
+#include "paimon/executor.h"
+#include "paimon/memory/memory_pool.h"
+#include "paimon/status.h"
+
+namespace paimon {
+class Predicate;
+
+ReadContext::ReadContext(
+    const std::string& path, const std::string& branch, const 
std::vector<std::string>& read_schema,
+    const std::vector<int32_t>& read_field_ids, const 
std::shared_ptr<Predicate>& predicate,
+    bool enable_predicate_filter, bool enable_prefetch, uint32_t 
prefetch_batch_count,
+    uint32_t prefetch_max_parallel_num, bool enable_multi_thread_row_to_batch,
+    uint32_t row_to_batch_thread_number, const std::optional<std::string>& 
table_schema,
+    const std::shared_ptr<MemoryPool>& memory_pool, const 
std::shared_ptr<Executor>& executor,
+    const std::shared_ptr<FileSystem>& specific_file_system,
+    const std::map<std::string, std::string>& fs_scheme_to_identifier_map,
+    const std::map<std::string, std::string>& options, PrefetchCacheMode 
prefetch_cache_mode,
+    const CacheConfig& cache_config)
+    : path_(path),
+      branch_(branch),
+      read_schema_(read_schema),
+      read_field_ids_(read_field_ids),
+      predicate_(predicate),
+      enable_predicate_filter_(enable_predicate_filter),
+      enable_prefetch_(enable_prefetch),
+      prefetch_batch_count_(prefetch_batch_count),
+      prefetch_max_parallel_num_(prefetch_max_parallel_num),
+      enable_multi_thread_row_to_batch_(enable_multi_thread_row_to_batch),
+      row_to_batch_thread_number_(row_to_batch_thread_number),
+      table_schema_(table_schema),
+      memory_pool_(memory_pool),
+      executor_(executor),
+      specific_file_system_(specific_file_system),
+      fs_scheme_to_identifier_map_(fs_scheme_to_identifier_map),
+      options_(options),
+      prefetch_cache_mode_(prefetch_cache_mode),
+      cache_config_(cache_config) {}
+
+ReadContext::~ReadContext() = default;
+
+class ReadContextBuilder::Impl {
+ public:
+    friend class ReadContextBuilder;
+    void Reset() {
+        branch_ = BranchManager::DEFAULT_MAIN_BRANCH;
+        read_field_names_.clear();
+        read_field_ids_.clear();
+        fs_scheme_to_identifier_map_.clear();
+        options_.clear();
+        predicate_.reset();
+        enable_predicate_filter_ = false;
+        enable_prefetch_ = false;
+        prefetch_cache_mode_ = PrefetchCacheMode::ALWAYS;
+        prefetch_batch_count_ = 600;
+        prefetch_max_parallel_num_ = 3;
+        enable_multi_thread_row_to_batch_ = false;
+        row_to_batch_thread_number_ = 1;
+        table_schema_ = std::nullopt;
+        memory_pool_ = GetDefaultPool();
+        executor_.reset();
+        specific_file_system_.reset();
+        cache_config_ = CacheConfig();
+    }
+
+ private:
+    std::string path_;
+    std::string branch_ = BranchManager::DEFAULT_MAIN_BRANCH;
+    std::vector<std::string> read_field_names_;
+    std::vector<int32_t> read_field_ids_;
+    std::map<std::string, std::string> fs_scheme_to_identifier_map_;
+    std::map<std::string, std::string> options_;
+    std::shared_ptr<Predicate> predicate_;
+    bool enable_predicate_filter_ = false;
+    bool enable_prefetch_ = false;
+    uint32_t prefetch_batch_count_ = 600;
+    uint32_t prefetch_max_parallel_num_ = 3;
+    bool enable_multi_thread_row_to_batch_ = false;
+    uint32_t row_to_batch_thread_number_ = 1;
+    std::optional<std::string> table_schema_;
+    std::shared_ptr<MemoryPool> memory_pool_ = GetDefaultPool();
+    std::shared_ptr<Executor> executor_;
+    std::shared_ptr<FileSystem> specific_file_system_;
+    PrefetchCacheMode prefetch_cache_mode_ = PrefetchCacheMode::ALWAYS;
+    CacheConfig cache_config_;
+};
+
+ReadContextBuilder::ReadContextBuilder(const std::string& path)
+    : impl_(std::make_unique<ReadContextBuilder::Impl>()) {
+    impl_->path_ = path;
+}
+
+ReadContextBuilder::~ReadContextBuilder() = default;
+
+ReadContextBuilder::ReadContextBuilder(ReadContextBuilder&&) noexcept = 
default;
+ReadContextBuilder& ReadContextBuilder::operator=(ReadContextBuilder&&) 
noexcept = default;
+
+ReadContextBuilder& ReadContextBuilder::AddOption(const std::string& key,
+                                                  const std::string& value) {
+    impl_->options_[key] = value;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetOptions(const std::map<std::string, 
std::string>& opts) {
+    impl_->options_ = opts;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetReadSchema(
+    const std::vector<std::string>& read_field_names) {
+    impl_->read_field_names_ = read_field_names;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetReadFieldIds(
+    const std::vector<int32_t>& read_field_ids) {
+    impl_->read_field_ids_ = read_field_ids;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetPredicate(const 
std::shared_ptr<Predicate>& predicate) {
+    impl_->predicate_ = predicate;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::EnablePredicateFilter(bool enabled) {
+    impl_->enable_predicate_filter_ = enabled;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::EnablePrefetch(bool enabled) {
+    impl_->enable_prefetch_ = enabled;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetPrefetchBatchCount(uint32_t 
batch_count) {
+    impl_->prefetch_batch_count_ = batch_count;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetPrefetchMaxParallelNum(uint32_t 
max_parallel_num) {
+    impl_->prefetch_max_parallel_num_ = max_parallel_num;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::EnableMultiThreadRowToBatch(bool 
enabled) {
+    impl_->enable_multi_thread_row_to_batch_ = enabled;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetRowToBatchThreadNumber(uint32_t 
thread_number) {
+    impl_->row_to_batch_thread_number_ = thread_number;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithMemoryPool(
+    const std::shared_ptr<MemoryPool>& memory_pool) {
+    impl_->memory_pool_ = memory_pool;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithExecutor(const 
std::shared_ptr<Executor>& executor) {
+    impl_->executor_ = executor;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetTableSchema(const std::string& 
table_schema) {
+    impl_->table_schema_ = table_schema;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithBranch(const std::string& branch) {
+    impl_->branch_ = branch;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithFileSystemSchemeToIdentifierMap(
+    const std::map<std::string, std::string>& fs_scheme_to_identifier_map) {
+    impl_->fs_scheme_to_identifier_map_ = fs_scheme_to_identifier_map;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithFileSystem(
+    const std::shared_ptr<FileSystem>& file_system) {
+    impl_->specific_file_system_ = file_system;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::SetPrefetchCacheMode(PrefetchCacheMode 
mode) {
+    impl_->prefetch_cache_mode_ = mode;
+    return *this;
+}
+
+ReadContextBuilder& ReadContextBuilder::WithCacheConfig(const CacheConfig& 
cache_config) {
+    impl_->cache_config_ = cache_config;
+    return *this;
+}
+
+Result<std::unique_ptr<ReadContext>> ReadContextBuilder::Finish() {
+    PAIMON_ASSIGN_OR_RAISE(impl_->path_, 
PathUtil::NormalizePath(impl_->path_));
+    if (impl_->path_.empty()) {
+        return Status::Invalid("cannot read with empty table path");
+    }
+    if (impl_->enable_prefetch_ && impl_->prefetch_batch_count_ <= 0) {
+        return Status::Invalid("prefetch batch count should be greater than 
0");
+    }

Review Comment:
   `prefetch_batch_count_` and `row_to_batch_thread_number_` are `uint32_t`, so 
`<= 0` is equivalent to `== 0` and can be misleading. Prefer `== 0` for clarity 
(and consistency with unsigned types).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to