This is an automated email from the ASF dual-hosted git repository.
gongxun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git
The following commit(s) were added to refs/heads/main by this push:
new f1d6691df1e Optimize OrcWriter: precompute varlena indices to skip
invalid loops
f1d6691df1e is described below
commit f1d6691df1e32cb424dd6d49648b852adf89ac9f
Author: GongXun <[email protected]>
AuthorDate: Thu Oct 9 16:57:32 2025 +0800
Optimize OrcWriter: precompute varlena indices to skip invalid loops
Reduces per-tuple branching and cache touches, especially on schemas with
many fixed-length/byval columns.
1. Add OrcWriter member: std::vector<int> varlena_slowpath_indices_
2. Precompute non-byval, typlen == -1 (varlena) column indices in
constructor from tuple_desc
3. Update PrepareWriteTuple to iterate only precomputed indices, skipping
fixed-length and byval columns
---
contrib/pax_storage/src/cpp/storage/orc/orc_writer.cc | 18 ++++++++++++------
contrib/pax_storage/src/cpp/storage/orc/porc.h | 3 +++
2 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/contrib/pax_storage/src/cpp/storage/orc/orc_writer.cc b/contrib/pax_storage/src/cpp/storage/orc/orc_writer.cc
index 6c8d49502e5..5c8b52272d0 100644
--- a/contrib/pax_storage/src/cpp/storage/orc/orc_writer.cc
+++ b/contrib/pax_storage/src/cpp/storage/orc/orc_writer.cc
@@ -249,6 +249,16 @@ OrcWriter::OrcWriter(
group_stats_.Initialize(writer_options.enable_min_max_col_idxs,
writer_options.enable_bf_col_idxs);
+
+ // Precompute slowpath indices for varlena columns (non-byval and typlen == -1)
+ varlena_slowpath_indices_.clear();
+ varlena_slowpath_indices_.reserve(writer_options.rel_tuple_desc->natts);
+ for (int i = 0; i < writer_options.rel_tuple_desc->natts; ++i) {
+ auto attrs = TupleDescAttr(writer_options.rel_tuple_desc, i);
+ if (!attrs->attbyval && attrs->attlen == -1) {
+ varlena_slowpath_indices_.push_back(i);
+ }
+ }
}
OrcWriter::~OrcWriter() {}
@@ -311,8 +321,6 @@ void OrcWriter::Flush() {
std::vector<std::pair<int, Datum>> OrcWriter::PrepareWriteTuple(
TupleTableSlot *table_slot) {
TupleDesc tuple_desc;
- int16 type_len;
- bool type_by_val;
bool is_null;
Datum tts_value;
char type_storage;
@@ -323,18 +331,16 @@ std::vector<std::pair<int, Datum>> OrcWriter::PrepareWriteTuple(
Assert(tuple_desc);
const auto &required_stats_cols = group_stats_.GetRequiredStatsColsMask();
- for (int i = 0; i < tuple_desc->natts; i++) {
+ for (int i : varlena_slowpath_indices_) {
bool save_origin_datum;
auto attrs = TupleDescAttr(tuple_desc, i);
- type_len = attrs->attlen;
- type_by_val = attrs->attbyval;
is_null = table_slot->tts_isnull[i];
tts_value = table_slot->tts_values[i];
type_storage = attrs->attstorage;
AssertImply(attrs->attisdropped, is_null);
- if (is_null || type_by_val || type_len != -1) {
+ if (is_null) {
continue;
}
diff --git a/contrib/pax_storage/src/cpp/storage/orc/porc.h b/contrib/pax_storage/src/cpp/storage/orc/porc.h
index 4bfccb6dec3..69a10ffaad7 100644
--- a/contrib/pax_storage/src/cpp/storage/orc/porc.h
+++ b/contrib/pax_storage/src/cpp/storage/orc/porc.h
@@ -138,6 +138,9 @@ class OrcWriter : public MicroPartitionWriter {
::pax::porc::proto::Footer file_footer_;
::pax::porc::proto::PostScript post_script_;
::pax::MicroPartitionStats group_stats_;
+
+ // indices of columns that are non-byval and have typlen == -1 (varlena)
+ std::vector<int> varlena_slowpath_indices_;
};
class OrcReader : public MicroPartitionReader {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]