leaves12138 commented on code in PR #22:
URL: https://github.com/apache/paimon-cpp/pull/22#discussion_r3309019615


##########
src/paimon/common/data/binary_row_writer.cpp:
##########
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/common/data/binary_row_writer.h"
+
+#include <cassert>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "fmt/format.h"
+#include "paimon/common/data/binary_string.h"
+#include "paimon/common/memory/memory_segment.h"
+#include "paimon/common/utils/date_time_utils.h"
+#include "paimon/data/decimal.h"
+#include "paimon/data/timestamp.h"
+#include "paimon/memory/bytes.h"
+#include "paimon/memory/memory_pool.h"
+#include "paimon/status.h"
+
+namespace paimon {
+
+BinaryRowWriter::BinaryRowWriter(BinaryRow* row, int32_t initial_size, 
MemoryPool* pool)
+    : 
null_bits_size_in_bytes_(BinaryRow::CalculateBitSetWidthInBytes(row->GetFieldCount())),
+      fixed_size_(row->GetFixedLengthPartSize()) {
+    cursor_ = fixed_size_;
+    row_ = row;
+    segment_ = MemorySegment::Wrap(Bytes::AllocateBytes(fixed_size_ + 
initial_size, pool));
+    row_->PointTo(segment_, 0, segment_.Size());
+    pool_ = pool;
+}
+
+Result<BinaryRowWriter::FieldSetterFunc> BinaryRowWriter::CreateFieldSetter(
+    int32_t field_idx, const std::shared_ptr<arrow::DataType>& field_type) {
+    arrow::Type::type type = field_type->id();
+    BinaryRowWriter::FieldSetterFunc field_setter;
+    switch (type) {
+        case arrow::Type::type::BOOL: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                return writer->WriteBoolean(field_idx, 
DataDefine::GetVariantValue<bool>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::INT8: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                return writer->WriteByte(field_idx, 
DataDefine::GetVariantValue<char>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::INT16: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                return writer->WriteShort(field_idx, 
DataDefine::GetVariantValue<int16_t>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::INT32: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                return writer->WriteInt(field_idx, 
DataDefine::GetVariantValue<int32_t>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::INT64: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                return writer->WriteLong(field_idx, 
DataDefine::GetVariantValue<int64_t>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::FLOAT: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                return writer->WriteFloat(field_idx, 
DataDefine::GetVariantValue<float>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::DOUBLE: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                return writer->WriteDouble(field_idx, 
DataDefine::GetVariantValue<double>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::DATE32: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                return writer->WriteInt(field_idx, 
DataDefine::GetVariantValue<int32_t>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::STRING: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                const auto* view = 
DataDefine::GetVariantPtr<std::string_view>(field);
+                if (view) {
+                    return writer->WriteStringView(field_idx, *view);
+                }
+                return writer->WriteString(field_idx,
+                                           
DataDefine::GetVariantValue<BinaryString>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::BINARY: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                const auto* view = 
DataDefine::GetVariantPtr<std::string_view>(field);
+                if (view) {
+                    return writer->WriteStringView(field_idx, *view);
+                }
+                return writer->WriteBinary(
+                    field_idx, 
*DataDefine::GetVariantValue<std::shared_ptr<Bytes>>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::TIMESTAMP: {
+            auto timestamp_type =
+                
arrow::internal::checked_pointer_cast<arrow::TimestampType>(field_type);
+            int32_t precision = 
DateTimeUtils::GetPrecisionFromType(timestamp_type);
+            field_setter = [field_idx, precision](const VariantType& field,
+                                                  BinaryRowWriter* writer) -> 
void {
+                if (DataDefine::IsVariantNull(field)) {
+                    if (!Timestamp::IsCompact(precision)) {
+                        writer->WriteTimestamp(field_idx, std::nullopt, 
precision);
+                    } else {
+                        writer->SetNullAt(field_idx);
+                    }
+                    return;
+                }
+                return writer->WriteTimestamp(
+                    field_idx, DataDefine::GetVariantValue<Timestamp>(field), 
precision);
+            };
+            return field_setter;
+        }
+        case arrow::Type::type::DECIMAL: {
+            auto* decimal_type =
+                
arrow::internal::checked_cast<arrow::Decimal128Type*>(field_type.get());
+            assert(decimal_type);
+            auto precision = decimal_type->precision();
+            field_setter = [field_idx, precision](const VariantType& field,

Review Comment:
   For non-compact decimals, the setter captures only precision and then calls 
`WriteDecimal` with a `Decimal` value whose scale may differ from the Arrow 
field scale. `WriteDecimal` currently only asserts the precision and never 
checks or normalizes scale, but the reader reconstructs decimals with the 
schema scale. That means a value like `Decimal(5, 3, 123)` written to a 
`decimal128(5, 2)` field will be read back as `1.23` instead of the original 
`0.123` (or being rejected). Please capture the Arrow scale here and either 
validate `value.Scale() == scale` or rescale/reject before writing, so the 
stored unscaled bytes match the schema scale.



##########
src/paimon/common/data/binary_row_writer.cpp:
##########
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/common/data/binary_row_writer.h"
+
+#include <cassert>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "fmt/format.h"
+#include "paimon/common/data/binary_string.h"
+#include "paimon/common/memory/memory_segment.h"
+#include "paimon/common/utils/date_time_utils.h"
+#include "paimon/data/decimal.h"
+#include "paimon/data/timestamp.h"
+#include "paimon/memory/bytes.h"
+#include "paimon/memory/memory_pool.h"
+#include "paimon/status.h"
+
+namespace paimon {
+
+BinaryRowWriter::BinaryRowWriter(BinaryRow* row, int32_t initial_size, 
MemoryPool* pool)
+    : 
null_bits_size_in_bytes_(BinaryRow::CalculateBitSetWidthInBytes(row->GetFieldCount())),
+      fixed_size_(row->GetFixedLengthPartSize()) {
+    cursor_ = fixed_size_;
+    row_ = row;
+    segment_ = MemorySegment::Wrap(Bytes::AllocateBytes(fixed_size_ + 
initial_size, pool));
+    row_->PointTo(segment_, 0, segment_.Size());
+    pool_ = pool;
+}
+
+Result<BinaryRowWriter::FieldSetterFunc> BinaryRowWriter::CreateFieldSetter(
+    int32_t field_idx, const std::shared_ptr<arrow::DataType>& field_type) {
+    arrow::Type::type type = field_type->id();
+    BinaryRowWriter::FieldSetterFunc field_setter;
+    switch (type) {
+        case arrow::Type::type::BOOL: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                return writer->WriteBoolean(field_idx, 
DataDefine::GetVariantValue<bool>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::INT8: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                return writer->WriteByte(field_idx, 
DataDefine::GetVariantValue<char>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::INT16: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                return writer->WriteShort(field_idx, 
DataDefine::GetVariantValue<int16_t>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::INT32: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                return writer->WriteInt(field_idx, 
DataDefine::GetVariantValue<int32_t>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::INT64: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                return writer->WriteLong(field_idx, 
DataDefine::GetVariantValue<int64_t>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::FLOAT: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                return writer->WriteFloat(field_idx, 
DataDefine::GetVariantValue<float>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::DOUBLE: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                return writer->WriteDouble(field_idx, 
DataDefine::GetVariantValue<double>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::DATE32: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                return writer->WriteInt(field_idx, 
DataDefine::GetVariantValue<int32_t>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::STRING: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                const auto* view = 
DataDefine::GetVariantPtr<std::string_view>(field);
+                if (view) {
+                    return writer->WriteStringView(field_idx, *view);
+                }
+                return writer->WriteString(field_idx,
+                                           
DataDefine::GetVariantValue<BinaryString>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::BINARY: {
+            field_setter = [field_idx](const VariantType& field, 
BinaryRowWriter* writer) -> void {
+                const auto* view = 
DataDefine::GetVariantPtr<std::string_view>(field);
+                if (view) {
+                    return writer->WriteStringView(field_idx, *view);
+                }
+                return writer->WriteBinary(
+                    field_idx, 
*DataDefine::GetVariantValue<std::shared_ptr<Bytes>>(field));
+            };
+            break;
+        }
+        case arrow::Type::type::TIMESTAMP: {
+            auto timestamp_type =
+                
arrow::internal::checked_pointer_cast<arrow::TimestampType>(field_type);
+            int32_t precision = 
DateTimeUtils::GetPrecisionFromType(timestamp_type);
+            field_setter = [field_idx, precision](const VariantType& field,
+                                                  BinaryRowWriter* writer) -> 
void {
+                if (DataDefine::IsVariantNull(field)) {
+                    if (!Timestamp::IsCompact(precision)) {
+                        writer->WriteTimestamp(field_idx, std::nullopt, 
precision);
+                    } else {
+                        writer->SetNullAt(field_idx);
+                    }
+                    return;
+                }
+                return writer->WriteTimestamp(
+                    field_idx, DataDefine::GetVariantValue<Timestamp>(field), 
precision);
+            };
+            return field_setter;
+        }
+        case arrow::Type::type::DECIMAL: {

Review Comment:
   This branch still will not handle `arrow::decimal128(...)`. In Arrow C++, 
decimal128 reports `arrow::Type::DECIMAL128` (the rest of this codebase also 
switches on `DECIMAL128`, e.g. `DataDefine::VariantValueToLiteral` and 
`BinaryArrayWriter::GetElementSize` tests). As written, 
`CreateFieldSetter(arrow::decimal128(...))` falls through to the 
unsupported-type error, so the decimal cases added in 
`binary_row_writer_test.cpp` cannot work in a real build. Please switch this to 
`DECIMAL128` (and add DECIMAL256 handling only if intended).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to