leaves12138 commented on code in PR #24:
URL: https://github.com/apache/paimon-cpp/pull/24#discussion_r3308344796


##########
src/paimon/common/data/columnar/columnar_array.cpp:
##########
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "paimon/common/data/columnar/columnar_array.h"
+
+#include <utility>
+
+#include "arrow/api.h"
+#include "arrow/array/array_decimal.h"
+#include "arrow/array/array_nested.h"
+#include "arrow/array/array_primitive.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/decimal.h"
+#include "fmt/format.h"
+#include "paimon/common/data/columnar/columnar_batch_context.h"
+#include "paimon/common/data/columnar/columnar_map.h"
+#include "paimon/common/data/columnar/columnar_row_ref.h"
+#include "paimon/common/utils/date_time_utils.h"
+
+namespace paimon {
+Status ColumnarArray::CheckNoNull() const {
+    for (int32_t i = 0; i < length_; i++) {
+        if (IsNullAt(i)) {
+            return Status::Invalid(fmt::format("row {} is null", i));
+        }
+    }
+    return Status::OK();
+}
+
+Decimal ColumnarArray::GetDecimal(int32_t pos, int32_t precision, int32_t 
scale) const {
+    using ArrayType = typename 
arrow::TypeTraits<arrow::Decimal128Type>::ArrayType;
+    auto array = arrow::internal::checked_cast<const ArrayType*>(array_);
+    assert(array);
+    arrow::Decimal128 decimal(array->GetValue(offset_ + pos));
+    return Decimal(precision, scale,
+                   static_cast<Decimal::int128_t>(decimal.high_bits()) << 64 | 
decimal.low_bits());

Review Comment:
   Please avoid reconstructing the signed int128 value by left-shifting a 
signed high word. For negative Decimal128 values, decimal.high_bits() is 
negative, and left-shifting a negative signed integer is undefined behavior in 
C++. This same expression is also used in ColumnarRow and ColumnarRowRef. 
Please combine the bits through an unsigned 128-bit value first, then cast to 
Decimal::int128_t, and add a negative decimal test.



##########
src/paimon/common/data/columnar/columnar_row.cpp:
##########
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "paimon/common/data/columnar/columnar_row.h"
+
+#include <cassert>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_decimal.h"
+#include "arrow/array/array_nested.h"
+#include "arrow/array/array_primitive.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/decimal.h"
+#include "paimon/common/data/columnar/columnar_array.h"
+#include "paimon/common/data/columnar/columnar_map.h"
+#include "paimon/common/utils/date_time_utils.h"
+
+namespace paimon {
+Decimal ColumnarRow::GetDecimal(int32_t pos, int32_t precision, int32_t scale) 
const {
+    using ArrayType = typename 
arrow::TypeTraits<arrow::Decimal128Type>::ArrayType;
+    auto array = arrow::internal::checked_cast<const 
ArrayType*>(array_vec_[pos]);
+    assert(array);
+    arrow::Decimal128 decimal(array->GetValue(row_id_));
+    return Decimal(precision, scale,
+                   static_cast<Decimal::int128_t>(decimal.high_bits()) << 64 | 
decimal.low_bits());
+}
+
+Timestamp ColumnarRow::GetTimestamp(int32_t pos, int32_t precision) const {
+    using ArrayType = typename 
arrow::TypeTraits<arrow::TimestampType>::ArrayType;
+    auto array = arrow::internal::checked_cast<const 
ArrayType*>(array_vec_[pos]);
+    assert(array);
+    int64_t data = array->Value(row_id_);
+    auto timestamp_type =
+        
arrow::internal::checked_pointer_cast<arrow::TimestampType>(array->type());
+    // for orc format, data is saved as nano, therefore, Timestamp convert 
should consider precision
+    // in arrow array rather than input precision
+    DateTimeUtils::TimeType time_type = 
DateTimeUtils::GetTimeTypeFromArrowType(timestamp_type);
+    auto [milli, nano] = DateTimeUtils::TimestampConverter(
+        data, time_type, DateTimeUtils::TimeType::MILLISECOND, 
DateTimeUtils::TimeType::NANOSECOND);
+    return Timestamp(milli, nano);
+}
+
+std::shared_ptr<InternalRow> ColumnarRow::GetRow(int32_t pos, int32_t 
num_fields) const {
+    auto struct_array = arrow::internal::checked_cast<const 
arrow::StructArray*>(array_vec_[pos]);
+    assert(struct_array);
+    return std::make_shared<ColumnarRow>(struct_array->fields(), pool_, 
row_id_);

Review Comment:
   This nested row still drops the ownership chain. The returned ColumnarRow 
stores raw child Array pointers, but it is constructed without a holder. If the 
parent ColumnarRow is the only object owning the top-level StructArray, a 
nested row returned from GetRow can outlive the parent and then its raw child 
pointers dangle. The current DataLifeCycle test only uses result_row while the 
parent row is alive, so it does not catch this. Please keep an owning holder 
here, for example by passing a shared_ptr/sliced StructArray or by returning 
ColumnarRowRef with a ColumnarBatchContext that owns struct_array->fields().



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to