leaves12138 commented on code in PR #24: URL: https://github.com/apache/paimon-cpp/pull/24#discussion_r3308344796
########## src/paimon/common/data/columnar/columnar_array.cpp: ########## @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "paimon/common/data/columnar/columnar_array.h" + +#include <utility> + +#include "arrow/api.h" +#include "arrow/array/array_decimal.h" +#include "arrow/array/array_nested.h" +#include "arrow/array/array_primitive.h" +#include "arrow/type_traits.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/decimal.h" +#include "fmt/format.h" +#include "paimon/common/data/columnar/columnar_batch_context.h" +#include "paimon/common/data/columnar/columnar_map.h" +#include "paimon/common/data/columnar/columnar_row_ref.h" +#include "paimon/common/utils/date_time_utils.h" + +namespace paimon { +Status ColumnarArray::CheckNoNull() const { + for (int32_t i = 0; i < length_; i++) { + if (IsNullAt(i)) { + return Status::Invalid(fmt::format("row {} is null", i)); + } + } + return Status::OK(); +} + +Decimal ColumnarArray::GetDecimal(int32_t pos, int32_t precision, int32_t scale) const { + using ArrayType = typename arrow::TypeTraits<arrow::Decimal128Type>::ArrayType; + auto array = arrow::internal::checked_cast<const ArrayType*>(array_); + assert(array); + arrow::Decimal128 decimal(array->GetValue(offset_ + pos)); + return Decimal(precision, scale, + static_cast<Decimal::int128_t>(decimal.high_bits()) << 64 | decimal.low_bits()); Review Comment: Please avoid reconstructing the signed int128 value by left-shifting a signed high word. For negative Decimal128 values, decimal.high_bits() is negative, and left-shifting a negative signed integer is undefined behavior in C++. This same expression is also used in ColumnarRow and ColumnarRowRef. Please combine the bits through an unsigned 128-bit value first, then cast to Decimal::int128_t, and add a negative decimal test. ########## src/paimon/common/data/columnar/columnar_row.cpp: ########## @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "paimon/common/data/columnar/columnar_row.h" + +#include <cassert> + +#include "arrow/array/array_base.h" +#include "arrow/array/array_decimal.h" +#include "arrow/array/array_nested.h" +#include "arrow/array/array_primitive.h" +#include "arrow/type_traits.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/decimal.h" +#include "paimon/common/data/columnar/columnar_array.h" +#include "paimon/common/data/columnar/columnar_map.h" +#include "paimon/common/utils/date_time_utils.h" + +namespace paimon { +Decimal ColumnarRow::GetDecimal(int32_t pos, int32_t precision, int32_t scale) const { + using ArrayType = typename arrow::TypeTraits<arrow::Decimal128Type>::ArrayType; + auto array = arrow::internal::checked_cast<const ArrayType*>(array_vec_[pos]); + assert(array); + arrow::Decimal128 decimal(array->GetValue(row_id_)); + return Decimal(precision, scale, + static_cast<Decimal::int128_t>(decimal.high_bits()) << 64 | decimal.low_bits()); +} + +Timestamp ColumnarRow::GetTimestamp(int32_t pos, int32_t precision) const { + using ArrayType = typename arrow::TypeTraits<arrow::TimestampType>::ArrayType; + auto array = arrow::internal::checked_cast<const ArrayType*>(array_vec_[pos]); + assert(array); + int64_t data = array->Value(row_id_); + auto timestamp_type = + arrow::internal::checked_pointer_cast<arrow::TimestampType>(array->type()); + // for orc format, data is saved as nano, therefore, Timestamp convert should consider precision + // in arrow array rather than input precision + DateTimeUtils::TimeType time_type = DateTimeUtils::GetTimeTypeFromArrowType(timestamp_type); + auto [milli, nano] = DateTimeUtils::TimestampConverter( + data, time_type, DateTimeUtils::TimeType::MILLISECOND, DateTimeUtils::TimeType::NANOSECOND); + return Timestamp(milli, nano); +} + +std::shared_ptr<InternalRow> ColumnarRow::GetRow(int32_t pos, int32_t num_fields) const { + auto struct_array = arrow::internal::checked_cast<const arrow::StructArray*>(array_vec_[pos]); + assert(struct_array); + return std::make_shared<ColumnarRow>(struct_array->fields(), pool_, row_id_); Review Comment: This nested row still drops the ownership chain. The returned ColumnarRow stores raw child Array pointers, but it is constructed without a holder. If the parent ColumnarRow is the only object owning the top-level StructArray, a nested row returned from GetRow can outlive the parent and then its raw child pointers dangle. The current DataLifeCycle test only uses result_row while the parent row is alive, so it does not catch this. Please keep an owning holder here, for example by passing a shared_ptr/sliced StructArray or by returning ColumnarRowRef with a ColumnarBatchContext that owns struct_array->fields(). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
