mapleFU commented on code in PR #45351: URL: https://github.com/apache/arrow/pull/45351#discussion_r1929646868
########## python/pyarrow/tests/parquet/test_basic.py: ########## @@ -364,9 +364,9 @@ def test_byte_stream_split(): def test_store_decimal_as_integer(tempdir): arr_decimal_1_9 = pa.array(list(map(Decimal, range(100))), - type=pa.decimal128(5, 2)) + type=pa.decimal32(5, 2)) Review Comment: this should not be changed, instead we should add new tests here ########## cpp/src/parquet/arrow/reader_internal.cc: ########## @@ -211,25 +217,35 @@ Status ExtractDecimalMinMaxFromBytesType(const Statistics& statistics, std::shared_ptr<::arrow::Scalar>* max) { const DecimalLogicalType& decimal_type = checked_cast<const DecimalLogicalType&>(logical_type); - - Result<std::shared_ptr<DataType>> maybe_type = - Decimal128Type::Make(decimal_type.precision(), decimal_type.scale()); + auto precision = decimal_type.precision(); + auto scale = decimal_type.scale(); std::shared_ptr<DataType> arrow_type; - if (maybe_type.ok()) { - arrow_type = maybe_type.ValueOrDie(); + + if (precision <= Decimal32Type::kMaxPrecision) { Review Comment: Can we add a comment that the literal would can be cast to the correspond type if the real reader type is a wider decimal type? ########## cpp/src/parquet/column_writer.cc: ########## @@ -2220,8 +2229,7 @@ Status TypedColumnWriterImpl<Int64Type>::WriteArrowDense( WRITE_SERIALIZE_CASE(UINT64, UInt64Type, Int64Type) WRITE_ZERO_COPY_CASE(TIME64, Time64Type, Int64Type) WRITE_ZERO_COPY_CASE(DURATION, DurationType, Int64Type) - WRITE_SERIALIZE_CASE(DECIMAL128, Decimal128Type, Int64Type) - WRITE_SERIALIZE_CASE(DECIMAL256, Decimal256Type, Int64Type) + WRITE_SERIALIZE_CASE(DECIMAL64, Decimal64Type, Int64Type) Review Comment: ditto ########## cpp/src/parquet/column_writer.cc: ########## @@ -2048,8 +2058,7 @@ Status TypedColumnWriterImpl<Int32Type>::WriteArrowDense( WRITE_ZERO_COPY_CASE(DATE32, Date32Type, Int32Type) WRITE_SERIALIZE_CASE(DATE64, Date64Type, Int32Type) WRITE_SERIALIZE_CASE(TIME32, Time32Type, Int32Type) - WRITE_SERIALIZE_CASE(DECIMAL128, Decimal128Type, Int32Type) - WRITE_SERIALIZE_CASE(DECIMAL256, Decimal256Type, Int32Type) Review Comment: why not ``` WRITE_SERIALIZE_CASE(DECIMAL32, Decimal32Type, Int32Type) WRITE_SERIALIZE_CASE(DECIMAL64, Decimal64Type, Int32Type) WRITE_SERIALIZE_CASE(DECIMAL128, Decimal128Type, Int32Type) WRITE_SERIALIZE_CASE(DECIMAL256, Decimal256Type, Int32Type) ``` ########## cpp/src/parquet/column_writer.cc: ########## @@ -2383,29 +2391,45 @@ struct SerializeFunctor< int64_t non_null_count = array.length() - array.null_count(); int64_t size = non_null_count * ArrowType::kByteWidth; scratch_buffer = AllocateBuffer(ctx->memory_pool, size); - scratch = reinterpret_cast<int64_t*>(scratch_buffer->mutable_data()); + scratch_i32 = reinterpret_cast<int32_t*>(scratch_buffer->mutable_data()); + scratch_i64 = reinterpret_cast<int64_t*>(scratch_buffer->mutable_data()); Review Comment: why split this into two parts? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org