Re: [PR] [VARIANT] Validate precision in VariantDecimalXX structs and add missing tests [arrow-rs]

via GitHub Wed, 25 Jun 2025 09:15:16 -0700


scovich commented on code in PR #7776:
URL: https://github.com/apache/arrow-rs/pull/7776#discussion_r2167094431



##########
parquet-variant/src/variant/decimal.rs:
##########
@@ -0,0 +1,331 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use arrow_schema::ArrowError;
+
+/// Represents a 4-byte decimal value in the Variant format.
+///
+/// This struct stores a decimal number using a 32-bit signed integer for the 
coefficient
+/// and an 8-bit unsigned integer for the scale (number of decimal places). 
Its precision is limited to 9 digits.
+///
+/// For valid precision and scale values, see the Variant specification:
+/// 
<https://github.com/apache/parquet-format/blob/87f2c8bf77eefb4c43d0ebaeea1778bd28ac3609/VariantEncoding.md?plain=1#L418-L420>
+///
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct VariantDecimal4 {
+    pub(crate) integer: i32,
+    pub(crate) scale: u8,
+}
+
+impl VariantDecimal4 {
+    const MAX_PRECISION: u32 = 9;
+    const MAX_UNSCALED_VALUE: u32 = 10_u32.pow(Self::MAX_PRECISION) - 1;
+
+    pub fn try_new(integer: i32, scale: u8) -> Result<Self, ArrowError> {
+        // Validate that scale doesn't exceed precision
+        if scale as u32 > Self::MAX_PRECISION {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Scale {} of a 4-byte decimal cannot exceed the max precision 
{}",
+                scale,
+                Self::MAX_PRECISION,
+            )));
+        }
+
+        // Validate that the integer value fits within the precision
+        if integer.unsigned_abs() > Self::MAX_UNSCALED_VALUE {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "{} is too large to store in a 4-byte decimal with max 
precision {}",
+                integer,
+                Self::MAX_PRECISION
+            )));
+        }

Review Comment:
   The newly added validation



##########
parquet-variant/src/variant/decimal.rs:
##########
@@ -0,0 +1,331 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use arrow_schema::ArrowError;
+
+/// Represents a 4-byte decimal value in the Variant format.
+///
+/// This struct stores a decimal number using a 32-bit signed integer for the 
coefficient
+/// and an 8-bit unsigned integer for the scale (number of decimal places). 
Its precision is limited to 9 digits.
+///
+/// For valid precision and scale values, see the Variant specification:
+/// 
<https://github.com/apache/parquet-format/blob/87f2c8bf77eefb4c43d0ebaeea1778bd28ac3609/VariantEncoding.md?plain=1#L418-L420>
+///
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct VariantDecimal4 {
+    pub(crate) integer: i32,
+    pub(crate) scale: u8,
+}
+
+impl VariantDecimal4 {
+    const MAX_PRECISION: u32 = 9;
+    const MAX_UNSCALED_VALUE: u32 = 10_u32.pow(Self::MAX_PRECISION) - 1;

Review Comment:
   Hoisted up and renamed the existing constant, and used it to define the 
other constant



##########
parquet-variant/src/variant.rs:
##########
@@ -40,98 +42,6 @@ const MAX_SHORT_STRING_BYTES: usize = 0x3F;
 #[derive(Debug, Clone, Copy, PartialEq)]
 pub struct ShortString<'a>(pub(crate) &'a str);
 
-/// Represents a 4-byte decimal value in the Variant format.

Review Comment:
   Moved to decimal.rs



##########
parquet-variant/src/variant.rs:
##########
@@ -1,5 +1,3 @@
-use std::ops::Deref;

Review Comment:
   not sure how this ended up here...



##########
parquet-variant/src/variant/decimal.rs:
##########
@@ -0,0 +1,331 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use arrow_schema::ArrowError;
+
+/// Represents a 4-byte decimal value in the Variant format.
+///
+/// This struct stores a decimal number using a 32-bit signed integer for the 
coefficient
+/// and an 8-bit unsigned integer for the scale (number of decimal places). 
Its precision is limited to 9 digits.
+///
+/// For valid precision and scale values, see the Variant specification:
+/// 
<https://github.com/apache/parquet-format/blob/87f2c8bf77eefb4c43d0ebaeea1778bd28ac3609/VariantEncoding.md?plain=1#L418-L420>
+///
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct VariantDecimal4 {
+    pub(crate) integer: i32,
+    pub(crate) scale: u8,
+}
+
+impl VariantDecimal4 {
+    const MAX_PRECISION: u32 = 9;
+    const MAX_UNSCALED_VALUE: u32 = 10_u32.pow(Self::MAX_PRECISION) - 1;
+
+    pub fn try_new(integer: i32, scale: u8) -> Result<Self, ArrowError> {
+        // Validate that scale doesn't exceed precision
+        if scale as u32 > Self::MAX_PRECISION {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Scale {} of a 4-byte decimal cannot exceed the max precision 
{}",
+                scale,
+                Self::MAX_PRECISION,

Review Comment:
   Updated the error message to reference the constant instead of a magic number



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] [VARIANT] Validate precision in VariantDecimalXX structs and add missing tests [arrow-rs]

Reply via email to