[GitHub] [arrow-rs] tustvold commented on a diff in pull request #3711: Feat/arrow csv decimal256

via GitHub Tue, 14 Feb 2023 03:01:24 -0800


tustvold commented on code in PR #3711:
URL: https://github.com/apache/arrow-rs/pull/3711#discussion_r1105625484



##########
arrow-csv/src/reader/mod.rs:
##########
@@ -880,6 +919,77 @@ fn parse_decimal_with_parameter(
     }
 }
 
+// Parse the string format decimal value to i256 format and checking the precision and scale.
+// The result i256 value can't be out of bounds.
+fn parse_decimal256_with_parameter(
+    s: &str,
+    precision: u8,
+    scale: i8,
+) -> Result<i256, ArrowError> {
+    if PARSE_DECIMAL_RE.is_match(s) {
+        let mut offset = s.len();
+        let len = s.len();
+        let mut base = i256::from_i128(1);
+        let scale_usize = usize::from(scale as u8);
+
+        // handle the value after the '.' and meet the scale
+        let delimiter_position = s.find('.');
+        match delimiter_position {
+            None => {
+                // there is no '.'
+                // FIXME: Is it appropriate to write like this?

Review Comment:
   ```suggestion
   ```
   Looks correct to me



##########
arrow-csv/src/reader/mod.rs:
##########
@@ -814,6 +818,41 @@ fn build_decimal_array(
     ))
 }
 
+//TODO: is possible to use generic function replace this?

Review Comment:
   It should be possible to write a function generic over `T: DecimalType`



##########
arrow-csv/src/reader/mod.rs:
##########
@@ -880,6 +919,77 @@ fn parse_decimal_with_parameter(
     }
 }
 
+// Parse the string format decimal value to i256 format and checking the precision and scale.
+// The result i256 value can't be out of bounds.
+fn parse_decimal256_with_parameter(
+    s: &str,
+    precision: u8,
+    scale: i8,
+) -> Result<i256, ArrowError> {
+    if PARSE_DECIMAL_RE.is_match(s) {
+        let mut offset = s.len();
+        let len = s.len();
+        let mut base = i256::from_i128(1);
+        let scale_usize = usize::from(scale as u8);
+
+        // handle the value after the '.' and meet the scale
+        let delimiter_position = s.find('.');
+        match delimiter_position {
+            None => {
+                // there is no '.'
+                // FIXME: Is it appropriate to write like this?
+                base = i256::from_i128(10).wrapping_pow(scale as u32);
+            }
+            Some(mid) => {
+                // there is the '.'
+                if len - mid >= scale_usize + 1 {
+                    // If the string value is "123.12345" and the scale is 2, we should just remain '.12' and drop the '345' value.
+                    offset -= len - mid - 1 - scale_usize;
+                } else {
+                    // If the string value is "123.12" and the scale is 4, we should append '00' to the tail.
+                    base = i256::from_i128(10)
+                        .wrapping_pow((scale_usize + 1 + mid - len) as u32);
+                }
+            }
+        };
+
+        // each byte is digit、'-' or '.'
+        let bytes = s.as_bytes();
+        let mut negative = false;
+        let mut result = i256::from_i128(0);
+
+        bytes[0..offset].iter().rev().for_each(|&byte| match byte {
+            b'-' => {
+                negative = true;
+            }
+            b'0'..=b'9' => {
+                //TODO: support '+=' and '*=' for i256
+                //TODO: support i256::from_byte for i256

Review Comment:
   Given these don't impact the correctness of the code, I'd suggest leaving 
these out and perhaps filing a ticket to add support for these?



##########
arrow-csv/src/reader/mod.rs:
##########
@@ -880,6 +919,77 @@ fn parse_decimal_with_parameter(
     }
 }
 
+// Parse the string format decimal value to i256 format and checking the precision and scale.
+// The result i256 value can't be out of bounds.
+fn parse_decimal256_with_parameter(
+    s: &str,
+    precision: u8,
+    scale: i8,
+) -> Result<i256, ArrowError> {
+    if PARSE_DECIMAL_RE.is_match(s) {
+        let mut offset = s.len();
+        let len = s.len();
+        let mut base = i256::from_i128(1);
+        let scale_usize = usize::from(scale as u8);
+
+        // handle the value after the '.' and meet the scale
+        let delimiter_position = s.find('.');
+        match delimiter_position {
+            None => {
+                // there is no '.'
+                // FIXME: Is it appropriate to write like this?
+                base = i256::from_i128(10).wrapping_pow(scale as u32);
+            }
+            Some(mid) => {
+                // there is the '.'
+                if len - mid >= scale_usize + 1 {
+                    // If the string value is "123.12345" and the scale is 2, we should just remain '.12' and drop the '345' value.
+                    offset -= len - mid - 1 - scale_usize;
+                } else {
+                    // If the string value is "123.12" and the scale is 4, we should append '00' to the tail.
+                    base = i256::from_i128(10)
+                        .wrapping_pow((scale_usize + 1 + mid - len) as u32);
+                }
+            }
+        };
+
+        // each byte is digit、'-' or '.'
+        let bytes = s.as_bytes();
+        let mut negative = false;
+        let mut result = i256::from_i128(0);
+
+        bytes[0..offset].iter().rev().for_each(|&byte| match byte {
+            b'-' => {
+                negative = true;
+            }
+            b'0'..=b'9' => {
+                //TODO: support '+=' and '*=' for i256
+                //TODO: support i256::from_byte for i256

Review Comment:
   ```suggestion
   ```
   



##########
arrow-csv/src/reader/mod.rs:
##########
@@ -880,6 +919,77 @@ fn parse_decimal_with_parameter(
     }
 }
 
+// Parse the string format decimal value to i256 format and checking the precision and scale.
+// The result i256 value can't be out of bounds.
+fn parse_decimal256_with_parameter(
+    s: &str,
+    precision: u8,
+    scale: i8,
+) -> Result<i256, ArrowError> {
+    if PARSE_DECIMAL_RE.is_match(s) {
+        let mut offset = s.len();
+        let len = s.len();
+        let mut base = i256::from_i128(1);
+        let scale_usize = usize::from(scale as u8);
+
+        // handle the value after the '.' and meet the scale
+        let delimiter_position = s.find('.');
+        match delimiter_position {
+            None => {
+                // there is no '.'
+                // FIXME: Is it appropriate to write like this?
+                base = i256::from_i128(10).wrapping_pow(scale as u32);
+            }
+            Some(mid) => {
+                // there is the '.'
+                if len - mid >= scale_usize + 1 {
+                    // If the string value is "123.12345" and the scale is 2, we should just remain '.12' and drop the '345' value.
+                    offset -= len - mid - 1 - scale_usize;
+                } else {
+                    // If the string value is "123.12" and the scale is 4, we should append '00' to the tail.
+                    base = i256::from_i128(10)
+                        .wrapping_pow((scale_usize + 1 + mid - len) as u32);
+                }
+            }
+        };
+
+        // each byte is digit、'-' or '.'
+        let bytes = s.as_bytes();
+        let mut negative = false;
+        let mut result = i256::from_i128(0);
+
+        bytes[0..offset].iter().rev().for_each(|&byte| match byte {
+            b'-' => {
+                negative = true;
+            }
+            b'0'..=b'9' => {
+                //TODO: support '+=' and '*=' for i256
+                //TODO: support i256::from_byte for i256
+                result = result + i256::from_i128((byte - b'0').into()) * base;
+                //TODO: support Mul<i32> for i256

Review Comment:
   ```suggestion
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [arrow-rs] tustvold commented on a diff in pull request #3711: Feat/arrow csv decimal256

Reply via email to