tustvold commented on code in PR #3183:
URL: https://github.com/apache/arrow-rs/pull/3183#discussion_r1031724821
##########
arrow-cast/src/display.rs:
##########
@@ -109,106 +100,92 @@ macro_rules! make_string_interval_month_day_nano {
.downcast_ref::<array::IntervalMonthDayNanoArray>()
.unwrap();
- let s = if array.is_null($row) {
- "NULL".to_string()
- } else {
- let value: u128 = array.value($row) as u128;
-
- let months_part: i32 =
- ((value & 0xFFFFFFFF000000000000000000000000) >> 96) as i32;
- let days_part: i32 = ((value & 0xFFFFFFFF0000000000000000) >> 64)
as i32;
- let nanoseconds_part: i64 = (value & 0xFFFFFFFFFFFFFFFF) as i64;
-
- let secs = nanoseconds_part / 1000000000;
- let mins = secs / 60;
- let hours = mins / 60;
-
- let secs = secs - (mins * 60);
- let mins = mins - (hours * 60);
-
- format!(
- "0 years {} mons {} days {} hours {} mins {}.{:09} secs",
- months_part,
- days_part,
- hours,
- mins,
- secs,
- (nanoseconds_part % 1000000000),
- )
- };
-
- Ok(s)
+ // SAFETY: This is safe because array_value_to_string(the macro
caller) is already
+ // checking for is_null($row) which also acts like a bounds check.
Hence we can
+ // directly call value_unchecked.
+ let value: u128 = unsafe { array.value_unchecked($row) as u128 };
+
+ let months_part: i32 =
+ ((value & 0xFFFFFFFF000000000000000000000000) >> 96) as i32;
+ let days_part: i32 = ((value & 0xFFFFFFFF0000000000000000) >> 64) as
i32;
+ let nanoseconds_part: i64 = (value & 0xFFFFFFFFFFFFFFFF) as i64;
+
+ let secs = nanoseconds_part / 1000000000;
+ let mins = secs / 60;
+ let hours = mins / 60;
+
+ let secs = secs - (mins * 60);
+ let mins = mins - (hours * 60);
+
+ Ok(format!(
+ "0 years {} mons {} days {} hours {} mins {}.{:09} secs",
+ months_part,
+ days_part,
+ hours,
+ mins,
+ secs,
+ (nanoseconds_part % 1000000000),
+ ))
}};
}
macro_rules! make_string_date {
($array_type:ty, $column: ident, $row: ident) => {{
let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
- let s = if array.is_null($row) {
- "".to_string()
- } else {
- array
- .value_as_date($row)
- .map(|d| d.to_string())
- .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string())
- };
+ // We are skipping the null check because array_value_to_string(the
macro caller) is already
+ // checking for is_null($row)
+ Ok(array
+ .value_as_date($row)
+ .map(|d| d.to_string())
+ .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string()))
- Ok(s)
}};
}
macro_rules! make_string_time {
($array_type:ty, $column: ident, $row: ident) => {{
let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
- let s = if array.is_null($row) {
- "".to_string()
- } else {
- array
- .value_as_time($row)
- .map(|d| d.to_string())
- .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string())
- };
+ // We are skipping the null check because array_value_to_string(the
macro caller) is already
+ // checking for is_null($row)
+ Ok(array
+ .value_as_time($row)
+ .map(|d| d.to_string())
+ .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string()))
- Ok(s)
}};
}
macro_rules! make_string_datetime {
($array_type:ty, $column: ident, $row: ident) => {{
let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
- let s = if array.is_null($row) {
- "".to_string()
- } else {
- array
- .value_as_datetime($row)
- .map(|d| format!("{:?}", d))
- .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string())
- };
+ // We are skipping the null check because array_value_to_string(the
macro caller) is already
Review Comment:
FWIW the definition of ArrayAccessor means it is "safe" to do this
regardless of if there is a null or not, the value will just be arbitrary
##########
arrow-cast/src/display.rs:
##########
@@ -549,3 +553,34 @@ pub fn lexical_to_string<N: lexical_core::ToLexical>(n: N)
-> String {
String::from_utf8_unchecked(buf)
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_array_value_to_string_duration() {
+ let ns_array =
+ Arc::new(DurationNanosecondArray::from(vec![Some(1), None])) as
ArrayRef;
+ assert_eq!(
+ array_value_to_string(&ns_array, 0).unwrap(),
+ "PT0.000000001S"
+ );
+ assert_eq!(array_value_to_string(&ns_array, 1).unwrap(), "");
+
+ let us_array =
+ Arc::new(DurationMicrosecondArray::from(vec![Some(1), None])) as
ArrayRef;
+ assert_eq!(array_value_to_string(&us_array, 0).unwrap(),
"PT0.000001S");
Review Comment:
What format is this? Is it some standard?
##########
arrow-cast/src/display.rs:
##########
@@ -32,13 +32,10 @@ macro_rules! make_string {
($array_type:ty, $column: ident, $row: ident) => {{
let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
- let s = if array.is_null($row) {
Review Comment:
Support getting rid of the redundant is_null check :+1:
##########
arrow-cast/src/display.rs:
##########
@@ -32,13 +32,10 @@ macro_rules! make_string {
($array_type:ty, $column: ident, $row: ident) => {{
let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
- let s = if array.is_null($row) {
- "".to_string()
- } else {
- array.value($row).to_string()
- };
-
- Ok(s)
+ // SAFETY: This is safe because array_value_to_string(the macro
caller) is already
Review Comment:
This isn't actually true, is_null just checks that the access is within the
bounds of the null bitmap, which may be larger than the array's actual length -
either due to the length not being a multiple of 8 or otherwise.
Ignoring that I would be hesistant about this change for two reasons:
* The performance benefit is pretty marginal
* The method comes with a large disclaimer in its doc comment that it is not
optimal, we should probably fix this before reaching for unsafe :sweat_smile:
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]