alamb commented on a change in pull request #1209:
URL: https://github.com/apache/arrow-rs/pull/1209#discussion_r788025191



##########
File path: arrow/src/array/equal/mod.rs
##########
@@ -1297,4 +1297,38 @@ mod tests {
         );
         test_equal(&a, &b, false);
     }
+
+    #[test]
+    fn test_non_null_empty_strings() {
+        let s = StringArray::from(vec![Some(""), Some(""), Some("")]);
+
+        let string1 = s.data();
+
+        let string2 = ArrayData::builder(DataType::Utf8)
+            .len(string1.len())
+            .buffers(string1.buffers().to_vec())
+            .build()
+            .unwrap();
+
+        // string2 is identical to string1 except that it has no validity 
buffer but since there
+        // are no nulls, string1 and string2 are equal
+        test_equal(string1, &string2, true);

Review comment:
       I verified that this test fails without the code change in this PR
   
   ```
   
   ---- array::equal::tests::test_non_null_empty_strings stdout ----
   thread 'array::equal::tests::test_non_null_empty_strings' panicked at 
'assertion failed: `(left == right)`
     left: `false`,
    right: `true`: 
   ArrayData { data_type: Utf8, len: 3, null_count: 0, offset: 0, buffers: 
[Buffer { data: Bytes { ptr: 0x7fccacb04080, len: 16, data: [0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0] }, offset: 0 }, Buffer { data: Bytes { ptr: 0x80, 
len: 0, data: [] }, offset: 0 }], child_data: [], null_bitmap: Some(Bitmap { 
bits: Buffer { data: Bytes { ptr: 0x7fccacb04100, len: 1, data: [7] }, offset: 
0 } }) }
   ArrayData { data_type: Utf8, len: 3, null_count: 0, offset: 0, buffers: 
[Buffer { data: Bytes { ptr: 0x7fccacb04080, len: 16, data: [0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0] }, offset: 0 }, Buffer { data: Bytes { ptr: 0x80, 
len: 0, data: [] }, offset: 0 }], child_data: [], null_bitmap: None }', 
arrow/src/array/equal/mod.rs:506:9
   ```

##########
File path: arrow/src/array/equal/variable_size.rs
##########
@@ -86,13 +86,13 @@ pub(super) fn variable_sized_equal<T: OffsetSizeTrait>(
             let lhs_pos = lhs_start + i;
             let rhs_pos = rhs_start + i;
 
-            // the null bits can still be `None`, so we don't unwrap
+            // the null bits can still be `None`, indicating that the value is 
valid.

Review comment:
       this is so tricky. I had to read it several times to convince myself it 
was correct (even with the great test case above @helgikrs ) 👍  thank you 

##########
File path: arrow/src/array/equal/mod.rs
##########
@@ -1297,4 +1297,38 @@ mod tests {
         );
         test_equal(&a, &b, false);
     }
+
+    #[test]
+    fn test_non_null_empty_strings() {
+        let s = StringArray::from(vec![Some(""), Some(""), Some("")]);
+
+        let string1 = s.data();
+
+        let string2 = ArrayData::builder(DataType::Utf8)
+            .len(string1.len())
+            .buffers(string1.buffers().to_vec())
+            .build()
+            .unwrap();
+
+        // string2 is identical to string1 except that it has no validity 
buffer but since there

Review comment:
       👍 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to