rtyler opened a new issue, #10749:
URL: https://github.com/apache/datafusion/issues/10749

   ### Describe the bug
   
   When taking two `DataFrame` objects and running `except`, the function fails
when there are Structs in the schema, but _succeeds_ with simpler schemas.
   
   For example, this works:
   
   ```rust
           let schema = Arc::new(Schema::new(vec![Field::new(
               "value",
               DataType::Int32,
               true),
           ]));
           let batch = RecordBatch::try_new(
               Arc::clone(&schema),
               vec![
                   Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])),
                   ])
           .unwrap();
   
           let updated_batch = RecordBatch::try_new(
               Arc::clone(&schema),
               vec![
                   Arc::new(Int32Array::from(vec![Some(1), Some(12), Some(3)])),
               ],
           )
           .unwrap();
           let _ = 
datafusion::arrow::util::pretty::print_batches(&[batch.clone()]);
           let _ = 
datafusion::arrow::util::pretty::print_batches(&[updated_batch.clone()]);
   
           let ctx = SessionContext::new();
           let before = ctx.read_batch(batch).expect("Failed to make 
DataFrame");
           let after = ctx.read_batch(updated_batch).expect("Failed to make 
DataFrame");
   
           let diff = before.except(after).expect("Failed to 
except").collect().await.expect("Failed to diff");
           assert_eq!(diff.len(), 1);
   ```
   
   ### To Reproduce
   
   ```rust
           let nested_schema = Arc::new(Schema::new(vec![
               Field::new("id", DataType::Int32, true),
               Field::new("lat", DataType::Int32, true),
               Field::new("long", DataType::Int32, true),
           ]));
           let schema = Arc::new(Schema::new(vec![Field::new(
               "value",
               DataType::Int32,
               true),
               Field::new("nested",
                   DataType::Struct(nested_schema.fields.clone()),
                   true)
           ]));
           let batch = RecordBatch::try_new(
               Arc::clone(&schema),
               vec![
                   Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])),
                   Arc::new(StructArray::from(vec![
                       (
                           Arc::new(Field::new("id", DataType::Int32, true)),
                           Arc::new(Int32Array::from(vec![1,2,3])) as ArrayRef
                       ),
                       (
                           Arc::new(Field::new("lat", DataType::Int32, true)),
                           Arc::new(Int32Array::from(vec![1,2,3])) as ArrayRef
                       ),
                       (
                           Arc::new(Field::new("long", DataType::Int32, true)),
                           Arc::new(Int32Array::from(vec![1,2,3])) as ArrayRef
                       ),
                       ]))
                   ])
           .unwrap();
   
           let updated_batch = RecordBatch::try_new(
               Arc::clone(&schema),
               vec![
                   Arc::new(Int32Array::from(vec![Some(1), Some(12), Some(3)])),
                   Arc::new(StructArray::from(vec![
                       (
                           Arc::new(Field::new("id", DataType::Int32, true)),
                           Arc::new(Int32Array::from(vec![1,2,3])) as ArrayRef
                       ),
                       (
                           Arc::new(Field::new("lat", DataType::Int32, true)),
                           Arc::new(Int32Array::from(vec![1,2,3])) as ArrayRef
                       ),
                       (
                           Arc::new(Field::new("long", DataType::Int32, true)),
                           Arc::new(Int32Array::from(vec![1,2,3])) as ArrayRef
                       ),
                       ]))
               ],
           )
           .unwrap();
           let _ = 
datafusion::arrow::util::pretty::print_batches(&[batch.clone()]);
           let _ = 
datafusion::arrow::util::pretty::print_batches(&[updated_batch.clone()]);
   
           let ctx = SessionContext::new();
           let before = ctx.read_batch(batch).expect("Failed to make 
DataFrame");
           let after = ctx.read_batch(updated_batch).expect("Failed to make 
DataFrame");
   
           let diff = before.except(after).expect("Failed to 
except").collect().await.expect("Failed to diff");
           assert_eq!(diff.len(), 1);
   ```
   
   ### Expected behavior
   
   I would expect the above to pass its assertions; instead, this output is produced:
   
   ```
   running 2 tests
   test tests::test_simple ... ok
   test tests::test_with_struct ... FAILED
   
   failures:
   
   ---- tests::test_with_struct stdout ----
   +-------+--------------------------+
   | value | nested                   |
   +-------+--------------------------+
   | 1     | {id: 1, lat: 1, long: 1} |
   | 2     | {id: 2, lat: 2, long: 2} |
   | 3     | {id: 3, lat: 3, long: 3} |
   +-------+--------------------------+
   +-------+--------------------------+
   | value | nested                   |
   +-------+--------------------------+
   | 1     | {id: 1, lat: 1, long: 1} |
   | 12    | {id: 2, lat: 2, long: 2} |
   | 3     | {id: 3, lat: 3, long: 3} |
   +-------+--------------------------+
   thread 'tests::test_with_struct' panicked at except-df-bug/src/lib.rs:74:84:
   Failed to diff: ArrowError(InvalidArgumentError("Invalid comparison 
operation: Struct([Field { name: \"id\", data_type: Int32, nullable: true, 
dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"lat\", 
data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: 
{} }, Field { name: \"long\", data_type: Int32, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }]) IS NOT DISTINCT FROM Struct([Field { 
name: \"id\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: 
false, metadata: {} }, Field { name: \"lat\", data_type: Int32, nullable: true, 
dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"long\", 
data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: 
{} }])"), None)
   note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
   
   
   failures:
       tests::test_with_struct
   
   test result: FAILED. 1 passed; 1 failed; 0 ignored; 0 measured; 0 filtered 
out; finished in 0.01s
   ```
   
   ### Additional context
   
   _No response_


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to