LuQQiu opened a new issue, #15038:
URL: https://github.com/apache/datafusion/issues/15038

   ### Describe the bug
   
   Executing `array_has_any(column_name, [])` results in an error:
   ```
   ArrowError(InvalidArgumentError("RowConverter column schema mismatch, expected Utf8 got Int64"), None)
   ```
   
   The error message is misleading and does not clearly indicate that an empty 
array might be an invalid or unsupported input. The issue could be improved by 
either:
   - Providing a more meaningful error message that explicitly states that an 
empty array is not a valid input.
   - Handling the query more gracefully, ensuring that an empty array input 
does not cause a type mismatch in schema conversion.
   
   The issue can be reproduced with the following script (DataFusion version 44.0):
   ```
   use std::sync::Arc;
   use datafusion::arrow::array::{Int32Array, StringArray, ArrayRef, ListArray};
   use datafusion::arrow::datatypes::{DataType, Field, Schema};
   use datafusion::arrow::record_batch::RecordBatch;
   use datafusion::arrow::buffer::OffsetBuffer;
   use datafusion::datasource::MemTable;
   use datafusion::error::Result;
   use datafusion::prelude::*;
   
   #[tokio::main]
   async fn main() -> Result<()> {
       // Create a simple datafusion table
       let ctx = SessionContext::new();
       
       let schema = Arc::new(Schema::new(vec![
           Field::new("id", DataType::Int32, false),
           Field::new("taxonomy", DataType::List(Arc::new(Field::new(
               "item", 
               DataType::Utf8, 
               true,
           ))), false),
       ]));
       
       let id_array = Arc::new(Int32Array::from(vec![1, 2, 3]));
       
       let values = Arc::new(StringArray::from(vec![
           "category_1", "category_2",  // For id=1
           "category_3",                // For id=2  
           "category_4"                 // For id=3
       ]));
       
       let offsets = OffsetBuffer::new(vec![0, 2, 3, 4].into());
       
       let field = Arc::new(Field::new("item", DataType::Utf8, true));
       let taxonomy_array = Arc::new(ListArray::new(field, offsets, values, 
None));
       
       let batch = RecordBatch::try_new(
           schema.clone(),
           vec![id_array, taxonomy_array],
       )?;
       
       let partitions = vec![vec![batch]];
       let table = MemTable::try_new(schema, partitions)?;
       ctx.register_table("test_table", Arc::new(table))?;
       
       println!("Original data:");
       let df = ctx.table("test_table").await?;
       df.show().await?;
       
       // Execute the problematic query with empty array
       println!("\nExecuting filter with empty array:");
       let query = "SELECT * FROM test_table WHERE array_has_any(taxonomy, [])";
       // print
       // Error: ArrowError(InvalidArgumentError("RowConverter column schema 
mismatch, expected Utf8 got Int64"), None)
       ctx.sql(query).await?.show().await?;
       Ok(())
   }
   ```
   
   ### To Reproduce
   
   _No response_
   
   ### Expected behavior
   
   _No response_
   
   ### Additional context
   
   _No response_


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to