duongcongtoai commented on issue #14218:
URL: https://github.com/apache/datafusion/issues/14218#issuecomment-2627041906

   This error happens when the provider's schema is not aligned with the schema 
of the record batches it produces. In particular, column `__delta_rs_path` has 
type Utf8 according to the table provider, but in the record batches after the 
scan it has type Dict(Int32, Utf8). The error can be reproduced with:
   
   ```
   #[tokio::test]
   async fn test_unnest_provider_with_mistmatch_schema() -> Result<()> {
       use datafusion::datasource::MemTable;
       // define a schema.
       let schema = Arc::new(Schema::new(vec![
           Field::new("column1", DataType::Utf8, false),
           Field::new(
               "arr_col",
               DataType::List(Arc::new(Field::new("item", DataType::Int32, 
true))),
               true,
           ),
       ]));
       let strings = vec!["a", "b", "c"];
       let data = vec![
           Some(vec![Some(1), Some(2)]),
           Some(vec![Some(1), Some(2)]),
           Some(vec![Some(1), Some(2)]),
       ];
       let string_data = ListArray::from_iter_primitive::<Int32Type, _, 
_>(data);
       let keys = Int32Array::from_iter(0..strings.len() as i32);
   
       let utf8_values = StringArray::from(strings.clone());
       let utf8_dict = DictionaryArray::new(keys.clone(), 
Arc::new(utf8_values.clone()));
   
       let batch1 = RecordBatch::try_new(
           schema.clone(),
           vec![Arc::new(utf8_dict.clone()), Arc::new(string_data)],
       )?;
   
       // declare a new context. In spark API, this corresponds to a new spark 
SQLsession
       let ctx = SessionContext::new();
   
       // declare a table in memory. In spark API, this corresponds to 
createDataFrame(...).
       let provider = Arc::new(MemTable::try_new(schema, vec![vec![batch1]])?);
       ctx.register_table("t", provider.clone())?;
       let plan =
           LogicalPlanBuilder::scan("t", 
Arc::new(DefaultTableSource::new(provider)), None)?
               .unnest_column("arr_col")?
               .build()?;
       let df = ctx.execute_logical_plan(plan.clone()).await?;
       let ret = df.collect().await?;
       println!("{:?}", ret);
       Ok(())
   }
   ```
   I'm gathering more context here: have we ever allowed such a schema mismatch 
and implicitly coerced the type at execution time (instead of at plan time)?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to