TheBuilderJR commented on PR #15295:
URL: https://github.com/apache/datafusion/pull/15295#issuecomment-2861173475

   @kosiew so I think the tricky part is that there are actually multiple 
evolutions.
   
   Basically my code currently looks like this:
   
   
   ```
                       let config = 
ListingTableConfig::new_with_multi_paths(urls);
   
                       let mut config = (ListingTableConfig {
                           options: Some(ListingOptions {
                               file_sort_order: 
vec![vec![col("timestamp_utc").sort(true, true)]],
                               ..config.options.unwrap_or_else(||
                                   
ListingOptions::new(Arc::new(ParquetFormat::default()))
                               )
                           }),
                           ..config
                       }).with_schema(Arc::new(schema.clone()))
                       .with_schema_adapter_factory(adapter_factory);;
   ```
   
   where urls is a list of local paths to parquet files on disk. For example, 
an evolution might look as follows:
   
   schema v1
   ```
   
{"fields":[{"name":"body","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"method","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status_code","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"time_taken","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"uid","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"url","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp_utc","data_type":{"Timestamp":["Millisecond","UTC"]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}],"metadata":{}}
   ```
   
   schema v2
   ```
   
{"fields":[{"name":"body","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"method","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status_code","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"time_taken","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"uid","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"url","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp_utc","data_type":{"Timestamp":["Millisecond","UTC"]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"query_params","data_type":{"Struct":[{"name":"customer_id","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}],"metadata":{}}
   ```
   
   schema v3
   ```
   
{"fields":[{"name":"body","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"method","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status_code","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"time_taken","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"uid","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"url","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp_utc","data_type":{"Timestamp":["Millisecond","UTC"]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"query_params","data_type":{"Struct":[{"name":"customer_id","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"error","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}],"metadata":{}}
   ```
   
   schema v4
   ```
   
{"fields":[{"name":"body","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"method","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status_code","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"time_taken","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"uid","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"url","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp_utc","data_type":{"Timestamp":["Millisecond","UTC"]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"query_params","data_type":{"Struct":[{"name":"customer_id","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"document_type","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"fetch_from_source","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"source_system","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"error","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}],"metadata":{}}
   ```
   
   Maybe there are 10 v1 files, 5 v2 files, 3 v3 files and 1 v4 file. Ideally 
ListingTableConfig could just derive the mapping from each. Is that possible 
with your abstraction? If so, could you tell me how I should modify my code? I 
looked through your PRs and had a hard time figuring out how to scale to N 
different evolved schemas.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to