TheBuilderJR commented on PR #15295: URL: https://github.com/apache/datafusion/pull/15295#issuecomment-2861173475
@kosiew so I think the tricky part is that there are actually multiple evolutions. Basically my code currently looks like this ``` let config = ListingTableConfig::new_with_multi_paths(urls); let mut config = (ListingTableConfig { options: Some(ListingOptions { file_sort_order: vec![vec![col("timestamp_utc").sort(true, true)]], ..config.options.unwrap_or_else(|| ListingOptions::new(Arc::new(ParquetFormat::default())) ) }), ..config }).with_schema(Arc::new(schema.clone())) .with_schema_adapter_factory(adapter_factory);; ``` where urls is a list of local paths to parquet files on disk. For example, an evolution might look as follows: schema v1 ``` {"fields":[{"name":"body","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"method","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status_code","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"time_taken","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"uid","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"url","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp_utc","data_type":{"Timestamp":["Millisecond","UTC"]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}],"metadata": {}} ``` schema v2 ``` 
{"fields":[{"name":"body","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"method","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status_code","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"time_taken","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"uid","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"url","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp_utc","data_type":{"Timestamp":["Millisecond","UTC"]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"query_params","data_type":{"Struct":[{"name":"customer_id","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}],"metadata":{}} ``` schema v3 ``` 
{"fields":[{"name":"body","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"method","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status_code","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"time_taken","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"uid","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"url","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp_utc","data_type":{"Timestamp":["Millisecond","UTC"]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"query_params","data_type":{"Struct":[{"name":"customer_id","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"error","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}],"metadata":{}} ``` schema v4 ``` 
{"fields":[{"name":"body","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"method","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status_code","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"time_taken","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"uid","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"url","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp_utc","data_type":{"Timestamp":["Millisecond","UTC"]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"query_params","data_type":{"Struct":[{"name":"customer_id","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"document_type","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"fetch_from_source","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"source_system","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"error","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}],"metadata":{}} ``` Maybe there are 10 v1 files, 5 v2 files, 3 v3 files, and 1 v4 file. Ideally ListingTableConfig could just derive the mapping from each. Is that possible with your abstraction? If so, could you tell me how I should modify my code? I looked through your PRs and had a hard time figuring out how to scale to N different evolved schemas. 
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org