TheBuilderJR commented on PR #15295:
URL: https://github.com/apache/datafusion/pull/15295#issuecomment-2861173475
@kosiew so I think the tricky part is that there are actually multiple
evolutions.
Basically, my code currently looks like this:
```
let config =
ListingTableConfig::new_with_multi_paths(urls);
let mut config = (ListingTableConfig {
options: Some(ListingOptions {
file_sort_order:
vec![vec![col("timestamp_utc").sort(true, true)]],
..config.options.unwrap_or_else(||
ListingOptions::new(Arc::new(ParquetFormat::default()))
)
}),
..config
}).with_schema(Arc::new(schema.clone()))
.with_schema_adapter_factory(adapter_factory);;
```
where `urls` is a list of local paths to Parquet files on disk. For example,
an evolution might look as follows:
schema v1
```
{"fields":[{"name":"body","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"method","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status_code","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"time_taken","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"uid","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"url","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp_utc","data_type":{"Timestamp":["Millisecond","UTC"]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}],"metadata":
{}}
```
schema v2
```
{"fields":[{"name":"body","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"method","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status_code","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"time_taken","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"uid","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"url","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp_utc","data_type":{"Timestamp":["Millisecond","UTC"]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},
{"name":"query_params","data_type":{"Struct":[{"name":"customer_id","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}],"metadata":{}}
```
schema v3
```
{"fields":[{"name":"body","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"method","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status_code","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"time_taken","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"uid","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"url","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp_utc","data_type":{"Timestamp":["Millisecond","UTC"]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},
{"name":"query_params","data_type":{"Struct":[{"name":"customer_id","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"error","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}],"metadata":{}}
```
schema v4
```
{"fields":[{"name":"body","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"method","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"status_code","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"time_taken","data_type":"Float64","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"uid","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"url","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"timestamp_utc","data_type":{"Timestamp":["Millisecond","UTC"]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},
{"name":"query_params","data_type":{"Struct":[{"name":"customer_id","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"document_type","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"fetch_from_source","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"source_system","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}]},"nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}},{"name":"error","data_type":"Utf8","nullable":true,"dict_id":0,"dict_is_ordered":false,"metadata":{}}],"metadata":{}}
```
Maybe there are 10 v1 files, 5 v2 files, 3 v3 files, and 1 v4 file. Ideally,
ListingTableConfig could just derive the mapping from each file. Is that possible
with your abstraction? If so, could you tell me how I should modify my code? I
looked through your PRs and had a hard time figuring out how to scale to N
different evolved schemas.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]