bcmcmill commented on PR #966:
URL: https://github.com/apache/arrow-ballista/pull/966#issuecomment-1925469188
It would likely be better to update `parse_protobuf_file_scan_config` in
`datafusion-proto` along the lines of the following excerpt, so that the fix
applies to all file types rather than just one.
```rust
pub fn parse_protobuf_file_scan_config(
proto: &protobuf::FileScanExecConf,
registry: &dyn FunctionRegistry,
) -> Result<FileScanConfig> {
let schema: Arc<Schema> = Arc::new(convert_required!(proto.schema)?);
let projection = proto
.projection
.iter()
.map(|i| *i as usize)
.collect::<Vec<_>>();
let projection = if projection.is_empty() {
None
} else {
Some(projection)
};
let statistics = convert_required!(proto.statistics)?;
let file_groups: Vec<Vec<PartitionedFile>> = proto
.file_groups
.iter()
.map(|f| f.try_into())
.collect::<Result<Vec<_>, _>>()?;
let object_store_url = match proto.object_store_url.is_empty() {
false => ObjectStoreUrl::parse(&proto.object_store_url)?,
true => ObjectStoreUrl::local_filesystem(),
};
// extract types of partition columns
let table_partition_cols = proto
.table_partition_cols
.iter()
.map(|col| Ok(schema.field_with_name(col)?.clone()))
.collect::<Result<Vec<_>>>()?;
let mut output_ordering = vec![];
for node_collection in &proto.output_ordering {
let sort_expr = node_collection
.physical_sort_expr_nodes
.iter()
.map(|node| {
let expr = node
.expr
.as_ref()
.map(|e| parse_physical_expr(e.as_ref(), registry,
&schema))
.unwrap()?;
Ok(PhysicalSortExpr {
expr,
options: SortOptions {
descending: !node.asc,
nulls_first: node.nulls_first,
},
})
})
.collect::<Result<Vec<PhysicalSortExpr>>>()?;
output_ordering.push(sort_expr);
}
```
I can open a PR for that over there if that is more appropriate.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]