alamb commented on code in PR #4221:
URL: https://github.com/apache/arrow-datafusion/pull/4221#discussion_r1029481591
##########
datafusion/core/src/datasource/listing_table_factory.rs:
##########
@@ -88,17 +89,45 @@ impl TableProviderFactory for ListingTableFactory {
),
};
- let provided_schema = if cmd.schema.fields().is_empty() {
- None
+ let (provided_schema, table_partition_cols) = if
cmd.schema.fields().is_empty() {
+ (
+ None,
+ cmd.table_partition_cols
+ .iter()
+ .zip((0..cmd.table_partition_cols.len()).map(|_|
DataType::Utf8))
+ .map(|x| (x.0.clone(), x.1))
+ .collect::<Vec<_>>(),
+ )
} else {
- Some(Arc::new(cmd.schema.as_ref().to_owned().into()))
+ let schema: SchemaRef =
Arc::new(cmd.schema.as_ref().to_owned().into());
+ let table_partition_cols = cmd
+ .table_partition_cols
+ .iter()
+ .map(|col| {
+ (
+ col.clone(),
+
schema.field_with_name(col).unwrap().data_type().clone(),
Review Comment:
Are you sure it is ok to `unwrap()` here (which will panic)? Given the
function returns a `Result` perhaps we could return an error if the field was
not present in the schema.
##########
datafusion/proto/src/logical_plan.rs:
##########
@@ -876,7 +890,7 @@ impl AsLogicalPlan for LogicalPlanNode {
FileFormatType::Avro(protobuf::AvroFormat {})
} else {
return Err(proto_error(format!(
- "Error converting file format, {:?} is invalid as
a datafusion foramt.",
+ "Error converting file format, {:?} is invalid as
a datafusion format.",
Review Comment:
👍
##########
datafusion/core/tests/path_partition.rs:
##########
@@ -56,7 +57,11 @@ async fn parquet_distinct_partition_col() -> Result<()> {
"year=2021/month=10/day=09/file.parquet",
"year=2021/month=10/day=28/file.parquet",
],
- &["year", "month", "day"],
+ &[
+ ("year", DataType::Int32),
Review Comment:
Nice
##########
datafusion/core/src/datasource/listing_table_factory.rs:
##########
@@ -88,17 +89,45 @@ impl TableProviderFactory for ListingTableFactory {
),
};
- let provided_schema = if cmd.schema.fields().is_empty() {
- None
+ let (provided_schema, table_partition_cols) = if
cmd.schema.fields().is_empty() {
+ (
+ None,
+ cmd.table_partition_cols
+ .iter()
+ .zip((0..cmd.table_partition_cols.len()).map(|_|
DataType::Utf8))
+ .map(|x| (x.0.clone(), x.1))
Review Comment:
```suggestion
.map(|x| (x.clone(), DataType::Utf8))
```
##########
datafusion/core/src/datasource/listing/table.rs:
##########
@@ -270,16 +268,19 @@ impl ListingOptions {
///
/// ```
/// use std::sync::Arc;
+ /// use arrow::datatypes::DataType;
/// use datafusion::datasource::{listing::ListingOptions,
file_format::parquet::ParquetFormat};
///
/// let listing_options =
ListingOptions::new(Arc::new(ParquetFormat::default()))
- /// .with_table_partition_cols(vec!["col_a".to_string(),
"col_b".to_string()]);
+ /// .with_table_partition_cols(vec![("col_a".to_string(),
DataType::Utf8),
+ /// ("col_b".to_string(), DataType::Utf8)]);
///
- /// assert_eq!(listing_options.table_partition_cols, vec!["col_a",
"col_b"]);
+ /// assert_eq!(listing_options.table_partition_cols,
vec![("col_a".to_string(), DataType::Utf8),
+ /// ("col_b".to_string(), DataType::Utf8)]);
/// ```
pub fn with_table_partition_cols(
mut self,
- table_partition_cols: Vec<String>,
+ table_partition_cols: Vec<(String, DataType)>,
Review Comment:
It would help me read the code if this was a named struct so the code could
refer to `.name` and `.datatype` rather than `.0` and `.1`. But I don't think
it is necessary to merge this PR
```rust
#[derive(Clone)]
struct PartitionColumn {
name: String,
data_type: DataType
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]