alamb commented on code in PR #10404:
URL: https://github.com/apache/datafusion/pull/10404#discussion_r1597163747
##########
datafusion/core/src/datasource/stream.rs:
##########
@@ -58,12 +58,22 @@ impl TableProviderFactory for StreamTableFactory {
let schema: SchemaRef = Arc::new(cmd.schema.as_ref().into());
let location = cmd.location.clone();
let encoding = cmd.file_type.parse()?;
+ let header = if let Ok(opt) = cmd
+ .options
+ .get("format.has_header")
+ .map(|has_header| bool::from_str(has_header))
+ .transpose()
+ {
+ opt.unwrap_or(false)
+ } else {
+ return config_err!("format.has_header can either be true or
false");
Review Comment:
I am not quite sure what this error message means. Does it mean that there
are inconsistent settings somehow? Perhaps we could improve the text to give
some hint about how the user can fix whatever the problem is 🤔
##########
datafusion/core/src/datasource/file_format/csv.rs:
##########
@@ -716,6 +733,10 @@ mod tests {
async fn query_compress_data(
file_compression_type: FileCompressionType,
) -> Result<()> {
+ let runtime = Arc::new(RuntimeEnv::new(RuntimeConfig::new()).unwrap());
+ let cfg =
SessionConfig::new().set_str("datafusion.catalog.has_header", "true");
+ let session_state = SessionState::new_with_config_rt(cfg, runtime);
Review Comment:
I think you can write this like this for extra compile time checking (maybe
for some other PR)
```rust
let mut cfg = SessionConfig::new();
cfg.options_mut().catalog.has_header = true;
let session_state = SessionState::new_with_config_rt(cfg, runtime);
```
##########
docs/source/user-guide/sql/ddl.md:
##########
@@ -60,9 +60,6 @@ CREATE [UNBOUNDED] EXTERNAL TABLE
[ IF NOT EXISTS ]
<TABLE_NAME>[ (<column_definition>) ]
STORED AS <file_type>
-[ WITH HEADER ROW ]
Review Comment:
👍
##########
docs/source/user-guide/cli/datasources.md:
##########
@@ -166,8 +166,8 @@ Register a single file csv datasource with a header row.
```sql
CREATE EXTERNAL TABLE test
STORED AS CSV
-WITH HEADER ROW
-LOCATION '/path/to/aggregate_test_100.csv';
+LOCATION '/path/to/aggregate_test_100.csv'
+OPTIONS ('format.has_header' 'true');
Review Comment:
I wonder if we can show the slightly nicer syntax here (and elsewhere in the
intro docs)
```suggestion
LOCATION '/path/to/aggregate_test_100.csv'
OPTIONS ('has_header' 'true');
```
##########
datafusion/sql/src/statement.rs:
##########
@@ -985,8 +981,52 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
let inline_constraints =
calc_inline_constraints_from_columns(&columns);
all_constraints.extend(inline_constraints);
+ let mut options_map = HashMap::<String, String>::new();
+ for (key, value) in options {
+ if options_map.contains_key(&key) {
+ return plan_err!("Option {key} is specified multiple times");
Review Comment:
Can we please add a negative test in .slt for this case (it is a good one)
##########
datafusion/sql/src/parser.rs:
##########
@@ -734,22 +712,15 @@ impl<'a> DFParser<'a> {
} else {
self.parser.expect_keyword(Keyword::HEADER)?;
self.parser.expect_keyword(Keyword::ROW)?;
- ensure_not_set(&builder.has_header, "WITH HEADER
ROW")?;
- builder.has_header = Some(true);
+ return parser_err!("WITH HEADER ROW clause is no
longer in use. Please use the OPTIONS clause with 'format.has_header' set
appropriately, e.g., OPTIONS ('format.has_header' 'true')");
Review Comment:
❤️
##########
datafusion/sql/src/statement.rs:
##########
@@ -985,8 +981,52 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
let inline_constraints =
calc_inline_constraints_from_columns(&columns);
all_constraints.extend(inline_constraints);
+ let mut options_map = HashMap::<String, String>::new();
+ for (key, value) in options {
+ if options_map.contains_key(&key) {
+ return plan_err!("Option {key} is specified multiple times");
+ }
+
+ let value_string = match value {
+ Value::SingleQuotedString(s) => s.to_string(),
+ Value::DollarQuotedString(s) => s.to_string(),
+ Value::UnQuotedString(s) => s.to_string(),
+ Value::Number(_, _) | Value::Boolean(_) => value.to_string(),
+ Value::DoubleQuotedString(_)
+ | Value::EscapedStringLiteral(_)
+ | Value::NationalStringLiteral(_)
+ | Value::SingleQuotedByteStringLiteral(_)
+ | Value::DoubleQuotedByteStringLiteral(_)
+ | Value::RawStringLiteral(_)
+ | Value::HexStringLiteral(_)
+ | Value::Null
+ | Value::Placeholder(_) => {
+ return plan_err!(
+ "Unsupported Value in CREATE EXTERNAL TABLE statement
{}",
+ value
+ );
+ }
+ };
+
+ if !(&key.contains('.')) {
+ // If a config does not belong to any namespace, we assume it
is
Review Comment:
I am not sure this commet applies to CREATE EXTERNAL TABLE which didn't have
format options previously
Can you also please make sure there is test for this alternate syntax (aka
`OPTIONS ('has_header' 'true')");` instead of `OPTIONS ('format.has_header'
'true')");` (I may have missed it)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]