alamb commented on code in PR #10404:
URL: https://github.com/apache/datafusion/pull/10404#discussion_r1597163747


##########
datafusion/core/src/datasource/stream.rs:
##########
@@ -58,12 +58,22 @@ impl TableProviderFactory for StreamTableFactory {
         let schema: SchemaRef = Arc::new(cmd.schema.as_ref().into());
         let location = cmd.location.clone();
         let encoding = cmd.file_type.parse()?;
+        let header = if let Ok(opt) = cmd
+            .options
+            .get("format.has_header")
+            .map(|has_header| bool::from_str(has_header))
+            .transpose()
+        {
+            opt.unwrap_or(false)
+        } else {
+            return config_err!("format.has_header can either be true or 
false");

Review Comment:
   I am not quite sure what this error message means. Does it mean  that there 
are inconsistent settings somehow? Perhaps we could improve the text to give 
some hint about how the user can fix whatever the problem is 🤔 



##########
datafusion/core/src/datasource/file_format/csv.rs:
##########
@@ -716,6 +733,10 @@ mod tests {
     async fn query_compress_data(
         file_compression_type: FileCompressionType,
     ) -> Result<()> {
+        let runtime = Arc::new(RuntimeEnv::new(RuntimeConfig::new()).unwrap());
+        let cfg = 
SessionConfig::new().set_str("datafusion.catalog.has_header", "true");
+        let session_state = SessionState::new_with_config_rt(cfg, runtime);

Review Comment:
   I think you can write this like this for extra compile time checking (maybe 
for some other PR)
   
   ```rust
           let mut cfg = SessionConfig::new();
           cfg.options_mut().catalog.has_header = true;
           let session_state = SessionState::new_with_config_rt(cfg, runtime);
   ```



##########
docs/source/user-guide/sql/ddl.md:
##########
@@ -60,9 +60,6 @@ CREATE [UNBOUNDED] EXTERNAL TABLE
 [ IF NOT EXISTS ]
 <TABLE_NAME>[ (<column_definition>) ]
 STORED AS <file_type>
-[ WITH HEADER ROW ]

Review Comment:
   👍 



##########
docs/source/user-guide/cli/datasources.md:
##########
@@ -166,8 +166,8 @@ Register a single file csv datasource with a header row.
 ```sql
 CREATE EXTERNAL TABLE test
 STORED AS CSV
-WITH HEADER ROW
-LOCATION '/path/to/aggregate_test_100.csv';
+LOCATION '/path/to/aggregate_test_100.csv'
+OPTIONS ('format.has_header' 'true');

Review Comment:
   I wonder if we can show the slightly nicer syntax here (and elsewhere in the 
intro docs) 
   
   ```suggestion
   LOCATION '/path/to/aggregate_test_100.csv'
   OPTIONS ('has_header' 'true');
   ```



##########
datafusion/sql/src/statement.rs:
##########
@@ -985,8 +981,52 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         let inline_constraints = 
calc_inline_constraints_from_columns(&columns);
         all_constraints.extend(inline_constraints);
 
+        let mut options_map = HashMap::<String, String>::new();
+        for (key, value) in options {
+            if options_map.contains_key(&key) {
+                return plan_err!("Option {key} is specified multiple times");

Review Comment:
   Can we please add a negative test in .slt for this case (it is a good one)



##########
datafusion/sql/src/parser.rs:
##########
@@ -734,22 +712,15 @@ impl<'a> DFParser<'a> {
                         } else {
                             self.parser.expect_keyword(Keyword::HEADER)?;
                             self.parser.expect_keyword(Keyword::ROW)?;
-                            ensure_not_set(&builder.has_header, "WITH HEADER 
ROW")?;
-                            builder.has_header = Some(true);
+                            return parser_err!("WITH HEADER ROW clause is no 
longer in use. Please use the OPTIONS clause with 'format.has_header' set 
appropriately, e.g., OPTIONS ('format.has_header' 'true')");

Review Comment:
   ❤️ 



##########
datafusion/sql/src/statement.rs:
##########
@@ -985,8 +981,52 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         let inline_constraints = 
calc_inline_constraints_from_columns(&columns);
         all_constraints.extend(inline_constraints);
 
+        let mut options_map = HashMap::<String, String>::new();
+        for (key, value) in options {
+            if options_map.contains_key(&key) {
+                return plan_err!("Option {key} is specified multiple times");
+            }
+
+            let value_string = match value {
+                Value::SingleQuotedString(s) => s.to_string(),
+                Value::DollarQuotedString(s) => s.to_string(),
+                Value::UnQuotedString(s) => s.to_string(),
+                Value::Number(_, _) | Value::Boolean(_) => value.to_string(),
+                Value::DoubleQuotedString(_)
+                | Value::EscapedStringLiteral(_)
+                | Value::NationalStringLiteral(_)
+                | Value::SingleQuotedByteStringLiteral(_)
+                | Value::DoubleQuotedByteStringLiteral(_)
+                | Value::RawStringLiteral(_)
+                | Value::HexStringLiteral(_)
+                | Value::Null
+                | Value::Placeholder(_) => {
+                    return plan_err!(
+                        "Unsupported Value in CREATE EXTERNAL TABLE statement 
{}",
+                        value
+                    );
+                }
+            };
+
+            if !(&key.contains('.')) {
+                // If a config does not belong to any namespace, we assume it 
is

Review Comment:
   I am not sure this commet applies to CREATE EXTERNAL TABLE which didn't have 
format options previously
   
   Can you also please make sure there is test for this alternate syntax (aka 
`OPTIONS ('has_header' 'true')");` instead of `OPTIONS ('format.has_header' 
'true')");` (I may have missed it)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to