eliaperantoni commented on code in PR #15143:
URL: https://github.com/apache/datafusion/pull/15143#discussion_r1988585977


##########
datafusion/sql/tests/cases/diagnostic.rs:
##########
@@ -286,3 +286,121 @@ fn test_invalid_function() -> Result<()> {
     assert_eq!(diag.span, Some(spans["whole"]));
     Ok(())
 }
+
+#[test]
+fn test_scalar_subquery_multiple_columns() -> Result<(), Box<dyn 
std::error::Error>> {
+    let query = "SELECT (SELECT 1 AS /*x*/x/*x*/, 2 AS /*y*/y/*y*/) AS col";
+    let spans = get_spans(query);
+    let diag = do_query(query);
+
+    assert_eq!(
+        diag.message,
+        "Scalar subquery should only return one column"
+    );

Review Comment:
   I would remove the "Scalar" part, I don't the user knows the difference 
between a scalar subuery and an "in" subquery. To them they're just subqueries



##########
datafusion/sql/tests/cases/diagnostic.rs:
##########
@@ -286,3 +286,121 @@ fn test_invalid_function() -> Result<()> {
     assert_eq!(diag.span, Some(spans["whole"]));
     Ok(())
 }
+
+#[test]
+fn test_scalar_subquery_multiple_columns() -> Result<(), Box<dyn 
std::error::Error>> {
+    let query = "SELECT (SELECT 1 AS /*x*/x/*x*/, 2 AS /*y*/y/*y*/) AS col";
+    let spans = get_spans(query);
+    let diag = do_query(query);
+
+    assert_eq!(
+        diag.message,
+        "Scalar subquery should only return one column"
+    );
+
+    let column_count_notes: Vec<_> = diag
+        .notes
+        .iter()
+        .filter(|note| note.message.contains("Found 2 columns"))
+        .collect();
+    assert!(
+        !column_count_notes.is_empty(),
+        "Expected note about column count"
+    );
+
+    let extra_column_notes: Vec<_> = diag
+        .notes
+        .iter()
+        .filter(|note| note.message.contains("Extra column"))
+        .collect();
+    assert_eq!(
+        extra_column_notes.len(),
+        1,
+        "Expected one note about extra column"
+    );
+
+    if let Some(first_note) = diag.notes.first() {
+        assert_eq!(
+            first_note.span,
+            Some(spans["x"]),
+            "Primary diagnostic span should match the first column's span"
+        );
+    }
+
+    if let Some(extra_column_span) = extra_column_notes
+        .first()
+        .and_then(|note| note.span.as_ref())
+    {
+        assert_eq!(
+            *extra_column_span, spans["y"],
+            "Extra column note span should match the second column's span"
+        );
+    }

Review Comment:
   It's a bit hard to read. I think you should do something like this to check 
length of notes and content at the same time.
   
   
   ```suggestion
   assert_eq!(
       diag.notes.iter().map(|n| (n.message, n.span)).collect::<Vec<_>>(),
       vec![
                ("Found 2 columns: x, y", spans["x"]),
                ("Extra column", spans["y"]),
        ]
   );
   ```



##########
datafusion/sql/tests/cases/diagnostic.rs:
##########
@@ -286,3 +286,121 @@ fn test_invalid_function() -> Result<()> {
     assert_eq!(diag.span, Some(spans["whole"]));
     Ok(())
 }
+
+#[test]
+fn test_scalar_subquery_multiple_columns() -> Result<(), Box<dyn 
std::error::Error>> {
+    let query = "SELECT (SELECT 1 AS /*x*/x/*x*/, 2 AS /*y*/y/*y*/) AS col";
+    let spans = get_spans(query);
+    let diag = do_query(query);
+
+    assert_eq!(
+        diag.message,
+        "Scalar subquery should only return one column"
+    );

Review Comment:
   I would assert what `diag.span` is.
   
   Correct me if I'm wrong, but it looks like it wraps the primary column only, 
could we make it encompass the whole `SELECT` block, or the whole subquery?
   
   i.e. I think:
   
   ```
   SELECT * FROM (SELECT a, b, c FROM mytable)
                         ^
                         Too many columns in subquery!
   ```
   
   Is not 100% accurate because there's nothing wrong with `a` itself. There's 
something wrong with the larger scope:
   
   ```
   SELECT * FROM (SELECT a, b, c FROM mytable)
                         ^^^^^^^
                         Too many columns in subquery!
   ```
   
   ```
   SELECT * FROM (SELECT a, b, c FROM mytable)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                 Too many columns in subquery!
   ```



##########
datafusion/sql/tests/cases/diagnostic.rs:
##########
@@ -286,3 +286,121 @@ fn test_invalid_function() -> Result<()> {
     assert_eq!(diag.span, Some(spans["whole"]));
     Ok(())
 }
+
+#[test]
+fn test_scalar_subquery_multiple_columns() -> Result<(), Box<dyn 
std::error::Error>> {
+    let query = "SELECT (SELECT 1 AS /*x*/x/*x*/, 2 AS /*y*/y/*y*/) AS col";
+    let spans = get_spans(query);
+    let diag = do_query(query);
+
+    assert_eq!(
+        diag.message,
+        "Scalar subquery should only return one column"
+    );
+
+    let column_count_notes: Vec<_> = diag
+        .notes
+        .iter()
+        .filter(|note| note.message.contains("Found 2 columns"))
+        .collect();
+    assert!(
+        !column_count_notes.is_empty(),
+        "Expected note about column count"
+    );
+
+    let extra_column_notes: Vec<_> = diag
+        .notes
+        .iter()
+        .filter(|note| note.message.contains("Extra column"))
+        .collect();
+    assert_eq!(
+        extra_column_notes.len(),
+        1,
+        "Expected one note about extra column"
+    );
+
+    if let Some(first_note) = diag.notes.first() {
+        assert_eq!(
+            first_note.span,
+            Some(spans["x"]),
+            "Primary diagnostic span should match the first column's span"
+        );
+    }
+
+    if let Some(extra_column_span) = extra_column_notes
+        .first()
+        .and_then(|note| note.span.as_ref())
+    {
+        assert_eq!(
+            *extra_column_span, spans["y"],
+            "Extra column note span should match the second column's span"
+        );
+    }
+
+    assert!(
+        diag.helps
+            .iter()
+            .any(|help| help.message.contains("Select only one column")),
+        "Expected help message"
+    );

Review Comment:
   ```suggestion
   assert_eq!(
       diag.helps.iter().map(|h| h.message).collect::<Vec<_>>(),
       vec!["Select only one column in a subquery"]
   );
   ```
   
   Shorter and tests length of helps + content at the same time



##########
datafusion/expr/src/logical_plan/plan.rs:
##########
@@ -3617,6 +3620,8 @@ pub struct Subquery {
     pub subquery: Arc<LogicalPlan>,
     /// The outer references used in the subquery
     pub outer_ref_columns: Vec<Expr>,
+    /// Span information for subquery projection columns
+    pub spans: Option<Spans>,

Review Comment:
   I think using a plain `Spans` is better:
   
   ```rust
   pub spans: Spans,
   ```
   
   `Spans` is meant to be used like that since the absence of spans is 
supported:
   
   ```rust
   let spans = Spans::new();
   assert!(spans.first().is_none());
   spans.add_span(Span { .. });
   assert!(spans.first().is_some());
   ```



##########
datafusion/sql/tests/cases/diagnostic.rs:
##########
@@ -286,3 +286,121 @@ fn test_invalid_function() -> Result<()> {
     assert_eq!(diag.span, Some(spans["whole"]));
     Ok(())
 }
+
+#[test]
+fn test_scalar_subquery_multiple_columns() -> Result<(), Box<dyn 
std::error::Error>> {
+    let query = "SELECT (SELECT 1 AS /*x*/x/*x*/, 2 AS /*y*/y/*y*/) AS col";
+    let spans = get_spans(query);
+    let diag = do_query(query);
+
+    assert_eq!(
+        diag.message,
+        "Scalar subquery should only return one column"
+    );
+
+    let column_count_notes: Vec<_> = diag
+        .notes
+        .iter()
+        .filter(|note| note.message.contains("Found 2 columns"))
+        .collect();
+    assert!(
+        !column_count_notes.is_empty(),
+        "Expected note about column count"
+    );
+
+    let extra_column_notes: Vec<_> = diag
+        .notes
+        .iter()
+        .filter(|note| note.message.contains("Extra column"))
+        .collect();
+    assert_eq!(
+        extra_column_notes.len(),
+        1,
+        "Expected one note about extra column"
+    );
+
+    if let Some(first_note) = diag.notes.first() {
+        assert_eq!(
+            first_note.span,
+            Some(spans["x"]),
+            "Primary diagnostic span should match the first column's span"
+        );
+    }
+
+    if let Some(extra_column_span) = extra_column_notes
+        .first()
+        .and_then(|note| note.span.as_ref())
+    {
+        assert_eq!(
+            *extra_column_span, spans["y"],
+            "Extra column note span should match the second column's span"
+        );
+    }
+
+    assert!(
+        diag.helps
+            .iter()
+            .any(|help| help.message.contains("Select only one column")),
+        "Expected help message"
+    );
+
+    Ok(())
+}
+
+#[test]
+fn test_in_subquery_multiple_columns() -> Result<(), Box<dyn 
std::error::Error>> {

Review Comment:
   Same feedback as for the other test :)



##########
datafusion/sql/tests/cases/diagnostic.rs:
##########
@@ -286,3 +286,121 @@ fn test_invalid_function() -> Result<()> {
     assert_eq!(diag.span, Some(spans["whole"]));
     Ok(())
 }
+
+#[test]
+fn test_scalar_subquery_multiple_columns() -> Result<(), Box<dyn 
std::error::Error>> {
+    let query = "SELECT (SELECT 1 AS /*x*/x/*x*/, 2 AS /*y*/y/*y*/) AS col";
+    let spans = get_spans(query);
+    let diag = do_query(query);
+
+    assert_eq!(
+        diag.message,
+        "Scalar subquery should only return one column"
+    );
+
+    let column_count_notes: Vec<_> = diag
+        .notes
+        .iter()
+        .filter(|note| note.message.contains("Found 2 columns"))
+        .collect();
+    assert!(
+        !column_count_notes.is_empty(),
+        "Expected note about column count"
+    );
+
+    let extra_column_notes: Vec<_> = diag
+        .notes
+        .iter()
+        .filter(|note| note.message.contains("Extra column"))
+        .collect();
+    assert_eq!(
+        extra_column_notes.len(),
+        1,
+        "Expected one note about extra column"
+    );
+
+    if let Some(first_note) = diag.notes.first() {
+        assert_eq!(
+            first_note.span,
+            Some(spans["x"]),
+            "Primary diagnostic span should match the first column's span"
+        );
+    }

Review Comment:
   Is this the `Found 2 columns: x, y` note? If so, I'm not sure it should only 
wrap `x`.
   
   Also I would say this is a bit superfluous. In my mind, it's sufficient to:
   
   - Have the `Diagnostic` wrap the whole subquery, or better yet just the 
`SELECT` block, and tell the user that the subquery is returning too many 
columns.
   - Notes on the extra columns.
   - A help, like you added already.
   
   I think this note just repeats the information that's already provided by 
the `Diagnostic`'s error message.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to