This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new d5e6736dd Improve doc on lowercase treatment of columns on SQL (#3385)
d5e6736dd is described below

commit d5e6736dd286c0c0374bb0f65b88d3564155c976
Author: nbr <[email protected]>
AuthorDate: Wed Oct 12 22:42:20 2022 +0200

    Improve doc on lowercase treatment of columns on SQL (#3385)
    
    * Improve doc
    
    Adds specific note on lowercase treatment of columns on SQL
    
    This example is the first thing newcomers see when they start with 
datafusion (literally the second page on the docs) so it should be clearer.
    
    * Update select.md
    
    add note on the use of default lower-case for SQL query
    
    * prettier this
    
    * improve example file link
    
    * add capitalized example
---
 datafusion/core/tests/capitalized_example.csv |  5 +++++
 docs/source/user-guide/example-usage.md       | 17 +++++++++++------
 docs/source/user-guide/sql/select.md          |  3 +++
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/datafusion/core/tests/capitalized_example.csv 
b/datafusion/core/tests/capitalized_example.csv
new file mode 100644
index 000000000..dbc8f5c5a
--- /dev/null
+++ b/datafusion/core/tests/capitalized_example.csv
@@ -0,0 +1,5 @@
+A,b,c
+1,2,3
+1,10,5
+2,5,6
+2,1,4
\ No newline at end of file
diff --git a/docs/source/user-guide/example-usage.md 
b/docs/source/user-guide/example-usage.md
index 48ca791ff..ced84ffa6 100644
--- a/docs/source/user-guide/example-usage.md
+++ b/docs/source/user-guide/example-usage.md
@@ -19,6 +19,10 @@
 
 # Example Usage
 
+In this example some simple processing is performed on a csv file. Please be 
aware that all identifiers are made lower-case in SQL, so if your csv file has 
capital letters (ex: Name) you should put your column name in double quotes or 
the example won't work.
+
+The following example uses [this 
file](../../../datafusion/core/tests/capitalized_example.csv)
+
 ## Update `Cargo.toml`
 
 Add the following to your `Cargo.toml` file:
@@ -37,10 +41,10 @@ use datafusion::prelude::*;
 async fn main() -> datafusion::error::Result<()> {
   // register the table
   let ctx = SessionContext::new();
-  ctx.register_csv("example", "tests/example.csv", 
CsvReadOptions::new()).await?;
+  ctx.register_csv("example", "tests/capitalized_example.csv", 
CsvReadOptions::new()).await?;
 
   // create a plan to run a SQL query
-  let df = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 
100").await?;
+  let df = ctx.sql("SELECT \"A\", MIN(b) FROM example GROUP BY \"A\" LIMIT 
100").await?;
 
   // execute and print results
   df.show().await?;
@@ -57,10 +61,10 @@ use datafusion::prelude::*;
 async fn main() -> datafusion::error::Result<()> {
   // create the dataframe
   let ctx = SessionContext::new();
-  let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+  let df = ctx.read_csv("tests/capitalized_example.csv", 
CsvReadOptions::new()).await?;
 
-  let df = df.filter(col("a").lt_eq(col("b")))?
-           .aggregate(vec![col("a")], vec![min(col("b"))])?;
+  let df = df.filter(col("A").lt_eq(col("c")))?
+           .aggregate(vec![col("A")], vec![min(col("b"))])?;
 
   // execute and print results
   df.show_limit(100).await?;
@@ -72,8 +76,9 @@ async fn main() -> datafusion::error::Result<()> {
 
 ```text
 +---+--------+
-| a | MIN(b) |
+| A | MIN(b) |
 +---+--------+
+| 2 | 1      |
 | 1 | 2      |
 +---+--------+
 ```
diff --git a/docs/source/user-guide/sql/select.md 
b/docs/source/user-guide/sql/select.md
index 008981fde..3eea252d7 100644
--- a/docs/source/user-guide/sql/select.md
+++ b/docs/source/user-guide/sql/select.md
@@ -20,6 +20,9 @@
 # SELECT syntax
 
 The queries in DataFusion scan data from tables and return 0 or more rows.
+Please be aware that column names in queries are made lower-case, but not on 
the inferred schema. Accordingly, if you
+want to query against a capitalized field, make sure to use double quotes. 
Please see this
+[example](https://arrow.apache.org/datafusion/user-guide/example-usage.html) 
for clarification.
 In this documentation we describe the SQL syntax in DataFusion.
 
 DataFusion supports the following syntax for queries:

Reply via email to