alamb commented on a change in pull request #910:
URL: https://github.com/apache/arrow-datafusion/pull/910#discussion_r697996186



##########
File path: datafusion/tests/sql.rs
##########
@@ -4382,3 +4387,167 @@ async fn like_on_string_dictionaries() -> Result<()> {
     assert_batches_eq!(expected, &actual);
     Ok(())
 }
+
+#[cfg(feature = "avro")]
+#[tokio::test]
+async fn avro_query() {
+    let mut ctx = ExecutionContext::new();
+    register_alltypes_avro(&mut ctx);
+    // NOTE that string_col is actually a binary column and does not have the UTF8 logical type
+    // so we need an explicit cast
+    let sql = "SELECT id, CAST(string_col AS varchar) FROM alltypes_plain";
+    let actual = execute_to_batches(&mut ctx, sql).await;
+    let expected = vec![
+        "+----+--------------------------+",
+        "| id | CAST(string_col AS Utf8) |",
+        "+----+--------------------------+",
+        "| 4  | 0                        |",
+        "| 5  | 1                        |",
+        "| 6  | 0                        |",
+        "| 7  | 1                        |",
+        "| 2  | 0                        |",
+        "| 3  | 1                        |",
+        "| 0  | 0                        |",
+        "| 1  | 1                        |",
+        "+----+--------------------------+",
+    ];
+
+    assert_batches_eq!(expected, &actual);
+}
+
+#[cfg(feature = "avro")]
+#[tokio::test]
+async fn avro_query_multiple_files() {
+    let tempdir = tempfile::tempdir().unwrap();
+    let table_path = tempdir.path();
+    let testdata = datafusion::test_util::arrow_test_data();
+    let alltypes_plain_file = format!("{}/avro/alltypes_plain.avro", testdata);
+    std::fs::copy(
+        &alltypes_plain_file,
+        format!("{}/alltypes_plain1.avro", table_path.display()),
+    )
+    .unwrap();
+    std::fs::copy(
+        &alltypes_plain_file,
+        format!("{}/alltypes_plain2.avro", table_path.display()),
+    )
+    .unwrap();
+
+    let mut ctx = ExecutionContext::new();
+    ctx.register_avro(
+        "alltypes_plain",
+        table_path.display().to_string().as_str(),
+        AvroReadOptions::default(),
+    )
+    .unwrap();
+    // NOTE that string_col is actually a binary column and does not have the UTF8 logical type
+    // so we need an explicit cast
+    let sql = "SELECT id, CAST(string_col AS varchar) FROM alltypes_plain";
+    let actual = execute_to_batches(&mut ctx, sql).await;
+    let expected = vec![
+        "+----+--------------------------+",
+        "| id | CAST(string_col AS Utf8) |",
+        "+----+--------------------------+",
+        "| 4  | 0                        |",
+        "| 5  | 1                        |",
+        "| 6  | 0                        |",
+        "| 7  | 1                        |",
+        "| 2  | 0                        |",
+        "| 3  | 1                        |",
+        "| 0  | 0                        |",
+        "| 1  | 1                        |",
+        "| 4  | 0                        |",
+        "| 5  | 1                        |",
+        "| 6  | 0                        |",
+        "| 7  | 1                        |",
+        "| 2  | 0                        |",
+        "| 3  | 1                        |",
+        "| 0  | 0                        |",
+        "| 1  | 1                        |",
+        "+----+--------------------------+",
+    ];
+
+    assert_batches_eq!(expected, &actual);
+}
+
+#[cfg(feature = "avro")]
+#[tokio::test]
+async fn avro_single_nan_schema() {
+    let mut ctx = ExecutionContext::new();
+    let testdata = datafusion::test_util::arrow_test_data();
+    ctx.register_avro(
+        "single_nan",
+        &format!("{}/avro/single_nan.avro", testdata),
+        AvroReadOptions::default(),
+    )
+    .unwrap();
+    let sql = "SELECT mycol FROM single_nan";
+    let plan = ctx.create_logical_plan(sql).unwrap();
+    let plan = ctx.optimize(&plan).unwrap();
+    let plan = ctx.create_physical_plan(&plan).unwrap();
+    let results = collect(plan).await.unwrap();
+    for batch in results {
+        assert_eq!(1, batch.num_rows());
+        assert_eq!(1, batch.num_columns());
+    }
+}
+
+#[cfg(feature = "avro")]
+#[tokio::test]
+async fn avro_explain() {
+    let mut ctx = ExecutionContext::new();
+    register_alltypes_avro(&mut ctx);
+
+    let sql = "EXPLAIN SELECT count(*) from alltypes_plain";
+    let actual = execute(&mut ctx, sql).await;
+    let actual = normalize_vec_for_explain(actual);
+    let expected = vec![
+        vec![
+            "logical_plan",
+            "Projection: #COUNT(UInt8(1))\
+            \n  Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1))]]\
+            \n    TableScan: alltypes_plain projection=Some([0])",
+        ],
+        vec![
+            "physical_plan",
+            "ProjectionExec: expr=[COUNT(UInt8(1))@0 as COUNT(UInt8(1))]\
+            \n  HashAggregateExec: mode=Final, gby=[], aggr=[COUNT(UInt8(1))]\
+            \n    CoalescePartitionsExec\
+            \n      HashAggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1))]\
+            \n        RepartitionExec: partitioning=RoundRobinBatch(NUM_CORES)\
+            \n          ExecutionPlan(PlaceHolder)\

Review comment:
       note that the `ExecutionPlan(PlaceHolder)` here can be fixed by implementing `fmt_as` on the Avro `ExecutionPlan`
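
Roughly something like the following would do it (a sketch only -- the node and field names, e.g. `AvroExec`, `files`, and `limit`, are assumptions and may differ from what the PR actually uses):

```rust
use std::fmt;

use datafusion::physical_plan::{DisplayFormatType, ExecutionPlan};

impl ExecutionPlan for AvroExec {
    // ... the existing required methods (schema, children, execute, ...) stay as they are ...

    /// Overriding `fmt_as` replaces the default "ExecutionPlan(PlaceHolder)"
    /// text that shows up in EXPLAIN output with a proper operator name.
    fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result {
        // Hypothetical fields -- adjust to whatever the Avro exec node actually stores.
        write!(f, "AvroExec: files={:?}, limit={:?}", self.files, self.limit)
    }
}
```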

##########
File path: datafusion/tests/sql.rs
##########
@@ -4382,3 +4387,167 @@ async fn like_on_string_dictionaries() -> Result<()> {
     assert_batches_eq!(expected, &actual);
     Ok(())
 }
+
+#[cfg(feature = "avro")]
+#[tokio::test]
+async fn avro_query() {
+    let mut ctx = ExecutionContext::new();
+    register_alltypes_avro(&mut ctx);
+    // NOTE that string_col is actually a binary column and does not have the UTF8 logical type
+    // so we need an explicit cast
+    let sql = "SELECT id, CAST(string_col AS varchar) FROM alltypes_plain";
+    let actual = execute_to_batches(&mut ctx, sql).await;
+    let expected = vec![
+        "+----+--------------------------+",
+        "| id | CAST(string_col AS Utf8) |",
+        "+----+--------------------------+",
+        "| 4  | 0                        |",
+        "| 5  | 1                        |",
+        "| 6  | 0                        |",
+        "| 7  | 1                        |",
+        "| 2  | 0                        |",
+        "| 3  | 1                        |",
+        "| 0  | 0                        |",
+        "| 1  | 1                        |",
+        "+----+--------------------------+",
+    ];
+
+    assert_batches_eq!(expected, &actual);
+}
+
+#[cfg(feature = "avro")]
+#[tokio::test]
+async fn avro_query_multiple_files() {
+    let tempdir = tempfile::tempdir().unwrap();
+    let table_path = tempdir.path();
+    let testdata = datafusion::test_util::arrow_test_data();
+    let alltypes_plain_file = format!("{}/avro/alltypes_plain.avro", testdata);
+    std::fs::copy(
+        &alltypes_plain_file,
+        format!("{}/alltypes_plain1.avro", table_path.display()),
+    )
+    .unwrap();
+    std::fs::copy(
+        &alltypes_plain_file,
+        format!("{}/alltypes_plain2.avro", table_path.display()),
+    )
+    .unwrap();
+
+    let mut ctx = ExecutionContext::new();
+    ctx.register_avro(
+        "alltypes_plain",
+        table_path.display().to_string().as_str(),
+        AvroReadOptions::default(),
+    )
+    .unwrap();
+    // NOTE that string_col is actually a binary column and does not have the UTF8 logical type
+    // so we need an explicit cast
+    let sql = "SELECT id, CAST(string_col AS varchar) FROM alltypes_plain";
+    let actual = execute_to_batches(&mut ctx, sql).await;
+    let expected = vec![
+        "+----+--------------------------+",
+        "| id | CAST(string_col AS Utf8) |",
+        "+----+--------------------------+",
+        "| 4  | 0                        |",
+        "| 5  | 1                        |",
+        "| 6  | 0                        |",
+        "| 7  | 1                        |",
+        "| 2  | 0                        |",
+        "| 3  | 1                        |",
+        "| 0  | 0                        |",
+        "| 1  | 1                        |",
+        "| 4  | 0                        |",
+        "| 5  | 1                        |",
+        "| 6  | 0                        |",
+        "| 7  | 1                        |",
+        "| 2  | 0                        |",
+        "| 3  | 1                        |",
+        "| 0  | 0                        |",
+        "| 1  | 1                        |",
+        "+----+--------------------------+",
+    ];
+
+    assert_batches_eq!(expected, &actual);
+}
+
+#[cfg(feature = "avro")]
+#[tokio::test]
+async fn avro_single_nan_schema() {
+    let mut ctx = ExecutionContext::new();
+    let testdata = datafusion::test_util::arrow_test_data();
+    ctx.register_avro(
+        "single_nan",
+        &format!("{}/avro/single_nan.avro", testdata),
+        AvroReadOptions::default(),
+    )
+    .unwrap();
+    let sql = "SELECT mycol FROM single_nan";
+    let plan = ctx.create_logical_plan(sql).unwrap();
+    let plan = ctx.optimize(&plan).unwrap();
+    let plan = ctx.create_physical_plan(&plan).unwrap();
+    let results = collect(plan).await.unwrap();
+    for batch in results {
+        assert_eq!(1, batch.num_rows());
+        assert_eq!(1, batch.num_columns());
+    }
+}
+
+#[cfg(feature = "avro")]
+#[tokio::test]
+async fn avro_explain() {
+    let mut ctx = ExecutionContext::new();
+    register_alltypes_avro(&mut ctx);
+
+    let sql = "EXPLAIN SELECT count(*) from alltypes_plain";
+    let actual = execute(&mut ctx, sql).await;
+    let actual = normalize_vec_for_explain(actual);
+    let expected = vec![
+        vec![
+            "logical_plan",
+            "Projection: #COUNT(UInt8(1))\
+            \n  Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1))]]\
+            \n    TableScan: alltypes_plain projection=Some([0])",
+        ],
+        vec![
+            "physical_plan",
+            "ProjectionExec: expr=[COUNT(UInt8(1))@0 as COUNT(UInt8(1))]\
+            \n  HashAggregateExec: mode=Final, gby=[], aggr=[COUNT(UInt8(1))]\
+            \n    CoalescePartitionsExec\
+            \n      HashAggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1))]\
+            \n        RepartitionExec: partitioning=RoundRobinBatch(NUM_CORES)\
+            \n          ExecutionPlan(PlaceHolder)\
+            \n",
+        ],
+    ];
+    assert_eq!(expected, actual);
+}
+
+#[cfg(feature = "avro")]
+#[tokio::test]
+async fn avro_explain_analyze() {

Review comment:
       I don't think this particular test adds a lot of test coverage -- the non-Avro version exists to ensure execution metrics are propagated correctly.
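
For reference, the sort of assertion the non-Avro `explain_analyze` test makes is roughly the following (a sketch only; the exact metric strings in the rendered plan depend on the DataFusion version):

```rust
// Hypothetical sketch, not the test from this PR: run EXPLAIN ANALYZE and
// check that runtime metrics made it into the rendered physical plan.
let mut ctx = ExecutionContext::new();
register_alltypes_avro(&mut ctx);

let sql = "EXPLAIN ANALYZE SELECT count(*) FROM alltypes_plain";
let actual = execute_to_batches(&mut ctx, sql).await;
let formatted = arrow::util::pretty::pretty_format_batches(&actual).unwrap();

// The operators that actually executed should report metrics (row counts etc.);
// that propagation is what the non-Avro version of this test covers.
assert!(
    formatted.contains("metrics"),
    "expected runtime metrics in EXPLAIN ANALYZE output:\n{}",
    formatted
);
```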

##########
File path: .github/workflows/rust.yml
##########
@@ -105,13 +105,14 @@ jobs:
         run: |
           export ARROW_TEST_DATA=$(pwd)/testing/data
           export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
-          # run tests on all workspace members with default feature list
-          cargo test
+          # run tests on all workspace members with default feature list + avro

Review comment:
       👍 

##########
File path: datafusion/tests/sql.rs
##########
@@ -4382,3 +4387,167 @@ async fn like_on_string_dictionaries() -> Result<()> {
     assert_batches_eq!(expected, &actual);
     Ok(())
 }
+
+#[cfg(feature = "avro")]
+#[tokio::test]
+async fn avro_query() {
+    let mut ctx = ExecutionContext::new();
+    register_alltypes_avro(&mut ctx);
+    // NOTE that string_col is actually a binary column and does not have the UTF8 logical type
+    // so we need an explicit cast
+    let sql = "SELECT id, CAST(string_col AS varchar) FROM alltypes_plain";
+    let actual = execute_to_batches(&mut ctx, sql).await;
+    let expected = vec![
+        "+----+--------------------------+",
+        "| id | CAST(string_col AS Utf8) |",
+        "+----+--------------------------+",
+        "| 4  | 0                        |",
+        "| 5  | 1                        |",
+        "| 6  | 0                        |",
+        "| 7  | 1                        |",
+        "| 2  | 0                        |",
+        "| 3  | 1                        |",
+        "| 0  | 0                        |",
+        "| 1  | 1                        |",
+        "+----+--------------------------+",
+    ];
+
+    assert_batches_eq!(expected, &actual);
+}
+
+#[cfg(feature = "avro")]
+#[tokio::test]
+async fn avro_query_multiple_files() {

Review comment:
       ❤️ 



