This is an automated email from the ASF dual-hosted git repository.

xudong963 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 78ac53ad3 Add sqllogictests (v0) (#4395)
78ac53ad3 is described below

commit 78ac53ad334fdc97c6520767172686fd2f986c03
Author: mvanschellebeeck <[email protected]>
AuthorDate: Thu Dec 1 08:28:30 2022 -0500

    Add sqllogictests (v0) (#4395)
    
    * sqllogictests v0
    
    * Add CI workflow
    
    * Add license
    
    * Run linter + remove submodule requirement in CI
    
    * Add submodules back
    
    * Remove files
    
    * Address comments:
    
    * Move sqllogic tests in datafusion/core/tests
    
    * Update README
    
    * Add licences
    
    * Update CI check
    
    * rust_lint.sh
    
    * Run prettier on readme
    
    * Fix checks
    
    * New line (windows)
    
    * sqllogictests don't parse correctly on windows - ignore windows
    
    * empty commit - rerun CI
    
    * Add LICENSE.txt and NOTICE.txt back:
    
    * pyarrow fix
---
 .github/workflows/rust.yml                         |  28 +
 datafusion-cli/Cargo.lock                          | 150 +++-
 datafusion/core/Cargo.toml                         |   6 +
 datafusion/core/tests/sqllogictests/.gitignore     |   1 +
 datafusion/core/tests/sqllogictests/README.md      |  63 ++
 datafusion/core/tests/sqllogictests/src/main.rs    | 136 ++++
 datafusion/core/tests/sqllogictests/src/setup.rs   | 201 +++++
 datafusion/core/tests/sqllogictests/src/utils.rs   |  40 +
 .../tests/sqllogictests/test_files/aggregate.slt   | 885 +++++++++++++++++++++
 .../sqllogictests/test_files/arrow_typeof.slt      |  82 ++
 10 files changed, 1569 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index e6ab64dde..e74d87e60 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -517,3 +517,31 @@ jobs:
           # If you encounter an error, run './dev/update_config_docs.sh' and 
commit
           ./dev/update_config_docs.sh
           git diff --exit-code
+
+  # Run sqllogictests
+  sql-logic-tests:
+    name: run sqllogictests
+    needs: [linux-build-lib]
+    runs-on: ubuntu-latest
+    container:
+      image: amd64/rust
+      env:
+        # Disable full debug symbol generation to speed up CI build and keep 
memory down
+        # "1" means line tables only, which is useful for panic tracebacks.
+        RUSTFLAGS: "-C debuginfo=1"
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: true
+      - name: Cache Cargo
+        uses: actions/cache@v3
+        with:
+          path: /github/home/.cargo
+          # this key equals the ones on `linux-build-lib` for re-use
+          key: cargo-cache-
+      - name: Setup Rust toolchain
+        uses: ./.github/actions/setup-builder
+        with:
+          rust-version: stable
+      - name: Run sqllogictests
+        run: cargo test -p datafusion --test sqllogictests
\ No newline at end of file
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index f09e8be21..d2dadb58d 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -233,9 +233,9 @@ dependencies = [
 
 [[package]]
 name = "async-trait"
-version = "0.1.58"
+version = "0.1.59"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "1e805d94e6b5001b651426cf4cd446b1ab5f319d27bab5c644f61de0a804360c"
+checksum = "31e6e93155431f3931513b243d371981bb2770112b370c82745a1d19d2f99364"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -248,7 +248,7 @@ version = "0.2.14"
 source = "registry+https://github.com/rust-lang/crates.io-index";
 checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
 dependencies = [
- "hermit-abi",
+ "hermit-abi 0.1.19",
  "libc",
  "winapi",
 ]
@@ -282,9 +282,9 @@ dependencies = [
 
 [[package]]
 name = "blake3"
-version = "1.3.2"
+version = "1.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "895adc16c8b3273fbbc32685a7d55227705eda08c01e77704020f3491924b44b"
+checksum = "42ae2468a89544a466886840aa467a25b766499f4f04bf7d9fcd10ecee9fccef"
 dependencies = [
  "arrayref",
  "arrayvec",
@@ -411,8 +411,8 @@ checksum = 
"71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5"
 dependencies = [
  "atty",
  "bitflags",
- "clap_derive",
- "clap_lex",
+ "clap_derive 3.2.18",
+ "clap_lex 0.2.4",
  "indexmap",
  "once_cell",
  "strsim",
@@ -420,6 +420,21 @@ dependencies = [
  "textwrap",
 ]
 
+[[package]]
+name = "clap"
+version = "4.0.29"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "4d63b9e9c07271b9957ad22c173bae2a4d9a81127680962039296abcd2f8251d"
+dependencies = [
+ "bitflags",
+ "clap_derive 4.0.21",
+ "clap_lex 0.3.0",
+ "is-terminal",
+ "once_cell",
+ "strsim",
+ "termcolor",
+]
+
 [[package]]
 name = "clap_derive"
 version = "3.2.18"
@@ -433,6 +448,19 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "clap_derive"
+version = "4.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "0177313f9f02afc995627906bbd8967e2be069f5261954222dac78290c2b9014"
+dependencies = [
+ "heck",
+ "proc-macro-error",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "clap_lex"
 version = "0.2.4"
@@ -442,6 +470,15 @@ dependencies = [
  "os_str_bytes",
 ]
 
+[[package]]
+name = "clap_lex"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8"
+dependencies = [
+ "os_str_bytes",
+]
+
 [[package]]
 name = "clipboard-win"
 version = "4.4.2"
@@ -655,6 +692,7 @@ dependencies = [
  "pin-project-lite",
  "rand",
  "smallvec",
+ "sqllogictest",
  "sqlparser",
  "tempfile",
  "tokio",
@@ -670,7 +708,7 @@ name = "datafusion-cli"
 version = "14.0.0"
 dependencies = [
  "arrow",
- "clap",
+ "clap 3.2.23",
  "datafusion",
  "dirs",
  "env_logger",
@@ -765,6 +803,12 @@ dependencies = [
  "sqlparser",
 ]
 
+[[package]]
+name = "difference"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
+
 [[package]]
 name = "digest"
 version = "0.10.6"
@@ -1118,6 +1162,15 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "hermit-abi"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "http"
 version = "0.2.8"
@@ -1256,9 +1309,9 @@ checksum = 
"8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02"
 
 [[package]]
 name = "io-lifetimes"
-version = "1.0.1"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "a7d367024b3f3414d8e01f437f704f41a9f64ab36f9067fa73e526ad4c763c87"
+checksum = "46112a93252b123d31a119a8d1a1ac19deac4fac6e0e8b0df58f0d4e5870e63c"
 dependencies = [
  "libc",
  "windows-sys",
@@ -1270,6 +1323,18 @@ version = "2.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index";
 checksum = "f88c5561171189e69df9d98bcf18fd5f9558300f7ea7b801eb8a0fd748bd8745"
 
+[[package]]
+name = "is-terminal"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "927609f78c2913a6f6ac3c27a4fe87f43e2a35367c0c4b0f8265e8f49a104330"
+dependencies = [
+ "hermit-abi 0.2.6",
+ "io-lifetimes",
+ "rustix",
+ "windows-sys",
+]
+
 [[package]]
 name = "itertools"
 version = "0.10.5"
@@ -1401,6 +1466,17 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "libtest-mimic"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "d7b603516767d1ab23d0de09d023e62966c3322f7148297c35cf3d97aa8b37fa"
+dependencies = [
+ "clap 4.0.29",
+ "termcolor",
+ "threadpool",
+]
+
 [[package]]
 name = "link-cplusplus"
 version = "1.0.7"
@@ -1640,7 +1716,7 @@ version = "1.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
 checksum = "f6058e64324c71e02bc2b150e4f3bc8286db6c83092132ffa3f6b1eab0f9def5"
 dependencies = [
- "hermit-abi",
+ "hermit-abi 0.1.19",
  "libc",
 ]
 
@@ -1705,9 +1781,9 @@ dependencies = [
 
 [[package]]
 name = "parking_lot_core"
-version = "0.9.4"
+version = "0.9.5"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "4dc9e0dc2adc1c69d09143aff38d3d30c5c3f0df0dad82e6d25547af174ebec0"
+checksum = "7ff9f3fef3968a3ec5945535ed654cb38ff72d7495a25619e2247fb15a2ed9ba"
 dependencies = [
  "cfg-if",
  "libc",
@@ -1992,9 +2068,9 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.36.3"
+version = "0.36.4"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "0b1fbb4dfc4eb1d390c02df47760bb19a84bb80b301ecc947ab5406394d8223e"
+checksum = "cb93e85278e08bb5788653183213d3a60fc242b10cb9be96586f5a73dcb67c23"
 dependencies = [
  "bitflags",
  "errno",
@@ -2099,18 +2175,18 @@ checksum = 
"0772c5c30e1a0d91f6834f8e545c69281c099dfa9a3ac58d96a9fd629c8d4898"
 
 [[package]]
 name = "serde"
-version = "1.0.147"
+version = "1.0.148"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965"
+checksum = "e53f64bb4ba0191d6d0676e1b141ca55047d83b74f5607e6d8eb88126c52c2dc"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.147"
+version = "1.0.148"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852"
+checksum = "a55492425aa53521babf6137309e7d34c20bbfbbfcfe2c7f3a047fd1f6b92c0c"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -2210,6 +2286,25 @@ version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index";
 checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
 
+[[package]]
+name = "sqllogictest"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "ba41e01d229d7725401de371e323851f82d839d68732a06162405362b60852fe"
+dependencies = [
+ "async-trait",
+ "difference",
+ "futures",
+ "glob",
+ "humantime",
+ "itertools",
+ "libtest-mimic",
+ "regex",
+ "tempfile",
+ "thiserror",
+ "tracing",
+]
+
 [[package]]
 name = "sqlparser"
 version = "0.27.0"
@@ -2264,9 +2359,9 @@ checksum = 
"6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
 
 [[package]]
 name = "syn"
-version = "1.0.103"
+version = "1.0.104"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d"
+checksum = "4ae548ec36cf198c0ef7710d3c230987c2d6d7bd98ad6edc0274462724c585ce"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -2322,6 +2417,15 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "threadpool"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa"
+dependencies = [
+ "num_cpus",
+]
+
 [[package]]
 name = "thrift"
 version = "0.17.0"
@@ -2803,9 +2907,9 @@ dependencies = [
 
 [[package]]
 name = "zstd-sys"
-version = "2.0.3+zstd.1.5.2"
+version = "2.0.4+zstd.1.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "44ccf97612ac95f3ccb89b2d7346b345e52f1c3019be4984f0455fb4ba991f8a"
+checksum = "4fa202f2ef00074143e219d15b62ffc317d17cc33909feac471c044087cad7b0"
 dependencies = [
  "cc",
  "libc",
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 228da1eb4..e5c802ef1 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -90,6 +90,7 @@ pyo3 = { version = "0.17.1", optional = true }
 rand = "0.8"
 rayon = { version = "1.5", optional = true }
 smallvec = { version = "1.6", features = ["union"] }
+sqllogictest = "0.8.0"
 sqlparser = "0.27"
 tempfile = "3"
 tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", 
"sync", "fs", "parking_lot"] }
@@ -156,3 +157,8 @@ required-features = ["jit"]
 [[bench]]
 harness = false
 name = "merge"
+
+[[test]]
+harness = false
+name = "sqllogictests"
+path = "tests/sqllogictests/src/main.rs"
diff --git a/datafusion/core/tests/sqllogictests/.gitignore 
b/datafusion/core/tests/sqllogictests/.gitignore
new file mode 100644
index 000000000..8e5bbf044
--- /dev/null
+++ b/datafusion/core/tests/sqllogictests/.gitignore
@@ -0,0 +1 @@
+*.py
\ No newline at end of file
diff --git a/datafusion/core/tests/sqllogictests/README.md 
b/datafusion/core/tests/sqllogictests/README.md
new file mode 100644
index 000000000..550e4a558
--- /dev/null
+++ b/datafusion/core/tests/sqllogictests/README.md
@@ -0,0 +1,63 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+#### Overview
+
+This is the Datafusion implementation of 
[sqllogictest](https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki). We 
use [sqllogictest-rs](https://github.com/risinglightdb/sqllogictest-rs) as a 
parser/runner of `.slt` files in `test_files`.
+
+#### Running tests
+
+`cargo test -p datafusion --test sqllogictests`
+
+#### sqllogictests
+
+> :warning: **Warning**:Datafusion's sqllogictest implementation and migration 
is still in progress. Definitions taken from 
https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki
+
+sqllogictest is a program originally written for SQLite to verify the 
correctness of SQL queries against the SQLite engine. The program is 
engine-agnostic and can parse sqllogictest files (`.slt`), runs queries against 
an SQL engine and compare the output to the expected output.
+
+Tests in the `.slt` file are a sequence of query record generally starting 
with `CREATE` statements to populate tables and then further queries to test 
the populated data (arrow-datafusion exception).
+
+Query records follow the format:
+
+```sql
+# <test_name>
+query <type_string> <sort_mode> <label>
+<sql_query>
+----
+<expected_result>
+```
+
+- `test_name`: Uniquely identify the test name (arrow-datafusion only)
+- `type_string`: A short string that specifies the number of result columns 
and the expected datatype of each result column. There is one character in the 
<type_string> for each result column. The characters codes are "T" for a text 
result, "I" for an integer result, and "R" for a floating-point result.
+- (Optional) `label`: sqllogictest stores a hash of the results of this query 
under the given label. If the label is reused, then sqllogictest verifies that 
the results are the same. This can be used to verify that two or more queries 
in the same test script that are logically equivalent always generate the same 
output.
+- `expected_result`: In the results section, integer values are rendered as if 
by printf("%d"). Floating point values are rendered as if by printf("%.3f"). 
NULL values are rendered as "NULL". Empty strings are rendered as "(empty)". 
Within non-empty strings, all control characters and unprintable characters are 
rendered as "@".
+- `sort_mode`: If included, it must be one of "nosort", "rowsort", or 
"valuesort". The default is "nosort". In nosort mode, the results appear in 
exactly the order in which they were received from the database engine. The 
nosort mode should only be used on queries that have an ORDER BY clause or 
which only have a single row of result, since otherwise the order of results is 
undefined and might vary from one database engine to another. The "rowsort" 
mode gathers all output from the databa [...]
+
+##### Example
+
+```sql
+# group_by_distinct
+query TTI
+SELECT a, b, COUNT(DISTINCT c) FROM my_table GROUP BY a, b ORDER BY a, b
+----
+foo bar 10
+foo baz 5
+foo     4
+        3
+```
diff --git a/datafusion/core/tests/sqllogictests/src/main.rs 
b/datafusion/core/tests/sqllogictests/src/main.rs
new file mode 100644
index 000000000..4d2f54222
--- /dev/null
+++ b/datafusion/core/tests/sqllogictests/src/main.rs
@@ -0,0 +1,136 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use async_trait::async_trait;
+use datafusion::arrow::csv::WriterBuilder;
+use datafusion::arrow::record_batch::RecordBatch;
+use datafusion::prelude::SessionContext;
+use std::path::PathBuf;
+use std::time::Duration;
+
+use sqllogictest::TestError;
+pub type Result<T> = std::result::Result<T, TestError>;
+
+mod setup;
+mod utils;
+
+const TEST_DIRECTORY: &str = "tests/sqllogictests/test_files";
+const TEST_CATEGORIES: [TestCategory; 2] =
+    [TestCategory::Aggregate, TestCategory::ArrowTypeOf];
+
+pub enum TestCategory {
+    Aggregate,
+    ArrowTypeOf,
+}
+
+impl TestCategory {
+    fn as_str(&self) -> &'static str {
+        match self {
+            TestCategory::Aggregate => "Aggregate",
+            TestCategory::ArrowTypeOf => "ArrowTypeOf",
+        }
+    }
+
+    fn test_filename(&self) -> &'static str {
+        match self {
+            TestCategory::Aggregate => "aggregate.slt",
+            TestCategory::ArrowTypeOf => "arrow_typeof.slt",
+        }
+    }
+
+    async fn register_test_tables(&self, ctx: &SessionContext) {
+        println!("[{}] Registering tables", self.as_str());
+        match self {
+            TestCategory::Aggregate => 
setup::register_aggregate_tables(ctx).await,
+            TestCategory::ArrowTypeOf => (),
+        }
+    }
+}
+
+pub struct DataFusion {
+    ctx: SessionContext,
+    test_category: TestCategory,
+}
+
+#[async_trait]
+impl sqllogictest::AsyncDB for DataFusion {
+    type Error = TestError;
+
+    async fn run(&mut self, sql: &str) -> Result<String> {
+        println!(
+            "[{}] Running query: \"{}\"",
+            self.test_category.as_str(),
+            sql
+        );
+        let result = run_query(&self.ctx, sql).await?;
+        Ok(result)
+    }
+
+    /// Engine name of current database.
+    fn engine_name(&self) -> &str {
+        "DataFusion"
+    }
+
+    /// [`Runner`] calls this function to perform sleep.
+    ///
+    /// The default implementation is `std::thread::sleep`, which is 
universial to any async runtime
+    /// but would block the current thread. If you are running in tokio 
runtime, you should override
+    /// this by `tokio::time::sleep`.
+    async fn sleep(dur: Duration) {
+        tokio::time::sleep(dur).await;
+    }
+}
+
+#[tokio::main]
+pub async fn main() -> Result<()> {
+    for test_category in TEST_CATEGORIES {
+        let filename = PathBuf::from(format!(
+            "{}/{}",
+            TEST_DIRECTORY,
+            test_category.test_filename()
+        ));
+        let ctx = SessionContext::new();
+        test_category.register_test_tables(&ctx).await;
+
+        if !cfg!(target_os = "windows") {
+            let mut tester = sqllogictest::Runner::new(DataFusion { ctx, 
test_category });
+            // TODO: use tester.run_parallel_async()
+            tester.run_file_async(filename).await?;
+        }
+    }
+
+    Ok(())
+}
+
+fn format_batches(batches: &[RecordBatch]) -> Result<String> {
+    let mut bytes = vec![];
+    {
+        let builder = 
WriterBuilder::new().has_headers(false).with_delimiter(b' ');
+        let mut writer = builder.build(&mut bytes);
+        for batch in batches {
+            writer.write(batch).unwrap();
+        }
+    }
+    Ok(String::from_utf8(bytes).unwrap())
+}
+
+async fn run_query(ctx: &SessionContext, sql: impl Into<String>) -> 
Result<String> {
+    let df = ctx.sql(&sql.into()).await.unwrap();
+    let results: Vec<RecordBatch> = df.collect().await.unwrap();
+    let formatted_batches = format_batches(&results)?;
+    Ok(formatted_batches)
+}
diff --git a/datafusion/core/tests/sqllogictests/src/setup.rs 
b/datafusion/core/tests/sqllogictests/src/setup.rs
new file mode 100644
index 000000000..1e38d4860
--- /dev/null
+++ b/datafusion/core/tests/sqllogictests/src/setup.rs
@@ -0,0 +1,201 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::{
+    arrow::{
+        array::{
+            ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, 
Int64Array,
+            Int8Array, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
+        },
+        datatypes::{DataType, Field, Schema},
+        record_batch::RecordBatch,
+    },
+    datasource::MemTable,
+    prelude::{CsvReadOptions, SessionContext},
+    test_util,
+};
+use std::sync::Arc;
+
+use crate::utils;
+
+pub async fn register_aggregate_tables(ctx: &SessionContext) {
+    register_aggregate_csv_by_sql(ctx).await;
+    register_aggregate_test_100(ctx).await;
+    register_decimal_table(ctx);
+    register_median_test_tables(ctx);
+    register_test_data(ctx);
+}
+
+fn register_median_test_tables(ctx: &SessionContext) {
+    // Register median tables
+    let items: Vec<(&str, DataType, ArrayRef)> = vec![
+        (
+            "i8",
+            DataType::Int8,
+            Arc::new(Int8Array::from(vec![i8::MIN, i8::MIN, 100, i8::MAX])),
+        ),
+        (
+            "i16",
+            DataType::Int16,
+            Arc::new(Int16Array::from(vec![i16::MIN, i16::MIN, 100, 
i16::MAX])),
+        ),
+        (
+            "i32",
+            DataType::Int32,
+            Arc::new(Int32Array::from(vec![i32::MIN, i32::MIN, 100, 
i32::MAX])),
+        ),
+        (
+            "i64",
+            DataType::Int64,
+            Arc::new(Int64Array::from(vec![i64::MIN, i64::MIN, 100, 
i64::MAX])),
+        ),
+        (
+            "u8",
+            DataType::UInt8,
+            Arc::new(UInt8Array::from(vec![u8::MIN, u8::MIN, 100, u8::MAX])),
+        ),
+        (
+            "u16",
+            DataType::UInt16,
+            Arc::new(UInt16Array::from(vec![u16::MIN, u16::MIN, 100, 
u16::MAX])),
+        ),
+        (
+            "u32",
+            DataType::UInt32,
+            Arc::new(UInt32Array::from(vec![u32::MIN, u32::MIN, 100, 
u32::MAX])),
+        ),
+        (
+            "u64",
+            DataType::UInt64,
+            Arc::new(UInt64Array::from(vec![u64::MIN, u64::MIN, 100, 
u64::MAX])),
+        ),
+        (
+            "f32",
+            DataType::Float32,
+            Arc::new(Float32Array::from(vec![1.1, 4.4, 5.5, 3.3, 2.2])),
+        ),
+        (
+            "f64",
+            DataType::Float64,
+            Arc::new(Float64Array::from(vec![1.1, 4.4, 5.5, 3.3, 2.2])),
+        ),
+        (
+            "f64_nan",
+            DataType::Float64,
+            Arc::new(Float64Array::from(vec![1.1, f64::NAN, f64::NAN, 
f64::NAN])),
+        ),
+    ];
+
+    for (name, data_type, values) in items {
+        let batch = RecordBatch::try_new(
+            Arc::new(Schema::new(vec![Field::new("a", data_type, false)])),
+            vec![values],
+        )
+        .unwrap();
+        let table_name = &format!("median_{}", name);
+        ctx.register_batch(table_name, batch).unwrap();
+    }
+}
+
+async fn register_aggregate_csv_by_sql(ctx: &SessionContext) {
+    let test_data = datafusion::test_util::arrow_test_data();
+
+    let df = ctx
+        .sql(&format!(
+            "
+    CREATE EXTERNAL TABLE aggregate_test_100_by_sql (
+        c1  VARCHAR NOT NULL,
+        c2  TINYINT NOT NULL,
+        c3  SMALLINT NOT NULL,
+        c4  SMALLINT NOT NULL,
+        c5  INTEGER NOT NULL,
+        c6  BIGINT NOT NULL,
+        c7  SMALLINT NOT NULL,
+        c8  INT NOT NULL,
+        c9  INT UNSIGNED NOT NULL,
+        c10 BIGINT UNSIGNED NOT NULL,
+        c11 FLOAT NOT NULL,
+        c12 DOUBLE NOT NULL,
+        c13 VARCHAR NOT NULL
+    )
+    STORED AS CSV
+    WITH HEADER ROW
+    LOCATION '{}/csv/aggregate_test_100.csv'
+    ",
+            test_data
+        ))
+        .await
+        .expect("Creating dataframe for CREATE EXTERNAL TABLE");
+
+    // Mimic the CLI and execute the resulting plan -- even though it
+    // is effectively a no-op (returns zero rows)
+    let results = df.collect().await.expect("Executing CREATE EXTERNAL TABLE");
+    assert!(
+        results.is_empty(),
+        "Expected no rows from executing CREATE EXTERNAL TABLE"
+    );
+}
+
+fn register_test_data(ctx: &SessionContext) {
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("c1", DataType::Int64, true),
+        Field::new("c2", DataType::Int64, true),
+    ]));
+
+    let data = RecordBatch::try_new(
+        schema,
+        vec![
+            Arc::new(Int64Array::from(vec![
+                Some(0),
+                Some(1),
+                None,
+                Some(3),
+                Some(3),
+            ])),
+            Arc::new(Int64Array::from(vec![
+                None,
+                Some(1),
+                Some(1),
+                Some(2),
+                Some(2),
+            ])),
+        ],
+    )
+    .unwrap();
+
+    ctx.register_batch("test", data).unwrap();
+}
+
+fn register_decimal_table(ctx: &SessionContext) {
+    let batch_decimal = utils::make_decimal();
+    let schema = batch_decimal.schema();
+    let partitions = vec![vec![batch_decimal]];
+    let provider = Arc::new(MemTable::try_new(schema, partitions).unwrap());
+    ctx.register_table("d_table", provider).unwrap();
+}
+
+async fn register_aggregate_test_100(ctx: &SessionContext) {
+    let test_data = datafusion::test_util::arrow_test_data();
+    let schema = test_util::aggr_test_schema();
+    ctx.register_csv(
+        "aggregate_test_100",
+        &format!("{}/csv/aggregate_test_100.csv", test_data),
+        CsvReadOptions::new().schema(&schema),
+    )
+    .await
+    .unwrap();
+}
diff --git a/datafusion/core/tests/sqllogictests/src/utils.rs 
b/datafusion/core/tests/sqllogictests/src/utils.rs
new file mode 100644
index 000000000..4d064a76e
--- /dev/null
+++ b/datafusion/core/tests/sqllogictests/src/utils.rs
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::arrow::{
+    array::{Array, Decimal128Builder},
+    datatypes::{Field, Schema},
+    record_batch::RecordBatch,
+};
+use std::sync::Arc;
+
+// TODO: move this to datafusion::test_utils?
+pub fn make_decimal() -> RecordBatch {
+    let mut decimal_builder = Decimal128Builder::with_capacity(20);
+    for i in 110000..110010 {
+        decimal_builder.append_value(i as i128);
+    }
+    for i in 100000..100010 {
+        decimal_builder.append_value(-i as i128);
+    }
+    let array = decimal_builder
+        .finish()
+        .with_precision_and_scale(10, 3)
+        .unwrap();
+    let schema = Schema::new(vec![Field::new("c1", array.data_type().clone(), 
true)]);
+    RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array)]).unwrap()
+}
diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt 
b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
new file mode 100644
index 000000000..11030446d
--- /dev/null
+++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -0,0 +1,885 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# FIX: custom absolute values
+# csv_query_avg_multi_batch
+
+# csv_query_avg
+query R
+SELECT avg(c12) FROM aggregate_test_100
+----
+0.5089725099127211
+
+# csv_query_covariance_1
+query R
+SELECT covar_pop(c2, c12) FROM aggregate_test_100
+----
+-0.07916932235380847
+
+# csv_query_covariance_2
+query R
+SELECT covar(c2, c12) FROM aggregate_test_100
+----
+-0.07996901247859442
+
+# csv_query_correlation
+query R
+SELECT corr(c2, c12) FROM aggregate_test_100
+----
+-0.19064544190576607
+
+# csv_query_variance_1
+query R
+SELECT var_pop(c2) FROM aggregate_test_100
+----
+1.8675
+
+# original: 26156334342021890000000000000000000000
+# csv_query_variance_2
+query R
+SELECT var_pop(c6) FROM aggregate_test_100
+----
+2.615633434202189e37
+
+# csv_query_variance_3
+query R
+SELECT var_pop(c12) FROM aggregate_test_100
+----
+0.09234223721582163
+
+# csv_query_variance_4
+query R
+SELECT var(c2) FROM aggregate_test_100
+----
+1.8863636363636365
+
+# csv_query_variance_5
+query R
+SELECT var_samp(c2) FROM aggregate_test_100
+----
+1.8863636363636365
+
+# csv_query_stddev_1
+query R
+SELECT stddev_pop(c2) FROM aggregate_test_100
+----
+1.3665650368716449
+
+# original: 5114326382039172000
+# csv_query_stddev_2
+query R 
+SELECT stddev_pop(c6) FROM aggregate_test_100
+----
+5.114326382039172e18
+
+# csv_query_stddev_3
+query R
+SELECT stddev_pop(c12) FROM aggregate_test_100
+----
+0.30387865541334363
+
+# csv_query_stddev_4
+query R
+SELECT stddev(c12) FROM aggregate_test_100
+----
+0.3054095399405338
+
+# csv_query_stddev_5
+query R
+SELECT stddev_samp(c12) FROM aggregate_test_100
+----
+0.3054095399405338
+
+# csv_query_stddev_6
+query R
+select stddev(sq.column1) from (values (1.1), (2.0), (3.0)) as sq
+----
+0.9504384952922168
+
+# csv_query_approx_median_1
+query I
+SELECT approx_median(c2) FROM aggregate_test_100
+----
+3
+
+# csv_query_approx_median_2
+query I
+SELECT approx_median(c6) FROM aggregate_test_100
+----
+1146409980542786560
+
+# csv_query_approx_median_3
+query R
+SELECT approx_median(c12) FROM aggregate_test_100
+----
+0.5550065410522981
+
+# csv_query_median_1
+query I
+SELECT median(c2) FROM aggregate_test_100
+----
+3
+
+# csv_query_median_2
+query I
+SELECT median(c6) FROM aggregate_test_100
+----
+1125553990140691277
+
+# csv_query_median_3
+query R
+SELECT median(c12) FROM aggregate_test_100
+----
+0.5513900544385053
+
+# median_i8
+query I
+SELECT median(a) FROM median_i8
+----
+-14
+
+# median_i16
+query I
+SELECT median(a) FROM median_i16
+----
+-16334
+
+# median_i32
+query I
+SELECT median(a) FROM median_i32
+----
+-1073741774
+
+# TODO: FIX - numbers are off?
+# median_i64
+# query I
+# SELECT median(a) FROM median_i64
+# ----
+# -4611686018427388000
+
+# median_u8
+query I
+SELECT median(a) FROM median_u8
+----
+50
+
+# median_u16
+query I
+SELECT median(a) FROM median_u16
+----
+50
+
+# median_u32
+query I
+SELECT median(a) FROM median_u32
+----
+50
+
+# median_u64
+query I
+SELECT median(a) FROM median_u64
+----
+50
+
+# median_f32
+query R
+SELECT median(a) FROM median_f32
+----
+3.3
+
+# median_f64
+query R
+SELECT median(a) FROM median_f64
+----
+3.3
+
+# median_f64_nan
+query T
+SELECT median(a) FROM median_f64_nan
+----
+NaN
+
+# approx_median_f64_nan
+query T
+SELECT approx_median(a) FROM median_f64_nan
+----
+NaN
+
+# csv_query_external_table_count
+query I
+SELECT COUNT(c12) FROM aggregate_test_100
+----
+100
+
+# csv_query_external_table_sum
+query II
+SELECT SUM(CAST(c7 AS BIGINT)), SUM(CAST(c8 AS BIGINT)) FROM aggregate_test_100
+----
+13060 3017641
+
+# csv_query_count
+query I
+SELECT count(c12) FROM aggregate_test_100
+----
+100
+
+# csv_query_count_distinct
+query I
+SELECT count(distinct c2) FROM aggregate_test_100
+----
+5
+
+# csv_query_count_distinct_expr
+query I
+SELECT count(distinct c2 % 2) FROM aggregate_test_100
+----
+2
+
+# csv_query_count_star
+query I
+SELECT COUNT(*) FROM aggregate_test_100
+----
+100
+
+# csv_query_count_literal
+query I
+SELECT COUNT(2) FROM aggregate_test_100
+----
+100
+
+# csv_query_approx_count
+# FIX: https://github.com/apache/arrow-datafusion/issues/3353
+# query II
+# SELECT approx_distinct(c9) AS count_c9, approx_distinct(cast(c9 as varchar)) 
count_c9_str FROM aggregate_test_100
+# ----
+# 100 99
+
+# csv_query_approx_count_dupe_expr_aliased
+query II
+SELECT approx_distinct(c9) AS a, approx_distinct(c9) AS b FROM 
aggregate_test_100
+----
+100 100
+
+# TODO: csv_query_approx_percentile_cont
+
+# TODO: fix decimal places
+# csv_query_cube_avg
+# query TIR
+# SELECT c1, c2, AVG(c3) FROM aggregate_test_100_by_sql GROUP BY CUBE (c1, c2) 
ORDER BY c1, c2
+# ----
+# a 1 -17.6
+# a 2 -15.333333333333334
+# a 3 -4.5
+# a 4 -32
+# a 5 -32
+# a  -18.333333333333332
+# b 1 31.666666666666668
+# b 2 25.5
+# b 3 -42
+# b 4 -44.6
+# b 5 -0.2
+# b  -5.842105263157895
+# c 1 47.5
+# c 2 -55.57142857142857
+# c 3 47.5
+# c 4 -10.75
+# c 5 12
+# c  -1.3333333333333333
+# d 1 -8.142857142857142
+# d 2 109.33333333333333
+# d 3 41.333333333333336
+# d 4 54
+# d 5 -49.5
+# d  25.444444444444443
+# e 1 75.66666666666667
+# e 2 37.8
+# e 3 48
+# e 4 37.285714285714285
+# e 5 -11
+# e  40.333333333333336
+#  1 16.681818181818183
+#  2 8.363636363636363
+#  3 20.789473684210527
+#  4 1.2608695652173914
+#  5 -13.857142857142858
+#    7.81
+
+# TODO: fix decimal places
+# csv_query_rollup_avg
+# query IIII
+# SELECT c1, c2, c3, AVG(c4) FROM aggregate_test_100_by_sql GROUP BY ROLLUP 
(c1, c2, c3) ORDER BY c1, c2, c3
+# ----
+# a 1 -85 -15154
+# a 1 -56 8692
+# a 1 -25 15295
+# a 1 -5 12636
+# a 1 83 -14704
+# a 1  1353
+# a 2 -48 -18025
+# a 2 -43 13080
+# a 2 45 15673
+# a 2  3576
+# a 3 -72 -11122
+# a 3 -12 -9168
+# a 3 13 22338.5
+# a 3 14 28162
+# a 3 17 -22796
+# a 3  4958.833333333333
+# a 4 -101 11640
+# a 4 -54 -2376
+# a 4 -38 20744
+# a 4 65 -28462
+# a 4  386.5
+# a 5 -101 -12484
+# a 5 -31 -12907
+# a 5 36 -16974
+# a 5  -14121.666666666666
+# a   306.04761904761904
+# b 1 12 7652
+# b 1 29 -18218
+# b 1 54 -18410
+# b 1  -9658.666666666666
+# b 2 -60 -21739
+# b 2 31 23127
+# b 2 63 21456
+# b 2 68 15874
+# b 2  9679.5
+# b 3 -101 -13217
+# b 3 17 14457
+# b 3  620
+# b 4 -117 19316
+# b 4 -111 -1967
+# b 4 -59 25286
+# b 4 17 -28070
+# b 4 47 20690
+# b 4  7051
+# b 5 -82 22080
+# b 5 -44 15788
+# b 5 -5 24896
+# b 5 62 16337
+# b 5 68 21576
+# b 5  20135.4
+# b   7732.315789473684
+# c 1 -24 -24085
+# c 1 41 -4667
+# c 1 70 27752
+# c 1 103 -22186
+# c 1  -5796.5
+# c 2 -117 -30187
+# c 2 -107 -2904
+# c 2 -106 -1114
+# c 2 -60 -16312
+# c 2 -29 25305
+# c 2 1 18109
+# c 2 29 -3855
+# c 2  -1565.4285714285713
+# c 3 -2 -18655
+# c 3 22 13741
+# c 3 73 -9565
+# c 3 97 29106
+# c 3  3656.75
+# c 4 -90 -2935
+# c 4 -79 5281
+# c 4 3 -30508
+# c 4 123 16620
+# c 4  -2885.5
+# c 5 -94 -15880
+# c 5 118 19208
+# c 5  1664
+# c   -1320.5238095238096
+# d 1 -99 5613
+# d 1 -98 13630
+# d 1 -72 25590
+# d 1 -8 27138
+# d 1 38 18384
+# d 1 57 28781
+# d 1 125 31106
+# d 1  21463.14285714286
+# d 2 93 -12642
+# d 2 113 3917
+# d 2 122 10130
+# d 2  468.3333333333333
+# d 3 -76 8809
+# d 3 77 15091
+# d 3 123 29533
+# d 3  17811
+# d 4 5 -7688
+# d 4 55 -1471
+# d 4 102 -24558
+# d 4  -11239
+# d 5 -59 2045
+# d 5 -40 22614
+# d 5  12329.5
+# d   10890.111111111111
+# e 1 36 -21481
+# e 1 71 -5479
+# e 1 120 10837
+# e 1  -5374.333333333333
+# e 2 -61 -2888
+# e 2 49 24495
+# e 2 52 5666
+# e 2 97 18167
+# e 2  10221.2
+# e 3 -95 13611
+# e 3 71 194
+# e 3 104 -25136
+# e 3 112 -6823
+# e 3  -4538.5
+# e 4 -56 -31500
+# e 4 -53 13788
+# e 4 30 -16110
+# e 4 73 -22501
+# e 4 74 -12612
+# e 4 96 -30336
+# e 4 97 -13181
+# e 4  -16064.57142857143
+# e 5 -86 32514
+# e 5 64 -26526
+# e 5  2994
+# e   -4268.333333333333
+#    2319.97
+
+# csv_query_approx_percentile_cont_with_weight
+query II
+SELECT c1, approx_percentile_cont(c3, 0.95) AS c3_p95 FROM aggregate_test_100 
GROUP BY 1 ORDER BY 1
+----
+a 73
+b 68
+c 122
+d 124
+e 115
+
+# csv_query_approx_percentile_cont_with_weight (2)
+query II
+SELECT c1, approx_percentile_cont_with_weight(c3, 1, 0.95) AS c3_p95 FROM 
aggregate_test_100 GROUP BY 1 ORDER BY 1
+----
+a 73
+b 68
+c 122
+d 124
+e 115
+
+# csv_query_approx_percentile_cont_with_histogram_bins
+query II
+SELECT c1, approx_percentile_cont(c3, 0.95, 200) AS c3_p95 FROM 
aggregate_test_100 GROUP BY 1 ORDER BY 1
+----
+a 73
+b 68
+c 122
+d 124
+e 115
+
+# csv_query_sum_crossjoin
+query III
+SELECT a.c1, b.c1, SUM(a.c2) FROM aggregate_test_100 as a CROSS JOIN 
aggregate_test_100 as b GROUP BY a.c1, b.c1 ORDER BY a.c1, b.c1
+----
+a a 1260
+a b 1140
+a c 1260
+a d 1080
+a e 1260
+b a 1302
+b b 1178
+b c 1302
+b d 1116
+b e 1302
+c a 1176
+c b 1064
+c c 1176
+c d 1008
+c e 1176
+d a 924
+d b 836
+d c 924
+d d 792
+d e 924
+e a 1323
+e b 1197
+e c 1323
+e d 1134
+e e 1323
+
+# csv_query_cube_sum_crossjoin
+query III
+SELECT a.c1, b.c1, SUM(a.c2) FROM aggregate_test_100 as a CROSS JOIN 
aggregate_test_100 as b GROUP BY CUBE (a.c1, b.c1) ORDER BY a.c1, b.c1
+----
+a a 1260
+a b 1140
+a c 1260
+a d 1080
+a e 1260
+a   6000
+b a 1302
+b b 1178
+b c 1302
+b d 1116
+b e 1302
+b   6200
+c a 1176
+c b 1064
+c c 1176
+c d 1008
+c e 1176
+c   5600
+d a 924
+d b 836
+d c 924
+d d 792
+d e 924
+d   4400
+e a 1323
+e b 1197
+e c 1323
+e d 1134
+e e 1323
+e   6300
+  a 5985
+  b 5415
+  c 5985
+  d 5130
+  e 5985
+    28500
+
+# csv_query_cube_distinct_count
+query III
+SELECT c1, c2, COUNT(DISTINCT c3) FROM aggregate_test_100 GROUP BY CUBE 
(c1,c2) ORDER BY c1,c2
+----
+a 1 5
+a 2 3
+a 3 5
+a 4 4
+a 5 3
+a   19
+b 1 3
+b 2 4
+b 3 2
+b 4 5
+b 5 5
+b   17
+c 1 4
+c 2 7
+c 3 4
+c 4 4
+c 5 2
+c   21
+d 1 7
+d 2 3
+d 3 3
+d 4 3
+d 5 2
+d   18
+e 1 3
+e 2 4
+e 3 4
+e 4 7
+e 5 2
+e   18
+  1 22
+  2 20
+  3 17
+  4 23
+  5 14
+    80
+
+# csv_query_rollup_distinct_count
+query III
+SELECT c1, c2, COUNT(DISTINCT c3) FROM aggregate_test_100 GROUP BY ROLLUP 
(c1,c2) ORDER BY c1,c2
+----
+a 1 5
+a 2 3
+a 3 5
+a 4 4
+a 5 3
+a   19
+b 1 3
+b 2 4
+b 3 2
+b 4 5
+b 5 5
+b   17
+c 1 4
+c 2 7
+c 3 4
+c 4 4
+c 5 2
+c   21
+d 1 7
+d 2 3
+d 3 3
+d 4 3
+d 5 2
+d   18
+e 1 3
+e 2 4
+e 3 4
+e 4 7
+e 5 2
+e  18
+   80
+
+# csv_query_rollup_sum_crossjoin
+query III
+SELECT a.c1, b.c1, SUM(a.c2) FROM aggregate_test_100 as a CROSS JOIN 
aggregate_test_100 as b GROUP BY ROLLUP (a.c1, b.c1) ORDER BY a.c1, b.c1
+----
+a a 1260
+a b 1140
+a c 1260
+a d 1080
+a e 1260
+a   6000
+b a 1302
+b b 1178
+b c 1302
+b d 1116
+b e 1302
+b   6200
+c a 1176
+c b 1064
+c c 1176
+c d 1008
+c e 1176
+c   5600
+d a 924
+d b 836
+d c 924
+d d 792
+d e 924
+d   4400
+e a 1323
+e b 1197
+e c 1323
+e d 1134
+e e 1323
+e   6300
+    28500
+
+# query_count_without_from
+query I
+SELECT count(1 + 1)
+----
+1
+
+# FIX: "CSV Writer does not support List(Field { name: \"item\", data_type: 
Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }) 
data type")
+# csv_query_array_agg
+# query I
+# SELECT array_agg(c13) FROM (SELECT * FROM aggregate_test_100 ORDER BY c13 
LIMIT 2) test
+# ----
+# [0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm0keZ5G8BffGwgF2RwQD59TFzMStxCB]
+
+# FIX: see above
+# csv_query_array_agg_empty
+# query I
+# SELECT array_agg(c13) FROM (SELECT * FROM aggregate_test_100 LIMIT 0) test
+# ----
+# []
+
+# FIX: see above
+# csv_query_array_agg_one
+# query I
+# SELECT array_agg(c13) FROM (SELECT * FROM aggregate_test_100 ORDER BY c13 
LIMIT 1) test
+# ----
+# [0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm]
+
+# FIX: Decimal error
+# csv_query_array_agg_with_overflow
+# query IIIIII
+# select c2, sum(c3) sum_c3, avg(c3) avg_c3, max(c3) max_c3, min(c3) min_c3, 
count(c3) count_c3 from aggregate_test_100 group by c2 order by c2
+# ----
+# 1 367 16.681818181818183 125 -99 22
+# 2 184 8.363636363636363 122 -117 22
+# 3 395 20.789473684210527 123 -101 19
+# 4 29 1.2608695652173914 123 -117 23
+# 5 -194 -13.857142857142858 118 -101 14
+
+# TODO: csv_query_array_agg_unsupported
+
+# FIX: Decimal error
+# csv_query_array_cube_agg_with_overflow
+# query IIIIIII
+# select c1, c2, sum(c3) sum_c3, avg(c3) avg_c3, max(c3) max_c3, min(c3) 
min_c3, count(c3) count_c3 from aggregate_test_100 group by CUBE (c1,c2) order 
by c1, c2
+# ----
+# a 1 -88 -17.6 83 -85 5
+# a 2 -46 -15.333333333333334 45 -48 3
+# a 3 -27 -4.5 17 -72 6
+# a 4 -128 -32 65 -101 4
+# a 5 -96 -32 36 -101 3
+# a  -385 -18.333333333333332 83 -101 21
+# b 1 95 31.666666666666668 54 12 3
+# b 2 102 25.5 68 -60 4
+# b 3 -84 -42 17 -101 2
+# b 4 -223 -44.6 47 -117 5
+# b 5 -1 -0.2 68 -82 5
+# b  -111 -5.842105263157895 68 -117 19
+# c 1 190 47.5 103 -24 4
+# c 2 -389 -55.57142857142857 29 -117 7
+# c 3 190 47.5 97 -2 4
+# c 4 -43 -10.75 123 -90 4
+# c 5 24 12 118 -94 2
+# c  -28 -1.3333333333333333 123 -117 21
+# d 1 -57 -8.142857142857142 125 -99 7
+# d 2 328 109.33333333333333 122 93 3
+# d 3 124 41.333333333333336 123 -76 3
+# d 4 162 54 102 5 3
+# d 5 -99 -49.5 -40 -59 2
+# d  458 25.444444444444443 125 -99 18
+# e 1 227 75.66666666666667 120 36 3
+# e 2 189 37.8 97 -61 5
+# e 3 192 48 112 -95 4
+# e 4 261 37.285714285714285 97 -56 7
+# e 5 -22 -11 64 -86 2
+# e  847 40.333333333333336 120 -95 21
+#  1 367 16.681818181818183 125 -99 22
+#  2 184 8.363636363636363 122 -117 22
+#  3 395 20.789473684210527 123 -101 19
+#  4 29 1.2608695652173914 123 -117 23
+#  5 -194 -13.857142857142858 118 -101 14
+#   781 7.81 125 -117 100
+
+# TODO: csv_query_array_agg_distinct
+
+# TODO: aggregate_timestamps_sum
+
+# TODO: aggregate_timestamps_count
+
+# TODO: aggregate_timestamps_min
+
+# TODO: aggregate_timestamps_max
+
+# TODO: aggregate_times_sum
+
+# TODO: aggregate_times_count
+
+# TODO: aggregate_times_min
+
+# TODO: aggregate_times_max
+
+# TODO: aggregate_timestamps_avg
+
+# aggregate_time_min_and_max
+query II
+select min(t), max(t) from  (select '00:00:00' as t union select '00:00:01' 
union select '00:00:02')
+----
+00:00:00 00:00:02
+
+# aggregate_decimal_min
+query I
+select min(c1) from d_table
+----
+-100.009
+
+# aggregate_decimal_max
+query I
+select max(c1) from d_table
+----
+110.009
+
+query I
+select sum(c1) from d_table
+----
+100.000
+
+# FIX: doesn't check datatype
+# aggregate_decimal_avg
+query I
+select avg(c1) from d_table
+----
+5.0000000
+
+# FIX: different test table
+# aggregate
+# query I
+# SELECT SUM(c1), SUM(c2) FROM test
+# ----
+# 60 220
+
+# TODO: aggregate_empty
+
+# TODO: aggregate_avg
+
+# TODO: aggregate_max
+
+# TODO: aggregate_min
+
+# TODO: aggregate_grouped
+
+# TODO: aggregate_grouped_avg
+
+# TODO: aggregate_grouped_empty
+
+# TODO: aggregate_grouped_max
+
+# TODO: aggregate_grouped_min
+
+# TODO: aggregate_avg_add
+
+# TODO: case_sensitive_identifiers_aggregates
+
+# TODO: count_basic
+
+# TODO: count_partitioned
+
+# TODO: count_aggregated
+
+# TODO: count_aggregated_cube
+
+# TODO: simple_avg
+
+# TODO: simple_mean
+
+# TOOD: bottom 2 require pre-populated data
+# query_sum_distinct
+query II
+SELECT AVG(c1), SUM(DISTINCT c2) FROM test
+----
+1.75 3
+
+# # query_count_distinct
+query I
+SELECT COUNT(DISTINCT c1) FROM test
+----
+3
+
+# TODO: count_distinct_integers_aggregated_single_partition
+
+# TODO: count_distinct_integers_aggregated_multiple_partitions
+
+# TODO: aggregate_with_alias
+
+
+# FIX: CSV Writer error
+# array_agg_zero
+# query I
+# SELECT ARRAY_AGG([])
+# ----
+# []
+
+# FIX: CSV Writer error
+# array_agg_one
+# query I
+# SELECT ARRAY_AGG([1])
+# ----
+# [[1]]
+
+# test_approx_percentile_cont_decimal_support
+query II
+SELECT c1, approx_percentile_cont(c2, cast(0.85 as decimal(10,2))) apc FROM 
aggregate_test_100 GROUP BY 1 ORDER BY 1
+----
+a 4
+b 5
+c 4
+d 4
+e 4
diff --git a/datafusion/core/tests/sqllogictests/test_files/arrow_typeof.slt 
b/datafusion/core/tests/sqllogictests/test_files/arrow_typeof.slt
new file mode 100644
index 000000000..8f1c00651
--- /dev/null
+++ b/datafusion/core/tests/sqllogictests/test_files/arrow_typeof.slt
@@ -0,0 +1,82 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# arrow_typeof_null
+query T
+SELECT arrow_typeof(null)
+----
+Null
+
+# arrow_typeof_boolean
+query T
+SELECT arrow_typeof(true)
+----
+Boolean
+
+# arrow_typeof_i64
+query T
+SELECT arrow_typeof(1)
+----
+Int64
+
+# arrow_typeof_i32
+query T
+SELECT arrow_typeof(1::int)
+----
+Int32
+
+# arrow_typeof_f64
+query T
+SELECT arrow_typeof(1.0)
+----
+Float64
+
+# arrow_typeof_f32
+query T
+SELECT arrow_typeof(1.0::float)
+----
+Float32
+
+# arrow_typeof_decimal
+# query T
+# SELECT arrow_typeof(1::Decimal)
+# ----
+# Decimal128(38, 10)
+
+# # arrow_typeof_timestamp
+# query T
+# SELECT arrow_typeof(now()::timestamp)
+# ----
+# Timestamp(Nanosecond, None)
+
+# # arrow_typeof_timestamp_utc
+# query T
+# SELECT arrow_typeof(now())
+# ----
+# Timestamp(Nanosecond, Some(\"+00:00\"))
+
+# # arrow_typeof_timestamp_date32(
+# query T
+# SELECT arrow_typeof(now()::date)
+# ----
+# Date32
+
+# # arrow_typeof_utf8
+# query T
+# SELECT arrow_typeof('1')
+# ----
+# Utf8


Reply via email to