This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new a862f7adbb Split clickbench query set into one file per query (#16476)
a862f7adbb is described below

commit a862f7adbb5dbe80f85cbe15e3ae8e7d801c30c3
Author: Pepijn Van Eeckhoudt <pep...@vaneeckhoudt.net>
AuthorDate: Mon Jun 23 23:30:29 2025 +0200

    Split clickbench query set into one file per query (#16476)
    
    * Split clickbench query set into one file per query
    
    * Fix queries-path value in bench.sh
---
 benchmarks/bench.sh                             |  6 +-
 benchmarks/queries/clickbench/README.md         |  8 +--
 benchmarks/queries/clickbench/extended.sql      |  9 ---
 benchmarks/queries/clickbench/extended/q0.sql   |  1 +
 benchmarks/queries/clickbench/extended/q1.sql   |  1 +
 benchmarks/queries/clickbench/extended/q2.sql   |  1 +
 benchmarks/queries/clickbench/extended/q3.sql   |  1 +
 benchmarks/queries/clickbench/extended/q4.sql   |  1 +
 benchmarks/queries/clickbench/extended/q5.sql   |  1 +
 benchmarks/queries/clickbench/extended/q6.sql   |  1 +
 benchmarks/queries/clickbench/extended/q7.sql   |  1 +
 benchmarks/queries/clickbench/queries.sql       | 43 ------------
 benchmarks/queries/clickbench/queries/q0.sql    |  1 +
 benchmarks/queries/clickbench/queries/q1.sql    |  1 +
 benchmarks/queries/clickbench/queries/q10.sql   |  1 +
 benchmarks/queries/clickbench/queries/q11.sql   |  1 +
 benchmarks/queries/clickbench/queries/q12.sql   |  1 +
 benchmarks/queries/clickbench/queries/q13.sql   |  1 +
 benchmarks/queries/clickbench/queries/q14.sql   |  1 +
 benchmarks/queries/clickbench/queries/q15.sql   |  1 +
 benchmarks/queries/clickbench/queries/q16.sql   |  1 +
 benchmarks/queries/clickbench/queries/q17.sql   |  1 +
 benchmarks/queries/clickbench/queries/q18.sql   |  1 +
 benchmarks/queries/clickbench/queries/q19.sql   |  1 +
 benchmarks/queries/clickbench/queries/q2.sql    |  1 +
 benchmarks/queries/clickbench/queries/q20.sql   |  1 +
 benchmarks/queries/clickbench/queries/q21.sql   |  1 +
 benchmarks/queries/clickbench/queries/q22.sql   |  1 +
 benchmarks/queries/clickbench/queries/q23.sql   |  1 +
 benchmarks/queries/clickbench/queries/q24.sql   |  1 +
 benchmarks/queries/clickbench/queries/q25.sql   |  1 +
 benchmarks/queries/clickbench/queries/q26.sql   |  1 +
 benchmarks/queries/clickbench/queries/q27.sql   |  1 +
 benchmarks/queries/clickbench/queries/q28.sql   |  1 +
 benchmarks/queries/clickbench/queries/q29.sql   |  1 +
 benchmarks/queries/clickbench/queries/q3.sql    |  1 +
 benchmarks/queries/clickbench/queries/q30.sql   |  1 +
 benchmarks/queries/clickbench/queries/q31.sql   |  1 +
 benchmarks/queries/clickbench/queries/q32.sql   |  1 +
 benchmarks/queries/clickbench/queries/q33.sql   |  1 +
 benchmarks/queries/clickbench/queries/q34.sql   |  1 +
 benchmarks/queries/clickbench/queries/q35.sql   |  1 +
 benchmarks/queries/clickbench/queries/q36.sql   |  1 +
 benchmarks/queries/clickbench/queries/q37.sql   |  1 +
 benchmarks/queries/clickbench/queries/q38.sql   |  1 +
 benchmarks/queries/clickbench/queries/q39.sql   |  1 +
 benchmarks/queries/clickbench/queries/q4.sql    |  1 +
 benchmarks/queries/clickbench/queries/q40.sql   |  1 +
 benchmarks/queries/clickbench/queries/q41.sql   |  1 +
 benchmarks/queries/clickbench/queries/q42.sql   |  1 +
 benchmarks/queries/clickbench/queries/q5.sql    |  1 +
 benchmarks/queries/clickbench/queries/q6.sql    |  1 +
 benchmarks/queries/clickbench/queries/q7.sql    |  1 +
 benchmarks/queries/clickbench/queries/q8.sql    |  1 +
 benchmarks/queries/clickbench/queries/q9.sql    |  1 +
 benchmarks/queries/clickbench/update_queries.sh | 80 ++++++++++++++++++++++
 benchmarks/src/clickbench.rs                    | 88 ++++++++++++++-----------
 datafusion/core/benches/sql_planner.rs          | 28 ++++----
 58 files changed, 200 insertions(+), 113 deletions(-)

diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh
index 9ad12d1f63..ed6225b591 100755
--- a/benchmarks/bench.sh
+++ b/benchmarks/bench.sh
@@ -586,7 +586,7 @@ run_clickbench_1() {
     RESULTS_FILE="${RESULTS_DIR}/clickbench_1.json"
     echo "RESULTS_FILE: ${RESULTS_FILE}"
     echo "Running clickbench (1 file) benchmark..."
-    debug_run $CARGO_COMMAND --bin dfbench -- clickbench  --iterations 5 
--path "${DATA_DIR}/hits.parquet"  --queries-path 
"${SCRIPT_DIR}/queries/clickbench/queries.sql" -o "${RESULTS_FILE}"
+    debug_run $CARGO_COMMAND --bin dfbench -- clickbench  --iterations 5 
--path "${DATA_DIR}/hits.parquet"  --queries-path 
"${SCRIPT_DIR}/queries/clickbench/queries" -o "${RESULTS_FILE}"
 }
 
  # Runs the clickbench benchmark with the partitioned parquet files
@@ -594,7 +594,7 @@ run_clickbench_partitioned() {
     RESULTS_FILE="${RESULTS_DIR}/clickbench_partitioned.json"
     echo "RESULTS_FILE: ${RESULTS_FILE}"
     echo "Running clickbench (partitioned, 100 files) benchmark..."
-    debug_run $CARGO_COMMAND --bin dfbench -- clickbench  --iterations 5 
--path "${DATA_DIR}/hits_partitioned" --queries-path 
"${SCRIPT_DIR}/queries/clickbench/queries.sql" -o "${RESULTS_FILE}"
+    debug_run $CARGO_COMMAND --bin dfbench -- clickbench  --iterations 5 
--path "${DATA_DIR}/hits_partitioned" --queries-path 
"${SCRIPT_DIR}/queries/clickbench/queries" -o "${RESULTS_FILE}"
 }
 
 # Runs the clickbench "extended" benchmark with a single large parquet file
@@ -602,7 +602,7 @@ run_clickbench_extended() {
     RESULTS_FILE="${RESULTS_DIR}/clickbench_extended.json"
     echo "RESULTS_FILE: ${RESULTS_FILE}"
     echo "Running clickbench (1 file) extended benchmark..."
-    debug_run $CARGO_COMMAND --bin dfbench -- clickbench  --iterations 5 
--path "${DATA_DIR}/hits.parquet" --queries-path 
"${SCRIPT_DIR}/queries/clickbench/extended.sql" -o "${RESULTS_FILE}"
+    debug_run $CARGO_COMMAND --bin dfbench -- clickbench  --iterations 5 
--path "${DATA_DIR}/hits.parquet" --queries-path 
"${SCRIPT_DIR}/queries/clickbench/extended" -o "${RESULTS_FILE}"
 }
 
 # Downloads the csv.gz files IMDB datasets from Peter Boncz's homepage(one of 
the JOB paper authors)
diff --git a/benchmarks/queries/clickbench/README.md 
b/benchmarks/queries/clickbench/README.md
index e5acd8f348..877ea0e0c3 100644
--- a/benchmarks/queries/clickbench/README.md
+++ b/benchmarks/queries/clickbench/README.md
@@ -6,8 +6,8 @@ ClickBench is focused on aggregation and filtering performance 
(though it has no
 
 ## Files:
 
-- `queries.sql` - Actual ClickBench queries, downloaded from the [ClickBench 
repository]
-- `extended.sql` - "Extended" DataFusion specific queries.
+- `queries/*.sql` - Actual ClickBench queries, downloaded from the [ClickBench 
repository](https://raw.githubusercontent.com/ClickHouse/ClickBench/main/datafusion/queries.sql)
 and split by the `update_queries.sh` script.
+- `extended/*.sql` - "Extended" DataFusion specific queries.
 
 [clickbench repository]: 
https://github.com/ClickHouse/ClickBench/blob/main/datafusion/queries.sql
 
@@ -15,8 +15,8 @@ ClickBench is focused on aggregation and filtering 
performance (though it has no
 
 The "extended" queries are not part of the official ClickBench benchmark.
 Instead they are used to test other DataFusion features that are not covered by
-the standard benchmark. Each description below is for the corresponding line in
-`extended.sql` (line 1 is `Q0`, line 2 is `Q1`, etc.)
+the standard benchmark. Each description below is for the corresponding file in
+`extended`
 
 ### Q0: Data Exploration
 
diff --git a/benchmarks/queries/clickbench/extended.sql 
b/benchmarks/queries/clickbench/extended.sql
deleted file mode 100644
index 93c39efe4f..0000000000
--- a/benchmarks/queries/clickbench/extended.sql
+++ /dev/null
@@ -1,9 +0,0 @@
-SELECT COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"), 
COUNT(DISTINCT "MobilePhoneModel") FROM hits;
-SELECT COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserCountry"), 
COUNT(DISTINCT "BrowserLanguage")  FROM hits;
-SELECT "BrowserCountry",  COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT 
"HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction") 
FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10;
-SELECT "SocialSourceNetworkID", "RegionID", COUNT(*), AVG("Age"), 
AVG("ParamPrice"), STDDEV("ParamPrice") as s, VAR("ParamPrice")  FROM hits 
GROUP BY "SocialSourceNetworkID", "RegionID" HAVING s IS NOT NULL ORDER BY s 
DESC LIMIT 10;
-SELECT "ClientIP", "WatchID",  COUNT(*) c, MIN("ResponseStartTiming") tmin, 
MEDIAN("ResponseStartTiming") tmed, MAX("ResponseStartTiming") tmax FROM hits 
WHERE "JavaEnable" = 0  GROUP BY  "ClientIP", "WatchID" HAVING c > 1 ORDER BY 
tmed DESC LIMIT 10;
-SELECT "ClientIP", "WatchID",  COUNT(*) c, MIN("ResponseStartTiming") tmin, 
APPROX_PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "ResponseStartTiming") 
tp95, MAX("ResponseStartTiming") tmax FROM 'hits' WHERE "JavaEnable" = 0 GROUP 
BY  "ClientIP", "WatchID" HAVING c > 1 ORDER BY tp95 DESC LIMIT 10;
-SELECT COUNT(*) AS ShareCount FROM hits WHERE "IsMobile" = 1 AND 
"MobilePhoneModel" LIKE 'iPhone%' AND "SocialAction" = 'share' AND 
"SocialSourceNetworkID" IN (5, 12) AND "ClientTimeZone" BETWEEN -5 AND 5 AND 
regexp_match("Referer", '\/campaign\/(spring|summer)_promo') IS NOT NULL AND 
CASE WHEN split_part(split_part("URL", 'resolution=', 2), '&', 1) ~ '^\d+$' 
THEN split_part(split_part("URL", 'resolution=', 2), '&', 1)::INT ELSE 0 END > 
1920 AND levenshtein(CAST("UTMSource" AS STRING), C [...]
-SELECT "WatchID", MIN("ResolutionWidth") as wmin, MAX("ResolutionWidth") as 
wmax, SUM("IsRefresh") as srefresh FROM hits GROUP BY "WatchID" ORDER BY 
"WatchID" DESC LIMIT 10;
-SELECT "RegionID", "UserAgent", "OS", 
AVG(to_timestamp("ResponseEndTiming")-to_timestamp("ResponseStartTiming")) as 
avg_response_time, 
AVG(to_timestamp("ResponseEndTiming")-to_timestamp("ConnectTiming")) as 
avg_latency FROM hits GROUP BY "RegionID", "UserAgent", "OS" ORDER BY 
avg_latency DESC limit 10;
\ No newline at end of file
diff --git a/benchmarks/queries/clickbench/extended/q0.sql 
b/benchmarks/queries/clickbench/extended/q0.sql
new file mode 100644
index 0000000000..a1e55b5b25
--- /dev/null
+++ b/benchmarks/queries/clickbench/extended/q0.sql
@@ -0,0 +1 @@
+SELECT COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"), 
COUNT(DISTINCT "MobilePhoneModel") FROM hits;
diff --git a/benchmarks/queries/clickbench/extended/q1.sql 
b/benchmarks/queries/clickbench/extended/q1.sql
new file mode 100644
index 0000000000..84fac921c8
--- /dev/null
+++ b/benchmarks/queries/clickbench/extended/q1.sql
@@ -0,0 +1 @@
+SELECT COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserCountry"), 
COUNT(DISTINCT "BrowserLanguage")  FROM hits;
diff --git a/benchmarks/queries/clickbench/extended/q2.sql 
b/benchmarks/queries/clickbench/extended/q2.sql
new file mode 100644
index 0000000000..9832ce44d4
--- /dev/null
+++ b/benchmarks/queries/clickbench/extended/q2.sql
@@ -0,0 +1 @@
+SELECT "BrowserCountry",  COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT 
"HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction") 
FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/extended/q3.sql 
b/benchmarks/queries/clickbench/extended/q3.sql
new file mode 100644
index 0000000000..d1661bc216
--- /dev/null
+++ b/benchmarks/queries/clickbench/extended/q3.sql
@@ -0,0 +1 @@
+SELECT "SocialSourceNetworkID", "RegionID", COUNT(*), AVG("Age"), 
AVG("ParamPrice"), STDDEV("ParamPrice") as s, VAR("ParamPrice")  FROM hits 
GROUP BY "SocialSourceNetworkID", "RegionID" HAVING s IS NOT NULL ORDER BY s 
DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/extended/q4.sql 
b/benchmarks/queries/clickbench/extended/q4.sql
new file mode 100644
index 0000000000..bd54956a2b
--- /dev/null
+++ b/benchmarks/queries/clickbench/extended/q4.sql
@@ -0,0 +1 @@
+SELECT "ClientIP", "WatchID",  COUNT(*) c, MIN("ResponseStartTiming") tmin, 
MEDIAN("ResponseStartTiming") tmed, MAX("ResponseStartTiming") tmax FROM hits 
WHERE "JavaEnable" = 0  GROUP BY  "ClientIP", "WatchID" HAVING c > 1 ORDER BY 
tmed DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/extended/q5.sql 
b/benchmarks/queries/clickbench/extended/q5.sql
new file mode 100644
index 0000000000..9de2f517d0
--- /dev/null
+++ b/benchmarks/queries/clickbench/extended/q5.sql
@@ -0,0 +1 @@
+SELECT "ClientIP", "WatchID",  COUNT(*) c, MIN("ResponseStartTiming") tmin, 
APPROX_PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "ResponseStartTiming") 
tp95, MAX("ResponseStartTiming") tmax FROM 'hits' WHERE "JavaEnable" = 0 GROUP 
BY  "ClientIP", "WatchID" HAVING c > 1 ORDER BY tp95 DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/extended/q6.sql 
b/benchmarks/queries/clickbench/extended/q6.sql
new file mode 100644
index 0000000000..091e8867c7
--- /dev/null
+++ b/benchmarks/queries/clickbench/extended/q6.sql
@@ -0,0 +1 @@
+SELECT COUNT(*) AS ShareCount FROM hits WHERE "IsMobile" = 1 AND 
"MobilePhoneModel" LIKE 'iPhone%' AND "SocialAction" = 'share' AND 
"SocialSourceNetworkID" IN (5, 12) AND "ClientTimeZone" BETWEEN -5 AND 5 AND 
regexp_match("Referer", '\/campaign\/(spring|summer)_promo') IS NOT NULL AND 
CASE WHEN split_part(split_part("URL", 'resolution=', 2), '&', 1) ~ '^\d+$' 
THEN split_part(split_part("URL", 'resolution=', 2), '&', 1)::INT ELSE 0 END > 
1920 AND levenshtein(CAST("UTMSource" AS STRING), C [...]
diff --git a/benchmarks/queries/clickbench/extended/q7.sql 
b/benchmarks/queries/clickbench/extended/q7.sql
new file mode 100644
index 0000000000..ddaff7f880
--- /dev/null
+++ b/benchmarks/queries/clickbench/extended/q7.sql
@@ -0,0 +1 @@
+SELECT "WatchID", MIN("ResolutionWidth") as wmin, MAX("ResolutionWidth") as 
wmax, SUM("IsRefresh") as srefresh FROM hits GROUP BY "WatchID" ORDER BY 
"WatchID" DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries.sql 
b/benchmarks/queries/clickbench/queries.sql
deleted file mode 100644
index 9a183cd6e2..0000000000
--- a/benchmarks/queries/clickbench/queries.sql
+++ /dev/null
@@ -1,43 +0,0 @@
-SELECT COUNT(*) FROM hits;
-SELECT COUNT(*) FROM hits WHERE "AdvEngineID" <> 0;
-SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits;
-SELECT AVG("UserID") FROM hits;
-SELECT COUNT(DISTINCT "UserID") FROM hits;
-SELECT COUNT(DISTINCT "SearchPhrase") FROM hits;
-SELECT MIN("EventDate"), MAX("EventDate") FROM hits;
-SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY 
"AdvEngineID" ORDER BY COUNT(*) DESC;
-SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" 
ORDER BY u DESC LIMIT 10;
-SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), 
COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10;
-SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE 
"MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
-SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM 
hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" 
ORDER BY u DESC LIMIT 10;
-SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' 
GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
-SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE 
"SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10;
-SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE 
"SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC 
LIMIT 10;
-SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY COUNT(*) DESC 
LIMIT 10;
-SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", 
"SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
-SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", 
"SearchPhrase" LIMIT 10;
-SELECT "UserID", extract(minute FROM to_timestamp_seconds("EventTime")) AS m, 
"SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER 
BY COUNT(*) DESC LIMIT 10;
-SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449;
-SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%';
-SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE 
'%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC 
LIMIT 10;
-SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT 
"UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE 
'%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC 
LIMIT 10;
-SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY "EventTime" LIMIT 10;
-SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY 
"EventTime" LIMIT 10;
-SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY 
"SearchPhrase" LIMIT 10;
-SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY 
"EventTime", "SearchPhrase" LIMIT 10;
-SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE 
"URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 
25;
-SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS 
k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE 
"Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
-SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), 
SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 
4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), 
SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 
9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), 
SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" 
+ 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("R [...]
-SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), 
AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY 
"SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10;
-SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), 
AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", 
"ClientIP" ORDER BY c DESC LIMIT 10;
-SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), 
AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC 
LIMIT 10;
-SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10;
-SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC 
LIMIT 10;
-SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS 
c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 
ORDER BY c DESC LIMIT 10;
-SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND 
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" 
= 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY PageViews DESC 
LIMIT 10;
-SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND 
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" 
= 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY PageViews 
DESC LIMIT 10;
-SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND 
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 
AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews DESC 
LIMIT 10 OFFSET 1000;
-SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN 
("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, 
"URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND 
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 
GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY 
PageViews DESC LIMIT 10 OFFSET 1000;
-SELECT "URLHash", "EventDate", COUNT(*) AS PageViews FROM hits WHERE 
"CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= 
'2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND 
"RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate" ORDER BY 
PageViews DESC LIMIT 10 OFFSET 100;
-SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS PageViews FROM 
hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= 
'2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 
2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY 
PageViews DESC LIMIT 10 OFFSET 10000;
-SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, COUNT(*) 
AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-14' 
AND "EventDate" <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 
GROUP BY DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) ORDER BY 
DATE_TRUNC('minute', M) LIMIT 10 OFFSET 1000;
diff --git a/benchmarks/queries/clickbench/queries/q0.sql 
b/benchmarks/queries/clickbench/queries/q0.sql
new file mode 100644
index 0000000000..c70aa7a844
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q0.sql
@@ -0,0 +1 @@
+SELECT COUNT(*) FROM hits;
diff --git a/benchmarks/queries/clickbench/queries/q1.sql 
b/benchmarks/queries/clickbench/queries/q1.sql
new file mode 100644
index 0000000000..283a5c3cc8
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q1.sql
@@ -0,0 +1 @@
+SELECT COUNT(*) FROM hits WHERE "AdvEngineID" <> 0;
diff --git a/benchmarks/queries/clickbench/queries/q10.sql 
b/benchmarks/queries/clickbench/queries/q10.sql
new file mode 100644
index 0000000000..dd44e5c493
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q10.sql
@@ -0,0 +1 @@
+SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE 
"MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q11.sql 
b/benchmarks/queries/clickbench/queries/q11.sql
new file mode 100644
index 0000000000..9349d45069
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q11.sql
@@ -0,0 +1 @@
+SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM 
hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" 
ORDER BY u DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q12.sql 
b/benchmarks/queries/clickbench/queries/q12.sql
new file mode 100644
index 0000000000..908af63149
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q12.sql
@@ -0,0 +1 @@
+SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' 
GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q13.sql 
b/benchmarks/queries/clickbench/queries/q13.sql
new file mode 100644
index 0000000000..46e1e6b4a7
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q13.sql
@@ -0,0 +1 @@
+SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE 
"SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q14.sql 
b/benchmarks/queries/clickbench/queries/q14.sql
new file mode 100644
index 0000000000..d6c5118168
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q14.sql
@@ -0,0 +1 @@
+SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE 
"SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC 
LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q15.sql 
b/benchmarks/queries/clickbench/queries/q15.sql
new file mode 100644
index 0000000000..f5b4e511a8
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q15.sql
@@ -0,0 +1 @@
+SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY COUNT(*) DESC 
LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q16.sql 
b/benchmarks/queries/clickbench/queries/q16.sql
new file mode 100644
index 0000000000..38e44b6849
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q16.sql
@@ -0,0 +1 @@
+SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", 
"SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q17.sql 
b/benchmarks/queries/clickbench/queries/q17.sql
new file mode 100644
index 0000000000..1a97cdd36a
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q17.sql
@@ -0,0 +1 @@
+SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", 
"SearchPhrase" LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q18.sql 
b/benchmarks/queries/clickbench/queries/q18.sql
new file mode 100644
index 0000000000..5aeeedf78e
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q18.sql
@@ -0,0 +1 @@
+SELECT "UserID", extract(minute FROM to_timestamp_seconds("EventTime")) AS m, 
"SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER 
BY COUNT(*) DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q19.sql 
b/benchmarks/queries/clickbench/queries/q19.sql
new file mode 100644
index 0000000000..e388497dd1
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q19.sql
@@ -0,0 +1 @@
+SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449;
diff --git a/benchmarks/queries/clickbench/queries/q2.sql 
b/benchmarks/queries/clickbench/queries/q2.sql
new file mode 100644
index 0000000000..9938e3081d
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q2.sql
@@ -0,0 +1 @@
+SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits;
diff --git a/benchmarks/queries/clickbench/queries/q20.sql 
b/benchmarks/queries/clickbench/queries/q20.sql
new file mode 100644
index 0000000000..a7e6995c1f
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q20.sql
@@ -0,0 +1 @@
+SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%';
diff --git a/benchmarks/queries/clickbench/queries/q21.sql 
b/benchmarks/queries/clickbench/queries/q21.sql
new file mode 100644
index 0000000000..d857899d13
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q21.sql
@@ -0,0 +1 @@
+SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE 
'%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC 
LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q22.sql 
b/benchmarks/queries/clickbench/queries/q22.sql
new file mode 100644
index 0000000000..8ac4f099c4
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q22.sql
@@ -0,0 +1 @@
+SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT 
"UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE 
'%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC 
LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q23.sql 
b/benchmarks/queries/clickbench/queries/q23.sql
new file mode 100644
index 0000000000..3623b0fed8
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q23.sql
@@ -0,0 +1 @@
+SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY "EventTime" LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q24.sql 
b/benchmarks/queries/clickbench/queries/q24.sql
new file mode 100644
index 0000000000..cee774aafe
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q24.sql
@@ -0,0 +1 @@
+SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY 
"EventTime" LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q25.sql 
b/benchmarks/queries/clickbench/queries/q25.sql
new file mode 100644
index 0000000000..048b4cd9d3
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q25.sql
@@ -0,0 +1 @@
+SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY 
"SearchPhrase" LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q26.sql 
b/benchmarks/queries/clickbench/queries/q26.sql
new file mode 100644
index 0000000000..104e8d50ec
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q26.sql
@@ -0,0 +1 @@
+SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY 
"EventTime", "SearchPhrase" LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q27.sql 
b/benchmarks/queries/clickbench/queries/q27.sql
new file mode 100644
index 0000000000..c84cad9296
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q27.sql
@@ -0,0 +1 @@
+SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE 
"URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 
25;
diff --git a/benchmarks/queries/clickbench/queries/q28.sql 
b/benchmarks/queries/clickbench/queries/q28.sql
new file mode 100644
index 0000000000..8c5a51877f
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q28.sql
@@ -0,0 +1 @@
+SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS 
k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE 
"Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
diff --git a/benchmarks/queries/clickbench/queries/q29.sql 
b/benchmarks/queries/clickbench/queries/q29.sql
new file mode 100644
index 0000000000..bfff250906
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q29.sql
@@ -0,0 +1 @@
+SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), 
SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 
4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), 
SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 
9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), 
SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" 
+ 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("R [...]
diff --git a/benchmarks/queries/clickbench/queries/q3.sql 
b/benchmarks/queries/clickbench/queries/q3.sql
new file mode 100644
index 0000000000..db818fa013
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q3.sql
@@ -0,0 +1 @@
+SELECT AVG("UserID") FROM hits;
diff --git a/benchmarks/queries/clickbench/queries/q30.sql 
b/benchmarks/queries/clickbench/queries/q30.sql
new file mode 100644
index 0000000000..8b4bf19b7f
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q30.sql
@@ -0,0 +1 @@
+SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), 
AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY 
"SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q31.sql 
b/benchmarks/queries/clickbench/queries/q31.sql
new file mode 100644
index 0000000000..5ab49a38b8
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q31.sql
@@ -0,0 +1 @@
+SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), 
AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", 
"ClientIP" ORDER BY c DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q32.sql 
b/benchmarks/queries/clickbench/queries/q32.sql
new file mode 100644
index 0000000000..d00bc12405
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q32.sql
@@ -0,0 +1 @@
+SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), 
AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC 
LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q33.sql 
b/benchmarks/queries/clickbench/queries/q33.sql
new file mode 100644
index 0000000000..45d491d1c3
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q33.sql
@@ -0,0 +1 @@
+SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q34.sql 
b/benchmarks/queries/clickbench/queries/q34.sql
new file mode 100644
index 0000000000..7e878804de
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q34.sql
@@ -0,0 +1 @@
+SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC 
LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q35.sql 
b/benchmarks/queries/clickbench/queries/q35.sql
new file mode 100644
index 0000000000..c03da84fb1
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q35.sql
@@ -0,0 +1 @@
+SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS 
c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 
ORDER BY c DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q36.sql 
b/benchmarks/queries/clickbench/queries/q36.sql
new file mode 100644
index 0000000000..b76dce5cab
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q36.sql
@@ -0,0 +1 @@
+SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND 
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" 
= 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY PageViews DESC 
LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q37.sql 
b/benchmarks/queries/clickbench/queries/q37.sql
new file mode 100644
index 0000000000..49017e3a5f
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q37.sql
@@ -0,0 +1 @@
+SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND 
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" 
= 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY PageViews 
DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q38.sql 
b/benchmarks/queries/clickbench/queries/q38.sql
new file mode 100644
index 0000000000..b0cb6814bd
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q38.sql
@@ -0,0 +1 @@
+SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND 
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 
AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews DESC 
LIMIT 10 OFFSET 1000;
diff --git a/benchmarks/queries/clickbench/queries/q39.sql 
b/benchmarks/queries/clickbench/queries/q39.sql
new file mode 100644
index 0000000000..8327eb9bd5
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q39.sql
@@ -0,0 +1 @@
+SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN 
("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, 
"URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND 
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 
GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY 
PageViews DESC LIMIT 10 OFFSET 1000;
diff --git a/benchmarks/queries/clickbench/queries/q4.sql 
b/benchmarks/queries/clickbench/queries/q4.sql
new file mode 100644
index 0000000000..027310ad75
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q4.sql
@@ -0,0 +1 @@
+SELECT COUNT(DISTINCT "UserID") FROM hits;
diff --git a/benchmarks/queries/clickbench/queries/q40.sql 
b/benchmarks/queries/clickbench/queries/q40.sql
new file mode 100644
index 0000000000..d30d7c4142
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q40.sql
@@ -0,0 +1 @@
+SELECT "URLHash", "EventDate", COUNT(*) AS PageViews FROM hits WHERE 
"CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= 
'2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND 
"RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate" ORDER BY 
PageViews DESC LIMIT 10 OFFSET 100;
diff --git a/benchmarks/queries/clickbench/queries/q41.sql 
b/benchmarks/queries/clickbench/queries/q41.sql
new file mode 100644
index 0000000000..0e9a51a7f5
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q41.sql
@@ -0,0 +1 @@
+SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS PageViews FROM 
hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= 
'2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 
2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY 
PageViews DESC LIMIT 10 OFFSET 10000;
diff --git a/benchmarks/queries/clickbench/queries/q42.sql 
b/benchmarks/queries/clickbench/queries/q42.sql
new file mode 100644
index 0000000000..dcad5daa1b
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q42.sql
@@ -0,0 +1 @@
+SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, COUNT(*) 
AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-14' 
AND "EventDate" <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 
GROUP BY DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) ORDER BY 
DATE_TRUNC('minute', M) LIMIT 10 OFFSET 1000;
diff --git a/benchmarks/queries/clickbench/queries/q5.sql 
b/benchmarks/queries/clickbench/queries/q5.sql
new file mode 100644
index 0000000000..35b17097d8
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q5.sql
@@ -0,0 +1 @@
+SELECT COUNT(DISTINCT "SearchPhrase") FROM hits;
diff --git a/benchmarks/queries/clickbench/queries/q6.sql 
b/benchmarks/queries/clickbench/queries/q6.sql
new file mode 100644
index 0000000000..6841036436
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q6.sql
@@ -0,0 +1 @@
+SELECT MIN("EventDate"), MAX("EventDate") FROM hits;
diff --git a/benchmarks/queries/clickbench/queries/q7.sql 
b/benchmarks/queries/clickbench/queries/q7.sql
new file mode 100644
index 0000000000..ab8528c1b1
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q7.sql
@@ -0,0 +1 @@
+SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY 
"AdvEngineID" ORDER BY COUNT(*) DESC;
diff --git a/benchmarks/queries/clickbench/queries/q8.sql 
b/benchmarks/queries/clickbench/queries/q8.sql
new file mode 100644
index 0000000000..e5691bb66f
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q8.sql
@@ -0,0 +1 @@
+SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" 
ORDER BY u DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/queries/q9.sql 
b/benchmarks/queries/clickbench/queries/q9.sql
new file mode 100644
index 0000000000..42c22d9685
--- /dev/null
+++ b/benchmarks/queries/clickbench/queries/q9.sql
@@ -0,0 +1 @@
+SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), 
COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10;
diff --git a/benchmarks/queries/clickbench/update_queries.sh 
b/benchmarks/queries/clickbench/update_queries.sh
new file mode 100755
index 0000000000..d7db7359aa
--- /dev/null
+++ b/benchmarks/queries/clickbench/update_queries.sh
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script is meant for developers of DataFusion -- it is runnable
+# from the standard DataFusion development environment and uses cargo,
+# etc and orchestrates gathering data and run the benchmark binary in
+# different configurations.
+
+# Script to download ClickBench queries and split them into individual files
+
+set -e  # Exit on any error
+
+# URL for the raw file (not the GitHub page)
+URL="https://raw.githubusercontent.com/ClickHouse/ClickBench/main/datafusion/queries.sql";
+
+# Temporary file to store downloaded content
+TEMP_FILE="queries.sql"
+
+TARGET_DIR="queries"
+
+# Download the file
+echo "Downloading queries from $URL..."
+if command -v curl &> /dev/null; then
+    curl -s -o "$TEMP_FILE" "$URL"
+elif command -v wget &> /dev/null; then
+    wget -q -O "$TEMP_FILE" "$URL"
+else
+    echo "Error: Neither curl nor wget is available. Please install one of 
them."
+    exit 1
+fi
+
+# Check if download was successful
+if [ ! -f "$TEMP_FILE" ] || [ ! -s "$TEMP_FILE" ]; then
+    echo "Error: Failed to download or file is empty"
+    exit 1
+fi
+
+# Initialize counter
+counter=0
+
+# Ensure the target directory exists
+if [ ! -d ${TARGET_DIR} ]; then
+  mkdir -p ${TARGET_DIR}
+fi
+
+# Read the file line by line and create individual query files
+mapfile -t lines < $TEMP_FILE
+for line in "${lines[@]}"; do
+    # Skip empty lines
+    if [ -n "$line" ]; then
+        # Create filename with zero-padded counter
+        filename="q${counter}.sql"
+
+        # Write the line to the individual file
+        echo "$line" > "${TARGET_DIR}/$filename"
+
+        echo "Created ${TARGET_DIR}/$filename"
+
+        # Increment counter
+        (( counter += 1 ))
+    fi
+done
+
+# Clean up temporary file
+rm "$TEMP_FILE"
\ No newline at end of file
diff --git a/benchmarks/src/clickbench.rs b/benchmarks/src/clickbench.rs
index 57726fd181..8d1847b1b8 100644
--- a/benchmarks/src/clickbench.rs
+++ b/benchmarks/src/clickbench.rs
@@ -15,8 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::path::Path;
-use std::path::PathBuf;
+use std::fs;
+use std::io::ErrorKind;
+use std::path::{Path, PathBuf};
 
 use crate::util::{BenchmarkRun, CommonOpt, QueryResult};
 use datafusion::{
@@ -56,12 +57,12 @@ pub struct RunOpt {
     )]
     path: PathBuf,
 
-    /// Path to queries.sql (single file)
+    /// Path to queries directory
     #[structopt(
         parse(from_os_str),
         short = "r",
         long = "queries-path",
-        default_value = "benchmarks/queries/clickbench/queries.sql"
+        default_value = "benchmarks/queries/clickbench/queries"
     )]
     queries_path: PathBuf,
 
@@ -70,49 +71,47 @@ pub struct RunOpt {
     output_path: Option<PathBuf>,
 }
 
-struct AllQueries {
-    queries: Vec<String>,
+/// Get the SQL file path
+pub fn get_query_path(query_dir: &Path, query: usize) -> PathBuf {
+    let mut query_path = query_dir.to_path_buf();
+    query_path.push(format!("q{query}.sql"));
+    query_path
 }
 
-impl AllQueries {
-    fn try_new(path: &Path) -> Result<Self> {
-        // ClickBench has all queries in a single file identified by line 
number
-        let all_queries = std::fs::read_to_string(path)
-            .map_err(|e| exec_datafusion_err!("Could not open {path:?}: 
{e}"))?;
-        Ok(Self {
-            queries: all_queries.lines().map(|s| s.to_string()).collect(),
-        })
+/// Get the SQL statement from the specified query file
+pub fn get_query_sql(query_path: &Path) -> Result<Option<String>> {
+    if fs::exists(query_path)? {
+        Ok(Some(fs::read_to_string(query_path)?))
+    } else {
+        Ok(None)
     }
+}
+
+impl RunOpt {
+    pub async fn run(self) -> Result<()> {
+        println!("Running benchmarks with the following options: {self:?}");
 
-    /// Returns the text of query `query_id`
-    fn get_query(&self, query_id: usize) -> Result<&str> {
-        self.queries
-            .get(query_id)
-            .ok_or_else(|| {
-                let min_id = self.min_query_id();
-                let max_id = self.max_query_id();
+        let query_dir_metadata = fs::metadata(&self.queries_path).map_err(|e| {
+            if e.kind() == ErrorKind::NotFound {
                 exec_datafusion_err!(
-                    "Invalid query id {query_id}. Must be between {min_id} and 
{max_id}"
+                    "Query path '{}' does not exist.",
+                    &self.queries_path.to_str().unwrap()
                 )
-            })
-            .map(|s| s.as_str())
-    }
+            } else {
+                DataFusionError::External(Box::new(e))
+            }
+        })?;
 
-    fn min_query_id(&self) -> usize {
-        0
-    }
+        if !query_dir_metadata.is_dir() {
+            return Err(exec_datafusion_err!(
+                "Query path '{}' is not a directory.",
+                &self.queries_path.to_str().unwrap()
+            ));
+        }
 
-    fn max_query_id(&self) -> usize {
-        self.queries.len() - 1
-    }
-}
-impl RunOpt {
-    pub async fn run(self) -> Result<()> {
-        println!("Running benchmarks with the following options: {self:?}");
-        let queries = AllQueries::try_new(self.queries_path.as_path())?;
         let query_range = match self.query {
             Some(query_id) => query_id..=query_id,
-            None => queries.min_query_id()..=queries.max_query_id(),
+            None => 0..=usize::MAX,
         };
 
         // configure parquet options
@@ -130,8 +129,18 @@ impl RunOpt {
 
         let mut benchmark_run = BenchmarkRun::new();
         for query_id in query_range {
+            let query_path = get_query_path(&self.queries_path, query_id);
+            let Some(sql) = get_query_sql(&query_path)? else {
+                if self.query.is_some() {
+                    return Err(exec_datafusion_err!(
+                        "Could not load query file '{}'.",
+                        &query_path.to_str().unwrap()
+                    ));
+                }
+                break;
+            };
             benchmark_run.start_new_case(&format!("Query {query_id}"));
-            let query_run = self.benchmark_query(&queries, query_id, 
&ctx).await;
+            let query_run = self.benchmark_query(&sql, query_id, &ctx).await;
             match query_run {
                 Ok(query_results) => {
                     for iter in query_results {
@@ -151,11 +160,10 @@ impl RunOpt {
 
     async fn benchmark_query(
         &self,
-        queries: &AllQueries,
+        sql: &str,
         query_id: usize,
         ctx: &SessionContext,
     ) -> Result<Vec<QueryResult>> {
-        let sql = queries.get_query(query_id)?;
         println!("Q{query_id}: {sql}");
 
         let mut millis = Vec::with_capacity(self.iterations());
diff --git a/datafusion/core/benches/sql_planner.rs 
b/datafusion/core/benches/sql_planner.rs
index 6dc953f56b..d02478d2b4 100644
--- a/datafusion/core/benches/sql_planner.rs
+++ b/datafusion/core/benches/sql_planner.rs
@@ -30,9 +30,6 @@ use datafusion::datasource::MemTable;
 use datafusion::execution::context::SessionContext;
 use datafusion_common::ScalarValue;
 use datafusion_expr::col;
-use itertools::Itertools;
-use std::fs::File;
-use std::io::{BufRead, BufReader};
 use std::path::PathBuf;
 use std::sync::Arc;
 use test_utils::tpcds::tpcds_schemas;
@@ -466,17 +463,20 @@ fn criterion_benchmark(c: &mut Criterion) {
     // });
 
     // -- clickbench --
-
-    let queries_file =
-        
File::open(format!("{benchmarks_path}queries/clickbench/queries.sql")).unwrap();
-    let extended_file =
-        
File::open(format!("{benchmarks_path}queries/clickbench/extended.sql")).unwrap();
-
-    let clickbench_queries: Vec<String> = BufReader::new(queries_file)
-        .lines()
-        .chain(BufReader::new(extended_file).lines())
-        .map(|l| l.expect("Could not parse line"))
-        .collect_vec();
+    let clickbench_queries = (0..=42)
+        .map(|q| {
+            std::fs::read_to_string(format!(
+                "{benchmarks_path}queries/clickbench/queries/q{q}.sql"
+            ))
+            .unwrap()
+        })
+        .chain((0..=7).map(|q| {
+            std::fs::read_to_string(format!(
+                "{benchmarks_path}queries/clickbench/extended/q{q}.sql"
+            ))
+            .unwrap()
+        }))
+        .collect::<Vec<_>>();
 
     let clickbench_ctx = register_clickbench_hits_table(&rt);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org


Reply via email to