This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 8f0440516 [VL] Include ClickBench benchmark in gluten-it (#5887)
8f0440516 is described below
commit 8f044051674e2ecb2fa378b3f01e9bf121cfad4f
Author: Hongze Zhang <[email protected]>
AuthorDate: Tue May 28 15:01:46 2024 +0800
[VL] Include ClickBench benchmark in gluten-it (#5887)
---
tools/gluten-it/README.md | 2 +-
.../{tpc/TpcMixin.java => BaseMixin.java} | 23 ++--
.../gluten/integration/{tpc/Tpc.java => Cli.java} | 20 +--
.../{tpc => }/command/DataGenMixin.java | 10 +-
.../integration/{tpc => }/command/DataGenOnly.java | 6 +-
.../{tpc => }/command/Parameterized.java | 18 +--
.../integration/{tpc => }/command/Queries.java | 10 +-
.../{tpc => }/command/QueriesCompare.java | 11 +-
.../{tpc => }/command/QueriesMixin.java | 10 +-
.../{tpc => }/command/SparkRunModes.java | 2 +-
.../integration/{tpc => }/command/SparkShell.java | 11 +-
.../src/main/resources/clickbench-queries/q1.sql | 1 +
.../src/main/resources/clickbench-queries/q10.sql | 1 +
.../src/main/resources/clickbench-queries/q11.sql | 1 +
.../src/main/resources/clickbench-queries/q12.sql | 1 +
.../src/main/resources/clickbench-queries/q13.sql | 1 +
.../src/main/resources/clickbench-queries/q14.sql | 1 +
.../src/main/resources/clickbench-queries/q15.sql | 1 +
.../src/main/resources/clickbench-queries/q16.sql | 1 +
.../src/main/resources/clickbench-queries/q17.sql | 1 +
.../src/main/resources/clickbench-queries/q18.sql | 1 +
.../src/main/resources/clickbench-queries/q19.sql | 1 +
.../src/main/resources/clickbench-queries/q2.sql | 1 +
.../src/main/resources/clickbench-queries/q20.sql | 1 +
.../src/main/resources/clickbench-queries/q21.sql | 1 +
.../src/main/resources/clickbench-queries/q22.sql | 1 +
.../src/main/resources/clickbench-queries/q23.sql | 1 +
.../src/main/resources/clickbench-queries/q24.sql | 1 +
.../src/main/resources/clickbench-queries/q25.sql | 1 +
.../src/main/resources/clickbench-queries/q26.sql | 1 +
.../src/main/resources/clickbench-queries/q27.sql | 1 +
.../src/main/resources/clickbench-queries/q28.sql | 1 +
.../src/main/resources/clickbench-queries/q29.sql | 1 +
.../src/main/resources/clickbench-queries/q3.sql | 1 +
.../src/main/resources/clickbench-queries/q30.sql | 1 +
.../src/main/resources/clickbench-queries/q31.sql | 1 +
.../src/main/resources/clickbench-queries/q32.sql | 1 +
.../src/main/resources/clickbench-queries/q33.sql | 1 +
.../src/main/resources/clickbench-queries/q34.sql | 1 +
.../src/main/resources/clickbench-queries/q35.sql | 1 +
.../src/main/resources/clickbench-queries/q36.sql | 1 +
.../src/main/resources/clickbench-queries/q37.sql | 1 +
.../src/main/resources/clickbench-queries/q38.sql | 1 +
.../src/main/resources/clickbench-queries/q39.sql | 1 +
.../src/main/resources/clickbench-queries/q4.sql | 1 +
.../src/main/resources/clickbench-queries/q40.sql | 1 +
.../src/main/resources/clickbench-queries/q41.sql | 1 +
.../src/main/resources/clickbench-queries/q42.sql | 1 +
.../src/main/resources/clickbench-queries/q43.sql | 1 +
.../src/main/resources/clickbench-queries/q5.sql | 1 +
.../src/main/resources/clickbench-queries/q6.sql | 1 +
.../src/main/resources/clickbench-queries/q7.sql | 1 +
.../src/main/resources/clickbench-queries/q8.sql | 1 +
.../src/main/resources/clickbench-queries/q9.sql | 1 +
.../gluten/integration/{tpc => }/Constants.scala | 2 +-
.../gluten/integration/{tpc => }/DataGen.scala | 2 +-
.../{tpc/TpcRunner.scala => QueryRunner.scala} | 39 ++----
.../gluten/integration/{tpc => }/ShimUtils.scala | 2 +-
.../{tpc/TpcSuite.scala => Suite.scala} | 32 +++--
.../apache/gluten/integration/TableCreator.scala | 50 +++++++
.../integration/{tpc => }/action/Actions.scala | 8 +-
.../integration/{tpc => }/action/DataGenOnly.scala | 10 +-
.../{tpc => }/action/Parameterized.scala | 48 ++++---
.../integration/{tpc => }/action/Queries.scala | 32 +++--
.../{tpc => }/action/QueriesCompare.scala | 36 ++---
.../integration/{tpc => }/action/SparkShell.scala | 20 +--
.../{tpc => }/action/TableFormatter.scala | 2 +-
.../integration/clickbench/ClickBenchDataGen.scala | 45 +++++++
.../ClickBenchSuite.scala} | 88 +++++-------
.../clickbench/ClickBenchTableCreator.scala | 150 +++++++++++++++++++++
.../integration/{tpc => }/ds/TpcdsDataGen.scala | 4 +-
.../integration/{tpc => }/ds/TpcdsSuite.scala | 35 ++---
.../integration/{tpc => }/h/TpchDataGen.scala | 4 +-
.../gluten/integration/{tpc => }/h/TpchSuite.scala | 35 ++---
.../{QueryRunner.scala => SparkQueryRunner.scala} | 6 +-
.../integration/{tpc/TpcTest.java => CliTest.java} | 6 +-
tools/gluten-it/sbin/gluten-it.sh | 2 +-
77 files changed, 555 insertions(+), 269 deletions(-)
diff --git a/tools/gluten-it/README.md b/tools/gluten-it/README.md
index 59ae55e14..37ed7e82b 100644
--- a/tools/gluten-it/README.md
+++ b/tools/gluten-it/README.md
@@ -30,7 +30,7 @@ sbin/gluten-it.sh
```
Usage: gluten-it [-hV] [COMMAND]
-Gluten integration test using TPC benchmark's data and queries.
+Gluten integration test using various of benchmark's data and queries.
-h, --help Show this help message and exit.
-V, --version Print version information and exit.
Commands:
diff --git
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/TpcMixin.java
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java
similarity index 89%
rename from
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/TpcMixin.java
rename to
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java
index c0313fe77..41d244871 100644
---
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/TpcMixin.java
+++
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java
@@ -14,12 +14,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc;
+package org.apache.gluten.integration;
-import org.apache.gluten.integration.tpc.action.Action;
-import org.apache.gluten.integration.tpc.command.SparkRunModes;
-import org.apache.gluten.integration.tpc.ds.TpcdsSuite;
-import org.apache.gluten.integration.tpc.h.TpchSuite;
+import org.apache.gluten.integration.action.Action;
+import org.apache.gluten.integration.clickbench.ClickBenchSuite;
+import org.apache.gluten.integration.command.SparkRunModes;
+import org.apache.gluten.integration.ds.TpcdsSuite;
+import org.apache.gluten.integration.h.TpchSuite;
import org.apache.log4j.Level;
import org.apache.spark.SparkConf;
import picocli.CommandLine;
@@ -30,9 +31,9 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
-public class TpcMixin {
+public class BaseMixin {
- @CommandLine.Option(required = true, names = {"--benchmark-type"},
description = "TPC benchmark type: h, ds", defaultValue = "h")
+ @CommandLine.Option(required = true, names = {"--benchmark-type"},
description = "Benchmark type: h, ds, clickbench", defaultValue = "h")
private String benchmarkType;
@CommandLine.Option(names = {"-p", "--preset"}, description = "Preset used:
vanilla, velox, velox-with-celeborn, velox-with-uniffle...", defaultValue =
"velox")
@@ -124,7 +125,7 @@ public class TpcMixin {
mergeMapSafe(extraSparkConf,
runModeEnumeration.extraSparkConf())).asScala().toMap(
Predef.conforms());
- final TpcSuite suite;
+ final Suite suite;
switch (benchmarkType) {
case "h":
suite = new TpchSuite(runModeEnumeration.getSparkMasterUrl(), actions,
testConf,
@@ -138,6 +139,12 @@ public class TpcMixin {
enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
disableWscg, shufflePartitions, minimumScanPartitions);
break;
+ case "clickbench":
+ suite = new ClickBenchSuite(runModeEnumeration.getSparkMasterUrl(),
actions, testConf,
+ baselineConf, extraSparkConfScala, level, errorOnMemLeak, enableUi,
+ enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
+ disableWscg, shufflePartitions, minimumScanPartitions);
+ break;
default:
throw new IllegalArgumentException("TPC benchmark type not found: " +
benchmarkType);
}
diff --git
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/Tpc.java
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/Cli.java
similarity index 68%
rename from
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/Tpc.java
rename to
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/Cli.java
index ceaf71b54..be0bf5722 100644
---
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/Tpc.java
+++
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/Cli.java
@@ -14,26 +14,26 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc;
+package org.apache.gluten.integration;
-import org.apache.gluten.integration.tpc.command.DataGenOnly;
-import org.apache.gluten.integration.tpc.command.Parameterized;
-import org.apache.gluten.integration.tpc.command.Queries;
-import org.apache.gluten.integration.tpc.command.QueriesCompare;
-import org.apache.gluten.integration.tpc.command.SparkShell;
+import org.apache.gluten.integration.command.DataGenOnly;
+import org.apache.gluten.integration.command.Parameterized;
+import org.apache.gluten.integration.command.Queries;
+import org.apache.gluten.integration.command.QueriesCompare;
+import org.apache.gluten.integration.command.SparkShell;
import picocli.CommandLine;
@CommandLine.Command(name = "gluten-it", mixinStandardHelpOptions = true,
showDefaultValues = true,
subcommands = {DataGenOnly.class, Queries.class, QueriesCompare.class,
SparkShell.class, Parameterized.class},
- description = "Gluten integration test using TPC benchmark's data and
queries.")
-public class Tpc {
+ description = "Gluten integration test using various of benchmark's data
and queries.")
+public class Cli {
- private Tpc() {
+ private Cli() {
}
public static void main(String... args) {
- final CommandLine cmd = new CommandLine(new Tpc());
+ final CommandLine cmd = new CommandLine(new Cli());
final int exitCode = cmd.execute(args);
System.exit(exitCode);
}
diff --git
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/DataGenMixin.java
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/DataGenMixin.java
similarity index 83%
rename from
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/DataGenMixin.java
rename to
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/DataGenMixin.java
index 72ca0c699..0682f5601 100644
---
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/DataGenMixin.java
+++
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/DataGenMixin.java
@@ -14,9 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.command;
+package org.apache.gluten.integration.command;
-import org.apache.gluten.integration.tpc.action.Action;
+import org.apache.gluten.integration.action.Action;
import picocli.CommandLine;
public class DataGenMixin {
@@ -33,10 +33,14 @@ public class DataGenMixin {
if (skipDataGen) {
return new Action[0];
}
- return new Action[]{new
org.apache.gluten.integration.tpc.action.DataGenOnly(scale,
genPartitionedData)};
+ return new Action[]{new
org.apache.gluten.integration.action.DataGenOnly(scale, genPartitionedData)};
}
public double getScale() {
return scale;
}
+
+ public boolean genPartitionedData() {
+ return genPartitionedData;
+ }
}
diff --git
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/DataGenOnly.java
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/DataGenOnly.java
similarity index 90%
rename from
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/DataGenOnly.java
rename to
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/DataGenOnly.java
index 11e14426f..f1ac48888 100644
---
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/DataGenOnly.java
+++
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/DataGenOnly.java
@@ -14,9 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.command;
+package org.apache.gluten.integration.command;
-import org.apache.gluten.integration.tpc.TpcMixin;
+import org.apache.gluten.integration.BaseMixin;
import picocli.CommandLine;
import java.util.concurrent.Callable;
@@ -26,7 +26,7 @@ import java.util.concurrent.Callable;
description = "Generate data only.")
public class DataGenOnly implements Callable<Integer> {
@CommandLine.Mixin
- private TpcMixin mixin;
+ private BaseMixin mixin;
@CommandLine.Mixin
private DataGenMixin dataGenMixin;
diff --git
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/Parameterized.java
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/Parameterized.java
similarity index 92%
rename from
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/Parameterized.java
rename to
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/Parameterized.java
index bf7d89fe6..7e1234e76 100644
---
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/Parameterized.java
+++
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/Parameterized.java
@@ -14,13 +14,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.command;
+package org.apache.gluten.integration.command;
import com.google.common.base.Preconditions;
-import org.apache.gluten.integration.tpc.TpcMixin;
-import org.apache.gluten.integration.tpc.action.Dim;
-import org.apache.gluten.integration.tpc.action.DimKv;
-import org.apache.gluten.integration.tpc.action.DimValue;
+import org.apache.gluten.integration.BaseMixin;
+import org.apache.gluten.integration.action.Dim;
+import org.apache.gluten.integration.action.DimKv;
+import org.apache.gluten.integration.action.DimValue;
import org.apache.commons.lang3.ArrayUtils;
import picocli.CommandLine;
import scala.Tuple2;
@@ -38,7 +38,7 @@ import java.util.stream.Collectors;
description = "Run queries with parameterized configurations")
public class Parameterized implements Callable<Integer> {
@CommandLine.Mixin
- private TpcMixin mixin;
+ private BaseMixin mixin;
@CommandLine.Mixin
private DataGenMixin dataGenMixin;
@@ -62,6 +62,7 @@ public class Parameterized implements Callable<Integer> {
private static final Pattern dimPattern2 =
Pattern.compile("([^,:]+)((?:,[^=,]+=[^=,]+)+)");
private static final Pattern excludedDimsPattern =
Pattern.compile("[\\w-]+:[^,:]+(?:,[\\w-]+:[^,:]+)*");
+
@Override
public Integer call() throws Exception {
final Map<String, Map<String, List<Map.Entry<String, String>>>> parsed =
new HashMap<>();
@@ -129,8 +130,9 @@ public class Parameterized implements Callable<Integer> {
.collect(Collectors.toList())).asScala())).collect(Collectors.toList())).asScala()
)).collect(Collectors.toList())).asScala();
- org.apache.gluten.integration.tpc.action.Parameterized parameterized =
- new
org.apache.gluten.integration.tpc.action.Parameterized(dataGenMixin.getScale(),
queriesMixin.queries(),
+ org.apache.gluten.integration.action.Parameterized parameterized =
+ new
org.apache.gluten.integration.action.Parameterized(dataGenMixin.getScale(),
+ dataGenMixin.genPartitionedData(), queriesMixin.queries(),
queriesMixin.explain(), queriesMixin.iterations(),
warmupIterations, parsedDims,
excludedCombinations, metrics);
return mixin.runActions(ArrayUtils.addAll(dataGenMixin.makeActions(),
parameterized));
diff --git
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/Queries.java
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/Queries.java
similarity index 83%
rename from
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/Queries.java
rename to
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/Queries.java
index 53d46cc5e..f0c07b415 100644
---
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/Queries.java
+++
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/Queries.java
@@ -14,9 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.command;
+package org.apache.gluten.integration.command;
-import org.apache.gluten.integration.tpc.TpcMixin;
+import org.apache.gluten.integration.BaseMixin;
import org.apache.commons.lang3.ArrayUtils;
import picocli.CommandLine;
@@ -27,7 +27,7 @@ import java.util.concurrent.Callable;
description = "Run queries.")
public class Queries implements Callable<Integer> {
@CommandLine.Mixin
- private TpcMixin mixin;
+ private BaseMixin mixin;
@CommandLine.Mixin
private DataGenMixin dataGenMixin;
@@ -40,8 +40,8 @@ public class Queries implements Callable<Integer> {
@Override
public Integer call() throws Exception {
- org.apache.gluten.integration.tpc.action.Queries queries =
- new
org.apache.gluten.integration.tpc.action.Queries(dataGenMixin.getScale(),
queriesMixin.queries(),
+ org.apache.gluten.integration.action.Queries queries =
+ new
org.apache.gluten.integration.action.Queries(dataGenMixin.getScale(),
dataGenMixin.genPartitionedData(), queriesMixin.queries(),
queriesMixin.explain(), queriesMixin.iterations(),
randomKillTasks);
return mixin.runActions(ArrayUtils.addAll(dataGenMixin.makeActions(),
queries));
}
diff --git
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/QueriesCompare.java
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/QueriesCompare.java
similarity index 81%
rename from
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/QueriesCompare.java
rename to
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/QueriesCompare.java
index d4c0c684d..42b00f94c 100644
---
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/QueriesCompare.java
+++
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/QueriesCompare.java
@@ -14,9 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.command;
+package org.apache.gluten.integration.command;
-import org.apache.gluten.integration.tpc.TpcMixin;
+import org.apache.gluten.integration.BaseMixin;
import org.apache.commons.lang3.ArrayUtils;
import picocli.CommandLine;
@@ -27,7 +27,7 @@ import java.util.concurrent.Callable;
description = "Run queries and do result comparison with baseline preset.")
public class QueriesCompare implements Callable<Integer> {
@CommandLine.Mixin
- private TpcMixin mixin;
+ private BaseMixin mixin;
@CommandLine.Mixin
private DataGenMixin dataGenMixin;
@@ -37,8 +37,9 @@ public class QueriesCompare implements Callable<Integer> {
@Override
public Integer call() throws Exception {
- org.apache.gluten.integration.tpc.action.QueriesCompare queriesCompare =
- new
org.apache.gluten.integration.tpc.action.QueriesCompare(dataGenMixin.getScale(),
queriesMixin.queries(),
+ org.apache.gluten.integration.action.QueriesCompare queriesCompare =
+ new
org.apache.gluten.integration.action.QueriesCompare(dataGenMixin.getScale(),
+ dataGenMixin.genPartitionedData(), queriesMixin.queries(),
queriesMixin.explain(), queriesMixin.iterations());
return mixin.runActions(ArrayUtils.addAll(dataGenMixin.makeActions(),
queriesCompare));
}
diff --git
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/QueriesMixin.java
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/QueriesMixin.java
similarity index 95%
rename from
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/QueriesMixin.java
rename to
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/QueriesMixin.java
index f51488364..fc93f968c 100644
---
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/QueriesMixin.java
+++
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/QueriesMixin.java
@@ -14,11 +14,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.command;
+package org.apache.gluten.integration.command;
import com.google.common.base.Preconditions;
-import org.apache.gluten.integration.tpc.TpcSuite;
-import org.apache.gluten.integration.tpc.action.Actions;
+import org.apache.gluten.integration.Suite;
+import org.apache.gluten.integration.action.Actions;
import picocli.CommandLine;
import scala.collection.Seq;
import scala.collection.JavaConverters;
@@ -53,7 +53,7 @@ public class QueriesMixin {
public Actions.QuerySelector queries() {
return new Actions.QuerySelector() {
@Override
- public Seq<String> select(TpcSuite suite) {
+ public Seq<String> select(Suite suite) {
final List<String> all = select0(suite);
final Division div = Division.parse(shard);
final List<String> out = div(all, div);
@@ -81,7 +81,7 @@ public class QueriesMixin {
return out;
}
- private List<String> select0(TpcSuite suite) {
+ private List<String> select0(Suite suite) {
final String[] queryIds = queries;
final String[] excludedQueryIds = excludedQueries;
if (queryIds.length > 0 && excludedQueryIds.length > 0) {
diff --git
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/SparkRunModes.java
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java
similarity index 99%
rename from
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/SparkRunModes.java
rename to
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java
index 987099902..f5a5c73a6 100644
---
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/SparkRunModes.java
+++
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.command;
+package org.apache.gluten.integration.command;
import org.apache.spark.launcher.SparkLauncher;
import org.apache.spark.util.Utils;
diff --git
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/SparkShell.java
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkShell.java
similarity index 80%
rename from
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/SparkShell.java
rename to
tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkShell.java
index f16cedb86..9c2829e92 100644
---
a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/tpc/command/SparkShell.java
+++
b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkShell.java
@@ -14,9 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.command;
+package org.apache.gluten.integration.command;
-import org.apache.gluten.integration.tpc.TpcMixin;
+import org.apache.gluten.integration.BaseMixin;
import org.apache.commons.lang3.ArrayUtils;
import picocli.CommandLine;
@@ -27,15 +27,16 @@ import java.util.concurrent.Callable;
description = "Open a standard Spark shell.")
public class SparkShell implements Callable<Integer> {
@CommandLine.Mixin
- private TpcMixin mixin;
+ private BaseMixin mixin;
@CommandLine.Mixin
private DataGenMixin dataGenMixin;
@Override
public Integer call() throws Exception {
- org.apache.gluten.integration.tpc.action.SparkShell sparkShell =
- new
org.apache.gluten.integration.tpc.action.SparkShell(dataGenMixin.getScale());
+ org.apache.gluten.integration.action.SparkShell sparkShell =
+ new
org.apache.gluten.integration.action.SparkShell(dataGenMixin.getScale(),
+ dataGenMixin.genPartitionedData());
return mixin.runActions(ArrayUtils.addAll(dataGenMixin.makeActions(),
sparkShell));
}
}
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q1.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q1.sql
new file mode 100644
index 000000000..c70aa7a84
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q1.sql
@@ -0,0 +1 @@
+SELECT COUNT(*) FROM hits;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q10.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q10.sql
new file mode 100644
index 000000000..f4a9ee344
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q10.sql
@@ -0,0 +1 @@
+SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth),
COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q11.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q11.sql
new file mode 100644
index 000000000..d0ea7e3b3
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q11.sql
@@ -0,0 +1 @@
+SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE
MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q12.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q12.sql
new file mode 100644
index 000000000..2a316d173
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q12.sql
@@ -0,0 +1 @@
+SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits
WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u
DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q13.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q13.sql
new file mode 100644
index 000000000..7a0254690
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q13.sql
@@ -0,0 +1 @@
+SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY
SearchPhrase ORDER BY c DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q14.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q14.sql
new file mode 100644
index 000000000..4ce0feed2
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q14.sql
@@ -0,0 +1 @@
+SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase
<> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q15.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q15.sql
new file mode 100644
index 000000000..8e85255a0
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q15.sql
@@ -0,0 +1 @@
+SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE
SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT
10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q16.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q16.sql
new file mode 100644
index 000000000..f959e98a6
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q16.sql
@@ -0,0 +1 @@
+SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT
10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q17.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q17.sql
new file mode 100644
index 000000000..50b1f3832
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q17.sql
@@ -0,0 +1 @@
+SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase
ORDER BY COUNT(*) DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q18.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q18.sql
new file mode 100644
index 000000000..454cdb507
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q18.sql
@@ -0,0 +1 @@
+SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase
LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q19.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q19.sql
new file mode 100644
index 000000000..fbd104900
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q19.sql
@@ -0,0 +1 @@
+SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*)
FROM hits GROUP BY UserID, extract(minute FROM EventTime), SearchPhrase ORDER
BY COUNT(*) DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q2.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q2.sql
new file mode 100644
index 000000000..ad8031a76
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q2.sql
@@ -0,0 +1 @@
+SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q20.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q20.sql
new file mode 100644
index 000000000..3f84066e6
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q20.sql
@@ -0,0 +1 @@
+SELECT UserID FROM hits WHERE UserID = 435090932899640449;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q21.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q21.sql
new file mode 100644
index 000000000..4426afa35
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q21.sql
@@ -0,0 +1 @@
+SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%';
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q22.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q22.sql
new file mode 100644
index 000000000..bd28609bd
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q22.sql
@@ -0,0 +1 @@
+SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE
'%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT
10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q23.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q23.sql
new file mode 100644
index 000000000..4ee87ac45
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q23.sql
@@ -0,0 +1 @@
+SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT
UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND
SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q24.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q24.sql
new file mode 100644
index 000000000..935169e37
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q24.sql
@@ -0,0 +1 @@
+SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q25.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q25.sql
new file mode 100644
index 000000000..1bcfd4c1e
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q25.sql
@@ -0,0 +1 @@
+SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q26.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q26.sql
new file mode 100644
index 000000000..58ea7610c
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q26.sql
@@ -0,0 +1 @@
+SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q27.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q27.sql
new file mode 100644
index 000000000..88ed7ba3e
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q27.sql
@@ -0,0 +1 @@
+SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q28.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q28.sql
new file mode 100644
index 000000000..d18ffcd72
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q28.sql
@@ -0,0 +1 @@
+SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q29.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q29.sql
new file mode 100644
index 000000000..86d6f204b
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q29.sql
@@ -0,0 +1 @@
+SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q3.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q3.sql
new file mode 100644
index 000000000..7db4dc2fe
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q3.sql
@@ -0,0 +1 @@
+SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q30.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q30.sql
new file mode 100644
index 000000000..630450b43
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q30.sql
@@ -0,0 +1 @@
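+SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits;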
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q31.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q31.sql
new file mode 100644
index 000000000..f8a80d9c6
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q31.sql
@@ -0,0 +1 @@
+SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q32.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q32.sql
new file mode 100644
index 000000000..ba8ac2f91
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q32.sql
@@ -0,0 +1 @@
+SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q33.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q33.sql
new file mode 100644
index 000000000..893773a20
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q33.sql
@@ -0,0 +1 @@
+SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q34.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q34.sql
new file mode 100644
index 000000000..f00a3e8ef
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q34.sql
@@ -0,0 +1 @@
+SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q35.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q35.sql
new file mode 100644
index 000000000..213753083
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q35.sql
@@ -0,0 +1 @@
+SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q36.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q36.sql
new file mode 100644
index 000000000..581e5e389
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q36.sql
@@ -0,0 +1 @@
+SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q37.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q37.sql
new file mode 100644
index 000000000..7aa529845
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q37.sql
@@ -0,0 +1 @@
+SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q38.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q38.sql
new file mode 100644
index 000000000..b1a580dba
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q38.sql
@@ -0,0 +1 @@
+SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q39.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q39.sql
new file mode 100644
index 000000000..93b691f37
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q39.sql
@@ -0,0 +1 @@
+SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC OFFSET 1000 LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q4.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q4.sql
new file mode 100644
index 000000000..e953498fe
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q4.sql
@@ -0,0 +1 @@
+SELECT AVG(UserID) FROM hits;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q40.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q40.sql
new file mode 100644
index 000000000..d97b60772
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q40.sql
@@ -0,0 +1 @@
+SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END, URL ORDER BY PageViews DESC OFFSET 1000 LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q41.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q41.sql
new file mode 100644
index 000000000..321a06e3e
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q41.sql
@@ -0,0 +1 @@
+SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC OFFSET 100 LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q42.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q42.sql
new file mode 100644
index 000000000..46b81c5be
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q42.sql
@@ -0,0 +1 @@
+SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC OFFSET 10000 LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q43.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q43.sql
new file mode 100644
index 000000000..b54c09211
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q43.sql
@@ -0,0 +1 @@
+SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-14' AND EventDate <= DATE '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) OFFSET 1000 LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q5.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q5.sql
new file mode 100644
index 000000000..2f9baf825
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q5.sql
@@ -0,0 +1 @@
+SELECT COUNT(DISTINCT UserID) FROM hits;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q6.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q6.sql
new file mode 100644
index 000000000..e9615f8fd
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q6.sql
@@ -0,0 +1 @@
+SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q7.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q7.sql
new file mode 100644
index 000000000..03fbb82d6
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q7.sql
@@ -0,0 +1 @@
+SELECT MIN(EventDate), MAX(EventDate) FROM hits;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q8.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q8.sql
new file mode 100644
index 000000000..25a10b112
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q8.sql
@@ -0,0 +1 @@
+SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC;
diff --git
a/tools/gluten-it/common/src/main/resources/clickbench-queries/q9.sql
b/tools/gluten-it/common/src/main/resources/clickbench-queries/q9.sql
new file mode 100644
index 000000000..a6bf47e4f
--- /dev/null
+++ b/tools/gluten-it/common/src/main/resources/clickbench-queries/q9.sql
@@ -0,0 +1 @@
+SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/Constants.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Constants.scala
similarity index 99%
rename from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/Constants.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Constants.scala
index d39a16c32..50766f3a9 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/Constants.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Constants.scala
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc
+package org.apache.gluten.integration
import org.apache.spark.SparkConf
import org.apache.spark.sql.TypeUtils
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/DataGen.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/DataGen.scala
similarity index 98%
rename from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/DataGen.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/DataGen.scala
index e810a4dc2..6b2d4ec71 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/DataGen.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/DataGen.scala
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc
+package org.apache.gluten.integration
import org.apache.spark.sql.types.{DataType, StructField, StructType}
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/TpcRunner.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/QueryRunner.scala
similarity index 54%
rename from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/TpcRunner.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/QueryRunner.scala
index 908b8206e..88e8e2250 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/TpcRunner.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/QueryRunner.scala
@@ -14,26 +14,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc
-
-import org.apache.spark.sql.{AnalysisException, QueryRunner, RunResult,
SparkSession}
+package org.apache.gluten.integration
import com.google.common.base.Preconditions
-import org.apache.commons.io.FileUtils
+import org.apache.spark.sql.{RunResult, SparkQueryRunner, SparkSession}
import java.io.File
-class TpcRunner(val queryResourceFolder: String, val dataPath: String) {
+class QueryRunner(val queryResourceFolder: String, val dataPath: String) {
Preconditions.checkState(
new File(dataPath).exists(),
s"Data not found at $dataPath, try using command `<gluten-it>
data-gen-only <options>` to generate it first.",
Array(): _*)
- def createTables(spark: SparkSession): Unit = {
- TpcRunner.createTables(spark, dataPath)
+ def createTables(creator: TableCreator, spark: SparkSession): Unit = {
+ creator.create(spark, dataPath)
}
- def runTpcQuery(
+ def runQuery(
spark: SparkSession,
desc: String,
caseId: String,
@@ -41,29 +39,8 @@ class TpcRunner(val queryResourceFolder: String, val
dataPath: String) {
metrics: Array[String] = Array(),
randomKillTasks: Boolean = false): RunResult = {
val path = "%s/%s.sql".format(queryResourceFolder, caseId)
- QueryRunner.runTpcQuery(spark, desc, path, explain, metrics,
randomKillTasks)
+ SparkQueryRunner.runQuery(spark, desc, path, explain, metrics,
randomKillTasks)
}
}
-object TpcRunner {
- def createTables(spark: SparkSession, dataPath: String): Unit = {
- val files = new File(dataPath).listFiles()
- files.foreach(file => {
- if (spark.catalog.tableExists(file.getName)) {
- println("Table exists: " + file.getName)
- } else {
- println("Creating catalog table: " + file.getName)
- spark.catalog.createTable(file.getName, file.getAbsolutePath,
"parquet")
- try {
- spark.catalog.recoverPartitions(file.getName)
- } catch {
- case _: AnalysisException =>
- }
- }
- })
- }
-
- private def delete(path: String): Unit = {
- FileUtils.forceDelete(new File(path))
- }
-}
+object QueryRunner {}
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/ShimUtils.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ShimUtils.scala
similarity index 97%
rename from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/ShimUtils.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ShimUtils.scala
index 19e15df5c..d2986bfa7 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/ShimUtils.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ShimUtils.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc
+package org.apache.gluten.integration
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder}
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/TpcSuite.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Suite.scala
similarity index 89%
rename from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/TpcSuite.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Suite.scala
index f7605e273..9e31e1171 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/TpcSuite.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Suite.scala
@@ -14,22 +14,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc
-
-import org.apache.gluten.integration.tpc.action.Action
+package org.apache.gluten.integration
+import org.apache.gluten.integration.action.Action
+import org.apache.log4j.{Level, LogManager}
import org.apache.spark.SparkConf
import org.apache.spark.deploy.history.HistoryServerHelper
import org.apache.spark.network.util.ByteUnit
import org.apache.spark.sql.ConfUtils.ConfImplicits._
import org.apache.spark.sql.SparkSessionSwitcher
-import org.apache.log4j.{Level, LogManager}
-
import java.io.File
import java.util.Scanner
-abstract class TpcSuite(
+abstract class Suite(
private val masterUrl: String,
private val actions: Array[Action],
private val testConf: SparkConf,
@@ -49,7 +47,7 @@ abstract class TpcSuite(
resetLogLevel()
- private[tpc] val sessionSwitcher: SparkSessionSwitcher =
+ private[integration] val sessionSwitcher: SparkSessionSwitcher =
new SparkSessionSwitcher(masterUrl, logLevel.toString)
// define initial configs
@@ -153,32 +151,32 @@ abstract class TpcSuite(
}
}
+ def tableCreator(): TableCreator
+
private def resetLogLevel(): Unit = {
LogManager.getRootLogger.setLevel(logLevel)
}
- private[tpc] def getBaselineConf(): SparkConf = {
+ private[integration] def getBaselineConf(): SparkConf = {
baselineConf.clone()
}
- private[tpc] def getTestConf(): SparkConf = {
+ private[integration] def getTestConf(): SparkConf = {
testConf.clone()
}
protected def historyWritePath(): String
- private[tpc] def dataWritePath(scale: Double): String
-
- private[tpc] def createDataGen(scale: Double, genPartitionedData: Boolean):
DataGen
+ private[integration] def dataWritePath(scale: Double, genPartitionedData:
Boolean): String
- private[tpc] def queryResource(): String
+ private[integration] def createDataGen(scale: Double, genPartitionedData:
Boolean): DataGen
- protected def typeModifiers(): List[TypeModifier]
+ private[integration] def queryResource(): String
- private[tpc] def allQueryIds(): Array[String]
+ private[integration] def allQueryIds(): Array[String]
- private[tpc] def desc(): String
+ private[integration] def desc(): String
}
-object TpcSuite {}
+object Suite {}
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/TableCreator.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/TableCreator.scala
new file mode 100644
index 000000000..b35aceef8
--- /dev/null
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/TableCreator.scala
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.integration
+
+import org.apache.spark.sql.{AnalysisException, SparkSession}
+
+import java.io.File
+
+trait TableCreator {
+ def create(spark: SparkSession, dataPath: String): Unit
+}
+
+object TableCreator {
+ def discoverSchema(): TableCreator = {
+ DiscoverSchema
+ }
+
+ private object DiscoverSchema extends TableCreator {
+ override def create(spark: SparkSession, dataPath: String): Unit = {
+ val files = new File(dataPath).listFiles()
+ files.foreach(file => {
+ if (spark.catalog.tableExists(file.getName)) {
+ println("Table exists: " + file.getName)
+ } else {
+ println("Creating catalog table: " + file.getName)
+ spark.catalog.createTable(file.getName, file.getAbsolutePath,
"parquet")
+ try {
+ spark.catalog.recoverPartitions(file.getName)
+ } catch {
+ case _: AnalysisException =>
+ }
+ }
+ })
+ }
+ }
+}
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Actions.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/Actions.scala
similarity index 82%
rename from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Actions.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/Actions.scala
index 5e49b2888..4977dda70 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Actions.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/Actions.scala
@@ -14,16 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.action
+package org.apache.gluten.integration.action
-import org.apache.gluten.integration.tpc.TpcSuite
+import org.apache.gluten.integration.Suite
trait Action {
- def execute(tpcSuite: TpcSuite): Boolean
+ def execute(suite: Suite): Boolean
}
object Actions {
trait QuerySelector {
- def select(suite: TpcSuite): Seq[String]
+ def select(suite: Suite): Seq[String]
}
}
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/DataGenOnly.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/DataGenOnly.scala
similarity index 76%
rename from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/DataGenOnly.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/DataGenOnly.scala
index 488bb19ad..bc4383461 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/DataGenOnly.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/DataGenOnly.scala
@@ -14,16 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.action
+package org.apache.gluten.integration.action
-import org.apache.gluten.integration.tpc.TpcSuite
+import org.apache.gluten.integration.Suite
import java.io.File
case class DataGenOnly(scale: Double, genPartitionedData: Boolean) extends
Action {
- override def execute(tpcSuite: TpcSuite): Boolean = {
- tpcSuite.sessionSwitcher.useSession("baseline", "Data Gen")
- val dataGen = tpcSuite.createDataGen(scale, genPartitionedData)
+ override def execute(suite: Suite): Boolean = {
+ suite.sessionSwitcher.useSession("baseline", "Data Gen")
+ val dataGen = suite.createDataGen(scale, genPartitionedData)
dataGen.gen()
true
}
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Parameterized.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/Parameterized.scala
similarity index 88%
rename from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Parameterized.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/Parameterized.scala
index 6fc4e66d6..2871ef2de 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Parameterized.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/Parameterized.scala
@@ -14,21 +14,21 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.action
+package org.apache.gluten.integration.action
+import org.apache.commons.lang3.exception.ExceptionUtils
+import org.apache.gluten.integration.action.Actions.QuerySelector
import org.apache.gluten.integration.stat.RamStat
-import org.apache.gluten.integration.tpc.{TpcRunner, TpcSuite}
+import org.apache.gluten.integration.{QueryRunner, Suite, TableCreator}
import org.apache.spark.sql.ConfUtils.ConfImplicits._
import org.apache.spark.sql.SparkSessionSwitcher
-import org.apache.commons.lang3.exception.ExceptionUtils
-import org.apache.gluten.integration.tpc.action.Actions.QuerySelector
-import scala.collection.immutable.Map
import scala.collection.mutable
import scala.collection.mutable.{ArrayBuffer, ListBuffer}
class Parameterized(
scale: Double,
+ genPartitionedData: Boolean,
queries: QuerySelector,
explain: Boolean,
iterations: Int,
@@ -91,12 +91,13 @@ class Parameterized(
coordinateMap
}
- override def execute(tpcSuite: TpcSuite): Boolean = {
- val runner: TpcRunner = new TpcRunner(tpcSuite.queryResource(),
tpcSuite.dataWritePath(scale))
- val allQueries = tpcSuite.allQueryIds()
+ override def execute(suite: Suite): Boolean = {
+ val runner: QueryRunner =
+ new QueryRunner(suite.queryResource(), suite.dataWritePath(scale,
genPartitionedData))
+ val allQueries = suite.allQueryIds()
- val sessionSwitcher = tpcSuite.sessionSwitcher
- val testConf = tpcSuite.getTestConf()
+ val sessionSwitcher = suite.sessionSwitcher
+ val testConf = suite.getTestConf()
println("Prepared coordinates: ")
coordinates.toList.map(_._1).zipWithIndex.foreach {
@@ -112,12 +113,12 @@ class Parameterized(
sessionSwitcher.registerSession(coordinate.toString, conf)
}
- val runQueryIds = queries.select(tpcSuite)
+ val runQueryIds = queries.select(suite)
// warm up
(0 until warmupIterations).foreach { _ =>
runQueryIds.foreach { queryId =>
- Parameterized.warmUp(queryId, tpcSuite.desc(), sessionSwitcher, runner)
+ Parameterized.warmUp(suite.tableCreator(), queryId, suite.desc(),
sessionSwitcher, runner)
}
}
@@ -126,12 +127,13 @@ class Parameterized(
val coordinateResults = (0 until iterations).flatMap { iteration =>
println(s"Running tests (iteration $iteration) with coordinate
$coordinate...")
runQueryIds.map { queryId =>
- Parameterized.runTpcQuery(
+ Parameterized.runQuery(
runner,
+ suite.tableCreator(),
sessionSwitcher,
queryId,
coordinate,
- tpcSuite.desc(),
+ suite.desc(),
explain,
metrics)
}
@@ -241,8 +243,9 @@ case class TestResultLines(
}
object Parameterized {
- private def runTpcQuery(
- runner: TpcRunner,
+ private def runQuery(
+ runner: QueryRunner,
+ creator: TableCreator,
sessionSwitcher: SparkSessionSwitcher,
id: String,
coordinate: Coordinate,
@@ -253,9 +256,9 @@ object Parameterized {
try {
val testDesc = "Gluten Spark %s %s %s".format(desc, id, coordinate)
sessionSwitcher.useSession(coordinate.toString, testDesc)
- runner.createTables(sessionSwitcher.spark())
+ runner.createTables(creator, sessionSwitcher.spark())
val result =
- runner.runTpcQuery(sessionSwitcher.spark(), testDesc, id, explain,
metrics)
+ runner.runQuery(sessionSwitcher.spark(), testDesc, id, explain,
metrics)
val resultRows = result.rows
println(
s"Successfully ran query $id. " +
@@ -279,17 +282,18 @@ object Parameterized {
}
}
- private[tpc] def warmUp(
+ private[integration] def warmUp(
+ creator: TableCreator,
id: String,
desc: String,
sessionSwitcher: SparkSessionSwitcher,
- runner: TpcRunner): Unit = {
+ runner: QueryRunner): Unit = {
println(s"Warming up: Running query: $id...")
try {
val testDesc = "Gluten Spark %s %s warm up".format(desc, id)
sessionSwitcher.useSession("test", testDesc)
- runner.createTables(sessionSwitcher.spark())
- val result = runner.runTpcQuery(sessionSwitcher.spark(), testDesc, id,
explain = false)
+ runner.createTables(creator, sessionSwitcher.spark())
+ val result = runner.runQuery(sessionSwitcher.spark(), testDesc, id,
explain = false)
val resultRows = result.rows
println(
s"Warming up: Successfully ran query $id. " +
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Queries.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/Queries.scala
similarity index 87%
rename from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Queries.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/Queries.scala
index 290b8e3f5..cf24b906b 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/Queries.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/Queries.scala
@@ -14,32 +14,35 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.action
+package org.apache.gluten.integration.action
-import org.apache.gluten.integration.stat.RamStat
-import org.apache.gluten.integration.tpc.{TpcRunner, TpcSuite}
import org.apache.commons.lang3.exception.ExceptionUtils
-import org.apache.gluten.integration.tpc.action.Actions.QuerySelector
+import org.apache.gluten.integration.action.Actions.QuerySelector
+import org.apache.gluten.integration.stat.RamStat
+import org.apache.gluten.integration.{QueryRunner, Suite}
case class Queries(
scale: Double,
+ genPartitionedData: Boolean,
queries: QuerySelector,
explain: Boolean,
iterations: Int,
randomKillTasks: Boolean)
extends Action {
- override def execute(tpcSuite: TpcSuite): Boolean = {
- val runQueryIds = queries.select(tpcSuite)
- val runner: TpcRunner = new TpcRunner(tpcSuite.queryResource(),
tpcSuite.dataWritePath(scale))
+ override def execute(suite: Suite): Boolean = {
+ val runQueryIds = queries.select(suite)
+ val runner: QueryRunner =
+ new QueryRunner(suite.queryResource(), suite.dataWritePath(scale,
genPartitionedData))
val results = (0 until iterations).flatMap { iteration =>
println(s"Running tests (iteration $iteration)...")
runQueryIds.map { queryId =>
- Queries.runTpcQuery(
+ Queries.runQuery(
runner,
- tpcSuite.sessionSwitcher,
+ suite.tableCreator(),
+ suite.sessionSwitcher,
queryId,
- tpcSuite.desc(),
+ suite.desc(),
explain,
randomKillTasks)
}
@@ -153,8 +156,9 @@ object Queries {
None)))
}
- private def runTpcQuery(
- runner: _root_.org.apache.gluten.integration.tpc.TpcRunner,
+ private def runQuery(
+ runner: _root_.org.apache.gluten.integration.QueryRunner,
+ creator: _root_.org.apache.gluten.integration.TableCreator,
sessionSwitcher: _root_.org.apache.spark.sql.SparkSessionSwitcher,
id: _root_.java.lang.String,
desc: _root_.java.lang.String,
@@ -164,8 +168,8 @@ object Queries {
try {
val testDesc = "Gluten Spark %s %s".format(desc, id)
sessionSwitcher.useSession("test", testDesc)
- runner.createTables(sessionSwitcher.spark())
- val result = runner.runTpcQuery(
+ runner.createTables(creator, sessionSwitcher.spark())
+ val result = runner.runQuery(
sessionSwitcher.spark(),
testDesc,
id,
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/QueriesCompare.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/QueriesCompare.scala
similarity index 89%
rename from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/QueriesCompare.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/QueriesCompare.scala
index 404d75cb4..320bd61b6 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/QueriesCompare.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/QueriesCompare.scala
@@ -14,32 +14,35 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.action
+package org.apache.gluten.integration.action
+import org.apache.commons.lang3.exception.ExceptionUtils
+import org.apache.gluten.integration.action.Actions.QuerySelector
import org.apache.gluten.integration.stat.RamStat
-import org.apache.gluten.integration.tpc.{TpcRunner, TpcSuite}
+import org.apache.gluten.integration.{QueryRunner, Suite, TableCreator}
import org.apache.spark.sql.{SparkSessionSwitcher, TestUtils}
-import org.apache.commons.lang3.exception.ExceptionUtils
-import org.apache.gluten.integration.tpc.action.Actions.QuerySelector
case class QueriesCompare(
scale: Double,
+ genPartitionedData: Boolean,
queries: QuerySelector,
explain: Boolean,
iterations: Int)
extends Action {
- override def execute(tpcSuite: TpcSuite): Boolean = {
- val runner: TpcRunner = new TpcRunner(tpcSuite.queryResource(),
tpcSuite.dataWritePath(scale))
- val runQueryIds = queries.select(tpcSuite)
+ override def execute(suite: Suite): Boolean = {
+ val runner: QueryRunner =
+ new QueryRunner(suite.queryResource(), suite.dataWritePath(scale,
genPartitionedData))
+ val runQueryIds = queries.select(suite)
val results = (0 until iterations).flatMap { iteration =>
println(s"Running tests (iteration $iteration)...")
runQueryIds.map { queryId =>
- QueriesCompare.runTpcQuery(
+ QueriesCompare.runQuery(
+ suite.tableCreator(),
queryId,
explain,
- tpcSuite.desc(),
- tpcSuite.sessionSwitcher,
+ suite.desc(),
+ suite.sessionSwitcher,
runner)
}
}.toList
@@ -179,24 +182,25 @@ object QueriesCompare {
None)))
}
- private[tpc] def runTpcQuery(
+ private[integration] def runQuery(
+ creator: TableCreator,
id: String,
explain: Boolean,
desc: String,
sessionSwitcher: SparkSessionSwitcher,
- runner: TpcRunner): TestResultLine = {
+ runner: QueryRunner): TestResultLine = {
println(s"Running query: $id...")
try {
val baseLineDesc = "Vanilla Spark %s %s".format(desc, id)
sessionSwitcher.useSession("baseline", baseLineDesc)
- runner.createTables(sessionSwitcher.spark())
+ runner.createTables(creator, sessionSwitcher.spark())
val expected =
- runner.runTpcQuery(sessionSwitcher.spark(), baseLineDesc, id, explain
= explain)
+ runner.runQuery(sessionSwitcher.spark(), baseLineDesc, id, explain =
explain)
val expectedRows = expected.rows
val testDesc = "Gluten Spark %s %s".format(desc, id)
sessionSwitcher.useSession("test", testDesc)
- runner.createTables(sessionSwitcher.spark())
- val result = runner.runTpcQuery(sessionSwitcher.spark(), testDesc, id,
explain = explain)
+ runner.createTables(creator, sessionSwitcher.spark())
+ val result = runner.runQuery(sessionSwitcher.spark(), testDesc, id,
explain = explain)
val resultRows = result.rows
val error = TestUtils.compareAnswers(resultRows, expectedRows, sort =
true)
if (error.isEmpty) {
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/SparkShell.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/SparkShell.scala
similarity index 58%
rename from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/SparkShell.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/SparkShell.scala
index 78e816955..76f43cb71 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/SparkShell.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/SparkShell.scala
@@ -14,19 +14,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.action
-
-import org.apache.gluten.integration.tpc.{TpcRunner, TpcSuite}
+package org.apache.gluten.integration.action
+import org.apache.gluten.integration.{QueryRunner, Suite}
import org.apache.spark.repl.Main
-case class SparkShell(scale: Double) extends Action {
- override def execute(tpcSuite: TpcSuite): Boolean = {
- tpcSuite.sessionSwitcher.useSession("test", "Gluten Spark CLI")
- val runner: TpcRunner = new TpcRunner(tpcSuite.queryResource(),
tpcSuite.dataWritePath(scale))
- runner.createTables(tpcSuite.sessionSwitcher.spark())
- Main.sparkSession = tpcSuite.sessionSwitcher.spark()
- Main.sparkContext = tpcSuite.sessionSwitcher.spark().sparkContext
+case class SparkShell(scale: Double, genPartitionedData: Boolean) extends
Action {
+ override def execute(suite: Suite): Boolean = {
+ suite.sessionSwitcher.useSession("test", "Gluten Spark CLI")
+ val runner: QueryRunner =
+ new QueryRunner(suite.queryResource(), suite.dataWritePath(scale,
genPartitionedData))
+ runner.createTables(suite.tableCreator(), suite.sessionSwitcher.spark())
+ Main.sparkSession = suite.sessionSwitcher.spark()
+ Main.sparkContext = suite.sessionSwitcher.spark().sparkContext
Main.main(Array("-usejavacp"))
true
}
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/TableFormatter.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/TableFormatter.scala
similarity index 98%
rename from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/TableFormatter.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/TableFormatter.scala
index 8aeea9938..07e253d5e 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/action/TableFormatter.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/action/TableFormatter.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.action
+package org.apache.gluten.integration.action
import java.io.{OutputStream, PrintStream}
import scala.collection.mutable
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchDataGen.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchDataGen.scala
new file mode 100644
index 000000000..ba772f165
--- /dev/null
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchDataGen.scala
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.integration.clickbench
+
+import org.apache.commons.io.FileUtils
+import org.apache.gluten.integration.DataGen
+import org.apache.spark.sql.SparkSession
+
+import java.io.File
+import scala.language.postfixOps
+import scala.sys.process._
+
+/**
+ * Data generator of the ClickBench suite. Nothing is synthesized: the published Parquet
+ * dataset is downloaded into `dir`.
+ *
+ * @param spark Spark session supplied by the caller; `gen()` itself does not use it.
+ * @param dir   directory the dataset file is stored in.
+ */
+class ClickBenchDataGen(val spark: SparkSession, dir: String) extends DataGen {
+  import ClickBenchDataGen._
+
+  /**
+   * Downloads the dataset with `wget` and stores it as `FILE_NAME` inside `dir`, creating the
+   * directory first when it is missing.
+   *
+   * @throws RuntimeException if `wget` exits with a non-zero code.
+   */
+  override def gen(): Unit = {
+    println(s"Start to download ClickBench Parquet dataset from URL: $DATA_URL... ")
+    // Directly download from official URL.
+    val target = new File(dir + File.separator + FILE_NAME)
+    FileUtils.forceMkdirParent(target)
+    // The command is passed as an argument list so that a directory containing whitespace is
+    // not split into several arguments. "-O" pins the output to `target`; with "-P" a second
+    // run would store the data under a numbered name next to the stale file.
+    val code = Process(Seq("wget", "-O", target.getPath, DATA_URL)).!
+    if (code != 0) {
+      throw new RuntimeException("Download failed")
+    }
+    println(s"ClickBench Parquet dataset successfully downloaded to $target.")
+  }
+}
+
+/** Constants of the ClickBench data generator. */
+object ClickBenchDataGen {
+
+  /** Name of the dataset file inside the data directory; shared within package `clickbench`. */
+  private[clickbench] val FILE_NAME: String = "hits.parquet"
+
+  /** URL the Parquet dataset is downloaded from. */
+  private val DATA_URL: String =
+    "https://datasets.clickhouse.com/hits_compatible/hits.parquet"
+}
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/h/TpchSuite.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchSuite.scala
similarity index 52%
copy from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/h/TpchSuite.scala
copy to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchSuite.scala
index 9fbd83dc2..deffdb7e5 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/h/TpchSuite.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchSuite.scala
@@ -14,17 +14,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.h
-
-import org.apache.gluten.integration.tpc.{DataGen, TpcSuite, TypeModifier}
-import org.apache.gluten.integration.tpc.action.Action
-import org.apache.gluten.integration.tpc.h.TpchSuite.{HISTORY_WRITE_PATH,
TPCH_WRITE_PATH}
-
-import org.apache.spark.SparkConf
+package org.apache.gluten.integration.clickbench
+import org.apache.gluten.integration.action.Action
+import org.apache.gluten.integration.{DataGen, Suite, TableCreator}
import org.apache.log4j.Level
+import org.apache.spark.SparkConf
-class TpchSuite(
+/**
+ * ClickBench: a Benchmark For Analytical Databases
+ *
+ * See the project: https://github.com/ClickHouse/ClickBench
+ * Site: https://benchmark.clickhouse.com/
+ */
+class ClickBenchSuite(
val masterUrl: String,
val actions: Array[Action],
val testConf: SparkConf,
@@ -41,7 +44,7 @@ class TpchSuite(
val disableWscg: Boolean,
val shufflePartitions: Int,
val minimumScanPartitions: Boolean)
- extends TpcSuite(
+ extends Suite(
masterUrl,
actions,
testConf,
@@ -58,56 +61,39 @@ class TpchSuite(
disableWscg,
shufflePartitions,
minimumScanPartitions) {
+ import ClickBenchSuite._
override protected def historyWritePath(): String = HISTORY_WRITE_PATH
- override private[tpc] def dataWritePath(scale: Double): String =
TPCH_WRITE_PATH + s"-$scale"
-
- override private[tpc] def createDataGen(scale: Double, genPartitionedData:
Boolean): DataGen =
- new TpchDataGen(
- sessionSwitcher.spark(),
- scale,
- shufflePartitions,
- dataWritePath(scale),
- typeModifiers())
-
- override private[tpc] def queryResource(): String = {
- "/tpch-queries"
+ override private[integration] def dataWritePath(
+ scale: Double,
+ genPartitionedData: Boolean): String = {
+ checkDataGenArgs(scale, genPartitionedData)
+ DATA_WRITE_PATH
}
- override protected def typeModifiers(): List[TypeModifier] = {
- List()
+ override private[integration] def createDataGen(
+ scale: Double,
+ genPartitionedData: Boolean): DataGen = {
+ new ClickBenchDataGen(sessionSwitcher.spark(), dataWritePath(scale,
genPartitionedData))
}
- override private[tpc] def allQueryIds(): Array[String] =
TpchSuite.ALL_QUERY_IDS
+ override private[integration] def queryResource(): String =
"/clickbench-queries"
+
+ override private[integration] def allQueryIds(): Array[String] =
ALL_QUERY_IDS
- override private[tpc] def desc(): String = "TPC-H"
+ override private[integration] def desc(): String = "ClickBench"
+
+ override def tableCreator(): TableCreator = ClickBenchTableCreator
}
-object TpchSuite {
- private val TPCH_WRITE_PATH = "/tmp/tpch-generated"
- private val ALL_QUERY_IDS = Array(
- "q1",
- "q2",
- "q3",
- "q4",
- "q5",
- "q6",
- "q7",
- "q8",
- "q9",
- "q10",
- "q11",
- "q12",
- "q13",
- "q14",
- "q15",
- "q16",
- "q17",
- "q18",
- "q19",
- "q20",
- "q21",
- "q22")
- private val HISTORY_WRITE_PATH = "/tmp/tpch-history"
+/** Paths, query IDs and argument checks used by the ClickBench suite class. */
+private object ClickBenchSuite {
+  private val DATA_WRITE_PATH = "/tmp/clickbench-generated"
+  private val HISTORY_WRITE_PATH = "/tmp/clickbench-history"
+
+  // Query IDs "q1" through "q43".
+  private val ALL_QUERY_IDS = Array.tabulate(43)(idx => s"q${idx + 1}")
+
+  /**
+   * Asserts that the data-gen arguments are the only combination this suite accepts:
+   * a scale of 1 and no partitioned data.
+   */
+  private def checkDataGenArgs(scale: Double, genPartitionedData: Boolean): Unit = {
+    assert(scale == 1.0D, "ClickBench suite doesn't support scale factor other than 1")
+    assert(!genPartitionedData, "ClickBench suite doesn't support generating partitioned data")
+  }
+}
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchTableCreator.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchTableCreator.scala
new file mode 100644
index 000000000..33eac3862
--- /dev/null
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/clickbench/ClickBenchTableCreator.scala
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gluten.integration.clickbench
+
+import org.apache.gluten.integration.TableCreator
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.{AnalysisException, SparkSession}
+
+import java.io.File
+
+/**
+ * Table creator of the ClickBench suite: registers the downloaded Parquet file as the single
+ * catalog table `hits`, using a fixed schema.
+ */
+object ClickBenchTableCreator extends TableCreator {
+  private val TABLE_NAME = "hits"
+
+  // Schema applied to the Parquet file, written as a DDL column list.
+  private val SCHEMA: StructType = StructType.fromDDL("""
+      |watchid bigint,
+      |javaenable smallint,
+      |title varchar(65535),
+      |goodevent smallint,
+      |eventtime timestamp,
+      |eventdate date,
+      |counterid int,
+      |clientip int,
+      |regionid int,
+      |userid bigint,
+      |counterclass smallint,
+      |os smallint,
+      |useragent smallint,
+      |url varchar(65535),
+      |referer varchar(65535),
+      |isrefresh smallint,
+      |referercategoryid smallint,
+      |refererregionid int,
+      |urlcategoryid smallint,
+      |urlregionid int,
+      |resolutionwidth smallint,
+      |resolutionheight smallint,
+      |resolutiondepth smallint,
+      |flashmajor smallint,
+      |flashminor smallint,
+      |flashminor2 varchar(65535),
+      |netmajor smallint,
+      |netminor smallint,
+      |useragentmajor smallint,
+      |useragentminor varchar(65535),
+      |cookieenable smallint,
+      |javascriptenable smallint,
+      |ismobile smallint,
+      |mobilephone smallint,
+      |mobilephonemodel varchar(65535),
+      |params varchar(65535),
+      |ipnetworkid int,
+      |traficsourceid smallint,
+      |searchengineid smallint,
+      |searchphrase varchar(65535),
+      |advengineid smallint,
+      |isartifical smallint,
+      |windowclientwidth smallint,
+      |windowclientheight smallint,
+      |clienttimezone smallint,
+      |clienteventtime timestamp,
+      |silverlightversion1 smallint,
+      |silverlightversion2 smallint,
+      |silverlightversion3 int,
+      |silverlightversion4 smallint,
+      |pagecharset varchar(65535),
+      |codeversion int,
+      |islink smallint,
+      |isdownload smallint,
+      |isnotbounce smallint,
+      |funiqid bigint,
+      |originalurl varchar(65535),
+      |hid int,
+      |isoldcounter smallint,
+      |isevent smallint,
+      |isparameter smallint,
+      |dontcounthits smallint,
+      |withhash smallint,
+      |hitcolor varchar(65535),
+      |localeventtime timestamp,
+      |age smallint,
+      |sex smallint,
+      |income smallint,
+      |interests smallint,
+      |robotness smallint,
+      |remoteip int,
+      |windowname int,
+      |openername int,
+      |historylength smallint,
+      |browserlanguage varchar(65535),
+      |browsercountry varchar(65535),
+      |socialnetwork varchar(65535),
+      |socialaction varchar(65535),
+      |httperror smallint,
+      |sendtiming int,
+      |dnstiming int,
+      |connecttiming int,
+      |responsestarttiming int,
+      |responseendtiming int,
+      |fetchtiming int,
+      |socialsourcenetworkid smallint,
+      |socialsourcepage varchar(65535),
+      |paramprice bigint,
+      |paramorderid varchar(65535),
+      |paramcurrency varchar(65535),
+      |paramcurrencyid smallint,
+      |openstatservicename varchar(65535),
+      |openstatcampaignid varchar(65535),
+      |openstatadid varchar(65535),
+      |openstatsourceid varchar(65535),
+      |utmsource varchar(65535),
+      |utmmedium varchar(65535),
+      |utmcampaign varchar(65535),
+      |utmcontent varchar(65535),
+      |utmterm varchar(65535),
+      |fromtag varchar(65535),
+      |hasgclid smallint,
+      |refererhash bigint,
+      |urlhash bigint,
+      |clid int
+      |""".stripMargin)
+
+  /**
+   * Creates catalog table `hits` on top of the dataset file found in `dataPath`. Does nothing
+   * but print a message when the table already exists in the session catalog.
+   *
+   * @param spark    session whose catalog receives the table.
+   * @param dataPath directory that holds the downloaded dataset file.
+   */
+  override def create(spark: SparkSession, dataPath: String): Unit = {
+    if (spark.catalog.tableExists(TABLE_NAME)) {
+      println("Table exists: " + TABLE_NAME)
+    } else {
+      val file = new File(dataPath + File.separator + ClickBenchDataGen.FILE_NAME)
+      println("Creating catalog table: " + TABLE_NAME)
+      spark.catalog.createTable(
+        TABLE_NAME,
+        "parquet",
+        SCHEMA,
+        Map("path" -> file.getAbsolutePath))
+      try {
+        // recoverPartitions expects a table name; the data file name was passed here before.
+        spark.catalog.recoverPartitions(TABLE_NAME)
+      } catch {
+        // Best effort, ignored on purpose. NOTE(review): SCHEMA declares no partition
+        // columns, so Spark presumably rejects this call every time - confirm whether the
+        // call is needed at all.
+        case _: AnalysisException =>
+      }
+    }
+  }
+}
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/ds/TpcdsDataGen.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsDataGen.scala
similarity index 99%
rename from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/ds/TpcdsDataGen.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsDataGen.scala
index 82d16dd90..7d63fc67a 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/ds/TpcdsDataGen.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsDataGen.scala
@@ -14,9 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.ds
+package org.apache.gluten.integration.ds
-import org.apache.gluten.integration.tpc.{DataGen, ShimUtils, TypeModifier}
+import org.apache.gluten.integration.{DataGen, ShimUtils, TypeModifier}
import org.apache.spark.sql.{Column, Row, SaveMode, SparkSession}
import org.apache.spark.sql.types._
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/ds/TpcdsSuite.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
similarity index 78%
rename from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/ds/TpcdsSuite.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
index c703821c1..339e89d5b 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/ds/TpcdsSuite.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/ds/TpcdsSuite.scala
@@ -14,19 +14,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.ds
+package org.apache.gluten.integration.ds
-import org.apache.gluten.integration.tpc.{Constants, DataGen, TpcSuite,
TypeModifier}
-import org.apache.gluten.integration.tpc.action.Action
-import org.apache.gluten.integration.tpc.ds.TpcdsSuite.{
+import org.apache.gluten.integration.action.Action
+import org.apache.gluten.integration.ds.TpcdsSuite.{
ALL_QUERY_IDS,
HISTORY_WRITE_PATH,
TPCDS_WRITE_PATH
}
-
-import org.apache.spark.SparkConf
-
+import org.apache.gluten.integration.{DataGen, Suite, TableCreator,
TypeModifier}
import org.apache.log4j.Level
+import org.apache.spark.SparkConf
class TpcdsSuite(
val masterUrl: String,
@@ -45,7 +43,7 @@ class TpcdsSuite(
val disableWscg: Boolean,
val shufflePartitions: Int,
val minimumScanPartitions: Boolean)
- extends TpcSuite(
+ extends Suite(
masterUrl,
actions,
testConf,
@@ -65,28 +63,35 @@ class TpcdsSuite(
override protected def historyWritePath(): String = HISTORY_WRITE_PATH
- override private[tpc] def dataWritePath(scale: Double): String =
TPCDS_WRITE_PATH + s"-$scale"
+ override private[integration] def dataWritePath(
+ scale: Double,
+ genPartitionedData: Boolean): String =
+ TPCDS_WRITE_PATH + s"-$scale"
- override private[tpc] def createDataGen(scale: Double, genPartitionedData:
Boolean): DataGen =
+ override private[integration] def createDataGen(
+ scale: Double,
+ genPartitionedData: Boolean): DataGen =
new TpcdsDataGen(
sessionSwitcher.spark(),
scale,
shufflePartitions,
- dataWritePath(scale),
+ dataWritePath(scale, genPartitionedData),
typeModifiers(),
genPartitionedData)
- override private[tpc] def queryResource(): String = {
+ override private[integration] def queryResource(): String = {
"/tpcds-queries"
}
- override protected def typeModifiers(): List[TypeModifier] = {
+ private def typeModifiers(): List[TypeModifier] = {
List()
}
- override private[tpc] def allQueryIds(): Array[String] = ALL_QUERY_IDS
+ override private[integration] def allQueryIds(): Array[String] =
ALL_QUERY_IDS
+
+ override private[integration] def desc(): String = "TPC-DS"
- override private[tpc] def desc(): String = "TPC-DS"
+ override def tableCreator(): TableCreator = TableCreator.discoverSchema()
}
object TpcdsSuite {
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/h/TpchDataGen.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchDataGen.scala
similarity index 98%
rename from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/h/TpchDataGen.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchDataGen.scala
index fa574f59c..5223c61c9 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/h/TpchDataGen.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchDataGen.scala
@@ -14,9 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.h
+package org.apache.gluten.integration.h
-import org.apache.gluten.integration.tpc.{DataGen, ShimUtils, TypeModifier}
+import org.apache.gluten.integration.{DataGen, ShimUtils, TypeModifier}
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
import org.apache.spark.sql.types._
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/h/TpchSuite.scala
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
similarity index 71%
rename from
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/h/TpchSuite.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
index 9fbd83dc2..29c299bee 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/tpc/h/TpchSuite.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/h/TpchSuite.scala
@@ -14,15 +14,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc.h
-
-import org.apache.gluten.integration.tpc.{DataGen, TpcSuite, TypeModifier}
-import org.apache.gluten.integration.tpc.action.Action
-import org.apache.gluten.integration.tpc.h.TpchSuite.{HISTORY_WRITE_PATH,
TPCH_WRITE_PATH}
-
-import org.apache.spark.SparkConf
+package org.apache.gluten.integration.h
+import org.apache.gluten.integration.action.Action
+import org.apache.gluten.integration.h.TpchSuite.{HISTORY_WRITE_PATH,
TPCH_WRITE_PATH}
+import org.apache.gluten.integration.{DataGen, Suite, TableCreator,
TypeModifier}
import org.apache.log4j.Level
+import org.apache.spark.SparkConf
class TpchSuite(
val masterUrl: String,
@@ -41,7 +39,7 @@ class TpchSuite(
val disableWscg: Boolean,
val shufflePartitions: Int,
val minimumScanPartitions: Boolean)
- extends TpcSuite(
+ extends Suite(
masterUrl,
actions,
testConf,
@@ -61,27 +59,34 @@ class TpchSuite(
override protected def historyWritePath(): String = HISTORY_WRITE_PATH
- override private[tpc] def dataWritePath(scale: Double): String =
TPCH_WRITE_PATH + s"-$scale"
+ override private[integration] def dataWritePath(
+ scale: Double,
+ genPartitionedData: Boolean): String =
+ TPCH_WRITE_PATH + s"-$scale"
- override private[tpc] def createDataGen(scale: Double, genPartitionedData:
Boolean): DataGen =
+ override private[integration] def createDataGen(
+ scale: Double,
+ genPartitionedData: Boolean): DataGen =
new TpchDataGen(
sessionSwitcher.spark(),
scale,
shufflePartitions,
- dataWritePath(scale),
+ dataWritePath(scale, genPartitionedData),
typeModifiers())
- override private[tpc] def queryResource(): String = {
+ override private[integration] def queryResource(): String = {
"/tpch-queries"
}
- override protected def typeModifiers(): List[TypeModifier] = {
+ private def typeModifiers(): List[TypeModifier] = {
List()
}
- override private[tpc] def allQueryIds(): Array[String] =
TpchSuite.ALL_QUERY_IDS
+ override private[integration] def allQueryIds(): Array[String] =
TpchSuite.ALL_QUERY_IDS
+
+ override private[integration] def desc(): String = "TPC-H"
- override private[tpc] def desc(): String = "TPC-H"
+ override def tableCreator(): TableCreator = TableCreator.discoverSchema()
}
object TpchSuite {
diff --git
a/tools/gluten-it/common/src/main/scala/org/apache/spark/sql/QueryRunner.scala
b/tools/gluten-it/common/src/main/scala/org/apache/spark/sql/SparkQueryRunner.scala
similarity index 98%
rename from
tools/gluten-it/common/src/main/scala/org/apache/spark/sql/QueryRunner.scala
rename to
tools/gluten-it/common/src/main/scala/org/apache/spark/sql/SparkQueryRunner.scala
index a5b699a1a..bb11a679f 100644
---
a/tools/gluten-it/common/src/main/scala/org/apache/spark/sql/QueryRunner.scala
+++
b/tools/gluten-it/common/src/main/scala/org/apache/spark/sql/SparkQueryRunner.scala
@@ -33,7 +33,7 @@ import java.io.ByteArrayOutputStream
import java.nio.charset.StandardCharsets
import java.util.concurrent.atomic.AtomicInteger
-object QueryRunner {
+object SparkQueryRunner {
private val availableExecutorMetrics: Set[String] = Set(
"JVMHeapMemory",
"JVMOffHeapMemory",
@@ -52,7 +52,7 @@ object QueryRunner {
"ProcessTreeOtherVMemory",
"ProcessTreeOtherRSSMemory")
- def runTpcQuery(
+ def runQuery(
spark: SparkSession,
desc: String,
queryPath: String,
@@ -106,7 +106,7 @@ object QueryRunner {
}
private def resourceToString(resource: String): String = {
- val inStream = QueryRunner.getClass.getResourceAsStream(resource)
+ val inStream = SparkQueryRunner.getClass.getResourceAsStream(resource)
Preconditions.checkNotNull(inStream)
val outStream = new ByteArrayOutputStream
try {
diff --git
a/tools/gluten-it/common/src/test/java/org/apache/gluten/integration/tpc/TpcTest.java
b/tools/gluten-it/common/src/test/java/org/apache/gluten/integration/CliTest.java
similarity index 90%
rename from
tools/gluten-it/common/src/test/java/org/apache/gluten/integration/tpc/TpcTest.java
rename to
tools/gluten-it/common/src/test/java/org/apache/gluten/integration/CliTest.java
index 2463429af..804cf3a0a 100644
---
a/tools/gluten-it/common/src/test/java/org/apache/gluten/integration/tpc/TpcTest.java
+++
b/tools/gluten-it/common/src/test/java/org/apache/gluten/integration/CliTest.java
@@ -14,10 +14,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.integration.tpc;
+package org.apache.gluten.integration;
-public class TpcTest {
+public class CliTest {
public static void main(String[] args) {
- Tpc.main(args);
+ Cli.main(args);
}
}
diff --git a/tools/gluten-it/sbin/gluten-it.sh
b/tools/gluten-it/sbin/gluten-it.sh
index 5262df054..fda117417 100755
--- a/tools/gluten-it/sbin/gluten-it.sh
+++ b/tools/gluten-it/sbin/gluten-it.sh
@@ -47,4 +47,4 @@ $JAVA_HOME/bin/java $GLUTEN_IT_JVM_ARGS \
-Djdk.reflect.useDirectMethodHandle=false \
-Dio.netty.tryReflectionSetAccessible=true \
-cp $JAR_PATH \
- org.apache.gluten.integration.tpc.Tpc $@
+ org.apache.gluten.integration.Cli $@
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]