This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new c77131fd6 [VL] RAS: Make default rough cost model exhaustively offload computations (#6493)
c77131fd6 is described below
commit c77131fd6655fd4feb6bdeedc588117fd9c6d9c9
Author: Hongze Zhang <[email protected]>
AuthorDate: Fri Jul 19 09:18:18 2024 +0800
[VL] RAS: Make default rough cost model exhaustively offload computations (#6493)
---
.../execution/ScalarFunctionsValidateSuite.scala | 64 ++++++++++++++++------
.../gluten/planner/cost/GlutenCostModel.scala | 17 +++---
.../buildhere-veloxbe-portable-libs/README.md | 2 +-
3 files changed, 56 insertions(+), 27 deletions(-)
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index 12fa3b46d..43d54fb62 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -16,14 +16,58 @@
*/
package org.apache.gluten.execution
-import org.apache.spark.SparkException
+import org.apache.spark.{SparkConf, SparkException}
import org.apache.spark.sql.catalyst.optimizer.NullPropagation
import org.apache.spark.sql.execution.ProjectExec
import org.apache.spark.sql.types._
import java.sql.Timestamp
-class ScalarFunctionsValidateSuite extends FunctionsValidateTest {
+class ScalarFunctionsValidateSuiteRasOff extends ScalarFunctionsValidateSuite {
+ override protected def sparkConf: SparkConf = {
+ super.sparkConf
+ .set("spark.gluten.ras.enabled", "false")
+ }
+
+ // Since https://github.com/apache/incubator-gluten/pull/6200.
+ test("Test input_file_name function") {
+ runQueryAndCompare("""SELECT input_file_name(), l_orderkey
+ | from lineitem limit 100""".stripMargin) {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+
+ runQueryAndCompare("""SELECT input_file_name(), l_orderkey
+ | from
+ | (select l_orderkey from lineitem
+ | union all
+ | select o_orderkey as l_orderkey from orders)
+ | limit 100""".stripMargin) {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ }
+}
+
+class ScalarFunctionsValidateSuiteRasOn extends ScalarFunctionsValidateSuite {
+ override protected def sparkConf: SparkConf = {
+ super.sparkConf
+ .set("spark.gluten.ras.enabled", "true")
+ }
+
+ // TODO: input_file_name is not yet supported in RAS
+ ignore("Test input_file_name function") {
+ runQueryAndCompare("""SELECT input_file_name(), l_orderkey
+ | from lineitem limit 100""".stripMargin) { _ => }
+
+ runQueryAndCompare("""SELECT input_file_name(), l_orderkey
+ | from
+ | (select l_orderkey from lineitem
+ | union all
+ | select o_orderkey as l_orderkey from orders)
+ | limit 100""".stripMargin) { _ => }
+ }
+}
+
+abstract class ScalarFunctionsValidateSuite extends FunctionsValidateTest {
disableFallbackCheck
import testImplicits._
@@ -658,22 +702,6 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest {
}
}
- test("Test input_file_name function") {
- runQueryAndCompare("""SELECT input_file_name(), l_orderkey
- | from lineitem limit 100""".stripMargin) {
- checkGlutenOperatorMatch[ProjectExecTransformer]
- }
-
- runQueryAndCompare("""SELECT input_file_name(), l_orderkey
- | from
- | (select l_orderkey from lineitem
- | union all
- | select o_orderkey as l_orderkey from orders)
- | limit 100""".stripMargin) {
- checkGlutenOperatorMatch[ProjectExecTransformer]
- }
- }
-
test("Test sequence function optimized by Spark constant folding") {
withSQLConf(("spark.sql.optimizer.excludedRules", NullPropagation.ruleName)) {
runQueryAndCompare("""SELECT sequence(1, 5), l_orderkey
diff --git a/gluten-core/src/main/scala/org/apache/gluten/planner/cost/GlutenCostModel.scala b/gluten-core/src/main/scala/org/apache/gluten/planner/cost/GlutenCostModel.scala
index ab0deab19..513a91e43 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/planner/cost/GlutenCostModel.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/planner/cost/GlutenCostModel.scala
@@ -70,20 +70,21 @@ object GlutenCostModel extends Logging {
(n.children.map(longCostOf).toList :+ selfCost).reduce(safeSum)
}
- // A very rough estimation as of now.
+ // A very rough estimation as of now. The cost model basically considers any
+ // fallen back ops has extreme high cost so offloads computations as much as possible.
private def selfLongCostOf(node: SparkPlan): Long = {
node match {
case _: RemoveFilter.NoopFilter =>
// To make planner choose the tree that has applied rule PushFilterToScan.
0L
- case ColumnarToRowExec(child) => 3L
- case RowToColumnarExec(child) => 3L
- case ColumnarToRowLike(child) => 3L
- case RowToColumnarLike(child) => 3L
- case p if PlanUtil.isGlutenColumnarOp(p) => 2L
- case p if PlanUtil.isVanillaColumnarOp(p) => 3L
+ case ColumnarToRowExec(child) => 10L
+ case RowToColumnarExec(child) => 10L
+ case ColumnarToRowLike(child) => 10L
+ case RowToColumnarLike(child) => 10L
+ case p if PlanUtil.isGlutenColumnarOp(p) => 10L
+ case p if PlanUtil.isVanillaColumnarOp(p) => 1000L
// Other row ops. Usually a vanilla row op.
- case _ => 5L
+ case _ => 1000L
}
}
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
index cd76e74e7..28e955dac 100644
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
@@ -25,7 +25,7 @@ export MOUNT_MAVEN_CACHE=ON
# Additionally, changes to HTTP_PROXY_HOST / HTTP_PROXY_PORT could invalidate the build cache
# either. For more details, please check docker file `dockerfile-buildenv`.
cd gluten/
-tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
+tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh
# 4. Check the built libs.
ls -l cpp/build/releases/
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]