(incubator-gluten) branch main updated: [VL][Core] SampleExec Operator Native Support (#5856)

zhli Mon, 27 May 2024 23:42:40 -0700

This is an automated email from the ASF dual-hosted git repository.

zhli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git



The following commit(s) were added to refs/heads/main by this push:
     new 729d3450e [VL][Core] SampleExec Operator Native Support  (#5856)
729d3450e is described below

commit 729d3450e0e853b6647079452f7051c642756a9c
Author: 高阳阳 <[email protected]>
AuthorDate: Tue May 28 14:42:29 2024 +0800

    [VL][Core] SampleExec Operator Native Support  (#5856)
    
    [VL] SampleExec Operator Native Support.
---
 .../backendsapi/clickhouse/CHMetricsApi.scala      |  11 ++
 .../clickhouse/CHSparkPlanExecApi.scala            |   8 ++
 .../gluten/backendsapi/velox/VeloxBackend.scala    |   2 +
 .../gluten/backendsapi/velox/VeloxMetricsApi.scala |  16 +++
 .../backendsapi/velox/VeloxSparkPlanExecApi.scala  |   9 ++
 .../org/apache/gluten/execution/TestOperator.scala |  12 ++
 .../gluten/backendsapi/BackendSettingsApi.scala    |   2 +
 .../org/apache/gluten/backendsapi/MetricsApi.scala |   4 +
 .../gluten/backendsapi/SparkPlanExecApi.scala      |   7 ++
 .../gluten/execution/SampleExecTransformer.scala   | 126 +++++++++++++++++++++
 .../extension/columnar/OffloadSingleNode.scala     |   9 ++
 .../extension/columnar/TransformHintRule.scala     |   9 ++
 .../extension/columnar/validator/Validators.scala  |   3 +
 .../gluten/metrics/SampleMetricsUpdater.scala      |  35 ++++++
 .../scala/org/apache/gluten/GlutenConfig.scala     |   9 ++
 15 files changed, 262 insertions(+)

diff --git 
a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHMetricsApi.scala
 
b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHMetricsApi.scala
index 30f682f0f..350548e98 100644
--- 
a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHMetricsApi.scala
+++ 
b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHMetricsApi.scala
@@ -361,6 +361,17 @@ class CHMetricsApi extends MetricsApi with Logging with 
LogLevelUtil {
       s"NestedLoopJoinTransformer metrics update is not supported in CH 
backend")
   }
 
+  override def genSampleTransformerMetrics(sparkContext: SparkContext): 
Map[String, SQLMetric] = {
+    throw new UnsupportedOperationException(
+      s"SampleTransformer metrics update is not supported in CH backend")
+  }
+
+  override def genSampleTransformerMetricsUpdater(
+      metrics: Map[String, SQLMetric]): MetricsUpdater = {
+    throw new UnsupportedOperationException(
+      s"SampleTransformer metrics update is not supported in CH backend")
+  }
+
   def genWriteFilesTransformerMetrics(sparkContext: SparkContext): Map[String, 
SQLMetric] = {
     throw new UnsupportedOperationException(
       s"WriteFilesTransformer metrics update is not supported in CH backend")
diff --git 
a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
 
b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
index 8c2b20db6..1403c8261 100644
--- 
a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
+++ 
b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
@@ -379,6 +379,14 @@ class CHSparkPlanExecApi extends SparkPlanExecApi {
     throw new GlutenNotSupportException(
       "BroadcastNestedLoopJoinExecTransformer is not supported in ch backend.")
 
+  override def genSampleExecTransformer(
+      lowerBound: Double,
+      upperBound: Double,
+      withReplacement: Boolean,
+      seed: Long,
+      child: SparkPlan): SampleExecTransformer =
+    throw new GlutenNotSupportException("SampleExecTransformer is not 
supported in ch backend.")
+
   /** Generate an expression transformer to transform GetMapValue to 
Substrait. */
   def genGetMapValueTransformer(
       substraitExprName: String,
diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
index a2da0b8b2..7f928bd33 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
@@ -499,6 +499,8 @@ object VeloxBackendSettings extends BackendSettingsApi {
 
   override def supportBroadcastNestedLoopJoinExec(): Boolean = true
 
+  override def supportSampleExec(): Boolean = true
+
   override def supportColumnarArrowUdf(): Boolean = true
 
   override def generateHdfsConfForLibhdfs(): Boolean = true
diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxMetricsApi.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxMetricsApi.scala
index 7be639d4c..0811d71d1 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxMetricsApi.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxMetricsApi.scala
@@ -540,4 +540,20 @@ class VeloxMetricsApi extends MetricsApi with Logging {
 
   override def genNestedLoopJoinTransformerMetricsUpdater(
       metrics: Map[String, SQLMetric]): MetricsUpdater = new 
NestedLoopJoinMetricsUpdater(metrics)
+
+  override def genSampleTransformerMetrics(sparkContext: SparkContext): 
Map[String, SQLMetric] =
+    Map(
+      "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of 
output rows"),
+      "outputVectors" -> SQLMetrics.createMetric(sparkContext, "number of 
output vectors"),
+      "outputBytes" -> SQLMetrics.createSizeMetric(sparkContext, "number of 
output bytes"),
+      "wallNanos" -> SQLMetrics.createNanoTimingMetric(sparkContext, 
"totaltime of sample"),
+      "cpuCount" -> SQLMetrics.createMetric(sparkContext, "cpu wall time 
count"),
+      "peakMemoryBytes" -> SQLMetrics.createSizeMetric(sparkContext, "peak 
memory bytes"),
+      "numMemoryAllocations" -> SQLMetrics.createMetric(
+        sparkContext,
+        "number of memory allocations")
+    )
+
+  override def genSampleTransformerMetricsUpdater(metrics: Map[String, 
SQLMetric]): MetricsUpdater =
+    new SampleMetricsUpdater(metrics)
 }
diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index 155a33c94..92be63a58 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -411,6 +411,15 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
       right,
       isNullAwareAntiJoin)
 
+  override def genSampleExecTransformer(
+      lowerBound: Double,
+      upperBound: Double,
+      withReplacement: Boolean,
+      seed: Long,
+      child: SparkPlan): SampleExecTransformer = {
+    SampleExecTransformer(lowerBound, upperBound, withReplacement, seed, child)
+  }
+
   override def genSortMergeJoinExecTransformer(
       leftKeys: Seq[Expression],
       rightKeys: Seq[Expression],
diff --git 
a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala 
b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala
index 8e8423360..7bbc24d45 100644
--- 
a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala
+++ 
b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala
@@ -1050,6 +1050,18 @@ class TestOperator extends 
VeloxWholeStageTransformerSuite {
     }
   }
 
+  test("Test sample op") {
+    withSQLConf("spark.gluten.sql.columnarSampleEnabled" -> "true") {
+      withTable("t") {
+        sql("create table t (id int, b boolean) using parquet")
+        sql("insert into t values (1, true), (2, false), (3, null), (4, true), 
(5, false)")
+        runQueryAndCompare("select * from t TABLESAMPLE(20 PERCENT)", false) {
+          checkGlutenOperatorMatch[SampleExecTransformer]
+        }
+      }
+    }
+  }
+
   test("test cross join") {
     withTable("t1", "t2") {
       sql("""
diff --git 
a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala
 
b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala
index 9c5c13271..d18273af2 100644
--- 
a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala
+++ 
b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala
@@ -144,6 +144,8 @@ trait BackendSettingsApi {
 
   def supportBroadcastNestedLoopJoinExec(): Boolean = false
 
+  def supportSampleExec(): Boolean = false
+
   /** Merge two phases hash based aggregate if need */
   def mergeTwoPhasesHashBaseAggregateIfNeed(): Boolean = false
 
diff --git 
a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/MetricsApi.scala 
b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/MetricsApi.scala
index 99b44a2de..a96f27f5a 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/MetricsApi.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/MetricsApi.scala
@@ -113,6 +113,10 @@ trait MetricsApi extends Serializable {
 
   def genNestedLoopJoinTransformerMetricsUpdater(metrics: Map[String, 
SQLMetric]): MetricsUpdater
 
+  def genSampleTransformerMetrics(sparkContext: SparkContext): Map[String, 
SQLMetric]
+
+  def genSampleTransformerMetricsUpdater(metrics: Map[String, SQLMetric]): 
MetricsUpdater
+
   def genColumnarInMemoryTableMetrics(sparkContext: SparkContext): Map[String, 
SQLMetric] =
     Map("numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of 
output rows"))
 }
diff --git 
a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
 
b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
index 429b926cd..78cf02f0a 100644
--- 
a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
+++ 
b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
@@ -125,6 +125,13 @@ trait SparkPlanExecApi {
       right: SparkPlan,
       isNullAwareAntiJoin: Boolean = false): 
BroadcastHashJoinExecTransformerBase
 
+  def genSampleExecTransformer(
+      lowerBound: Double,
+      upperBound: Double,
+      withReplacement: Boolean,
+      seed: Long,
+      child: SparkPlan): SampleExecTransformer
+
   /** Generate ShuffledHashJoinExecTransformer. */
   def genSortMergeJoinExecTransformer(
       leftKeys: Seq[Expression],
diff --git 
a/gluten-core/src/main/scala/org/apache/gluten/execution/SampleExecTransformer.scala
 
b/gluten-core/src/main/scala/org/apache/gluten/execution/SampleExecTransformer.scala
new file mode 100644
index 000000000..86189392a
--- /dev/null
+++ 
b/gluten-core/src/main/scala/org/apache/gluten/execution/SampleExecTransformer.scala
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.execution
+
+import org.apache.gluten.backendsapi.BackendsApiManager
+import org.apache.gluten.expression.{ConverterUtils, ExpressionConverter}
+import org.apache.gluten.extension.ValidationResult
+import org.apache.gluten.metrics.MetricsUpdater
+import org.apache.gluten.substrait.`type`.TypeBuilder
+import org.apache.gluten.substrait.SubstraitContext
+import org.apache.gluten.substrait.extensions.ExtensionBuilder
+import org.apache.gluten.substrait.rel.{RelBuilder, RelNode}
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, 
LessThan, Literal, Rand}
+import org.apache.spark.sql.execution.SparkPlan
+import org.apache.spark.sql.types.DoubleType
+
+import scala.collection.JavaConverters._
+
+/**
+ * SampleExec supports two sampling methods: with replacement and without 
replacement. This
+ * transformer currently supports only sampling without replacement. For 
sampling without
+ * replacement, sampleExec uses `seed + partitionId` as the seed for each 
partition. The `upperBound
+ * \- lowerBound` value is used as the fraction, and the XORShiftRandom number 
generator is
+ * employed. Each row undergoes a Bernoulli trial, and if the generated random 
number falls within
+ * the range [lowerBound, upperBound), the row is included; otherwise, it is 
skipped.
+ *
+ * This transformer converts sampleExec to a Substrait Filter relation, 
achieving a similar sampling
+ * effect through the filter op with rand sampling expression. Specifically, 
the `upperBound -
+ * lowerBound` value is used as the fraction, and the node is translated to 
`filter(rand(seed +
+ * partitionId) < fraction)` for random sampling.
+ */
+case class SampleExecTransformer(
+    lowerBound: Double,
+    upperBound: Double,
+    withReplacement: Boolean,
+    seed: Long,
+    child: SparkPlan)
+  extends UnaryTransformSupport
+  with Logging {
+  def fraction: Double = upperBound - lowerBound
+
+  def condition: Expression = {
+    val randExpr: Expression = Rand(seed)
+    val sampleRateExpr: Expression = Literal(fraction, DoubleType)
+    LessThan(randExpr, sampleRateExpr)
+  }
+
+  override def output: Seq[Attribute] = child.output
+
+  // Note: "metrics" is made transient to avoid sending driver-side metrics to 
tasks.
+  @transient override lazy val metrics =
+    
BackendsApiManager.getMetricsApiInstance.genSampleTransformerMetrics(sparkContext)
+
+  override def metricsUpdater(): MetricsUpdater =
+    
BackendsApiManager.getMetricsApiInstance.genSampleTransformerMetricsUpdater(metrics)
+
+  def getRelNode(
+      context: SubstraitContext,
+      condExpr: Expression,
+      originalInputAttributes: Seq[Attribute],
+      operatorId: Long,
+      input: RelNode,
+      validation: Boolean): RelNode = {
+    assert(condExpr != null)
+    val args = context.registeredFunction
+    val condExprNode = ExpressionConverter
+      .replaceWithExpressionTransformer(condExpr, attributeSeq = 
originalInputAttributes)
+      .doTransform(args)
+
+    if (!validation) {
+      RelBuilder.makeFilterRel(input, condExprNode, context, operatorId)
+    } else {
+      // Use an extension node to send the input types through Substrait plan 
for validation.
+      val inputTypeNodeList = originalInputAttributes
+        .map(attr => ConverterUtils.getTypeNode(attr.dataType, attr.nullable))
+        .asJava
+      val extensionNode = ExtensionBuilder.makeAdvancedExtension(
+        BackendsApiManager.getTransformerApiInstance.packPBMessage(
+          TypeBuilder.makeStruct(false, inputTypeNodeList).toProtobuf))
+      RelBuilder.makeFilterRel(input, condExprNode, extensionNode, context, 
operatorId)
+    }
+  }
+
+  override protected def doValidateInternal(): ValidationResult = {
+    if (withReplacement) {
+      return ValidationResult.notOk(
+        "Unsupported sample exec in native with " +
+          s"withReplacement parameter is $withReplacement")
+    }
+    val substraitContext = new SubstraitContext
+    val operatorId = substraitContext.nextOperatorId((this.nodeName))
+    // First, check whether the Substrait plan for this operator can be 
successfully generated.
+    val relNode =
+      getRelNode(substraitContext, condition, child.output, operatorId, null, 
validation = true)
+    // Then, validate the generated plan in native engine.
+    doNativeValidation(substraitContext, relNode)
+  }
+
+  override def doTransform(context: SubstraitContext): TransformContext = {
+    val childCtx = child.asInstanceOf[TransformSupport].doTransform(context)
+    val operatorId = context.nextOperatorId(this.nodeName)
+    val currRel =
+      getRelNode(context, condition, child.output, operatorId, childCtx.root, 
validation = false)
+    assert(currRel != null, "Filter rel should be valid.")
+    TransformContext(childCtx.outputAttributes, output, currRel)
+  }
+
+  override protected def withNewChildInternal(newChild: SparkPlan): 
SampleExecTransformer =
+    copy(child = newChild)
+}
diff --git 
a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala
 
b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala
index 84a2ec5c6..e0aa0c26b 100644
--- 
a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala
+++ 
b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala
@@ -432,6 +432,15 @@ object OffloadOthers {
               child,
               plan.evalType)
           }
+        case plan: SampleExec =>
+          logDebug(s"Columnar Processing for ${plan.getClass} is currently 
supported.")
+          val child = plan.child
+          
BackendsApiManager.getSparkPlanExecApiInstance.genSampleExecTransformer(
+            plan.lowerBound,
+            plan.upperBound,
+            plan.withReplacement,
+            plan.seed,
+            child)
         case p if !p.isInstanceOf[GlutenPlan] =>
           logDebug(s"Transformation for ${p.getClass} is currently not 
supported.")
           val children = plan.children
diff --git 
a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala
 
b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala
index c9fcc52aa..7ce9ffc52 100644
--- 
a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala
+++ 
b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala
@@ -500,6 +500,15 @@ case class AddTransformHintRule() extends Rule[SparkPlan] {
             plan.child,
             offset)
           transformer.doValidate().tagOnFallback(plan)
+        case plan: SampleExec =>
+          val transformer = 
BackendsApiManager.getSparkPlanExecApiInstance.genSampleExecTransformer(
+            plan.lowerBound,
+            plan.upperBound,
+            plan.withReplacement,
+            plan.seed,
+            plan.child
+          )
+          transformer.doValidate().tagOnFallback(plan)
         case _ =>
         // Currently we assume a plan to be transformable by default.
       }
diff --git 
a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/validator/Validators.scala
 
b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/validator/Validators.scala
index d4bd9926a..56b63ef84 100644
--- 
a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/validator/Validators.scala
+++ 
b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/validator/Validators.scala
@@ -192,6 +192,9 @@ object Validators {
       case p
           if HiveTableScanExecTransformer.isHiveTableScan(p) && 
!conf.enableColumnarHiveTableScan =>
         fail(p)
+      case p: SampleExec
+          if !(conf.enableColumnarSample && 
BackendsApiManager.getSettings.supportSampleExec()) =>
+        fail(p)
       case _ => pass()
     }
   }
diff --git 
a/gluten-data/src/main/scala/org/apache/gluten/metrics/SampleMetricsUpdater.scala
 
b/gluten-data/src/main/scala/org/apache/gluten/metrics/SampleMetricsUpdater.scala
new file mode 100644
index 000000000..a108a5b79
--- /dev/null
+++ 
b/gluten-data/src/main/scala/org/apache/gluten/metrics/SampleMetricsUpdater.scala
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.metrics
+
+import org.apache.spark.sql.execution.metric.SQLMetric
+
+class SampleMetricsUpdater(val metrics: Map[String, SQLMetric]) extends 
MetricsUpdater {
+
+  override def updateNativeMetrics(opMetrics: IOperatorMetrics): Unit = {
+    if (opMetrics != null) {
+      val operatorMetrics = opMetrics.asInstanceOf[OperatorMetrics]
+      metrics("numOutputRows") += operatorMetrics.outputRows
+      metrics("outputVectors") += operatorMetrics.outputVectors
+      metrics("outputBytes") += operatorMetrics.outputBytes
+      metrics("cpuCount") += operatorMetrics.cpuCount
+      metrics("wallNanos") += operatorMetrics.wallNanos
+      metrics("peakMemoryBytes") += operatorMetrics.peakMemoryBytes
+      metrics("numMemoryAllocations") += operatorMetrics.numMemoryAllocations
+    }
+  }
+}
diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala 
b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
index d353c75c3..c9a62b8b7 100644
--- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
@@ -85,6 +85,8 @@ class GlutenConfig(conf: SQLConf) extends Logging {
 
   def enableColumnarBroadcastJoin: Boolean = 
conf.getConf(COLUMNAR_BROADCAST_JOIN_ENABLED)
 
+  def enableColumnarSample: Boolean = conf.getConf(COLUMNAR_SAMPLE_ENABLED)
+
   def enableColumnarArrowUDF: Boolean = 
conf.getConf(COLUMNAR_ARROW_UDF_ENABLED)
 
   def enableColumnarCoalesce: Boolean = conf.getConf(COLUMNAR_COALESCE_ENABLED)
@@ -1772,6 +1774,13 @@ object GlutenConfig {
       .booleanConf
       .createWithDefault(true)
 
+  val COLUMNAR_SAMPLE_ENABLED =
+    buildConf("spark.gluten.sql.columnarSampleEnabled")
+      .internal()
+      .doc("Disable or enable columnar sample.")
+      .booleanConf
+      .createWithDefault(false)
+
   val CACHE_WHOLE_STAGE_TRANSFORMER_CONTEXT =
     buildConf("spark.gluten.sql.cacheWholeStageTransformerContext")
       .internal()


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(incubator-gluten) branch main updated: [VL][Core] SampleExec Operator Native Support (#5856)

Reply via email to