Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/20915#discussion_r193420453
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala ---
@@ -90,32 +96,37 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils {
      originalDataFrame: DataFrame): Unit = {
    // This test verifies parts of the plan. Disable whole stage codegen.
    withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") {
-     val strategy = DataSourceStrategy(spark.sessionState.conf)
      val bucketedDataFrame = spark.table("bucketed_table").select("i", "j", "k")
      val BucketSpec(numBuckets, bucketColumnNames, _) = bucketSpec
      // Limit: bucket pruning only works when the bucket column has one and only one column
      assert(bucketColumnNames.length == 1)
      val bucketColumnIndex = bucketedDataFrame.schema.fieldIndex(bucketColumnNames.head)
      val bucketColumn = bucketedDataFrame.schema.toAttributes(bucketColumnIndex)
-     val matchedBuckets = new BitSet(numBuckets)
-     bucketValues.foreach { value =>
-       matchedBuckets.set(strategy.getBucketId(bucketColumn, numBuckets, value))
-     }
      // Filter could hide the bug in bucket pruning. Thus, skipping all the filters
      val plan = bucketedDataFrame.filter(filterCondition).queryExecution.executedPlan
      val rdd = plan.find(_.isInstanceOf[DataSourceScanExec])
--- End diff --
nit: not introduced by this PR, but this name is wrong; we should probably call it `scanPlan`.
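
For illustration only (a sketch of the rename, not part of the PR): `plan.find(...)` returns an `Option[SparkPlan]`, not an RDD, so a name like `scanPlan` describes the value better.

```scala
// Hypothetical sketch of the suggested rename, assuming the surrounding test code above:
// find the DataSourceScanExec node in the executed plan and name it for what it is.
val scanPlan = plan.find(_.isInstanceOf[DataSourceScanExec])
assert(scanPlan.isDefined, s"Should contain a DataSourceScanExec, but got:\n$plan")
```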