Supritha created SPARK-27318:
--------------------------------

             Summary: Join operation on bucketed tables fails with adaptive 
execution enabled
                 Key: SPARK-27318
                 URL: https://issues.apache.org/jira/browse/SPARK-27318
             Project: Spark
          Issue Type: Bug
          Components: SQL
    Affects Versions: 2.4.0
            Reporter: Supritha


A join operation on bucketed tables fails when adaptive execution is enabled.

Steps to reproduce the issue:

Enable adaptive execution:
{code:java}
spark.sql("set spark.sql.adaptive.enabled=true")
{code}

1. Create tables bucket3 and bucket4 as below and load the data:
{code:java}
sql("create table bucket3(id3 int, country3 String, sports3 String) row format delimited fields terminated by ','").show()
sql("create table bucket4(id4 int, country4 String) row format delimited fields terminated by ','").show()

sql("load data local inpath '/opt/abhidata/bucket2.txt' into table bucket3").show()
sql("load data local inpath '/opt/abhidata/bucket3.txt' into table bucket4").show()
{code}

2. Create the bucketed tables as below:
{code:java}
spark.sqlContext.table("bucket3").write.bucketBy(3, "id3").saveAsTable("bucketed_table_3")
spark.sqlContext.table("bucket4").write.bucketBy(4, "id4").saveAsTable("bucketed_table_4")
{code}

3. Execute the join query on the bucketed tables:
{code:java}
scala> sql("select * from bucketed_table_3 join bucketed_table_4 on bucketed_table_3.id3 = bucketed_table_4.id4").show()
{code}

 
The query fails with the following exception:
{code:java}
java.lang.IllegalArgumentException: requirement failed: PartitioningCollection requires all of its partitionings have the same numPartitions.
  at scala.Predef$.require(Predef.scala:224)
  at org.apache.spark.sql.catalyst.plans.physical.PartitioningCollection.<init>(partitioning.scala:291)
  at org.apache.spark.sql.execution.joins.SortMergeJoinExec.outputPartitioning(SortMergeJoinExec.scala:69)
  at org.apache.spark.sql.execution.exchange.EnsureRequirements$$anonfun$org$apache$spark$sql$execution$exchange$EnsureRequirements$$ensureDistributionAndOrdering$1.apply(EnsureRequirements.scala:150)
  at org.apache.spark.sql.execution.exchange.EnsureRequirements$$anonfun$org$apache$spark$sql$execution$exchange$EnsureRequirements$$ensureDistributionAndOrdering$1.apply(EnsureRequirements.scala:149)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.immutable.List.foreach(List.scala:392)
  at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
  at scala.collection.immutable.List.map(List.scala:296)
  at org.apache.spark.sql.execution.exchange.EnsureRequirements.org$apache$spark$sql$execution$exchange$EnsureRequirements$$ensureDistributionAndOrdering(EnsureRequirements.scala:149)
  at org.apache.spark.sql.execution.exchange.EnsureRequirements$$anonfun$apply$1.applyOrElse(EnsureRequirements.scala:304)
  at org.apache.spark.sql.execution.exchange.EnsureRequirements$$anonfun$apply$1.applyOrElse(EnsureRequirements.scala:296)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$2.apply(TreeNode.scala:282)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$2.apply(TreeNode.scala:282)
  at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:281)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:275)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:275)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:275)
  at org.apache.spark.sql.execution.exchange.EnsureRequirements.apply(EnsureRequirements.scala:296)
  at org.apache.spark.sql.execution.exchange.EnsureRequirements.apply(EnsureRequirements.scala:38)
  at org.apache.spark.sql.execution.QueryExecution$$anonfun$prepareForExecution$1.apply(QueryExecution.scala:87)
  at org.apache.spark.sql.execution.QueryExecution$$anonfun$prepareForExecution$1.apply(QueryExecution.scala:87)
  at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:124)
  at scala.collection.immutable.List.foldLeft(List.scala:84)
  at org.apache.spark.sql.execution.QueryExecution.prepareForExecution(QueryExecution.scala:87)
  at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77)
  at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77)
  at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3360)
  at org.apache.spark.sql.Dataset.head(Dataset.scala:2545)
  at org.apache.spark.sql.Dataset.take(Dataset.scala:2759)
  at org.apache.spark.sql.Dataset.getRows(Dataset.scala:255)
  at org.apache.spark.sql.Dataset.showString(Dataset.scala:292)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:746)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:705)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:714)
  ... 49 elided
{code}
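For what it's worth, two workarounds that presumably sidestep the failure (sketches only, under the assumption that the mismatched bucket counts are the trigger):
{code:java}
// Option 1: disable adaptive execution before running the join.
spark.sql("set spark.sql.adaptive.enabled=false")
sql("select * from bucketed_table_3 join bucketed_table_4 on bucketed_table_3.id3 = bucketed_table_4.id4").show()

// Option 2: rebucket one side so both tables use the same number of buckets
// (bucketed_table_4_rb is a hypothetical name used for illustration).
spark.sqlContext.table("bucket4").write.bucketBy(3, "id4").saveAsTable("bucketed_table_4_rb")
sql("select * from bucketed_table_3 join bucketed_table_4_rb on bucketed_table_3.id3 = bucketed_table_4_rb.id4").show()
{code}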

 


