[ https://issues.apache.org/jira/browse/HIVE-7613?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14108600#comment-14108600 ]
Brock Noland commented on HIVE-7613:
------------------------------------
As part of this work we should enable auto_sortmerge_join_13.q, which currently
fails with the trace below (a sketch of the likely failure shape follows it):
{noformat}
Done query: auto_sortmerge_join_12.q elapsedTime=8s
Begin query: auto_sortmerge_join_13.q
java.lang.NullPointerException
at org.apache.hadoop.hive.ql.exec.FileSinkOperator.createBucketFiles(FileSinkOperator.java:455)
at org.apache.hadoop.hive.ql.exec.FileSinkOperator.closeOp(FileSinkOperator.java:836)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:583)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:595)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:595)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:595)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:595)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:595)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:595)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:595)
at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.close(SparkMapRecordHandler.java:175)
at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.closeRecordProcessor(HiveMapFunctionResultList.java:57)
at org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList$ResultIterator.hasNext(HiveBaseFunctionResultList.java:111)
at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:41)
at scala.collection.Iterator$class.foreach(Iterator.scala:727)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:759)
at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:759)
at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1121)
at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1121)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
at org.apache.spark.scheduler.Task.run(Task.scala:54)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:199)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:744)
org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.NullPointerException
at org.apache.hadoop.hive.ql.exec.FileSinkOperator.createBucketFiles(FileSinkOperator.java:459)
at org.apache.hadoop.hive.ql.exec.FileSinkOperator.closeOp(FileSinkOperator.java:836)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:583)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:595)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:595)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:595)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:595)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:595)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:595)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:595)
at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.close(SparkMapRecordHandler.java:175)
at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.closeRecordProcessor(HiveMapFunctionResultList.java:57)
at org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList$ResultIterator.hasNext(HiveBaseFunctionResultList.java:111)
at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:41)
at scala.collection.Iterator$class.foreach(Iterator.scala:727)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:759)
at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:759)
at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1121)
at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1121)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
at org.apache.spark.scheduler.Task.run(Task.scala:54)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:199)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:744)
Caused by: java.lang.NullPointerException
at org.apache.hadoop.hive.ql.exec.FileSinkOperator.createBucketFiles(FileSinkOperator.java:455)
{noformat}
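Judging from the trace, closeOp() drives FileSinkOperator.createBucketFiles() during close, so the NPE is consistent with close-time code dereferencing per-bucket state that only row processing would have initialized, e.g. on a Spark task that received no rows. Below is a minimal, self-contained Java sketch of that failure shape and a guard against it; the class and member names are illustrative assumptions, not Hive's actual implementation:
{noformat}
import java.util.ArrayList;
import java.util.List;

// Toy stand-in for a bucketed file sink: per-bucket state is created lazily
// by the first row, so close() must tolerate tasks that saw no rows at all.
class BucketedSink {
    private List<StringBuilder> buckets; // stands in for per-bucket file writers

    void processRow(String row) {
        if (buckets == null) {           // analogous to createBucketFiles()
            buckets = new ArrayList<>();
            for (int i = 0; i < 4; i++) buckets.add(new StringBuilder());
        }
        buckets.get(row.hashCode() & 3).append(row).append('\n');
    }

    void close() {
        if (buckets == null) {
            // No rows reached this sink on this task; without this guard we
            // would NPE exactly like the closeOp -> createBucketFiles path above.
            return;
        }
        for (StringBuilder b : buckets) {
            System.out.print(b);         // "flush" each bucket
        }
    }

    public static void main(String[] args) {
        BucketedSink empty = new BucketedSink();
        empty.close(); // safe even though processRow() never ran
    }
}
{noformat}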
> Research optimization of auto convert join to map join [Spark branch]
> ---------------------------------------------------------------------
>
> Key: HIVE-7613
> URL: https://issues.apache.org/jira/browse/HIVE-7613
> Project: Hive
> Issue Type: Sub-task
> Components: Spark
> Reporter: Chengxiang Li
> Priority: Minor
>
> ConvertJoinMapJoin is an optimization that replaces a common join (aka
> shuffle join) with a map join (aka broadcast or fragment replicate join)
> when possible. We need to research how to make it work with Hive on Spark.
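> For context, here is a toy Java sketch of the decision such an optimizer
> makes: broadcast the small side as a map join when it fits a size budget,
> otherwise keep the shuffle join. The class, method, and threshold are
> illustrative assumptions, not Hive's ConvertJoinMapJoin code (the real
> budget is governed by settings such as
> hive.auto.convert.join.noconditionaltask.size).
> {noformat}
> // Illustrative only: pick a broadcast (map) join when the small side of
> // the join fits under a memory budget; otherwise fall back to the common
> // (shuffle) join.
> class JoinPlanner {
>     enum JoinAlgo { MAP_JOIN, COMMON_JOIN }
>
>     // Example budget for the broadcast side, in bytes.
>     static final long BROADCAST_THRESHOLD = 10L * 1024 * 1024;
>
>     static JoinAlgo choose(long leftSideBytes, long rightSideBytes) {
>         long smallSide = Math.min(leftSideBytes, rightSideBytes);
>         // A map join replicates a hash table of the small side to every
>         // task, so the large side never has to be shuffled.
>         return smallSide <= BROADCAST_THRESHOLD
>                 ? JoinAlgo.MAP_JOIN
>                 : JoinAlgo.COMMON_JOIN;
>     }
> }
> {noformat}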
--
This message was sent by Atlassian JIRA
(v6.2#6252)