Yi Yao created SPARK-5707: ----------------------------- Summary: Enabling spark.sql.codegen throws ClassNotFound exception Key: SPARK-5707 URL: https://issues.apache.org/jira/browse/SPARK-5707 Project: Spark Issue Type: Bug Components: SQL Affects Versions: 1.2.0 Environment: yarn-client mode Reporter: Yi Yao
org.apache.spark.SparkException: Job aborted due to stage failure: Task 13 in stage 133.0 failed 4 times, most recent failure: Lost task 13.3 in stage 133.0 (TID 3066, cdh52-node2): java.io.IOException: com.esotericsoftware.kryo.KryoException: Unable to find class: __wrapper$1$81257352e1c844aebf09cb84fe9e7459.__wrapper$1$81257352e1c844aebf09cb84fe9e7459$SpecificRow$1 Serialization trace: hashTable (org.apache.spark.sql.execution.joins.UniqueKeyHashedRelation) at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1011) at org.apache.spark.broadcast.TorrentBroadcast.readBroadcastBlock(TorrentBroadcast.scala:164) at org.apache.spark.broadcast.TorrentBroadcast._value$lzycompute(TorrentBroadcast.scala:64) at org.apache.spark.broadcast.TorrentBroadcast._value(TorrentBroadcast.scala:64) at org.apache.spark.broadcast.TorrentBroadcast.getValue(TorrentBroadcast.scala:87) at org.apache.spark.broadcast.Broadcast.value(Broadcast.scala:70) at org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$3.apply(BroadcastHashJoin.scala:62) at org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$3.apply(BroadcastHashJoin.scala:61) at org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:601) at org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:601) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.CartesianRDD.compute(CartesianRDD.scala:75) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.CartesianRDD.compute(CartesianRDD.scala:75) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) at org.apache.spark.scheduler.Task.run(Task.scala:56) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) SQL: INSERT INTO TABLE ${hiveconf:TEMP_TABLE} SELECT s_store_name, pr_review_date, pr_review_content FROM ( --select store_name for stores with flat or declining sales in 3 consecutive months. SELECT s_store_name FROM store s JOIN ( -- linear regression part SELECT temp.cat AS cat, --SUM(temp.x)as sumX, --SUM(temp.y)as sumY, --SUM(temp.xy)as sumXY, --SUM(temp.xx)as sumXSquared, --count(temp.x) as N, --N * sumXY - sumX * sumY AS numerator, --N * sumXSquared - sumX*sumX AS denom --numerator / denom as slope, --(sumY - slope * sumX) / N as intercept --(count(temp.x) * SUM(temp.xy) - SUM(temp.x) * SUM(temp.y)) AS numerator, --(count(temp.x) * SUM(temp.xx) - SUM(temp.x) * SUM(temp.x)) AS denom --numerator / denom as slope, --(sumY - slope * sumX) / N as intercept ((count(temp.x) * SUM(temp.xy) - SUM(temp.x) * SUM(temp.y)) / (count(temp.x) * SUM(temp.xx) - SUM(temp.x) * SUM(temp.x)) ) as slope, (SUM(temp.y) - ((count(temp.x) * SUM(temp.xy) - SUM(temp.x) * SUM(temp.y)) / (count(temp.x) * SUM(temp.xx) - SUM(temp.x) * SUM(temp.x)) ) * SUM(temp.x)) / count(temp.x) as intercept FROM ( SELECT s.ss_store_sk AS cat, s.ss_sold_date_sk AS x, SUM(s.ss_net_paid) AS y, s.ss_sold_date_sk * SUM(s.ss_net_paid) AS xy, s.ss_sold_date_sk*s.ss_sold_date_sk AS xx FROM store_sales s --select date range LEFT SEMI JOIN ( SELECT d_date_sk FROM date_dim d WHERE d.d_date >= '${hiveconf:q18_startDate}' AND d.d_date <= '${hiveconf:q18_endDate}' ) dd ON ( s.ss_sold_date_sk=dd.d_date_sk ) WHERE s.ss_store_sk <= 18 GROUP BY s.ss_store_sk, s.ss_sold_date_sk ) temp GROUP BY temp.cat ) c on s.s_store_sk = c.cat WHERE c.slope < 0 ) tmp JOIN product_reviews pr on (true) WHERE instr(pr.pr_review_content, tmp.s_store_name) > 0 -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org