[ https://issues.apache.org/jira/browse/SPARK-6967?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14499574#comment-14499574 ]
Rastislav Krist commented on SPARK-6967:
----------------------------------------

Steps to reproduce the same problem in JdbcRDD (hope it helps):

- Create a table containing a Date field in your favourite DBMS; I used PostgreSQL:

{code:sql}
CREATE TABLE spark_test
(
  pk_spark_test integer NOT NULL,
  text character varying(25),
  date1 date,
  CONSTRAINT pk PRIMARY KEY (pk_spark_test)
)
WITH (
  OIDS=FALSE
);
ALTER TABLE spark_test OWNER TO postgres;
GRANT ALL ON TABLE spark_test TO postgres;
GRANT ALL ON TABLE spark_test TO public;
{code}

- Fill it with data:

{code:sql}
insert into spark_test(pk_spark_test, text, date1) values (1, 'one', '2014-04-01');
insert into spark_test(pk_spark_test, text, date1) values (2, 'two', '2014-04-02');
{code}

- From the Scala REPL, try the following:

{code}
import org.apache.spark.sql.SQLContext

val sqc = new SQLContext(sc)
sqc.jdbc("jdbc:postgresql://localhost:5432/ebx_repository?schema=ebx_repository&user=abc&password=def", "spark_test").cache.registerTempTable("spark_test") // don't forget the cache method
sqc.sql("select * from spark_test").foreach(println)
{code}

The last command will produce the following error (if you don't use cache, it produces correct results, as expected):

{code}
11:50:27.306 [Executor task launch worker-0] ERROR org.apache.spark.executor.Executor - Exception in task 0.0 in stage 0.0 (TID 0)
java.lang.ClassCastException: org.apache.spark.sql.catalyst.expressions.MutableAny cannot be cast to org.apache.spark.sql.catalyst.expressions.MutableInt
    at org.apache.spark.sql.catalyst.expressions.SpecificMutableRow.getInt(SpecificMutableRow.scala:248) ~[spark-catalyst_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.sql.columnar.IntColumnStats.gatherStats(ColumnStats.scala:191) ~[spark-sql_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.sql.columnar.NullableColumnBuilder$class.appendFrom(NullableColumnBuilder.scala:56) ~[spark-sql_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.sql.columnar.NativeColumnBuilder.org$apache$spark$sql$columnar$compression$CompressibleColumnBuilder$$super$appendFrom(ColumnBuilder.scala:87) ~[spark-sql_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.sql.columnar.compression.CompressibleColumnBuilder$class.appendFrom(CompressibleColumnBuilder.scala:78) ~[spark-sql_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.sql.columnar.NativeColumnBuilder.appendFrom(ColumnBuilder.scala:87) ~[spark-sql_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.sql.columnar.InMemoryRelation$$anonfun$3$$anon$1.next(InMemoryColumnarTableScan.scala:135) ~[spark-sql_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.sql.columnar.InMemoryRelation$$anonfun$3$$anon$1.next(InMemoryColumnarTableScan.scala:111) ~[spark-sql_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:249) ~[spark-core_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:172) ~[spark-core_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:79) ~[spark-core_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:242) ~[spark-core_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) ~[spark-core_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277) ~[spark-core_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:244) ~[spark-core_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) ~[spark-core_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277) ~[spark-core_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:244) ~[spark-core_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) ~[spark-core_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.scheduler.Task.run(Task.scala:64) ~[spark-core_2.11-1.3.0.jar:1.3.0]
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203) ~[spark-core_2.11-1.3.0.jar:1.3.0]
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [na:1.8.0_11]
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [na:1.8.0_11]
    at java.lang.Thread.run(Thread.java:745) [na:1.8.0_11]
{code}
{code}
11:50:27.318 [task-result-getter-0] WARN o.a.spark.scheduler.TaskSetManager - Lost task 0.0 in stage 0.0 (TID 0, localhost): java.lang.ClassCastException: org.apache.spark.sql.catalyst.expressions.MutableAny cannot be cast to org.apache.spark.sql.catalyst.expressions.MutableInt
    (stack trace identical to the one above)

11:50:27.320 [task-result-getter-0] ERROR o.a.spark.scheduler.TaskSetManager - Task 0 in stage 0.0 failed 1 times; aborting job

org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost): java.lang.ClassCastException: org.apache.spark.sql.catalyst.expressions.MutableAny cannot be cast to org.apache.spark.sql.catalyst.expressions.MutableInt
    (stack trace identical to the one above)
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:693)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1393)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
{code}
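Both stack traces die in IntColumnStats.gatherStats calling getInt on the date column, which suggests the in-memory columnar cache expects a DateType value as an Int (days since the Unix epoch) while the JDBC relation still hands it rows holding java.sql.Date objects. A minimal sketch of the conversion that appears to be missing (the helper toDaysSinceEpoch is made up for illustration, not Spark's actual conversion code, and timezone handling is ignored for brevity):

{code}
import java.sql.Date
import java.util.concurrent.TimeUnit

// Illustrative only: the columnar cache seems to expect a DateType column
// to hold an Int counting days since 1970-01-01, not a java.sql.Date.
def toDaysSinceEpoch(d: Date): Int =
  TimeUnit.MILLISECONDS.toDays(d.getTime).toInt

toDaysSinceEpoch(Date.valueOf("2014-04-01")) // e.g. 16161 (exact value depends on the local timezone)
{code}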
> Internal DateType not handled correctly in caching
> --------------------------------------------------
>
>                 Key: SPARK-6967
>                 URL: https://issues.apache.org/jira/browse/SPARK-6967
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>            Reporter: Michael Armbrust
>            Assignee: Adrian Wang
>            Priority: Blocker
>
> From the user list. It looks like dates are not implemented correctly in
> in-memory caching. We should also check the JDBC datasource support for dates.
> {code}
> Stack trace of an exception being reported since upgrade to 1.3.0:
>
> java.lang.ClassCastException: java.sql.Date cannot be cast to java.lang.Integer
>     at scala.runtime.BoxesRunTime.unboxToInt(BoxesRunTime.java:105) ~[scala-library-2.11.6.jar:na]
>     at org.apache.spark.sql.catalyst.expressions.GenericRow.getInt(rows.scala:83) ~[spark-catalyst_2.11-1.3.0.jar:1.3.0]
>     at org.apache.spark.sql.columnar.IntColumnStats.gatherStats(ColumnStats.scala:191) ~[spark-sql_2.11-1.3.0.jar:1.3.0]
>     at org.apache.spark.sql.columnar.NullableColumnBuilder$class.appendFrom(NullableColumnBuilder.scala:56) ~[spark-sql_2.11-1.3.0.jar:1.3.0]
>     at org.apache.spark.sql.columnar.NativeColumnBuilder.org$apache$spark$sql$columnar$compression$CompressibleColumnBuilder$$super$appendFrom(ColumnBuilder.scala:87) ~[spark-sql_2.11-1.3.0.jar:1.3.0]
>     at org.apache.spark.sql.columnar.compression.CompressibleColumnBuilder$class.appendFrom(CompressibleColumnBuilder.scala:78) ~[spark-sql_2.11-1.3.0.jar:1.3.0]
>     at org.apache.spark.sql.columnar.NativeColumnBuilder.appendFrom(ColumnBuilder.scala:87) ~[spark-sql_2.11-1.3.0.jar:1.3.0]
>     at org.apache.spark.sql.columnar.InMemoryRelation$$anonfun$3$$anon$1.next(InMemoryColumnarTableScan.scala:135) ~[spark-sql_2.11-1.3.0.jar:1.3.0]
>     at
> {code}
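One more note on the repro above: until this is fixed, a possible workaround (an untested sketch that reuses the sqc from the repro; whether casting actually avoids the bad code path is an assumption on my part) is to keep the JDBC table uncached and cache a projection that casts date1 to a string, so the columnar cache never sees a java.sql.Date:

{code}
// Untested workaround sketch: register the raw JDBC table without caching,
// then cache a projection where date1 is cast to a string, so the in-memory
// columnar builders treat it as a string column instead of calling getInt.
sqc.jdbc("jdbc:postgresql://localhost:5432/ebx_repository?schema=ebx_repository&user=abc&password=def",
  "spark_test").registerTempTable("spark_test_raw")

sqc.sql("select pk_spark_test, text, cast(date1 as string) as date1 from spark_test_raw")
  .cache()
  .registerTempTable("spark_test")

sqc.sql("select * from spark_test").foreach(println)
{code}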