[
https://issues.apache.org/jira/browse/SPARK-3034?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14568979#comment-14568979
]
Ian Babrou edited comment on SPARK-3034 at 6/2/15 11:35 AM:
------------------------------------------------------------
I'm seeing this with 1.3.1 and elasticsearch-spark:
{code}
// Imports added for completeness; the original snippet omitted them.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{SQLContext, SaveMode}
import org.elasticsearch.spark.sql.api.java.JavaEsSparkSQL

object App {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Simple Application")
      .setMaster("local")
      .set("es.nodes", "web606:9200")
      .set("es.net.proxy.socks.host", "127.0.0.1")
      .set("es.net.proxy.socks.port", "6666")
      .set("es.scroll.size", "1000")
      .set("es.field.read.empty.as.null", "no")
      .set("es.query", "?q=@key:unique_active_users")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    val df = JavaEsSparkSQL.esDF(sqlContext, "statistics-20150525/events")
    df.printSchema()
    df.save("/Users/bobrik/Desktop/parquet-test", "parquet", SaveMode.Overwrite)
  }
}
{code}
{code}
15/06/02 15:30:10 ERROR Executor: Exception in task 4.0 in stage 0.0 (TID 4)
java.lang.ClassCastException: java.util.Date cannot be cast to java.sql.Timestamp
    at org.apache.spark.sql.parquet.RowWriteSupport.writePrimitive(ParquetTableSupport.scala:210)
    at org.apache.spark.sql.parquet.RowWriteSupport.writeValue(ParquetTableSupport.scala:192)
    at org.apache.spark.sql.parquet.RowWriteSupport.write(ParquetTableSupport.scala:171)
    at org.apache.spark.sql.parquet.RowWriteSupport.write(ParquetTableSupport.scala:134)
    at parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:120)
    at parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:81)
    at parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:37)
    at org.apache.spark.sql.parquet.ParquetRelation2.org$apache$spark$sql$parquet$ParquetRelation2$$writeShard$1(newParquet.scala:671)
    at org.apache.spark.sql.parquet.ParquetRelation2$$anonfun$insert$2.apply(newParquet.scala:689)
    at org.apache.spark.sql.parquet.ParquetRelation2$$anonfun$insert$2.apply(newParquet.scala:689)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
    at org.apache.spark.scheduler.Task.run(Task.scala:64)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
{code}
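Not part of the original report, but a possible workaround until this is fixed: rebuild each row before writing, promoting any plain {{java.util.Date}} to {{java.sql.Timestamp}} so Parquet's writer accepts it. A minimal sketch against the Spark 1.3 API, assuming {{df}}, {{sqlContext}}, and the output path from the snippet above, and date values in top-level columns only (nested structs would need recursion):
{code}
import java.sql.Timestamp
import java.util.Date

import org.apache.spark.sql.Row

// Promote java.util.Date values to java.sql.Timestamp. Timestamp already
// extends Date, so the guard leaves genuine timestamps untouched.
val fixedRows = df.map { row =>
  Row.fromSeq(row.toSeq.map {
    case d: Date if !d.isInstanceOf[Timestamp] => new Timestamp(d.getTime)
    case other => other
  })
}

// Reassemble a DataFrame with the original schema and write it out.
val fixed = sqlContext.createDataFrame(fixedRows, df.schema)
fixed.save("/Users/bobrik/Desktop/parquet-test", "parquet", SaveMode.Overwrite)
{code}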
> [Hive] java.sql.Date cannot be cast to java.sql.Timestamp
> ---------------------------------------------------------
>
> Key: SPARK-3034
> URL: https://issues.apache.org/jira/browse/SPARK-3034
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 1.0.2
> Reporter: pengyanhong
>
> Running a simple HiveQL query via yarn-cluster fails with the error below:
> {quote}
> Exception in thread "Thread-2" java.lang.reflect.InvocationTargetException
>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
>     at java.lang.reflect.Method.invoke(Method.java:597)
>     at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:199)
> Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0.0:127 failed 3 times, most recent failure: Exception failure in TID 141 on host A01-R06-I147-41.jd.local: java.lang.ClassCastException: java.sql.Date cannot be cast to java.sql.Timestamp
>     org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaTimestampObjectInspector.getPrimitiveWritableObject(JavaTimestampObjectInspector.java:33)
>     org.apache.hadoop.hive.serde2.lazy.LazyUtils.writePrimitiveUTF8(LazyUtils.java:251)
>     org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:486)
>     org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serializeField(LazySimpleSerDe.java:439)
>     org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:423)
>     org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$3$$anonfun$apply$1.apply(InsertIntoHiveTable.scala:200)
>     org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$3$$anonfun$apply$1.apply(InsertIntoHiveTable.scala:192)
>     scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
>     org.apache.spark.sql.hive.execution.InsertIntoHiveTable.org$apache$spark$sql$hive$execution$InsertIntoHiveTable$$writeToFile$1(InsertIntoHiveTable.scala:149)
>     org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$saveAsHiveFile$1.apply(InsertIntoHiveTable.scala:158)
>     org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$saveAsHiveFile$1.apply(InsertIntoHiveTable.scala:158)
>     org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111)
>     org.apache.spark.scheduler.Task.run(Task.scala:51)
>     org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:183)
>     java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
>     java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
>     java.lang.Thread.run(Thread.java:662)
> Driver stacktrace:
>     at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1049)
>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1033)
>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1031)
>     at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>     at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>     at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1031)
>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:635)
>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:635)
>     at scala.Option.foreach(Option.scala:236)
>     at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:635)
>     at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1234)
>     at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
>     at akka.actor.ActorCell.invoke(ActorCell.scala:456)
>     at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
>     at akka.dispatch.Mailbox.run(Mailbox.scala:219)
>     at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
>     at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
>     at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
>     at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
>     at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> {quote}
> The above error is thrown while inserting the query result into a Hive table that has a column of timestamp type.
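A hedged sketch of one way around the Hive-side cast (not from the original report; {{sc}} is assumed to be an existing SparkContext, and the table and column names are invented for illustration): make the conversion explicit in the HiveQL itself, so the timestamp object inspector never receives a {{java.sql.Date}}.
{code}
import org.apache.spark.sql.hive.HiveContext

val hiveContext = new HiveContext(sc)

// Cast the date column to TIMESTAMP in the query rather than relying on the
// serializer. On Spark 1.0.x the HiveQL entry point was hiveContext.hql(...);
// later releases fold HiveQL into sql(...).
hiveContext.sql(
  """INSERT OVERWRITE TABLE events_out
    |SELECT id, CAST(event_date AS TIMESTAMP)
    |FROM events_in""".stripMargin)
{code}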