[ 
https://issues.apache.org/jira/browse/SPARK-3034?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14568979#comment-14568979
 ] 

Ian Babrou edited comment on SPARK-3034 at 6/2/15 11:35 AM:
------------------------------------------------------------

I'm seeing this with 1.3.1 and elasticsearch-spark:

{code}
object App {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Simple Application")
      .setMaster("local")
      .set("es.nodes", "web606:9200")
      .set("es.net.proxy.socks.host", "127.0.0.1")
      .set("es.net.proxy.socks.port", "6666")
      .set("es.scroll.size", "1000")
      .set("es.field.read.empty.as.null", "no")
      .set("es.query", "?q=@key:unique_active_users")

    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    val df = JavaEsSparkSQL.esDF(sqlContext, "statistics-20150525/events")

    df.printSchema()

    df.save("/Users/bobrik/Desktop/parquet-test", "parquet", SaveMode.Overwrite)
  }

}
{code}

{code}
15/06/02 15:30:10 ERROR Executor: Exception in task 4.0 in stage 0.0 (TID 4)
java.lang.ClassCastException: java.util.Date cannot be cast to java.sql.Timestamp
        at 
org.apache.spark.sql.parquet.RowWriteSupport.writePrimitive(ParquetTableSupport.scala:210)
        at 
org.apache.spark.sql.parquet.RowWriteSupport.writeValue(ParquetTableSupport.scala:192)
        at 
org.apache.spark.sql.parquet.RowWriteSupport.write(ParquetTableSupport.scala:171)
        at 
org.apache.spark.sql.parquet.RowWriteSupport.write(ParquetTableSupport.scala:134)
        at 
parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:120)
        at parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:81)
        at parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:37)
        at 
org.apache.spark.sql.parquet.ParquetRelation2.org$apache$spark$sql$parquet$ParquetRelation2$$writeShard$1(newParquet.scala:671)
        at 
org.apache.spark.sql.parquet.ParquetRelation2$$anonfun$insert$2.apply(newParquet.scala:689)
        at 
org.apache.spark.sql.parquet.ParquetRelation2$$anonfun$insert$2.apply(newParquet.scala:689)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
        at org.apache.spark.scheduler.Task.run(Task.scala:64)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
{code}


was (Author: bobrik):
I'm seeing this with 1.3.1:

{code}
object App {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Simple Application")
      .setMaster("local")
      .set("es.nodes", "web606:9200")
      .set("es.net.proxy.socks.host", "127.0.0.1")
      .set("es.net.proxy.socks.port", "6666")
      .set("es.scroll.size", "1000")
      .set("es.field.read.empty.as.null", "no")
      .set("es.query", "?q=@key:unique_active_users")

    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    val df = JavaEsSparkSQL.esDF(sqlContext, "statistics-20150525/events")

    df.printSchema()

    df.save("/Users/bobrik/Desktop/parquet-test", "parquet", SaveMode.Overwrite)
  }

}
{code}

{code}
15/06/02 15:30:10 ERROR Executor: Exception in task 4.0 in stage 0.0 (TID 4)
java.lang.ClassCastException: java.util.Date cannot be cast to java.sql.Timestamp
        at 
org.apache.spark.sql.parquet.RowWriteSupport.writePrimitive(ParquetTableSupport.scala:210)
        at 
org.apache.spark.sql.parquet.RowWriteSupport.writeValue(ParquetTableSupport.scala:192)
        at 
org.apache.spark.sql.parquet.RowWriteSupport.write(ParquetTableSupport.scala:171)
        at 
org.apache.spark.sql.parquet.RowWriteSupport.write(ParquetTableSupport.scala:134)
        at 
parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:120)
        at parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:81)
        at parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:37)
        at 
org.apache.spark.sql.parquet.ParquetRelation2.org$apache$spark$sql$parquet$ParquetRelation2$$writeShard$1(newParquet.scala:671)
        at 
org.apache.spark.sql.parquet.ParquetRelation2$$anonfun$insert$2.apply(newParquet.scala:689)
        at 
org.apache.spark.sql.parquet.ParquetRelation2$$anonfun$insert$2.apply(newParquet.scala:689)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
        at org.apache.spark.scheduler.Task.run(Task.scala:64)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
{code}

> [Hive] java.sql.Date cannot be cast to java.sql.Timestamp
> ---------------------------------------------------------
>
>                 Key: SPARK-3034
>                 URL: https://issues.apache.org/jira/browse/SPARK-3034
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 1.0.2
>            Reporter: pengyanhong
>
> Running a simple HiveQL query via yarn-cluster produces the error below:
> {quote}
> Exception in thread "Thread-2" java.lang.reflect.InvocationTargetException
>       at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>       at 
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
>       at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
>       at java.lang.reflect.Method.invoke(Method.java:597)
>       at 
> org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:199)
> Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: 
> Task 0.0:127 failed 3 times, most recent failure: Exception failure in TID 
> 141 on host A01-R06-I147-41.jd.local: java.lang.ClassCastException: 
> java.sql.Date cannot be cast to java.sql.Timestamp
>         
> org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaTimestampObjectInspector.getPrimitiveWritableObject(JavaTimestampObjectInspector.java:33)
>         
> org.apache.hadoop.hive.serde2.lazy.LazyUtils.writePrimitiveUTF8(LazyUtils.java:251)
>         
> org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:486)
>         
> org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serializeField(LazySimpleSerDe.java:439)
>         
> org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:423)
>         
> org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$3$$anonfun$apply$1.apply(InsertIntoHiveTable.scala:200)
>         
> org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$3$$anonfun$apply$1.apply(InsertIntoHiveTable.scala:192)
>         scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
>         
> org.apache.spark.sql.hive.execution.InsertIntoHiveTable.org$apache$spark$sql$hive$execution$InsertIntoHiveTable$$writeToFile$1(InsertIntoHiveTable.scala:149)
>         
> org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$saveAsHiveFile$1.apply(InsertIntoHiveTable.scala:158)
>         
> org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$saveAsHiveFile$1.apply(InsertIntoHiveTable.scala:158)
>         org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111)
>         org.apache.spark.scheduler.Task.run(Task.scala:51)
>         org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:183)
>         
> java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
>         
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
>         java.lang.Thread.run(Thread.java:662)
> Driver stacktrace:
>       at 
> org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1049)
>       at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1033)
>       at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1031)
>       at 
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>       at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>       at 
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1031)
>       at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:635)
>       at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:635)
>       at scala.Option.foreach(Option.scala:236)
>       at 
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:635)
>       at 
> org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1234)
>       at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
>       at akka.actor.ActorCell.invoke(ActorCell.scala:456)
>       at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
>       at akka.dispatch.Mailbox.run(Mailbox.scala:219)
>       at 
> akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
>       at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
>       at 
> scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
>       at 
> scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
>       at 
> scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> {quote}
> The above error is thrown during the stage that inserts result data into a
> Hive table which has a field of the timestamp data type.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to