[ https://issues.apache.org/jira/browse/SPARK-26942?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16776808#comment-16776808 ]
ketan kunde commented on SPARK-26942:
-------------------------------------
Logs attached
test statistics of LogicalRelation converted from Hive serde tables *** FAILED ***
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 131.0 failed 1 times, most recent failure: Lost task 0.0 in stage 131.0 (TID 191, localhost, executor driver): org.iq80.snappy.CorruptionException: Invalid copy offset for opcode starting at 4841
  at org.iq80.snappy.SnappyDecompressor.decompressAllTags(SnappyDecompressor.java:165)
  at org.iq80.snappy.SnappyDecompressor.uncompress(SnappyDecompressor.java:76)
  at org.iq80.snappy.Snappy.uncompress(Snappy.java:43)
  at org.apache.hadoop.hive.ql.io.orc.SnappyCodec.decompress(SnappyCodec.java:71)
  at org.apache.hadoop.hive.ql.io.orc.InStream$CompressedStream.readHeader(InStream.java:214)
  at org.apache.hadoop.hive.ql.io.orc.InStream$CompressedStream.read(InStream.java:238)
  at java.io.InputStream.read(InputStream.java:113)
  at org.apache.hive.com.google.protobuf.CodedInputStream.refillBuffer(CodedInputStream.java:737)
  at org.apache.hive.com.google.protobuf.CodedInputStream.isAtEnd(CodedInputStream.java:701)
  at org.apache.hive.com.google.protobuf.CodedInputStream.readTag(CodedInputStream.java:99)
  at org.apache.hadoop.hive.ql.io.orc.OrcProto$Footer.<init>(OrcProto.java:15780)
  at org.apache.hadoop.hive.ql.io.orc.OrcProto$Footer.<init>(OrcProto.java:15744)
  at org.apache.hadoop.hive.ql.io.orc.OrcProto$Footer$1.parsePartialFrom(OrcProto.java:15886)
  at org.apache.hadoop.hive.ql.io.orc.OrcProto$Footer$1.parsePartialFrom(OrcProto.java:15881)
  at org.apache.hive.com.google.protobuf.AbstractParser.parsePartialFrom(AbstractParser.java:200)
  at org.apache.hive.com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:217)
  at org.apache.hive.com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:223)
  at org.apache.hive.com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:49)
  at org.apache.hadoop.hive.ql.io.orc.OrcProto$Footer.parseFrom(OrcProto.java:16226)
  at org.apache.hadoop.hive.ql.io.orc.ReaderImpl$MetaInfoObjExtractor.<init>(ReaderImpl.java:479)
  at org.apache.hadoop.hive.ql.io.orc.ReaderImpl.<init>(ReaderImpl.java:319)
  at org.apache.hadoop.hive.ql.io.orc.OrcFile.createReader(OrcFile.java:187)
  at org.apache.spark.sql.hive.orc.OrcFileOperator$$anonfun$getFileReader$2.apply(OrcFileOperator.scala:75)
  at org.apache.spark.sql.hive.orc.OrcFileOperator$$anonfun$getFileReader$2.apply(OrcFileOperator.scala:73)
  at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
  at scala.collection.TraversableOnce$class.collectFirst(TraversableOnce.scala:145)
  at scala.collection.AbstractIterator.collectFirst(Iterator.scala:1336)
  at org.apache.spark.sql.hive.orc.OrcFileOperator$.getFileReader(OrcFileOperator.scala:86)
  at org.apache.spark.sql.hive.orc.OrcFileOperator$$anonfun$readSchema$1.apply(OrcFileOperator.scala:95)
  at org.apache.spark.sql.hive.orc.OrcFileOperator$$anonfun$readSchema$1.apply(OrcFileOperator.scala:95)
  at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
  at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
  at scala.collection.immutable.List.foreach(List.scala:381)
  at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
  at scala.collection.immutable.List.flatMap(List.scala:344)
  at org.apache.spark.sql.hive.orc.OrcFileOperator$.readSchema(OrcFileOperator.scala:95)
  at org.apache.spark.sql.hive.orc.OrcFileFormat$$anonfun$buildReader$2.apply(OrcFileFormat.scala:145)
  at org.apache.spark.sql.hive.orc.OrcFileFormat$$anonfun$buildReader$2.apply(OrcFileFormat.scala:136)
  at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:148)
  at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:132)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:128)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:182)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:109)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithoutKey_0$(generated.java:36)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(generated.java:64)
  at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$10$$anon$1.hasNext(WholeStageCodegenExec.scala:614)
  at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
  at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
  at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
  at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
  at org.apache.spark.scheduler.Task.run(Task.scala:109)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1160)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
  at java.lang.Thread.run(Thread.java:812)
Driver stacktrace:
  at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1651)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1639)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1638)
  at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
  at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
  at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1638)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
  at scala.Option.foreach(Option.scala:257)
  at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
...
Cause: org.iq80.snappy.CorruptionException: Invalid copy offset for opcode starting at 4841
  at org.iq80.snappy.SnappyDecompressor.decompressAllTags(SnappyDecompressor.java:165)
  at org.iq80.snappy.SnappyDecompressor.uncompress(SnappyDecompressor.java:76)
  at org.iq80.snappy.Snappy.uncompress(Snappy.java:43)
  at org.apache.hadoop.hive.ql.io.orc.SnappyCodec.decompress(SnappyCodec.java:71)
  at org.apache.hadoop.hive.ql.io.orc.InStream$CompressedStream.readHeader(InStream.java:214)
  at org.apache.hadoop.hive.ql.io.orc.InStream$CompressedStream.read(InStream.java:238)
  at java.io.InputStream.read(InputStream.java:113)
  at org.apache.hive.com.google.protobuf.CodedInputStream.refillBuffer(CodedInputStream.java:737)
  at org.apache.hive.com.google.protobuf.CodedInputStream.isAtEnd(CodedInputStream.java:701)
  at org.apache.hive.com.google.protobuf.CodedInputStream.readTag(CodedInputStream.java:99)
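
Every frame in the decompression path above bottoms out in org.iq80.snappy, the pure-Java Snappy implementation bundled with the Hive 1.2.1 ORC reader, rather than in Spark itself. A cheap way to isolate the codec from the test harness is a standalone compress/uncompress round trip on the same machine. Here is a minimal sketch, assuming the org.iq80.snappy jar that hive-exec pulls in is on the classpath (the object name SnappyRoundTrip is made up for illustration). If it fails on the big-endian box but passes on a little-endian one, the corruption happens inside the decompressor, not in the Spark test:

import java.nio.ByteOrder
import java.util.Arrays
import org.iq80.snappy.Snappy

object SnappyRoundTrip {
  def main(args: Array[String]): Unit = {
    // Repetitive input, so the compressor emits the back-reference
    // ("copy") opcodes that the exception above is choking on.
    val data = ("spark orc snappy big-endian check " * 500).getBytes("UTF-8")
    val compressed = Snappy.compress(data)
    val restored = Snappy.uncompress(compressed, 0, compressed.length)
    println(s"native byte order: ${ByteOrder.nativeOrder()}")
    assert(Arrays.equals(data, restored), "Snappy round trip corrupted the data")
    println("round trip OK")
  }
}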
> Spark v2.3.2 test failure in Hive module
> ----------------------------------------
>
> Key: SPARK-26942
> URL: https://issues.apache.org/jira/browse/SPARK-26942
> Project: Spark
> Issue Type: Test
> Components: Spark Core
> Affects Versions: 2.3.2
> Environment: Ubuntu 16.04, 8 GB RAM, 2-core machine, Docker container
> Reporter: ketan kunde
> Priority: Major
>
> Hi,
> I have built Spark 2.3.2 on a big-endian system and am now executing the
> test cases in the hive module. On big endian I hit an ORC-related failure
> while running the test
> "test statistics of LogicalRelation converted from Hive serde tables".
> I want to know whether the ORC serde is supported on big-endian systems,
> and if it is, what the workaround is to get this test passing.
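
On the workaround question: the trace goes through org.apache.spark.sql.hive.orc.OrcFileOperator, i.e. the Hive 1.2.1 ORC reader, which is what spark.sql.orc.impl=hive (the 2.3 default) selects. Spark 2.3 also ships a native ORC reader that decompresses through a different Snappy implementation. Whether the native path behaves better on big endian is an assumption to verify, not a known fix, but it is a cheap experiment. A sketch to paste into spark-shell (the output path /tmp/orc-endian-check is arbitrary):

// Switch to the native ORC reader/writer; Spark 2.3 defaults to "hive".
spark.conf.set("spark.sql.orc.impl", "native")

// Write a Snappy-compressed ORC file and read it back.
spark.range(100000L).write.mode("overwrite")
  .option("compression", "snappy")
  .orc("/tmp/orc-endian-check")

println(spark.read.orc("/tmp/orc-endian-check").count())

If this round trip is clean while the Hive-reader test still fails, that narrows the problem to the iq80 Snappy dependency of the Hive ORC path, which would be useful to note on this ticket.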