[
https://issues.apache.org/jira/browse/SPARK-3365?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14306774#comment-14306774
]
Joseph K. Bradley commented on SPARK-3365:
------------------------------------------
I found this as follows:
{code}
import org.apache.spark.sql._
val data = sc.parallelize(Seq((List.empty[Double], 5)))
val sql = new SQLContext(sc)
import sql._
val df = data.toDataFrame("l", "a")
df.saveAsParquetFile("blah")
{code}
Here's the error:
{code}
java.lang.IllegalStateException: Cannot build an empty group
at parquet.Preconditions.checkState(Preconditions.java:58)
at parquet.schema.Types$GroupBuilder.build(Types.java:536)
at parquet.schema.Types$GroupBuilder.build(Types.java:408)
at parquet.schema.Types$Builder.named(Types.java:210)
at
parquet.format.converter.ParquetMetadataConverter.buildChildren(ParquetMetadataConverter.java:623)
at
parquet.format.converter.ParquetMetadataConverter.fromParquetSchema(ParquetMetadataConverter.java:584)
at
parquet.format.converter.ParquetMetadataConverter.fromParquetMetadata(ParquetMetadataConverter.java:526)
at
parquet.format.converter.ParquetMetadataConverter.readParquetMetadata(ParquetMetadataConverter.java:520)
at
parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:426)
at
org.apache.spark.sql.parquet.ParquetTypesConverter$$anonfun$readMetaData$3.apply(ParquetTypes.scala:461)
at
org.apache.spark.sql.parquet.ParquetTypesConverter$$anonfun$readMetaData$3.apply(ParquetTypes.scala:461)
at scala.Option.map(Option.scala:145)
at
org.apache.spark.sql.parquet.ParquetTypesConverter$.readMetaData(ParquetTypes.scala:461)
at
org.apache.spark.sql.parquet.ParquetTypesConverter$.readSchemaFromFile(ParquetTypes.scala:481)
at
org.apache.spark.sql.parquet.ParquetRelation.<init>(ParquetRelation.scala:65)
at
org.apache.spark.sql.parquet.ParquetRelation$$anon$1.<init>(ParquetRelation.scala:170)
at
org.apache.spark.sql.parquet.ParquetRelation$.createEmpty(ParquetRelation.scala:170)
at
org.apache.spark.sql.parquet.ParquetRelation$.create(ParquetRelation.scala:147)
at
org.apache.spark.sql.execution.SparkStrategies$ParquetOperations$.apply(SparkStrategies.scala:211)
at
org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at
org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
at
org.apache.spark.sql.catalyst.planning.QueryPlanner.apply(QueryPlanner.scala:59)
at
org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan$lzycompute(SQLContext.scala:545)
at
org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan(SQLContext.scala:543)
at
org.apache.spark.sql.SQLContext$QueryExecution.executedPlan$lzycompute(SQLContext.scala:549)
at
org.apache.spark.sql.SQLContext$QueryExecution.executedPlan(SQLContext.scala:549)
at
org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:552)
at
org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:552)
at
org.apache.spark.sql.DataFrameImpl.saveAsParquetFile(DataFrameImpl.scala:305)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:25)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:30)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:32)
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:34)
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:36)
at $iwC$$iwC$$iwC.<init>(<console>:38)
at $iwC$$iwC.<init>(<console>:40)
at $iwC.<init>(<console>:42)
at <init>(<console>:44)
at .<init>(<console>:48)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at
org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
at
org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1338)
at
org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
at
org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:854)
at
org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:899)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:811)
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:654)
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:662)
at
org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:667)
at
org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:994)
at
org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:942)
at
org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:942)
at
scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at
org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:942)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1039)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:403)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:77)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
{code}
> Failure to save Lists to Parquet
> --------------------------------
>
> Key: SPARK-3365
> URL: https://issues.apache.org/jira/browse/SPARK-3365
> Project: Spark
> Issue Type: Bug
> Affects Versions: 1.1.0
> Reporter: Michael Armbrust
>
> Reproduction below; the same code works if the type is Seq instead of List. (props to [~chrisgrier] for
> finding this)
> {code}
> scala> case class Test(x: List[String])
> defined class Test
> scala> sparkContext.parallelize(Test(List()) :: Nil).saveAsParquetFile("bug")
> 23:09:51.807 ERROR org.apache.spark.executor.Executor: Exception in task 0.0
> in stage 0.0 (TID 0)
> java.lang.ArithmeticException: / by zero
> at
> parquet.hadoop.InternalParquetRecordWriter.initStore(InternalParquetRecordWriter.java:99)
> at
> parquet.hadoop.InternalParquetRecordWriter.<init>(InternalParquetRecordWriter.java:92)
> at
> parquet.hadoop.ParquetRecordWriter.<init>(ParquetRecordWriter.java:64)
> at
> parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:282)
> at
> parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:252)
> at
> org.apache.spark.sql.parquet.InsertIntoParquetTable.org$apache$spark$sql$parquet$InsertIntoParquetTable$$writeShard$1(ParquetTableOperations.scala:300)
> at
> org.apache.spark.sql.parquet.InsertIntoParquetTable$$anonfun$saveAsHadoopFile$1.apply(ParquetTableOperations.scala:318)
> at
> org.apache.spark.sql.parquet.InsertIntoParquetTable$$anonfun$saveAsHadoopFile$1.apply(ParquetTableOperations.scala:318)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
> at org.apache.spark.scheduler.Task.run(Task.scala:54)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:178)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
> at java.lang.Thread.run(Thread.java:744)
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]