[
https://issues.apache.org/jira/browse/SPARK-2883?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14603507#comment-14603507
]
Phil Claridge commented on SPARK-2883:
--------------------------------------
Took your dataframe.write.format sample and built a simple test case based on the
earlier example; it fails with an out-of-memory error:
{code}
package testorc
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object SimpleOrcTest {

  /** A row type exercising one field of each primitive SQL data type. */
  case class AllDataTypes(
      stringField: String,
      intField: Int,
      longField: Long,
      floatField: Float,
      doubleField: Double,
      shortField: Short,
      byteField: Byte,
      booleanField: Boolean)

  /** Entry point: generates 256 rows and writes them out in ORC format. */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("SimpleOrcTest").setMaster("local")
    val sparkCtx  = new SparkContext(sparkConf)

    import org.apache.spark.sql.hive.orc._
    import org.apache.spark.sql._

    val hiveCtx = new org.apache.spark.sql.hive.HiveContext(sparkCtx)
    import hiveCtx.implicits._

    // One row per integer in [0, 255]; every field is derived from the index.
    val rows: RDD[AllDataTypes] = sparkCtx.parallelize(0 to 255).map { i =>
      AllDataTypes(s"$i", i, i.toLong, i.toFloat, i.toDouble, i.toShort, i.toByte, i % 2 == 0)
    }

    // NOTE(review): per the accompanying comment, this save is reported to
    // fail with an out-of-memory error — that is the bug being reproduced.
    rows.toDF().write.format("orc").save("orcTest")
  }
}
{code}
> Spark Support for ORCFile format
> --------------------------------
>
> Key: SPARK-2883
> URL: https://issues.apache.org/jira/browse/SPARK-2883
> Project: Spark
> Issue Type: New Feature
> Components: Input/Output, SQL
> Reporter: Zhan Zhang
> Assignee: Zhan Zhang
> Priority: Critical
> Fix For: 1.4.0
>
> Attachments: 2014-09-12 07.05.24 pm Spark UI.png, 2014-09-12 07.07.19
> pm jobtracker.png, orc.diff
>
>
> Verify the support of OrcInputFormat in spark, fix issues if exists and add
> documentation of its usage.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]