[ https://issues.apache.org/jira/browse/SPARK-2883?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14393141#comment-14393141 ]
Zhan Zhang edited comment on SPARK-2883 at 4/2/15 7:54 PM: ----------------------------------------------------------- The following code demonstrates the usage of the ORC support. import org.apache.spark.sql.hive.orc._ import org.apache.spark.sql._ //schema case class AllDataTypes( stringField: String, intField: Int, longField: Long, floatField: Float, doubleField: Double, shortField: Short, byteField: Byte, booleanField: Boolean) //saveAsOrcFile val range = (0 to 255) val data = sc.parallelize(range).map(x => AllDataTypes(s"$x", x, x.toLong, x.toFloat, x.toDouble, x.toShort, x.toByte, x % 2 == 0)) data.toDF().saveAsOrcFile("orcTest") //read orcFile val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc) //orcFile val orcTest = hiveContext.orcFile("orcTest") orcTest.registerTempTable("orcTest") hiveContext.sql("SELECT * from orcTest where intfield>185").collect.foreach(println) //new data source API, read hiveContext.sql("create temporary table orc using org.apache.spark.sql.hive.orc OPTIONS (path \"orcTest\")") hiveContext.sql("select * from orc").collect.foreach(println) val table = hiveContext.sql("select * from orc") // new data source API write table.saveAsTable("table", "org.apache.spark.sql.hive.orc") val hiveOrc = hiveContext.orcFile("/user/hive/warehouse/table") hiveOrc.registerTempTable("hiveOrc") hiveContext.sql("select * from hiveOrc").collect.foreach(println) table.saveAsOrcFile("/user/ambari-qa/table") hiveContext.sql("create temporary table normal_orc_as_source USING org.apache.spark.sql.hive.orc OPTIONS (path 'saveTable') as select * from table") was (Author: zzhan): Following code demonstrate the usage of the orc support. 
@climberus following examples demonstrate how to use it: import org.apache.spark.sql.hive.orc._ import org.apache.spark.sql._ //schema case class AllDataTypes( stringField: String, intField: Int, longField: Long, floatField: Float, doubleField: Double, shortField: Short, byteField: Byte, booleanField: Boolean) //saveAsOrcFile val range = (0 to 255) val data = sc.parallelize(range).map(x => AllDataTypes(s"$x", x, x.toLong, x.toFloat, x.toDouble, x.toShort, x.toByte, x % 2 == 0)) data.toDF().saveAsOrcFile("orcTest") //read orcFile val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc) //orcFile val orcTest = hiveContext.orcFile("orcTest") orcTest.registerTempTable("orcTest") hiveContext.sql("SELECT * from orcTest where intfield>185").collect.foreach(println) //new data source API, read hiveContext.sql("create temporary table orc using org.apache.spark.sql.hive.orc OPTIONS (path \"orcTest\")") hiveContext.sql("select * from orc").collect.foreach(println) val table = hiveContext.sql("select * from orc") // new data source API write table.saveAsTable("table", "org.apache.spark.sql.hive.orc") val hiveOrc = hiveContext.orcFile("/user/hive/warehouse/table") hiveOrc.registerTempTable("hiveOrc") hiveContext.sql("select * from hiveOrc").collect.foreach(println) table.saveAsOrcFile("/user/ambari-qa/table") hiveContext.sql("create temporary table normal_orc_as_source USING org.apache.spark.sql.hive.orc OPTIONS (path 'saveTable') as select * from table") > Spark Support for ORCFile format > -------------------------------- > > Key: SPARK-2883 > URL: https://issues.apache.org/jira/browse/SPARK-2883 > Project: Spark > Issue Type: Bug > Components: Input/Output, SQL > Reporter: Zhan Zhang > Priority: Blocker > Attachments: 2014-09-12 07.05.24 pm Spark UI.png, 2014-09-12 07.07.19 > pm jobtracker.png, orc.diff > > > Verify the support of OrcInputFormat in Spark, fix any issues that exist, and add > documentation of its usage. 
-- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org