Re: carbon data

Lu Cao Tue, 29 Nov 2016 01:56:29 -0800

Thank you for the response Liang. I think I have followed the example but
it still returns error:
       Data loading failed. table not found: default.carbontest
attached my code below: I read data from a hive table with HiveContext and
convert it to CarbonContext then generate the df and save to hdfs. I'm not
sure whether it's correct or not when I generate the dataframe in
sc.parallelize(sc.Files,
25) Do you have any other mothod we can use to generate DF?


object SparkConvert {

  def main(args: Array[String]): Unit = {

    val conf = new SparkConf().setAppName("CarbonTest")

    val sc = new SparkContext(conf)

    val path = "hdfs:///user/appuser/lucao/CarbonTest_001.carbon"

    val hqlContext = new HiveContext(sc)

    val df = hqlContext.sql("select * from default.test_data_all")

    println("the count is:" + df.count())

    val cc = createCarbonContext(df.sqlContext.sparkContext, path)

    writeDataFrame(cc, "CarbonTest", SaveMode.Append)



  }



  def createCarbonContext(sc : SparkContext, storePath : String):
CarbonContext = {

    val cc = new CarbonContext(sc, storePath)

    cc

  }



  def writeDataFrame(cc : CarbonContext, tableName : String, mode :
SaveMode) : Unit = {

    import cc.implicits._

    val sc = cc.sparkContext

    val df = sc.parallelize(sc.files,
25).toDF(“col1”,”col2”,”col3”..."coln")

    df.write

      .format("carbondata")

      .option("tableName", tableName)

      .option("compress", "true")

      .mode(mode)

      .save()

  }



}

Re: carbon data

Reply via email to