Thank you for the response Liang. I think I have followed the example but it still returns error: Data loading failed. table not found: default.carbontest attached my code below: I read data from a hive table with HiveContext and convert it to CarbonContext then generate the df and save to hdfs. I'm not sure whether it's correct or not when I generate the dataframe in sc.parallelize(sc.Files, 25) Do you have any other mothod we can use to generate DF?
object SparkConvert { def main(args: Array[String]): Unit = { val conf = new SparkConf().setAppName("CarbonTest") val sc = new SparkContext(conf) val path = "hdfs:///user/appuser/lucao/CarbonTest_001.carbon" val hqlContext = new HiveContext(sc) val df = hqlContext.sql("select * from default.test_data_all") println("the count is:" + df.count()) val cc = createCarbonContext(df.sqlContext.sparkContext, path) writeDataFrame(cc, "CarbonTest", SaveMode.Append) } def createCarbonContext(sc : SparkContext, storePath : String): CarbonContext = { val cc = new CarbonContext(sc, storePath) cc } def writeDataFrame(cc : CarbonContext, tableName : String, mode : SaveMode) : Unit = { import cc.implicits._ val sc = cc.sparkContext val df = sc.parallelize(sc.files, 25).toDF(“col1”,”col2”,”col3”..."coln") df.write .format("carbondata") .option("tableName", tableName) .option("compress", "true") .mode(mode) .save() } }