This is the code

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.Row
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.types._
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.functions._

object ImportCSV {
  def main(args: Array[String]) {
    val conf = new SparkConf().
      setAppName("ImportCSV").
      setMaster("local[12]").
      set("spark.driver.allowMultipleContexts", "true").
      set("spark.hadoop.validateOutputSpecs", "false")
    val sc = new SparkContext(conf)
    val sqlContext = new org.apache.spark.sql.SQLContext(sc)
    import sqlContext.implicits._
    val HiveContext = new org.apache.spark.sql.hive.HiveContext(sc)

    val df = HiveContext.read.format("com.databricks.spark.csv")
      .option("inferSchema", "true")
      .option("header", "true")
      .load("/data/stg/table2")

    val a = df.filter(col("Total") > "").map(x =>
      (x.getString(0), x.getString(1),
       x.getString(2).substring(1).replace(",", "").toDouble,
       x.getString(3).substring(1).replace(",", "").toDouble,
       x.getString(4).substring(1).replace(",", "").toDouble))
    a.toDF.registerTempTable("tmp")

    HiveContext.sql("use test")
    HiveContext.sql("DROP TABLE IF EXISTS t3")
    var sqltext: String = ""
    sqltext = """
    CREATE TABLE t3 (
       INVOICENUMBER   INT
      ,PAYMENTDATE     timestamp
      ,NET             DECIMAL(20,2)
      ,VAT             DECIMAL(20,2)
      ,TOTAL           DECIMAL(20,2)
    )
    COMMENT 'from csv file from excel sheet'
    STORED AS ORC
    TBLPROPERTIES ( "orc.compress"="ZLIB" )
    """
    HiveContext.sql(sqltext)
    HiveContext.sql("INSERT INTO TABLE t3 SELECT * FROM tmp")
    HiveContext.sql("SELECT * FROM t3 ORDER BY 1").collect.foreach(println)
  }
}
I am getting this error when running the above code with spark-submit:

Exception in thread "main" org.apache.spark.sql.AnalysisException: no such table tmp; line 1 pos 35
        at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
        at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.getTable(Analyzer.scala:260)
        at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$7.applyOrElse(Analyzer.scala:268)

Dr Mich Talebzadeh

LinkedIn: https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw

http://talebzadehmich.wordpress.com


On 7 March 2016 at 21:00, Holden Karau <hol...@pigscanfly.ca> wrote:

> So what about if you just start with a hive context, and create your DF
> using the HiveContext?
>
> On Monday, March 7, 2016, Mich Talebzadeh <mich.talebza...@gmail.com>
> wrote:
>
>> Hi,
>>
>> I have done this in spark-shell and in Hive itself, so it works.
>>
>> I am exploring whether I can do it programmatically. The problem I
>> encountered was that I tried to register the DF as a temporary table.
>> When I then tried to insert from the temporary table into the Hive
>> table, I was getting the following error:
>>
>> sqltext = "INSERT INTO TABLE t3 SELECT * FROM tmp"
>> sqlContext.sql(sqltext)
>>
>> Tables created with SQLContext must be TEMPORARY. Use a HiveContext
>> instead.
>>
>> When I switched to HiveContext, it could not see the temporary table.
>>
>> So I decided to save the Spark table as follows:
>>
>> val a = df.filter(col("Total") > "").map(x =>
>>   (x.getString(0), x.getString(1),
>>    x.getString(2).substring(1).replace(",", "").toDouble,
>>    x.getString(3).substring(1).replace(",", "").toDouble,
>>    x.getString(4).substring(1).replace(",", "").toDouble))
>>
>> // delete the file in HDFS if it already exists
>> val hadoopConf = new org.apache.hadoop.conf.Configuration()
>> val hdfs = org.apache.hadoop.fs.FileSystem.get(
>>   new java.net.URI("hdfs://rhes564:9000"), hadoopConf)
>> val output = "hdfs://rhes564:9000/user/hduser/t3_parquet"
>> try { hdfs.delete(new org.apache.hadoop.fs.Path(output), true) }
>> catch { case _: Throwable => { } }
>>
>> // save it as a Parquet file
>> a.toDF.saveAsParquetFile(output)
>>
>> // Hive table t3 is created as a simple textfile. ORC did not work!
>> HiveContext.sql("LOAD DATA INPATH '/user/hduser/t3_parquet' into table t3")
>>
>> OK, that works, but it is very cumbersome.
>>
>> I checked the web but there are conflicting attempts to solve this issue.
>>
>> Please note that this can be done easily with spark-shell, as it has a
>> built-in HiveContext.
>>
>> Thanks
>
>
> --
> Cell : 425-233-8271
> Twitter: https://twitter.com/holdenkarau
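
For reference, below is a minimal self-contained sketch of what Holden suggests, assuming Spark 1.x with the spark-csv package on the classpath and that test.t3 already exists (created as in the original code). The object name ImportCSVWithHive is illustrative; the path, column handling, and table names are taken from the post above. Because only one context exists, the temporary table "tmp" lands in the same catalog that the INSERT statement resolves against:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.hive.HiveContext

object ImportCSVWithHive {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("ImportCSVWithHive").setMaster("local[12]")
    val sc = new SparkContext(conf)
    // one context only: no plain SQLContext alongside the HiveContext
    val hiveContext = new HiveContext(sc)
    import hiveContext.implicits._   // toDF now binds DataFrames to the HiveContext

    // same CSV source and currency clean-up as the original code
    val df = hiveContext.read.format("com.databricks.spark.csv")
      .option("inferSchema", "true")
      .option("header", "true")
      .load("/data/stg/table2")

    val a = df.filter(col("Total") > "").map(x =>
      (x.getString(0), x.getString(1),
       x.getString(2).substring(1).replace(",", "").toDouble,
       x.getString(3).substring(1).replace(",", "").toDouble,
       x.getString(4).substring(1).replace(",", "").toDouble))

    // registered in the HiveContext's own catalog this time
    a.toDF.registerTempTable("tmp")

    hiveContext.sql("use test")
    // assumes test.t3 already exists, as created in the original code
    hiveContext.sql("INSERT INTO TABLE t3 SELECT * FROM tmp")   // tmp is visible here
    hiveContext.sql("SELECT * FROM t3 ORDER BY 1").collect.foreach(println)
  }
}

In the original code, import sqlContext.implicits._ means a.toDF builds the DataFrame against the plain SQLContext, so registerTempTable("tmp") registers the table in that context's catalog; each context in Spark 1.x keeps its own temporary-table registry, so the separate HiveContext cannot resolve tmp, which matches the "no such table tmp" AnalysisException above.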