Hi All,

I am trying to access Hive from Spark, but I am getting this exception:

    The root scratch dir: /tmp/hive on HDFS should be writable. Current
    permissions are: rw-rw-rw-
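
As I understand the message, /tmp/hive is missing the execute/write bits Hive expects. For reference, here is a minimal sketch (my own addition, not code I have in the project) of how the scratch dir permissions can be inspected and, if the user is allowed to chmod it, widened through the Hadoop FileSystem API, assuming the same hdfs://hdp23ha HA settings as in the code below:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.permission.FsPermission;

    public class ScratchDirCheck {
        public static void main(String[] args) throws Exception {
            System.setProperty("HADOOP_USER_NAME", "hadoop");

            Configuration conf = new Configuration();
            // Same HA settings as in the Spark code below; alternatively put
            // core-site.xml / hdfs-site.xml on the classpath.
            conf.set("fs.defaultFS", "hdfs://hdp23ha");
            conf.set("dfs.nameservices", "hdp23ha");
            conf.set("dfs.ha.namenodes.hdp23ha", "nn1,nn2");
            conf.set("dfs.namenode.rpc-address.hdp23ha.nn1", "ambarimaster:8020");
            conf.set("dfs.namenode.rpc-address.hdp23ha.nn2", "hdp231:8020");
            conf.set("dfs.client.failover.proxy.provider.hdp23ha",
                "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");

            FileSystem fs = FileSystem.get(conf);
            Path scratch = new Path("/tmp/hive");

            // Prints the permissions the error complains about, e.g. rw-rw-rw-
            System.out.println(fs.getFileStatus(scratch).getPermission());

            // Widen to rwxrwxrwx so the Hive session can create its sub-dirs
            // (requires that the "hadoop" user may chmod /tmp/hive).
            fs.setPermission(scratch, new FsPermission((short) 0777));
        }
    }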

Code:

    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.Function;
    import org.apache.spark.sql.DataFrame;
    import org.apache.spark.sql.SQLContext;
    import org.apache.spark.sql.hive.HiveContext;

    import com.upwork.sparketl.core.bean.Logs;

    String logFile = "hdfs://hdp23ha/logs"; // should be a file that exists on HDFS
    System.setProperty("HADOOP_USER_NAME", "hadoop");

    SparkConf conf = new SparkConf().setAppName("Simple Application").setMaster("local[2]");
    JavaSparkContext sc = new JavaSparkContext(conf);

    // HDFS HA and Hive settings
    sc.hadoopConfiguration().set("fs.defaultFS", "hdfs://hdp23ha");
    sc.hadoopConfiguration().set("hive.metastore.warehouse.dir", "/apps/hive/warehouse");
    sc.hadoopConfiguration().set("hive.exec.local.scratchdir", "D://");
    sc.hadoopConfiguration().set("dfs.nameservices", "hdp23ha");
    sc.hadoopConfiguration().set("hive.exec.scratchdir", "/tmp/hive/");
    sc.hadoopConfiguration().setInt("hive.exec.scratchdir.permission", 777);
    sc.hadoopConfiguration().set("dfs.ha.namenodes.hdp23ha", "nn1,nn2");
    sc.hadoopConfiguration().set("dfs.namenode.rpc-address.hdp23ha.nn1", "ambarimaster:8020");
    sc.hadoopConfiguration().set("dfs.namenode.rpc-address.hdp23ha.nn2", "hdp231:8020");
    sc.hadoopConfiguration().set("dfs.client.failover.proxy.provider.hdp23ha",
            "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");

    HiveContext hiveContext = new HiveContext(sc);
    JavaRDD<String> logData = sc.textFile(logFile).cache();

    hiveContext.sql("CREATE EXTERNAL TABLE IF NOT EXISTS Logs (date STRING, msg STRING) STORED AS ORC");

    // Parse each comma-separated line into a Logs bean
    JavaRDD<Logs> logsRDD = logData.map(new Function<String, Logs>() {
            public Logs call(String line) throws Exception {
                    String[] array = line.split(",");
                    return new Logs(array[0], array[1]);
            }
    });

    // sc is an existing JavaSparkContext.
    SQLContext sqlContext = new SQLContext(sc);
    DataFrame logsDF = sqlContext.createDataFrame(logsRDD, Logs.class);
    logsDF.registerTempTable("logs");

    DataFrame results = sqlContext.sql("SELECT * FROM logs");

    results.write().format("orc").saveAsTable("Logs");
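
The Logs bean itself is not shown above. For completeness, here is a minimal sketch of the shape that sqlContext.createDataFrame(logsRDD, Logs.class) expects (a serializable JavaBean whose getters define the schema); the two String fields are assumed from the table columns, the real class lives in com.upwork.sparketl.core.bean:

    import java.io.Serializable;

    // Hypothetical stand-in for com.upwork.sparketl.core.bean.Logs
    public class Logs implements Serializable {
        private String date;
        private String msg;

        public Logs(String date, String msg) {
            this.date = date;
            this.msg = msg;
        }

        public String getDate() { return date; }
        public void setDate(String date) { this.date = date; }

        public String getMsg() { return msg; }
        public void setMsg(String msg) { this.msg = msg; }
    }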


                
Any help would be greatly appreciated.
Thanks


        


