This should work.
Create your sbt file first:
cat PrintAllDatabases.sbt
name := "PrintAllDatabases"
version := "1.0"
scalaVersion := "2.10.5"
libraryDependencies += "org.apache.spark" %% "spark-core" % "1.5.0"
libraryDependencies += "org.apache.spark" %% "spark-sql" % "1.5.0"
libraryDependencies += "org.apache.spark" %% "spark-hive" % "1.5.0"
Your Scala file should look like this:
cat PrintAllDatabases.scala
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.hive.HiveContext
//
object PrintAllDatabases {
  def main(args: Array[String]) {
    val conf = new SparkConf().
      setAppName("PrintAllDatabases").
      setMaster("local[12]").
      set("spark.driver.allowMultipleContexts", "true").
      set("spark.hadoop.validateOutputSpecs", "false")
    val sc = new SparkContext(conf)
    // Create sqlContext based on HiveContext so that statements like
    // "show databases" go through the Hive parser
    val sqlContext = new HiveContext(sc)
    println("\nStarted at")
    sqlContext.sql("SELECT FROM_unixtime(unix_timestamp(), 'dd/MM/yyyy HH:mm:ss.ss')").collect.foreach(println)
    println("\n Running the query \n")
    val rs = sqlContext.sql("show databases")
    rs.collect.foreach(println)
    println("\nFinished at")
    sqlContext.sql("SELECT FROM_unixtime(unix_timestamp(), 'dd/MM/yyyy HH:mm:ss.ss')").collect.foreach(println)
  }
}
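The crux here is the context type, not your SQL. In Spark 1.5 the parser behind the plain SQLContext does not recognise "show databases" (that is exactly the ``with'' expected but identifier show found failure quoted below), whereas HiveContext hands such statements to the Hive parser. Your spark-submit version therefore needs only one change, swapping SQLContext for HiveContext; as a minimal self-contained sketch (object name is mine):

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.hive.HiveContext

object ShowDatabases {
  def main(args: Array[String]) {
    val sc = new SparkContext(new SparkConf().setAppName(this.getClass.getName))
    // HiveContext, not SQLContext: only the Hive parser accepts "show databases"
    val sqlContext = new HiveContext(sc)
    sqlContext.sql("show databases").collect.foreach(println)
    sc.stop()
  }
}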
You then build and run the main program as below
sbt package
$SPARK_HOME/bin/spark-submit \
--class "PrintAllDatabases" \
--master spark://50.140.197.217:7077 \
target/scala-2.10/printalldatabases_2.10-1.0.jar
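One caveat on the master: properties set directly on the SparkConf take precedence over spark-submit flags, so with setMaster("local[12]") hard-coded the --master URL above is effectively ignored. If you want the flag to decide, build the conf without it:

val conf = new SparkConf().
  setAppName("PrintAllDatabases").
  set("spark.hadoop.validateOutputSpecs", "false")
// master now comes from spark-submit's --master flag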
The output is below
Started at
[15/03/2016 00:57:01.01]
Running the query
[asehadoop]
[default]
[iqhadoop]
[mytable_db]
[oraclehadoop]
[test]
Finished at
[15/03/2016 00:57:03.03]
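Each bracketed line such as [default] is just Row.toString on the single-column result. If you want the bare database names, pull out the first column of each row instead:

rs.collect.foreach(row => println(row.getString(0)))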
HTH
Dr Mich Talebzadeh
LinkedIn:
https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw
http://talebzadehmich.wordpress.com
On 14 March 2016 at 23:20, rhuang <[email protected]> wrote:
> Hi all,
>
> I have several Hive queries that work in spark-shell, but they don't work
> in spark-submit. In fact, I can't even show all databases. The following
> works in spark-shell:
>
>
> import org.apache.spark._
> import org.apache.spark.sql._
>
> object ViewabilityFetchInsertDailyHive {
> def main() {
> val x = sqlContext.sql("show databases")
> val z = x.collect
> for(i <- z) println(i.toString)
> }
> }
>
> But the following doesn't work in spark-submit:
>
>
> object PrintAllDatabases {
> def main() {
> val sc = new SparkContext(new SparkConf().setAppName(this.getClass.getName))
> val sqlContext = new SQLContext(sc)
> val x = sqlContext.sql("show databases")
> val z = x.collect
> for(i <- z) println(i.toString)
> }
> }
>
>
> And I get this error:
>
> 16/03/14 22:27:55 INFO BlockManagerMaster: Registered BlockManager
> 16/03/14 22:27:56 INFO EventLoggingListener: Logging events to
> hdfs://nameservice1/user/spark/applicationHistory/local-1457994475020
> Exception in thread "main" java.lang.RuntimeException: [1.1] failure:
> ``with'' expected but identifier show found
>
> show databases
> ^
> at scala.sys.package$.error(package.scala:27)
> at org.apache.spark.sql.catalyst.AbstractSparkSQLParser.parse(AbstractSparkSQLParser.scala:36)
> at org.apache.spark.sql.catalyst.DefaultParserDialect.parse(ParserDialect.scala:67)
> at org.apache.spark.sql.SQLContext$$anonfun$2.apply(SQLContext.scala:211)
> at org.apache.spark.sql.SQLContext$$anonfun$2.apply(SQLContext.scala:211)
> at org.apache.spark.sql.execution.SparkSQLParser$$anonfun$org$apache$spark$sql$execution$SparkSQLParser$$others$1.apply(SparkSQLParser.scala:114)
>
>
> Any suggestions are appreciated!
>
>
>