Re: Code works in Spark-Shell but Fails inside IntelliJ
Not using SBT...I have been creating and adapting various Spark Scala examples and put it here and all you have to do is git clone and import as maven project into IntelliJ: https://github.com/sanjaysubramanian/msfx_scala.git Sidenote, IMHO, IDEs encourage the "new to Spark/Scala developers" to quickly test, experiment and debug code. From: Jay Vyas To: Sanjay Subramanian Cc: "user@spark.apache.org" Sent: Thursday, November 20, 2014 4:53 PM Subject: Re: Code works in Spark-Shell but Fails inside IntelliJ This seems pretty standard: your IntelliJ classpath isn't matched to the correct ones that are used in spark shell Are you using the SBT plugin? If not how are you putting deps into IntelliJ? On Nov 20, 2014, at 7:35 PM, Sanjay Subramanian wrote: hey guys I am at AmpCamp 2014 at UCB right now :-) Funny Issue... This code works in Spark-Shell but throws a funny exception in IntelliJ CODE val sqlContext = new org.apache.spark.sql.SQLContext(sc)sqlContext.setConf("spark.sql.parquet.binaryAsString", "true")val wikiData = sqlContext.parquetFile("/Users/sansub01/mycode/knowledge/spark_ampcamp_2014/data/wiki_parquet")wikiData.registerTempTable("wikiData")sqlContext.sql("SELECT username, COUNT(*) AS cnt FROM wikiData WHERE username <> '' GROUP BY username ORDER BY cnt DESC LIMIT 10").collect().foreach(println) RESULTS[Waacstats,2003][Cydebot,949][BattyBot,939][Yobot,890][Addbot,853][Monkbot,668][ChrisGualtieri,438][RjwilmsiBot,387][OccultZone,377][ClueBot NG,353] INTELLIJ CODE=object ParquetSql { def main(args: Array[String]) { val sconf = new SparkConf().setMaster("local").setAppName("MedicalSideFx-NamesFoodSql") val sc = new SparkContext(sconf) val sqlContext = new org.apache.spark.sql.SQLContext(sc) sqlContext.setConf("spark.sql.parquet.binaryAsString", "true") val wikiData = sqlContext.parquetFile("/Users/sansub01/mycode/knowledge/spark_ampcamp_2014/data/wiki_parquet") wikiData.registerTempTable("wikiData") val results = sqlContext.sql("SELECT username, COUNT(*) AS cnt 
FROM wikiData WHERE username <> '' GROUP BY username ORDER BY cnt DESC LIMIT 10") results.collect().foreach(println) } } INTELLIJ ERROR==Exception in thread "main" java.lang.IncompatibleClassChangeError: Found interface org.apache.spark.serializer.Serializer, but class was expected at org.apache.spark.sql.parquet.ParquetFilters$.serializeFilterExpressions(ParquetFilters.scala:244) at org.apache.spark.sql.parquet.ParquetTableScan.execute(ParquetTableOperations.scala:109) at org.apache.spark.sql.execution.Filter.execute(basicOperators.scala:57) at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:151) at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46) at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126) at org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:48) at org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:45) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46) at org.apache.spark.sql.execution.Exchange.execute(Exchange.scala:44) at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:151) at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46) at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126) at org.apache.spark.sql.execution.TakeOrdered.executeCollect(basicOperators.scala:171) at org.apache.spark.sql.SchemaRDD.collect(SchemaRDD.scala:438) at org.medicalsidefx.common.utils.ParquetSql$.main(ParquetSql.scala:18) at org.medicalsidefx.common.utils.ParquetSql.main(ParquetSql.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at com.intellij.rt.execution.application.AppMain.main(AppMain.java:134)
Re: Code works in Spark-Shell but Fails inside IntelliJ
Awesome that was it...Hit me with a hockey stick :-) unmatched Spark Core (1.0.0) and SparkSql (1.1.1) versions. Corrected that to 1.1.0 on both org.apache.spark spark-core_2.10 1.0.0 org.apache.spark spark-sql_2.10 1.1.0 From: Michael Armbrust To: Sanjay Subramanian Cc: "user@spark.apache.org" Sent: Thursday, November 20, 2014 4:49 PM Subject: Re: Code works in Spark-Shell but Fails inside IntelliJ Looks like IntelliJ might be trying to load the wrong version of spark? On Thu, Nov 20, 2014 at 4:35 PM, Sanjay Subramanian wrote: hey guys I am at AmpCamp 2014 at UCB right now :-) Funny Issue... This code works in Spark-Shell but throws a funny exception in IntelliJ CODE val sqlContext = new org.apache.spark.sql.SQLContext(sc)sqlContext.setConf("spark.sql.parquet.binaryAsString", "true")val wikiData = sqlContext.parquetFile("/Users/sansub01/mycode/knowledge/spark_ampcamp_2014/data/wiki_parquet")wikiData.registerTempTable("wikiData")sqlContext.sql("SELECT username, COUNT(*) AS cnt FROM wikiData WHERE username <> '' GROUP BY username ORDER BY cnt DESC LIMIT 10").collect().foreach(println) RESULTS[Waacstats,2003][Cydebot,949][BattyBot,939][Yobot,890][Addbot,853][Monkbot,668][ChrisGualtieri,438][RjwilmsiBot,387][OccultZone,377][ClueBot NG,353] INTELLIJ CODE=object ParquetSql { def main(args: Array[String]) { val sconf = new SparkConf().setMaster("local").setAppName("MedicalSideFx-NamesFoodSql") val sc = new SparkContext(sconf) val sqlContext = new org.apache.spark.sql.SQLContext(sc) sqlContext.setConf("spark.sql.parquet.binaryAsString", "true") val wikiData = sqlContext.parquetFile("/Users/sansub01/mycode/knowledge/spark_ampcamp_2014/data/wiki_parquet") wikiData.registerTempTable("wikiData") val results = sqlContext.sql("SELECT username, COUNT(*) AS cnt FROM wikiData WHERE username <> '' GROUP BY username ORDER BY cnt DESC LIMIT 10") results.collect().foreach(println) } } INTELLIJ ERROR==Exception in thread "main" java.lang.IncompatibleClassChangeError: Found interface 
org.apache.spark.serializer.Serializer, but class was expected at org.apache.spark.sql.parquet.ParquetFilters$.serializeFilterExpressions(ParquetFilters.scala:244) at org.apache.spark.sql.parquet.ParquetTableScan.execute(ParquetTableOperations.scala:109) at org.apache.spark.sql.execution.Filter.execute(basicOperators.scala:57) at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:151) at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46) at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126) at org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:48) at org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:45) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46) at org.apache.spark.sql.execution.Exchange.execute(Exchange.scala:44) at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:151) at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46) at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126) at org.apache.spark.sql.execution.TakeOrdered.executeCollect(basicOperators.scala:171) at org.apache.spark.sql.SchemaRDD.collect(SchemaRDD.scala:438) at org.medicalsidefx.common.utils.ParquetSql$.main(ParquetSql.scala:18) at org.medicalsidefx.common.utils.ParquetSql.main(ParquetSql.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at com.intellij.rt.execution.application.AppMain.main(AppMain.java:134)
Re: Code works in Spark-Shell but Fails inside IntelliJ
This seems pretty standard: your IntelliJ classpath isn't matched to the correct ones that are used in spark shell Are you using the SBT plugin? If not how are you putting deps into IntelliJ? > On Nov 20, 2014, at 7:35 PM, Sanjay Subramanian > wrote: > > hey guys > > I am at AmpCamp 2014 at UCB right now :-) > > Funny Issue... > > This code works in Spark-Shell but throws a funny exception in IntelliJ > > CODE > > val sqlContext = new org.apache.spark.sql.SQLContext(sc) > sqlContext.setConf("spark.sql.parquet.binaryAsString", "true") > val wikiData = > sqlContext.parquetFile("/Users/sansub01/mycode/knowledge/spark_ampcamp_2014/data/wiki_parquet") > wikiData.registerTempTable("wikiData") > sqlContext.sql("SELECT username, COUNT(*) AS cnt FROM wikiData WHERE username > <> '' GROUP BY username ORDER BY cnt DESC LIMIT > 10").collect().foreach(println) > > RESULTS > > [Waacstats,2003] > [Cydebot,949] > [BattyBot,939] > [Yobot,890] > [Addbot,853] > [Monkbot,668] > [ChrisGualtieri,438] > [RjwilmsiBot,387] > [OccultZone,377] > [ClueBot NG,353] > > > INTELLIJ CODE > = > object ParquetSql { > def main(args: Array[String]) { > > val sconf = new > SparkConf().setMaster("local").setAppName("MedicalSideFx-NamesFoodSql") > val sc = new SparkContext(sconf) > val sqlContext = new org.apache.spark.sql.SQLContext(sc) > sqlContext.setConf("spark.sql.parquet.binaryAsString", "true") > val wikiData = > sqlContext.parquetFile("/Users/sansub01/mycode/knowledge/spark_ampcamp_2014/data/wiki_parquet") > wikiData.registerTempTable("wikiData") > val results = sqlContext.sql("SELECT username, COUNT(*) AS cnt FROM > wikiData WHERE username <> '' GROUP BY username ORDER BY cnt DESC LIMIT 10") > results.collect().foreach(println) > } > > } > > INTELLIJ ERROR > == > Exception in thread "main" java.lang.IncompatibleClassChangeError: Found > interface org.apache.spark.serializer.Serializer, but class was expected > at > 
org.apache.spark.sql.parquet.ParquetFilters$.serializeFilterExpressions(ParquetFilters.scala:244) > at > org.apache.spark.sql.parquet.ParquetTableScan.execute(ParquetTableOperations.scala:109) > at > org.apache.spark.sql.execution.Filter.execute(basicOperators.scala:57) > at > org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:151) > at > org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127) > at > org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46) > at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126) > at > org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:48) > at > org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:45) > at > org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46) > at org.apache.spark.sql.execution.Exchange.execute(Exchange.scala:44) > at > org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:151) > at > org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127) > at > org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46) > at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126) > at > org.apache.spark.sql.execution.TakeOrdered.executeCollect(basicOperators.scala:171) > at org.apache.spark.sql.SchemaRDD.collect(SchemaRDD.scala:438) > at org.medicalsidefx.common.utils.ParquetSql$.main(ParquetSql.scala:18) > at org.medicalsidefx.common.utils.ParquetSql.main(ParquetSql.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at com.intellij.rt.execution.application.AppMain.main(AppMain.java:134) > > >
Re: Code works in Spark-Shell but Fails inside IntelliJ
Looks like intelij might be trying to load the wrong version of spark? On Thu, Nov 20, 2014 at 4:35 PM, Sanjay Subramanian < sanjaysubraman...@yahoo.com.invalid> wrote: > hey guys > > I am at AmpCamp 2014 at UCB right now :-) > > Funny Issue... > > This code works in Spark-Shell but throws a funny exception in IntelliJ > > CODE > > val sqlContext = new org.apache.spark.sql.SQLContext(sc) > sqlContext.setConf("spark.sql.parquet.binaryAsString", "true") > val wikiData = > sqlContext.parquetFile("/Users/sansub01/mycode/knowledge/spark_ampcamp_2014/data/wiki_parquet") > wikiData.registerTempTable("wikiData") > sqlContext.sql("SELECT username, COUNT(*) AS cnt FROM wikiData WHERE > username <> '' GROUP BY username ORDER BY cnt DESC LIMIT > 10").collect().foreach(println) > > RESULTS > > [Waacstats,2003] > [Cydebot,949] > [BattyBot,939] > [Yobot,890] > [Addbot,853] > [Monkbot,668] > [ChrisGualtieri,438] > [RjwilmsiBot,387] > [OccultZone,377] > [ClueBot NG,353] > > > INTELLIJ CODE > = > > object ParquetSql { > def main(args: Array[String]) { > > val sconf = new > SparkConf().setMaster("local").setAppName("MedicalSideFx-NamesFoodSql") > val sc = new SparkContext(sconf) > val sqlContext = new org.apache.spark.sql.SQLContext(sc) > sqlContext.setConf("spark.sql.parquet.binaryAsString", "true") > val wikiData = > sqlContext.parquetFile("/Users/sansub01/mycode/knowledge/spark_ampcamp_2014/data/wiki_parquet") > wikiData.registerTempTable("wikiData") > val results = sqlContext.sql("SELECT username, COUNT(*) AS cnt FROM > wikiData WHERE username <> '' GROUP BY username ORDER BY cnt DESC LIMIT 10") > results.collect().foreach(println) > } > > } > > > INTELLIJ ERROR > == > Exception in thread "main" java.lang.IncompatibleClassChangeError: Found > interface org.apache.spark.serializer.Serializer, but class was expected > at > org.apache.spark.sql.parquet.ParquetFilters$.serializeFilterExpressions(ParquetFilters.scala:244) > at > 
org.apache.spark.sql.parquet.ParquetTableScan.execute(ParquetTableOperations.scala:109) > at org.apache.spark.sql.execution.Filter.execute(basicOperators.scala:57) > at > org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:151) > at > org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127) > at > org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46) > at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126) > at > org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:48) > at > org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:45) > at > org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46) > at org.apache.spark.sql.execution.Exchange.execute(Exchange.scala:44) > at > org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:151) > at > org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127) > at > org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46) > at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126) > at > org.apache.spark.sql.execution.TakeOrdered.executeCollect(basicOperators.scala:171) > at org.apache.spark.sql.SchemaRDD.collect(SchemaRDD.scala:438) > at org.medicalsidefx.common.utils.ParquetSql$.main(ParquetSql.scala:18) > at org.medicalsidefx.common.utils.ParquetSql.main(ParquetSql.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at com.intellij.rt.execution.application.AppMain.main(AppMain.java:134) > > > >
Code works in Spark-Shell but Fails inside IntelliJ
hey guys I am at AmpCamp 2014 at UCB right now :-) Funny Issue... This code works in Spark-Shell but throws a funny exception in IntelliJ CODE val sqlContext = new org.apache.spark.sql.SQLContext(sc)sqlContext.setConf("spark.sql.parquet.binaryAsString", "true")val wikiData = sqlContext.parquetFile("/Users/sansub01/mycode/knowledge/spark_ampcamp_2014/data/wiki_parquet")wikiData.registerTempTable("wikiData")sqlContext.sql("SELECT username, COUNT(*) AS cnt FROM wikiData WHERE username <> '' GROUP BY username ORDER BY cnt DESC LIMIT 10").collect().foreach(println) RESULTS[Waacstats,2003][Cydebot,949][BattyBot,939][Yobot,890][Addbot,853][Monkbot,668][ChrisGualtieri,438][RjwilmsiBot,387][OccultZone,377][ClueBot NG,353] INTELLIJ CODE=object ParquetSql { def main(args: Array[String]) { val sconf = new SparkConf().setMaster("local").setAppName("MedicalSideFx-NamesFoodSql") val sc = new SparkContext(sconf) val sqlContext = new org.apache.spark.sql.SQLContext(sc) sqlContext.setConf("spark.sql.parquet.binaryAsString", "true") val wikiData = sqlContext.parquetFile("/Users/sansub01/mycode/knowledge/spark_ampcamp_2014/data/wiki_parquet") wikiData.registerTempTable("wikiData") val results = sqlContext.sql("SELECT username, COUNT(*) AS cnt FROM wikiData WHERE username <> '' GROUP BY username ORDER BY cnt DESC LIMIT 10") results.collect().foreach(println) } } INTELLIJ ERROR==Exception in thread "main" java.lang.IncompatibleClassChangeError: Found interface org.apache.spark.serializer.Serializer, but class was expected at org.apache.spark.sql.parquet.ParquetFilters$.serializeFilterExpressions(ParquetFilters.scala:244) at org.apache.spark.sql.parquet.ParquetTableScan.execute(ParquetTableOperations.scala:109) at org.apache.spark.sql.execution.Filter.execute(basicOperators.scala:57) at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:151) at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127) at 
org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46) at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126) at org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:48) at org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:45) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46) at org.apache.spark.sql.execution.Exchange.execute(Exchange.scala:44) at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:151) at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46) at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126) at org.apache.spark.sql.execution.TakeOrdered.executeCollect(basicOperators.scala:171) at org.apache.spark.sql.SchemaRDD.collect(SchemaRDD.scala:438) at org.medicalsidefx.common.utils.ParquetSql$.main(ParquetSql.scala:18) at org.medicalsidefx.common.utils.ParquetSql.main(ParquetSql.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at com.intellij.rt.execution.application.AppMain.main(AppMain.java:134)