The filter operation also raises an error. 16/06/14 19:00:27 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041. Spark context available as sc. SQL context available as sqlContext.
scala> import org.apache.spark.sql.SQLContext import org.apache.spark.sql.SQLContext scala> val sqlContext = new SQLContext(sc) sqlContext: org.apache.spark.sql.SQLContext = org.apache.spark.sql.SQLContext@3114ea scala> val df = sqlContext.read.format("com.databricks.spark.csv").option("header", "true").option("inferSchema", "true").load("/home/martin/result002.csv") 16/06/14 19:00:32 WARN SizeEstimator: Failed to check whether UseCompressedOops is set; assuming yes Java HotSpot(TM) Client VM warning: You have loaded library /tmp/libnetty-transport-native-epoll7823347435914767500.so which might have disabled stack guard. The VM will try to fix the stack guard now. It's highly recommended that you fix the library with 'execstack -c <libfile>', or link it with '-z noexecstack'. df: org.apache.spark.sql.DataFrame = [a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 : string] scala> df.printSchema() root |-- a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 : string (nullable = true) scala> df.registerTempTable("sales") scala> df.filter($"a0".contains("found deep=1")).filter($"a1".contains("found deep=1")).filter($"a2".contains("found deep=1")) org.apache.spark.sql.AnalysisException: cannot resolve 'a0' given input columns: [a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 ]; at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42) On Tue, Jun 14, 2016 at 6:19 PM, Lee Ho Yeung <jobmatt...@gmail.com> wrote: > After trying the following commands, I cannot show the data: > > > https://drive.google.com/file/d/0Bxs_ao6uuBDUVkJYVmNaUGx2ZUE/view?usp=sharing > > https://drive.google.com/file/d/0Bxs_ao6uuBDUc3ltMVZqNlBUYVk/view?usp=sharing > > /home/martin/Downloads/spark-1.6.1/bin/spark-shell --packages > com.databricks:spark-csv_2.11:1.4.0 > > import org.apache.spark.sql.SQLContext > > val sqlContext = new SQLContext(sc) > val df = > sqlContext.read.format("com.databricks.spark.csv").option("header", > "true").option("inferSchema", "true").load("/home/martin/result002.csv") > df.printSchema() > 
df.registerTempTable("sales") > val aggDF = sqlContext.sql("select * from sales where a0 like > \"%deep=3%\"") > df.collect.foreach(println) > aggDF.collect.foreach(println) > > > > val df = > sqlContext.read.format("com.databricks.spark.csv").option("header", > "true").load("/home/martin/result002.csv") > df.printSchema() > df.registerTempTable("sales") > sqlContext.sql("select * from sales").take(30).foreach(println) >