[ https://issues.apache.org/jira/browse/SPARK-4317?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Sean Owen resolved SPARK-4317.
------------------------------
    Resolution: Not A Problem

No follow-up in a long time, so assuming this is not a problem.

> Error querying Avro files imported by Sqoop: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: Unresolved attributes
> ----------------------------------------------------------------------------------------------------------------------------------
>
>                 Key: SPARK-4317
>                 URL: https://issues.apache.org/jira/browse/SPARK-4317
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 1.1.0
>         Environment: Spark 1.1.0, Sqoop 1.4.5, PostgreSQL 9.3
>            Reporter: Hendy Irawan
>
> After importing a table from PostgreSQL 9.3 into an Avro file using Sqoop 1.4.5,
> Spark SQL 1.1.0 is unable to process it
> (note that Hive 0.13 can process the same Avro file just fine):
> {code}
> spark-sql> select city from place;
> 14/11/10 10:15:08 INFO ParseDriver: Parsing command: select city from place
> 14/11/10 10:15:08 INFO ParseDriver: Parse Completed
> 14/11/10 10:15:08 INFO HiveMetaStore: 0: get_table : db=default tbl=place
> 14/11/10 10:15:08 INFO audit: ugi=ceefour ip=unknown-ip-addr cmd=get_table : db=default tbl=place
> 14/11/10 10:15:08 ERROR SparkSQLDriver: Failed in [select city from place]
> org.apache.spark.sql.catalyst.errors.package$TreeNodeException: Unresolved attributes: 'city, tree:
> Project ['city]
>  LowerCaseSchema
>   MetastoreRelation default, place, None
>         at org.apache.spark.sql.catalyst.analysis.Analyzer$CheckResolution$$anonfun$apply$1.applyOrElse(Analyzer.scala:72)
>         at org.apache.spark.sql.catalyst.analysis.Analyzer$CheckResolution$$anonfun$apply$1.applyOrElse(Analyzer.scala:70)
>         at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:165)
>         at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:156)
>         at org.apache.spark.sql.catalyst.analysis.Analyzer$CheckResolution$.apply(Analyzer.scala:70)
>         at org.apache.spark.sql.catalyst.analysis.Analyzer$CheckResolution$.apply(Analyzer.scala:68)
>         at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:61)
>         at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:59)
>         at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:51)
>         at scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:60)
>         at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:34)
>         at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:59)
>         at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:51)
>         at scala.collection.immutable.List.foreach(List.scala:318)
>         at org.apache.spark.sql.catalyst.rules.RuleExecutor.apply(RuleExecutor.scala:51)
>         at org.apache.spark.sql.SQLContext$QueryExecution.analyzed$lzycompute(SQLContext.scala:397)
>         at org.apache.spark.sql.SQLContext$QueryExecution.analyzed(SQLContext.scala:397)
>         at org.apache.spark.sql.hive.HiveContext$QueryExecution.optimizedPlan$lzycompute(HiveContext.scala:358)
>         at org.apache.spark.sql.hive.HiveContext$QueryExecution.optimizedPlan(HiveContext.scala:357)
>         at org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan$lzycompute(SQLContext.scala:402)
>         at org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan(SQLContext.scala:400)
>         at org.apache.spark.sql.SQLContext$QueryExecution.executedPlan$lzycompute(SQLContext.scala:406)
>         at org.apache.spark.sql.SQLContext$QueryExecution.executedPlan(SQLContext.scala:406)
>         at org.apache.spark.sql.hive.HiveContext$QueryExecution.stringResult(HiveContext.scala:406)
>         at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:59)
>         at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:291)
>         at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:413)
>         at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:226)
>         at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:483)
>         at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
>         at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
>         at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> [the identical stack trace is printed twice more: once as the bare exception, once under "14/11/10 10:15:08 ERROR CliDriver:"]
> {code}
> However, a simple `COUNT(*)` works fine:
> {code}
> > SELECT COUNT(*) FROM place;
> 14/11/10 10:17:44 INFO ParseDriver: Parsing command: SELECT COUNT(*) FROM place
> 14/11/10 10:17:44 INFO ParseDriver: Parse Completed
> 14/11/10 10:17:44 INFO HiveMetaStore: 0: get_table : db=default tbl=place
> 14/11/10 10:17:44 INFO audit: ugi=ceefour ip=unknown-ip-addr cmd=get_table : db=default tbl=place
> 14/11/10 10:17:44 INFO MemoryStore: ensureFreeSpace(450038) called with curMem=1354834, maxMem=278019440
> 14/11/10 10:17:44 INFO MemoryStore: Block broadcast_9 stored as values in memory (estimated size 439.5 KB, free 263.4 MB)
> 14/11/10 10:17:44 INFO AvroSerDe: Configuration null, not inserting schema
> 14/11/10 10:17:44 INFO SparkContext: Starting job: collect at HiveContext.scala:415
> 14/11/10 10:17:44 INFO FileInputFormat: Total input paths to process : 1
> 14/11/10 10:17:44 INFO DAGScheduler: Registering RDD 37 (mapPartitions at Exchange.scala:86)
> 14/11/10 10:17:44 INFO DAGScheduler: Got job 3 (collect at HiveContext.scala:415) with 1 output partitions (allowLocal=false)
> 14/11/10 10:17:44 INFO DAGScheduler: Final stage: Stage 6(collect at HiveContext.scala:415)
> 14/11/10 10:17:44 INFO DAGScheduler: Parents of final stage: List(Stage 7)
> 14/11/10 10:17:44 INFO DAGScheduler: Missing parents: List(Stage 7)
> 14/11/10 10:17:44 INFO DAGScheduler: Submitting Stage 7 (MapPartitionsRDD[37] at mapPartitions at Exchange.scala:86), which has no missing parents
> 14/11/10 10:17:44 INFO MemoryStore: ensureFreeSpace(10880) called with curMem=1804872, maxMem=278019440
> 14/11/10 10:17:44 INFO MemoryStore: Block broadcast_10 stored as values in memory (estimated size 10.6 KB, free 263.4 MB)
> 14/11/10 10:17:44 INFO DAGScheduler: Submitting 2 missing tasks from Stage 7 (MapPartitionsRDD[37] at mapPartitions at Exchange.scala:86)
> 14/11/10 10:17:44 INFO TaskSchedulerImpl: Adding task set 7.0 with 2 tasks
> 14/11/10 10:17:44 INFO TaskSetManager: Starting task 0.0 in stage 7.0 (TID 9, localhost, PROCESS_LOCAL, 1220 bytes)
> 14/11/10 10:17:44 INFO TaskSetManager: Starting task 1.0 in stage 7.0 (TID 10, localhost, PROCESS_LOCAL, 1220 bytes)
> 14/11/10 10:17:44 INFO Executor: Running task 0.0 in stage 7.0 (TID 9)
> 14/11/10 10:17:44 INFO Executor: Running task 1.0 in stage 7.0 (TID 10)
> 14/11/10 10:17:44 INFO HadoopRDD: Input split: file:/media/ceefour/passport/databank/culinary/hdfs/place/part-m-00000.avro:0+381526
> 14/11/10 10:17:44 INFO HadoopRDD: Input split: file:/media/ceefour/passport/databank/culinary/hdfs/place/part-m-00000.avro:381526+381527
> 14/11/10 10:17:44 INFO AvroGenericRecordReader: Found the avro schema in the job: {"type":"record","name":"QueryResult","doc":"Sqoop import of QueryResult","fields":[{"name":"id","type":["string","null"],"columnName":"id","sqlType":"12"},{"name":"city","type":["string","null"],"columnName":"city","sqlType":"12"},{"name":"description","type":["string","null"],"columnName":"description","sqlType":"12"},{"name":"lat","type":["double","null"],"columnName":"lat","sqlType":"8"},{"name":"lng","type":["double","null"],"columnName":"lng","sqlType":"8"},{"name":"mapimagefile","type":["string","null"],"columnName":"mapimagefile","sqlType":"12"},{"name":"menu","type":["string","null"],"columnName":"menu","sqlType":"12"},{"name":"menuphotofile","type":["string","null"],"columnName":"menuphotofile","sqlType":"12"},{"name":"name","type":["string","null"],"columnName":"name","sqlType":"12"},{"name":"openinghours","type":["string","null"],"columnName":"openinghours","sqlType":"12"},{"name":"phonenumber","type":["string","null"],"columnName":"phonenumber","sqlType":"12"},{"name":"photofile","type":["string","null"],"columnName":"photofile","sqlType":"12"},{"name":"pricerange","type":["string","null"],"columnName":"pricerange","sqlType":"12"},{"name":"sourceuri","type":["string","null"],"columnName":"sourceuri","sqlType":"12"},{"name":"street","type":["string","null"],"columnName":"street","sqlType":"12"},{"name":"foursquareid","type":["string","null"],"columnName":"foursquareid","sqlType":"12"}],"tableName":"QueryResult"}
> [the same schema is logged again, identically, for the second input split]
> 14/11/10 10:17:44 INFO Executor: Finished task 0.0 in stage 7.0 (TID 9). 1865 bytes result sent to driver
> 14/11/10 10:17:44 INFO TaskSetManager: Finished task 0.0 in stage 7.0 (TID 9) in 45 ms on localhost (1/2)
> 14/11/10 10:17:44 INFO Executor: Finished task 1.0 in stage 7.0 (TID 10). 1865 bytes result sent to driver
> 14/11/10 10:17:44 INFO TaskSetManager: Finished task 1.0 in stage 7.0 (TID 10) in 53 ms on localhost (2/2)
> 14/11/10 10:17:44 INFO TaskSchedulerImpl: Removed TaskSet 7.0, whose tasks have all completed, from pool
> 14/11/10 10:17:44 INFO DAGScheduler: Stage 7 (mapPartitions at Exchange.scala:86) finished in 0.054 s
> 14/11/10 10:17:44 INFO DAGScheduler: looking for newly runnable stages
> 14/11/10 10:17:44 INFO DAGScheduler: running: Set()
> 14/11/10 10:17:44 INFO DAGScheduler: waiting: Set(Stage 6)
> 14/11/10 10:17:44 INFO DAGScheduler: failed: Set()
> 14/11/10 10:17:44 INFO StatsReportListener: Finished stage: org.apache.spark.scheduler.StageInfo@40069cb0
> 14/11/10 10:17:44 INFO StatsReportListener: task runtime:(count: 2, mean: 49.000000, stdev: 4.000000, max: 53.000000, min: 45.000000)
> 14/11/10 10:17:44 INFO StatsReportListener:     0%      5%      10%     25%     50%     75%     90%     95%     100%
> 14/11/10 10:17:44 INFO StatsReportListener:     45.0 ms 45.0 ms 45.0 ms 45.0 ms 53.0 ms 53.0 ms 53.0 ms 53.0 ms 53.0 ms
> 14/11/10 10:17:44 INFO DAGScheduler: Missing parents for Stage 6: List()
> 14/11/10 10:17:44 INFO StatsReportListener: shuffle bytes written:(count: 2, mean: 50.000000, stdev: 0.000000, max: 50.000000, min: 50.000000)
> 14/11/10 10:17:44 INFO StatsReportListener:     0%      5%      10%     25%     50%     75%     90%     95%     100%
> 14/11/10 10:17:44 INFO StatsReportListener:     50.0 B  50.0 B  50.0 B  50.0 B  50.0 B  50.0 B  50.0 B  50.0 B  50.0 B
> 14/11/10 10:17:44 INFO DAGScheduler: Submitting Stage 6 (MappedRDD[41] at map at HiveContext.scala:360), which is now runnable
> 14/11/10 10:17:44 INFO StatsReportListener: task result size:(count: 2, mean: 1865.000000, stdev: 0.000000, max: 1865.000000, min: 1865.000000)
> 14/11/10 10:17:44 INFO StatsReportListener:     0%      5%      10%     25%     50%     75%     90%     95%     100%
> 14/11/10 10:17:44 INFO StatsReportListener:     1865.0 B  1865.0 B  1865.0 B  1865.0 B  1865.0 B  1865.0 B  1865.0 B  1865.0 B  1865.0 B
> 14/11/10 10:17:44 INFO StatsReportListener: executor (non-fetch) time pct: (count: 2, mean: 94.779874, stdev: 1.446541, max: 96.226415, min: 93.333333)
> 14/11/10 10:17:44 INFO StatsReportListener:     0%      5%      10%     25%     50%     75%     90%     95%     100%
> 14/11/10 10:17:44 INFO StatsReportListener:     93 %    93 %    93 %    93 %    96 %    96 %    96 %    96 %    96 %
> 14/11/10 10:17:44 INFO StatsReportListener: other time pct: (count: 2, mean: 5.220126, stdev: 1.446541, max: 6.666667, min: 3.773585)
> 14/11/10 10:17:44 INFO StatsReportListener:     0%      5%      10%     25%     50%     75%     90%     95%     100%
> 14/11/10 10:17:44 INFO StatsReportListener:     4 %     4 %     4 %     4 %     7 %     7 %     7 %     7 %     7 %
> 14/11/10 10:17:44 INFO MemoryStore: ensureFreeSpace(9616) called with curMem=1815752, maxMem=278019440
> 14/11/10 10:17:44 INFO MemoryStore: Block broadcast_11 stored as values in memory (estimated size 9.4 KB, free 263.4 MB)
> 14/11/10 10:17:44 INFO DAGScheduler: Submitting 1 missing tasks from Stage 6 (MappedRDD[41] at map at HiveContext.scala:360)
> 14/11/10 10:17:44 INFO TaskSchedulerImpl: Adding task set 6.0 with 1 tasks
> 14/11/10 10:17:44 INFO TaskSetManager: Starting task 0.0 in stage 6.0 (TID 11, localhost, PROCESS_LOCAL, 948 bytes)
> 14/11/10 10:17:44 INFO Executor: Running task 0.0 in stage 6.0 (TID 11)
> 14/11/10 10:17:44 INFO BlockFetcherIterator$BasicBlockFetcherIterator: maxBytesInFlight: 50331648, targetRequestSize: 10066329
> 14/11/10 10:17:44 INFO BlockFetcherIterator$BasicBlockFetcherIterator: Getting 2 non-empty blocks out of 2 blocks
> 14/11/10 10:17:44 INFO BlockFetcherIterator$BasicBlockFetcherIterator: Started 0 remote fetches in 1 ms
> 14/11/10 10:17:44 INFO Executor: Finished task 0.0 in stage 6.0 (TID 11). 1076 bytes result sent to driver
> 14/11/10 10:17:44 INFO DAGScheduler: Stage 6 (collect at HiveContext.scala:415) finished in 0.008 s
> 14/11/10 10:17:44 INFO StatsReportListener: Finished stage: org.apache.spark.scheduler.StageInfo@209037df
> 14/11/10 10:17:44 INFO SparkContext: Job finished: collect at HiveContext.scala:415, took 0.113842844 s
> 6771
> Time taken: 0.146 seconds
> 14/11/10 10:17:44 INFO StatsReportListener: task runtime:(count: 1, mean: 8.000000, stdev: 0.000000, max: 8.000000, min: 8.000000)
> 14/11/10 10:17:44 INFO StatsReportListener:     0%      5%      10%     25%     50%     75%     90%     95%     100%
> 14/11/10 10:17:44 INFO StatsReportListener:     8.0 ms  8.0 ms  8.0 ms  8.0 ms  8.0 ms  8.0 ms  8.0 ms  8.0 ms  8.0 ms
> 14/11/10 10:17:44 INFO StatsReportListener: fetch wait time:(count: 1, mean: 0.000000, stdev: 0.000000, max: 0.000000, min: 0.000000)
> 14/11/10 10:17:44 INFO StatsReportListener:     0%      5%      10%     25%     50%     75%     90%     95%     100%
> 14/11/10 10:17:44 INFO StatsReportListener:     0.0 ms  0.0 ms  0.0 ms  0.0 ms  0.0 ms  0.0 ms  0.0 ms  0.0 ms  0.0 ms
> 14/11/10 10:17:44 INFO StatsReportListener: remote bytes read:(count: 1, mean: 0.000000, stdev: 0.000000, max: 0.000000, min: 0.000000)
> 14/11/10 10:17:44 INFO StatsReportListener:     0%      5%      10%     25%     50%     75%     90%     95%     100%
> 14/11/10 10:17:44 INFO StatsReportListener:     0.0 B   0.0 B   0.0 B   0.0 B   0.0 B   0.0 B   0.0 B   0.0 B   0.0 B
> 14/11/10 10:17:44 INFO StatsReportListener: task result size:(count: 1, mean: 1076.000000, stdev: 0.000000, max: 1076.000000, min: 1076.000000)
> 14/11/10 10:17:44 INFO StatsReportListener:     0%      5%      10%     25%     50%     75%     90%     95%     100%
> 14/11/10 10:17:44 INFO StatsReportListener:     1076.0 B  1076.0 B  1076.0 B  1076.0 B  1076.0 B  1076.0 B  1076.0 B  1076.0 B  1076.0 B
> 14/11/10 10:17:44 INFO StatsReportListener: executor (non-fetch) time pct: (count: 1, mean: 75.000000, stdev: 0.000000, max: 75.000000, min: 75.000000)
> 14/11/10 10:17:44 INFO StatsReportListener:     0%      5%      10%     25%     50%     75%     90%     95%     100%
> 14/11/10 10:17:44 INFO StatsReportListener:     75 %    75 %    75 %    75 %    75 %    75 %    75 %    75 %    75 %
> 14/11/10 10:17:44 INFO StatsReportListener: fetch wait time pct: (count: 1, mean: 0.000000, stdev: 0.000000, max: 0.000000, min: 0.000000)
> 14/11/10 10:17:44 INFO StatsReportListener:     0%      5%      10%     25%     50%     75%     90%     95%     100%
> 14/11/10 10:17:44 INFO StatsReportListener:     0 %     0 %     0 %     0 %     0 %     0 %     0 %     0 %     0 %
> 14/11/10 10:17:45 INFO StatsReportListener: other time pct: (count: 1, mean: 25.000000, stdev: 0.000000, max: 25.000000, min: 25.000000)
> 14/11/10 10:17:45 INFO StatsReportListener:     0%      5%      10%     25%     50%     75%     90%     95%     100%
> 14/11/10 10:17:45 INFO StatsReportListener:     25 %    25 %    25 %    25 %    25 %    25 %    25 %    25 %    25 %
> 14/11/10 10:17:45 INFO CliDriver: Time taken: 0.146 seconds
> spark-sql> 14/11/10 10:17:44 INFO TaskSetManager: Finished task 0.0 in stage 6.0 (TID 11) in 8 ms on localhost (1/1)
> 14/11/10 10:17:45 INFO TaskSchedulerImpl: Removed TaskSet 6.0, whose tasks have all completed, from pool
> {code}
> Is this perhaps because Sqoop creates a nested schema? The generated schema and a sample record are shown below, after two illustrative sketches.
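> One way to check whether the data itself is readable (not shown in the original report) is to load the Avro container files directly with the RDD API, bypassing the Hive Metastore entirely. A minimal sketch, assuming avro-mapred is on the classpath and using a placeholder path for the Sqoop output directory:
> {code}
> import org.apache.avro.generic.GenericRecord
> import org.apache.avro.mapred.{AvroInputFormat, AvroWrapper}
> import org.apache.hadoop.io.NullWritable
> import org.apache.spark.{SparkConf, SparkContext}
>
> object AvroSanityCheck {
>   def main(args: Array[String]): Unit = {
>     val sc = new SparkContext(new SparkConf().setAppName("avro-sanity-check"))
>     // Read the Sqoop-generated container files directly; no Metastore involved.
>     val records = sc.hadoopFile[AvroWrapper[GenericRecord], NullWritable,
>       AvroInputFormat[GenericRecord]]("hdfs/place/part-m-*.avro")
>     // Extract the 'city' field from each record; a null branch of the
>     // ["string","null"] union comes back as a plain null here.
>     val cities = records.map { case (wrapper, _) =>
>       Option(wrapper.datum.get("city")).map(_.toString).orNull
>     }
>     cities.take(10).foreach(println)
>     sc.stop()
>   }
> }
> {code}
> If this prints city values, the file and its union-typed schema are fine at the Avro level, which would point at how Spark SQL derives the table's columns rather than at the data.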
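> For reference, an Avro-backed Hive table like this is typically registered with the AvroSerDe and no explicit column list. A hypothetical sketch with placeholder paths (the actual DDL is not in this report):
> {code}
> import org.apache.spark.sql.hive.HiveContext
>
> // Assumes an existing SparkContext `sc`; LOCATION and avro.schema.url
> // are placeholders, not taken from the report.
> val hive = new HiveContext(sc)
> hive.sql("""
>   CREATE EXTERNAL TABLE IF NOT EXISTS place
>   ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
>   STORED AS
>     INPUTFORMAT  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
>     OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
>   LOCATION '/placeholder/hdfs/place'
>   TBLPROPERTIES ('avro.schema.url' = '/placeholder/place.avsc')
> """)
> // Hive derives the columns from the Avro schema at query time; comparing
> // this output against Hive's own DESCRIBE may be telling.
> hive.sql("DESCRIBE place").collect().foreach(println)
> {code}
> Because such a table stores no column list in the Metastore, a possible explanation (speculation, not confirmed in this ticket) is that Spark SQL 1.1 reads columns only from the Metastore, which would leave 'city unresolvable while schema-agnostic queries like COUNT(*) still succeed. The full Sqoop-generated schema follows: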
> {code}
> {
>   "type" : "record",
>   "name" : "QueryResult",
>   "doc" : "Sqoop import of QueryResult",
>   "fields" : [ {
>     "name" : "id",
>     "type" : [ "string", "null" ],
>     "columnName" : "id",
>     "sqlType" : "12"
>   }, {
>     "name" : "city",
>     "type" : [ "string", "null" ],
>     "columnName" : "city",
>     "sqlType" : "12"
>   }, {
>     "name" : "description",
>     "type" : [ "string", "null" ],
>     "columnName" : "description",
>     "sqlType" : "12"
>   }, {
>     "name" : "lat",
>     "type" : [ "double", "null" ],
>     "columnName" : "lat",
>     "sqlType" : "8"
>   }, {
>     "name" : "lng",
>     "type" : [ "double", "null" ],
>     "columnName" : "lng",
>     "sqlType" : "8"
>   }, {
>     "name" : "mapimagefile",
>     "type" : [ "string", "null" ],
>     "columnName" : "mapimagefile",
>     "sqlType" : "12"
>   }, {
>     "name" : "menu",
>     "type" : [ "string", "null" ],
>     "columnName" : "menu",
>     "sqlType" : "12"
>   }, {
>     "name" : "menuphotofile",
>     "type" : [ "string", "null" ],
>     "columnName" : "menuphotofile",
>     "sqlType" : "12"
>   }, {
>     "name" : "name",
>     "type" : [ "string", "null" ],
>     "columnName" : "name",
>     "sqlType" : "12"
>   }, {
>     "name" : "openinghours",
>     "type" : [ "string", "null" ],
>     "columnName" : "openinghours",
>     "sqlType" : "12"
>   }, {
>     "name" : "phonenumber",
>     "type" : [ "string", "null" ],
>     "columnName" : "phonenumber",
>     "sqlType" : "12"
>   }, {
>     "name" : "photofile",
>     "type" : [ "string", "null" ],
>     "columnName" : "photofile",
>     "sqlType" : "12"
>   }, {
>     "name" : "pricerange",
>     "type" : [ "string", "null" ],
>     "columnName" : "pricerange",
>     "sqlType" : "12"
>   }, {
>     "name" : "sourceuri",
>     "type" : [ "string", "null" ],
>     "columnName" : "sourceuri",
>     "sqlType" : "12"
>   }, {
>     "name" : "street",
>     "type" : [ "string", "null" ],
>     "columnName" : "street",
>     "sqlType" : "12"
>   }, {
>     "name" : "foursquareid",
>     "type" : [ "string", "null" ],
>     "columnName" : "foursquareid",
>     "sqlType" : "12"
>   } ],
>   "tableName" : "QueryResult"
> }
> {code}
> Sample record:
> {code}
> {"id":{"string":"17d8e71b-5c7f-4c23-8bbe-5af93a0a1847"},"city":null,"description":null,"lat":{"double":-7.00417828399503},"lng":{"double":107.63597989152},"mapimagefile":null,"menu":null,"menuphotofile":null,"name":{"string":"lontong sayur siliwangi"},"openinghours":null,"phonenumber":null,"photofile":null,"pricerange":null,"sourceuri":{"string":"https://id.foursquare.com/v/4f8b6772e4b00597a01917e8"},"street":null,"foursquareid":{"string":"4f8b6772e4b00597a01917e8"}}
> {code}

--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org