[ https://issues.apache.org/jira/browse/SPARK-5741?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Michael Armbrust updated SPARK-5741: ------------------------------------ Target Version/s: 1.4.0 (was: 1.3.0) > Support the path contains comma in HiveContext > ---------------------------------------------- > > Key: SPARK-5741 > URL: https://issues.apache.org/jira/browse/SPARK-5741 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 1.2.0 > Reporter: Yadong Qi > > When running ```select * from nzhang_part where hr = 'file,';```, it throws an > exception ```java.lang.IllegalArgumentException: Can not create a Path from > an empty string```. This is because the HDFS path contains a comma, and > FileInputFormat.setInputPaths splits the path string by comma. > ############################### > SQL > ############################### > set hive.merge.mapfiles=true; > set hive.merge.mapredfiles=true; > set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; > set hive.exec.dynamic.partition=true; > set hive.exec.dynamic.partition.mode=nonstrict; > create table nzhang_part like srcpart; > insert overwrite table nzhang_part partition (ds='2010-08-15', hr) select > key, value, hr from srcpart where ds='2008-04-08'; > insert overwrite table nzhang_part partition (ds='2010-08-15', hr=11) select > key, value from srcpart where ds='2008-04-08'; > insert overwrite table nzhang_part partition (ds='2010-08-15', hr) > select * from ( > select key, value, hr from srcpart where ds='2008-04-08' > union all > select '1' as key, '1' as value, 'file,' as hr from src limit 1) s; > select * from nzhang_part where hr = 'file,'; > ############################### > Error log > ############################### > 15/02/10 14:33:16 ERROR SparkSQLDriver: Failed in [select * from nzhang_part > where hr = 'file,'] > java.lang.IllegalArgumentException: Can not create a Path from an empty string > at org.apache.hadoop.fs.Path.checkPathArg(Path.java:127) > at org.apache.hadoop.fs.Path.<init>(Path.java:135) > at > org.apache.hadoop.util.StringUtils.stringToPath(StringUtils.java:241) > at > 
org.apache.hadoop.mapred.FileInputFormat.setInputPaths(FileInputFormat.java:400) > at > org.apache.spark.sql.hive.HadoopTableReader$.initializeLocalJobConfFunc(TableReader.scala:251) > at > org.apache.spark.sql.hive.HadoopTableReader$$anonfun$11.apply(TableReader.scala:229) > at > org.apache.spark.sql.hive.HadoopTableReader$$anonfun$11.apply(TableReader.scala:229) > at > org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$6.apply(HadoopRDD.scala:172) > at > org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$6.apply(HadoopRDD.scala:172) > at scala.Option.map(Option.scala:145) > at org.apache.spark.rdd.HadoopRDD.getJobConf(HadoopRDD.scala:172) > at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:196) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:223) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:221) > at scala.Option.getOrElse(Option.scala:120) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:221) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:32) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:223) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:221) > at scala.Option.getOrElse(Option.scala:120) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:221) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:32) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:223) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:221) > at scala.Option.getOrElse(Option.scala:120) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:221) > at org.apache.spark.rdd.UnionRDD$$anonfun$1.apply(UnionRDD.scala:66) > at org.apache.spark.rdd.UnionRDD$$anonfun$1.apply(UnionRDD.scala:66) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at 
scala.collection.immutable.List.foreach(List.scala:318) > at > scala.collection.TraversableLike$class.map(TraversableLike.scala:244) > at scala.collection.AbstractTraversable.map(Traversable.scala:105) > at org.apache.spark.rdd.UnionRDD.getPartitions(UnionRDD.scala:66) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:223) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:221) > at scala.Option.getOrElse(Option.scala:120) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:221) > at > org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:32) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:223) > at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:221) > at scala.Option.getOrElse(Option.scala:120) > at org.apache.spark.rdd.RDD.partitions(RDD.scala:221) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1391) > at org.apache.spark.rdd.RDD.collect(RDD.scala:811) > at > org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:81) > at > org.apache.spark.sql.hive.HiveContext$QueryExecution.stringResult(HiveContext.scala:446) > at > org.apache.spark.sql.hive.thriftserver.AbstractSparkSQLDriver.run(AbstractSparkSQLDriver.scala:58) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:275) > at > org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:423) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:211) > at > org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:403) 
> at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:77) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org For additional commands, e-mail: issues-help@spark.apache.org