[
https://issues.apache.org/jira/browse/SPARK-20053?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15935568#comment-15935568
]
Xuxiang Mao commented on SPARK-20053:
-------------------------------------
This is what my code looks like:
String cmdOutputFile = "/Downloads/output.csv";
SparkSession spark = SparkSession
    .builder().master("local[*]")
    .appName("PostProcessingBeta")
    .getOrCreate();
Dataset<Row> df = spark.read()
    .option("maxCharsPerColumn", "4096")
    .option("inferSchema", true)
    .option("header", true)
    .option("comment", "#")
    .csv(cmdOutputFile);
df.select("sd_1_2").show();        // succeeds: no "." in the column name
df.select("r_2_shape_1.8").show(); // throws the exception quoted below
> Can't select col when the dot (.) in col name
> ---------------------------------------------
>
> Key: SPARK-20053
> URL: https://issues.apache.org/jira/browse/SPARK-20053
> Project: Spark
> Issue Type: Bug
> Components: Java API
> Affects Versions: 2.1.0
> Environment: macOS
> Reporter: Xuxiang Mao
>
> I use the Java API to read a CSV file as a DataFrame and then call
> Dataframe.select("column name").show(). The operation succeeds when the
> column name contains no ".", but it fails when the column name contains
> ".". ERROR:
> Exception in thread "main" org.apache.spark.sql.AnalysisException: cannot
> resolve '`r_2_shape_1.8`' given input columns: [z_2.1.1, z_2.1.11, ....
> r_1.34.2, r_1.14.2, r_2_shape_1.8, z_1.2.39];;
> 'Project ['r_2_shape_1.8]
> +- TypedFilter
> com.amazon.recommerce.pricing.forecasting.postProcessing.utils.RawFileUtils$1@a03529c,
> interface org.apache.spark.sql.Row, [StructField(lp__,IntegerType,true),
> StructField(b.1,DoubleType,true),
> StructField(temp_Intercept,DoubleType,true),
> StructField(b_shape.1,DoubleType,true), StructField(sd_1.1,DoubleType,true),
> StructField(sd_1_2,DoubleType,true), StructField(z_1.1.1,DoubleType,true),
> StructField(z_1.2.1,DoubleType,true), StructField(z_1.1.2,DoubleType,true),
> StructField(z_1.2.2,DoubleType,true), StructField(z_1.1.3,DoubleType,true),
> StructField(z_1.2.3,DoubleType,true), StructField(z_1.1.4,DoubleType,true),
> StructField(z_1.2.4,DoubleType,true), StructField(z_1.1.5,DoubleType,true),
> StructField(z_1.2.5,DoubleType,true), StructField(z_1.1.6,DoubleType,true),
> StructField(z_1.2.6,DoubleType,true), StructField(z_1.1.7,DoubleType,true),
> StructField(z_1.2.7,DoubleType,true), StructField(z_1.1.8,DoubleType,true),
> StructField(z_1.2.8,DoubleType,true), StructField(z_1.1.9,DoubleType,true),
> StructField(z_1.2.9,DoubleType,true), ... 294 more fields],
> createexternalrow(lp__#0, b.1#1, temp_Intercept#2, b_shape.1#3, sd_1.1#4,
> sd_1_2#5, z_1.1.1#6, z_1.2.1#7, z_1.1.2#8, z_1.2.2#9, z_1.1.3#10, z_1.2.3#11,
> z_1.1.4#12, z_1.2.4#13, z_1.1.5#14, z_1.2.5#15, z_1.1.6#16, z_1.2.6#17,
> z_1.1.7#18, z_1.2.7#19, z_1.1.8#20, z_1.2.8#21, z_1.1.9#22, z_1.2.9#23, ...
> 612 more fields)
> +-
> Relation[lp__#0,b.1#1,temp_Intercept#2,b_shape.1#3,sd_1.1#4,sd_1_2#5,z_1.1.1#6,z_1.2.1#7,z_1.1.2#8,z_1.2.2#9,z_1.1.3#10,z_1.2.3#11,z_1.1.4#12,z_1.2.4#13,z_1.1.5#14,z_1.2.5#15,z_1.1.6#16,z_1.2.6#17,z_1.1.7#18,z_1.2.7#19,z_1.1.8#20,z_1.2.8#21,z_1.1.9#22,z_1.2.9#23,...
> 294 more fields] csv
> at
> org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:77)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:74)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:310)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:310)
> at
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:309)
> at
> org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionUp$1(QueryPlan.scala:282)
> at
> org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$2(QueryPlan.scala:292)
> at
> org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$2$1.apply(QueryPlan.scala:296)
> at
> scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
> at
> scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
> at
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
> at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
> at scala.collection.AbstractTraversable.map(Traversable.scala:104)
> at
> org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$2(QueryPlan.scala:296)
> at
> org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$7.apply(QueryPlan.scala:301)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188)
> at
> org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:301)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:74)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:67)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:128)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:67)
> at
> org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:57)
> at
> org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:48)
> at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:63)
> at
> org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:2822)
> at org.apache.spark.sql.Dataset.select(Dataset.scala:1121)
> at org.apache.spark.sql.Dataset.select(Dataset.scala:1139)
> at org.apache.spark.sql.Dataset.select(Dataset.scala:1139)
> at
> com.amazon.recommerce.pricing.forecasting.postProcessing.PostProcessing.main(PostProcessing.java:47)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at com.intellij.rt.execution.application.AppMain.main(AppMain.java:147)