[
https://issues.apache.org/jira/browse/SPARK-12512?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
JO EE updated SPARK-12512:
--------------------------
Description:
Just for simplicity, I am using the Scala IDE worksheet to show the problem.
The problem starts at the .withColumnRenamed("bField","k.b:Field") call; from that point on, withColumn does not work.
{code:title=Bar.scala|borderStyle=solid}
object bug {
println("Welcome to the Scala worksheet") //> Welcome to the Scala
worksheet
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.DateType
import org.apache.spark.sql.functions._
import org.apache.spark.storage.StorageLevel._
import org.apache.spark.sql.types.{StructType,StructField,StringType}
val conf = new SparkConf()
.setMaster("local[4]")
.setAppName("Testbug") //> conf :
org.apache.spark.SparkConf = org.apache.spark.SparkConf@3b94d659
val sc = new SparkContext(conf) //> sc :
org.apache.spark.SparkContext = org.apache.spark.SparkContext@1dcca8d3
//|
val sqlContext = new SQLContext(sc) //> sqlContext :
org.apache.spark.sql.SQLContext = org.apache.spark.sql.SQLCont
//| ext@2d23faef
val schemaString = "aField,bField,cField" //> schemaString : String =
aField,bField,cField
val schema = StructType(schemaString.split(",")
.map(fieldName => StructField(fieldName, StringType, true)))
//> schema :
org.apache.spark.sql.types.StructType = StructType(StructField(aFi
//| eld,StringType,true),
StructField(bField,StringType,true), StructField(cFiel
//| d,StringType,true))
//import sqlContext.implicits._
val newRDD = sc.parallelize(List(("a","b","c")))
.map(x=>Row(x._1,x._2,x._3)) //> newRDD :
org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitions
//| RDD[1] at map at
com.joee.worksheet.bug.scala:30
val newDF = sqlContext.createDataFrame(newRDD, schema)
//> newDF :
org.apache.spark.sql.DataFrame = [aField: string, bField: string, c
//| Field: string]
val changeDF = newDF.withColumnRenamed("aField","anodotField")
.withColumnRenamed("bField","bnodotField")
.show() //>
+-----------+-----------+------+
//|
|anodotField|bnodotField|cField|
//|
+-----------+-----------+------+
//| | a| b|
c|
//|
+-----------+-----------+------+
//|
//| changeDF : Unit = ()
val changeDFwithdotfield1 = newDF.withColumnRenamed("aField","k.a:Field")
//> changeDFwithdotfield1 :
org.apache.spark.sql.DataFrame = [k.a:Field: strin
//| g, bField: string,
cField: string]
val changeDFwithdotfield = changeDFwithdotfield1
.withColumnRenamed("bField","k.b:Field")
//>
org.apache.spark.sql.AnalysisException: cannot resolve 'k.a:Field' given in
//| put columns k.a:Field,
bField, cField;
//| at
org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAn
//| alysis(package.scala:42)
//| at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAn
//|
alysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:56)
//| at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAn
//|
alysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:53)
//| at
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.a
//| pply(TreeNode.scala:293)
//| at
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.a
//| pply(TreeNode.scala:293)
//| at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNod
//| e.scala:51)
//| at
org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.sca
//| la:292)
//| at
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.app
//| Output exceeds cutoff
limit.
val changeDFwithdotfieldlt =
changeDFwithdotfield.withColumn("k.a:Field",lit("tt")).show(10)
}
{code}
was:
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.DateType
import org.apache.spark.sql.functions._
import org.apache.spark.storage.StorageLevel._
import org.apache.spark.sql.types.{StructType,StructField,StringType}
val conf = new SparkConf()
.setMaster("local[4]")
.setAppName("Testbug") //> conf :
org.apache.spark.SparkConf = org.apache.spark.SparkConf@3b94d659
val sc = new SparkContext(conf) //> sc :
org.apache.spark.SparkContext = org.apache.spark.SparkContext@1dcca8d3
//|
val sqlContext = new SQLContext(sc) //> sqlContext :
org.apache.spark.sql.SQLContext = org.apache.spark.sql.SQLCont
//| ext@2d23faef
val schemaString = "aField,bField,cField" //> schemaString : String =
aField,bField,cField
val schema = StructType(schemaString.split(",")
.map(fieldName => StructField(fieldName, StringType, true)))
//> schema :
org.apache.spark.sql.types.StructType = StructType(StructField(aFi
//| eld,StringType,true),
StructField(bField,StringType,true), StructField(cFiel
//| d,StringType,true))
//import sqlContext.implicits._
val newRDD = sc.parallelize(List(("a","b","c")))
.map(x=>Row(x._1,x._2,x._3)) //> newRDD :
org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitions
//| RDD[1] at map at
com.joee.worksheet.bug.scala:30
val newDF = sqlContext.createDataFrame(newRDD, schema)
//> newDF :
org.apache.spark.sql.DataFrame = [aField: string, bField: string, c
//| Field: string]
val changeDF = newDF.withColumnRenamed("aField","anodotField")
.withColumnRenamed("bField","bnodotField")
.show() //>
+-----------+-----------+------+
//|
|anodotField|bnodotField|cField|
//|
+-----------+-----------+------+
//| | a| b|
c|
//|
+-----------+-----------+------+
//|
//| changeDF : Unit = ()
val changeDFwithdotfield1 = newDF.withColumnRenamed("aField","k.a:Field")
//> changeDFwithdotfield1 :
org.apache.spark.sql.DataFrame = [k.a:Field: strin
//| g, bField: string,
cField: string]
val changeDFwithdotfield = changeDFwithdotfield1
.withColumnRenamed("bField","k.b:Field")
//>
org.apache.spark.sql.AnalysisException: cannot resolve 'k.a:Field' given in
//| put columns k.a:Field,
bField, cField;
//| at
org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAn
//| alysis(package.scala:42)
//| at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAn
//|
alysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:56)
//| at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAn
//|
alysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:53)
//| at
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.a
//| pply(TreeNode.scala:293)
//| at
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.a
//| pply(TreeNode.scala:293)
//| at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNod
//| e.scala:51)
//| at
org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.sca
//| la:292)
//| at
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.app
//| Output exceeds cutoff
limit.
> WithColumn does not work on multiple columns with special characters
> --------------------------------------------------------------------
>
> Key: SPARK-12512
> URL: https://issues.apache.org/jira/browse/SPARK-12512
> Project: Spark
> Issue Type: Bug
> Affects Versions: 1.5.2
> Reporter: JO EE
> Labels: spark, sql
>
> Just for simplicity, I am using the Scala IDE worksheet to show the problem.
> The problem starts at the .withColumnRenamed("bField","k.b:Field") call; from that point on, withColumn does not work.
> {code:title=Bar.scala|borderStyle=solid}
> object bug {
> println("Welcome to the Scala worksheet") //> Welcome to the Scala
> worksheet
>
> import org.apache.spark.SparkContext
> import org.apache.spark.SparkConf
> import org.apache.spark.sql.SQLContext
> import org.apache.spark.sql.Row
> import org.apache.spark.sql.types.DateType
> import org.apache.spark.sql.functions._
> import org.apache.spark.storage.StorageLevel._
> import org.apache.spark.sql.types.{StructType,StructField,StringType}
>
> val conf = new SparkConf()
> .setMaster("local[4]")
> .setAppName("Testbug") //> conf :
> org.apache.spark.SparkConf = org.apache.spark.SparkConf@3b94d659
>
> val sc = new SparkContext(conf) //> sc :
> org.apache.spark.SparkContext = org.apache.spark.SparkContext@1dcca8d3
> //|
>
> val sqlContext = new SQLContext(sc) //> sqlContext :
> org.apache.spark.sql.SQLContext = org.apache.spark.sql.SQLCont
> //| ext@2d23faef
>
> val schemaString = "aField,bField,cField" //> schemaString : String
> = aField,bField,cField
>
> val schema = StructType(schemaString.split(",")
> .map(fieldName => StructField(fieldName, StringType, true)))
> //> schema :
> org.apache.spark.sql.types.StructType = StructType(StructField(aFi
> //| eld,StringType,true),
> StructField(bField,StringType,true), StructField(cFiel
> //| d,StringType,true))
> //import sqlContext.implicits._
>
> val newRDD = sc.parallelize(List(("a","b","c")))
> .map(x=>Row(x._1,x._2,x._3)) //> newRDD :
> org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitions
> //| RDD[1] at map at
> com.joee.worksheet.bug.scala:30
>
> val newDF = sqlContext.createDataFrame(newRDD, schema)
> //> newDF :
> org.apache.spark.sql.DataFrame = [aField: string, bField: string, c
> //| Field: string]
>
> val changeDF = newDF.withColumnRenamed("aField","anodotField")
> .withColumnRenamed("bField","bnodotField")
> .show() //>
> +-----------+-----------+------+
> //|
> |anodotField|bnodotField|cField|
> //|
> +-----------+-----------+------+
> //| | a|
> b| c|
> //|
> +-----------+-----------+------+
> //|
> //| changeDF : Unit = ()
> val changeDFwithdotfield1 = newDF.withColumnRenamed("aField","k.a:Field")
> //> changeDFwithdotfield1
> : org.apache.spark.sql.DataFrame = [k.a:Field: strin
> //| g, bField: string,
> cField: string]
>
> val changeDFwithdotfield = changeDFwithdotfield1
> .withColumnRenamed("bField","k.b:Field")
> //>
> org.apache.spark.sql.AnalysisException: cannot resolve 'k.a:Field' given in
> //| put columns k.a:Field,
> bField, cField;
> //| at
> org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAn
> //| alysis(package.scala:42)
> //| at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAn
> //|
> alysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:56)
> //| at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAn
> //|
> alysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:53)
> //| at
> org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.a
> //| pply(TreeNode.scala:293)
> //| at
> org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.a
> //| pply(TreeNode.scala:293)
> //| at
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNod
> //| e.scala:51)
> //| at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.sca
> //| la:292)
> //| at
> org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.app
> //| Output exceeds cutoff
> limit.
>
>
> val changeDFwithdotfieldlt =
> changeDFwithdotfield.withColumn("k.a:Field",lit("tt")).show(10)
> }
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]