[ 
https://issues.apache.org/jira/browse/SPARK-4052?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Michael Armbrust resolved SPARK-4052.
-------------------------------------
       Resolution: Fixed
    Fix Version/s: 1.2.0

> Use scala.collection.Map for pattern matching instead of using Predef.Map (it 
> is scala.collection.immutable.Map)
> ----------------------------------------------------------------------------------------------------------------
>
>                 Key: SPARK-4052
>                 URL: https://issues.apache.org/jira/browse/SPARK-4052
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 1.1.0
>            Reporter: Yin Huai
>            Assignee: Yin Huai
>            Priority: Minor
>             Fix For: 1.2.0
>
>
> It seems that ScalaReflection and InsertIntoHiveTable only accept 
> scala.collection.immutable.Map as the value type of MapType. The following 
> test cases demonstrate the errors.
> {code}
> val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)
> import sqlContext.createSchemaRDD
> val rdd = sc.parallelize(("key", "value") :: Nil)
> // Test1: This one fails.
> case class Test1(m: scala.collection.Map[String, String])
> val rddOfTest1 = rdd.map { case (k, v) => Test1(Map(k->v)) }
> rddOfTest1.registerTempTable("t1")
> /* Stack trace
> scala.MatchError: scala.collection.Map[String,String] (of class 
> scala.reflect.internal.Types$TypeRef$$anon$5)
>       at 
> org.apache.spark.sql.catalyst.ScalaReflection$.schemaFor(ScalaReflection.scala:53)
>       at 
> org.apache.spark.sql.catalyst.ScalaReflection$$anonfun$schemaFor$1.apply(ScalaReflection.scala:64)
>       at 
> org.apache.spark.sql.catalyst.ScalaReflection$$anonfun$schemaFor$1.apply(ScalaReflection.scala:62)
> ...
> */
> // Test2: This one is fine.
> case class Test2(m: scala.collection.immutable.Map[String, String])
> val rddOfTest2 = rdd.map { case (k, v) => Test2(Map(k->v)) }
> rddOfTest2.registerTempTable("t2")
> sqlContext.sql("SELECT m FROM t2").collect
> sqlContext.sql("SELECT m['key'] FROM t2").collect
> // Test3: This one fails.
> val schema = StructType(StructField("m", MapType(StringType, StringType), 
> true) :: Nil)
> val rowRDD = rdd.map { case (k, v) =>  
> Row(scala.collection.mutable.HashMap(k->v)) }
> val schemaRDD = sqlContext.applySchema(rowRDD, schema)
> schemaRDD.registerTempTable("t3")
> sqlContext.sql("SELECT m FROM t3").collect
> sqlContext.sql("SELECT m['key'] FROM t3").collect
> sqlContext.sql("CREATE TABLE testHiveTable1(m MAP <STRING, STRING>)")
> sqlContext.sql("INSERT OVERWRITE TABLE testHiveTable1 SELECT m FROM t3")
> /* Stack trace
> 14/10/22 19:30:56 INFO DAGScheduler: Job 4 failed: runJob at 
> InsertIntoHiveTable.scala:124, took 1.384579 s
> org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in 
> stage 4.0 failed 4 times, most recent failure: Lost task 1.3 in stage 4.0 
> (TID 12, yins-mbp): java.lang.ClassCastException: 
> scala.collection.mutable.HashMap cannot be cast to 
> scala.collection.immutable.Map
>         
> org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$wrapperFor$5.apply(InsertIntoHiveTable.scala:96)
>         
> org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$wrapperFor$5.apply(InsertIntoHiveTable.scala:96)
>         
> org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$org$apache$spark$sql$hive$execution$InsertIntoHiveTable$$writeToFile$1$1.apply(InsertIntoHiveTable.scala:148)
>         
> org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$org$apache$spark$sql$hive$execution$InsertIntoHiveTable$$writeToFile$1$1.apply(InsertIntoHiveTable.scala:145)
> */
> // Test4: This one is fine.
> val rowRDD = rdd.map { case (k, v) =>  Row(Map(k->v)) }
> val schemaRDD = sqlContext.applySchema(rowRDD, schema)
> schemaRDD.registerTempTable("t4")
> sqlContext.sql("SELECT m FROM t4").collect
> sqlContext.sql("SELECT m['key'] FROM t4").collect
> sqlContext.sql("CREATE TABLE testHiveTable1(m MAP <STRING, STRING>)")
> sqlContext.sql("INSERT OVERWRITE TABLE testHiveTable1 SELECT m FROM t4")
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to