[ https://issues.apache.org/jira/browse/SPARK-29602?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Hyukjin Kwon resolved SPARK-29602. ---------------------------------- Resolution: Invalid Please ask questions on Stack Overflow or the mailing list. > How does the spark from_json json and dataframe transform ignore the case of > the json key > ----------------------------------------------------------------------------------------- > > Key: SPARK-29602 > URL: https://issues.apache.org/jira/browse/SPARK-29602 > Project: Spark > Issue Type: Question > Components: Spark Core > Affects Versions: 2.4.4 > Reporter: ruiliang > Priority: Major > Labels: spark-sql > Original Estimate: 12h > Remaining Estimate: 12h > > How does the spark from_json json and dataframe transform ignore the case of > the json key > code > {code:java} > def main(args: Array[String]): Unit = { > val spark = SparkSession.builder().master("local[*]"). > enableHiveSupport().getOrCreate() > // spark.sqlContext.setConf("spark.sql.caseSensitive", "false") > import spark.implicits._ > //hive table data Lower case automatically when saving > val hivetable = > > """{"deliverysystype":"dms","orderid":"B0001-N103-000-005882-RL3AI2RWCP","storeid":103,"timestamp":1571587522000,"aaaa":"dms"}""" > val hiveDF = Seq(hivetable).toDF("msg") > val rdd = hiveDF.rdd.map(_.getString(0)) > val jsonDataDF = spark.read.json(rdd.toDS()) > jsonDataDF.show(false) > > //+----+---------------+--------------------------------+-------+-------------+ > //|aaaa|deliverysystype|orderid |storeid|timestamp > | > > //+----+---------------+--------------------------------+-------+-------------+ > //|dms |dms |B0001-N103-000-005882-RL3AI2RWCP|103 > |1571587522000| > > //+----+---------------+--------------------------------+-------+-------------+ > val jsonstr = > > """{"data":{"deliverySysType":"dms","orderId":"B0001-N103-000-005882-RL3AI2RWCP","storeId":103,"timestamp":1571587522000},"accessKey":"f9d069861dfb1678","actionName":"candao.rider.getDeliveryInfo","sign":"fa0239c75e065cf43d0a4040665578ba" > }""" > val jsonStrDF = 
Seq(jsonstr).toDF("msg") > //转换json数据列 action_name actionName > jsonStrDF.show(false) > val structSeqSchme = StructType(Seq(StructField("data", jsonDataDF.schema, > true), > StructField("accessKey", StringType, true), //这里应该 accessKey > StructField("actionName", StringType, true), > StructField("columnNameOfCorruptRecord", StringType, true) > )) > //hive col name lower case, json data key capital and small letter,Take > less than value > val mapOption = Map("allowBackslashEscapingAnyCharacter" -> "true", > "allowUnquotedControlChars" -> "true", "allowSingleQuotes" -> "true") > //I'm not doing anything here, but I don't know how to set a value, right? > val newDF = jsonStrDF.withColumn("data_col", from_json(col("msg"), > structSeqSchme, mapOption)) > newDF.show(false) > newDF.printSchema() > newDF.select($"data_col.accessKey", $"data_col.actionName", > $"data_col.data.*", $"data_col.columnNameOfCorruptRecord").show(false) > //Lowercase columns do not fetch data. How do you make it ignore lowercase > columns? deliverysystype,storeid-> null > > //+----------------+----------------------------+----+---------------+-------+-------+-------------+-------------------------+ > //|accessKey |actionName > |aaaa|deliverysystype|orderid|storeid|timestamp |columnNameOfCorruptRecord| > > //+----------------+----------------------------+----+---------------+-------+-------+-------------+-------------------------+ > //|f9d069861dfb1678|candao.rider.getDeliveryInfo|null|null |null > |null |1571587522000|null | > > //+----------------+----------------------------+----+---------------+-------+-------+-------------+-------------------------+ > } > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org