Hello,
I am having issues splitting the contents of a DataFrame column using Spark
1.4. The DataFrame was created by reading a nested, complex JSON file. I used
df.explode but keep getting an error message. The JSON file format looks like this:
[
{
"neid":{ },
"mi":{
"mts":"20100609071500Z",
"gp":"900",
"tMOID":"Aal2Ap",
"mt":[ ],
"mv":[
{
"moid":"ManagedElement=1,TransportNetwork=1,Aal2Sp=1,Aal2Ap=r1552q",
"r":
[
1,
2,
5
]
},
{
"moid":"ManagedElement=1,TransportNetwork=1,Aal2Sp=1,Aal2Ap=r1542q",
"r":
[
1,
2,
5
]
}
]
}
},
{
"neid":{
"neun":"RC003",
"nedn":"SubNetwork=ONRM_RootMo_R,SubNetwork=RC003,MeContext=RC003",
"nesw":"CP90831_R9YC/11"
},
"mi":{
"mts":"20100609071500Z",
"gp":"900",
"tMOID":"PlugInUnit",
"mt":"pmProcessorLoad",
"mv":[
{
"moid":"ManagedElement=1,Equipment=1,Subrack=MS,Slot=6,PlugInUnit=1",
"r":
[ 1, 2, 5
]
},
{
"moid":"ManagedElement=1,Equipment=1,Subrack=ES-1,Slot=1,PlugInUnit=1",
"r":
[ 1, 2, 5
]
}
]
}
}
]
scala> val df = sqlContext.read.json("/Users/xx/target/statsfile.json")
scala> df.show()
+--------------------+--------------------+
|                  mi|                neid|
+--------------------+--------------------+
|[900,["pmEs","pmS...|[SubNetwork=ONRM_...|
|[900,["pmIcmpInEr...|[SubNetwork=ONRM_...|
|[900,pmUnsuccessf...|[SubNetwork=ONRM_...|
|[900,["pmBwErrBlo...|[SubNetwork=ONRM_...|
|[900,["pmSctpStat...|[SubNetwork=ONRM_...|
|[900,["pmLinkInSe...|[SubNetwork=ONRM_...|
|[900,["pmGrFc","p...|[SubNetwork=ONRM_...|
|[900,["pmReceived...|[SubNetwork=ONRM_...|
|[900,["pmIvIma","...|[SubNetwork=ONRM_...|
|[900,["pmEs","pmS...|[SubNetwork=ONRM_...|
|[900,["pmEs","pmS...|[SubNetwork=ONRM_...|
|[900,["pmExisOrig...|[SubNetwork=ONRM_...|
|[900,["pmHDelayVa...|[SubNetwork=ONRM_...|
|[900,["pmReceived...|[SubNetwork=ONRM_...|
|[900,["pmReceived...|[SubNetwork=ONRM_...|
|[900,["pmAverageR...|[SubNetwork=ONRM_...|
|[900,["pmDchFrame...|[SubNetwork=ONRM_...|
|[900,["pmReceived...|[SubNetwork=ONRM_...|
|[900,["pmNegative...|[SubNetwork=ONRM_...|
|[900,["pmUsedTbsQ...|[SubNetwork=ONRM_...|
+--------------------+--------------------+
scala> df.printSchema()
root
 |-- mi: struct (nullable = true)
 |    |-- gp: long (nullable = true)
 |    |-- mt: string (nullable = true)
 |    |-- mts: string (nullable = true)
 |    |-- mv: string (nullable = true)
 |-- neid: struct (nullable = true)
 |    |-- nedn: string (nullable = true)
 |    |-- nesw: string (nullable = true)
 |    |-- neun: string (nullable = true)
scala> val df1=df.select("mi.mv")
df1: org.apache.spark.sql.DataFrame = [mv: string]
scala> val df1=df.select("mi.mv").show()
+--------------------+
|                  mv|
+--------------------+
|[{"r":[0,0,0],"mo...|
|{"r":[0,4,0,4],"m...|
|{"r":5,"moid":"Ma...|
|[{"r":[2147483647...|
|{"r":[225,1112986...|
|[{"r":[83250,0,0,...|
|[{"r":[1,2,529982...|
|[{"r":[26998564,0...|
|[{"r":[0,0,0,0,0,...|
|[{"r":[0,0,0],"mo...|
|[{"r":[0,0,0],"mo...|
|{"r":[0,0,0,0,0,0...|
|{"r":[0,0,1],"moi...|
|{"r":[4587,4587],...|
|[{"r":[180,180],"...|
|[{"r":["0,0,0,0,0...|
|{"r":[0,35101,0,0...|
|[{"r":["0,0,0,0,0...|
|[{"r":[0,1558],"m...|
|[{"r":["7484,4870...|
+--------------------+
scala> df1.explode("mv","mvnew")(mv => mv.split(","))
<console>:28: error: value split is not a member of Nothing
       df1.explode("mv","mvnew")(mv => mv.split(","))
Am I doing something wrong? I need to extract the data under mi.mv into separate
columns so that I can apply some transformations.
Regards
Mike