[ https://issues.apache.org/jira/browse/SPARK-23403?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Naresh Kumar updated SPARK-23403:
---------------------------------
    Docs Text: 
scala> val washing_flat=sc.textFile("hdfs://ip-172-31-53-45:8020/user/narine91267897/washing_flat.csv")
washing_flat: org.apache.spark.rdd.RDD[String] = hdfs://ip-172-31-55-77:8020/user/narine91267897/washing_flat.csv MapPartitionsRDD[24] at textFile at <console>:33
scala> val schema=StructType(Array(
     |      StructField("id",StringType,true),
     |      StructField("rev",StringType,true),
     |      StructField("count",LongType,true),
     |      StructField("flowrate",LongType,true),
     |      StructField("fluidlevel",StringType,true),
     |      StructField("frequency",LongType,true),
     |      StructField("hardness",LongType,true),
     |      StructField("speed",LongType,true),
     |      StructField("temperature",LongType,true),
     |      StructField("ts",LongType,true),
     |      StructField("voltage",LongType,true)))

scala> val rowRDD=washing_flat.map(line => line.split(",")).map(row => Row(row(0)
     | ,row(1)
     | ,row(2),
     | row(3),
     | row(4),
     | row(5),
     | row(6),
     | row(7),
     | row(8),
     | row(9),
     | row(10)))
rowRDD: org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitionsRDD[26] at map at <console>:35
scala> val washing_df=spark.createDataFrame(rowRDD,schema)
washing_df: org.apache.spark.sql.DataFrame = [id: string, rev: string ... 9 more fields]
scala> washing_df.printSchema
root
 |-- id: string (nullable = true)
 |-- rev: string (nullable = true)
 |-- count: long (nullable = true)
 |-- flowrate: long (nullable = true)
 |-- fluidlevel: string (nullable = true)
 |-- frequency: long (nullable = true)
 |-- hardness: long (nullable = true)
 |-- speed: long (nullable = true)
 |-- temperature: long (nullable = true)
 |-- ts: long (nullable = true)
 |-- voltage: long (nullable = true)
scala> washing_df.show(5)
18/02/13 05:54:51 ERROR executor.Executor: Exception in task 0.0 in stage 4.0 (TID 5)
java.lang.ArrayIndexOutOfBoundsException: 10
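
The exception almost certainly comes from the parsing step, not from Spark itself: java.lang.String.split(",") drops trailing empty strings, so any input line with missing trailing values (or a blank line) yields fewer than 11 tokens, and row(10) is then out of bounds when show() forces evaluation. A minimal defensive sketch of the rowRDD step, assuming the same 11-column layout as the schema above; the .toLong casts are additions beyond the original code, needed so the numeric columns actually match the declared LongType fields:

import org.apache.spark.sql.Row

val rowRDD = washing_flat
  .map(_.split(",", -1))      // limit -1 keeps trailing empty fields
  .filter(_.length == 11)     // skip short or blank lines instead of crashing
  .map(a => Row(a(0), a(1),   // casts added so Long columns match the LongType schema
                a(2).toLong, a(3).toLong, a(4), a(5).toLong, a(6).toLong,
                a(7).toLong, a(8).toLong, a(9).toLong, a(10).toLong))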


> java.lang.ArrayIndexOutOfBoundsException: 10
> --------------------------------------------
>
>                 Key: SPARK-23403
>                 URL: https://issues.apache.org/jira/browse/SPARK-23403
>             Project: Spark
>          Issue Type: Bug
>          Components: Spark Shell
>    Affects Versions: 2.2.0
>            Reporter: Naresh Kumar
>            Priority: Major
>
> java.lang.ArrayIndexOutOfBoundsException: 10, while retrieving records from
> a DataFrame in spark-shell
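
For what it's worth, the same load is usually more robust through Spark's built-in CSV reader, which applies the schema directly and can drop malformed lines instead of throwing. A sketch, assuming Spark 2.x and the schema and path from the transcript above:

val washing_df = spark.read
  .schema(schema)
  .option("mode", "DROPMALFORMED")   // skip lines that do not fit the schema
  .csv("hdfs://ip-172-31-53-45:8020/user/narine91267897/washing_flat.csv")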


