[ https://issues.apache.org/jira/browse/SPARK-23403?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Naresh Kumar updated SPARK-23403: --------------------------------- Docs Text: val washing_flat=sc.textFile("hdfs://ip-172-31-53-45:8020/user/narine91267897/washing_flat.csv") washing_flat: org.apache.spark.rdd.RDD[String] = hdfs://ip-172-31-55-77:8020/user/narine91267897/washing_flat.csv MapPartitionsRDD[ 24] at textFile at <console>:33 scala> val schema=StructType(Array( | StructField("id",StringType,true), | StructField("rev",StringType,true), | StructField("count",LongType,true), | StructField("flowrate",LongType,true), | StructField("fluidlevel",StringType,true), | StructField("frequency",LongType,true), | StructField("hardness",LongType,true), | StructField("speed",LongType,true), | StructField("temperature",LongType,true), | StructField("ts",LongType,true), | StructField("voltage",LongType,true))) scala> val rowRDD=washing_flat.map(line => line.split(",")).map(row => Row(row(0) | ,row(1) | ,row(2), | row(3), | row(4), | row(5), | row(6), | row(7), | row(8), | row(9), | row(10))) rowRDD: org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitionsRDD[26] at map at <console>:35 scala> val washing_df=spark.createDataFrame(rowRDD,schema) washing_df: org.apache.spark.sql.DataFrame = [id: string, rev: string ... 
9 more fields] scala> washing_df.printSchema root |-- id: string (nullable = true) |-- rev: string (nullable = true) |-- count: long (nullable = true) |-- flowrate: long (nullable = true) |-- fluidlevel: string (nullable = true) |-- frequency: long (nullable = true) |-- hardness: long (nullable = true) |-- speed: long (nullable = true) |-- temperature: long (nullable = true) |-- ts: long (nullable = true) |-- voltage: long (nullable = true) scala> washing_df.show(5) 18/02/13 05:54:51 ERROR executor.Executor: Exception in task 0.0 in stage 4.0 (TID 5) java.lang.ArrayIndexOutOfBoundsException: 10 was: val washing_flat=sc.textFile("hdfs://ip-172-31-53-48.ec2.internal:8020/user/narine91267897/washing_flat.csv") washing_flat: org.apache.spark.rdd.RDD[String] = hdfs://ip-172-31-55-77:8020/user/narine91267897/washing_flat.csv MapPartitionsRDD[ 24] at textFile at <console>:33 scala> val schema=StructType(Array( | StructField("id",StringType,true), | StructField("rev",StringType,true), | StructField("count",LongType,true), | StructField("flowrate",LongType,true), | StructField("fluidlevel",StringType,true), | StructField("frequency",LongType,true), | StructField("hardness",LongType,true), | StructField("speed",LongType,true), | StructField("temperature",LongType,true), | StructField("ts",LongType,true), | StructField("voltage",LongType,true))) scala> val rowRDD=washing_flat.map(line => line.split(",")).map(row => Row(row(0) | ,row(1) | ,row(2), | row(3), | row(4), | row(5), | row(6), | row(7), | row(8), | row(9), | row(10))) rowRDD: org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitionsRDD[26] at map at <console>:35 scala> val washing_df=spark.createDataFrame(rowRDD,schema) washing_df: org.apache.spark.sql.DataFrame = [id: string, rev: string ... 
9 more fields] scala> washing_df.printSchema root |-- id: string (nullable = true) |-- rev: string (nullable = true) |-- count: long (nullable = true) |-- flowrate: long (nullable = true) |-- fluidlevel: string (nullable = true) |-- frequency: long (nullable = true) |-- hardness: long (nullable = true) |-- speed: long (nullable = true) |-- temperature: long (nullable = true) |-- ts: long (nullable = true) |-- voltage: long (nullable = true) scala> washing_df.show(5) 18/02/13 05:54:51 ERROR executor.Executor: Exception in task 0.0 in stage 4.0 (TID 5) java.lang.ArrayIndexOutOfBoundsException: 10 > java.lang.ArrayIndexOutOfBoundsException: 10 > -------------------------------------------- > > Key: SPARK-23403 > URL: https://issues.apache.org/jira/browse/SPARK-23403 > Project: Spark > Issue Type: Bug > Components: Spark Shell > Affects Versions: 2.2.0 > Reporter: Naresh Kumar > Priority: Major > > java.lang.ArrayIndexOutOfBoundsException: 10, while retrieving records from > DataFrame in spark-shell -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org