full code example:
  def main(args: Array[String]) {
    val conf = new
SparkConf().setAppName("ErrorExample").setMaster("local[8]")
      .set("spark.serializer", classOf[KryoSerializer].getName)
    val sc = new SparkContext(conf)

    val rdd = sc.hadoopFile(
            "hdfs://...../user.avro",
      classOf[org.apache.avro.mapred.AvroInputFormat[User]],
      classOf[org.apache.avro.mapred.AvroWrapper[User]],
      classOf[org.apache.hadoop.io.NullWritable],
      1)

    val usersRDD = rdd.map({ case (u, _) => u.datum()})
    usersRDD.foreach(println)

    println("-----------------")

    val collected = usersRDD.collect()

    collected.foreach(println)
  }


output (without info loggind etc):
{"id": "1", "name": "a"}
{"id": "2", "name": "b"}
{"id": "3", "name": "c"}
{"id": "4", "name": "d"}
{"id": "5", "name": "e"}
{"id": "6", "name": "f"}
-----------------
{"id": "6", "name": "f"}
{"id": "6", "name": "f"}
{"id": "6", "name": "f"}
{"id": "6", "name": "f"}
{"id": "6", "name": "f"}
{"id": "6", "name": "f"}




--
View this message in context: 
http://apache-spark-user-list.1001560.n3.nabble.com/collect-on-hadoopFile-RDD-returns-wrong-results-tp14368p14428.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.

---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscr...@spark.apache.org
For additional commands, e-mail: user-h...@spark.apache.org

Reply via email to