Is it permissible to use a custom class (as opposed to, e.g., the built-in String
or Int) as the key in groupByKey? It doesn't seem to work for me on
Spark 0.9.0/Scala 2.10.3:
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
class C(val s: String) extends Serializable {
  override def equals(o: Any) =
    if (o.isInstanceOf[C]) o.asInstanceOf[C].s == s else false
  override def toString = s
}
object SimpleApp {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "Simple App", null, null)

    // Custom class C as the key: the two (C("a"), _) pairs are NOT grouped together
    val r1 = sc.parallelize(Array((new C("a"), 11), (new C("a"), 12)))
    println("r1=" + r1.groupByKey.collect.mkString(";"))

    // Built-in String as the key: the two ("a", _) pairs ARE grouped together
    val r2 = sc.parallelize(Array(("a", 11), ("a", 12)))
    println("r2=" + r2.groupByKey.collect.mkString(";"))
  }
}
Output
======
r1=(a,ArrayBuffer(11));(a,ArrayBuffer(12))
r2=(a,ArrayBuffer(11, 12))
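
My guess is that this is related to key hashing (groupByKey shuffles records by the key's hash), and that C overriding equals but not hashCode is the problem. A minimal sketch of what I would try, keeping the same equality semantics but making hashCode consistent with equals:

class C(val s: String) extends Serializable {
  override def equals(o: Any) = o match {
    case c: C => c.s == s
    case _    => false
  }
  // Equal keys must produce the same hash so they land in the same partition
  override def hashCode = s.hashCode
  override def toString = s
}

Is that the right explanation, or is there some other requirement for custom key classes?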