Github user attilapiros commented on a diff in the pull request:
https://github.com/apache/spark/pull/20235#discussion_r160979218
--- Diff: mllib/src/test/scala/org/apache/spark/ml/fpm/FPGrowthSuite.scala
---
@@ -34,86 +35,122 @@ class FPGrowthSuite extends SparkFunSuite with
MLlibTestSparkContext with Defaul
}
test("FPGrowth fit and transform with different data types") {
- Array(IntegerType, StringType, ShortType, LongType, ByteType).foreach
{ dt =>
- val data = dataset.withColumn("items",
col("items").cast(ArrayType(dt)))
- val model = new FPGrowth().setMinSupport(0.5).fit(data)
- val generatedRules = model.setMinConfidence(0.5).associationRules
- val expectedRules = spark.createDataFrame(Seq(
- (Array("2"), Array("1"), 1.0),
- (Array("1"), Array("2"), 0.75)
- )).toDF("antecedent", "consequent", "confidence")
- .withColumn("antecedent", col("antecedent").cast(ArrayType(dt)))
- .withColumn("consequent", col("consequent").cast(ArrayType(dt)))
- assert(expectedRules.sort("antecedent").rdd.collect().sameElements(
- generatedRules.sort("antecedent").rdd.collect()))
-
- val transformed = model.transform(data)
- val expectedTransformed = spark.createDataFrame(Seq(
- (0, Array("1", "2"), Array.emptyIntArray),
- (0, Array("1", "2"), Array.emptyIntArray),
- (0, Array("1", "2"), Array.emptyIntArray),
- (0, Array("1", "3"), Array(2))
- )).toDF("id", "items", "prediction")
- .withColumn("items", col("items").cast(ArrayType(dt)))
- .withColumn("prediction", col("prediction").cast(ArrayType(dt)))
- assert(expectedTransformed.collect().toSet.equals(
- transformed.collect().toSet))
+ class DataTypeWithEncoder[A](val a: DataType)
+ (implicit val encoder: Encoder[(Int,
Array[A], Array[A])])
--- End diff --
In DataTypeWithEncoder, I would suggest renaming the val "a" to "dataType".
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]