This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new e65c21e  [SPARK-31304][ML][EXAMPLES] Add examples for ml.stat.ANOVATest
e65c21e is described below

commit e65c21e093a643573f7ced4998dd9050557ec328
Author: Qianyang Yu <q...@us.ibm.com>
AuthorDate: Tue Mar 31 16:33:26 2020 -0500

    [SPARK-31304][ML][EXAMPLES] Add examples for ml.stat.ANOVATest
    
    ### What changes were proposed in this pull request?
    
    Add ANOVATest example for ml.stat.ANOVATest in python/java/scala
    
    ### Why are the changes needed?
    
    Improve ML example
    
    ### Does this PR introduce any user-facing change?
    
    No
    
    ### How was this patch tested?
    
    manually run the example
    
    Closes #28073 from kevinyu98/add-ANOVA-example.
    
    Authored-by: Qianyang Yu <q...@us.ibm.com>
    Signed-off-by: Sean Owen <sro...@gmail.com>
---
 .../spark/examples/ml/JavaANOVATestExample.java    | 75 ++++++++++++++++++++++
 examples/src/main/python/ml/anova_test_example.py  | 52 +++++++++++++++
 .../spark/examples/ml/ANOVATestExample.scala       | 63 ++++++++++++++++++
 3 files changed, 190 insertions(+)

diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaANOVATestExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaANOVATestExample.java
new file mode 100644
index 0000000..3b2de1f
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaANOVATestExample.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.stat.ANOVATest;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.*;
+// $example off$
+
+/**
+ * An example for ANOVA testing.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaANOVATestExample
+ * </pre>
+ */
+public class JavaANOVATestExample {
+
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaANOVATestExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(3.0, Vectors.dense(1.7, 4.4, 7.6, 5.8, 9.6, 2.3)),
+      RowFactory.create(2.0, Vectors.dense(8.8, 7.3, 5.7, 7.3, 2.2, 4.1)),
+      RowFactory.create(1.0, Vectors.dense(1.2, 9.5, 2.5, 3.1, 8.7, 2.5)),
+      RowFactory.create(2.0, Vectors.dense(3.7, 9.2, 6.1, 4.1, 7.5, 3.8)),
+      RowFactory.create(4.0, Vectors.dense(8.9, 5.2, 7.8, 8.3, 5.2, 3.0)),
+      RowFactory.create(4.0, Vectors.dense(7.9, 8.5, 9.2, 4.0, 9.4, 2.1))
+    );
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
+      new StructField("features", new VectorUDT(), false, Metadata.empty()),
+    });
+
+    Dataset<Row> df = spark.createDataFrame(data, schema);
+    Row r = ANOVATest.test(df, "features", "label").head();
+    System.out.println("pValues: " + r.get(0).toString());
+    System.out.println("degreesOfFreedom: " + r.getList(1).toString());
+    System.out.println("fValues: " + r.get(2).toString());
+
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/examples/src/main/python/ml/anova_test_example.py b/examples/src/main/python/ml/anova_test_example.py
new file mode 100644
index 0000000..3fffdbd
--- /dev/null
+++ b/examples/src/main/python/ml/anova_test_example.py
@@ -0,0 +1,52 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example for ANOVA testing.
+Run with:
+  bin/spark-submit examples/src/main/python/ml/anova_test_example.py
+"""
+from __future__ import print_function
+
+from pyspark.sql import SparkSession
+# $example on$
+from pyspark.ml.linalg import Vectors
+from pyspark.ml.stat import ANOVATest
+# $example off$
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("ANOVATestExample")\
+        .getOrCreate()
+
+    # $example on$
+    data = [(3.0, Vectors.dense([1.7, 4.4, 7.6, 5.8, 9.6, 2.3])),
+            (2.0, Vectors.dense([8.8, 7.3, 5.7, 7.3, 2.2, 4.1])),
+            (1.0, Vectors.dense([1.2, 9.5, 2.5, 3.1, 8.7, 2.5])),
+            (2.0, Vectors.dense([3.7, 9.2, 6.1, 4.1, 7.5, 3.8])),
+            (4.0, Vectors.dense([8.9, 5.2, 7.8, 8.3, 5.2, 3.0])),
+            (4.0, Vectors.dense([7.9, 8.5, 9.2, 4.0, 9.4, 2.1]))]
+    df = spark.createDataFrame(data, ["label", "features"])
+
+    r = ANOVATest.test(df, "features", "label").head()
+    print("pValues: " + str(r.pValues))
+    print("degreesOfFreedom: " + str(r.degreesOfFreedom))
+    print("fValues: " + str(r.fValues))
+    # $example off$
+
+    spark.stop()
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/ANOVATestExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/ANOVATestExample.scala
new file mode 100644
index 0000000..0cd793f
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/ANOVATestExample.scala
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.linalg.{Vector, Vectors}
+import org.apache.spark.ml.stat.ANOVATest
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example for ANOVA testing.
+ * Run with
+ * {{{
+ * bin/run-example ml.ANOVATestExample
+ * }}}
+ */
+object ANOVATestExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("ANOVATestExample")
+      .getOrCreate()
+    import spark.implicits._
+
+    // $example on$
+    val data = Seq(
+      (3.0, Vectors.dense(1.7, 4.4, 7.6, 5.8, 9.6, 2.3)),
+      (2.0, Vectors.dense(8.8, 7.3, 5.7, 7.3, 2.2, 4.1)),
+      (1.0, Vectors.dense(1.2, 9.5, 2.5, 3.1, 8.7, 2.5)),
+      (2.0, Vectors.dense(3.7, 9.2, 6.1, 4.1, 7.5, 3.8)),
+      (4.0, Vectors.dense(8.9, 5.2, 7.8, 8.3, 5.2, 3.0)),
+      (4.0, Vectors.dense(7.9, 8.5, 9.2, 4.0, 9.4, 2.1))
+    )
+
+    val df = data.toDF("label", "features")
+    val anova = ANOVATest.test(df, "features", "label").head
+    println(s"pValues = ${anova.getAs[Vector](0)}")
+    println(s"degreesOfFreedom ${anova.getSeq[Int](1).mkString("[", ",", "]")}")
+    println(s"fValues ${anova.getAs[Vector](2)}")
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to