Repository: spark Updated Branches: refs/heads/branch-1.6 6a74efab0 -> 6616f4da3
[SPARK-11567] [PYTHON] Add Python API for corr Aggregate function like `df.agg(corr("col1", "col2"))` davies Author: felixcheung <felixcheun...@hotmail.com> Closes #9536 from felixcheung/pyfunc. (cherry picked from commit 32790fe7249b0efe2cbc5c4ee2df0fb687dcd624) Signed-off-by: Davies Liu <davies....@gmail.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6616f4da Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6616f4da Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6616f4da Branch: refs/heads/branch-1.6 Commit: 6616f4da3719667939ed41f81d2e2ba32bb87c72 Parents: 6a74efa Author: felixcheung <felixcheun...@hotmail.com> Authored: Tue Nov 10 15:47:10 2015 -0800 Committer: Davies Liu <davies....@gmail.com> Committed: Tue Nov 10 15:47:21 2015 -0800 ---------------------------------------------------------------------- python/pyspark/sql/functions.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/6616f4da/python/pyspark/sql/functions.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 6e1cbde..c3da513 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -255,6 +255,22 @@ def coalesce(*cols): return Column(jc) +@since(1.6) +def corr(col1, col2): + """Returns a new :class:`Column` for the Pearson Correlation Coefficient for ``col1`` + and ``col2``. 
+ + >>> a = [x * x - 2 * x + 3.5 for x in range(20)] + >>> b = range(20) + >>> corrDf = sqlContext.createDataFrame(zip(a, b)) + >>> corrDf = corrDf.agg(corr(corrDf._1, corrDf._2).alias('c')) + >>> corrDf.selectExpr('abs(c - 0.9572339139475857) < 1e-16 as t').collect() + [Row(t=True)] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.corr(_to_java_column(col1), _to_java_column(col2))) + + @since(1.3) def countDistinct(col, *cols): """Returns a new :class:`Column` for distinct count of ``col`` or ``cols``. --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org