spark git commit: [SPARK-16107][R] group glm methods in documentation

meng Wed, 22 Jun 2016 09:13:31 -0700

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 503eb882c -> 1cfdd25fd



[SPARK-16107][R] group glm methods in documentation

## What changes were proposed in this pull request?

This groups the GLM methods (spark.glm, summary, print, predict and write.ml) 
on a single documentation page (spark.glm). The example code was updated to cover them.

## How was this patch tested?

N/A

(If this patch involves UI changes, please attach a screenshot; otherwise, 
remove this)

![screen shot 2016-06-21 at 2 31 37 
pm](https://cloud.githubusercontent.com/assets/15318264/16247077/f6eafc04-37bc-11e6-89a8-7898ff3e4078.png)
![screen shot 2016-06-21 at 2 31 45 
pm](https://cloud.githubusercontent.com/assets/15318264/16247078/f6eb1c16-37bc-11e6-940a-2b595b10617c.png)

Author: Junyang Qian <junya...@databricks.com>
Author: Junyang Qian <junyangq@Junyangs-MacBook-Pro.local>

Closes #13820 from junyangq/SPARK-16107.

(cherry picked from commit ea3a12b0147821960f8dabdc58d726f07f1f0e52)
Signed-off-by: Xiangrui Meng <m...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1cfdd25f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1cfdd25f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1cfdd25f

Branch: refs/heads/branch-2.0
Commit: 1cfdd25fdb87012187b1e01f9c5ac4b6218dc840
Parents: 503eb88
Author: Junyang Qian <junya...@databricks.com>
Authored: Wed Jun 22 09:13:08 2016 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Wed Jun 22 09:13:15 2016 -0700

----------------------------------------------------------------------
 R/pkg/R/mllib.R | 80 +++++++++++++++++++++++-----------------------------
 1 file changed, 36 insertions(+), 44 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/1cfdd25f/R/pkg/R/mllib.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index b83b3b3..dbff1b9 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -53,9 +53,10 @@ setClass("AFTSurvivalRegressionModel", representation(jobj = 
"jobj"))
 #' @note KMeansModel since 2.0.0
 setClass("KMeansModel", representation(jobj = "jobj"))
 
-#' Fits a generalized linear model
+#' Generalized Linear Models
 #'
-#' Fits a generalized linear model against a Spark DataFrame.
+#' Fits generalized linear model against a Spark DataFrame. Users can print, 
make predictions on the
+#' produced model and save the model to the input path.
 #'
 #' @param data SparkDataFrame for training.
 #' @param formula A symbolic description of the model to be fitted. Currently 
only a few formula
@@ -66,8 +67,9 @@ setClass("KMeansModel", representation(jobj = "jobj"))
 #'               
\url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
 #' @param tol Positive convergence tolerance of iterations.
 #' @param maxIter Integer giving the maximal number of IRLS iterations.
-#' @return a fitted generalized linear model
+#' @return \code{spark.glm} returns a fitted generalized linear model
 #' @rdname spark.glm
+#' @name spark.glm
 #' @export
 #' @examples
 #' \dontrun{
@@ -76,8 +78,21 @@ setClass("KMeansModel", representation(jobj = "jobj"))
 #' df <- createDataFrame(iris)
 #' model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family = "gaussian")
 #' summary(model)
+#'
+#' # fitted values on training data
+#' fitted <- predict(model, df)
+#' head(select(fitted, "Sepal_Length", "prediction"))
+#'
+#' # save fitted model to input path
+#' path <- "path/to/model"
+#' write.ml(model, path)
+#'
+#' # can also read back the saved model and print
+#' savedModel <- read.ml(path)
+#' summary(savedModel)
 #' }
 #' @note spark.glm since 2.0.0
+#' @seealso \link{glm}, \link{read.ml}
 setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
           function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25) 
{
             if (is.character(family)) {
@@ -99,10 +114,9 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", 
formula = "formula"),
             return(new("GeneralizedLinearRegressionModel", jobj = jobj))
           })
 
-#' Fits a generalized linear model (R-compliant).
+#' Generalized Linear Models (R-compliant)
 #'
 #' Fits a generalized linear model, similarly to R's glm().
-#'
 #' @param formula A symbolic description of the model to be fitted. Currently 
only a few formula
 #'                operators are supported, including '~', '.', ':', '+', and 
'-'.
 #' @param data SparkDataFrame for training.
@@ -112,7 +126,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", 
formula = "formula"),
 #'               
\url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
 #' @param epsilon Positive convergence tolerance of iterations.
 #' @param maxit Integer giving the maximal number of IRLS iterations.
-#' @return a fitted generalized linear model
+#' @return \code{glm} returns a fitted generalized linear model.
 #' @rdname glm
 #' @export
 #' @examples
@@ -124,24 +138,21 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", 
formula = "formula"),
 #' summary(model)
 #' }
 #' @note glm since 1.5.0
+#' @seealso \link{spark.glm}
 setMethod("glm", signature(formula = "formula", family = "ANY", data = 
"SparkDataFrame"),
           function(formula, family = gaussian, data, epsilon = 1e-6, maxit = 
25) {
             spark.glm(data, formula, family, tol = epsilon, maxIter = maxit)
           })
 
-#' Get the summary of a generalized linear model
-#'
-#' Returns the summary of a model produced by glm() or spark.glm(), similarly 
to R's summary().
+#  Returns the summary of a model produced by glm() or spark.glm(), similarly 
to R's summary().
 #'
 #' @param object A fitted generalized linear model
-#' @return coefficients the model's coefficients, intercept
-#' @rdname summary
+#' @return \code{summary} returns a summary object of the fitted model, a list 
of components
+#'         including at least the coefficients, null/residual deviance, 
null/residual degrees
+#'         of freedom, AIC and number of iterations IRLS takes.
+#'
+#' @rdname spark.glm
 #' @export
-#' @examples
-#' \dontrun{
-#' model <- glm(y ~ x, trainingData)
-#' summary(model)
-#' }
 #' @note summary(GeneralizedLinearRegressionModel) since 2.0.0
 setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
           function(object, ...) {
@@ -173,10 +184,10 @@ setMethod("summary", signature(object = 
"GeneralizedLinearRegressionModel"),
             return(ans)
           })
 
-#' Print the summary of GeneralizedLinearRegressionModel
+#  Prints the summary of GeneralizedLinearRegressionModel
 #'
-#' @rdname print
-#' @name print.summary.GeneralizedLinearRegressionModel
+#' @rdname spark.glm
+#' @param x Summary object of fitted generalized linear model returned by 
\code{summary} function
 #' @export
 #' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0
 print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
@@ -205,22 +216,13 @@ print.summary.GeneralizedLinearRegressionModel <- 
function(x, ...) {
   invisible(x)
   }
 
-#' Predicted values based on model
+#  Makes predictions from a generalized linear model produced by glm() or 
spark.glm(),
+#  similarly to R's predict().
 #'
-#' Makes predictions from a generalized linear model produced by glm() or 
spark.glm(),
-#' similarly to R's predict().
-#'
-#' @param object A fitted generalized linear model
 #' @param newData SparkDataFrame for testing
-#' @return SparkDataFrame containing predicted labels in a column named 
"prediction"
-#' @rdname predict
+#' @return \code{predict} returns a SparkDataFrame containing predicted labels 
in a column named "prediction"
+#' @rdname spark.glm
 #' @export
-#' @examples
-#' \dontrun{
-#' model <- glm(y ~ x, trainingData)
-#' predicted <- predict(model, testData)
-#' showDF(predicted)
-#' }
 #' @note predict(GeneralizedLinearRegressionModel) since 1.5.0
 setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"),
           function(object, newData) {
@@ -471,24 +473,14 @@ setMethod("write.ml", signature(object = 
"AFTSurvivalRegressionModel", path = "c
             invisible(callJMethod(writer, "save", path))
           })
 
-#' Save fitted MLlib model to the input path
-#'
-#' Save the generalized linear model to the input path.
+#  Saves the generalized linear model to the input path.
 #'
-#' @param object A fitted generalized linear model
 #' @param path The directory where the model is saved
 #' @param overwrite Overwrites or not if the output path already exists. 
Default is FALSE
 #'                  which means throw exception if the output path exists.
 #'
-#' @rdname write.ml
-#' @name write.ml
+#' @rdname spark.glm
 #' @export
-#' @examples
-#' \dontrun{
-#' model <- glm(y ~ x, trainingData)
-#' path <- "path/to/model"
-#' write.ml(model, path)
-#' }
 #' @note write.ml(GeneralizedLinearRegressionModel, character) since 2.0.0
 setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", 
path = "character"),
           function(object, path, overwrite = FALSE) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-16107][R] group glm methods in documentation

Reply via email to