Repository: spark
Updated Branches:
  refs/heads/branch-2.0 d96e8c2dd -> 1cde325e2


[SPARK-16140][MLLIB][SPARKR][DOCS] Group k-means method in generated R doc

https://issues.apache.org/jira/browse/SPARK-16140

## What changes were proposed in this pull request?

Group the R docs of spark.kmeans, predict(KMeansModel), summary(KMeansModel), 
and read/write.ml(KMeansModel) under a single Rd page, spark.kmeans. The 
example code was updated as well.

## How was this patch tested?

Tested on my local machine.

On my laptop `jekyll build` fails to build the API docs, so I can only show 
the HTML I generated manually from the Rd files. No CSS is applied, but the 
doc content is all there.

![screenshotkmeans](https://cloud.githubusercontent.com/assets/3925641/16403203/c2c9ca1e-3ca7-11e6-9e29-f2164aee75fc.png)

Author: Xin Ren <iamsh...@126.com>

Closes #13921 from keypointt/SPARK-16140.

(cherry picked from commit 8c9cd0a7a719ce4286f77f35bb787e2b626a472e)
Signed-off-by: Xiangrui Meng <m...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1cde325e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1cde325e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1cde325e

Branch: refs/heads/branch-2.0
Commit: 1cde325e29286a8c6631b0b32351994aad7db567
Parents: d96e8c2
Author: Xin Ren <iamsh...@126.com>
Authored: Wed Jun 29 11:25:00 2016 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Wed Jun 29 11:25:07 2016 -0700

----------------------------------------------------------------------
 R/pkg/R/generics.R |  2 ++
 R/pkg/R/mllib.R    | 72 +++++++++++++++++++++++--------------------------
 2 files changed, 35 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/1cde325e/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 27dfd67..0e4350f 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1247,6 +1247,7 @@ setGeneric("spark.glm", function(data, formula, ...) { 
standardGeneric("spark.gl
 #' @export
 setGeneric("glm")
 
+#' predict
 #' @rdname predict
 #' @export
 setGeneric("predict", function(object, ...) { standardGeneric("predict") })
@@ -1271,6 +1272,7 @@ setGeneric("spark.naiveBayes", function(data, formula, 
...) { standardGeneric("s
 #' @export
 setGeneric("spark.survreg", function(data, formula, ...) { 
standardGeneric("spark.survreg") })
 
+#' write.ml
 #' @rdname write.ml
 #' @export
 setGeneric("write.ml", function(object, path, ...) { 
standardGeneric("write.ml") })

http://git-wip-us.apache.org/repos/asf/spark/blob/1cde325e/R/pkg/R/mllib.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 897a376..4fe7367 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -267,9 +267,10 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
             return(list(apriori = apriori, tables = tables))
           })
 
-#' Fit a k-means model
+#' K-Means Clustering Model
 #'
-#' Fit a k-means model, similarly to R's kmeans().
+#' Fits a k-means clustering model against a Spark DataFrame, similarly to R's 
kmeans().
+#' Users can print the produced model, make predictions with it, and save it 
to the input path.
 #'
 #' @param data SparkDataFrame for training
 #' @param formula A symbolic description of the model to be fitted. Currently 
only a few formula
@@ -278,14 +279,32 @@ setMethod("summary", signature(object = 
"NaiveBayesModel"),
 #' @param k Number of centers
 #' @param maxIter Maximum iteration number
 #' @param initMode The initialization algorithm chosen to fit the model
-#' @return A fitted k-means model
+#' @return \code{spark.kmeans} returns a fitted k-means model
 #' @rdname spark.kmeans
+#' @name spark.kmeans
 #' @export
 #' @examples
 #' \dontrun{
-#' model <- spark.kmeans(data, ~ ., k = 4, initMode = "random")
+#' sparkR.session()
+#' data(iris)
+#' df <- createDataFrame(iris)
+#' model <- spark.kmeans(df, Sepal_Length ~ Sepal_Width, k = 4, initMode = 
"random")
+#' summary(model)
+#'
+#' # fitted values on training data
+#' fitted <- predict(model, df)
+#' head(select(fitted, "Sepal_Length", "prediction"))
+#'
+#' # save fitted model to input path
+#' path <- "path/to/model"
+#' write.ml(model, path)
+#'
+#' # can also read back the saved model and print
+#' savedModel <- read.ml(path)
+#' summary(savedModel)
 #' }
 #' @note spark.kmeans since 2.0.0
+#' @seealso \link{predict}, \link{read.ml}, \link{write.ml}
 setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = 
"formula"),
           function(data, formula, k = 2, maxIter = 20, initMode = 
c("k-means||", "random")) {
             formula <- paste(deparse(formula), collapse = "")
@@ -301,7 +320,7 @@ setMethod("spark.kmeans", signature(data = 
"SparkDataFrame", formula = "formula"
 #' Note: A saved-loaded model does not support this method.
 #'
 #' @param object A fitted k-means model
-#' @return SparkDataFrame containing fitted values
+#' @return \code{fitted} returns a SparkDataFrame containing fitted values
 #' @rdname fitted
 #' @export
 #' @examples
@@ -323,20 +342,12 @@ setMethod("fitted", signature(object = "KMeansModel"),
             }
           })
 
-#' Get the summary of a k-means model
-#'
-#' Returns the summary of a k-means model produced by spark.kmeans(),
-#' similarly to R's summary().
+#  Get the summary of a k-means model
 #'
-#' @param object a fitted k-means model
-#' @return the model's coefficients, size and cluster
-#' @rdname summary
+#' @param object A fitted k-means model
+#' @return \code{summary} returns the model's coefficients, size and cluster
+#' @rdname spark.kmeans
 #' @export
-#' @examples
-#' \dontrun{
-#' model <- spark.kmeans(trainingData, ~ ., 2)
-#' summary(model)
-#' }
 #' @note summary(KMeansModel) since 2.0.0
 setMethod("summary", signature(object = "KMeansModel"),
           function(object, ...) {
@@ -358,19 +369,11 @@ setMethod("summary", signature(object = "KMeansModel"),
                    cluster = cluster, is.loaded = is.loaded))
           })
 
-#' Predicted values based on model
-#'
-#' Makes predictions from a k-means model or a model produced by 
spark.kmeans().
+#  Predicted values based on a k-means model
 #'
-#' @param object A fitted k-means model
-#' @rdname predict
+#' @return \code{predict} returns the predicted values based on a k-means model
+#' @rdname spark.kmeans
 #' @export
-#' @examples
-#' \dontrun{
-#' model <- spark.kmeans(trainingData, ~ ., 2)
-#' predicted <- predict(model, testData)
-#' showDF(predicted)
-#' }
 #' @note predict(KMeansModel) since 2.0.0
 setMethod("predict", signature(object = "KMeansModel"),
           function(object, newData) {
@@ -477,24 +480,15 @@ setMethod("write.ml", signature(object = 
"GeneralizedLinearRegressionModel", pat
             invisible(callJMethod(writer, "save", path))
           })
 
-#' Save fitted MLlib model to the input path
-#'
-#' Save the k-means model to the input path.
+#  Save fitted MLlib model to the input path
 #'
-#' @param object A fitted k-means model
 #' @param path The directory where the model is saved
 #' @param overwrite Overwrites or not if the output path already exists. 
Default is FALSE
 #'                  which means throw exception if the output path exists.
 #'
-#' @rdname write.ml
+#' @rdname spark.kmeans
 #' @name write.ml
 #' @export
-#' @examples
-#' \dontrun{
-#' model <- spark.kmeans(trainingData, ~ ., k = 2)
-#' path <- "path/to/model"
-#' write.ml(model, path)
-#' }
 #' @note write.ml(KMeansModel, character) since 2.0.0
 setMethod("write.ml", signature(object = "KMeansModel", path = "character"),
           function(object, path, overwrite = FALSE) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to