This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 0721c108588e [MINOR][DOCS] Fix broken ML paper links 0721c108588e is described below commit 0721c108588ee307d9ab17c5175b315d34c54631 Author: Nicholas Chammas <nicholas.cham...@gmail.com> AuthorDate: Mon Jan 22 09:48:33 2024 +0900 [MINOR][DOCS] Fix broken ML paper links ### What changes were proposed in this pull request? There are several links to ML papers that are now broken. This change updates the links to targets that still exist. ### Why are the changes needed? Broken links are annoying. ### Does this PR introduce _any_ user-facing change? Yes, it fixes user-facing documentation. ### How was this patch tested? I built the docs successfully. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44823 from nchammas/paper-links. Authored-by: Nicholas Chammas <nicholas.cham...@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- docs/ml-classification-regression.md | 8 ++++++-- .../main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala | 10 ++++++---- .../org/apache/spark/ml/classification/FMClassifier.scala | 6 ++++-- .../scala/org/apache/spark/ml/regression/FMRegressor.scala | 6 ++++-- .../org/apache/spark/mllib/evaluation/RegressionMetrics.scala | 4 +++- 5 files changed, 23 insertions(+), 11 deletions(-) diff --git a/docs/ml-classification-regression.md b/docs/ml-classification-regression.md index 604b3245272f..9a401c026cd8 100644 --- a/docs/ml-classification-regression.md +++ b/docs/ml-classification-regression.md @@ -699,7 +699,9 @@ Spark's generalized linear regression interface also provides summary statistics fit of GLM models, including residuals, p-values, deviances, the Akaike information criterion, and others. -[See here](http://data.princeton.edu/wws509/notes/) for a more comprehensive review of GLMs and their applications. +[See here][glm] for a more comprehensive review of GLMs and their applications. + +[glm]: https://web.archive.org/web/20180217071524/http://data.princeton.edu/wws509/notes/ ### Available families @@ -1157,10 +1159,12 @@ regression with elastic net regularization. # Factorization Machines -[Factorization Machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) are able to estimate interactions +[Factorization Machines][fm] are able to estimate interactions between features even in problems with huge sparsity (like advertising and recommendation system). The `spark.ml` implementation supports factorization machines for binary classification and for regression. +[fm]: https://web.archive.org/web/20191225211603/https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf + Factorization machines formula is: $$ diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala index bc6fab45810e..8764c9854c53 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala @@ -38,11 +38,12 @@ object SVDPlusPlus { var gamma7: Double) extends Serializable + // scalastyle:off line.size.limit /** - * Implement SVD++ based on "Factorization Meets the Neighborhood: - * a Multifaceted Collaborative Filtering Model", - * available at <a href="http://public.research.att.com/~volinsky/netflix/kdd08koren.pdf"> - * here</a>. + * Implement SVD++ based on "Factorization Meets the Neighborhood: a Multifaceted + * Collaborative Filtering Model", + * <a href="https://web.archive.org/web/20220403174543/https://people.engr.tamu.edu/huangrh/Spring16/papers_course/matrix_factorization.pdf"> + * available here</a>. * * The prediction rule is rui = u + bu + bi + qi*(pu + |N(u)|^^-0.5^^*sum(y)), * see the details on page 6. @@ -53,6 +54,7 @@ object SVDPlusPlus { * * @return a graph with vertex attributes containing the trained model */ + // scalastyle:on line.size.limit def run(edges: RDD[Edge[Double]], conf: Conf) : (Graph[(Array[Double], Array[Double], Double, Double), Double], Double) = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala index 51f312cf1833..4a4a4fffe5de 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala @@ -39,12 +39,13 @@ private[classification] trait FMClassifierParams extends ProbabilisticClassifier with FactorizationMachinesParams { } +// scalastyle:off line.size.limit /** * Factorization Machines learning algorithm for classification. * It supports normal gradient descent and AdamW solver. * - * The implementation is based upon: - * <a href="https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf"> + * The implementation is based on: + * <a href="https://web.archive.org/web/20191225211603/https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf"> * S. Rendle. "Factorization machines" 2010</a>. * * FM is able to estimate interactions even in problems with huge sparsity @@ -67,6 +68,7 @@ private[classification] trait FMClassifierParams extends ProbabilisticClassifier * * @note Multiclass labels are not currently supported. */ +// scalastyle:on line.size.limit @Since("3.0.0") class FMClassifier @Since("3.0.0") ( @Since("3.0.0") override val uid: String) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala index e6e8c2f1fa4b..6e09143e9ee7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala @@ -270,12 +270,13 @@ private[ml] object FactorizationMachines { private[regression] trait FMRegressorParams extends FactorizationMachinesParams { } +// scalastyle:off line.size.limit /** * Factorization Machines learning algorithm for regression. * It supports normal gradient descent and AdamW solver. * - * The implementation is based upon: - * <a href="https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf"> + * The implementation is based on: + * <a href="https://web.archive.org/web/20191225211603/https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf"> * S. Rendle. "Factorization machines" 2010</a>. * * FM is able to estimate interactions even in problems with huge sparsity @@ -296,6 +297,7 @@ private[regression] trait FMRegressorParams extends FactorizationMachinesParams * FM regression model uses MSE loss which can be solved by gradient descent method, and * regularization terms like L2 are usually added to the loss function to prevent overfitting. */ +// scalastyle:on line.size.limit @Since("3.0.0") class FMRegressor @Since("3.0.0") ( @Since("3.0.0") override val uid: String) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala index 7938427544bd..c4f169005519 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala @@ -115,14 +115,16 @@ class RegressionMetrics @Since("2.0.0") ( math.sqrt(this.meanSquaredError) } + // scalastyle:off line.size.limit /** * Returns R^2^, the unadjusted coefficient of determination. * @see <a href="http://en.wikipedia.org/wiki/Coefficient_of_determination"> * Coefficient of determination (Wikipedia)</a> * In case of regression through the origin, the definition of R^2^ is to be modified. - * @see <a href="https://online.stat.psu.edu/~ajw13/stat501/SpecialTopics/Reg_thru_origin.pdf"> + * @see <a href="https://web.archive.org/web/20161024050532/https://online.stat.psu.edu/~ajw13/stat501/SpecialTopics/Reg_thru_origin.pdf"> * J. G. Eisenhauer, Regression through the Origin. Teaching Statistics 25, 76-80 (2003)</a> */ + // scalastyle:on line.size.limit @Since("1.2.0") def r2: Double = { if (throughOrigin) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org