This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 0721c108588e [MINOR][DOCS] Fix broken ML paper links
0721c108588e is described below
commit 0721c108588ee307d9ab17c5175b315d34c54631
Author: Nicholas Chammas <[email protected]>
AuthorDate: Mon Jan 22 09:48:33 2024 +0900
[MINOR][DOCS] Fix broken ML paper links
### What changes were proposed in this pull request?
There are several links to ML papers that are now broken. This change
updates the links to targets that still exist.
### Why are the changes needed?
Broken links are annoying.
### Does this PR introduce _any_ user-facing change?
Yes, it fixes user-facing documentation.
### How was this patch tested?
I built the docs successfully.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #44823 from nchammas/paper-links.
Authored-by: Nicholas Chammas <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
docs/ml-classification-regression.md | 8 ++++++--
.../main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala | 10 ++++++----
.../org/apache/spark/ml/classification/FMClassifier.scala | 6 ++++--
.../scala/org/apache/spark/ml/regression/FMRegressor.scala | 6 ++++--
.../org/apache/spark/mllib/evaluation/RegressionMetrics.scala | 4 +++-
5 files changed, 23 insertions(+), 11 deletions(-)
diff --git a/docs/ml-classification-regression.md b/docs/ml-classification-regression.md
index 604b3245272f..9a401c026cd8 100644
--- a/docs/ml-classification-regression.md
+++ b/docs/ml-classification-regression.md
@@ -699,7 +699,9 @@ Spark's generalized linear regression interface also provides summary statistics
fit of GLM models, including residuals, p-values, deviances, the Akaike
information criterion, and
others.
-[See here](http://data.princeton.edu/wws509/notes/) for a more comprehensive review of GLMs and their applications.
+[See here][glm] for a more comprehensive review of GLMs and their applications.
+
+[glm]: https://web.archive.org/web/20180217071524/http://data.princeton.edu/wws509/notes/
### Available families
@@ -1157,10 +1159,12 @@ regression with elastic net regularization.
# Factorization Machines
-[Factorization Machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf) are able to estimate interactions
+[Factorization Machines][fm] are able to estimate interactions
between features even in problems with huge sparsity (like advertising and
recommendation system).
The `spark.ml` implementation supports factorization machines for binary
classification and for regression.
+[fm]: https://web.archive.org/web/20191225211603/https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf
+
Factorization machines formula is:
$$
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
index bc6fab45810e..8764c9854c53 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
@@ -38,11 +38,12 @@ object SVDPlusPlus {
var gamma7: Double)
extends Serializable
+ // scalastyle:off line.size.limit
/**
- * Implement SVD++ based on "Factorization Meets the Neighborhood:
- * a Multifaceted Collaborative Filtering Model",
- * available at <a href="http://public.research.att.com/~volinsky/netflix/kdd08koren.pdf">
- * here</a>.
+ * Implement SVD++ based on "Factorization Meets the Neighborhood: a Multifaceted
+ * Collaborative Filtering Model",
+ * <a href="https://web.archive.org/web/20220403174543/https://people.engr.tamu.edu/huangrh/Spring16/papers_course/matrix_factorization.pdf">
+ * available here</a>.
*
* The prediction rule is rui = u + bu + bi + qi*(pu +
|N(u)|^^-0.5^^*sum(y)),
* see the details on page 6.
@@ -53,6 +54,7 @@ object SVDPlusPlus {
*
* @return a graph with vertex attributes containing the trained model
*/
+ // scalastyle:on line.size.limit
def run(edges: RDD[Edge[Double]], conf: Conf)
: (Graph[(Array[Double], Array[Double], Double, Double), Double], Double) =
{
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala
index 51f312cf1833..4a4a4fffe5de 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala
@@ -39,12 +39,13 @@ private[classification] trait FMClassifierParams extends ProbabilisticClassifier
with FactorizationMachinesParams {
}
+// scalastyle:off line.size.limit
/**
* Factorization Machines learning algorithm for classification.
* It supports normal gradient descent and AdamW solver.
*
- * The implementation is based upon:
- * <a href="https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf">
+ * The implementation is based on:
+ * <a href="https://web.archive.org/web/20191225211603/https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf">
* S. Rendle. "Factorization machines" 2010</a>.
*
* FM is able to estimate interactions even in problems with huge sparsity
@@ -67,6 +68,7 @@ private[classification] trait FMClassifierParams extends ProbabilisticClassifier
*
* @note Multiclass labels are not currently supported.
*/
+// scalastyle:on line.size.limit
@Since("3.0.0")
class FMClassifier @Since("3.0.0") (
@Since("3.0.0") override val uid: String)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala
index e6e8c2f1fa4b..6e09143e9ee7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala
@@ -270,12 +270,13 @@ private[ml] object FactorizationMachines {
private[regression] trait FMRegressorParams extends
FactorizationMachinesParams {
}
+// scalastyle:off line.size.limit
/**
* Factorization Machines learning algorithm for regression.
* It supports normal gradient descent and AdamW solver.
*
- * The implementation is based upon:
- * <a href="https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf">
+ * The implementation is based on:
+ * <a href="https://web.archive.org/web/20191225211603/https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf">
* S. Rendle. "Factorization machines" 2010</a>.
*
* FM is able to estimate interactions even in problems with huge sparsity
@@ -296,6 +297,7 @@ private[regression] trait FMRegressorParams extends FactorizationMachinesParams
* FM regression model uses MSE loss which can be solved by gradient descent
method, and
* regularization terms like L2 are usually added to the loss function to
prevent overfitting.
*/
+// scalastyle:on line.size.limit
@Since("3.0.0")
class FMRegressor @Since("3.0.0") (
@Since("3.0.0") override val uid: String)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
index 7938427544bd..c4f169005519 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
@@ -115,14 +115,16 @@ class RegressionMetrics @Since("2.0.0") (
math.sqrt(this.meanSquaredError)
}
+ // scalastyle:off line.size.limit
/**
* Returns R^2^, the unadjusted coefficient of determination.
* @see <a href="http://en.wikipedia.org/wiki/Coefficient_of_determination">
* Coefficient of determination (Wikipedia)</a>
* In case of regression through the origin, the definition of R^2^ is to be
modified.
- * @see <a href="https://online.stat.psu.edu/~ajw13/stat501/SpecialTopics/Reg_thru_origin.pdf">
+ * @see <a href="https://web.archive.org/web/20161024050532/https://online.stat.psu.edu/~ajw13/stat501/SpecialTopics/Reg_thru_origin.pdf">
* J. G. Eisenhauer, Regression through the Origin. Teaching Statistics 25,
76-80 (2003)</a>
*/
+ // scalastyle:on line.size.limit
@Since("1.2.0")
def r2: Double = {
if (throughOrigin) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]