Repository: systemml Updated Branches: refs/heads/master 4add81b04 -> 85cb9e34e
[MINOR] Cleanup and simplification of l2svm algorithm script Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/85cb9e34 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/85cb9e34 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/85cb9e34 Branch: refs/heads/master Commit: 85cb9e34e79b1b87ebe09d2a37658f9265d8ef9a Parents: 4add81b Author: Matthias Boehm <mboe...@gmail.com> Authored: Sun Feb 11 21:21:18 2018 -0800 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Sun Feb 11 21:21:18 2018 -0800 ---------------------------------------------------------------------- scripts/algorithms/l2-svm.dml | 84 +++++++++++++++----------------------- 1 file changed, 34 insertions(+), 50 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/85cb9e34/scripts/algorithms/l2-svm.dml ---------------------------------------------------------------------- diff --git a/scripts/algorithms/l2-svm.dml b/scripts/algorithms/l2-svm.dml index cf669b5..a8e6166 100644 --- a/scripts/algorithms/l2-svm.dml +++ b/scripts/algorithms/l2-svm.dml @@ -43,81 +43,67 @@ # Log String --- [OPTIONAL] Location to write the log file # --------------------------------------------------------------------------------------------- -# hadoop jar SystemML.jar -f $L2SVM_HOME/l2-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/Y icpt=0 tol=0.001 reg=1 maxiter=100 model=$OUPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text" +# hadoop jar SystemML.jar -f $L2SVM_HOME/l2-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/Y \ +# icpt=0 tol=0.001 reg=1 maxiter=100 model=$OUPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text" # # Note about inputs: -# Assumes that labels (entries in Y) -# are set to either -1 or +1 -# or the result of recoding -# +# Assumes that labels (entries in Y) are set to either -1 or +1 or non-negative integers -cmdLine_fmt = ifdef($fmt, "text") -cmdLine_icpt = ifdef($icpt, 0) -cmdLine_tol = ifdef($tol, 0.001) -cmdLine_reg = ifdef($reg, 1.0) -cmdLine_maxiter = ifdef($maxiter, 100) +fmt = ifdef($fmt, "text") +intercept = ifdef($icpt, 0) +epsilon = ifdef($tol, 0.001) +lambda = ifdef($reg, 1.0) +maxiterations = ifdef($maxiter, 100) X = read($X) Y = read($Y) +#check input parameter assertions if(nrow(X) < 2) stop("Stopping due to invalid inputs: Not possible to learn a binary class classifier without at least 2 rows") +if(intercept != 0 & intercept != 1) + stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1") +if(epsilon < 0) + stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative") +if(lambda < 0) + stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative") +if(maxiterations < 1) + stop("Stopping due to invalid argument: Maximum iterations should be a positive integer") +#check input lables and transform into -1/1 check_min = min(Y) check_max = max(Y) num_min = sum(Y == check_min) num_max = sum(Y == check_max) - if(check_min == check_max) stop("Stopping due to invalid inputs: Y seems to contain exactly one label") - if(num_min + num_max != nrow(Y)) stop("Stopping due to invalid inputs: Y seems to contain more than 2 labels") - -if(check_min != -1 | check_max != +1) +if(check_min != -1 | check_max != 1) Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min) positive_label = check_max negative_label = check_min - -intercept = cmdLine_icpt -if(intercept != 0 & intercept != 1) - stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1") - -epsilon = cmdLine_tol -if(epsilon < 0) - stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative") - -lambda = cmdLine_reg -if(lambda < 0) - stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative") - -maxiterations = cmdLine_maxiter -if(maxiterations < 1) - stop("Stopping due to invalid argument: Maximum iterations should be a positive integer") - num_samples = nrow(X) dimensions = ncol(X) +num_rows_in_w = dimensions if (intercept == 1) { - ones = matrix(1, rows=num_samples, cols=1) + ones = matrix(1, rows=num_samples, cols=1) X = cbind(X, ones); + num_rows_in_w += 1 } -num_rows_in_w = dimensions -if(intercept == 1){ - num_rows_in_w = num_rows_in_w + 1 -} -w = matrix(0, rows=num_rows_in_w, cols=1) - +w = matrix(0, num_rows_in_w, 1) +Xw = matrix(0, rows=nrow(X), cols=1) g_old = t(X) %*% Y s = g_old -Xw = matrix(0, rows=nrow(X), cols=1) debug_str = "# Iter, Obj" iter = 0 continue = TRUE -while(continue & iter < maxiterations) { + +while(continue & iter < maxiterations) { # minimizing primal obj along direction s step_sz = 0 Xd = X %*% s @@ -125,15 +111,14 @@ while(continue & iter < maxiterations) { dd = lambda * sum(s * s) continue1 = TRUE - while(continue1){ + while(continue1) { tmp_Xw = Xw + step_sz*Xd - out = 1 - Y * (tmp_Xw) - sv = (out > 0) + out = 1 - Y * tmp_Xw + sv = out > 0 out = out * sv g = wd + step_sz*dd - sum(out * Y * Xd) h = dd + sum(Xd * sv * Xd) step_sz = step_sz - g/h - continue1 = (g*g/h >= 0.0000000001); } @@ -142,7 +127,7 @@ while(continue & iter < maxiterations) { Xw += step_sz * Xd out = 1 - Y * Xw - sv = (out > 0) + sv = out > 0 out = sv * out obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w) g_new = t(X) %*% (out * Y) - lambda * w @@ -161,16 +146,15 @@ while(continue & iter < maxiterations) { iter = iter + 1 } -extra_model_params = matrix(0, rows=4, cols=1) +extra_model_params = matrix(0, 4, 1) extra_model_params[1,1] = positive_label extra_model_params[2,1] = negative_label extra_model_params[3,1] = intercept extra_model_params[4,1] = dimensions -w = t(cbind(t(w), t(extra_model_params))) -write(w, $model, format=cmdLine_fmt) +w = rbind(w, extra_model_params) +write(w, $model, format=fmt) logFile = $Log -if(logFile != " ") { +if(logFile != " ") write(debug_str, logFile) -}