Repository: systemml
Updated Branches:
  refs/heads/master a51f8e819 -> 596005a80


[SYSTEMML-1648] Making SVM scripts work with MLContext

Closes #687.
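
The change targets driving these SVM scripts from the Spark MLContext API rather than only through -nvargs file paths. Below is a minimal Scala sketch of that usage; it is illustrative and not part of this commit. It assumes an existing SparkSession named spark, DataFrames xDf and yDf holding features and two-valued labels, and placeholder values for the $model and $Log arguments, which still have no ifdef defaults in l2-svm.dml.

    // Illustrative sketch only (not part of this commit): running l2-svm.dml via MLContext.
    // spark, xDf and yDf are assumed to exist; paths below are placeholders.
    import org.apache.sysml.api.mlcontext.MLContext
    import org.apache.sysml.api.mlcontext.ScriptFactory.dmlFromFile

    val ml = new MLContext(spark)

    val train = dmlFromFile("scripts/algorithms/l2-svm.dml")
      .in("X", xDf)                           // input matrix for X (read($X) in the script)
      .in("Y", yDf)                           // input matrix for Y (read($Y) in the script)
      .in("$icpt", 0)                         // no intercept
      .in("$tol", 0.001)
      .in("$reg", 1.0)
      .in("$maxiter", 100)
      .in("$model", "/tmp/l2-svm-model.csv")  // placeholder path for the script's write($model)
      .in("$Log", " ")                        // " " makes the script skip the optional log file
      .out("w")                               // capture the model matrix in memory

    val results = ml.execute(train)
    val w = results.getMatrix("w")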


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/596005a8
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/596005a8
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/596005a8

Branch: refs/heads/master
Commit: 596005a80d0b39fef9b33b55145ffda043a4573d
Parents: a51f8e8
Author: j143 <[email protected]>
Authored: Sun Oct 22 21:35:08 2017 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Sun Oct 22 21:35:08 2017 -0700

----------------------------------------------------------------------
 scripts/algorithms/l2-svm-predict.dml |  82 ++++++-------
 scripts/algorithms/l2-svm.dml         | 118 +++++++++---------
 scripts/algorithms/m-svm-predict.dml  |  45 +++----
 scripts/algorithms/m-svm.dml          | 186 ++++++++++++++++-------------
 4 files changed, 222 insertions(+), 209 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/596005a8/scripts/algorithms/l2-svm-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/l2-svm-predict.dml b/scripts/algorithms/l2-svm-predict.dml
index 9052265..73e28b4 100644
--- a/scripts/algorithms/l2-svm-predict.dml
+++ b/scripts/algorithms/l2-svm-predict.dml
@@ -51,6 +51,7 @@ cmdLine_Y = ifdef($Y, " ")
 cmdLine_confusion = ifdef($confusion, " ")
 cmdLine_accuracy = ifdef($accuracy, " ")
 cmdLine_scores = ifdef($scores, " ")
+cmdLine_scoring_only = ifdef($scoring_only, FALSE)
 cmdLine_fmt = ifdef($fmt, "text")
 
 X = read($X)
@@ -59,7 +60,7 @@ w = read($model)
 
 dimensions = as.scalar(w[nrow(w),1])
 if(dimensions != ncol(X))
-       stop("Stopping due to invalid input: Model dimensions do not seem to match input data dimensions")
+  stop("Stopping due to invalid input: Model dimensions do not seem to match input data dimensions")
        
 intercept = as.scalar(w[nrow(w)-1,1])
 negative_label = as.scalar(w[nrow(w)-2,1])
@@ -68,56 +69,51 @@ w = w[1:(nrow(w)-4),]
 
 b = 0.0
 if(intercept == 1)
-       b = as.scalar(w[nrow(w),1])
+  b = as.scalar(w[nrow(w),1])
 
 scores = b + (X %*% w[1:ncol(X),])
 
 if(cmdLine_scores != " ")
-       write(scores, cmdLine_scores, format=cmdLine_fmt)
+  write(scores, cmdLine_scores, format=cmdLine_fmt)
 
-if(cmdLine_Y != " "){
-       y = read(cmdLine_Y)
+if(!cmdLine_scoring_only){
+  Y = read(cmdLine_Y)
 
-       pred = (scores >= 0)
-       pred_labels = pred*positive_label + (1-pred)*negative_label
-       num_correct = sum(pred_labels == y)
-       acc = 100*num_correct/nrow(X)
+  pred = (scores >= 0)
+  pred_labels = pred*positive_label + (1-pred)*negative_label
+  num_correct = sum(pred_labels == Y)
+  acc = 100*num_correct/nrow(X)
 
-       acc_str = "Accuracy (%): " + acc
-       print(acc_str)
-       if(cmdLine_accuracy != " ")
-               write(acc_str, cmdLine_accuracy)
+  acc_str = "Accuracy (%): " + acc
+  print(acc_str)
+  
+  if(cmdLine_accuracy != " ")
+    write(acc_str, cmdLine_accuracy)
 
-       if(cmdLine_confusion != " "){
-               pred = 2*pred - 1
-               if(negative_label != -1 | positive_label != +1)
-               y = 2/(positive_label - negative_label)*y - (negative_label + positive_label)/(positive_label - negative_label)
-               
-               pred_is_minus = (pred == -1)
-               pred_is_plus = 1 - pred_is_minus
-               y_is_minus = (y == -1)
-               y_is_plus = 1 - y_is_minus
-
-               check_min_y_minus = sum(pred_is_minus*y_is_minus)
-               check_min_y_plus = sum(pred_is_minus*y_is_plus)
-               check_max_y_minus = sum(pred_is_plus*y_is_minus)
-               check_max_y_plus = sum(pred_is_plus*y_is_plus)
+  if(cmdLine_confusion != " "){
+  
+    pred = 2*pred - 1
+    
+    if(negative_label != -1 | positive_label != +1)
+      Y = 2/(positive_label - negative_label)*Y - (negative_label + positive_label)/(positive_label - negative_label)
+               
+    pred_is_minus = (pred == -1)
+    pred_is_plus = 1 - pred_is_minus
+    y_is_minus = (Y == -1)
+    y_is_plus = 1 - y_is_minus
 
-               #s = check_min_y_minus + "," + check_min_y_plus
-               #s = append(s, check_max_y_minus + "," + check_max_y_plus)
-               #s = append(s, "")
-               #write(s, cmdLine_confusion)
+    check_min_y_minus = sum(pred_is_minus*y_is_minus)
+    check_min_y_plus = sum(pred_is_minus*y_is_plus)
+    check_max_y_minus = sum(pred_is_plus*y_is_minus)
+    check_max_y_plus = sum(pred_is_plus*y_is_plus)
                
-               confusion_mat = matrix(0, rows=3, cols=3)
-        confusion_mat[1,2] = negative_label
-        confusion_mat[1,3] = positive_label
-        confusion_mat[2,1] = negative_label
-        confusion_mat[3,1] = positive_label
-        confusion_mat[2,2] = check_min_y_minus
-        confusion_mat[2,3] = check_max_y_minus
-        confusion_mat[3,2] = check_min_y_plus
-        confusion_mat[3,3] = check_max_y_plus
-
-        write(confusion_mat, cmdLine_confusion, format="csv")
-       }
+    confusion_mat = matrix(0, rows=2, cols=2)
+    confusion_mat[1,1] = check_min_y_minus
+    confusion_mat[1,2] = check_min_y_plus
+    confusion_mat[2,1] = check_max_y_minus
+    confusion_mat[2,2] = check_max_y_plus
+       
+    write(confusion_mat, cmdLine_confusion, format="csv")
+  }
 }
+
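
The new $scoring_only flag above lets l2-svm-predict.dml (and m-svm-predict.dml below) skip the accuracy and confusion-matrix block when no labels are available, which is the typical MLContext scoring path. A minimal scoring-only sketch follows; it is illustrative and not part of this commit. It reuses ml and the model matrix w from the training sketch, assumes a test DataFrame xTestDf, and the binding of the model matrix to the script variable w (the variable assigned from read($model)) is an assumption.

    // Illustrative sketch only (not part of this commit): scoring with l2-svm-predict.dml.
    import org.apache.sysml.api.mlcontext.ScriptFactory.dmlFromFile

    val predict = dmlFromFile("scripts/algorithms/l2-svm-predict.dml")
      .in("X", xTestDf)            // features to score
      .in("w", w)                  // assumed binding for the model matrix read via read($model)
      .in("$scoring_only", true)   // new flag: skip the accuracy/confusion block
      .out("scores")               // capture raw scores instead of writing $scores

    val scores = ml.execute(predict).getMatrix("scores")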

http://git-wip-us.apache.org/repos/asf/systemml/blob/596005a8/scripts/algorithms/l2-svm.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/l2-svm.dml b/scripts/algorithms/l2-svm.dml
index d5c7e02..141ef82 100644
--- a/scripts/algorithms/l2-svm.dml
+++ b/scripts/algorithms/l2-svm.dml
@@ -61,7 +61,7 @@ X = read($X)
 Y = read($Y)
 
 if(nrow(X) < 2)
-       stop("Stopping due to invalid inputs: Not possible to learn a binary class classifier without at least 2 rows")
+  stop("Stopping due to invalid inputs: Not possible to learn a binary class classifier without at least 2 rows")
 
 check_min = min(Y)
 check_max = max(Y)
@@ -69,46 +69,44 @@ num_min = sum(Y == check_min)
 num_max = sum(Y == check_max)
 
 if(check_min == check_max)
-       stop("Stopping due to invalid inputs: Y seems to contain exactly one label")
+  stop("Stopping due to invalid inputs: Y seems to contain exactly one label")
 
 if(num_min + num_max != nrow(Y))
-       stop("Stopping due to invalid inputs: Y seems to contain more than 2 labels")
+  stop("Stopping due to invalid inputs: Y seems to contain more than 2 labels")
        
 if(check_min != -1 | check_max != +1) 
-       Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
+  Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
 
 positive_label = check_max
 negative_label = check_min
 
-continue = 1
-
 intercept = cmdLine_icpt
 if(intercept != 0 & intercept != 1)
-       stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1")
+  stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1")
 
 epsilon = cmdLine_tol
 if(epsilon < 0)
-       stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative")
+  stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative")
        
 lambda = cmdLine_reg
 if(lambda < 0)
-       stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
+  stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
        
 maxiterations = cmdLine_maxiter
 if(maxiterations < 1)
-       stop("Stopping due to invalid argument: Maximum iterations should be a positive integer")
+  stop("Stopping due to invalid argument: Maximum iterations should be a positive integer")
 
 num_samples = nrow(X)
 dimensions = ncol(X)
 
 if (intercept == 1) {
-       ones  = matrix(1, rows=num_samples, cols=1)
-       X = cbind(X, ones);
+  ones  = matrix(1, rows=num_samples, cols=1)
+  X = cbind(X, ones);
 }
 
 num_rows_in_w = dimensions
 if(intercept == 1){
-       num_rows_in_w = num_rows_in_w + 1
+  num_rows_in_w = num_rows_in_w + 1
 }
 w = matrix(0, rows=num_rows_in_w, cols=1)
 
@@ -118,54 +116,49 @@ s = g_old
 Xw = matrix(0, rows=nrow(X), cols=1)
 debug_str = "# Iter, Obj"
 iter = 0
-while(continue == 1 & iter < maxiterations)  {
-       # minimizing primal obj along direction s
-    step_sz = 0
-    Xd = X %*% s
-    wd = lambda * sum(w * s)
-    dd = lambda * sum(s * s)
-    continue1 = 1
-    while(continue1 == 1){
-               tmp_Xw = Xw + step_sz*Xd
-       out = 1 - Y * (tmp_Xw)
-       sv = (out > 0)
-       out = out * sv
-       g = wd + step_sz*dd - sum(out * Y * Xd)
-       h = dd + sum(Xd * sv * Xd)
-       step_sz = step_sz - g/h
-       if (g*g/h < 0.0000000001){
-               continue1 = 0
-       }
-    }
-
-    #update weights
-    w = w + step_sz*s
-       Xw = Xw + step_sz*Xd
-       
-    out = 1 - Y * Xw
+continue = TRUE
+while(continue & iter < maxiterations)  {
+  # minimizing primal obj along direction s
+  step_sz = 0
+  Xd = X %*% s
+  wd = lambda * sum(w * s)
+  dd = lambda * sum(s * s)
+  
+  continue1 = TRUE
+  while(continue1){
+    tmp_Xw = Xw + step_sz*Xd
+    out = 1 - Y * (tmp_Xw)
     sv = (out > 0)
-    out = sv * out
-    obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
-    g_new = t(X) %*% (out * Y) - lambda * w
-
-    print("ITER " + iter + ": OBJ=" + obj)
-       debug_str = append(debug_str, iter + "," + obj)
+    out = out * sv
+    g = wd + step_sz*dd - sum(out * Y * Xd)
+    h = dd + sum(Xd * sv * Xd)
+    step_sz = step_sz - g/h
+    
+    continue1 = (g*g/h >= 0.0000000001);
+  }
+
+  #update weights
+  w = w + step_sz*s
+  Xw = Xw + step_sz*Xd
        
-    tmp = sum(s * g_old)
-    if(step_sz*tmp < epsilon*obj){
-       continue = 0
-    }
-
-    #non-linear CG step
-    be = sum(g_new * g_new)/sum(g_old * g_old)
-    s = be * s + g_new
-    g_old = g_new
-
-       if(sum(s^2) == 0){
-           continue = 0
-       }
-
-    iter = iter + 1
+  out = 1 - Y * Xw
+  sv = (out > 0)
+  out = sv * out
+  obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
+  g_new = t(X) %*% (out * Y) - lambda * w
+
+  print("ITER " + iter + ": OBJ=" + obj)
+  debug_str = append(debug_str, iter + "," + obj)
+       
+  tmp = sum(s * g_old)
+  continue = (step_sz*tmp >= epsilon*obj & sum(s^2) != 0);
+
+  #non-linear CG step
+  be = sum(g_new * g_new)/sum(g_old * g_old)
+  s = be * s + g_new
+  g_old = g_new
+    
+  iter = iter + 1
 }
 
 extra_model_params = matrix(0, rows=4, cols=1)
@@ -174,10 +167,13 @@ extra_model_params[2,1] = negative_label
 extra_model_params[3,1] = intercept
 extra_model_params[4,1] = dimensions
 
+weights = w
 w = t(cbind(t(w), t(extra_model_params)))
 write(w, $model, format=cmdLine_fmt)
+# write(extra_model_params, " ", format=cmdLine_fmt)
+# write(weights, " ", format=cmdLine_fmt)
 
 logFile = $Log
 if(logFile != " ") {
-       write(debug_str, logFile)
-}
\ No newline at end of file
+  write(debug_str, logFile)
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/596005a8/scripts/algorithms/m-svm-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/m-svm-predict.dml b/scripts/algorithms/m-svm-predict.dml
index a959836..8ad8bf0 100644
--- a/scripts/algorithms/m-svm-predict.dml
+++ b/scripts/algorithms/m-svm-predict.dml
@@ -26,13 +26,14 @@
 # accuracy (%) for the predictions
 #
 # Example Usage:
-# hadoop jar SystemML.jar -f m-svm-predict.dml -nvargs X=data Y=labels model=model scores=scores accuracy=accuracy confusion=confusion fmt="text"
+# hadoop jar SystemML.jar -f m-svm-predict.dml -nvargs X=data Y=labels scoring_only=FALSE model=model scores=scores accuracy=accuracy confusion=confusion fmt="text"
 #
 
 cmdLine_Y = ifdef($Y, " ")
 cmdLine_confusion = ifdef($confusion, " ")
 cmdLine_accuracy = ifdef($accuracy, " ")
 cmdLine_scores = ifdef($scores, " ")
+cmdLine_scoring_only = ifdef($scoring_only, FALSE)
 cmdLine_fmt = ifdef($fmt, "text")
 
 X = read($X);
@@ -40,7 +41,7 @@ W = read($model);
 
 dimensions = as.scalar(W[nrow(W),1])
 if(dimensions != ncol(X))
-       stop("Stopping due to invalid input: Model dimensions do not seem to match input data dimensions")
+  stop("Stopping due to invalid input: Model dimensions do not seem to match input data dimensions")
 
 intercept = as.scalar(W[nrow(W)-1,1])
 W = W[1:(nrow(W)-2),]
@@ -51,34 +52,34 @@ m=ncol(X);
 
 b = matrix(0, rows=1, cols=num_classes)
 if (intercept == 1)
-       b = W[m+1,]
+  b = W[m+1,]
 
 ones = matrix(1, rows=N, cols=1)
 scores = X %*% W[1:m,] + ones %*% b;
        
 if(cmdLine_scores != " ")
-       write(scores, cmdLine_scores, format=cmdLine_fmt);
+  write(scores, cmdLine_scores, format=cmdLine_fmt);
 
-if(cmdLine_Y != " "){
-       y = read(cmdLine_Y);
+if(!cmdLine_scoring_only){
+  Y = read(cmdLine_Y);
        
-       if(min(y) < 1)
-               stop("Stopping due to invalid argument: Label vector (Y) must be recoded")
+  if(min(Y) < 1)
+    stop("Stopping due to invalid argument: Label vector (Y) must be recoded")
        
-       pred = rowIndexMax(scores);
-       correct_percentage = sum((pred - y) == 0) / N * 100;
-       
-       acc_str = "Accuracy (%): " + correct_percentage
-       print(acc_str)
-       if(cmdLine_accuracy != " ")
-               write(acc_str, cmdLine_accuracy)
+  pred = rowIndexMax(scores);
+  correct_percentage = sum((pred - Y) == 0) / N * 100;
+  
+  acc_str = "Accuracy (%): " + correct_percentage
+  print(acc_str)
+  if(cmdLine_accuracy != " ")
+    write(acc_str, cmdLine_accuracy)
 
-       num_classes_ground_truth = max(y)
-       if(num_classes < num_classes_ground_truth)
-               num_classes = num_classes_ground_truth
+  num_classes_ground_truth = max(Y)
+  if(num_classes < num_classes_ground_truth)
+    num_classes = num_classes_ground_truth
 
-       if(cmdLine_confusion != " "){
-               confusion_mat = table(y, pred, num_classes, num_classes)
-               write(confusion_mat, cmdLine_confusion, format="csv")
-       }
+  if(cmdLine_confusion != " "){
+    confusion_mat = table(Y, pred, num_classes, num_classes)
+    write(confusion_mat, cmdLine_confusion, format="csv")
+  }
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/596005a8/scripts/algorithms/m-svm.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/m-svm.dml b/scripts/algorithms/m-svm.dml
index 8d3d5f3..6c11811 100644
--- a/scripts/algorithms/m-svm.dml
+++ b/scripts/algorithms/m-svm.dml
@@ -26,6 +26,23 @@
 # Assume SVM_HOME is set to the home of the dml script
 # Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
 # Assume epsilon = 0.001, lambda=1.0, max_iterations = 100
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME      TYPE    DEFAULT     MEANING
+# ---------------------------------------------------------------------------------------------
+# X         String  ---         Location to read the matrix X of feature vectors
+# Y         String  ---         Location to read response matrix Y
+# icpt      Int     0           Intercept presence
+#                               0 = no intercept
+#                               1 = add intercept;
+# tol       Double  0.001       Tolerance (epsilon);
+# reg       Double  1.0         Regularization parameter
+# maxiter   Int     100         Maximum number of conjugate gradient iterations
+# model     String  ---         Location to write model
+# fmt       String  "text"      The output format of the model, such as "text" or "csv"
+# Log       String  ---         [OPTIONAL] Location to write the log file
+# ---------------------------------------------------------------------------------------------
 # 
 # hadoop jar SystemML.jar -f $SVM_HOME/m-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/y icpt=intercept tol=.001 reg=1.0 maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
 #
@@ -41,141 +58,144 @@ print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + "
 X = read($X)
 
 if(nrow(X) < 2)
-       stop("Stopping due to invalid inputs: Not possible to learn a classifier without at least 2 rows")
+  stop("Stopping due to invalid inputs: Not possible to learn a classifier without at least 2 rows")
 
 dimensions = ncol(X)
 
 Y = read($Y)
 
 if(nrow(X) != nrow(Y))
-       stop("Stopping due to invalid argument: Numbers of rows in X and Y must match")
+  stop("Stopping due to invalid argument: Numbers of rows in X and Y must match")
 
 intercept = cmdLine_icpt
 if(intercept != 0 & intercept != 1)
-       stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1")
+  stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1")
 
 min_y = min(Y)
 if(min_y < 1)
-       stop("Stopping due to invalid argument: Label vector (Y) must be recoded")
+  stop("Stopping due to invalid argument: Label vector (Y) must be recoded")
+  
 num_classes = max(Y)
 if(num_classes == 1)
-       stop("Stopping due to invalid argument: Maximum label value is 1, need more than one class to learn a multi-class classifier")
+  stop("Stopping due to invalid argument: Maximum label value is 1, need more than one class to learn a multi-class classifier")
+  
 mod1 = Y %% 1
 mod1_should_be_nrow = sum(abs(mod1 == 0))
 if(mod1_should_be_nrow != nrow(Y))
-       stop("Stopping due to invalid argument: Please ensure that Y contains (positive) integral labels")
+  stop("Stopping due to invalid argument: Please ensure that Y contains (positive) integral labels")
        
 epsilon = cmdLine_tol
 if(epsilon < 0)
-       stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative")
+  stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative")
 
 lambda = cmdLine_reg
 if(lambda < 0)
-       stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
+  stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
 
 max_iterations = cmdLine_maxiter
 if(max_iterations < 1)
-       stop("Stopping due to invalid argument: Maximum iterations should be a positive integer")
+  stop("Stopping due to invalid argument: Maximum iterations should be a positive integer")
 
 num_samples = nrow(X)
 num_features = ncol(X)
 
 if (intercept == 1) {
-       ones  = matrix(1, rows=num_samples, cols=1);
-       X = cbind(X, ones);
+  ones  = matrix(1, rows=num_samples, cols=1);
+  X = cbind(X, ones);
 }
 
 num_rows_in_w = num_features
 if(intercept == 1){
-       num_rows_in_w = num_rows_in_w + 1
+  num_rows_in_w = num_rows_in_w + 1
 }
 w = matrix(0, rows=num_rows_in_w, cols=num_classes)
 
 debug_mat = matrix(-1, rows=max_iterations, cols=num_classes)
+
 parfor(iter_class in 1:num_classes){             
-       Y_local = 2 * (Y == iter_class) - 1
-       w_class = matrix(0, rows=num_features, cols=1)
-       if (intercept == 1) {
-               zero_matrix = matrix(0, rows=1, cols=1);
-               w_class = t(cbind(t(w_class), zero_matrix));
-       }
- 
-       g_old = t(X) %*% Y_local
-       s = g_old
-
-       Xw = matrix(0, rows=nrow(X), cols=1)
-       iter = 0
-       continue = 1
-       while(continue == 1)  {
-               # minimizing primal obj along direction s
-               step_sz = 0
-               Xd = X %*% s
-               wd = lambda * sum(w_class * s)
-               dd = lambda * sum(s * s)
-               continue1 = 1
-               while(continue1 == 1){
-                       tmp_Xw = Xw + step_sz*Xd
-                       out = 1 - Y_local * (tmp_Xw)
-                       sv = (out > 0)
-                       out = out * sv
-                       g = wd + step_sz*dd - sum(out * Y_local * Xd)
-                       h = dd + sum(Xd * sv * Xd)
-                       step_sz = step_sz - g/h
-                       if (g*g/h < 0.0000000001){
-                       continue1 = 0
-               }
-       }
- 
-               #update weights
-               w_class = w_class + step_sz*s
-               Xw = Xw + step_sz*Xd
+  Y_local = 2 * (Y == iter_class) - 1
+  w_class = matrix(0, rows=num_features, cols=1)
+  
+  if (intercept == 1) {
+    zero_matrix = matrix(0, rows=1, cols=1);
+    w_class = t(cbind(t(w_class), zero_matrix));
+  }
+  
+  g_old = t(X) %*% Y_local
+  s = g_old
+
+  Xw = matrix(0, rows=nrow(X), cols=1)
+  iter = 0
+  continue = TRUE
+  while(continue & iter < max_iterations)  {
+    # minimizing primal obj along direction s
+    step_sz = 0
+    Xd = X %*% s
+    wd = lambda * sum(w_class * s)
+    dd = lambda * sum(s * s)
+    
+    continue1 = TRUE
+    while(continue1){
+      tmp_Xw = Xw + step_sz*Xd
+      out = 1 - Y_local * (tmp_Xw)
+      sv = (out > 0)
+      out = out * sv
+      g = wd + step_sz*dd - sum(out * Y_local * Xd)
+      h = dd + sum(Xd * sv * Xd)
+      step_sz = step_sz - g/h
+      
+      continue1 = (g*g/h >= 0.0000000001)
+      
+    }
+    
+    #update weights
+    w_class = w_class + step_sz*s
+    Xw = Xw + step_sz*Xd
  
-               out = 1 - Y_local * Xw
-               sv = (out > 0)
-               out = sv * out
-               obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
-               g_new = t(X) %*% (out * Y_local) - lambda * w_class
+    out = 1 - Y_local * Xw
+    sv = (out > 0)
+    out = sv * out
+    obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+    g_new = t(X) %*% (out * Y_local) - lambda * w_class
 
-               tmp = sum(s * g_old)
+    tmp = sum(s * g_old)
   
-               train_acc = sum(Y_local*(X%*%w_class) >= 0)/num_samples*100
-               print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
-               debug_mat[iter+1,iter_class] = obj         
+    train_acc = sum(Y_local*(X%*%w_class) >= 0)/num_samples*100
+    print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
+    debug_mat[iter+1,iter_class] = obj    
    
-               if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){
-                       continue = 0
-               }
- 
-               #non-linear CG step
-               be = sum(g_new * g_new)/sum(g_old * g_old)
-               s = be * s + g_new
-               g_old = g_new
-
-               if(sum(s^2) == 0){
-               continue = 0
-               }
-
-               iter = iter + 1
-       }
-
-       w[,iter_class] = w_class
-}
+    continue = (step_sz*tmp >= epsilon*obj & sum(s^2) != 0);
+        
+    #non-linear CG step
+    be = sum(g_new * g_new)/sum(g_old * g_old)
+    s = be * s + g_new
+    g_old = g_new
+    
+    iter = iter + 1
+  }
+
+  w[,iter_class] = w_class
+} # parfor loop
 
 extra_model_params = matrix(0, rows=2, cols=ncol(w))
 extra_model_params[1, 1] = intercept
 extra_model_params[2, 1] = dimensions
+weights = w
 w = t(cbind(t(w), t(extra_model_params)))
 write(w, $model, format=cmdLine_fmt)
+# write(extra_model_params, " ", format=cmdLine_fmt)
+# write(weights, " ", format=cmdLine_fmt)
 
 debug_str = "# Class, Iter, Obj"
 for(iter_class in 1:ncol(debug_mat)){
-       for(iter in 1:nrow(debug_mat)){
-               obj = as.scalar(debug_mat[iter, iter_class])
-               if(obj != -1) 
-                       debug_str = append(debug_str, iter_class + "," + iter + "," + obj)
-       }
+  for(iter in 1:nrow(debug_mat)){
+    obj = as.scalar(debug_mat[iter, iter_class])
+    if(obj != -1) 
+      debug_str = append(debug_str, iter_class + "," + iter + "," + obj)
+  }
 }
+
 logFile = $Log
-if(logFile != " ") {
-       write(debug_str, logFile)
-}
\ No newline at end of file
+if(logFile != " ")
+  write(debug_str, logFile)
+
