http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/kmeans/Kmeans.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/kmeans/Kmeans.dml b/src/test/scripts/applications/kmeans/Kmeans.dml index 3e1f8c9..368b98d 100644 --- a/src/test/scripts/applications/kmeans/Kmeans.dml +++ b/src/test/scripts/applications/kmeans/Kmeans.dml @@ -1,108 +1,108 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# Implements the k-Means clustering algorithm -# INPUT 1: Input file name for X input data (data records) -# INPUT 2: The number k of centroids -# INPUT 3: Output file name for the centroids -# Example: hadoop jar SystemML.jar -f kMeans.dml -args X_input_file 5 centroids_file - -print( "Performing initialization..." ); - -# X : matrix of data points as rows -X = read( $1 ); - -num_records = nrow( X ); -num_features = ncol( X ); -num_centroids = $2; - -one_per_record = matrix( 1.0, rows = num_records, cols = 1); -one_per_feature = matrix( 1.0, rows = num_features, cols = 1); -one_per_centroid = matrix( 1.0, rows = num_centroids, cols = 1); - -# Y : matrix of centroids as rows -Y = matrix( 0.0, rows = num_centroids, cols = num_features ); -# D : matrix of squared distances from X rows to Y rows, up to a Y-independent term -D = matrix( 0.0, rows = num_records, cols = num_centroids ); - -print( "Taking a data sample to compute the convergence criterion..." 
); - -X_sample = X; -sample_size = 1000; -if (num_records > sample_size) -{ - # Sample approximately 1000 records (Bernoulli sampling) - P = Rand( rows = num_records, cols = 1, min = 0.0, max = 1.0 ); - P = ppred( P * num_records, sample_size, "<=" ); - X_sample = X * (P %*% t( one_per_feature )); - X_sample = removeEmpty( target = X_sample, margin = "rows" ); -} - -sample_size = nrow( X_sample ); -one_per_sample = matrix( 1.0, rows = sample_size, cols = 1 ); - -# Compute eps for the convergence criterion as the average square distance -# between records in the sample times a small number - -eps = 0.0000001 * - sum (one_per_sample %*% t( rowSums( X_sample * X_sample ) ) - + rowSums( X_sample * X_sample ) %*% t( one_per_sample ) - - 2.0 * X_sample %*% t( X_sample )) / (sample_size * sample_size); - -# Start iterations - -centroid_change = 10.0 + eps; -iter_count = 0; -print ("Starting the iterations..."); - -while (centroid_change > eps) -{ - iter_count = iter_count + 1; - old_Y = matrix( 0.0, rows = num_centroids, cols = num_features ); - if ( iter_count == 1 - | ( centroid_change != centroid_change ) # Check if - | ( ( centroid_change == centroid_change + 1 ) # centroid_change - & ( centroid_change == 2 * centroid_change ) ) ) # is a "NaN" - { - # Start anew, by setting D to a random matrix - D = Rand (rows = num_records, cols = num_centroids, min = 0.0, max = 1.0); - } else { - old_Y = Y; - # Euclidean squared distances from records (X rows) to centroids (Y rows) - # without a redundant Y-independent term - D = one_per_record %*% t(rowSums (Y * Y)) - 2.0 * X %*% t(Y); - } - # Find the closest centroid for each record - P = ppred (D, rowMins (D) %*% t(one_per_centroid), "<="); - # If some records belong to multiple centroids, share them equally - P = P / (rowSums (P) %*% t(one_per_centroid)); - # Normalize the columns of P to compute record weights for new centroids - P = P / (one_per_record %*% colSums (P)); - # Compute new centroids as weighted averages over the records - Y = t(P) %*% X; - # Measure the squared difference between old and new centroids - centroid_change = sum ( (Y - old_Y) * (Y - old_Y) ) / num_centroids; - print ("Iteration " + iter_count + ": centroid_change = " + centroid_change); -} - -print( "Writing out the centroids..." ); -write( Y, $3, format = "text" ); -print( "Done." ); +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + +# Implements the k-Means clustering algorithm +# INPUT 1: Input file name for X input data (data records) +# INPUT 2: The number k of centroids +# INPUT 3: Output file name for the centroids +# Example: hadoop jar SystemML.jar -f kMeans.dml -args X_input_file 5 centroids_file + +print( "Performing initialization..." ); + +# X : matrix of data points as rows +X = read( $1 ); + +num_records = nrow( X ); +num_features = ncol( X ); +num_centroids = $2; + +one_per_record = matrix( 1.0, rows = num_records, cols = 1); +one_per_feature = matrix( 1.0, rows = num_features, cols = 1); +one_per_centroid = matrix( 1.0, rows = num_centroids, cols = 1); + +# Y : matrix of centroids as rows +Y = matrix( 0.0, rows = num_centroids, cols = num_features ); +# D : matrix of squared distances from X rows to Y rows, up to a Y-independent term +D = matrix( 0.0, rows = num_records, cols = num_centroids ); + +print( "Taking a data sample to compute the convergence criterion..." ); + +X_sample = X; +sample_size = 1000; +if (num_records > sample_size) +{ + # Sample approximately 1000 records (Bernoulli sampling) + P = Rand( rows = num_records, cols = 1, min = 0.0, max = 1.0 ); + P = ppred( P * num_records, sample_size, "<=" ); + X_sample = X * (P %*% t( one_per_feature )); + X_sample = removeEmpty( target = X_sample, margin = "rows" ); +} + +sample_size = nrow( X_sample ); +one_per_sample = matrix( 1.0, rows = sample_size, cols = 1 ); + +# Compute eps for the convergence criterion as the average square distance +# between records in the sample times a small number + +eps = 0.0000001 * + sum (one_per_sample %*% t( rowSums( X_sample * X_sample ) ) + + rowSums( X_sample * X_sample ) %*% t( one_per_sample ) + - 2.0 * X_sample %*% t( X_sample )) / (sample_size * sample_size); + +# Start iterations + +centroid_change = 10.0 + eps; +iter_count = 0; +print ("Starting the iterations..."); + +while (centroid_change > eps) +{ + iter_count = iter_count + 1; + old_Y = matrix( 0.0, rows = num_centroids, cols = num_features ); + if ( iter_count == 1 + | ( centroid_change != centroid_change ) # Check if + | ( ( centroid_change == centroid_change + 1 ) # centroid_change + & ( centroid_change == 2 * centroid_change ) ) ) # is a "NaN" + { + # Start anew, by setting D to a random matrix + D = Rand (rows = num_records, cols = num_centroids, min = 0.0, max = 1.0); + } else { + old_Y = Y; + # Euclidean squared distances from records (X rows) to centroids (Y rows) + # without a redundant Y-independent term + D = one_per_record %*% t(rowSums (Y * Y)) - 2.0 * X %*% t(Y); + } + # Find the closest centroid for each record + P = ppred (D, rowMins (D) %*% t(one_per_centroid), "<="); + # If some records belong to multiple centroids, share them equally + P = P / (rowSums (P) %*% t(one_per_centroid)); + # Normalize the columns of P to compute record weights for new centroids + P = P / (one_per_record %*% colSums (P)); + # Compute new centroids as weighted averages over the records + Y = t(P) %*% X; + # Measure the squared difference between old and new centroids + centroid_change = sum ( (Y - old_Y) * (Y - old_Y) ) / num_centroids; + print ("Iteration " + iter_count + ": centroid_change = " + centroid_change); +} + +print( "Writing out the centroids..." ); +write( Y, $3, format = "text" ); +print( "Done." );
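For readers following the Kmeans.dml diff above: the update it performs each iteration can be condensed into a few lines of R (R is used here because the neighbouring test scripts in this commit are R). This is only an illustrative sketch: X_demo, Y_demo and k are made-up names, the data is synthetic, and the empty-cluster restart that the DML script triggers through its NaN check on centroid_change is omitted.

set.seed(1)
k <- 3
X_demo <- matrix(rnorm(100 * 2), 100, 2)        # 100 records, 2 features
Y_demo <- X_demo[sample(nrow(X_demo), k), ]     # start from k random records as centroids

for (i in 1:10) {
  # Squared distances up to the record-dependent term rowSums(X^2), which does not
  # change which centroid is closest (the same shortcut Kmeans.dml uses for D)
  D <- matrix(rowSums(Y_demo^2), nrow(X_demo), k, byrow = TRUE) - 2 * X_demo %*% t(Y_demo)
  # Closest centroid per record; ties shared equally, then columns normalized to weights
  P <- (D <= matrix(apply(D, 1, min), nrow(D), k)) * 1
  P <- P / rowSums(P)
  P <- sweep(P, 2, colSums(P), "/")              # breaks down if a cluster empties out
  # New centroids as weighted averages over the records, as in Y = t(P) %*% X
  Y_demo <- t(P) %*% X_demo
}
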
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/l2svm/L2SVM.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/l2svm/L2SVM.R b/src/test/scripts/applications/l2svm/L2SVM.R index ccf6ca1..bf419f1 100644 --- a/src/test/scripts/applications/l2svm/L2SVM.R +++ b/src/test/scripts/applications/l2svm/L2SVM.R @@ -1,103 +1,103 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# JUnit test class: dml.test.integration.applications.L2SVMTest.java -# command line invocation assuming $L2SVM_HOME is set to the home of the R script -# Rscript $L2SVM_HOME/L2SVM.R $L2SVM_HOME/in/ 0.00000001 1 100 $L2SVM_HOME/expected/ - -args <- commandArgs(TRUE) -library("Matrix") - -X = readMM(paste(args[1], "X.mtx", sep="")); -Y = readMM(paste(args[1], "Y.mtx", sep="")); - -check_min = min(Y) -check_max = max(Y) -num_min = sum(Y == check_min) -num_max = sum(Y == check_max) -if(num_min + num_max != nrow(Y)){ - print("please check Y, it should contain only 2 labels") -}else{ - if(check_min != -1 | check_max != +1) - Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min) -} - -intercept = as.integer(args[2]); -epsilon = as.double(args[3]); -lambda = as.double(args[4]); -maxiterations = as.integer(args[5]); - -N = nrow(X) -D = ncol(X) - -if (intercept == 1) { - ones = matrix(1,N,1) - X = cbind(X, ones); -} - -num_rows_in_w = D -if(intercept == 1){ - num_rows_in_w = num_rows_in_w + 1 -} -w = matrix(0, num_rows_in_w, 1) - -g_old = t(X) %*% Y -s = g_old - -Xw = matrix(0,nrow(X),1) -iter = 0 -continue = TRUE -while(continue && iter < maxiterations){ - t = 0 - Xd = X %*% s - wd = lambda * sum(w * s) - dd = lambda * sum(s * s) - continue1 = TRUE - while(continue1){ - tmp_Xw = Xw + t*Xd - out = 1 - Y * (tmp_Xw) - sv = which(out > 0) - g = wd + t*dd - sum(out[sv] * Y[sv] * Xd[sv]) - h = dd + sum(Xd[sv] * Xd[sv]) - t = t - g/h - continue1 = (g*g/h >= 1e-10) - } - - w = w + t*s - Xw = Xw + t*Xd - - out = 1 - Y * (X %*% w) - sv = which(out > 0) - obj = 0.5 * sum(out[sv] * out[sv]) + lambda/2 * sum(w * w) - g_new = t(X[sv,]) %*% (out[sv] * Y[sv]) - lambda * w - - print(paste("OBJ : ", obj)) - - continue = (t*sum(s * g_old) >= epsilon*obj) - - be = sum(g_new * g_new)/sum(g_old * g_old) - s = be * s + g_new - g_old = g_new - - iter = iter + 1 -} - -writeMM(as(w,"CsparseMatrix"), paste(args[6], "w", sep="")); +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +# JUnit test class: dml.test.integration.applications.L2SVMTest.java +# command line invocation assuming $L2SVM_HOME is set to the home of the R script +# Rscript $L2SVM_HOME/L2SVM.R $L2SVM_HOME/in/ 0.00000001 1 100 $L2SVM_HOME/expected/ + +args <- commandArgs(TRUE) +library("Matrix") + +X = readMM(paste(args[1], "X.mtx", sep="")); +Y = readMM(paste(args[1], "Y.mtx", sep="")); + +check_min = min(Y) +check_max = max(Y) +num_min = sum(Y == check_min) +num_max = sum(Y == check_max) +if(num_min + num_max != nrow(Y)){ + print("please check Y, it should contain only 2 labels") +}else{ + if(check_min != -1 | check_max != +1) + Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min) +} + +intercept = as.integer(args[2]); +epsilon = as.double(args[3]); +lambda = as.double(args[4]); +maxiterations = as.integer(args[5]); + +N = nrow(X) +D = ncol(X) + +if (intercept == 1) { + ones = matrix(1,N,1) + X = cbind(X, ones); +} + +num_rows_in_w = D +if(intercept == 1){ + num_rows_in_w = num_rows_in_w + 1 +} +w = matrix(0, num_rows_in_w, 1) + +g_old = t(X) %*% Y +s = g_old + +Xw = matrix(0,nrow(X),1) +iter = 0 +continue = TRUE +while(continue && iter < maxiterations){ + t = 0 + Xd = X %*% s + wd = lambda * sum(w * s) + dd = lambda * sum(s * s) + continue1 = TRUE + while(continue1){ + tmp_Xw = Xw + t*Xd + out = 1 - Y * (tmp_Xw) + sv = which(out > 0) + g = wd + t*dd - sum(out[sv] * Y[sv] * Xd[sv]) + h = dd + sum(Xd[sv] * Xd[sv]) + t = t - g/h + continue1 = (g*g/h >= 1e-10) + } + + w = w + t*s + Xw = Xw + t*Xd + + out = 1 - Y * (X %*% w) + sv = which(out > 0) + obj = 0.5 * sum(out[sv] * out[sv]) + lambda/2 * sum(w * w) + g_new = t(X[sv,]) %*% (out[sv] * Y[sv]) - lambda * w + + print(paste("OBJ : ", obj)) + + continue = (t*sum(s * g_old) >= epsilon*obj) + + be = sum(g_new * g_new)/sum(g_old * g_old) + s = be * s + g_new + g_old = g_new + + iter = iter + 1 +} + +writeMM(as(w,"CsparseMatrix"), paste(args[6], "w", sep="")); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/l2svm/L2SVM.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/l2svm/L2SVM.dml b/src/test/scripts/applications/l2svm/L2SVM.dml index bf7de14..13f2b4c 100644 --- a/src/test/scripts/applications/l2svm/L2SVM.dml +++ b/src/test/scripts/applications/l2svm/L2SVM.dml @@ -1,124 +1,124 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# How to invoke this dml script L2SVM.dml? -# Assume L2SVM_HOME is set to the home of the dml script -# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR -# hadoop jar SystemML.jar -f $L2SVM_HOME/L2SVM.pydml -nvargs X="$INPUT_DIR/X" Y="$INPUT_DIR/Y" icpt=0 tol=1.0E-8 reg=1.0 maxiter=3 model="$OUTPUT_DIR/w" Log="$OUTPUT_DIR/Log" - -# Note about inputs: -# Assumes that labels (entries in Y) -# are set to either -1 or +1 -# or the result of recoding - -cmdLine_fmt=ifdef($fmt,"text") -cmdLine_icpt=ifdef($icpt, 0) -cmdLine_tol=ifdef($tol, 0.001) -cmdLine_reg=ifdef($reg, 1.0) -cmdLine_maxiter=ifdef($maxiter, 100) - -X = read($X) -Y = read($Y) - -check_min = min(Y) -check_max = max(Y) -num_min = sum(ppred(Y, check_min, "==")) -num_max = sum(ppred(Y, check_max, "==")) -if(num_min + num_max != nrow(Y)) print("please check Y, it should contain only 2 labels") -else{ - if(check_min != -1 | check_max != +1) - Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min) -} - -epsilon = cmdLine_tol -lambda = cmdLine_reg -maxiterations = cmdLine_maxiter -intercept = cmdLine_icpt - -num_samples = nrow(X) -dimensions = ncol(X) - -if (intercept == 1) { - ones = matrix(1, rows=num_samples, cols=1) - X = append(X, ones); -} - -num_rows_in_w = dimensions -if(intercept == 1){ - num_rows_in_w = num_rows_in_w + 1 -} -w = matrix(0, rows=num_rows_in_w, cols=1) - -g_old = t(X) %*% Y -s = g_old - -Xw = matrix(0, rows=nrow(X), cols=1) -debug_str = "# Iter, Obj" -iter = 0 -continue = 1 -while(continue == 1 & iter < maxiterations) { - # minimizing primal obj along direction s - step_sz = 0 - Xd = X %*% s - wd = lambda * sum(w * s) - dd = lambda * sum(s * s) - continue1 = 1 - while(continue1 == 1){ - tmp_Xw = Xw + step_sz*Xd - out = 1 - Y * (tmp_Xw) - sv = ppred(out, 0, ">") - out = out * sv - g = wd + step_sz*dd - sum(out * Y * Xd) - h = dd + sum(Xd * sv * Xd) - step_sz = step_sz - g/h - if (g*g/h < 0.0000000001){ - continue1 = 0 - } - } - - #update weights - w = w + step_sz*s - Xw = Xw + step_sz*Xd - - out = 1 - Y * Xw - sv = ppred(out, 0, ">") - out = sv * out - obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w) - g_new = t(X) %*% (out * Y) - lambda * w - - print("OBJ = " + obj) - debug_str = append(debug_str, iter + "," + obj) - - tmp = sum(s * g_old) - if(step_sz*tmp < epsilon*obj){ - continue = 0 - } - - #non-linear CG step - be = sum(g_new * g_new)/sum(g_old * g_old) - s = be * s + g_new - g_old = g_new - - iter = iter + 1 -} - -write(w, $model, format=cmdLine_fmt) -write(debug_str, $Log) +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +# How to invoke this dml script L2SVM.dml? +# Assume L2SVM_HOME is set to the home of the dml script +# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR +# hadoop jar SystemML.jar -f $L2SVM_HOME/L2SVM.pydml -nvargs X="$INPUT_DIR/X" Y="$INPUT_DIR/Y" icpt=0 tol=1.0E-8 reg=1.0 maxiter=3 model="$OUTPUT_DIR/w" Log="$OUTPUT_DIR/Log" + +# Note about inputs: +# Assumes that labels (entries in Y) +# are set to either -1 or +1 +# or the result of recoding + +cmdLine_fmt=ifdef($fmt,"text") +cmdLine_icpt=ifdef($icpt, 0) +cmdLine_tol=ifdef($tol, 0.001) +cmdLine_reg=ifdef($reg, 1.0) +cmdLine_maxiter=ifdef($maxiter, 100) + +X = read($X) +Y = read($Y) + +check_min = min(Y) +check_max = max(Y) +num_min = sum(ppred(Y, check_min, "==")) +num_max = sum(ppred(Y, check_max, "==")) +if(num_min + num_max != nrow(Y)) print("please check Y, it should contain only 2 labels") +else{ + if(check_min != -1 | check_max != +1) + Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min) +} + +epsilon = cmdLine_tol +lambda = cmdLine_reg +maxiterations = cmdLine_maxiter +intercept = cmdLine_icpt + +num_samples = nrow(X) +dimensions = ncol(X) + +if (intercept == 1) { + ones = matrix(1, rows=num_samples, cols=1) + X = append(X, ones); +} + +num_rows_in_w = dimensions +if(intercept == 1){ + num_rows_in_w = num_rows_in_w + 1 +} +w = matrix(0, rows=num_rows_in_w, cols=1) + +g_old = t(X) %*% Y +s = g_old + +Xw = matrix(0, rows=nrow(X), cols=1) +debug_str = "# Iter, Obj" +iter = 0 +continue = 1 +while(continue == 1 & iter < maxiterations) { + # minimizing primal obj along direction s + step_sz = 0 + Xd = X %*% s + wd = lambda * sum(w * s) + dd = lambda * sum(s * s) + continue1 = 1 + while(continue1 == 1){ + tmp_Xw = Xw + step_sz*Xd + out = 1 - Y * (tmp_Xw) + sv = ppred(out, 0, ">") + out = out * sv + g = wd + step_sz*dd - sum(out * Y * Xd) + h = dd + sum(Xd * sv * Xd) + step_sz = step_sz - g/h + if (g*g/h < 0.0000000001){ + continue1 = 0 + } + } + + #update weights + w = w + step_sz*s + Xw = Xw + step_sz*Xd + + out = 1 - Y * Xw + sv = ppred(out, 0, ">") + out = sv * out + obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w) + g_new = t(X) %*% (out * Y) - lambda * w + + print("OBJ = " + obj) + debug_str = append(debug_str, iter + "," + obj) + + tmp = sum(s * g_old) + if(step_sz*tmp < epsilon*obj){ + continue = 0 + } + + #non-linear CG step + be = sum(g_new * g_new)/sum(g_old * g_old) + s = be * s + g_new + g_old = g_new + + iter = iter + 1 +} + +write(w, $model, format=cmdLine_fmt) +write(debug_str, $Log) http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/l2svm/L2SVM.pydml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/l2svm/L2SVM.pydml b/src/test/scripts/applications/l2svm/L2SVM.pydml index d2f89e6..119ff44 100644 --- 
a/src/test/scripts/applications/l2svm/L2SVM.pydml +++ b/src/test/scripts/applications/l2svm/L2SVM.pydml @@ -1,119 +1,119 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# How to invoke this pydml script L2SVM.pydml? -# Assume L2SVM_HOME is set to the home of the pydml script -# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR -# hadoop jar SystemML.jar -f $L2SVM_HOME/L2SVM.pydml -python -nvargs X="$INPUT_DIR/X" Y="$INPUT_DIR/Y" icpt=0 tol=1.0E-8 reg=1.0 maxiter=3 model="$OUTPUT_DIR/w" Log="$OUTPUT_DIR/Log" - -# Note about inputs: -# Assumes that labels (entries in Y) -# are set to either -1 or +1 -# or the result of recoding - -cmdLine_fmt=ifdef($fmt,"text") -cmdLine_icpt=ifdef($icpt, 0) -cmdLine_tol=ifdef($tol, 0.001) -cmdLine_reg=ifdef($reg, 1.0) -cmdLine_maxiter=ifdef($maxiter, 100) - -X = load($X) -Y = load($Y) - -check_min = min(Y) -check_max = max(Y) -num_min = sum(ppred(Y, check_min, "==")) -num_max = sum(ppred(Y, check_max, "==")) -if(num_min + num_max != nrow(Y)): - print("please check Y, it should contain only 2 labels") -else: - if(check_min != -1 | check_max != +1): - Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min) - -epsilon = cmdLine_tol -lambda = cmdLine_reg -maxiterations = cmdLine_maxiter -intercept = cmdLine_icpt - -num_samples = nrow(X) -dimensions = ncol(X) - -if (intercept == 1): - ones = full(1, rows=num_samples, cols=1) - X = append(X, ones) - -num_rows_in_w = dimensions -if(intercept == 1): - num_rows_in_w = num_rows_in_w + 1 -w = full(0, rows=num_rows_in_w, cols=1) - -g_old = dot(transpose(X), Y) -s = g_old - -Xw = full(0, rows=nrow(X), cols=1) -debug_str = "# Iter, Obj" -iter = 0 -continue = 1 -while(continue == 1 & iter < maxiterations): - # minimizing primal obj along direction s - step_sz = 0 - Xd = dot(X, s) - wd = lambda * sum(w * s) - dd = lambda * sum(s * s) - continue1 = 1 - while(continue1 == 1): - tmp_Xw = Xw + step_sz*Xd - out = 1 - Y * (tmp_Xw) - sv = ppred(out, 0, ">") - out = out * sv - g = wd + step_sz*dd - sum(out * Y * Xd) - h = dd + sum(Xd * sv * Xd) - step_sz = step_sz - g/h - if (g*g/h < 0.0000000001): - continue1 = 0 - - #update weights - w = w + step_sz*s - Xw = Xw + step_sz*Xd - - out = 1 - Y * Xw - sv = ppred(out, 0, ">") - out = sv * out - obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w) - g_new = dot(transpose(X), (out * Y)) - lambda * w - - print("OBJ = " + obj) - debug_str = append(debug_str, iter + "," + obj) - - tmp = sum(s * g_old) - if(step_sz*tmp < epsilon*obj): - continue = 0 - - #non-linear CG step - be = sum(g_new * g_new)/sum(g_old * g_old) - s = be * s + g_new - g_old = g_new - - iter = iter + 1 
- - -save(w, $model, format=cmdLine_fmt) -save(debug_str, $Log) +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +# How to invoke this pydml script L2SVM.pydml? +# Assume L2SVM_HOME is set to the home of the pydml script +# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR +# hadoop jar SystemML.jar -f $L2SVM_HOME/L2SVM.pydml -python -nvargs X="$INPUT_DIR/X" Y="$INPUT_DIR/Y" icpt=0 tol=1.0E-8 reg=1.0 maxiter=3 model="$OUTPUT_DIR/w" Log="$OUTPUT_DIR/Log" + +# Note about inputs: +# Assumes that labels (entries in Y) +# are set to either -1 or +1 +# or the result of recoding + +cmdLine_fmt=ifdef($fmt,"text") +cmdLine_icpt=ifdef($icpt, 0) +cmdLine_tol=ifdef($tol, 0.001) +cmdLine_reg=ifdef($reg, 1.0) +cmdLine_maxiter=ifdef($maxiter, 100) + +X = load($X) +Y = load($Y) + +check_min = min(Y) +check_max = max(Y) +num_min = sum(ppred(Y, check_min, "==")) +num_max = sum(ppred(Y, check_max, "==")) +if(num_min + num_max != nrow(Y)): + print("please check Y, it should contain only 2 labels") +else: + if(check_min != -1 | check_max != +1): + Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min) + +epsilon = cmdLine_tol +lambda = cmdLine_reg +maxiterations = cmdLine_maxiter +intercept = cmdLine_icpt + +num_samples = nrow(X) +dimensions = ncol(X) + +if (intercept == 1): + ones = full(1, rows=num_samples, cols=1) + X = append(X, ones) + +num_rows_in_w = dimensions +if(intercept == 1): + num_rows_in_w = num_rows_in_w + 1 +w = full(0, rows=num_rows_in_w, cols=1) + +g_old = dot(transpose(X), Y) +s = g_old + +Xw = full(0, rows=nrow(X), cols=1) +debug_str = "# Iter, Obj" +iter = 0 +continue = 1 +while(continue == 1 & iter < maxiterations): + # minimizing primal obj along direction s + step_sz = 0 + Xd = dot(X, s) + wd = lambda * sum(w * s) + dd = lambda * sum(s * s) + continue1 = 1 + while(continue1 == 1): + tmp_Xw = Xw + step_sz*Xd + out = 1 - Y * (tmp_Xw) + sv = ppred(out, 0, ">") + out = out * sv + g = wd + step_sz*dd - sum(out * Y * Xd) + h = dd + sum(Xd * sv * Xd) + step_sz = step_sz - g/h + if (g*g/h < 0.0000000001): + continue1 = 0 + + #update weights + w = w + step_sz*s + Xw = Xw + step_sz*Xd + + out = 1 - Y * Xw + sv = ppred(out, 0, ">") + out = sv * out + obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w) + g_new = dot(transpose(X), (out * Y)) - lambda * w + + print("OBJ = " + obj) + debug_str = append(debug_str, iter + "," + obj) + + tmp = sum(s * g_old) + if(step_sz*tmp < epsilon*obj): + continue = 0 + + #non-linear CG step + be = sum(g_new * g_new)/sum(g_old * g_old) + s = be * s + g_new + g_old = g_new + + iter = iter + 1 + + +save(w, $model, format=cmdLine_fmt) +save(debug_str, 
$Log) http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/l2svm/L2SVMTest.Rt ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/l2svm/L2SVMTest.Rt b/src/test/scripts/applications/l2svm/L2SVMTest.Rt index cb0bce7..8bd3e90 100644 --- a/src/test/scripts/applications/l2svm/L2SVMTest.Rt +++ b/src/test/scripts/applications/l2svm/L2SVMTest.Rt @@ -1,71 +1,71 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# JUnit test class: dml.test.integration.applications.L2SVMTest.java -library("Matrix") - -X = readMM("./test/scripts/applications/l2svm/in/X.mtx") -Y = readMM("./test/scripts/applications/l2svm/in/Y.mtx") -epsilon = 0.00000001 -lambda = 1 - -N = nrow(X) -D = ncol(X) - -w = matrix(0,D,1) - -g_old = t(X) %*% Y -s = g_old - -continue = TRUE -while(continue){ - t = 0 - Xd = X %*% s - wd = lambda * sum(w * s) - dd = lambda * sum(s * s) - continue1 = TRUE - while(continue1){ - tmp_w = w + t*s - out = 1 - Y * (X %*% tmp_w) - sv = which(out > 0) - g = wd + t*dd - sum(out[sv] * Y[sv] * Xd[sv]) - h = dd + sum(Xd[sv] * Xd[sv]) - t = t - g/h - continue1 = (g*g/h >= 1e-10) - } - - w = w + t*s - - out = 1 - Y * (X %*% w) - sv = which(out > 0) - obj = 0.5 * sum(out[sv] * out[sv]) + lambda/2 * sum(w * w) - g_new = t(X[sv,]) %*% (out[sv] * Y[sv]) - lambda * w - - print(paste("OBJ : ", obj)) - - continue = (t*sum(s * g_old) >= epsilon*obj) - - be = sum(g_new * g_new)/sum(g_old * g_old) - s = be * s + g_new - g_old = g_new -} - -writeMM(w, "./test/scripts/applications/l2svm/expected/w"); +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + +# JUnit test class: dml.test.integration.applications.L2SVMTest.java +library("Matrix") + +X = readMM("./test/scripts/applications/l2svm/in/X.mtx") +Y = readMM("./test/scripts/applications/l2svm/in/Y.mtx") +epsilon = 0.00000001 +lambda = 1 + +N = nrow(X) +D = ncol(X) + +w = matrix(0,D,1) + +g_old = t(X) %*% Y +s = g_old + +continue = TRUE +while(continue){ + t = 0 + Xd = X %*% s + wd = lambda * sum(w * s) + dd = lambda * sum(s * s) + continue1 = TRUE + while(continue1){ + tmp_w = w + t*s + out = 1 - Y * (X %*% tmp_w) + sv = which(out > 0) + g = wd + t*dd - sum(out[sv] * Y[sv] * Xd[sv]) + h = dd + sum(Xd[sv] * Xd[sv]) + t = t - g/h + continue1 = (g*g/h >= 1e-10) + } + + w = w + t*s + + out = 1 - Y * (X %*% w) + sv = which(out > 0) + obj = 0.5 * sum(out[sv] * out[sv]) + lambda/2 * sum(w * w) + g_new = t(X[sv,]) %*% (out[sv] * Y[sv]) - lambda * w + + print(paste("OBJ : ", obj)) + + continue = (t*sum(s * g_old) >= epsilon*obj) + + be = sum(g_new * g_new)/sum(g_old * g_old) + s = be * s + g_new + g_old = g_new +} + +writeMM(w, "./test/scripts/applications/l2svm/expected/w"); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/l2svm/L2SVMTest.dmlt ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/l2svm/L2SVMTest.dmlt b/src/test/scripts/applications/l2svm/L2SVMTest.dmlt index 79f4252..5145aa3 100644 --- a/src/test/scripts/applications/l2svm/L2SVMTest.dmlt +++ b/src/test/scripts/applications/l2svm/L2SVMTest.dmlt @@ -1,80 +1,80 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -#------------------------------------------------------------- - -X = read("./test/scripts/applications/l2svm/in/X", rows=1000, cols=100, format="text") -Y = read("./test/scripts/applications/l2svm/in/y", rows=1000, cols=1, format="text") -epsilon = 0.00000001 -lambda = 1 - -num_samples = nrow(X) -dimensions = ncol(X) - -g_old = t(X) %*% Y -s = g_old -w = Rand(rows=dimensions, cols=1, min=0, max=0, pdf="uniform") - -iter = 0 -continue = 1 -while(continue == 1) { - # minimizing primal obj along direction s - step_sz = 0 - Xd = X %*% s - wd = lambda * sum(w * s) - dd = lambda * sum(s * s) - continue1 = 1 - while(continue1 == 1){ - tmp_w = w + step_sz*s - out = 1 - Y * (X %*% tmp_w) - sv = ppred(out, 0, ">") - out = out * sv - g = wd + step_sz*dd - sum(out * Y * Xd) - h = dd + sum(Xd * sv * Xd) - step_sz = step_sz - g/h - if (g*g/h < 0.0000000001){ - continue1 = 0 - } - } - - #update weights - w = w + step_sz*s - - out = 1 - Y * (X %*% w) - sv = ppred(out, 0, ">") - out = sv * out - obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w) - g_new = t(X) %*% (out * Y) - lambda * w - - print("OBJ = " + obj) - - tmp = sum(s * g_old) - if(step_sz*tmp < epsilon*obj){ - continue = 0 - } - - #non-linear CG step - be = sum(g_new * g_new)/sum(g_old * g_old) - s = be * s + g_new - g_old = g_new - - iter = iter + 1 -} - -write(w, "./test/scripts/applications/l2svm/out/w", format="text") +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + +X = read("./test/scripts/applications/l2svm/in/X", rows=1000, cols=100, format="text") +Y = read("./test/scripts/applications/l2svm/in/y", rows=1000, cols=1, format="text") +epsilon = 0.00000001 +lambda = 1 + +num_samples = nrow(X) +dimensions = ncol(X) + +g_old = t(X) %*% Y +s = g_old +w = Rand(rows=dimensions, cols=1, min=0, max=0, pdf="uniform") + +iter = 0 +continue = 1 +while(continue == 1) { + # minimizing primal obj along direction s + step_sz = 0 + Xd = X %*% s + wd = lambda * sum(w * s) + dd = lambda * sum(s * s) + continue1 = 1 + while(continue1 == 1){ + tmp_w = w + step_sz*s + out = 1 - Y * (X %*% tmp_w) + sv = ppred(out, 0, ">") + out = out * sv + g = wd + step_sz*dd - sum(out * Y * Xd) + h = dd + sum(Xd * sv * Xd) + step_sz = step_sz - g/h + if (g*g/h < 0.0000000001){ + continue1 = 0 + } + } + + #update weights + w = w + step_sz*s + + out = 1 - Y * (X %*% w) + sv = ppred(out, 0, ">") + out = sv * out + obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w) + g_new = t(X) %*% (out * Y) - lambda * w + + print("OBJ = " + obj) + + tmp = sum(s * g_old) + if(step_sz*tmp < epsilon*obj){ + continue = 0 + } + + #non-linear CG step + be = sum(g_new * g_new)/sum(g_old * g_old) + s = be * s + g_new + g_old = g_new + + iter = iter + 1 +} + +write(w, "./test/scripts/applications/l2svm/out/w", format="text") http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/linearLogReg/LinearLogReg.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/linearLogReg/LinearLogReg.R b/src/test/scripts/applications/linearLogReg/LinearLogReg.R index fe22c8f..3a35d3f 100644 --- a/src/test/scripts/applications/linearLogReg/LinearLogReg.R +++ b/src/test/scripts/applications/linearLogReg/LinearLogReg.R @@ -1,217 +1,217 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# JUnit test class: dml.test.integration.applications.LinearLogReg.java -# command line invocation assuming $LLR_HOME is set to the home of the R script -# Rscript $LLR_HOME/LinearLogReg.R $LLR_HOME/in/ $LLR_HOME/expected/ - -args <- commandArgs(TRUE) - -library("Matrix") -#library("batch") -# Usage: /home/vikas/R-2.10.1/bin/R --vanilla --args Xfile X yfile y Cval 2 tol 0.01 maxiter 100 < linearLogReg.r - -# Solves Linear Logistic Regression using Trust Region methods. 
-# Can be adapted for L2-SVMs and more general unconstrained optimization problems also -# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650) - -options(warn=-1) - -C = 2; -tol = 0.001 -maxiter = 3 -maxinneriter = 3 - -eta0 = 0.0001 -eta1 = 0.25 -eta2 = 0.75 -sigma1 = 0.25 -sigma2 = 0.5 -sigma3 = 4.0 -psi = 0.1 - -# read (training and test) data files -- should be in matrix market format. see data.mtx -X = readMM(paste(args[1], "X.mtx", sep="")); -Xt = readMM(paste(args[1], "Xt.mtx", sep="")); - -N = nrow(X) -D = ncol(X) -Nt = nrow(Xt) - -# read (training and test) labels -y = readMM(paste(args[1], "y.mtx", sep="")); -yt = readMM(paste(args[1], "yt.mtx", sep="")); - -# initialize w -w = matrix(0,D,1) -o = X %*% w -logistic = 1.0/(1.0 + exp(-y*o)) - -# VS : change -obj = 0.5 * t(w) %*% w + C*sum(-log(logistic)) -grad = w + C*t(X) %*% ((logistic - 1)*y) -logisticD = logistic*(1-logistic) -delta = sqrt(sum(grad*grad)) - -# number of iterations -iter = 0 - -# starting point for CG -zeros_D = matrix(0,D,1) -# VS: change -zeros_N = matrix(0,N,1) - -# boolean for convergence check - -converge = (delta < tol) -norm_r2 = sum(grad*grad) -gnorm = sqrt(norm_r2) -# VS: change -norm_grad = sqrt(norm_r2) -norm_grad_initial = norm_grad - -while(!converge) { - - norm_grad = sqrt(sum(grad*grad)) - - print("next iteration..") - print(paste("Iterations : ",iter, "Objective : ", obj[1,1], "Gradient Norm : ", norm_grad)) - - # SOLVE TRUST REGION SUB-PROBLEM - s = zeros_D - os = zeros_N - r = -grad - d = r - innerconverge = (sqrt(sum(r*r)) <= psi*norm_grad) - inneriter = 0; - while(!innerconverge) { - inneriter = inneriter + 1 - norm_r2 = sum(r*r) - od = X %*% d - Hd = d + C*(t(X) %*% (logisticD*od)) - alpha_deno = t(d) %*% Hd - alpha = norm_r2/alpha_deno - - s = s + alpha[1,1]*d - os = os + alpha[1,1]*od - - sts = t(s) %*% s - delta2 = delta*delta - - if (sts[1,1] > delta2) { - # VS: change - print("cg reaches trust region boundary") - # VS: change - s = s - alpha[1,1]*d - os = os - alpha[1,1]*od - std = t(s) %*% d - dtd = t(d) %*% d - # VS:change - sts = t(s) %*% s - rad = sqrt(std*std + dtd*(delta2 - sts)) - if(std[1,1]>=0) { - tau = (delta2 - sts)/(std + rad) - } - else { - tau = (rad - std)/dtd - } - s = s + tau[1,1]*d - os = os + tau[1,1]*od - r = r - tau[1,1]*Hd - break - } - r = r - alpha[1,1]*Hd - old_norm_r2 = norm_r2 - norm_r2 = sum(r*r) - beta = norm_r2/old_norm_r2 - d = r + beta*d - innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter) # innerconverge = (sqrt(norm_r2) <= psi*norm_grad) - } - - print(paste("Inner CG Iteration = ", inneriter)) - # END TRUST REGION SUB-PROBLEM - # compute rho, update w, obtain delta - gs = t(s) %*% grad - qk = -0.5*(gs - (t(s) %*% r)) - - wnew = w + s - # VS Change X %*% wnew removed - onew = o + os - # VS: change - logisticnew = 1.0/(1.0 + exp(-y*onew)) - objnew = 0.5 * t(wnew) %*% wnew + C*sum(-log(logisticnew)) - - # VS: change - actred = (obj - objnew) - rho = actred/qk - - print(paste("Actual :", actred[1,1], "Predicted :", qk[1,1])) - - rho = rho[1,1] - snorm = sqrt(sum(s*s)) - - if(iter==0) { - delta = min(delta, snorm) - } - if (objnew[1,1] - obj[1,1] - gs[1,1] <= 0) { - alpha = sigma3; - } - else { - alpha = max(sigma1, -0.5*gs[1,1]/(objnew[1,1] - obj[1,1] - gs[1,1])) - } - - - - if (rho > eta0) { - - w = wnew - o = onew - grad = w + C*t(X) %*% ((logisticnew - 1)*y) - # VS: change - norm_grad = sqrt(sum(grad*grad)) - logisticD = 
logisticnew*(1-logisticnew) - obj = objnew - - } - - if (rho < eta0) - {delta = min(max(alpha, sigma1)*snorm, sigma2*delta)} - else if (rho < eta1) - {delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta))} - else if (rho < eta2) - {delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta))} - else - {delta = max(delta, min(alpha*snorm, sigma3*delta))} - - ot = Xt %*% w - correct = sum((yt*ot)>0) - iter = iter + 1 - converge = (norm_grad < tol*norm_grad_initial) | (iter>maxiter) - - print(paste("Delta :", delta)) - print(paste("Accuracy=", correct*100/Nt)) - print(paste("OuterIter=", iter)) - print(paste("Converge=", converge)) -} - -writeMM(as(w,"CsparseMatrix"), paste(args[2],"w", sep=""), format = "text") - - +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +# JUnit test class: dml.test.integration.applications.LinearLogReg.java +# command line invocation assuming $LLR_HOME is set to the home of the R script +# Rscript $LLR_HOME/LinearLogReg.R $LLR_HOME/in/ $LLR_HOME/expected/ + +args <- commandArgs(TRUE) + +library("Matrix") +#library("batch") +# Usage: /home/vikas/R-2.10.1/bin/R --vanilla --args Xfile X yfile y Cval 2 tol 0.01 maxiter 100 < linearLogReg.r + +# Solves Linear Logistic Regression using Trust Region methods. +# Can be adapted for L2-SVMs and more general unconstrained optimization problems also +# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650) + +options(warn=-1) + +C = 2; +tol = 0.001 +maxiter = 3 +maxinneriter = 3 + +eta0 = 0.0001 +eta1 = 0.25 +eta2 = 0.75 +sigma1 = 0.25 +sigma2 = 0.5 +sigma3 = 4.0 +psi = 0.1 + +# read (training and test) data files -- should be in matrix market format. 
see data.mtx +X = readMM(paste(args[1], "X.mtx", sep="")); +Xt = readMM(paste(args[1], "Xt.mtx", sep="")); + +N = nrow(X) +D = ncol(X) +Nt = nrow(Xt) + +# read (training and test) labels +y = readMM(paste(args[1], "y.mtx", sep="")); +yt = readMM(paste(args[1], "yt.mtx", sep="")); + +# initialize w +w = matrix(0,D,1) +o = X %*% w +logistic = 1.0/(1.0 + exp(-y*o)) + +# VS : change +obj = 0.5 * t(w) %*% w + C*sum(-log(logistic)) +grad = w + C*t(X) %*% ((logistic - 1)*y) +logisticD = logistic*(1-logistic) +delta = sqrt(sum(grad*grad)) + +# number of iterations +iter = 0 + +# starting point for CG +zeros_D = matrix(0,D,1) +# VS: change +zeros_N = matrix(0,N,1) + +# boolean for convergence check + +converge = (delta < tol) +norm_r2 = sum(grad*grad) +gnorm = sqrt(norm_r2) +# VS: change +norm_grad = sqrt(norm_r2) +norm_grad_initial = norm_grad + +while(!converge) { + + norm_grad = sqrt(sum(grad*grad)) + + print("next iteration..") + print(paste("Iterations : ",iter, "Objective : ", obj[1,1], "Gradient Norm : ", norm_grad)) + + # SOLVE TRUST REGION SUB-PROBLEM + s = zeros_D + os = zeros_N + r = -grad + d = r + innerconverge = (sqrt(sum(r*r)) <= psi*norm_grad) + inneriter = 0; + while(!innerconverge) { + inneriter = inneriter + 1 + norm_r2 = sum(r*r) + od = X %*% d + Hd = d + C*(t(X) %*% (logisticD*od)) + alpha_deno = t(d) %*% Hd + alpha = norm_r2/alpha_deno + + s = s + alpha[1,1]*d + os = os + alpha[1,1]*od + + sts = t(s) %*% s + delta2 = delta*delta + + if (sts[1,1] > delta2) { + # VS: change + print("cg reaches trust region boundary") + # VS: change + s = s - alpha[1,1]*d + os = os - alpha[1,1]*od + std = t(s) %*% d + dtd = t(d) %*% d + # VS:change + sts = t(s) %*% s + rad = sqrt(std*std + dtd*(delta2 - sts)) + if(std[1,1]>=0) { + tau = (delta2 - sts)/(std + rad) + } + else { + tau = (rad - std)/dtd + } + s = s + tau[1,1]*d + os = os + tau[1,1]*od + r = r - tau[1,1]*Hd + break + } + r = r - alpha[1,1]*Hd + old_norm_r2 = norm_r2 + norm_r2 = sum(r*r) + beta = norm_r2/old_norm_r2 + d = r + beta*d + innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter) # innerconverge = (sqrt(norm_r2) <= psi*norm_grad) + } + + print(paste("Inner CG Iteration = ", inneriter)) + # END TRUST REGION SUB-PROBLEM + # compute rho, update w, obtain delta + gs = t(s) %*% grad + qk = -0.5*(gs - (t(s) %*% r)) + + wnew = w + s + # VS Change X %*% wnew removed + onew = o + os + # VS: change + logisticnew = 1.0/(1.0 + exp(-y*onew)) + objnew = 0.5 * t(wnew) %*% wnew + C*sum(-log(logisticnew)) + + # VS: change + actred = (obj - objnew) + rho = actred/qk + + print(paste("Actual :", actred[1,1], "Predicted :", qk[1,1])) + + rho = rho[1,1] + snorm = sqrt(sum(s*s)) + + if(iter==0) { + delta = min(delta, snorm) + } + if (objnew[1,1] - obj[1,1] - gs[1,1] <= 0) { + alpha = sigma3; + } + else { + alpha = max(sigma1, -0.5*gs[1,1]/(objnew[1,1] - obj[1,1] - gs[1,1])) + } + + + + if (rho > eta0) { + + w = wnew + o = onew + grad = w + C*t(X) %*% ((logisticnew - 1)*y) + # VS: change + norm_grad = sqrt(sum(grad*grad)) + logisticD = logisticnew*(1-logisticnew) + obj = objnew + + } + + if (rho < eta0) + {delta = min(max(alpha, sigma1)*snorm, sigma2*delta)} + else if (rho < eta1) + {delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta))} + else if (rho < eta2) + {delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta))} + else + {delta = max(delta, min(alpha*snorm, sigma3*delta))} + + ot = Xt %*% w + correct = sum((yt*ot)>0) + iter = iter + 1 + converge = (norm_grad < tol*norm_grad_initial) | (iter>maxiter) + + 
print(paste("Delta :", delta)) + print(paste("Accuracy=", correct*100/Nt)) + print(paste("OuterIter=", iter)) + print(paste("Converge=", converge)) +} + +writeMM(as(w,"CsparseMatrix"), paste(args[2],"w", sep=""), format = "text") + + http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/linearLogReg/LinearLogReg.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/linearLogReg/LinearLogReg.dml b/src/test/scripts/applications/linearLogReg/LinearLogReg.dml index fb0bf00..cf2f7ad 100644 --- a/src/test/scripts/applications/linearLogReg/LinearLogReg.dml +++ b/src/test/scripts/applications/linearLogReg/LinearLogReg.dml @@ -1,231 +1,231 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# Solves Linear Logistic Regression using Trust Region methods. -# Can be adapted for L2-SVMs and more general unconstrained optimization problems also -# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650) - -# Note this script is externalized to customers, please do not change w/o consulting component owner. -# How to invoke this dml script LinearLogReg.dml? 
-# Assume LLR_HOME is set to the home of the dml script -# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR -# Assume rows = 100 and cols = 50 for x, rows_test= 25 and cols_test = 50 for Xt -# hadoop jar SystemML.jar -f $LLR_HOME/LinearLogReg.dml -args "$INPUT_DIR/X" "$INPUT_DIR/Xt" "$INPUT_DIR/y" "$INPUT_DIR/yt" "$OUTPUT_DIR/w" - -C = 2 -tol = 0.001 -maxiter = 3 -maxinneriter = 3 - -eta0 = 0.0001 -eta1 = 0.25 -eta2 = 0.75 -sigma1 = 0.25 -sigma2 = 0.5 -sigma3 = 4.0 -psi = 0.1 - -# read (training and test) data files -X = read($1) -Xt = read($2) -N = nrow(X) -D = ncol(X) -Nt = nrow(Xt) - -# read (training and test) labels -y = read($3) -yt = read($4) - -#initialize w -w = Rand(rows=D, cols=1, min=0.0, max=0.0); -e = Rand(rows=1, cols=1, min=1.0, max=1.0); -o = X %*% w -logistic = 1.0/(1.0 + exp( -y * o)) - -obj = 0.5 * t(w) %*% w + C*sum(-log(logistic)) -grad = w + C*t(X) %*% ((logistic - 1)*y) -logisticD = logistic*(1-logistic) -delta = sqrt(sum(grad*grad)) - -# number of iterations -iter = 0 - -# starting point for CG -zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0); -# VS: change -zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0); - -# boolean for convergence check - -converge = (delta < tol) | (iter > maxiter) -norm_r2 = sum(grad*grad) - -# VS: change -norm_grad = sqrt(norm_r2) -norm_grad_initial = norm_grad - -alpha = t(w) %*% w -alpha2 = alpha - -while(!converge) { - - norm_grad = sqrt(sum(grad*grad)) - - print("-- Outer Iteration = " + iter) - objScalar = castAsScalar(obj) - print(" Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad) - - # SOLVE TRUST REGION SUB-PROBLEM - s = zeros_D - os = zeros_N - r = -grad - d = r - inneriter = 0 - innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad) - while (!innerconverge) { - inneriter = inneriter + 1 - norm_r2 = sum(r*r) - od = X %*% d - Hd = d + C*(t(X) %*% (logisticD*od)) - alpha_deno = t(d) %*% Hd - alpha = norm_r2 / alpha_deno - - s = s + castAsScalar(alpha) * d - os = os + castAsScalar(alpha) * od - - sts = t(s) %*% s - delta2 = delta*delta - stsScalar = castAsScalar(sts) - - shouldBreak = FALSE; # to mimic "break" in the following 'if' condition - if (stsScalar > delta2) { - print(" --- cg reaches trust region boundary") - s = s - castAsScalar(alpha) * d - os = os - castAsScalar(alpha) * od - std = t(s) %*% d - dtd = t(d) %*% d - sts = t(s) %*% s - rad = sqrt(std*std + dtd*(delta2 - sts)) - stdScalar = castAsScalar(std) - if(stdScalar >= 0) { - tau = (delta2 - sts)/(std + rad) - } - else { - tau = (rad - std)/dtd - } - - s = s + castAsScalar(tau) * d - os = os + castAsScalar(tau) * od - r = r - castAsScalar(tau) * Hd - - #break - shouldBreak = TRUE; - innerconverge = TRUE; - - } - - if (!shouldBreak) { - r = r - castAsScalar(alpha) * Hd - old_norm_r2 = norm_r2 - norm_r2 = sum(r*r) - beta = norm_r2/old_norm_r2 - d = r + beta*d - innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter) - } - } - - print(" --- Inner CG Iteration = " + inneriter) - # END TRUST REGION SUB-PROBLEM - # compute rho, update w, obtain delta - gs = t(s) %*% grad - qk = -0.5*(gs - (t(s) %*% r)) - - wnew = w + s - onew = o + os - logisticnew = 1.0/(1.0 + exp(-y * onew )) - objnew = 0.5 * t(wnew) %*% wnew + C * sum(-log(logisticnew)) - - actred = (obj - objnew) - actredScalar = castAsScalar(actred) - rho = actred / qk - qkScalar = castAsScalar(qk) - rhoScalar = castAsScalar(rho); - snorm = sqrt(sum( s * s )) - print(" Actual = " + actredScalar) - print(" Predicted = 
" + qkScalar) - - if (iter==0) { - delta = min(delta, snorm) - } - alpha2 = objnew - obj - gs - alpha2Scalar = castAsScalar(alpha2) - if (alpha2Scalar <= 0) { - alpha = sigma3*e - } - else { - ascalar = max(sigma1, -0.5*castAsScalar(gs)/alpha2Scalar) - alpha = ascalar*e - } - - if (rhoScalar > eta0) { - - w = wnew - o = onew - grad = w + C*t(X) %*% ((logisticnew - 1) * y ) - norm_grad = sqrt(sum(grad*grad)) - logisticD = logisticnew * (1 - logisticnew) - obj = objnew - } - - alphaScalar = castAsScalar(alpha) - if (rhoScalar < eta0){ - delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta ) - } - else { - if (rhoScalar < eta1){ - delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma2 * delta)) - } - else { - if (rhoScalar < eta2) { - delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta)) - } - else { - delta = max(delta, min( alphaScalar * snorm, sigma3 * delta)) - } - } - } - - - ot = Xt %*% w - ot2 = yt * ot - correct = sum(ppred(ot2, 0, ">")) - accuracy = correct*100.0/Nt - iter = iter + 1 - converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter) - - print(" Delta = " + delta) - print(" Accuracy = " + accuracy) - print(" Correct = " + correct) - print(" OuterIter = " + iter) - print(" Converge = " + converge) -} - -write(w, $5, format="text"); +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +# Solves Linear Logistic Regression using Trust Region methods. +# Can be adapted for L2-SVMs and more general unconstrained optimization problems also +# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650) + +# Note this script is externalized to customers, please do not change w/o consulting component owner. +# How to invoke this dml script LinearLogReg.dml? 
+# Assume LLR_HOME is set to the home of the dml script +# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR +# Assume rows = 100 and cols = 50 for x, rows_test= 25 and cols_test = 50 for Xt +# hadoop jar SystemML.jar -f $LLR_HOME/LinearLogReg.dml -args "$INPUT_DIR/X" "$INPUT_DIR/Xt" "$INPUT_DIR/y" "$INPUT_DIR/yt" "$OUTPUT_DIR/w" + +C = 2 +tol = 0.001 +maxiter = 3 +maxinneriter = 3 + +eta0 = 0.0001 +eta1 = 0.25 +eta2 = 0.75 +sigma1 = 0.25 +sigma2 = 0.5 +sigma3 = 4.0 +psi = 0.1 + +# read (training and test) data files +X = read($1) +Xt = read($2) +N = nrow(X) +D = ncol(X) +Nt = nrow(Xt) + +# read (training and test) labels +y = read($3) +yt = read($4) + +#initialize w +w = Rand(rows=D, cols=1, min=0.0, max=0.0); +e = Rand(rows=1, cols=1, min=1.0, max=1.0); +o = X %*% w +logistic = 1.0/(1.0 + exp( -y * o)) + +obj = 0.5 * t(w) %*% w + C*sum(-log(logistic)) +grad = w + C*t(X) %*% ((logistic - 1)*y) +logisticD = logistic*(1-logistic) +delta = sqrt(sum(grad*grad)) + +# number of iterations +iter = 0 + +# starting point for CG +zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0); +# VS: change +zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0); + +# boolean for convergence check + +converge = (delta < tol) | (iter > maxiter) +norm_r2 = sum(grad*grad) + +# VS: change +norm_grad = sqrt(norm_r2) +norm_grad_initial = norm_grad + +alpha = t(w) %*% w +alpha2 = alpha + +while(!converge) { + + norm_grad = sqrt(sum(grad*grad)) + + print("-- Outer Iteration = " + iter) + objScalar = castAsScalar(obj) + print(" Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad) + + # SOLVE TRUST REGION SUB-PROBLEM + s = zeros_D + os = zeros_N + r = -grad + d = r + inneriter = 0 + innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad) + while (!innerconverge) { + inneriter = inneriter + 1 + norm_r2 = sum(r*r) + od = X %*% d + Hd = d + C*(t(X) %*% (logisticD*od)) + alpha_deno = t(d) %*% Hd + alpha = norm_r2 / alpha_deno + + s = s + castAsScalar(alpha) * d + os = os + castAsScalar(alpha) * od + + sts = t(s) %*% s + delta2 = delta*delta + stsScalar = castAsScalar(sts) + + shouldBreak = FALSE; # to mimic "break" in the following 'if' condition + if (stsScalar > delta2) { + print(" --- cg reaches trust region boundary") + s = s - castAsScalar(alpha) * d + os = os - castAsScalar(alpha) * od + std = t(s) %*% d + dtd = t(d) %*% d + sts = t(s) %*% s + rad = sqrt(std*std + dtd*(delta2 - sts)) + stdScalar = castAsScalar(std) + if(stdScalar >= 0) { + tau = (delta2 - sts)/(std + rad) + } + else { + tau = (rad - std)/dtd + } + + s = s + castAsScalar(tau) * d + os = os + castAsScalar(tau) * od + r = r - castAsScalar(tau) * Hd + + #break + shouldBreak = TRUE; + innerconverge = TRUE; + + } + + if (!shouldBreak) { + r = r - castAsScalar(alpha) * Hd + old_norm_r2 = norm_r2 + norm_r2 = sum(r*r) + beta = norm_r2/old_norm_r2 + d = r + beta*d + innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter) + } + } + + print(" --- Inner CG Iteration = " + inneriter) + # END TRUST REGION SUB-PROBLEM + # compute rho, update w, obtain delta + gs = t(s) %*% grad + qk = -0.5*(gs - (t(s) %*% r)) + + wnew = w + s + onew = o + os + logisticnew = 1.0/(1.0 + exp(-y * onew )) + objnew = 0.5 * t(wnew) %*% wnew + C * sum(-log(logisticnew)) + + actred = (obj - objnew) + actredScalar = castAsScalar(actred) + rho = actred / qk + qkScalar = castAsScalar(qk) + rhoScalar = castAsScalar(rho); + snorm = sqrt(sum( s * s )) + print(" Actual = " + actredScalar) + print(" Predicted = 
" + qkScalar) + + if (iter==0) { + delta = min(delta, snorm) + } + alpha2 = objnew - obj - gs + alpha2Scalar = castAsScalar(alpha2) + if (alpha2Scalar <= 0) { + alpha = sigma3*e + } + else { + ascalar = max(sigma1, -0.5*castAsScalar(gs)/alpha2Scalar) + alpha = ascalar*e + } + + if (rhoScalar > eta0) { + + w = wnew + o = onew + grad = w + C*t(X) %*% ((logisticnew - 1) * y ) + norm_grad = sqrt(sum(grad*grad)) + logisticD = logisticnew * (1 - logisticnew) + obj = objnew + } + + alphaScalar = castAsScalar(alpha) + if (rhoScalar < eta0){ + delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta ) + } + else { + if (rhoScalar < eta1){ + delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma2 * delta)) + } + else { + if (rhoScalar < eta2) { + delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta)) + } + else { + delta = max(delta, min( alphaScalar * snorm, sigma3 * delta)) + } + } + } + + + ot = Xt %*% w + ot2 = yt * ot + correct = sum(ppred(ot2, 0, ">")) + accuracy = correct*100.0/Nt + iter = iter + 1 + converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter) + + print(" Delta = " + delta) + print(" Accuracy = " + accuracy) + print(" Correct = " + correct) + print(" OuterIter = " + iter) + print(" Converge = " + converge) +} + +write(w, $5, format="text"); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/linearLogReg/LinearLogReg.pydml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/linearLogReg/LinearLogReg.pydml b/src/test/scripts/applications/linearLogReg/LinearLogReg.pydml index a8dc934..1a9e769 100644 --- a/src/test/scripts/applications/linearLogReg/LinearLogReg.pydml +++ b/src/test/scripts/applications/linearLogReg/LinearLogReg.pydml @@ -1,214 +1,214 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# Solves Linear Logistic Regression using Trust Region methods. -# Can be adapted for L2-SVMs and more general unconstrained optimization problems also -# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650) - -# Note this script is externalized to customers, please do not change w/o consulting component owner. -# How to invoke this pydml script LinearLogReg.pydml? 
-# Assume LLR_HOME is set to the home of the pydml script -# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR -# Assume rows = 100 and cols = 50 for x, rows_test= 25 and cols_test = 50 for Xt -# hadoop jar SystemML.jar -f $LLR_HOME/LinearLogReg.pydml -python -args "$INPUT_DIR/X" "$INPUT_DIR/Xt" "$INPUT_DIR/y" "$INPUT_DIR/yt" "$OUTPUT_DIR/w" - -C = 2 -tol = 0.001 -maxiter = 3 -maxinneriter = 3 - -eta0 = 0.0001 -eta1 = 0.25 -eta2 = 0.75 -sigma1 = 0.25 -sigma2 = 0.5 -sigma3 = 4.0 -psi = 0.1 - -# load (training and test) data files -X = load($1) -Xt = load($2) -N = nrow(X) -D = ncol(X) -Nt = nrow(Xt) - -# load (training and test) labels -y = load($3) -yt = load($4) - -#initialize w -w = Rand(rows=D, cols=1, min=0.0, max=0.0) -e = Rand(rows=1, cols=1, min=1.0, max=1.0) -o = dot(X, w) -logistic = 1.0/(1.0 + exp( -y * o)) - -# CHECK ORDER OF OPERATIONS HERE -obj = dot((0.5 * transpose(w)), w) + C*sum(-log(logistic)) -grad = w + dot(C*transpose(X), ((logistic - 1)*y)) -logisticD = logistic*(1-logistic) -delta = sqrt(sum(grad*grad)) - -# number of iterations -iter = 0 - -# starting point for CG -zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0) -# VS: change -zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0) - -# boolean for convergence check - -converge = (delta < tol) | (iter > maxiter) -norm_r2 = sum(grad*grad) - -# VS: change -norm_grad = sqrt(norm_r2) -norm_grad_initial = norm_grad - -alpha = dot(transpose(w), w) -alpha2 = alpha - -while(!converge): - - norm_grad = sqrt(sum(grad*grad)) - - print("-- Outer Iteration = " + iter) - objScalar = castAsScalar(obj) - print(" Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad) - - # SOLVE TRUST REGION SUB-PROBLEM - s = zeros_D - os = zeros_N - r = -grad - d = r - inneriter = 0 - innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad) - while (!innerconverge): - inneriter = inneriter + 1 - norm_r2 = sum(r*r) - od = dot(X, d) - Hd = d + C*(dot(transpose(X), (logisticD*od))) - alpha_deno = dot(transpose(d), Hd) - alpha = norm_r2 / alpha_deno - - s = s + castAsScalar(alpha) * d - os = os + castAsScalar(alpha) * od - - sts = dot(transpose(s), s) - delta2 = delta*delta - stsScalar = castAsScalar(sts) - - shouldBreak = False # to mimic "break" in the following 'if' condition - if (stsScalar > delta2): - print(" --- cg reaches trust region boundary") - s = s - castAsScalar(alpha) * d - os = os - castAsScalar(alpha) * od - std = dot(transpose(s), d) - dtd = dot(transpose(d), d) - sts = dot(transpose(s), s) - rad = sqrt(std*std + dtd*(delta2 - sts)) - stdScalar = castAsScalar(std) - if(stdScalar >= 0): - tau = (delta2 - sts)/(std + rad) - else: - tau = (rad - std)/dtd - - s = s + castAsScalar(tau) * d - os = os + castAsScalar(tau) * od - r = r - castAsScalar(tau) * Hd - - #break - shouldBreak = True - innerconverge = True - if (!shouldBreak): - r = r - castAsScalar(alpha) * Hd - old_norm_r2 = norm_r2 - norm_r2 = sum(r*r) - beta = norm_r2/old_norm_r2 - d = r + beta*d - innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter) - # end while (!innerconverge) - - print(" --- Inner CG Iteration = " + inneriter) - # END TRUST REGION SUB-PROBLEM - # compute rho, update w, obtain delta - gs = dot(transpose(s), grad) - qk = -0.5*(gs - (dot(transpose(s), r))) - - wnew = w + s - onew = o + os - logisticnew = 1.0/(1.0 + exp(-y * onew )) - objnew = dot((0.5 * transpose(wnew)), wnew) + C * sum(-log(logisticnew)) - - actred = (obj - objnew) - actredScalar = castAsScalar(actred) - 
rho = actred / qk - qkScalar = castAsScalar(qk) - rhoScalar = castAsScalar(rho) - snorm = sqrt(sum( s * s )) - print(" Actual = " + actredScalar) - print(" Predicted = " + qkScalar) - - if (iter==0): - delta = min(delta, snorm) - alpha2 = objnew - obj - gs - alpha2Scalar = castAsScalar(alpha2) - if (alpha2Scalar <= 0): - alpha = sigma3*e - else: - ascalar = max(sigma1, -0.5*castAsScalar(gs)/alpha2Scalar) - alpha = ascalar*e - - if (rhoScalar > eta0): - w = wnew - o = onew - grad = w + dot(C*transpose(X), ((logisticnew - 1) * y )) - norm_grad = sqrt(sum(grad*grad)) - logisticD = logisticnew * (1 - logisticnew) - obj = objnew - - alphaScalar = castAsScalar(alpha) - if (rhoScalar < eta0): - delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta ) - else: - if (rhoScalar < eta1): - delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma2 * delta)) - else: - if (rhoScalar < eta2): - delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta)) - else: - delta = max(delta, min( alphaScalar * snorm, sigma3 * delta)) - - ot = dot(Xt, w) - ot2 = yt * ot - correct = sum(ppred(ot2, 0, ">")) - accuracy = correct*100.0/Nt - iter = iter + 1 - converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter) - - print(" Delta = " + delta) - print(" Accuracy = " + accuracy) - print(" Correct = " + correct) - print(" OuterIter = " + iter) - print(" Converge = " + converge) -# end while(!converge) - -save(w, $5, format="text") +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +# Solves Linear Logistic Regression using Trust Region methods. +# Can be adapted for L2-SVMs and more general unconstrained optimization problems also +# setup optimization parameters (See: Trust Region Newton Method for Logistic Regression, Lin, Weng and Keerthi, JMLR 9 (2008) 627-650) + +# Note this script is externalized to customers, please do not change w/o consulting component owner. +# How to invoke this pydml script LinearLogReg.pydml? 
+# Assume LLR_HOME is set to the home of the pydml script +# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR +# Assume rows = 100 and cols = 50 for x, rows_test= 25 and cols_test = 50 for Xt +# hadoop jar SystemML.jar -f $LLR_HOME/LinearLogReg.pydml -python -args "$INPUT_DIR/X" "$INPUT_DIR/Xt" "$INPUT_DIR/y" "$INPUT_DIR/yt" "$OUTPUT_DIR/w" + +C = 2 +tol = 0.001 +maxiter = 3 +maxinneriter = 3 + +eta0 = 0.0001 +eta1 = 0.25 +eta2 = 0.75 +sigma1 = 0.25 +sigma2 = 0.5 +sigma3 = 4.0 +psi = 0.1 + +# load (training and test) data files +X = load($1) +Xt = load($2) +N = nrow(X) +D = ncol(X) +Nt = nrow(Xt) + +# load (training and test) labels +y = load($3) +yt = load($4) + +#initialize w +w = Rand(rows=D, cols=1, min=0.0, max=0.0) +e = Rand(rows=1, cols=1, min=1.0, max=1.0) +o = dot(X, w) +logistic = 1.0/(1.0 + exp( -y * o)) + +# CHECK ORDER OF OPERATIONS HERE +obj = dot((0.5 * transpose(w)), w) + C*sum(-log(logistic)) +grad = w + dot(C*transpose(X), ((logistic - 1)*y)) +logisticD = logistic*(1-logistic) +delta = sqrt(sum(grad*grad)) + +# number of iterations +iter = 0 + +# starting point for CG +zeros_D = Rand(rows = D, cols = 1, min = 0.0, max = 0.0) +# VS: change +zeros_N = Rand(rows = N, cols = 1, min = 0.0, max = 0.0) + +# boolean for convergence check + +converge = (delta < tol) | (iter > maxiter) +norm_r2 = sum(grad*grad) + +# VS: change +norm_grad = sqrt(norm_r2) +norm_grad_initial = norm_grad + +alpha = dot(transpose(w), w) +alpha2 = alpha + +while(!converge): + + norm_grad = sqrt(sum(grad*grad)) + + print("-- Outer Iteration = " + iter) + objScalar = castAsScalar(obj) + print(" Iterations = " + iter + ", Objective = " + objScalar + ", Gradient Norm = " + norm_grad) + + # SOLVE TRUST REGION SUB-PROBLEM + s = zeros_D + os = zeros_N + r = -grad + d = r + inneriter = 0 + innerconverge = ( sqrt(sum(r*r)) <= psi * norm_grad) + while (!innerconverge): + inneriter = inneriter + 1 + norm_r2 = sum(r*r) + od = dot(X, d) + Hd = d + C*(dot(transpose(X), (logisticD*od))) + alpha_deno = dot(transpose(d), Hd) + alpha = norm_r2 / alpha_deno + + s = s + castAsScalar(alpha) * d + os = os + castAsScalar(alpha) * od + + sts = dot(transpose(s), s) + delta2 = delta*delta + stsScalar = castAsScalar(sts) + + shouldBreak = False # to mimic "break" in the following 'if' condition + if (stsScalar > delta2): + print(" --- cg reaches trust region boundary") + s = s - castAsScalar(alpha) * d + os = os - castAsScalar(alpha) * od + std = dot(transpose(s), d) + dtd = dot(transpose(d), d) + sts = dot(transpose(s), s) + rad = sqrt(std*std + dtd*(delta2 - sts)) + stdScalar = castAsScalar(std) + if(stdScalar >= 0): + tau = (delta2 - sts)/(std + rad) + else: + tau = (rad - std)/dtd + + s = s + castAsScalar(tau) * d + os = os + castAsScalar(tau) * od + r = r - castAsScalar(tau) * Hd + + #break + shouldBreak = True + innerconverge = True + if (!shouldBreak): + r = r - castAsScalar(alpha) * Hd + old_norm_r2 = norm_r2 + norm_r2 = sum(r*r) + beta = norm_r2/old_norm_r2 + d = r + beta*d + innerconverge = (sqrt(norm_r2) <= psi * norm_grad) | (inneriter > maxinneriter) + # end while (!innerconverge) + + print(" --- Inner CG Iteration = " + inneriter) + # END TRUST REGION SUB-PROBLEM + # compute rho, update w, obtain delta + gs = dot(transpose(s), grad) + qk = -0.5*(gs - (dot(transpose(s), r))) + + wnew = w + s + onew = o + os + logisticnew = 1.0/(1.0 + exp(-y * onew )) + objnew = dot((0.5 * transpose(wnew)), wnew) + C * sum(-log(logisticnew)) + + actred = (obj - objnew) + actredScalar = castAsScalar(actred) + 
rho = actred / qk + qkScalar = castAsScalar(qk) + rhoScalar = castAsScalar(rho) + snorm = sqrt(sum( s * s )) + print(" Actual = " + actredScalar) + print(" Predicted = " + qkScalar) + + if (iter==0): + delta = min(delta, snorm) + alpha2 = objnew - obj - gs + alpha2Scalar = castAsScalar(alpha2) + if (alpha2Scalar <= 0): + alpha = sigma3*e + else: + ascalar = max(sigma1, -0.5*castAsScalar(gs)/alpha2Scalar) + alpha = ascalar*e + + if (rhoScalar > eta0): + w = wnew + o = onew + grad = w + dot(C*transpose(X), ((logisticnew - 1) * y )) + norm_grad = sqrt(sum(grad*grad)) + logisticD = logisticnew * (1 - logisticnew) + obj = objnew + + alphaScalar = castAsScalar(alpha) + if (rhoScalar < eta0): + delta = min(max( alphaScalar , sigma1) * snorm, sigma2 * delta ) + else: + if (rhoScalar < eta1): + delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma2 * delta)) + else: + if (rhoScalar < eta2): + delta = max(sigma1 * delta, min( alphaScalar * snorm, sigma3 * delta)) + else: + delta = max(delta, min( alphaScalar * snorm, sigma3 * delta)) + + ot = dot(Xt, w) + ot2 = yt * ot + correct = sum(ppred(ot2, 0, ">")) + accuracy = correct*100.0/Nt + iter = iter + 1 + converge = (norm_grad < (tol * norm_grad_initial)) | (iter > maxiter) + + print(" Delta = " + delta) + print(" Accuracy = " + accuracy) + print(" Correct = " + correct) + print(" OuterIter = " + iter) + print(" Converge = " + converge) +# end while(!converge) + +save(w, $5, format="text") http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/m-svm/m-svm.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/m-svm/m-svm.R b/src/test/scripts/applications/m-svm/m-svm.R index 01f5d6a..e4eb562 100644 --- a/src/test/scripts/applications/m-svm/m-svm.R +++ b/src/test/scripts/applications/m-svm/m-svm.R @@ -1,120 +1,120 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -#------------------------------------------------------------- - -args <- commandArgs(TRUE) - -library("Matrix") - -X = as.matrix(readMM(paste(args[1], "X.mtx", sep=""))) - -check_X = sum(X) -if(check_X == 0){ - print("X has no non-zeros") -}else{ - Y = as.matrix(readMM(paste(args[1], "Y.mtx", sep=""))) - intercept = as.integer(args[6]) - num_classes = as.integer(args[2]) - epsilon = as.double(args[3]) - lambda = as.double(args[4]) - max_iterations = as.integer(args[5]) - - num_samples = nrow(X) - num_features = ncol(X) - - if (intercept == 1) { - ones = matrix(1, num_samples, 1); - X = cbind(X, ones); - } - - num_rows_in_w = num_features - if(intercept == 1){ - num_rows_in_w = num_rows_in_w + 1 - } - w = matrix(0, num_rows_in_w, num_classes) - - debug_mat = matrix(-1, max_iterations, num_classes) - for(iter_class in 1:num_classes){ - Y_local = 2 * (Y == iter_class) - 1 - w_class = matrix(0, num_features, 1) - if (intercept == 1) { - zero_matrix = matrix(0, 1, 1); - w_class = t(cbind(t(w_class), zero_matrix)); - } - - g_old = t(X) %*% Y_local - s = g_old - - Xw = matrix(0, nrow(X), 1) - iter = 0 - continue = 1 - while(continue == 1) { - # minimizing primal obj along direction s - step_sz = 0 - Xd = X %*% s - wd = lambda * sum(w_class * s) - dd = lambda * sum(s * s) - continue1 = 1 - while(continue1 == 1){ - tmp_Xw = Xw + step_sz*Xd - out = 1 - Y_local * (tmp_Xw) - sv = (out > 0) - out = out * sv - g = wd + step_sz*dd - sum(out * Y_local * Xd) - h = dd + sum(Xd * sv * Xd) - step_sz = step_sz - g/h - if (g*g/h < 0.0000000001){ - continue1 = 0 - } - } - - #update weights - w_class = w_class + step_sz*s - Xw = Xw + step_sz*Xd - - out = 1 - Y_local * Xw - sv = (out > 0) - out = sv * out - obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class) - g_new = t(X) %*% (out * Y_local) - lambda * w_class - - tmp = sum(s * g_old) - - train_acc = sum(Y_local*(X%*%w_class) >= 0)/num_samples*100 - print(paste("For class ", iter_class, " iteration ", iter, " training accuracy: ", train_acc, sep="")) - debug_mat[iter+1,iter_class] = obj - - if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){ - continue = 0 - } - - #non-linear CG step - be = sum(g_new * g_new)/sum(g_old * g_old) - s = be * s + g_new - g_old = g_new - - iter = iter + 1 - } - - w[,iter_class] = w_class - } - - writeMM(as(w, "CsparseMatrix"), paste(args[7], "w", sep="")) -} +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + +args <- commandArgs(TRUE) + +library("Matrix") + +X = as.matrix(readMM(paste(args[1], "X.mtx", sep=""))) + +check_X = sum(X) +if(check_X == 0){ + print("X has no non-zeros") +}else{ + Y = as.matrix(readMM(paste(args[1], "Y.mtx", sep=""))) + intercept = as.integer(args[6]) + num_classes = as.integer(args[2]) + epsilon = as.double(args[3]) + lambda = as.double(args[4]) + max_iterations = as.integer(args[5]) + + num_samples = nrow(X) + num_features = ncol(X) + + if (intercept == 1) { + ones = matrix(1, num_samples, 1); + X = cbind(X, ones); + } + + num_rows_in_w = num_features + if(intercept == 1){ + num_rows_in_w = num_rows_in_w + 1 + } + w = matrix(0, num_rows_in_w, num_classes) + + debug_mat = matrix(-1, max_iterations, num_classes) + for(iter_class in 1:num_classes){ + Y_local = 2 * (Y == iter_class) - 1 + w_class = matrix(0, num_features, 1) + if (intercept == 1) { + zero_matrix = matrix(0, 1, 1); + w_class = t(cbind(t(w_class), zero_matrix)); + } + + g_old = t(X) %*% Y_local + s = g_old + + Xw = matrix(0, nrow(X), 1) + iter = 0 + continue = 1 + while(continue == 1) { + # minimizing primal obj along direction s + step_sz = 0 + Xd = X %*% s + wd = lambda * sum(w_class * s) + dd = lambda * sum(s * s) + continue1 = 1 + while(continue1 == 1){ + tmp_Xw = Xw + step_sz*Xd + out = 1 - Y_local * (tmp_Xw) + sv = (out > 0) + out = out * sv + g = wd + step_sz*dd - sum(out * Y_local * Xd) + h = dd + sum(Xd * sv * Xd) + step_sz = step_sz - g/h + if (g*g/h < 0.0000000001){ + continue1 = 0 + } + } + + #update weights + w_class = w_class + step_sz*s + Xw = Xw + step_sz*Xd + + out = 1 - Y_local * Xw + sv = (out > 0) + out = sv * out + obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class) + g_new = t(X) %*% (out * Y_local) - lambda * w_class + + tmp = sum(s * g_old) + + train_acc = sum(Y_local*(X%*%w_class) >= 0)/num_samples*100 + print(paste("For class ", iter_class, " iteration ", iter, " training accuracy: ", train_acc, sep="")) + debug_mat[iter+1,iter_class] = obj + + if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){ + continue = 0 + } + + #non-linear CG step + be = sum(g_new * g_new)/sum(g_old * g_old) + s = be * s + g_new + g_old = g_new + + iter = iter + 1 + } + + w[,iter_class] = w_class + } + + writeMM(as(w, "CsparseMatrix"), paste(args[7], "w", sep="")) +}

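The two LinearLogReg scripts in this commit minimize the same L2-regularized logistic loss, obj = 0.5*t(w)%*%w + C*sum(-log(sigmoid(y*(X%*%w)))), with gradient w + C*t(X)%*%((sigmoid(y*o) - 1)*y) and Hessian-vector product Hd = d + C*t(X)%*%(logisticD*(X%*%d)), where logisticD = logistic*(1 - logistic). The R sketch below simply re-evaluates those three quantities on synthetic data so the inner CG loop's Hd line is easier to follow; the data, sizes, and variable names are illustrative only, and the full trust-region machinery (delta updates, the rho acceptance test) remains as written in the scripts above.

# Objective, gradient and Hessian-vector product of the L2-regularized logistic loss
# used by LinearLogReg.dml / LinearLogReg.pydml, evaluated on synthetic data (illustrative).
set.seed(2)
N <- 100; D <- 50; C <- 2
X <- matrix(rnorm(N * D), N, D)
y <- matrix(sample(c(-1, 1), N, replace = TRUE), N, 1)   # labels in {-1, +1}
w <- matrix(0, D, 1)

o        <- X %*% w
logistic <- 1 / (1 + exp(-y * o))

obj  <- 0.5 * sum(w * w) + C * sum(-log(logistic))       # 0.5*t(w)%*%w + C*sum(-log(logistic))
grad <- w + C * t(X) %*% ((logistic - 1) * y)            # same formula as in the scripts
logisticD <- logistic * (1 - logistic)

# Hessian-vector product as computed inside the trust-region CG loop (od = X %*% d):
d  <- -grad
Hd <- d + C * t(X) %*% (logisticD * (X %*% d))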