Repository: incubator-systemml Updated Branches: refs/heads/master f788b42d0 -> d9c6acb3a
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/scripts/functions/codegen/Algorithm_MLogreg.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/codegen/Algorithm_MLogreg.dml b/src/test/scripts/functions/codegen/Algorithm_MLogreg.dml deleted file mode 100644 index 88c05d9..0000000 --- a/src/test/scripts/functions/codegen/Algorithm_MLogreg.dml +++ /dev/null @@ -1,274 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -X = read($1) -Y_vec = read($2) -intercept = $3; -tol = $4; -maxiter = $5; - -intercept_status = intercept; -regularization = 0.001; -maxinneriter = 0; - -print ("BEGIN MULTINOMIAL LOGISTIC REGRESSION SCRIPT"); - -eta0 = 0.0001; -eta1 = 0.25; -eta2 = 0.75; -sigma1 = 0.25; -sigma2 = 0.5; -sigma3 = 4.0; -psi = 0.1; - -N = nrow (X); -D = ncol (X); - -# Introduce the intercept, shift and rescale the columns of X if needed -if (intercept_status == 1 | intercept_status == 2) # add the intercept column -{ - X = append (X, matrix (1, rows = N, cols = 1)); - D = ncol (X); -} - -scale_lambda = matrix (1, rows = D, cols = 1); -if (intercept_status == 1 | intercept_status == 2) -{ - scale_lambda [D, 1] = 0; -} - -if (intercept_status == 2) # scale-&-shift X columns to mean 0, variance 1 -{ # Important assumption: X [, D] = matrix (1, rows = N, cols = 1) - avg_X_cols = t(colSums(X)) / N; - var_X_cols = (t(colSums (X ^ 2)) - N * (avg_X_cols ^ 2)) / (N - 1); - is_unsafe = ppred (var_X_cols, 0.0, "<="); - scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe); - scale_X [D, 1] = 1; - shift_X = - avg_X_cols * scale_X; - shift_X [D, 1] = 0; - rowSums_X_sq = (X ^ 2) %*% (scale_X ^ 2) + X %*% (2 * scale_X * shift_X) + sum (shift_X ^ 2); -} else { - scale_X = matrix (1, rows = D, cols = 1); - shift_X = matrix (0, rows = D, cols = 1); - rowSums_X_sq = rowSums (X ^ 2); -} - -# Henceforth we replace "X" with "X %*% (SHIFT/SCALE TRANSFORM)" and rowSums(X ^ 2) -# with "rowSums_X_sq" in order to preserve the sparsity of X under shift and scale. -# The transform is then associatively applied to the other side of the expression, -# and is rewritten via "scale_X" and "shift_X" as follows: -# -# ssX_A = (SHIFT/SCALE TRANSFORM) %*% A --- is rewritten as: -# ssX_A = diag (scale_X) %*% A; -# ssX_A [D, ] = ssX_A [D, ] + t(shift_X) %*% A; -# -# tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A --- is rewritten as: -# tssX_A = diag (scale_X) %*% A + shift_X %*% A [D, ]; - -# Convert "Y_vec" into indicator matrice: -max_y = max (Y_vec); -if (min (Y_vec) <= 0) { - # Category labels "0", "-1" etc. are converted into the largest label - Y_vec = Y_vec + (- Y_vec + max_y + 1) * (Y_vec <= 0); - max_y = max_y + 1; -} -Y = table (seq (1, N, 1), Y_vec, N, max_y); -K = ncol (Y) - 1; # The number of non-baseline categories - - -lambda = (scale_lambda %*% matrix (1, rows = 1, cols = K)) * regularization; -delta = 0.5 * sqrt (D) / max (sqrt (rowSums_X_sq)); - -B = matrix (0, rows = D, cols = K); ### LT = X %*% (SHIFT/SCALE TRANSFORM) %*% B; - ### LT = append (LT, matrix (0, rows = N, cols = 1)); - ### LT = LT - rowMaxs (LT) %*% matrix (1, rows = 1, cols = K+1); -P = matrix (1, rows = N, cols = K+1); ### exp_LT = exp (LT); -P = P / (K + 1); ### P = exp_LT / (rowSums (exp_LT) %*% matrix (1, rows = 1, cols = K+1)); -obj = N * log (K + 1); ### obj = - sum (Y * LT) + sum (log (rowSums (exp_LT))) + 0.5 * sum (lambda * (B_new ^ 2)); - -Grad = t(X) %*% (P [, 1:K] - Y [, 1:K]); -if (intercept_status == 2) { - Grad = diag (scale_X) %*% Grad + shift_X %*% Grad [D, ]; -} -Grad = Grad + lambda * B; -norm_Grad = sqrt (sum (Grad ^ 2)); -norm_Grad_initial = norm_Grad; - -if (maxinneriter == 0) { - maxinneriter = D * K; -} -iter = 1; - -# boolean for convergence check -converge = (norm_Grad < tol) | (iter > maxiter); - -print ("-- Initially: Objective = " + obj + ", Gradient Norm = " + norm_Grad + ", Trust Delta = " + delta); - -while (! converge) -{ - # SOLVE TRUST REGION SUB-PROBLEM - S = matrix (0, rows = D, cols = K); - R = - Grad; - V = R; - delta2 = delta ^ 2; - inneriter = 1; - norm_R2 = sum (R ^ 2); - innerconverge = (sqrt (norm_R2) <= psi * norm_Grad); - is_trust_boundary_reached = 0; - - while (! innerconverge) - { - if (intercept_status == 2) { - ssX_V = diag (scale_X) %*% V; - ssX_V [D, ] = ssX_V [D, ] + t(shift_X) %*% V; - } else { - ssX_V = V; - } - Q = P [, 1:K] * (X %*% ssX_V); - HV = t(X) %*% (Q - P [, 1:K] * (rowSums (Q) %*% matrix (1, rows = 1, cols = K))); - if (intercept_status == 2) { - HV = diag (scale_X) %*% HV + shift_X %*% HV [D, ]; - } - HV = HV + lambda * V; - alpha = norm_R2 / sum (V * HV); - Snew = S + alpha * V; - norm_Snew2 = sum (Snew ^ 2); - if (norm_Snew2 <= delta2) - { - S = Snew; - R = R - alpha * HV; - old_norm_R2 = norm_R2 - norm_R2 = sum (R ^ 2); - V = R + (norm_R2 / old_norm_R2) * V; - innerconverge = (sqrt (norm_R2) <= psi * norm_Grad); - } else { - is_trust_boundary_reached = 1; - sv = sum (S * V); - v2 = sum (V ^ 2); - s2 = sum (S ^ 2); - rad = sqrt (sv ^ 2 + v2 * (delta2 - s2)); - if (sv >= 0) { - alpha = (delta2 - s2) / (sv + rad); - } else { - alpha = (rad - sv) / v2; - } - S = S + alpha * V; - R = R - alpha * HV; - innerconverge = TRUE; - } - inneriter = inneriter + 1; - innerconverge = innerconverge | (inneriter > maxinneriter); - } - - # END TRUST REGION SUB-PROBLEM - - # compute rho, update B, obtain delta - gs = sum (S * Grad); - qk = - 0.5 * (gs - sum (S * R)); - B_new = B + S; - if (intercept_status == 2) { - ssX_B_new = diag (scale_X) %*% B_new; - ssX_B_new [D, ] = ssX_B_new [D, ] + t(shift_X) %*% B_new; - } else { - ssX_B_new = B_new; - } - - LT = append ((X %*% ssX_B_new), matrix (0, rows = N, cols = 1)); - LT = LT - rowMaxs (LT) %*% matrix (1, rows = 1, cols = K+1); - exp_LT = exp (LT); - P_new = exp_LT / (rowSums (exp_LT) %*% matrix (1, rows = 1, cols = K+1)); - obj_new = - sum (Y * LT) + sum (log (rowSums (exp_LT))) + 0.5 * sum (lambda * (B_new ^ 2)); - - # Consider updating LT in the inner loop - # Consider the big "obj" and "obj_new" rounding-off their small difference below: - - actred = (obj - obj_new); - - rho = actred / qk; - is_rho_accepted = (rho > eta0); - snorm = sqrt (sum (S ^ 2)); - - if (iter == 1) { - delta = min (delta, snorm); - } - - alpha2 = obj_new - obj - gs; - if (alpha2 <= 0) { - alpha = sigma3; - } - else { - alpha = max (sigma1, -0.5 * gs / alpha2); - } - - if (rho < eta0) { - delta = min (max (alpha, sigma1) * snorm, sigma2 * delta); - } - else { - if (rho < eta1) { - delta = max (sigma1 * delta, min (alpha * snorm, sigma2 * delta)); - } - else { - if (rho < eta2) { - delta = max (sigma1 * delta, min (alpha * snorm, sigma3 * delta)); - } - else { - delta = max (delta, min (alpha * snorm, sigma3 * delta)); - } - } - } - - if (is_trust_boundary_reached == 1) - { - print ("-- Outer Iteration " + iter + ": Had " + (inneriter - 1) + " CG iterations, trust bound REACHED"); - } else { - print ("-- Outer Iteration " + iter + ": Had " + (inneriter - 1) + " CG iterations"); - } - print (" -- Obj.Reduction: Actual = " + actred + ", Predicted = " + qk + - " (A/P: " + (round (10000.0 * rho) / 10000.0) + "), Trust Delta = " + delta); - - if (is_rho_accepted) - { - B = B_new; - P = P_new; - Grad = t(X) %*% (P [, 1:K] - Y [, 1:K]); - if (intercept_status == 2) { - Grad = diag (scale_X) %*% Grad + shift_X %*% Grad [D, ]; - } - Grad = Grad + lambda * B; - norm_Grad = sqrt (sum (Grad ^ 2)); - obj = obj_new; - print (" -- New Objective = " + obj + ", Beta Change Norm = " + snorm + ", Gradient Norm = " + norm_Grad); - } - - iter = iter + 1; - converge = ((norm_Grad < (tol * norm_Grad_initial)) | (iter > maxiter) | - ((is_trust_boundary_reached == 0) & (abs (actred) < (abs (obj) + abs (obj_new)) * 0.00000000000001))); - if (converge) { print ("Termination / Convergence condition satisfied."); } else { print (" "); } -} - -if (intercept_status == 2) { - B_out = diag (scale_X) %*% B; - B_out [D, ] = B_out [D, ] + t(shift_X) %*% B; -} else { - B_out = B; -} -write (B_out, $6); - http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/scripts/functions/codegen/Algorithm_MSVM.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/codegen/Algorithm_MSVM.R b/src/test/scripts/functions/codegen/Algorithm_MSVM.R index 52a898b..6cdce91 100644 --- a/src/test/scripts/functions/codegen/Algorithm_MSVM.R +++ b/src/test/scripts/functions/codegen/Algorithm_MSVM.R @@ -35,7 +35,7 @@ if(nrow(X) < 2) lambda = 0.001 num_samples = nrow(X) -dimensions = nrow(X) +dimensions = ncol(X) num_features = ncol(X) min_y = min(Y) http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/scripts/functions/codegen/Algorithm_MSVM.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/codegen/Algorithm_MSVM.dml b/src/test/scripts/functions/codegen/Algorithm_MSVM.dml deleted file mode 100644 index 0ab739f..0000000 --- a/src/test/scripts/functions/codegen/Algorithm_MSVM.dml +++ /dev/null @@ -1,150 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -X = read($1) -Y = read($2) -intercept = $3; -eps = $4; -maxiter = $5; - -if(nrow(X) < 2) - stop("Stopping due to invalid inputs: Not possible to learn a classifier without at least 2 rows") - -epsilon = eps -lambda = 0.001 -max_iterations = maxiter -num_samples = nrow(X) -dimensions = nrow(X) -num_features = ncol(X) - - -if(nrow(X) != nrow(Y)) - stop("Stopping due to invalid argument: Numbers of rows in X and Y must match") - -if(intercept != 0 & intercept != 1) - stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1") - -min_y = min(Y) -if(min_y < 1) - stop("Stopping due to invalid argument: Label vector (Y) must be recoded") -num_classes = max(Y) -if(num_classes == 1) - stop("Stopping due to invalid argument: Maximum label value is 1, need more than one class to learn a multi-class classifier") -mod1 = Y %% 1 -mod1_should_be_nrow = sum(abs(ppred(mod1, 0, "=="))) -if(mod1_should_be_nrow != nrow(Y)) - stop("Stopping due to invalid argument: Please ensure that Y contains (positive) integral labels") - -if(epsilon < 0) - stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative") - -if(lambda < 0) - stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative") - -if(max_iterations < 1) - stop("Stopping due to invalid argument: Maximum iterations should be a positive integer") - -if (intercept == 1) { - ones = matrix(1, rows=num_samples, cols=1); - X = append(X, ones); -} - -num_rows_in_w = num_features -if(intercept == 1){ - num_rows_in_w = num_rows_in_w + 1 -} -w = matrix(0, rows=num_rows_in_w, cols=num_classes) - -debug_mat = matrix(-1, rows=max_iterations, cols=num_classes) -parfor(iter_class in 1:num_classes){ - Y_local = 2 * ppred(Y, iter_class, "==") - 1 - w_class = matrix(0, rows=num_features, cols=1) - if (intercept == 1) { - zero_matrix = matrix(0, rows=1, cols=1); - w_class = t(append(t(w_class), zero_matrix)); - } - - g_old = t(X) %*% Y_local - s = g_old - - Xw = matrix(0, rows=nrow(X), cols=1) - iter = 0 - continue = 1 - while(continue == 1) { - # minimizing primal obj along direction s - step_sz = 0 - Xd = X %*% s - wd = lambda * sum(w_class * s) - dd = lambda * sum(s * s) - continue1 = 1 - while(continue1 == 1){ - tmp_Xw = Xw + step_sz*Xd - out = 1 - Y_local * (tmp_Xw) - sv = ppred(out, 0, ">") - out = out * sv - g = wd + step_sz*dd - sum(out * Y_local * Xd) - h = dd + sum(Xd * sv * Xd) - step_sz = step_sz - g/h - if (g*g/h < 0.0000000001){ - continue1 = 0 - } - } - - #update weights - w_class = w_class + step_sz*s - Xw = Xw + step_sz*Xd - - out = 1 - Y_local * Xw - sv = ppred(out, 0, ">") - out = sv * out - obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class) - g_new = t(X) %*% (out * Y_local) - lambda * w_class - - tmp = sum(s * g_old) - - train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100 - print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc) - debug_mat[iter+1,iter_class] = obj - - if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)){ - continue = 0 - } - - #non-linear CG step - be = sum(g_new * g_new)/sum(g_old * g_old) - s = be * s + g_new - g_old = g_new - - if(sum(s^2) == 0){ - continue = 0 - } - - iter = iter + 1 - } - - w[,iter_class] = w_class -} - -extra_model_params = matrix(0, rows=2, cols=ncol(w)) -extra_model_params[1, 1] = intercept -extra_model_params[2, 1] = dimensions -w = t(append(t(w), t(extra_model_params))) -write(w, $6, format="text") http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d9c6acb3/src/test/scripts/functions/codegen/Algorithm_PNMF.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/codegen/Algorithm_PNMF.dml b/src/test/scripts/functions/codegen/Algorithm_PNMF.dml deleted file mode 100644 index 641cc09..0000000 --- a/src/test/scripts/functions/codegen/Algorithm_PNMF.dml +++ /dev/null @@ -1,40 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -X = read($1); -W = read($2); -H = read($3); - -k = $4; -eps = $5; -max_iter = $6; -iter = 1; - -while( iter < max_iter ) { - H = (H*(t(W)%*%(X/(W%*%H+eps)))) / t(colSums(W)); - W = (W*((X/(W%*%H+eps))%*%t(H))) / t(rowSums(H)); - obj = sum(W%*%H) - sum(X*log(W%*%H+eps)); - print("iter=" + iter + " obj=" + obj); - iter = iter + 1; -} - -write(W, $7); -write(H, $8);