Repository: incubator-systemml
Updated Branches:
  refs/heads/master 33c6c4b90 -> 9b9d019b2
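The theme of this commit: ppred(X, s, op) calls are deprecated in favor of applying the relational operator elementwise, and BuiltinFunctionExpression.java (at the bottom of this diff) now rejects ppred() at validation time. The mechanical rewrite, as a minimal DML sketch (toy values, not taken from any script below):

    X = matrix ("1 -2 3 -4", rows = 2, cols = 2);
    # old (now rejected): I = ppred (X, 0, ">");
    I = (X > 0);                       # 0/1 indicator matrix, same result as ppred
    print ("positives: " + sum (I));   # prints 2.0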
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/decision-tree.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/decision-tree.dml b/scripts/algorithms/decision-tree.dml
index c3a4d35..72fbc57 100644
--- a/scripts/algorithms/decision-tree.dml
+++ b/scripts/algorithms/decision-tree.dml
@@ -88,7 +88,7 @@ num_records = nrow (X);
 num_classes = ncol (Y_bin);
 
 # check if there is only one class label
-Y_bin_sum = sum (ppred (colSums (Y_bin), num_records, "=="));
+Y_bin_sum = sum (colSums (Y_bin) == num_records);
 if (Y_bin_sum == 1) {
 	stop ("Y contains only one class label. No model will be learned!");
 } else if (Y_bin_sum > 1) {
@@ -99,7 +99,7 @@ if (Y_bin_sum == 1) {
 if (fileR != " ") {
 	R = read (fileR);
 	R = order (target = R, by = 2); # sort by start indices
-	dummy_coded = ppred (R[,2], R[,3], "!=");
+	dummy_coded = (R[,2] != R[,3]);
 	R_scale = removeEmpty (target = R[,2:3] * (1 - dummy_coded), margin = "rows");
 	R_cat = removeEmpty (target = R[,2:3] * dummy_coded, margin = "rows");
 	if (fileS_map != " ") {
@@ -140,7 +140,7 @@ if (fileR != " ") {
 	W[2 * num_cat_features,] = 0;
 }
 
-last = ppred (R_cat[,2], ncol (X), "!=");
+last = (R_cat[,2] != ncol (X));
 R_cat1 = (R_cat[,2] + 1) * last;
 R_cat[,2] = (R_cat[,2] * (1 - last)) + R_cat1;
 R_cat_vec = matrix (R_cat, rows = 2 * num_cat_features, cols = 1);
@@ -252,7 +252,7 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 
 		# --- find best split ---
 		# samples that reach cur_node
-		Ix = ppred (L[,1], cur_node, "==");
+		Ix = (L[,1] == cur_node);
 		cur_Y_bin = Y_bin * Ix;
 
 		label_counts_overall = colSums (cur_Y_bin);
@@ -260,7 +260,7 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 		label_dist_overall = label_counts_overall / label_sum_overall;
 
 		if (imp == "entropy") {
-			label_dist_zero = ppred (label_dist_overall, 0, "==");
+			label_dist_zero = (label_dist_overall == 0);
 			cur_impurity = - sum (label_dist_overall * log (label_dist_overall + label_dist_zero)); # / log (2); # impurity before
 		} else { # imp == "Gini"
 			cur_impurity = sum (label_dist_overall * (1 - label_dist_overall)); # impurity before
@@ -301,7 +301,7 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 			# determine best feature to split on and the split value
 			best_scale_gain = max (I_gain_scale);
 			max_I_gain_ind_scale = as.scalar (rowIndexMax (t (I_gain_scale)));
-			p = ppred (cum_count_thresholds, max_I_gain_ind_scale, "<");
+			p = (cum_count_thresholds < max_I_gain_ind_scale);
 			sum_cum_count_thresholds = sum (p);
 			best_scale_feature = sum_cum_count_thresholds + 1;
 			best_scale_split = max_I_gain_ind_scale;
@@ -380,8 +380,8 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 			}
 			I_gain = cur_impurity - ( ( label_sum_left / label_sum_overall ) * impurity_left + ( label_sum_right / label_sum_overall ) * impurity_right);
 
-			Ix_label_sum_left_zero = ppred (label_sum_left, 0, "==");
-			Ix_label_sum_right_zero = ppred (label_sum_right, 0, "==");
+			Ix_label_sum_left_zero = (label_sum_left == 0);
+			Ix_label_sum_right_zero = (label_sum_right == 0);
 			Ix_label_sum_zero = Ix_label_sum_left_zero * Ix_label_sum_right_zero;
 			I_gain = I_gain * (1 - Ix_label_sum_zero);
@@ -500,7 +500,7 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 
 			# samples going to the left subtree
 			Ix_left = rowSums (X_cat[,start_ind:(start_ind + offset - 1)] * best_split_values_bin);
-			Ix_left = ppred (Ix_left, 1, ">=");
+			Ix_left = (Ix_left >= 1);
 			Ix_left = Ix * Ix_left;
 			Ix_right = Ix * (1 - Ix_left);
@@ -625,7 +625,7 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 			cur_node_small = as.scalar (cur_nodes_small_nonzero[1,i7]);
 
 			# build dataset for SMALL node
-			Ix = ppred (L[,1], cur_node_small, "==");
+			Ix = (L[,1] == cur_node_small);
 			if (num_scale_features > 0) {
 				X_scale_ext_small = removeEmpty (target = X_scale_ext, margin = "rows", select = Ix);
 			}
@@ -667,14 +667,14 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 
 				# --- find best split ---
 				# samples that reach cur_node
-				Ix = ppred (L_small[,1], cur_node, "==");
+				Ix = (L_small[,1] == cur_node);
 				cur_Y_bin = Y_bin_small * Ix;
 
 				label_counts_overall = colSums (cur_Y_bin);
 				label_sum_overall = sum (label_counts_overall);
 				label_dist_overall = label_counts_overall / label_sum_overall;
 				if (imp == "entropy") {
-					label_dist_zero = ppred (label_dist_overall, 0, "==");
+					label_dist_zero = (label_dist_overall == 0);
 					cur_impurity = - sum (label_dist_overall * log (label_dist_overall + label_dist_zero));
 				} else { # imp == "Gini"
 					cur_impurity = sum (label_dist_overall * (1 - label_dist_overall));
@@ -714,7 +714,7 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 					# determine best feature to split on and the split value
 					best_scale_gain = max (I_gain_scale);
 					max_I_gain_ind_scale = as.scalar (rowIndexMax (t (I_gain_scale)));
-					p = ppred (cum_count_thresholds, max_I_gain_ind_scale, "<");
+					p = (cum_count_thresholds < max_I_gain_ind_scale);
 					sum_cum_count_thresholds = sum (p);
 					best_scale_feature = sum_cum_count_thresholds + 1;
 					best_scale_split = max_I_gain_ind_scale;
@@ -792,8 +792,8 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 					}
 					I_gain = cur_impurity - ( ( label_sum_left / label_sum_overall ) * impurity_left + ( label_sum_right / label_sum_overall ) * impurity_right);
 
-					Ix_label_sum_left_zero = ppred (label_sum_left, 0, "==");
-					Ix_label_sum_right_zero = ppred (label_sum_right, 0, "==");
+					Ix_label_sum_left_zero = (label_sum_left == 0);
+					Ix_label_sum_right_zero = (label_sum_right == 0);
 					Ix_label_sum_zero = Ix_label_sum_left_zero * Ix_label_sum_right_zero;
 					I_gain = I_gain * (1 - Ix_label_sum_zero);
@@ -909,7 +909,7 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 
 					# samples going to the left subtree
 					Ix_left = rowSums (X_cat_small[,start_ind:(start_ind + offset - 1)] * best_split_values_bin);
-					Ix_left = ppred (Ix_left, 1, ">=");
+					Ix_left = (Ix_left >= 1);
 					Ix_left = Ix * Ix_left;
 					Ix_right = Ix * (1 - Ix_left);
@@ -1176,11 +1176,11 @@ if (ncol (M) > 1) {
 	process_red_subtree = FALSE;
 	invalid_node_ind = matrix (0, rows = 1, cols = ncol (M));
 	while (red_leaf & ncol (M) > 1) {
-		leaf_ind = ppred (M[3,], 0, "==");
+		leaf_ind = (M[3,] == 0);
 		labels = M[4,] * leaf_ind;
 		parent_ids = floor (M[1,] /2);
-		cond1 = ppred (labels[,1:(ncol (M) - 1)], labels[,2:ncol (M)], "=="); # siebling leaves with same label
-		cond2 = ppred (parent_ids[,1:(ncol (M)- 1)], parent_ids[,2:ncol (M)], "=="); # same parents
+		cond1 = (labels[,1:(ncol (M) - 1)] == labels[,2:ncol (M)]); # siebling leaves with same label
+		cond2 = (parent_ids[,1:(ncol (M)- 1)] == parent_ids[,2:ncol (M)]); # same parents
 		red_leaf_ind = cond1 * cond2 * leaf_ind[,2:ncol (M)];
 		if (sum (red_leaf_ind) > 0) { # if redundant subtrees exist
@@ -1189,8 +1189,8 @@ if (ncol (M) > 1) {
 		parfor (it in 1:ncol (red_leaf_ids_nonzero), check = 0){
 			cur_right_leaf_id = as.scalar (red_leaf_ids_nonzero[1,it]);
 			cur_parent_id = floor (cur_right_leaf_id / 2);
-			cur_right_leaf_pos = as.scalar (rowIndexMax (ppred (M[1,], cur_right_leaf_id, "==")));
-			cur_parent_pos = as.scalar(rowIndexMax (ppred (M[1,], cur_parent_id, "==")));
+			cur_right_leaf_pos = as.scalar (rowIndexMax ((M[1,] == cur_right_leaf_id)));
+			cur_parent_pos = as.scalar(rowIndexMax (M[1,] == cur_parent_id));
 			M[2:nrow (M), cur_parent_pos] = M[2:nrow (M), cur_right_leaf_pos];
 			M[3,cur_right_leaf_pos] = -1;
 			M[3,cur_right_leaf_pos - 1] = -1;
@@ -1206,14 +1206,14 @@ if (ncol (M) > 1) {
 
 	if (process_red_subtree) {
 		print ("REMOVING REDUNDANT SUBTREES...");
-		valid_node_ind = ppred (invalid_node_ind, 0, "==");
+		valid_node_ind = (invalid_node_ind == 0);
 		M = removeEmpty (target = M * valid_node_ind, margin = "cols");
 	}
 }
 
 if (ncol (M) > 1) { # if internal nodes exist
 	print ("COMPUTING OFFSETS TO THE LEFT CHILD FOR INTERNAL NODES...");
-	internal_ind = ppred (M[3,], 0, ">");
+	internal_ind = (M[3,] > 0);
 	internal_ids = internal_ind * M[1,];
 	internal_ids_nonzero = removeEmpty (target = internal_ids, margin = "cols");
 	a1 = internal_ids_nonzero;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/l2-svm-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/l2-svm-predict.dml b/scripts/algorithms/l2-svm-predict.dml
index cace79f..604ef64 100644
--- a/scripts/algorithms/l2-svm-predict.dml
+++ b/scripts/algorithms/l2-svm-predict.dml
@@ -64,9 +64,9 @@ if(cmdLine_scores != " ")
 
 if(cmdLine_Y != " "){
 	y = read(cmdLine_Y)
-	pred = ppred(scores, 0, ">=")
+	pred = (scores >= 0)
 	pred_labels = pred*positive_label + (1-pred)*negative_label
-	num_correct = sum(ppred(pred_labels, y, "=="))
+	num_correct = sum(pred_labels == y)
 	acc = 100*num_correct/nrow(X)
 
 	acc_str = "Accuracy (%): " + acc
@@ -79,9 +79,9 @@ if(cmdLine_Y != " "){
 	if(negative_label != -1 | positive_label != +1)
 		y = 2/(positive_label - negative_label)*y - (negative_label + positive_label)/(positive_label - negative_label)
 
-	pred_is_minus = ppred(pred, -1, "==")
+	pred_is_minus = (pred == -1)
 	pred_is_plus = 1 - pred_is_minus
-	y_is_minus = ppred(y, -1, "==")
+	y_is_minus = (y == -1)
 	y_is_plus = 1 - y_is_minus
 
 	check_min_y_minus = sum(pred_is_minus*y_is_minus)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/l2-svm.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/l2-svm.dml b/scripts/algorithms/l2-svm.dml
index d6efbec..fa40418 100644
--- a/scripts/algorithms/l2-svm.dml
+++ b/scripts/algorithms/l2-svm.dml
@@ -48,8 +48,8 @@ if(nrow(X) < 2)
 
 check_min = min(Y)
 check_max = max(Y)
-num_min = sum(ppred(Y, check_min, "=="))
-num_max = sum(ppred(Y, check_max, "=="))
+num_min = sum(Y == check_min)
+num_max = sum(Y == check_max)
 
 if(check_min == check_max)
 	stop("Stopping due to invalid inputs: Y seems to contain exactly one label")
@@ -111,7 +111,7 @@ while(continue == 1 & iter < maxiterations) {
 	while(continue1 == 1){
 		tmp_Xw = Xw + step_sz*Xd
 		out = 1 - Y * (tmp_Xw)
-		sv = ppred(out, 0, ">")
+		sv = (out > 0)
 		out = out * sv
 		g = wd + step_sz*dd - sum(out * Y * Xd)
 		h = dd + sum(Xd * sv * Xd)
@@ -126,7 +126,7 @@ while(continue == 1 & iter < maxiterations) {
 
 	Xw = Xw + step_sz*Xd
 	out = 1 - Y * Xw
-	sv = ppred(out, 0, ">")
+	sv = (out > 0)
 	out = sv * out
 	obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
 	g_new = t(X) %*% (out * Y) - lambda * w
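A pattern worth calling out in the decision-tree.dml hunks above: the entropy computation masks empty classes to avoid log(0). The 0/1 indicator of zero-probability entries is added inside the log, so those entries evaluate to log(1) = 0 and contribute nothing to the sum. A minimal DML sketch with a hypothetical class distribution:

    p = matrix ("0.5 0.5 0 0", rows = 1, cols = 4);   # two empty classes (toy values)
    is_zero = (p == 0);                               # was: ppred (p, 0, "==")
    entropy = - sum (p * log (p + is_zero));          # log (0 + 1) = 0 for empty classes
    print ("entropy: " + entropy);                    # ln(2) =~ 0.693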
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/m-svm-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/m-svm-predict.dml b/scripts/algorithms/m-svm-predict.dml
index e489568..a959836 100644
--- a/scripts/algorithms/m-svm-predict.dml
+++ b/scripts/algorithms/m-svm-predict.dml
@@ -66,7 +66,7 @@ if(cmdLine_Y != " "){
 		stop("Stopping due to invalid argument: Label vector (Y) must be recoded")
 
 	pred = rowIndexMax(scores);
-	correct_percentage = sum(ppred(pred - y, 0, "==")) / N * 100;
+	correct_percentage = sum((pred - y) == 0) / N * 100;
 
 	acc_str = "Accuracy (%): " + correct_percentage
 	print(acc_str)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/m-svm.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/m-svm.dml b/scripts/algorithms/m-svm.dml
index 4224d26..e4a7cad 100644
--- a/scripts/algorithms/m-svm.dml
+++ b/scripts/algorithms/m-svm.dml
@@ -61,7 +61,7 @@ num_classes = max(Y)
 if(num_classes == 1)
 	stop("Stopping due to invalid argument: Maximum label value is 1, need more than one class to learn a multi-class classifier")
 mod1 = Y %% 1
-mod1_should_be_nrow = sum(abs(ppred(mod1, 0, "==")))
+mod1_should_be_nrow = sum(abs(mod1 == 0))
 if(mod1_should_be_nrow != nrow(Y))
 	stop("Stopping due to invalid argument: Please ensure that Y contains (positive) integral labels")
 
@@ -93,7 +93,7 @@ w = matrix(0, rows=num_rows_in_w, cols=num_classes)
 
 debug_mat = matrix(-1, rows=max_iterations, cols=num_classes)
 parfor(iter_class in 1:num_classes){
-	Y_local = 2 * ppred(Y, iter_class, "==") - 1
+	Y_local = 2 * (Y == iter_class) - 1
 	w_class = matrix(0, rows=num_features, cols=1)
 	if (intercept == 1) {
 		zero_matrix = matrix(0, rows=1, cols=1);
@@ -116,7 +116,7 @@ parfor(iter_class in 1:num_classes){
 		while(continue1 == 1){
 			tmp_Xw = Xw + step_sz*Xd
 			out = 1 - Y_local * (tmp_Xw)
-			sv = ppred(out, 0, ">")
+			sv = (out > 0)
 			out = out * sv
 			g = wd + step_sz*dd - sum(out * Y_local * Xd)
 			h = dd + sum(Xd * sv * Xd)
@@ -131,14 +131,14 @@ parfor(iter_class in 1:num_classes){
 
 		Xw = Xw + step_sz*Xd
 		out = 1 - Y_local * Xw
-		sv = ppred(out, 0, ">")
+		sv = (out > 0)
 		out = sv * out
 		obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
 		g_new = t(X) %*% (out * Y_local) - lambda * w_class
 
 		tmp = sum(s * g_old)
 
-		train_acc = sum(ppred(Y_local*(X%*%w_class), 0, ">="))/num_samples*100
+		train_acc = sum(Y_local*(X%*%w_class) >= 0)/num_samples*100
 		print("For class " + iter_class + " iteration " + iter + " training accuracy: " + train_acc)
 		debug_mat[iter+1,iter_class] = obj

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/naive-bayes-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/naive-bayes-predict.dml b/scripts/algorithms/naive-bayes-predict.dml
index b4d2ebe..e6f8fa4 100644
--- a/scripts/algorithms/naive-bayes-predict.dml
+++ b/scripts/algorithms/naive-bayes-predict.dml
@@ -65,7 +65,7 @@ if(cmdLine_Y != " "){
 		stop("Stopping due to invalid argument: Label vector (Y) must be recoded")
 
 	pred = rowIndexMax(log_probs)
-	acc = sum(ppred(pred, C, "==")) / numRows * 100
+	acc = sum(pred == C) / numRows * 100
 
 	acc_str = "Accuracy (%): " + acc
 	print(acc_str)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/obsolete/naive-bayes-parfor.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/obsolete/naive-bayes-parfor.dml b/scripts/algorithms/obsolete/naive-bayes-parfor.dml
index c90e64c..b961455 100644
--- a/scripts/algorithms/obsolete/naive-bayes-parfor.dml
+++ b/scripts/algorithms/obsolete/naive-bayes-parfor.dml
@@ -45,7 +45,7 @@ numClasses = max(C)
 if(numClasses == 1)
 	stop("Stopping due to invalid argument: Maximum label value is 1, need more than one class to learn a multi-class classifier")
 mod1 = C %% 1
-mod1_should_be_nrow = sum(abs(ppred(mod1, 0, "==")))
+mod1_should_be_nrow = sum(abs(mod1 == 0))
 if(mod1_should_be_nrow != numRows)
 	stop("Stopping due to invalid argument: Please ensure that Y contains (positive) integral labels")
 
@@ -85,7 +85,7 @@ D_w_ones = append(D, ones)
 model = append(class_conditionals, class_prior)
 log_probs = D_w_ones %*% t(log(model))
 pred = rowIndexMax(log_probs)
-acc = sum(ppred(pred, C, "==")) / numRows * 100
+acc = sum(pred == C) / numRows * 100
 acc_str = "Training Accuracy (%): " + acc
 print(acc_str)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/random-forest-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/random-forest-predict.dml b/scripts/algorithms/random-forest-predict.dml
index 7bc6cd6..f8a04cd 100644
--- a/scripts/algorithms/random-forest-predict.dml
+++ b/scripts/algorithms/random-forest-predict.dml
@@ -90,7 +90,7 @@ R_scale = matrix (0, rows = 1, cols = 1);
 
 if (fileR != " ") {
 	R = read (fileR);
-	dummy_coded = ppred (R[,2], R[,3], "!=");
+	dummy_coded = (R[,2] != R[,3]);
 	R_scale = removeEmpty (target = R[,2] * (1 - dummy_coded), margin = "rows");
 	R_cat = removeEmpty (target = R[,2:3] * dummy_coded, margin = "rows");
 } else { # only scale features available
@@ -143,7 +143,7 @@ parfor (i in 1:num_records, check = 0) {
 			cur_end_ind = as.scalar (R_cat[cur_feature,2]);
 			cur_value = as.scalar (rowIndexMax(cur_sample[,cur_start_ind:cur_end_ind]));
 			cur_offset = as.scalar (M[6,cur_node_pos]);
-			value_found = sum (ppred (M[7:(7 + cur_offset - 1),cur_node_pos], cur_value, "=="));
+			value_found = sum (M[7:(7 + cur_offset - 1),cur_node_pos] == cur_value);
 			if (value_found >= 1) { # go to left branch
 				cur_node_pos = cur_node_pos + as.scalar (M[3,cur_node_pos]);
 				# cur_node = as.scalar (cur_M[1,cur_node_pos]);
@@ -162,7 +162,7 @@ if (fileY != " ") {
 	Y_dummy = read (fileY);
 	num_classes = ncol (Y_dummy);
 	Y = rowSums (Y_dummy * t (seq (1, num_classes)));
-	result = ppred (Y, Y_predicted, "==");
+	result = (Y == Y_predicted);
 	result = sum (result);
 	accuracy = result / num_records * 100;
 	acc_str = "Accuracy (%): " + accuracy;
@@ -172,12 +172,12 @@ if (fileY != " ") {
 		print (acc_str);
 	}
 	if (fileC != " ") {
-		oob_ind = ppred (rowSums (label_counts_oob), 0, ">")
+		oob_ind = (rowSums (label_counts_oob) > 0)
 		label_counts_oob = removeEmpty (target = label_counts_oob, margin = "rows");
 		num_oob = nrow (label_counts_oob);
 		Y_predicted_oob = rowIndexMax (label_counts_oob);
 		Y_oob = removeEmpty (target = Y * oob_ind, margin = "rows");
-		result = ppred (Y_oob, Y_predicted_oob, "==");
+		result = (Y_oob == Y_predicted_oob);
 		oob_error = (1 - (sum (result) / num_oob)) * 100;
 		oob_str = "Out-Of-Bag error (%): " + oob_error;
 		if (fileOOB != " ") {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/random-forest.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/random-forest.dml b/scripts/algorithms/random-forest.dml
index b68d711..6fd501e 100644
--- a/scripts/algorithms/random-forest.dml
+++ b/scripts/algorithms/random-forest.dml
@@ -95,7 +95,7 @@ num_records = nrow (X);
 num_classes = ncol (Y_bin);
 
 # check if there is only one class label
-Y_bin_sum = sum (ppred (colSums (Y_bin), num_records, "=="));
+Y_bin_sum = sum (colSums (Y_bin) == num_records);
 if (Y_bin_sum == 1) {
 	stop ("Y contains only one class label. No model will be learned!");
 } else if (Y_bin_sum > 1) {
@@ -106,7 +106,7 @@ if (Y_bin_sum == 1) {
 if (fileR != " ") {
 	R = read (fileR);
 	R = order (target = R, by = 2); # sort by start indices
-	dummy_coded = ppred (R[,2], R[,3], "!=");
+	dummy_coded = (R[,2] != R[,3]);
 	R_scale = removeEmpty (target = R[,2:3] * (1 - dummy_coded), margin = "rows");
 	R_cat = removeEmpty (target = R[,2:3] * dummy_coded, margin = "rows");
 	if (fileS_map != " ") {
@@ -147,7 +147,7 @@ if (fileR != " ") {
 	W[2 * num_cat_features,] = 0;
 }
 
-last = ppred (R_cat[,2], ncol (X), "!=");
+last = (R_cat[,2] != ncol (X));
 R_cat1 = (R_cat[,2] + 1) * last;
 R_cat[,2] = (R_cat[,2] * (1 - last)) + R_cat1;
 R_cat_vec = matrix (R_cat, rows = 2 * num_cat_features, cols = 1);
@@ -209,7 +209,7 @@ L = matrix (1, rows = num_records, cols = num_trees); # last visited node id for
 # create matrix of counts (generated by Poisson distribution) storing how many times each sample appears in each tree
 print ("CONPUTING COUNTS...");
 C = rand (rows = num_records, cols = num_trees, pdf = "poisson", lambda = rate);
-Ix_nonzero = ppred (C, 0, "!=");
+Ix_nonzero = (C != 0);
 L = L * Ix_nonzero;
 total_counts = sum (C);
@@ -283,12 +283,12 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 		# select sample features WOR
 		feature_samples = sample (num_features_total, num_feature_samples);
 		feature_samples = order (target = feature_samples, by = 1);
-		num_scale_feature_samples = sum (ppred (feature_samples, num_scale_features, "<="));
+		num_scale_feature_samples = sum (feature_samples <= num_scale_features);
 		num_cat_feature_samples = num_feature_samples - num_scale_feature_samples;
 
 		# --- find best split ---
 		# samples that reach cur_node
-		Ix = ppred (L[,cur_tree], cur_node, "==");
+		Ix = (L[,cur_tree] == cur_node);
 		cur_Y_bin = Y_bin * (Ix * C[,cur_tree]);
 
 		label_counts_overall = colSums (cur_Y_bin);
@@ -296,7 +296,7 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 		label_dist_overall = label_counts_overall / label_sum_overall;
 
 		if (imp == "entropy") {
-			label_dist_zero = ppred (label_dist_overall, 0, "==");
+			label_dist_zero = (label_dist_overall == 0);
 			cur_impurity = - sum (label_dist_overall * log (label_dist_overall + label_dist_zero)); # / log (2); # impurity before
 		} else { # imp == "Gini"
 			cur_impurity = sum (label_dist_overall * (1 - label_dist_overall)); # impurity before
@@ -445,8 +445,8 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 			}
 			I_gain = cur_impurity - ( ( label_sum_left / label_sum_overall ) * impurity_left + ( label_sum_right / label_sum_overall ) * impurity_right);
 
-			Ix_label_sum_left_zero = ppred (label_sum_left, 0, "==");
-			Ix_label_sum_right_zero = ppred (label_sum_right, 0, "==");
+			Ix_label_sum_left_zero = (label_sum_left == 0);
+			Ix_label_sum_right_zero = (label_sum_right == 0);
 			Ix_label_sum_zero = Ix_label_sum_left_zero * Ix_label_sum_right_zero;
 			I_gain = I_gain * (1 - Ix_label_sum_zero);
@@ -572,7 +572,7 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 
 			# samples going to the left subtree
 			Ix_left = rowSums (X_cat[,start_ind:(start_ind + offset - 1)] * best_split_values_bin);
-			Ix_left = ppred (Ix_left, 1, ">=");
+			Ix_left = (Ix_left >= 1);
 			Ix_left = Ix * Ix_left;
 			Ix_right = Ix * (1 - Ix_left);
@@ -710,7 +710,7 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 			cur_tree_small = as.scalar (cur_nodes_small_nonzero[3,i7]);
 
 			# build dataset for SMALL node
-			Ix = ppred (L[,cur_tree_small], cur_node_small, "==");
+			Ix = (L[,cur_tree_small] == cur_node_small);
 			if (num_scale_features > 0) {
 				X_scale_ext_small = removeEmpty (target = X_scale_ext, margin = "rows", select = Ix);
 			}
@@ -763,19 +763,19 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 				# select sample features WOR
 				feature_samples = sample (num_features_total, num_feature_samples);
 				feature_samples = order (target = feature_samples, by = 1);
-				num_scale_feature_samples = sum (ppred (feature_samples, num_scale_features, "<="));
+				num_scale_feature_samples = sum (feature_samples <= num_scale_features);
 				num_cat_feature_samples = num_feature_samples - num_scale_feature_samples;
 
 				# --- find best split ---
 				# samples that reach cur_node
-				Ix = ppred (L_small[,cur_tree], cur_node, "==");
+				Ix = (L_small[,cur_tree] == cur_node);
 				cur_Y_bin = Y_bin_small * (Ix * C_small[,cur_tree]);
 
 				label_counts_overall = colSums (cur_Y_bin);
 				label_sum_overall = sum (label_counts_overall);
 				label_dist_overall = label_counts_overall / label_sum_overall;
 				if (imp == "entropy") {
-					label_dist_zero = ppred (label_dist_overall, 0, "==");
+					label_dist_zero = (label_dist_overall == 0);
 					cur_impurity = - sum (label_dist_overall * log (label_dist_overall + label_dist_zero)); # / log (2);
 				} else { # imp == "Gini"
 					cur_impurity = sum (label_dist_overall * (1 - label_dist_overall));
@@ -923,8 +923,8 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 					}
 					I_gain = cur_impurity - ( ( label_sum_left / label_sum_overall ) * impurity_left + ( label_sum_right / label_sum_overall ) * impurity_right);
 
-					Ix_label_sum_left_zero = ppred (label_sum_left, 0, "==");
-					Ix_label_sum_right_zero = ppred (label_sum_right, 0, "==");
+					Ix_label_sum_left_zero = (label_sum_left == 0);
+					Ix_label_sum_right_zero = (label_sum_right == 0);
 					Ix_label_sum_zero = Ix_label_sum_left_zero * Ix_label_sum_right_zero;
 					I_gain = I_gain * (1 - Ix_label_sum_zero);
@@ -1047,7 +1047,7 @@ while ((num_cur_nodes_large + num_cur_nodes_small) > 0 & level < depth) {
 
 					# samples going to the left subtree
 					Ix_left = rowSums (X_cat_small[,start_ind:(start_ind + offset - 1)] * best_split_values_bin);
-					Ix_left = ppred (Ix_left, 1, ">=");
+					Ix_left = (Ix_left >= 1);
 					Ix_left = Ix * Ix_left;
 					Ix_right = Ix * (1 - Ix_left);
@@ -1317,13 +1317,13 @@ if (ncol (M) > 1) {
 	process_red_subtree = FALSE;
 	invalid_node_ind = matrix (0, rows = 1, cols = ncol (M));
 	while (red_leaf & ncol (M) > 1) {
-		leaf_ind = ppred (M[4,], 0, "==");
+		leaf_ind = (M[4,] == 0);
 		labels = M[5,] * leaf_ind;
 		tree_ids = M[2,];
 		parent_ids = floor (M[1,] /2);
-		cond1 = ppred (labels[,1:(ncol (M) - 1)], labels[,2:ncol (M)], "=="); # siebling leaves with same label
-		cond2 = ppred (parent_ids[,1:(ncol (M) - 1)], parent_ids[,2:ncol (M)], "=="); # same parents
-		cond3 = ppred (tree_ids[,1:(ncol (M) - 1)], tree_ids[,2:ncol (M)], "=="); # same tree
+		cond1 = (labels[,1:(ncol (M) - 1)] == labels[,2:ncol (M)]); # siebling leaves with same label
+		cond2 = (parent_ids[,1:(ncol (M) - 1)] == parent_ids[,2:ncol (M)]); # same parents
+		cond3 = (tree_ids[,1:(ncol (M) - 1)] == tree_ids[,2:ncol (M)]); # same tree
 		red_leaf_ind = cond1 * cond2 * cond3 * leaf_ind[,2:ncol (M)];
 		if (sum (red_leaf_ind) > 0) { # if redundant subtrees exist
@@ -1333,8 +1333,8 @@ if (ncol (M) > 1) {
 			cur_right_leaf_id = as.scalar (red_leaf_ids_nonzero[1,it]);
 			cur_parent_id = floor (cur_right_leaf_id / 2);
 			cur_tree_id = as.scalar (red_leaf_ids_nonzero[2,it]);
-			cur_right_leaf_pos = as.scalar (rowIndexMax (ppred (M[1,], cur_right_leaf_id, "==") * ppred (M[2,], cur_tree_id, "==")));
-			cur_parent_pos = as.scalar(rowIndexMax (ppred (M[1,], cur_parent_id, "==") * ppred (M[2,], cur_tree_id, "==")));
+			cur_right_leaf_pos = as.scalar (rowIndexMax ((M[1,] == cur_right_leaf_id) * (M[2,] == cur_tree_id)));
+			cur_parent_pos = as.scalar(rowIndexMax ((M[1,] == cur_parent_id) * (M[2,] == cur_tree_id)));
 			M[3:nrow (M), cur_parent_pos] = M[3:nrow (M), cur_right_leaf_pos];
 			M[4,cur_right_leaf_pos] = -1;
 			M[4,cur_right_leaf_pos - 1] = -1;
@@ -1349,12 +1349,12 @@ if (ncol (M) > 1) {
 
 	if (process_red_subtree) {
 		print ("REMOVING REDUNDANT SUBTREES...");
-		valid_node_ind = ppred (invalid_node_ind, 0, "==");
+		valid_node_ind = (invalid_node_ind == 0);
 		M = removeEmpty (target = M * valid_node_ind, margin = "cols");
 	}
 }
 
-internal_ind = ppred (M[4,], 0, ">");
+internal_ind = (M[4,] > 0);
 internal_ids = M[1:2,] * internal_ind;
 internal_ids_nonzero = removeEmpty (target = internal_ids, margin = "cols");
 if (as.scalar (internal_ids_nonzero[1,1]) > 0) { # if internal nodes exist

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/algorithms/stratstats.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/stratstats.dml b/scripts/algorithms/stratstats.dml
index d380220..3745fab 100644
--- a/scripts/algorithms/stratstats.dml
+++ b/scripts/algorithms/stratstats.dml
@@ -140,8 +140,8 @@ print ("Preparing the covariates...");
 
 XnoNaNs = replace (target = XwithNaNs, pattern = 0.0/0.0, replacement = 0);
 YnoNaNs = replace (target = YwithNaNs, pattern = 0.0/0.0, replacement = 0);
-XNaNmask = ppred (XwithNaNs, XwithNaNs, "==");
-YNaNmask = ppred (YwithNaNs, YwithNaNs, "==");
+XNaNmask = (XwithNaNs == XwithNaNs);
+YNaNmask = (YwithNaNs == YwithNaNs);
 one_to_num_attrs_X = seq (1, num_attrs_X, 1);
 one_to_num_attrs_Y = seq (1, num_attrs_Y, 1);
 ProjX = matrix (0, rows = num_attrs, cols = num_attrs_X);
@@ -161,8 +161,8 @@ Y_mask = YNaNmask %*% ProjY;
 print ("Preparing the strata...");
 
 SnoNaNs = replace (target = SwithNaNs, pattern = 0.0/0.0, replacement = 0);
-S = round (SnoNaNs) * ppred (SnoNaNs, 0.0, ">");
-Proj_good_stratumID = diag (ppred (S, 0.0, ">"));
+S = round (SnoNaNs) * (SnoNaNs > 0);
+Proj_good_stratumID = diag (S > 0);
 Proj_good_stratumID = removeEmpty (target = Proj_good_stratumID, margin = "rows");
 vector_of_good_stratumIDs = Proj_good_stratumID %*% S;
 vector_of_good_stratumIDs = vector_of_good_stratumIDs + (1 - min (vector_of_good_stratumIDs));
@@ -199,8 +199,8 @@ print ("Computing the stratified single-variate statistics...");
 
 Cnt_X_per_stratum = StrataSummator %*% X_mask;
 Cnt_Y_per_stratum = StrataSummator %*% Y_mask;
-Is_none_X_per_stratum = ppred (Cnt_X_per_stratum, 0, "==");
-Is_none_Y_per_stratum = ppred (Cnt_Y_per_stratum, 0, "==");
+Is_none_X_per_stratum = (Cnt_X_per_stratum == 0);
+Is_none_Y_per_stratum = (Cnt_Y_per_stratum == 0);
 One_over_cnt_X_per_stratum = (1 - Is_none_X_per_stratum) / (Cnt_X_per_stratum + Is_none_X_per_stratum);
 One_over_cnt_Y_per_stratum = (1 - Is_none_Y_per_stratum) / (Cnt_Y_per_stratum + Is_none_Y_per_stratum);
 num_X_nonempty_strata = num_strata - colSums (Is_none_X_per_stratum);
@@ -294,7 +294,7 @@ Sum_Y_forXY_per_stratum = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ( Y
 Sum_YY_forXY_per_stratum = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ((Y * Y) %*% Proj_Y_to_XY));
 Sum_XY_per_stratum = StrataSummator %*% (( X %*% Proj_X_to_XY) * ( Y %*% Proj_Y_to_XY));
-Is_none_XY_per_stratum = ppred (Cnt_XY_per_stratum, 0, "==");
+Is_none_XY_per_stratum = (Cnt_XY_per_stratum == 0);
 One_over_cnt_XY_per_stratum = (1 - Is_none_XY_per_stratum) / (Cnt_XY_per_stratum + Is_none_XY_per_stratum);
 num_XY_nonempty_strata = num_strata - colSums (Is_none_XY_per_stratum);
@@ -361,7 +361,7 @@ OutMtx [35, ] = stdev_errY_vs_X_stratified; # Stratified residual standard devi
 OutMtx [36, ] = r_sqr_X_vs_Y_stratified;     # Stratified R-squared
 OutMtx [37, ] = adj_r_sqr_X_vs_Y_stratified; # Stratified adjusted R-squared
 OutMtx [38, ] = p_val_Y_vs_X_stratified;     # Stratified P-value for hypothesis "slope = 0"
-OutMtx [39, ] = colSums (ppred (Cnt_XY_per_stratum, 2, ">=")); # Number of strata with at least two counted points
+OutMtx [39, ] = colSums (Cnt_XY_per_stratum >= 2); # Number of strata with at least two counted points
 
 OutMtx = t(OutMtx);
@@ -378,7 +378,7 @@ fStat_tailprob = function (Matrix[double] fStat, Matrix[double] df_1, Matrix[dou
             q = as.scalar (fStat [i, j]);
             d1 = as.scalar (df_1 [i, j]);
             d2 = as.scalar (df_2 [i, j]);
-            if (d1 >= 1 & d2 >= 1 & q >= 0.0) {
+            if (d1 >= 1 & d2 >= 1 & q >= 0) {
                 tailprob [i, j] = pf(target = q, df1 = d1, df2 = d2, lower.tail=FALSE);
             } else {
                 tailprob [i, j] = 0/0;
@@ -388,9 +388,9 @@ fStat_tailprob = function (Matrix[double] fStat, Matrix[double] df_1, Matrix[dou
 
 sqrt_failsafe = function (Matrix[double] input_A) return (Matrix[double] output_A)
 {
-    mask_A = ppred (input_A, 0.0, ">=");
+    mask_A = (input_A >= 0);
     prep_A = input_A * mask_A;
-    mask_A = mask_A * ppred (prep_A, prep_A, "==");
+    mask_A = mask_A * (prep_A == prep_A);
     prep_A = replace (target = prep_A, pattern = 0.0/0.0, replacement = 0);
     output_A = sqrt (prep_A) / mask_A;
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/datagen/genRandData4ALS.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4ALS.dml b/scripts/datagen/genRandData4ALS.dml
index eff1ddf..a5838d7 100644
--- a/scripts/datagen/genRandData4ALS.dml
+++ b/scripts/datagen/genRandData4ALS.dml
@@ -37,7 +37,7 @@ I = floor(rand(rows = nnz, cols = 1, min = 1, max = m + 0.999999999));
 J = floor(rand(rows = nnz, cols = 1, min = 1, max = n + 0.999999999));
 X = rand(rows = nnz, cols = 1, pdf = "normal") * sqrt(sigma);
 N = table(I, J, X);
-T = ppred(N, 0, "!=");
+T = (N != 0);
 X = T * (W %*% H) + T * N;
 write(X, Xfile, format = fmt);
 write(W, Wfile, format = fmt);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/datagen/genRandData4Kmeans.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4Kmeans.dml b/scripts/datagen/genRandData4Kmeans.dml
index fe50ac5..9b3c90c 100644
--- a/scripts/datagen/genRandData4Kmeans.dml
+++ b/scripts/datagen/genRandData4Kmeans.dml
@@ -94,9 +94,9 @@ X = Y_bitmap %*% C + X_shift;
 
 print ("Computing record-to-cluster assignments by minimum centroid distance...");
 
 D = t(t(-2 * (X %*% t(C))) + rowSums (C ^ 2));
-P = ppred (D, rowMins (D), "<=");
+P = (D <= rowMins (D));
 aggr_P = t(cumsum (t(P)));
-Y_by_C = rowSums (ppred (aggr_P, 0, "==")) + 1;
+Y_by_C = rowSums (aggr_P == 0) + 1;
 
 print ("Computing useful statistics...");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/datagen/genRandData4LinearReg_LTstats.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4LinearReg_LTstats.dml b/scripts/datagen/genRandData4LinearReg_LTstats.dml
index e4e8384..4b4f634 100644
--- a/scripts/datagen/genRandData4LinearReg_LTstats.dml
+++ b/scripts/datagen/genRandData4LinearReg_LTstats.dml
@@ -73,7 +73,7 @@ if (numCategories == 1) {
     isBinomialPMOne = TRUE;
 }
 do_we_output_intercept = 1;
-if (minIntercept == 0.0 & maxIntercept == 0.0) {
+if (minIntercept == 0 & maxIntercept == 0) {
     do_we_output_intercept = 0;
 }
 
@@ -110,7 +110,7 @@ Prob = t(cumsum (t(Prob)));
 
 r = Rand (rows = numSamples, cols = 1, min = 0, max = 1, pdf = "uniform", seed = 0);
 R = r %*% ones;
-Y = 1 + rowSums (ppred (Prob, R, "<"));
+Y = 1 + rowSums (Prob < R);
 if (isBinomialPMOne) {
     Y = 3 - 2 * Y;
 }
@@ -185,7 +185,7 @@ generateWeights =
     b_qe = 2.0 * meanLT * (t(r_1) %*% XDW);
     c_qe = meanLT^2 * norm_r_1_sq - sigmaLT^2 * nrow(X);
 
-    is_sigmaLT_OK = ppred (c_qe, 0.0, "<=");
+    is_sigmaLT_OK = (c_qe <= 0);
     new_sigmaLT = is_sigmaLT_OK * sigmaLT + (1 - is_sigmaLT_OK) * abs (meanLT) * sqrt (norm_r_1_sq / nrow(X));
     c_qe = is_sigmaLT_OK * c_qe;
     x_qe = (- b_qe + sqrt (b_qe * b_qe - 4.0 * a_qe * c_qe)) / (2.0 * a_qe);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/datagen/genRandData4LogReg_LTstats.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4LogReg_LTstats.dml b/scripts/datagen/genRandData4LogReg_LTstats.dml
index 1797f4f..1c33ea5 100644
--- a/scripts/datagen/genRandData4LogReg_LTstats.dml
+++ b/scripts/datagen/genRandData4LogReg_LTstats.dml
@@ -72,7 +72,7 @@ if (numCategories == 1) {
    isBinomialPMOne = TRUE;
 }
 do_we_output_intercept = 1;
-if (minIntercept == 0.0 & maxIntercept == 0.0) {
+if (minIntercept == 0 & maxIntercept == 0) {
     do_we_output_intercept = 0;
 }
 
@@ -108,7 +108,7 @@ Prob = t(cumsum (t(Prob)));
 
 r = Rand (rows = numSamples, cols = 1, min = 0, max = 1, pdf = "uniform", seed = 0);
 R = r %*% ones;
-Y = 1 + rowSums (ppred (Prob, R, "<"));
+Y = 1 + rowSums (Prob < R);
 if (isBinomialPMOne) {
     Y = 3 - 2 * Y;
 }
@@ -185,7 +185,7 @@ generateWeights =
     b_qe = 2.0 * meanLT * (t(r_1) %*% XDW);
     c_qe = meanLT^2 * norm_r_1_sq - sigmaLT^2 * nrow(X);
 
-    is_sigmaLT_OK = ppred (c_qe, 0.0, "<=");
+    is_sigmaLT_OK = (c_qe <= 0);
     new_sigmaLT = is_sigmaLT_OK * sigmaLT + (1 - is_sigmaLT_OK) * abs (meanLT) * sqrt (norm_r_1_sq / nrow(X));
     c_qe = is_sigmaLT_OK * c_qe;
     x_qe = (- b_qe + sqrt (b_qe * b_qe - 4.0 * a_qe * c_qe)) / (2.0 * a_qe);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/datagen/genRandData4LogisticRegression.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4LogisticRegression.dml b/scripts/datagen/genRandData4LogisticRegression.dml
index d18bc9e..98a7b98 100644
--- a/scripts/datagen/genRandData4LogisticRegression.dml
+++ b/scripts/datagen/genRandData4LogisticRegression.dml
@@ -62,7 +62,7 @@ if(addNoise == 1){
 	r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
 	#r = Rand(rows=numSamples, cols=1, min=0.5, max=0.5, pdf="uniform")
 }
-Y = 1 - 2*ppred(prob, r, "<")
+Y = 1 - 2 * (prob < r)
 if( $12 == 1 ) {
 	Y = (Y+3)/2;
 }
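The label generators above (genRandData4LinearReg_LTstats.dml, genRandData4LogReg_LTstats.dml, genRandData4LogisticRegression.dml) and genRandData4Multinomial.dml below all sample categorical labels the same way: compare a uniform draw against the row-wise cumulative class probabilities and count how many thresholds it clears. A minimal DML sketch (hypothetical 3-class distribution):

    Prob = matrix ("0.2 0.5 0.3", rows = 1, cols = 3);
    cumProb = t (cumsum (t (Prob)));               # 0.2 0.7 1.0
    r = as.scalar (rand (rows = 1, cols = 1, min = 0, max = 1));
    Y = 1 + rowSums (cumProb < r);                 # was: 1 + rowSums (ppred (Prob, R, "<"))
    print ("sampled label: " + as.scalar (Y));     # 1, 2, or 3 with prob 0.2 / 0.5 / 0.3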
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/datagen/genRandData4MultiClassSVM.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4MultiClassSVM.dml b/scripts/datagen/genRandData4MultiClassSVM.dml
index 5d9fbcb..afa86e8 100644
--- a/scripts/datagen/genRandData4MultiClassSVM.dml
+++ b/scripts/datagen/genRandData4MultiClassSVM.dml
@@ -60,7 +60,7 @@ if(addNoise == 1){
 	r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
 	#r = Rand(rows=numSamples, cols=1, min=0.5, max=0.5, pdf="uniform")
 }
-Y = 1 - 2*ppred(prob, r, "<")
+Y = 1 - 2 * (prob < r)
 Y = (Y+3)/2
 
 write(w, $5, format="binary")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/datagen/genRandData4Multinomial.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4Multinomial.dml b/scripts/datagen/genRandData4Multinomial.dml
index 01544ef..9366675 100644
--- a/scripts/datagen/genRandData4Multinomial.dml
+++ b/scripts/datagen/genRandData4Multinomial.dml
@@ -57,7 +57,7 @@ Prob = Prob / (1.0 + rowSums(Prob));
 Prob = t(cumsum (t(Prob)));
 
 r = Rand (rows = num_records, cols = 1, min = 0, max = 1, pdf = "uniform");
-Y = 1 + rowSums (ppred (Prob, r, "<"));
+Y = 1 + rowSums (Prob < r);
 
 # ensure all classes are represented
 Y[(num_records-num_categories+1):num_records,1] = seq(1,num_categories);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/datagen/genRandData4NMF.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4NMF.dml b/scripts/datagen/genRandData4NMF.dml
index 87e3f47..a82ac4e 100644
--- a/scripts/datagen/genRandData4NMF.dml
+++ b/scripts/datagen/genRandData4NMF.dml
@@ -43,7 +43,7 @@ numWordsPerDoc = $4
 
 docTopicMixtures = Rand(rows=numDocuments, cols=numTopics, min=0.0, max=1.0, pdf="uniform", seed=0, sparsity=0.75)
 denomsTM = rowSums(docTopicMixtures)
-zerosInDenomsTM = ppred(denomsTM, 0, "==")
+zerosInDenomsTM = denomsTM == 0
 denomsTM = 0.1*zerosInDenomsTM + (1-zerosInDenomsTM)*denomsTM
 parfor(i in 1:numTopics){
 	docTopicMixtures[,i] = docTopicMixtures[,i]/denomsTM

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/datagen/genRandData4NMFBlockwise.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4NMFBlockwise.dml b/scripts/datagen/genRandData4NMFBlockwise.dml
index 06b8057..0ad548e 100644
--- a/scripts/datagen/genRandData4NMFBlockwise.dml
+++ b/scripts/datagen/genRandData4NMFBlockwise.dml
@@ -47,7 +47,7 @@ blocksize = $8
 
 docTopicMixtures = Rand(rows=numDocuments, cols=numTopics, min=0.0, max=1.0, pdf="uniform", seed=0, sparsity=0.75)
 denomsTM = rowSums(docTopicMixtures)
-zerosInDenomsTM = ppred(denomsTM, 0, "==")
+zerosInDenomsTM = (denomsTM == 0)
 denomsTM = 0.1*zerosInDenomsTM + (1-zerosInDenomsTM)*denomsTM
 parfor(i in 1:numTopics){
 	docTopicMixtures[,i] = docTopicMixtures[,i]/denomsTM

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/datagen/genRandData4StratStats.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4StratStats.dml b/scripts/datagen/genRandData4StratStats.dml
index 333bcab..1bca453 100644
--- a/scripts/datagen/genRandData4StratStats.dml
+++ b/scripts/datagen/genRandData4StratStats.dml
@@ -139,7 +139,7 @@ Data = append (append (min_stratumID - 1 + SID, t(t_X)), t(t_Y));
 RNaNS = Rand (rows = num_records, cols = 1, min = 1.0, max = 1.0, sparsity = prob_NaN_in_stratum);
 RNaNX = Rand (rows = num_records, cols = num_features, min = 1.0, max = 1.0, sparsity = prob_NaN_in_X);
 RNaNY = Rand (rows = num_records, cols = num_features, min = 1.0, max = 1.0, sparsity = prob_NaN_in_Y);
-Mask = ppred (append (append (RNaNS, RNaNX), RNaNY), 0.0, "!=");
+Mask = (append (append (RNaNS, RNaNX), RNaNY)) != 0;
 Data = Data + (1.0 - Mask) / (1.0 - Mask);
 
 # Output the dataset and the auxiliaries

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/datagen/genRandData4Transform.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4Transform.dml b/scripts/datagen/genRandData4Transform.dml
index bc799d6..6a44299 100644
--- a/scripts/datagen/genRandData4Transform.dml
+++ b/scripts/datagen/genRandData4Transform.dml
@@ -48,7 +48,7 @@ num_categorical_cols = 0.0
 scalar_ind = matrix(1, rows=num_scalar_cols, cols=1)
 if(prob_categorical > 0){
 	categorical_ind = Rand(rows=num_cols, cols=1, min=0, max=1, pdf="uniform")
-	categorical_ind = ppred(categorical_ind, prob_categorical, "<")
+	categorical_ind = categorical_ind < prob_categorical
 	categorical_col_ids = removeEmpty(target=seq(1, num_cols, 1)*categorical_ind, margin="rows")
 	num_categorical_cols = sum(categorical_ind)
 	write(categorical_col_ids, $out_categorical, format="csv")
@@ -82,12 +82,12 @@ if(num_categorical_cols > 0 & num_scalar_cols > 0){
 
 if(prob_missing_col > 0){
 	missing_col_ind = Rand(rows=num_cols, cols=1, min=0, max=1, pdf="uniform")
-	missing_col_ind = ppred(missing_col_ind, prob_missing_col, "<")
+	missing_col_ind = missing_col_ind < prob_missing_col
 	#currently only support missing value imputation for scale cols
 	missing_col_ind = missing_col_ind * scalar_ind
 	missing_col_ids = removeEmpty(target=seq(1, num_cols, 1)*missing_col_ind, margin="rows")
 	missing_values = Rand(rows=num_rows, cols=nrow(missing_col_ids), min=0, max=1, pdf="uniform")
-	missing_values = ppred(missing_values, prob_missing_val, "<")
+	missing_values = missing_values < prob_missing_val
 	X = append(X, missing_values)
 
 	write(missing_col_ids, $out_missing, format="csv")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/staging/knn.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/knn.dml b/scripts/staging/knn.dml
index aadff67..e5cc366 100644
--- a/scripts/staging/knn.dml
+++ b/scripts/staging/knn.dml
@@ -339,7 +339,7 @@ getErr_k = function ( matrix[double] in_m_neighbor_value,
     out_m_err = matrix( 0,i_row,i_col - in_i_k_min + 1 );
     if( in_i_cl_type == 2 ){
         #category
-        m_correct = ppred( in_m_neighbor_value,in_m_cl[1:i_row,],"!=" );
+        m_correct = in_m_neighbor_value != in_m_cl[1:i_row,];
     }else{
         #continues
        m_correct = (in_m_neighbor_value - in_m_cl[1:i_row,])^2;#ppred( in_m_neighbor_value,in_m_cl,"-" );
@@ -471,7 +471,7 @@ return(
         }
 
         #m_tmp = ppred( colSums( m_active_flag_tmp ),1,">=" );
-        m_active_flag = ppred( m_active_flag + m_active_flag_tmp,1,">=" );
+        m_active_flag = m_active_flag + m_active_flag_tmp >= 1;
         i_active_model_number = -sum( m_active_flag - 1 );
 
         # 5.break while check
@@ -493,7 +493,7 @@ return(
     m_err_for_order = append( t( m_iter_err_sum ),
         matrix( seq( k_min,k_max,1 ),k_max-k_min+1,1 ) );
     m_err_for_order = removeEmpty(
-        target=m_err_for_order * t( ppred( m_active_flag,0,"==" ) ),margin="rows" );
+        target=m_err_for_order * t( m_active_flag == 0 ),margin="rows" );
 
     for( i in 1:nrow( m_err_for_order ) ){
         print( "k:" + as.scalar( m_err_for_order[i,2] ) + ",err:" + as.scalar( m_err_for_order[i,1] ) );
@@ -513,7 +513,7 @@ getErr = function ( matrix[double] in_m_neighbor_value,
 
     if( in_i_cl_type == 2 ){
         #category
-        m_correct = ppred( in_m_neighbor_value,in_m_cl[1:i_row,],"!=" );
+        m_correct = in_m_neighbor_value != in_m_cl[1:i_row,];
     }else{
         #continues
        m_correct = (in_m_neighbor_value - in_m_cl[1:i_row,])^2;#ppred( in_m_neighbor_value,in_m_cl,"-" );
@@ -657,7 +657,7 @@ return(
 
         #We mark bit to 1 for selected feature before current loop,
         #and mark bit to 1 also for dropped feature in current loop
-        if( sum( ppred( m_active_flag_tmp,1,"!=" ) ) > 1 ){
+        if( sum( m_active_flag_tmp != 1 ) > 1 ){
            b_selected_morethan_one = TRUE;
         }
         m_col_sums_err = m_iter_err_sum #colSums( m_err );
@@ -665,7 +665,7 @@ return(
         d_min_LOOCV = as.scalar( m_col_sums_err[1,i_index_min_LOOCV] );
         i_index_min_LOOCV = i_index_min_LOOCV%% ( i_n_column+1 )
 
-        if( sum( ppred( m_active_flag_tmp,1,"!=" ) ) <= 1 ){
+        if( sum( m_active_flag_tmp != 1 ) <= 1 ){
             b_continue_loop = FALSE;
         }
         if( as.scalar( m_active_flag_tmp[1,i_n_column+1] ) == 1 ){

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/staging/regression/lasso/lasso.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/regression/lasso/lasso.dml b/scripts/staging/regression/lasso/lasso.dml
index 1da88d3..476ffbe 100644
--- a/scripts/staging/regression/lasso/lasso.dml
+++ b/scripts/staging/regression/lasso/lasso.dml
@@ -66,7 +66,7 @@ while(iter < maxiter & continue) {
 	u = w - g/alpha
 	lambda = tau/alpha
 
-	wnew = sign(u) * (abs(u) - lambda) * ppred(abs(u) - lambda, 0, ">")
+	wnew = sign(u) * (abs(u) - lambda) * ((abs(u) - lambda) > 0)
 	dw = wnew - w
 	dw2 = sum(dw*dw)
 
@@ -99,13 +99,13 @@ while(iter < maxiter & continue) {
 	alpha = max(alpha_min, min(alpha_max, alphanew))
 
 	old_inactive_set = inactive_set
-	inactive_set = ppred(w, 0, "!=")
+	inactive_set = w != 0
 	diff = sum(abs(old_inactive_set - inactive_set))
 
 	if(diff == 0 & relChangeObj < tol)
 		continue = FALSE
 
-	num_inactive = sum(ppred(w, 0, "!="))
+	num_inactive = sum(w != 0)
 	print("ITER=" + iter + " OBJ=" + obj + " relative change=" + relChangeObj + " num_inactive=" + num_inactive)
 	iter = iter + 1
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/utils/project.dml
----------------------------------------------------------------------
diff --git a/scripts/utils/project.dml b/scripts/utils/project.dml
index ee6cd80..49658dc 100644
--- a/scripts/utils/project.dml
+++ b/scripts/utils/project.dml
@@ -63,7 +63,7 @@ if (exclude==FALSE)
 	EE[1:nrow(E),1] = E
 
 	# Convert exclude column list to include column list, and create column indices
-	EE = ppred(EE, 0, "==")
+	EE = (EE == 0)
 	EE = EE * seq(1, ncol(X), 1)
 	P = removeEmpty(target=EE, margin="rows")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/scripts/utils/sample.dml
----------------------------------------------------------------------
diff --git a/scripts/utils/sample.dml b/scripts/utils/sample.dml
index f1fc7e1..fc5e039 100644
--- a/scripts/utils/sample.dml
+++ b/scripts/utils/sample.dml
@@ -60,8 +60,8 @@ svUpBnd = cumsum(sv);
 
 # Construct sampling matrix SM, and apply to create samples
 parfor ( i in 1:nrow(sv)) {
-	T1 = ppred(R, as.scalar(svUpBnd[i,1]), "<=");
-	T2 = ppred(R, as.scalar(svLowBnd[i,1]), ">");
+	T1 = R <= as.scalar(svUpBnd[i,1]);
+	T2 = R > as.scalar(svLowBnd[i,1]);
 	SM = T1 * T2;
 	P = removeEmpty(target=diag(SM), margin="rows");
 	iX = P %*% X;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9b9d019b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
index b292db6..3bb7b0a 100644
--- a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
+++ b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
@@ -559,6 +559,9 @@ public class BuiltinFunctionExpression extends DataIdentifier
 			break;
 
 		case PPRED:
+			// TODO: remove this when ppred has been removed from DML
+			raiseValidateError("ppred() has been deprecated. Please use the operator directly.", true);
+
 			// ppred (X,Y, "<"); ppred (X,y, "<"); ppred (y,X, "<");
 			checkNumParameters(3);
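One subtlety from the stratstats.dml hunks earlier in this commit: ppred (XwithNaNs, XwithNaNs, "==") becomes (XwithNaNs == XwithNaNs), which is not the trivial all-ones matrix it appears to be. NaN compares unequal to everything, itself included, so the self-comparison yields 0 exactly at the NaN cells and 1 everywhere else; sqrt_failsafe uses the same trick. A minimal DML sketch (toy values):

    X = matrix ("0 2 3 4", rows = 2, cols = 2);
    D = matrix ("0 1 1 1", rows = 2, cols = 2);
    Xn = X / D;                              # cell [1,1] becomes 0/0 = NaN
    mask = (Xn == Xn);                       # 0 at the NaN cell, 1 elsewhere
    print ("non-NaN cells: " + sum (mask));  # prints 3.0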
