Repository: systemml Updated Branches: refs/heads/master add561b38 -> 095781868
[SYSTEMML-2342] Cleanup ARIMA dml script Closes #779. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/09578186 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/09578186 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/09578186 Branch: refs/heads/master Commit: 0957818688c8819a5601a61837504ee83fc22745 Parents: add561b Author: Tobias Schmidt <[email protected]> Authored: Mon Jun 4 20:34:23 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Mon Jun 4 20:42:20 2018 -0700 ---------------------------------------------------------------------- .../integration/applications/ArimaTest.java | 37 +- .../applications/arima_box-jenkins/arima.dml | 344 ++++++++++--------- .../applications/arima_box-jenkins/arima_old.R | 278 +++++++++++++++ .../arima_box-jenkins/arima_old.dml | 289 ++++++++++++++++ 4 files changed, 774 insertions(+), 174 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/09578186/src/test/java/org/apache/sysml/test/integration/applications/ArimaTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/applications/ArimaTest.java b/src/test/java/org/apache/sysml/test/integration/applications/ArimaTest.java index 91fa55b..149c0b1 100644 --- a/src/test/java/org/apache/sysml/test/integration/applications/ArimaTest.java +++ b/src/test/java/org/apache/sysml/test/integration/applications/ArimaTest.java @@ -34,7 +34,9 @@ import org.junit.runners.Parameterized.Parameters; public abstract class ArimaTest extends AutomatedTestBase { protected final static String TEST_DIR = "applications/arima_box-jenkins/"; - protected final static String TEST_NAME = "arima"; + protected final static String TEST_NAME1 = "arima"; + protected final static String TEST_NAME2 = "arima_old"; + protected String TEST_CLASS_DIR = TEST_DIR + ArimaTest.class.getSimpleName() + "/"; protected int max_func_invoc, p, d, q, P, D, Q, s, include_mean, useJacobi; @@ -64,11 +66,12 @@ public abstract class ArimaTest extends AutomatedTestBase { @Override public void setUp() { - addTestConfiguration(TEST_CLASS_DIR, TEST_NAME); + addTestConfiguration(TEST_CLASS_DIR, TEST_NAME1); + addTestConfiguration(TEST_CLASS_DIR, TEST_NAME2); } protected void testArima(ScriptType scriptType) { - System.out.println("------------ BEGIN " + TEST_NAME + " " + scriptType + " TEST WITH {" + + System.out.println("------------ BEGIN " + TEST_NAME1 + " " + scriptType + " TEST WITH {" + max_func_invoc + ", " + p + ", " + d + ", " + @@ -81,12 +84,16 @@ public abstract class ArimaTest extends AutomatedTestBase { useJacobi+ "} ------------"); this.scriptType = scriptType; - getAndLoadTestConfiguration(TEST_NAME); - List<String> proArgs = new ArrayList<String>(); + if (scriptType == ScriptType.PYDML) { proArgs.add("-python"); + getAndLoadTestConfiguration(TEST_NAME1); } + else { + getAndLoadTestConfiguration(TEST_NAME2); + } + proArgs.add("-args"); proArgs.add(input("col.mtx")); proArgs.add(Integer.toString(max_func_invoc)); @@ -100,8 +107,26 @@ public abstract class ArimaTest extends AutomatedTestBase { proArgs.add(Integer.toString(include_mean)); proArgs.add(Integer.toString(useJacobi)); proArgs.add(output("learnt.model")); - programArgs = proArgs.toArray(new String[proArgs.size()]); + + /* TODO use after R script is made consistent + getAndLoadTestConfiguration(TEST_NAME2); + proArgs.add("-nvargs"); + proArgs.add("X="+input("col.mtx")); + proArgs.add("max_func="+Integer.toString(max_func_invoc)); + proArgs.add("p="+Integer.toString(p)); + proArgs.add("d="+Integer.toString(d)); + proArgs.add("q="+Integer.toString(q)); + proArgs.add("P="+Integer.toString(P)); + proArgs.add("D="+Integer.toString(D)); + proArgs.add("Q="+Integer.toString(Q)); + proArgs.add("s="+Integer.toString(s)); + proArgs.add("include_mean="+Integer.toString(include_mean)); + proArgs.add("solver="+(useJacobi==1?"jacobi":"cg_solver")); + proArgs.add("dest="+output("learnt.model")); + */ + + programArgs = proArgs.toArray(new String[proArgs.size()]); fullDMLScriptName = getScript(); rCmd = getRCmd(inputDir(), Integer.toString(max_func_invoc), Integer.toString(p), Integer.toString(d), Integer.toString(q), Integer.toString(P), http://git-wip-us.apache.org/repos/asf/systemml/blob/09578186/src/test/scripts/applications/arima_box-jenkins/arima.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/arima_box-jenkins/arima.dml b/src/test/scripts/applications/arima_box-jenkins/arima.dml index 5e3aef6..6c39339 100644 --- a/src/test/scripts/applications/arima_box-jenkins/arima.dml +++ b/src/test/scripts/applications/arima_box-jenkins/arima.dml @@ -20,235 +20,239 @@ #------------------------------------------------------------- # Arguments -# 1st arg: X (one column time series) -# 2nd arg: max_func_invoc -# 3rd arg: p (non-seasonal AR order) -# 4th arg: d (non-seasonal differencing order) -# 5th arg: q (non-seasonal MA order) -# 6th arg: P (seasonal AR order) -# 7th arg: D (seasonal differencing order) -# 8th arg: Q (seasonal MA order) -# 9th arg: s (period in terms of number of time-steps) -# 10th arg: 0/1 (1 means include.mean) -# 11th arg: 0 to use CG solver, 1 to use Jacobi's method -# 12th arg: file name to store learnt parameters +# 'X': X (one column time series - there has to be an .mtd file associated to the src file!) +# 'max_func_invoc': max_func_invoc (default 1000) +# 'p': p (non-seasonal AR order) (default 0) +# 'd': d (non-seasonal differencing order) (default 0) +# 'q': q (non-seasonal MA order) (default 0) +# 'P': P (seasonal AR order) (default 0) +# 'D': D (seasonal differencing order)(default 0) +# 'Q': Q (seasonal MA order)(default 0) +# 's': s (period in terms of number of time-steps) (default 1) +# 'include_mean': TRUE/FALSE (default FALSE) +# 'solver': must be either 'cg' for conjugate gradients method or 'jacobi' for jacobi method (default "jacobi") +# 'dest': file name to store learnt parameters (default "arima-result.csv") +# 'result_format': the format of the destination file (default "csv") #changing to additive sar since R's arima seems to do that - -arima_css = function(Matrix[Double] w, Matrix[Double] X, Integer pIn, Integer P, Integer qIn, Integer Q, Integer s, Integer useJacobi) return (Double obj){ - b = X[,2:ncol(X)]%*%w - - R = Rand(rows=nrow(X), cols=nrow(X), min=0, max=0) +arima_css = function(Matrix[Double] w, Matrix[Double] X, Integer pIn, Integer P, Integer qIn, Integer Q, Integer s, String solver) return (Double obj){ + b = X[,2:ncol(X)]%*%w + + R = matrix(0, nrow(X), nrow(X)) for(i7 in seq(1, qIn, 1)){ - ma_ind_ns = P+pIn+i7 - err_ind_ns = i7 - ones_ns = Rand(rows=nrow(R)-err_ind_ns, cols=1, min=1, max=1) - d_ns = ones_ns * as.scalar(w[ma_ind_ns,1]) - R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] = R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] + diag(d_ns) + d_ns = matrix(as.scalar(w[P+pIn+i7,1]), nrow(R)-i7, 1) + R[1+i7:nrow(R),1:ncol(R)-i7] = R[1+i7:nrow(R),1:ncol(R)-i7] + diag(d_ns) } + for(i8 in seq(1, Q, 1)){ - ma_ind_s = P+pIn+qIn+i8 err_ind_s = s*i8 - ones_s = Rand(rows=nrow(R)-err_ind_s, cols=1, min=1, max=1) - d_s = ones_s * as.scalar(w[ma_ind_s,1]) + d_s = matrix(as.scalar(w[P+pIn+qIn+i8,1]), nrow(R)-err_ind_s, 1) R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] = R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] + diag(d_s) } + + #TODO: provide default dml "solve()" as well + solution = eval(solver + "_solver", R, b, 0.01, 100) - #checking for strict diagonal dominance - #required for jacobi's method - # + errs = X[,1] - solution + obj = sum(errs*errs) +} + +cg_solver = function (Matrix[Double] R, Matrix[Double] B, Double tolerance, Integer max_iterations) return (Matrix[Double] y_hat){ + y_hat = matrix(0, nrow(R), 1) + iter = 0 + + A = R + diag(matrix(1, rows=nrow(R), cols=1)) + Z = t(A)%*%A + r = -(t(A)%*%B) + p = -r + norm_r2 = sum(r^2) + + continue = (norm_r2 != 0) - max_iter = 100 - tol = 0.01 + while(iter < max_iterations & continue){ + q = Z%*%p + alpha = norm_r2 / as.scalar(t(p) %*% q) + y_hat += alpha * p + r += alpha * q + old_norm_r2 = norm_r2 + norm_r2 = sum(r^2) + continue = (norm_r2 >= tolerance) + beta = norm_r2 / old_norm_r2 + p = -r + beta * p + iter += 1 + } +} - y_hat = Rand(rows=nrow(X), cols=1, min=0, max=0) +jacobi_solver = function (Matrix[Double] A, Matrix[Double] B, Double tolerance, Integer max_iterations) return (Matrix[Double] y_hat){ + + y_hat = matrix(0, nrow(A), 1) iter = 0 - - if(useJacobi == 1){ - check = sum(rowSums(abs(R)) >= 1) - if(check > 0){ - print("R is not diagonal dominant. Suggest switching to an exact solver.") - } + diff = tolerance+1 + + #checking for strict diagonal dominance + #required for jacobi's method + check = sum(rowSums(abs(A)) >= 1) + if(check > 0){ + print("The matrix is not diagonal dominant. Suggest switching to an exact solver.") + } - diff = tol+1.0 - while(iter < max_iter & diff > tol){ - y_hat_new = b - R%*%y_hat - diff = sum((y_hat_new-y_hat)*(y_hat_new-y_hat)) - y_hat = y_hat_new - iter = iter + 1 - } - }else{ - ones = matrix(1, rows=nrow(X), cols=1) - A = R + diag(ones) - Z = t(A)%*%A - y = t(A)%*%b - r = -y - p = -r - norm_r2 = sum(r*r) - continue = 1 - if(norm_r2 == 0){ - continue = 0 - } - while(iter < max_iter & continue == 1){ - q = Z%*%p - alpha = norm_r2 / as.scalar(t(p) %*% q) - y_hat = y_hat + alpha * p - old_norm_r2 = norm_r2 - r = r + alpha * q - norm_r2 = sum(r * r) - if(norm_r2 < tol){ - continue = 0 - } - beta = norm_r2 / old_norm_r2 - p = -r + beta * p - iter = iter + 1 - } + while(iter < max_iterations & diff > tolerance){ + y_hat_new = B - A%*%y_hat + diff = sum((y_hat_new-y_hat)^2) + y_hat = y_hat_new + iter += 1 } - - errs = X[,1] - y_hat - obj = sum(errs*errs) } -#input col of time series data -X = read($1) -max_func_invoc = $2 -#non-seasonal order -p = $3 -d = $4 -q = $5 +readParamters = function (String default_solver, Integer default_max_func_invoc, Integer default_include_mean, Integer default_p, Integer default_d, Integer default_q, Integer default_P, Integer default_D, Integer default_Q, Integer default_s , String default_dest, String default_result_format)return (Matrix[Double] X, String solver, Integer max_func_invoc, Integer include_mean, Integer p, Integer d, Integer q, Integer P, Integer D, Integer Q, Integer s, String dest, String result_format){ + #input col of time series data + X = read($X) + solver = ifdef($solver, default_solver) + max_func_invoc = ifdef($max_func_invoc, default_max_func_invoc) + include_mean = ifdef($include_mean, default_include_mean) + dest = ifdef($dest, default_dest) + result_format = ifdef($result_format, default_result_format) + + #non-seasonal order + p = ifdef($p, default_p) + d = ifdef($d, default_d) + q = ifdef($q, default_q) + + #seasonal order + P = ifdef($P, default_P) + D = ifdef($D, default_D) + Q = ifdef($Q, default_Q) + + #length of the season + s = ifdef($s, default_s) + while(FALSE){} #TODO +} -#seasonal order -P = $6 -D = $7 -Q = $8 +[X, solver, max_func_invoc, include_mean, p, d, q, P, D, Q, s, dest, result_format] = readParamters ("jacobi", 1000, FALSE, 0,0,0,0,0,0,1, "arima-results.csv", "csv") -#length of the season -s = $9 +print ("p= " + p) +print ("d= " + d) +print ("q= " + q) +print ("P= " + P) +print ("D= " + D) +print ("Q= " + Q) +print ("s= " + s) +print ("include_mean= " + include_mean) +print ("solver= " + solver) +print ("dest= " + dest) +print ("result_format= " + result_format) -include_mean = $10 +totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols +print ("totcols=" + totcols) -useJacobi = $11 +#TODO: check max_func_invoc < totcols --> print warning (stop here ??) num_rows = nrow(X) +print("nrows of X: " + num_rows) + if(num_rows <= d){ - print("non-seasonal differencing order should be larger than length of the time-series") + print("non-seasonal differencing order should be smaller than length of the time-series") } -Y = X -for(i in seq(1, d, 1)){ - n1 = nrow(Y)+0.0 - Y = Y[2:n1,] - Y[1:n1-1,] +mu = 0.0 +if(include_mean == 1){ + mu = mean(X) + X = X - mu } -num_rows = nrow(Y)+0.0 +# d-th order differencing: +for(i in seq(1,d,1)){ + X = X[2:nrow(X),] - X[1:nrow(X)-1,] +} + +num_rows = nrow(X) + if(num_rows <= s*D){ - print("seasonal differencing order should be larger than number of observations divided by length of season") + print("seasonal differencing order should be smaller than number of observations divided by length of season") } -for(i in seq(1,D, 1)){ - n1 = nrow(Y)+0.0 - Y = Y[s+1:n1,] - Y[1:n1-s,] +for(i in seq(1,D,1)){ + n1 = nrow(X)+0.0 + X = X[s+1:n1,] - X[1:n1-s,] } -n = nrow(Y) -max_ar_col = P+p -max_ma_col = Q+q -if(max_ar_col > max_ma_col){ - max_arma_col = max_ar_col -}else{ - max_arma_col = max_ma_col -} +n = nrow(X) +print("nrow of X= " + n) -mu = 0 -if(include_mean == 1){ - mu = sum(Y)/nrow(Y) - Y = Y - mu -} +#Matrix Z with target values of prediction (X) in first column and +#all values that can be used to predict a this target value in column 2:totcols of same row +Z = cbind(X, matrix(0, n, totcols - 1)) -totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols +#TODO: This operations can be optimised/simplified --> see documentation -Z = Rand(rows=n, cols=totcols, min=0, max=0) -Z[,1] = Y #target col +#fills Z with values used for non seasonal AR prediction parfor(i1 in seq(1, p, 1), check=0){ - Z[i1+1:n,1+i1] = Y[1:n-i1,] + Z[i1+1:n,1+i1] = X[1:n-i1,] } + +#prediciton values for seasonal AR parfor(i2 in seq(1, P, 1), check=0){ - Z[s*i2+1:n,1+p+i2] = Y[1:n-s*i2,] + Z[s*i2+1:n,1+p+i2] = X[1:n-s*i2,] } + +#prediciton values for non seasonal MA parfor(i5 in seq(1, q, 1), check=0){ - Z[i5+1:n,1+P+p+i5] = Y[1:n-i5,] + Z[i5+1:n,1+P+p+i5] = X[1:n-i5,] } + +#prediciton values for seasonal MA parfor(i6 in seq(1,Q, 1), check=0){ - Z[s*i6+1:n,1+P+p+q+i6] = Y[1:n-s*i6,] + Z[s*i6+1:n,1+P+p+q+i6] = X[1:n-s*i6,] } -one = Rand(rows=1, cols=1, min=1, max=1) +simplex = cbind(matrix(0, totcols-1, 1), diag(matrix(0.1, totcols-1, 1))) -simplex = Rand(rows=totcols-1, cols=totcols, min=0, max=0) -for(i in 2:ncol(simplex)){ - simplex[i-1,i] = 0.1 -} +#debug output +print(toString(simplex)) num_func_invoc = 0 -objvals = Rand(rows=1, cols=ncol(simplex), min=0, max=0) -parfor(i3 in 1:ncol(simplex)){ - arima_css_obj_val = arima_css(simplex[,i3], Z, p, P, q, Q, s, useJacobi) - objvals[1,i3] = arima_css_obj_val +objvals = matrix(0, 1, ncol(simplex)) + +parfor(i3 in seq(1,ncol(simplex))){ + objvals[1,i3] = arima_css(simplex[,i3], Z, p, P, q, Q, s, solver) } -num_func_invoc = num_func_invoc + ncol(simplex) +num_func_invoc += ncol(simplex) +print ("num_func_invoc = " + num_func_invoc) tol = 1.5 * 10^(-8) * as.scalar(objvals[1,1]) -continue = 1 -best_index = 1 -while(continue == 1 & num_func_invoc <= max_func_invoc) { - best_index = 1 - worst_index = 1 - for(i in 2:ncol(objvals)){ - this = as.scalar(objvals[1,i]) - that = as.scalar(objvals[1,best_index]) - if(that > this){ - best_index = i - } - that = as.scalar(objvals[1,worst_index]) - if(that < this){ - worst_index = i - } - } +continue = TRUE +while(continue & num_func_invoc <= max_func_invoc){ + best_index = as.scalar(rowIndexMin(objvals)) + worst_index = as.scalar(rowIndexMax(objvals)) best_obj_val = as.scalar(objvals[1,best_index]) worst_obj_val = as.scalar(objvals[1,worst_index]) - if(worst_obj_val <= best_obj_val + tol){ - continue = 0 - } + continue = (worst_obj_val > best_obj_val + tol) + print("#Function calls::" + num_func_invoc + " OBJ: " + best_obj_val) c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex)) x_r = 2*c - simplex[,worst_index] - obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, useJacobi) - num_func_invoc = num_func_invoc + 1 + obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, solver) + num_func_invoc += 1 if(obj_x_r < best_obj_val){ x_e = 2*x_r - c - obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, useJacobi) + obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, solver) num_func_invoc = num_func_invoc + 1 - if(obj_x_r <= obj_x_e){ - simplex[,worst_index] = x_r - objvals[1,worst_index] = obj_x_r - }else{ - simplex[,worst_index] = x_e - objvals[1,worst_index] = obj_x_e - } + simplex[,worst_index] = ifelse (obj_x_r <= obj_x_e, x_r, x_e) + objvals[1,worst_index] = ifelse (obj_x_r <= obj_x_e, obj_x_r, obj_x_e) }else{ if(obj_x_r < worst_obj_val){ simplex[,worst_index] = x_r @@ -256,8 +260,8 @@ while(continue == 1 & num_func_invoc <= max_func_invoc) { } x_c_in = (simplex[,worst_index] + c)/2 - obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, useJacobi) - num_func_invoc = num_func_invoc + 1 + obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, solver) + num_func_invoc += 1 if(obj_x_c_in < as.scalar(objvals[1,worst_index])){ simplex[,worst_index] = x_c_in @@ -268,22 +272,26 @@ while(continue == 1 & num_func_invoc <= max_func_invoc) { parfor(i4 in 1:ncol(simplex)){ if(i4 != best_index){ simplex[,i4] = (simplex[,i4] + best_point)/2 - tmp = arima_css(simplex[,i4], Z, p, P, q, Q, s, useJacobi) - objvals[1,i4] = tmp*one + objvals[1,i4] = arima_css(simplex[,i4], Z, p, P, q, Q, s, solver) } } - num_func_invoc = num_func_invoc + ncol(simplex) - 1 + num_func_invoc += ncol(simplex) - 1 } } } } +print ("best_index: "+ toString(best_index)) best_point = simplex[,best_index] -if(include_mean == 1){ - tmp2 = Rand(rows=totcols, cols=1, min=0, max=0) - tmp2[1:nrow(best_point),1] = best_point - tmp2[nrow(tmp2),1] = mu - best_point = tmp2 + +if(include_mean){ + best_point = rbind(best_point, as.matrix(mu)) } -write(best_point, $12, format="text") +result_format = ifdef($result_format, "csv") +write(best_point, dest, format=result_format) + + +debug_printMatrixShape = function (Matrix[Double] M, String matrixIdentifier){ + print ("Matrix '" + matrixIdentifier + "' has shape [" + nrow(M) + "," + ncol(M) + "]") +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/systemml/blob/09578186/src/test/scripts/applications/arima_box-jenkins/arima_old.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/arima_box-jenkins/arima_old.R b/src/test/scripts/applications/arima_box-jenkins/arima_old.R new file mode 100644 index 0000000..451635b --- /dev/null +++ b/src/test/scripts/applications/arima_box-jenkins/arima_old.R @@ -0,0 +1,278 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +args <- commandArgs(TRUE) +library(Matrix) + +arima_css = function(w, X, p, P, q, Q, s, useJacobi){ + b = matrix(X[,2:ncol(X)], nrow(X), ncol(X)-1)%*%w + + R = matrix(0, nrow(X), nrow(X)) + if(q>0){ + for(i7 in 1:q){ + ma_ind_ns = P+p+i7 + err_ind_ns = i7 + ones_ns = rep(1, nrow(R)-err_ind_ns) + d_ns = ones_ns * w[ma_ind_ns,1] + R[(1+err_ind_ns):nrow(R),1:(ncol(R)-err_ind_ns)] = R[(1+err_ind_ns):nrow(R),1:(ncol(R)-err_ind_ns)] + diag(d_ns) + } + } + if(Q>0){ + for(i8 in 1:Q){ + ma_ind_s = P+p+q+i8 + err_ind_s = s*i8 + ones_s = rep(1, nrow(R)-err_ind_s) + d_s = ones_s * w[ma_ind_s,1] + R[(1+err_ind_s):nrow(R),1:(ncol(R)-err_ind_s)] = R[(1+err_ind_s):nrow(R),1:(ncol(R)-err_ind_s)] + diag(d_s) + } + } + + max_iter = 100 + tol = 0.01 + + y_hat = matrix(0, nrow(X), 1) + iter = 0 + + if(useJacobi == 1){ + check = sum(ifelse(rowSums(abs(R)) >= 1, 1, 0)) + if(check > 0){ + print("R is not diagonal dominant. Suggest switching to an exact solver.") + } + diff = tol+1.0 + while(iter < max_iter & diff > tol){ + y_hat_new = matrix(b - R%*%y_hat, nrow(y_hat), 1) + diff = sum((y_hat_new-y_hat)*(y_hat_new-y_hat)) + y_hat = y_hat_new + iter = iter + 1 + } + }else{ + ones = rep(1, nrow(X)) + A = R + diag(ones) + Z = t(A)%*%A + y = t(A)%*%b + r = -y + p = -r + norm_r2 = sum(r*r) + while(iter < max_iter & norm_r2 > tol){ + q = Z%*%p + alpha = norm_r2 / sum(p*q) + y_hat = y_hat + alpha * p + old_norm_r2 = norm_r2 + r = r + alpha * q + norm_r2 = sum(r * r) + beta = norm_r2 / old_norm_r2 + p = -r + beta * p + iter = iter + 1 + } + } + + errs = X[,1] - y_hat + obj = sum(errs*errs) + + return(obj) +} + +#input col of time series data +X = readMM(paste(args[1], "col.mtx", sep="")) + +max_func_invoc = as.integer(args[2]) + +#non-seasonal order +p = as.integer(args[3]) +d = as.integer(args[4]) +q = as.integer(args[5]) + +#seasonal order +P = as.integer(args[6]) +D = as.integer(args[7]) +Q = as.integer(args[8]) + +#length of the season +s = as.integer(args[9]) + +include_mean = as.integer(args[10]) + +useJacobi = as.integer(args[11]) + +num_rows = nrow(X) + +if(num_rows <= d){ + print("non-seasonal differencing order should be larger than length of the time-series") +} + +Y = matrix(X[,1], nrow(X), 1) +if(d>0){ + for(i in 1:d){ + n1 = nrow(Y) + Y = matrix(Y[2:n1,] - Y[1:(n1-1),], n1-1, 1) + } +} + +num_rows = nrow(Y) +if(num_rows <= s*D){ + print("seasonal differencing order should be larger than number of observations divided by length of season") +} + +if(D>0){ + for(i in 1:D){ + n1 = nrow(Y) + Y = matrix(Y[(s+1):n1,] - Y[1:(n1-s),], n1-s, 1) + } +} + +n = nrow(Y) + +max_ar_col = P+p +max_ma_col = Q+q +if(max_ar_col > max_ma_col){ + max_arma_col = max_ar_col +}else{ + max_arma_col = max_ma_col +} + +mu = 0 +if(include_mean == 1){ + mu = sum(Y)/nrow(Y) + Y = Y - mu +} + +totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols + +Z = matrix(0, n, totcols) +Z[,1] = Y #target col + +if(p>0){ + for(i1 in 1:p){ + Z[(i1+1):n,1+i1] = Y[1:(n-i1),] + } +} +if(P>0){ + for(i2 in 1:P){ + Z[(s*i2+1):n,1+p+i2] = Y[1:(n-s*i2),] + } +} +if(q>0){ + for(i5 in 1:q){ + Z[(i5+1):n,1+P+p+i5] = Y[1:(n-i5),] + } +} +if(Q>0){ + for(i6 in 1:Q){ + Z[(s*i6+1):n,1+P+p+q+i6] = Y[1:(n-s*i6),] + } +} + +simplex = matrix(0, totcols-1, totcols) +for(i in 2:ncol(simplex)){ + simplex[i-1,i] = 0.1 +} + +num_func_invoc = 0 + +objvals = matrix(0, 1, ncol(simplex)) +for(i3 in 1:ncol(simplex)){ + objvals[1,i3] = arima_css(matrix(simplex[,i3], nrow(simplex), 1), Z, p, P, q, Q, s, useJacobi) +} +num_func_invoc = num_func_invoc + ncol(simplex) + +tol = 1.5 * 10^(-8) * objvals[1,1] + +continue = 1 +while(continue == 1 & num_func_invoc <= max_func_invoc) { + #print(paste(num_func_invoc, max_func_invoc)) + best_index = 1 + worst_index = 1 + for(i in 2:ncol(objvals)){ + this = objvals[1,i] + that = objvals[1,best_index] + if(that > this){ + best_index = i + } + that = objvals[1,worst_index] + if(that < this){ + worst_index = i + } + } + + best_obj_val = objvals[1,best_index] + worst_obj_val = objvals[1,worst_index] + if(worst_obj_val <= best_obj_val + tol){ + continue = 0 + } + + print(paste("#Function calls::", num_func_invoc, "OBJ:", best_obj_val)) + + c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex)) + + x_r = 2*c - simplex[,worst_index] + obj_x_r = arima_css(matrix(x_r, nrow(simplex), 1), Z, p, P, q, Q, s, useJacobi) + num_func_invoc = num_func_invoc + 1 + + if(obj_x_r < best_obj_val){ + x_e = 2*x_r - c + obj_x_e = arima_css(matrix(x_e, nrow(simplex), 1), Z, p, P, q, Q, s, useJacobi) + num_func_invoc = num_func_invoc + 1 + + if(obj_x_r <= obj_x_e){ + simplex[,worst_index] = x_r + objvals[1,worst_index] = obj_x_r + }else{ + simplex[,worst_index] = x_e + objvals[1,worst_index] = obj_x_e + } + }else{ + if(obj_x_r < worst_obj_val){ + simplex[,worst_index] = x_r + objvals[1,worst_index] = obj_x_r + } + + x_c_in = (simplex[,worst_index] + c)/2 + obj_x_c_in = arima_css(matrix(x_c_in, nrow(simplex), 1), Z, p, P, q, Q, s, useJacobi) + num_func_invoc = num_func_invoc + 1 + + if(obj_x_c_in < objvals[1,worst_index]){ + simplex[,worst_index] = x_c_in + objvals[1,worst_index] = obj_x_c_in + }else{ + if(obj_x_r >= worst_obj_val){ + best_point = simplex[,best_index] + + for(i4 in 1:ncol(simplex)){ + if(i4 != best_index){ + simplex[,i4] = (simplex[,i4] + best_point)/2 + objvals[1,i4] = arima_css(matrix(simplex[,i4], nrow(simplex), 1), Z, p, P, q, Q, s, useJacobi) + } + } + num_func_invoc = num_func_invoc + ncol(simplex) - 1 + } + } + } +} + +best_point = matrix(simplex[,best_index], nrow(simplex), 1) +if(include_mean == 1){ + tmp = matrix(0, totcols, 1) + tmp[1:nrow(best_point),1] = best_point + tmp[nrow(tmp),1] = mu + best_point = tmp +} + +writeMM(as(best_point, "CsparseMatrix"), paste(args[12], "learnt.model", sep="")) http://git-wip-us.apache.org/repos/asf/systemml/blob/09578186/src/test/scripts/applications/arima_box-jenkins/arima_old.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/arima_box-jenkins/arima_old.dml b/src/test/scripts/applications/arima_box-jenkins/arima_old.dml new file mode 100644 index 0000000..5e3aef6 --- /dev/null +++ b/src/test/scripts/applications/arima_box-jenkins/arima_old.dml @@ -0,0 +1,289 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +# Arguments +# 1st arg: X (one column time series) +# 2nd arg: max_func_invoc +# 3rd arg: p (non-seasonal AR order) +# 4th arg: d (non-seasonal differencing order) +# 5th arg: q (non-seasonal MA order) +# 6th arg: P (seasonal AR order) +# 7th arg: D (seasonal differencing order) +# 8th arg: Q (seasonal MA order) +# 9th arg: s (period in terms of number of time-steps) +# 10th arg: 0/1 (1 means include.mean) +# 11th arg: 0 to use CG solver, 1 to use Jacobi's method +# 12th arg: file name to store learnt parameters + +#changing to additive sar since R's arima seems to do that + +arima_css = function(Matrix[Double] w, Matrix[Double] X, Integer pIn, Integer P, Integer qIn, Integer Q, Integer s, Integer useJacobi) return (Double obj){ + b = X[,2:ncol(X)]%*%w + + R = Rand(rows=nrow(X), cols=nrow(X), min=0, max=0) + + for(i7 in seq(1, qIn, 1)){ + ma_ind_ns = P+pIn+i7 + err_ind_ns = i7 + ones_ns = Rand(rows=nrow(R)-err_ind_ns, cols=1, min=1, max=1) + d_ns = ones_ns * as.scalar(w[ma_ind_ns,1]) + R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] = R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] + diag(d_ns) + } + for(i8 in seq(1, Q, 1)){ + ma_ind_s = P+pIn+qIn+i8 + err_ind_s = s*i8 + ones_s = Rand(rows=nrow(R)-err_ind_s, cols=1, min=1, max=1) + d_s = ones_s * as.scalar(w[ma_ind_s,1]) + R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] = R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] + diag(d_s) + } + + #checking for strict diagonal dominance + #required for jacobi's method + # + + max_iter = 100 + tol = 0.01 + + y_hat = Rand(rows=nrow(X), cols=1, min=0, max=0) + iter = 0 + + if(useJacobi == 1){ + check = sum(rowSums(abs(R)) >= 1) + if(check > 0){ + print("R is not diagonal dominant. Suggest switching to an exact solver.") + } + + diff = tol+1.0 + while(iter < max_iter & diff > tol){ + y_hat_new = b - R%*%y_hat + diff = sum((y_hat_new-y_hat)*(y_hat_new-y_hat)) + y_hat = y_hat_new + iter = iter + 1 + } + }else{ + ones = matrix(1, rows=nrow(X), cols=1) + A = R + diag(ones) + Z = t(A)%*%A + y = t(A)%*%b + r = -y + p = -r + norm_r2 = sum(r*r) + continue = 1 + if(norm_r2 == 0){ + continue = 0 + } + while(iter < max_iter & continue == 1){ + q = Z%*%p + alpha = norm_r2 / as.scalar(t(p) %*% q) + y_hat = y_hat + alpha * p + old_norm_r2 = norm_r2 + r = r + alpha * q + norm_r2 = sum(r * r) + if(norm_r2 < tol){ + continue = 0 + } + beta = norm_r2 / old_norm_r2 + p = -r + beta * p + iter = iter + 1 + } + } + + errs = X[,1] - y_hat + obj = sum(errs*errs) +} + +#input col of time series data +X = read($1) + +max_func_invoc = $2 + +#non-seasonal order +p = $3 +d = $4 +q = $5 + +#seasonal order +P = $6 +D = $7 +Q = $8 + +#length of the season +s = $9 + +include_mean = $10 + +useJacobi = $11 + +num_rows = nrow(X) + +if(num_rows <= d){ + print("non-seasonal differencing order should be larger than length of the time-series") +} + +Y = X +for(i in seq(1, d, 1)){ + n1 = nrow(Y)+0.0 + Y = Y[2:n1,] - Y[1:n1-1,] +} + +num_rows = nrow(Y)+0.0 +if(num_rows <= s*D){ + print("seasonal differencing order should be larger than number of observations divided by length of season") +} + +for(i in seq(1,D, 1)){ + n1 = nrow(Y)+0.0 + Y = Y[s+1:n1,] - Y[1:n1-s,] +} + +n = nrow(Y) + +max_ar_col = P+p +max_ma_col = Q+q +if(max_ar_col > max_ma_col){ + max_arma_col = max_ar_col +}else{ + max_arma_col = max_ma_col +} + +mu = 0 +if(include_mean == 1){ + mu = sum(Y)/nrow(Y) + Y = Y - mu +} + +totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols + +Z = Rand(rows=n, cols=totcols, min=0, max=0) +Z[,1] = Y #target col + +parfor(i1 in seq(1, p, 1), check=0){ + Z[i1+1:n,1+i1] = Y[1:n-i1,] +} +parfor(i2 in seq(1, P, 1), check=0){ + Z[s*i2+1:n,1+p+i2] = Y[1:n-s*i2,] +} +parfor(i5 in seq(1, q, 1), check=0){ + Z[i5+1:n,1+P+p+i5] = Y[1:n-i5,] +} +parfor(i6 in seq(1,Q, 1), check=0){ + Z[s*i6+1:n,1+P+p+q+i6] = Y[1:n-s*i6,] +} + +one = Rand(rows=1, cols=1, min=1, max=1) + +simplex = Rand(rows=totcols-1, cols=totcols, min=0, max=0) +for(i in 2:ncol(simplex)){ + simplex[i-1,i] = 0.1 +} + +num_func_invoc = 0 + +objvals = Rand(rows=1, cols=ncol(simplex), min=0, max=0) +parfor(i3 in 1:ncol(simplex)){ + arima_css_obj_val = arima_css(simplex[,i3], Z, p, P, q, Q, s, useJacobi) + objvals[1,i3] = arima_css_obj_val +} +num_func_invoc = num_func_invoc + ncol(simplex) + +tol = 1.5 * 10^(-8) * as.scalar(objvals[1,1]) + +continue = 1 +best_index = 1 +while(continue == 1 & num_func_invoc <= max_func_invoc) { + best_index = 1 + worst_index = 1 + for(i in 2:ncol(objvals)){ + this = as.scalar(objvals[1,i]) + that = as.scalar(objvals[1,best_index]) + if(that > this){ + best_index = i + } + that = as.scalar(objvals[1,worst_index]) + if(that < this){ + worst_index = i + } + } + + best_obj_val = as.scalar(objvals[1,best_index]) + worst_obj_val = as.scalar(objvals[1,worst_index]) + if(worst_obj_val <= best_obj_val + tol){ + continue = 0 + } + + print("#Function calls::" + num_func_invoc + " OBJ: " + best_obj_val) + + c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex)) + + x_r = 2*c - simplex[,worst_index] + obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, useJacobi) + num_func_invoc = num_func_invoc + 1 + + if(obj_x_r < best_obj_val){ + x_e = 2*x_r - c + obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, useJacobi) + num_func_invoc = num_func_invoc + 1 + + if(obj_x_r <= obj_x_e){ + simplex[,worst_index] = x_r + objvals[1,worst_index] = obj_x_r + }else{ + simplex[,worst_index] = x_e + objvals[1,worst_index] = obj_x_e + } + }else{ + if(obj_x_r < worst_obj_val){ + simplex[,worst_index] = x_r + objvals[1,worst_index] = obj_x_r + } + + x_c_in = (simplex[,worst_index] + c)/2 + obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, useJacobi) + num_func_invoc = num_func_invoc + 1 + + if(obj_x_c_in < as.scalar(objvals[1,worst_index])){ + simplex[,worst_index] = x_c_in + objvals[1,worst_index] = obj_x_c_in + }else{ + if(obj_x_r >= worst_obj_val){ + best_point = simplex[,best_index] + parfor(i4 in 1:ncol(simplex)){ + if(i4 != best_index){ + simplex[,i4] = (simplex[,i4] + best_point)/2 + tmp = arima_css(simplex[,i4], Z, p, P, q, Q, s, useJacobi) + objvals[1,i4] = tmp*one + } + } + num_func_invoc = num_func_invoc + ncol(simplex) - 1 + } + } + } +} + +best_point = simplex[,best_index] +if(include_mean == 1){ + tmp2 = Rand(rows=totcols, cols=1, min=0, max=0) + tmp2[1:nrow(best_point),1] = best_point + tmp2[nrow(tmp2),1] = mu + best_point = tmp2 +} + +write(best_point, $12, format="text")
