systemml git commit: [SYSTEMML-2342] Cleanup ARIMA dml script

mboehm7 Mon, 04 Jun 2018 20:43:57 -0700

Repository: systemml
Updated Branches:
  refs/heads/master add561b38 -> 095781868



[SYSTEMML-2342] Cleanup ARIMA dml script

Closes #779.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/09578186
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/09578186
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/09578186

Branch: refs/heads/master
Commit: 0957818688c8819a5601a61837504ee83fc22745
Parents: add561b
Author: Tobias Schmidt <[email protected]>
Authored: Mon Jun 4 20:34:23 2018 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Mon Jun 4 20:42:20 2018 -0700

----------------------------------------------------------------------
 .../integration/applications/ArimaTest.java     |  37 +-
 .../applications/arima_box-jenkins/arima.dml    | 344 ++++++++++---------
 .../applications/arima_box-jenkins/arima_old.R  | 278 +++++++++++++++
 .../arima_box-jenkins/arima_old.dml             | 289 ++++++++++++++++
 4 files changed, 774 insertions(+), 174 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/09578186/src/test/java/org/apache/sysml/test/integration/applications/ArimaTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/applications/ArimaTest.java 
b/src/test/java/org/apache/sysml/test/integration/applications/ArimaTest.java
index 91fa55b..149c0b1 100644
--- 
a/src/test/java/org/apache/sysml/test/integration/applications/ArimaTest.java
+++ 
b/src/test/java/org/apache/sysml/test/integration/applications/ArimaTest.java
@@ -34,7 +34,9 @@ import org.junit.runners.Parameterized.Parameters;
 public abstract class ArimaTest extends AutomatedTestBase {
        
        protected final static String TEST_DIR = 
"applications/arima_box-jenkins/";
-       protected final static String TEST_NAME = "arima";
+       protected final static String TEST_NAME1 = "arima";
+       protected final static String TEST_NAME2 = "arima_old";
+       
        protected String TEST_CLASS_DIR = TEST_DIR + 
ArimaTest.class.getSimpleName() + "/";
        
        protected int max_func_invoc, p, d, q, P, D, Q, s, include_mean, 
useJacobi;
@@ -64,11 +66,12 @@ public abstract class ArimaTest extends AutomatedTestBase {
        
        @Override
        public void setUp() {
-               addTestConfiguration(TEST_CLASS_DIR, TEST_NAME);
+               addTestConfiguration(TEST_CLASS_DIR, TEST_NAME1);
+               addTestConfiguration(TEST_CLASS_DIR, TEST_NAME2);
        }
        
        protected void testArima(ScriptType scriptType) {
-               System.out.println("------------ BEGIN " + TEST_NAME + " " + 
scriptType + " TEST WITH {" +
+               System.out.println("------------ BEGIN " + TEST_NAME1 + " " + 
scriptType + " TEST WITH {" +
                        max_func_invoc + ", " + 
                        p + ", " + 
                        d + ", " + 
@@ -81,12 +84,16 @@ public abstract class ArimaTest extends AutomatedTestBase {
                        useJacobi+ "} ------------");
                this.scriptType = scriptType;
                
-               getAndLoadTestConfiguration(TEST_NAME);
-       
                List<String> proArgs = new ArrayList<String>();
+               
                if (scriptType == ScriptType.PYDML) {
                        proArgs.add("-python");
+                       getAndLoadTestConfiguration(TEST_NAME1);
                }
+               else {
+                       getAndLoadTestConfiguration(TEST_NAME2);
+               }
+               
                proArgs.add("-args");
                proArgs.add(input("col.mtx"));
                proArgs.add(Integer.toString(max_func_invoc));
@@ -100,8 +107,26 @@ public abstract class ArimaTest extends AutomatedTestBase {
                proArgs.add(Integer.toString(include_mean));
                proArgs.add(Integer.toString(useJacobi));
                proArgs.add(output("learnt.model"));
-               programArgs = proArgs.toArray(new String[proArgs.size()]);
                
+               
+               /* TODO use after R script is made consistent 
+                       getAndLoadTestConfiguration(TEST_NAME2);
+                       proArgs.add("-nvargs");
+                       proArgs.add("X="+input("col.mtx"));
+                       
proArgs.add("max_func="+Integer.toString(max_func_invoc));
+                       proArgs.add("p="+Integer.toString(p));
+                       proArgs.add("d="+Integer.toString(d));
+                       proArgs.add("q="+Integer.toString(q));
+                       proArgs.add("P="+Integer.toString(P));
+                       proArgs.add("D="+Integer.toString(D));
+                       proArgs.add("Q="+Integer.toString(Q));
+                       proArgs.add("s="+Integer.toString(s));
+                       
proArgs.add("include_mean="+Integer.toString(include_mean));
+                       
proArgs.add("solver="+(useJacobi==1?"jacobi":"cg_solver"));
+                       proArgs.add("dest="+output("learnt.model"));
+               */
+               
+               programArgs = proArgs.toArray(new String[proArgs.size()]);
                fullDMLScriptName = getScript();
 
                rCmd = getRCmd(inputDir(), Integer.toString(max_func_invoc), 
Integer.toString(p), Integer.toString(d), Integer.toString(q), 
Integer.toString(P), 

http://git-wip-us.apache.org/repos/asf/systemml/blob/09578186/src/test/scripts/applications/arima_box-jenkins/arima.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/arima_box-jenkins/arima.dml 
b/src/test/scripts/applications/arima_box-jenkins/arima.dml
index 5e3aef6..6c39339 100644
--- a/src/test/scripts/applications/arima_box-jenkins/arima.dml
+++ b/src/test/scripts/applications/arima_box-jenkins/arima.dml
@@ -20,235 +20,239 @@
 #-------------------------------------------------------------
 
 # Arguments
-# 1st arg: X (one column time series)
-# 2nd arg: max_func_invoc
-# 3rd arg: p (non-seasonal AR order)
-# 4th arg: d (non-seasonal differencing order)
-# 5th arg: q (non-seasonal MA order)
-# 6th arg: P (seasonal AR order)
-# 7th arg: D (seasonal differencing order)
-# 8th arg: Q (seasonal MA order)
-# 9th arg: s (period in terms of number of time-steps)
-# 10th arg: 0/1 (1 means include.mean)
-# 11th arg: 0 to use CG solver, 1 to use Jacobi's method
-# 12th arg: file name to store learnt parameters
+# 'X': X (one column time series - there has to be an .mtd file associated to 
the src file!) 
+# 'max_func_invoc': max_func_invoc (default 1000)
+# 'p': p (non-seasonal AR order) (default 0)
+# 'd': d (non-seasonal differencing order) (default 0)
+# 'q': q (non-seasonal MA order) (default 0)
+# 'P': P (seasonal AR order) (default 0)
+# 'D': D (seasonal differencing order)(default 0)
+# 'Q': Q (seasonal MA order)(default 0)
+# 's': s (period in terms of number of time-steps) (default 1)
+# 'include_mean': TRUE/FALSE (default FALSE)
+# 'solver': must be either 'cg' for conjugate gradients method or 'jacobi' for 
jacobi method (default "jacobi")
+# 'dest': file name to store learnt parameters (default "arima-result.csv")
+# 'result_format': the format of the destination file (default "csv") 
 
 #changing to additive sar since R's arima seems to do that
-
-arima_css = function(Matrix[Double] w, Matrix[Double] X, Integer pIn, Integer 
P, Integer qIn, Integer Q, Integer s, Integer useJacobi) return (Double obj){
-       b = X[,2:ncol(X)]%*%w
-       
-       R = Rand(rows=nrow(X), cols=nrow(X), min=0, max=0)
+arima_css = function(Matrix[Double] w, Matrix[Double] X, Integer pIn, Integer 
P, Integer qIn, Integer Q, Integer s, String solver) return (Double obj){
        
+       b = X[,2:ncol(X)]%*%w
+               
+       R = matrix(0, nrow(X), nrow(X))
        for(i7 in seq(1, qIn, 1)){
-               ma_ind_ns = P+pIn+i7
-               err_ind_ns = i7
-               ones_ns = Rand(rows=nrow(R)-err_ind_ns, cols=1, min=1, max=1)
-               d_ns = ones_ns * as.scalar(w[ma_ind_ns,1])
-               R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] = 
R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] + diag(d_ns)
+               d_ns = matrix(as.scalar(w[P+pIn+i7,1]), nrow(R)-i7, 1)
+               R[1+i7:nrow(R),1:ncol(R)-i7] = R[1+i7:nrow(R),1:ncol(R)-i7] + 
diag(d_ns)
        }
+       
        for(i8 in seq(1, Q, 1)){
-               ma_ind_s = P+pIn+qIn+i8
                err_ind_s = s*i8
-               ones_s = Rand(rows=nrow(R)-err_ind_s, cols=1, min=1, max=1)
-               d_s = ones_s * as.scalar(w[ma_ind_s,1])
+               d_s = matrix(as.scalar(w[P+pIn+qIn+i8,1]), nrow(R)-err_ind_s, 1)
                R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] = 
R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] + diag(d_s)
        }
+       
+       #TODO: provide default dml "solve()" as well
+       solution = eval(solver + "_solver", R, b, 0.01, 100)    
        
-       #checking for strict diagonal dominance
-       #required for jacobi's method
-       #
+       errs = X[,1] - solution
+       obj = sum(errs*errs)
+}
+
+cg_solver = function (Matrix[Double] R, Matrix[Double] B, Double tolerance, 
Integer max_iterations) return (Matrix[Double] y_hat){
+       y_hat = matrix(0, nrow(R), 1)
+       iter = 0
+
+       A = R + diag(matrix(1, rows=nrow(R), cols=1))
+       Z = t(A)%*%A
+       r = -(t(A)%*%B)
+       p = -r
+       norm_r2 = sum(r^2)
+       
+       continue = (norm_r2 != 0)
        
-       max_iter = 100
-       tol = 0.01
+       while(iter < max_iterations & continue){
+               q = Z%*%p
+               alpha = norm_r2 / as.scalar(t(p) %*% q)
+               y_hat += alpha * p
+               r += alpha * q
+               old_norm_r2 = norm_r2
+               norm_r2 = sum(r^2)
+               continue = (norm_r2 >= tolerance)
+               beta = norm_r2 / old_norm_r2
+               p = -r + beta * p
+               iter += 1
+       }
+}
 
-       y_hat = Rand(rows=nrow(X), cols=1, min=0, max=0)
+jacobi_solver = function (Matrix[Double] A, Matrix[Double] B, Double 
tolerance, Integer max_iterations) return (Matrix[Double] y_hat){
+       
+       y_hat = matrix(0, nrow(A), 1)
        iter = 0
-       
-       if(useJacobi == 1){
-               check = sum(rowSums(abs(R)) >= 1)
-               if(check > 0){
-                       print("R is not diagonal dominant. Suggest switching to 
an exact solver.")
-               }
+       diff = tolerance+1
+       
+       #checking for strict diagonal dominance
+       #required for jacobi's method
+       check = sum(rowSums(abs(A)) >= 1)
+       if(check > 0){
+               print("The matrix is not diagonal dominant. Suggest switching 
to an exact solver.")
+       }
                
-               diff = tol+1.0
-               while(iter < max_iter & diff > tol){
-               y_hat_new = b - R%*%y_hat
-               diff = sum((y_hat_new-y_hat)*(y_hat_new-y_hat))
-               y_hat = y_hat_new
-               iter = iter + 1
-       }
-       }else{
-               ones = matrix(1, rows=nrow(X), cols=1)
-               A = R + diag(ones)
-               Z = t(A)%*%A
-               y = t(A)%*%b
-               r = -y
-               p = -r
-               norm_r2 = sum(r*r)
-               continue = 1
-               if(norm_r2 == 0){
-                       continue = 0
-               }
-               while(iter < max_iter & continue == 1){
-                       q = Z%*%p
-                       alpha = norm_r2 / as.scalar(t(p) %*% q)
-                       y_hat = y_hat + alpha * p
-                       old_norm_r2 = norm_r2
-                       r = r + alpha * q
-                       norm_r2 = sum(r * r)
-                       if(norm_r2 < tol){
-                               continue = 0
-                       }
-                       beta = norm_r2 / old_norm_r2
-                       p = -r + beta * p
-                       iter = iter + 1
-               }
+       while(iter < max_iterations & diff > tolerance){
+               y_hat_new = B - A%*%y_hat
+               diff = sum((y_hat_new-y_hat)^2)
+               y_hat = y_hat_new
+               iter += 1
        }
-       
-       errs = X[,1] - y_hat
-       obj = sum(errs*errs)
 }
 
-#input col of time series data
-X = read($1)
 
-max_func_invoc = $2
 
-#non-seasonal order
-p = $3
-d = $4
-q = $5
+readParamters = function (String default_solver, Integer 
default_max_func_invoc, Integer default_include_mean, Integer default_p, 
Integer default_d, Integer default_q, Integer default_P, Integer default_D, 
Integer default_Q, Integer default_s , String default_dest, String 
default_result_format)return (Matrix[Double] X, String solver, Integer 
max_func_invoc, Integer include_mean, Integer p, Integer d, Integer q, Integer 
P, Integer D, Integer Q, Integer s, String dest, String result_format){
+       #input col of time series data
+       X = read($X)
+       solver = ifdef($solver, default_solver) 
+       max_func_invoc = ifdef($max_func_invoc, default_max_func_invoc)
+       include_mean = ifdef($include_mean, default_include_mean)
+       dest = ifdef($dest, default_dest)
+       result_format = ifdef($result_format, default_result_format)
+       
+       #non-seasonal order
+       p =     ifdef($p, default_p)
+       d = ifdef($d, default_d)
+       q = ifdef($q, default_q)
+
+       #seasonal order
+       P = ifdef($P, default_P)
+       D = ifdef($D, default_D)
+       Q = ifdef($Q, default_Q)
+
+       #length of the season
+       s = ifdef($s, default_s)
+       while(FALSE){} #TODO
+}
 
-#seasonal order
-P = $6
-D = $7
-Q = $8
+[X, solver, max_func_invoc, include_mean, p, d, q, P, D, Q, s, dest, 
result_format] = readParamters ("jacobi", 1000, FALSE, 0,0,0,0,0,0,1, 
"arima-results.csv", "csv")
 
-#length of the season
-s = $9
+print ("p= " + p)
+print ("d= " + d)
+print ("q= " + q)
+print ("P= " + P)
+print ("D= " + D)
+print ("Q= " + Q)
+print ("s= " + s)
+print ("include_mean= " + include_mean)
+print ("solver= " + solver)
+print ("dest= " + dest)
+print ("result_format= " + result_format)
 
-include_mean = $10
+totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols  
+print ("totcols=" + totcols)
 
-useJacobi = $11
+#TODO: check max_func_invoc < totcols --> print warning (stop here ??)
 
 num_rows = nrow(X)
 
+print("nrows of X: " + num_rows)
+
 if(num_rows <= d){
-       print("non-seasonal differencing order should be larger than length of 
the time-series")
+       print("non-seasonal differencing order should be smaller than length of 
the time-series")
 }
 
-Y = X
-for(i in seq(1, d, 1)){
-       n1 = nrow(Y)+0.0
-       Y = Y[2:n1,] - Y[1:n1-1,]
+mu = 0.0
+if(include_mean == 1){
+       mu = mean(X)
+       X = X - mu
 }
 
-num_rows = nrow(Y)+0.0
+# d-th order differencing:
+for(i in seq(1,d,1)){
+       X = X[2:nrow(X),] - X[1:nrow(X)-1,]
+}
+
+num_rows = nrow(X)
+
 if(num_rows <= s*D){
-       print("seasonal differencing order should be larger than number of 
observations divided by length of season")
+       print("seasonal differencing order should be smaller than number of 
observations divided by length of season")
 }
 
-for(i in seq(1,D, 1)){
-       n1 = nrow(Y)+0.0
-       Y = Y[s+1:n1,] - Y[1:n1-s,]
+for(i in seq(1,D,1)){
+       n1 = nrow(X)+0.0
+       X = X[s+1:n1,] - X[1:n1-s,]
 }
 
-n = nrow(Y)
 
-max_ar_col = P+p
-max_ma_col = Q+q
-if(max_ar_col > max_ma_col){
-       max_arma_col = max_ar_col
-}else{
-       max_arma_col = max_ma_col
-}
+n = nrow(X)
+print("nrow of X= " + n)
 
-mu = 0
-if(include_mean == 1){
-       mu = sum(Y)/nrow(Y)
-       Y = Y - mu
-}
+#Matrix Z with target values of prediction (X) in first column and 
+#all values that can be used to predict a this target value in column 
2:totcols of same row
+Z = cbind(X, matrix(0, n, totcols - 1))
 
-totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols  
+#TODO: This operations can be optimised/simplified --> see documentation
 
-Z = Rand(rows=n, cols=totcols, min=0, max=0)
-Z[,1] = Y #target col
 
+#fills Z with values used for non seasonal AR prediction
 parfor(i1 in seq(1, p, 1), check=0){
-       Z[i1+1:n,1+i1] = Y[1:n-i1,]
+       Z[i1+1:n,1+i1] = X[1:n-i1,]
 }
+
+#prediciton values for seasonal AR 
 parfor(i2 in seq(1, P, 1), check=0){
-       Z[s*i2+1:n,1+p+i2] = Y[1:n-s*i2,]
+       Z[s*i2+1:n,1+p+i2] = X[1:n-s*i2,]
 }
+
+#prediciton values for non seasonal MA 
 parfor(i5 in seq(1, q, 1), check=0){
-       Z[i5+1:n,1+P+p+i5] = Y[1:n-i5,]
+       Z[i5+1:n,1+P+p+i5] = X[1:n-i5,]
 }
+
+#prediciton values for seasonal MA 
 parfor(i6 in seq(1,Q, 1), check=0){
-       Z[s*i6+1:n,1+P+p+q+i6] = Y[1:n-s*i6,]
+       Z[s*i6+1:n,1+P+p+q+i6] = X[1:n-s*i6,]
 }
 
-one = Rand(rows=1, cols=1, min=1, max=1)
+simplex = cbind(matrix(0, totcols-1, 1), diag(matrix(0.1, totcols-1, 1)))
 
-simplex = Rand(rows=totcols-1, cols=totcols, min=0, max=0)
-for(i in 2:ncol(simplex)){
-       simplex[i-1,i] = 0.1
-}
+#debug output
+print(toString(simplex))
 
 num_func_invoc = 0
 
-objvals = Rand(rows=1, cols=ncol(simplex), min=0, max=0)
-parfor(i3 in 1:ncol(simplex)){
-       arima_css_obj_val = arima_css(simplex[,i3], Z, p, P, q, Q, s, useJacobi)
-       objvals[1,i3] = arima_css_obj_val
+objvals = matrix(0, 1, ncol(simplex))
+
+parfor(i3 in seq(1,ncol(simplex))){
+       objvals[1,i3] = arima_css(simplex[,i3], Z, p, P, q, Q, s, solver)
 }
-num_func_invoc = num_func_invoc + ncol(simplex)
 
+num_func_invoc += ncol(simplex)
+print ("num_func_invoc = " + num_func_invoc)
 tol = 1.5 * 10^(-8) * as.scalar(objvals[1,1])
 
-continue = 1
-best_index = 1
-while(continue == 1 & num_func_invoc <= max_func_invoc) {
-       best_index = 1
-       worst_index = 1
-       for(i in 2:ncol(objvals)){
-               this = as.scalar(objvals[1,i])
-               that = as.scalar(objvals[1,best_index])
-               if(that > this){
-               best_index = i
-               }
-               that = as.scalar(objvals[1,worst_index])
-               if(that < this){
-               worst_index = i
-               }
-       }
+continue = TRUE
+while(continue & num_func_invoc <= max_func_invoc){
+       best_index = as.scalar(rowIndexMin(objvals))
+       worst_index = as.scalar(rowIndexMax(objvals))
        
        best_obj_val = as.scalar(objvals[1,best_index])
        worst_obj_val = as.scalar(objvals[1,worst_index])
-       if(worst_obj_val <= best_obj_val + tol){
-               continue = 0
-       }
        
+       continue = (worst_obj_val > best_obj_val + tol)
+               
        print("#Function calls::" + num_func_invoc + " OBJ: " + best_obj_val)
        
        c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex))
        
        x_r = 2*c - simplex[,worst_index]
-       obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, useJacobi)
-       num_func_invoc = num_func_invoc + 1
+       obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, solver)
+       num_func_invoc += 1
        
        if(obj_x_r < best_obj_val){
                x_e = 2*x_r - c
-               obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, useJacobi)
+               obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, solver)
                num_func_invoc = num_func_invoc + 1
                
-               if(obj_x_r <= obj_x_e){
-                       simplex[,worst_index] = x_r
-                       objvals[1,worst_index] = obj_x_r
-               }else{
-                       simplex[,worst_index] = x_e
-                       objvals[1,worst_index] = obj_x_e
-               }
+               simplex[,worst_index] = ifelse (obj_x_r <= obj_x_e, x_r, x_e)
+               objvals[1,worst_index] = ifelse (obj_x_r <= obj_x_e, obj_x_r, 
obj_x_e)
        }else{
                if(obj_x_r < worst_obj_val){
                        simplex[,worst_index] = x_r
@@ -256,8 +260,8 @@ while(continue == 1 & num_func_invoc <= max_func_invoc) {
                }
                
                x_c_in = (simplex[,worst_index] + c)/2
-               obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, useJacobi)
-               num_func_invoc = num_func_invoc + 1
+               obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, solver)
+               num_func_invoc += 1
                
                if(obj_x_c_in < as.scalar(objvals[1,worst_index])){
                        simplex[,worst_index] = x_c_in
@@ -268,22 +272,26 @@ while(continue == 1 & num_func_invoc <= max_func_invoc) {
                                parfor(i4 in 1:ncol(simplex)){
                                        if(i4 != best_index){
                                                simplex[,i4] = (simplex[,i4] + 
best_point)/2
-                                               tmp = arima_css(simplex[,i4], 
Z, p, P, q, Q, s, useJacobi)
-                                               objvals[1,i4] = tmp*one
+                                               objvals[1,i4] = 
arima_css(simplex[,i4], Z, p, P, q, Q, s, solver)
                                        }
                                }
-                               num_func_invoc = num_func_invoc + ncol(simplex) 
- 1
+                               num_func_invoc += ncol(simplex) - 1
                        }
                }
        }
 }
 
+print ("best_index: "+ toString(best_index))
 best_point = simplex[,best_index]
-if(include_mean == 1){
-       tmp2 = Rand(rows=totcols, cols=1, min=0, max=0)
-       tmp2[1:nrow(best_point),1] = best_point
-       tmp2[nrow(tmp2),1] = mu
-       best_point = tmp2
+
+if(include_mean){
+       best_point = rbind(best_point, as.matrix(mu))
 }
 
-write(best_point, $12, format="text")
+result_format = ifdef($result_format, "csv")
+write(best_point, dest, format=result_format)
+
+ 
+debug_printMatrixShape = function (Matrix[Double] M, String matrixIdentifier){
+       print ("Matrix '" + matrixIdentifier + "' has shape [" + nrow(M) + "," 
+ ncol(M) + "]")
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/systemml/blob/09578186/src/test/scripts/applications/arima_box-jenkins/arima_old.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/arima_box-jenkins/arima_old.R 
b/src/test/scripts/applications/arima_box-jenkins/arima_old.R
new file mode 100644
index 0000000..451635b
--- /dev/null
+++ b/src/test/scripts/applications/arima_box-jenkins/arima_old.R
@@ -0,0 +1,278 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+library(Matrix)
+
+arima_css = function(w, X, p, P, q, Q, s, useJacobi){
+  b = matrix(X[,2:ncol(X)], nrow(X), ncol(X)-1)%*%w
+
+  R = matrix(0, nrow(X), nrow(X))
+  if(q>0){
+    for(i7 in 1:q){
+      ma_ind_ns = P+p+i7
+      err_ind_ns = i7
+      ones_ns = rep(1, nrow(R)-err_ind_ns)
+      d_ns = ones_ns * w[ma_ind_ns,1]
+      R[(1+err_ind_ns):nrow(R),1:(ncol(R)-err_ind_ns)] = 
R[(1+err_ind_ns):nrow(R),1:(ncol(R)-err_ind_ns)] + diag(d_ns)
+    }
+  }
+  if(Q>0){
+    for(i8 in 1:Q){
+      ma_ind_s = P+p+q+i8
+      err_ind_s = s*i8
+      ones_s = rep(1, nrow(R)-err_ind_s)
+      d_s = ones_s * w[ma_ind_s,1]
+      R[(1+err_ind_s):nrow(R),1:(ncol(R)-err_ind_s)] = 
R[(1+err_ind_s):nrow(R),1:(ncol(R)-err_ind_s)] + diag(d_s)
+    }
+  }
+
+  max_iter = 100
+  tol = 0.01
+
+  y_hat = matrix(0, nrow(X), 1)
+  iter = 0
+  
+  if(useJacobi == 1){
+       check = sum(ifelse(rowSums(abs(R)) >= 1, 1, 0))
+       if(check > 0){
+         print("R is not diagonal dominant. Suggest switching to an exact 
solver.")
+       }
+       diff = tol+1.0
+       while(iter < max_iter & diff > tol){
+         y_hat_new = matrix(b - R%*%y_hat, nrow(y_hat), 1)
+         diff = sum((y_hat_new-y_hat)*(y_hat_new-y_hat))
+         y_hat = y_hat_new
+         iter = iter + 1
+       }
+  }else{
+       ones = rep(1, nrow(X))
+       A = R + diag(ones)
+       Z = t(A)%*%A
+       y = t(A)%*%b
+       r = -y
+       p = -r
+       norm_r2 = sum(r*r)
+       while(iter < max_iter & norm_r2 > tol){
+               q = Z%*%p 
+               alpha = norm_r2 / sum(p*q)
+               y_hat = y_hat + alpha * p
+               old_norm_r2 = norm_r2
+               r = r + alpha * q
+               norm_r2 = sum(r * r)
+               beta = norm_r2 / old_norm_r2
+               p = -r + beta * p
+               iter = iter + 1
+       }
+  }
+  
+  errs = X[,1] - y_hat
+  obj = sum(errs*errs)
+
+  return(obj)
+}
+
+#input col of time series data
+X = readMM(paste(args[1], "col.mtx", sep=""))
+
+max_func_invoc = as.integer(args[2])
+
+#non-seasonal order
+p = as.integer(args[3])
+d = as.integer(args[4])
+q = as.integer(args[5])
+
+#seasonal order
+P = as.integer(args[6])
+D = as.integer(args[7])
+Q = as.integer(args[8])
+
+#length of the season
+s = as.integer(args[9])
+
+include_mean = as.integer(args[10])
+
+useJacobi = as.integer(args[11])
+
+num_rows = nrow(X)
+
+if(num_rows <= d){
+  print("non-seasonal differencing order should be larger than length of the 
time-series")
+}
+
+Y = matrix(X[,1], nrow(X), 1)
+if(d>0){
+  for(i in 1:d){
+    n1 = nrow(Y)
+    Y = matrix(Y[2:n1,] - Y[1:(n1-1),], n1-1, 1)
+  }
+}
+
+num_rows = nrow(Y)
+if(num_rows <= s*D){
+  print("seasonal differencing order should be larger than number of 
observations divided by length of season")
+}
+
+if(D>0){
+  for(i in 1:D){
+    n1 = nrow(Y)
+    Y = matrix(Y[(s+1):n1,] - Y[1:(n1-s),], n1-s, 1)
+  }
+}
+
+n = nrow(Y)
+
+max_ar_col = P+p
+max_ma_col = Q+q
+if(max_ar_col > max_ma_col){
+  max_arma_col = max_ar_col
+}else{
+  max_arma_col = max_ma_col
+}
+
+mu = 0
+if(include_mean == 1){
+  mu = sum(Y)/nrow(Y)
+  Y = Y - mu
+}
+
+totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols  
+
+Z = matrix(0, n, totcols)
+Z[,1] = Y #target col
+
+if(p>0){
+  for(i1 in 1:p){
+    Z[(i1+1):n,1+i1] = Y[1:(n-i1),]
+  }
+}
+if(P>0){
+  for(i2 in 1:P){
+    Z[(s*i2+1):n,1+p+i2] = Y[1:(n-s*i2),]
+  }
+}
+if(q>0){
+  for(i5 in 1:q){
+    Z[(i5+1):n,1+P+p+i5] = Y[1:(n-i5),]
+  }
+}
+if(Q>0){
+  for(i6 in 1:Q){
+     Z[(s*i6+1):n,1+P+p+q+i6] = Y[1:(n-s*i6),]
+  }
+}
+
+simplex = matrix(0, totcols-1, totcols)
+for(i in 2:ncol(simplex)){
+  simplex[i-1,i] = 0.1
+}
+
+num_func_invoc = 0
+
+objvals = matrix(0, 1, ncol(simplex))
+for(i3 in 1:ncol(simplex)){
+  objvals[1,i3] = arima_css(matrix(simplex[,i3], nrow(simplex), 1), Z, p, P, 
q, Q, s, useJacobi)
+}
+num_func_invoc = num_func_invoc + ncol(simplex)
+
+tol = 1.5 * 10^(-8) * objvals[1,1]
+
+continue = 1
+while(continue == 1 & num_func_invoc <= max_func_invoc) {
+  #print(paste(num_func_invoc, max_func_invoc))
+  best_index = 1
+  worst_index = 1
+  for(i in 2:ncol(objvals)){
+    this = objvals[1,i]
+    that = objvals[1,best_index]
+    if(that > this){
+      best_index = i
+    }
+    that = objvals[1,worst_index]
+    if(that < this){
+      worst_index = i
+    }
+  }
+
+  best_obj_val = objvals[1,best_index]
+  worst_obj_val = objvals[1,worst_index]
+  if(worst_obj_val <= best_obj_val + tol){
+    continue = 0
+  }
+
+  print(paste("#Function calls::", num_func_invoc, "OBJ:", best_obj_val))
+
+  c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex))
+
+  x_r = 2*c - simplex[,worst_index]
+  obj_x_r = arima_css(matrix(x_r, nrow(simplex), 1), Z, p, P, q, Q, s, 
useJacobi)
+  num_func_invoc = num_func_invoc + 1
+
+  if(obj_x_r < best_obj_val){
+    x_e = 2*x_r - c
+    obj_x_e = arima_css(matrix(x_e, nrow(simplex), 1), Z, p, P, q, Q, s, 
useJacobi)
+    num_func_invoc = num_func_invoc + 1
+
+    if(obj_x_r <= obj_x_e){
+      simplex[,worst_index] = x_r
+      objvals[1,worst_index] = obj_x_r
+    }else{
+      simplex[,worst_index] = x_e
+      objvals[1,worst_index] = obj_x_e
+    }
+  }else{
+    if(obj_x_r < worst_obj_val){
+      simplex[,worst_index] = x_r
+      objvals[1,worst_index] = obj_x_r
+    }
+
+    x_c_in = (simplex[,worst_index] + c)/2
+    obj_x_c_in = arima_css(matrix(x_c_in, nrow(simplex), 1), Z, p, P, q, Q, s, 
useJacobi)
+    num_func_invoc = num_func_invoc + 1
+    
+    if(obj_x_c_in < objvals[1,worst_index]){
+      simplex[,worst_index] = x_c_in
+      objvals[1,worst_index] = obj_x_c_in
+    }else{
+      if(obj_x_r >= worst_obj_val){
+        best_point = simplex[,best_index]
+
+               for(i4 in 1:ncol(simplex)){
+                       if(i4 != best_index){
+                 simplex[,i4] = (simplex[,i4] + best_point)/2
+                         objvals[1,i4] = arima_css(matrix(simplex[,i4], 
nrow(simplex), 1), Z, p, P, q, Q, s, useJacobi)
+                       }
+               }
+               num_func_invoc = num_func_invoc + ncol(simplex) - 1
+      }
+    }
+  }
+}
+
+best_point = matrix(simplex[,best_index], nrow(simplex), 1)
+if(include_mean == 1){
+  tmp = matrix(0, totcols, 1)
+  tmp[1:nrow(best_point),1] = best_point
+  tmp[nrow(tmp),1] = mu
+  best_point = tmp
+}
+
+writeMM(as(best_point, "CsparseMatrix"), paste(args[12], "learnt.model", 
sep=""))

http://git-wip-us.apache.org/repos/asf/systemml/blob/09578186/src/test/scripts/applications/arima_box-jenkins/arima_old.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/arima_box-jenkins/arima_old.dml 
b/src/test/scripts/applications/arima_box-jenkins/arima_old.dml
new file mode 100644
index 0000000..5e3aef6
--- /dev/null
+++ b/src/test/scripts/applications/arima_box-jenkins/arima_old.dml
@@ -0,0 +1,289 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Arguments
+# 1st arg: X (one column time series)
+# 2nd arg: max_func_invoc
+# 3rd arg: p (non-seasonal AR order)
+# 4th arg: d (non-seasonal differencing order)
+# 5th arg: q (non-seasonal MA order)
+# 6th arg: P (seasonal AR order)
+# 7th arg: D (seasonal differencing order)
+# 8th arg: Q (seasonal MA order)
+# 9th arg: s (period in terms of number of time-steps)
+# 10th arg: 0/1 (1 means include.mean)
+# 11th arg: 0 to use CG solver, 1 to use Jacobi's method
+# 12th arg: file name to store learnt parameters
+
+#changing to additive sar since R's arima seems to do that
+
+arima_css = function(Matrix[Double] w, Matrix[Double] X, Integer pIn, Integer 
P, Integer qIn, Integer Q, Integer s, Integer useJacobi) return (Double obj){
+       b = X[,2:ncol(X)]%*%w
+       
+       R = Rand(rows=nrow(X), cols=nrow(X), min=0, max=0)
+       
+       for(i7 in seq(1, qIn, 1)){
+               ma_ind_ns = P+pIn+i7
+               err_ind_ns = i7
+               ones_ns = Rand(rows=nrow(R)-err_ind_ns, cols=1, min=1, max=1)
+               d_ns = ones_ns * as.scalar(w[ma_ind_ns,1])
+               R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] = 
R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] + diag(d_ns)
+       }
+       for(i8 in seq(1, Q, 1)){
+               ma_ind_s = P+pIn+qIn+i8
+               err_ind_s = s*i8
+               ones_s = Rand(rows=nrow(R)-err_ind_s, cols=1, min=1, max=1)
+               d_s = ones_s * as.scalar(w[ma_ind_s,1])
+               R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] = 
R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] + diag(d_s)
+       }
+       
+       #checking for strict diagonal dominance
+       #required for jacobi's method
+       #
+       
+       max_iter = 100
+       tol = 0.01
+
+       y_hat = Rand(rows=nrow(X), cols=1, min=0, max=0)
+       iter = 0
+       
+       if(useJacobi == 1){
+               check = sum(rowSums(abs(R)) >= 1)
+               if(check > 0){
+                       print("R is not diagonal dominant. Suggest switching to 
an exact solver.")
+               }
+               
+               diff = tol+1.0
+               while(iter < max_iter & diff > tol){
+               y_hat_new = b - R%*%y_hat
+               diff = sum((y_hat_new-y_hat)*(y_hat_new-y_hat))
+               y_hat = y_hat_new
+               iter = iter + 1
+       }
+       }else{
+               ones = matrix(1, rows=nrow(X), cols=1)
+               A = R + diag(ones)
+               Z = t(A)%*%A
+               y = t(A)%*%b
+               r = -y
+               p = -r
+               norm_r2 = sum(r*r)
+               continue = 1
+               if(norm_r2 == 0){
+                       continue = 0
+               }
+               while(iter < max_iter & continue == 1){
+                       q = Z%*%p
+                       alpha = norm_r2 / as.scalar(t(p) %*% q)
+                       y_hat = y_hat + alpha * p
+                       old_norm_r2 = norm_r2
+                       r = r + alpha * q
+                       norm_r2 = sum(r * r)
+                       if(norm_r2 < tol){
+                               continue = 0
+                       }
+                       beta = norm_r2 / old_norm_r2
+                       p = -r + beta * p
+                       iter = iter + 1
+               }
+       }
+       
+       errs = X[,1] - y_hat
+       obj = sum(errs*errs)
+}
+
+#input col of time series data
+X = read($1)
+
+max_func_invoc = $2
+
+#non-seasonal order
+p = $3
+d = $4
+q = $5
+
+#seasonal order
+P = $6
+D = $7
+Q = $8
+
+#length of the season
+s = $9
+
+include_mean = $10
+
+useJacobi = $11
+
+num_rows = nrow(X)
+
+if(num_rows <= d){
+       print("non-seasonal differencing order should be larger than length of 
the time-series")
+}
+
+Y = X
+for(i in seq(1, d, 1)){
+       n1 = nrow(Y)+0.0
+       Y = Y[2:n1,] - Y[1:n1-1,]
+}
+
+num_rows = nrow(Y)+0.0
+if(num_rows <= s*D){
+       print("seasonal differencing order should be larger than number of 
observations divided by length of season")
+}
+
+for(i in seq(1,D, 1)){
+       n1 = nrow(Y)+0.0
+       Y = Y[s+1:n1,] - Y[1:n1-s,]
+}
+
+n = nrow(Y)
+
+max_ar_col = P+p
+max_ma_col = Q+q
+if(max_ar_col > max_ma_col){
+       max_arma_col = max_ar_col
+}else{
+       max_arma_col = max_ma_col
+}
+
+mu = 0
+if(include_mean == 1){
+       mu = sum(Y)/nrow(Y)
+       Y = Y - mu
+}
+
+totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols  
+
+Z = Rand(rows=n, cols=totcols, min=0, max=0)
+Z[,1] = Y #target col
+
+parfor(i1 in seq(1, p, 1), check=0){
+       Z[i1+1:n,1+i1] = Y[1:n-i1,]
+}
+parfor(i2 in seq(1, P, 1), check=0){
+       Z[s*i2+1:n,1+p+i2] = Y[1:n-s*i2,]
+}
+parfor(i5 in seq(1, q, 1), check=0){
+       Z[i5+1:n,1+P+p+i5] = Y[1:n-i5,]
+}
+parfor(i6 in seq(1,Q, 1), check=0){
+       Z[s*i6+1:n,1+P+p+q+i6] = Y[1:n-s*i6,]
+}
+
+one = Rand(rows=1, cols=1, min=1, max=1)
+
+simplex = Rand(rows=totcols-1, cols=totcols, min=0, max=0)
+for(i in 2:ncol(simplex)){
+       simplex[i-1,i] = 0.1
+}
+
+num_func_invoc = 0
+
+objvals = Rand(rows=1, cols=ncol(simplex), min=0, max=0)
+parfor(i3 in 1:ncol(simplex)){
+       arima_css_obj_val = arima_css(simplex[,i3], Z, p, P, q, Q, s, useJacobi)
+       objvals[1,i3] = arima_css_obj_val
+}
+num_func_invoc = num_func_invoc + ncol(simplex)
+
+tol = 1.5 * 10^(-8) * as.scalar(objvals[1,1])
+
+continue = 1
+best_index = 1
+while(continue == 1 & num_func_invoc <= max_func_invoc) {
+       best_index = 1
+       worst_index = 1
+       for(i in 2:ncol(objvals)){
+               this = as.scalar(objvals[1,i])
+               that = as.scalar(objvals[1,best_index])
+               if(that > this){
+               best_index = i
+               }
+               that = as.scalar(objvals[1,worst_index])
+               if(that < this){
+               worst_index = i
+               }
+       }
+       
+       best_obj_val = as.scalar(objvals[1,best_index])
+       worst_obj_val = as.scalar(objvals[1,worst_index])
+       if(worst_obj_val <= best_obj_val + tol){
+               continue = 0
+       }
+       
+       print("#Function calls::" + num_func_invoc + " OBJ: " + best_obj_val)
+       
+       c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex))
+       
+       x_r = 2*c - simplex[,worst_index]
+       obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, useJacobi)
+       num_func_invoc = num_func_invoc + 1
+       
+       if(obj_x_r < best_obj_val){
+               x_e = 2*x_r - c
+               obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, useJacobi)
+               num_func_invoc = num_func_invoc + 1
+               
+               if(obj_x_r <= obj_x_e){
+                       simplex[,worst_index] = x_r
+                       objvals[1,worst_index] = obj_x_r
+               }else{
+                       simplex[,worst_index] = x_e
+                       objvals[1,worst_index] = obj_x_e
+               }
+       }else{
+               if(obj_x_r < worst_obj_val){
+                       simplex[,worst_index] = x_r
+                       objvals[1,worst_index] = obj_x_r
+               }
+               
+               x_c_in = (simplex[,worst_index] + c)/2
+               obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, useJacobi)
+               num_func_invoc = num_func_invoc + 1
+               
+               if(obj_x_c_in < as.scalar(objvals[1,worst_index])){
+                       simplex[,worst_index] = x_c_in
+                       objvals[1,worst_index] = obj_x_c_in
+               }else{
+                       if(obj_x_r >= worst_obj_val){
+                               best_point = simplex[,best_index]
+                               parfor(i4 in 1:ncol(simplex)){
+                                       if(i4 != best_index){
+                                               simplex[,i4] = (simplex[,i4] + 
best_point)/2
+                                               tmp = arima_css(simplex[,i4], 
Z, p, P, q, Q, s, useJacobi)
+                                               objvals[1,i4] = tmp*one
+                                       }
+                               }
+                               num_func_invoc = num_func_invoc + ncol(simplex) 
- 1
+                       }
+               }
+       }
+}
+
+best_point = simplex[,best_index]
+if(include_mean == 1){
+       tmp2 = Rand(rows=totcols, cols=1, min=0, max=0)
+       tmp2[1:nrow(best_point),1] = best_point
+       tmp2[nrow(tmp2),1] = mu
+       best_point = tmp2
+}
+
+write(best_point, $12, format="text")

systemml git commit: [SYSTEMML-2342] Cleanup ARIMA dml script

Reply via email to