Baunsgaard commented on code in PR #1838:
URL: https://github.com/apache/systemds/pull/1838#discussion_r1271551936


##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,258 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script implements the hidden markov model method
+# INPUT:
+# 
--------------------------------------------------------------------------------------------
+# X         Output of hmm of last n timestep
+# OUTPUT:
+# 
--------------------------------------------------------------------------------------------
+# ip        Start probabilities
+# A         Transition probabilities
+# B         Emission probabilities
+# 
--------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] ip, Matrix[Double] 
A, Matrix[Double] B)
+{
+
+    #X should have the size of 1 * ncols
+    #should be transposed for the unique function
+    unique_X = matrix(X, rows=ncol(X), cols=1)

Review Comment:
   to transpose call t(X)



##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,258 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script implements the hidden markov model method
+# INPUT:
+# 
--------------------------------------------------------------------------------------------
+# X         Output of hmm of last n timestep
+# OUTPUT:
+# 
--------------------------------------------------------------------------------------------
+# ip        Start probabilities
+# A         Transition probabilities
+# B         Emission probabilities
+# 
--------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] ip, Matrix[Double] 
A, Matrix[Double] B)
+{
+
+    #X should have the size of 1 * ncols
+    #should be transposed for the unique function
+    unique_X = matrix(X, rows=ncol(X), cols=1)
+    nr_outputs = unique_vals(unique_X)
+    
+    /*
+    Since nr of states if unknown, fit a hmm model for every total number of 
states
+    from 1 to some max_states or 1 to log(n_timesteps), depending on whichever 
is greater.
+
+    Also the break condition is: If the likelihood decreases with increase in 
number of total states, 
+    break and take the paremeters of the last iteration as the optimal one.
+    */
+    
+    max_states = 6
+    T = ncol(X)
+    
+    if (max_states < log(T)){
+        max_states = ceil(log(T))
+    }
+    
+    search = TRUE
+    nr_states = 2
+    seed = 42
+
+    while (search) {
+        [A_new, B_new, ip_new, curr_ll, seed] = baum_welch(X, nr_states, 
nr_outputs, 20, seed)
+        seed = seed+1
+        if (nr_states == 2) {
+            prev_ll = -1
+        }
+        if (curr_ll < prev_ll) {
+            search = FALSE
+        }
+        else {
+            A = A_new
+            B = B_new
+            ip = ip_new
+        }
+        prev_ll = curr_ll
+        nr_states = nr_states+1
+
+        if (nr_states > max_states) {
+            search = FALSE
+        }
+    }
+}
+
+col_normalize = function(Matrix[Double] m) return (Matrix[Double] m_norm) {
+       m_norm = m / rowSums(m)
+}
+
+unique_vals = function (Matrix[Double] X) return (Integer l) {
+    # group and count duplicates
+    I = aggregate (target = X, groups = X[,1], fn = "count")
+    # select groups
+    res = removeEmpty (target = seq (1, max (X[,1])), margin = "rows", select 
= (I != 0))
+    l = length(res)
+}
+
+baum_welch = function( Matrix[Double] X, Integer nr_states, Integer 
nr_outputs, Integer IT, Integer seed) return (Matrix[Double] A, Matrix[Double] 
B, Matrix[Double] ip, Double likelihood, Integer seed)
+{
+    #initialize state transition and emmission matrices uniformly
+    A = col_normalize(matrix(1/nr_states, rows=nr_states, cols=nr_states) + 
rand(rows=nr_states, cols=nr_states, seed=seed))
+    seed = seed+1
+    B = col_normalize(matrix(1/nr_outputs,rows=nr_states, cols=nr_outputs) + 
rand(rows=nr_states, cols=nr_outputs, seed=seed))
+    seed = seed+1
+    ip = matrix(1/nr_states, rows=nr_states, cols=1) + rand(rows=nr_states, 
cols=1, seed=seed)
+    seed = seed+1
+
+    likelihood = 0
+    T = ncol(X)
+    
+    for (i in 1:IT) {
+        alpha = forward(X, A, B, ip)
+        beta = backward(X, A, B)
+        
+        gamma = calculate_gamma(alpha, beta)
+        eta = calculate_eta(X, alpha, beta, A, B)
+
+        ip = gamma[, 1]
+        A = calculate_A(eta, gamma, nr_states)
+        B = calculate_B(gamma, X, nr_states, nr_outputs)
+        
+        #likelhood using forward method
+        likelihood = sum(gamma[, T] * alpha[, T])
+    }
+}
+
+calculate_A = function(Matrix[Double] eta, Matrix[Double] gamma, Integer 
nr_states) return (Matrix[Double] A) {
+    A = rand(rows=nr_states, cols=nr_states)
+    T = ncol(eta)
+
+    parfor (i in 1:nr_states) {
+        parfor (j in 1:nr_states) {
+            eta_id = (i-1) * nr_states + j
+            num_ij = sum(eta[eta_id, 1:T-1])
+            den_ij = sum(gamma[i,1:T-1])
+            A[i,j] = num_ij/den_ij
+        }
+    }
+}
+
+calculate_B = function(Matrix[Double] gamma, Matrix[Double] X, Integer 
nr_states, Integer nr_outputs) return (Matrix[Double] B) {
+    B = rand(rows=nr_states, cols=nr_outputs)
+
+    parfor (i in 1:nr_states){
+        parfor (j in 1:nr_outputs) {
+            B[i, j] = sum((X == j) * gamma[i, ]) / sum(gamma[i, ])
+        }
+    }
+}
+
+/*
+  To work with 3d matrices, the values in first two dimensions of eta matrix
+  must be rolled the first dimension. The following indexing function maps the
+  index of rolled matrix(eta) to "normal" 2d matrices like alpha, beta, etc
+*/
+index = function(Integer rolled_i, Integer nr_states) return(Double i, Double 
j) {
+    i = ceil(rolled_i / nr_states) #index starts at 1
+    j = rolled_i - (nr_states * (i-1)) #i-1) * 
+}
+
+calculate_eta =  function (Matrix[Double] X, Matrix[Double] alpha, 
Matrix[Double] beta, Matrix[Double] A, Matrix[Double] B) return (Matrix[Double] 
eta)
+{
+    nr_states = nrow(alpha)
+    T = ncol(alpha)    
+    
+    /*
+    eta a (nr_states * nr_states) * T matrix with cell (i, t) being 
probability of 
+    the state being at i at timestep j given the observed output
+    */
+    
+    tot_transitions = nr_states * nr_states
+    eta = rand(rows=tot_transitions, cols=T-1)

Review Comment:
   we need a seed here.



##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,258 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script implements the hidden markov model method
+# INPUT:
+# 
--------------------------------------------------------------------------------------------
+# X         Output of hmm of last n timestep
+# OUTPUT:
+# 
--------------------------------------------------------------------------------------------
+# ip        Start probabilities
+# A         Transition probabilities
+# B         Emission probabilities
+# 
--------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] ip, Matrix[Double] 
A, Matrix[Double] B)
+{
+
+    #X should have the size of 1 * ncols
+    #should be transposed for the unique function
+    unique_X = matrix(X, rows=ncol(X), cols=1)
+    nr_outputs = unique_vals(unique_X)
+    
+    /*
+    Since nr of states if unknown, fit a hmm model for every total number of 
states
+    from 1 to some max_states or 1 to log(n_timesteps), depending on whichever 
is greater.
+
+    Also the break condition is: If the likelihood decreases with increase in 
number of total states, 
+    break and take the paremeters of the last iteration as the optimal one.
+    */
+    
+    max_states = 6
+    T = ncol(X)
+    
+    if (max_states < log(T)){
+        max_states = ceil(log(T))
+    }
+    
+    search = TRUE
+    nr_states = 2
+    seed = 42
+
+    while (search) {
+        [A_new, B_new, ip_new, curr_ll, seed] = baum_welch(X, nr_states, 
nr_outputs, 20, seed)
+        seed = seed+1
+        if (nr_states == 2) {
+            prev_ll = -1
+        }
+        if (curr_ll < prev_ll) {
+            search = FALSE
+        }
+        else {
+            A = A_new
+            B = B_new
+            ip = ip_new
+        }
+        prev_ll = curr_ll
+        nr_states = nr_states+1
+
+        if (nr_states > max_states) {
+            search = FALSE
+        }
+    }
+}
+
+col_normalize = function(Matrix[Double] m) return (Matrix[Double] m_norm) {
+       m_norm = m / rowSums(m)
+}
+
+unique_vals = function (Matrix[Double] X) return (Integer l) {
+    # group and count duplicates
+    I = aggregate (target = X, groups = X[,1], fn = "count")
+    # select groups
+    res = removeEmpty (target = seq (1, max (X[,1])), margin = "rows", select 
= (I != 0))
+    l = length(res)
+}
+
+baum_welch = function( Matrix[Double] X, Integer nr_states, Integer 
nr_outputs, Integer IT, Integer seed) return (Matrix[Double] A, Matrix[Double] 
B, Matrix[Double] ip, Double likelihood, Integer seed)
+{
+    #initialize state transition and emmission matrices uniformly
+    A = col_normalize(matrix(1/nr_states, rows=nr_states, cols=nr_states) + 
rand(rows=nr_states, cols=nr_states, seed=seed))
+    seed = seed+1
+    B = col_normalize(matrix(1/nr_outputs,rows=nr_states, cols=nr_outputs) + 
rand(rows=nr_states, cols=nr_outputs, seed=seed))
+    seed = seed+1
+    ip = matrix(1/nr_states, rows=nr_states, cols=1) + rand(rows=nr_states, 
cols=1, seed=seed)
+    seed = seed+1
+
+    likelihood = 0
+    T = ncol(X)
+    
+    for (i in 1:IT) {
+        alpha = forward(X, A, B, ip)
+        beta = backward(X, A, B)
+        
+        gamma = calculate_gamma(alpha, beta)
+        eta = calculate_eta(X, alpha, beta, A, B)
+
+        ip = gamma[, 1]
+        A = calculate_A(eta, gamma, nr_states)
+        B = calculate_B(gamma, X, nr_states, nr_outputs)
+        
+        #likelhood using forward method
+        likelihood = sum(gamma[, T] * alpha[, T])
+    }
+}
+
+calculate_A = function(Matrix[Double] eta, Matrix[Double] gamma, Integer 
nr_states) return (Matrix[Double] A) {
+    A = rand(rows=nr_states, cols=nr_states)
+    T = ncol(eta)
+
+    parfor (i in 1:nr_states) {
+        parfor (j in 1:nr_states) {
+            eta_id = (i-1) * nr_states + j
+            num_ij = sum(eta[eta_id, 1:T-1])
+            den_ij = sum(gamma[i,1:T-1])
+            A[i,j] = num_ij/den_ij
+        }
+    }
+}
+
+calculate_B = function(Matrix[Double] gamma, Matrix[Double] X, Integer 
nr_states, Integer nr_outputs) return (Matrix[Double] B) {
+    B = rand(rows=nr_states, cols=nr_outputs)
+
+    parfor (i in 1:nr_states){
+        parfor (j in 1:nr_outputs) {
+            B[i, j] = sum((X == j) * gamma[i, ]) / sum(gamma[i, ])
+        }
+    }
+}
+
+/*
+  To work with 3d matrices, the values in first two dimensions of eta matrix
+  must be rolled the first dimension. The following indexing function maps the
+  index of rolled matrix(eta) to "normal" 2d matrices like alpha, beta, etc
+*/
+index = function(Integer rolled_i, Integer nr_states) return(Double i, Double 
j) {
+    i = ceil(rolled_i / nr_states) #index starts at 1
+    j = rolled_i - (nr_states * (i-1)) #i-1) * 
+}
+
+calculate_eta =  function (Matrix[Double] X, Matrix[Double] alpha, 
Matrix[Double] beta, Matrix[Double] A, Matrix[Double] B) return (Matrix[Double] 
eta)
+{
+    nr_states = nrow(alpha)
+    T = ncol(alpha)    
+    
+    /*
+    eta a (nr_states * nr_states) * T matrix with cell (i, t) being 
probability of 
+    the state being at i at timestep j given the observed output
+    */
+    
+    tot_transitions = nr_states * nr_states
+    eta = rand(rows=tot_transitions, cols=T-1)
+    
+    /*
+    The transitions will be indiced as such:transition 1-N will represent
+    transition from state 1 to 1, 2 upto state N. transition N+1-2N will 
represent
+    transitions from 2 to 1, 2 upto state N  
+    */
+    
+    parfor (trans_id in 1:tot_transitions) {
+        [i, j] = index(trans_id, nr_states)
+        for (t in 1:(T-1)) {
+            #indices for alpha and beta
+            ot1 = output_t(X, t+1)
+            num_ij = alpha[i, t] * A[i, j] * beta[j, t+1] * B[j, ot1]
+            den = matrix(0, rows=nr_states, cols=1)
+            
+            parfor (k in 1:nr_states) {
+                den[k, 1] = sum(alpha[k, t] * t(A[k, ]) * beta[, t+1] * 
B[,ot1])
+            }
+            s = num_ij / sum(den)
+            eta[trans_id, t] = as.scalar(s[1,1])
+        }
+    }
+}
+
+calculate_gamma = function (Matrix[Double] alpha, Matrix[Double] beta) return 
(Matrix[Double] gamma)
+{ 
+    /*
+    gamma a nr_state * T matrix with cell (i, t) being probability of 
+    the state being at i at timestep j given the observed output 
+    */
+    nr_states = nrow(alpha)
+    T = ncol(alpha)
+    
+    gamma = rand(rows=nr_states, cols=T)
+    
+    parfor (i in 1:nrow(alpha)) {
+        parfor (t in 1:ncol(alpha)) {
+            num_it = alpha[i, t] * beta[i, t]
+            den_it = sum(alpha[,t] * beta[,t])
+            gamma[i, t] = num_it / den_it
+        }
+    }
+}
+
+output_t = function (Matrix[Double] X, Integer t) return (Integer o) {
+    o = as.integer(as.scalar(X[1, t]))
+}
+
+forward = function (Matrix[Double] X, Matrix[Double] A, Matrix[Double] B, 
Matrix[Double] ip) return (Matrix[Double] alpha)
+{   
+    /*
+    alpha a matrix of size nr_states * T with a cell i,t being probability of 
+    the state being at state i at timestep j and the outputs till timestep j
+    */
+    
+    T = ncol(X)
+    nr_states = nrow(A) 
+    alpha = matrix(0, rows=nr_states, cols=T)
+    o_1 = output_t(X, 1)
+    alpha[ ,1] = ip * B[ ,o_1]
+
+    for (t in 2:T) {
+        ot = output_t(X, t)
+        for (i in 1:nr_states) {    
+            alpha[i, t] = B[i, ot] * sum(alpha[,t-1]* A[ ,i]) 
+        }
+    }
+}
+
+backward = function (Matrix[Double] X, Matrix[Double] A, Matrix[Double] B) 
return (Matrix[Double] beta)
+{
+    /*
+    alpha a matrix of size nr_states * T with a cell i,t being probability of
+    the model producing outputs (o_t+1,..., o_T) given that the model is 
+    at state i at time t
+    */
+    
+    T = ncol(X)
+    nr_states = nrow(A)
+    beta = matrix(1/T, rows=nr_states, cols=T)
+    beta[,T] = matrix(1/T, rows=nr_states, cols=1)
+    
+    for (t in (T-1):1) {
+        ot = output_t(X, t)
+        for (i in 1:nr_states) {
+            beta[i, t] = sum(beta[, t+1] * t(A[i, ]) * B[ , ot])

Review Comment:
   Move beta[,t+1] to outer loop if possible



##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,258 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script implements the hidden markov model method
+# INPUT:
+# 
--------------------------------------------------------------------------------------------
+# X         Output of hmm of last n timestep
+# OUTPUT:
+# 
--------------------------------------------------------------------------------------------
+# ip        Start probabilities
+# A         Transition probabilities
+# B         Emission probabilities
+# 
--------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] ip, Matrix[Double] 
A, Matrix[Double] B)
+{
+
+    #X should have the size of 1 * ncols
+    #should be transposed for the unique function
+    unique_X = matrix(X, rows=ncol(X), cols=1)
+    nr_outputs = unique_vals(unique_X)
+    
+    /*
+    Since nr of states if unknown, fit a hmm model for every total number of 
states
+    from 1 to some max_states or 1 to log(n_timesteps), depending on whichever 
is greater.
+
+    Also the break condition is: If the likelihood decreases with increase in 
number of total states, 
+    break and take the paremeters of the last iteration as the optimal one.
+    */
+    
+    max_states = 6
+    T = ncol(X)
+    
+    if (max_states < log(T)){
+        max_states = ceil(log(T))
+    }
+    
+    search = TRUE
+    nr_states = 2
+    seed = 42

Review Comment:
   make seed an argument initially



##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,258 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script implements the hidden markov model method
+# INPUT:
+# 
--------------------------------------------------------------------------------------------
+# X         Output of hmm of last n timestep
+# OUTPUT:
+# 
--------------------------------------------------------------------------------------------
+# ip        Start probabilities
+# A         Transition probabilities
+# B         Emission probabilities
+# 
--------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] ip, Matrix[Double] 
A, Matrix[Double] B)
+{
+
+    #X should have the size of 1 * ncols
+    #should be transposed for the unique function
+    unique_X = matrix(X, rows=ncol(X), cols=1)
+    nr_outputs = unique_vals(unique_X)
+    
+    /*
+    Since nr of states if unknown, fit a hmm model for every total number of 
states
+    from 1 to some max_states or 1 to log(n_timesteps), depending on whichever 
is greater.
+
+    Also the break condition is: If the likelihood decreases with increase in 
number of total states, 
+    break and take the paremeters of the last iteration as the optimal one.
+    */
+    
+    max_states = 6
+    T = ncol(X)
+    
+    if (max_states < log(T)){
+        max_states = ceil(log(T))
+    }
+    
+    search = TRUE
+    nr_states = 2
+    seed = 42
+
+    while (search) {
+        [A_new, B_new, ip_new, curr_ll, seed] = baum_welch(X, nr_states, 
nr_outputs, 20, seed)
+        seed = seed+1
+        if (nr_states == 2) {
+            prev_ll = -1
+        }
+        if (curr_ll < prev_ll) {
+            search = FALSE
+        }
+        else {
+            A = A_new
+            B = B_new
+            ip = ip_new
+        }
+        prev_ll = curr_ll
+        nr_states = nr_states+1
+
+        if (nr_states > max_states) {
+            search = FALSE
+        }
+    }
+}
+
+col_normalize = function(Matrix[Double] m) return (Matrix[Double] m_norm) {
+       m_norm = m / rowSums(m)
+}
+
+unique_vals = function (Matrix[Double] X) return (Integer l) {
+    # group and count duplicates
+    I = aggregate (target = X, groups = X[,1], fn = "count")
+    # select groups
+    res = removeEmpty (target = seq (1, max (X[,1])), margin = "rows", select 
= (I != 0))
+    l = length(res)
+}
+
+baum_welch = function( Matrix[Double] X, Integer nr_states, Integer 
nr_outputs, Integer IT, Integer seed) return (Matrix[Double] A, Matrix[Double] 
B, Matrix[Double] ip, Double likelihood, Integer seed)
+{
+    #initialize state transition and emmission matrices uniformly
+    A = col_normalize(matrix(1/nr_states, rows=nr_states, cols=nr_states) + 
rand(rows=nr_states, cols=nr_states, seed=seed))
+    seed = seed+1
+    B = col_normalize(matrix(1/nr_outputs,rows=nr_states, cols=nr_outputs) + 
rand(rows=nr_states, cols=nr_outputs, seed=seed))
+    seed = seed+1
+    ip = matrix(1/nr_states, rows=nr_states, cols=1) + rand(rows=nr_states, 
cols=1, seed=seed)

Review Comment:
   Also, each call to random have to have a unique seed. A suggestion for this 
is to make each rand call with seed += 1. (+= is not supported so you have to 
assign back the seed with seed = seed + 1, between the lines.)



##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,258 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script implements the hidden markov model method
+# INPUT:
+# 
--------------------------------------------------------------------------------------------
+# X         Output of hmm of last n timestep
+# OUTPUT:
+# 
--------------------------------------------------------------------------------------------
+# ip        Start probabilities
+# A         Transition probabilities
+# B         Emission probabilities
+# 
--------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] ip, Matrix[Double] 
A, Matrix[Double] B)
+{
+
+    #X should have the size of 1 * ncols
+    #should be transposed for the unique function
+    unique_X = matrix(X, rows=ncol(X), cols=1)
+    nr_outputs = unique_vals(unique_X)
+    
+    /*
+    Since nr of states if unknown, fit a hmm model for every total number of 
states
+    from 1 to some max_states or 1 to log(n_timesteps), depending on whichever 
is greater.
+
+    Also the break condition is: If the likelihood decreases with increase in 
number of total states, 
+    break and take the paremeters of the last iteration as the optimal one.
+    */
+    
+    max_states = 6
+    T = ncol(X)
+    
+    if (max_states < log(T)){
+        max_states = ceil(log(T))
+    }
+    
+    search = TRUE
+    nr_states = 2
+    seed = 42
+
+    while (search) {
+        [A_new, B_new, ip_new, curr_ll, seed] = baum_welch(X, nr_states, 
nr_outputs, 20, seed)
+        seed = seed+1
+        if (nr_states == 2) {
+            prev_ll = -1
+        }
+        if (curr_ll < prev_ll) {
+            search = FALSE
+        }
+        else {
+            A = A_new
+            B = B_new
+            ip = ip_new
+        }
+        prev_ll = curr_ll
+        nr_states = nr_states+1
+
+        if (nr_states > max_states) {
+            search = FALSE
+        }
+    }
+}
+
+col_normalize = function(Matrix[Double] m) return (Matrix[Double] m_norm) {
+       m_norm = m / rowSums(m)
+}
+
+unique_vals = function (Matrix[Double] X) return (Integer l) {
+    # group and count duplicates
+    I = aggregate (target = X, groups = X[,1], fn = "count")
+    # select groups
+    res = removeEmpty (target = seq (1, max (X[,1])), margin = "rows", select 
= (I != 0))
+    l = length(res)
+}
+
+baum_welch = function( Matrix[Double] X, Integer nr_states, Integer 
nr_outputs, Integer IT, Integer seed) return (Matrix[Double] A, Matrix[Double] 
B, Matrix[Double] ip, Double likelihood, Integer seed)
+{
+    #initialize state transition and emmission matrices uniformly
+    A = col_normalize(matrix(1/nr_states, rows=nr_states, cols=nr_states) + 
rand(rows=nr_states, cols=nr_states, seed=seed))
+    seed = seed+1
+    B = col_normalize(matrix(1/nr_outputs,rows=nr_states, cols=nr_outputs) + 
rand(rows=nr_states, cols=nr_outputs, seed=seed))
+    seed = seed+1
+    ip = matrix(1/nr_states, rows=nr_states, cols=1) + rand(rows=nr_states, 
cols=1, seed=seed)
+    seed = seed+1
+
+    likelihood = 0
+    T = ncol(X)
+    
+    for (i in 1:IT) {
+        alpha = forward(X, A, B, ip)
+        beta = backward(X, A, B)
+        
+        gamma = calculate_gamma(alpha, beta)
+        eta = calculate_eta(X, alpha, beta, A, B)
+
+        ip = gamma[, 1]
+        A = calculate_A(eta, gamma, nr_states)
+        B = calculate_B(gamma, X, nr_states, nr_outputs)
+        
+        #likelhood using forward method
+        likelihood = sum(gamma[, T] * alpha[, T])
+    }
+}
+
+calculate_A = function(Matrix[Double] eta, Matrix[Double] gamma, Integer 
nr_states) return (Matrix[Double] A) {
+    A = rand(rows=nr_states, cols=nr_states)
+    T = ncol(eta)
+
+    parfor (i in 1:nr_states) {
+        parfor (j in 1:nr_states) {
+            eta_id = (i-1) * nr_states + j
+            num_ij = sum(eta[eta_id, 1:T-1])

Review Comment:
   1:T-1 takes all columns except the last one. Is this the intended behavior 
or should it take the entire row?



##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,258 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script implements the hidden markov model method
+# INPUT:
+# 
--------------------------------------------------------------------------------------------
+# X         Output of hmm of last n timestep
+# OUTPUT:
+# 
--------------------------------------------------------------------------------------------
+# ip        Start probabilities
+# A         Transition probabilities
+# B         Emission probabilities
+# 
--------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] ip, Matrix[Double] 
A, Matrix[Double] B)
+{
+
+    #X should have the size of 1 * ncols
+    #should be transposed for the unique function
+    unique_X = matrix(X, rows=ncol(X), cols=1)
+    nr_outputs = unique_vals(unique_X)
+    
+    /*
+    Since nr of states if unknown, fit a hmm model for every total number of 
states
+    from 1 to some max_states or 1 to log(n_timesteps), depending on whichever 
is greater.
+
+    Also the break condition is: If the likelihood decreases with increase in 
number of total states, 
+    break and take the paremeters of the last iteration as the optimal one.
+    */
+    
+    max_states = 6
+    T = ncol(X)
+    
+    if (max_states < log(T)){
+        max_states = ceil(log(T))
+    }
+    
+    search = TRUE
+    nr_states = 2
+    seed = 42
+
+    while (search) {
+        [A_new, B_new, ip_new, curr_ll, seed] = baum_welch(X, nr_states, 
nr_outputs, 20, seed)
+        seed = seed+1
+        if (nr_states == 2) {
+            prev_ll = -1
+        }
+        if (curr_ll < prev_ll) {
+            search = FALSE
+        }
+        else {
+            A = A_new
+            B = B_new
+            ip = ip_new
+        }
+        prev_ll = curr_ll
+        nr_states = nr_states+1
+
+        if (nr_states > max_states) {
+            search = FALSE
+        }
+    }
+}
+
+col_normalize = function(Matrix[Double] m) return (Matrix[Double] m_norm) {
+       m_norm = m / rowSums(m)
+}
+
+unique_vals = function (Matrix[Double] X) return (Integer l) {
+    # group and count duplicates
+    I = aggregate (target = X, groups = X[,1], fn = "count")
+    # select groups
+    res = removeEmpty (target = seq (1, max (X[,1])), margin = "rows", select 
= (I != 0))
+    l = length(res)
+}
+
+baum_welch = function( Matrix[Double] X, Integer nr_states, Integer 
nr_outputs, Integer IT, Integer seed) return (Matrix[Double] A, Matrix[Double] 
B, Matrix[Double] ip, Double likelihood, Integer seed)
+{
+    #initialize state transition and emmission matrices uniformly
+    A = col_normalize(matrix(1/nr_states, rows=nr_states, cols=nr_states) + 
rand(rows=nr_states, cols=nr_states, seed=seed))
+    seed = seed+1
+    B = col_normalize(matrix(1/nr_outputs,rows=nr_states, cols=nr_outputs) + 
rand(rows=nr_states, cols=nr_outputs, seed=seed))
+    seed = seed+1
+    ip = matrix(1/nr_states, rows=nr_states, cols=1) + rand(rows=nr_states, 
cols=1, seed=seed)
+    seed = seed+1
+
+    likelihood = 0
+    T = ncol(X)
+    
+    for (i in 1:IT) {
+        alpha = forward(X, A, B, ip)
+        beta = backward(X, A, B)
+        
+        gamma = calculate_gamma(alpha, beta)
+        eta = calculate_eta(X, alpha, beta, A, B)
+
+        ip = gamma[, 1]
+        A = calculate_A(eta, gamma, nr_states)
+        B = calculate_B(gamma, X, nr_states, nr_outputs)
+        
+        #likelhood using forward method
+        likelihood = sum(gamma[, T] * alpha[, T])
+    }
+}
+
+calculate_A = function(Matrix[Double] eta, Matrix[Double] gamma, Integer 
nr_states) return (Matrix[Double] A) {
+    A = rand(rows=nr_states, cols=nr_states)
+    T = ncol(eta)
+
+    parfor (i in 1:nr_states) {
+        parfor (j in 1:nr_states) {
+            eta_id = (i-1) * nr_states + j
+            num_ij = sum(eta[eta_id, 1:T-1])
+            den_ij = sum(gamma[i,1:T-1])
+            A[i,j] = num_ij/den_ij
+        }
+    }
+}
+
+calculate_B = function(Matrix[Double] gamma, Matrix[Double] X, Integer 
nr_states, Integer nr_outputs) return (Matrix[Double] B) {
+    B = rand(rows=nr_states, cols=nr_outputs)
+
+    parfor (i in 1:nr_states){
+        parfor (j in 1:nr_outputs) {
+            B[i, j] = sum((X == j) * gamma[i, ]) / sum(gamma[i, ])

Review Comment:
   materialize gamma[i, ] in the outer parfor.



##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,258 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script implements the hidden markov model method
+# INPUT:
+# 
--------------------------------------------------------------------------------------------
+# X         Output of hmm of last n timestep
+# OUTPUT:
+# 
--------------------------------------------------------------------------------------------
+# ip        Start probabilities
+# A         Transition probabilities
+# B         Emission probabilities
+# 
--------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] ip, Matrix[Double] 
A, Matrix[Double] B)
+{
+
+    #X should have the size of 1 * ncols
+    #should be transposed for the unique function
+    unique_X = matrix(X, rows=ncol(X), cols=1)
+    nr_outputs = unique_vals(unique_X)
+    
+    /*
+    Since nr of states if unknown, fit a hmm model for every total number of 
states
+    from 1 to some max_states or 1 to log(n_timesteps), depending on whichever 
is greater.
+
+    Also the break condition is: If the likelihood decreases with increase in 
number of total states, 
+    break and take the paremeters of the last iteration as the optimal one.
+    */
+    
+    max_states = 6
+    T = ncol(X)
+    
+    if (max_states < log(T)){
+        max_states = ceil(log(T))
+    }
+    
+    search = TRUE
+    nr_states = 2
+    seed = 42
+
+    while (search) {
+        [A_new, B_new, ip_new, curr_ll, seed] = baum_welch(X, nr_states, 
nr_outputs, 20, seed)
+        seed = seed+1
+        if (nr_states == 2) {
+            prev_ll = -1
+        }
+        if (curr_ll < prev_ll) {
+            search = FALSE
+        }
+        else {
+            A = A_new
+            B = B_new
+            ip = ip_new
+        }
+        prev_ll = curr_ll
+        nr_states = nr_states+1
+
+        if (nr_states > max_states) {
+            search = FALSE
+        }
+    }
+}
+
+col_normalize = function(Matrix[Double] m) return (Matrix[Double] m_norm) {
+       m_norm = m / rowSums(m)
+}
+
+unique_vals = function (Matrix[Double] X) return (Integer l) {
+    # group and count duplicates
+    I = aggregate (target = X, groups = X[,1], fn = "count")
+    # select groups
+    res = removeEmpty (target = seq (1, max (X[,1])), margin = "rows", select 
= (I != 0))
+    l = length(res)
+}
+
+baum_welch = function( Matrix[Double] X, Integer nr_states, Integer 
nr_outputs, Integer IT, Integer seed) return (Matrix[Double] A, Matrix[Double] 
B, Matrix[Double] ip, Double likelihood, Integer seed)
+{
+    #initialize state transition and emmission matrices uniformly
+    A = col_normalize(matrix(1/nr_states, rows=nr_states, cols=nr_states) + 
rand(rows=nr_states, cols=nr_states, seed=seed))
+    seed = seed+1
+    B = col_normalize(matrix(1/nr_outputs,rows=nr_states, cols=nr_outputs) + 
rand(rows=nr_states, cols=nr_outputs, seed=seed))

Review Comment:
   Values contained in the matrix is equivalent in each cell for both calls.
   perhaps the value to put into the matrix can be calculated once and then 
assigned into a matrix?
   And even better if this is something we can assign into the arguments of the 
rand call, then we would reduce the current calls from 3 matrices materialized 
to only 1 per line of (98, 100, and 102)
   



##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,258 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script implements the hidden markov model method
+# INPUT:
+# 
--------------------------------------------------------------------------------------------
+# X         Output of hmm of last n timestep
+# OUTPUT:
+# 
--------------------------------------------------------------------------------------------
+# ip        Start probabilities
+# A         Transition probabilities
+# B         Emission probabilities
+# 
--------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] ip, Matrix[Double] 
A, Matrix[Double] B)
+{
+
+    #X should have the size of 1 * ncols
+    #should be transposed for the unique function
+    unique_X = matrix(X, rows=ncol(X), cols=1)
+    nr_outputs = unique_vals(unique_X)

Review Comment:
   we have a builtin function to calculate the number of unique values:
   countDistinct()
   
   you can give it an direction if you want to know the number of distinct in 
columns, rows or entirety.



##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,258 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script implements the hidden markov model method
+# INPUT:
+# 
--------------------------------------------------------------------------------------------
+# X         Output of hmm of last n timestep
+# OUTPUT:
+# 
--------------------------------------------------------------------------------------------
+# ip        Start probabilities
+# A         Transition probabilities
+# B         Emission probabilities
+# 
--------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] ip, Matrix[Double] 
A, Matrix[Double] B)
+{
+
+    #X should have the size of 1 * ncols
+    #should be transposed for the unique function
+    unique_X = matrix(X, rows=ncol(X), cols=1)
+    nr_outputs = unique_vals(unique_X)
+    
+    /*
+    Since nr of states if unknown, fit a hmm model for every total number of 
states
+    from 1 to some max_states or 1 to log(n_timesteps), depending on whichever 
is greater.
+
+    Also the break condition is: If the likelihood decreases with increase in 
number of total states, 
+    break and take the paremeters of the last iteration as the optimal one.
+    */
+    
+    max_states = 6
+    T = ncol(X)
+    
+    if (max_states < log(T)){
+        max_states = ceil(log(T))
+    }
+    
+    search = TRUE
+    nr_states = 2
+    seed = 42
+
+    while (search) {
+        [A_new, B_new, ip_new, curr_ll, seed] = baum_welch(X, nr_states, 
nr_outputs, 20, seed)
+        seed = seed+1
+        if (nr_states == 2) {
+            prev_ll = -1
+        }
+        if (curr_ll < prev_ll) {
+            search = FALSE
+        }
+        else {
+            A = A_new
+            B = B_new
+            ip = ip_new
+        }
+        prev_ll = curr_ll
+        nr_states = nr_states+1
+
+        if (nr_states > max_states) {
+            search = FALSE
+        }
+    }
+}
+
+col_normalize = function(Matrix[Double] m) return (Matrix[Double] m_norm) {
+       m_norm = m / rowSums(m)
+}
+
+unique_vals = function (Matrix[Double] X) return (Integer l) {
+    # group and count duplicates
+    I = aggregate (target = X, groups = X[,1], fn = "count")
+    # select groups
+    res = removeEmpty (target = seq (1, max (X[,1])), margin = "rows", select 
= (I != 0))
+    l = length(res)
+}
+
+baum_welch = function( Matrix[Double] X, Integer nr_states, Integer 
nr_outputs, Integer IT, Integer seed) return (Matrix[Double] A, Matrix[Double] 
B, Matrix[Double] ip, Double likelihood, Integer seed)
+{
+    #initialize state transition and emmission matrices uniformly
+    A = col_normalize(matrix(1/nr_states, rows=nr_states, cols=nr_states) + 
rand(rows=nr_states, cols=nr_states, seed=seed))
+    seed = seed+1
+    B = col_normalize(matrix(1/nr_outputs,rows=nr_states, cols=nr_outputs) + 
rand(rows=nr_states, cols=nr_outputs, seed=seed))
+    seed = seed+1
+    ip = matrix(1/nr_states, rows=nr_states, cols=1) + rand(rows=nr_states, 
cols=1, seed=seed)
+    seed = seed+1
+
+    likelihood = 0
+    T = ncol(X)
+    
+    for (i in 1:IT) {
+        alpha = forward(X, A, B, ip)
+        beta = backward(X, A, B)
+        
+        gamma = calculate_gamma(alpha, beta)
+        eta = calculate_eta(X, alpha, beta, A, B)
+
+        ip = gamma[, 1]
+        A = calculate_A(eta, gamma, nr_states)
+        B = calculate_B(gamma, X, nr_states, nr_outputs)
+        
+        #likelhood using forward method
+        likelihood = sum(gamma[, T] * alpha[, T])
+    }
+}
+
+calculate_A = function(Matrix[Double] eta, Matrix[Double] gamma, Integer 
nr_states) return (Matrix[Double] A) {
+    A = rand(rows=nr_states, cols=nr_states)
+    T = ncol(eta)
+
+    parfor (i in 1:nr_states) {
+        parfor (j in 1:nr_states) {
+            eta_id = (i-1) * nr_states + j
+            num_ij = sum(eta[eta_id, 1:T-1])
+            den_ij = sum(gamma[i,1:T-1])
+            A[i,j] = num_ij/den_ij
+        }
+    }
+}
+
+calculate_B = function(Matrix[Double] gamma, Matrix[Double] X, Integer 
nr_states, Integer nr_outputs) return (Matrix[Double] B) {
+    B = rand(rows=nr_states, cols=nr_outputs)
+
+    parfor (i in 1:nr_states){
+        parfor (j in 1:nr_outputs) {
+            B[i, j] = sum((X == j) * gamma[i, ]) / sum(gamma[i, ])
+        }
+    }
+}
+
+/*
+  To work with 3d matrices, the values in first two dimensions of eta matrix
+  must be rolled the first dimension. The following indexing function maps the
+  index of rolled matrix(eta) to "normal" 2d matrices like alpha, beta, etc
+*/
+index = function(Integer rolled_i, Integer nr_states) return(Double i, Double 
j) {
+    i = ceil(rolled_i / nr_states) #index starts at 1
+    j = rolled_i - (nr_states * (i-1)) #i-1) * 
+}
+
+calculate_eta =  function (Matrix[Double] X, Matrix[Double] alpha, 
Matrix[Double] beta, Matrix[Double] A, Matrix[Double] B) return (Matrix[Double] 
eta)
+{
+    nr_states = nrow(alpha)
+    T = ncol(alpha)    
+    
+    /*
+    eta a (nr_states * nr_states) * T matrix with cell (i, t) being 
probability of 
+    the state being at i at timestep j given the observed output
+    */
+    
+    tot_transitions = nr_states * nr_states
+    eta = rand(rows=tot_transitions, cols=T-1)
+    
+    /*
+    The transitions will be indiced as such:transition 1-N will represent
+    transition from state 1 to 1, 2 upto state N. transition N+1-2N will 
represent
+    transitions from 2 to 1, 2 upto state N  
+    */
+    
+    parfor (trans_id in 1:tot_transitions) {
+        [i, j] = index(trans_id, nr_states)
+        for (t in 1:(T-1)) {
+            #indices for alpha and beta
+            ot1 = output_t(X, t+1)
+            num_ij = alpha[i, t] * A[i, j] * beta[j, t+1] * B[j, ot1]
+            den = matrix(0, rows=nr_states, cols=1)
+            
+            parfor (k in 1:nr_states) {
+                den[k, 1] = sum(alpha[k, t] * t(A[k, ]) * beta[, t+1] * 
B[,ot1])
+            }
+            s = num_ij / sum(den)
+            eta[trans_id, t] = as.scalar(s[1,1])
+        }
+    }
+}
+
+calculate_gamma = function (Matrix[Double] alpha, Matrix[Double] beta) return 
(Matrix[Double] gamma)
+{ 
+    /*
+    gamma a nr_state * T matrix with cell (i, t) being probability of 
+    the state being at i at timestep j given the observed output 
+    */
+    nr_states = nrow(alpha)
+    T = ncol(alpha)
+    
+    gamma = rand(rows=nr_states, cols=T)
+    
+    parfor (i in 1:nrow(alpha)) {
+        parfor (t in 1:ncol(alpha)) {
+            num_it = alpha[i, t] * beta[i, t]
+            den_it = sum(alpha[,t] * beta[,t])
+            gamma[i, t] = num_it / den_it
+        }
+    }

Review Comment:
   I think these parfors can be rewritten as:
   
   ```
   num_it_M = alpha * beta
   den_it_M =  rowsum(num_it_M)
   gamma = num_it_M / den_it_M
   ```
   
   This is because we support direct matrix and vector operations.
   And i think this nicely maps to it. (But i might have an error here.)



##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,258 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script implements the hidden markov model method
+# INPUT:
+# 
--------------------------------------------------------------------------------------------
+# X         Output of hmm of last n timestep
+# OUTPUT:
+# 
--------------------------------------------------------------------------------------------
+# ip        Start probabilities
+# A         Transition probabilities
+# B         Emission probabilities
+# 
--------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] ip, Matrix[Double] 
A, Matrix[Double] B)
+{
+
+    #X should have the size of 1 * ncols
+    #should be transposed for the unique function
+    unique_X = matrix(X, rows=ncol(X), cols=1)
+    nr_outputs = unique_vals(unique_X)
+    
+    /*
+    Since nr of states if unknown, fit a hmm model for every total number of 
states
+    from 1 to some max_states or 1 to log(n_timesteps), depending on whichever 
is greater.
+
+    Also the break condition is: If the likelihood decreases with increase in 
number of total states, 
+    break and take the paremeters of the last iteration as the optimal one.
+    */
+    
+    max_states = 6
+    T = ncol(X)
+    
+    if (max_states < log(T)){
+        max_states = ceil(log(T))
+    }
+    
+    search = TRUE
+    nr_states = 2
+    seed = 42
+
+    while (search) {
+        [A_new, B_new, ip_new, curr_ll, seed] = baum_welch(X, nr_states, 
nr_outputs, 20, seed)
+        seed = seed+1
+        if (nr_states == 2) {
+            prev_ll = -1
+        }
+        if (curr_ll < prev_ll) {
+            search = FALSE
+        }
+        else {
+            A = A_new
+            B = B_new
+            ip = ip_new
+        }
+        prev_ll = curr_ll
+        nr_states = nr_states+1
+
+        if (nr_states > max_states) {
+            search = FALSE
+        }
+    }
+}
+
+col_normalize = function(Matrix[Double] m) return (Matrix[Double] m_norm) {
+       m_norm = m / rowSums(m)
+}
+
+unique_vals = function (Matrix[Double] X) return (Integer l) {
+    # group and count duplicates
+    I = aggregate (target = X, groups = X[,1], fn = "count")
+    # select groups
+    res = removeEmpty (target = seq (1, max (X[,1])), margin = "rows", select 
= (I != 0))
+    l = length(res)
+}
+
+baum_welch = function( Matrix[Double] X, Integer nr_states, Integer 
nr_outputs, Integer IT, Integer seed) return (Matrix[Double] A, Matrix[Double] 
B, Matrix[Double] ip, Double likelihood, Integer seed)
+{
+    #initialize state transition and emmission matrices uniformly
+    A = col_normalize(matrix(1/nr_states, rows=nr_states, cols=nr_states) + 
rand(rows=nr_states, cols=nr_states, seed=seed))
+    seed = seed+1
+    B = col_normalize(matrix(1/nr_outputs,rows=nr_states, cols=nr_outputs) + 
rand(rows=nr_states, cols=nr_outputs, seed=seed))
+    seed = seed+1
+    ip = matrix(1/nr_states, rows=nr_states, cols=1) + rand(rows=nr_states, 
cols=1, seed=seed)
+    seed = seed+1
+
+    likelihood = 0
+    T = ncol(X)
+    
+    for (i in 1:IT) {
+        alpha = forward(X, A, B, ip)
+        beta = backward(X, A, B)
+        
+        gamma = calculate_gamma(alpha, beta)
+        eta = calculate_eta(X, alpha, beta, A, B)
+
+        ip = gamma[, 1]
+        A = calculate_A(eta, gamma, nr_states)
+        B = calculate_B(gamma, X, nr_states, nr_outputs)
+        
+        #likelhood using forward method
+        likelihood = sum(gamma[, T] * alpha[, T])
+    }
+}
+
+calculate_A = function(Matrix[Double] eta, Matrix[Double] gamma, Integer 
nr_states) return (Matrix[Double] A) {
+    A = rand(rows=nr_states, cols=nr_states)
+    T = ncol(eta)
+
+    parfor (i in 1:nr_states) {
+        parfor (j in 1:nr_states) {
+            eta_id = (i-1) * nr_states + j
+            num_ij = sum(eta[eta_id, 1:T-1])
+            den_ij = sum(gamma[i,1:T-1])
+            A[i,j] = num_ij/den_ij
+        }
+    }
+}
+
+calculate_B = function(Matrix[Double] gamma, Matrix[Double] X, Integer 
nr_states, Integer nr_outputs) return (Matrix[Double] B) {
+    B = rand(rows=nr_states, cols=nr_outputs)
+
+    parfor (i in 1:nr_states){
+        parfor (j in 1:nr_outputs) {
+            B[i, j] = sum((X == j) * gamma[i, ]) / sum(gamma[i, ])
+        }
+    }
+}
+
+/*
+  To work with 3d matrices, the values in first two dimensions of eta matrix
+  must be rolled the first dimension. The following indexing function maps the
+  index of rolled matrix(eta) to "normal" 2d matrices like alpha, beta, etc
+*/
+index = function(Integer rolled_i, Integer nr_states) return(Double i, Double 
j) {
+    i = ceil(rolled_i / nr_states) #index starts at 1
+    j = rolled_i - (nr_states * (i-1)) #i-1) * 
+}
+
+calculate_eta =  function (Matrix[Double] X, Matrix[Double] alpha, 
Matrix[Double] beta, Matrix[Double] A, Matrix[Double] B) return (Matrix[Double] 
eta)
+{
+    nr_states = nrow(alpha)
+    T = ncol(alpha)    
+    
+    /*
+    eta a (nr_states * nr_states) * T matrix with cell (i, t) being 
probability of 
+    the state being at i at timestep j given the observed output
+    */
+    
+    tot_transitions = nr_states * nr_states
+    eta = rand(rows=tot_transitions, cols=T-1)
+    
+    /*
+    The transitions will be indiced as such:transition 1-N will represent
+    transition from state 1 to 1, 2 upto state N. transition N+1-2N will 
represent
+    transitions from 2 to 1, 2 upto state N  
+    */
+    
+    parfor (trans_id in 1:tot_transitions) {
+        [i, j] = index(trans_id, nr_states)
+        for (t in 1:(T-1)) {
+            #indices for alpha and beta
+            ot1 = output_t(X, t+1)
+            num_ij = alpha[i, t] * A[i, j] * beta[j, t+1] * B[j, ot1]

Review Comment:
   A[i,j] and B[j, ot1]  in outer forloop



##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,258 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script implements the hidden markov model method
+# INPUT:
+# 
--------------------------------------------------------------------------------------------
+# X         Output of hmm of last n timestep
+# OUTPUT:
+# 
--------------------------------------------------------------------------------------------
+# ip        Start probabilities
+# A         Transition probabilities
+# B         Emission probabilities
+# 
--------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] ip, Matrix[Double] 
A, Matrix[Double] B)
+{
+
+    #X should have the size of 1 * ncols
+    #should be transposed for the unique function
+    unique_X = matrix(X, rows=ncol(X), cols=1)
+    nr_outputs = unique_vals(unique_X)
+    
+    /*
+    Since nr of states if unknown, fit a hmm model for every total number of 
states
+    from 1 to some max_states or 1 to log(n_timesteps), depending on whichever 
is greater.
+
+    Also the break condition is: If the likelihood decreases with increase in 
number of total states, 
+    break and take the paremeters of the last iteration as the optimal one.
+    */
+    
+    max_states = 6
+    T = ncol(X)
+    
+    if (max_states < log(T)){
+        max_states = ceil(log(T))
+    }
+    
+    search = TRUE
+    nr_states = 2
+    seed = 42
+
+    while (search) {
+        [A_new, B_new, ip_new, curr_ll, seed] = baum_welch(X, nr_states, 
nr_outputs, 20, seed)
+        seed = seed+1
+        if (nr_states == 2) {
+            prev_ll = -1
+        }
+        if (curr_ll < prev_ll) {
+            search = FALSE
+        }
+        else {
+            A = A_new
+            B = B_new
+            ip = ip_new
+        }
+        prev_ll = curr_ll
+        nr_states = nr_states+1
+
+        if (nr_states > max_states) {
+            search = FALSE
+        }
+    }
+}
+
+col_normalize = function(Matrix[Double] m) return (Matrix[Double] m_norm) {
+       m_norm = m / rowSums(m)
+}
+
+unique_vals = function (Matrix[Double] X) return (Integer l) {
+    # group and count duplicates
+    I = aggregate (target = X, groups = X[,1], fn = "count")
+    # select groups
+    res = removeEmpty (target = seq (1, max (X[,1])), margin = "rows", select 
= (I != 0))
+    l = length(res)
+}
+
+baum_welch = function( Matrix[Double] X, Integer nr_states, Integer 
nr_outputs, Integer IT, Integer seed) return (Matrix[Double] A, Matrix[Double] 
B, Matrix[Double] ip, Double likelihood, Integer seed)
+{
+    #initialize state transition and emmission matrices uniformly
+    A = col_normalize(matrix(1/nr_states, rows=nr_states, cols=nr_states) + 
rand(rows=nr_states, cols=nr_states, seed=seed))
+    seed = seed+1
+    B = col_normalize(matrix(1/nr_outputs,rows=nr_states, cols=nr_outputs) + 
rand(rows=nr_states, cols=nr_outputs, seed=seed))
+    seed = seed+1
+    ip = matrix(1/nr_states, rows=nr_states, cols=1) + rand(rows=nr_states, 
cols=1, seed=seed)
+    seed = seed+1
+
+    likelihood = 0
+    T = ncol(X)
+    
+    for (i in 1:IT) {
+        alpha = forward(X, A, B, ip)
+        beta = backward(X, A, B)
+        
+        gamma = calculate_gamma(alpha, beta)
+        eta = calculate_eta(X, alpha, beta, A, B)
+
+        ip = gamma[, 1]
+        A = calculate_A(eta, gamma, nr_states)
+        B = calculate_B(gamma, X, nr_states, nr_outputs)
+        
+        #likelhood using forward method
+        likelihood = sum(gamma[, T] * alpha[, T])
+    }
+}
+
+calculate_A = function(Matrix[Double] eta, Matrix[Double] gamma, Integer 
nr_states) return (Matrix[Double] A) {
+    A = rand(rows=nr_states, cols=nr_states)
+    T = ncol(eta)
+
+    parfor (i in 1:nr_states) {
+        parfor (j in 1:nr_states) {
+            eta_id = (i-1) * nr_states + j
+            num_ij = sum(eta[eta_id, 1:T-1])
+            den_ij = sum(gamma[i,1:T-1])
+            A[i,j] = num_ij/den_ij
+        }
+    }
+}
+
+calculate_B = function(Matrix[Double] gamma, Matrix[Double] X, Integer 
nr_states, Integer nr_outputs) return (Matrix[Double] B) {
+    B = rand(rows=nr_states, cols=nr_outputs)
+
+    parfor (i in 1:nr_states){
+        parfor (j in 1:nr_outputs) {
+            B[i, j] = sum((X == j) * gamma[i, ]) / sum(gamma[i, ])
+        }
+    }
+}
+
+/*
+  To work with 3d matrices, the values in first two dimensions of eta matrix
+  must be rolled the first dimension. The following indexing function maps the
+  index of rolled matrix(eta) to "normal" 2d matrices like alpha, beta, etc
+*/
+index = function(Integer rolled_i, Integer nr_states) return(Double i, Double 
j) {
+    i = ceil(rolled_i / nr_states) #index starts at 1
+    j = rolled_i - (nr_states * (i-1)) #i-1) * 
+}
+
+calculate_eta =  function (Matrix[Double] X, Matrix[Double] alpha, 
Matrix[Double] beta, Matrix[Double] A, Matrix[Double] B) return (Matrix[Double] 
eta)
+{
+    nr_states = nrow(alpha)
+    T = ncol(alpha)    
+    
+    /*
+    eta a (nr_states * nr_states) * T matrix with cell (i, t) being 
probability of 
+    the state being at i at timestep j given the observed output
+    */
+    
+    tot_transitions = nr_states * nr_states
+    eta = rand(rows=tot_transitions, cols=T-1)
+    
+    /*
+    The transitions will be indiced as such:transition 1-N will represent
+    transition from state 1 to 1, 2 upto state N. transition N+1-2N will 
represent
+    transitions from 2 to 1, 2 upto state N  
+    */
+    
+    parfor (trans_id in 1:tot_transitions) {
+        [i, j] = index(trans_id, nr_states)
+        for (t in 1:(T-1)) {
+            #indices for alpha and beta
+            ot1 = output_t(X, t+1)
+            num_ij = alpha[i, t] * A[i, j] * beta[j, t+1] * B[j, ot1]
+            den = matrix(0, rows=nr_states, cols=1)
+            
+            parfor (k in 1:nr_states) {
+                den[k, 1] = sum(alpha[k, t] * t(A[k, ]) * beta[, t+1] * 
B[,ot1])
+            }
+            s = num_ij / sum(den)
+            eta[trans_id, t] = as.scalar(s[1,1])
+        }
+    }
+}
+
+calculate_gamma = function (Matrix[Double] alpha, Matrix[Double] beta) return 
(Matrix[Double] gamma)
+{ 
+    /*
+    gamma a nr_state * T matrix with cell (i, t) being probability of 
+    the state being at i at timestep j given the observed output 
+    */
+    nr_states = nrow(alpha)
+    T = ncol(alpha)
+    
+    gamma = rand(rows=nr_states, cols=T)

Review Comment:
   This rand call seems to be done just to materialize a matrix?
   If so we could consider changing it to a matrix(rows...) call.
   But it might not be the best solution. 
   
   



##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,258 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script implements the hidden markov model method
+# INPUT:
+# 
--------------------------------------------------------------------------------------------
+# X         Output of hmm of last n timestep
+# OUTPUT:
+# 
--------------------------------------------------------------------------------------------
+# ip        Start probabilities
+# A         Transition probabilities
+# B         Emission probabilities
+# 
--------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] ip, Matrix[Double] 
A, Matrix[Double] B)
+{
+
+    #X should have the size of 1 * ncols
+    #should be transposed for the unique function
+    unique_X = matrix(X, rows=ncol(X), cols=1)
+    nr_outputs = unique_vals(unique_X)
+    
+    /*
+    Since nr of states if unknown, fit a hmm model for every total number of 
states
+    from 1 to some max_states or 1 to log(n_timesteps), depending on whichever 
is greater.
+
+    Also the break condition is: If the likelihood decreases with increase in 
number of total states, 
+    break and take the paremeters of the last iteration as the optimal one.
+    */
+    
+    max_states = 6
+    T = ncol(X)
+    
+    if (max_states < log(T)){
+        max_states = ceil(log(T))
+    }
+    
+    search = TRUE
+    nr_states = 2
+    seed = 42
+
+    while (search) {
+        [A_new, B_new, ip_new, curr_ll, seed] = baum_welch(X, nr_states, 
nr_outputs, 20, seed)
+        seed = seed+1
+        if (nr_states == 2) {
+            prev_ll = -1
+        }
+        if (curr_ll < prev_ll) {
+            search = FALSE
+        }
+        else {
+            A = A_new
+            B = B_new
+            ip = ip_new
+        }
+        prev_ll = curr_ll
+        nr_states = nr_states+1
+
+        if (nr_states > max_states) {
+            search = FALSE
+        }
+    }
+}
+
+col_normalize = function(Matrix[Double] m) return (Matrix[Double] m_norm) {
+       m_norm = m / rowSums(m)
+}
+
+unique_vals = function (Matrix[Double] X) return (Integer l) {
+    # group and count duplicates
+    I = aggregate (target = X, groups = X[,1], fn = "count")
+    # select groups
+    res = removeEmpty (target = seq (1, max (X[,1])), margin = "rows", select 
= (I != 0))
+    l = length(res)
+}
+
+baum_welch = function( Matrix[Double] X, Integer nr_states, Integer 
nr_outputs, Integer IT, Integer seed) return (Matrix[Double] A, Matrix[Double] 
B, Matrix[Double] ip, Double likelihood, Integer seed)
+{
+    #initialize state transition and emmission matrices uniformly
+    A = col_normalize(matrix(1/nr_states, rows=nr_states, cols=nr_states) + 
rand(rows=nr_states, cols=nr_states, seed=seed))
+    seed = seed+1
+    B = col_normalize(matrix(1/nr_outputs,rows=nr_states, cols=nr_outputs) + 
rand(rows=nr_states, cols=nr_outputs, seed=seed))
+    seed = seed+1
+    ip = matrix(1/nr_states, rows=nr_states, cols=1) + rand(rows=nr_states, 
cols=1, seed=seed)
+    seed = seed+1
+
+    likelihood = 0
+    T = ncol(X)
+    
+    for (i in 1:IT) {
+        alpha = forward(X, A, B, ip)
+        beta = backward(X, A, B)
+        
+        gamma = calculate_gamma(alpha, beta)
+        eta = calculate_eta(X, alpha, beta, A, B)
+
+        ip = gamma[, 1]
+        A = calculate_A(eta, gamma, nr_states)
+        B = calculate_B(gamma, X, nr_states, nr_outputs)
+        
+        #likelhood using forward method
+        likelihood = sum(gamma[, T] * alpha[, T])
+    }
+}
+
+calculate_A = function(Matrix[Double] eta, Matrix[Double] gamma, Integer 
nr_states) return (Matrix[Double] A) {
+    A = rand(rows=nr_states, cols=nr_states)
+    T = ncol(eta)
+
+    parfor (i in 1:nr_states) {
+        parfor (j in 1:nr_states) {
+            eta_id = (i-1) * nr_states + j
+            num_ij = sum(eta[eta_id, 1:T-1])
+            den_ij = sum(gamma[i,1:T-1])
+            A[i,j] = num_ij/den_ij
+        }
+    }
+}
+
+calculate_B = function(Matrix[Double] gamma, Matrix[Double] X, Integer 
nr_states, Integer nr_outputs) return (Matrix[Double] B) {
+    B = rand(rows=nr_states, cols=nr_outputs)
+
+    parfor (i in 1:nr_states){
+        parfor (j in 1:nr_outputs) {
+            B[i, j] = sum((X == j) * gamma[i, ]) / sum(gamma[i, ])
+        }
+    }
+}
+
+/*
+  To work with 3d matrices, the values in first two dimensions of eta matrix
+  must be rolled the first dimension. The following indexing function maps the
+  index of rolled matrix(eta) to "normal" 2d matrices like alpha, beta, etc
+*/
+index = function(Integer rolled_i, Integer nr_states) return(Double i, Double 
j) {
+    i = ceil(rolled_i / nr_states) #index starts at 1
+    j = rolled_i - (nr_states * (i-1)) #i-1) * 
+}
+
+calculate_eta =  function (Matrix[Double] X, Matrix[Double] alpha, 
Matrix[Double] beta, Matrix[Double] A, Matrix[Double] B) return (Matrix[Double] 
eta)
+{
+    nr_states = nrow(alpha)
+    T = ncol(alpha)    
+    
+    /*
+    eta a (nr_states * nr_states) * T matrix with cell (i, t) being 
probability of 
+    the state being at i at timestep j given the observed output
+    */
+    
+    tot_transitions = nr_states * nr_states
+    eta = rand(rows=tot_transitions, cols=T-1)
+    
+    /*
+    The transitions will be indiced as such:transition 1-N will represent
+    transition from state 1 to 1, 2 upto state N. transition N+1-2N will 
represent
+    transitions from 2 to 1, 2 upto state N  
+    */
+    
+    parfor (trans_id in 1:tot_transitions) {
+        [i, j] = index(trans_id, nr_states)
+        for (t in 1:(T-1)) {
+            #indices for alpha and beta
+            ot1 = output_t(X, t+1)

Review Comment:
   remove the method output_t and call the extraction here. It should 
automatically know it is a scalar.
   Do you cast to int because you want to round? if so then call round()



##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,258 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script implements the hidden markov model method
+# INPUT:
+# 
--------------------------------------------------------------------------------------------
+# X         Output of hmm of last n timestep
+# OUTPUT:
+# 
--------------------------------------------------------------------------------------------
+# ip        Start probabilities
+# A         Transition probabilities
+# B         Emission probabilities
+# 
--------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] ip, Matrix[Double] 
A, Matrix[Double] B)
+{
+
+    #X should have the size of 1 * ncols
+    #should be transposed for the unique function
+    unique_X = matrix(X, rows=ncol(X), cols=1)
+    nr_outputs = unique_vals(unique_X)
+    
+    /*
+    Since nr of states if unknown, fit a hmm model for every total number of 
states
+    from 1 to some max_states or 1 to log(n_timesteps), depending on whichever 
is greater.
+
+    Also the break condition is: If the likelihood decreases with increase in 
number of total states, 
+    break and take the paremeters of the last iteration as the optimal one.
+    */
+    
+    max_states = 6
+    T = ncol(X)
+    
+    if (max_states < log(T)){
+        max_states = ceil(log(T))
+    }
+    
+    search = TRUE
+    nr_states = 2
+    seed = 42
+
+    while (search) {
+        [A_new, B_new, ip_new, curr_ll, seed] = baum_welch(X, nr_states, 
nr_outputs, 20, seed)
+        seed = seed+1
+        if (nr_states == 2) {
+            prev_ll = -1
+        }
+        if (curr_ll < prev_ll) {
+            search = FALSE
+        }
+        else {
+            A = A_new
+            B = B_new
+            ip = ip_new
+        }
+        prev_ll = curr_ll
+        nr_states = nr_states+1
+
+        if (nr_states > max_states) {
+            search = FALSE
+        }
+    }
+}
+
+col_normalize = function(Matrix[Double] m) return (Matrix[Double] m_norm) {
+       m_norm = m / rowSums(m)
+}
+
+unique_vals = function (Matrix[Double] X) return (Integer l) {
+    # group and count duplicates
+    I = aggregate (target = X, groups = X[,1], fn = "count")
+    # select groups
+    res = removeEmpty (target = seq (1, max (X[,1])), margin = "rows", select 
= (I != 0))
+    l = length(res)
+}
+
+baum_welch = function( Matrix[Double] X, Integer nr_states, Integer 
nr_outputs, Integer IT, Integer seed) return (Matrix[Double] A, Matrix[Double] 
B, Matrix[Double] ip, Double likelihood, Integer seed)
+{
+    #initialize state transition and emmission matrices uniformly
+    A = col_normalize(matrix(1/nr_states, rows=nr_states, cols=nr_states) + 
rand(rows=nr_states, cols=nr_states, seed=seed))
+    seed = seed+1
+    B = col_normalize(matrix(1/nr_outputs,rows=nr_states, cols=nr_outputs) + 
rand(rows=nr_states, cols=nr_outputs, seed=seed))
+    seed = seed+1
+    ip = matrix(1/nr_states, rows=nr_states, cols=1) + rand(rows=nr_states, 
cols=1, seed=seed)
+    seed = seed+1
+
+    likelihood = 0
+    T = ncol(X)
+    
+    for (i in 1:IT) {
+        alpha = forward(X, A, B, ip)
+        beta = backward(X, A, B)
+        
+        gamma = calculate_gamma(alpha, beta)
+        eta = calculate_eta(X, alpha, beta, A, B)
+
+        ip = gamma[, 1]
+        A = calculate_A(eta, gamma, nr_states)
+        B = calculate_B(gamma, X, nr_states, nr_outputs)
+        
+        #likelhood using forward method
+        likelihood = sum(gamma[, T] * alpha[, T])
+    }
+}
+
+calculate_A = function(Matrix[Double] eta, Matrix[Double] gamma, Integer 
nr_states) return (Matrix[Double] A) {
+    A = rand(rows=nr_states, cols=nr_states)
+    T = ncol(eta)
+
+    parfor (i in 1:nr_states) {
+        parfor (j in 1:nr_states) {
+            eta_id = (i-1) * nr_states + j
+            num_ij = sum(eta[eta_id, 1:T-1])
+            den_ij = sum(gamma[i,1:T-1])
+            A[i,j] = num_ij/den_ij
+        }
+    }
+}
+
+calculate_B = function(Matrix[Double] gamma, Matrix[Double] X, Integer 
nr_states, Integer nr_outputs) return (Matrix[Double] B) {
+    B = rand(rows=nr_states, cols=nr_outputs)
+
+    parfor (i in 1:nr_states){
+        parfor (j in 1:nr_outputs) {
+            B[i, j] = sum((X == j) * gamma[i, ]) / sum(gamma[i, ])
+        }
+    }
+}
+
+/*
+  To work with 3d matrices, the values in first two dimensions of eta matrix
+  must be rolled the first dimension. The following indexing function maps the
+  index of rolled matrix(eta) to "normal" 2d matrices like alpha, beta, etc
+*/
+index = function(Integer rolled_i, Integer nr_states) return(Double i, Double 
j) {
+    i = ceil(rolled_i / nr_states) #index starts at 1
+    j = rolled_i - (nr_states * (i-1)) #i-1) * 
+}
+
+calculate_eta =  function (Matrix[Double] X, Matrix[Double] alpha, 
Matrix[Double] beta, Matrix[Double] A, Matrix[Double] B) return (Matrix[Double] 
eta)
+{
+    nr_states = nrow(alpha)
+    T = ncol(alpha)    
+    
+    /*
+    eta a (nr_states * nr_states) * T matrix with cell (i, t) being 
probability of 
+    the state being at i at timestep j given the observed output
+    */
+    
+    tot_transitions = nr_states * nr_states
+    eta = rand(rows=tot_transitions, cols=T-1)
+    
+    /*
+    The transitions will be indiced as such:transition 1-N will represent
+    transition from state 1 to 1, 2 upto state N. transition N+1-2N will 
represent
+    transitions from 2 to 1, 2 upto state N  
+    */
+    
+    parfor (trans_id in 1:tot_transitions) {
+        [i, j] = index(trans_id, nr_states)
+        for (t in 1:(T-1)) {
+            #indices for alpha and beta
+            ot1 = output_t(X, t+1)
+            num_ij = alpha[i, t] * A[i, j] * beta[j, t+1] * B[j, ot1]
+            den = matrix(0, rows=nr_states, cols=1)
+            
+            parfor (k in 1:nr_states) {
+                den[k, 1] = sum(alpha[k, t] * t(A[k, ]) * beta[, t+1] * 
B[,ot1])
+            }
+            s = num_ij / sum(den)
+            eta[trans_id, t] = as.scalar(s[1,1])
+        }
+    }
+}
+
+calculate_gamma = function (Matrix[Double] alpha, Matrix[Double] beta) return 
(Matrix[Double] gamma)
+{ 
+    /*
+    gamma a nr_state * T matrix with cell (i, t) being probability of 
+    the state being at i at timestep j given the observed output 
+    */
+    nr_states = nrow(alpha)
+    T = ncol(alpha)
+    
+    gamma = rand(rows=nr_states, cols=T)
+    
+    parfor (i in 1:nrow(alpha)) {
+        parfor (t in 1:ncol(alpha)) {
+            num_it = alpha[i, t] * beta[i, t]
+            den_it = sum(alpha[,t] * beta[,t])
+            gamma[i, t] = num_it / den_it
+        }
+    }
+}
+
+output_t = function (Matrix[Double] X, Integer t) return (Integer o) {
+    o = as.integer(as.scalar(X[1, t]))
+}
+
+forward = function (Matrix[Double] X, Matrix[Double] A, Matrix[Double] B, 
Matrix[Double] ip) return (Matrix[Double] alpha)
+{   
+    /*
+    alpha a matrix of size nr_states * T with a cell i,t being probability of 
+    the state being at state i at timestep j and the outputs till timestep j
+    */
+    
+    T = ncol(X)
+    nr_states = nrow(A) 
+    alpha = matrix(0, rows=nr_states, cols=T)
+    o_1 = output_t(X, 1)
+    alpha[ ,1] = ip * B[ ,o_1]
+
+    for (t in 2:T) {
+        ot = output_t(X, t)
+        for (i in 1:nr_states) {    
+            alpha[i, t] = B[i, ot] * sum(alpha[,t-1]* A[ ,i]) 

Review Comment:
   move alpha[,t-1] to outer loop



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@systemds.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to