[GitHub] [systemds] cluelesprogrammer commented on a diff in pull request #1838: [SYSTEMDS-3549] Hidden Markov model builtin

via GitHub Sun, 23 Jul 2023 13:00:30 -0700


cluelesprogrammer commented on code in PR #1838:
URL: https://github.com/apache/systemds/pull/1838#discussion_r1271550948



##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,181 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script fits a discrete hidden Markov model (HMM) to one observation
+# sequence with Baum-Welch and picks the number of hidden states by likelihood.
+#
+# INPUT:
+# --------------------------------------------------------------------------------------------
+# X         Observation sequence as a 1 x T row vector (outputs of the last T
+#           timesteps). NOTE(review): symbols are assumed to be integer codes
+#           1..K because they are used as column indices of B - confirm.
+# --------------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# --------------------------------------------------------------------------------------------
+# P         1 x 1 matrix: probability of the observed sequence under the chosen model
+# A         State transition matrix of the chosen model (nr_states x nr_states)
+# B         Emission matrix of the chosen model (nr_states x nr_outputs)
+# --------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] P, Matrix[Double] A, Matrix[Double] B)
+{
+    # X should have the size of 1 x T
+    T = ncol(X)
+
+    # unique() expects a column vector, so reshape the row vector first
+    unique_X = unique(matrix(X, rows=T, cols=1))
+    nr_outputs = length(unique_X)
+
+    /*
+    The number of hidden states is unknown, so a model is fitted for 2, 3, ...
+    states, up to max(10, log(T)) states. As soon as the log-likelihood drops
+    when a state is added, the search stops and the parameters of the previous
+    (better) model are returned.
+    */
+    max_states = max(10, floor(log(T)))
+
+    # The 2-state model is always accepted as the first candidate, which avoids
+    # needing a sentinel value for the "previous" log-likelihood.
+    # NOTE(review): baum_welch is assumed to return a log-likelihood as its
+    # fourth output (the variables were named *_ll) - confirm.
+    nr_states = 2
+    [A, B, ip, best_ll] = baum_welch(X, nr_states, nr_outputs)
+
+    # DML has no break statement; the flag in the loop predicate ends the search.
+    search = TRUE
+    while (search & nr_states < max_states) {
+        nr_states = nr_states + 1
+        [A_new, B_new, ip_new, curr_ll] = baum_welch(X, nr_states, nr_outputs)
+        if (curr_ll < best_ll) {
+            # likelihood got worse: keep the previous model
+            search = FALSE
+        } else {
+            A = A_new
+            B = B_new
+            best_ll = curr_ll
+        }
+    }
+
+    # May underflow to 0 for very long sequences.
+    P = matrix(exp(best_ll), rows=1, cols=1)
+}
+
+forward = function(Matrix[Double] X, Matrix[Double] A, Matrix[Double] B, Matrix[Double] ip)
+    return (Matrix[Double] alpha)
+{
+    /*
+    Forward pass of the forward-backward algorithm.
+
+    X      1 x T observation sequence (entries are used as column indices of B)
+    A      nr_states x nr_states transition matrix, A[i, j] = P(j at t+1 | i at t)
+    B      nr_states x nr_outputs emission matrix
+    ip     nr_states x 1 initial state probabilities
+
+    alpha  nr_states x T matrix; cell (i, t) is the joint probability of being
+           in state i at timestep t and of the outputs observed up to timestep t
+    */
+    T = ncol(X)
+    nr_states = nrow(A)
+    alpha = matrix(0, rows=nr_states, cols=T)
+
+    # initialisation: alpha[i, 1] = ip[i] * B[i, o_1]
+    o = as.integer(as.scalar(X[1, 1]))
+    alpha[, 1] = ip * B[, o]
+
+    # recursion: alpha[i, t] = B[i, o_t] * sum_k alpha[k, t-1] * A[k, i]
+    # (guarded so that a length-1 sequence does not iterate over 2:1)
+    if (T > 1) {
+        for (ti in 2:T) {
+            o = as.integer(as.scalar(X[1, ti]))
+            alpha[, ti] = B[, o] * (t(A) %*% alpha[, ti-1])
+        }
+    }
+}
+
+backward = function(Matrix[Double] X, Matrix[Double] A, Matrix[Double] B)
+    return (Matrix[Double] beta)
+{
+    /*
+    Backward pass of the forward-backward algorithm.
+
+    X     1 x T observation sequence (entries are used as column indices of B)
+    A     nr_states x nr_states transition matrix
+    B     nr_states x nr_outputs emission matrix
+
+    beta  nr_states x T matrix; cell (i, t) is the probability of the model
+          producing the outputs (o_t+1, ..., o_T) given that it is in state i
+          at time t
+    */
+    T = ncol(X)
+    nr_states = nrow(A)
+    beta = matrix(0, rows=nr_states, cols=T)
+
+    # nothing is left to emit after the last timestep
+    beta[, T] = matrix(1, rows=nr_states, cols=1)
+
+    # recursion, walking from T-1 down to 1:
+    # beta[i, t] = sum_j A[i, j] * B[j, o_{t+1}] * beta[j, t+1]
+    if (T > 1) {
+        for (k in 1:(T-1)) {
+            ti = T - k
+            o_next = as.integer(as.scalar(X[1, ti+1]))
+            beta[, ti] = A %*% (B[, o_next] * beta[, ti+1])
+        }
+    }
+}
+
+calculate_gamma = function(Matrix[Double] alpha, Matrix[Double] beta)
+    return (Matrix[Double] gamma)
+{
+    /*
+    alpha  nr_states x T forward probabilities
+    beta   nr_states x T backward probabilities
+
+    gamma  nr_states x T matrix; cell (i, t) is the probability of being in
+           state i at timestep t given the observed outputs:
+           gamma[i, t] = alpha[i, t] * beta[i, t] / sum_k alpha[k, t] * beta[k, t]
+    */
+    joint = alpha * beta
+
+    # colSums(joint) is a 1 x T row vector; the division is applied to each
+    # column, so every column of gamma sums to 1
+    gamma = joint / colSums(joint)
+}
+
+calculate_eta = function(Matrix[Double] alpha, Matrix[Double] beta, Matrix[Double] A, Matrix[Double] B)
+    return (Matrix[Double] eta)
+{
+    /*
+    alpha  nr_states x T forward probabilities
+    beta   nr_states x T backward probabilities
+    A      nr_states x nr_states transition matrix
+    B      emission matrix; kept in the signature for callers but not read,
+           see the note on the emission term below
+
+    eta    (nr_states * nr_states) x (T-1) matrix; cell (trans_id, t) is the
+           probability of taking transition trans_id between timesteps t and
+           t+1 given the observed outputs.
+
+    Transitions are indexed row-major: with N states, trans_id = (i-1)*N + j
+    is the transition from state i to state j. Ids 1..N are the transitions
+    from state 1 to states 1..N, ids N+1..2N those from state 2, and so on.
+    */
+    nr_states = nrow(alpha)
+    T = ncol(alpha)
+    tot_transitions = nr_states * nr_states
+    eta = matrix(0, rows=tot_transitions, cols=T-1)
+
+    # probability of the whole observation sequence
+    likelihood = sum(alpha[, T] * beta[, T])
+
+    if (T > 1) {
+        parfor (ti in 1:(T-1)) {
+            # pred[j] = sum_i alpha[i, t] * A[i, j]
+            pred = t(A) %*% alpha[, ti]
+
+            # The observation sequence is not a parameter of this function, so
+            # the emission term B[j, o_{t+1}] is recovered from the forward
+            # recursion alpha[j, t+1] = B[j, o_{t+1}] * pred[j].
+            emit = alpha[, ti+1] / pred
+            # 0/0 can only occur for states that are unreachable at t+1
+            emit = replace(target=emit, pattern=NaN, replacement=0)
+
+            # xi[i, j] = alpha[i, t] * A[i, j] * B[j, o_{t+1}] * beta[j, t+1] / likelihood
+            xi = (alpha[, ti] %*% t(emit * beta[, ti+1])) * A / likelihood
+
+            # row-major flattening gives exactly the trans_id order above
+            eta[, ti] = matrix(xi, rows=tot_transitions, cols=1)
+        }
+    }
+}
+
+
+baum_welch = function (Matrix[Double] X, nr_states, nr_outputs) return 
(Matrix[Double] A, Matrix[Double] B, Matrix[Double] ip, likelihood)

Review Comment:
   Resolved



##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,181 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script fits a discrete hidden Markov model (HMM) to one observation
+# sequence with Baum-Welch and picks the number of hidden states by likelihood.
+#
+# INPUT:
+# --------------------------------------------------------------------------------------------
+# X         Observation sequence as a 1 x T row vector (outputs of the last T
+#           timesteps). NOTE(review): symbols are assumed to be integer codes
+#           1..K because they are used as column indices of B - confirm.
+# --------------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# --------------------------------------------------------------------------------------------
+# P         1 x 1 matrix: probability of the observed sequence under the chosen model
+# A         State transition matrix of the chosen model (nr_states x nr_states)
+# B         Emission matrix of the chosen model (nr_states x nr_outputs)
+# --------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] P, Matrix[Double] A, Matrix[Double] B)
+{
+    # X should have the size of 1 x T
+    T = ncol(X)
+
+    # unique() expects a column vector, so reshape the row vector first
+    unique_X = unique(matrix(X, rows=T, cols=1))
+    nr_outputs = length(unique_X)
+
+    /*
+    The number of hidden states is unknown, so a model is fitted for 2, 3, ...
+    states, up to max(10, log(T)) states. As soon as the log-likelihood drops
+    when a state is added, the search stops and the parameters of the previous
+    (better) model are returned.
+    */
+    max_states = max(10, floor(log(T)))
+
+    # The 2-state model is always accepted as the first candidate, which avoids
+    # needing a sentinel value for the "previous" log-likelihood.
+    # NOTE(review): baum_welch is assumed to return a log-likelihood as its
+    # fourth output (the variables were named *_ll) - confirm.
+    nr_states = 2
+    [A, B, ip, best_ll] = baum_welch(X, nr_states, nr_outputs)
+
+    # DML has no break statement; the flag in the loop predicate ends the search.
+    search = TRUE
+    while (search & nr_states < max_states) {
+        nr_states = nr_states + 1
+        [A_new, B_new, ip_new, curr_ll] = baum_welch(X, nr_states, nr_outputs)
+        if (curr_ll < best_ll) {
+            # likelihood got worse: keep the previous model
+            search = FALSE
+        } else {
+            A = A_new
+            B = B_new
+            best_ll = curr_ll
+        }
+    }
+
+    # May underflow to 0 for very long sequences.
+    P = matrix(exp(best_ll), rows=1, cols=1)
+}
+
+forward = function(Matrix[Double] X, Matrix[Double] A, Matrix[Double] B, Matrix[Double] ip)
+    return (Matrix[Double] alpha)
+{
+    /*
+    Forward pass of the forward-backward algorithm.
+
+    X      1 x T observation sequence (entries are used as column indices of B)
+    A      nr_states x nr_states transition matrix, A[i, j] = P(j at t+1 | i at t)
+    B      nr_states x nr_outputs emission matrix
+    ip     nr_states x 1 initial state probabilities
+
+    alpha  nr_states x T matrix; cell (i, t) is the joint probability of being
+           in state i at timestep t and of the outputs observed up to timestep t
+    */
+    T = ncol(X)
+    nr_states = nrow(A)
+    alpha = matrix(0, rows=nr_states, cols=T)
+
+    # initialisation: alpha[i, 1] = ip[i] * B[i, o_1]
+    o = as.integer(as.scalar(X[1, 1]))
+    alpha[, 1] = ip * B[, o]
+
+    # recursion: alpha[i, t] = B[i, o_t] * sum_k alpha[k, t-1] * A[k, i]
+    # (guarded so that a length-1 sequence does not iterate over 2:1)
+    if (T > 1) {
+        for (ti in 2:T) {
+            o = as.integer(as.scalar(X[1, ti]))
+            alpha[, ti] = B[, o] * (t(A) %*% alpha[, ti-1])
+        }
+    }
+}
+
+backward = function(Matrix[Double] X, Matrix[Double] A, Matrix[Double] B)
+    return (Matrix[Double] beta)
+{
+    /*
+    Backward pass of the forward-backward algorithm.
+
+    X     1 x T observation sequence (entries are used as column indices of B)
+    A     nr_states x nr_states transition matrix
+    B     nr_states x nr_outputs emission matrix
+
+    beta  nr_states x T matrix; cell (i, t) is the probability of the model
+          producing the outputs (o_t+1, ..., o_T) given that it is in state i
+          at time t
+    */
+    T = ncol(X)
+    nr_states = nrow(A)
+    beta = matrix(0, rows=nr_states, cols=T)
+
+    # nothing is left to emit after the last timestep
+    beta[, T] = matrix(1, rows=nr_states, cols=1)
+
+    # recursion, walking from T-1 down to 1:
+    # beta[i, t] = sum_j A[i, j] * B[j, o_{t+1}] * beta[j, t+1]
+    if (T > 1) {
+        for (k in 1:(T-1)) {
+            ti = T - k
+            o_next = as.integer(as.scalar(X[1, ti+1]))
+            beta[, ti] = A %*% (B[, o_next] * beta[, ti+1])
+        }
+    }
+}
+
+calculate_gamma = function(Matrix[Double] alpha, Matrix[Double] beta)
+    return (Matrix[Double] gamma)
+{
+    /*
+    alpha  nr_states x T forward probabilities
+    beta   nr_states x T backward probabilities
+
+    gamma  nr_states x T matrix; cell (i, t) is the probability of being in
+           state i at timestep t given the observed outputs:
+           gamma[i, t] = alpha[i, t] * beta[i, t] / sum_k alpha[k, t] * beta[k, t]
+    */
+    joint = alpha * beta
+
+    # colSums(joint) is a 1 x T row vector; the division is applied to each
+    # column, so every column of gamma sums to 1
+    gamma = joint / colSums(joint)
+}
+
+calculate_eta = function(Matrix[Double] alpha, Matrix[Double] beta, Matrix[Double] A, Matrix[Double] B)
+    return (Matrix[Double] eta)
+{
+    /*
+    alpha  nr_states x T forward probabilities
+    beta   nr_states x T backward probabilities
+    A      nr_states x nr_states transition matrix
+    B      emission matrix; kept in the signature for callers but not read,
+           see the note on the emission term below
+
+    eta    (nr_states * nr_states) x (T-1) matrix; cell (trans_id, t) is the
+           probability of taking transition trans_id between timesteps t and
+           t+1 given the observed outputs.
+
+    Transitions are indexed row-major: with N states, trans_id = (i-1)*N + j
+    is the transition from state i to state j. Ids 1..N are the transitions
+    from state 1 to states 1..N, ids N+1..2N those from state 2, and so on.
+    */
+    nr_states = nrow(alpha)
+    T = ncol(alpha)
+    tot_transitions = nr_states * nr_states
+    eta = matrix(0, rows=tot_transitions, cols=T-1)
+
+    # probability of the whole observation sequence
+    likelihood = sum(alpha[, T] * beta[, T])
+
+    if (T > 1) {
+        parfor (ti in 1:(T-1)) {
+            # pred[j] = sum_i alpha[i, t] * A[i, j]
+            pred = t(A) %*% alpha[, ti]
+
+            # The observation sequence is not a parameter of this function, so
+            # the emission term B[j, o_{t+1}] is recovered from the forward
+            # recursion alpha[j, t+1] = B[j, o_{t+1}] * pred[j].
+            emit = alpha[, ti+1] / pred
+            # 0/0 can only occur for states that are unreachable at t+1
+            emit = replace(target=emit, pattern=NaN, replacement=0)
+
+            # xi[i, j] = alpha[i, t] * A[i, j] * B[j, o_{t+1}] * beta[j, t+1] / likelihood
+            xi = (alpha[, ti] %*% t(emit * beta[, ti+1])) * A / likelihood
+
+            # row-major flattening gives exactly the trans_id order above
+            eta[, ti] = matrix(xi, rows=tot_transitions, cols=1)
+        }
+    }
+}
+
+
+baum_welch = function(Matrix[Double] X, Integer nr_states, Integer nr_outputs)
+    return (Matrix[Double] A, Matrix[Double] B, Matrix[Double] ip, Double likelihood)
+{
+    /*
+    Fits a discrete HMM with a fixed number of hidden states by Baum-Welch (EM).
+
+    X           1 x T observation sequence with symbols in 1..nr_outputs;
+                at least two timesteps are required
+    nr_states   number of hidden states
+    nr_outputs  number of distinct output symbols
+
+    A           nr_states x nr_states transition matrix
+    B           nr_states x nr_outputs emission matrix
+    ip          nr_states x 1 initial state probabilities
+    likelihood  log-likelihood of X, evaluated at the start of the last EM
+                iteration (if the iteration cap is hit, the returned parameters
+                are one update newer than this value)
+    */
+    T = ncol(X)
+    max_iter = 100
+    tol = 1e-6
+
+    # A and B start from random row-stochastic matrices. A fully uniform start
+    # makes all states identical, and EM can never tell them apart again.
+    # The seeds are fixed so that repeated runs give the same result.
+    A = rand(rows=nr_states, cols=nr_states, min=0.1, max=1, seed=42)
+    A = A / rowSums(A)
+    B = rand(rows=nr_states, cols=nr_outputs, min=0.1, max=1, seed=43)
+    B = B / rowSums(B)
+    ip = matrix(1/nr_states, rows=nr_states, cols=1)
+
+    # one-hot encoding of the observations: O[t, k] = 1 iff o_t == k
+    O = table(seq(1, T), t(X), T, nr_outputs)
+
+    converge = FALSE
+    iter = 0
+    prev_ll = 0.0
+    likelihood = 0.0
+
+    while (!converge & iter < max_iter) {
+        iter = iter + 1
+
+        # E-step
+        alpha = forward(X, A, B, ip)
+        beta = backward(X, A, B)
+        likelihood = log(sum(alpha[, T]))
+
+        if (iter > 1 & abs(likelihood - prev_ll) < tol) {
+            # log-likelihood no longer changes: keep the current parameters
+            converge = TRUE
+        } else {
+            prev_ll = likelihood
+            gamma = calculate_gamma(alpha, beta)
+            eta = calculate_eta(alpha, beta, A, B)
+
+            # M-step
+            ip = gamma[, 1]
+            # rowSums(eta) holds the expected transition counts in row-major
+            # (i, j) order; normalise by the expected visits of i before T
+            A = matrix(rowSums(eta), rows=nr_states, cols=nr_states) / rowSums(gamma[, 1:(T-1)])
+            # expected emissions of symbol k from state i over expected visits of i
+            B = (gamma %*% O) / rowSums(gamma)
+        }
+    }
+}

Review Comment:
   Resolved



##########
scripts/builtin/hmm.dml:
##########
@@ -0,0 +1,181 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script fits a discrete hidden Markov model (HMM) to one observation
+# sequence with Baum-Welch and picks the number of hidden states by likelihood.
+#
+# INPUT:
+# --------------------------------------------------------------------------------------------
+# X         Observation sequence as a 1 x T row vector (outputs of the last T
+#           timesteps). NOTE(review): symbols are assumed to be integer codes
+#           1..K because they are used as column indices of B - confirm.
+# --------------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# --------------------------------------------------------------------------------------------
+# P         1 x 1 matrix: probability of the observed sequence under the chosen model
+# A         State transition matrix of the chosen model (nr_states x nr_states)
+# B         Emission matrix of the chosen model (nr_states x nr_outputs)
+# --------------------------------------------------------------------------------------------
+
+m_hmm = function(Matrix[Double] X) return (Matrix[Double] P, Matrix[Double] A, Matrix[Double] B)
+{
+    # X should have the size of 1 x T
+    T = ncol(X)
+
+    # unique() expects a column vector, so reshape the row vector first
+    unique_X = unique(matrix(X, rows=T, cols=1))
+    nr_outputs = length(unique_X)
+
+    /*
+    The number of hidden states is unknown, so a model is fitted for 2, 3, ...
+    states, up to max(10, log(T)) states. As soon as the log-likelihood drops
+    when a state is added, the search stops and the parameters of the previous
+    (better) model are returned.
+    */
+    max_states = max(10, floor(log(T)))
+
+    # The 2-state model is always accepted as the first candidate, which avoids
+    # needing a sentinel value for the "previous" log-likelihood.
+    # NOTE(review): baum_welch is assumed to return a log-likelihood as its
+    # fourth output (the variables were named *_ll) - confirm.
+    nr_states = 2
+    [A, B, ip, best_ll] = baum_welch(X, nr_states, nr_outputs)
+
+    # DML has no break statement; the flag in the loop predicate ends the search.
+    search = TRUE
+    while (search & nr_states < max_states) {
+        nr_states = nr_states + 1
+        [A_new, B_new, ip_new, curr_ll] = baum_welch(X, nr_states, nr_outputs)
+        if (curr_ll < best_ll) {
+            # likelihood got worse: keep the previous model
+            search = FALSE
+        } else {
+            A = A_new
+            B = B_new
+            best_ll = curr_ll
+        }
+    }
+
+    # May underflow to 0 for very long sequences.
+    P = matrix(exp(best_ll), rows=1, cols=1)
+}
+
+forward = function(Matrix[Double] X, Matrix[Double] A, Matrix[Double] B, Matrix[Double] ip)
+    return (Matrix[Double] alpha)
+{
+    /*
+    Forward pass of the forward-backward algorithm.
+
+    X      1 x T observation sequence (entries are used as column indices of B)
+    A      nr_states x nr_states transition matrix, A[i, j] = P(j at t+1 | i at t)
+    B      nr_states x nr_outputs emission matrix
+    ip     nr_states x 1 initial state probabilities
+
+    alpha  nr_states x T matrix; cell (i, t) is the joint probability of being
+           in state i at timestep t and of the outputs observed up to timestep t
+    */
+    T = ncol(X)
+    nr_states = nrow(A)
+    alpha = matrix(0, rows=nr_states, cols=T)
+
+    # initialisation: alpha[i, 1] = ip[i] * B[i, o_1]
+    o = as.integer(as.scalar(X[1, 1]))
+    alpha[, 1] = ip * B[, o]
+
+    # recursion: alpha[i, t] = B[i, o_t] * sum_k alpha[k, t-1] * A[k, i]
+    # (guarded so that a length-1 sequence does not iterate over 2:1)
+    if (T > 1) {
+        for (ti in 2:T) {
+            o = as.integer(as.scalar(X[1, ti]))
+            alpha[, ti] = B[, o] * (t(A) %*% alpha[, ti-1])
+        }
+    }
+}
+
+backward = function(Matrix[Double] X, Matrix[Double] A, Matrix[Double] B)
+    return (Matrix[Double] beta)
+{
+    /*
+    Backward pass of the forward-backward algorithm.
+
+    X     1 x T observation sequence (entries are used as column indices of B)
+    A     nr_states x nr_states transition matrix
+    B     nr_states x nr_outputs emission matrix
+
+    beta  nr_states x T matrix; cell (i, t) is the probability of the model
+          producing the outputs (o_t+1, ..., o_T) given that it is in state i
+          at time t
+    */
+    T = ncol(X)
+    nr_states = nrow(A)
+    beta = matrix(0, rows=nr_states, cols=T)
+
+    # nothing is left to emit after the last timestep
+    beta[, T] = matrix(1, rows=nr_states, cols=1)
+
+    # recursion, walking from T-1 down to 1:
+    # beta[i, t] = sum_j A[i, j] * B[j, o_{t+1}] * beta[j, t+1]
+    if (T > 1) {
+        for (k in 1:(T-1)) {
+            ti = T - k
+            o_next = as.integer(as.scalar(X[1, ti+1]))
+            beta[, ti] = A %*% (B[, o_next] * beta[, ti+1])
+        }
+    }
+}
+
+calculate_gamma = function(Matrix[Double] alpha, Matrix[Double] beta)
+    return (Matrix[Double] gamma)
+{
+    /*
+    alpha  nr_states x T forward probabilities
+    beta   nr_states x T backward probabilities
+
+    gamma  nr_states x T matrix; cell (i, t) is the probability of being in
+           state i at timestep t given the observed outputs:
+           gamma[i, t] = alpha[i, t] * beta[i, t] / sum_k alpha[k, t] * beta[k, t]
+    */
+    joint = alpha * beta
+
+    # colSums(joint) is a 1 x T row vector; the division is applied to each
+    # column, so every column of gamma sums to 1
+    gamma = joint / colSums(joint)
+}
+
+calculate_eta = function(Matrix[Double] alpha, Matrix[Double] beta, Matrix[Double] A, Matrix[Double] B)
+    return (Matrix[Double] eta)
+{
+    /*
+    alpha  nr_states x T forward probabilities
+    beta   nr_states x T backward probabilities
+    A      nr_states x nr_states transition matrix
+    B      emission matrix; kept in the signature for callers but not read,
+           see the note on the emission term below
+
+    eta    (nr_states * nr_states) x (T-1) matrix; cell (trans_id, t) is the
+           probability of taking transition trans_id between timesteps t and
+           t+1 given the observed outputs.
+
+    Transitions are indexed row-major: with N states, trans_id = (i-1)*N + j
+    is the transition from state i to state j. Ids 1..N are the transitions
+    from state 1 to states 1..N, ids N+1..2N those from state 2, and so on.
+    */
+    nr_states = nrow(alpha)
+    T = ncol(alpha)
+    tot_transitions = nr_states * nr_states
+    eta = matrix(0, rows=tot_transitions, cols=T-1)
+
+    # probability of the whole observation sequence
+    likelihood = sum(alpha[, T] * beta[, T])
+
+    if (T > 1) {
+        parfor (ti in 1:(T-1)) {
+            # pred[j] = sum_i alpha[i, t] * A[i, j]
+            pred = t(A) %*% alpha[, ti]
+
+            # The observation sequence is not a parameter of this function, so
+            # the emission term B[j, o_{t+1}] is recovered from the forward
+            # recursion alpha[j, t+1] = B[j, o_{t+1}] * pred[j].
+            emit = alpha[, ti+1] / pred
+            # 0/0 can only occur for states that are unreachable at t+1
+            emit = replace(target=emit, pattern=NaN, replacement=0)
+
+            # xi[i, j] = alpha[i, t] * A[i, j] * B[j, o_{t+1}] * beta[j, t+1] / likelihood
+            xi = (alpha[, ti] %*% t(emit * beta[, ti+1])) * A / likelihood
+
+            # row-major flattening gives exactly the trans_id order above
+            eta[, ti] = matrix(xi, rows=tot_transitions, cols=1)
+        }
+    }
+}
+
+
+baum_welch = function (Matrix[Double] X, nr_states, nr_outputs) return 
(Matrix[Double] A, Matrix[Double] B, Matrix[Double] ip, likelihood)
+{
+    #initialize state transition and emmission matrices uniformly
+    A = matrix(1/nr_states, rows=nr_states, cols=nr_states)
+    B = matrix(1/nr_outputs, rows=nr_states, cols=nr_ouptuts)
+    ip = matrix(1/nr_states, rows=nr_states, cols=1)
+    
+    converge = FALSE
+    
+    while (!converge) {
+        alpha = forward(X, A, B, ip)
+        beta = backward(X, A, B)
+        
+        gamma = calculate_gamma(alpha)
+        eta = calculate_eta(alpha, A, B)       
+        /*
+        TODO: compute likelihood, if it does not change much from previous

Review Comment:
   Resolved



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@systemds.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

[GitHub] [systemds] cluelesprogrammer commented on a diff in pull request #1838: [SYSTEMDS-3549] Hidden Markov model builtin

Reply via email to