http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/id3/id3.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/id3/id3.R b/src/test/scripts/applications/id3/id3.R index 8528b52..b838607 100644 --- a/src/test/scripts/applications/id3/id3.R +++ b/src/test/scripts/applications/id3/id3.R @@ -1,242 +1,242 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -args <- commandArgs(TRUE) - -library("Matrix") -library("matrixStats") - -options(warn=-1) - - -id3_learn = function(X, y, X_subset, attributes, minsplit) -{ - #try the two base cases first - - #of the remaining samples, compute a histogram for labels - hist_labels_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(y)), FUN=sum)) - hist_labels = as.matrix(hist_labels_helper[,2]) - - #go through the histogram to compute the number of labels - #with non-zero samples - #and to pull out the most popular label - - num_non_zero_labels = sum(hist_labels > 0); - most_popular_label = which.max(t(hist_labels)); - num_remaining_attrs = sum(attributes) - - num_samples = sum(X_subset) - mpl = as.numeric(most_popular_label) - - nodes = matrix(0, 1, 1) - edges = matrix(0, 1, 1) - - #if all samples have the same label then return a leaf node - #if no attributes remain then return a leaf node with the most popular label - if(num_non_zero_labels == 1 | num_remaining_attrs == 0 | num_samples < minsplit){ - nodes = matrix(0, 1, 2) - nodes[1,1] = -1 - nodes[1,2] = most_popular_label - edges = matrix(-1, 1, 1) - }else{ - #computing gains for all available attributes using parfor - hist_labels2_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(y)), FUN=sum)) - hist_labels2 = as.matrix(hist_labels2_helper[,2]) - num_samples2 = sum(X_subset) - zero_entries_in_hist1 = (hist_labels2 == 0) - pi1 = hist_labels2/num_samples2 - log_term1 = zero_entries_in_hist1*1 + (1-zero_entries_in_hist1)*pi1 - entropy_vector1 = -pi1*log(log_term1) - ht = sum(entropy_vector1) - - sz = nrow(attributes) - gains = matrix(0, sz, 1) - for(i in 1:nrow(attributes)){ - if(as.numeric(attributes[i,1]) == 1){ - attr_vals = X[,i] - attr_domain_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(attr_vals)), FUN=sum)) - attr_domain = as.matrix(attr_domain_helper[,2]) - - hxt_vector = matrix(0, nrow(attr_domain), 1) - - for(j in 1:nrow(attr_domain)){ - if(as.numeric(attr_domain[j,1]) != 0){ - val = j - Tj = X_subset * (X[,i] == val) - - #entropy = compute_entropy(Tj, y) - hist_labels1_helper = as.matrix(aggregate(as.vector(Tj), by=list(as.vector(y)), FUN=sum)) - 
hist_labels1 = as.matrix(hist_labels1_helper[,2]) - num_samples1 = sum(Tj) - zero_entries_in_hist = (hist_labels1 == 0) - pi = hist_labels1/num_samples1 - log_term = zero_entries_in_hist*1 + (1-zero_entries_in_hist)*pi - entropy_vector = -pi*log(log_term) - entropy = sum(entropy_vector) - - hxt_vector[j,1] = sum(Tj)/sum(X_subset)*entropy - } - } - hxt = sum(hxt_vector) - gains[i,1] = (ht - hxt) - } - } - - #pick out attr with highest gain - best_attr = -1 - max_gain = 0 - for(i4 in 1:nrow(gains)){ - if(as.numeric(attributes[i4,1]) == 1){ - g = as.numeric(gains[i4,1]) - if(best_attr == -1 | max_gain <= g){ - max_gain = g - best_attr = i4 - } - } - } - - attr_vals = X[,best_attr] - attr_domain_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(attr_vals)), FUN=sum)) - attr_domain = as.matrix(attr_domain_helper[,2]) - - new_attributes = attributes - new_attributes[best_attr, 1] = 0 - - max_sz_subtree = 2*sum(X_subset) - sz2 = nrow(attr_domain) - sz1 = sz2*max_sz_subtree - - tempNodeStore = matrix(0, 2, sz1) - tempEdgeStore = matrix(0, 3, sz1) - numSubtreeNodes = matrix(0, sz2, 1) - numSubtreeEdges = matrix(0, sz2, 1) - - for(i1 in 1:nrow(attr_domain)){ - - Ti = X_subset * (X[,best_attr] == i1) - num_nodes_Ti = sum(Ti) - - if(num_nodes_Ti > 0){ - tmpRet <- id3_learn(X, y, Ti, new_attributes, minsplit) - nodesi = as.matrix(tmpRet$a); - edgesi = as.matrix(tmpRet$b); - - start_pt = 1+(i1-1)*max_sz_subtree - tempNodeStore[,start_pt:(start_pt+nrow(nodesi)-1)] = t(nodesi) - - numSubtreeNodes[i1,1] = nrow(nodesi) - if(nrow(edgesi)!=1 | ncol(edgesi)!=1 | as.numeric(edgesi[1,1])!=-1){ - tempEdgeStore[,start_pt:(start_pt+nrow(edgesi)-1)] = t(edgesi) - numSubtreeEdges[i1,1] = nrow(edgesi) - }else{ - numSubtreeEdges[i1,1] = 0 - } - } - } - - num_nodes_in_subtrees = sum(numSubtreeNodes) - num_edges_in_subtrees = sum(numSubtreeEdges) - - #creating root node - sz = 1+num_nodes_in_subtrees - - nodes = matrix(0, sz, 2) - nodes[1,1] = best_attr - numNodes = 1 - - #edges from root to children - sz = sum(numSubtreeNodes > 0) + num_edges_in_subtrees - - edges = matrix(1, sz, 3) - numEdges = 0 - for(i6 in 1:nrow(attr_domain)){ - num_nodesi = as.numeric(numSubtreeNodes[i6,1]) - if(num_nodesi > 0){ - edges[numEdges+1,2] = i6 - numEdges = numEdges + 1 - } - } - - nonEmptyAttri = 0 - for(i7 in 1:nrow(attr_domain)){ - numNodesInSubtree = as.numeric(numSubtreeNodes[i7,1]) - - if(numNodesInSubtree > 0){ - start_pt1 = 1 + (i7-1)*max_sz_subtree - nodes[(numNodes+1):(numNodes+numNodesInSubtree),] = t(tempNodeStore[,start_pt1:(start_pt1+numNodesInSubtree-1)]) - - numEdgesInSubtree = as.numeric(numSubtreeEdges[i7,1]) - - if(numEdgesInSubtree!=0){ - edgesi1 = t(tempEdgeStore[,start_pt1:(start_pt1+numEdgesInSubtree-1)]) - edgesi1[,1] = edgesi1[,1] + numNodes - edgesi1[,3] = edgesi1[,3] + numNodes - - edges[(numEdges+1):(numEdges+numEdgesInSubtree),] = edgesi1 - numEdges = numEdges + numEdgesInSubtree - } - - edges[nonEmptyAttri+1,3] = numNodes + 1 - nonEmptyAttri = nonEmptyAttri + 1 - - numNodes = numNodes + numNodesInSubtree - } - } - } - - return ( list(a=nodes, b=edges) ); -} - -X = readMM(paste(args[1], "X.mtx", sep="")); -y = readMM(paste(args[1], "y.mtx", sep="")); - -n = nrow(X) -m = ncol(X) - -minsplit = 2 - - -X_subset = matrix(1, n, 1) -attributes = matrix(1, m, 1) -# recoding inputs -featureCorrections = as.vector(1 - colMins(as.matrix(X))) -onesMat = matrix(1, n, m) - -X = onesMat %*% diag(featureCorrections) + X -labelCorrection = 1 - min(y) -y = y + labelCorrection + 0 - -tmpRet <- id3_learn(X, y, 
X_subset, attributes, minsplit) -nodes = as.matrix(tmpRet$a) -edges = as.matrix(tmpRet$b) - -# decoding outputs -nodes[,2] = nodes[,2] - labelCorrection * (nodes[,1] == -1) -for(i3 in 1:nrow(edges)){ - e_parent = as.numeric(edges[i3,1]) - parent_feature = as.numeric(nodes[e_parent,1]) - correction = as.numeric(featureCorrections[parent_feature]) - edges[i3,2] = edges[i3,2] - correction -} - -writeMM(as(nodes,"CsparseMatrix"), paste(args[2],"nodes", sep=""), format = "text") -writeMM(as(edges,"CsparseMatrix"), paste(args[2],"edges", sep=""), format = "text") - +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +args <- commandArgs(TRUE) + +library("Matrix") +library("matrixStats") + +options(warn=-1) + + +id3_learn = function(X, y, X_subset, attributes, minsplit) +{ + #try the two base cases first + + #of the remaining samples, compute a histogram for labels + hist_labels_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(y)), FUN=sum)) + hist_labels = as.matrix(hist_labels_helper[,2]) + + #go through the histogram to compute the number of labels + #with non-zero samples + #and to pull out the most popular label + + num_non_zero_labels = sum(hist_labels > 0); + most_popular_label = which.max(t(hist_labels)); + num_remaining_attrs = sum(attributes) + + num_samples = sum(X_subset) + mpl = as.numeric(most_popular_label) + + nodes = matrix(0, 1, 1) + edges = matrix(0, 1, 1) + + #if all samples have the same label then return a leaf node + #if no attributes remain then return a leaf node with the most popular label + if(num_non_zero_labels == 1 | num_remaining_attrs == 0 | num_samples < minsplit){ + nodes = matrix(0, 1, 2) + nodes[1,1] = -1 + nodes[1,2] = most_popular_label + edges = matrix(-1, 1, 1) + }else{ + #computing gains for all available attributes using parfor + hist_labels2_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(y)), FUN=sum)) + hist_labels2 = as.matrix(hist_labels2_helper[,2]) + num_samples2 = sum(X_subset) + zero_entries_in_hist1 = (hist_labels2 == 0) + pi1 = hist_labels2/num_samples2 + log_term1 = zero_entries_in_hist1*1 + (1-zero_entries_in_hist1)*pi1 + entropy_vector1 = -pi1*log(log_term1) + ht = sum(entropy_vector1) + + sz = nrow(attributes) + gains = matrix(0, sz, 1) + for(i in 1:nrow(attributes)){ + if(as.numeric(attributes[i,1]) == 1){ + attr_vals = X[,i] + attr_domain_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(attr_vals)), FUN=sum)) + attr_domain = as.matrix(attr_domain_helper[,2]) + + hxt_vector = matrix(0, nrow(attr_domain), 1) + + for(j in 1:nrow(attr_domain)){ + if(as.numeric(attr_domain[j,1]) != 0){ + val = j + Tj = X_subset * (X[,i] == 
val) + + #entropy = compute_entropy(Tj, y) + hist_labels1_helper = as.matrix(aggregate(as.vector(Tj), by=list(as.vector(y)), FUN=sum)) + hist_labels1 = as.matrix(hist_labels1_helper[,2]) + num_samples1 = sum(Tj) + zero_entries_in_hist = (hist_labels1 == 0) + pi = hist_labels1/num_samples1 + log_term = zero_entries_in_hist*1 + (1-zero_entries_in_hist)*pi + entropy_vector = -pi*log(log_term) + entropy = sum(entropy_vector) + + hxt_vector[j,1] = sum(Tj)/sum(X_subset)*entropy + } + } + hxt = sum(hxt_vector) + gains[i,1] = (ht - hxt) + } + } + + #pick out attr with highest gain + best_attr = -1 + max_gain = 0 + for(i4 in 1:nrow(gains)){ + if(as.numeric(attributes[i4,1]) == 1){ + g = as.numeric(gains[i4,1]) + if(best_attr == -1 | max_gain <= g){ + max_gain = g + best_attr = i4 + } + } + } + + attr_vals = X[,best_attr] + attr_domain_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(attr_vals)), FUN=sum)) + attr_domain = as.matrix(attr_domain_helper[,2]) + + new_attributes = attributes + new_attributes[best_attr, 1] = 0 + + max_sz_subtree = 2*sum(X_subset) + sz2 = nrow(attr_domain) + sz1 = sz2*max_sz_subtree + + tempNodeStore = matrix(0, 2, sz1) + tempEdgeStore = matrix(0, 3, sz1) + numSubtreeNodes = matrix(0, sz2, 1) + numSubtreeEdges = matrix(0, sz2, 1) + + for(i1 in 1:nrow(attr_domain)){ + + Ti = X_subset * (X[,best_attr] == i1) + num_nodes_Ti = sum(Ti) + + if(num_nodes_Ti > 0){ + tmpRet <- id3_learn(X, y, Ti, new_attributes, minsplit) + nodesi = as.matrix(tmpRet$a); + edgesi = as.matrix(tmpRet$b); + + start_pt = 1+(i1-1)*max_sz_subtree + tempNodeStore[,start_pt:(start_pt+nrow(nodesi)-1)] = t(nodesi) + + numSubtreeNodes[i1,1] = nrow(nodesi) + if(nrow(edgesi)!=1 | ncol(edgesi)!=1 | as.numeric(edgesi[1,1])!=-1){ + tempEdgeStore[,start_pt:(start_pt+nrow(edgesi)-1)] = t(edgesi) + numSubtreeEdges[i1,1] = nrow(edgesi) + }else{ + numSubtreeEdges[i1,1] = 0 + } + } + } + + num_nodes_in_subtrees = sum(numSubtreeNodes) + num_edges_in_subtrees = sum(numSubtreeEdges) + + #creating root node + sz = 1+num_nodes_in_subtrees + + nodes = matrix(0, sz, 2) + nodes[1,1] = best_attr + numNodes = 1 + + #edges from root to children + sz = sum(numSubtreeNodes > 0) + num_edges_in_subtrees + + edges = matrix(1, sz, 3) + numEdges = 0 + for(i6 in 1:nrow(attr_domain)){ + num_nodesi = as.numeric(numSubtreeNodes[i6,1]) + if(num_nodesi > 0){ + edges[numEdges+1,2] = i6 + numEdges = numEdges + 1 + } + } + + nonEmptyAttri = 0 + for(i7 in 1:nrow(attr_domain)){ + numNodesInSubtree = as.numeric(numSubtreeNodes[i7,1]) + + if(numNodesInSubtree > 0){ + start_pt1 = 1 + (i7-1)*max_sz_subtree + nodes[(numNodes+1):(numNodes+numNodesInSubtree),] = t(tempNodeStore[,start_pt1:(start_pt1+numNodesInSubtree-1)]) + + numEdgesInSubtree = as.numeric(numSubtreeEdges[i7,1]) + + if(numEdgesInSubtree!=0){ + edgesi1 = t(tempEdgeStore[,start_pt1:(start_pt1+numEdgesInSubtree-1)]) + edgesi1[,1] = edgesi1[,1] + numNodes + edgesi1[,3] = edgesi1[,3] + numNodes + + edges[(numEdges+1):(numEdges+numEdgesInSubtree),] = edgesi1 + numEdges = numEdges + numEdgesInSubtree + } + + edges[nonEmptyAttri+1,3] = numNodes + 1 + nonEmptyAttri = nonEmptyAttri + 1 + + numNodes = numNodes + numNodesInSubtree + } + } + } + + return ( list(a=nodes, b=edges) ); +} + +X = readMM(paste(args[1], "X.mtx", sep="")); +y = readMM(paste(args[1], "y.mtx", sep="")); + +n = nrow(X) +m = ncol(X) + +minsplit = 2 + + +X_subset = matrix(1, n, 1) +attributes = matrix(1, m, 1) +# recoding inputs +featureCorrections = as.vector(1 - colMins(as.matrix(X))) +onesMat = matrix(1, n, m) 
+
+X = onesMat %*% diag(featureCorrections) + X
+labelCorrection = 1 - min(y)
+y = y + labelCorrection + 0
+
+tmpRet <- id3_learn(X, y, X_subset, attributes, minsplit)
+nodes = as.matrix(tmpRet$a)
+edges = as.matrix(tmpRet$b)
+
+# decoding outputs
+nodes[,2] = nodes[,2] - labelCorrection * (nodes[,1] == -1)
+for(i3 in 1:nrow(edges)){
+  e_parent = as.numeric(edges[i3,1])
+  parent_feature = as.numeric(nodes[e_parent,1])
+  correction = as.numeric(featureCorrections[parent_feature])
+  edges[i3,2] = edges[i3,2] - correction
+}
+
+writeMM(as(nodes,"CsparseMatrix"), paste(args[2],"nodes", sep=""), format = "text")
+writeMM(as(edges,"CsparseMatrix"), paste(args[2],"edges", sep=""), format = "text")
+
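Note on the id3.R diff above: the script scores each remaining attribute by information gain. aggregate() builds a weighted label histogram, the histogram is normalized into probabilities, and empty bins are masked so the 0*log(0) terms vanish before the entropies H(T) and H(T|attribute) are combined. The following standalone R sketch shows the same entropy/gain step on hypothetical toy data; entropy(), y, xcol, and w are illustrative names, not part of the commit:

entropy <- function(labels, weights) {
  h <- tapply(weights, labels, sum)      # weighted label histogram
  p <- h / sum(weights)
  p <- p[p > 0]                          # mask empty bins: 0*log(0) -> 0
  -sum(p * log(p))
}

y    <- c(1, 1, 2, 2, 2)                 # recoded labels (values >= 1)
xcol <- c(1, 1, 1, 2, 2)                 # one recoded attribute column of X
w    <- rep(1, 5)                        # X_subset: 0/1 membership weights

ht  <- entropy(y, w)                     # H(T) over the current subset
hxt <- sum(sapply(unique(xcol), function(v) {
  Tj <- w * (xcol == v)                  # samples falling into attribute value v
  sum(Tj) / sum(w) * entropy(y, Tj)      # weighted conditional entropy term
}))
gain <- ht - hxt                         # information gain of this attribute

The script itself keeps the masking explicit (log_term = zero_entries_in_hist*1 + (1-zero_entries_in_hist)*pi) rather than dropping bins, likely so the R code stays a close line-by-line mirror of the DML script it validates in the test.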
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/impute/imputeGaussMCMC.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/impute/imputeGaussMCMC.dml b/src/test/scripts/applications/impute/imputeGaussMCMC.dml index 1ebe5a0..21ecaee 100644 --- a/src/test/scripts/applications/impute/imputeGaussMCMC.dml +++ b/src/test/scripts/applications/impute/imputeGaussMCMC.dml @@ -1,687 +1,687 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# Implements the MCMC algorithm for imputation of missing data into a time-series of "reports". -# Each report is a fixed-size vector of attribute values; reports come out each year/quarter/month ("term"). -# Hard linear equality constraints restrict values in/across the reports, e.g. total cost = sum of all costs. -# Soft linear regression constraints define dependencies between values in/across the reports. -# Linear regression parameters are unknown and sampled together with the missing values in the reports. -# -# INPUT 1: Initial reports matrix [1 : num_attrs, 1 : num_terms] with missing values usually set to zero, -# but it MUST BE CONSISTENT with hard constraints! Set some missing values to nonzero if needed. -# There are "num_terms" reports in the matrix, each having "num_attrs" attribute values. -# -# INPUT 2: Sparse matrix [1 : (num_terms * num_attrs), 1 : num_frees] that defines a linear map from -# "free" variables to the reports matrix. A tensor of size (num_terms * num_attrs * num_frees) -# where the reports matrix is stretched into a column-vector [1 : (num_terms * num_attrs)]. -# Term = t, attribute = i --> index = (t-1) * num_attrs + i -# -# INPUT 3: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : (num_terms * num_attrs)] that defines -# a linear map from the stretched matrix of reports to the stretched matrix of regression factors. -# -# INPUT 4: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines "default regression factors" -# (if nonzero) to be added to the regression factors before they are multiplied by parameters. -# -# INPUT 5: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : num_params] that defines a linear map -# from the vector of parameters to the stretched matrix of regression factors. -# -# INPUT 6: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines default regression coefficients -# (if nonzero) to be added to the parameters (if any) before being multiplied by regression factors. 
-# -# INPUT 7: A vector [1 : num_reg_eqs, 1] of scale multipliers, one per regression -# -# INPUT 8 : Number of factors in a regression equation, including the estimated value -# INPUT 9 : Maximum number of burn-in full iterations (that sample each variable and each parameter once) -# BUT the actual number of burn-in iterations may be smaller if "free fall" ends sooner -# INPUT 10: Maximum number of observed full iterations (that sample each variable and each parameter once) -# -# INPUT 11: Output file name and path for the average MCMC reports table -# INPUT 12: Output file for debugging (currently: the average parameters vector) -# -# Example: -# hadoop jar SystemML.jar -f test/scripts/applications/impute/imputeGaussMCMC.dml -exec singlenode -args -# test/scripts/applications/impute/initial_reports -# test/scripts/applications/impute/CReps -# test/scripts/applications/impute/RegresValueMap -# test/scripts/applications/impute/RegresFactorDefault -# test/scripts/applications/impute/RegresParamMap -# test/scripts/applications/impute/RegresCoeffDefault -# test/scripts/applications/impute/RegresScaleMult -# 4 1000 100 -# test/scripts/applications/impute/output_reports -# test/scripts/applications/impute/debug_info - - -print ("START ImputeGaussMCMC"); -print ("Reading the input files..."); - -initial_reports = read ($1); -CReps = read ($2); - -num_terms = ncol (initial_reports); # Number of periods for which reports are produced -num_attrs = nrow (initial_reports); # Number of attribute values per each term report -num_frees = ncol (CReps); # Number of free variables used to describe all consistent reports - -dReps_size = num_terms * num_attrs; -dReps = matrix (initial_reports, rows = dReps_size, cols = 1, byrow = FALSE); - -# We assume that all report-series consistent with hard constraints form an affine set: -# reports = CReps %*% freeVars + dReps -# Here "freeVars" is a vector of "free variables" (degrees of freedom), "CReps" is a linear mapping, -# and "dReps" are the default values for the reports that correspond to the initial reports matrix. 
- -RegresValueMap = read ($3); -RegresFactorDefault = read ($4); -RegresParamMap = read ($5); -RegresCoeffDefault = read ($6); -RegresScaleMult = read ($7); - -num_factors = $8; # Number of factors in each regression equation, including the estimated value -num_reg_eqs = nrow (RegresParamMap) / num_factors; # Number of regression equations -num_params = ncol (RegresParamMap); # Number of parameters used in all regressions -max_num_burnin_iterations = $9; -max_num_observed_iterations = $10; - -ones_fv = matrix (1.0, rows = 1, cols = num_frees); -ones_pm = matrix (1.0, rows = 1, cols = num_params); -twos_re = matrix (2.0, rows = 1, cols = num_reg_eqs); - -# Create a summation operator (matrix) that adds the factors in each regression: - -ncols_SumFactor = num_reg_eqs * num_factors; -SumFactor = matrix (0.0, rows = num_reg_eqs, cols = ncols_SumFactor); -ones_f = matrix (1.0, rows = 1, cols = num_factors); -SumFactor [1, 1:num_factors] = ones_f; -nSumFactor = 1; -while (nSumFactor < num_reg_eqs) { - incSumFactor = nSumFactor; - if (incSumFactor > num_reg_eqs - nSumFactor) { - incSumFactor = num_reg_eqs - nSumFactor; - } - SumFactor [(nSumFactor + 1) : (nSumFactor + incSumFactor), - (nSumFactor * num_factors + 1) : ((nSumFactor + incSumFactor) * num_factors)] = - SumFactor [1 : incSumFactor, 1 : (incSumFactor * num_factors)]; - nSumFactor = nSumFactor + incSumFactor; -} - -freeVars = matrix (0.0, rows = num_frees, cols = 1); -params = matrix (1.0, rows = num_params, cols = 1); - - - -num_opt_iter = 20; -print ("Performing initial optimization (" + num_opt_iter + " alternating CG steps)..."); - -reports = CReps %*% freeVars + dReps; -regresValues = RegresValueMap %*% reports + RegresFactorDefault; -regresParams = RegresParamMap %*% params + RegresCoeffDefault; - -bilinear_sums = SumFactor %*% (regresValues * regresParams); -w_bilinear_sums = RegresScaleMult * bilinear_sums; -bilinear_form_value = sum (w_bilinear_sums * bilinear_sums); - -opt_iter = 1; -is_step_params = 1; -is_opt_converged = 0; - -print ("Before optimization: Initial bilinear form value = " + bilinear_form_value); - -while (is_opt_converged == 0) -{ - deg = is_step_params * num_params + (1 - is_step_params) * num_frees; - shift_vector = matrix (0.0, rows = deg, cols = 1); - - # Compute gradient - - if (is_step_params == 1) { - gradient = twos_re %*% ((w_bilinear_sums %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) * RegresParamMap))); - } else { - gradient = twos_re %*% ((w_bilinear_sums %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps)))); - } - - # Make a few conjugate gradient steps - - residual = t(gradient); - p = - residual; - norm_r2 = sum (residual * residual); - cg_iter = 1; - cg_terminate = 0; - - while (cg_terminate == 0) - { - # Want: q = A %*% p; - # Compute gradient change from 0 to p - - if (is_step_params == 1) { - w_bilinear_p = RegresScaleMult * (SumFactor %*% (regresValues * (RegresParamMap %*% p))); - gradient_change_p = twos_re %*% ((w_bilinear_p %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) * RegresParamMap))); - } else { - w_bilinear_p = RegresScaleMult * (SumFactor %*% ((RegresValueMap %*% CReps %*% p) * regresParams)); - gradient_change_p = twos_re %*% ((w_bilinear_p %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps)))); - } - q = t(gradient_change_p); - - alpha = norm_r2 / castAsScalar (t(p) %*% q); - shift_vector_change = alpha * p; - shift_vector = shift_vector + shift_vector_change; - old_norm_r2 = norm_r2; 
- residual = residual + alpha * q; - norm_r2 = sum (residual * residual); - p = - residual + (norm_r2 / old_norm_r2) * p; - cg_iter = cg_iter + 1; - if (cg_iter > min (deg, 2 + opt_iter / 3)) { - cg_terminate = 1; - } - } - - if (is_step_params == 1) { - params = params + shift_vector; - regresParams = RegresParamMap %*% params + RegresCoeffDefault; - } else { - freeVars = freeVars + shift_vector; - reports = CReps %*% freeVars + dReps; - regresValues = RegresValueMap %*% reports + RegresFactorDefault; - } - - # Update the bilinear form and check convergence - - if (is_step_params == 1) { - old_bilinear_form_value = bilinear_form_value; - } - - bilinear_sums = SumFactor %*% (regresValues * regresParams); - w_bilinear_sums = RegresScaleMult * bilinear_sums; - bilinear_form_value = sum (w_bilinear_sums * bilinear_sums); - - if (is_step_params == 1) { - print ("Optimization step " + opt_iter + " (params) : bilinear form value = " + bilinear_form_value); - } else { - print ("Optimization step " + opt_iter + " (reports): bilinear form value = " + bilinear_form_value); - } - - is_step_params = 1 - is_step_params; - opt_iter = opt_iter + 1; - - if (is_step_params == 1 & opt_iter > num_opt_iter) { - is_opt_converged = 1; - } -} - - - -/* UNCOMMENT TO TRY CONJUGATE GRADIENT DESCENT - - - -print ("Starting Gradient Descent..."); -### GRADIENT DESCENT WITH MODIFICATIONS TO ENHANCE CONVERGENCE - -# num_past_dirVs = 3; -# past_dirVFrees = matrix (0.0, rows = num_frees, cols = num_past_dirVs); -# past_dirVParams = matrix (0.0, rows = num_params, cols = num_past_dirVs); - -shift_T = -1000.0; -is_enough_gradient_descent = 0; -gd_iter = 0; - -while (is_enough_gradient_descent == 0) -{ -### GD-STEP 1: COMPUTE LOSS & GRADIENT AT CURRENT POINT - - reports = CReps %*% freeVars + dReps; - regresValues = RegresValueMap %*% reports + RegresFactorDefault; - regresParams = RegresParamMap %*% params + RegresCoeffDefault; - - bilinear_sums = SumFactor %*% (regresValues * regresParams); - w_bilinear_sums = RegresScaleMult * bilinear_sums; - gradientInFrees = twos_re %*% ((w_bilinear_sums %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps)))); - gradientInParams = twos_re %*% ((w_bilinear_sums %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) * RegresParamMap))); - -### CG-STEP 2: MAKE A FEW APPROXIMATE CONJUGATE GRADIENT STEPS - - shift_frees = matrix (0.0, rows = num_frees, cols = 1); - shift_params = matrix (0.0, rows = num_params, cols = 1); - - residual_frees = t(gradientInFrees); - residual_params = t(gradientInParams); - - p_frees = - residual_frees; - p_params = - residual_params; - - norm_r2 = sum (residual_frees * residual_frees) + sum (residual_params * residual_params); - - cg_iter = 1; - cg_terminate = 0; - cg_eps = 0.000001; - - while (cg_terminate == 0) - { - regresValues_eps_p = regresValues + cg_eps * (RegresValueMap %*% CReps %*% p_frees); - regresParams_eps_p = regresParams + cg_eps * (RegresParamMap %*% p_params); - - bilinear_sums_eps_p = SumFactor %*% (regresValues_eps_p * regresParams_eps_p); - w_bilinear_sums_eps_p = RegresScaleMult * bilinear_sums_eps_p; - - gradientInFrees_eps_p = twos_re %*% ((w_bilinear_sums_eps_p %*% ones_fv) * (SumFactor %*% ((regresParams_eps_p %*% ones_fv) * (RegresValueMap %*% CReps)))); - gradientInParams_eps_p = twos_re %*% ((w_bilinear_sums_eps_p %*% ones_pm) * (SumFactor %*% ((regresValues_eps_p %*% ones_pm) * RegresParamMap))); - - q_frees = t(gradientInFrees_eps_p - gradientInFrees) / cg_eps; - q_params = 
t(gradientInParams_eps_p - gradientInParams) / cg_eps; - - alpha = norm_r2 / castAsScalar (t(p_frees) %*% q_frees + t(p_params) %*% q_params); - - shift_frees = shift_frees + alpha * p_frees; - shift_params = shift_params + alpha * p_params; - - old_norm_r2 = norm_r2; - - residual_frees = residual_frees + alpha * q_frees; - residual_params = residual_params + alpha * q_params; - - norm_r2 = sum (residual_frees * residual_frees) + sum (residual_params * residual_params); - - p_frees = - residual_frees + (norm_r2 / old_norm_r2) * p_frees; - p_params = - residual_params + (norm_r2 / old_norm_r2) * p_params; - - cg_iter = cg_iter + 1; - if (cg_iter > 4) { - cg_terminate = 1; - } - } - -### GD-STEP 3: COMPUTE THE NEW DIRECTION VECTOR & "TEST" SHIFT - - dirVFrees_candidate = shift_frees; - dirVParams_candidate = shift_params; - -# random_frees = Rand (rows = num_frees, cols = 1, min = 0.9, max = 1.1, sparsity = 1.0); -# random_params = Rand (rows = num_params, cols = 1, min = 0.9, max = 1.1, sparsity = 1.0); -# dirVFrees_candidate = dirVFrees_candidate * random_frees; -# dirVParams_candidate = dirVParams_candidate * random_params; - - dirVFrees = dirVFrees_candidate; - dirVParams = dirVParams_candidate; - -# dirV_proj_factors = t(past_dirVFrees) %*% dirVFrees_candidate + t(past_dirVParams) %*% dirVParams_candidate; -# dirVFrees = dirVFrees_candidate - past_dirVFrees %*% dirV_proj_factors; -# dirVParams = dirVParams_candidate - past_dirVParams %*% dirV_proj_factors; - - dirV_denom = sqrt (sum (dirVFrees * dirVFrees) + sum (dirVParams * dirVParams)); - dirVFrees = dirVFrees / dirV_denom; - dirVParams = dirVParams / dirV_denom; - -# past_dirVFrees [, 2:num_past_dirVs] = past_dirVFrees [, 1:(num_past_dirVs-1)]; -# past_dirVParams [, 2:num_past_dirVs] = past_dirVParams [, 1:(num_past_dirVs-1)]; -# past_dirVFrees [, 1] = dirVFrees; -# past_dirVParams [, 1] = dirVParams; - - -### GD-STEP 4: COMPUTE THE POLYNOMIAL FOR d loss(t) / dt - - dirVRegresValues = RegresValueMap %*% CReps %*% dirVFrees; - dirVRegresParams = RegresParamMap %*% dirVParams; - dirVdirV_bilinear_sums = SumFactor %*% (dirVRegresValues * dirVRegresParams); - - dirV_bilinear_sums = SumFactor %*% (dirVRegresValues * regresParams + regresValues * dirVRegresParams); - L_0 = sum (w_bilinear_sums * bilinear_sums); - L_prime_0 = 2.0 * sum (w_bilinear_sums * dirV_bilinear_sums); - - freeVars_T = freeVars + shift_T * dirVFrees; - params_T = params + shift_T * dirVParams; - - reports_T = CReps %*% freeVars_T + dReps; - regresValues_T = RegresValueMap %*% reports_T + RegresFactorDefault; - regresParams_T = RegresParamMap %*% params_T + RegresCoeffDefault; - - bilinear_sums_T = SumFactor %*% (regresValues_T * regresParams_T); - w_bilinear_sums_T = RegresScaleMult * bilinear_sums_T; - dirV_bilinear_sums_T = SumFactor %*% (dirVRegresValues * regresParams_T + regresValues_T * dirVRegresParams); - - L_T = sum (w_bilinear_sums_T * bilinear_sums_T); - L_prime_T = 2.0 * sum (w_bilinear_sums_T * dirV_bilinear_sums_T); - - coeff_a = 4.0 * sum (RegresScaleMult * dirVdirV_bilinear_sums * dirVdirV_bilinear_sums); - coeff_b = -1.5 * coeff_a * shift_T + 3.0 * (L_prime_0 + L_prime_T + 2.0 * (L_0 - L_T) / shift_T) / (shift_T * shift_T); - coeff_c = 0.5 * coeff_a * shift_T * shift_T - 2.0 * (2.0 * L_prime_0 + L_prime_T + 3.0 * (L_0 - L_T) / shift_T) / shift_T; - coeff_d = L_prime_0; - -### GD-STEP 5: SOLVE CUBIC EQUATION & PICK THE BEST SHIFT (ROOT) - - coeff_aa = coeff_b / coeff_a; - coeff_bb = coeff_c / coeff_a; - coeff_cc = coeff_d / coeff_a; - - coeff_Q = 
(coeff_aa * coeff_aa - 3.0 * coeff_bb) / 9.0; - coeff_R = (2.0 * coeff_aa * coeff_aa * coeff_aa - 9.0 * coeff_aa * coeff_bb + 27.0 * coeff_cc) / 54.0; - - root_choice = 0.0; - if (coeff_R * coeff_R < coeff_Q * coeff_Q * coeff_Q) - { - two_pi_third = 2.0943951023931954923084289221863; - acos_argument = coeff_R / sqrt (coeff_Q * coeff_Q * coeff_Q); - - x = abs (acos_argument); - acos_x = sqrt (1.0 - x) * (1.5707963050 + x * (-0.2145988016 - + x * ( 0.0889789874 + x * (-0.0501743046 - + x * ( 0.0308918810 + x * (-0.0170881256 - + x * ( 0.0066700901 + x * (-0.0012624911)))))))); - if (acos_argument >= 0.0) { - coeff_theta = acos_x; - } else { - coeff_theta = 3.1415926535897932384626433832795 - acos_x; - } - - root_1 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0); - root_2 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 + two_pi_third); - root_3 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 - two_pi_third); - root_min = min (min (root_1, root_2), root_3); - root_max = max (max (root_1, root_2), root_3); - root_int_diff = (((root_max * coeff_a / 4.0 + coeff_b / 3.0) * root_max + coeff_c / 2.0) * root_max + coeff_d) * root_max - - (((root_min * coeff_a / 4.0 + coeff_b / 3.0) * root_min + coeff_c / 2.0) * root_min + coeff_d) * root_min; - if (root_int_diff >= 0.0) { - root_choice = root_min; - } else { - root_choice = root_max; - } - } else { - if (coeff_R >= 0.0) { - sgn_coeff_R = 1.0; - } else { - sgn_coeff_R = -1.0; - } - coeff_bigA = - sgn_coeff_R * (abs (coeff_R) + sqrt (coeff_R * coeff_R - coeff_Q * coeff_Q * coeff_Q)) ^ (1.0 / 3.0); - if (coeff_bigA != 0.0) { - root_choice = coeff_bigA + coeff_Q / coeff_bigA - coeff_aa / 3.0; - } else { - root_choice = - coeff_aa / 3.0; - } - } - - root_choice = root_choice - - (((coeff_a * root_choice + coeff_b) * root_choice + coeff_c) * root_choice + coeff_d) - / ((3 * coeff_a * root_choice + 2 * coeff_b) * root_choice + coeff_c); - root_choice = root_choice - - (((coeff_a * root_choice + coeff_b) * root_choice + coeff_c) * root_choice + coeff_d) - / ((3 * coeff_a * root_choice + 2 * coeff_b) * root_choice + coeff_c); - - -### GD-STEP 6: FINISH UP THE ITERATION - - freeVars = freeVars + root_choice * dirVFrees; - params = params + root_choice * dirVParams; - - root_int_diff = (((root_choice * coeff_a / 4.0 + coeff_b / 3.0) * root_choice + coeff_c / 2.0) * root_choice + coeff_d) * root_choice; - if (- root_int_diff < 0.00000001 * L_0) { - is_enough_gradient_descent = 1; - } - gd_iter = gd_iter + 1; - print ("Grad Descent Iter " + gd_iter + ": L = " + (L_0 + root_int_diff) + "; shift = " + root_choice); - shift_T = - 100.0 * sqrt (abs(root_choice) * abs(shift_T)); -} - - -print ("Gradient Descent finished. 
Starting MCMC..."); - - - - -END UNCOMMENT TO TRY CONJUGATE GRADIENT DESCENT */ - - - - - - - - -print ("Performing MCMC initialization..."); - -reports = CReps %*% freeVars + dReps; -regresValues = RegresValueMap %*% reports + RegresFactorDefault; -regresParams = RegresParamMap %*% params + RegresCoeffDefault; - -bilinear_vector = regresValues * regresParams; -bilinear_form = matrix (bilinear_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE); -bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form)); - -max_num_iter = max_num_burnin_iterations + max_num_observed_iterations; -dim_sample = num_frees + num_params; -sample_ones = matrix (1.0, rows = dim_sample, cols = 1); - -# Generate a random permutation matrix for the sampling order of freeVars and params - -SampleOrder = diag (sample_ones); -num_swaps = 10 * dim_sample; -rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0); -left_swap = round (0.5 + dim_sample * rnd); -rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0); -right_swap = round (0.5 + dim_sample * rnd); -for (swap_i in 1:num_swaps) { - l = castAsScalar (left_swap [swap_i, 1]); - r = castAsScalar (right_swap [swap_i, 1]); - if (l != r) { - tmp_row = SampleOrder [l, ]; - SampleOrder [l, ] = SampleOrder [r, ]; - SampleOrder [r, ] = tmp_row; - } -} - -pi = 3.1415926535897932384626433832795; -zero = matrix (0.0, rows = 1, cols = 1); - -isVar = colSums (SampleOrder [1 : num_frees, ]); -sum_of_observed_reports = matrix (0.0, rows = num_attrs, cols = num_terms); -sum_of_observed_params = matrix (0.0, rows = num_params, cols = 1); -num_of_observed_reports = 0; -sum_of_observed_losses = 0.0; -is_observed = 0; - -is_calculating_loss_change = 0; -is_monitoring_loss_change = 0; -avg_prob_of_loss_increase = 0; -update_factor_for_avg_loss_change = 0.02; -avg_loss_change = -50.0 * update_factor_for_avg_loss_change; -old_bilinear_form_value = bilinear_form_value; - -# Starting MCMC iterations - -iter = 0; - -while ((iter < max_num_iter) & (num_of_observed_reports < max_num_observed_iterations)) -{ - iter = iter + 1; - - # Initialize (bi-)linear forms - - regresValues = RegresValueMap %*% reports + RegresFactorDefault; - regresParams = RegresParamMap %*% params + RegresCoeffDefault; - bilinear_form_vector = regresValues * regresParams; - - bilinear_form = matrix (bilinear_form_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE); - bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form)); - - if (bilinear_form_value > old_bilinear_form_value) { - avg_prob_of_loss_increase = avg_prob_of_loss_increase * (1 - update_factor_for_avg_loss_change) + 1 * update_factor_for_avg_loss_change; - } else { - avg_prob_of_loss_increase = avg_prob_of_loss_increase * (1 - update_factor_for_avg_loss_change); - } - if (is_calculating_loss_change == 0 & avg_prob_of_loss_increase > 0.4) { - is_calculating_loss_change = 1; - } - if (is_monitoring_loss_change == 0 & avg_prob_of_loss_increase > 0.5) { - is_calculating_loss_change = 1; - is_monitoring_loss_change = 1; - print ("Monitoring the average loss change is ON. "); - } - if (is_calculating_loss_change == 1) { - avg_loss_change = avg_loss_change * (1 - update_factor_for_avg_loss_change) - + (bilinear_form_value - old_bilinear_form_value) * update_factor_for_avg_loss_change; - } - if (is_observed == 0 & ((is_monitoring_loss_change == 1 & avg_loss_change > 0) | iter > max_num_burnin_iterations)) { - print ("Burn-in ENDS, observation STARTS. 
"); - is_observed = 1; - } - - old_bilinear_form_value = bilinear_form_value; - - bilinear_form_value_to_print = bilinear_form_value; - if (bilinear_form_value < 100000) { - bilinear_form_value_to_print = round (10000 * bilinear_form_value) / 10000; - } else { - if (bilinear_form_value < 1000000000) { - bilinear_form_value_to_print = round (bilinear_form_value); - }} - - if (is_monitoring_loss_change == 0) { - print ("MCMC iteration " + iter + ": Prob [loss_increase] = " + (round (10000 * avg_prob_of_loss_increase) / 10000) - + ", bilinear form value = " + bilinear_form_value_to_print); - } else { - print ("MCMC iteration " + iter + ": Prob [loss_increase] = " + (round (10000 * avg_prob_of_loss_increase) / 10000) - + ", bilinear form value = " + bilinear_form_value_to_print + ", avg_loss_change = " + (round (10000 * avg_loss_change) / 10000)); - } - - # Create a normally distributed random sample - - dim_half_sample = castAsScalar (round (dim_sample / 2 + 0.1 + zero)); - rnd1 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0); - rnd2 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0); - rnd_normal_1 = sqrt (- 2.0 * log (rnd1)) * sin (2 * pi * rnd2); - rnd_normal_2 = sqrt (- 2.0 * log (rnd1)) * cos (2 * pi * rnd2); - rnd_normal = matrix (0.0, rows = dim_sample, cols = 1); - rnd_normal [1 : dim_half_sample, ] = rnd_normal_1; - rnd_normal [(dim_sample - dim_half_sample + 1) : dim_sample, ] = rnd_normal_2; - - # Initialize updaters - - freeVars_updater = freeVars * 0.0; - params_updater = params * 0.0; - regresValues_updater = regresValues * 0.0; - regresParams_updater = regresParams * 0.0; - bilinear_updater_vector = bilinear_form_vector * 0.0; - - # Perform the sampling - - for (idx in 1:dim_sample) - { - # Generate the sample unit-vector and updaters - - if (castAsScalar (isVar [1, idx]) > 0.5) { - freeVars_updater = SampleOrder [1 : num_frees, idx]; - regresValues_updater = RegresValueMap %*% CReps %*% freeVars_updater; - bilinear_updater_vector = regresValues_updater * regresParams; - } else { - params_updater = SampleOrder [(num_frees + 1) : dim_sample, idx]; - regresParams_updater = RegresParamMap %*% params_updater; - bilinear_updater_vector = regresValues * regresParams_updater; - } - bilinear_updater = matrix (bilinear_updater_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE); - - # Compute the quadratic by three shift-points: -1, 0, +1 - - bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form)); - q_minus_1 = sum (RegresScaleMult * rowSums (bilinear_form - bilinear_updater) * rowSums (bilinear_form - bilinear_updater)); - q_plus_1 = sum (RegresScaleMult * rowSums (bilinear_form + bilinear_updater) * rowSums (bilinear_form + bilinear_updater)); - coeff_b = (q_plus_1 - q_minus_1) / 2.0; - coeff_a = (q_plus_1 + q_minus_1) / 2.0 - bilinear_form_value; - - # Find the mean and the sigma for f(x) ~ exp (- (ax^2 + bx + c)), - # then compute the shift to get the new sample - - mean_shift = - coeff_b / (2.0 * coeff_a); - sigma_shift = 1.0 / sqrt (2.0 * coeff_a); - shift = mean_shift + sigma_shift * castAsScalar (rnd_normal [idx, 1]); - -# BEGIN DEBUG INSERT -# mmm = 1; -# if (castAsScalar (isVar [1, idx]) > 0.5 & # IT IS A FREE VARIABLE, NOT A PARAMETER -# castAsScalar (freeVars_updater [mmm, 1]) > 0) # IT IS mmm-TH FREE VARIABLE -# { -# # print ("freeVars[" + mmm + "]: q_minus_1 = " + q_minus_1 + ", q_plus_1 = " + q_plus_1 + ", coeff_a = " + coeff_a + ", coeff_b = " + coeff_b); -# print ("freeVars[" + mmm + "]: 
q_minus_1 = " + q_minus_1 + ", q_plus_1 = " + q_plus_1 + ", mean_shift = " + mean_shift + ", sigma_shift = " + sigma_shift + ", shift = " + shift); -# } -# if (castAsScalar (isVar [1, idx]) <= 0.5 & # IT IS A PARAMETER, NOT A FREE VARIABLE -# castAsScalar (params_updater [mmm, 1]) > 0) # IT IS mmm-TH PARAMETER -# { -# # print (" params[" + mmm + "]: q_minus_1 = " + q_minus_1 + ", q_plus_1 = " + q_plus_1 + ", coeff_a = " + coeff_a + ", coeff_b = " + coeff_b); -# print (" params[" + mmm + "]: q_minus_1 = " + q_minus_1 + ", q_plus_1 = " + q_plus_1 + ", mean_shift = " + mean_shift + ", sigma_shift = " + sigma_shift + ", shift = " + shift); -# } -# END DEBUG INSERT - - # Perform the updates - - bilinear_form = bilinear_form + shift * bilinear_updater; - if (castAsScalar (isVar [1, idx]) > 0.5) { - freeVars = freeVars + shift * freeVars_updater; - regresValues = regresValues + shift * regresValues_updater; - } else { - params = params + shift * params_updater; - regresParams = regresParams + shift * regresParams_updater; - } - } - - # Update / adjust the reports and the parameters - - reports = CReps %*% freeVars + dReps; - reports_matrix = matrix (reports, rows = num_attrs, cols = num_terms, byrow = FALSE); - - # Make an observation of the reports and/or the parameters - - if (is_observed > 0) - { - sum_of_observed_reports = sum_of_observed_reports + reports_matrix; - num_of_observed_reports = num_of_observed_reports + 1; - - sum_of_observed_params = sum_of_observed_params + params; - sum_of_observed_losses = sum_of_observed_losses + bilinear_form_value; - } - -# v1 =castAsScalar(round(10000*reports[1 + (num_terms - 1) * num_attrs, 1])/10000); -# v2 =castAsScalar(round(10000*reports[2 + (num_terms - 1) * num_attrs, 1])/10000); -# v3 =castAsScalar(round(10000*reports[3 + (num_terms - 1) * num_attrs, 1])/10000); -# v4 =castAsScalar(round(10000*reports[4 + (num_terms - 1) * num_attrs, 1])/10000); -# w1 =castAsScalar(round(10000*reports_matrix[ 1,num_terms])/10000); -# w2 =castAsScalar(round(10000*reports_matrix[ 2,num_terms])/10000); -# w3 =castAsScalar(round(10000*reports_matrix[ 3,num_terms])/10000); -# w4 =castAsScalar(round(10000*reports_matrix[ 4,num_terms])/10000); - -# v5 =castAsScalar(round(reports_matrix[ 5,num_terms])); -# v8 =castAsScalar(round(reports_matrix[ 8,num_terms])); -# v9 =castAsScalar(round(reports_matrix[ 9,num_terms])); -# v10=castAsScalar(round(reports_matrix[10,num_terms])); -# v16=castAsScalar(round(reports_matrix[16,num_terms])); -# v19=castAsScalar(round(reports_matrix[19,num_terms])); - -#print (" Sample = 1:" + v1 + ", 2:" + v2 + ", 3:" + v3 + ", 4:" + v4); -## + ", 5:" + v5 + ", 8:" + v8 + ", 9:" + v9 + ", 10:" + v10 + ", 16:" + v16 + ", 19:" + v19); -#print (" Sample = 1:" + w1 + ", 2:" + w2 + ", 3:" + w3 + ", 4:" + w4); -## + ", 5:" + w5 + ", 8:" + w8 + ", 9:" + w9 + ", 10:" + w10 + ", 16:" + w16 + ", 19:" + w19); - -} - -print ("Average observed loss = " + (sum_of_observed_losses / num_of_observed_reports)); -print ("Writing out the results..."); - -avg_reports_matrix = sum_of_observed_reports / num_of_observed_reports; -avg_params = sum_of_observed_params / num_of_observed_reports; -write (avg_reports_matrix, $11, format="text"); -write (avg_params, $12, format="text"); - -print ("END ImputeGaussMCMC"); +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +# Implements the MCMC algorithm for imputation of missing data into a time-series of "reports". +# Each report is a fixed-size vector of attribute values; reports come out each year/quarter/month ("term"). +# Hard linear equality constraints restrict values in/across the reports, e.g. total cost = sum of all costs. +# Soft linear regression constraints define dependencies between values in/across the reports. +# Linear regression parameters are unknown and sampled together with the missing values in the reports. +# +# INPUT 1: Initial reports matrix [1 : num_attrs, 1 : num_terms] with missing values usually set to zero, +# but it MUST BE CONSISTENT with hard constraints! Set some missing values to nonzero if needed. +# There are "num_terms" reports in the matrix, each having "num_attrs" attribute values. +# +# INPUT 2: Sparse matrix [1 : (num_terms * num_attrs), 1 : num_frees] that defines a linear map from +# "free" variables to the reports matrix. A tensor of size (num_terms * num_attrs * num_frees) +# where the reports matrix is stretched into a column-vector [1 : (num_terms * num_attrs)]. +# Term = t, attribute = i --> index = (t-1) * num_attrs + i +# +# INPUT 3: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : (num_terms * num_attrs)] that defines +# a linear map from the stretched matrix of reports to the stretched matrix of regression factors. +# +# INPUT 4: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines "default regression factors" +# (if nonzero) to be added to the regression factors before they are multiplied by parameters. +# +# INPUT 5: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : num_params] that defines a linear map +# from the vector of parameters to the stretched matrix of regression factors. +# +# INPUT 6: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines default regression coefficients +# (if nonzero) to be added to the parameters (if any) before being multiplied by regression factors. 
+# +# INPUT 7: A vector [1 : num_reg_eqs, 1] of scale multipliers, one per regression +# +# INPUT 8 : Number of factors in a regression equation, including the estimated value +# INPUT 9 : Maximum number of burn-in full iterations (that sample each variable and each parameter once) +# BUT the actual number of burn-in iterations may be smaller if "free fall" ends sooner +# INPUT 10: Maximum number of observed full iterations (that sample each variable and each parameter once) +# +# INPUT 11: Output file name and path for the average MCMC reports table +# INPUT 12: Output file for debugging (currently: the average parameters vector) +# +# Example: +# hadoop jar SystemML.jar -f test/scripts/applications/impute/imputeGaussMCMC.dml -exec singlenode -args +# test/scripts/applications/impute/initial_reports +# test/scripts/applications/impute/CReps +# test/scripts/applications/impute/RegresValueMap +# test/scripts/applications/impute/RegresFactorDefault +# test/scripts/applications/impute/RegresParamMap +# test/scripts/applications/impute/RegresCoeffDefault +# test/scripts/applications/impute/RegresScaleMult +# 4 1000 100 +# test/scripts/applications/impute/output_reports +# test/scripts/applications/impute/debug_info + + +print ("START ImputeGaussMCMC"); +print ("Reading the input files..."); + +initial_reports = read ($1); +CReps = read ($2); + +num_terms = ncol (initial_reports); # Number of periods for which reports are produced +num_attrs = nrow (initial_reports); # Number of attribute values per each term report +num_frees = ncol (CReps); # Number of free variables used to describe all consistent reports + +dReps_size = num_terms * num_attrs; +dReps = matrix (initial_reports, rows = dReps_size, cols = 1, byrow = FALSE); + +# We assume that all report-series consistent with hard constraints form an affine set: +# reports = CReps %*% freeVars + dReps +# Here "freeVars" is a vector of "free variables" (degrees of freedom), "CReps" is a linear mapping, +# and "dReps" are the default values for the reports that correspond to the initial reports matrix. 
+ +RegresValueMap = read ($3); +RegresFactorDefault = read ($4); +RegresParamMap = read ($5); +RegresCoeffDefault = read ($6); +RegresScaleMult = read ($7); + +num_factors = $8; # Number of factors in each regression equation, including the estimated value +num_reg_eqs = nrow (RegresParamMap) / num_factors; # Number of regression equations +num_params = ncol (RegresParamMap); # Number of parameters used in all regressions +max_num_burnin_iterations = $9; +max_num_observed_iterations = $10; + +ones_fv = matrix (1.0, rows = 1, cols = num_frees); +ones_pm = matrix (1.0, rows = 1, cols = num_params); +twos_re = matrix (2.0, rows = 1, cols = num_reg_eqs); + +# Create a summation operator (matrix) that adds the factors in each regression: + +ncols_SumFactor = num_reg_eqs * num_factors; +SumFactor = matrix (0.0, rows = num_reg_eqs, cols = ncols_SumFactor); +ones_f = matrix (1.0, rows = 1, cols = num_factors); +SumFactor [1, 1:num_factors] = ones_f; +nSumFactor = 1; +while (nSumFactor < num_reg_eqs) { + incSumFactor = nSumFactor; + if (incSumFactor > num_reg_eqs - nSumFactor) { + incSumFactor = num_reg_eqs - nSumFactor; + } + SumFactor [(nSumFactor + 1) : (nSumFactor + incSumFactor), + (nSumFactor * num_factors + 1) : ((nSumFactor + incSumFactor) * num_factors)] = + SumFactor [1 : incSumFactor, 1 : (incSumFactor * num_factors)]; + nSumFactor = nSumFactor + incSumFactor; +} + +freeVars = matrix (0.0, rows = num_frees, cols = 1); +params = matrix (1.0, rows = num_params, cols = 1); + + + +num_opt_iter = 20; +print ("Performing initial optimization (" + num_opt_iter + " alternating CG steps)..."); + +reports = CReps %*% freeVars + dReps; +regresValues = RegresValueMap %*% reports + RegresFactorDefault; +regresParams = RegresParamMap %*% params + RegresCoeffDefault; + +bilinear_sums = SumFactor %*% (regresValues * regresParams); +w_bilinear_sums = RegresScaleMult * bilinear_sums; +bilinear_form_value = sum (w_bilinear_sums * bilinear_sums); + +opt_iter = 1; +is_step_params = 1; +is_opt_converged = 0; + +print ("Before optimization: Initial bilinear form value = " + bilinear_form_value); + +while (is_opt_converged == 0) +{ + deg = is_step_params * num_params + (1 - is_step_params) * num_frees; + shift_vector = matrix (0.0, rows = deg, cols = 1); + + # Compute gradient + + if (is_step_params == 1) { + gradient = twos_re %*% ((w_bilinear_sums %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) * RegresParamMap))); + } else { + gradient = twos_re %*% ((w_bilinear_sums %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps)))); + } + + # Make a few conjugate gradient steps + + residual = t(gradient); + p = - residual; + norm_r2 = sum (residual * residual); + cg_iter = 1; + cg_terminate = 0; + + while (cg_terminate == 0) + { + # Want: q = A %*% p; + # Compute gradient change from 0 to p + + if (is_step_params == 1) { + w_bilinear_p = RegresScaleMult * (SumFactor %*% (regresValues * (RegresParamMap %*% p))); + gradient_change_p = twos_re %*% ((w_bilinear_p %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) * RegresParamMap))); + } else { + w_bilinear_p = RegresScaleMult * (SumFactor %*% ((RegresValueMap %*% CReps %*% p) * regresParams)); + gradient_change_p = twos_re %*% ((w_bilinear_p %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps)))); + } + q = t(gradient_change_p); + + alpha = norm_r2 / castAsScalar (t(p) %*% q); + shift_vector_change = alpha * p; + shift_vector = shift_vector + shift_vector_change; + old_norm_r2 = norm_r2; 
+ residual = residual + alpha * q; + norm_r2 = sum (residual * residual); + p = - residual + (norm_r2 / old_norm_r2) * p; + cg_iter = cg_iter + 1; + if (cg_iter > min (deg, 2 + opt_iter / 3)) { + cg_terminate = 1; + } + } + + if (is_step_params == 1) { + params = params + shift_vector; + regresParams = RegresParamMap %*% params + RegresCoeffDefault; + } else { + freeVars = freeVars + shift_vector; + reports = CReps %*% freeVars + dReps; + regresValues = RegresValueMap %*% reports + RegresFactorDefault; + } + + # Update the bilinear form and check convergence + + if (is_step_params == 1) { + old_bilinear_form_value = bilinear_form_value; + } + + bilinear_sums = SumFactor %*% (regresValues * regresParams); + w_bilinear_sums = RegresScaleMult * bilinear_sums; + bilinear_form_value = sum (w_bilinear_sums * bilinear_sums); + + if (is_step_params == 1) { + print ("Optimization step " + opt_iter + " (params) : bilinear form value = " + bilinear_form_value); + } else { + print ("Optimization step " + opt_iter + " (reports): bilinear form value = " + bilinear_form_value); + } + + is_step_params = 1 - is_step_params; + opt_iter = opt_iter + 1; + + if (is_step_params == 1 & opt_iter > num_opt_iter) { + is_opt_converged = 1; + } +} + + + +/* UNCOMMENT TO TRY CONJUGATE GRADIENT DESCENT + + + +print ("Starting Gradient Descent..."); +### GRADIENT DESCENT WITH MODIFICATIONS TO ENHANCE CONVERGENCE + +# num_past_dirVs = 3; +# past_dirVFrees = matrix (0.0, rows = num_frees, cols = num_past_dirVs); +# past_dirVParams = matrix (0.0, rows = num_params, cols = num_past_dirVs); + +shift_T = -1000.0; +is_enough_gradient_descent = 0; +gd_iter = 0; + +while (is_enough_gradient_descent == 0) +{ +### GD-STEP 1: COMPUTE LOSS & GRADIENT AT CURRENT POINT + + reports = CReps %*% freeVars + dReps; + regresValues = RegresValueMap %*% reports + RegresFactorDefault; + regresParams = RegresParamMap %*% params + RegresCoeffDefault; + + bilinear_sums = SumFactor %*% (regresValues * regresParams); + w_bilinear_sums = RegresScaleMult * bilinear_sums; + gradientInFrees = twos_re %*% ((w_bilinear_sums %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps)))); + gradientInParams = twos_re %*% ((w_bilinear_sums %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) * RegresParamMap))); + +### CG-STEP 2: MAKE A FEW APPROXIMATE CONJUGATE GRADIENT STEPS + + shift_frees = matrix (0.0, rows = num_frees, cols = 1); + shift_params = matrix (0.0, rows = num_params, cols = 1); + + residual_frees = t(gradientInFrees); + residual_params = t(gradientInParams); + + p_frees = - residual_frees; + p_params = - residual_params; + + norm_r2 = sum (residual_frees * residual_frees) + sum (residual_params * residual_params); + + cg_iter = 1; + cg_terminate = 0; + cg_eps = 0.000001; + + while (cg_terminate == 0) + { + regresValues_eps_p = regresValues + cg_eps * (RegresValueMap %*% CReps %*% p_frees); + regresParams_eps_p = regresParams + cg_eps * (RegresParamMap %*% p_params); + + bilinear_sums_eps_p = SumFactor %*% (regresValues_eps_p * regresParams_eps_p); + w_bilinear_sums_eps_p = RegresScaleMult * bilinear_sums_eps_p; + + gradientInFrees_eps_p = twos_re %*% ((w_bilinear_sums_eps_p %*% ones_fv) * (SumFactor %*% ((regresParams_eps_p %*% ones_fv) * (RegresValueMap %*% CReps)))); + gradientInParams_eps_p = twos_re %*% ((w_bilinear_sums_eps_p %*% ones_pm) * (SumFactor %*% ((regresValues_eps_p %*% ones_pm) * RegresParamMap))); + + q_frees = t(gradientInFrees_eps_p - gradientInFrees) / cg_eps; + q_params = 
t(gradientInParams_eps_p - gradientInParams) / cg_eps; + + alpha = norm_r2 / castAsScalar (t(p_frees) %*% q_frees + t(p_params) %*% q_params); + + shift_frees = shift_frees + alpha * p_frees; + shift_params = shift_params + alpha * p_params; + + old_norm_r2 = norm_r2; + + residual_frees = residual_frees + alpha * q_frees; + residual_params = residual_params + alpha * q_params; + + norm_r2 = sum (residual_frees * residual_frees) + sum (residual_params * residual_params); + + p_frees = - residual_frees + (norm_r2 / old_norm_r2) * p_frees; + p_params = - residual_params + (norm_r2 / old_norm_r2) * p_params; + + cg_iter = cg_iter + 1; + if (cg_iter > 4) { + cg_terminate = 1; + } + } + +### GD-STEP 3: COMPUTE THE NEW DIRECTION VECTOR & "TEST" SHIFT + + dirVFrees_candidate = shift_frees; + dirVParams_candidate = shift_params; + +# random_frees = Rand (rows = num_frees, cols = 1, min = 0.9, max = 1.1, sparsity = 1.0); +# random_params = Rand (rows = num_params, cols = 1, min = 0.9, max = 1.1, sparsity = 1.0); +# dirVFrees_candidate = dirVFrees_candidate * random_frees; +# dirVParams_candidate = dirVParams_candidate * random_params; + + dirVFrees = dirVFrees_candidate; + dirVParams = dirVParams_candidate; + +# dirV_proj_factors = t(past_dirVFrees) %*% dirVFrees_candidate + t(past_dirVParams) %*% dirVParams_candidate; +# dirVFrees = dirVFrees_candidate - past_dirVFrees %*% dirV_proj_factors; +# dirVParams = dirVParams_candidate - past_dirVParams %*% dirV_proj_factors; + + dirV_denom = sqrt (sum (dirVFrees * dirVFrees) + sum (dirVParams * dirVParams)); + dirVFrees = dirVFrees / dirV_denom; + dirVParams = dirVParams / dirV_denom; + +# past_dirVFrees [, 2:num_past_dirVs] = past_dirVFrees [, 1:(num_past_dirVs-1)]; +# past_dirVParams [, 2:num_past_dirVs] = past_dirVParams [, 1:(num_past_dirVs-1)]; +# past_dirVFrees [, 1] = dirVFrees; +# past_dirVParams [, 1] = dirVParams; + + +### GD-STEP 4: COMPUTE THE POLYNOMIAL FOR d loss(t) / dt + + dirVRegresValues = RegresValueMap %*% CReps %*% dirVFrees; + dirVRegresParams = RegresParamMap %*% dirVParams; + dirVdirV_bilinear_sums = SumFactor %*% (dirVRegresValues * dirVRegresParams); + + dirV_bilinear_sums = SumFactor %*% (dirVRegresValues * regresParams + regresValues * dirVRegresParams); + L_0 = sum (w_bilinear_sums * bilinear_sums); + L_prime_0 = 2.0 * sum (w_bilinear_sums * dirV_bilinear_sums); + + freeVars_T = freeVars + shift_T * dirVFrees; + params_T = params + shift_T * dirVParams; + + reports_T = CReps %*% freeVars_T + dReps; + regresValues_T = RegresValueMap %*% reports_T + RegresFactorDefault; + regresParams_T = RegresParamMap %*% params_T + RegresCoeffDefault; + + bilinear_sums_T = SumFactor %*% (regresValues_T * regresParams_T); + w_bilinear_sums_T = RegresScaleMult * bilinear_sums_T; + dirV_bilinear_sums_T = SumFactor %*% (dirVRegresValues * regresParams_T + regresValues_T * dirVRegresParams); + + L_T = sum (w_bilinear_sums_T * bilinear_sums_T); + L_prime_T = 2.0 * sum (w_bilinear_sums_T * dirV_bilinear_sums_T); + + coeff_a = 4.0 * sum (RegresScaleMult * dirVdirV_bilinear_sums * dirVdirV_bilinear_sums); + coeff_b = -1.5 * coeff_a * shift_T + 3.0 * (L_prime_0 + L_prime_T + 2.0 * (L_0 - L_T) / shift_T) / (shift_T * shift_T); + coeff_c = 0.5 * coeff_a * shift_T * shift_T - 2.0 * (2.0 * L_prime_0 + L_prime_T + 3.0 * (L_0 - L_T) / shift_T) / shift_T; + coeff_d = L_prime_0; + +### GD-STEP 5: SOLVE CUBIC EQUATION & PICK THE BEST SHIFT (ROOT) + + coeff_aa = coeff_b / coeff_a; + coeff_bb = coeff_c / coeff_a; + coeff_cc = coeff_d / coeff_a; + + coeff_Q = 
+
+### GD-STEP 5: SOLVE CUBIC EQUATION & PICK THE BEST SHIFT (ROOT)
+
+    coeff_aa = coeff_b / coeff_a;
+    coeff_bb = coeff_c / coeff_a;
+    coeff_cc = coeff_d / coeff_a;
+
+    coeff_Q = (coeff_aa * coeff_aa - 3.0 * coeff_bb) / 9.0;
+    coeff_R = (2.0 * coeff_aa * coeff_aa * coeff_aa - 9.0 * coeff_aa * coeff_bb + 27.0 * coeff_cc) / 54.0;
+
+    root_choice = 0.0;
+    if (coeff_R * coeff_R < coeff_Q * coeff_Q * coeff_Q)
+    {
+        # Three real roots: trigonometric method
+        two_pi_third = 2.0943951023931954923084289221863;
+        acos_argument = coeff_R / sqrt (coeff_Q * coeff_Q * coeff_Q);
+
+        x = abs (acos_argument);
+        acos_x = sqrt (1.0 - x) * (1.5707963050 + x * (-0.2145988016
+               + x * ( 0.0889789874 + x * (-0.0501743046
+               + x * ( 0.0308918810 + x * (-0.0170881256
+               + x * ( 0.0066700901 + x * (-0.0012624911))))))));
+        if (acos_argument >= 0.0) {
+            coeff_theta = acos_x;
+        } else {
+            coeff_theta = 3.1415926535897932384626433832795 - acos_x;
+        }
+
+        root_1 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0);
+        root_2 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 + two_pi_third);
+        root_3 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 - two_pi_third);
+        root_min = min (min (root_1, root_2), root_3);
+        root_max = max (max (root_1, root_2), root_3);
+        # Integral of P from root_min to root_max = L(root_max) - L(root_min):
+        # pick the root with the smaller loss
+        root_int_diff = (((root_max * coeff_a / 4.0 + coeff_b / 3.0) * root_max + coeff_c / 2.0) * root_max + coeff_d) * root_max
+                      - (((root_min * coeff_a / 4.0 + coeff_b / 3.0) * root_min + coeff_c / 2.0) * root_min + coeff_d) * root_min;
+        if (root_int_diff >= 0.0) {
+            root_choice = root_min;
+        } else {
+            root_choice = root_max;
+        }
+    } else {
+        # One real root: Cardano's method
+        if (coeff_R >= 0.0) {
+            sgn_coeff_R = 1.0;
+        } else {
+            sgn_coeff_R = -1.0;
+        }
+        coeff_bigA = - sgn_coeff_R * (abs (coeff_R) + sqrt (coeff_R * coeff_R - coeff_Q * coeff_Q * coeff_Q)) ^ (1.0 / 3.0);
+        if (coeff_bigA != 0.0) {
+            root_choice = coeff_bigA + coeff_Q / coeff_bigA - coeff_aa / 3.0;
+        } else {
+            root_choice = - coeff_aa / 3.0;
+        }
+    }
+
+    # Two Newton-Raphson steps to polish the chosen root
+    root_choice = root_choice
+        - (((coeff_a * root_choice + coeff_b) * root_choice + coeff_c) * root_choice + coeff_d)
+        / ((3 * coeff_a * root_choice + 2 * coeff_b) * root_choice + coeff_c);
+    root_choice = root_choice
+        - (((coeff_a * root_choice + coeff_b) * root_choice + coeff_c) * root_choice + coeff_d)
+        / ((3 * coeff_a * root_choice + 2 * coeff_b) * root_choice + coeff_c);
+
+### GD-STEP 6: FINISH UP THE ITERATION
+
+    freeVars = freeVars + root_choice * dirVFrees;
+    params = params + root_choice * dirVParams;
+
+    root_int_diff = (((root_choice * coeff_a / 4.0 + coeff_b / 3.0) * root_choice + coeff_c / 2.0) * root_choice + coeff_d) * root_choice;
+    if (- root_int_diff < 0.00000001 * L_0) {
+        is_enough_gradient_descent = 1;
+    }
+    gd_iter = gd_iter + 1;
+    print ("Grad Descent Iter " + gd_iter + ": L = " + (L_0 + root_int_diff) + "; shift = " + root_choice);
+    shift_T = - 100.0 * sqrt (abs(root_choice) * abs(shift_T));
+}
+
+print ("Gradient Descent finished. Starting MCMC...");
+
+END UNCOMMENT TO TRY CONJUGATE GRADIENT DESCENT */
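GD-STEP 5 above uses the standard two-branch cubic solver (trigonometric method when all three roots are real, Cardano otherwise) and chooses between the extreme roots by comparing the definite integral of P, i.e. the resulting loss change. The trigonometric branch needs arccos, which is not a builtin in this dialect, so the script inlines the Abramowitz-Stegun 4.4.45 polynomial approximation (absolute error below roughly 1e-7 on [0, 1]). A quick stand-alone sanity check of that approximation, with illustration-only _chk names:

    x_chk = 0.5;
    acos_chk = sqrt (1.0 - x_chk) * (1.5707963050 + x_chk * (-0.2145988016
             + x_chk * ( 0.0889789874 + x_chk * (-0.0501743046
             + x_chk * ( 0.0308918810 + x_chk * (-0.0170881256
             + x_chk * ( 0.0066700901 + x_chk * (-0.0012624911))))))));
    print ("acos(0.5) ~ " + acos_chk + "; exact value pi/3 = 1.0471975512");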
Starting MCMC..."); + + + + +END UNCOMMENT TO TRY CONJUGATE GRADIENT DESCENT */ + + + + + + + + +print ("Performing MCMC initialization..."); + +reports = CReps %*% freeVars + dReps; +regresValues = RegresValueMap %*% reports + RegresFactorDefault; +regresParams = RegresParamMap %*% params + RegresCoeffDefault; + +bilinear_vector = regresValues * regresParams; +bilinear_form = matrix (bilinear_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE); +bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form)); + +max_num_iter = max_num_burnin_iterations + max_num_observed_iterations; +dim_sample = num_frees + num_params; +sample_ones = matrix (1.0, rows = dim_sample, cols = 1); + +# Generate a random permutation matrix for the sampling order of freeVars and params + +SampleOrder = diag (sample_ones); +num_swaps = 10 * dim_sample; +rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0); +left_swap = round (0.5 + dim_sample * rnd); +rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0); +right_swap = round (0.5 + dim_sample * rnd); +for (swap_i in 1:num_swaps) { + l = castAsScalar (left_swap [swap_i, 1]); + r = castAsScalar (right_swap [swap_i, 1]); + if (l != r) { + tmp_row = SampleOrder [l, ]; + SampleOrder [l, ] = SampleOrder [r, ]; + SampleOrder [r, ] = tmp_row; + } +} + +pi = 3.1415926535897932384626433832795; +zero = matrix (0.0, rows = 1, cols = 1); + +isVar = colSums (SampleOrder [1 : num_frees, ]); +sum_of_observed_reports = matrix (0.0, rows = num_attrs, cols = num_terms); +sum_of_observed_params = matrix (0.0, rows = num_params, cols = 1); +num_of_observed_reports = 0; +sum_of_observed_losses = 0.0; +is_observed = 0; + +is_calculating_loss_change = 0; +is_monitoring_loss_change = 0; +avg_prob_of_loss_increase = 0; +update_factor_for_avg_loss_change = 0.02; +avg_loss_change = -50.0 * update_factor_for_avg_loss_change; +old_bilinear_form_value = bilinear_form_value; + +# Starting MCMC iterations + +iter = 0; + +while ((iter < max_num_iter) & (num_of_observed_reports < max_num_observed_iterations)) +{ + iter = iter + 1; + + # Initialize (bi-)linear forms + + regresValues = RegresValueMap %*% reports + RegresFactorDefault; + regresParams = RegresParamMap %*% params + RegresCoeffDefault; + bilinear_form_vector = regresValues * regresParams; + + bilinear_form = matrix (bilinear_form_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE); + bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form)); + + if (bilinear_form_value > old_bilinear_form_value) { + avg_prob_of_loss_increase = avg_prob_of_loss_increase * (1 - update_factor_for_avg_loss_change) + 1 * update_factor_for_avg_loss_change; + } else { + avg_prob_of_loss_increase = avg_prob_of_loss_increase * (1 - update_factor_for_avg_loss_change); + } + if (is_calculating_loss_change == 0 & avg_prob_of_loss_increase > 0.4) { + is_calculating_loss_change = 1; + } + if (is_monitoring_loss_change == 0 & avg_prob_of_loss_increase > 0.5) { + is_calculating_loss_change = 1; + is_monitoring_loss_change = 1; + print ("Monitoring the average loss change is ON. "); + } + if (is_calculating_loss_change == 1) { + avg_loss_change = avg_loss_change * (1 - update_factor_for_avg_loss_change) + + (bilinear_form_value - old_bilinear_form_value) * update_factor_for_avg_loss_change; + } + if (is_observed == 0 & ((is_monitoring_loss_change == 1 & avg_loss_change > 0) | iter > max_num_burnin_iterations)) { + print ("Burn-in ENDS, observation STARTS. 
"); + is_observed = 1; + } + + old_bilinear_form_value = bilinear_form_value; + + bilinear_form_value_to_print = bilinear_form_value; + if (bilinear_form_value < 100000) { + bilinear_form_value_to_print = round (10000 * bilinear_form_value) / 10000; + } else { + if (bilinear_form_value < 1000000000) { + bilinear_form_value_to_print = round (bilinear_form_value); + }} + + if (is_monitoring_loss_change == 0) { + print ("MCMC iteration " + iter + ": Prob [loss_increase] = " + (round (10000 * avg_prob_of_loss_increase) / 10000) + + ", bilinear form value = " + bilinear_form_value_to_print); + } else { + print ("MCMC iteration " + iter + ": Prob [loss_increase] = " + (round (10000 * avg_prob_of_loss_increase) / 10000) + + ", bilinear form value = " + bilinear_form_value_to_print + ", avg_loss_change = " + (round (10000 * avg_loss_change) / 10000)); + } + + # Create a normally distributed random sample + + dim_half_sample = castAsScalar (round (dim_sample / 2 + 0.1 + zero)); + rnd1 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0); + rnd2 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0); + rnd_normal_1 = sqrt (- 2.0 * log (rnd1)) * sin (2 * pi * rnd2); + rnd_normal_2 = sqrt (- 2.0 * log (rnd1)) * cos (2 * pi * rnd2); + rnd_normal = matrix (0.0, rows = dim_sample, cols = 1); + rnd_normal [1 : dim_half_sample, ] = rnd_normal_1; + rnd_normal [(dim_sample - dim_half_sample + 1) : dim_sample, ] = rnd_normal_2; + + # Initialize updaters + + freeVars_updater = freeVars * 0.0; + params_updater = params * 0.0; + regresValues_updater = regresValues * 0.0; + regresParams_updater = regresParams * 0.0; + bilinear_updater_vector = bilinear_form_vector * 0.0; + + # Perform the sampling + + for (idx in 1:dim_sample) + { + # Generate the sample unit-vector and updaters + + if (castAsScalar (isVar [1, idx]) > 0.5) { + freeVars_updater = SampleOrder [1 : num_frees, idx]; + regresValues_updater = RegresValueMap %*% CReps %*% freeVars_updater; + bilinear_updater_vector = regresValues_updater * regresParams; + } else { + params_updater = SampleOrder [(num_frees + 1) : dim_sample, idx]; + regresParams_updater = RegresParamMap %*% params_updater; + bilinear_updater_vector = regresValues * regresParams_updater; + } + bilinear_updater = matrix (bilinear_updater_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE); + + # Compute the quadratic by three shift-points: -1, 0, +1 + + bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form)); + q_minus_1 = sum (RegresScaleMult * rowSums (bilinear_form - bilinear_updater) * rowSums (bilinear_form - bilinear_updater)); + q_plus_1 = sum (RegresScaleMult * rowSums (bilinear_form + bilinear_updater) * rowSums (bilinear_form + bilinear_updater)); + coeff_b = (q_plus_1 - q_minus_1) / 2.0; + coeff_a = (q_plus_1 + q_minus_1) / 2.0 - bilinear_form_value; + + # Find the mean and the sigma for f(x) ~ exp (- (ax^2 + bx + c)), + # then compute the shift to get the new sample + + mean_shift = - coeff_b / (2.0 * coeff_a); + sigma_shift = 1.0 / sqrt (2.0 * coeff_a); + shift = mean_shift + sigma_shift * castAsScalar (rnd_normal [idx, 1]); + +# BEGIN DEBUG INSERT +# mmm = 1; +# if (castAsScalar (isVar [1, idx]) > 0.5 & # IT IS A FREE VARIABLE, NOT A PARAMETER +# castAsScalar (freeVars_updater [mmm, 1]) > 0) # IT IS mmm-TH FREE VARIABLE +# { +# # print ("freeVars[" + mmm + "]: q_minus_1 = " + q_minus_1 + ", q_plus_1 = " + q_plus_1 + ", coeff_a = " + coeff_a + ", coeff_b = " + coeff_b); +# print ("freeVars[" + mmm + "]: 
q_minus_1 = " + q_minus_1 + ", q_plus_1 = " + q_plus_1 + ", mean_shift = " + mean_shift + ", sigma_shift = " + sigma_shift + ", shift = " + shift); +# } +# if (castAsScalar (isVar [1, idx]) <= 0.5 & # IT IS A PARAMETER, NOT A FREE VARIABLE +# castAsScalar (params_updater [mmm, 1]) > 0) # IT IS mmm-TH PARAMETER +# { +# # print (" params[" + mmm + "]: q_minus_1 = " + q_minus_1 + ", q_plus_1 = " + q_plus_1 + ", coeff_a = " + coeff_a + ", coeff_b = " + coeff_b); +# print (" params[" + mmm + "]: q_minus_1 = " + q_minus_1 + ", q_plus_1 = " + q_plus_1 + ", mean_shift = " + mean_shift + ", sigma_shift = " + sigma_shift + ", shift = " + shift); +# } +# END DEBUG INSERT + + # Perform the updates + + bilinear_form = bilinear_form + shift * bilinear_updater; + if (castAsScalar (isVar [1, idx]) > 0.5) { + freeVars = freeVars + shift * freeVars_updater; + regresValues = regresValues + shift * regresValues_updater; + } else { + params = params + shift * params_updater; + regresParams = regresParams + shift * regresParams_updater; + } + } + + # Update / adjust the reports and the parameters + + reports = CReps %*% freeVars + dReps; + reports_matrix = matrix (reports, rows = num_attrs, cols = num_terms, byrow = FALSE); + + # Make an observation of the reports and/or the parameters + + if (is_observed > 0) + { + sum_of_observed_reports = sum_of_observed_reports + reports_matrix; + num_of_observed_reports = num_of_observed_reports + 1; + + sum_of_observed_params = sum_of_observed_params + params; + sum_of_observed_losses = sum_of_observed_losses + bilinear_form_value; + } + +# v1 =castAsScalar(round(10000*reports[1 + (num_terms - 1) * num_attrs, 1])/10000); +# v2 =castAsScalar(round(10000*reports[2 + (num_terms - 1) * num_attrs, 1])/10000); +# v3 =castAsScalar(round(10000*reports[3 + (num_terms - 1) * num_attrs, 1])/10000); +# v4 =castAsScalar(round(10000*reports[4 + (num_terms - 1) * num_attrs, 1])/10000); +# w1 =castAsScalar(round(10000*reports_matrix[ 1,num_terms])/10000); +# w2 =castAsScalar(round(10000*reports_matrix[ 2,num_terms])/10000); +# w3 =castAsScalar(round(10000*reports_matrix[ 3,num_terms])/10000); +# w4 =castAsScalar(round(10000*reports_matrix[ 4,num_terms])/10000); + +# v5 =castAsScalar(round(reports_matrix[ 5,num_terms])); +# v8 =castAsScalar(round(reports_matrix[ 8,num_terms])); +# v9 =castAsScalar(round(reports_matrix[ 9,num_terms])); +# v10=castAsScalar(round(reports_matrix[10,num_terms])); +# v16=castAsScalar(round(reports_matrix[16,num_terms])); +# v19=castAsScalar(round(reports_matrix[19,num_terms])); + +#print (" Sample = 1:" + v1 + ", 2:" + v2 + ", 3:" + v3 + ", 4:" + v4); +## + ", 5:" + v5 + ", 8:" + v8 + ", 9:" + v9 + ", 10:" + v10 + ", 16:" + v16 + ", 19:" + v19); +#print (" Sample = 1:" + w1 + ", 2:" + w2 + ", 3:" + w3 + ", 4:" + w4); +## + ", 5:" + w5 + ", 8:" + w8 + ", 9:" + w9 + ", 10:" + w10 + ", 16:" + w16 + ", 19:" + w19); + +} + +print ("Average observed loss = " + (sum_of_observed_losses / num_of_observed_reports)); +print ("Writing out the results..."); + +avg_reports_matrix = sum_of_observed_reports / num_of_observed_reports; +avg_params = sum_of_observed_params / num_of_observed_reports; +write (avg_reports_matrix, $11, format="text"); +write (avg_params, $12, format="text"); + +print ("END ImputeGaussMCMC");
