http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/impute/wfundInputGenerator.The0thReportAttempt.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/impute/wfundInputGenerator.The0thReportAttempt.dml b/src/test/scripts/applications/impute/wfundInputGenerator.The0thReportAttempt.dml index 18c8eb9..6514b63 100644 --- a/src/test/scripts/applications/impute/wfundInputGenerator.The0thReportAttempt.dml +++ b/src/test/scripts/applications/impute/wfundInputGenerator.The0thReportAttempt.dml @@ -1,501 +1,501 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -#------------------------------------------------------------- - -# hadoop jar SystemML.jar -f test/scripts/applications/impute/wfundInputGenerator.dml -exec singlenode -# -args -# test/scripts/applications/impute/initial_reports -# test/scripts/applications/impute/initial_reports_preprocessed -# test/scripts/applications/impute/CReps -# test/scripts/applications/impute/RegresValueMap -# test/scripts/applications/impute/RegresFactorDefault -# test/scripts/applications/impute/RegresParamMap -# test/scripts/applications/impute/RegresCoeffDefault -# test/scripts/applications/impute/RegresScaleMult - -initial_reports = read ($1); - -is_GROUP_4_ENABLED = 0; # = 1 or 0 ("0" if Group-4 = all 0s) -num_EXTRA_MISSING_FREES = 0; # = 0 ("3" or "6" for Uganda) - -num_known_terms = 5; # The number of known term reports -num_predicted_terms = 1; # The number of predicted (future) term reports - -num_terms = num_known_terms + num_predicted_terms + 1; # We predict the "0-th" report, too -num_attrs = 19; - -num_frees_per_term = 13; -if (is_GROUP_4_ENABLED == 1) { - num_frees_per_term = 15; -} -num_regular_frees = (num_predicted_terms + 1) * num_frees_per_term; -num_frees = num_regular_frees + num_EXTRA_MISSING_FREES; - -zero = matrix (0.0, rows = 1, cols = 1); - -# --------------------------------------------------------- -# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS -# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS -# --------------------------------------------------------- - -CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees); - -for (dt in 0:num_predicted_terms) -{ - ta_shift = 0; - if (dt > 0) { - ta_shift = (num_known_terms + dt) * num_attrs; - } - fv_shift = dt * num_frees_per_term; - -# constraint that row1 = row2 + row3 + row4 + row5 + row6 + row7 -# translated to free vars: row1 = free1 + free2 + free3 + free4 + free5 + free6 - for (i in 1:6) { - CReps [ta_shift + 1, fv_shift + i] = 1.0 + zero; - CReps [ta_shift + 1 + i, fv_shift 
+ i] = 1.0 + zero; - } -# row 8 is free variable not appearing in any non-free variable - CReps [ta_shift + 8, fv_shift + 7] = 1.0 + zero; - -# constraint that row9 = row10 + row11 + row12 + row13 + row14 + row15 -# translated to free vars: row9 = free8 + free9 + free10 + free11 + free12 + free13 - for (i in 1:6) { - CReps [ta_shift + 9, fv_shift + 7 + i] = 1.0 + zero; - CReps [ta_shift + 9 + i, fv_shift + 7 + i] = 1.0 + zero; - } -# constraint that row16 = row14 + row15 -# translated to free vars: row16 = free14 + free15 -if (is_GROUP_4_ENABLED == 1) { - for (i in 1:2) { - CReps [ta_shift + 16, fv_shift + 13 + i] = 1.0 + zero; - CReps [ta_shift + 16 + i, fv_shift + 13 + i] = 1.0 + zero; - } -} -# constraint that row19 = total cost (all free variables) -# translated to free vars: row19 = all free variables - for (i in 1:num_frees_per_term) { - CReps [ta_shift + 19, fv_shift + i] = 1.0 + zero; - } -} - -# --------------------------------------------------------- -# SPECIAL FREE VARIABLES TO HANDLE UGANDA'S MISSING VALUES -# --------------------------------------------------------- - -if (num_EXTRA_MISSING_FREES == 3 | num_EXTRA_MISSING_FREES == 6) -{ - ta_shift = 3 * num_attrs; - CReps [ta_shift + 4, num_regular_frees + 1] = 1.0 + zero; - CReps [ta_shift + 5, num_regular_frees + 2] = 1.0 + zero; - CReps [ta_shift + 6, num_regular_frees + 3] = 1.0 + zero; - CReps [ta_shift + 7, num_regular_frees + 1] = -1.0 + zero; - CReps [ta_shift + 7, num_regular_frees + 2] = -1.0 + zero; - CReps [ta_shift + 7, num_regular_frees + 3] = -1.0 + zero; -} - -if (num_EXTRA_MISSING_FREES == 6) -{ - ta_shift = 7 * num_attrs; - CReps [ta_shift + 4, num_regular_frees + 4] = 1.0 + zero; - CReps [ta_shift + 5, num_regular_frees + 5] = 1.0 + zero; - CReps [ta_shift + 6, num_regular_frees + 6] = 1.0 + zero; - CReps [ta_shift + 7, num_regular_frees + 4] = -1.0 + zero; - CReps [ta_shift + 7, num_regular_frees + 5] = -1.0 + zero; - CReps [ta_shift + 7, num_regular_frees + 6] = -1.0 + zero; -} - - 
-# --------------------------------------------------------------------------------------- -# -# In all regressions, except the last few "special" ones, there are 4 factors: -# x[t] ~ aggregate[t], x[t-1], (x[t-1] - x[t-2]) -# The last regressions are for regularization, but they also follow the 4-factor pattern. - -num_factors = 4; - -# We have one regression equation per time-term for each attribute, -# plus a few "special" regularization regression equations: - -num_special_regs = 12; -if (is_GROUP_4_ENABLED == 1) { - num_special_regs = 16; -} - -num_reg_eqs = num_terms * num_attrs + num_special_regs; - -RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs)); -RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1); - -# All regression equations for the same attribute share the same parameters, regardless -# of the term; some parameters are shared across multiple attributes, (those attributes -# whose behavior is believed to be similar) as specified in the table below: - -num_params = 28; -if (is_GROUP_4_ENABLED == 1) { - num_params = 35; -} - -# Factors: -self[t] total[t] self[t-1] self[t-1]- -# self[t-2] -# PARAMS: -# Group 1: 1.0 prm#01 prm#08 prm#09 Row #01 = free#01 + ... + free#06 -# Group 1: " prm#02 prm#10 prm#11 Row #02 = free#01 -# Group 1: " prm#03 " " Row #03 = free#02 -# Group 1: " prm#04 " " Row #04 = free#03 -# Group 1: " prm#05 " " Row #05 = free#04 -# Group 1: " prm#06 " " Row #06 = free#05 -# Group 1: " prm#07 " " Row #07 = free#06 -# -------------------------------------------------------------------- -# Group 2: 1.0 prm#12 prm#13 prm#14 Row #08 = free#07 -# -------------------------------------------------------------------- -# Group 3: 1.0 prm#15 prm#22 prm#23 Row #09 = free#08 + ... 
+ free#13 -# Group 3: " prm#16 prm#24 prm#25 Row #10 = free#08 -# Group 3: " prm#17 " " Row #11 = free#09 -# Group 3: " prm#18 " " Row #12 = free#10 -# Group 3: " prm#19 " " Row #13 = free#11 -# Group 3: " prm#20 " " Row #14 = free#12 -# Group 3: " prm#21 " " Row #15 = free#13 -# -------------------------------------------------------------------- -# GROUP-4 ZEROS: FIVE PARAMETERS REVOKED -# Group 4: 1.0 prm#29 prm#32 prm#33 Row #16 = free#14 + free#15 -# Group 4: " prm#30 prm#34 prm#35 Row #17 = free#14 -# Group 4: " prm#31 " " Row #18 = free#15 -# -------------------------------------------------------------------- -# Group 5: 1.0 prm#26 prm#27 prm#28 Row #19 = free#01 + ... + free#15 -# -# (The aggregates in Groups 1..4 regress on the total cost in Group 5; -# the total cost in Group 5 regresses on the intercept.) - -# THE LAST FEW "SPECIAL" REGULARIZATION EQUATIONS: -# Factors: 1.0 -1.0 0.0 0.0 -# PARAMS: -# prm#27 1.0 0.0 0.0 # self[t-1] -# prm#28 0.0 0.0 0.0 # trend -# prm#08 0.0 0.0 0.0 # self[t-1] -# prm#09 0.0 0.0 0.0 # trend -# prm#10 0.0 0.0 0.0 # self[t-1] -# prm#11 0.0 0.0 0.0 # trend -# prm#13 0.0 0.0 0.0 # self[t-1] -# prm#14 0.0 0.0 0.0 # trend -# prm#22 0.0 0.0 0.0 # self[t-1] -# prm#23 0.0 0.0 0.0 # trend -# prm#24 0.0 0.0 0.0 # self[t-1] -# prm#25 0.0 0.0 0.0 # trend -### GROUP-4 ZEROS: THESE EQUATIONS USE REVOKED PARAMETERS AND DO NOT APPEAR -# prm#32 0.0 0.0 0.0 # self[t-1] -# prm#33 0.0 0.0 0.0 # trend -# prm#34 0.0 0.0 0.0 # self[t-1] -# prm#35 0.0 0.0 0.0 # trend -# -# --------------------------------------------------------------------------------------- - - - -# --------------------------------------------------------- -# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS -# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS -# --------------------------------------------------------- - - -for (t in 1 : num_terms) { - for (i in 1 : num_attrs) { - -reg_index = ((t-1) * num_attrs + i - 1) * num_factors; - -# 
------------------------------- -# SETTING FACTORS #1, #3, and #4: -# ------------------------------- - -if (t == 1 & i != 19) { # THESE "REGRESSIONS" ARE DIFFERENT (MORE LIKE REGULARIZATIONS): - RegresValueMap [reg_index + 1, (t-1) * num_attrs + i ] = -1.0 + zero; # 1st factor: -x[t] - RegresValueMap [reg_index + 3, (t-1) * num_attrs + i ] = 1.5 + zero; # 3rd factor is approximated as: - RegresValueMap [reg_index + 3, t * num_attrs + i ] = -0.3 + zero; # 1.5 x[t] - 0.3 x[t+1] - 0.2 x[t+2] = - RegresValueMap [reg_index + 3, (t+1) * num_attrs + i ] = -0.2 + zero; # x[t] - 0.5 (x[t+1] - x[t]) - 0.2 (x[t+2] - x[t+1]) -} -if (t == 2) { - RegresValueMap [reg_index + 1, (t-1) * num_attrs + i ] = -1.0 + zero; # 1st factor: -x[t] - RegresValueMap [reg_index + 3, (t-2) * num_attrs + i ] = 1.0 + zero; # 3rd factor: x[t-1] - w = 0.5; - RegresValueMap [reg_index + 4, (t-2) * num_attrs + i ] = (- 1 - w) + zero; # 4th factor is approximated as: - RegresValueMap [reg_index + 4, (t-1) * num_attrs + i ] = (1 + 2*w) + zero; # - (1+w)x[t-1] + (1+2w)x[t] - w x[t+1] = - RegresValueMap [reg_index + 4, t * num_attrs + i ] = (- w) + zero; # (x[t]-x[t-1]) - w * ((x[t+1]-x[t]) - (x[t]-x[t-1])) -} -if (t >= 3) { - RegresValueMap [reg_index + 1, (t-1) * num_attrs + i ] = -1.0 + zero; # 1st factor: -x[t] - RegresValueMap [reg_index + 3, (t-2) * num_attrs + i ] = 1.0 + zero; # 3rd factor: x[t-1] - RegresValueMap [reg_index + 4, (t-2) * num_attrs + i ] = 1.0 + zero; # 4th factor is - RegresValueMap [reg_index + 4, (t-3) * num_attrs + i ] = -1.0 + zero; # x[t-1] - x[t-2] -} - -# ------------------------------------------- -# SETTING FACTOR #2 DEPENDS ON THE ATTRIBUTE: -# ------------------------------------------- - -if (i == 1) { # GROUP 1 SUBTOTAL - RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t] -} -if (2 <= i & i <= 7) { # GROUP 1 ATTRIBUTES - RegresValueMap [reg_index + 2, (t-1) * num_attrs + 1] = 1.0 + zero; # 2nd factor: Row#01[t] -} - -if (i == 
8) { # GROUP 2 SUBTOTAL - RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t] -} - -if (i == 9) { # GROUP 3 SUBTOTAL - RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t] -} -if (10 <= i & i <= 15) { # GROUP 3 ATTRIBUTES: - RegresValueMap [reg_index + 2, (t-1) * num_attrs + 9] = 1.0 + zero; # 2nd factor: Row#09[t] -} - -if (i == 16) { # GROUP 4 SUBTOTAL - RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t] -} -if (17 <= i & i <= 18) { # GROUP 4 ATTRIBUTES: - RegresValueMap [reg_index + 2, (t-1) * num_attrs + 16] = 1.0 + zero; # 2nd factor: Row#16[t] -} - -if (i == 19 & t >= 2) { # THE TOTAL, ONLY FOR t >= 2 - RegresFactorDefault [reg_index + 2, 1] = 1.0 + zero; # 2nd factor: Intercept -} - -### -### SPECIAL REGULARIZATION EQUATIONS FOR PARAMETERS ARE HANDLED SEPARATELY! -### - -}} - - -# ---------------------------------------------------------- -# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS -# AT REGRESSION FACTORS: A LINEAR MAP + A VECTOR OF DEFAULTS -# ---------------------------------------------------------- - -RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params); -RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1); - -for (t in 1 : num_terms) { - ta_shift = (t-1) * num_attrs - 1; - -# Group 1 attributes: - reg_index = (ta_shift + 1) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 1] = 1.0 + zero; # Param #01 - RegresParamMap [reg_index + 3, 8] = 1.0 + zero; # Param #08 - RegresParamMap [reg_index + 4, 9] = 1.0 + zero; # Param #09 - for (i in 2 : 7) { - reg_index = (ta_shift + i) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, i] = 1.0 + zero; # Param #02-#07 - RegresParamMap [reg_index + 3, 10] = 1.0 + 
zero; # Param #10 - RegresParamMap [reg_index + 4, 11] = 1.0 + zero; # Param #11 - } - -# Group 2 attribute: - reg_index = (ta_shift + 8) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 12] = 1.0 + zero; # Param #12 - RegresParamMap [reg_index + 3, 13] = 1.0 + zero; # Param #13 - RegresParamMap [reg_index + 4, 14] = 1.0 + zero; # Param #14 - -# Group 3 attributes: - reg_index = (ta_shift + 9) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 15] = 1.0 + zero; # Param #17 - RegresParamMap [reg_index + 3, 22] = 1.0 + zero; # Param #22 - RegresParamMap [reg_index + 4, 23] = 1.0 + zero; # Param #23 - for (i in 10 : 15) { - reg_index = (ta_shift + i) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 6 + i] = 1.0 + zero; # Param #16-#21 - RegresParamMap [reg_index + 3, 24] = 1.0 + zero; # Param #24 - RegresParamMap [reg_index + 4, 25] = 1.0 + zero; # Param #25 - } - -# Group 4 attributes: -if (is_GROUP_4_ENABLED == 1) { - reg_index = (ta_shift + 16) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 29] = 1.0 + zero; # Param #29 - RegresParamMap [reg_index + 3, 32] = 1.0 + zero; # Param #32 - RegresParamMap [reg_index + 4, 33] = 1.0 + zero; # Param #33 - for (i in 17 : 18) { - reg_index = (ta_shift + i) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 13 + i] = 1.0 + zero; # Param #30-#31 - RegresParamMap [reg_index + 3, 34] = 1.0 + zero; # Param #34 - RegresParamMap [reg_index + 4, 35] = 1.0 + zero; # Param #35 - } -} - -# Group 5 attribute: - reg_index = (ta_shift + 19) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - 
RegresParamMap [reg_index + 2, 26] = 1.0 + zero; # Param #26 - RegresParamMap [reg_index + 3, 27] = 1.0 + zero; # Param #27 - RegresParamMap [reg_index + 4, 28] = 1.0 + zero; # Param #28 -} - - -# ---------------------------------------------------------------------- -# GENERATE A VECTOR OF SCALE MULTIPLIERS ("WEIGHTS"), ONE PER REGRESSION -# ---------------------------------------------------------------------- - -RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1); - -global_weight = 0.5 + zero; - -attribute_size = rowMeans (abs (initial_reports [, 1:num_known_terms])); -max_attr_size = max (attribute_size); - -for (t in 1 : num_terms) { - for (i in 1 : num_attrs) { - - scale_down = sqrt (attribute_size [i, 1] / max_attr_size) * 0.999 + 0.001; - acceptable_drift = scale_down * max_attr_size * 0.002; - if (t == 1) { - acceptable_drift = acceptable_drift * 10; - } - - regeqn = (t-1) * num_attrs + i; - RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift ^ 2); - -}} - - - - -# ---------------------------------------------------------------- -# SPECIAL REGULARIZATION EQUATIONS FOR PARAMETERS -# GENERATE ALL THEIR AFFINE MAPS AND SCALE MULTIPLIERS ("WEIGHTS") -# ---------------------------------------------------------------- - -acceptable_drift = 0.02; - -# DO WHAT (ALMOST) ALL REGULARIZATIONS NEED -for (i in 1:num_special_regs) { - reg_index = (num_reg_eqs - num_special_regs + i - 1) * num_factors; - RegresFactorDefault [reg_index + 1, 1] = 1.0 + zero; - RegresFactorDefault [reg_index + 2, 1] = -1.0 + zero; - regeqn = num_reg_eqs - num_special_regs + i; - RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift ^ 2); -} - -reg_index = (num_reg_eqs - num_special_regs) * num_factors; - -# PARAMETER #27, TOTAL's "self[t-1]" - RegresParamMap [reg_index + 1, 27] = 1.0 + zero; - RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero; - - regeqn = num_reg_eqs - num_special_regs + 1; - drift_acceptable_here = acceptable_drift / 4; - RegresScaleMult 
[regeqn, 1] = global_weight / (drift_acceptable_here ^ 2); - -reg_index = reg_index + num_factors; - -# PARAMETER #28, TOTAL's "trend" - RegresParamMap [reg_index + 1, 28] = 1.0 + zero; - RegresCoeffDefault [reg_index + 2, 1] = 0.7 + zero; -### RegresParamMap [reg_index + 2, 27] = 1.0 + zero; -reg_index = reg_index + num_factors; - -# PARAMETER #08, GROUP-1 SUBTOTAL's "self[t-1]" - RegresParamMap [reg_index + 1, 08] = 1.0 + zero; -reg_index = reg_index + num_factors; - -# PARAMETER #09, GROUP-1 SUBTOTAL's "trend" - RegresParamMap [reg_index + 1, 09] = 1.0 + zero; - RegresParamMap [reg_index + 2, 08] = 1.0 + zero; -reg_index = reg_index + num_factors; - -# PARAMETER #10, GROUP-1 VALUE's "self[t-1]" - RegresParamMap [reg_index + 1, 10] = 1.0 + zero; -reg_index = reg_index + num_factors; - -# PARAMETER #11, GROUP-1 VALUE's "trend" - RegresParamMap [reg_index + 1, 11] = 1.0 + zero; - RegresParamMap [reg_index + 2, 10] = 1.0 + zero; -reg_index = reg_index + num_factors; - -# PARAMETER #13, GROUP-2 SUBTOTAL's "self[t-1]" - RegresParamMap [reg_index + 1, 13] = 1.0 + zero; -reg_index = reg_index + num_factors; - -# PARAMETER #14, GROUP-2 SUBTOTAL's "trend" - RegresParamMap [reg_index + 1, 14] = 1.0 + zero; - RegresParamMap [reg_index + 2, 13] = 1.0 + zero; -reg_index = reg_index + num_factors; - -# PARAMETER #22, GROUP-3 SUBTOTAL's "self[t-1]" - RegresParamMap [reg_index + 1, 22] = 1.0 + zero; -reg_index = reg_index + num_factors; - -# PARAMETER #23, GROUP-3 SUBTOTAL's "trend" - RegresParamMap [reg_index + 1, 23] = 1.0 + zero; - RegresParamMap [reg_index + 2, 22] = 1.0 + zero; -reg_index = reg_index + num_factors; - -# PARAMETER #24, GROUP-3 VALUE's "self[t-1]" - RegresParamMap [reg_index + 1, 24] = 1.0 + zero; -reg_index = reg_index + num_factors; - -# PARAMETER #25, GROUP-3 VALUE's "trend" - RegresParamMap [reg_index + 1, 25] = 1.0 + zero; - RegresParamMap [reg_index + 2, 24] = 1.0 + zero; -reg_index = reg_index + num_factors; - -if (is_GROUP_4_ENABLED == 1) { - -# 
PARAMETER #32, GROUP-4 SUBTOTAL's "self[t-1]" - RegresParamMap [reg_index + 1, 32] = 1.0 + zero; -reg_index = reg_index + num_factors; - -# PARAMETER #33, GROUP-4 SUBTOTAL's "trend" - RegresParamMap [reg_index + 1, 33] = 1.0 + zero; - RegresParamMap [reg_index + 2, 32] = 1.0 + zero; -reg_index = reg_index + num_factors; - -# PARAMETER #34, GROUP-4 VALUE's "self[t-1]" - RegresParamMap [reg_index + 1, 34] = 1.0 + zero; -reg_index = reg_index + num_factors; - -# PARAMETER #35, GROUP-4 VALUE's "trend" - RegresParamMap [reg_index + 1, 35] = 1.0 + zero; - RegresParamMap [reg_index + 2, 34] = 1.0 + zero; -reg_index = reg_index + num_factors; -} - - - -# -------------------------------- -# WRITE OUT ALL GENERATED MATRICES -# -------------------------------- - -initial_reports_preprocessed = matrix (0.0, rows = num_attrs, cols = num_terms); -initial_reports_preprocessed [, 2:(num_known_terms+1)] = initial_reports [, 1:num_known_terms]; - -write (initial_reports_preprocessed, $2, format="text"); -write (CReps, $3, format="text"); -write (RegresValueMap, $4, format="text"); -write (RegresFactorDefault,$5, format="text"); -write (RegresParamMap, $6, format="text"); -write (RegresCoeffDefault, $7, format="text"); -write (RegresScaleMult, $8, format="text"); +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +# hadoop jar SystemML.jar -f test/scripts/applications/impute/wfundInputGenerator.dml -exec singlenode +# -args +# test/scripts/applications/impute/initial_reports +# test/scripts/applications/impute/initial_reports_preprocessed +# test/scripts/applications/impute/CReps +# test/scripts/applications/impute/RegresValueMap +# test/scripts/applications/impute/RegresFactorDefault +# test/scripts/applications/impute/RegresParamMap +# test/scripts/applications/impute/RegresCoeffDefault +# test/scripts/applications/impute/RegresScaleMult + +initial_reports = read ($1); + +is_GROUP_4_ENABLED = 0; # = 1 or 0 ("0" if Group-4 = all 0s) +num_EXTRA_MISSING_FREES = 0; # = 0 ("3" or "6" for Uganda) + +num_known_terms = 5; # The number of known term reports +num_predicted_terms = 1; # The number of predicted (future) term reports + +num_terms = num_known_terms + num_predicted_terms + 1; # We predict the "0-th" report, too +num_attrs = 19; + +num_frees_per_term = 13; +if (is_GROUP_4_ENABLED == 1) { + num_frees_per_term = 15; +} +num_regular_frees = (num_predicted_terms + 1) * num_frees_per_term; +num_frees = num_regular_frees + num_EXTRA_MISSING_FREES; + +zero = matrix (0.0, rows = 1, cols = 1); + +# --------------------------------------------------------- +# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS +# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS +# --------------------------------------------------------- + +CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees); + 
+for (dt in 0:num_predicted_terms) +{ + ta_shift = 0; + if (dt > 0) { + ta_shift = (num_known_terms + dt) * num_attrs; + } + fv_shift = dt * num_frees_per_term; + +# constraint that row1 = row2 + row3 + row4 + row5 + row6 + row7 +# translated to free vars: row1 = free1 + free2 + free3 + free4 + free5 + free6 + for (i in 1:6) { + CReps [ta_shift + 1, fv_shift + i] = 1.0 + zero; + CReps [ta_shift + 1 + i, fv_shift + i] = 1.0 + zero; + } +# row 8 is free variable not appearing in any non-free variable + CReps [ta_shift + 8, fv_shift + 7] = 1.0 + zero; + +# constraint that row9 = row10 + row11 + row12 + row13 + row14 + row15 +# translated to free vars: row9 = free8 + free9 + free10 + free11 + free12 + free13 + for (i in 1:6) { + CReps [ta_shift + 9, fv_shift + 7 + i] = 1.0 + zero; + CReps [ta_shift + 9 + i, fv_shift + 7 + i] = 1.0 + zero; + } +# constraint that row16 = row17 + row18 +# translated to free vars: row16 = free14 + free15 +if (is_GROUP_4_ENABLED == 1) { + for (i in 1:2) { + CReps [ta_shift + 16, fv_shift + 13 + i] = 1.0 + zero; + CReps [ta_shift + 16 + i, fv_shift + 13 + i] = 1.0 + zero; + } +} +# constraint that row19 = total cost (all free variables) +# translated to free vars: row19 = all free variables + for (i in 1:num_frees_per_term) { + CReps [ta_shift + 19, fv_shift + i] = 1.0 + zero; + } +} + +# --------------------------------------------------------- +# SPECIAL FREE VARIABLES TO HANDLE UGANDA'S MISSING VALUES +# --------------------------------------------------------- + +if (num_EXTRA_MISSING_FREES == 3 | num_EXTRA_MISSING_FREES == 6) +{ + ta_shift = 3 * num_attrs; + CReps [ta_shift + 4, num_regular_frees + 1] = 1.0 + zero; + CReps [ta_shift + 5, num_regular_frees + 2] = 1.0 + zero; + CReps [ta_shift + 6, num_regular_frees + 3] = 1.0 + zero; + CReps [ta_shift + 7, num_regular_frees + 1] = -1.0 + zero; + CReps [ta_shift + 7, num_regular_frees + 2] = -1.0 + zero; + CReps [ta_shift + 7, num_regular_frees + 3] = -1.0 + zero; +} + +if 
(num_EXTRA_MISSING_FREES == 6) +{ + ta_shift = 7 * num_attrs; + CReps [ta_shift + 4, num_regular_frees + 4] = 1.0 + zero; + CReps [ta_shift + 5, num_regular_frees + 5] = 1.0 + zero; + CReps [ta_shift + 6, num_regular_frees + 6] = 1.0 + zero; + CReps [ta_shift + 7, num_regular_frees + 4] = -1.0 + zero; + CReps [ta_shift + 7, num_regular_frees + 5] = -1.0 + zero; + CReps [ta_shift + 7, num_regular_frees + 6] = -1.0 + zero; +} + + +# --------------------------------------------------------------------------------------- +# +# In all regressions, except the last few "special" ones, there are 4 factors: +# x[t] ~ aggregate[t], x[t-1], (x[t-1] - x[t-2]) +# The last regressions are for regularization, but they also follow the 4-factor pattern. + +num_factors = 4; + +# We have one regression equation per time-term for each attribute, +# plus a few "special" regularization regression equations: + +num_special_regs = 12; +if (is_GROUP_4_ENABLED == 1) { + num_special_regs = 16; +} + +num_reg_eqs = num_terms * num_attrs + num_special_regs; + +RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs)); +RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1); + +# All regression equations for the same attribute share the same parameters, regardless +# of the term; some parameters are shared across multiple attributes, (those attributes +# whose behavior is believed to be similar) as specified in the table below: + +num_params = 28; +if (is_GROUP_4_ENABLED == 1) { + num_params = 35; +} + +# Factors: -self[t] total[t] self[t-1] self[t-1]- +# self[t-2] +# PARAMS: +# Group 1: 1.0 prm#01 prm#08 prm#09 Row #01 = free#01 + ... 
+ free#06 +# Group 1: " prm#02 prm#10 prm#11 Row #02 = free#01 +# Group 1: " prm#03 " " Row #03 = free#02 +# Group 1: " prm#04 " " Row #04 = free#03 +# Group 1: " prm#05 " " Row #05 = free#04 +# Group 1: " prm#06 " " Row #06 = free#05 +# Group 1: " prm#07 " " Row #07 = free#06 +# -------------------------------------------------------------------- +# Group 2: 1.0 prm#12 prm#13 prm#14 Row #08 = free#07 +# -------------------------------------------------------------------- +# Group 3: 1.0 prm#15 prm#22 prm#23 Row #09 = free#08 + ... + free#13 +# Group 3: " prm#16 prm#24 prm#25 Row #10 = free#08 +# Group 3: " prm#17 " " Row #11 = free#09 +# Group 3: " prm#18 " " Row #12 = free#10 +# Group 3: " prm#19 " " Row #13 = free#11 +# Group 3: " prm#20 " " Row #14 = free#12 +# Group 3: " prm#21 " " Row #15 = free#13 +# -------------------------------------------------------------------- +# GROUP-4 ZEROS: SEVEN PARAMETERS REVOKED +# Group 4: 1.0 prm#29 prm#32 prm#33 Row #16 = free#14 + free#15 +# Group 4: " prm#30 prm#34 prm#35 Row #17 = free#14 +# Group 4: " prm#31 " " Row #18 = free#15 +# -------------------------------------------------------------------- +# Group 5: 1.0 prm#26 prm#27 prm#28 Row #19 = free#01 + ... + free#15 +# +# (The aggregates in Groups 1..4 regress on the total cost in Group 5; +# the total cost in Group 5 regresses on the intercept.) 
+ +# THE LAST FEW "SPECIAL" REGULARIZATION EQUATIONS: +# Factors: 1.0 -1.0 0.0 0.0 +# PARAMS: +# prm#27 1.0 0.0 0.0 # self[t-1] +# prm#28 0.0 0.0 0.0 # trend +# prm#08 0.0 0.0 0.0 # self[t-1] +# prm#09 0.0 0.0 0.0 # trend +# prm#10 0.0 0.0 0.0 # self[t-1] +# prm#11 0.0 0.0 0.0 # trend +# prm#13 0.0 0.0 0.0 # self[t-1] +# prm#14 0.0 0.0 0.0 # trend +# prm#22 0.0 0.0 0.0 # self[t-1] +# prm#23 0.0 0.0 0.0 # trend +# prm#24 0.0 0.0 0.0 # self[t-1] +# prm#25 0.0 0.0 0.0 # trend +### GROUP-4 ZEROS: THESE EQUATIONS USE REVOKED PARAMETERS AND DO NOT APPEAR +# prm#32 0.0 0.0 0.0 # self[t-1] +# prm#33 0.0 0.0 0.0 # trend +# prm#34 0.0 0.0 0.0 # self[t-1] +# prm#35 0.0 0.0 0.0 # trend +# +# --------------------------------------------------------------------------------------- + + + +# --------------------------------------------------------- +# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS +# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS +# --------------------------------------------------------- + + +for (t in 1 : num_terms) { + for (i in 1 : num_attrs) { + +reg_index = ((t-1) * num_attrs + i - 1) * num_factors; + +# ------------------------------- +# SETTING FACTORS #1, #3, and #4: +# ------------------------------- + +if (t == 1 & i != 19) { # THESE "REGRESSIONS" ARE DIFFERENT (MORE LIKE REGULARIZATIONS): + RegresValueMap [reg_index + 1, (t-1) * num_attrs + i ] = -1.0 + zero; # 1st factor: -x[t] + RegresValueMap [reg_index + 3, (t-1) * num_attrs + i ] = 1.5 + zero; # 3rd factor is approximated as: + RegresValueMap [reg_index + 3, t * num_attrs + i ] = -0.3 + zero; # 1.5 x[t] - 0.3 x[t+1] - 0.2 x[t+2] = + RegresValueMap [reg_index + 3, (t+1) * num_attrs + i ] = -0.2 + zero; # x[t] - 0.5 (x[t+1] - x[t]) - 0.2 (x[t+2] - x[t+1]) +} +if (t == 2) { + RegresValueMap [reg_index + 1, (t-1) * num_attrs + i ] = -1.0 + zero; # 1st factor: -x[t] + RegresValueMap [reg_index + 3, (t-2) * num_attrs + i ] = 1.0 + zero; # 3rd factor: x[t-1] + w = 0.5; + RegresValueMap 
[reg_index + 4, (t-2) * num_attrs + i ] = (- 1 - w) + zero; # 4th factor is approximated as: + RegresValueMap [reg_index + 4, (t-1) * num_attrs + i ] = (1 + 2*w) + zero; # - (1+w)x[t-1] + (1+2w)x[t] - w x[t+1] = + RegresValueMap [reg_index + 4, t * num_attrs + i ] = (- w) + zero; # (x[t]-x[t-1]) - w * ((x[t+1]-x[t]) - (x[t]-x[t-1])) +} +if (t >= 3) { + RegresValueMap [reg_index + 1, (t-1) * num_attrs + i ] = -1.0 + zero; # 1st factor: -x[t] + RegresValueMap [reg_index + 3, (t-2) * num_attrs + i ] = 1.0 + zero; # 3rd factor: x[t-1] + RegresValueMap [reg_index + 4, (t-2) * num_attrs + i ] = 1.0 + zero; # 4th factor is + RegresValueMap [reg_index + 4, (t-3) * num_attrs + i ] = -1.0 + zero; # x[t-1] - x[t-2] +} + +# ------------------------------------------- +# SETTING FACTOR #2 DEPENDS ON THE ATTRIBUTE: +# ------------------------------------------- + +if (i == 1) { # GROUP 1 SUBTOTAL + RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t] +} +if (2 <= i & i <= 7) { # GROUP 1 ATTRIBUTES + RegresValueMap [reg_index + 2, (t-1) * num_attrs + 1] = 1.0 + zero; # 2nd factor: Row#01[t] +} + +if (i == 8) { # GROUP 2 SUBTOTAL + RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t] +} + +if (i == 9) { # GROUP 3 SUBTOTAL + RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t] +} +if (10 <= i & i <= 15) { # GROUP 3 ATTRIBUTES: + RegresValueMap [reg_index + 2, (t-1) * num_attrs + 9] = 1.0 + zero; # 2nd factor: Row#09[t] +} + +if (i == 16) { # GROUP 4 SUBTOTAL + RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t] +} +if (17 <= i & i <= 18) { # GROUP 4 ATTRIBUTES: + RegresValueMap [reg_index + 2, (t-1) * num_attrs + 16] = 1.0 + zero; # 2nd factor: Row#16[t] +} + +if (i == 19 & t >= 2) { # THE TOTAL, ONLY FOR t >= 2 + RegresFactorDefault [reg_index + 2, 1] = 1.0 + zero; # 2nd factor: Intercept +} + +### +### SPECIAL 
REGULARIZATION EQUATIONS FOR PARAMETERS ARE HANDLED SEPARATELY! +### + +}} + + +# ---------------------------------------------------------- +# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS +# AT REGRESSION FACTORS: A LINEAR MAP + A VECTOR OF DEFAULTS +# ---------------------------------------------------------- + +RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params); +RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1); + +for (t in 1 : num_terms) { + ta_shift = (t-1) * num_attrs - 1; + +# Group 1 attributes: + reg_index = (ta_shift + 1) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 1] = 1.0 + zero; # Param #01 + RegresParamMap [reg_index + 3, 8] = 1.0 + zero; # Param #08 + RegresParamMap [reg_index + 4, 9] = 1.0 + zero; # Param #09 + for (i in 2 : 7) { + reg_index = (ta_shift + i) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, i] = 1.0 + zero; # Param #02-#07 + RegresParamMap [reg_index + 3, 10] = 1.0 + zero; # Param #10 + RegresParamMap [reg_index + 4, 11] = 1.0 + zero; # Param #11 + } + +# Group 2 attribute: + reg_index = (ta_shift + 8) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 12] = 1.0 + zero; # Param #12 + RegresParamMap [reg_index + 3, 13] = 1.0 + zero; # Param #13 + RegresParamMap [reg_index + 4, 14] = 1.0 + zero; # Param #14 + +# Group 3 attributes: + reg_index = (ta_shift + 9) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 15] = 1.0 + zero; # Param #17 + RegresParamMap [reg_index + 3, 22] = 1.0 + zero; # Param #22 + RegresParamMap [reg_index + 4, 23] = 1.0 + zero; # Param #23 + for (i in 10 : 15) { + reg_index = (ta_shift + i) * num_factors; + 
RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 6 + i] = 1.0 + zero; # Param #16-#21 + RegresParamMap [reg_index + 3, 24] = 1.0 + zero; # Param #24 + RegresParamMap [reg_index + 4, 25] = 1.0 + zero; # Param #25 + } + +# Group 4 attributes: +if (is_GROUP_4_ENABLED == 1) { + reg_index = (ta_shift + 16) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 29] = 1.0 + zero; # Param #29 + RegresParamMap [reg_index + 3, 32] = 1.0 + zero; # Param #32 + RegresParamMap [reg_index + 4, 33] = 1.0 + zero; # Param #33 + for (i in 17 : 18) { + reg_index = (ta_shift + i) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 13 + i] = 1.0 + zero; # Param #30-#31 + RegresParamMap [reg_index + 3, 34] = 1.0 + zero; # Param #34 + RegresParamMap [reg_index + 4, 35] = 1.0 + zero; # Param #35 + } +} + +# Group 5 attribute: + reg_index = (ta_shift + 19) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 26] = 1.0 + zero; # Param #26 + RegresParamMap [reg_index + 3, 27] = 1.0 + zero; # Param #27 + RegresParamMap [reg_index + 4, 28] = 1.0 + zero; # Param #28 +} + + +# ---------------------------------------------------------------------- +# GENERATE A VECTOR OF SCALE MULTIPLIERS ("WEIGHTS"), ONE PER REGRESSION +# ---------------------------------------------------------------------- + +RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1); + +global_weight = 0.5 + zero; + +attribute_size = rowMeans (abs (initial_reports [, 1:num_known_terms])); +max_attr_size = max (attribute_size); + +for (t in 1 : num_terms) { + for (i in 1 : num_attrs) { + + scale_down = sqrt (attribute_size [i, 1] / max_attr_size) * 0.999 + 0.001; + acceptable_drift = scale_down * max_attr_size * 0.002; + if (t == 1) { + 
acceptable_drift = acceptable_drift * 10; + } + + regeqn = (t-1) * num_attrs + i; + RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift ^ 2); + +}} + + + + +# ---------------------------------------------------------------- +# SPECIAL REGULARIZATION EQUATIONS FOR PARAMETERS +# GENERATE ALL THEIR AFFINE MAPS AND SCALE MULTIPLIERS ("WEIGHTS") +# ---------------------------------------------------------------- + +acceptable_drift = 0.02; + +# DO WHAT (ALMOST) ALL REGULARIZATIONS NEED +for (i in 1:num_special_regs) { + reg_index = (num_reg_eqs - num_special_regs + i - 1) * num_factors; + RegresFactorDefault [reg_index + 1, 1] = 1.0 + zero; + RegresFactorDefault [reg_index + 2, 1] = -1.0 + zero; + regeqn = num_reg_eqs - num_special_regs + i; + RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift ^ 2); +} + +reg_index = (num_reg_eqs - num_special_regs) * num_factors; + +# PARAMETER #27, TOTAL's "self[t-1]" + RegresParamMap [reg_index + 1, 27] = 1.0 + zero; + RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero; + + regeqn = num_reg_eqs - num_special_regs + 1; + drift_acceptable_here = acceptable_drift / 4; + RegresScaleMult [regeqn, 1] = global_weight / (drift_acceptable_here ^ 2); + +reg_index = reg_index + num_factors; + +# PARAMETER #28, TOTAL's "trend" + RegresParamMap [reg_index + 1, 28] = 1.0 + zero; + RegresCoeffDefault [reg_index + 2, 1] = 0.7 + zero; +### RegresParamMap [reg_index + 2, 27] = 1.0 + zero; +reg_index = reg_index + num_factors; + +# PARAMETER #08, GROUP-1 SUBTOTAL's "self[t-1]" + RegresParamMap [reg_index + 1, 08] = 1.0 + zero; +reg_index = reg_index + num_factors; + +# PARAMETER #09, GROUP-1 SUBTOTAL's "trend" + RegresParamMap [reg_index + 1, 09] = 1.0 + zero; + RegresParamMap [reg_index + 2, 08] = 1.0 + zero; +reg_index = reg_index + num_factors; + +# PARAMETER #10, GROUP-1 VALUE's "self[t-1]" + RegresParamMap [reg_index + 1, 10] = 1.0 + zero; +reg_index = reg_index + num_factors; + +# PARAMETER #11, GROUP-1 VALUE's "trend" + 
RegresParamMap [reg_index + 1, 11] = 1.0 + zero; + RegresParamMap [reg_index + 2, 10] = 1.0 + zero; +reg_index = reg_index + num_factors; + +# PARAMETER #13, GROUP-2 SUBTOTAL's "self[t-1]" + RegresParamMap [reg_index + 1, 13] = 1.0 + zero; +reg_index = reg_index + num_factors; + +# PARAMETER #14, GROUP-2 SUBTOTAL's "trend" + RegresParamMap [reg_index + 1, 14] = 1.0 + zero; + RegresParamMap [reg_index + 2, 13] = 1.0 + zero; +reg_index = reg_index + num_factors; + +# PARAMETER #22, GROUP-3 SUBTOTAL's "self[t-1]" + RegresParamMap [reg_index + 1, 22] = 1.0 + zero; +reg_index = reg_index + num_factors; + +# PARAMETER #23, GROUP-3 SUBTOTAL's "trend" + RegresParamMap [reg_index + 1, 23] = 1.0 + zero; + RegresParamMap [reg_index + 2, 22] = 1.0 + zero; +reg_index = reg_index + num_factors; + +# PARAMETER #24, GROUP-3 VALUE's "self[t-1]" + RegresParamMap [reg_index + 1, 24] = 1.0 + zero; +reg_index = reg_index + num_factors; + +# PARAMETER #25, GROUP-3 VALUE's "trend" + RegresParamMap [reg_index + 1, 25] = 1.0 + zero; + RegresParamMap [reg_index + 2, 24] = 1.0 + zero; +reg_index = reg_index + num_factors; + +if (is_GROUP_4_ENABLED == 1) { + +# PARAMETER #32, GROUP-4 SUBTOTAL's "self[t-1]" + RegresParamMap [reg_index + 1, 32] = 1.0 + zero; +reg_index = reg_index + num_factors; + +# PARAMETER #33, GROUP-4 SUBTOTAL's "trend" + RegresParamMap [reg_index + 1, 33] = 1.0 + zero; + RegresParamMap [reg_index + 2, 32] = 1.0 + zero; +reg_index = reg_index + num_factors; + +# PARAMETER #34, GROUP-4 VALUE's "self[t-1]" + RegresParamMap [reg_index + 1, 34] = 1.0 + zero; +reg_index = reg_index + num_factors; + +# PARAMETER #35, GROUP-4 VALUE's "trend" + RegresParamMap [reg_index + 1, 35] = 1.0 + zero; + RegresParamMap [reg_index + 2, 34] = 1.0 + zero; +reg_index = reg_index + num_factors; +} + + + +# -------------------------------- +# WRITE OUT ALL GENERATED MATRICES +# -------------------------------- + +initial_reports_preprocessed = matrix (0.0, rows = num_attrs, cols = num_terms); 
+initial_reports_preprocessed [, 2:(num_known_terms+1)] = initial_reports [, 1:num_known_terms]; + +write (initial_reports_preprocessed, $2, format="text"); +write (CReps, $3, format="text"); +write (RegresValueMap, $4, format="text"); +write (RegresFactorDefault,$5, format="text"); +write (RegresParamMap, $6, format="text"); +write (RegresCoeffDefault, $7, format="text"); +write (RegresScaleMult, $8, format="text");
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/impute/wfundInputGenerator.pre2013-08-26.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/impute/wfundInputGenerator.pre2013-08-26.dml b/src/test/scripts/applications/impute/wfundInputGenerator.pre2013-08-26.dml index 735efe7..82bf551 100644 --- a/src/test/scripts/applications/impute/wfundInputGenerator.pre2013-08-26.dml +++ b/src/test/scripts/applications/impute/wfundInputGenerator.pre2013-08-26.dml @@ -1,442 +1,442 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -#------------------------------------------------------------- - -# hadoop jar SystemML.jar -f test/scripts/applications/impute/wfundInputGenerator.dml -exec singlenode -# -args -# test/scripts/applications/impute/initial_reports -# test/scripts/applications/impute/CReps -# test/scripts/applications/impute/RegresValueMap -# test/scripts/applications/impute/RegresFactorDefault -# test/scripts/applications/impute/RegresParamMap -# test/scripts/applications/impute/RegresCoeffDefault -# test/scripts/applications/impute/RegresScaleMult - -is_GROUP_4_ENABLED = 1; # = 1 or 0 -num_known_terms = 6; # The number of known term reports, feel free to change -num_predicted_terms = 1; # The number of predicted term reports, feel free to change - -num_terms = num_known_terms + num_predicted_terms; -num_attrs = 19; - -num_frees_per_term = 13; -if (is_GROUP_4_ENABLED == 1) { - num_frees_per_term = 15; -} -num_frees = num_predicted_terms * num_frees_per_term; - -zero = matrix (0.0, rows = 1, cols = 1); - -# --------------------------------------------------------- -# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS -# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS -# --------------------------------------------------------- - -CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees); - -for (dt in 1:num_predicted_terms) -{ - ta_shift = (num_known_terms + dt - 1) * num_attrs; - fv_shift = (dt - 1) * num_frees_per_term; -# constraint that row1 = row2 + row3 + row4 + row5 + row6 + row7 -# translated to free vars: row1 = free1 + free2 + free3 + free4 + free5 + free6 - CReps [ta_shift + 1, fv_shift + 1] = 1.0 + zero; - CReps [ta_shift + 1, fv_shift + 2] = 1.0 + zero; - CReps [ta_shift + 1, fv_shift + 3] = 1.0 + zero; - CReps [ta_shift + 1, fv_shift + 4] = 1.0 + zero; - CReps [ta_shift + 1, fv_shift + 5] = 1.0 + zero; - CReps [ta_shift + 1, fv_shift + 6] = 1.0 + zero; - CReps [ta_shift + 2, fv_shift + 1] = 1.0 + zero; - CReps [ta_shift + 3, fv_shift + 2] = 
1.0 + zero; - CReps [ta_shift + 4, fv_shift + 3] = 1.0 + zero; - CReps [ta_shift + 5, fv_shift + 4] = 1.0 + zero; - CReps [ta_shift + 6, fv_shift + 5] = 1.0 + zero; - CReps [ta_shift + 7, fv_shift + 6] = 1.0 + zero; - -# row 8 is free variable not appearing in any non-free variable - CReps [ta_shift + 8, fv_shift + 7] = 1.0 + zero; - -# constraint that row9 = row10 + row11 + row12 + row13 + row14 + row15 -# translated to free vars: row9 = free8 + free9 + free10 + free11 + free12 + free13 - CReps [ta_shift + 9, fv_shift + 8] = 1.0 + zero; - CReps [ta_shift + 9, fv_shift + 9] = 1.0 + zero; - CReps [ta_shift + 9, fv_shift + 10] = 1.0 + zero; - CReps [ta_shift + 9, fv_shift + 11] = 1.0 + zero; - CReps [ta_shift + 9, fv_shift + 12] = 1.0 + zero; - CReps [ta_shift + 9, fv_shift + 13] = 1.0 + zero; - CReps [ta_shift + 10, fv_shift + 8] = 1.0 + zero; - CReps [ta_shift + 11, fv_shift + 9] = 1.0 + zero; - CReps [ta_shift + 12, fv_shift + 10] = 1.0 + zero; - CReps [ta_shift + 13, fv_shift + 11] = 1.0 + zero; - CReps [ta_shift + 14, fv_shift + 12] = 1.0 + zero; - CReps [ta_shift + 15, fv_shift + 13] = 1.0 + zero; - -# constraint that row16 = row14 + row15 -# translated to free vars: row16 = free14 + free15 - if (is_GROUP_4_ENABLED == 1) { - CReps [ta_shift + 16, fv_shift + 14] = 1.0 + zero; - CReps [ta_shift + 16, fv_shift + 15] = 1.0 + zero; - CReps [ta_shift + 17, fv_shift + 14] = 1.0 + zero; - CReps [ta_shift + 18, fv_shift + 15] = 1.0 + zero; - } - -# constraint that row19 = total cost (all free variables) -# translated to free vars: row19 = all free variables - CReps [ta_shift + 19, fv_shift + 1] = 1.0 + zero; - CReps [ta_shift + 19, fv_shift + 2] = 1.0 + zero; - CReps [ta_shift + 19, fv_shift + 3] = 1.0 + zero; - CReps [ta_shift + 19, fv_shift + 4] = 1.0 + zero; - CReps [ta_shift + 19, fv_shift + 5] = 1.0 + zero; - CReps [ta_shift + 19, fv_shift + 6] = 1.0 + zero; - CReps [ta_shift + 19, fv_shift + 7] = 1.0 + zero; - CReps [ta_shift + 19, fv_shift + 8] = 1.0 + zero; - 
CReps [ta_shift + 19, fv_shift + 9] = 1.0 + zero; - CReps [ta_shift + 19, fv_shift + 10] = 1.0 + zero; - CReps [ta_shift + 19, fv_shift + 11] = 1.0 + zero; - CReps [ta_shift + 19, fv_shift + 12] = 1.0 + zero; - CReps [ta_shift + 19, fv_shift + 13] = 1.0 + zero; - if (is_GROUP_4_ENABLED == 1) { - CReps [ta_shift + 19, fv_shift + 14] = 1.0 + zero; - CReps [ta_shift + 19, fv_shift + 15] = 1.0 + zero; - } -} - -# --------------------------------------------------------- -# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS -# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS -# --------------------------------------------------------- - -# In all regressions, except the last few "special" ones, there are 4 factors: -# x[t] ~ aggregate[t], x[t-1], (x[t-1] - x[t-2]) -# The last regressions are for regularization, but they also follow the 4-factor pattern. -num_factors = 4; - -# We have one regression equation per time-term for each attribute, -# plus a few "special" regularization regression equations: -num_special_regs = 12; -if (is_GROUP_4_ENABLED == 1) { - num_special_regs = 16; -} - -num_reg_eqs = num_terms * num_attrs + num_special_regs; - -RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs)); -RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1); - -# All regression equations for the same attribute share the same parameters, regardless -# of the term; some parameters are shared across multiple attributes, (those attributes -# whose behavior is believed to be similar) as specified in the table below: - -num_params = 28; -if (is_GROUP_4_ENABLED == 1) { - num_params = 35; -} - -# Factors: -self[t] total[t] self[t-1] self[t-1]- -# self[t-2] -# PARAMS: -# Group 1: 1.0 prm#01 prm#08 prm#09 Row #01 = free#01 + ... 
+ free#06 -# Group 1: " prm#02 prm#10 prm#11 Row #02 = free#01 -# Group 1: " prm#03 " " Row #03 = free#02 -# Group 1: " prm#04 " " Row #04 = free#03 -# Group 1: " prm#05 " " Row #05 = free#04 -# Group 1: " prm#06 " " Row #06 = free#05 -# Group 1: " prm#07 " " Row #07 = free#06 -# -------------------------------------------------------------------- -# Group 2: 1.0 prm#12 prm#13 prm#14 Row #08 = free#07 -# -------------------------------------------------------------------- -# Group 3: 1.0 prm#15 prm#22 prm#23 Row #09 = free#08 + ... + free#13 -# Group 3: " prm#16 prm#24 prm#25 Row #10 = free#08 -# Group 3: " prm#17 " " Row #11 = free#09 -# Group 3: " prm#18 " " Row #12 = free#10 -# Group 3: " prm#19 " " Row #13 = free#11 -# Group 3: " prm#20 " " Row #14 = free#12 -# Group 3: " prm#21 " " Row #15 = free#13 -# -------------------------------------------------------------------- -# GROUP-4 ZEROS: FIVE PARAMETERS REVOKED -# Group 4: 1.0 prm#29 prm#32 prm#33 Row #16 = free#14 + free#15 -# Group 4: " prm#30 prm#34 prm#35 Row #17 = free#14 -# Group 4: " prm#31 " " Row #18 = free#15 -# -------------------------------------------------------------------- -# Group 5: 1.0 prm#26 prm#27 prm#28 Row #19 = free#01 + ... + free#15 -# -# (The aggregates in Groups 1..4 regress on the total cost in Group 5; -# the total cost in Group 5 regresses on the intercept.) 
- -# THE LAST FEW "SPECIAL" REGULARIZATION EQUATIONS: -# Factors: 1.0 -1.0 0.0 0.0 -# PARAMS: -# prm#27 1.0 0.0 0.0 -# prm#28 0.0 0.0 0.0 -# prm#08 0.0 0.0 0.0 -# prm#09 0.0 0.0 0.0 -# prm#10 0.0 0.0 0.0 -# prm#11 0.0 0.0 0.0 -# prm#13 0.0 0.0 0.0 -# prm#14 0.0 0.0 0.0 -# prm#22 0.0 0.0 0.0 -# prm#23 0.0 0.0 0.0 -# prm#24 0.0 0.0 0.0 -# prm#25 0.0 0.0 0.0 -# prm#32 0.0 0.0 0.0 # GROUP-4 ZEROS: -# prm#33 0.0 0.0 0.0 # THESE EQUATIONS -# prm#34 0.0 0.0 0.0 # USE REVOKED PARAMETERS -# prm#35 0.0 0.0 0.0 # AND DO NOT APPEAR - - - -for (t in 1 : num_terms) -{ -# Group 1 attributes: - for (i in 1 : 7) { - reg_index = ((t-1) * num_attrs - 1 + i) * num_factors; - RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero; # 1st factor is -x[t] - if (i == 1) { - RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t] - } else { - RegresValueMap [reg_index + 2, (t-1) * num_attrs + 1] = 1.0 + zero; # 2nd factor: Row#01[t] - } - if (t == 1) { - RegresValueMap [reg_index + 3, i] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1] - } else { - RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] = 1.0 + zero; # 3rd factor: x[t-1] - } - if (t >= 3) { - RegresValueMap [reg_index + 4, (t-2) * num_attrs + i] = 1.0 + zero; # 4th factor is - RegresValueMap [reg_index + 4, (t-3) * num_attrs + i] = -1.0 + zero; # x[t-1] - x[t-2] - } - } - -# Group 2 attribute: - reg_index = ((t-1) * num_attrs - 1 + 8) * num_factors; - RegresValueMap [reg_index + 1, (t-1) * num_attrs + 8] = -1.0 + zero; # 1st factor is -x[t] - RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t] - if (t == 1) { - RegresValueMap [reg_index + 3, 8] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1] - } else { - RegresValueMap [reg_index + 3, (t-2) * num_attrs + 8] = 1.0 + zero; # 3rd factor: x[t-1] - } - if (t >= 3) { - RegresValueMap [reg_index + 4, (t-2) * num_attrs + 8] = 1.0 + zero; # 4th factor is - RegresValueMap 
[reg_index + 4, (t-3) * num_attrs + 8] = -1.0 + zero; # x[t-1] - x[t-2] - } - -# Group 3 attributes: - for (i in 9 : 15) { - reg_index = ((t-1) * num_attrs - 1 + i) * num_factors; - RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero; # 1st factor is -x[t] - if (i == 9) { - RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t] - } else { - RegresValueMap [reg_index + 2, (t-1) * num_attrs + 9] = 1.0 + zero; # 2nd factor: Row#09[t] - } - if (t == 1) { - RegresValueMap [reg_index + 3, i] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1] - } else { - RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] = 1.0 + zero; # 3rd factor: x[t-1] - } - if (t >= 3) { - RegresValueMap [reg_index + 4, (t-2) * num_attrs + i] = 1.0 + zero; # 4th factor is - RegresValueMap [reg_index + 4, (t-3) * num_attrs + i] = -1.0 + zero; # x[t-1] - x[t-2] - } - } - -# Group 4 attributes: - for (i in 16 : 18) { - reg_index = ((t-1) * num_attrs - 1 + i) * num_factors; - RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero; # 1st factor is -x[t] - if (i == 16) { - RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t] - } else { - RegresValueMap [reg_index + 2, (t-1) * num_attrs + 16] = 1.0 + zero; # 2nd factor: Row#16[t] - } - if (t == 1) { - RegresValueMap [reg_index + 3, i] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1] - } else { - RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] = 1.0 + zero; # 3rd factor: x[t-1] - } - if (t >= 3) { - RegresValueMap [reg_index + 4, (t-2) * num_attrs + i] = 1.0 + zero; # 4th factor is - RegresValueMap [reg_index + 4, (t-3) * num_attrs + i] = -1.0 + zero; # x[t-1] - x[t-2] - } - } - -# Group 5 attribute: - reg_index = ((t-1) * num_attrs - 1 + 19) * num_factors; - if (t >= 2) { - RegresValueMap [reg_index + 1, (t-1) * num_attrs + 19] = -1.0 + zero; # 1st factor: -x[t] - RegresFactorDefault [reg_index + 2, 1] = 1.0 + zero; # 2nd factor: 
Intercept - RegresValueMap [reg_index + 3, (t-2) * num_attrs + 19] = 1.0 + zero; # 3rd factor: x[t-1] - } - if (t >= 3) { - RegresValueMap [reg_index + 4, (t-2) * num_attrs + 19] = 1.0 + zero; # 4th factor is - RegresValueMap [reg_index + 4, (t-3) * num_attrs + 19] = -1.0 + zero; # x[t-1] - x[t-2] - } -} - -reg_index = num_terms * num_attrs * num_factors; -for (i in 1:num_special_regs) -{ - RegresFactorDefault [reg_index + 1, 1] = 1.0 + zero; - RegresFactorDefault [reg_index + 2, 1] = -1.0 + zero; - reg_index = reg_index + num_factors; -} - -# ---------------------------------------------------------- -# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS -# AT REGRESSION FACTORS: A LINEAR MAP + A VECTOR OF DEFAULTS -# ---------------------------------------------------------- - -RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params); -RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1); - -for (t in 1 : num_terms) { -# Group 1 attributes: - reg_index = ((t-1) * num_attrs - 1 + 1) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 1] = 1.0 + zero; # Param #01 - RegresParamMap [reg_index + 3, 8] = 1.0 + zero; # Param #08 - RegresParamMap [reg_index + 4, 9] = 1.0 + zero; # Param #09 - for (i in 2 : 7) { - reg_index = ((t-1) * num_attrs - 1 + i) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, i] = 1.0 + zero; # Param #02-#07 - RegresParamMap [reg_index + 3, 10] = 1.0 + zero; # Param #10 - RegresParamMap [reg_index + 4, 11] = 1.0 + zero; # Param #11 - } -# Group 2 attribute: - reg_index = ((t-1) * num_attrs - 1 + 8) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 12] = 1.0 + zero; # Param #12 - RegresParamMap [reg_index + 3, 13] = 1.0 + zero; # Param #13 - 
RegresParamMap [reg_index + 4, 14] = 1.0 + zero; # Param #14 -# Group 3 attributes: - reg_index = ((t-1) * num_attrs - 1 + 9) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 15] = 1.0 + zero; # Param #17 - RegresParamMap [reg_index + 3, 22] = 1.0 + zero; # Param #22 - RegresParamMap [reg_index + 4, 23] = 1.0 + zero; # Param #23 - for (i in 10 : 15) { - reg_index = ((t-1) * num_attrs - 1 + i) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 6 + i] = 1.0 + zero; # Param #16-#21 - RegresParamMap [reg_index + 3, 24] = 1.0 + zero; # Param #24 - RegresParamMap [reg_index + 4, 25] = 1.0 + zero; # Param #25 - } - -# Group 4 attributes: -if (is_GROUP_4_ENABLED == 1) { - reg_index = ((t-1) * num_attrs - 1 + 16) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 29] = 1.0 + zero; # Param #29 - RegresParamMap [reg_index + 3, 32] = 1.0 + zero; # Param #32 - RegresParamMap [reg_index + 4, 33] = 1.0 + zero; # Param #33 - for (i in 17 : 18) { - reg_index = ((t-1) * num_attrs - 1 + i) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 13 + i] = 1.0 + zero; # Param #30-#31 - RegresParamMap [reg_index + 3, 34] = 1.0 + zero; # Param #34 - RegresParamMap [reg_index + 4, 35] = 1.0 + zero; # Param #35 - } -} - -# Group 5 attribute: - reg_index = ((t-1) * num_attrs - 1 + 19) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 26] = 1.0 + zero; # Param #26 - RegresParamMap [reg_index + 3, 27] = 1.0 + zero; # Param #27 - RegresParamMap [reg_index + 4, 28] = 1.0 + zero; # Param #28 -} - -reg_index = num_terms * num_attrs * num_factors; - RegresParamMap [reg_index + 1, 27] = 1.0 + zero; # Param #27 - 
RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero; -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 28] = 1.0 + zero; # Param #28 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 08] = 1.0 + zero; # Param #08 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 09] = 1.0 + zero; # Param #09 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 10] = 1.0 + zero; # Param #10 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 11] = 1.0 + zero; # Param #11 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 13] = 1.0 + zero; # Param #13 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 14] = 1.0 + zero; # Param #14 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 22] = 1.0 + zero; # Param #22 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 23] = 1.0 + zero; # Param #23 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 24] = 1.0 + zero; # Param #24 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 25] = 1.0 + zero; # Param #25 - -if (is_GROUP_4_ENABLED == 1) { - reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 32] = 1.0 + zero; # Param #32 - reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 33] = 1.0 + zero; # Param #33 - reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 34] = 1.0 + zero; # Param #34 - reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 35] = 1.0 + zero; # Param #35 -} - -# ---------------------------------------------------------- -# GENERATE A VECTOR OF SCALE MULTIPLIERS, ONE PER REGRESSION -# ---------------------------------------------------------- - -RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1); -initial_reports = read ($1); - -global_weight = 0.5 + zero; - -attribute_size = rowMeans (abs (initial_reports [, 1:num_known_terms])); 
-max_attr_size = max (attribute_size); - -for (t in 1 : num_terms) { - for (i in 1 : num_attrs) { - regeqn = (t-1) * num_attrs + i; - scale_down = sqrt (attribute_size [i, 1] / max_attr_size) * 0.999 + 0.001; - acceptable_drift = scale_down * max_attr_size * 0.001; - RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2); - } -} - -for (i in 1 : num_special_regs) { - regeqn = num_terms * num_attrs + i; - acceptable_drift = 0.01; - RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2); -} - -# -------------------------------- -# WRITE OUT ALL GENERATED MATRICES -# -------------------------------- - -# write (initial_reports, $1, format="text"); -write (CReps, $2, format="text"); -write (RegresValueMap, $3, format="text"); -write (RegresFactorDefault,$4, format="text"); -write (RegresParamMap, $5, format="text"); -write (RegresCoeffDefault, $6, format="text"); -write (RegresScaleMult, $7, format="text"); +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + +# hadoop jar SystemML.jar -f test/scripts/applications/impute/wfundInputGenerator.dml -exec singlenode +# -args +# test/scripts/applications/impute/initial_reports +# test/scripts/applications/impute/CReps +# test/scripts/applications/impute/RegresValueMap +# test/scripts/applications/impute/RegresFactorDefault +# test/scripts/applications/impute/RegresParamMap +# test/scripts/applications/impute/RegresCoeffDefault +# test/scripts/applications/impute/RegresScaleMult + +is_GROUP_4_ENABLED = 1; # = 1 or 0 +num_known_terms = 6; # The number of known term reports, feel free to change +num_predicted_terms = 1; # The number of predicted term reports, feel free to change + +num_terms = num_known_terms + num_predicted_terms; +num_attrs = 19; + +num_frees_per_term = 13; +if (is_GROUP_4_ENABLED == 1) { + num_frees_per_term = 15; +} +num_frees = num_predicted_terms * num_frees_per_term; + +zero = matrix (0.0, rows = 1, cols = 1); + +# --------------------------------------------------------- +# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS +# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS +# --------------------------------------------------------- + +CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees); + +for (dt in 1:num_predicted_terms) +{ + ta_shift = (num_known_terms + dt - 1) * num_attrs; + fv_shift = (dt - 1) * num_frees_per_term; +# constraint that row1 = row2 + row3 + row4 + row5 + row6 + row7 +# translated to free vars: row1 = free1 + free2 + free3 + free4 + free5 + free6 + CReps [ta_shift + 1, fv_shift + 1] = 1.0 + zero; + CReps [ta_shift + 1, fv_shift + 2] = 1.0 + zero; + CReps [ta_shift + 1, fv_shift + 3] = 1.0 + zero; + CReps [ta_shift + 1, fv_shift + 4] = 1.0 + zero; + CReps [ta_shift + 1, fv_shift + 5] = 1.0 + zero; + CReps [ta_shift + 1, fv_shift + 6] = 1.0 + zero; + CReps [ta_shift + 2, fv_shift + 1] = 1.0 + zero; + CReps [ta_shift + 3, fv_shift + 2] = 
1.0 + zero; + CReps [ta_shift + 4, fv_shift + 3] = 1.0 + zero; + CReps [ta_shift + 5, fv_shift + 4] = 1.0 + zero; + CReps [ta_shift + 6, fv_shift + 5] = 1.0 + zero; + CReps [ta_shift + 7, fv_shift + 6] = 1.0 + zero; + +# row 8 is free variable not appearing in any non-free variable + CReps [ta_shift + 8, fv_shift + 7] = 1.0 + zero; + +# constraint that row9 = row10 + row11 + row12 + row13 + row14 + row15 +# translated to free vars: row9 = free8 + free9 + free10 + free11 + free12 + free13 + CReps [ta_shift + 9, fv_shift + 8] = 1.0 + zero; + CReps [ta_shift + 9, fv_shift + 9] = 1.0 + zero; + CReps [ta_shift + 9, fv_shift + 10] = 1.0 + zero; + CReps [ta_shift + 9, fv_shift + 11] = 1.0 + zero; + CReps [ta_shift + 9, fv_shift + 12] = 1.0 + zero; + CReps [ta_shift + 9, fv_shift + 13] = 1.0 + zero; + CReps [ta_shift + 10, fv_shift + 8] = 1.0 + zero; + CReps [ta_shift + 11, fv_shift + 9] = 1.0 + zero; + CReps [ta_shift + 12, fv_shift + 10] = 1.0 + zero; + CReps [ta_shift + 13, fv_shift + 11] = 1.0 + zero; + CReps [ta_shift + 14, fv_shift + 12] = 1.0 + zero; + CReps [ta_shift + 15, fv_shift + 13] = 1.0 + zero; + +# constraint that row16 = row14 + row15 +# translated to free vars: row16 = free14 + free15 + if (is_GROUP_4_ENABLED == 1) { + CReps [ta_shift + 16, fv_shift + 14] = 1.0 + zero; + CReps [ta_shift + 16, fv_shift + 15] = 1.0 + zero; + CReps [ta_shift + 17, fv_shift + 14] = 1.0 + zero; + CReps [ta_shift + 18, fv_shift + 15] = 1.0 + zero; + } + +# constraint that row19 = total cost (all free variables) +# translated to free vars: row19 = all free variables + CReps [ta_shift + 19, fv_shift + 1] = 1.0 + zero; + CReps [ta_shift + 19, fv_shift + 2] = 1.0 + zero; + CReps [ta_shift + 19, fv_shift + 3] = 1.0 + zero; + CReps [ta_shift + 19, fv_shift + 4] = 1.0 + zero; + CReps [ta_shift + 19, fv_shift + 5] = 1.0 + zero; + CReps [ta_shift + 19, fv_shift + 6] = 1.0 + zero; + CReps [ta_shift + 19, fv_shift + 7] = 1.0 + zero; + CReps [ta_shift + 19, fv_shift + 8] = 1.0 + zero; + 
CReps [ta_shift + 19, fv_shift + 9] = 1.0 + zero; + CReps [ta_shift + 19, fv_shift + 10] = 1.0 + zero; + CReps [ta_shift + 19, fv_shift + 11] = 1.0 + zero; + CReps [ta_shift + 19, fv_shift + 12] = 1.0 + zero; + CReps [ta_shift + 19, fv_shift + 13] = 1.0 + zero; + if (is_GROUP_4_ENABLED == 1) { + CReps [ta_shift + 19, fv_shift + 14] = 1.0 + zero; + CReps [ta_shift + 19, fv_shift + 15] = 1.0 + zero; + } +} + +# --------------------------------------------------------- +# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS +# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS +# --------------------------------------------------------- + +# In all regressions, except the last few "special" ones, there are 4 factors: +# x[t] ~ aggregate[t], x[t-1], (x[t-1] - x[t-2]) +# The last regressions are for regularization, but they also follow the 4-factor pattern. +num_factors = 4; + +# We have one regression equation per time-term for each attribute, +# plus a few "special" regularization regression equations: +num_special_regs = 12; +if (is_GROUP_4_ENABLED == 1) { + num_special_regs = 16; +} + +num_reg_eqs = num_terms * num_attrs + num_special_regs; + +RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs)); +RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1); + +# All regression equations for the same attribute share the same parameters, regardless +# of the term; some parameters are shared across multiple attributes, (those attributes +# whose behavior is believed to be similar) as specified in the table below: + +num_params = 28; +if (is_GROUP_4_ENABLED == 1) { + num_params = 35; +} + +# Factors: -self[t] total[t] self[t-1] self[t-1]- +# self[t-2] +# PARAMS: +# Group 1: 1.0 prm#01 prm#08 prm#09 Row #01 = free#01 + ... 
+ free#06 +# Group 1: " prm#02 prm#10 prm#11 Row #02 = free#01 +# Group 1: " prm#03 " " Row #03 = free#02 +# Group 1: " prm#04 " " Row #04 = free#03 +# Group 1: " prm#05 " " Row #05 = free#04 +# Group 1: " prm#06 " " Row #06 = free#05 +# Group 1: " prm#07 " " Row #07 = free#06 +# -------------------------------------------------------------------- +# Group 2: 1.0 prm#12 prm#13 prm#14 Row #08 = free#07 +# -------------------------------------------------------------------- +# Group 3: 1.0 prm#15 prm#22 prm#23 Row #09 = free#08 + ... + free#13 +# Group 3: " prm#16 prm#24 prm#25 Row #10 = free#08 +# Group 3: " prm#17 " " Row #11 = free#09 +# Group 3: " prm#18 " " Row #12 = free#10 +# Group 3: " prm#19 " " Row #13 = free#11 +# Group 3: " prm#20 " " Row #14 = free#12 +# Group 3: " prm#21 " " Row #15 = free#13 +# -------------------------------------------------------------------- +# GROUP-4 ZEROS: SEVEN PARAMETERS (prm#29-prm#35) REVOKED +# Group 4: 1.0 prm#29 prm#32 prm#33 Row #16 = free#14 + free#15 +# Group 4: " prm#30 prm#34 prm#35 Row #17 = free#14 +# Group 4: " prm#31 " " Row #18 = free#15 +# -------------------------------------------------------------------- +# Group 5: 1.0 prm#26 prm#27 prm#28 Row #19 = free#01 + ... + free#15 +# +# (The aggregates in Groups 1..4 regress on the total cost in Group 5; +# the total cost in Group 5 regresses on the intercept.) 
+ +# THE LAST FEW "SPECIAL" REGULARIZATION EQUATIONS: +# Factors: 1.0 -1.0 0.0 0.0 +# PARAMS: +# prm#27 1.0 0.0 0.0 +# prm#28 0.0 0.0 0.0 +# prm#08 0.0 0.0 0.0 +# prm#09 0.0 0.0 0.0 +# prm#10 0.0 0.0 0.0 +# prm#11 0.0 0.0 0.0 +# prm#13 0.0 0.0 0.0 +# prm#14 0.0 0.0 0.0 +# prm#22 0.0 0.0 0.0 +# prm#23 0.0 0.0 0.0 +# prm#24 0.0 0.0 0.0 +# prm#25 0.0 0.0 0.0 +# prm#32 0.0 0.0 0.0 # GROUP-4 ZEROS: +# prm#33 0.0 0.0 0.0 # THESE EQUATIONS +# prm#34 0.0 0.0 0.0 # USE REVOKED PARAMETERS +# prm#35 0.0 0.0 0.0 # AND DO NOT APPEAR + + + +for (t in 1 : num_terms) +{ +# Group 1 attributes: + for (i in 1 : 7) { + reg_index = ((t-1) * num_attrs - 1 + i) * num_factors; + RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero; # 1st factor is -x[t] + if (i == 1) { + RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t] + } else { + RegresValueMap [reg_index + 2, (t-1) * num_attrs + 1] = 1.0 + zero; # 2nd factor: Row#01[t] + } + if (t == 1) { + RegresValueMap [reg_index + 3, i] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1] + } else { + RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] = 1.0 + zero; # 3rd factor: x[t-1] + } + if (t >= 3) { + RegresValueMap [reg_index + 4, (t-2) * num_attrs + i] = 1.0 + zero; # 4th factor is + RegresValueMap [reg_index + 4, (t-3) * num_attrs + i] = -1.0 + zero; # x[t-1] - x[t-2] + } + } + +# Group 2 attribute: + reg_index = ((t-1) * num_attrs - 1 + 8) * num_factors; + RegresValueMap [reg_index + 1, (t-1) * num_attrs + 8] = -1.0 + zero; # 1st factor is -x[t] + RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t] + if (t == 1) { + RegresValueMap [reg_index + 3, 8] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1] + } else { + RegresValueMap [reg_index + 3, (t-2) * num_attrs + 8] = 1.0 + zero; # 3rd factor: x[t-1] + } + if (t >= 3) { + RegresValueMap [reg_index + 4, (t-2) * num_attrs + 8] = 1.0 + zero; # 4th factor is + RegresValueMap 
[reg_index + 4, (t-3) * num_attrs + 8] = -1.0 + zero; # x[t-1] - x[t-2] + } + +# Group 3 attributes: + for (i in 9 : 15) { + reg_index = ((t-1) * num_attrs - 1 + i) * num_factors; + RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero; # 1st factor is -x[t] + if (i == 9) { + RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t] + } else { + RegresValueMap [reg_index + 2, (t-1) * num_attrs + 9] = 1.0 + zero; # 2nd factor: Row#09[t] + } + if (t == 1) { + RegresValueMap [reg_index + 3, i] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1] + } else { + RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] = 1.0 + zero; # 3rd factor: x[t-1] + } + if (t >= 3) { + RegresValueMap [reg_index + 4, (t-2) * num_attrs + i] = 1.0 + zero; # 4th factor is + RegresValueMap [reg_index + 4, (t-3) * num_attrs + i] = -1.0 + zero; # x[t-1] - x[t-2] + } + } + +# Group 4 attributes: + for (i in 16 : 18) { + reg_index = ((t-1) * num_attrs - 1 + i) * num_factors; + RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero; # 1st factor is -x[t] + if (i == 16) { + RegresValueMap [reg_index + 2, (t-1) * num_attrs + 19] = 1.0 + zero; # 2nd factor: Row#19[t] + } else { + RegresValueMap [reg_index + 2, (t-1) * num_attrs + 16] = 1.0 + zero; # 2nd factor: Row#16[t] + } + if (t == 1) { + RegresValueMap [reg_index + 3, i] = 1.0 + zero; # For t = 1 the 3rd factor is x[t] = x[1] + } else { + RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] = 1.0 + zero; # 3rd factor: x[t-1] + } + if (t >= 3) { + RegresValueMap [reg_index + 4, (t-2) * num_attrs + i] = 1.0 + zero; # 4th factor is + RegresValueMap [reg_index + 4, (t-3) * num_attrs + i] = -1.0 + zero; # x[t-1] - x[t-2] + } + } + +# Group 5 attribute: + reg_index = ((t-1) * num_attrs - 1 + 19) * num_factors; + if (t >= 2) { + RegresValueMap [reg_index + 1, (t-1) * num_attrs + 19] = -1.0 + zero; # 1st factor: -x[t] + RegresFactorDefault [reg_index + 2, 1] = 1.0 + zero; # 2nd factor: 
Intercept + RegresValueMap [reg_index + 3, (t-2) * num_attrs + 19] = 1.0 + zero; # 3rd factor: x[t-1] + } + if (t >= 3) { + RegresValueMap [reg_index + 4, (t-2) * num_attrs + 19] = 1.0 + zero; # 4th factor is + RegresValueMap [reg_index + 4, (t-3) * num_attrs + 19] = -1.0 + zero; # x[t-1] - x[t-2] + } +} + +reg_index = num_terms * num_attrs * num_factors; +for (i in 1:num_special_regs) +{ + RegresFactorDefault [reg_index + 1, 1] = 1.0 + zero; + RegresFactorDefault [reg_index + 2, 1] = -1.0 + zero; + reg_index = reg_index + num_factors; +} + +# ---------------------------------------------------------- +# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS +# AT REGRESSION FACTORS: A LINEAR MAP + A VECTOR OF DEFAULTS +# ---------------------------------------------------------- + +RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params); +RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1); + +for (t in 1 : num_terms) { +# Group 1 attributes: + reg_index = ((t-1) * num_attrs - 1 + 1) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 1] = 1.0 + zero; # Param #01 + RegresParamMap [reg_index + 3, 8] = 1.0 + zero; # Param #08 + RegresParamMap [reg_index + 4, 9] = 1.0 + zero; # Param #09 + for (i in 2 : 7) { + reg_index = ((t-1) * num_attrs - 1 + i) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, i] = 1.0 + zero; # Param #02-#07 + RegresParamMap [reg_index + 3, 10] = 1.0 + zero; # Param #10 + RegresParamMap [reg_index + 4, 11] = 1.0 + zero; # Param #11 + } +# Group 2 attribute: + reg_index = ((t-1) * num_attrs - 1 + 8) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 12] = 1.0 + zero; # Param #12 + RegresParamMap [reg_index + 3, 13] = 1.0 + zero; # Param #13 + 
RegresParamMap [reg_index + 4, 14] = 1.0 + zero; # Param #14 +# Group 3 attributes: + reg_index = ((t-1) * num_attrs - 1 + 9) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 15] = 1.0 + zero; # Param #15 + RegresParamMap [reg_index + 3, 22] = 1.0 + zero; # Param #22 + RegresParamMap [reg_index + 4, 23] = 1.0 + zero; # Param #23 + for (i in 10 : 15) { + reg_index = ((t-1) * num_attrs - 1 + i) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 6 + i] = 1.0 + zero; # Param #16-#21 + RegresParamMap [reg_index + 3, 24] = 1.0 + zero; # Param #24 + RegresParamMap [reg_index + 4, 25] = 1.0 + zero; # Param #25 + } + +# Group 4 attributes: +if (is_GROUP_4_ENABLED == 1) { + reg_index = ((t-1) * num_attrs - 1 + 16) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 29] = 1.0 + zero; # Param #29 + RegresParamMap [reg_index + 3, 32] = 1.0 + zero; # Param #32 + RegresParamMap [reg_index + 4, 33] = 1.0 + zero; # Param #33 + for (i in 17 : 18) { + reg_index = ((t-1) * num_attrs - 1 + i) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 13 + i] = 1.0 + zero; # Param #30-#31 + RegresParamMap [reg_index + 3, 34] = 1.0 + zero; # Param #34 + RegresParamMap [reg_index + 4, 35] = 1.0 + zero; # Param #35 + } +} + +# Group 5 attribute: + reg_index = ((t-1) * num_attrs - 1 + 19) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 26] = 1.0 + zero; # Param #26 + RegresParamMap [reg_index + 3, 27] = 1.0 + zero; # Param #27 + RegresParamMap [reg_index + 4, 28] = 1.0 + zero; # Param #28 +} + +reg_index = num_terms * num_attrs * num_factors; + RegresParamMap [reg_index + 1, 27] = 1.0 + zero; # Param #27 + 
RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero; +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 28] = 1.0 + zero; # Param #28 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 08] = 1.0 + zero; # Param #08 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 09] = 1.0 + zero; # Param #09 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 10] = 1.0 + zero; # Param #10 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 11] = 1.0 + zero; # Param #11 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 13] = 1.0 + zero; # Param #13 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 14] = 1.0 + zero; # Param #14 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 22] = 1.0 + zero; # Param #22 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 23] = 1.0 + zero; # Param #23 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 24] = 1.0 + zero; # Param #24 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 25] = 1.0 + zero; # Param #25 + +if (is_GROUP_4_ENABLED == 1) { + reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 32] = 1.0 + zero; # Param #32 + reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 33] = 1.0 + zero; # Param #33 + reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 34] = 1.0 + zero; # Param #34 + reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 35] = 1.0 + zero; # Param #35 +} + +# ---------------------------------------------------------- +# GENERATE A VECTOR OF SCALE MULTIPLIERS, ONE PER REGRESSION +# ---------------------------------------------------------- + +RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1); +initial_reports = read ($1); + +global_weight = 0.5 + zero; + +attribute_size = rowMeans (abs (initial_reports [, 1:num_known_terms])); 
+max_attr_size = max (attribute_size); + +for (t in 1 : num_terms) { + for (i in 1 : num_attrs) { + regeqn = (t-1) * num_attrs + i; + scale_down = sqrt (attribute_size [i, 1] / max_attr_size) * 0.999 + 0.001; + acceptable_drift = scale_down * max_attr_size * 0.001; + RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2); + } +} + +for (i in 1 : num_special_regs) { + regeqn = num_terms * num_attrs + i; + acceptable_drift = 0.01; + RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2); +} + +# -------------------------------- +# WRITE OUT ALL GENERATED MATRICES +# -------------------------------- + +# write (initial_reports, $1, format="text"); +write (CReps, $2, format="text"); +write (RegresValueMap, $3, format="text"); +write (RegresFactorDefault,$4, format="text"); +write (RegresParamMap, $5, format="text"); +write (RegresCoeffDefault, $6, format="text"); +write (RegresScaleMult, $7, format="text");
