http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/impute/old/wfundInputGenerator.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/impute/old/wfundInputGenerator.dml b/src/test/scripts/applications/impute/old/wfundInputGenerator.dml index 978dbe7..8f6836c 100644 --- a/src/test/scripts/applications/impute/old/wfundInputGenerator.dml +++ b/src/test/scripts/applications/impute/old/wfundInputGenerator.dml @@ -1,403 +1,403 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -#------------------------------------------------------------- - -# hadoop jar SystemML.jar -f test/scripts/applications/impute/wfundInputGenerator.dml -exec singlenode -# -args -# test/scripts/applications/impute/initial_reports -# test/scripts/applications/impute/CReps -# test/scripts/applications/impute/RegresValueMap -# test/scripts/applications/impute/RegresFactorDefault -# test/scripts/applications/impute/RegresParamMap -# test/scripts/applications/impute/RegresCoeffDefault -# test/scripts/applications/impute/RegresScaleMult - -is_GROUP_4_ENABLED = 0; # = 1 or 0 - -num_terms = 6; # The number of term reports, feel free to change -num_attrs = 19; - -num_frees = 13; -if (is_GROUP_4_ENABLED == 1) { - num_frees = 15; # The estimated last report had 15 degrees of freedom -} - -zero = matrix (0.0, rows = 1, cols = 1); - -# --------------------------------------------------------- -# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS -# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS -# --------------------------------------------------------- - -CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees); - -# constraint that row1 = row2 + row3 + row4 + row5 + row6 + row7 -# translated to free vars: row1 = free1 + free2 + free3 + free4 + free5 + free6 -CReps [(num_terms-1) * num_attrs + 1, 1] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 1, 2] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 1, 3] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 1, 4] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 1, 5] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 1, 6] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 2, 1] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 3, 2] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 4, 3] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 5, 4] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 6, 5] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 7, 6] = 1.0 + zero; - -# row 8 is free variable 
not appearing in any non-free variable -CReps [(num_terms-1) * num_attrs + 8, 7] = 1.0 + zero; - -# constraint that row9 = row10 + row11 + row12 + row13 + row14 + row15 -# translated to free vars: row9 = free8 + free9 + free10 + free11 + free12 + free13 -CReps [(num_terms-1) * num_attrs + 9, 8] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 9, 9] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 9, 10] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 9, 11] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 9, 12] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 9, 13] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 10, 8] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 11, 9] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 12, 10] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 13, 11] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 14, 12] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 15, 13] = 1.0 + zero; - -# constraint that row16 = row14 + row15 -# translated to free vars: row16 = free14 + free15 -if (is_GROUP_4_ENABLED == 1) { - CReps [(num_terms-1) * num_attrs + 16, 14] = 1.0 + zero; - CReps [(num_terms-1) * num_attrs + 16, 15] = 1.0 + zero; - CReps [(num_terms-1) * num_attrs + 17, 14] = 1.0 + zero; - CReps [(num_terms-1) * num_attrs + 18, 15] = 1.0 + zero; -} - -# constraint that row19 = total cost (all free variables) -# translated to free vars: row19 = all free variables -CReps [(num_terms-1) * num_attrs + 19, 1] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 19, 2] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 19, 3] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 19, 4] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 19, 5] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 19, 6] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 19, 7] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 19, 8] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 19, 9] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 19, 10] = 1.0 + zero; -CReps 
[(num_terms-1) * num_attrs + 19, 11] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 19, 12] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 19, 13] = 1.0 + zero; -if (is_GROUP_4_ENABLED == 1) { - CReps [(num_terms-1) * num_attrs + 19, 14] = 1.0 + zero; - CReps [(num_terms-1) * num_attrs + 19, 15] = 1.0 + zero; -} - -# --------------------------------------------------------- -# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS -# AFFINE MAP = LINER MAP + A VECTOR OF DEFAULTS -# --------------------------------------------------------- - -# In all regressions, except the last few "special" ones, there are 4 factors: -# x[t] ~ x[t-1], (x[t-1] - x[t-2]), aggregate[t] -# The last regressions are for regularization, but they also follow the 4-factor pattern. -num_factors = 4; - -# We have one regression equation per time-term, except the first two terms, for each -# attribute, plus a few "special" regularization regression equations: -num_special_regs = 12; -if (is_GROUP_4_ENABLED == 1) { - num_special_regs = 16; -} - -num_reg_eqs = (num_terms - 2) * num_attrs + num_special_regs; - -RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs)); -RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1); - -# All regression equations for the same attribute share the same parameters, regardless -# of the term; some parameters are shared across multiple attributes, (those attributes -# whose behavior is believed to be similar) as specified in the table below: - -num_params = 28; -if (is_GROUP_4_ENABLED == 1) { - num_params = 35; -} - -# Factors: -self[t] self[t-1] self[t-1]- total[t] -# self[t-2] -# PARAMS: -# Group 1: 1.0 prm#01 prm#02 prm#03 Row #01 = free#01 + ... 
+ free#06 -# Group 1: " prm#04 prm#05 prm#06 Row #02 = free#01 -# Group 1: " " " prm#07 Row #03 = free#02 -# Group 1: " " " prm#08 Row #04 = free#03 -# Group 1: " " " prm#09 Row #05 = free#04 -# Group 1: " " " prm#10 Row #06 = free#05 -# Group 1: " " " prm#11 Row #07 = free#06 -# Group 2: 1.0 prm#12 prm#13 prm#14 Row #08 = free#07 -# Group 3: 1.0 prm#15 prm#16 prm#17 Row #09 = free#08 + ... + free#13 -# Group 3: " prm#18 prm#19 prm#20 Row #10 = free#08 -# Group 3: " " " prm#21 Row #11 = free#09 -# Group 3: " " " prm#22 Row #12 = free#10 -# Group 3: " " " prm#23 Row #13 = free#11 -# Group 3: " " " prm#24 Row #14 = free#12 -# Group 3: " " " prm#25 Row #15 = free#13 - -# GROUP-4 ZEROS: FIVE PARAMETERS REVOKED -# Group 4: 1.0 prm#29 prm#30 prm#31 Row #16 = free#14 + free#15 -# Group 4: " prm#32 prm#33 prm#34 Row #17 = free#14 -# Group 4: " " " prm#35 Row #18 = free#15 - -# Group 5: 1.0 prm#26 prm#27 prm#28 Row #19 = free#01 + ... + free#15 -# -# (The aggregates in Groups 1..4 regress on the total cost in Group 5; -# the total cost in Group 5 regresses on the intercept.) 
- -# THE LAST FEW "SPECIAL" REGULARIZATION EQUATIONS: -# Factors: 1.0 -1.0 0.0 0.0 -# PARAMS: -# prm#26 1.0 0.0 0.0 -# prm#27 0.0 0.0 0.0 -# prm#01 0.0 0.0 0.0 -# prm#02 0.0 0.0 0.0 -# prm#04 0.0 0.0 0.0 -# prm#05 0.0 0.0 0.0 -# prm#12 0.0 0.0 0.0 -# prm#13 0.0 0.0 0.0 -# prm#15 0.0 0.0 0.0 -# prm#16 0.0 0.0 0.0 -# prm#18 0.0 0.0 0.0 -# prm#19 0.0 0.0 0.0 -# prm#29 0.0 0.0 0.0 # GROUP-4 ZEROS: -# prm#30 0.0 0.0 0.0 # THESE EQUATIONS -# prm#32 0.0 0.0 0.0 # USE REVOKED PARAMETERS -# prm#33 0.0 0.0 0.0 # AND DO NOT APPEAR - - -for (t in 3 : num_terms) -{ -# Group 1 attributes: - for (i in 1 : 7) { - reg_index = ((t-3) * num_attrs - 1 + i) * num_factors; - RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero; # First factor is -x[t] - RegresValueMap [reg_index + 2, (t-2) * num_attrs + i] = 1.0 + zero; # Second factor is x[t-1] - RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] = 1.0 + zero; # Third factor is - RegresValueMap [reg_index + 3, (t-3) * num_attrs + i] = -1.0 + zero; # x[t-1] - x[t-2] - if (i == 1) { - RegresValueMap [reg_index + 4, (t-1) * num_attrs + 19] = 1.0 + zero; # 4th factor: Row#19[t] - } else { - RegresValueMap [reg_index + 4, (t-1) * num_attrs + 1] = 1.0 + zero; # 4th factor: Row#01[t] - } - } - -# Group 2 attribute: - reg_index = ((t-3) * num_attrs - 1 + 8) * num_factors; - RegresValueMap [reg_index + 1, (t-1) * num_attrs + 8] = -1.0 + zero; # First factor is -x[t] - RegresValueMap [reg_index + 2, (t-2) * num_attrs + 8] = 1.0 + zero; # Second factor is x[t-1] - RegresValueMap [reg_index + 3, (t-2) * num_attrs + 8] = 1.0 + zero; # Third factor is - RegresValueMap [reg_index + 3, (t-3) * num_attrs + 8] = -1.0 + zero; # x[t-1] - x[t-2] - RegresValueMap [reg_index + 4, (t-1) * num_attrs + 19] = 1.0 + zero; # 4th factor: Row#19[t] - -# Group 3 attributes: - for (i in 9 : 15) { - reg_index = ((t-3) * num_attrs - 1 + i) * num_factors; - RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero; # First factor is -x[t] - 
RegresValueMap [reg_index + 2, (t-2) * num_attrs + i] = 1.0 + zero; # Second factor is x[t-1] - RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] = 1.0 + zero; # Third factor is - RegresValueMap [reg_index + 3, (t-3) * num_attrs + i] = -1.0 + zero; # x[t-1] - x[t-2] - if (i == 9) { - RegresValueMap [reg_index + 4, (t-1) * num_attrs + 19] = 1.0 + zero; # 4th factor: Row#19[t] - } else { - RegresValueMap [reg_index + 4, (t-1) * num_attrs + 9] = 1.0 + zero; # 4th factor: Row#09[t] - } - } - -# Group 4 attributes: - for (i in 16 : 18) { - reg_index = ((t-3) * num_attrs - 1 + i) * num_factors; - RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero; # First factor is -x[t] - RegresValueMap [reg_index + 2, (t-2) * num_attrs + i] = 1.0 + zero; # Second factor is x[t-1] - RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] = 1.0 + zero; # Third factor is - RegresValueMap [reg_index + 3, (t-3) * num_attrs + i] = -1.0 + zero; # x[t-1] - x[t-2] - if (i == 16) { - RegresValueMap [reg_index + 4, (t-1) * num_attrs + 19] = 1.0 + zero; # 4th factor: Row#19[t] - } else { - RegresValueMap [reg_index + 4, (t-1) * num_attrs + 16] = 1.0 + zero; # 4th factor: Row#16[t] - } - } - -# Group 5 attribute: - reg_index = ((t-3) * num_attrs - 1 + 19) * num_factors; - RegresValueMap [reg_index + 1, (t-1) * num_attrs + 19] = -1.0 + zero; # First factor is -x[t] - RegresValueMap [reg_index + 2, (t-2) * num_attrs + 19] = 1.0 + zero; # Second factor is x[t-1] - RegresValueMap [reg_index + 3, (t-2) * num_attrs + 19] = 1.0 + zero; # Third factor is - RegresValueMap [reg_index + 3, (t-3) * num_attrs + 19] = -1.0 + zero; # x[t-1] - x[t-2] - RegresFactorDefault [reg_index + 4, 1] = 1.0 + zero; # The Intercept -} - -for (i in 1:num_special_regs) -{ - reg_index = ((num_terms - 2) * num_attrs - 1 + i) * num_factors; - RegresFactorDefault [reg_index + 1, 1] = 1.0 + zero; - RegresFactorDefault [reg_index + 2, 1] = -1.0 + zero; -} - -# 
---------------------------------------------------------- -# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS -# AT REGRESSION FACTORS: A LINER MAP + A VECTOR OF DEFAULTS -# ---------------------------------------------------------- - -RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params); -RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1); - -for (t in 3 : num_terms) { -# Group 1 attributes: - reg_index = ((t-3) * num_attrs - 1 + 1) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 1] = 1.0 + zero; # Param #01 - RegresParamMap [reg_index + 3, 2] = 1.0 + zero; # Param #02 - RegresParamMap [reg_index + 4, 3] = 1.0 + zero; # Param #03 - for (i in 2 : 7) { - reg_index = ((t-3) * num_attrs - 1 + i) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 4] = 1.0 + zero; # Param #04 - RegresParamMap [reg_index + 3, 5] = 1.0 + zero; # Param #05 - RegresParamMap [reg_index + 4, 4 + i] = 1.0 + zero; # Param #06-#11 - } -# Group 2 attribute: - reg_index = ((t-3) * num_attrs - 1 + 8) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 12] = 1.0 + zero; # Param #12 - RegresParamMap [reg_index + 3, 13] = 1.0 + zero; # Param #13 - RegresParamMap [reg_index + 4, 14] = 1.0 + zero; # Param #14 -# Group 3 attributes: - reg_index = ((t-3) * num_attrs - 1 + 9) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 15] = 1.0 + zero; # Param #15 - RegresParamMap [reg_index + 3, 16] = 1.0 + zero; # Param #16 - RegresParamMap [reg_index + 4, 17] = 1.0 + zero; # Param #17 - for (i in 10 : 15) { - reg_index = ((t-3) * num_attrs - 1 + i) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default 
coefficient = 1.0 - RegresParamMap [reg_index + 2, 18] = 1.0 + zero; # Param #18 - RegresParamMap [reg_index + 3, 19] = 1.0 + zero; # Param #19 - RegresParamMap [reg_index + 4, 10 + i] = 1.0 + zero; # Param #20-#25 - } - -# Group 4 attributes: -if (is_GROUP_4_ENABLED == 1) { - reg_index = ((t-3) * num_attrs - 1 + 16) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 29] = 1.0 + zero; # Param #29 - RegresParamMap [reg_index + 3, 30] = 1.0 + zero; # Param #30 - RegresParamMap [reg_index + 4, 31] = 1.0 + zero; # Param #31 - for (i in 17 : 18) { - reg_index = ((t-3) * num_attrs - 1 + i) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 32] = 1.0 + zero; # Param #32 - RegresParamMap [reg_index + 3, 33] = 1.0 + zero; # Param #33 - RegresParamMap [reg_index + 4, 17 + i] = 1.0 + zero; # Param #34-#35 - } -} - -# Group 5 attribute: - reg_index = ((t-3) * num_attrs - 1 + 19) * num_factors; - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 - RegresParamMap [reg_index + 2, 26] = 1.0 + zero; # Param #26 - RegresParamMap [reg_index + 3, 27] = 1.0 + zero; # Param #27 - RegresParamMap [reg_index + 4, 28] = 1.0 + zero; # Param #28 -} - -reg_index = ((num_terms - 2) * num_attrs) * num_factors; - RegresParamMap [reg_index + 1, 26] = 1.0 + zero; # Param #26 - RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero; -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 27] = 1.0 + zero; # Param #27 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 01] = 1.0 + zero; # Param #01 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 02] = 1.0 + zero; # Param #02 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 04] = 1.0 + zero; # Param #04 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 05] = 1.0 + zero; # 
Param #05 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 12] = 1.0 + zero; # Param #12 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 13] = 1.0 + zero; # Param #13 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 15] = 1.0 + zero; # Param #15 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 16] = 1.0 + zero; # Param #16 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 18] = 1.0 + zero; # Param #18 -reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 19] = 1.0 + zero; # Param #19 - -if (is_GROUP_4_ENABLED == 1) { - reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 29] = 1.0 + zero; # Param #29 - reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 30] = 1.0 + zero; # Param #30 - reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 32] = 1.0 + zero; # Param #32 - reg_index = reg_index + num_factors; - RegresParamMap [reg_index + 1, 33] = 1.0 + zero; # Param #33 -} - -# ---------------------------------------------------------- -# GENERATE A VECTOR OF SCALE MULTIPLIERS, ONE PER REGRESSION -# ---------------------------------------------------------- - -RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1); -initial_reports = read ($1); - -global_weight = 0.5 + zero; - -attribute_size = rowMeans (abs (initial_reports [, 1:(num_terms-1)])); -max_attr_size = max (attribute_size); - -for (t in 3 : num_terms) { - for (i in 1 : num_attrs) { - regeqn = (t-3) * num_attrs + i; - scale_down = sqrt (attribute_size [i, 1] / max_attr_size) * 0.999 + 0.001; - acceptable_drift = scale_down * max_attr_size * 0.001; - RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2); - } -} - -regeqn = (num_terms - 2) * num_attrs + 1; -for (i in 1 : num_special_regs) { - acceptable_drift = 0.01; - RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2); - regeqn = regeqn + 1; -} - -# 
-------------------------------- -# WRITE OUT ALL GENERATED MATRICES -# -------------------------------- - -# write (initial_reports, $1, format="text"); -write (CReps, $2, format="text"); -write (RegresValueMap, $3, format="text"); -write (RegresFactorDefault,$4, format="text"); -write (RegresParamMap, $5, format="text"); -write (RegresCoeffDefault, $6, format="text"); -write (RegresScaleMult, $7, format="text"); +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + +# hadoop jar SystemML.jar -f test/scripts/applications/impute/wfundInputGenerator.dml -exec singlenode +# -args +# test/scripts/applications/impute/initial_reports +# test/scripts/applications/impute/CReps +# test/scripts/applications/impute/RegresValueMap +# test/scripts/applications/impute/RegresFactorDefault +# test/scripts/applications/impute/RegresParamMap +# test/scripts/applications/impute/RegresCoeffDefault +# test/scripts/applications/impute/RegresScaleMult + +is_GROUP_4_ENABLED = 0; # = 1 or 0 + +num_terms = 6; # The number of term reports, feel free to change +num_attrs = 19; + +num_frees = 13; +if (is_GROUP_4_ENABLED == 1) { + num_frees = 15; # The estimated last report had 15 degrees of freedom +} + +zero = matrix (0.0, rows = 1, cols = 1); + +# --------------------------------------------------------- +# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS +# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS +# --------------------------------------------------------- + +CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees); + +# constraint that row1 = row2 + row3 + row4 + row5 + row6 + row7 +# translated to free vars: row1 = free1 + free2 + free3 + free4 + free5 + free6 +CReps [(num_terms-1) * num_attrs + 1, 1] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 1, 2] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 1, 3] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 1, 4] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 1, 5] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 1, 6] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 2, 1] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 3, 2] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 4, 3] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 5, 4] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 6, 5] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 7, 6] = 1.0 + zero; + +# row 8 is free variable 
not appearing in any non-free variable +CReps [(num_terms-1) * num_attrs + 8, 7] = 1.0 + zero; + +# constraint that row9 = row10 + row11 + row12 + row13 + row14 + row15 +# translated to free vars: row9 = free8 + free9 + free10 + free11 + free12 + free13 +CReps [(num_terms-1) * num_attrs + 9, 8] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 9, 9] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 9, 10] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 9, 11] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 9, 12] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 9, 13] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 10, 8] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 11, 9] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 12, 10] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 13, 11] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 14, 12] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 15, 13] = 1.0 + zero; + +# constraint that row16 = row14 + row15 +# translated to free vars: row16 = free14 + free15 +if (is_GROUP_4_ENABLED == 1) { + CReps [(num_terms-1) * num_attrs + 16, 14] = 1.0 + zero; + CReps [(num_terms-1) * num_attrs + 16, 15] = 1.0 + zero; + CReps [(num_terms-1) * num_attrs + 17, 14] = 1.0 + zero; + CReps [(num_terms-1) * num_attrs + 18, 15] = 1.0 + zero; +} + +# constraint that row19 = total cost (all free variables) +# translated to free vars: row19 = all free variables +CReps [(num_terms-1) * num_attrs + 19, 1] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 19, 2] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 19, 3] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 19, 4] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 19, 5] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 19, 6] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 19, 7] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 19, 8] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 19, 9] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 19, 10] = 1.0 + zero; +CReps 
[(num_terms-1) * num_attrs + 19, 11] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 19, 12] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 19, 13] = 1.0 + zero; +if (is_GROUP_4_ENABLED == 1) { + CReps [(num_terms-1) * num_attrs + 19, 14] = 1.0 + zero; + CReps [(num_terms-1) * num_attrs + 19, 15] = 1.0 + zero; +} + +# --------------------------------------------------------- +# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS +# AFFINE MAP = LINER MAP + A VECTOR OF DEFAULTS +# --------------------------------------------------------- + +# In all regressions, except the last few "special" ones, there are 4 factors: +# x[t] ~ x[t-1], (x[t-1] - x[t-2]), aggregate[t] +# The last regressions are for regularization, but they also follow the 4-factor pattern. +num_factors = 4; + +# We have one regression equation per time-term, except the first two terms, for each +# attribute, plus a few "special" regularization regression equations: +num_special_regs = 12; +if (is_GROUP_4_ENABLED == 1) { + num_special_regs = 16; +} + +num_reg_eqs = (num_terms - 2) * num_attrs + num_special_regs; + +RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs)); +RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1); + +# All regression equations for the same attribute share the same parameters, regardless +# of the term; some parameters are shared across multiple attributes, (those attributes +# whose behavior is believed to be similar) as specified in the table below: + +num_params = 28; +if (is_GROUP_4_ENABLED == 1) { + num_params = 35; +} + +# Factors: -self[t] self[t-1] self[t-1]- total[t] +# self[t-2] +# PARAMS: +# Group 1: 1.0 prm#01 prm#02 prm#03 Row #01 = free#01 + ... 
+ free#06 +# Group 1: " prm#04 prm#05 prm#06 Row #02 = free#01 +# Group 1: " " " prm#07 Row #03 = free#02 +# Group 1: " " " prm#08 Row #04 = free#03 +# Group 1: " " " prm#09 Row #05 = free#04 +# Group 1: " " " prm#10 Row #06 = free#05 +# Group 1: " " " prm#11 Row #07 = free#06 +# Group 2: 1.0 prm#12 prm#13 prm#14 Row #08 = free#07 +# Group 3: 1.0 prm#15 prm#16 prm#17 Row #09 = free#08 + ... + free#13 +# Group 3: " prm#18 prm#19 prm#20 Row #10 = free#08 +# Group 3: " " " prm#21 Row #11 = free#09 +# Group 3: " " " prm#22 Row #12 = free#10 +# Group 3: " " " prm#23 Row #13 = free#11 +# Group 3: " " " prm#24 Row #14 = free#12 +# Group 3: " " " prm#25 Row #15 = free#13 + +# GROUP-4 ZEROS: FIVE PARAMETERS REVOKED +# Group 4: 1.0 prm#29 prm#30 prm#31 Row #16 = free#14 + free#15 +# Group 4: " prm#32 prm#33 prm#34 Row #17 = free#14 +# Group 4: " " " prm#35 Row #18 = free#15 + +# Group 5: 1.0 prm#26 prm#27 prm#28 Row #19 = free#01 + ... + free#15 +# +# (The aggregates in Groups 1..4 regress on the total cost in Group 5; +# the total cost in Group 5 regresses on the intercept.) 
+ +# THE LAST FEW "SPECIAL" REGULARIZATION EQUATIONS: +# Factors: 1.0 -1.0 0.0 0.0 +# PARAMS: +# prm#26 1.0 0.0 0.0 +# prm#27 0.0 0.0 0.0 +# prm#01 0.0 0.0 0.0 +# prm#02 0.0 0.0 0.0 +# prm#04 0.0 0.0 0.0 +# prm#05 0.0 0.0 0.0 +# prm#12 0.0 0.0 0.0 +# prm#13 0.0 0.0 0.0 +# prm#15 0.0 0.0 0.0 +# prm#16 0.0 0.0 0.0 +# prm#18 0.0 0.0 0.0 +# prm#19 0.0 0.0 0.0 +# prm#29 0.0 0.0 0.0 # GROUP-4 ZEROS: +# prm#30 0.0 0.0 0.0 # THESE EQUATIONS +# prm#32 0.0 0.0 0.0 # USE REVOKED PARAMETERS +# prm#33 0.0 0.0 0.0 # AND DO NOT APPEAR + + +for (t in 3 : num_terms) +{ +# Group 1 attributes: + for (i in 1 : 7) { + reg_index = ((t-3) * num_attrs - 1 + i) * num_factors; + RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero; # First factor is -x[t] + RegresValueMap [reg_index + 2, (t-2) * num_attrs + i] = 1.0 + zero; # Second factor is x[t-1] + RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] = 1.0 + zero; # Third factor is + RegresValueMap [reg_index + 3, (t-3) * num_attrs + i] = -1.0 + zero; # x[t-1] - x[t-2] + if (i == 1) { + RegresValueMap [reg_index + 4, (t-1) * num_attrs + 19] = 1.0 + zero; # 4th factor: Row#19[t] + } else { + RegresValueMap [reg_index + 4, (t-1) * num_attrs + 1] = 1.0 + zero; # 4th factor: Row#01[t] + } + } + +# Group 2 attribute: + reg_index = ((t-3) * num_attrs - 1 + 8) * num_factors; + RegresValueMap [reg_index + 1, (t-1) * num_attrs + 8] = -1.0 + zero; # First factor is -x[t] + RegresValueMap [reg_index + 2, (t-2) * num_attrs + 8] = 1.0 + zero; # Second factor is x[t-1] + RegresValueMap [reg_index + 3, (t-2) * num_attrs + 8] = 1.0 + zero; # Third factor is + RegresValueMap [reg_index + 3, (t-3) * num_attrs + 8] = -1.0 + zero; # x[t-1] - x[t-2] + RegresValueMap [reg_index + 4, (t-1) * num_attrs + 19] = 1.0 + zero; # 4th factor: Row#19[t] + +# Group 3 attributes: + for (i in 9 : 15) { + reg_index = ((t-3) * num_attrs - 1 + i) * num_factors; + RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero; # First factor is -x[t] + 
RegresValueMap [reg_index + 2, (t-2) * num_attrs + i] = 1.0 + zero; # Second factor is x[t-1] + RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] = 1.0 + zero; # Third factor is + RegresValueMap [reg_index + 3, (t-3) * num_attrs + i] = -1.0 + zero; # x[t-1] - x[t-2] + if (i == 9) { + RegresValueMap [reg_index + 4, (t-1) * num_attrs + 19] = 1.0 + zero; # 4th factor: Row#19[t] + } else { + RegresValueMap [reg_index + 4, (t-1) * num_attrs + 9] = 1.0 + zero; # 4th factor: Row#09[t] + } + } + +# Group 4 attributes: + for (i in 16 : 18) { + reg_index = ((t-3) * num_attrs - 1 + i) * num_factors; + RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = -1.0 + zero; # First factor is -x[t] + RegresValueMap [reg_index + 2, (t-2) * num_attrs + i] = 1.0 + zero; # Second factor is x[t-1] + RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] = 1.0 + zero; # Third factor is + RegresValueMap [reg_index + 3, (t-3) * num_attrs + i] = -1.0 + zero; # x[t-1] - x[t-2] + if (i == 16) { + RegresValueMap [reg_index + 4, (t-1) * num_attrs + 19] = 1.0 + zero; # 4th factor: Row#19[t] + } else { + RegresValueMap [reg_index + 4, (t-1) * num_attrs + 16] = 1.0 + zero; # 4th factor: Row#16[t] + } + } + +# Group 5 attribute: + reg_index = ((t-3) * num_attrs - 1 + 19) * num_factors; + RegresValueMap [reg_index + 1, (t-1) * num_attrs + 19] = -1.0 + zero; # First factor is -x[t] + RegresValueMap [reg_index + 2, (t-2) * num_attrs + 19] = 1.0 + zero; # Second factor is x[t-1] + RegresValueMap [reg_index + 3, (t-2) * num_attrs + 19] = 1.0 + zero; # Third factor is + RegresValueMap [reg_index + 3, (t-3) * num_attrs + 19] = -1.0 + zero; # x[t-1] - x[t-2] + RegresFactorDefault [reg_index + 4, 1] = 1.0 + zero; # The Intercept +} + +for (i in 1:num_special_regs) +{ + reg_index = ((num_terms - 2) * num_attrs - 1 + i) * num_factors; + RegresFactorDefault [reg_index + 1, 1] = 1.0 + zero; + RegresFactorDefault [reg_index + 2, 1] = -1.0 + zero; +} + +# 
---------------------------------------------------------- +# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS +# AT REGRESSION FACTORS: A LINER MAP + A VECTOR OF DEFAULTS +# ---------------------------------------------------------- + +RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params); +RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1); + +for (t in 3 : num_terms) { +# Group 1 attributes: + reg_index = ((t-3) * num_attrs - 1 + 1) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 1] = 1.0 + zero; # Param #01 + RegresParamMap [reg_index + 3, 2] = 1.0 + zero; # Param #02 + RegresParamMap [reg_index + 4, 3] = 1.0 + zero; # Param #03 + for (i in 2 : 7) { + reg_index = ((t-3) * num_attrs - 1 + i) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 4] = 1.0 + zero; # Param #04 + RegresParamMap [reg_index + 3, 5] = 1.0 + zero; # Param #05 + RegresParamMap [reg_index + 4, 4 + i] = 1.0 + zero; # Param #06-#11 + } +# Group 2 attribute: + reg_index = ((t-3) * num_attrs - 1 + 8) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 12] = 1.0 + zero; # Param #12 + RegresParamMap [reg_index + 3, 13] = 1.0 + zero; # Param #13 + RegresParamMap [reg_index + 4, 14] = 1.0 + zero; # Param #14 +# Group 3 attributes: + reg_index = ((t-3) * num_attrs - 1 + 9) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 15] = 1.0 + zero; # Param #15 + RegresParamMap [reg_index + 3, 16] = 1.0 + zero; # Param #16 + RegresParamMap [reg_index + 4, 17] = 1.0 + zero; # Param #17 + for (i in 10 : 15) { + reg_index = ((t-3) * num_attrs - 1 + i) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default 
coefficient = 1.0 + RegresParamMap [reg_index + 2, 18] = 1.0 + zero; # Param #18 + RegresParamMap [reg_index + 3, 19] = 1.0 + zero; # Param #19 + RegresParamMap [reg_index + 4, 10 + i] = 1.0 + zero; # Param #20-#25 + } + +# Group 4 attributes: +if (is_GROUP_4_ENABLED == 1) { + reg_index = ((t-3) * num_attrs - 1 + 16) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 29] = 1.0 + zero; # Param #29 + RegresParamMap [reg_index + 3, 30] = 1.0 + zero; # Param #30 + RegresParamMap [reg_index + 4, 31] = 1.0 + zero; # Param #31 + for (i in 17 : 18) { + reg_index = ((t-3) * num_attrs - 1 + i) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 32] = 1.0 + zero; # Param #32 + RegresParamMap [reg_index + 3, 33] = 1.0 + zero; # Param #33 + RegresParamMap [reg_index + 4, 17 + i] = 1.0 + zero; # Param #34-#35 + } +} + +# Group 5 attribute: + reg_index = ((t-3) * num_attrs - 1 + 19) * num_factors; + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; # Default coefficient = 1.0 + RegresParamMap [reg_index + 2, 26] = 1.0 + zero; # Param #26 + RegresParamMap [reg_index + 3, 27] = 1.0 + zero; # Param #27 + RegresParamMap [reg_index + 4, 28] = 1.0 + zero; # Param #28 +} + +reg_index = ((num_terms - 2) * num_attrs) * num_factors; + RegresParamMap [reg_index + 1, 26] = 1.0 + zero; # Param #26 + RegresCoeffDefault [reg_index + 2, 1] = 1.0 + zero; +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 27] = 1.0 + zero; # Param #27 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 01] = 1.0 + zero; # Param #01 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 02] = 1.0 + zero; # Param #02 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 04] = 1.0 + zero; # Param #04 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 05] = 1.0 + zero; # 
Param #05 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 12] = 1.0 + zero; # Param #12 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 13] = 1.0 + zero; # Param #13 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 15] = 1.0 + zero; # Param #15 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 16] = 1.0 + zero; # Param #16 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 18] = 1.0 + zero; # Param #18 +reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 19] = 1.0 + zero; # Param #19 + +if (is_GROUP_4_ENABLED == 1) { + reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 29] = 1.0 + zero; # Param #29 + reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 30] = 1.0 + zero; # Param #30 + reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 32] = 1.0 + zero; # Param #32 + reg_index = reg_index + num_factors; + RegresParamMap [reg_index + 1, 33] = 1.0 + zero; # Param #33 +} + +# ---------------------------------------------------------- +# GENERATE A VECTOR OF SCALE MULTIPLIERS, ONE PER REGRESSION +# ---------------------------------------------------------- + +RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1); +initial_reports = read ($1); + +global_weight = 0.5 + zero; + +attribute_size = rowMeans (abs (initial_reports [, 1:(num_terms-1)])); +max_attr_size = max (attribute_size); + +for (t in 3 : num_terms) { + for (i in 1 : num_attrs) { + regeqn = (t-3) * num_attrs + i; + scale_down = sqrt (attribute_size [i, 1] / max_attr_size) * 0.999 + 0.001; + acceptable_drift = scale_down * max_attr_size * 0.001; + RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2); + } +} + +regeqn = (num_terms - 2) * num_attrs + 1; +for (i in 1 : num_special_regs) { + acceptable_drift = 0.01; + RegresScaleMult [regeqn, 1] = global_weight / (acceptable_drift^2); + regeqn = regeqn + 1; +} + +# 
-------------------------------- +# WRITE OUT ALL GENERATED MATRICES +# -------------------------------- + +# write (initial_reports, $1, format="text"); +write (CReps, $2, format="text"); +write (RegresValueMap, $3, format="text"); +write (RegresFactorDefault,$4, format="text"); +write (RegresParamMap, $5, format="text"); +write (RegresCoeffDefault, $6, format="text"); +write (RegresScaleMult, $7, format="text");
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/impute/test/testInputGenerator.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/impute/test/testInputGenerator.dml b/src/test/scripts/applications/impute/test/testInputGenerator.dml index 9eaf034..2040756 100644 --- a/src/test/scripts/applications/impute/test/testInputGenerator.dml +++ b/src/test/scripts/applications/impute/test/testInputGenerator.dml @@ -1,152 +1,152 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -#------------------------------------------------------------- - -# Generates synthetic data to try imputeGaussMCMC.dml -# How To Run: -# -# -# hadoop jar SystemML.jar -f test/scripts/applications/impute/test/testInputGenerator.dml -exec singlenode -# -args test/scripts/applications/impute/test/initial_reports test/scripts/applications/impute/test/CReps -# test/scripts/applications/impute/test/RegresValueMap test/scripts/applications/impute/test/RegresParamMap - -num_terms = 40; # The number of term reports, feel free to change -num_attrs = 6; # 4 regular attributes, 1 total cost, 1 auxiliary ("macroeconomic") -num_frees = 4; # We estimate the last report, which has 4 degrees of freedom -num_factors = 4; # In regressions: x[t] ~ x[t-1], (x[t-1] - x[t-2]), total_cost[t] - -# We have one regression equation per term, except the first two terms, -# for each attribute except the auxiliary attribute: -num_reg_eqs = (num_terms - 2) * (num_attrs - 1); - -# All regression equations for the same attribute share the same parameters, -# regardless of the term: -num_params = num_factors * (num_attrs - 1); - -# GENERATE THE INITIAL REPORTS MATRIX (with the last term report set to 0.0) - -initial_reports_matrix = matrix (0.0, rows = num_attrs, cols = num_terms); - -# We assume that the terms are quarterly. -# Auxiliary attribute is = sqrt(1.1)^t, a steady exponential growth of 21% a year. -# The total cost regresses on the auxiliary attribute and shows a combination of -# exponential and cyclic behavior year after year. 
- -zero = matrix (0.0, rows = 1, cols = 1); - -initial_reports_matrix [6, 1] = zero + 1; # auxiliary attribute -for (t in 2 : num_terms) { - initial_reports_matrix [6, t] = initial_reports_matrix [6, t-1] * sqrt (1.1); -} - -initial_reports_matrix [1, 1] = zero + 1 * 0.4615107865026; -initial_reports_matrix [2, 1] = zero + 1 * 0.0270996863066; -initial_reports_matrix [3, 1] = zero + 1 * 0.3772761445953; -initial_reports_matrix [4, 1] = zero + 1 * 0.1341133825954; -initial_reports_matrix [5, 1] = zero + 1; # total cost attribute - -initial_reports_matrix [1, 2] = zero + 2 * 0.3281440348352; -initial_reports_matrix [2, 2] = zero + 2 * 0.0345738029588; -initial_reports_matrix [3, 2] = zero + 2 * 0.4052452565031; -initial_reports_matrix [4, 2] = zero + 2 * 0.2320369057028; -initial_reports_matrix [5, 2] = zero + 2; # total cost attribute - -for (t in 3 : (num_terms - 1)) -{ - initial_reports_matrix [5, t] = - - 1.1 * initial_reports_matrix [5, t-1] - + 1.1 * (initial_reports_matrix [5, t-1] - initial_reports_matrix [5, t-2]) - + 3.0 * (initial_reports_matrix [6, t]); - - initial_reports_matrix [1, t] = - 0.45 * initial_reports_matrix [1, t-1] - + 0.00 * (initial_reports_matrix [1, t-1] - initial_reports_matrix [1, t-2]) - + 0.2243041078721 * (initial_reports_matrix [5, t]); - - initial_reports_matrix [2, t] = - 0.00 * initial_reports_matrix [2, t-1] - + 0.45 * (initial_reports_matrix [2, t-1] - initial_reports_matrix [2, t-2]) - + 0.0417492985298 * (initial_reports_matrix [5, t]); - - initial_reports_matrix [3, t] = - - 0.40 * initial_reports_matrix [3, t-1] - + 0.00 * (initial_reports_matrix [3, t-1] - initial_reports_matrix [3, t-2]) - + 0.4807004854222 * (initial_reports_matrix [5, t]); - - initial_reports_matrix [4, t] = - - 0.20 * initial_reports_matrix [4, t-1] - + 0.30 * (initial_reports_matrix [4, t-1] - initial_reports_matrix [4, t-2]) - + 0.2549604916594 * (initial_reports_matrix [5, t]); -} - -# GENERATE A LINEAR MAP FROM FREE VARIABLES TO THE REPORTS - 
-CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees); -CReps [(num_terms-1) * num_attrs + 1, 1] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 2, 2] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 3, 3] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 4, 4] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 5, 1] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 5, 2] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 5, 3] = 1.0 + zero; -CReps [(num_terms-1) * num_attrs + 5, 4] = 1.0 + zero; - -# GENERATE A LINEAR MAP FROM REPORTS TO REGRESSION FACTORS - -RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs)); - -for (t in 3 : num_terms) { - for (i in 1 : (num_attrs - 2)) { - reg_index = ((t-3)*(num_attrs-1)-1 + i) * num_factors; - RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = 1.0 + zero; # First factor is x[t] - RegresValueMap [reg_index + 2, (t-2) * num_attrs + i] = 1.0 + zero; # Second factor is x[t-1] - RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] = 1.0 + zero; # Third factor is - RegresValueMap [reg_index + 3, (t-3) * num_attrs + i] = -1.0 + zero; # x[t-1] - x[t-2] - RegresValueMap [reg_index + 4, (t-1) * num_attrs + 5] = 1.0 + zero; # 4th factor = total_cost[t] - } - # For the total cost itself, the regression is almost the same, except the last line: - reg_index = ((t-3)*(num_attrs-1)-1 + 5) * num_factors; - RegresValueMap [reg_index + 1, (t-1) * num_attrs + 5] = 1.0 + zero; # First factor is x[t] - RegresValueMap [reg_index + 2, (t-2) * num_attrs + 5] = 1.0 + zero; # Second factor is x[t-1] - RegresValueMap [reg_index + 3, (t-2) * num_attrs + 5] = 1.0 + zero; # Third factor is - RegresValueMap [reg_index + 3, (t-3) * num_attrs + 5] = -1.0 + zero; # x[t-1] - x[t-2] - RegresValueMap [reg_index + 4, (t-1) * num_attrs + 6] = 1.0 + zero; # 4th factor = auxiliary[t] -} - -# GENERATE A LINEAR MAP FROM PARAMETERS TO REGRESSION FACTORS - -RegresParamMap = matrix (0.0, rows = 
(num_reg_eqs * num_factors), cols = num_params); - -for (t in 3 : num_terms) { - for (i in 1 : (num_attrs - 1)) { - reg_index = ((t-3)*(num_attrs-1)-1 + i) * num_factors; - RegresParamMap [reg_index + 1, 0 * (num_attrs-1) + i] = 1.0 + zero; - RegresParamMap [reg_index + 2, 1 * (num_attrs-1) + i] = 1.0 + zero; - RegresParamMap [reg_index + 3, 2 * (num_attrs-1) + i] = 1.0 + zero; - RegresParamMap [reg_index + 4, 3 * (num_attrs-1) + i] = 1.0 + zero; - } -} - -# WRITE OUT ALL GENERATED MATRICES - -write (initial_reports_matrix, $1, format="text"); -write (CReps, $2, format="text"); -write (RegresValueMap, $3, format="text"); -write (RegresParamMap, $4, format="text"); +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + +# Generates synthetic data to try imputeGaussMCMC.dml +# How To Run: +# +# +# hadoop jar SystemML.jar -f test/scripts/applications/impute/test/testInputGenerator.dml -exec singlenode +# -args test/scripts/applications/impute/test/initial_reports test/scripts/applications/impute/test/CReps +# test/scripts/applications/impute/test/RegresValueMap test/scripts/applications/impute/test/RegresParamMap + +num_terms = 40; # The number of term reports, feel free to change +num_attrs = 6; # 4 regular attributes, 1 total cost, 1 auxiliary ("macroeconomic") +num_frees = 4; # We estimate the last report, which has 4 degrees of freedom +num_factors = 4; # In regressions: x[t] ~ x[t-1], (x[t-1] - x[t-2]), total_cost[t] + +# We have one regression equation per term, except the first two terms, +# for each attribute except the auxiliary attribute: +num_reg_eqs = (num_terms - 2) * (num_attrs - 1); + +# All regression equations for the same attribute share the same parameters, +# regardless of the term: +num_params = num_factors * (num_attrs - 1); + +# GENERATE THE INITIAL REPORTS MATRIX (with the last term report set to 0.0) + +initial_reports_matrix = matrix (0.0, rows = num_attrs, cols = num_terms); + +# We assume that the terms are quarterly. +# Auxiliary attribute is = sqrt(1.1)^t, a steady exponential growth of 21% a year. +# The total cost regresses on the auxiliary attribute and shows a combination of +# exponential and cyclic behavior year after year. 
+ +zero = matrix (0.0, rows = 1, cols = 1); + +initial_reports_matrix [6, 1] = zero + 1; # auxiliary attribute +for (t in 2 : num_terms) { + initial_reports_matrix [6, t] = initial_reports_matrix [6, t-1] * sqrt (1.1); +} + +initial_reports_matrix [1, 1] = zero + 1 * 0.4615107865026; +initial_reports_matrix [2, 1] = zero + 1 * 0.0270996863066; +initial_reports_matrix [3, 1] = zero + 1 * 0.3772761445953; +initial_reports_matrix [4, 1] = zero + 1 * 0.1341133825954; +initial_reports_matrix [5, 1] = zero + 1; # total cost attribute + +initial_reports_matrix [1, 2] = zero + 2 * 0.3281440348352; +initial_reports_matrix [2, 2] = zero + 2 * 0.0345738029588; +initial_reports_matrix [3, 2] = zero + 2 * 0.4052452565031; +initial_reports_matrix [4, 2] = zero + 2 * 0.2320369057028; +initial_reports_matrix [5, 2] = zero + 2; # total cost attribute + +for (t in 3 : (num_terms - 1)) +{ + initial_reports_matrix [5, t] = + - 1.1 * initial_reports_matrix [5, t-1] + + 1.1 * (initial_reports_matrix [5, t-1] - initial_reports_matrix [5, t-2]) + + 3.0 * (initial_reports_matrix [6, t]); + + initial_reports_matrix [1, t] = + 0.45 * initial_reports_matrix [1, t-1] + + 0.00 * (initial_reports_matrix [1, t-1] - initial_reports_matrix [1, t-2]) + + 0.2243041078721 * (initial_reports_matrix [5, t]); + + initial_reports_matrix [2, t] = + 0.00 * initial_reports_matrix [2, t-1] + + 0.45 * (initial_reports_matrix [2, t-1] - initial_reports_matrix [2, t-2]) + + 0.0417492985298 * (initial_reports_matrix [5, t]); + + initial_reports_matrix [3, t] = + - 0.40 * initial_reports_matrix [3, t-1] + + 0.00 * (initial_reports_matrix [3, t-1] - initial_reports_matrix [3, t-2]) + + 0.4807004854222 * (initial_reports_matrix [5, t]); + + initial_reports_matrix [4, t] = + - 0.20 * initial_reports_matrix [4, t-1] + + 0.30 * (initial_reports_matrix [4, t-1] - initial_reports_matrix [4, t-2]) + + 0.2549604916594 * (initial_reports_matrix [5, t]); +} + +# GENERATE A LINEAR MAP FROM FREE VARIABLES TO THE REPORTS + 
+CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees); +CReps [(num_terms-1) * num_attrs + 1, 1] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 2, 2] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 3, 3] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 4, 4] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 5, 1] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 5, 2] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 5, 3] = 1.0 + zero; +CReps [(num_terms-1) * num_attrs + 5, 4] = 1.0 + zero; + +# GENERATE A LINEAR MAP FROM REPORTS TO REGRESSION FACTORS + +RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs)); + +for (t in 3 : num_terms) { + for (i in 1 : (num_attrs - 2)) { + reg_index = ((t-3)*(num_attrs-1)-1 + i) * num_factors; + RegresValueMap [reg_index + 1, (t-1) * num_attrs + i] = 1.0 + zero; # First factor is x[t] + RegresValueMap [reg_index + 2, (t-2) * num_attrs + i] = 1.0 + zero; # Second factor is x[t-1] + RegresValueMap [reg_index + 3, (t-2) * num_attrs + i] = 1.0 + zero; # Third factor is + RegresValueMap [reg_index + 3, (t-3) * num_attrs + i] = -1.0 + zero; # x[t-1] - x[t-2] + RegresValueMap [reg_index + 4, (t-1) * num_attrs + 5] = 1.0 + zero; # 4th factor = total_cost[t] + } + # For the total cost itself, the regression is almost the same, except the last line: + reg_index = ((t-3)*(num_attrs-1)-1 + 5) * num_factors; + RegresValueMap [reg_index + 1, (t-1) * num_attrs + 5] = 1.0 + zero; # First factor is x[t] + RegresValueMap [reg_index + 2, (t-2) * num_attrs + 5] = 1.0 + zero; # Second factor is x[t-1] + RegresValueMap [reg_index + 3, (t-2) * num_attrs + 5] = 1.0 + zero; # Third factor is + RegresValueMap [reg_index + 3, (t-3) * num_attrs + 5] = -1.0 + zero; # x[t-1] - x[t-2] + RegresValueMap [reg_index + 4, (t-1) * num_attrs + 6] = 1.0 + zero; # 4th factor = auxiliary[t] +} + +# GENERATE A LINEAR MAP FROM PARAMETERS TO REGRESSION FACTORS + +RegresParamMap = matrix (0.0, rows = 
(num_reg_eqs * num_factors), cols = num_params); + +for (t in 3 : num_terms) { + for (i in 1 : (num_attrs - 1)) { + reg_index = ((t-3)*(num_attrs-1)-1 + i) * num_factors; + RegresParamMap [reg_index + 1, 0 * (num_attrs-1) + i] = 1.0 + zero; + RegresParamMap [reg_index + 2, 1 * (num_attrs-1) + i] = 1.0 + zero; + RegresParamMap [reg_index + 3, 2 * (num_attrs-1) + i] = 1.0 + zero; + RegresParamMap [reg_index + 4, 3 * (num_attrs-1) + i] = 1.0 + zero; + } +} + +# WRITE OUT ALL GENERATED MATRICES + +write (initial_reports_matrix, $1, format="text"); +write (CReps, $2, format="text"); +write (RegresValueMap, $3, format="text"); +write (RegresParamMap, $4, format="text"); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/impute/testShadowRecurrenceInputGenerator.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/impute/testShadowRecurrenceInputGenerator.dml b/src/test/scripts/applications/impute/testShadowRecurrenceInputGenerator.dml index 9a20214..fcbf47c 100644 --- a/src/test/scripts/applications/impute/testShadowRecurrenceInputGenerator.dml +++ b/src/test/scripts/applications/impute/testShadowRecurrenceInputGenerator.dml @@ -1,174 +1,174 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# hadoop jar SystemML.jar -f test/scripts/applications/impute/testShadowRecurrenceInputGenerator.dml -exec singlenode -# -args -# test/scripts/applications/impute/initial_reports -# test/scripts/applications/impute/CReps -# test/scripts/applications/impute/RegresValueMap -# test/scripts/applications/impute/RegresFactorDefault -# test/scripts/applications/impute/RegresParamMap -# test/scripts/applications/impute/RegresCoeffDefault -# test/scripts/applications/impute/RegresScaleMult - - -# GENERATE SYNTHETIC "INITIAL REPORTS" - -num_terms = 10; -num_series = 10; -num_attrs = 2 * num_series; -num_frees = num_series * (num_terms + 1); - -initial_reports = Rand (rows = num_attrs, cols = num_terms, min = -50.0, max = 50.0); - -for (s in 1:num_series) { - for (t in 1:(num_terms - 1)) { - val = 400 - (t - 14.16) * (t - 5.5) * (t + 3.16) / 2.463552; - initial_reports [2 * (s-1) + 1, t] = initial_reports [2 * (s-1) + 1, t] + val; - } -} - -zero = matrix (0.0, rows = 1, cols = 1); - -# --------------------------------------------------------- -# GENERATE AN AFFINE MAP FROM FREE VARIABLES TO THE REPORTS -# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS -# --------------------------------------------------------- - -CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees); - -for (s in 1:num_series) { - for (t in 0:num_terms) { - ta_shift = (t - 1) * num_attrs + 2 * s; - if (t == 0) { - ta_shift = (num_terms - 1) * num_attrs + (2 * s - 1); - } - CReps [ta_shift, t * num_series + s] 
= 1.0 + zero; -} } - -# In all regressions, except the last few "special" ones, there are 3 factors -# (here "x" are the "states" and "y" are the "observations"): -# Observation regression: y[t]-x[t] ~ a * 1 ### + b * (y[t-1]-x[t-1]) -# State-change regression: x[t] ~ c * x[t-1] + d * (x[t-1]-x[t-2]) - -num_factors = 3; -num_reg_eqs = num_terms * 2 * num_series; -num_params = 3 * num_series; - -RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs)); -RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1); - - -# --------------------------------------------------------- -# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS -# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS -# --------------------------------------------------------- - - -for (t in 1 : num_terms) { - for (s in 1 : num_series) { - - reg_index = ((t-1) * num_series + (s-1)) * 2 * num_factors; - -# Observation regression: - - RegresValueMap [reg_index + 1, (t-1) * num_attrs + 2 * s - 1] = -1.0 + zero; # 1st factor: - RegresValueMap [reg_index + 1, (t-1) * num_attrs + 2 * s ] = 1.0 + zero; # -(y[t]-x[t]) - - RegresFactorDefault [reg_index + 2, 1] = 1.0 + zero; # 2nd factor: Intercept - -# RegresValueMap [reg_index + 3, (t-2) * num_attrs + 2 * s - 1] = 1.0 + zero; # 3rd factor: -# RegresValueMap [reg_index + 3, (t-2) * num_attrs + 2 * s ] = -1.0 + zero; # y[t-1]-x[t-1] - - reg_index = reg_index + num_factors; - -# State-change regression: - - if (t >= 3) { - RegresValueMap [reg_index + 1, (t-1) * num_attrs + 2 * s] = -1.0 + zero; # 1st factor: -x[t] - RegresValueMap [reg_index + 2, (t-2) * num_attrs + 2 * s] = 1.0 + zero; # 2nd factor: x[t-1] - RegresValueMap [reg_index + 3, (t-2) * num_attrs + 2 * s] = 1.0 + zero; # 3rd factor: - RegresValueMap [reg_index + 3, (t-3) * num_attrs + 2 * s] = -1.0 + zero; # x[t-1]-x[t-2] - } - } -} - -# ---------------------------------------------------------- -# GENERATE AN AFFINE MAP FROM 
PARAMETERS TO THE COEFFICIENTS -# AT REGRESSION FACTORS: A LINEAR MAP + A VECTOR OF DEFAULTS -# ---------------------------------------------------------- - -RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params); -RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1); - -for (t in 1 : num_terms) { - for (s in 1 : num_series) { - - reg_index = ((t-1) * num_series + (s-1)) * 2 * num_factors; - -# Observation regression: - - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; - RegresParamMap [reg_index + 2, 3 * (s-1) + 1] = 1.0 + zero; - -# RegresParamMap [reg_index + 3, 4 * (s-1) + 2] = 1.0 + zero; - - reg_index = reg_index + num_factors; - -# State-change regression: - - if (t >= 3) { - RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; - RegresParamMap [reg_index + 2, 3 * (s-1) + 2] = 1.0 + zero; - RegresParamMap [reg_index + 3, 3 * (s-1) + 3] = 1.0 + zero; - } - } -} - -# ---------------------------------------------------------------------- -# GENERATE A VECTOR OF SCALE MULTIPLIERS ("WEIGHTS"), ONE PER REGRESSION -# ---------------------------------------------------------------------- - -RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1); -global_weight = 0.5 + zero; -acceptable_drift = 1.0; - -for (t in 1 : num_terms) { - for (s in 1 : num_series) { - reg_id = ((t-1) * num_series + (s-1)) * 2 + 1; - RegresScaleMult [reg_id , 1] = global_weight / (acceptable_drift ^ 2); - RegresScaleMult [reg_id + 1, 1] = global_weight / (acceptable_drift ^ 2); - } -} - - -# -------------------------------- -# WRITE OUT ALL GENERATED MATRICES -# -------------------------------- - - -write (initial_reports, $1, format="text"); -write (CReps, $2, format="text"); -write (RegresValueMap, $3, format="text"); -write (RegresFactorDefault,$4, format="text"); -write (RegresParamMap, $5, format="text"); -write (RegresCoeffDefault, $6, format="text"); -write (RegresScaleMult, $7, format="text"); 
+#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +# hadoop jar SystemML.jar -f test/scripts/applications/impute/testShadowRecurrenceInputGenerator.dml -exec singlenode +# -args +# test/scripts/applications/impute/initial_reports +# test/scripts/applications/impute/CReps +# test/scripts/applications/impute/RegresValueMap +# test/scripts/applications/impute/RegresFactorDefault +# test/scripts/applications/impute/RegresParamMap +# test/scripts/applications/impute/RegresCoeffDefault +# test/scripts/applications/impute/RegresScaleMult + + +# GENERATE SYNTHETIC "INITIAL REPORTS" + +num_terms = 10; +num_series = 10; +num_attrs = 2 * num_series; +num_frees = num_series * (num_terms + 1); + +initial_reports = Rand (rows = num_attrs, cols = num_terms, min = -50.0, max = 50.0); + +for (s in 1:num_series) { + for (t in 1:(num_terms - 1)) { + val = 400 - (t - 14.16) * (t - 5.5) * (t + 3.16) / 2.463552; + initial_reports [2 * (s-1) + 1, t] = initial_reports [2 * (s-1) + 1, t] + val; + } +} + +zero = matrix (0.0, rows = 1, cols = 1); + +# --------------------------------------------------------- +# GENERATE AN 
AFFINE MAP FROM FREE VARIABLES TO THE REPORTS +# AFFINE MAP = LINEAR MAP + INITIAL (DEFAULT) REPORTS +# --------------------------------------------------------- + +CReps = matrix (0.0, rows = (num_terms * num_attrs), cols = num_frees); + +for (s in 1:num_series) { + for (t in 0:num_terms) { + ta_shift = (t - 1) * num_attrs + 2 * s; + if (t == 0) { + ta_shift = (num_terms - 1) * num_attrs + (2 * s - 1); + } + CReps [ta_shift, t * num_series + s] = 1.0 + zero; +} } + +# In all regressions, except the last few "special" ones, there are 3 factors +# (here "x" are the "states" and "y" are the "observations"): +# Observation regression: y[t]-x[t] ~ a * 1 ### + b * (y[t-1]-x[t-1]) +# State-change regression: x[t] ~ c * x[t-1] + d * (x[t-1]-x[t-2]) + +num_factors = 3; +num_reg_eqs = num_terms * 2 * num_series; +num_params = 3 * num_series; + +RegresValueMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = (num_terms * num_attrs)); +RegresFactorDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1); + + +# --------------------------------------------------------- +# GENERATE AN AFFINE MAP FROM REPORTS TO REGRESSION FACTORS +# AFFINE MAP = LINEAR MAP + A VECTOR OF DEFAULTS +# --------------------------------------------------------- + + +for (t in 1 : num_terms) { + for (s in 1 : num_series) { + + reg_index = ((t-1) * num_series + (s-1)) * 2 * num_factors; + +# Observation regression: + + RegresValueMap [reg_index + 1, (t-1) * num_attrs + 2 * s - 1] = -1.0 + zero; # 1st factor: + RegresValueMap [reg_index + 1, (t-1) * num_attrs + 2 * s ] = 1.0 + zero; # -(y[t]-x[t]) + + RegresFactorDefault [reg_index + 2, 1] = 1.0 + zero; # 2nd factor: Intercept + +# RegresValueMap [reg_index + 3, (t-2) * num_attrs + 2 * s - 1] = 1.0 + zero; # 3rd factor: +# RegresValueMap [reg_index + 3, (t-2) * num_attrs + 2 * s ] = -1.0 + zero; # y[t-1]-x[t-1] + + reg_index = reg_index + num_factors; + +# State-change regression: + + if (t >= 3) { + RegresValueMap [reg_index + 1, 
(t-1) * num_attrs + 2 * s] = -1.0 + zero; # 1st factor: -x[t] + RegresValueMap [reg_index + 2, (t-2) * num_attrs + 2 * s] = 1.0 + zero; # 2nd factor: x[t-1] + RegresValueMap [reg_index + 3, (t-2) * num_attrs + 2 * s] = 1.0 + zero; # 3rd factor: + RegresValueMap [reg_index + 3, (t-3) * num_attrs + 2 * s] = -1.0 + zero; # x[t-1]-x[t-2] + } + } +} + +# ---------------------------------------------------------- +# GENERATE AN AFFINE MAP FROM PARAMETERS TO THE COEFFICIENTS +# AT REGRESSION FACTORS: A LINEAR MAP + A VECTOR OF DEFAULTS +# ---------------------------------------------------------- + +RegresParamMap = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = num_params); +RegresCoeffDefault = matrix (0.0, rows = (num_reg_eqs * num_factors), cols = 1); + +for (t in 1 : num_terms) { + for (s in 1 : num_series) { + + reg_index = ((t-1) * num_series + (s-1)) * 2 * num_factors; + +# Observation regression: + + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; + RegresParamMap [reg_index + 2, 3 * (s-1) + 1] = 1.0 + zero; + +# RegresParamMap [reg_index + 3, 4 * (s-1) + 2] = 1.0 + zero; + + reg_index = reg_index + num_factors; + +# State-change regression: + + if (t >= 3) { + RegresCoeffDefault [reg_index + 1, 1] = 1.0 + zero; + RegresParamMap [reg_index + 2, 3 * (s-1) + 2] = 1.0 + zero; + RegresParamMap [reg_index + 3, 3 * (s-1) + 3] = 1.0 + zero; + } + } +} + +# ---------------------------------------------------------------------- +# GENERATE A VECTOR OF SCALE MULTIPLIERS ("WEIGHTS"), ONE PER REGRESSION +# ---------------------------------------------------------------------- + +RegresScaleMult = matrix (1.0, rows = num_reg_eqs, cols = 1); +global_weight = 0.5 + zero; +acceptable_drift = 1.0; + +for (t in 1 : num_terms) { + for (s in 1 : num_series) { + reg_id = ((t-1) * num_series + (s-1)) * 2 + 1; + RegresScaleMult [reg_id , 1] = global_weight / (acceptable_drift ^ 2); + RegresScaleMult [reg_id + 1, 1] = global_weight / (acceptable_drift ^ 2); + } +} + + +# 
-------------------------------- +# WRITE OUT ALL GENERATED MATRICES +# -------------------------------- + + +write (initial_reports, $1, format="text"); +write (CReps, $2, format="text"); +write (RegresValueMap, $3, format="text"); +write (RegresFactorDefault,$4, format="text"); +write (RegresParamMap, $5, format="text"); +write (RegresCoeffDefault, $6, format="text"); +write (RegresScaleMult, $7, format="text"); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/impute/tmp.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/impute/tmp.dml b/src/test/scripts/applications/impute/tmp.dml index b70a4ad..9e0417e 100644 --- a/src/test/scripts/applications/impute/tmp.dml +++ b/src/test/scripts/applications/impute/tmp.dml @@ -1,128 +1,128 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -#------------------------------------------------------------- - -setwd ("test/scripts/applications/glm"); -source ("Misc.dml"); - -blahblah = 0.0 / 0.0; # -0.00099999999; -print (blahblah); -x = matrix (0.0, rows = 55, cols = 1); -x [55, 1] = blahblah; -print (castAsScalar (x [55, 1])); -for (i in 1:9) { - x [i, 1] = -0.001 * i; -} -for (i in 1:5) { - x [(9 * i + 1):(9 * i + 9), 1] = x [(9 * i - 8):(9 * i), 1] * 10; -} -y = atan_temporary (x); -z = tan (y); -for (i in 1:nrow(x)) { - [x_m, x_e] = round_to_print (castAsScalar (x[i,1])); - [a_m, a_e] = round_to_print (castAsScalar (y[i,1])); - [t_m, t_e] = round_to_print (castAsScalar (z[i,1])); - print ("x = " + x_m + "E" + x_e + "; atan(x) = " + a_m + "E" + a_e + "; tan(atan(x)) = " + t_m + "E" + t_e); -} - - - - - - -coeff_a = -3.14; -coeff_b = 3.14 * (2 - 3 - 1.7); -coeff_c = -3.14 * (2 * (-3) + 2 * (-1.7) + (-3) * (-1.7)); -coeff_d = 3.14 * (2 * (-3) * (-1.7)); - - - coeff_aa = coeff_b / coeff_a; - coeff_bb = coeff_c / coeff_a; - coeff_cc = coeff_d / coeff_a; - - coeff_Q = (coeff_aa * coeff_aa - 3.0 * coeff_bb) / 9.0; - coeff_R = (2.0 * coeff_aa * coeff_aa * coeff_aa - 9.0 * coeff_aa * coeff_bb + 27.0 * coeff_cc) / 54.0; - - if (coeff_R * coeff_R < coeff_Q * coeff_Q * coeff_Q) - { - two_pi_third = 2.0943951023931954923084289221863; - acos_argument = coeff_R / sqrt (coeff_Q * coeff_Q * coeff_Q); - - x = abs (acos_argument); - acos_x = sqrt (1.0 - x) * (1.5707963050 + x * (-0.2145988016 - + x * ( 0.0889789874 + x * (-0.0501743046 - + x * ( 0.0308918810 + x * (-0.0170881256 - + x * ( 0.0066700901 + x * (-0.0012624911)))))))); - if (acos_argument >= 0.0) { - coeff_theta = acos_x; - } else { - coeff_theta = 3.1415926535897932384626433832795 - acos_x; - } - - root_1 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0); - root_2 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 + two_pi_third); - root_3 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 - 
two_pi_third); - - root_min = min (min (root_1, root_2), root_3); - root_max = max (max (root_1, root_2), root_3); - root_middle = root_1 + root_2 + root_3 - root_min - root_max; - - root_1 = root_min; root_2 = root_middle; root_3 = root_max; - - print ("Three roots: " + (round (root_1 * 10000) / 10000) + ", " + (round (root_2 * 10000) / 10000) + ", " + (round (root_3 * 10000) / 10000)); - - } else { - if (coeff_R >= 0.0) { - sgn_coeff_R = 1.0; - } else { - sgn_coeff_R = -1.0; - } - coeff_bigA = - sgn_coeff_R * (abs (coeff_R) + sqrt (coeff_R * coeff_R - coeff_Q * coeff_Q * coeff_Q)) ^ (1.0 / 3.0); - if (coeff_bigA != 0.0) { - root = coeff_bigA + coeff_Q / coeff_bigA - coeff_aa / 3.0; - } else { - root = - coeff_aa / 3.0; - } - print ("One root: " + (round (root * 10000) / 10000)); - } - -/* -atan_temporary = - function (Matrix [double] Args) return (Matrix [double] AtanArgs) -{ - AbsArgs = abs (Args); - Eks = AbsArgs + ppred (AbsArgs, 0.0, "==") * 0.000000000001; - Eks = ppred (AbsArgs, 1.0, "<=") * Eks + ppred (AbsArgs, 1.0, ">") / Eks; - EksSq = Eks * Eks; - AtanEks = - Eks * ( 1.0000000000 + - EksSq * (-0.3333314528 + # Milton Abramowitz and Irene A. Stegun, Eds. - EksSq * ( 0.1999355085 + # "Handbook of Mathematical Functions" - EksSq * (-0.1420889944 + # U.S. National Bureau of Standards, June 1964 - EksSq * ( 0.1065626393 + # Section 4.4, page 81, Equation 4.4.49 - EksSq * (-0.0752896400 + - EksSq * ( 0.0429096138 + - EksSq * (-0.0161657367 + - EksSq * 0.0028662257 )))))))); - pi_over_two = 1.5707963267948966192313216916398; - AtanAbsArgs = ppred (AbsArgs, 1.0, "<=") * AtanEks + ppred (AbsArgs, 1.0, ">") * (pi_over_two - AtanEks); - AtanArgs = (ppred (Args, 0.0, ">=") - ppred (Args, 0.0, "<")) * AtanAbsArgs; -} +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +setwd ("test/scripts/applications/glm"); +source ("Misc.dml"); + +blahblah = 0.0 / 0.0; # -0.00099999999; +print (blahblah); +x = matrix (0.0, rows = 55, cols = 1); +x [55, 1] = blahblah; +print (castAsScalar (x [55, 1])); +for (i in 1:9) { + x [i, 1] = -0.001 * i; +} +for (i in 1:5) { + x [(9 * i + 1):(9 * i + 9), 1] = x [(9 * i - 8):(9 * i), 1] * 10; +} +y = atan_temporary (x); +z = tan (y); +for (i in 1:nrow(x)) { + [x_m, x_e] = round_to_print (castAsScalar (x[i,1])); + [a_m, a_e] = round_to_print (castAsScalar (y[i,1])); + [t_m, t_e] = round_to_print (castAsScalar (z[i,1])); + print ("x = " + x_m + "E" + x_e + "; atan(x) = " + a_m + "E" + a_e + "; tan(atan(x)) = " + t_m + "E" + t_e); +} + + + + + + +coeff_a = -3.14; +coeff_b = 3.14 * (2 - 3 - 1.7); +coeff_c = -3.14 * (2 * (-3) + 2 * (-1.7) + (-3) * (-1.7)); +coeff_d = 3.14 * (2 * (-3) * (-1.7)); + + + coeff_aa = coeff_b / coeff_a; + coeff_bb = coeff_c / coeff_a; + coeff_cc = coeff_d / coeff_a; + + coeff_Q = (coeff_aa * coeff_aa - 3.0 * coeff_bb) / 9.0; + coeff_R = (2.0 * coeff_aa * coeff_aa * coeff_aa - 9.0 * coeff_aa * coeff_bb + 27.0 * coeff_cc) / 54.0; + + if (coeff_R * coeff_R < coeff_Q * coeff_Q * coeff_Q) + { + two_pi_third = 2.0943951023931954923084289221863; 
+ acos_argument = coeff_R / sqrt (coeff_Q * coeff_Q * coeff_Q); + + x = abs (acos_argument); + acos_x = sqrt (1.0 - x) * (1.5707963050 + x * (-0.2145988016 + + x * ( 0.0889789874 + x * (-0.0501743046 + + x * ( 0.0308918810 + x * (-0.0170881256 + + x * ( 0.0066700901 + x * (-0.0012624911)))))))); + if (acos_argument >= 0.0) { + coeff_theta = acos_x; + } else { + coeff_theta = 3.1415926535897932384626433832795 - acos_x; + } + + root_1 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0); + root_2 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 + two_pi_third); + root_3 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 - two_pi_third); + + root_min = min (min (root_1, root_2), root_3); + root_max = max (max (root_1, root_2), root_3); + root_middle = root_1 + root_2 + root_3 - root_min - root_max; + + root_1 = root_min; root_2 = root_middle; root_3 = root_max; + + print ("Three roots: " + (round (root_1 * 10000) / 10000) + ", " + (round (root_2 * 10000) / 10000) + ", " + (round (root_3 * 10000) / 10000)); + + } else { + if (coeff_R >= 0.0) { + sgn_coeff_R = 1.0; + } else { + sgn_coeff_R = -1.0; + } + coeff_bigA = - sgn_coeff_R * (abs (coeff_R) + sqrt (coeff_R * coeff_R - coeff_Q * coeff_Q * coeff_Q)) ^ (1.0 / 3.0); + if (coeff_bigA != 0.0) { + root = coeff_bigA + coeff_Q / coeff_bigA - coeff_aa / 3.0; + } else { + root = - coeff_aa / 3.0; + } + print ("One root: " + (round (root * 10000) / 10000)); + } + +/* +atan_temporary = + function (Matrix [double] Args) return (Matrix [double] AtanArgs) +{ + AbsArgs = abs (Args); + Eks = AbsArgs + ppred (AbsArgs, 0.0, "==") * 0.000000000001; + Eks = ppred (AbsArgs, 1.0, "<=") * Eks + ppred (AbsArgs, 1.0, ">") / Eks; + EksSq = Eks * Eks; + AtanEks = + Eks * ( 1.0000000000 + + EksSq * (-0.3333314528 + # Milton Abramowitz and Irene A. Stegun, Eds. + EksSq * ( 0.1999355085 + # "Handbook of Mathematical Functions" + EksSq * (-0.1420889944 + # U.S. 
National Bureau of Standards, June 1964 + EksSq * ( 0.1065626393 + # Section 4.4, page 81, Equation 4.4.49 + EksSq * (-0.0752896400 + + EksSq * ( 0.0429096138 + + EksSq * (-0.0161657367 + + EksSq * 0.0028662257 )))))))); + pi_over_two = 1.5707963267948966192313216916398; + AtanAbsArgs = ppred (AbsArgs, 1.0, "<=") * AtanEks + ppred (AbsArgs, 1.0, ">") * (pi_over_two - AtanEks); + AtanArgs = (ppred (Args, 0.0, ">=") - ppred (Args, 0.0, "<")) * AtanAbsArgs; +} */ \ No newline at end of file
