This is an automated email from the ASF dual-hosted git repository. njayaram pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
commit 0e67376c1261d5edf72fd2f73dff28d1f53da797 Author: Nikhil Kak <[email protected]> AuthorDate: Wed Mar 13 11:30:08 2019 -0700 Minibatch Preprocessor: Fix random distribution bug for DL The minibatch preprocessor for deep learning did not distribute the data randomly after creating the buffers. This commit fixes that by adding `order by random()` to the query that selects from the source table. Co-authored-by: Nikhil Kak <[email protected]> Co-authored-by: Domino Valdano <[email protected]> --- .../postgres/modules/utilities/minibatch_preprocessing.py_in | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in b/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in index c3fd95d..e1967a8 100644 --- a/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in +++ b/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in @@ -52,9 +52,9 @@ MINIBATCH_OUTPUT_DEPENDENT_COLNAME = "dependent_varname" MINIBATCH_OUTPUT_INDEPENDENT_COLNAME = "independent_varname" # These are readonly variables, do not modify -#MADLIB-1300 Adding these variables for DL only at this time. -# For release 2.0 These will be removed and above variables can -# used for regular and DL minibatch. +#MADLIB-1300 Adding these variables for DL only at this time. +# For release 2.0 These will be removed and above variables can +# used for regular and DL minibatch. MINIBATCH_OUTPUT_DEPENDENT_COLNAME_DL = "dependent_var" MINIBATCH_OUTPUT_INDEPENDENT_COLNAME_DL = "independent_var" @@ -89,8 +89,8 @@ class MiniBatchPreProcessorDL: {self.independent_varname}::REAL[], (1/{self.normalizing_const})::REAL) AS x_norm, {dependent_varname_with_offset} AS y, row_number() over() AS row_id - FROM {self.source_table} - """.format(**locals()) + FROM {self.source_table} order by random() + """.format(**locals()) plpy.execute(scalar_mult_sql) # Create the mini-batched output table if is_platform_pg():
