This is an automated email from the ASF dual-hosted git repository.

njayaram pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit 0e67376c1261d5edf72fd2f73dff28d1f53da797
Author: Nikhil Kak <[email protected]>
AuthorDate: Wed Mar 13 11:30:08 2019 -0700

    Minibatch Preprocessor: Fix random distribution bug for DL
    
    Minibatch preprocessor for deep learning did not distribute the data
    randomly after creating the buffers. This commit fixes that by adding
    `order by random()` to the query that reads from the source table.
    
    Co-authored-by: Nikhil Kak <[email protected]>
    Co-authored-by: Domino Valdano <[email protected]>
---
 .../postgres/modules/utilities/minibatch_preprocessing.py_in   | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in b/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in
index c3fd95d..e1967a8 100644
--- a/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in
+++ b/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in
@@ -52,9 +52,9 @@ MINIBATCH_OUTPUT_DEPENDENT_COLNAME = "dependent_varname"
 MINIBATCH_OUTPUT_INDEPENDENT_COLNAME = "independent_varname"
 
 # These are readonly variables, do not modify
-#MADLIB-1300 Adding these variables for DL only at this time. 
-# For release 2.0 These will be removed and above variables can 
-# used for regular and DL minibatch. 
+#MADLIB-1300 Adding these variables for DL only at this time.
+# For release 2.0 These will be removed and above variables can
+# used for regular and DL minibatch.
 MINIBATCH_OUTPUT_DEPENDENT_COLNAME_DL = "dependent_var"
 MINIBATCH_OUTPUT_INDEPENDENT_COLNAME_DL = "independent_var"
 
@@ -89,8 +89,8 @@ class MiniBatchPreProcessorDL:
                 {self.independent_varname}::REAL[], (1/{self.normalizing_const})::REAL) AS x_norm,
                 {dependent_varname_with_offset} AS y,
                 row_number() over() AS row_id
-            FROM {self.source_table}
-            """.format(**locals())
+            FROM {self.source_table} order by random()
+        """.format(**locals())
         plpy.execute(scalar_mult_sql)
         # Create the mini-batched output table
         if is_platform_pg():

Reply via email to