This is an automated email from the ASF dual-hosted git repository.

njayaram pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git


The following commit(s) were added to refs/heads/master by this push:
     new f6a7ddb  DL: Fix predict with 'NULL' string class values
f6a7ddb is described below

commit f6a7ddbbeb35f045475e286615845fc92b15f097
Author: Nandish Jayaram <[email protected]>
AuthorDate: Fri Jun 7 14:29:59 2019 -0700

    DL: Fix predict with 'NULL' string class values
    
    JIRA: MADLIB-1357
    Fix handling of 'NULL' string for class values in predict. 'NULL' string
    as a class value was getting reported the same way as a Postgres NULL
    class value (i.e., empty). This commit double-quotes a 'NULL' string
    class value in the relevant query.
    NOTE: In predict, if we use 'prob' as the pred_type, we create a column
    for each distinct class level. The column name for Postgres NULL class
    level is 'prob_NULL', and the column name for a 'NULL' string class
    value will be 'prob_"NULL'.
    
    Closes #408
---
 .../postgres/modules/utilities/utilities.py_in     | 32 ++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/src/ports/postgres/modules/utilities/utilities.py_in 
b/src/ports/postgres/modules/utilities/utilities.py_in
index e6e31c5..4e142aa 100644
--- a/src/ports/postgres/modules/utilities/utilities.py_in
+++ b/src/ports/postgres/modules/utilities/utilities.py_in
@@ -443,6 +443,19 @@ def create_cols_from_array_sql_string(py_list, 
sql_array_col, colname,
             Output:
                 (ARRAY['cat','dog'])[sqlcol[1]+1]::TEXT AS estimated_pred
 
+    @NOTE:
+        If py_list is [None, 'cat', 'dog', NULL']:
+        then the SQL query string returned would create the following
+        column names:
+            prob_NULL, prob_cat, 'prob_dog', and 'prob_"NULL'.
+        1. Notice that for None, which represents Postgres' NULL value, the
+        column name will be 'prob_NULL',
+        2. and to differentiate the column name for a string 'NULL', the
+        resulting column name will be 'prob_"NULL'.
+
+        The weird quoting in this column name is due to calling strip after
+        quote_ident in the code below.
+
     @returns:
         @param, str, that can be used in a SQL query.
 
@@ -458,7 +471,21 @@ def create_cols_from_array_sql_string(py_list, 
sql_array_col, colname,
         _assert(py_list.count(None) <= 1,
                 "{0}: Input list should contain at most 1 None element.".
                     format(module_name))
-        py_list = ['NULL' if ele is None else ele for ele in py_list]
+        def py_list_str(ele):
+            """
+                A python None is converted to a SQL NULL.
+                String 'NULL' is converted to SQL 'NULL' string by quoting
+                it to '"NULL"'. This quoting is necessary for Postgres to
+                differentiate between NULL and 'NULL' in the SQL query
+                string returned by create_cols_from_array_sql_string.
+            """
+            if ele is None:
+                return 'NULL'
+            elif isinstance(ele, str) and ele.lower()=='null':
+                return '"{0}"'.format(ele)
+            return ele
+
+        py_list = list(map(py_list_str, py_list))
         if has_one_ele:
             # Query to choose the value in the first element of
             # sql_array_col which is the index to access in py_list.
@@ -475,7 +502,8 @@ def create_cols_from_array_sql_string(py_list, 
sql_array_col, colname,
 
             # we cannot call sql quote_ident on the py_list entries because
             # aliasing does not support quote_ident. Hence calling our
-            # python implementation of quote_ident
+            # python implementation of quote_ident. We must call strip()
+            # after quote_ident since the resulting SQL query fails otherwise.
             select_clause = ', '.join(
                 ['CAST({sql_array_col}[{j}] AS {coltype}) AS 
"{final_colname}"'.
                     format(j=i + 1,

Reply via email to