Repository: madlib
Updated Branches:
  refs/heads/master 2cde01d1f -> 59844eb1f


Madpack: Add a script for automating changelist creation

Closes #318


Project: http://git-wip-us.apache.org/repos/asf/madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/59844eb1
Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/59844eb1
Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/59844eb1

Branch: refs/heads/master
Commit: 59844eb1f3362950f49623f2365c74332fd520b7
Parents: 2cde01d
Author: Orhan Kislal <[email protected]>
Authored: Mon Sep 17 08:54:28 2018 +0300
Committer: Orhan Kislal <[email protected]>
Committed: Mon Sep 17 08:58:17 2018 +0300

----------------------------------------------------------------------
 src/madpack/create_changelist.py | 242 ++++++++++++++++++++++++++++++++++
 src/madpack/diff_udf.sql         |  95 ++++---------
 src/madpack/diff_udt.sql         |   3 +-
 3 files changed, 266 insertions(+), 74 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/madlib/blob/59844eb1/src/madpack/create_changelist.py
----------------------------------------------------------------------
diff --git a/src/madpack/create_changelist.py b/src/madpack/create_changelist.py
new file mode 100644
index 0000000..a689319
--- /dev/null
+++ b/src/madpack/create_changelist.py
@@ -0,0 +1,242 @@
+#!/usr/bin/python
+# 
------------------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# 
------------------------------------------------------------------------------
+
+# Create changelist for any two branches/tags
+
+# Prequisites:
+# The old version has to be installed in the "madlib_old_vers" schema
+# The new version has to be installed in the "madlib" (default) schema
+# Two branches/tags must exist locally (run 'git fetch' to ensure you have the 
latest version)
+# The current branch does not matter
+
+# Usage (must be executed in the src/madpack directory):
+# python create_changelist.py <database name> <old version branch> <new 
version branch> <changelist filename>
+# If you are using the master branch, plase make sure to edit the branch/tag 
in the output file
+
+# Example (should be equivalent to changelist_1.13_1.14.yaml):
+# python create_changelist.py madlib rel/v1.13 rel/v1.14 chtest1.yaml
+
+import sys
+import os
+
+database = sys.argv[1]
+old_vers = sys.argv[2]
+new_vers = sys.argv[3]
+ch_filename = sys.argv[4]
+
+if os.path.exists(ch_filename):
+    print "{0} already exists".format(ch_filename)
+    raise SystemExit
+
+err1 = os.system("""psql {0} -l > /dev/null""".format(database))
+if err1 != 0:
+    print "Database {0} does not exist".format(database)
+    raise SystemExit
+
+err1 = os.system("""psql {0} -c "select madlib_old_vers.version()" > /dev/null
+                 """.format(database))
+if err1 != 0:
+    print "MADlib is not installed in the madlib_old_vers schema. Please refer 
to the Prequisites."
+    raise SystemExit
+
+err1 = os.system("""psql {0} -c "select madlib.version()" > /dev/null
+                 """.format(database))
+if err1 != 0:
+    print "MADlib is not installed in the madlib schema. Please refer to the 
Prequisites."
+    raise SystemExit
+
+print "Creating changelist {0}".format(ch_filename)
+os.system("rm -f /tmp/madlib_tmp_nm.txt /tmp/madlib_tmp_udf.txt 
/tmp/madlib_tmp_udt.txt")
+try:
+    # Find the new modules using the git diff
+    err1 = os.system("git diff {old_vers} {new_vers} --name-only 
--diff-filter=A > /tmp/madlib_tmp_nm.txt".format(**locals()))
+    if err1 != 0:
+        print "Git diff failed. Please ensure that branches/tags are fetched."
+        raise SystemExit
+
+    f = open("/tmp/madlib_tmp_cl.yaml", "w")
+    f.write(
+"""# 
------------------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# 
------------------------------------------------------------------------------
+""")
+
+    f.write(
+    """
+# Changelist for MADlib version {old_vers} to {new_vers}
+
+# This file contains all changes that were introduced in a new version of
+# MADlib. This changelist is used by the upgrade script to detect what objects
+# should be upgraded (while retaining all other objects from the previous 
version)
+
+# New modules (actually .sql_in files) added in upgrade version
+# For these files the sql_in code is retained as is with the functions in the
+# file installed on the upgrade version. All other files (that don't have
+# updates), are cleaned up to remove object replacements
+""".format(**locals()))
+
+    # Find the new .sql_in files that are not in test folders
+    f.write("new module:\n")
+    with open('/tmp/madlib_tmp_nm.txt') as fp:
+        for line in fp:
+            if 'sql_in' in line and '/test/' not in line:
+                 f.write('    ' + line.split('/')[5].split('.')[0]+':\n')
+
+    # Find the changed types and keep a list for future use
+    os.system("psql {0} -f diff_udt.sql > 
/tmp/madlib_tmp_udt.txt".format(database))
+
+    f.write("\n# Changes in the types (UDT) including removal and 
modification\n")
+    f.write("udt:\n")
+    udt_list=[]
+    with open('/tmp/madlib_tmp_udt.txt') as fp:
+        for line in fp:
+
+            # Both the type and its array form shows up as separate types
+            # We don't need both of them so we remove the array
+            if 'UDT' in line and '[]' not in line:
+                ch_type = line.split('|')[1].strip()
+                udt_list.append(ch_type)
+                f.write('    ' + ch_type +":\n")
+
+    # Find the list of UDFs and UDAs
+    # There are two main sources for these lists.
+    # 1. The functions that actually got changed
+    # 2. The functions that depend on a changed type
+
+    # We will keep two lists (for UDF and UDA) and fill them as we parse the
+    # output of diff functions
+
+    udf_list=[]
+    uda_list=[]
+    current_list = udf_list
+
+    # Find the changed functions/aggregates via the diff_udf script and write 
them to a file
+    os.system("psql {0} -f diff_udf.sql > 
/tmp/madlib_tmp_udf.txt".format(database))
+
+    # The entries in the file are ordered by type.
+    # Read them line by line and add to the udf list until the first aggregate
+    with open('/tmp/madlib_tmp_udf.txt') as fp:
+        for line in fp:
+            if 'type' in line:
+                # When we get the first aggregate, we switch the current list
+                if 'agg' in line:
+                    current_list = uda_list
+            if 'UDF' in line:
+                current_list.append(line.split('|')[1])
+
+    current_list = udf_list
+
+    # Find the function that return a changed type
+    # Note that we already ran the diff_udf.sql file
+    # This means the get_functions() function is already defined in the 
database
+    if os.path.exists("/tmp/madlib_tmp_typedep.txt"):
+        os.system("rm /tmp/madlib_tmp_typedep.txt")
+    os.system("touch /tmp/madlib_tmp_typedep.txt")
+    for t in udt_list:
+
+        os.system("""psql {database} -c "DROP TABLE IF EXISTS __tmp__madlib__ 
" > /dev/null """.format(**locals()))
+
+        # Find all of the old functions that return this particular type t and 
write to a table
+        os.system("""psql {database} -c "SELECT 
get_functions('__tmp__madlib__', 'madlib_old_vers', 'madlib_old_vers.{t}')" > 
/dev/null """.format(**locals()))
+
+        # Order them in descending order on type so that we read the functions 
first and aggregates last
+        os.system("""psql {database} -x -c "SELECT type, name, retype, 
argtypes FROM __tmp__madlib__ ORDER BY type DESC" > /tmp/madlib_tmp_typedep.txt 
""".format(**locals()))
+
+        os.system("""psql {database} -c "DROP TABLE IF EXISTS __tmp__madlib__ 
" > /dev/null """.format(**locals()))
+
+        with open('/tmp/madlib_tmp_typedep.txt') as fp:
+            for line in fp:
+                if '|' in line:
+                    sp = line.split('|')
+
+                    # Type is only used for switching the current list from 
udf to uda
+                    if sp[0].strip() == 'type':
+                        if sp[1].strip() == 'agg':
+                            current_list = uda_list
+
+                    # Add the functions with formatting
+                    elif sp[0].strip() == 'name':
+                        current_list.append('    - ' + sp[1].strip() + ":\n")
+                    elif sp[0].strip() == 'retype':
+                        current_list.append('        rettype: schema_madlib.' 
+ sp[1].strip() + "\n")
+                    elif sp[0].strip() == 'argtypes':
+                        current_list.append('        argument: ' + 
sp[1].strip() + "\n")
+
+    # Write the UDF and UDA lists to the changelist
+    f.write(
+    """
+# List of the UDF changes that affect the user externally. This includes change
+# in function name, return type, argument order or types, or removal of
+# the function. In each case, the original function is as good as removed and a
+# new function is created. In such cases, we should abort the upgrade if there
+# are user views dependent on this function, since the original function will
+# not be present in the upgraded version.
+""")
+
+    f.write("udf:\n")
+    for line in udf_list:
+        f.write(line)
+
+    f.write("""
+# Changes to aggregates (UDA) including removal and modification
+# Overloaded functions should be mentioned separately
+""")
+
+    f.write("uda:\n")
+    for line in uda_list:
+        f.write(line)
+
+    #TODO: We need to decide how we want to check for these
+    f.write(
+    """
+# List of the UDC, UDO and UDOC changes.
+""")
+    f.write("udc:\n")
+    f.write("udo:\n")
+    f.write("udoc:\n")
+    f.close()
+
+    # Copy the new changelist file to its proper location
+    # This helps to keep the madlib folder clean in case the program stops
+    # unexpectedly
+    os.system("cp /tmp/madlib_tmp_cl.yaml {0}".format(ch_filename))
+
+except:
+    print "Something went wrong! The changelist might be wrong/corrupted."
+    raise
+finally:
+    os.system("rm -f /tmp/madlib_tmp_nm.txt /tmp/madlib_tmp_udf.txt "
+              "/tmp/madlib_tmp_udt.txt /tmp/madlib_tmp_cl.yaml "
+              "/tmp/madlib_tmp_typedep.txt")

http://git-wip-us.apache.org/repos/asf/madlib/blob/59844eb1/src/madpack/diff_udf.sql
----------------------------------------------------------------------
diff --git a/src/madpack/diff_udf.sql b/src/madpack/diff_udf.sql
index 4e0b9aa..d73093c 100644
--- a/src/madpack/diff_udf.sql
+++ b/src/madpack/diff_udf.sql
@@ -11,71 +11,13 @@ RETURNS text AS $$
 $$ LANGUAGE plpythonu;
 
 
-CREATE OR REPLACE FUNCTION get_functions(schema_name text)
+CREATE OR REPLACE FUNCTION get_functions(table_name text, schema_name text,
+                                         type_filter text)
 RETURNS VOID AS
 $$
     import plpy
     plpy.execute("""
-        CREATE TABLE functions_madlib_new_version AS
-        SELECT
-            "schema", "name", filter_schema("retype", 'madlib') retype,
-            filter_schema("argtypes", 'madlib') argtypes, "type"
-        FROM
-        (
-
-            SELECT n.nspname as "schema",
-                p.proname as "name",
-                CASE WHEN p.proretset THEN 'SETOF ' ELSE '' END ||
-                pg_catalog.format_type(p.prorettype, NULL) as "retype",
-                CASE WHEN proallargtypes IS NOT NULL THEN
-                    pg_catalog.array_to_string(ARRAY(
-                        SELECT
-                            pio || ptyp
-                        FROM
-                        (
-                            SELECT
-                                CASE
-                                  WHEN p.proargmodes[s.i] = 'i' THEN ''
-                                  WHEN p.proargmodes[s.i] = 'o' THEN 'OUT '
-                                  WHEN p.proargmodes[s.i] = 'b' THEN 'INOUT '
-                                  WHEN p.proargmodes[s.i] = 'v' THEN 'VARIADIC 
'
-                                END  AS pio,
-                            --CASE
-                            --  WHEN COALESCE(p.proargnames[s.i], '') = '' 
THEN ''
-                            --  ELSE p.proargnames[s.i] || ' '
-                            --END ||
-                            pg_catalog.format_type(p.proallargtypes[s.i], 
NULL) AS ptyp
-                          FROM
-                            pg_catalog.generate_series(1, 
pg_catalog.array_upper(p.proallargtypes, 1)) AS s(i)
-                        ) qx
-                        WHERE pio = ''
-                        ), ', ')
-                ELSE
-                    pg_catalog.array_to_string(ARRAY(
-                      SELECT
-                        --CASE
-                        --  WHEN COALESCE(p.proargnames[s.i+1], '') = '' THEN 
''
-                        --  ELSE p.proargnames[s.i+1] || ' '
-                        --  END ||
-                        pg_catalog.format_type(p.proargtypes[s.i], NULL)
-                      FROM
-                        pg_catalog.generate_series(0, 
pg_catalog.array_upper(p.proargtypes, 1)) AS s(i)
-                    ), ', ')
-                END AS "argtypes",
-                CASE
-                  WHEN p.proisagg THEN 'agg'
-                  WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype 
THEN 'trigger'
-                  ELSE 'normal'
-                END AS "type"
-            FROM pg_catalog.pg_proc p
-                 LEFT JOIN pg_catalog.pg_namespace n
-                 ON n.oid = p.pronamespace
-            WHERE n.nspname ~ '^(madlib)$'
-            ORDER BY 1, 2, 4
-        ) q
-        """.format(schema_name=schema_name))
-    plpy.execute("""
-        CREATE TABLE functions_madlib_old_version AS
+        CREATE TABLE {table_name} AS
         SELECT
             "schema", "name", filter_schema("retype", '{schema_name}') retype,
             filter_schema("argtypes", '{schema_name}') argtypes, "type"
@@ -132,19 +74,24 @@ $$
             WHERE n.nspname ~ '^({schema_name})$'
             ORDER BY 1, 2, 4
         ) q
-        """.format(schema_name=schema_name))
+        WHERE retype LIKE '{type_filter}' OR retype LIKE '{type_filter}[]'
+        """.format(table_name=table_name, schema_name=schema_name, 
type_filter=type_filter))
 $$ LANGUAGE plpythonu;
 
 
 DROP TABLE IF EXISTS functions_madlib_old_version;
 DROP TABLE IF EXISTS functions_madlib_new_version;
 
-SELECT get_functions('madlib_old_vers');
+SELECT get_functions('functions_madlib_old_version','madlib_old_vers','%');
+SELECT get_functions('functions_madlib_new_version','madlib','%');
 
 SELECT
+    type,
     --'\t-' || name || ':' || '\n\t\t-rettype: ' || retype || 
'\n\t\t-argument: ' || argtypes
-    '    - ' || name || ':' || '\n        rettype: ' || retype || '\n        
argument: ' || argtypes AS "Dropped UDFs"
-    , type
+    '   - ' || name || ':' AS "Dropped UDF part1",
+    '       rettype: ' || retype AS "Dropped UDF part2",
+    '       argument: ' || argtypes AS "Dropped UDF part3"
+
 FROM
 (
 SELECT
@@ -156,14 +103,16 @@ FROM
     USING (name, retype, argtypes)
 WHERE new_version.name IS NULL
 ) q
-ORDER by type, "Dropped UDFs";
+ORDER by type DESC, "Dropped UDF part1", "Dropped UDF part2", "Dropped UDF 
part3";
 
 ----------------------------------------
 
 SELECT
+    type,
     --'\t-' || name || ':' || '\n\t\t-rettype: ' || retype || 
'\n\t\t-argument: ' || argtypes
-    '    - ' || name || ':' || '\n        rettype: ' || retype || '\n        
argument: ' || argtypes AS "Changed UDFs"
---    , type
+    '    - ' || name || ':' AS "Changed UDF part1",
+    '        rettype: ' || retype AS "Changed UDF part2",
+    '        argument: ' || argtypes AS "Changed UDF part3"
 FROM
 (
 SELECT
@@ -176,14 +125,16 @@ FROM
 -- WHERE FALSE
 WHERE old_version.retype in ('') -- '__logregr_result', 'summary_result', 
'linregr_result', 'mlogregr_result', 'marginal_logregr_result', 
'marginal_mlogregr_result', 'intermediate_cox_prop_hazards_result', 
'__utils_scaled_data')
 ) q
-ORDER by type, "Changed UDFs";
+ORDER by type DESC, "Changed UDF part1", "Changed UDF part2", "Changed UDF 
part3";
 
 ----------------------------------------
 
 SELECT
+    type,
     --'\t-' || name || ':' || '\n\t\t-rettype: ' || retype || 
'\n\t\t-argument: ' || argtypes
-    '    - ' || name || ':' || '\n        rettype: ' || retype || '\n        
argument: ' || argtypes AS "Suspected UDFs"
---    , type
+    '    - ' || name || ':' AS "Suspected UDF part1",
+    '        rettype: ' || retype AS "Suspected UDF part2",
+    '        argument: ' || argtypes AS "Suspected UDF part3"
 FROM
 (
 SELECT
@@ -195,4 +146,4 @@ FROM
     USING (name, retype, argtypes)
 WHERE old_version.argtypes SIMILAR TO 'NOT-A-TYPE' -- 
'%(__logregr_result|summary_result|linregr_result|mlogregr_result|marginal_logregr_result|marginal_mlogregr_result|intermediate_cox_prop_hazards_result|__utils_scaled_data)%'
 ) q
-ORDER by type, "Suspected UDFs";
+ORDER by type DESC, "Suspected UDF part1", "Suspected UDF part2", "Suspected 
UDF part3";

http://git-wip-us.apache.org/repos/asf/madlib/blob/59844eb1/src/madpack/diff_udt.sql
----------------------------------------------------------------------
diff --git a/src/madpack/diff_udt.sql b/src/madpack/diff_udt.sql
index 1070ea4..1f49cf1 100644
--- a/src/madpack/diff_udt.sql
+++ b/src/madpack/diff_udt.sql
@@ -119,5 +119,4 @@ FROM
 WHERE old_vers.typrelid <> 0; -- 0 means base type
 
 SELECT
-    array_upper(detect_changed_types('types_common'), 1) AS N,
-    detect_changed_types('types_common') AS "Changed UDTs";
+    unnest(detect_changed_types('types_common')) AS "Changed UDTs";

Reply via email to