Repository: madlib Updated Branches: refs/heads/master 2cde01d1f -> 59844eb1f
Madpack: Add a script for automating changelist creation Closes #318 Project: http://git-wip-us.apache.org/repos/asf/madlib/repo Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/59844eb1 Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/59844eb1 Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/59844eb1 Branch: refs/heads/master Commit: 59844eb1f3362950f49623f2365c74332fd520b7 Parents: 2cde01d Author: Orhan Kislal <[email protected]> Authored: Mon Sep 17 08:54:28 2018 +0300 Committer: Orhan Kislal <[email protected]> Committed: Mon Sep 17 08:58:17 2018 +0300 ---------------------------------------------------------------------- src/madpack/create_changelist.py | 242 ++++++++++++++++++++++++++++++++++ src/madpack/diff_udf.sql | 95 ++++--------- src/madpack/diff_udt.sql | 3 +- 3 files changed, 266 insertions(+), 74 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/madlib/blob/59844eb1/src/madpack/create_changelist.py ---------------------------------------------------------------------- diff --git a/src/madpack/create_changelist.py b/src/madpack/create_changelist.py new file mode 100644 index 0000000..a689319 --- /dev/null +++ b/src/madpack/create_changelist.py @@ -0,0 +1,242 @@ +#!/usr/bin/python +# ------------------------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------------------------ + +# Create changelist for any two branches/tags + +# Prequisites: +# The old version has to be installed in the "madlib_old_vers" schema +# The new version has to be installed in the "madlib" (default) schema +# Two branches/tags must exist locally (run 'git fetch' to ensure you have the latest version) +# The current branch does not matter + +# Usage (must be executed in the src/madpack directory): +# python create_changelist.py <database name> <old version branch> <new version branch> <changelist filename> +# If you are using the master branch, plase make sure to edit the branch/tag in the output file + +# Example (should be equivalent to changelist_1.13_1.14.yaml): +# python create_changelist.py madlib rel/v1.13 rel/v1.14 chtest1.yaml + +import sys +import os + +database = sys.argv[1] +old_vers = sys.argv[2] +new_vers = sys.argv[3] +ch_filename = sys.argv[4] + +if os.path.exists(ch_filename): + print "{0} already exists".format(ch_filename) + raise SystemExit + +err1 = os.system("""psql {0} -l > /dev/null""".format(database)) +if err1 != 0: + print "Database {0} does not exist".format(database) + raise SystemExit + +err1 = os.system("""psql {0} -c "select madlib_old_vers.version()" > /dev/null + """.format(database)) +if err1 != 0: + print "MADlib is not installed in the madlib_old_vers schema. Please refer to the Prequisites." + raise SystemExit + +err1 = os.system("""psql {0} -c "select madlib.version()" > /dev/null + """.format(database)) +if err1 != 0: + print "MADlib is not installed in the madlib schema. Please refer to the Prequisites." + raise SystemExit + +print "Creating changelist {0}".format(ch_filename) +os.system("rm -f /tmp/madlib_tmp_nm.txt /tmp/madlib_tmp_udf.txt /tmp/madlib_tmp_udt.txt") +try: + # Find the new modules using the git diff + err1 = os.system("git diff {old_vers} {new_vers} --name-only --diff-filter=A > /tmp/madlib_tmp_nm.txt".format(**locals())) + if err1 != 0: + print "Git diff failed. Please ensure that branches/tags are fetched." + raise SystemExit + + f = open("/tmp/madlib_tmp_cl.yaml", "w") + f.write( +"""# ------------------------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------------------------ +""") + + f.write( + """ +# Changelist for MADlib version {old_vers} to {new_vers} + +# This file contains all changes that were introduced in a new version of +# MADlib. This changelist is used by the upgrade script to detect what objects +# should be upgraded (while retaining all other objects from the previous version) + +# New modules (actually .sql_in files) added in upgrade version +# For these files the sql_in code is retained as is with the functions in the +# file installed on the upgrade version. All other files (that don't have +# updates), are cleaned up to remove object replacements +""".format(**locals())) + + # Find the new .sql_in files that are not in test folders + f.write("new module:\n") + with open('/tmp/madlib_tmp_nm.txt') as fp: + for line in fp: + if 'sql_in' in line and '/test/' not in line: + f.write(' ' + line.split('/')[5].split('.')[0]+':\n') + + # Find the changed types and keep a list for future use + os.system("psql {0} -f diff_udt.sql > /tmp/madlib_tmp_udt.txt".format(database)) + + f.write("\n# Changes in the types (UDT) including removal and modification\n") + f.write("udt:\n") + udt_list=[] + with open('/tmp/madlib_tmp_udt.txt') as fp: + for line in fp: + + # Both the type and its array form shows up as separate types + # We don't need both of them so we remove the array + if 'UDT' in line and '[]' not in line: + ch_type = line.split('|')[1].strip() + udt_list.append(ch_type) + f.write(' ' + ch_type +":\n") + + # Find the list of UDFs and UDAs + # There are two main sources for these lists. + # 1. The functions that actually got changed + # 2. The functions that depend on a changed type + + # We will keep two lists (for UDF and UDA) and fill them as we parse the + # output of diff functions + + udf_list=[] + uda_list=[] + current_list = udf_list + + # Find the changed functions/aggregates via the diff_udf script and write them to a file + os.system("psql {0} -f diff_udf.sql > /tmp/madlib_tmp_udf.txt".format(database)) + + # The entries in the file are ordered by type. + # Read them line by line and add to the udf list until the first aggregate + with open('/tmp/madlib_tmp_udf.txt') as fp: + for line in fp: + if 'type' in line: + # When we get the first aggregate, we switch the current list + if 'agg' in line: + current_list = uda_list + if 'UDF' in line: + current_list.append(line.split('|')[1]) + + current_list = udf_list + + # Find the function that return a changed type + # Note that we already ran the diff_udf.sql file + # This means the get_functions() function is already defined in the database + if os.path.exists("/tmp/madlib_tmp_typedep.txt"): + os.system("rm /tmp/madlib_tmp_typedep.txt") + os.system("touch /tmp/madlib_tmp_typedep.txt") + for t in udt_list: + + os.system("""psql {database} -c "DROP TABLE IF EXISTS __tmp__madlib__ " > /dev/null """.format(**locals())) + + # Find all of the old functions that return this particular type t and write to a table + os.system("""psql {database} -c "SELECT get_functions('__tmp__madlib__', 'madlib_old_vers', 'madlib_old_vers.{t}')" > /dev/null """.format(**locals())) + + # Order them in descending order on type so that we read the functions first and aggregates last + os.system("""psql {database} -x -c "SELECT type, name, retype, argtypes FROM __tmp__madlib__ ORDER BY type DESC" > /tmp/madlib_tmp_typedep.txt """.format(**locals())) + + os.system("""psql {database} -c "DROP TABLE IF EXISTS __tmp__madlib__ " > /dev/null """.format(**locals())) + + with open('/tmp/madlib_tmp_typedep.txt') as fp: + for line in fp: + if '|' in line: + sp = line.split('|') + + # Type is only used for switching the current list from udf to uda + if sp[0].strip() == 'type': + if sp[1].strip() == 'agg': + current_list = uda_list + + # Add the functions with formatting + elif sp[0].strip() == 'name': + current_list.append(' - ' + sp[1].strip() + ":\n") + elif sp[0].strip() == 'retype': + current_list.append(' rettype: schema_madlib.' + sp[1].strip() + "\n") + elif sp[0].strip() == 'argtypes': + current_list.append(' argument: ' + sp[1].strip() + "\n") + + # Write the UDF and UDA lists to the changelist + f.write( + """ +# List of the UDF changes that affect the user externally. This includes change +# in function name, return type, argument order or types, or removal of +# the function. In each case, the original function is as good as removed and a +# new function is created. In such cases, we should abort the upgrade if there +# are user views dependent on this function, since the original function will +# not be present in the upgraded version. +""") + + f.write("udf:\n") + for line in udf_list: + f.write(line) + + f.write(""" +# Changes to aggregates (UDA) including removal and modification +# Overloaded functions should be mentioned separately +""") + + f.write("uda:\n") + for line in uda_list: + f.write(line) + + #TODO: We need to decide how we want to check for these + f.write( + """ +# List of the UDC, UDO and UDOC changes. +""") + f.write("udc:\n") + f.write("udo:\n") + f.write("udoc:\n") + f.close() + + # Copy the new changelist file to its proper location + # This helps to keep the madlib folder clean in case the program stops + # unexpectedly + os.system("cp /tmp/madlib_tmp_cl.yaml {0}".format(ch_filename)) + +except: + print "Something went wrong! The changelist might be wrong/corrupted." + raise +finally: + os.system("rm -f /tmp/madlib_tmp_nm.txt /tmp/madlib_tmp_udf.txt " + "/tmp/madlib_tmp_udt.txt /tmp/madlib_tmp_cl.yaml " + "/tmp/madlib_tmp_typedep.txt") http://git-wip-us.apache.org/repos/asf/madlib/blob/59844eb1/src/madpack/diff_udf.sql ---------------------------------------------------------------------- diff --git a/src/madpack/diff_udf.sql b/src/madpack/diff_udf.sql index 4e0b9aa..d73093c 100644 --- a/src/madpack/diff_udf.sql +++ b/src/madpack/diff_udf.sql @@ -11,71 +11,13 @@ RETURNS text AS $$ $$ LANGUAGE plpythonu; -CREATE OR REPLACE FUNCTION get_functions(schema_name text) +CREATE OR REPLACE FUNCTION get_functions(table_name text, schema_name text, + type_filter text) RETURNS VOID AS $$ import plpy plpy.execute(""" - CREATE TABLE functions_madlib_new_version AS - SELECT - "schema", "name", filter_schema("retype", 'madlib') retype, - filter_schema("argtypes", 'madlib') argtypes, "type" - FROM - ( - - SELECT n.nspname as "schema", - p.proname as "name", - CASE WHEN p.proretset THEN 'SETOF ' ELSE '' END || - pg_catalog.format_type(p.prorettype, NULL) as "retype", - CASE WHEN proallargtypes IS NOT NULL THEN - pg_catalog.array_to_string(ARRAY( - SELECT - pio || ptyp - FROM - ( - SELECT - CASE - WHEN p.proargmodes[s.i] = 'i' THEN '' - WHEN p.proargmodes[s.i] = 'o' THEN 'OUT ' - WHEN p.proargmodes[s.i] = 'b' THEN 'INOUT ' - WHEN p.proargmodes[s.i] = 'v' THEN 'VARIADIC ' - END AS pio, - --CASE - -- WHEN COALESCE(p.proargnames[s.i], '') = '' THEN '' - -- ELSE p.proargnames[s.i] || ' ' - --END || - pg_catalog.format_type(p.proallargtypes[s.i], NULL) AS ptyp - FROM - pg_catalog.generate_series(1, pg_catalog.array_upper(p.proallargtypes, 1)) AS s(i) - ) qx - WHERE pio = '' - ), ', ') - ELSE - pg_catalog.array_to_string(ARRAY( - SELECT - --CASE - -- WHEN COALESCE(p.proargnames[s.i+1], '') = '' THEN '' - -- ELSE p.proargnames[s.i+1] || ' ' - -- END || - pg_catalog.format_type(p.proargtypes[s.i], NULL) - FROM - pg_catalog.generate_series(0, pg_catalog.array_upper(p.proargtypes, 1)) AS s(i) - ), ', ') - END AS "argtypes", - CASE - WHEN p.proisagg THEN 'agg' - WHEN p.prorettype = 'pg_catalog.trigger'::pg_catalog.regtype THEN 'trigger' - ELSE 'normal' - END AS "type" - FROM pg_catalog.pg_proc p - LEFT JOIN pg_catalog.pg_namespace n - ON n.oid = p.pronamespace - WHERE n.nspname ~ '^(madlib)$' - ORDER BY 1, 2, 4 - ) q - """.format(schema_name=schema_name)) - plpy.execute(""" - CREATE TABLE functions_madlib_old_version AS + CREATE TABLE {table_name} AS SELECT "schema", "name", filter_schema("retype", '{schema_name}') retype, filter_schema("argtypes", '{schema_name}') argtypes, "type" @@ -132,19 +74,24 @@ $$ WHERE n.nspname ~ '^({schema_name})$' ORDER BY 1, 2, 4 ) q - """.format(schema_name=schema_name)) + WHERE retype LIKE '{type_filter}' OR retype LIKE '{type_filter}[]' + """.format(table_name=table_name, schema_name=schema_name, type_filter=type_filter)) $$ LANGUAGE plpythonu; DROP TABLE IF EXISTS functions_madlib_old_version; DROP TABLE IF EXISTS functions_madlib_new_version; -SELECT get_functions('madlib_old_vers'); +SELECT get_functions('functions_madlib_old_version','madlib_old_vers','%'); +SELECT get_functions('functions_madlib_new_version','madlib','%'); SELECT + type, --'\t-' || name || ':' || '\n\t\t-rettype: ' || retype || '\n\t\t-argument: ' || argtypes - ' - ' || name || ':' || '\n rettype: ' || retype || '\n argument: ' || argtypes AS "Dropped UDFs" - , type + ' - ' || name || ':' AS "Dropped UDF part1", + ' rettype: ' || retype AS "Dropped UDF part2", + ' argument: ' || argtypes AS "Dropped UDF part3" + FROM ( SELECT @@ -156,14 +103,16 @@ FROM USING (name, retype, argtypes) WHERE new_version.name IS NULL ) q -ORDER by type, "Dropped UDFs"; +ORDER by type DESC, "Dropped UDF part1", "Dropped UDF part2", "Dropped UDF part3"; ---------------------------------------- SELECT + type, --'\t-' || name || ':' || '\n\t\t-rettype: ' || retype || '\n\t\t-argument: ' || argtypes - ' - ' || name || ':' || '\n rettype: ' || retype || '\n argument: ' || argtypes AS "Changed UDFs" --- , type + ' - ' || name || ':' AS "Changed UDF part1", + ' rettype: ' || retype AS "Changed UDF part2", + ' argument: ' || argtypes AS "Changed UDF part3" FROM ( SELECT @@ -176,14 +125,16 @@ FROM -- WHERE FALSE WHERE old_version.retype in ('') -- '__logregr_result', 'summary_result', 'linregr_result', 'mlogregr_result', 'marginal_logregr_result', 'marginal_mlogregr_result', 'intermediate_cox_prop_hazards_result', '__utils_scaled_data') ) q -ORDER by type, "Changed UDFs"; +ORDER by type DESC, "Changed UDF part1", "Changed UDF part2", "Changed UDF part3"; ---------------------------------------- SELECT + type, --'\t-' || name || ':' || '\n\t\t-rettype: ' || retype || '\n\t\t-argument: ' || argtypes - ' - ' || name || ':' || '\n rettype: ' || retype || '\n argument: ' || argtypes AS "Suspected UDFs" --- , type + ' - ' || name || ':' AS "Suspected UDF part1", + ' rettype: ' || retype AS "Suspected UDF part2", + ' argument: ' || argtypes AS "Suspected UDF part3" FROM ( SELECT @@ -195,4 +146,4 @@ FROM USING (name, retype, argtypes) WHERE old_version.argtypes SIMILAR TO 'NOT-A-TYPE' -- '%(__logregr_result|summary_result|linregr_result|mlogregr_result|marginal_logregr_result|marginal_mlogregr_result|intermediate_cox_prop_hazards_result|__utils_scaled_data)%' ) q -ORDER by type, "Suspected UDFs"; +ORDER by type DESC, "Suspected UDF part1", "Suspected UDF part2", "Suspected UDF part3"; http://git-wip-us.apache.org/repos/asf/madlib/blob/59844eb1/src/madpack/diff_udt.sql ---------------------------------------------------------------------- diff --git a/src/madpack/diff_udt.sql b/src/madpack/diff_udt.sql index 1070ea4..1f49cf1 100644 --- a/src/madpack/diff_udt.sql +++ b/src/madpack/diff_udt.sql @@ -119,5 +119,4 @@ FROM WHERE old_vers.typrelid <> 0; -- 0 means base type SELECT - array_upper(detect_changed_types('types_common'), 1) AS N, - detect_changed_types('types_common') AS "Changed UDTs"; + unnest(detect_changed_types('types_common')) AS "Changed UDTs";
