I put together a rebased version of the patch for cfbot. -- Nathan Bossart Amazon Web Services: https://aws.amazon.com
>From ee5805dc450f081b77ae3a7df315ceafb6ccc5e1 Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson <dan...@yesql.se> Date: Mon, 13 Mar 2023 14:46:24 +0100 Subject: [PATCH v4 1/1] pg_upgrade: run all data type checks per connection
The checks for data type usage were each connecting to all databases in the cluster and running their query. On cluster which have a lot of databases this can become unnecessarily expensive. This moves the checks to run in a single connection instead to minimize connection setup/teardown overhead. Reviewed-by: Nathan Bossart <nathandboss...@gmail.com> Reviewed-by: Justin Pryzby <pry...@telsasoft.com> Discussion: https://postgr.es/m/bb4c76f-d416-4f9f-949e-dbe950d37...@yesql.se --- src/bin/pg_upgrade/check.c | 575 ++++++++++++++++++++------------ src/bin/pg_upgrade/pg_upgrade.h | 29 +- src/bin/pg_upgrade/version.c | 289 +++------------- 3 files changed, 433 insertions(+), 460 deletions(-) diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 64024e3b9e..c829aed26e 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -10,6 +10,7 @@ #include "postgres_fe.h" #include "catalog/pg_authid_d.h" +#include "catalog/pg_class_d.h" #include "catalog/pg_collation.h" #include "fe_utils/string_utils.h" #include "mb/pg_wchar.h" @@ -23,14 +24,375 @@ static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster); static void check_for_user_defined_postfix_ops(ClusterInfo *cluster); static void check_for_incompatible_polymorphics(ClusterInfo *cluster); static void check_for_tables_with_oids(ClusterInfo *cluster); -static void check_for_composite_data_type_usage(ClusterInfo *cluster); -static void check_for_reg_data_type_usage(ClusterInfo *cluster); -static void check_for_aclitem_data_type_usage(ClusterInfo *cluster); -static void check_for_jsonb_9_4_usage(ClusterInfo *cluster); static void check_for_pg_role_prefix(ClusterInfo *cluster); static void check_for_new_tablespace_dir(ClusterInfo *new_cluster); static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster); +/* + * Data type usage checks. 
Each check for problematic data type usage is + * defined in this array with metadata, SQL query for finding the data type + * and a function pointer for determining if the check should be executed + * for the current version. + */ +static int n_data_types_usage_checks = 7; +static DataTypesUsageChecks data_types_usage_checks[] = { + /* + * Look for composite types that were made during initdb *or* belong to + * information_schema; that's important in case information_schema was + * dropped and reloaded. + * + * The cutoff OID here should match the source cluster's value of + * FirstNormalObjectId. We hardcode it rather than using that C #define + * because, if that #define is ever changed, our own version's value is + * NOT what to use. Eventually we may need a test on the source cluster's + * version to select the correct value. + */ + {.status = "Checking for system-defined composite types in user tables", + .report_filename = "tables_using_composite.txt", + .base_query = + "SELECT t.oid FROM pg_catalog.pg_type t " + "LEFT JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid " + " WHERE typtype = 'c' AND (t.oid < 16384 OR nspname = 'information_schema')", + .report_text = + "Your installation contains system-defined composite type(s) in user tables.\n" + "These type OIDs are not stable across PostgreSQL versions,\n" + "so this cluster cannot currently be upgraded. You can\n" + "drop the problem columns and restart the upgrade.\n" + "A list of the problem columns is in the file:", + .version_hook = NULL}, + + /* + * 9.3 -> 9.4 + * Fully implement the 'line' data type in 9.4, which previously returned + * "not enabled" by default and was only functionally enabled with a + * compile-time switch; as of 9.4 "line" has a different on-disk + * representation format. 
+ */ + {.status = "Checking for incompatible \"line\" data type", + .report_filename = "tables_using_line.txt", + .base_query = + "SELECT 'pg_catalog.line'::pg_catalog.regtype AS oid", + .report_text = + "Your installation contains the \"line\" data type in user tables.\n" + "This data type changed its internal and input/output format\n" + "between your old and new versions so this\n" + "cluster cannot currently be upgraded. You can\n" + "drop the problem columns and restart the upgrade.\n" + "A list of the problem columns is in the file:", + .version_hook = old_9_3_check_for_line_data_type_usage}, + + /* + * pg_upgrade only preserves these system values: + * pg_class.oid + * pg_type.oid + * pg_enum.oid + * + * Many of the reg* data types reference system catalog info that is + * not preserved, and hence these data types cannot be used in user + * tables upgraded by pg_upgrade. + */ + {.status = "Checking for reg* data types in user tables", + .report_filename = "tables_using_reg.txt", + /* + * Note: older servers will not have all of these reg* types, so we have + * to write the query like this rather than depending on casts to regtype. + */ + .base_query = + "SELECT oid FROM pg_catalog.pg_type t " + "WHERE t.typnamespace = " + " (SELECT oid FROM pg_catalog.pg_namespace " + " WHERE nspname = 'pg_catalog') " + " AND t.typname IN ( " + /* pg_class.oid is preserved, so 'regclass' is OK */ + " 'regcollation', " + " 'regconfig', " + " 'regdictionary', " + " 'regnamespace', " + " 'regoper', " + " 'regoperator', " + " 'regproc', " + " 'regprocedure' " + /* pg_authid.oid is preserved, so 'regrole' is OK */ + /* pg_type.oid is (mostly) preserved, so 'regtype' is OK */ + " )", + .report_text = + "Your installation contains one of the reg* data types in user tables.\n" + "These data types reference system OIDs that are not preserved by\n" + "pg_upgrade, so this cluster cannot currently be upgraded. 
You can\n" + "drop the problem columns and restart the upgrade.\n" + "A list of the problem columns is in the file:", + .version_hook = NULL}, + + /* + * PG 16 increased the size of the 'aclitem' type, which breaks the on-disk + * format for existing data. + */ + {.status = "Checking for incompatible aclitem data type in user tables", + .report_filename = "tables_using_aclitem.txt", + .base_query = + "SELECT 'pg_catalog.aclitem'::pg_catalog.regtype AS oid", + .report_text = + "Your installation contains the \"aclitem\" data type in user tables.\n" + "The internal format of \"aclitem\" changed in PostgreSQL version 16\n" + "so this cluster cannot currently be upgraded. You can drop the\n" + "problem columns and restart the upgrade. A list of the problem\n" + "columns is in the file:", + .version_hook = check_for_aclitem_data_type_usage}, + + /* + * It's no longer allowed to create tables or views with "unknown"-type + * columns. We do not complain about views with such columns, because + * they should get silently converted to "text" columns during the DDL + * dump and reload; it seems unlikely to be worth making users do that + * by hand. However, if there's a table with such a column, the DDL + * reload will fail, so we should pre-detect that rather than failing + * mid-upgrade. Worse, if there's a matview with such a column, the + * DDL reload will silently change it to "text" which won't match the + * on-disk storage (which is like "cstring"). So we *must* reject that. + */ + {.status = "Checking for invalid \"unknown\" user columns", + .report_filename = "tables_using_unknown.txt", + .base_query = + "SELECT 'pg_catalog.unknown'::pg_catalog.regtype AS oid", + .report_text = + "Your installation contains the \"unknown\" data type in user tables.\n" + "This data type is no longer allowed in tables, so this\n" + "cluster cannot currently be upgraded. 
You can\n" + "drop the problem columns and restart the upgrade.\n" + "A list of the problem columns is in the file:", + .version_hook = old_9_6_check_for_unknown_data_type_usage}, + + /* + * PG 12 changed the 'sql_identifier' type storage to be based on name, + * not varchar, which breaks on-disk format for existing data. So we need + * to prevent upgrade when used in user objects (tables, indexes, ...). + * In 12, the sql_identifier data type was switched from name to varchar, + * which does affect the storage (name is by-ref, but not varlena). This + * means user tables using sql_identifier for columns are broken because + * the on-disk format is different. + */ + {.status = "Checking for invalid \"sql_identifier\" user columns", + .report_filename = "tables_using_sql_identifier.txt", + .base_query = + "SELECT 'information_schema.sql_identifier'::pg_catalog.regtype AS oid", + .report_text = + "Your installation contains the \"sql_identifier\" data type in user tables.\n" + "The on-disk format for this data type has changed, so this\n" + "cluster cannot currently be upgraded. You can\n" + "drop the problem columns and restart the upgrade.\n" + "A list of the problem columns is in the file:", + .version_hook = old_11_check_for_sql_identifier_data_type_usage}, + + /* + * JSONB changed its storage format during 9.4 beta, so check for it. + */ + {.status = "Checking for incompatible \"jsonb\" data type", + .report_filename = "tables_using_jsonb.txt", + .base_query = + "SELECT 'pg_catalog.jsonb'::pg_catalog.regtype AS oid", + .report_text = + "Your installation contains the \"jsonb\" data type in user tables.\n" + "The internal format of \"jsonb\" changed during 9.4 beta so this\n" + "cluster cannot currently be upgraded. 
You can\n" + "drop the problem columns and restart the upgrade.\n" + "A list of the problem columns is in the file:", + .version_hook = check_for_jsonb_9_4_usage}, +}; + +/* + * check_for_data_types_usage() + * Detect whether there are any stored columns depending on given type(s) + * + * If so, write a report to the given file name and signal a failure to the + * user. + * + * The checks to run are defined in a DataTypesUsageChecks structure where + * each check has metadata for explaining errors to the user, a base_query, + * a report filename and a function pointer hook for validating if the check + * should be executed given the cluster at hand. + * + * base_query should be a SELECT yielding a single column named "oid", + * containing the pg_type OIDs of one or more types that are known to have + * inconsistent on-disk representations across server versions. + * + * We check for the type(s) in tables, matviews, and indexes, but not views; + * there's no storage involved in a view. + */ +static void +check_for_data_types_usage(ClusterInfo *cluster, DataTypesUsageChecks *checks) +{ + bool found = false; + bool *results; + PQExpBufferData report; + + prep_status("Checking for data type usage"); + + /* Prepare an array to store the results of checks in */ + results = pg_malloc(sizeof(bool) * n_data_types_usage_checks); + memset(results, true, sizeof(bool) * n_data_types_usage_checks); + + prep_status_progress("checking all databases"); + + /* + * Connect to each database in the cluster and run all defined checks + * against that database before trying the next one. 
+ */ + for (int dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) + { + DbInfo *active_db = &cluster->dbarr.dbs[dbnum]; + PGconn *conn = connectToServer(cluster, active_db->db_name); + + for (int checknum = 0; checknum < n_data_types_usage_checks; checknum++) + { + PGresult *res; + int ntups; + int i_nspname; + int i_relname; + int i_attname; + FILE *script = NULL; + bool db_used = false; + char output_path[MAXPGPATH]; + DataTypesUsageChecks *cur_check = &checks[checknum]; + + /* + * Make sure that the check applies to the current cluster version + * and skip if not. If no check hook has been defined we run the + * check for all versions. + */ + if (cur_check->version_hook && !cur_check->version_hook(cluster)) + { + cur_check++; + continue; + } + + snprintf(output_path, sizeof(output_path), "%s/%s", + log_opts.basedir, + cur_check->report_filename); + + /* + * The type(s) of interest might be wrapped in a domain, array, + * composite, or range, and these container types can be nested (to + * varying extents depending on server version, but that's not of + * concern here). To handle all these cases we need a recursive CTE. 
+ */ + res = executeQueryOrDie(conn, + "WITH RECURSIVE oids AS ( " + /* start with the type(s) returned by base_query */ + " %s " + " UNION ALL " + " SELECT * FROM ( " + /* inner WITH because we can only reference the CTE once */ + " WITH x AS (SELECT oid FROM oids) " + /* domains on any type selected so far */ + " SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typbasetype = x.oid AND typtype = 'd' " + " UNION ALL " + /* arrays over any type selected so far */ + " SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typelem = x.oid AND typtype = 'b' " + " UNION ALL " + /* composite types containing any type selected so far */ + " SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_class c, pg_catalog.pg_attribute a, x " + " WHERE t.typtype = 'c' AND " + " t.oid = c.reltype AND " + " c.oid = a.attrelid AND " + " NOT a.attisdropped AND " + " a.atttypid = x.oid " + " UNION ALL " + /* ranges containing any type selected so far */ + " SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_range r, x " + " WHERE t.typtype = 'r' AND r.rngtypid = t.oid AND r.rngsubtype = x.oid" + " ) foo " + ") " + /* now look for stored columns of any such type */ + "SELECT n.nspname, c.relname, a.attname " + "FROM pg_catalog.pg_class c, " + " pg_catalog.pg_namespace n, " + " pg_catalog.pg_attribute a " + "WHERE c.oid = a.attrelid AND " + " NOT a.attisdropped AND " + " a.atttypid IN (SELECT oid FROM oids) AND " + " c.relkind IN (" + CppAsString2(RELKIND_RELATION) ", " + CppAsString2(RELKIND_MATVIEW) ", " + CppAsString2(RELKIND_INDEX) ") AND " + " c.relnamespace = n.oid AND " + /* exclude possible orphaned temp tables */ + " n.nspname !~ '^pg_temp_' AND " + " n.nspname !~ '^pg_toast_temp_' AND " + /* exclude system catalogs, too */ + " n.nspname NOT IN ('pg_catalog', 'information_schema')", + cur_check->base_query); + + ntups = PQntuples(res); + + /* + * The datatype was found, so extract the data and log to the + * requested filename. 
We need to open the file for appending + * since the check might have already found the type in another + * database earlier in the loop. + */ + if (ntups) + { + /* + * Make sure we have a buffer to save reports to now that we + * found a first failing check. + */ + if (!found) + initPQExpBuffer(&report); + found = true; + + /* + * If this is the first time we see an error for the check in + * question then print a status message of the failure. + */ + if (results[checknum]) + { + pg_log(PG_REPORT, " failed check: %s", cur_check->status); + appendPQExpBuffer(&report, "\n%s\n %s\n", + cur_check->report_text, output_path); + } + results[checknum] = false; + + i_nspname = PQfnumber(res, "nspname"); + i_relname = PQfnumber(res, "relname"); + i_attname = PQfnumber(res, "attname"); + + for (int rowno = 0; rowno < ntups; rowno++) + { + found = true; + if (script == NULL && (script = fopen_priv(output_path, "a")) == NULL) + pg_fatal("could not open file \"%s\": %s", + output_path, + strerror(errno)); + if (!db_used) + { + fprintf(script, "In database: %s\n", active_db->db_name); + db_used = true; + } + fprintf(script, " %s.%s.%s\n", + PQgetvalue(res, rowno, i_nspname), + PQgetvalue(res, rowno, i_relname), + PQgetvalue(res, rowno, i_attname)); + } + + if (script) + { + fclose(script); + script = NULL; + } + } + + PQclear(res); + cur_check++; + } + + PQfinish(conn); + } + + if (found) + pg_fatal("Data type checks failed: %s", report.data); + + check_ok(); +} /* * fix_path_separator @@ -100,16 +462,9 @@ check_and_dump_old_cluster(bool live_check) check_is_install_user(&old_cluster); check_proper_datallowconn(&old_cluster); check_for_prepared_transactions(&old_cluster); - check_for_composite_data_type_usage(&old_cluster); - check_for_reg_data_type_usage(&old_cluster); check_for_isn_and_int8_passing_mismatch(&old_cluster); - /* - * PG 16 increased the size of the 'aclitem' type, which breaks the - * on-disk format for existing data. 
- */ - if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1500) - check_for_aclitem_data_type_usage(&old_cluster); + check_for_data_types_usage(&old_cluster, data_types_usage_checks); /* * PG 14 changed the function signature of encoding conversion functions. @@ -141,21 +496,12 @@ check_and_dump_old_cluster(bool live_check) if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100) check_for_tables_with_oids(&old_cluster); - /* - * PG 12 changed the 'sql_identifier' type storage to be based on name, - * not varchar, which breaks on-disk format for existing data. So we need - * to prevent upgrade when used in user objects (tables, indexes, ...). - */ - if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100) - old_11_check_for_sql_identifier_data_type_usage(&old_cluster); - /* * Pre-PG 10 allowed tables with 'unknown' type columns and non WAL logged * hash indexes */ if (GET_MAJOR_VERSION(old_cluster.major_version) <= 906) { - old_9_6_check_for_unknown_data_type_usage(&old_cluster); if (user_opts.check) old_9_6_invalidate_hash_indexes(&old_cluster, true); } @@ -164,14 +510,6 @@ check_and_dump_old_cluster(bool live_check) if (GET_MAJOR_VERSION(old_cluster.major_version) <= 905) check_for_pg_role_prefix(&old_cluster); - if (GET_MAJOR_VERSION(old_cluster.major_version) == 904 && - old_cluster.controldata.cat_ver < JSONB_FORMAT_CHANGE_CAT_VER) - check_for_jsonb_9_4_usage(&old_cluster); - - /* Pre-PG 9.4 had a different 'line' data type internal format */ - if (GET_MAJOR_VERSION(old_cluster.major_version) <= 903) - old_9_3_check_for_line_data_type_usage(&old_cluster); - /* * While not a check option, we do this now because this is the only time * the old server is running. @@ -1084,185 +1422,6 @@ check_for_tables_with_oids(ClusterInfo *cluster) check_ok(); } - -/* - * check_for_composite_data_type_usage() - * Check for system-defined composite types used in user tables. 
- * - * The OIDs of rowtypes of system catalogs and information_schema views - * can change across major versions; unlike user-defined types, we have - * no mechanism for forcing them to be the same in the new cluster. - * Hence, if any user table uses one, that's problematic for pg_upgrade. - */ -static void -check_for_composite_data_type_usage(ClusterInfo *cluster) -{ - bool found; - Oid firstUserOid; - char output_path[MAXPGPATH]; - char *base_query; - - prep_status("Checking for system-defined composite types in user tables"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_composite.txt"); - - /* - * Look for composite types that were made during initdb *or* belong to - * information_schema; that's important in case information_schema was - * dropped and reloaded. - * - * The cutoff OID here should match the source cluster's value of - * FirstNormalObjectId. We hardcode it rather than using that C #define - * because, if that #define is ever changed, our own version's value is - * NOT what to use. Eventually we may need a test on the source cluster's - * version to select the correct value. - */ - firstUserOid = 16384; - - base_query = psprintf("SELECT t.oid FROM pg_catalog.pg_type t " - "LEFT JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid " - " WHERE typtype = 'c' AND (t.oid < %u OR nspname = 'information_schema')", - firstUserOid); - - found = check_for_data_types_usage(cluster, base_query, output_path); - - free(base_query); - - if (found) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains system-defined composite type(s) in user tables.\n" - "These type OIDs are not stable across PostgreSQL versions,\n" - "so this cluster cannot currently be upgraded. 
You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); -} - -/* - * check_for_reg_data_type_usage() - * pg_upgrade only preserves these system values: - * pg_class.oid - * pg_type.oid - * pg_enum.oid - * - * Many of the reg* data types reference system catalog info that is - * not preserved, and hence these data types cannot be used in user - * tables upgraded by pg_upgrade. - */ -static void -check_for_reg_data_type_usage(ClusterInfo *cluster) -{ - bool found; - char output_path[MAXPGPATH]; - - prep_status("Checking for reg* data types in user tables"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_reg.txt"); - - /* - * Note: older servers will not have all of these reg* types, so we have - * to write the query like this rather than depending on casts to regtype. - */ - found = check_for_data_types_usage(cluster, - "SELECT oid FROM pg_catalog.pg_type t " - "WHERE t.typnamespace = " - " (SELECT oid FROM pg_catalog.pg_namespace " - " WHERE nspname = 'pg_catalog') " - " AND t.typname IN ( " - /* pg_class.oid is preserved, so 'regclass' is OK */ - " 'regcollation', " - " 'regconfig', " - " 'regdictionary', " - " 'regnamespace', " - " 'regoper', " - " 'regoperator', " - " 'regproc', " - " 'regprocedure' " - /* pg_authid.oid is preserved, so 'regrole' is OK */ - /* pg_type.oid is (mostly) preserved, so 'regtype' is OK */ - " )", - output_path); - - if (found) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains one of the reg* data types in user tables.\n" - "These data types reference system OIDs that are not preserved by\n" - "pg_upgrade, so this cluster cannot currently be upgraded. 
You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); -} - -/* - * check_for_aclitem_data_type_usage - * - * aclitem changed its storage format in 16, so check for it. - */ -static void -check_for_aclitem_data_type_usage(ClusterInfo *cluster) -{ - char output_path[MAXPGPATH]; - - prep_status("Checking for incompatible \"aclitem\" data type in user tables"); - - snprintf(output_path, sizeof(output_path), "tables_using_aclitem.txt"); - - if (check_for_data_type_usage(cluster, "pg_catalog.aclitem", output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"aclitem\" data type in user tables.\n" - "The internal format of \"aclitem\" changed in PostgreSQL version 16\n" - "so this cluster cannot currently be upgraded. You can drop the\n" - "problem columns and restart the upgrade. A list of the problem\n" - "columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); -} - -/* - * check_for_jsonb_9_4_usage() - * - * JSONB changed its storage format during 9.4 beta, so check for it. - */ -static void -check_for_jsonb_9_4_usage(ClusterInfo *cluster) -{ - char output_path[MAXPGPATH]; - - prep_status("Checking for incompatible \"jsonb\" data type"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_jsonb.txt"); - - if (check_for_data_type_usage(cluster, "pg_catalog.jsonb", output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"jsonb\" data type in user tables.\n" - "The internal format of \"jsonb\" changed during 9.4 beta so this\n" - "cluster cannot currently be upgraded. 
You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); -} - /* * check_for_pg_role_prefix() * diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 3eea0139c7..208bfbb68e 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -328,6 +328,21 @@ typedef struct } OSInfo; +/* Function signature for data type check version hook */ +typedef bool (*DataTypesUsageVersionCheck)(ClusterInfo *cluster); + +/* + * DataTypesUsageChecks + */ +typedef struct +{ + const char *status; /* status line to print to the user */ + const char *report_filename; /* filename to store report to */ + const char *base_query; /* Query to extract the oid of the datatype */ + const char *report_text; /* Text to store to report in case of error */ + DataTypesUsageVersionCheck version_hook; +} DataTypesUsageChecks; + /* * Global variables */ @@ -450,19 +465,15 @@ unsigned int str2uint(const char *str); /* version.c */ -bool check_for_data_types_usage(ClusterInfo *cluster, - const char *base_query, - const char *output_path); -bool check_for_data_type_usage(ClusterInfo *cluster, - const char *type_name, - const char *output_path); -void old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster); -void old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster); +bool old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster); +bool check_for_jsonb_9_4_usage(ClusterInfo *cluster); +bool old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster); +bool old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster); void old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode); -void old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster); void report_extension_updates(ClusterInfo *cluster); +bool check_for_aclitem_data_type_usage(ClusterInfo *cluster); /* parallel.c */ void 
parallel_exec_prog(const char *log_file, const char *opt_log_file, diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c index 403a6d7cfa..828a975ac0 100644 --- a/src/bin/pg_upgrade/version.c +++ b/src/bin/pg_upgrade/version.c @@ -9,236 +9,41 @@ #include "postgres_fe.h" -#include "catalog/pg_class_d.h" #include "fe_utils/string_utils.h" #include "pg_upgrade.h" - -/* - * check_for_data_types_usage() - * Detect whether there are any stored columns depending on given type(s) - * - * If so, write a report to the given file name, and return true. - * - * base_query should be a SELECT yielding a single column named "oid", - * containing the pg_type OIDs of one or more types that are known to have - * inconsistent on-disk representations across server versions. - * - * We check for the type(s) in tables, matviews, and indexes, but not views; - * there's no storage involved in a view. - */ bool -check_for_data_types_usage(ClusterInfo *cluster, - const char *base_query, - const char *output_path) +old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster) { - bool found = false; - FILE *script = NULL; - int dbnum; - - for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) - { - DbInfo *active_db = &cluster->dbarr.dbs[dbnum]; - PGconn *conn = connectToServer(cluster, active_db->db_name); - PQExpBufferData querybuf; - PGresult *res; - bool db_used = false; - int ntups; - int rowno; - int i_nspname, - i_relname, - i_attname; - - /* - * The type(s) of interest might be wrapped in a domain, array, - * composite, or range, and these container types can be nested (to - * varying extents depending on server version, but that's not of - * concern here). To handle all these cases we need a recursive CTE. 
- */ - initPQExpBuffer(&querybuf); - appendPQExpBuffer(&querybuf, - "WITH RECURSIVE oids AS ( " - /* start with the type(s) returned by base_query */ - " %s " - " UNION ALL " - " SELECT * FROM ( " - /* inner WITH because we can only reference the CTE once */ - " WITH x AS (SELECT oid FROM oids) " - /* domains on any type selected so far */ - " SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typbasetype = x.oid AND typtype = 'd' " - " UNION ALL " - /* arrays over any type selected so far */ - " SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typelem = x.oid AND typtype = 'b' " - " UNION ALL " - /* composite types containing any type selected so far */ - " SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_class c, pg_catalog.pg_attribute a, x " - " WHERE t.typtype = 'c' AND " - " t.oid = c.reltype AND " - " c.oid = a.attrelid AND " - " NOT a.attisdropped AND " - " a.atttypid = x.oid " - " UNION ALL " - /* ranges containing any type selected so far */ - " SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_range r, x " - " WHERE t.typtype = 'r' AND r.rngtypid = t.oid AND r.rngsubtype = x.oid" - " ) foo " - ") " - /* now look for stored columns of any such type */ - "SELECT n.nspname, c.relname, a.attname " - "FROM pg_catalog.pg_class c, " - " pg_catalog.pg_namespace n, " - " pg_catalog.pg_attribute a " - "WHERE c.oid = a.attrelid AND " - " NOT a.attisdropped AND " - " a.atttypid IN (SELECT oid FROM oids) AND " - " c.relkind IN (" - CppAsString2(RELKIND_RELATION) ", " - CppAsString2(RELKIND_MATVIEW) ", " - CppAsString2(RELKIND_INDEX) ") AND " - " c.relnamespace = n.oid AND " - /* exclude possible orphaned temp tables */ - " n.nspname !~ '^pg_temp_' AND " - " n.nspname !~ '^pg_toast_temp_' AND " - /* exclude system catalogs, too */ - " n.nspname NOT IN ('pg_catalog', 'information_schema')", - base_query); - - res = executeQueryOrDie(conn, "%s", querybuf.data); - - ntups = PQntuples(res); - i_nspname = PQfnumber(res, "nspname"); - i_relname = PQfnumber(res, 
"relname"); - i_attname = PQfnumber(res, "attname"); - for (rowno = 0; rowno < ntups; rowno++) - { - found = true; - if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL) - pg_fatal("could not open file \"%s\": %s", output_path, - strerror(errno)); - if (!db_used) - { - fprintf(script, "In database: %s\n", active_db->db_name); - db_used = true; - } - fprintf(script, " %s.%s.%s\n", - PQgetvalue(res, rowno, i_nspname), - PQgetvalue(res, rowno, i_relname), - PQgetvalue(res, rowno, i_attname)); - } - - PQclear(res); - - termPQExpBuffer(&querybuf); - - PQfinish(conn); - } - - if (script) - fclose(script); + /* Pre-PG 9.4 had a different 'line' data type internal format */ + if (GET_MAJOR_VERSION(cluster->major_version) <= 903) + return true; - return found; + return false; } /* - * check_for_data_type_usage() - * Detect whether there are any stored columns depending on the given type - * - * If so, write a report to the given file name, and return true. + * check_for_jsonb_9_4_usage() * - * type_name should be a fully qualified type name. This is just a - * trivial wrapper around check_for_data_types_usage() to convert a - * type name into a base query. + * JSONB changed its storage format during 9.4 beta, so check for it. */ bool -check_for_data_type_usage(ClusterInfo *cluster, - const char *type_name, - const char *output_path) -{ - bool found; - char *base_query; - - base_query = psprintf("SELECT '%s'::pg_catalog.regtype AS oid", - type_name); - - found = check_for_data_types_usage(cluster, base_query, output_path); - - free(base_query); - - return found; -} - - -/* - * old_9_3_check_for_line_data_type_usage() - * 9.3 -> 9.4 - * Fully implement the 'line' data type in 9.4, which previously returned - * "not enabled" by default and was only functionally enabled with a - * compile-time switch; as of 9.4 "line" has a different on-disk - * representation format. 
- */ -void -old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster) +check_for_jsonb_9_4_usage(ClusterInfo *cluster) { - char output_path[MAXPGPATH]; + if (GET_MAJOR_VERSION(cluster->major_version) == 904 && + cluster->controldata.cat_ver < JSONB_FORMAT_CHANGE_CAT_VER) + return true; - prep_status("Checking for incompatible \"line\" data type"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_line.txt"); - - if (check_for_data_type_usage(cluster, "pg_catalog.line", output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"line\" data type in user tables.\n" - "This data type changed its internal and input/output format\n" - "between your old and new versions so this\n" - "cluster cannot currently be upgraded. You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); + return false; } - -/* - * old_9_6_check_for_unknown_data_type_usage() - * 9.6 -> 10 - * It's no longer allowed to create tables or views with "unknown"-type - * columns. We do not complain about views with such columns, because - * they should get silently converted to "text" columns during the DDL - * dump and reload; it seems unlikely to be worth making users do that - * by hand. However, if there's a table with such a column, the DDL - * reload will fail, so we should pre-detect that rather than failing - * mid-upgrade. Worse, if there's a matview with such a column, the - * DDL reload will silently change it to "text" which won't match the - * on-disk storage (which is like "cstring"). So we *must* reject that. 
- */ -void +bool old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster) { - char output_path[MAXPGPATH]; - - prep_status("Checking for invalid \"unknown\" user columns"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_unknown.txt"); - - if (check_for_data_type_usage(cluster, "pg_catalog.unknown", output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"unknown\" data type in user tables.\n" - "This data type is no longer allowed in tables, so this\n" - "cluster cannot currently be upgraded. You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); + /* Pre-PG 10 allowed tables with 'unknown' type columns */ + if (GET_MAJOR_VERSION(cluster->major_version) <= 906) + return true; + return false; } /* @@ -353,41 +158,20 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) check_ok(); } -/* - * old_11_check_for_sql_identifier_data_type_usage() - * 11 -> 12 - * In 12, the sql_identifier data type was switched from name to varchar, - * which does affect the storage (name is by-ref, but not varlena). This - * means user tables using sql_identifier for columns are broken because - * the on-disk format is different. - */ -void +bool old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster) { - char output_path[MAXPGPATH]; - - prep_status("Checking for invalid \"sql_identifier\" user columns"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_sql_identifier.txt"); - - if (check_for_data_type_usage(cluster, "information_schema.sql_identifier", - output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"sql_identifier\" data type in user tables.\n" - "The on-disk format for this data type has changed, so this\n" - "cluster cannot currently be upgraded. 
You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); + /* + * PG 12 changed the 'sql_identifier' type storage to be based on name, + * not varchar, which breaks on-disk format for existing data. So we need + * to prevent upgrade when used in user objects (tables, indexes, ...). + */ + if (GET_MAJOR_VERSION(cluster->major_version) <= 1100) + return true; + + return false; } - /* * report_extension_updates() * Report extensions that should be updated. @@ -459,3 +243,22 @@ report_extension_updates(ClusterInfo *cluster) else check_ok(); } + +/* + * check_for_aclitem_data_type_usage + * + * aclitem changed its storage format in 16, so check for it. + */ +bool +check_for_aclitem_data_type_usage(ClusterInfo *cluster) +{ + /* + * PG 16 increased the size of the 'aclitem' type, which breaks the on-disk + * format for existing data. + */ + if (GET_MAJOR_VERSION(cluster->major_version) <= 1500) + return true; + + return false; +} + -- 2.25.1