This is an automated email from the ASF dual-hosted git repository. yjhjstz pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit b0b1c36713845d82c1c3ad1e84d9595d3a66640b Author: Chris Hajas <[email protected]> AuthorDate: Thu Oct 6 10:54:00 2022 -0700 Dump number of segments during minirepro and gpsd (#14225) This commit includes a couple improvements when dumping minirepros and gpsd: 1. Dump the number of segments. This is needed to replicate the plan, and often we have to ask users/customers this separately. Instead just retrieve it during the minirepro 2. Output a `set optimizer=off` for when the minirepro is being restored. The short insert statements are much faster (~10X) using planner than Orca, especially if done in debug buiild. Output this by default so I don't have to modify this manually. --- gpMgmt/bin/gpsd | 19 ++++++++++++++++++- gpMgmt/bin/minirepro | 23 +++++++++++++++++++---- src/test/regress/expected/minirepro.out | 12 ++++++++---- 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/gpMgmt/bin/gpsd b/gpMgmt/bin/gpsd index ad61a1e3de..fe877a38de 100755 --- a/gpMgmt/bin/gpsd +++ b/gpMgmt/bin/gpsd @@ -35,10 +35,19 @@ def ResultIter(cursor, arraysize=1000): def getVersion(envOpts): cmd = subprocess.Popen('psql --pset footer -Atqc "select version()" template1', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=envOpts) if cmd.wait() != 0: - sys.stderr.write('\nError while trying to find HAWQ/GPDB version.\n\n' + cmd.communicate()[1].decode('ascii') + '\n\n') + sys.stderr.write('\nError while trying to find GPDB version.\n\n' + cmd.communicate()[1].decode('ascii') + '\n\n') sys.exit(1) return cmd.communicate()[0].decode('ascii') +def get_num_segments(cursor): + query = "select count(*) from gp_segment_configuration where role='p' and content >=0;" + try: + cursor.execute(query) + except pgdb.DatabaseError as e: + sys.stderr.write('\nError while trying to retrieve number of segments.\n\n' + str(e) + '\n\n') + sys.exit(1) + vals = cursor.fetchone() + return vals[0] def dumpSchema(connectionInfo, envOpts): dump_cmd = 'pg_dump -h {host} -p {port} -U {user} -s -x -O {database}'.format(**connectionInfo) @@ -133,6 +142,7 @@ def main(): version = getVersion(envOpts) + timestamp = datetime.datetime.today() connectionInfo = { @@ -142,9 +152,14 @@ def main(): 'database': db, 'options': pgoptions } + num_segments = 0 + with closing(pgdb.connect(**connectionInfo)) as connection: + with closing(connection.cursor()) as cursor: + num_segments = get_num_segments(cursor) sys.stdout.writelines(['\n-- Greenplum database Statistics Dump', '\n-- Copyright (C) 2007 - 2014 Pivotal' '\n-- Database: ' + db, + '\n-- Num Segments: ' + str(num_segments), '\n-- Date: ' + timestamp.date().isoformat(), '\n-- Time: ' + timestamp.time().isoformat(), '\n-- CmdLine: ' + ' '.join(sys.argv), @@ -168,6 +183,8 @@ def main(): '\n-- Allow system table modifications', '\n-- ', '\nset allow_system_table_mods=true;\n\n']) + # turn off optimizer when loading stats. Orca adds a bit of overhead, but it's significant when small insrt queries take 1 vs .1ms + sys.stdout.writelines('set optimizer to off;\n\n') sys.stdout.flush() try: diff --git a/gpMgmt/bin/minirepro b/gpMgmt/bin/minirepro index 5342dcf175..cf3c28115e 100755 --- a/gpMgmt/bin/minirepro +++ b/gpMgmt/bin/minirepro @@ -97,11 +97,21 @@ def get_server_version(cursor): query = "select version()" try: cursor.execute(query) - vals = cursor.fetchone() - return vals[0] except pgdb.DatabaseError as e: - sys.stderr.write('\nError while trying to find HAWQ/GPDB version.\n\n' + str(e) + '\n\n') + sys.stderr.write('\nError while trying to find GPDB version.\n\n' + str(e) + '\n\n') sys.exit(1) + vals = cursor.fetchone() + return vals[0] + +def get_num_segments(cursor): + query = "select count(*) from gp_segment_configuration where role='p' and content >=0;" + try: + cursor.execute(query) + except pgdb.DatabaseError as e: + sys.stderr.write('\nError while trying to retrieve number of segments.\n\n' + str(e) + '\n\n') + sys.exit(1) + vals = cursor.fetchone() + return vals[0] def parse_cmd_line(): p = OptionParser(usage='Usage: %prog <database> [options]', version='%prog '+version, conflict_handler="resolve", epilog="WARNING: This tool collects statistics about your data, including most common values, which requires some data elements to be included in the output file. Please review output file to ensure it is within corporate policy to transport the output file.") @@ -295,6 +305,8 @@ def main(): # get server version, which is dumped to minirepro output file server_ver = get_server_version(cursor) + num_segments = get_num_segments(cursor) + """ invoke gp_toolkit UDF, dump object oids as json text input: query file name @@ -329,6 +341,8 @@ def main(): # make sure we connect with the right database f_out.writelines('\\connect ' + db + '\n\n') + # turn off optimizer when loading stats. Orca adds a bit of overhead, but it's significant when small insrt queries take 1 vs .1ms + f_out.writelines('set optimizer to off;\n\n') # first create schema DDLs print("Writing schema DDLs ...") @@ -362,7 +376,8 @@ def main(): f_out.writelines(['\n-- ', '\n-- Query text', '\n-- \n\n']) - + line = 'set optimizer_segments = ' + str(num_segments) + ';' + f_out.writelines('\n-- ' + line + '\n') with open(query_file, 'r') as query_f: for line in query_f: f_out.writelines('-- ' + line) diff --git a/src/test/regress/expected/minirepro.out b/src/test/regress/expected/minirepro.out index 79d67481f9..0b4a8e5a06 100644 --- a/src/test/regress/expected/minirepro.out +++ b/src/test/regress/expected/minirepro.out @@ -64,6 +64,7 @@ SET SET SET SET +SET SET set_config ------------ @@ -146,6 +147,7 @@ SET SET SET SET +SET SET set_config ------------ @@ -230,6 +232,7 @@ SET SET SET SET +SET SET set_config ------------ @@ -271,6 +274,7 @@ SET SET SET SET +SET SET set_config ------------ @@ -283,10 +287,10 @@ SET SET SET SET -psql:data/minirepro.sql:46: ERROR: only shared relations can be placed in pg_global tablespace -psql:data/minirepro.sql:48: ERROR: permission denied: "pg_tablespace" is a system catalog -psql:data/minirepro.sql:56: ERROR: permission denied: "pg_tablespace" is a system catalog -psql:data/minirepro.sql:64: ERROR: permission denied: "pg_tablespace" is a system catalog +psql:data/minirepro.sql:48: ERROR: only shared relations can be placed in pg_global tablespace +psql:data/minirepro.sql:50: ERROR: permission denied: "pg_tablespace" is a system catalog +psql:data/minirepro.sql:58: ERROR: permission denied: "pg_tablespace" is a system catalog +psql:data/minirepro.sql:66: ERROR: permission denied: "pg_tablespace" is a system catalog SET UPDATE 1 DELETE 1 --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
