HAWQ-991. Update the help info for the updated hawq register in tools/doc/hawqregister_help
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/8cc4a042 Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/8cc4a042 Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/8cc4a042 Branch: refs/heads/master Commit: 8cc4a042e9d9703d4f3d45689ab8d1f984cfcaa4 Parents: af48376 Author: xunzhang <[email protected]> Authored: Wed Aug 17 22:34:21 2016 +0800 Committer: rlei <[email protected]> Committed: Fri Aug 19 10:57:40 2016 +0800 ---------------------------------------------------------------------- .../ManagementTool/test_hawq_register.cpp | 2 +- tools/bin/hawqregister | 50 ++++++++++++++++---- tools/doc/hawqregister_help | 40 +++++++++++----- 3 files changed, 69 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8cc4a042/src/test/feature/ManagementTool/test_hawq_register.cpp ---------------------------------------------------------------------- diff --git a/src/test/feature/ManagementTool/test_hawq_register.cpp b/src/test/feature/ManagementTool/test_hawq_register.cpp index e6fead8..00934a9 100644 --- a/src/test/feature/ManagementTool/test_hawq_register.cpp +++ b/src/test/feature/ManagementTool/test_hawq_register.cpp @@ -228,7 +228,7 @@ TEST_F(TestHawqRegister, TestUsage1ParquetRandomly2) { string filePath = rootPath + relativePath; EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put -f " + filePath + " hdfs://localhost:8020/hawq_register_hawq.paq")); util.execute("drop table if exists nt;"); - util.execute("create table nt(i int) with (appendonly=true, orientation=parquet);"); + util.execute("create table nt(i int) with (appendonly=true, orientation=parquet) distributed randomly;"); EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -f hdfs://localhost:8020/hawq_register_hawq.paq nt")); util.query("select * from 
nt;", 3); util.execute("insert into nt values(1);"); http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8cc4a042/tools/bin/hawqregister ---------------------------------------------------------------------- diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister index 2b492de..7a20906 100755 --- a/tools/bin/hawqregister +++ b/tools/bin/hawqregister @@ -51,19 +51,49 @@ def option_parser(): parser.add_option('-d', '--database', default = 'postgres', dest = 'database', help='database name') parser.add_option('-f', '--filepath', dest = 'filepath', help='file name in HDFS') parser.add_option('-c', '--config', dest = 'yml_config', default = '', help='configuration file in YAML format') - return parser.parse_args() + return parser def option_parser_yml(yml_file): import yaml with open(yml_file, 'r') as f: params = yaml.load(f) + # check if valid configuration yaml file + attrs = ['FileFormat', 'DFS_URL', 'Distribution_Policy'] + for attr in attrs: + if attr not in params.keys(): + logger.error('Wrong configuration yaml file format, see example in "hawq register --help"') + sys.exit(1) if params['FileFormat'] == 'Parquet': + attrs = ['Parquet_FileLocations', 'Parquet_Schema'] + for attr in attrs: + if attr not in params.keys(): + logger.error('Wrong configuration yaml file format, see example in "hawq register --help"') + sys.exit(1) + if not params['Parquet_FileLocations'].get('Files'): + logger.error('Wrong configuration yaml file format, see example in "hawq register --help"') + sys.exit(1) + if not len(params['Parquet_FileLocations']['Files']): + return 'Parquet', '', params['Parquet_Schema'], params['Distribution_Policy'] offset = params['Parquet_FileLocations']['Files'][0]['path'].rfind('/') - filepath = params['DFS_URL'] + params['Parquet_FileLocations']['Files'][0]['path'][:offset] if len(params['Parquet_FileLocations']['Files']) != 1 else params['DFS_URL'] + params['Parquet_FileLocations']['Files'][0]['path'] + filepath = (params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path'][:offset] + if len(params['Parquet_FileLocations']['Files']) != 1 + else params['DFS_URL'] + params['Parquet_FileLocations']['Files'][0]['path']) return 'Parquet', filepath, params['Parquet_Schema'], params['Distribution_Policy'] + attrs = ['AO_FileLocations', 'AO_Schema'] + for attr in attrs: + if attr not in params.keys(): + logger.error('Wrong configuration yaml file format, see example in "hawq register --help"') + sys.exit(1) + if not (params['AO_FileLocations']['Files']): + return 'AO', '', params['AO_Schema'], params['Distribution_Policy'] + if not params['AO_FileLocations'].get('Files'): + logger.error('Wrong configuration yaml file format, see example in "hawq register --help"') + sys.exit(1) offset = params['AO_FileLocations']['Files'][0]['path'].rfind('/') - filepath = params['DFS_URL'] + params['AO_FileLocations']['Files'][0]['path'][:offset] if len(params['AO_FileLocations']['Files']) != 1 else params['DFS_URL'] + params['AO_FileLocations']['Files'][0]['path'] + filepath = (params['DFS_URL'] + params['AO_FileLocations']['Files'][0]['path'][:offset] + if len(params['AO_FileLocations']['Files']) != 1 + else params['DFS_URL'] + params['AO_FileLocations']['Files'][0]['path']) return 'AO', filepath, params['AO_Schema'], params['Distribution_Policy'] @@ -107,18 +137,18 @@ def get_seg_name(dburl, tablename, database, fmt): def check_hash_type(dburl, tablename): - '''Check whether target table is hash-typed, in that case simple insertion does not work''' + '''Check whether target table is hash distributed, in that case simple insertion does not work''' try: query = "select attrnums from gp_distribution_policy, pg_class where pg_class.relname = '%s' and pg_class.oid = gp_distribution_policy.localoid;" % tablename conn = dbconn.connect(dburl, False) rows = dbconn.execSQL(conn, query) conn.commit() if not rows.rowcount: - logger.error('Target not found in table gp_distribution_policy.') + logger.error('Table not 
found in table gp_distribution_policy.' % tablename) sys.exit(1) for row in rows: if row[0]: - logger.error('Cannot register file(s) to a table which is hash-typed.') + logger.error('Cannot register file(s) to a table which is hash distribuetd.') sys.exit(1) conn.close() except DatabaseError, ex: @@ -154,8 +184,7 @@ def get_metadata_from_database(dburl, tablename, seg_name): logger.error('Failed to execute query "%s"' % query) sys.exit(1) for row in rows: - tabledir = row[0].strip() + "/" + str(row[1]) + "/" + str(row[2]) + "/" + str(row[3]) + "/" - #tabledir = '/'.join([row[0], str(row[1]), str(row[2]), str(row[3]), '']) + tabledir = '/'.join([row[0].strip(), str(row[1]), str(row[2]), str(row[3]), '']) return firstsegno, tabledir @@ -263,9 +292,10 @@ def insert_metadata_into_database(dburl, databasename, tablename, seg_name, firs if __name__ == '__main__': - options, args = option_parser() + parser = option_parser() + options, args = parser.parse_args() if len(args) != 1 or (options.yml_config and options.filepath): - logger.error('Incorrect usage!\n Correct usage: "hawq register [-h hostname] [-p port] [-U username] [-d database] [-f filepath] tablename"\n or "hawq register [-h hostname] [-p port] [-U username] [-d database] [-c config] tablename"\n') + parser.print_help(sys.stderr) sys.exit(1) if local_ssh('hadoop'): logger.error('command "hadoop" is not available.') http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8cc4a042/tools/doc/hawqregister_help ---------------------------------------------------------------------- diff --git a/tools/doc/hawqregister_help b/tools/doc/hawqregister_help index ade1e3a..a664127 100644 --- a/tools/doc/hawqregister_help +++ b/tools/doc/hawqregister_help @@ -1,12 +1,14 @@ COMMAND NAME: hawq register -Register parquet files generated by other system into the corrsponding table in HAWQ +Usage1: Register parquet files generated by other system into the corrsponding table in HAWQ +Usage2: Register parquet/ao table from 
laterst-sync-metadata in yaml format ***************************************************** SYNOPSIS ***************************************************** -hawq register [-h hostname] [-p port] [-U username] <databasename> <tablename> <hdfspath> +Usage1: hawq register [-h hostname] [-p port] [-U username] [-d databasename] [-f filepath] <tablename> +Usage2: hawq register [-h hostname] [-p port] [-U username] [-d databasename] [-c config] <tablename> hawq register help hawq register -? @@ -17,6 +19,7 @@ hawq register --version DESCRIPTION ***************************************************** +Use Case1: "hawq register" is a utility to register file(s) on HDFS into the table in HAWQ. It moves the file in the path(if path refers to a file) or files under the path(if path refers to a @@ -33,23 +36,24 @@ is created by using "distributed by" statement when creating that table. The file(s) to be registered and the table in HAWQ must be in the same HDFS cluster. +Use Case2: +User should be able to use hawq register to register table files into a new HAWQ cluster. +It is some kind of protecting against corruption from users' perspective. +Users use the last-known-good metadata to update the portion of catalog managing HDFS blocks. +The table files or dictionary should be backuped(such as using distcp) into the same path in the new HDFS setting. + +To use "hawq register", HAWQ must have been started. +Currently "hawq register" supports both AO and Parquet formats in this case. +The partition table is not supported in this version, and we will support it soon. + ***************************************************** Arguments ***************************************************** -<databasename> - -Name of the database to be operated on. - <tablename> Name of the table to be registered into. -<hdfspath> - -The path of the file or the directory containing the files -that will be registered. 
- ***************************************************** OPTIONS ***************************************************** @@ -85,7 +89,7 @@ CONNECTION OPTIONS system user name. ***************************************************** -EXAMPLES +EXAMPLE FOR USAGE1 ***************************************************** Run "hawq register" to register a parquet file in HDFS with path @@ -104,6 +108,18 @@ update the meta data of the table 'parquet_table' in HAWQ which is in the table 'pg_aoseg.pg_paqseg_77160'. ***************************************************** +EXAMPLE FOR USAGE2 +***************************************************** +$ psql -c "drop table if exists table;" +$ psql -c "create table table(i int) with (appendonly=true, orientation=parquet) distributed by (i);" +$ psql -c "insert into table values(1), (2), (3);" +$ hawq extract -d postgres -o t.yml table +$ hawq register -d postgres -c t.yml newtable +In this example, suppose that "table" is a table in old HAWQ Cluster, user dump "t.yml" yaml file to +save the metadata of "table". To register the "newtable" in a new HAWQ Cluster, user run "hawq register" +to register the newtable with the given yaml file "t.yml". + +***************************************************** DATA TYPES ***************************************************** The data types used in HAWQ and parquet format are not the same, so there is a
