HAWQ-991. update help info for the updated hawq register in 
tools/doc/hawqregister_help


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/8cc4a042
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/8cc4a042
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/8cc4a042

Branch: refs/heads/master
Commit: 8cc4a042e9d9703d4f3d45689ab8d1f984cfcaa4
Parents: af48376
Author: xunzhang <[email protected]>
Authored: Wed Aug 17 22:34:21 2016 +0800
Committer: rlei <[email protected]>
Committed: Fri Aug 19 10:57:40 2016 +0800

----------------------------------------------------------------------
 .../ManagementTool/test_hawq_register.cpp       |  2 +-
 tools/bin/hawqregister                          | 50 ++++++++++++++++----
 tools/doc/hawqregister_help                     | 40 +++++++++++-----
 3 files changed, 69 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8cc4a042/src/test/feature/ManagementTool/test_hawq_register.cpp
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/test_hawq_register.cpp 
b/src/test/feature/ManagementTool/test_hawq_register.cpp
index e6fead8..00934a9 100644
--- a/src/test/feature/ManagementTool/test_hawq_register.cpp
+++ b/src/test/feature/ManagementTool/test_hawq_register.cpp
@@ -228,7 +228,7 @@ TEST_F(TestHawqRegister, TestUsage1ParquetRandomly2) {
   string filePath = rootPath + relativePath;
   EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put -f " + filePath + " 
hdfs://localhost:8020/hawq_register_hawq.paq"));
   util.execute("drop table if exists nt;");
-  util.execute("create table nt(i int) with (appendonly=true, 
orientation=parquet);");
+  util.execute("create table nt(i int) with (appendonly=true, 
orientation=parquet) distributed randomly;");
   EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) 
HAWQ_DB + " -f hdfs://localhost:8020/hawq_register_hawq.paq nt"));
        util.query("select * from nt;", 3);
        util.execute("insert into nt values(1);");

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8cc4a042/tools/bin/hawqregister
----------------------------------------------------------------------
diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister
index 2b492de..7a20906 100755
--- a/tools/bin/hawqregister
+++ b/tools/bin/hawqregister
@@ -51,19 +51,49 @@ def option_parser():
     parser.add_option('-d', '--database', default = 'postgres', dest = 
'database', help='database name')
     parser.add_option('-f', '--filepath', dest = 'filepath', help='file name 
in HDFS')
     parser.add_option('-c', '--config', dest = 'yml_config', default = '', 
help='configuration file in YAML format')
-    return parser.parse_args()
+    return parser
 
 
 def option_parser_yml(yml_file):
     import yaml
     with open(yml_file, 'r') as f:
         params = yaml.load(f)
+    # check if valid configuration yaml file
+    attrs = ['FileFormat', 'DFS_URL', 'Distribution_Policy']
+    for attr in attrs:
+        if attr not in params.keys():
+            logger.error('Wrong configuration yaml file format, see example in 
"hawq register --help"')
+            sys.exit(1)
     if params['FileFormat'] == 'Parquet':
+        attrs = ['Parquet_FileLocations', 'Parquet_Schema']
+        for attr in attrs:
+            if attr not in params.keys():
+                logger.error('Wrong configuration yaml file format, see 
example in "hawq register --help"')
+                sys.exit(1)
+        if not params['Parquet_FileLocations'].get('Files'):
+            logger.error('Wrong configuration yaml file format, see example in 
"hawq register --help"')
+            sys.exit(1)
+        if not len(params['Parquet_FileLocations']['Files']):
+            return 'Parquet', '', params['Parquet_Schema'], 
params['Distribution_Policy']
         offset = params['Parquet_FileLocations']['Files'][0]['path'].rfind('/')
-        filepath = params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path'][:offset] if 
len(params['Parquet_FileLocations']['Files']) != 1 else params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path']
+        filepath = (params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path'][:offset]
+                    if len(params['Parquet_FileLocations']['Files']) != 1
+                    else params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path'])
         return 'Parquet', filepath, params['Parquet_Schema'], 
params['Distribution_Policy']
+    attrs = ['AO_FileLocations', 'AO_Schema']
+    for attr in attrs:
+        if attr not in params.keys():
+            logger.error('Wrong configuration yaml file format, see example in 
"hawq register --help"')
+            sys.exit(1)
+    if not (params['AO_FileLocations']['Files']):
+        return 'AO', '', params['AO_Schema'], params['Distribution_Policy']
+    if not params['AO_FileLocations'].get('Files'):
+        logger.error('Wrong configuration yaml file format, see example in 
"hawq register --help"')
+        sys.exit(1)
     offset = params['AO_FileLocations']['Files'][0]['path'].rfind('/')
-    filepath = params['DFS_URL'] + 
params['AO_FileLocations']['Files'][0]['path'][:offset] if 
len(params['AO_FileLocations']['Files']) != 1 else params['DFS_URL'] + 
params['AO_FileLocations']['Files'][0]['path']
+    filepath = (params['DFS_URL'] + 
params['AO_FileLocations']['Files'][0]['path'][:offset]
+                if len(params['AO_FileLocations']['Files']) != 1
+                else params['DFS_URL'] + 
params['AO_FileLocations']['Files'][0]['path'])
     return 'AO', filepath, params['AO_Schema'], params['Distribution_Policy']
 
 
@@ -107,18 +137,18 @@ def get_seg_name(dburl, tablename, database, fmt):
 
 
 def check_hash_type(dburl, tablename):
-    '''Check whether target table is hash-typed, in that case simple insertion 
does not work'''
+    '''Check whether target table is hash distributed, in that case simple 
insertion does not work'''
     try:
         query = "select attrnums from gp_distribution_policy, pg_class where 
pg_class.relname = '%s' and pg_class.oid = gp_distribution_policy.localoid;" % 
tablename
         conn = dbconn.connect(dburl, False)
         rows = dbconn.execSQL(conn, query)
         conn.commit()
         if not rows.rowcount:
-            logger.error('Target not found in table gp_distribution_policy.')
+            logger.error('Table not found in table gp_distribution_policy.' % 
tablename)
             sys.exit(1)
         for row in rows:
             if row[0]:
-                logger.error('Cannot register file(s) to a table which is 
hash-typed.')
+                logger.error('Cannot register file(s) to a table which is hash 
distribuetd.')
                 sys.exit(1)
         conn.close()
     except DatabaseError, ex:
@@ -154,8 +184,7 @@ def get_metadata_from_database(dburl, tablename, seg_name):
         logger.error('Failed to execute query "%s"' % query)
         sys.exit(1)
     for row in rows:
-        tabledir = row[0].strip() + "/" + str(row[1]) + "/" + str(row[2]) + 
"/" + str(row[3]) + "/"
-        #tabledir = '/'.join([row[0], str(row[1]), str(row[2]), str(row[3]), 
''])
+        tabledir = '/'.join([row[0].strip(), str(row[1]), str(row[2]), 
str(row[3]), ''])
     return firstsegno, tabledir
 
 
@@ -263,9 +292,10 @@ def insert_metadata_into_database(dburl, databasename, 
tablename, seg_name, firs
 
 
 if __name__ == '__main__':
-    options, args = option_parser()
+    parser = option_parser()
+    options, args = parser.parse_args()
     if len(args) != 1 or (options.yml_config and options.filepath):
-        logger.error('Incorrect usage!\n Correct usage: "hawq register [-h 
hostname] [-p port] [-U username] [-d database] [-f filepath] tablename"\n or 
"hawq register [-h hostname] [-p port] [-U username] [-d database] [-c config] 
tablename"\n')
+        parser.print_help(sys.stderr)
         sys.exit(1)
     if local_ssh('hadoop'):
         logger.error('command "hadoop" is not available.')

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8cc4a042/tools/doc/hawqregister_help
----------------------------------------------------------------------
diff --git a/tools/doc/hawqregister_help b/tools/doc/hawqregister_help
index ade1e3a..a664127 100644
--- a/tools/doc/hawqregister_help
+++ b/tools/doc/hawqregister_help
@@ -1,12 +1,14 @@
 COMMAND NAME: hawq register
 
-Register parquet files generated by other system into the corrsponding table 
in HAWQ
+Usage1: Register parquet files generated by other system into the corresponding 
table in HAWQ
+Usage2: Register parquet/ao table from latest-sync-metadata in yaml format
 
 *****************************************************
 SYNOPSIS
 *****************************************************
 
-hawq register [-h hostname] [-p port] [-U username] <databasename> <tablename> 
<hdfspath>
+Usage1: hawq register [-h hostname] [-p port] [-U username] [-d databasename] 
[-f filepath] <tablename>
+Usage2: hawq register [-h hostname] [-p port] [-U username] [-d databasename] 
[-c config] <tablename>
 
 hawq register help
 hawq register -?
@@ -17,6 +19,7 @@ hawq register --version
 DESCRIPTION
 *****************************************************
 
+Use Case1:
 "hawq register" is a utility to register file(s) on HDFS into
 the table in HAWQ. It moves the file in the path(if path
 refers to a file) or files under the path(if path refers to a
@@ -33,23 +36,24 @@ is created by using "distributed by" statement when 
creating that table.
 The file(s) to be registered and the table in HAWQ must be in the
 same HDFS cluster.
 
+Use Case2:
+User should be able to use hawq register to register table files into a new 
HAWQ cluster.
+It serves as a protection against catalog corruption from the users' perspective.
+Users use the last-known-good metadata to update the portion of catalog 
managing HDFS blocks.
+The table files or directory should be backed up (such as by using distcp) into 
the same path in the new HDFS setting.
+
+To use "hawq register", HAWQ must have been started.
+Currently "hawq register" supports both AO and Parquet formats in this case.
+The partition table is not supported in this version, and we will support it 
soon.
+
 *****************************************************
 Arguments
 *****************************************************
 
-<databasename>
-
-Name of the database to be operated on.
-
 <tablename>
 
 Name of the table to be registered into.
 
-<hdfspath>
-
-The path of the file or the directory containing the files
-that will be registered.
-
 *****************************************************
 OPTIONS
 *****************************************************
@@ -85,7 +89,7 @@ CONNECTION OPTIONS
   system user name.
 
 *****************************************************
-EXAMPLES
+EXAMPLE FOR USAGE1
 *****************************************************
 
 Run "hawq register" to register a parquet file in HDFS with path
@@ -104,6 +108,18 @@ update the meta data of the table 'parquet_table' in HAWQ 
which is in the
 table 'pg_aoseg.pg_paqseg_77160'.
 
 *****************************************************
+EXAMPLE FOR USAGE2
+*****************************************************
+$ psql -c "drop table if exists table;"
+$ psql -c "create table table(i int) with (appendonly=true, 
orientation=parquet) distributed by (i);"
+$ psql -c "insert into table values(1), (2), (3);"
+$ hawq extract -d postgres -o t.yml table
+$ hawq register -d postgres -c t.yml newtable
+In this example, suppose that "table" is a table in the old HAWQ cluster; the 
user dumps the "t.yml" yaml file to
+save the metadata of "table". To register "newtable" in a new HAWQ 
cluster, the user runs "hawq register"
+to register the new table with the given yaml file "t.yml".
+
+*****************************************************
 DATA TYPES
 *****************************************************
 The data types used in HAWQ and parquet format are not the same, so there is a

Reply via email to