Repository: incubator-hawq
Updated Branches:
  refs/heads/master eb10af42d -> 08ed4bc39


HAWQ-1061. Add check_sizes_valid for all modes of hawq register.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/08ed4bc3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/08ed4bc3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/08ed4bc3

Branch: refs/heads/master
Commit: 08ed4bc39f42ecf546e8e02bf54b28397154add8
Parents: eb10af4
Author: xunzhang <xunzhang...@gmail.com>
Authored: Tue Sep 20 17:25:22 2016 +0800
Committer: Ruilong Huo <r...@pivotal.io>
Committed: Tue Sep 20 18:11:09 2016 +0800

----------------------------------------------------------------------
 tools/bin/hawqregister | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/08ed4bc3/tools/bin/hawqregister
----------------------------------------------------------------------
diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister
index 89e9f4b..d030854 100755
--- a/tools/bin/hawqregister
+++ b/tools/bin/hawqregister
@@ -66,8 +66,8 @@ def register_yaml_dict_check(D):
             logger.error('Wrong configuration yaml file format: "%s" attribute 
does not exist.\n See example in "hawq register --help".' % attr)
             sys.exit(1)
     if D['Bucketnum'] <= 0:
-            logger.error('Bucketnum should not be zero, please check your yaml 
configuration file.')
-            sys.exit(1)
+        logger.error('Bucketnum should not be zero, please check your yaml 
configuration file.')
+        sys.exit(1)
     if D['FileFormat'] in ['Parquet', 'AO']:
         prefix = D['FileFormat']
         local_check_list = ['%s_FileLocations' % prefix, '%s_Schema' % prefix]
@@ -139,9 +139,9 @@ class GpRegisterAccessor(object):
         schema = ','.join([k['name'] + ' ' + k['type'] for k in schema_info])
         partlist = ""
         for index in range(len(partitions_constraint)):
-          if index > 0:
-              partlist += ", "
-          partlist = partlist + "partition " + partitions_name[index] + " " + 
partitions_constraint[index]
+            if index > 0:
+                partlist += ", "
+            partlist = partlist + "partition " + partitions_name[index] + " " 
+ partitions_constraint[index]
           
         fmt = 'ROW' if fmt == 'AO' else fmt
         if fmt == 'ROW':
@@ -150,7 +150,7 @@ class GpRegisterAccessor(object):
                          % (tablename, schema, fmt, 
file_locations['CompressionType'], file_locations['CompressionLevel'], 
file_locations['Checksum'], bucket_number, distrbution_policy))
             else:
                 query = ('create table %s(%s) with (appendonly=true, 
orientation=%s, compresstype=%s, compresslevel=%s, checksum=%s, bucketnum=%s) 
%s %s (%s);'
-                         % (tablename, schema,fmt, 
file_locations['CompressionType'], file_locations['CompressionLevel'], 
file_locations['Checksum'], bucket_number, distrbution_policy, partitionby, 
partlist))
+                         % (tablename, schema, fmt, 
file_locations['CompressionType'], file_locations['CompressionLevel'], 
file_locations['Checksum'], bucket_number, distrbution_policy, partitionby, 
partlist))
         else: # Parquet
             if partitionby is None:
                 query = ('create table %s(%s) with (appendonly=true, 
orientation=%s, compresstype=%s, compresslevel=%s, pagesize=%s, 
rowgroupsize=%s, bucketnum=%s) %s;'
@@ -168,7 +168,7 @@ class GpRegisterAccessor(object):
             logger.error('Table %s is not an append-only table. There is no 
record in gp_distribution_policy table.' % tablename)
             sys.exit(1)
         if rows[0]['attrnums']:
-            logger.error('Cannot register file(s) to a table which is hash 
distribuetd.')
+            logger.error('Cannot register file(s) to a table which is hash 
distributed.')
             sys.exit(1)
 
     # pg_paqseg_#
@@ -397,6 +397,14 @@ class HawqRegister(object):
                     logger.info('%s is not a file in hdfs, please check the 
yaml configuration file.' % fn)
                     sys.exit(1)
 
+        def check_sizes_valid():
+            for k, fn in enumerate(self.files):
+                hdfscmd = 'hdfs dfs -du %s' % fn
+                _, out, _ = local_ssh_output(hdfscmd)
+                if self.sizes[k] > int(out.strip().split()[0]):
+                    logger.error('File size(%s) in yaml configuration file 
should not exceed actual length(%s) of file %s.' % (self.sizes[k], 
out.strip().split()[0], fn))
+                    sys.exit(1)
+
         if self.yml:
             option_parser_yml(options.yml_config)
             self.filepath = self.files[0][:self.files[0].rfind('/')] if 
self.files else ''
@@ -413,7 +421,7 @@ class HawqRegister(object):
             check_hash_type() # Usage1 only support randomly distributed table
         if not self.filepath:
             if self.mode == 'first':
-                logger('Please specify filepath with -f option.')
+                logger.info('Please specify filepath with -f option.')
             else:
                 logger.info('Hawq Register Succeed.')
             sys.exit(0)
@@ -482,6 +490,7 @@ class HawqRegister(object):
                 logger.error('-e option is only supported with single file 
case.')
                 sys.exit(1)
             self.sizes = [self.filesize]
+        check_sizes_valid()
 
         if self.file_format == 'Parquet':
             self._check_parquet_format(self.files)
@@ -633,7 +642,7 @@ class HawqRegister(object):
         for pos, constraint in enumerate(self.partitions_constraint):
             if partitions.has_key(constraint):
                 mappings.extend([(partitions[constraint], 
(self.partitions_filepaths[pos][i], self.partitions_filesizes[pos][i]))
-                            for i in 
xrange(len(self.partitions_filepaths[pos]))])
+                                for i in 
xrange(len(self.partitions_filepaths[pos]))])
         return mappings
 
     def register(self):

Reply via email to