Repository: incubator-hawq Updated Branches: refs/heads/master eb10af42d -> 08ed4bc39
HAWQ-1061. Add check_sizes_valid for all mode of hawq register. Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/08ed4bc3 Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/08ed4bc3 Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/08ed4bc3 Branch: refs/heads/master Commit: 08ed4bc39f42ecf546e8e02bf54b28397154add8 Parents: eb10af4 Author: xunzhang <[email protected]> Authored: Tue Sep 20 17:25:22 2016 +0800 Committer: Ruilong Huo <[email protected]> Committed: Tue Sep 20 18:11:09 2016 +0800 ---------------------------------------------------------------------- tools/bin/hawqregister | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/08ed4bc3/tools/bin/hawqregister ---------------------------------------------------------------------- diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister index 89e9f4b..d030854 100755 --- a/tools/bin/hawqregister +++ b/tools/bin/hawqregister @@ -66,8 +66,8 @@ def register_yaml_dict_check(D): logger.error('Wrong configuration yaml file format: "%s" attribute does not exist.\n See example in "hawq register --help".' % attr) sys.exit(1) if D['Bucketnum'] <= 0: - logger.error('Bucketnum should not be zero, please check your yaml configuration file.') - sys.exit(1) + logger.error('Bucketnum should not be zero, please check your yaml configuration file.') + sys.exit(1) if D['FileFormat'] in ['Parquet', 'AO']: prefix = D['FileFormat'] local_check_list = ['%s_FileLocations' % prefix, '%s_Schema' % prefix] @@ -139,9 +139,9 @@ class GpRegisterAccessor(object): schema = ','.join([k['name'] + ' ' + k['type'] for k in schema_info]) partlist = "" for index in range(len(partitions_constraint)): - if index > 0: - partlist += ", " - partlist = partlist + "partition " + partitions_name[index] + " " + partitions_constraint[index] + if index > 0: + partlist += ", " + partlist = partlist + "partition " + partitions_name[index] + " " + partitions_constraint[index] fmt = 'ROW' if fmt == 'AO' else fmt if fmt == 'ROW': @@ -150,7 +150,7 @@ class GpRegisterAccessor(object): % (tablename, schema, fmt, file_locations['CompressionType'], file_locations['CompressionLevel'], file_locations['Checksum'], bucket_number, distrbution_policy)) else: query = ('create table %s(%s) with (appendonly=true, orientation=%s, compresstype=%s, compresslevel=%s, checksum=%s, bucketnum=%s) %s %s (%s);' - % (tablename, schema,fmt, file_locations['CompressionType'], file_locations['CompressionLevel'], file_locations['Checksum'], bucket_number, distrbution_policy, partitionby, partlist)) + % (tablename, schema, fmt, file_locations['CompressionType'], file_locations['CompressionLevel'], file_locations['Checksum'], bucket_number, distrbution_policy, partitionby, partlist)) else: # Parquet if partitionby is None: query = ('create table %s(%s) with (appendonly=true, orientation=%s, compresstype=%s, compresslevel=%s, pagesize=%s, rowgroupsize=%s, bucketnum=%s) %s;' @@ -168,7 +168,7 @@ class GpRegisterAccessor(object): logger.error('Table %s is not an append-only table. There is no record in gp_distribution_policy table.' % tablename) sys.exit(1) if rows[0]['attrnums']: - logger.error('Cannot register file(s) to a table which is hash distribuetd.') + logger.error('Cannot register file(s) to a table which is hash distributed.') sys.exit(1) # pg_paqseg_# @@ -397,6 +397,14 @@ class HawqRegister(object): logger.info('%s is not a file in hdfs, please check the yaml configuration file.' % fn) sys.exit(1) + def check_sizes_valid(): + for k, fn in enumerate(self.files): + hdfscmd = 'hdfs dfs -du %s' % fn + _, out, _ = local_ssh_output(hdfscmd) + if self.sizes[k] > int(out.strip().split()[0]): + logger.error('File size(%s) in yaml configuration file should not exceed actual length(%s) of file %s.' % (self.sizes[k], out.strip().split()[0], fn)) + sys.exit(1) + if self.yml: option_parser_yml(options.yml_config) self.filepath = self.files[0][:self.files[0].rfind('/')] if self.files else '' @@ -413,7 +421,7 @@ class HawqRegister(object): check_hash_type() # Usage1 only support randomly distributed table if not self.filepath: if self.mode == 'first': - logger('Please specify filepath with -f option.') + logger.info('Please specify filepath with -f option.') else: logger.info('Hawq Register Succeed.') sys.exit(0) @@ -482,6 +490,7 @@ class HawqRegister(object): logger.error('-e option is only supported with single file case.') sys.exit(1) self.sizes = [self.filesize] + check_sizes_valid() if self.file_format == 'Parquet': self._check_parquet_format(self.files) @@ -633,7 +642,7 @@ class HawqRegister(object): for pos, constraint in enumerate(self.partitions_constraint): if partitions.has_key(constraint): mappings.extend([(partitions[constraint], (self.partitions_filepaths[pos][i], self.partitions_filesizes[pos][i])) - for i in xrange(len(self.partitions_filepaths[pos]))]) + for i in xrange(len(self.partitions_filepaths[pos]))]) return mappings def register(self):
