Repository: incubator-hawq
Updated Branches:
  refs/heads/master 77e245dde -> 410f35a7c
HAWQ-991. Check bucketnum and encoding consistency in hawq register.

Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/410f35a7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/410f35a7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/410f35a7

Branch: refs/heads/master
Commit: 410f35a7c6105713afc90a349b626650267a21dd
Parents: 77e245d
Author: xunzhang <xunzhang...@gmail.com>
Authored: Sun Sep 18 17:52:23 2016 +0800
Committer: Lili Ma <ictmal...@gmail.com>
Committed: Mon Sep 19 11:08:15 2016 +0800

----------------------------------------------------------------------
 tools/bin/hawqregister | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/410f35a7/tools/bin/hawqregister
----------------------------------------------------------------------
diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister
index 0c75662..1366bce 100755
--- a/tools/bin/hawqregister
+++ b/tools/bin/hawqregister
@@ -65,6 +65,9 @@ def register_yaml_dict_check(D):
         if D.get(attr) is None:
             logger.error('Wrong configuration yaml file format: "%s" attribute does not exist.\n See example in "hawq register --help".' % attr)
             sys.exit(1)
+    if D['Bucketnum'] <= 0:
+        logger.error('Bucketnum should not be zero, please check your yaml configuration file.')
+        sys.exit(1)
     if D['FileFormat'] in ['Parquet', 'AO']:
         prefix = D['FileFormat']
         local_check_list = ['%s_FileLocations' % prefix, '%s_Schema' % prefix]
@@ -198,6 +201,14 @@ class GpRegisterAccessor(object):
         tabledir = '/'.join([D['location'].strip(), str(D['tablespace_oid']), str(D['database_oid']), str(D['relfilenode']), ''])
         return firstsegno, tabledir
 
+    def get_database_encoding_indx(self, database):
+        query = "select encoding from pg_database where datname = '%s';" % database
+        return self.exec_query(query)[0]['encoding']
+
+    def get_database_encoding(self, encoding_indx):
+        query = "select pg_encoding_to_char(%s);" % encoding_indx
+        return self.exec_query(query)[0]['pg_encoding_to_char']
+
     def update_catalog(self, query):
         self.conn.query(query)
 
@@ -242,6 +253,13 @@ class HawqRegister(object):
                     logger.error('Files to be registered must be multiple times to the bucket number of hash table.')
                     sys.exit(1)
 
+        def check_database_encoding():
+            encoding_indx = self.accessor.get_database_encoding_indx(self.database)
+            encoding = self.accessor.get_database_encoding(encoding_indx)
+            if self.encoding.strip() != encoding:
+                logger.error('Database encoding from yaml configuration file(%s) is not consistent with encoding from input args(%s).' % (self.encoding, encoding))
+                sys.exit(1)
+
         def create_table():
             return self.accessor.do_create_table(self.tablename, self.schema, self.file_format, self.distribution_policy, self.file_locations, self.bucket_number)
 
@@ -282,7 +300,7 @@ class HawqRegister(object):
 
         def set_yml_dataa(file_format, files, sizes, schema, distribution_policy, file_locations,\
                           bucket_number, partitionby, partitions_constraint, partitions_name, partitions_compression_level,\
-                          partitions_compression_type, partitions_checksum, partitions_filepaths, partitions_filesizes):
+                          partitions_compression_type, partitions_checksum, partitions_filepaths, partitions_filesizes, encoding):
             self.file_format = file_format
             self.files = files
             self.sizes = sizes
@@ -298,6 +316,7 @@ class HawqRegister(object):
             self.partitions_checksum = partitions_checksum
             self.partitions_filepaths = partitions_filepaths
             self.partitions_filesizes = partitions_filesizes
+            self.encoding = encoding
 
         def option_parser_yml(yml_file):
             import yaml
@@ -311,8 +330,7 @@ class HawqRegister(object):
             partitions_checksum = []
             partitions_compression_level = []
             partitions_compression_type = []
-            files = []
-            sizes = []
+            files, sizes = [], []
 
             if params['FileFormat'].lower() == 'parquet':
                 partitionby = params.get('Parquet_FileLocations').get('PartitionBy')
@@ -329,8 +347,9 @@ class HawqRegister(object):
                     partitions_name = [d['Name'] for d in params['Parquet_FileLocations']['Partitions']]
                 if len(params['Parquet_FileLocations']['Files']):
                     files, sizes = [params['DFS_URL'] + d['path'] for d in params['Parquet_FileLocations']['Files']], [d['size'] for d in params['Parquet_FileLocations']['Files']]
+                encoding = params['Encoding']
                 set_yml_dataa('Parquet', files, sizes, params['Parquet_Schema'], params['Distribution_Policy'], params['Parquet_FileLocations'], params['Bucketnum'], partitionby,\
-                              partitions_constraint, partitions_name, partitions_compression_level, partitions_compression_type, partitions_checksum, partitions_filepaths, partitions_filesizes)
+                              partitions_constraint, partitions_name, partitions_compression_level, partitions_compression_type, partitions_checksum, partitions_filepaths, partitions_filesizes, encoding)
 
             else: #AO format
                 partitionby = params.get('AO_FileLocations').get('PartitionBy')
@@ -347,13 +366,15 @@ class HawqRegister(object):
                     partitions_name = [d['Name'] for d in params['AO_FileLocations']['Partitions']]
                 if len(params['AO_FileLocations']['Files']):
                     files, sizes = [params['DFS_URL'] + d['path'] for d in params['AO_FileLocations']['Files']], [d['size'] for d in params['AO_FileLocations']['Files']]
+                encoding = params['Encoding']
                 set_yml_dataa('AO', files, sizes, params['AO_Schema'], params['Distribution_Policy'], params['AO_FileLocations'], params['Bucketnum'], partitionby, partitions_constraint,\
-                              partitions_name, partitions_compression_level, partitions_compression_type, partitions_checksum, partitions_filepaths, partitions_filesizes)
+                              partitions_name, partitions_compression_level, partitions_compression_type, partitions_checksum, partitions_filepaths, partitions_filesizes, encoding)
 
         if self.yml:
             option_parser_yml(options.yml_config)
             self.filepath = self.files[0][:self.files[0].rfind('/')] if self.files else ''
         check_distribution_policy()
+        check_database_encoding()
         if self.mode != 'force' and self.mode != 'repair':
             if not create_table():
                 self.mode = 'second_exist'